1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2 /* */
3 /* This file is part of the program and library */
4 /* SCIP --- Solving Constraint Integer Programs */
5 /* */
6 /* Copyright (C) 2002-2021 Konrad-Zuse-Zentrum */
7 /* fuer Informationstechnik Berlin */
8 /* */
9 /* SCIP is distributed under the terms of the ZIB Academic License. */
10 /* */
11 /* You should have received a copy of the ZIB Academic License */
12 /* along with SCIP; see the file COPYING. If not visit scipopt.org. */
13 /* */
14 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
15
16 /**@file xmldef.h
17 * @ingroup OTHER_CFILES
18 * @brief declarations for XML parsing
19 * @author Thorsten Koch
20 * @author Marc Pfetsch
21 *
22 * If SPEC_LIKE_SPACE_HANDLING is not defined, all LF,CR will be changed into spaces and from a
23 * sequence of spaces only one will be used.
24 *
25 * @todo Implement possibility to avoid the construction of parsing information for certain tags
26 * (and their children). For solution files this would avoid parsing the constraints section.
27 */
28
29 /*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
30
31 #include <blockmemshell/memory.h>
32
33 #include "xml.h"
34 #include "xmldef.h"
35 #include "scip/misc.h"
36
37
38 #include <sys/types.h>
39 #ifdef SCIP_WITH_ZLIB
40 #if defined(_WIN32) || defined(_WIN64)
41 #define R_OK _A_RDONLY
42 #define access _access
43 #include <io.h>
44 #else
45 #include <unistd.h>
46 #endif
47 #endif
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <assert.h>
51 #include <ctype.h>
52 #include <string.h>
53
54
55 #define NAME_EXT_SIZE 128
56 #define ATTR_EXT_SIZE 4096
57 #define DATA_EXT_SIZE 4096
58 #define LINE_BUF_SIZE 8192
59
60 #define xmlError(a, b) xmlErrmsg(a, b, FALSE, __FILE__, __LINE__)
61
62
63 /* forward declarations */
64 typedef struct parse_stack_struct PSTACK;
65 typedef struct parse_pos_struct PPOS;
66
67 /** state of the parser */
68 enum parse_state_enum
69 {
70 XML_STATE_ERROR,
71 XML_STATE_BEFORE,
72 XML_STATE_IN_TAG,
73 XML_STATE_PCDATA,
74 XML_STATE_EOF
75 };
76 typedef enum parse_state_enum PSTATE;
77
78 /** Stack as a (singly) linked list. The top element is the current node. */
79 struct parse_stack_struct
80 {
81 XML_NODE* node;
82 PSTACK* next;
83 };
84
85 /** Store the current position in the file and the state of the parser. */
86 struct parse_pos_struct
87 {
88 const char* filename;
89 FPTYPE fp;
90 char buf[LINE_BUF_SIZE];
91 int pos;
92 int lineno;
93 int nextsym;
94 int lastsym;
95 PSTATE state;
96 PSTACK* top;
97 };
98
99
100 /** output error message with corresponding line and position */
xmlErrmsg(PPOS * ppos,const char * msg,XML_Bool msg_only,const char * file,int line)101 static void xmlErrmsg(
102 PPOS* ppos,
103 const char* msg,
104 XML_Bool msg_only,
105 const char* file,
106 int line
107 )
108 {
109 #ifndef NDEBUG
110 int ret;
111 assert( ppos != NULL );
112
113 if ( ! msg_only )
114 {
115 ret = fprintf(stderr, "%s(%d) Error in file %s line %d\n", file, line, ppos->filename, ppos->lineno);
116 assert(ret >= 0);
117
118 ret = fprintf(stderr, "%s", ppos->buf);
119 assert(ret >= 0);
120
121 if ( strchr(ppos->buf, '\n') == NULL )
122 {
123 int retc;
124
125 retc = fputc('\n', stderr);
126 assert(retc != EOF);
127 }
128
129 ret = fprintf(stderr, "%*s\n", ppos->pos, "^");
130 assert(ret >= 0);
131 }
132 ret = fprintf(stderr, "%s\n\n", msg);
133 assert(ret >= 0);
134
135 #else
136
137 if ( ! msg_only )
138 {
139 (void) fprintf(stderr, "%s(%d) Error in file %s line %d\n", file, line, ppos->filename, ppos->lineno);
140
141 (void) fprintf(stderr, "%s", ppos->buf);
142
143 if ( strchr(ppos->buf, '\n') == NULL )
144 {
145 (void) fputc('\n', stderr);
146 }
147
148 (void) fprintf(stderr, "%*s\n", ppos->pos, "^");
149 }
150 (void) fprintf(stderr, "%s\n\n", msg);
151 #endif
152 }
153
154
155 /** Push new element on the parse stack.
156 *
157 * TRUE if it worked, FAILURE otherwise.
158 */
159 static
pushPstack(PPOS * ppos,XML_NODE * node)160 XML_Bool pushPstack(
161 PPOS* ppos,
162 XML_NODE* node
163 )
164 {
165 PSTACK* p;
166
167 assert(ppos != NULL);
168 assert(node != NULL);
169
170 debugMessage("Pushing %s\n", node->name);
171
172 ALLOC_FALSE( BMSallocMemory(&p) );
173 assert(p != NULL);
174
175 p->node = node;
176 p->next = ppos->top;
177 ppos->top = p;
178
179 return TRUE;
180 }
181
182 /** returns top element on stack (which has to be present) */
topPstack(const PPOS * ppos)183 static XML_NODE* topPstack(
184 const PPOS* ppos
185 )
186 {
187 assert(ppos != NULL);
188 assert(ppos->top != NULL);
189
190 return ppos->top->node;
191 }
192
193 /** remove top element from stack and deletes it
194 *
195 * TRUE if ok, FALSE otherwise
196 */
197 static
popPstack(PPOS * ppos)198 XML_Bool popPstack(
199 PPOS* ppos /**< input stream position */
200 )
201 {
202 PSTACK* p;
203 XML_Bool result;
204
205 assert(ppos != NULL);
206
207 if ( ppos->top == NULL )
208 {
209 xmlError(ppos, "Stack underflow");
210 result = FALSE;
211 }
212 else
213 {
214 result = TRUE;
215 p = ppos->top;
216 ppos->top = p->next;
217
218 debugMessage("Poping %s\n", p->node->name);
219 BMSfreeMemory(&p);
220 }
221 return result;
222 }
223
224 /** remove complete stack */
225 static
clearPstack(PPOS * ppos)226 void clearPstack(
227 PPOS* ppos
228 )
229 {
230 assert(ppos != NULL);
231
232 while ( ppos->top != NULL )
233 (void) popPstack(ppos);
234 }
235
236 /** Returns the next character from the input buffer and fills the buffer if it is empty (similar to fgetc()). */
237 static
mygetc(PPOS * ppos)238 int mygetc(
239 PPOS* ppos
240 )
241 {
242 assert(ppos != NULL);
243 assert(ppos->fp != NULL);
244 assert(ppos->pos < LINE_BUF_SIZE);
245
246 if ( ppos->buf[ppos->pos] == '\0' )
247 {
248 #ifdef SCIP_DISABLED_CODE
249 /* the low level function gzread/fread used below seem to be faster */
250 if ( NULL == FGETS(ppos->buf, sizeof(ppos->buf), ppos->fp) )
251 return EOF;
252 #else
253 size_t len = (size_t) FREAD(ppos->buf, sizeof(ppos->buf) - 1, ppos->fp); /*lint !e571 !e747*/
254
255 if( len == 0 || len > sizeof(ppos->buf) - 1 )
256 return EOF;
257
258 ppos->buf[len] = '\0';
259 #endif
260 ppos->pos = 0;
261 }
262 return (unsigned char)ppos->buf[ppos->pos++];
263 }
264
265
266 #ifdef SPEC_LIKE_SPACE_HANDLING
267 /** Read input from fp_in.
268 *
269 * If there is a LF, CR, CR/LF, or LF/CR it returns exactly on LF. Also counts the number of
270 * characters.
271 */
272 static
getsymbol(PPOS * ppos)273 int getsymbol(
274 PPOS* ppos
275 )
276 {
277 int c;
278
279 assert(ppos != NULL);
280
281 if ( ppos->nextsym == 0 )
282 c = mygetc(ppos);
283 else
284 {
285 c = ppos->nextsym;
286 ppos->nextsym = 0;
287 }
288 assert(ppos->nextsym == 0);
289
290 if (((c == '\n') && (ppos->lastsym == '\r')) || ((c == '\r') && (ppos->lastsym == '\n')))
291 c = mygetc(ppos);
292
293 ppos->lastsym = c;
294
295 if ( c == '\r' )
296 c = '\n';
297
298 if ( c == '\n' )
299 ++ppos->lineno;
300
301 return c;
302 }
303 #else
304 /** Read input from fp_in (variant).
305 *
306 * Here we convert all LF or CR into SPACE and return maximally one SPACE after the other.
307 *
308 * @note This function counts lines differently. On systems that have only one '\\r' as line feed
309 * (MAC) it does not count correctly.
310 */
311 static
getsymbol(PPOS * ppos)312 int getsymbol(
313 PPOS* ppos
314 )
315 {
316 int c;
317
318 assert(ppos != NULL);
319
320 do
321 {
322 if ( ppos->nextsym == 0 )
323 c = mygetc(ppos);
324 else
325 {
326 c = ppos->nextsym;
327 ppos->nextsym = 0;
328 }
329 assert(ppos->nextsym == 0);
330
331 if ( c == '\n' )
332 ++ppos->lineno;
333
334 if ((c == '\n') || (c == '\r'))
335 c = ' ';
336 } while((c == ' ') && (ppos->lastsym == c));
337
338 ppos->lastsym = c;
339
340 debugMessage("[%c]\n", c);
341
342 return c;
343 }
344 #endif
345
346 /** Reinserts a character into the input stream */
347 static
ungetsymbol(PPOS * ppos,int c)348 void ungetsymbol(
349 PPOS* ppos,
350 int c
351 )
352 {
353 assert(ppos != NULL);
354 assert(ppos->nextsym == 0);
355
356 ppos->nextsym = c;
357 }
358
359 /** Skip all spaces and return the next non-space character or EOF */
360 static
skipSpace(PPOS * ppos)361 int skipSpace(
362 PPOS* ppos
363 )
364 {
365 int c;
366
367 assert(ppos != NULL);
368
369 do
370 {
371 c = getsymbol(ppos);
372 }
373 while(isspace(c));
374
375 return c;
376 }
377
378 /** Get name of a TAG or attribute from the input stream.
379 *
380 * Either it returns a pointer to allocated memory which contains the name or it returns NULL if
381 * there is some error.
382 */
383 static
getName(PPOS * ppos)384 char* getName(
385 PPOS* ppos
386 )
387 {
388 char* name = NULL;
389 size_t size = 0;
390 size_t len = 0;
391 int c;
392
393 assert(ppos != NULL);
394
395 c = getsymbol(ppos);
396
397 if ( ! isalpha(c) && (c != '_') && (c != ':') )
398 {
399 xmlError(ppos, "Name starting with illegal charater");
400 return NULL;
401 }
402
403 /* The following is wrong: Here almost all characters that we casted to unicode are feasible */
404 while ( isalnum(c) || (c == '_') || (c == ':') || (c == '.') || (c == '-') )
405 {
406 if ( len + 1 >= size )
407 {
408 size += NAME_EXT_SIZE;
409
410 if ( name == NULL )
411 {
412 ALLOC_ABORT( BMSallocMemoryArray(&name, size) );
413 }
414 else
415 {
416 ALLOC_ABORT( BMSreallocMemoryArray(&name, size) );
417 }
418 }
419 assert(name != NULL);
420 assert(size > len);
421
422 name[len++] = (char)c;
423
424 c = getsymbol(ppos);
425 }
426 if ( c != EOF )
427 ungetsymbol(ppos, c);
428
429 assert(name != NULL);
430
431 if ( len == 0 )
432 {
433 BMSfreeMemoryArray(&name);
434 name = NULL;
435 }
436 else
437 name[len] = '\0';
438
439 return name;
440 }
441
442 /** Read the value of an attribute from the input stream.
443 *
444 * The value has to be between two " or ' (the other character is then valid as well). The function
445 * returns a pointer to allocated memory containing the value or it returns NULL in case of an
446 * error.
447 */
448 static
getAttrval(PPOS * ppos)449 char* getAttrval(
450 PPOS* ppos
451 )
452 {
453 char* attr = NULL;
454 int c;
455 int stop;
456 size_t len = 0;
457 size_t size = 0;
458
459 assert(ppos != NULL);
460
461 /* The following is not allowed according to the specification (the value has to be directly
462 * after the equation sign). */
463 c = skipSpace(ppos);
464
465 if ( (c != '"') && (c != '\'') )
466 {
467 xmlError(ppos, "Atribute value does not start with \" or \'");
468 return NULL;
469 }
470 stop = c;
471
472 for(;;)
473 {
474 if ( len == size )
475 {
476 size += ATTR_EXT_SIZE;
477
478 if ( attr == NULL )
479 {
480 ALLOC_ABORT( BMSallocMemoryArray(&attr, size) );
481 }
482 else
483 {
484 ALLOC_ABORT( BMSreallocMemoryArray(&attr, size) );
485 }
486 }
487 assert(attr != NULL);
488 assert(size > len);
489
490 c = getsymbol(ppos);
491
492 if ( (c == stop) || (c == EOF) )
493 break;
494
495 attr[len++] = (char)c;
496 }
497
498 if ( c != EOF )
499 attr[len] = '\0';
500 else
501 {
502 BMSfreeMemoryArray(&attr);
503 attr = NULL;
504 }
505 return attr;
506 }
507
508 /** Skip comment
509 *
510 * Return FALSE if an error occurs.
511 */
512 static
doComment(PPOS * ppos)513 XML_Bool doComment(
514 PPOS* ppos
515 )
516 {
517 XML_Bool result = TRUE;
518 int c;
519 int state = 0;
520
521 assert(ppos != NULL);
522
523 for(;;)
524 {
525 c = getsymbol(ppos);
526
527 if ( c == EOF )
528 break;
529
530 if ( (c == '>') && (state >= 2) )
531 break;
532
533 state = (c == '-') ? state + 1 : 0;
534 }
535 if ( c == EOF )
536 {
537 xmlError(ppos, "Unexpected EOF in comment");
538 result = FALSE;
539 }
540 return result;
541 }
542
543 /** Handles a CDATA section.
544 *
545 * Returns a pointer to allocated memory containing the data of this section or NULL in case of an
546 * error.
547 */
548 static
doCdata(PPOS * ppos)549 char* doCdata(
550 PPOS* ppos
551 )
552 {
553 char* data = NULL;
554 size_t size = 0;
555 size_t len = 0;
556 int state = 0;
557 int c;
558
559 assert(ppos != NULL);
560
561 for(;;)
562 {
563 c = getsymbol(ppos);
564
565 if ( c == EOF )
566 break;
567
568 if ( c == ']' )
569 state++;
570 else
571 if ( (c == '>') && (state >= 2) )
572 break;
573 else
574 state = 0;
575
576 if ( len == size )
577 {
578 size += DATA_EXT_SIZE;
579
580 if ( data == NULL )
581 {
582 ALLOC_ABORT( BMSallocMemoryArray(&data, size) );
583 }
584 else
585 {
586 ALLOC_ABORT( BMSreallocMemoryArray(&data, size) );
587 }
588 }
589 assert(data != NULL);
590 assert(size > len);
591
592 data[len++] = (char)c;
593 }
594 assert(data != NULL);
595
596 /*lint --e{527}*/
597 if ( c != EOF )
598 {
599 assert(len >= 2);
600 assert(data != NULL);
601
602 data[len - 2] = '\0'; /*lint !e413*/
603 }
604 else
605 {
606 BMSfreeMemoryArray(&data);
607 data = NULL;
608 xmlError(ppos, "Unexpected EOF in CDATA");
609 }
610 return data;
611 }
612
613 /** Handle processing instructions (skipping) */
614 static
handlePi(PPOS * ppos)615 void handlePi(
616 PPOS* ppos
617 )
618 {
619 int c;
620
621 assert(ppos != NULL);
622 assert(ppos->state == XML_STATE_BEFORE);
623
624 do
625 {
626 c = getsymbol(ppos);
627 }
628 while ( (c != EOF) && (c != '>') );
629
630 if ( c != EOF )
631 ppos->state = XML_STATE_PCDATA;
632 else
633 {
634 xmlError(ppos, "Unexpected EOF in PI");
635 ppos->state = XML_STATE_ERROR;
636 }
637 }
638
639 /** Handles declarations that start with a <!.
640 *
641 * This includes comments. Does currenlty not work very well, because of DTDs.
642 */
643 static
handleDecl(PPOS * ppos)644 void handleDecl(
645 PPOS* ppos
646 )
647 {
648 enum XmlSection
649 {
650 IS_COMMENT,
651 IS_ATTLIST,
652 IS_DOCTYPE,
653 IS_ELEMENT,
654 IS_ENTITY,
655 IS_NOTATION,
656 IS_CDATA
657 };
658 typedef enum XmlSection XMLSECTION;
659
660 static struct
661 {
662 const char* name;
663 XMLSECTION what;
664 } key[] =
665 {
666 { "--", IS_COMMENT },
667 { "ATTLIST", IS_ATTLIST },
668 { "DOCTYPE", IS_DOCTYPE },
669 { "ELEMENT", IS_ELEMENT },
670 { "ENTITY", IS_ENTITY },
671 { "NOTATION", IS_NOTATION },
672 { "[CDATA[", IS_CDATA }
673 };
674 XML_NODE* node;
675 char* data;
676 int c;
677 int k = 0;
678 int beg = 0;
679 int end;
680
681 assert(ppos != NULL);
682 assert(ppos->state == XML_STATE_BEFORE);
683
684 end = (int) (sizeof(key) / sizeof(key[0])) - 1;
685 do
686 {
687 c = getsymbol(ppos);
688
689 for(; (beg <= end) && (c != key[beg].name[k]); beg++)
690 ;
691 for(; (end >= beg) && (c != key[end].name[k]); end--)
692 ;
693 k++;
694 }
695 while(beg < end);
696
697 if ( beg != end )
698 {
699 xmlError(ppos, "Unknown declaration");
700
701 while ( (c != EOF) && (c != '>') )
702 c = getsymbol(ppos);
703 }
704 else
705 {
706 assert(beg == end);
707 assert(beg < (int)(sizeof(key) / sizeof(*key)));
708 assert(beg >= 0);
709
710 switch(key[beg].what)
711 {
712 case IS_COMMENT :
713 if ( ! doComment(ppos) )
714 ppos->state = XML_STATE_ERROR;
715 break;
716 case IS_CDATA :
717 if ( (data = doCdata(ppos)) == NULL )
718 ppos->state = XML_STATE_ERROR;
719 else
720 {
721 if ( NULL == (node = xmlNewNode("#CDATA", ppos->lineno)) )
722 {
723 xmlError(ppos, "Can't create new node");
724 ppos->state = XML_STATE_ERROR;
725 }
726 else
727 {
728 BMSduplicateMemoryArray(&node->data, data, strlen(data)+1);
729 BMSfreeMemoryArray(&data);
730 xmlAppendChild(topPstack(ppos), node);
731 }
732 }
733 break;
734 case IS_ATTLIST :
735 case IS_ELEMENT :
736 case IS_NOTATION :
737 case IS_ENTITY :
738 case IS_DOCTYPE :
739 break;
740 default :
741 abort();
742 }
743 }
744 }
745
746 /** Handle end tag */
747 static
handleEndtag(PPOS * ppos)748 void handleEndtag(
749 PPOS* ppos
750 )
751 {
752 char* name;
753 int c;
754
755 assert(ppos != NULL);
756
757 if ( (name = getName(ppos)) == NULL )
758 xmlError(ppos, "Missing name in endtag");
759 else
760 {
761 c = skipSpace(ppos);
762
763 if ( c != '>' )
764 {
765 xmlError(ppos, "Missing '>' in endtag");
766 ppos->state = XML_STATE_ERROR;
767 }
768 else
769 {
770 if ( strcmp(name, topPstack(ppos)->name) )
771 {
772 xmlError(ppos, "Name of endtag does not match starttag");
773 ppos->state = XML_STATE_ERROR;
774 }
775 else
776 {
777 if ( popPstack(ppos) )
778 ppos->state = XML_STATE_PCDATA;
779 else
780 ppos->state = XML_STATE_ERROR;
781 }
782 }
783
784 BMSfreeMemoryArray(&name);
785 }
786 }
787
788 /** Handle start tag */
789 static
handleStarttag(PPOS * ppos)790 void handleStarttag(
791 PPOS* ppos
792 )
793 {
794 XML_NODE* node;
795 char* name;
796
797 assert(ppos != NULL);
798
799 name = getName(ppos);
800 if ( name == NULL )
801 {
802 xmlError(ppos, "Missing name in tagstart");
803 ppos->state = XML_STATE_ERROR;
804 }
805 else
806 {
807 node = xmlNewNode(name, ppos->lineno);
808 if ( node == NULL )
809 {
810 xmlError(ppos, "Can't create new node");
811 ppos->state = XML_STATE_ERROR;
812 }
813 else
814 {
815 xmlAppendChild(topPstack(ppos), node);
816
817 if ( pushPstack(ppos, node) )
818 ppos->state = XML_STATE_IN_TAG;
819 else
820 ppos->state = XML_STATE_ERROR;
821 }
822 BMSfreeMemoryArray(&name);
823 }
824 }
825
826 /** Checks for next tag */
827 static
procBefore(PPOS * ppos)828 void procBefore(
829 PPOS* ppos /**< input stream position */
830 )
831 {
832 int c;
833
834 assert(ppos != NULL);
835 assert(ppos->state == XML_STATE_BEFORE);
836
837 c = skipSpace(ppos);
838
839 if ( c != '<' )
840 {
841 xmlError(ppos, "Expecting '<'");
842 ppos->state = XML_STATE_ERROR;
843 }
844 else
845 {
846 c = getsymbol(ppos);
847
848 switch(c)
849 {
850 case EOF :
851 xmlError(ppos, "Unexpected EOF");
852 ppos->state = XML_STATE_ERROR;
853 break;
854 case '!' :
855 handleDecl(ppos);
856 break;
857 case '?' :
858 handlePi(ppos);
859 break;
860 case '/' :
861 handleEndtag(ppos);
862 break;
863 default :
864 ungetsymbol(ppos, c);
865 handleStarttag(ppos);
866 break;
867 }
868 }
869 }
870
871 /** Process tag */
872 static
procInTag(PPOS * ppos)873 void procInTag(
874 PPOS* ppos /**< input stream position */
875 )
876 {
877 XML_ATTR* attr;
878 int c;
879 XML_Bool empty = FALSE;
880 char* name;
881 char* value;
882
883 assert(ppos != NULL);
884 assert(ppos->state == XML_STATE_IN_TAG);
885
886 c = skipSpace(ppos);
887
888 if ( (c == '/') || (c == '>') || (c == EOF) )
889 {
890 if ( c == '/' )
891 {
892 empty = TRUE;
893 c = getsymbol(ppos);
894 }
895
896 if ( c == EOF )
897 {
898 xmlError(ppos, "Unexpected EOF while in a tag");
899 ppos->state = XML_STATE_ERROR;
900 }
901
902 if ( c == '>' )
903 {
904 ppos->state = XML_STATE_PCDATA;
905
906 if (empty && ! popPstack(ppos))
907 ppos->state = XML_STATE_ERROR;
908 }
909 else
910 {
911 xmlError(ppos, "Expected tag end marker '>'");
912 ppos->state = XML_STATE_ERROR;
913 }
914 }
915 else
916 {
917 ungetsymbol(ppos, c);
918
919 name = getName(ppos);
920 if ( name == NULL )
921 {
922 xmlError(ppos, "No name for attribute");
923 ppos->state = XML_STATE_ERROR;
924 }
925 else
926 {
927 c = skipSpace(ppos);
928
929 if ( (c != '=') || ((value = getAttrval(ppos)) == NULL) )
930 {
931 xmlError(ppos, "Missing attribute value");
932 ppos->state = XML_STATE_ERROR;
933 BMSfreeMemoryArray(&name);
934 }
935 else
936 {
937 attr = xmlNewAttr(name, value);
938 if ( attr == NULL )
939 {
940 xmlError(ppos, "Can't create new attribute");
941 ppos->state = XML_STATE_ERROR;
942 }
943 else
944 {
945 xmlAddAttr(topPstack(ppos), attr);
946 }
947 BMSfreeMemoryArray(&name);
948 BMSfreeMemoryArray(&value);
949 }
950 }
951 }
952 }
953
954 /* Handles PCDATA */
955 static
procPcdata(PPOS * ppos)956 void procPcdata(
957 PPOS* ppos /**< input stream position */
958 )
959 {
960 XML_NODE* node;
961 char* data = NULL;
962 size_t size = 0;
963 size_t len = 0;
964 int c;
965
966 assert(ppos != NULL);
967 assert(ppos->state == XML_STATE_PCDATA);
968
969 #ifndef SPEC_LIKE_SPACE_HANDLING
970 c = skipSpace(ppos);
971 if ( c != EOF )
972 ungetsymbol(ppos, c);
973 #endif
974 c = getsymbol(ppos);
975
976 while ( (c != EOF) && (c != '<') )
977 {
978 if ( len + 1 >= size ) /* leave space for terminating '\0' */
979 {
980 size += DATA_EXT_SIZE;
981
982 if ( data == NULL )
983 {
984 ALLOC_ABORT( BMSallocMemoryArray(&data, size) );
985 }
986 else
987 {
988 ALLOC_ABORT( BMSreallocMemoryArray(&data, size) );
989 }
990 }
991 assert(data != NULL);
992 assert(size > len + 1);
993
994 data[len++] = (char)c;
995
996 c = getsymbol(ppos);
997 }
998 if ( data == NULL )
999 {
1000 if ( c == EOF )
1001 ppos->state = XML_STATE_EOF;
1002 else
1003 {
1004 assert(c == '<');
1005 ppos->state = XML_STATE_BEFORE;
1006 ungetsymbol(ppos, c);
1007 }
1008 }
1009 else
1010 {
1011 assert(len < size);
1012 data[len] = '\0';
1013
1014 if ( c == EOF )
1015 ppos->state = XML_STATE_ERROR;
1016 else
1017 {
1018 ungetsymbol(ppos, c);
1019
1020 node = xmlNewNode("#PCDATA", ppos->lineno);
1021 if ( node == NULL )
1022 {
1023 xmlError(ppos, "Can't create new node");
1024 ppos->state = XML_STATE_ERROR;
1025 }
1026 else
1027 {
1028 BMSduplicateMemoryArray(&node->data, data, strlen(data)+1);
1029 xmlAppendChild(topPstack(ppos), node);
1030 ppos->state = XML_STATE_BEFORE;
1031 }
1032 }
1033
1034 BMSfreeMemoryArray(&data);
1035 }
1036 }
1037
1038 /** Parse input stream */
1039 static
xmlParse(PPOS * ppos)1040 XML_Bool xmlParse(
1041 PPOS* ppos /**< input stream position */
1042 )
1043 {
1044 XML_Bool ok = TRUE;
1045
1046 while (ok)
1047 {
1048 debugMessage("state=%d\n", ppos->state);
1049
1050 switch (ppos->state)
1051 {
1052 case XML_STATE_BEFORE :
1053 procBefore(ppos);
1054 break;
1055 case XML_STATE_IN_TAG :
1056 procInTag(ppos);
1057 break;
1058 case XML_STATE_PCDATA :
1059 procPcdata(ppos);
1060 break;
1061 case XML_STATE_EOF :
1062 ok = FALSE;
1063 break;
1064 case XML_STATE_ERROR :
1065 ok = FALSE;
1066 break;
1067 default :
1068 xmlError(ppos, "Internal Error, illegal state");
1069 ok = FALSE;
1070 }
1071 }
1072 return (ppos->state == XML_STATE_EOF);
1073 }
1074
1075 /** Parse file */
xmlProcess(const char * filename)1076 XML_NODE* xmlProcess(
1077 const char* filename /**< XML file name */
1078 )
1079 {
1080 PPOS ppos;
1081 XML_NODE* node = NULL;
1082 XML_ATTR* attr;
1083 XML_Bool result = FALSE;
1084 char* myfilename;
1085 size_t filenamelen;
1086
1087 /* allocate space and copy filename (possibly modified below) in two steps in order to satisfy valgrind */
1088 assert( filename != NULL );
1089 filenamelen = strlen(filename);
1090 if ( BMSallocMemoryArray(&myfilename, filenamelen + 5) == NULL )
1091 return NULL;
1092 BMScopyMemoryArray(myfilename, filename, filenamelen + 1);
1093
1094 #ifdef SCIP_WITH_ZLIB
1095 if ( access(filename, R_OK) != 0 )
1096 {
1097 strcat(myfilename, ".gz");
1098
1099 /* If .gz also does not work, revert to the old name
1100 * to get a better error message.
1101 */
1102 if ( access(myfilename, R_OK) != 0 )
1103 (void)SCIPstrncpy(myfilename, filename, (int)filenamelen + 5);
1104 }
1105 #endif
1106 ppos.fp = FOPEN(myfilename, "r");
1107 if ( ppos.fp == NULL )
1108 perror(myfilename);
1109 else
1110 {
1111 ppos.filename = myfilename;
1112 ppos.buf[0] = '\0';
1113 ppos.pos = 0;
1114 ppos.lineno = 1;
1115 ppos.nextsym = 0;
1116 ppos.lastsym = 0;
1117 ppos.state = XML_STATE_BEFORE;
1118 ppos.top = NULL;
1119
1120 node = xmlNewNode("#ROOT", ppos.lineno);
1121 if ( node == NULL )
1122 {
1123 xmlError(&ppos, "Can't create new node");
1124 }
1125 else
1126 {
1127 attr = xmlNewAttr("filename", myfilename);
1128 if ( attr == NULL )
1129 xmlError(&ppos, "Can't create new attribute");
1130 else
1131 {
1132 xmlAddAttr(node, attr);
1133
1134 /* push root node on stack and start to process */
1135 if ( pushPstack(&ppos, node) )
1136 {
1137 result = xmlParse(&ppos);
1138
1139 clearPstack(&ppos);
1140 }
1141 }
1142 }
1143
1144 if ( ! result && (node != NULL) )
1145 {
1146 xmlErrmsg(&ppos, "Parsing error, processing stopped", TRUE, __FILE__, __LINE__);
1147 xmlFreeNode(node);
1148 node = NULL;
1149 }
1150 if ( FCLOSE(ppos.fp) )
1151 perror(myfilename);
1152 }
1153 BMSfreeMemoryArray(&myfilename);
1154
1155 return node;
1156 }
1157
1158
1159
1160
1161
1162
1163 /*----------------------------------------------------------------------------------------------*/
1164
1165
1166 /** create new node */
xmlNewNode(const char * name,int lineno)1167 XML_NODE* xmlNewNode(
1168 const char* name,
1169 int lineno
1170 )
1171 {
1172 XML_NODE* n = NULL;
1173
1174 assert(name != NULL);
1175
1176 if ( BMSallocMemory(&n) != NULL )
1177 {
1178 BMSclearMemory(n);
1179 BMSduplicateMemoryArray(&n->name, name, strlen(name)+1);
1180 n->lineno = lineno;
1181 }
1182 return n;
1183 }
1184
1185 /** create new attribute */
xmlNewAttr(const char * name,const char * value)1186 XML_ATTR* xmlNewAttr(
1187 const char* name,
1188 const char* value
1189 )
1190 {
1191 XML_ATTR* a = NULL;
1192
1193 assert(name != NULL);
1194 assert(value != NULL);
1195
1196 if ( BMSallocMemory(&a) != NULL )
1197 {
1198 BMSclearMemory(a);
1199 BMSduplicateMemoryArray(&a->name, name, strlen(name)+1);
1200 BMSduplicateMemoryArray(&a->value, value, strlen(value)+1);
1201 }
1202 return a;
1203 }
1204
1205 /** add attribute */
xmlAddAttr(XML_NODE * n,XML_ATTR * a)1206 void xmlAddAttr(
1207 XML_NODE* n,
1208 XML_ATTR* a
1209 )
1210 {
1211 assert(n != NULL);
1212 assert(a != NULL);
1213
1214 a->next = n->attrlist;
1215 n->attrlist = a;
1216 }
1217
1218 /** append child node */
xmlAppendChild(XML_NODE * parent,XML_NODE * child)1219 void xmlAppendChild(
1220 XML_NODE* parent,
1221 XML_NODE* child
1222 )
1223 {
1224 assert(parent != NULL);
1225 assert(child != NULL);
1226
1227 child->parent = parent;
1228 child->prevsibl = parent->lastchild;
1229 child->nextsibl = NULL;
1230 parent->lastchild = child;
1231
1232 if ( child->prevsibl != NULL )
1233 child->prevsibl->nextsibl = child;
1234
1235 if ( parent->firstchild == NULL )
1236 parent->firstchild = child;
1237 }
1238
1239 /** free attribute */
1240 static
xmlFreeAttr(XML_ATTR * attr)1241 void xmlFreeAttr(
1242 XML_ATTR* attr
1243 )
1244 {
1245 XML_ATTR* a;
1246
1247 /* Note: use an iterative implementation instead of a recursive one; the latter is much slower for large instances
1248 * and might overflow the heap. */
1249 a = attr;
1250 while (a != NULL)
1251 {
1252 XML_ATTR* b;
1253 b = a->next;
1254
1255 assert(a->name != NULL);
1256 assert(a->value != NULL);
1257
1258 BMSfreeMemoryArray(&a->name);
1259 BMSfreeMemoryArray(&a->value);
1260 BMSfreeMemory(&a);
1261 a = b;
1262 }
1263 }
1264
1265 /** free node */
xmlFreeNode(XML_NODE * node)1266 void xmlFreeNode(
1267 XML_NODE* node
1268 )
1269 {
1270 XML_NODE* n;
1271
1272 if ( node == NULL )
1273 return;
1274
1275 /* Free data from back to front (because free is faster this way). */
1276 /* Note: use an iterative implementation instead of a recursive one; the latter is much slower for large instances
1277 * and might overflow the heap. */
1278 n = node->lastchild;
1279 while ( n != NULL )
1280 {
1281 XML_NODE* m;
1282 m = n->prevsibl;
1283 xmlFreeNode(n);
1284 n = m;
1285 }
1286
1287 xmlFreeAttr(node->attrlist);
1288
1289 if ( node->data != NULL )
1290 {
1291 BMSfreeMemoryArray(&node->data);
1292 }
1293 assert(node->name != NULL);
1294
1295 BMSfreeMemoryArray(&node->name);
1296 BMSfreeMemory(&node);
1297 }
1298
1299 /** output node */
xmlShowNode(const XML_NODE * root)1300 void xmlShowNode(
1301 const XML_NODE* root
1302 )
1303 {
1304 const XML_NODE* n;
1305 const XML_ATTR* a;
1306
1307 assert(root != NULL);
1308
1309 for (n = root; n != NULL; n = n->nextsibl)
1310 {
1311 infoMessage("Name: %s\n", n->name);
1312 infoMessage("Line: %d\n", n->lineno);
1313 infoMessage("Data: %s\n", (n->data != NULL) ? n->data : "***");
1314
1315 for (a = n->attrlist; a != NULL; a = a->next)
1316 infoMessage("Attr: %s = [%s]\n", a->name, a->value);
1317
1318 if ( n->firstchild != NULL )
1319 {
1320 infoMessage("->\n");
1321 xmlShowNode(n->firstchild);
1322 infoMessage("<-\n");
1323 }
1324 }
1325 }
1326
1327 /** get attribute value */
xmlGetAttrval(const XML_NODE * node,const char * name)1328 const char* xmlGetAttrval(
1329 const XML_NODE* node,
1330 const char* name
1331 )
1332 {
1333 XML_ATTR* a;
1334
1335 assert(node != NULL);
1336 assert(name != NULL);
1337
1338 for (a = node->attrlist; a != NULL; a = a->next)
1339 {
1340 if ( ! strcmp(name, a->name) )
1341 break;
1342 }
1343
1344 #ifdef SCIP_DEBUG
1345 if (a == NULL)
1346 infoMessage("Error: Attribute %s in TAG <%s> not found\n", name, node->name);
1347 #endif
1348
1349 return (a == NULL) ? NULL : a->value;
1350 }
1351
1352 /** return first node */
xmlFirstNode(const XML_NODE * node,const char * name)1353 const XML_NODE* xmlFirstNode(
1354 const XML_NODE* node,
1355 const char* name
1356 )
1357 {
1358 const XML_NODE* n;
1359
1360 assert(node != NULL);
1361 assert(name != NULL);
1362
1363 for (n = node; n != NULL; n = n->nextsibl)
1364 {
1365 if ( ! strcmp(name, n->name) )
1366 break;
1367 }
1368
1369 return n;
1370 }
1371
1372 /** return next node */
xmlNextNode(const XML_NODE * node,const char * name)1373 const XML_NODE* xmlNextNode(
1374 const XML_NODE* node,
1375 const char* name
1376 )
1377 {
1378 assert(node != NULL);
1379 assert(name != NULL);
1380
1381 return (node->nextsibl == NULL) ? NULL : xmlFirstNode(node->nextsibl, name);
1382 }
1383
1384 /** find node */
xmlFindNode(const XML_NODE * node,const char * name)1385 const XML_NODE* xmlFindNode(
1386 const XML_NODE* node,
1387 const char* name
1388 )
1389 {
1390 const XML_NODE* n;
1391 const XML_NODE* r;
1392
1393 assert(node != NULL);
1394 assert(name != NULL);
1395
1396 if ( ! strcmp(name, node->name) )
1397 return node;
1398
1399 for (n = node->firstchild; n != NULL; n = n->nextsibl)
1400 {
1401 r = xmlFindNode(n, name);
1402 if ( r != NULL )
1403 return r;
1404 }
1405
1406 return NULL;
1407 }
1408
1409 /** find node with bound on the depth */
xmlFindNodeMaxdepth(const XML_NODE * node,const char * name,int depth,int maxdepth)1410 const XML_NODE* xmlFindNodeMaxdepth(
1411 const XML_NODE* node, /**< current node - use start node to begin */
1412 const char* name, /**< name of tag to search for */
1413 int depth, /**< current depth - start with 0 for root */
1414 int maxdepth /**< maximal depth */
1415 )
1416 {
1417 const XML_NODE* n;
1418 const XML_NODE* r;
1419
1420 assert(node != NULL);
1421 assert(name != NULL);
1422
1423 if ( ! strcmp(name, node->name) )
1424 return node;
1425
1426 if ( depth < maxdepth )
1427 {
1428 for (n = node->firstchild; n != NULL; n = n->nextsibl)
1429 {
1430 r = xmlFindNodeMaxdepth(n, name, depth+1, maxdepth);
1431 if ( r != NULL )
1432 return r;
1433 }
1434 }
1435
1436 return NULL;
1437 }
1438
1439 /** return next sibling */
xmlNextSibl(const XML_NODE * node)1440 const XML_NODE* xmlNextSibl(
1441 const XML_NODE* node
1442 )
1443 {
1444 assert(node != NULL);
1445
1446 return node->nextsibl;
1447 }
1448
1449 /** return previous sibling */
xmlPrevSibl(const XML_NODE * node)1450 const XML_NODE* xmlPrevSibl(
1451 const XML_NODE* node
1452 )
1453 {
1454 assert(node != NULL);
1455
1456 return node->prevsibl;
1457 }
1458
1459 /** return first child */
xmlFirstChild(const XML_NODE * node)1460 const XML_NODE* xmlFirstChild(
1461 const XML_NODE* node
1462 )
1463 {
1464 assert(node != NULL);
1465
1466 return node->firstchild;
1467 }
1468
1469 /** return last child */
xmlLastChild(const XML_NODE * node)1470 const XML_NODE* xmlLastChild(
1471 const XML_NODE* node
1472 )
1473 {
1474 assert(node != NULL);
1475
1476 return node->lastchild;
1477 }
1478
1479 /** return name of node */
xmlGetName(const XML_NODE * node)1480 const char* xmlGetName(
1481 const XML_NODE* node
1482 )
1483 {
1484 assert(node != NULL);
1485
1486 return node->name;
1487 }
1488
1489 /** get line number */
xmlGetLine(const XML_NODE * node)1490 int xmlGetLine(
1491 const XML_NODE* node
1492 )
1493 {
1494 assert(node != NULL);
1495
1496 return node->lineno;
1497 }
1498
1499 /** get data */
xmlGetData(const XML_NODE * node)1500 const char* xmlGetData(
1501 const XML_NODE* node
1502 )
1503 {
1504 assert(node != NULL);
1505
1506 return node->data;
1507 }
1508
1509 /** find PCDATA */
xmlFindPcdata(const XML_NODE * node,const char * name)1510 const char* xmlFindPcdata(
1511 const XML_NODE* node,
1512 const char* name
1513 )
1514 {
1515 const XML_NODE* n;
1516
1517 assert(node != NULL);
1518 assert(name != NULL);
1519
1520 n = xmlFindNode(node, name);
1521 if ( n == NULL )
1522 return NULL;
1523
1524 if ( ! strcmp(n->firstchild->name, "#PCDATA") )
1525 return n->firstchild->data;
1526
1527 return NULL;
1528 }
1529