1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2 /*                                                                           */
3 /*                  This file is part of the program and library             */
4 /*         SCIP --- Solving Constraint Integer Programs                      */
5 /*                                                                           */
6 /*    Copyright (C) 2002-2021 Konrad-Zuse-Zentrum                            */
7 /*                            fuer Informationstechnik Berlin                */
8 /*                                                                           */
9 /*  SCIP is distributed under the terms of the ZIB Academic License.         */
10 /*                                                                           */
11 /*  You should have received a copy of the ZIB Academic License              */
12 /*  along with SCIP; see the file COPYING. If not visit scipopt.org.         */
13 /*                                                                           */
14 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
15 
16 /**@file   xmldef.h
17  * @ingroup OTHER_CFILES
18  * @brief  declarations for XML parsing
19  * @author Thorsten Koch
20  * @author Marc Pfetsch
21  *
22  * If SPEC_LIKE_SPACE_HANDLING is not defined, all LF,CR will be changed into spaces and from a
23  * sequence of spaces only one will be used.
24  *
25  * @todo Implement possibility to avoid the construction of parsing information for certain tags
26  * (and their children). For solution files this would avoid parsing the constraints section.
27  */
28 
29 /*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
30 
31 #include <blockmemshell/memory.h>
32 
33 #include "xml.h"
34 #include "xmldef.h"
35 #include "scip/misc.h"
36 
37 
38 #include <sys/types.h>
39 #ifdef SCIP_WITH_ZLIB
40 #if defined(_WIN32) || defined(_WIN64)
41 #define R_OK _A_RDONLY
42 #define access _access
43 #include <io.h>
44 #else
45 #include <unistd.h>
46 #endif
47 #endif
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <assert.h>
51 #include <ctype.h>
52 #include <string.h>
53 
54 
55 #define NAME_EXT_SIZE 128
56 #define ATTR_EXT_SIZE 4096
57 #define DATA_EXT_SIZE 4096
58 #define LINE_BUF_SIZE 8192
59 
60 #define xmlError(a, b) xmlErrmsg(a, b, FALSE, __FILE__, __LINE__)
61 
62 
63 /* forward declarations */
64 typedef struct parse_stack_struct PSTACK;
65 typedef struct parse_pos_struct   PPOS;
66 
67 /** state of the parser */
68 enum parse_state_enum
69 {
70    XML_STATE_ERROR,
71    XML_STATE_BEFORE,
72    XML_STATE_IN_TAG,
73    XML_STATE_PCDATA,
74    XML_STATE_EOF
75 };
76 typedef enum   parse_state_enum   PSTATE;
77 
78 /** Stack as a (singly) linked list. The top element is the current node. */
79 struct parse_stack_struct
80 {
81    XML_NODE*             node;
82    PSTACK*               next;
83 };
84 
85 /** Store the current position in the file and the state of the parser. */
86 struct parse_pos_struct
87 {
88    const char*           filename;
89    FPTYPE                fp;
90    char                  buf[LINE_BUF_SIZE];
91    int                   pos;
92    int                   lineno;
93    int                   nextsym;
94    int                   lastsym;
95    PSTATE                state;
96    PSTACK*               top;
97 };
98 
99 
100 /** output error message with corresponding line and position */
xmlErrmsg(PPOS * ppos,const char * msg,XML_Bool msg_only,const char * file,int line)101 static void xmlErrmsg(
102    PPOS*                 ppos,
103    const char*           msg,
104    XML_Bool              msg_only,
105    const char*           file,
106    int                   line
107    )
108 {
109 #ifndef NDEBUG
110    int ret;
111    assert( ppos != NULL );
112 
113    if ( ! msg_only )
114    {
115       ret = fprintf(stderr, "%s(%d) Error in file %s line %d\n", file, line, ppos->filename, ppos->lineno);
116       assert(ret >= 0);
117 
118       ret = fprintf(stderr, "%s", ppos->buf);
119       assert(ret >= 0);
120 
121       if ( strchr(ppos->buf, '\n') == NULL )
122       {
123          int retc;
124 
125          retc = fputc('\n', stderr);
126          assert(retc != EOF);
127       }
128 
129       ret = fprintf(stderr, "%*s\n", ppos->pos, "^");
130       assert(ret >= 0);
131    }
132    ret = fprintf(stderr, "%s\n\n", msg);
133    assert(ret >= 0);
134 
135 #else
136 
137    if ( ! msg_only )
138    {
139       (void) fprintf(stderr, "%s(%d) Error in file %s line %d\n", file, line, ppos->filename, ppos->lineno);
140 
141       (void) fprintf(stderr, "%s", ppos->buf);
142 
143       if ( strchr(ppos->buf, '\n') == NULL )
144       {
145          (void) fputc('\n', stderr);
146       }
147 
148       (void) fprintf(stderr, "%*s\n", ppos->pos, "^");
149    }
150    (void) fprintf(stderr, "%s\n\n", msg);
151 #endif
152 }
153 
154 
155 /** Push new element on the parse stack.
156  *
157  *  TRUE if it worked, FAILURE otherwise.
158  */
159 static
pushPstack(PPOS * ppos,XML_NODE * node)160 XML_Bool pushPstack(
161    PPOS*                 ppos,
162    XML_NODE*             node
163    )
164 {
165    PSTACK* p;
166 
167    assert(ppos != NULL);
168    assert(node != NULL);
169 
170    debugMessage("Pushing %s\n", node->name);
171 
172    ALLOC_FALSE( BMSallocMemory(&p) );
173    assert(p != NULL);
174 
175    p->node   = node;
176    p->next   = ppos->top;
177    ppos->top = p;
178 
179    return TRUE;
180 }
181 
182 /** returns top element on stack (which has to be present) */
topPstack(const PPOS * ppos)183 static XML_NODE* topPstack(
184    const PPOS*           ppos
185    )
186 {
187    assert(ppos      != NULL);
188    assert(ppos->top != NULL);
189 
190    return ppos->top->node;
191 }
192 
193 /** remove top element from stack and deletes it
194  *
195  *  TRUE if ok, FALSE otherwise
196  */
197 static
popPstack(PPOS * ppos)198 XML_Bool popPstack(
199    PPOS*                 ppos                /**< input stream position */
200    )
201 {
202    PSTACK* p;
203    XML_Bool result;
204 
205    assert(ppos != NULL);
206 
207    if ( ppos->top == NULL )
208    {
209       xmlError(ppos, "Stack underflow");
210       result = FALSE;
211    }
212    else
213    {
214       result = TRUE;
215       p = ppos->top;
216       ppos->top = p->next;
217 
218       debugMessage("Poping %s\n", p->node->name);
219       BMSfreeMemory(&p);
220    }
221    return result;
222 }
223 
224 /** remove complete stack */
225 static
clearPstack(PPOS * ppos)226 void clearPstack(
227    PPOS*                 ppos
228    )
229 {
230    assert(ppos != NULL);
231 
232    while ( ppos->top != NULL )
233       (void) popPstack(ppos);
234 }
235 
236 /** Returns the next character from the input buffer and fills the buffer if it is empty (similar to fgetc()). */
237 static
mygetc(PPOS * ppos)238 int mygetc(
239    PPOS*                 ppos
240    )
241 {
242    assert(ppos      != NULL);
243    assert(ppos->fp  != NULL);
244    assert(ppos->pos <  LINE_BUF_SIZE);
245 
246    if ( ppos->buf[ppos->pos] == '\0' )
247    {
248 #ifdef SCIP_DISABLED_CODE
249       /* the low level function gzread/fread used below seem to be faster */
250       if ( NULL == FGETS(ppos->buf, sizeof(ppos->buf), ppos->fp) )
251          return EOF;
252 #else
253       size_t len = (size_t) FREAD(ppos->buf, sizeof(ppos->buf) - 1, ppos->fp); /*lint !e571 !e747*/
254 
255       if( len == 0 || len > sizeof(ppos->buf) - 1 )
256          return EOF;
257 
258       ppos->buf[len] = '\0';
259 #endif
260       ppos->pos = 0;
261    }
262    return (unsigned char)ppos->buf[ppos->pos++];
263 }
264 
265 
266 #ifdef SPEC_LIKE_SPACE_HANDLING
267 /** Read input from fp_in.
268  *
269  * If there is a LF, CR, CR/LF, or LF/CR it returns exactly on LF.  Also counts the number of
270  * characters.
271  */
272 static
getsymbol(PPOS * ppos)273 int getsymbol(
274    PPOS*                 ppos
275    )
276 {
277    int c;
278 
279    assert(ppos != NULL);
280 
281    if ( ppos->nextsym == 0 )
282       c = mygetc(ppos);
283    else
284    {
285       c = ppos->nextsym;
286       ppos->nextsym = 0;
287    }
288    assert(ppos->nextsym == 0);
289 
290    if (((c == '\n') && (ppos->lastsym == '\r')) || ((c == '\r') && (ppos->lastsym == '\n')))
291       c = mygetc(ppos);
292 
293    ppos->lastsym = c;
294 
295    if ( c == '\r' )
296       c = '\n';
297 
298    if ( c == '\n' )
299       ++ppos->lineno;
300 
301    return c;
302 }
303 #else
304 /** Read input from fp_in (variant).
305  *
306  *  Here we convert all LF or CR into SPACE and return maximally one SPACE after the other.
307  *
308  *  @note This function counts lines differently. On systems that have only one '\\r' as line feed
309  *  (MAC) it does not count correctly.
310  */
311 static
getsymbol(PPOS * ppos)312 int getsymbol(
313    PPOS*                 ppos
314    )
315 {
316    int c;
317 
318    assert(ppos != NULL);
319 
320    do
321    {
322       if ( ppos->nextsym == 0 )
323          c = mygetc(ppos);
324       else
325       {
326          c = ppos->nextsym;
327          ppos->nextsym = 0;
328       }
329       assert(ppos->nextsym == 0);
330 
331       if ( c == '\n' )
332          ++ppos->lineno;
333 
334       if ((c == '\n') || (c == '\r'))
335          c = ' ';
336    } while((c == ' ') && (ppos->lastsym == c));
337 
338    ppos->lastsym = c;
339 
340    debugMessage("[%c]\n", c);
341 
342    return c;
343 }
344 #endif
345 
346 /** Reinserts a character into the input stream */
347 static
ungetsymbol(PPOS * ppos,int c)348 void ungetsymbol(
349    PPOS*                 ppos,
350    int                   c
351    )
352 {
353    assert(ppos          != NULL);
354    assert(ppos->nextsym == 0);
355 
356    ppos->nextsym = c;
357 }
358 
359 /** Skip all spaces and return the next non-space character or EOF */
360 static
skipSpace(PPOS * ppos)361 int skipSpace(
362    PPOS*                 ppos
363    )
364 {
365    int c;
366 
367    assert(ppos != NULL);
368 
369    do
370    {
371       c = getsymbol(ppos);
372    }
373    while(isspace(c));
374 
375    return c;
376 }
377 
378 /** Get name of a TAG or attribute from the input stream.
379  *
380  *  Either it returns a pointer to allocated memory which contains the name or it returns NULL if
381  *  there is some error.
382  */
383 static
getName(PPOS * ppos)384 char* getName(
385    PPOS*                 ppos
386    )
387 {
388    char* name = NULL;
389    size_t size = 0;
390    size_t len  = 0;
391    int c;
392 
393    assert(ppos != NULL);
394 
395    c = getsymbol(ppos);
396 
397    if ( ! isalpha(c) && (c != '_') && (c != ':') )
398    {
399       xmlError(ppos, "Name starting with illegal charater");
400       return NULL;
401    }
402 
403    /* The following is wrong: Here almost all characters that we casted to unicode are feasible */
404    while ( isalnum(c) || (c == '_') || (c == ':') || (c == '.') || (c == '-') )
405    {
406       if ( len + 1 >= size )
407       {
408          size += NAME_EXT_SIZE;
409 
410          if ( name == NULL )
411          {
412             ALLOC_ABORT( BMSallocMemoryArray(&name, size) );
413          }
414          else
415          {
416             ALLOC_ABORT( BMSreallocMemoryArray(&name, size) );
417          }
418       }
419       assert(name != NULL);
420       assert(size > len);
421 
422       name[len++] = (char)c;
423 
424       c = getsymbol(ppos);
425    }
426    if ( c != EOF )
427       ungetsymbol(ppos, c);
428 
429    assert(name != NULL);
430 
431    if ( len == 0 )
432    {
433       BMSfreeMemoryArray(&name);
434       name = NULL;
435    }
436    else
437       name[len] = '\0';
438 
439    return name;
440 }
441 
442 /** Read the value of an attribute from the input stream.
443  *
444  *  The value has to be between two " or ' (the other character is then valid as well). The function
445  *  returns a pointer to allocated memory containing the value or it returns NULL in case of an
446  *  error.
447  */
448 static
getAttrval(PPOS * ppos)449 char* getAttrval(
450    PPOS*                 ppos
451    )
452 {
453    char*  attr = NULL;
454    int    c;
455    int    stop;
456    size_t len = 0;
457    size_t size = 0;
458 
459    assert(ppos != NULL);
460 
461    /* The following is not allowed according to the specification (the value has to be directly
462     * after the equation sign). */
463    c = skipSpace(ppos);
464 
465    if ( (c != '"') && (c != '\'') )
466    {
467       xmlError(ppos, "Atribute value does not start with \" or \'");
468       return NULL;
469    }
470    stop = c;
471 
472    for(;;)
473    {
474       if ( len == size )
475       {
476          size += ATTR_EXT_SIZE;
477 
478          if ( attr == NULL )
479          {
480             ALLOC_ABORT( BMSallocMemoryArray(&attr, size) );
481          }
482          else
483          {
484             ALLOC_ABORT( BMSreallocMemoryArray(&attr, size) );
485          }
486       }
487       assert(attr != NULL);
488       assert(size >  len);
489 
490       c = getsymbol(ppos);
491 
492       if ( (c == stop) || (c == EOF) )
493          break;
494 
495       attr[len++] = (char)c;
496    }
497 
498    if ( c != EOF )
499       attr[len] = '\0';
500    else
501    {
502       BMSfreeMemoryArray(&attr);
503       attr = NULL;
504    }
505    return attr;
506 }
507 
508 /** Skip comment
509  *
510  *  Return FALSE if an error occurs.
511  */
512 static
doComment(PPOS * ppos)513 XML_Bool doComment(
514    PPOS*                 ppos
515    )
516 {
517    XML_Bool result = TRUE;
518    int c;
519    int state = 0;
520 
521    assert(ppos != NULL);
522 
523    for(;;)
524    {
525       c = getsymbol(ppos);
526 
527       if ( c == EOF )
528          break;
529 
530       if ( (c == '>') && (state >= 2) )
531          break;
532 
533       state = (c == '-') ? state + 1 : 0;
534    }
535    if ( c == EOF )
536    {
537       xmlError(ppos, "Unexpected EOF in comment");
538       result = FALSE;
539    }
540    return result;
541 }
542 
543 /** Handles a CDATA section.
544  *
545  *  Returns a pointer to allocated memory containing the data of this section or NULL in case of an
546  *  error.
547  */
548 static
doCdata(PPOS * ppos)549 char* doCdata(
550    PPOS*                 ppos
551    )
552 {
553    char* data  = NULL;
554    size_t size  = 0;
555    size_t len   = 0;
556    int state = 0;
557    int c;
558 
559    assert(ppos != NULL);
560 
561    for(;;)
562    {
563       c = getsymbol(ppos);
564 
565       if ( c == EOF )
566          break;
567 
568       if ( c == ']' )
569          state++;
570       else
571          if ( (c == '>') && (state >= 2) )
572             break;
573          else
574             state = 0;
575 
576       if ( len == size )
577       {
578          size += DATA_EXT_SIZE;
579 
580          if ( data == NULL )
581          {
582             ALLOC_ABORT( BMSallocMemoryArray(&data, size) );
583          }
584          else
585          {
586             ALLOC_ABORT( BMSreallocMemoryArray(&data, size) );
587          }
588       }
589       assert(data != NULL);
590       assert(size >  len);
591 
592       data[len++] = (char)c;
593    }
594    assert(data != NULL);
595 
596    /*lint --e{527}*/
597    if ( c != EOF )
598    {
599       assert(len  >= 2);
600       assert(data != NULL);
601 
602       data[len - 2] = '\0'; /*lint !e413*/
603    }
604    else
605    {
606       BMSfreeMemoryArray(&data);
607       data = NULL;
608       xmlError(ppos, "Unexpected EOF in CDATA");
609    }
610    return data;
611 }
612 
613 /** Handle processing instructions (skipping) */
614 static
handlePi(PPOS * ppos)615 void handlePi(
616    PPOS*                 ppos
617    )
618 {
619    int c;
620 
621    assert(ppos        != NULL);
622    assert(ppos->state == XML_STATE_BEFORE);
623 
624    do
625    {
626       c = getsymbol(ppos);
627    }
628    while ( (c != EOF) && (c != '>') );
629 
630    if ( c != EOF )
631       ppos->state = XML_STATE_PCDATA;
632    else
633    {
634       xmlError(ppos, "Unexpected EOF in PI");
635       ppos->state = XML_STATE_ERROR;
636    }
637 }
638 
639 /** Handles declarations that start with a <!.
640  *
641  *  This includes comments. Does currenlty not work very well, because of DTDs.
642  */
643 static
handleDecl(PPOS * ppos)644 void handleDecl(
645    PPOS*                 ppos
646    )
647 {
648    enum XmlSection
649    {
650       IS_COMMENT,
651       IS_ATTLIST,
652       IS_DOCTYPE,
653       IS_ELEMENT,
654       IS_ENTITY,
655       IS_NOTATION,
656       IS_CDATA
657    };
658    typedef enum XmlSection XMLSECTION;
659 
660    static struct
661    {
662       const char* name;
663       XMLSECTION  what;
664    } key[] =
665    {
666       { "--",       IS_COMMENT  },
667       { "ATTLIST",  IS_ATTLIST  },
668       { "DOCTYPE",  IS_DOCTYPE  },
669       { "ELEMENT",  IS_ELEMENT  },
670       { "ENTITY",   IS_ENTITY   },
671       { "NOTATION", IS_NOTATION },
672       { "[CDATA[",  IS_CDATA    }
673    };
674    XML_NODE* node;
675    char* data;
676    int c;
677    int k = 0;
678    int beg = 0;
679    int end;
680 
681    assert(ppos        != NULL);
682    assert(ppos->state == XML_STATE_BEFORE);
683 
684    end = (int) (sizeof(key) / sizeof(key[0])) - 1;
685    do
686    {
687       c = getsymbol(ppos);
688 
689       for(; (beg <= end) && (c != key[beg].name[k]); beg++)
690          ;
691       for(; (end >= beg) && (c != key[end].name[k]); end--)
692          ;
693       k++;
694    }
695    while(beg < end);
696 
697    if ( beg != end )
698    {
699       xmlError(ppos, "Unknown declaration");
700 
701       while ( (c != EOF) && (c != '>') )
702          c = getsymbol(ppos);
703    }
704    else
705    {
706       assert(beg == end);
707       assert(beg <  (int)(sizeof(key) / sizeof(*key)));
708       assert(beg >= 0);
709 
710       switch(key[beg].what)
711       {
712       case IS_COMMENT :
713          if ( ! doComment(ppos) )
714             ppos->state = XML_STATE_ERROR;
715          break;
716       case IS_CDATA :
717          if ( (data = doCdata(ppos)) == NULL )
718             ppos->state = XML_STATE_ERROR;
719          else
720          {
721             if ( NULL == (node = xmlNewNode("#CDATA", ppos->lineno)) )
722             {
723                xmlError(ppos, "Can't create new node");
724                ppos->state = XML_STATE_ERROR;
725             }
726             else
727             {
728                BMSduplicateMemoryArray(&node->data, data, strlen(data)+1);
729                BMSfreeMemoryArray(&data);
730                xmlAppendChild(topPstack(ppos), node);
731             }
732          }
733          break;
734       case IS_ATTLIST :
735       case IS_ELEMENT :
736       case IS_NOTATION :
737       case IS_ENTITY :
738       case IS_DOCTYPE :
739          break;
740       default :
741          abort();
742       }
743    }
744 }
745 
746 /** Handle end tag */
747 static
handleEndtag(PPOS * ppos)748 void handleEndtag(
749    PPOS*                 ppos
750    )
751 {
752    char* name;
753    int   c;
754 
755    assert(ppos != NULL);
756 
757    if ( (name = getName(ppos)) == NULL )
758       xmlError(ppos, "Missing name in endtag");
759    else
760    {
761       c = skipSpace(ppos);
762 
763       if ( c != '>' )
764       {
765          xmlError(ppos, "Missing '>' in endtag");
766          ppos->state = XML_STATE_ERROR;
767       }
768       else
769       {
770          if ( strcmp(name, topPstack(ppos)->name) )
771          {
772             xmlError(ppos, "Name of endtag does not match starttag");
773             ppos->state = XML_STATE_ERROR;
774          }
775          else
776          {
777             if ( popPstack(ppos) )
778                ppos->state = XML_STATE_PCDATA;
779             else
780                ppos->state = XML_STATE_ERROR;
781          }
782       }
783 
784       BMSfreeMemoryArray(&name);
785    }
786 }
787 
788 /** Handle start tag */
789 static
handleStarttag(PPOS * ppos)790 void handleStarttag(
791    PPOS*                 ppos
792    )
793 {
794    XML_NODE* node;
795    char* name;
796 
797    assert(ppos != NULL);
798 
799    name = getName(ppos);
800    if ( name == NULL )
801    {
802       xmlError(ppos, "Missing name in tagstart");
803       ppos->state = XML_STATE_ERROR;
804    }
805    else
806    {
807       node = xmlNewNode(name, ppos->lineno);
808       if ( node == NULL )
809       {
810          xmlError(ppos, "Can't create new node");
811          ppos->state = XML_STATE_ERROR;
812       }
813       else
814       {
815          xmlAppendChild(topPstack(ppos), node);
816 
817          if ( pushPstack(ppos, node) )
818             ppos->state = XML_STATE_IN_TAG;
819          else
820             ppos->state = XML_STATE_ERROR;
821       }
822       BMSfreeMemoryArray(&name);
823    }
824 }
825 
826 /** Checks for next tag */
827 static
procBefore(PPOS * ppos)828 void procBefore(
829    PPOS*                 ppos                /**< input stream position */
830    )
831 {
832    int c;
833 
834    assert(ppos        != NULL);
835    assert(ppos->state == XML_STATE_BEFORE);
836 
837    c = skipSpace(ppos);
838 
839    if ( c != '<' )
840    {
841       xmlError(ppos, "Expecting '<'");
842       ppos->state = XML_STATE_ERROR;
843    }
844    else
845    {
846       c = getsymbol(ppos);
847 
848       switch(c)
849       {
850       case EOF :
851          xmlError(ppos, "Unexpected EOF");
852          ppos->state = XML_STATE_ERROR;
853          break;
854       case '!' :
855          handleDecl(ppos);
856          break;
857       case '?' :
858          handlePi(ppos);
859          break;
860       case '/' :
861          handleEndtag(ppos);
862          break;
863       default :
864          ungetsymbol(ppos, c);
865          handleStarttag(ppos);
866          break;
867       }
868    }
869 }
870 
871 /** Process tag */
872 static
procInTag(PPOS * ppos)873 void procInTag(
874    PPOS*                 ppos                /**< input stream position */
875    )
876 {
877    XML_ATTR* attr;
878    int     c;
879    XML_Bool empty = FALSE;
880    char*   name;
881    char*   value;
882 
883    assert(ppos        != NULL);
884    assert(ppos->state == XML_STATE_IN_TAG);
885 
886    c = skipSpace(ppos);
887 
888    if ( (c == '/') || (c == '>') || (c == EOF) )
889    {
890       if ( c == '/' )
891       {
892          empty = TRUE;
893          c = getsymbol(ppos);
894       }
895 
896       if ( c == EOF )
897       {
898          xmlError(ppos, "Unexpected EOF while in a tag");
899          ppos->state = XML_STATE_ERROR;
900       }
901 
902       if ( c == '>' )
903       {
904          ppos->state = XML_STATE_PCDATA;
905 
906          if (empty && ! popPstack(ppos))
907             ppos->state = XML_STATE_ERROR;
908       }
909       else
910       {
911          xmlError(ppos, "Expected tag end marker '>'");
912          ppos->state = XML_STATE_ERROR;
913       }
914    }
915    else
916    {
917       ungetsymbol(ppos, c);
918 
919       name = getName(ppos);
920       if ( name == NULL )
921       {
922          xmlError(ppos, "No name for attribute");
923          ppos->state = XML_STATE_ERROR;
924       }
925       else
926       {
927          c = skipSpace(ppos);
928 
929          if ( (c != '=') || ((value = getAttrval(ppos)) == NULL) )
930          {
931             xmlError(ppos, "Missing attribute value");
932             ppos->state = XML_STATE_ERROR;
933             BMSfreeMemoryArray(&name);
934          }
935          else
936          {
937             attr = xmlNewAttr(name, value);
938             if ( attr == NULL )
939             {
940                xmlError(ppos, "Can't create new attribute");
941                ppos->state = XML_STATE_ERROR;
942             }
943             else
944             {
945                xmlAddAttr(topPstack(ppos), attr);
946             }
947             BMSfreeMemoryArray(&name);
948             BMSfreeMemoryArray(&value);
949          }
950       }
951    }
952 }
953 
954 /* Handles PCDATA */
955 static
procPcdata(PPOS * ppos)956 void procPcdata(
957    PPOS*                 ppos                /**< input stream position */
958    )
959 {
960    XML_NODE* node;
961    char*   data   = NULL;
962    size_t  size   = 0;
963    size_t  len    = 0;
964    int     c;
965 
966    assert(ppos        != NULL);
967    assert(ppos->state == XML_STATE_PCDATA);
968 
969 #ifndef SPEC_LIKE_SPACE_HANDLING
970    c = skipSpace(ppos);
971    if ( c != EOF )
972       ungetsymbol(ppos, c);
973 #endif
974    c = getsymbol(ppos);
975 
976    while ( (c != EOF) && (c != '<') )
977    {
978       if ( len + 1 >= size ) /* leave space for terminating '\0' */
979       {
980          size += DATA_EXT_SIZE;
981 
982          if ( data == NULL )
983          {
984             ALLOC_ABORT( BMSallocMemoryArray(&data, size) );
985          }
986          else
987          {
988             ALLOC_ABORT( BMSreallocMemoryArray(&data, size) );
989          }
990       }
991       assert(data != NULL);
992       assert(size > len + 1);
993 
994       data[len++] = (char)c;
995 
996       c = getsymbol(ppos);
997    }
998    if ( data == NULL )
999    {
1000       if ( c == EOF )
1001          ppos->state = XML_STATE_EOF;
1002       else
1003       {
1004          assert(c == '<');
1005          ppos->state = XML_STATE_BEFORE;
1006          ungetsymbol(ppos, c);
1007       }
1008    }
1009    else
1010    {
1011       assert(len < size);
1012       data[len] = '\0';
1013 
1014       if ( c == EOF )
1015          ppos->state = XML_STATE_ERROR;
1016       else
1017       {
1018          ungetsymbol(ppos, c);
1019 
1020          node = xmlNewNode("#PCDATA", ppos->lineno);
1021          if ( node == NULL )
1022          {
1023             xmlError(ppos, "Can't create new node");
1024             ppos->state = XML_STATE_ERROR;
1025          }
1026          else
1027          {
1028             BMSduplicateMemoryArray(&node->data, data, strlen(data)+1);
1029             xmlAppendChild(topPstack(ppos), node);
1030             ppos->state = XML_STATE_BEFORE;
1031          }
1032       }
1033 
1034       BMSfreeMemoryArray(&data);
1035    }
1036 }
1037 
1038 /** Parse input stream */
1039 static
xmlParse(PPOS * ppos)1040 XML_Bool xmlParse(
1041    PPOS*                 ppos                /**< input stream position */
1042    )
1043 {
1044    XML_Bool ok = TRUE;
1045 
1046    while (ok)
1047    {
1048       debugMessage("state=%d\n", ppos->state);
1049 
1050       switch (ppos->state)
1051       {
1052       case XML_STATE_BEFORE :
1053          procBefore(ppos);
1054          break;
1055       case XML_STATE_IN_TAG :
1056          procInTag(ppos);
1057          break;
1058       case XML_STATE_PCDATA :
1059          procPcdata(ppos);
1060          break;
1061       case XML_STATE_EOF :
1062          ok = FALSE;
1063          break;
1064       case XML_STATE_ERROR :
1065          ok = FALSE;
1066          break;
1067       default :
1068          xmlError(ppos, "Internal Error, illegal state");
1069          ok = FALSE;
1070       }
1071    }
1072    return (ppos->state == XML_STATE_EOF);
1073 }
1074 
1075 /** Parse file */
xmlProcess(const char * filename)1076 XML_NODE* xmlProcess(
1077    const char*           filename            /**< XML file name */
1078    )
1079 {
1080    PPOS      ppos;
1081    XML_NODE* node = NULL;
1082    XML_ATTR* attr;
1083    XML_Bool  result = FALSE;
1084    char*     myfilename;
1085    size_t    filenamelen;
1086 
1087    /* allocate space and copy filename (possibly modified below) in two steps in order to satisfy valgrind */
1088    assert( filename != NULL );
1089    filenamelen = strlen(filename);
1090    if ( BMSallocMemoryArray(&myfilename, filenamelen + 5) == NULL )
1091       return NULL;
1092    BMScopyMemoryArray(myfilename, filename, filenamelen + 1);
1093 
1094 #ifdef SCIP_WITH_ZLIB
1095    if ( access(filename, R_OK) != 0 )
1096    {
1097       strcat(myfilename, ".gz");
1098 
1099       /* If .gz also does not work, revert to the old name
1100        * to get a better error message.
1101        */
1102       if ( access(myfilename, R_OK) != 0 )
1103          (void)SCIPstrncpy(myfilename, filename, (int)filenamelen + 5);
1104    }
1105 #endif
1106    ppos.fp = FOPEN(myfilename, "r");
1107    if ( ppos.fp == NULL )
1108       perror(myfilename);
1109    else
1110    {
1111       ppos.filename = myfilename;
1112       ppos.buf[0]   = '\0';
1113       ppos.pos      = 0;
1114       ppos.lineno   = 1;
1115       ppos.nextsym  = 0;
1116       ppos.lastsym  = 0;
1117       ppos.state    = XML_STATE_BEFORE;
1118       ppos.top      = NULL;
1119 
1120       node = xmlNewNode("#ROOT", ppos.lineno);
1121       if ( node == NULL )
1122       {
1123          xmlError(&ppos, "Can't create new node");
1124       }
1125       else
1126       {
1127          attr = xmlNewAttr("filename", myfilename);
1128          if ( attr == NULL )
1129             xmlError(&ppos, "Can't create new attribute");
1130          else
1131          {
1132             xmlAddAttr(node, attr);
1133 
1134             /* push root node on stack and start to process */
1135             if ( pushPstack(&ppos, node) )
1136             {
1137                result = xmlParse(&ppos);
1138 
1139                clearPstack(&ppos);
1140             }
1141          }
1142       }
1143 
1144       if ( ! result && (node != NULL) )
1145       {
1146          xmlErrmsg(&ppos, "Parsing error, processing stopped", TRUE, __FILE__, __LINE__);
1147          xmlFreeNode(node);
1148          node = NULL;
1149       }
1150       if ( FCLOSE(ppos.fp) )
1151          perror(myfilename);
1152    }
1153    BMSfreeMemoryArray(&myfilename);
1154 
1155    return node;
1156 }
1157 
1158 
1159 
1160 
1161 
1162 
1163 /*----------------------------------------------------------------------------------------------*/
1164 
1165 
1166 /** create new node */
xmlNewNode(const char * name,int lineno)1167 XML_NODE* xmlNewNode(
1168    const char*           name,
1169    int                   lineno
1170    )
1171 {
1172    XML_NODE* n = NULL;
1173 
1174    assert(name != NULL);
1175 
1176    if ( BMSallocMemory(&n) != NULL )
1177    {
1178       BMSclearMemory(n);
1179       BMSduplicateMemoryArray(&n->name, name, strlen(name)+1);
1180       n->lineno = lineno;
1181    }
1182    return n;
1183 }
1184 
1185 /** create new attribute */
xmlNewAttr(const char * name,const char * value)1186 XML_ATTR* xmlNewAttr(
1187    const char*           name,
1188    const char*           value
1189    )
1190 {
1191    XML_ATTR* a = NULL;
1192 
1193    assert(name  != NULL);
1194    assert(value != NULL);
1195 
1196    if ( BMSallocMemory(&a) != NULL )
1197    {
1198       BMSclearMemory(a);
1199       BMSduplicateMemoryArray(&a->name, name, strlen(name)+1);
1200       BMSduplicateMemoryArray(&a->value, value, strlen(value)+1);
1201    }
1202    return a;
1203 }
1204 
1205 /** add attribute */
xmlAddAttr(XML_NODE * n,XML_ATTR * a)1206 void xmlAddAttr(
1207    XML_NODE*             n,
1208    XML_ATTR*             a
1209    )
1210 {
1211    assert(n != NULL);
1212    assert(a != NULL);
1213 
1214    a->next = n->attrlist;
1215    n->attrlist = a;
1216 }
1217 
1218 /** append child node */
xmlAppendChild(XML_NODE * parent,XML_NODE * child)1219 void xmlAppendChild(
1220    XML_NODE*             parent,
1221    XML_NODE*             child
1222    )
1223 {
1224    assert(parent != NULL);
1225    assert(child  != NULL);
1226 
1227    child->parent = parent;
1228    child->prevsibl = parent->lastchild;
1229    child->nextsibl = NULL;
1230    parent->lastchild = child;
1231 
1232    if ( child->prevsibl != NULL )
1233       child->prevsibl->nextsibl = child;
1234 
1235    if ( parent->firstchild == NULL )
1236       parent->firstchild = child;
1237 }
1238 
1239 /** free attribute */
1240 static
xmlFreeAttr(XML_ATTR * attr)1241 void xmlFreeAttr(
1242    XML_ATTR*             attr
1243    )
1244 {
1245    XML_ATTR* a;
1246 
1247    /* Note: use an iterative implementation instead of a recursive one; the latter is much slower for large instances
1248     * and might overflow the heap. */
1249    a = attr;
1250    while (a != NULL)
1251    {
1252       XML_ATTR* b;
1253       b = a->next;
1254 
1255       assert(a->name  != NULL);
1256       assert(a->value != NULL);
1257 
1258       BMSfreeMemoryArray(&a->name);
1259       BMSfreeMemoryArray(&a->value);
1260       BMSfreeMemory(&a);
1261       a = b;
1262    }
1263 }
1264 
1265 /** free node */
xmlFreeNode(XML_NODE * node)1266 void xmlFreeNode(
1267    XML_NODE*             node
1268    )
1269 {
1270    XML_NODE* n;
1271 
1272    if ( node == NULL )
1273       return;
1274 
1275    /* Free data from back to front (because free is faster this way). */
1276    /* Note: use an iterative implementation instead of a recursive one; the latter is much slower for large instances
1277     * and might overflow the heap. */
1278    n = node->lastchild;
1279    while ( n != NULL )
1280    {
1281       XML_NODE* m;
1282       m = n->prevsibl;
1283       xmlFreeNode(n);
1284       n = m;
1285    }
1286 
1287    xmlFreeAttr(node->attrlist);
1288 
1289    if ( node->data != NULL )
1290    {
1291       BMSfreeMemoryArray(&node->data);
1292    }
1293    assert(node->name != NULL);
1294 
1295    BMSfreeMemoryArray(&node->name);
1296    BMSfreeMemory(&node);
1297 }
1298 
1299 /** output node */
xmlShowNode(const XML_NODE * root)1300 void xmlShowNode(
1301    const XML_NODE*       root
1302    )
1303 {
1304    const XML_NODE* n;
1305    const XML_ATTR* a;
1306 
1307    assert(root != NULL);
1308 
1309    for (n = root; n != NULL; n = n->nextsibl)
1310    {
1311       infoMessage("Name: %s\n", n->name);
1312       infoMessage("Line: %d\n", n->lineno);
1313       infoMessage("Data: %s\n", (n->data != NULL) ? n->data : "***");
1314 
1315       for (a = n->attrlist; a != NULL; a = a->next)
1316          infoMessage("Attr: %s = [%s]\n", a->name, a->value);
1317 
1318       if ( n->firstchild != NULL )
1319       {
1320          infoMessage("->\n");
1321          xmlShowNode(n->firstchild);
1322          infoMessage("<-\n");
1323       }
1324    }
1325 }
1326 
1327 /** get attribute value */
xmlGetAttrval(const XML_NODE * node,const char * name)1328 const char* xmlGetAttrval(
1329    const XML_NODE*       node,
1330    const char*           name
1331    )
1332 {
1333    XML_ATTR* a;
1334 
1335    assert(node != NULL);
1336    assert(name != NULL);
1337 
1338    for (a = node->attrlist; a != NULL; a = a->next)
1339    {
1340       if ( ! strcmp(name, a->name) )
1341          break;
1342    }
1343 
1344 #ifdef SCIP_DEBUG
1345    if (a == NULL)
1346       infoMessage("Error: Attribute %s in TAG <%s> not found\n", name, node->name);
1347 #endif
1348 
1349    return (a == NULL) ? NULL : a->value;
1350 }
1351 
1352 /** return first node */
xmlFirstNode(const XML_NODE * node,const char * name)1353 const XML_NODE* xmlFirstNode(
1354    const XML_NODE*       node,
1355    const char*           name
1356    )
1357 {
1358    const XML_NODE* n;
1359 
1360    assert(node != NULL);
1361    assert(name != NULL);
1362 
1363    for (n = node; n != NULL; n = n->nextsibl)
1364    {
1365       if ( ! strcmp(name, n->name) )
1366          break;
1367    }
1368 
1369    return n;
1370 }
1371 
1372 /** return next node */
xmlNextNode(const XML_NODE * node,const char * name)1373 const XML_NODE* xmlNextNode(
1374    const XML_NODE*       node,
1375    const char*           name
1376    )
1377 {
1378    assert(node != NULL);
1379    assert(name != NULL);
1380 
1381    return (node->nextsibl == NULL) ? NULL : xmlFirstNode(node->nextsibl, name);
1382 }
1383 
1384 /** find node */
xmlFindNode(const XML_NODE * node,const char * name)1385 const XML_NODE* xmlFindNode(
1386    const XML_NODE*       node,
1387    const char*           name
1388    )
1389 {
1390    const XML_NODE* n;
1391    const XML_NODE* r;
1392 
1393    assert(node != NULL);
1394    assert(name != NULL);
1395 
1396    if ( ! strcmp(name, node->name) )
1397       return node;
1398 
1399    for (n = node->firstchild; n != NULL; n = n->nextsibl)
1400    {
1401       r = xmlFindNode(n, name);
1402       if ( r != NULL )
1403          return r;
1404    }
1405 
1406    return NULL;
1407 }
1408 
1409 /** find node with bound on the depth */
xmlFindNodeMaxdepth(const XML_NODE * node,const char * name,int depth,int maxdepth)1410 const XML_NODE* xmlFindNodeMaxdepth(
1411    const XML_NODE*       node,               /**< current node - use start node to begin */
1412    const char*           name,               /**< name of tag to search for */
1413    int                   depth,              /**< current depth - start with 0 for root */
1414    int                   maxdepth            /**< maximal depth */
1415    )
1416 {
1417    const XML_NODE* n;
1418    const XML_NODE* r;
1419 
1420    assert(node != NULL);
1421    assert(name != NULL);
1422 
1423    if ( ! strcmp(name, node->name) )
1424       return node;
1425 
1426    if ( depth < maxdepth )
1427    {
1428       for (n = node->firstchild; n != NULL; n = n->nextsibl)
1429       {
1430          r = xmlFindNodeMaxdepth(n, name, depth+1, maxdepth);
1431          if ( r != NULL )
1432             return r;
1433       }
1434    }
1435 
1436    return NULL;
1437 }
1438 
1439 /** return next sibling */
xmlNextSibl(const XML_NODE * node)1440 const XML_NODE* xmlNextSibl(
1441    const XML_NODE*       node
1442    )
1443 {
1444    assert(node != NULL);
1445 
1446    return node->nextsibl;
1447 }
1448 
1449 /** return previous sibling */
xmlPrevSibl(const XML_NODE * node)1450 const XML_NODE* xmlPrevSibl(
1451    const XML_NODE*       node
1452    )
1453 {
1454    assert(node != NULL);
1455 
1456    return node->prevsibl;
1457 }
1458 
1459 /** return first child */
xmlFirstChild(const XML_NODE * node)1460 const XML_NODE* xmlFirstChild(
1461    const XML_NODE*       node
1462    )
1463 {
1464    assert(node != NULL);
1465 
1466    return node->firstchild;
1467 }
1468 
1469 /** return last child */
xmlLastChild(const XML_NODE * node)1470 const XML_NODE* xmlLastChild(
1471    const XML_NODE*       node
1472    )
1473 {
1474    assert(node != NULL);
1475 
1476    return node->lastchild;
1477 }
1478 
1479 /** return name of node */
xmlGetName(const XML_NODE * node)1480 const char* xmlGetName(
1481    const XML_NODE*       node
1482    )
1483 {
1484    assert(node != NULL);
1485 
1486    return node->name;
1487 }
1488 
1489 /** get line number */
xmlGetLine(const XML_NODE * node)1490 int xmlGetLine(
1491    const XML_NODE*       node
1492    )
1493 {
1494    assert(node != NULL);
1495 
1496    return node->lineno;
1497 }
1498 
1499 /** get data */
xmlGetData(const XML_NODE * node)1500 const char* xmlGetData(
1501    const XML_NODE*       node
1502    )
1503 {
1504    assert(node != NULL);
1505 
1506    return node->data;
1507 }
1508 
1509 /** find PCDATA */
xmlFindPcdata(const XML_NODE * node,const char * name)1510 const char* xmlFindPcdata(
1511    const XML_NODE*       node,
1512    const char*           name
1513    )
1514 {
1515    const XML_NODE* n;
1516 
1517    assert(node != NULL);
1518    assert(name != NULL);
1519 
1520    n = xmlFindNode(node, name);
1521    if ( n == NULL )
1522       return NULL;
1523 
1524    if ( ! strcmp(n->firstchild->name, "#PCDATA") )
1525       return n->firstchild->data;
1526 
1527    return NULL;
1528 }
1529