1 /**********************************************************************
2  *
3  * Project:  CPL - Common Portability Library
4  * Purpose:  Implementation of MiniXML Parser and handling.
5  * Author:   Frank Warmerdam, warmerdam@pobox.com
6  *
7  **********************************************************************
8  * Copyright (c) 2001, Frank Warmerdam
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a
11  * copy of this software and associated documentation files (the "Software"),
12  * to deal in the Software without restriction, including without limitation
13  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14  * and/or sell copies of the Software, and to permit persons to whom the
15  * Software is furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included
18  * in all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26  * DEALINGS IN THE SOFTWARE.
27  **********************************************************************
28  *
29  * Independent Security Audit 2003/04/05 Andrey Kiselev:
30  *   Completed audit of this module. Any documents may be parsed without
31  *   buffer overflows and stack corruptions.
32  *
33  * Security Audit 2003/03/28 warmerda:
34  *   Completed security audit.  I believe that this module may be safely used
35  *   to parse, and serialize arbitrary documents provided by a potentially
36  *   hostile source.
37  *
38  * $Log: cpl_minixml.cpp,v $
39  * Revision 1.1.1.1  2006/08/21 05:52:20  dsr
40  * Initial import as opencpn, GNU Automake compliant.
41  *
42  * Revision 1.1.1.1  2006/04/19 03:23:29  dsr
43  * Rename/Import to OpenCPN
44  *
45  * Revision 1.29  2004/01/29 17:01:51  warmerda
46  * Added reference to spec.
47  *
48  * Revision 1.28  2004/01/29 15:29:28  warmerda
49  * Added CPLCleanXMLElementName
50  *
51  * Revision 1.27  2003/12/04 15:46:51  warmerda
52  * Added CPLAddXMLSibling()
53  *
54  * Revision 1.26  2003/12/04 15:19:43  warmerda
55  * Added "=" support for "sidesearching" in a document.
56  *
57  * Revision 1.25  2003/11/07 19:40:19  warmerda
58  * ensure CPLGetXMLValue() works for nodes with attributes
59  *
60  * Revision 1.24  2003/11/05 20:14:21  warmerda
61  * added lots of documentation
62  *
63  * Revision 1.23  2003/05/21 03:32:43  warmerda
64  * expand tabs
65  *
66  * Revision 1.22  2003/04/05 07:12:25  dron
67  * Completed security audit.
68  *
69  * Revision 1.21  2003/03/28 17:38:39  warmerda
70  * Added NULL check in CPLParseXMLString().
71  *
72  * Revision 1.20  2003/03/28 05:05:18  warmerda
73  * Completed security audit. Several bugs related to possible buffer
74  * overruns corrected, notably with regard to CPLError() calls.
75  *
76  * Revision 1.19  2003/03/27 18:12:41  warmerda
77  * Added NULL pszNameSpace support in namespace stripper (all namespaces).
78  * Added XML file read/write functions.
79  *
80  * Revision 1.18  2003/03/24 16:47:30  warmerda
81  * Added CPLStripXMLNamespace().
82  * CPLAddXMLChild() will now ensure that attributes are inserted before
83  * non-attributes nodes.
84  *
85  * Revision 1.17  2003/02/14 18:44:29  warmerda
86  * proper tokens may include a dash
87  *
88  * Revision 1.16  2002/11/16 20:42:40  warmerda
89  * improved inline comments
90  *
91  * Revision 1.15  2002/11/16 20:38:34  warmerda
92  * added support for literals like DOCTYPE
93  *
94  * Revision 1.14  2002/07/16 15:06:26  warmerda
95  * ensure that attributes are serialized properly regardless of their order
96  *
97  * Revision 1.13  2002/07/09 20:25:25  warmerda
98  * expand tabs
99  *
100  * Revision 1.12  2002/05/28 18:54:05  warmerda
101  * added escaping/unescaping support
102  *
103  * Revision 1.11  2002/05/24 04:09:10  warmerda
104  * added clone and SetXMLValue functions
105  *
106  * Revision 1.10  2002/04/01 16:08:21  warmerda
107  * allow periods in tokens
108  *
109  * Revision 1.9  2002/03/07 22:19:20  warmerda
110  * don't do operations within CPLAssert(), in UnreadChar()
111  *
112  * Revision 1.8  2002/03/05 14:26:57  warmerda
113  * expanded tabs
114  *
115  * Revision 1.7  2002/01/23 20:45:05  warmerda
116  * handle <?...?> and comment elements
117  *
118  * Revision 1.6  2002/01/22 18:54:48  warmerda
119  * ensure text is properly initialized when serializing
120  *
121  * Revision 1.5  2002/01/16 03:58:51  warmerda
122  * support single quotes as well as double quotes
123  *
124  * Revision 1.4  2001/12/06 18:13:49  warmerda
125  * added CPLAddXMLChild and CPLCreateElmentAndValue
126  *
127  * Revision 1.3  2001/11/16 21:20:16  warmerda
128  * fixed typo
129  *
130  * Revision 1.2  2001/11/16 20:29:58  warmerda
131  * fixed lost char in normal CString tokens
132  *
133  * Revision 1.1  2001/11/16 15:39:48  warmerda
134  * New
135  */
136 
137 #include <ctype.h>
138 #include "cpl_minixml.h"
139 #include "cpl_error.h"
140 #include "cpl_conv.h"
141 #include "cpl_string.h"
142 
/* Lexical token categories reported by ReadToken(). */
typedef enum {
    TNone          = 0,  /* end of input (also used when a quoted string is unterminated) */
    TString        = 1,  /* quoted value inside a tag, or free text outside of one */
    TOpen          = 2,  /* '<' */
    TClose         = 3,  /* '>' */
    TEqual         = 4,  /* '=' inside a tag */
    TToken         = 5,  /* bare name inside a tag */
    TSlashClose    = 6,  /* "/>" */
    TQuestionClose = 7,  /* "?>" */
    TComment       = 8,  /* text between "<!--" and "-->" */
    TLiteral       = 9   /* "<!DOCTYPE ...>" kept verbatim */
} TokenType;
155 
156 typedef struct {
157     const char *pszInput;
158     int        nInputOffset;
159     int        nInputLine;
160 
161     int        bInElement;
162     TokenType  eTokenType;
163     char       *pszToken;
164     int        nTokenMaxSize;
165     int        nTokenSize;
166 
167     int        nStackMaxSize;
168     int        nStackSize;
169     CPLXMLNode **papsStack;
170 
171     CPLXMLNode *psFirstNode;
172 } ParseContext;
173 
174 /************************************************************************/
175 /*                              ReadChar()                              */
176 /************************************************************************/
177 
ReadChar(ParseContext * psContext)178 static char ReadChar( ParseContext *psContext )
179 
180 {
181     char        chReturn;
182 
183     chReturn = psContext->pszInput[psContext->nInputOffset++];
184 
185     if( chReturn == '\0' )
186         psContext->nInputOffset--;
187     else if( chReturn == 10 )
188         psContext->nInputLine++;
189 
190     return chReturn;
191 }
192 
193 /************************************************************************/
194 /*                             UnreadChar()                             */
195 /************************************************************************/
196 
UnreadChar(ParseContext * psContext,char chToUnread)197 static void UnreadChar( ParseContext *psContext, char chToUnread )
198 
199 {
200     if( chToUnread == '\0' )
201     {
202         /* do nothing */
203     }
204     else
205     {
206         CPLAssert( chToUnread
207                    == psContext->pszInput[psContext->nInputOffset-1] );
208 
209         psContext->nInputOffset--;
210 
211         if( chToUnread == 10 )
212             psContext->nInputLine--;
213     }
214 }
215 
216 /************************************************************************/
217 /*                             AddToToken()                             */
218 /************************************************************************/
219 
AddToToken(ParseContext * psContext,char chNewChar)220 static void AddToToken( ParseContext *psContext, char chNewChar )
221 
222 {
223     if( psContext->pszToken == NULL )
224     {
225         psContext->nTokenMaxSize = 10;
226         psContext->pszToken = (char *) CPLMalloc(psContext->nTokenMaxSize);
227     }
228     else if( psContext->nTokenSize >= psContext->nTokenMaxSize - 2 )
229     {
230         psContext->nTokenMaxSize *= 2;
231         psContext->pszToken = (char *)
232             CPLRealloc(psContext->pszToken,psContext->nTokenMaxSize);
233     }
234 
235     psContext->pszToken[psContext->nTokenSize++] = chNewChar;
236     psContext->pszToken[psContext->nTokenSize] = '\0';
237 }
238 
239 /************************************************************************/
240 /*                             ReadToken()                              */
241 /************************************************************************/
242 
ReadToken(ParseContext * psContext)243 static TokenType ReadToken( ParseContext *psContext )
244 
245 {
246     char        chNext;
247 
248     psContext->nTokenSize = 0;
249     psContext->pszToken[0] = '\0';
250 
251     chNext = ReadChar( psContext );
252     while( isspace(chNext) )
253         chNext = ReadChar( psContext );
254 
255 /* -------------------------------------------------------------------- */
256 /*      Handle comments.                                                */
257 /* -------------------------------------------------------------------- */
258     if( chNext == '<'
259         && EQUALN(psContext->pszInput+psContext->nInputOffset,"!--",3) )
260     {
261         psContext->eTokenType = TComment;
262 
263         // Skip "!--" characters
264         ReadChar(psContext);
265         ReadChar(psContext);
266         ReadChar(psContext);
267 
268         while( !EQUALN(psContext->pszInput+psContext->nInputOffset,"-->",3)
269                && (chNext = ReadChar(psContext)) != '\0' )
270             AddToToken( psContext, chNext );
271 
272         // Skip "-->" characters
273         ReadChar(psContext);
274         ReadChar(psContext);
275         ReadChar(psContext);
276     }
277 /* -------------------------------------------------------------------- */
278 /*      Handle DOCTYPE or other literals.                               */
279 /* -------------------------------------------------------------------- */
280     else if( chNext == '<'
281           && EQUALN(psContext->pszInput+psContext->nInputOffset,"!DOCTYPE",8) )
282     {
283         int   bInQuotes = FALSE;
284         psContext->eTokenType = TLiteral;
285 
286         AddToToken( psContext, '<' );
287         do {
288             chNext = ReadChar(psContext);
289             if( chNext == '\0' )
290             {
291                 CPLError( CE_Failure, CPLE_AppDefined,
292                           "Parse error in DOCTYPE on or before line %d, "
293                           "reached end of file without '>'.",
294                           psContext->nInputLine );
295 
296                 break;
297             }
298 
299             if( chNext == '\"' )
300                 bInQuotes = !bInQuotes;
301 
302              if( chNext == '>' && !bInQuotes )
303             {
304                 AddToToken( psContext, '>' );
305                 break;
306             }
307 
308             AddToToken( psContext, chNext );
309         } while( TRUE );
310     }
311 /* -------------------------------------------------------------------- */
312 /*      Simple single tokens of interest.                               */
313 /* -------------------------------------------------------------------- */
314     else if( chNext == '<' && !psContext->bInElement )
315     {
316         psContext->eTokenType = TOpen;
317         psContext->bInElement = TRUE;
318     }
319     else if( chNext == '>' && psContext->bInElement )
320     {
321         psContext->eTokenType = TClose;
322         psContext->bInElement = FALSE;
323     }
324     else if( chNext == '=' && psContext->bInElement )
325     {
326         psContext->eTokenType = TEqual;
327     }
328     else if( chNext == '\0' )
329     {
330         psContext->eTokenType = TNone;
331     }
332 /* -------------------------------------------------------------------- */
333 /*      Handle the /> token terminator.                                 */
334 /* -------------------------------------------------------------------- */
335     else if( chNext == '/' && psContext->bInElement
336              && psContext->pszInput[psContext->nInputOffset] == '>' )
337     {
338         chNext = ReadChar( psContext );
339         CPLAssert( chNext == '>' );
340 
341         psContext->eTokenType = TSlashClose;
342         psContext->bInElement = FALSE;
343     }
344 /* -------------------------------------------------------------------- */
345 /*      Handle the ?> token terminator.                                 */
346 /* -------------------------------------------------------------------- */
347     else if( chNext == '?' && psContext->bInElement
348              && psContext->pszInput[psContext->nInputOffset] == '>' )
349     {
350         chNext = ReadChar( psContext );
351 
352         CPLAssert( chNext == '>' );
353 
354         psContext->eTokenType = TQuestionClose;
355         psContext->bInElement = FALSE;
356     }
357 
358 /* -------------------------------------------------------------------- */
359 /*      Collect a quoted string.                                        */
360 /* -------------------------------------------------------------------- */
361     else if( psContext->bInElement && chNext == '"' )
362     {
363         psContext->eTokenType = TString;
364 
365         while( (chNext = ReadChar(psContext)) != '"'
366                && chNext != '\0' )
367             AddToToken( psContext, chNext );
368 
369         if( chNext != '"' )
370         {
371             psContext->eTokenType = TNone;
372             CPLError( CE_Failure, CPLE_AppDefined,
373                   "Parse error on line %d, reached EOF before closing quote.",
374                       psContext->nInputLine );
375         }
376 
377         /* Do we need to unescape it? */
378         if( strchr(psContext->pszToken,'&') != NULL )
379         {
380             int  nLength;
381             char *pszUnescaped = CPLUnescapeString( psContext->pszToken,
382                                                     &nLength, CPLES_XML );
383             strcpy( psContext->pszToken, pszUnescaped );
384             CPLFree( pszUnescaped );
385             psContext->nTokenSize = strlen(psContext->pszToken );
386         }
387     }
388 
389     else if( psContext->bInElement && chNext == '\'' )
390     {
391         psContext->eTokenType = TString;
392 
393         while( (chNext = ReadChar(psContext)) != '\''
394                && chNext != '\0' )
395             AddToToken( psContext, chNext );
396 
397         if( chNext != '\'' )
398         {
399             psContext->eTokenType = TNone;
400             CPLError( CE_Failure, CPLE_AppDefined,
401                   "Parse error on line %d, reached EOF before closing quote.",
402                       psContext->nInputLine );
403         }
404 
405         /* Do we need to unescape it? */
406         if( strchr(psContext->pszToken,'&') != NULL )
407         {
408             int  nLength;
409             char *pszUnescaped = CPLUnescapeString( psContext->pszToken,
410                                                     &nLength, CPLES_XML );
411             strcpy( psContext->pszToken, pszUnescaped );
412             CPLFree( pszUnescaped );
413             psContext->nTokenSize = strlen(psContext->pszToken );
414         }
415     }
416 
417 /* -------------------------------------------------------------------- */
418 /*      Collect an unquoted string, terminated by a open angle          */
419 /*      bracket.                                                        */
420 /* -------------------------------------------------------------------- */
421     else if( !psContext->bInElement )
422     {
423         psContext->eTokenType = TString;
424 
425         AddToToken( psContext, chNext );
426         while( (chNext = ReadChar(psContext)) != '<'
427                && chNext != '\0' )
428             AddToToken( psContext, chNext );
429         UnreadChar( psContext, chNext );
430 
431         /* Do we need to unescape it? */
432         if( strchr(psContext->pszToken,'&') != NULL )
433         {
434             int  nLength;
435             char *pszUnescaped = CPLUnescapeString( psContext->pszToken,
436                                                     &nLength, CPLES_XML );
437             strcpy( psContext->pszToken, pszUnescaped );
438             CPLFree( pszUnescaped );
439             psContext->nTokenSize = strlen(psContext->pszToken );
440         }
441     }
442 
443 /* -------------------------------------------------------------------- */
444 /*      Collect a regular token terminated by white space, or           */
445 /*      special character(s) like an equal sign.                        */
446 /* -------------------------------------------------------------------- */
447     else
448     {
449         psContext->eTokenType = TToken;
450 
451         /* add the first character to the token regardless of what it is */
452         AddToToken( psContext, chNext );
453 
454         for( chNext = ReadChar(psContext);
455              (chNext >= 'A' && chNext <= 'Z')
456                  || (chNext >= 'a' && chNext <= 'z')
457                  || chNext == '-'
458                  || chNext == '_'
459                  || chNext == '.'
460                  || chNext == ':'
461                  || (chNext >= '0' && chNext <= '9');
462              chNext = ReadChar(psContext) )
463         {
464             AddToToken( psContext, chNext );
465         }
466 
467         UnreadChar(psContext, chNext);
468     }
469 
470     return psContext->eTokenType;
471 }
472 
473 /************************************************************************/
474 /*                              PushNode()                              */
475 /************************************************************************/
476 
PushNode(ParseContext * psContext,CPLXMLNode * psNode)477 static void PushNode( ParseContext *psContext, CPLXMLNode *psNode )
478 
479 {
480     if( psContext->nStackMaxSize <= psContext->nStackSize )
481     {
482         psContext->nStackMaxSize += 10;
483         psContext->papsStack = (CPLXMLNode **)
484             CPLRealloc(psContext->papsStack,
485                        sizeof(CPLXMLNode*) * psContext->nStackMaxSize);
486     }
487 
488     psContext->papsStack[psContext->nStackSize++] = psNode;
489 }
490 
491 /************************************************************************/
492 /*                             AttachNode()                             */
493 /*                                                                      */
494 /*      Attach the passed node as a child of the current node.          */
495 /*      Special handling exists for adding siblings to psFirst if       */
496 /*      there is nothing on the stack.                                  */
497 /************************************************************************/
498 
AttachNode(ParseContext * psContext,CPLXMLNode * psNode)499 static void AttachNode( ParseContext *psContext, CPLXMLNode *psNode )
500 
501 {
502     if( psContext->psFirstNode == NULL )
503         psContext->psFirstNode = psNode;
504     else if( psContext->nStackSize == 0 )
505     {
506         CPLXMLNode *psSibling;
507 
508         psSibling = psContext->psFirstNode;
509         while( psSibling->psNext != NULL )
510             psSibling = psSibling->psNext;
511         psSibling->psNext = psNode;
512     }
513     else if( psContext->papsStack[psContext->nStackSize-1]->psChild == NULL )
514     {
515         psContext->papsStack[psContext->nStackSize-1]->psChild = psNode;
516     }
517     else
518     {
519         CPLXMLNode *psSibling;
520 
521         psSibling = psContext->papsStack[psContext->nStackSize-1]->psChild;
522         while( psSibling->psNext != NULL )
523             psSibling = psSibling->psNext;
524         psSibling->psNext = psNode;
525     }
526 }
527 
528 /************************************************************************/
529 /*                         CPLParseXMLString()                          */
530 /************************************************************************/
531 
532 /**
533  * Parse an XML string into tree form.
534  *
535  * The passed document is parsed into a CPLXMLNode tree representation.
536  * If the document is not well formed XML then NULL is returned, and errors
537  * are reported via CPLError().  No validation beyond wellformedness is
538  * done.  The CPLParseXMLFile() convenience function can be used to parse
539  * from a file.
540  *
541  * The returned document tree is is owned by the caller and should be freed
542  * with CPLDestroyXMLNode() when no longer needed.
543  *
544  * If the document has more than one "root level" element then those after the
545  * first will be attached to the first as siblings (via the psNext pointers)
546  * even though there is no common parent.  A document with no XML structure
547  * (no angle brackets for instance) would be considered well formed, and
548  * returned as a single CXT_Text node.
549  *
550  * @param pszString the document to parse.
551  *
552  * @return parsed tree or NULL on error.
553  */
554 
CPLParseXMLString(const char * pszString)555 CPLXMLNode *CPLParseXMLString( const char *pszString )
556 
557 {
558     ParseContext sContext;
559 
560     CPLErrorReset();
561 
562     if( pszString == NULL )
563     {
564         CPLError( CE_Failure, CPLE_AppDefined,
565                   "CPLParseXMLString() called with NULL pointer." );
566         return NULL;
567     }
568 
569 /* -------------------------------------------------------------------- */
570 /*      Initialize parse context.                                       */
571 /* -------------------------------------------------------------------- */
572     sContext.pszInput = pszString;
573     sContext.nInputOffset = 0;
574     sContext.nInputLine = 0;
575     sContext.bInElement = FALSE;
576     sContext.pszToken = NULL;
577     sContext.nTokenMaxSize = 0;
578     sContext.nTokenSize = 0;
579     sContext.eTokenType = TNone;
580     sContext.nStackMaxSize = 0;
581     sContext.nStackSize = 0;
582     sContext.papsStack = NULL;
583     sContext.psFirstNode = NULL;
584 
585     /* ensure token is initialized */
586     AddToToken( &sContext, ' ' );
587 
588 /* ==================================================================== */
589 /*      Loop reading tokens.                                            */
590 /* ==================================================================== */
591     while( ReadToken( &sContext ) != TNone )
592     {
593 /* -------------------------------------------------------------------- */
594 /*      Create a new element.                                           */
595 /* -------------------------------------------------------------------- */
596         if( sContext.eTokenType == TOpen )
597         {
598             CPLXMLNode *psElement;
599 
600             if( ReadToken(&sContext) != TToken )
601             {
602                 CPLError( CE_Failure, CPLE_AppDefined,
603                           "Line %d: Didn't find element token after open angle bracket.",
604                           sContext.nInputLine );
605                 break;
606             }
607 
608             if( sContext.pszToken[0] != '/' )
609             {
610                 psElement = CPLCreateXMLNode( NULL, CXT_Element,
611                                               sContext.pszToken );
612                 AttachNode( &sContext, psElement );
613                 PushNode( &sContext, psElement );
614             }
615             else
616             {
617                 if( sContext.nStackSize == 0
618                     || !EQUAL(sContext.pszToken+1,
619                          sContext.papsStack[sContext.nStackSize-1]->pszValue) )
620                 {
621                     CPLError( CE_Failure, CPLE_AppDefined,
622                               "Line %d: <%.500s> doesn't have matching <%.500s>.",
623                               sContext.nInputLine,
624                               sContext.pszToken, sContext.pszToken+1 );
625                     break;
626                 }
627                 else
628                 {
629                     if( ReadToken(&sContext) != TClose )
630                     {
631                         CPLError( CE_Failure, CPLE_AppDefined,
632                                   "Line %d: Missing close angle bracket after <%.500s.",
633                                   sContext.nInputLine,
634                                   sContext.pszToken );
635                         break;
636                     }
637 
638                     /* pop element off stack */
639                     sContext.nStackSize--;
640                 }
641             }
642         }
643 
644 /* -------------------------------------------------------------------- */
645 /*      Add an attribute to a token.                                    */
646 /* -------------------------------------------------------------------- */
647         else if( sContext.eTokenType == TToken )
648         {
649             CPLXMLNode *psAttr;
650 
651             psAttr = CPLCreateXMLNode(NULL, CXT_Attribute, sContext.pszToken);
652             AttachNode( &sContext, psAttr );
653 
654             if( ReadToken(&sContext) != TEqual )
655             {
656                 CPLError( CE_Failure, CPLE_AppDefined,
657                           "Line %d: Didn't find expected '=' for value of attribute '%.500s'.",
658                           sContext.nInputLine, psAttr->pszValue );
659                 break;
660             }
661 
662             if( ReadToken(&sContext) != TString
663                 && sContext.eTokenType != TToken )
664             {
665                 CPLError( CE_Failure, CPLE_AppDefined,
666                           "Line %d: Didn't find expected attribute value.",
667                           sContext.nInputLine );
668                 break;
669             }
670 
671             CPLCreateXMLNode( psAttr, CXT_Text, sContext.pszToken );
672         }
673 
674 /* -------------------------------------------------------------------- */
675 /*      Close the start section of an element.                          */
676 /* -------------------------------------------------------------------- */
677         else if( sContext.eTokenType == TClose )
678         {
679             if( sContext.nStackSize == 0 )
680             {
681                 CPLError( CE_Failure, CPLE_AppDefined,
682                           "Line %d: Found unbalanced '>'.",
683                           sContext.nInputLine );
684                 break;
685             }
686         }
687 
688 /* -------------------------------------------------------------------- */
689 /*      Close the start section of an element, and pop it               */
690 /*      immediately.                                                    */
691 /* -------------------------------------------------------------------- */
692         else if( sContext.eTokenType == TSlashClose )
693         {
694             if( sContext.nStackSize == 0 )
695             {
696                 CPLError( CE_Failure, CPLE_AppDefined,
697                           "Line %d: Found unbalanced '/>'.",
698                           sContext.nInputLine );
699                 break;
700             }
701 
702             sContext.nStackSize--;
703         }
704 
705 /* -------------------------------------------------------------------- */
706 /*      Close the start section of a <?...?> element, and pop it        */
707 /*      immediately.                                                    */
708 /* -------------------------------------------------------------------- */
709         else if( sContext.eTokenType == TQuestionClose )
710         {
711             if( sContext.nStackSize == 0 )
712             {
713                 CPLError( CE_Failure, CPLE_AppDefined,
714                           "Line %d: Found unbalanced '?>'.",
715                           sContext.nInputLine );
716                 break;
717             }
718             else if( sContext.papsStack[sContext.nStackSize-1]->pszValue[0] != '?' )
719             {
720                 CPLError( CE_Failure, CPLE_AppDefined,
721                           "Line %d: Found '?>' without matching '<?'.",
722                           sContext.nInputLine );
723                 break;
724             }
725 
726             sContext.nStackSize--;
727         }
728 
729 /* -------------------------------------------------------------------- */
730 /*      Handle comments.  They are returned as a whole token with the     */
731 /*      prefix and postfix omitted.  No processing of white space       */
732 /*      will be done.                                                   */
733 /* -------------------------------------------------------------------- */
734         else if( sContext.eTokenType == TComment )
735         {
736             CPLXMLNode *psValue;
737 
738             psValue = CPLCreateXMLNode(NULL, CXT_Comment, sContext.pszToken);
739             AttachNode( &sContext, psValue );
740         }
741 
742 /* -------------------------------------------------------------------- */
743 /*      Handle literals.  They are returned without processing.         */
744 /* -------------------------------------------------------------------- */
745         else if( sContext.eTokenType == TLiteral )
746         {
747             CPLXMLNode *psValue;
748 
749             psValue = CPLCreateXMLNode(NULL, CXT_Literal, sContext.pszToken);
750             AttachNode( &sContext, psValue );
751         }
752 
753 /* -------------------------------------------------------------------- */
754 /*      Add a text value node as a child of the current element.        */
755 /* -------------------------------------------------------------------- */
756         else if( sContext.eTokenType == TString && !sContext.bInElement )
757         {
758             CPLXMLNode *psValue;
759 
760             psValue = CPLCreateXMLNode(NULL, CXT_Text, sContext.pszToken);
761             AttachNode( &sContext, psValue );
762         }
763 /* -------------------------------------------------------------------- */
764 /*      Anything else is an error.                                      */
765 /* -------------------------------------------------------------------- */
766         else
767         {
768             CPLError( CE_Failure, CPLE_AppDefined,
769                       "Parse error at line %d, unexpected token:%.500s\n",
770                       sContext.nInputLine, sContext.pszToken );
771             break;
772         }
773     }
774 
775 /* -------------------------------------------------------------------- */
776 /*      Did we pop all the way out of our stack?                        */
777 /* -------------------------------------------------------------------- */
778     if( CPLGetLastErrorType() == CE_None && sContext.nStackSize != 0 )
779     {
780         CPLError( CE_Failure, CPLE_AppDefined,
781                   "Parse error at EOF, not all elements have been closed,\n"
782                   "starting with %.500s\n",
783                   sContext.papsStack[sContext.nStackSize-1]->pszValue );
784     }
785 
786 /* -------------------------------------------------------------------- */
787 /*      Cleanup                                                         */
788 /* -------------------------------------------------------------------- */
789     CPLFree( sContext.pszToken );
790     if( sContext.papsStack != NULL )
791         CPLFree( sContext.papsStack );
792 
793     if( CPLGetLastErrorType() != CE_None )
794     {
795         CPLDestroyXMLNode( sContext.psFirstNode );
796         sContext.psFirstNode = NULL;
797     }
798 
799     return sContext.psFirstNode;
800 }
801 
802 /************************************************************************/
803 /*                            _GrowBuffer()                             */
804 /************************************************************************/
805 
_GrowBuffer(unsigned int nNeeded,char ** ppszText,unsigned int * pnMaxLength)806 static void _GrowBuffer( unsigned int nNeeded,
807                          char **ppszText, unsigned int *pnMaxLength )
808 
809 {
810     if( nNeeded+1 >= *pnMaxLength )
811     {
812         *pnMaxLength = MAX(*pnMaxLength * 2,nNeeded+1);
813         *ppszText = (char *) CPLRealloc(*ppszText, *pnMaxLength);
814     }
815 }
816 
817 /************************************************************************/
818 /*                        CPLSerializeXMLNode()                         */
819 /************************************************************************/
820 
821 static void
CPLSerializeXMLNode(CPLXMLNode * psNode,int nIndent,char ** ppszText,unsigned int * pnLength,unsigned int * pnMaxLength)822 CPLSerializeXMLNode( CPLXMLNode *psNode, int nIndent,
823                      char **ppszText, unsigned int *pnLength,
824                      unsigned int *pnMaxLength )
825 
826 {
827     if( psNode == NULL )
828         return;
829 
830 /* -------------------------------------------------------------------- */
831 /*      Ensure the buffer is plenty large to hold this additional       */
832 /*      string.                                                         */
833 /* -------------------------------------------------------------------- */
834     *pnLength += strlen(*ppszText + *pnLength);
835     _GrowBuffer( strlen(psNode->pszValue) + *pnLength + 40 + nIndent,
836                  ppszText, pnMaxLength );
837 
838 /* -------------------------------------------------------------------- */
839 /*      Text is just directly emitted.                                  */
840 /* -------------------------------------------------------------------- */
841     if( psNode->eType == CXT_Text )
842     {
843         char *pszEscaped = CPLEscapeString( psNode->pszValue, -1, CPLES_XML );
844 
845         CPLAssert( psNode->psChild == NULL );
846 
847         /* Escaped text might be bigger than expected. */
848         _GrowBuffer( strlen(pszEscaped) + *pnLength,
849                      ppszText, pnMaxLength );
850         strcat( *ppszText + *pnLength, pszEscaped );
851 
852         CPLFree( pszEscaped );
853     }
854 
855 /* -------------------------------------------------------------------- */
856 /*      Attributes require a little formatting.                         */
857 /* -------------------------------------------------------------------- */
858     else if( psNode->eType == CXT_Attribute )
859     {
860         CPLAssert( psNode->psChild != NULL
861                    && psNode->psChild->eType == CXT_Text );
862 
863         sprintf( *ppszText + *pnLength, " %s=\"", psNode->pszValue );
864         CPLSerializeXMLNode( psNode->psChild, 0, ppszText,
865                              pnLength, pnMaxLength );
866         strcat( *ppszText + *pnLength, "\"" );
867     }
868 
869 /* -------------------------------------------------------------------- */
870 /*      Handle comment output.                                          */
871 /* -------------------------------------------------------------------- */
872     else if( psNode->eType == CXT_Comment )
873     {
874         int     i;
875 
876         CPLAssert( psNode->psChild == NULL );
877 
878         for( i = 0; i < nIndent; i++ )
879             (*ppszText)[(*pnLength)++] = ' ';
880 
881         sprintf( *ppszText + *pnLength, "<!--%s-->\n",
882                  psNode->pszValue );
883     }
884 
885 /* -------------------------------------------------------------------- */
886 /*      Handle literal output (like <!DOCTYPE...>)                      */
887 /* -------------------------------------------------------------------- */
888     else if( psNode->eType == CXT_Literal )
889     {
890         int     i;
891 
892         CPLAssert( psNode->psChild == NULL );
893 
894         for( i = 0; i < nIndent; i++ )
895             (*ppszText)[(*pnLength)++] = ' ';
896 
897         strcpy( *ppszText + *pnLength, psNode->pszValue );
898         strcat( *ppszText + *pnLength, "\n" );
899     }
900 
901 /* -------------------------------------------------------------------- */
902 /*      Elements actually have to deal with general children, and       */
903 /*      various formatting issues.                                      */
904 /* -------------------------------------------------------------------- */
905     else if( psNode->eType == CXT_Element )
906     {
907         int             bHasNonAttributeChildren = FALSE;
908         CPLXMLNode      *psChild;
909 
910         if(nIndent)
911             memset( *ppszText + *pnLength, ' ', nIndent );
912         *pnLength += nIndent;
913         (*ppszText)[*pnLength] = '\0';
914 
915         sprintf( *ppszText + *pnLength, "<%s", psNode->pszValue );
916 
917         /* Serialize *all* the attribute children, regardless of order */
918         for( psChild = psNode->psChild;
919              psChild != NULL;
920              psChild = psChild->psNext )
921         {
922             if( psChild->eType == CXT_Attribute )
923                 CPLSerializeXMLNode( psChild, 0, ppszText, pnLength,
924                                      pnMaxLength );
925             else
926                 bHasNonAttributeChildren = TRUE;
927         }
928 
929         if( !bHasNonAttributeChildren )
930         {
931             if( psNode->pszValue[0] == '?' )
932                 strcat( *ppszText + *pnLength, "?>\n" );
933             else
934                 strcat( *ppszText + *pnLength, "/>\n" );
935         }
936         else
937         {
938             int         bJustText = TRUE;
939 
940             strcat( *ppszText + *pnLength, ">" );
941 
942             for( psChild = psNode->psChild;
943                  psChild != NULL;
944                  psChild = psChild->psNext )
945             {
946                 if( psChild->eType == CXT_Attribute )
947                     continue;
948 
949                 if( psChild->eType != CXT_Text && bJustText )
950                 {
951                     bJustText = FALSE;
952                     strcat( *ppszText + *pnLength, "\n" );
953                 }
954 
955                 CPLSerializeXMLNode( psChild, nIndent + 2, ppszText, pnLength,
956                                      pnMaxLength );
957             }
958 
959             *pnLength += strlen(*ppszText + *pnLength);
960             _GrowBuffer( strlen(psNode->pszValue) + *pnLength + 40 + nIndent,
961                          ppszText, pnMaxLength );
962 
963             if( !bJustText )
964             {
965                 if(nIndent)
966                     memset( *ppszText + *pnLength, ' ', nIndent );
967                 *pnLength += nIndent;
968                 (*ppszText)[*pnLength] = '\0';
969             }
970 
971             *pnLength += strlen(*ppszText + *pnLength);
972             sprintf( *ppszText + *pnLength, "</%s>\n", psNode->pszValue );
973         }
974     }
975 }
976 
977 /************************************************************************/
978 /*                        CPLSerializeXMLTree()                         */
979 /************************************************************************/
980 
981 /**
982  * Convert tree into string document.
983  *
984  * This function converts a CPLXMLNode tree representation of a document
985  * into a flat string representation.  White space indentation is used
986  * visually preserve the tree structure of the document.  The returned
987  * document becomes owned by the caller and should be freed with CPLFree()
988  * when no longer needed.
989  *
990  * @param psNode
991  *
992  * @return the document on success or NULL on failure.
993  */
994 
CPLSerializeXMLTree(CPLXMLNode * psNode)995 char *CPLSerializeXMLTree( CPLXMLNode *psNode )
996 
997 {
998     unsigned int nMaxLength = 100, nLength = 0;
999     char *pszText = NULL;
1000     CPLXMLNode *psThis;
1001 
1002     pszText = (char *) CPLMalloc(nMaxLength);
1003     pszText[0] = '\0';
1004 
1005     for( psThis = psNode; psThis != NULL; psThis = psThis->psNext )
1006         CPLSerializeXMLNode( psThis, 0, &pszText, &nLength, &nMaxLength );
1007 
1008     return pszText;
1009 }
1010 
1011 /************************************************************************/
1012 /*                          CPLCreateXMLNode()                          */
1013 /************************************************************************/
1014 
1015 /**
1016  * Create an document tree item.
1017  *
1018  * Create a single CPLXMLNode object with the desired value and type, and
1019  * attach it as a child of the indicated parent.
1020  *
1021  * @param poParent the parent to which this node should be attached as a
1022  * child.  May be NULL to keep as free standing.
1023  *
1024  * @return the newly created node, now owned by the caller (or parent node).
1025  */
1026 
CPLCreateXMLNode(CPLXMLNode * poParent,CPLXMLNodeType eType,const char * pszText)1027 CPLXMLNode *CPLCreateXMLNode( CPLXMLNode *poParent, CPLXMLNodeType eType,
1028                               const char *pszText )
1029 
1030 {
1031     CPLXMLNode  *psNode;
1032 
1033 /* -------------------------------------------------------------------- */
1034 /*      Create new node.                                                */
1035 /* -------------------------------------------------------------------- */
1036     psNode = (CPLXMLNode *) CPLCalloc(sizeof(CPLXMLNode),1);
1037 
1038     psNode->eType = eType;
1039     psNode->pszValue = CPLStrdup( pszText );
1040 
1041 /* -------------------------------------------------------------------- */
1042 /*      Attach to parent, if provided.                                  */
1043 /* -------------------------------------------------------------------- */
1044     if( poParent != NULL )
1045     {
1046         if( poParent->psChild == NULL )
1047             poParent->psChild = psNode;
1048         else
1049         {
1050             CPLXMLNode  *psLink = poParent->psChild;
1051 
1052             while( psLink->psNext != NULL )
1053                 psLink = psLink->psNext;
1054 
1055             psLink->psNext = psNode;
1056         }
1057     }
1058 
1059     return psNode;
1060 }
1061 
1062 /************************************************************************/
1063 /*                         CPLDestroyXMLNode()                          */
1064 /************************************************************************/
1065 
1066 /**
1067  * Destroy a tree.
1068  *
1069  * This function frees resources associated with a CPLXMLNode and all its
1070  * children nodes.
1071  *
1072  * @param psNode the tree to free.
1073  */
1074 
CPLDestroyXMLNode(CPLXMLNode * psNode)1075 void CPLDestroyXMLNode( CPLXMLNode *psNode )
1076 
1077 {
1078     if( psNode->psChild != NULL )
1079         CPLDestroyXMLNode( psNode->psChild );
1080 
1081     if( psNode->psNext != NULL )
1082         CPLDestroyXMLNode( psNode->psNext );
1083 
1084     CPLFree( psNode->pszValue );
1085     CPLFree( psNode );
1086 }
1087 
1088 /************************************************************************/
1089 /*                           CPLGetXMLNode()                            */
1090 /************************************************************************/
1091 
1092 /**
1093  * Find node by path.
1094  *
1095  * Searches the document or subdocument indicated by psRoot for an element
1096  * (or attribute) with the given path.  The path should consist of a set of
1097  * element names separated by dots, not including the name of the root
1098  * element (psRoot).  If the requested element is not found NULL is returned.
1099  *
1100  * Attribute names may only appear as the last item in the path.
1101  *
1102  * The search is done from the root nodes children, but all intermediate
1103  * nodes in the path must be specified.  Seaching for "name" would only find
1104  * a name element or attribute if it is a direct child of the root, not at any
1105  * level in the subdocument.
1106  *
1107  * If the pszPath is prefixed by "=" then the search will begin with the
1108  * root node, and it's siblings, instead of the root nodes children.  This
1109  * is particularly useful when searching within a whole document which is
1110  * often prefixed by one or more "junk" nodes like the <?xml> declaration.
1111  *
1112  * @param psRoot the subtree in which to search.  This should be a node of
1113  * type CXT_Element.  NULL is safe.
1114  *
1115  * @param pszPath the list of element names in the path (dot separated).
1116  *
1117  * @return the requested element node, or NULL if not found.
1118  */
1119 
CPLGetXMLNode(CPLXMLNode * psRoot,const char * pszPath)1120 CPLXMLNode *CPLGetXMLNode( CPLXMLNode *psRoot, const char *pszPath )
1121 
1122 {
1123     char        **papszTokens;
1124     int         iToken = 0;
1125     int         bSideSearch = FALSE;
1126 
1127     if( psRoot == NULL )
1128         return NULL;
1129 
1130     if( *pszPath == '=' )
1131     {
1132         bSideSearch = TRUE;
1133         pszPath++;
1134     }
1135 
1136     papszTokens = CSLTokenizeStringComplex( pszPath, ".", FALSE, FALSE );
1137 
1138     while( papszTokens[iToken] != NULL && psRoot != NULL )
1139     {
1140         CPLXMLNode *psChild;
1141 
1142         if( bSideSearch )
1143         {
1144             psChild = psRoot;
1145             bSideSearch = FALSE;
1146         }
1147         else
1148             psChild = psRoot->psChild;
1149 
1150         for( ; psChild != NULL; psChild = psChild->psNext )
1151         {
1152             if( psChild->eType != CXT_Text
1153                 && EQUAL(papszTokens[iToken],psChild->pszValue) )
1154                 break;
1155         }
1156 
1157         if( psChild == NULL )
1158         {
1159             psRoot = NULL;
1160             break;
1161         }
1162 
1163         psRoot = psChild;
1164         iToken++;
1165     }
1166 
1167     CSLDestroy( papszTokens );
1168     return psRoot;
1169 }
1170 
1171 /************************************************************************/
1172 /*                           CPLGetXMLValue()                           */
1173 /************************************************************************/
1174 
1175 /**
1176  * Fetch element/attribute value.
1177  *
1178  * Searches the document for the element/attribute value associated with
1179  * the path.  The corresponding node is internally found with CPLGetXMLNode()
1180  * (see there for details on path handling).  Once found, the value is
1181  * considered to be the first CXT_Text child of the node.
1182  *
1183  * If the attribute/element search fails, or if the found node has not
1184  * value then the passed default value is returned.
1185  *
1186  * The returned value points to memory within the document tree, and should
1187  * not be altered or freed.
1188  *
1189  * @param psRoot the subtree in which to search.  This should be a node of
1190  * type CXT_Element.  NULL is safe.
1191  *
1192  * @param pszPath the list of element names in the path (dot separated).
1193  *
1194  * @param pszDefault the value to return if a corresponding value is not
1195  * found, may be NULL.
1196  *
1197  * @return the requested value or pszDefault if not found.
1198  */
1199 
CPLGetXMLValue(CPLXMLNode * poRoot,const char * pszPath,const char * pszDefault)1200 const char *CPLGetXMLValue( CPLXMLNode *poRoot, const char *pszPath,
1201                             const char *pszDefault )
1202 
1203 {
1204     CPLXMLNode  *psTarget;
1205 
1206     psTarget = CPLGetXMLNode( poRoot, pszPath );
1207     if( psTarget == NULL )
1208         return pszDefault;
1209 
1210     if( psTarget->eType == CXT_Attribute )
1211     {
1212         CPLAssert( psTarget->psChild != NULL
1213                    && psTarget->psChild->eType == CXT_Text );
1214 
1215         return psTarget->psChild->pszValue;
1216     }
1217 
1218     if( psTarget->eType == CXT_Element )
1219     {
1220         // Find first non-attribute child, and verify it is a single text
1221         // with no siblings
1222 
1223         psTarget = psTarget->psChild;
1224 
1225         while( psTarget != NULL && psTarget->eType == CXT_Attribute )
1226             psTarget = psTarget->psNext;
1227 
1228         if( psTarget != NULL
1229             && psTarget->eType == CXT_Text
1230             && psTarget->psNext == NULL )
1231             return psTarget->pszValue;
1232     }
1233 
1234     return pszDefault;
1235 }
1236 
1237 /************************************************************************/
1238 /*                           CPLAddXMLChild()                           */
1239 /************************************************************************/
1240 
1241 /**
1242  * Add child node to parent.
1243  *
1244  * The passed child is added to the list of children of the indicated
1245  * parent.  Normally the child is added at the end of the parents child
1246  * list, but attributes (CXT_Attribute) will be inserted after any other
1247  * attributes but before any other element type.  Ownership of the child
1248  * node is effectively assumed by the parent node.   If the child has
1249  * siblings (it's psNext is not NULL) they will be trimmed, but if the child
1250  * has children they are carried with it.
1251  *
1252  * @param psParent the node to attach the child to.  May not be NULL.
1253  *
1254  * @param psChild the child to add to the parent.  May not be NULL.  Should
1255  * not be a child of any other parent.
1256  */
1257 
CPLAddXMLChild(CPLXMLNode * psParent,CPLXMLNode * psChild)1258 void CPLAddXMLChild( CPLXMLNode *psParent, CPLXMLNode *psChild )
1259 
1260 {
1261     CPLXMLNode *psSib;
1262 
1263     CPLAssert( psChild->psNext == NULL );
1264     psChild->psNext = NULL;
1265 
1266     if( psParent->psChild == NULL )
1267     {
1268         psParent->psChild = psChild;
1269         return;
1270     }
1271 
1272     // Insert at head of list if first child is not attribute.
1273     if( psChild->eType == CXT_Attribute
1274         && psParent->psChild->eType != CXT_Attribute )
1275     {
1276         psChild->psNext = psParent->psChild;
1277         psParent->psChild = psChild;
1278         return;
1279     }
1280 
1281     // Search for end of list.
1282     for( psSib = psParent->psChild;
1283          psSib->psNext != NULL;
1284          psSib = psSib->psNext )
1285     {
1286         // Insert attributes if the next node is not an attribute.
1287         if( psChild->eType == CXT_Attribute
1288             && psSib->psNext != NULL
1289             && psSib->psNext->eType != CXT_Attribute )
1290         {
1291             psChild->psNext = psSib->psNext;
1292             psSib->psNext = psChild;
1293             return;
1294         }
1295     }
1296 
1297     psSib->psNext = psChild;
1298 }
1299 
1300 /************************************************************************/
1301 /*                          CPLAddXMLSibling()                          */
1302 /************************************************************************/
1303 
1304 /**
1305  * Add new sibling.
1306  *
1307  * The passed psNewSibling is added to the end of siblings of the
1308  * psOlderSibling node.  That is, it is added to the end of the psNext
1309  * chain.  There is no special handling if psNewSibling is an attribute.
1310  * If this is required, use CPLAddXMLChild().
1311  *
1312  * @param psOlderSibling the node to attach the sibling after.
1313  *
1314  * @param psNewSibling the node to add at the end of psOlderSiblings psNext
1315  * chain.
1316  */
1317 
CPLAddXMLSibling(CPLXMLNode * psOlderSibling,CPLXMLNode * psNewSibling)1318 void CPLAddXMLSibling( CPLXMLNode *psOlderSibling, CPLXMLNode *psNewSibling )
1319 
1320 {
1321     if( psOlderSibling == NULL )
1322         return;
1323 
1324     while( psOlderSibling->psNext != NULL )
1325         psOlderSibling = psOlderSibling->psNext;
1326 
1327     psOlderSibling->psNext = psNewSibling;
1328 }
1329 
1330 /************************************************************************/
1331 /*                    CPLCreateXMLElementAndValue()                     */
1332 /************************************************************************/
1333 
1334 /**
1335  * Create an element and text value.
1336  *
1337  * This is function is a convenient short form for:
1338  *
1339  *     return CPLCreateXMLNode(
1340  *        CPLCreateXMLNode( psParent, CXT_Element, pszName ),
1341  *        CXT_Text, pszValue );
1342  *
1343  * It creates a CXT_Element node, with a CXT_Text child, and
1344  * attaches the element to the passed parent.
1345  *
1346  * @param psParent the parent node to which the resulting node should
1347  * be attached.  May be NULL to keep as freestanding.
1348  *
1349  * @param pszName the element name to create.
1350  * @param pszValue the text to attach to the element. Must not be NULL.
1351  *
1352  * @return the pointer to the new element node.
1353  */
1354 
CPLCreateXMLElementAndValue(CPLXMLNode * psParent,const char * pszName,const char * pszValue)1355 CPLXMLNode *CPLCreateXMLElementAndValue( CPLXMLNode *psParent,
1356                                          const char *pszName,
1357                                          const char *pszValue )
1358 
1359 {
1360     return CPLCreateXMLNode(
1361         CPLCreateXMLNode( psParent, CXT_Element, pszName ),
1362         CXT_Text, pszValue );
1363 }
1364 
1365 /************************************************************************/
1366 /*                          CPLCloneXMLTree()                           */
1367 /************************************************************************/
1368 
1369 /**
1370  * Copy tree.
1371  *
1372  * Creates a deep copy of a CPLXMLNode tree.
1373  *
1374  * @param psTree the tree to duplicate.
1375  *
1376  * @return a copy of the whole tree.
1377  */
1378 
CPLCloneXMLTree(CPLXMLNode * psTree)1379 CPLXMLNode *CPLCloneXMLTree( CPLXMLNode *psTree )
1380 
1381 {
1382     CPLXMLNode *psPrevious = NULL;
1383     CPLXMLNode *psReturn = NULL;
1384 
1385     while( psTree != NULL )
1386     {
1387         CPLXMLNode *psCopy;
1388 
1389         psCopy = CPLCreateXMLNode( NULL, psTree->eType, psTree->pszValue );
1390         if( psReturn == NULL )
1391             psReturn = psCopy;
1392         if( psPrevious != NULL )
1393             psPrevious->psNext = psCopy;
1394 
1395         if( psTree->psChild != NULL )
1396             psCopy->psChild = CPLCloneXMLTree( psTree->psChild );
1397 
1398         psPrevious = psCopy;
1399         psTree = psTree->psNext;
1400     }
1401 
1402     return psReturn;
1403 }
1404 
1405 /************************************************************************/
1406 /*                           CPLSetXMLValue()                           */
1407 /************************************************************************/
1408 
1409 /**
1410  * Set element value by path.
1411  *
1412  * Find (or create) the target element or attribute specified in the
1413  * path, and assign it the indicated value.
1414  *
1415  * Any path elements that do not already exist will be created.  The target
1416  * nodes value (the first CXT_Text child) will be replaced with the provided
1417  * value.
1418  *
1419  * If the target node is an attribute instead of an element, the last separator
1420  * should be a "#" instead of the normal period path separator.
1421  *
1422  * Example:
1423  *   CPLSetXMLValue( "Citation.Id.Description", "DOQ dataset" );
1424  *   CPLSetXMLValue( "Citation.Id.Description#name", "doq" );
1425  *
1426  * @param psRoot the subdocument to be updated.
1427  *
1428  * @param pszPath the dot seperated path to the target element/attribute.
1429  *
1430  * @param pszValue the text value to assign.
1431  *
1432  * @return TRUE on success.
1433  */
1434 
CPLSetXMLValue(CPLXMLNode * psRoot,const char * pszPath,const char * pszValue)1435 int CPLSetXMLValue( CPLXMLNode *psRoot,  const char *pszPath,
1436                     const char *pszValue )
1437 
1438 {
1439     if( psRoot == NULL )
1440         return FALSE;
1441 
1442     char        **papszTokens;
1443     int         iToken = 0;
1444 
1445     papszTokens = CSLTokenizeStringComplex( pszPath, ".", FALSE, FALSE );
1446 
1447     while( papszTokens[iToken] != NULL && psRoot != NULL )
1448     {
1449         CPLXMLNode *psChild;
1450         int        bIsAttribute = FALSE;
1451         const char *pszName = papszTokens[iToken];
1452 
1453         if( pszName[0] == '#' )
1454         {
1455             bIsAttribute = TRUE;
1456             pszName++;
1457         }
1458 
1459         if( psRoot->eType != CXT_Element )
1460             return FALSE;
1461 
1462         for( psChild = psRoot->psChild; psChild != NULL;
1463              psChild = psChild->psNext )
1464         {
1465             if( psChild->eType != CXT_Text
1466                 && EQUAL(pszName,psChild->pszValue) )
1467                 break;
1468         }
1469 
1470         if( psChild == NULL )
1471         {
1472             if( bIsAttribute )
1473                 psChild = CPLCreateXMLNode( psRoot, CXT_Attribute, pszName );
1474             else
1475                 psChild = CPLCreateXMLNode( psRoot, CXT_Element, pszName );
1476         }
1477 
1478         psRoot = psChild;
1479         iToken++;
1480     }
1481 
1482     CSLDestroy( papszTokens );
1483 
1484 /* -------------------------------------------------------------------- */
1485 /*      Now set a value node under this node.                           */
1486 /* -------------------------------------------------------------------- */
1487     if( psRoot ){
1488         if( psRoot->psChild == NULL )
1489             CPLCreateXMLNode( psRoot, CXT_Text, pszValue );
1490         else if( psRoot->psChild->eType != CXT_Text )
1491             return FALSE;
1492         else
1493         {
1494             CPLFree( psRoot->psChild->pszValue );
1495             psRoot->psChild->pszValue = CPLStrdup( pszValue );
1496         }
1497 
1498         return TRUE;
1499     }
1500     else {
1501         return FALSE;
1502     }
1503 }
1504 
1505 /************************************************************************/
1506 /*                        CPLStripXMLNamespace()                        */
1507 /************************************************************************/
1508 
1509 /**
1510  * Strip indicated namespaces.
1511  *
1512  * The subdocument (psRoot) is recursively examined, and any elements
1513  * with the indicated namespace prefix will have the namespace prefix
1514  * stripped from the element names.  If the passed namespace is NULL, then
1515  * all namespace prefixes will be stripped.
1516  *
1517  * Nodes other than elements should remain unaffected.  The changes are
1518  * made "in place", and should not alter any node locations, only the
1519  * pszValue field of affected nodes.
1520  *
1521  * @param psRoot the document to operate on.
1522  * @param pszNamespace the name space prefix (not including colon), or NULL.
1523  * @param bRecurse TRUE to recurse over whole document, or FALSE to only
1524  * operate on the passed node.
1525  */
1526 
CPLStripXMLNamespace(CPLXMLNode * psRoot,const char * pszNamespace,int bRecurse)1527 void CPLStripXMLNamespace( CPLXMLNode *psRoot,
1528                            const char *pszNamespace,
1529                            int bRecurse )
1530 
1531 {
1532     if( psRoot == NULL )
1533         return;
1534 
1535     if( pszNamespace != NULL )
1536     {
1537         if( psRoot->eType == CXT_Element
1538             && EQUALN(pszNamespace,psRoot->pszValue,strlen(pszNamespace))
1539             && psRoot->pszValue[strlen(pszNamespace)] == ':' )
1540         {
1541             char *pszNewValue =
1542                 CPLStrdup(psRoot->pszValue+strlen(pszNamespace)+1);
1543 
1544             CPLFree( psRoot->pszValue );
1545             psRoot->pszValue = pszNewValue;
1546         }
1547     }
1548     else
1549     {
1550         const char *pszCheck;
1551 
1552         for( pszCheck = psRoot->pszValue; *pszCheck != '\0'; pszCheck++ )
1553         {
1554             if( *pszCheck == ':' )
1555             {
1556                 char *pszNewValue = CPLStrdup( pszCheck+1 );
1557 
1558                 CPLFree( psRoot->pszValue );
1559                 psRoot->pszValue = pszNewValue;
1560                 break;
1561             }
1562         }
1563     }
1564 
1565     if( bRecurse )
1566     {
1567         if( psRoot->psChild != NULL )
1568             CPLStripXMLNamespace( psRoot->psChild, pszNamespace, 1 );
1569         if( psRoot->psNext != NULL )
1570             CPLStripXMLNamespace( psRoot->psNext, pszNamespace, 1 );
1571     }
1572 }
1573 
1574 /************************************************************************/
1575 /*                          CPLParseXMLFile()                           */
1576 /************************************************************************/
1577 
1578 /**
1579  * Parse XML file into tree.
1580  *
1581  * The named file is opened, loaded into memory as a big string, and
1582  * parsed with CPLParseXMLString().  Errors in reading the file or parsing
1583  * the XML will be reported by CPLError().
1584  *
1585  * @param pszFilename the file to open.
1586  *
1587  * @return NULL on failure, or the document tree on success.
1588  */
1589 
CPLParseXMLFile(const char * pszFilename)1590 CPLXMLNode *CPLParseXMLFile( const char *pszFilename )
1591 
1592 {
1593     FILE        *fp;
1594     int nLen;
1595     char *pszDoc;
1596     CPLXMLNode *psTree;
1597 
1598 /* -------------------------------------------------------------------- */
1599 /*      Read the file.                                                  */
1600 /* -------------------------------------------------------------------- */
1601     fp = VSIFOpen( pszFilename, "rb" );
1602     if( fp == NULL )
1603     {
1604         CPLError( CE_Failure, CPLE_OpenFailed,
1605                   "Failed to open %.500s to read.", pszFilename );
1606         return NULL;
1607     }
1608 
1609     VSIFSeek( fp, 0, SEEK_END );
1610     nLen = VSIFTell( fp );
1611     VSIFSeek( fp, 0, SEEK_SET );
1612 
1613     pszDoc = (char *) VSIMalloc(nLen+1);
1614     if( pszDoc == NULL )
1615     {
1616         CPLError( CE_Failure, CPLE_OutOfMemory,
1617                   "Out of memory allocating space for %d byte buffer in\n"
1618                   "CPLParseXMLFile(%.500s).",
1619                   nLen+1, pszFilename );
1620         VSIFClose( fp );
1621         return NULL;
1622     }
1623     if( (int) VSIFRead( pszDoc, 1, nLen, fp ) < nLen )
1624     {
1625         CPLError( CE_Failure, CPLE_FileIO,
1626                   "VSIFRead() result short of expected %d bytes from %.500s.",
1627                   nLen, pszFilename );
1628         pszDoc[0] = '\0';
1629     }
1630     VSIFClose( fp );
1631 
1632     pszDoc[nLen] = '\0';
1633 
1634 /* -------------------------------------------------------------------- */
1635 /*      Parse it.                                                       */
1636 /* -------------------------------------------------------------------- */
1637     psTree = CPLParseXMLString( pszDoc );
1638     CPLFree( pszDoc );
1639 
1640     return psTree;
1641 }
1642 
1643 /************************************************************************/
1644 /*                     CPLSerializeXMLTreeToFile()                      */
1645 /************************************************************************/
1646 
1647 /**
1648  * Write document tree to a file.
1649  *
1650  * The passed document tree is converted into one big string (with
1651  * CPLSerializeXMLTree()) and then written to the named file.  Errors writing
1652  * the file will be reported by CPLError().  The source document tree is
1653  * not altered.  If the output file already exists it will be overwritten.
1654  *
1655  * @param psTree the document tree to write.
1656  * @param pszFilename the name of the file to write to.
1657  */
1658 
CPLSerializeXMLTreeToFile(CPLXMLNode * psTree,const char * pszFilename)1659 int CPLSerializeXMLTreeToFile( CPLXMLNode *psTree, const char *pszFilename )
1660 
1661 {
1662     char *pszDoc;
1663     FILE *fp;
1664     int  nLength;
1665 
1666 /* -------------------------------------------------------------------- */
1667 /*      Serialize document.                                             */
1668 /* -------------------------------------------------------------------- */
1669     pszDoc = CPLSerializeXMLTree( psTree );
1670     if( pszDoc == NULL )
1671         return FALSE;
1672 
1673     nLength = strlen(pszDoc);
1674 
1675 /* -------------------------------------------------------------------- */
1676 /*      Create file.                                                    */
1677 /* -------------------------------------------------------------------- */
1678     fp = VSIFOpen( pszFilename, "wt" );
1679     if( fp == NULL )
1680     {
1681         CPLError( CE_Failure, CPLE_OpenFailed,
1682                   "Failed to open %.500s to write.", pszFilename );
1683         return FALSE;
1684     }
1685 
1686 /* -------------------------------------------------------------------- */
1687 /*      Write file.                                                     */
1688 /* -------------------------------------------------------------------- */
1689     if( (int) VSIFWrite( pszDoc, 1, nLength, fp ) != nLength )
1690     {
1691         CPLError( CE_Failure, CPLE_FileIO,
1692                   "Failed to write whole XML document (%.500s).",
1693                   pszFilename );
1694         VSIFClose( fp );
1695         CPLFree( pszDoc );
1696         return FALSE;
1697     }
1698 
1699 /* -------------------------------------------------------------------- */
1700 /*      Cleanup                                                         */
1701 /* -------------------------------------------------------------------- */
1702     VSIFClose( fp );
1703     CPLFree( pszDoc );
1704 
1705     return TRUE;
1706 }
1707 
1708 /************************************************************************/
1709 /*                       CPLCleanXMLElementName()                       */
1710 /************************************************************************/
1711 
/**
 * Make string into safe XML token.
 *
 * Rewrites a string in place so that it can be used as an XML element
 * name.  Each character is kept if it is a byte with the high bit set,
 * is alphanumeric according to isalnum(), or is an underscore or a
 * period; every other character is overwritten with an underscore.  The
 * length of the string is never changed.  A NULL pointer is accepted and
 * ignored.
 *
 * NOTE: This function should implement the rules in section 2.3 of
 * http://www.w3.org/TR/xml11/ but it doesn't yet do that properly.  We
 * only do a rough approximation of that.
 *
 * @param pszTarget the string to be adjusted.  It is altered in place.
 */

void       CPL_DLL CPLCleanXMLElementName( char *pszTarget )

{
    char *pszCursor;

    if( pszTarget == NULL )
        return;

    for( pszCursor = pszTarget; *pszCursor != '\0'; pszCursor++ )
    {
        const unsigned char chByte = (unsigned char) *pszCursor;

        /* Bytes with the high bit set are passed through untouched. */
        if( chByte & 0x80 )
            continue;

        /* Only 7-bit values reach isalnum(). */
        if( isalnum( chByte ) )
            continue;

        if( chByte == '_' || chByte == '.' )
            continue;

        /* Anything else is not accepted in a name: neutralize it. */
        *pszCursor = '_';
    }
}
1745