1 /**********************************************************************
2 *
3 * Project: CPL - Common Portability Library
4 * Purpose: Implementation of MiniXML Parser and handling.
5 * Author: Frank Warmerdam, warmerdam@pobox.com
6 *
7 **********************************************************************
8 * Copyright (c) 2001, Frank Warmerdam
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a
11 * copy of this software and associated documentation files (the "Software"),
12 * to deal in the Software without restriction, including without limitation
13 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 * and/or sell copies of the Software, and to permit persons to whom the
15 * Software is furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included
18 * in all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 * DEALINGS IN THE SOFTWARE.
27 **********************************************************************
28 *
29 * Independent Security Audit 2003/04/05 Andrey Kiselev:
30 * Completed audit of this module. Any documents may be parsed without
31 * buffer overflows and stack corruptions.
32 *
33 * Security Audit 2003/03/28 warmerda:
34 * Completed security audit. I believe that this module may be safely used
35 * to parse, and serialize arbitrary documents provided by a potentially
36 * hostile source.
37 *
38 * $Log: cpl_minixml.cpp,v $
39 * Revision 1.1.1.1 2006/08/21 05:52:20 dsr
40 * Initial import as opencpn, GNU Automake compliant.
41 *
42 * Revision 1.1.1.1 2006/04/19 03:23:29 dsr
43 * Rename/Import to OpenCPN
44 *
45 * Revision 1.29 2004/01/29 17:01:51 warmerda
46 * Added reference to spec.
47 *
48 * Revision 1.28 2004/01/29 15:29:28 warmerda
49 * Added CPLCleanXMLElementName
50 *
51 * Revision 1.27 2003/12/04 15:46:51 warmerda
52 * Added CPLAddXMLSibling()
53 *
54 * Revision 1.26 2003/12/04 15:19:43 warmerda
55 * Added "=" support for "sidesearching" in a document.
56 *
57 * Revision 1.25 2003/11/07 19:40:19 warmerda
58 * ensure CPLGetXMLValue() works for nodes with attributes
59 *
60 * Revision 1.24 2003/11/05 20:14:21 warmerda
61 * added lots of documentation
62 *
63 * Revision 1.23 2003/05/21 03:32:43 warmerda
64 * expand tabs
65 *
66 * Revision 1.22 2003/04/05 07:12:25 dron
67 * Completed security audit.
68 *
69 * Revision 1.21 2003/03/28 17:38:39 warmerda
70 * Added NULL check in CPLParseXMLString().
71 *
72 * Revision 1.20 2003/03/28 05:05:18 warmerda
73 * Completed security audit. Several bugs related to possible buffer
74 * overruns correct, notably with regard to CPLError() calls.
75 *
76 * Revision 1.19 2003/03/27 18:12:41 warmerda
77 * Added NULL pszNameSpace support in namespace stripper (all namespaces).
78 * Added XML file read/write functions.
79 *
80 * Revision 1.18 2003/03/24 16:47:30 warmerda
81 * Added CPLStripXMLNamespace().
82 * CPLAddXMLChild() will now ensure that attributes are inserted before
83 * non-attributes nodes.
84 *
85 * Revision 1.17 2003/02/14 18:44:29 warmerda
86 * proper tokens may include a dash
87 *
88 * Revision 1.16 2002/11/16 20:42:40 warmerda
89 * improved inline comments
90 *
91 * Revision 1.15 2002/11/16 20:38:34 warmerda
92 * added support for literals like DOCTYPE
93 *
94 * Revision 1.14 2002/07/16 15:06:26 warmerda
95 * ensure that attributes are serialized properly regardless of their order
96 *
97 * Revision 1.13 2002/07/09 20:25:25 warmerda
98 * expand tabs
99 *
100 * Revision 1.12 2002/05/28 18:54:05 warmerda
101 * added escaping/unescaping support
102 *
103 * Revision 1.11 2002/05/24 04:09:10 warmerda
104 * added clone and SetXMLValue functions
105 *
106 * Revision 1.10 2002/04/01 16:08:21 warmerda
107 * allow periods in tokens
108 *
109 * Revision 1.9 2002/03/07 22:19:20 warmerda
110 * don't do operations within CPLAssert(), in UnreadChar()
111 *
112 * Revision 1.8 2002/03/05 14:26:57 warmerda
113 * expanded tabs
114 *
115 * Revision 1.7 2002/01/23 20:45:05 warmerda
116 * handle <?...?> and comment elements
117 *
118 * Revision 1.6 2002/01/22 18:54:48 warmerda
119 * ensure text is property initialized when serializing
120 *
121 * Revision 1.5 2002/01/16 03:58:51 warmerda
122 * support single quotes as well as double quotes
123 *
124 * Revision 1.4 2001/12/06 18:13:49 warmerda
125 * added CPLAddXMLChild and CPLCreateElmentAndValue
126 *
127 * Revision 1.3 2001/11/16 21:20:16 warmerda
128 * fixed typo
129 *
130 * Revision 1.2 2001/11/16 20:29:58 warmerda
131 * fixed lost char in normal CString tokens
132 *
133 * Revision 1.1 2001/11/16 15:39:48 warmerda
134 * New
135 */
136
137 #include <ctype.h>
138 #include "cpl_minixml.h"
139 #include "cpl_error.h"
140 #include "cpl_conv.h"
141 #include "cpl_string.h"
142
/* Kinds of token that ReadToken() can hand back to the parser. */
typedef enum {
    TNone = 0,          /* end of input, or an unterminated quoted string */
    TString,            /* quoted attribute value, or text outside a tag */
    TOpen,              /* '<' */
    TClose,             /* '>' */
    TEqual,             /* '=' inside a tag */
    TToken,             /* bare name inside a tag (element/attribute name) */
    TSlashClose,        /* "/>" */
    TQuestionClose,     /* "?>" */
    TComment,           /* body of a <!-- ... --> comment */
    TLiteral            /* <!DOCTYPE ...> kept verbatim */
} TokenType;
155
156 typedef struct {
157 const char *pszInput;
158 int nInputOffset;
159 int nInputLine;
160
161 int bInElement;
162 TokenType eTokenType;
163 char *pszToken;
164 int nTokenMaxSize;
165 int nTokenSize;
166
167 int nStackMaxSize;
168 int nStackSize;
169 CPLXMLNode **papsStack;
170
171 CPLXMLNode *psFirstNode;
172 } ParseContext;
173
174 /************************************************************************/
175 /* ReadChar() */
176 /************************************************************************/
177
/*
 * Return the next input character and step past it.  The terminating NUL
 * is returned without being consumed, so further calls keep returning it.
 * The line counter is bumped for every line feed that is consumed.
 */
static char ReadChar( ParseContext *psContext )

{
    const char chCurrent = psContext->pszInput[psContext->nInputOffset];

    /* never advance beyond the end of the document */
    if( chCurrent == '\0' )
        return chCurrent;

    psContext->nInputOffset++;

    /* 10 is the ASCII line feed */
    if( chCurrent == 10 )
        psContext->nInputLine++;

    return chCurrent;
}
192
193 /************************************************************************/
194 /* UnreadChar() */
195 /************************************************************************/
196
/*
 * Undo the most recent ReadChar().  chToUnread must be the character that
 * call returned; a NUL was never consumed, so there is nothing to undo.
 */
static void UnreadChar( ParseContext *psContext, char chToUnread )

{
    if( chToUnread == '\0' )
        return;

    CPLAssert( chToUnread
               == psContext->pszInput[psContext->nInputOffset-1] );

    psContext->nInputOffset -= 1;

    /* keep the line counter in step when a line feed is pushed back */
    if( chToUnread == 10 )
        psContext->nInputLine -= 1;
}
215
216 /************************************************************************/
217 /* AddToToken() */
218 /************************************************************************/
219
/*
 * Append one character to the current token and keep it NUL terminated.
 * The buffer is created on first use (10 bytes) and doubled whenever
 * fewer than three bytes remain free.
 */
static void AddToToken( ParseContext *psContext, char chNewChar )

{
    char *pszTail;

    if( psContext->pszToken == NULL )
    {
        psContext->nTokenMaxSize = 10;
        psContext->pszToken = (char *) CPLMalloc(psContext->nTokenMaxSize);
    }
    else if( psContext->nTokenSize + 2 >= psContext->nTokenMaxSize )
    {
        psContext->nTokenMaxSize *= 2;
        psContext->pszToken = (char *)
            CPLRealloc(psContext->pszToken,psContext->nTokenMaxSize);
    }

    /* write the character followed by the new terminator */
    pszTail = psContext->pszToken + psContext->nTokenSize;
    pszTail[0] = chNewChar;
    pszTail[1] = '\0';
    psContext->nTokenSize++;
}
238
239 /************************************************************************/
240 /* ReadToken() */
241 /************************************************************************/
242
ReadToken(ParseContext * psContext)243 static TokenType ReadToken( ParseContext *psContext )
244
245 {
246 char chNext;
247
248 psContext->nTokenSize = 0;
249 psContext->pszToken[0] = '\0';
250
251 chNext = ReadChar( psContext );
252 while( isspace(chNext) )
253 chNext = ReadChar( psContext );
254
255 /* -------------------------------------------------------------------- */
256 /* Handle comments. */
257 /* -------------------------------------------------------------------- */
258 if( chNext == '<'
259 && EQUALN(psContext->pszInput+psContext->nInputOffset,"!--",3) )
260 {
261 psContext->eTokenType = TComment;
262
263 // Skip "!--" characters
264 ReadChar(psContext);
265 ReadChar(psContext);
266 ReadChar(psContext);
267
268 while( !EQUALN(psContext->pszInput+psContext->nInputOffset,"-->",3)
269 && (chNext = ReadChar(psContext)) != '\0' )
270 AddToToken( psContext, chNext );
271
272 // Skip "-->" characters
273 ReadChar(psContext);
274 ReadChar(psContext);
275 ReadChar(psContext);
276 }
277 /* -------------------------------------------------------------------- */
278 /* Handle DOCTYPE or other literals. */
279 /* -------------------------------------------------------------------- */
280 else if( chNext == '<'
281 && EQUALN(psContext->pszInput+psContext->nInputOffset,"!DOCTYPE",8) )
282 {
283 int bInQuotes = FALSE;
284 psContext->eTokenType = TLiteral;
285
286 AddToToken( psContext, '<' );
287 do {
288 chNext = ReadChar(psContext);
289 if( chNext == '\0' )
290 {
291 CPLError( CE_Failure, CPLE_AppDefined,
292 "Parse error in DOCTYPE on or before line %d, "
293 "reached end of file without '>'.",
294 psContext->nInputLine );
295
296 break;
297 }
298
299 if( chNext == '\"' )
300 bInQuotes = !bInQuotes;
301
302 if( chNext == '>' && !bInQuotes )
303 {
304 AddToToken( psContext, '>' );
305 break;
306 }
307
308 AddToToken( psContext, chNext );
309 } while( TRUE );
310 }
311 /* -------------------------------------------------------------------- */
312 /* Simple single tokens of interest. */
313 /* -------------------------------------------------------------------- */
314 else if( chNext == '<' && !psContext->bInElement )
315 {
316 psContext->eTokenType = TOpen;
317 psContext->bInElement = TRUE;
318 }
319 else if( chNext == '>' && psContext->bInElement )
320 {
321 psContext->eTokenType = TClose;
322 psContext->bInElement = FALSE;
323 }
324 else if( chNext == '=' && psContext->bInElement )
325 {
326 psContext->eTokenType = TEqual;
327 }
328 else if( chNext == '\0' )
329 {
330 psContext->eTokenType = TNone;
331 }
332 /* -------------------------------------------------------------------- */
333 /* Handle the /> token terminator. */
334 /* -------------------------------------------------------------------- */
335 else if( chNext == '/' && psContext->bInElement
336 && psContext->pszInput[psContext->nInputOffset] == '>' )
337 {
338 chNext = ReadChar( psContext );
339 CPLAssert( chNext == '>' );
340
341 psContext->eTokenType = TSlashClose;
342 psContext->bInElement = FALSE;
343 }
344 /* -------------------------------------------------------------------- */
345 /* Handle the ?> token terminator. */
346 /* -------------------------------------------------------------------- */
347 else if( chNext == '?' && psContext->bInElement
348 && psContext->pszInput[psContext->nInputOffset] == '>' )
349 {
350 chNext = ReadChar( psContext );
351
352 CPLAssert( chNext == '>' );
353
354 psContext->eTokenType = TQuestionClose;
355 psContext->bInElement = FALSE;
356 }
357
358 /* -------------------------------------------------------------------- */
359 /* Collect a quoted string. */
360 /* -------------------------------------------------------------------- */
361 else if( psContext->bInElement && chNext == '"' )
362 {
363 psContext->eTokenType = TString;
364
365 while( (chNext = ReadChar(psContext)) != '"'
366 && chNext != '\0' )
367 AddToToken( psContext, chNext );
368
369 if( chNext != '"' )
370 {
371 psContext->eTokenType = TNone;
372 CPLError( CE_Failure, CPLE_AppDefined,
373 "Parse error on line %d, reached EOF before closing quote.",
374 psContext->nInputLine );
375 }
376
377 /* Do we need to unescape it? */
378 if( strchr(psContext->pszToken,'&') != NULL )
379 {
380 int nLength;
381 char *pszUnescaped = CPLUnescapeString( psContext->pszToken,
382 &nLength, CPLES_XML );
383 strcpy( psContext->pszToken, pszUnescaped );
384 CPLFree( pszUnescaped );
385 psContext->nTokenSize = strlen(psContext->pszToken );
386 }
387 }
388
389 else if( psContext->bInElement && chNext == '\'' )
390 {
391 psContext->eTokenType = TString;
392
393 while( (chNext = ReadChar(psContext)) != '\''
394 && chNext != '\0' )
395 AddToToken( psContext, chNext );
396
397 if( chNext != '\'' )
398 {
399 psContext->eTokenType = TNone;
400 CPLError( CE_Failure, CPLE_AppDefined,
401 "Parse error on line %d, reached EOF before closing quote.",
402 psContext->nInputLine );
403 }
404
405 /* Do we need to unescape it? */
406 if( strchr(psContext->pszToken,'&') != NULL )
407 {
408 int nLength;
409 char *pszUnescaped = CPLUnescapeString( psContext->pszToken,
410 &nLength, CPLES_XML );
411 strcpy( psContext->pszToken, pszUnescaped );
412 CPLFree( pszUnescaped );
413 psContext->nTokenSize = strlen(psContext->pszToken );
414 }
415 }
416
417 /* -------------------------------------------------------------------- */
418 /* Collect an unquoted string, terminated by a open angle */
419 /* bracket. */
420 /* -------------------------------------------------------------------- */
421 else if( !psContext->bInElement )
422 {
423 psContext->eTokenType = TString;
424
425 AddToToken( psContext, chNext );
426 while( (chNext = ReadChar(psContext)) != '<'
427 && chNext != '\0' )
428 AddToToken( psContext, chNext );
429 UnreadChar( psContext, chNext );
430
431 /* Do we need to unescape it? */
432 if( strchr(psContext->pszToken,'&') != NULL )
433 {
434 int nLength;
435 char *pszUnescaped = CPLUnescapeString( psContext->pszToken,
436 &nLength, CPLES_XML );
437 strcpy( psContext->pszToken, pszUnescaped );
438 CPLFree( pszUnescaped );
439 psContext->nTokenSize = strlen(psContext->pszToken );
440 }
441 }
442
443 /* -------------------------------------------------------------------- */
444 /* Collect a regular token terminated by white space, or */
445 /* special character(s) like an equal sign. */
446 /* -------------------------------------------------------------------- */
447 else
448 {
449 psContext->eTokenType = TToken;
450
451 /* add the first character to the token regardless of what it is */
452 AddToToken( psContext, chNext );
453
454 for( chNext = ReadChar(psContext);
455 (chNext >= 'A' && chNext <= 'Z')
456 || (chNext >= 'a' && chNext <= 'z')
457 || chNext == '-'
458 || chNext == '_'
459 || chNext == '.'
460 || chNext == ':'
461 || (chNext >= '0' && chNext <= '9');
462 chNext = ReadChar(psContext) )
463 {
464 AddToToken( psContext, chNext );
465 }
466
467 UnreadChar(psContext, chNext);
468 }
469
470 return psContext->eTokenType;
471 }
472
473 /************************************************************************/
474 /* PushNode() */
475 /************************************************************************/
476
/*
 * Push psNode onto the stack of open elements.  The stack array is
 * enlarged by ten slots whenever it is full.
 */
static void PushNode( ParseContext *psContext, CPLXMLNode *psNode )

{
    const int bStackFull =
        psContext->nStackSize >= psContext->nStackMaxSize;

    if( bStackFull )
    {
        psContext->nStackMaxSize += 10;
        psContext->papsStack = (CPLXMLNode **)
            CPLRealloc(psContext->papsStack,
                       sizeof(CPLXMLNode*) * psContext->nStackMaxSize);
    }

    psContext->papsStack[psContext->nStackSize] = psNode;
    psContext->nStackSize++;
}
490
491 /************************************************************************/
492 /* AttachNode() */
493 /* */
494 /* Attach the passed node as a child of the current node. */
495 /* Special handling exists for adding siblings to psFirst if */
496 /* there is nothing on the stack. */
497 /************************************************************************/
498
/*
 * Hook psNode into the tree being built: it becomes the first node if
 * there is none yet, otherwise the last root level sibling when no
 * element is open, otherwise the last child of the innermost open element.
 */
static void AttachNode( ParseContext *psContext, CPLXMLNode *psNode )

{
    CPLXMLNode **ppsLink;

    if( psContext->psFirstNode == NULL )
    {
        psContext->psFirstNode = psNode;
        return;
    }

    /* pick the head of the chain the node has to be appended to */
    if( psContext->nStackSize == 0 )
        ppsLink = &(psContext->psFirstNode->psNext);
    else
        ppsLink = &(psContext->papsStack[psContext->nStackSize-1]->psChild);

    /* walk to the first empty link and fill it */
    while( *ppsLink != NULL )
        ppsLink = &((*ppsLink)->psNext);

    *ppsLink = psNode;
}
527
528 /************************************************************************/
529 /* CPLParseXMLString() */
530 /************************************************************************/
531
532 /**
533 * Parse an XML string into tree form.
534 *
535 * The passed document is parsed into a CPLXMLNode tree representation.
536 * If the document is not well formed XML then NULL is returned, and errors
537 * are reported via CPLError(). No validation beyond wellformedness is
538 * done. The CPLParseXMLFile() convenience function can be used to parse
539 * from a file.
540 *
541 * The returned document tree is owned by the caller and should be freed
542 * with CPLDestroyXMLNode() when no longer needed.
543 *
544 * If the document has more than one "root level" element then those after the
545 * first will be attached to the first as siblings (via the psNext pointers)
546 * even though there is no common parent. A document with no XML structure
547 * (no angle brackets for instance) would be considered well formed, and
548 * returned as a single CXT_Text node.
549 *
550 * @param pszString the document to parse.
551 *
552 * @return parsed tree or NULL on error.
553 */
554
CPLParseXMLString(const char * pszString)555 CPLXMLNode *CPLParseXMLString( const char *pszString )
556
557 {
558 ParseContext sContext;
559
560 CPLErrorReset();
561
562 if( pszString == NULL )
563 {
564 CPLError( CE_Failure, CPLE_AppDefined,
565 "CPLParseXMLString() called with NULL pointer." );
566 return NULL;
567 }
568
569 /* -------------------------------------------------------------------- */
570 /* Initialize parse context. */
571 /* -------------------------------------------------------------------- */
572 sContext.pszInput = pszString;
573 sContext.nInputOffset = 0;
574 sContext.nInputLine = 0;
575 sContext.bInElement = FALSE;
576 sContext.pszToken = NULL;
577 sContext.nTokenMaxSize = 0;
578 sContext.nTokenSize = 0;
579 sContext.eTokenType = TNone;
580 sContext.nStackMaxSize = 0;
581 sContext.nStackSize = 0;
582 sContext.papsStack = NULL;
583 sContext.psFirstNode = NULL;
584
585 /* ensure token is initialized */
586 AddToToken( &sContext, ' ' );
587
588 /* ==================================================================== */
589 /* Loop reading tokens. */
590 /* ==================================================================== */
591 while( ReadToken( &sContext ) != TNone )
592 {
593 /* -------------------------------------------------------------------- */
594 /* Create a new element. */
595 /* -------------------------------------------------------------------- */
596 if( sContext.eTokenType == TOpen )
597 {
598 CPLXMLNode *psElement;
599
600 if( ReadToken(&sContext) != TToken )
601 {
602 CPLError( CE_Failure, CPLE_AppDefined,
603 "Line %d: Didn't find element token after open angle bracket.",
604 sContext.nInputLine );
605 break;
606 }
607
608 if( sContext.pszToken[0] != '/' )
609 {
610 psElement = CPLCreateXMLNode( NULL, CXT_Element,
611 sContext.pszToken );
612 AttachNode( &sContext, psElement );
613 PushNode( &sContext, psElement );
614 }
615 else
616 {
617 if( sContext.nStackSize == 0
618 || !EQUAL(sContext.pszToken+1,
619 sContext.papsStack[sContext.nStackSize-1]->pszValue) )
620 {
621 CPLError( CE_Failure, CPLE_AppDefined,
622 "Line %d: <%.500s> doesn't have matching <%.500s>.",
623 sContext.nInputLine,
624 sContext.pszToken, sContext.pszToken+1 );
625 break;
626 }
627 else
628 {
629 if( ReadToken(&sContext) != TClose )
630 {
631 CPLError( CE_Failure, CPLE_AppDefined,
632 "Line %d: Missing close angle bracket after <%.500s.",
633 sContext.nInputLine,
634 sContext.pszToken );
635 break;
636 }
637
638 /* pop element off stack */
639 sContext.nStackSize--;
640 }
641 }
642 }
643
644 /* -------------------------------------------------------------------- */
645 /* Add an attribute to a token. */
646 /* -------------------------------------------------------------------- */
647 else if( sContext.eTokenType == TToken )
648 {
649 CPLXMLNode *psAttr;
650
651 psAttr = CPLCreateXMLNode(NULL, CXT_Attribute, sContext.pszToken);
652 AttachNode( &sContext, psAttr );
653
654 if( ReadToken(&sContext) != TEqual )
655 {
656 CPLError( CE_Failure, CPLE_AppDefined,
657 "Line %d: Didn't find expected '=' for value of attribute '%.500s'.",
658 sContext.nInputLine, psAttr->pszValue );
659 break;
660 }
661
662 if( ReadToken(&sContext) != TString
663 && sContext.eTokenType != TToken )
664 {
665 CPLError( CE_Failure, CPLE_AppDefined,
666 "Line %d: Didn't find expected attribute value.",
667 sContext.nInputLine );
668 break;
669 }
670
671 CPLCreateXMLNode( psAttr, CXT_Text, sContext.pszToken );
672 }
673
674 /* -------------------------------------------------------------------- */
675 /* Close the start section of an element. */
676 /* -------------------------------------------------------------------- */
677 else if( sContext.eTokenType == TClose )
678 {
679 if( sContext.nStackSize == 0 )
680 {
681 CPLError( CE_Failure, CPLE_AppDefined,
682 "Line %d: Found unbalanced '>'.",
683 sContext.nInputLine );
684 break;
685 }
686 }
687
688 /* -------------------------------------------------------------------- */
689 /* Close the start section of an element, and pop it */
690 /* immediately. */
691 /* -------------------------------------------------------------------- */
692 else if( sContext.eTokenType == TSlashClose )
693 {
694 if( sContext.nStackSize == 0 )
695 {
696 CPLError( CE_Failure, CPLE_AppDefined,
697 "Line %d: Found unbalanced '/>'.",
698 sContext.nInputLine );
699 break;
700 }
701
702 sContext.nStackSize--;
703 }
704
705 /* -------------------------------------------------------------------- */
706 /* Close the start section of a <?...?> element, and pop it */
707 /* immediately. */
708 /* -------------------------------------------------------------------- */
709 else if( sContext.eTokenType == TQuestionClose )
710 {
711 if( sContext.nStackSize == 0 )
712 {
713 CPLError( CE_Failure, CPLE_AppDefined,
714 "Line %d: Found unbalanced '?>'.",
715 sContext.nInputLine );
716 break;
717 }
718 else if( sContext.papsStack[sContext.nStackSize-1]->pszValue[0] != '?' )
719 {
720 CPLError( CE_Failure, CPLE_AppDefined,
721 "Line %d: Found '?>' without matching '<?'.",
722 sContext.nInputLine );
723 break;
724 }
725
726 sContext.nStackSize--;
727 }
728
729 /* -------------------------------------------------------------------- */
730 /* Handle comments. They are returned as a whole token with the */
731 /* prefix and postfix omitted. No processing of white space */
732 /* will be done. */
733 /* -------------------------------------------------------------------- */
734 else if( sContext.eTokenType == TComment )
735 {
736 CPLXMLNode *psValue;
737
738 psValue = CPLCreateXMLNode(NULL, CXT_Comment, sContext.pszToken);
739 AttachNode( &sContext, psValue );
740 }
741
742 /* -------------------------------------------------------------------- */
743 /* Handle literals. They are returned without processing. */
744 /* -------------------------------------------------------------------- */
745 else if( sContext.eTokenType == TLiteral )
746 {
747 CPLXMLNode *psValue;
748
749 psValue = CPLCreateXMLNode(NULL, CXT_Literal, sContext.pszToken);
750 AttachNode( &sContext, psValue );
751 }
752
753 /* -------------------------------------------------------------------- */
754 /* Add a text value node as a child of the current element. */
755 /* -------------------------------------------------------------------- */
756 else if( sContext.eTokenType == TString && !sContext.bInElement )
757 {
758 CPLXMLNode *psValue;
759
760 psValue = CPLCreateXMLNode(NULL, CXT_Text, sContext.pszToken);
761 AttachNode( &sContext, psValue );
762 }
763 /* -------------------------------------------------------------------- */
764 /* Anything else is an error. */
765 /* -------------------------------------------------------------------- */
766 else
767 {
768 CPLError( CE_Failure, CPLE_AppDefined,
769 "Parse error at line %d, unexpected token:%.500s\n",
770 sContext.nInputLine, sContext.pszToken );
771 break;
772 }
773 }
774
775 /* -------------------------------------------------------------------- */
776 /* Did we pop all the way out of our stack? */
777 /* -------------------------------------------------------------------- */
778 if( CPLGetLastErrorType() == CE_None && sContext.nStackSize != 0 )
779 {
780 CPLError( CE_Failure, CPLE_AppDefined,
781 "Parse error at EOF, not all elements have been closed,\n"
782 "starting with %.500s\n",
783 sContext.papsStack[sContext.nStackSize-1]->pszValue );
784 }
785
786 /* -------------------------------------------------------------------- */
787 /* Cleanup */
788 /* -------------------------------------------------------------------- */
789 CPLFree( sContext.pszToken );
790 if( sContext.papsStack != NULL )
791 CPLFree( sContext.papsStack );
792
793 if( CPLGetLastErrorType() != CE_None )
794 {
795 CPLDestroyXMLNode( sContext.psFirstNode );
796 sContext.psFirstNode = NULL;
797 }
798
799 return sContext.psFirstNode;
800 }
801
802 /************************************************************************/
803 /* _GrowBuffer() */
804 /************************************************************************/
805
/*
 * Make sure the buffer at *ppszText can hold nNeeded characters plus a
 * terminating NUL.  When it cannot, it is reallocated to twice its size,
 * or to nNeeded+1 bytes if that is larger, and *pnMaxLength is updated.
 */
static void _GrowBuffer( unsigned int nNeeded,
                         char **ppszText, unsigned int *pnMaxLength )

{
    unsigned int nNewMax;

    /* still strictly more room than required: nothing to do */
    if( nNeeded+1 < *pnMaxLength )
        return;

    nNewMax = *pnMaxLength * 2;
    if( nNewMax <= nNeeded+1 )
        nNewMax = nNeeded+1;

    *pnMaxLength = nNewMax;
    *ppszText = (char *) CPLRealloc(*ppszText, *pnMaxLength);
}
816
817 /************************************************************************/
818 /* CPLSerializeXMLNode() */
819 /************************************************************************/
820
821 static void
CPLSerializeXMLNode(CPLXMLNode * psNode,int nIndent,char ** ppszText,unsigned int * pnLength,unsigned int * pnMaxLength)822 CPLSerializeXMLNode( CPLXMLNode *psNode, int nIndent,
823 char **ppszText, unsigned int *pnLength,
824 unsigned int *pnMaxLength )
825
826 {
827 if( psNode == NULL )
828 return;
829
830 /* -------------------------------------------------------------------- */
831 /* Ensure the buffer is plenty large to hold this additional */
832 /* string. */
833 /* -------------------------------------------------------------------- */
834 *pnLength += strlen(*ppszText + *pnLength);
835 _GrowBuffer( strlen(psNode->pszValue) + *pnLength + 40 + nIndent,
836 ppszText, pnMaxLength );
837
838 /* -------------------------------------------------------------------- */
839 /* Text is just directly emitted. */
840 /* -------------------------------------------------------------------- */
841 if( psNode->eType == CXT_Text )
842 {
843 char *pszEscaped = CPLEscapeString( psNode->pszValue, -1, CPLES_XML );
844
845 CPLAssert( psNode->psChild == NULL );
846
847 /* Escaped text might be bigger than expected. */
848 _GrowBuffer( strlen(pszEscaped) + *pnLength,
849 ppszText, pnMaxLength );
850 strcat( *ppszText + *pnLength, pszEscaped );
851
852 CPLFree( pszEscaped );
853 }
854
855 /* -------------------------------------------------------------------- */
856 /* Attributes require a little formatting. */
857 /* -------------------------------------------------------------------- */
858 else if( psNode->eType == CXT_Attribute )
859 {
860 CPLAssert( psNode->psChild != NULL
861 && psNode->psChild->eType == CXT_Text );
862
863 sprintf( *ppszText + *pnLength, " %s=\"", psNode->pszValue );
864 CPLSerializeXMLNode( psNode->psChild, 0, ppszText,
865 pnLength, pnMaxLength );
866 strcat( *ppszText + *pnLength, "\"" );
867 }
868
869 /* -------------------------------------------------------------------- */
870 /* Handle comment output. */
871 /* -------------------------------------------------------------------- */
872 else if( psNode->eType == CXT_Comment )
873 {
874 int i;
875
876 CPLAssert( psNode->psChild == NULL );
877
878 for( i = 0; i < nIndent; i++ )
879 (*ppszText)[(*pnLength)++] = ' ';
880
881 sprintf( *ppszText + *pnLength, "<!--%s-->\n",
882 psNode->pszValue );
883 }
884
885 /* -------------------------------------------------------------------- */
886 /* Handle literal output (like <!DOCTYPE...>) */
887 /* -------------------------------------------------------------------- */
888 else if( psNode->eType == CXT_Literal )
889 {
890 int i;
891
892 CPLAssert( psNode->psChild == NULL );
893
894 for( i = 0; i < nIndent; i++ )
895 (*ppszText)[(*pnLength)++] = ' ';
896
897 strcpy( *ppszText + *pnLength, psNode->pszValue );
898 strcat( *ppszText + *pnLength, "\n" );
899 }
900
901 /* -------------------------------------------------------------------- */
902 /* Elements actually have to deal with general children, and */
903 /* various formatting issues. */
904 /* -------------------------------------------------------------------- */
905 else if( psNode->eType == CXT_Element )
906 {
907 int bHasNonAttributeChildren = FALSE;
908 CPLXMLNode *psChild;
909
910 if(nIndent)
911 memset( *ppszText + *pnLength, ' ', nIndent );
912 *pnLength += nIndent;
913 (*ppszText)[*pnLength] = '\0';
914
915 sprintf( *ppszText + *pnLength, "<%s", psNode->pszValue );
916
917 /* Serialize *all* the attribute children, regardless of order */
918 for( psChild = psNode->psChild;
919 psChild != NULL;
920 psChild = psChild->psNext )
921 {
922 if( psChild->eType == CXT_Attribute )
923 CPLSerializeXMLNode( psChild, 0, ppszText, pnLength,
924 pnMaxLength );
925 else
926 bHasNonAttributeChildren = TRUE;
927 }
928
929 if( !bHasNonAttributeChildren )
930 {
931 if( psNode->pszValue[0] == '?' )
932 strcat( *ppszText + *pnLength, "?>\n" );
933 else
934 strcat( *ppszText + *pnLength, "/>\n" );
935 }
936 else
937 {
938 int bJustText = TRUE;
939
940 strcat( *ppszText + *pnLength, ">" );
941
942 for( psChild = psNode->psChild;
943 psChild != NULL;
944 psChild = psChild->psNext )
945 {
946 if( psChild->eType == CXT_Attribute )
947 continue;
948
949 if( psChild->eType != CXT_Text && bJustText )
950 {
951 bJustText = FALSE;
952 strcat( *ppszText + *pnLength, "\n" );
953 }
954
955 CPLSerializeXMLNode( psChild, nIndent + 2, ppszText, pnLength,
956 pnMaxLength );
957 }
958
959 *pnLength += strlen(*ppszText + *pnLength);
960 _GrowBuffer( strlen(psNode->pszValue) + *pnLength + 40 + nIndent,
961 ppszText, pnMaxLength );
962
963 if( !bJustText )
964 {
965 if(nIndent)
966 memset( *ppszText + *pnLength, ' ', nIndent );
967 *pnLength += nIndent;
968 (*ppszText)[*pnLength] = '\0';
969 }
970
971 *pnLength += strlen(*ppszText + *pnLength);
972 sprintf( *ppszText + *pnLength, "</%s>\n", psNode->pszValue );
973 }
974 }
975 }
976
977 /************************************************************************/
978 /* CPLSerializeXMLTree() */
979 /************************************************************************/
980
981 /**
982 * Convert tree into string document.
983 *
984 * This function converts a CPLXMLNode tree representation of a document
985 * into a flat string representation. White space indentation is used to
986 * visually preserve the tree structure of the document. The returned
987 * document becomes owned by the caller and should be freed with CPLFree()
988 * when no longer needed.
989 *
990 * @param psNode the first node to serialize; it and all of its psNext siblings are written.
991 *
992 * @return the document on success or NULL on failure.
993 */
994
CPLSerializeXMLTree(CPLXMLNode * psNode)995 char *CPLSerializeXMLTree( CPLXMLNode *psNode )
996
997 {
998 unsigned int nMaxLength = 100, nLength = 0;
999 char *pszText = NULL;
1000 CPLXMLNode *psThis;
1001
1002 pszText = (char *) CPLMalloc(nMaxLength);
1003 pszText[0] = '\0';
1004
1005 for( psThis = psNode; psThis != NULL; psThis = psThis->psNext )
1006 CPLSerializeXMLNode( psThis, 0, &pszText, &nLength, &nMaxLength );
1007
1008 return pszText;
1009 }
1010
1011 /************************************************************************/
1012 /* CPLCreateXMLNode() */
1013 /************************************************************************/
1014
1015 /**
1016 * Create an document tree item.
1017 *
1018 * Create a single CPLXMLNode object with the desired value and type, and
1019 * attach it as a child of the indicated parent.
1020 *
1021 * @param poParent the parent to which this node should be attached as a
1022 * child. May be NULL to keep as free standing.
1023 *
1024 * @return the newly created node, now owned by the caller (or parent node).
1025 */
1026
CPLCreateXMLNode(CPLXMLNode * poParent,CPLXMLNodeType eType,const char * pszText)1027 CPLXMLNode *CPLCreateXMLNode( CPLXMLNode *poParent, CPLXMLNodeType eType,
1028 const char *pszText )
1029
1030 {
1031 CPLXMLNode *psNode;
1032
1033 /* -------------------------------------------------------------------- */
1034 /* Create new node. */
1035 /* -------------------------------------------------------------------- */
1036 psNode = (CPLXMLNode *) CPLCalloc(sizeof(CPLXMLNode),1);
1037
1038 psNode->eType = eType;
1039 psNode->pszValue = CPLStrdup( pszText );
1040
1041 /* -------------------------------------------------------------------- */
1042 /* Attach to parent, if provided. */
1043 /* -------------------------------------------------------------------- */
1044 if( poParent != NULL )
1045 {
1046 if( poParent->psChild == NULL )
1047 poParent->psChild = psNode;
1048 else
1049 {
1050 CPLXMLNode *psLink = poParent->psChild;
1051
1052 while( psLink->psNext != NULL )
1053 psLink = psLink->psNext;
1054
1055 psLink->psNext = psNode;
1056 }
1057 }
1058
1059 return psNode;
1060 }
1061
1062 /************************************************************************/
1063 /* CPLDestroyXMLNode() */
1064 /************************************************************************/
1065
1066 /**
1067 * Destroy a tree.
1068 *
1069 * This function frees resources associated with a CPLXMLNode and all its
1070 * children nodes.
1071 *
1072 * @param psNode the tree to free.
1073 */
1074
CPLDestroyXMLNode(CPLXMLNode * psNode)1075 void CPLDestroyXMLNode( CPLXMLNode *psNode )
1076
1077 {
1078 if( psNode->psChild != NULL )
1079 CPLDestroyXMLNode( psNode->psChild );
1080
1081 if( psNode->psNext != NULL )
1082 CPLDestroyXMLNode( psNode->psNext );
1083
1084 CPLFree( psNode->pszValue );
1085 CPLFree( psNode );
1086 }
1087
1088 /************************************************************************/
1089 /* CPLGetXMLNode() */
1090 /************************************************************************/
1091
1092 /**
1093 * Find node by path.
1094 *
1095 * Searches the document or subdocument indicated by psRoot for an element
1096 * (or attribute) with the given path. The path should consist of a set of
1097 * element names separated by dots, not including the name of the root
1098 * element (psRoot). If the requested element is not found NULL is returned.
1099 *
1100 * Attribute names may only appear as the last item in the path.
1101 *
1102 * The search is done from the root nodes children, but all intermediate
1103 * nodes in the path must be specified. Seaching for "name" would only find
1104 * a name element or attribute if it is a direct child of the root, not at any
1105 * level in the subdocument.
1106 *
1107 * If the pszPath is prefixed by "=" then the search will begin with the
1108 * root node, and it's siblings, instead of the root nodes children. This
1109 * is particularly useful when searching within a whole document which is
1110 * often prefixed by one or more "junk" nodes like the <?xml> declaration.
1111 *
1112 * @param psRoot the subtree in which to search. This should be a node of
1113 * type CXT_Element. NULL is safe.
1114 *
1115 * @param pszPath the list of element names in the path (dot separated).
1116 *
1117 * @return the requested element node, or NULL if not found.
1118 */
1119
CPLGetXMLNode(CPLXMLNode * psRoot,const char * pszPath)1120 CPLXMLNode *CPLGetXMLNode( CPLXMLNode *psRoot, const char *pszPath )
1121
1122 {
1123 char **papszTokens;
1124 int iToken = 0;
1125 int bSideSearch = FALSE;
1126
1127 if( psRoot == NULL )
1128 return NULL;
1129
1130 if( *pszPath == '=' )
1131 {
1132 bSideSearch = TRUE;
1133 pszPath++;
1134 }
1135
1136 papszTokens = CSLTokenizeStringComplex( pszPath, ".", FALSE, FALSE );
1137
1138 while( papszTokens[iToken] != NULL && psRoot != NULL )
1139 {
1140 CPLXMLNode *psChild;
1141
1142 if( bSideSearch )
1143 {
1144 psChild = psRoot;
1145 bSideSearch = FALSE;
1146 }
1147 else
1148 psChild = psRoot->psChild;
1149
1150 for( ; psChild != NULL; psChild = psChild->psNext )
1151 {
1152 if( psChild->eType != CXT_Text
1153 && EQUAL(papszTokens[iToken],psChild->pszValue) )
1154 break;
1155 }
1156
1157 if( psChild == NULL )
1158 {
1159 psRoot = NULL;
1160 break;
1161 }
1162
1163 psRoot = psChild;
1164 iToken++;
1165 }
1166
1167 CSLDestroy( papszTokens );
1168 return psRoot;
1169 }
1170
1171 /************************************************************************/
1172 /* CPLGetXMLValue() */
1173 /************************************************************************/
1174
1175 /**
1176 * Fetch element/attribute value.
1177 *
1178 * Searches the document for the element/attribute value associated with
1179 * the path. The corresponding node is internally found with CPLGetXMLNode()
1180 * (see there for details on path handling). Once found, the value is
1181 * considered to be the first CXT_Text child of the node.
1182 *
1183 * If the attribute/element search fails, or if the found node has not
1184 * value then the passed default value is returned.
1185 *
1186 * The returned value points to memory within the document tree, and should
1187 * not be altered or freed.
1188 *
1189 * @param psRoot the subtree in which to search. This should be a node of
1190 * type CXT_Element. NULL is safe.
1191 *
1192 * @param pszPath the list of element names in the path (dot separated).
1193 *
1194 * @param pszDefault the value to return if a corresponding value is not
1195 * found, may be NULL.
1196 *
1197 * @return the requested value or pszDefault if not found.
1198 */
1199
CPLGetXMLValue(CPLXMLNode * poRoot,const char * pszPath,const char * pszDefault)1200 const char *CPLGetXMLValue( CPLXMLNode *poRoot, const char *pszPath,
1201 const char *pszDefault )
1202
1203 {
1204 CPLXMLNode *psTarget;
1205
1206 psTarget = CPLGetXMLNode( poRoot, pszPath );
1207 if( psTarget == NULL )
1208 return pszDefault;
1209
1210 if( psTarget->eType == CXT_Attribute )
1211 {
1212 CPLAssert( psTarget->psChild != NULL
1213 && psTarget->psChild->eType == CXT_Text );
1214
1215 return psTarget->psChild->pszValue;
1216 }
1217
1218 if( psTarget->eType == CXT_Element )
1219 {
1220 // Find first non-attribute child, and verify it is a single text
1221 // with no siblings
1222
1223 psTarget = psTarget->psChild;
1224
1225 while( psTarget != NULL && psTarget->eType == CXT_Attribute )
1226 psTarget = psTarget->psNext;
1227
1228 if( psTarget != NULL
1229 && psTarget->eType == CXT_Text
1230 && psTarget->psNext == NULL )
1231 return psTarget->pszValue;
1232 }
1233
1234 return pszDefault;
1235 }
1236
1237 /************************************************************************/
1238 /* CPLAddXMLChild() */
1239 /************************************************************************/
1240
1241 /**
1242 * Add child node to parent.
1243 *
1244 * The passed child is added to the list of children of the indicated
1245 * parent. Normally the child is added at the end of the parents child
1246 * list, but attributes (CXT_Attribute) will be inserted after any other
1247 * attributes but before any other element type. Ownership of the child
1248 * node is effectively assumed by the parent node. If the child has
1249 * siblings (it's psNext is not NULL) they will be trimmed, but if the child
1250 * has children they are carried with it.
1251 *
1252 * @param psParent the node to attach the child to. May not be NULL.
1253 *
1254 * @param psChild the child to add to the parent. May not be NULL. Should
1255 * not be a child of any other parent.
1256 */
1257
CPLAddXMLChild(CPLXMLNode * psParent,CPLXMLNode * psChild)1258 void CPLAddXMLChild( CPLXMLNode *psParent, CPLXMLNode *psChild )
1259
1260 {
1261 CPLXMLNode *psSib;
1262
1263 CPLAssert( psChild->psNext == NULL );
1264 psChild->psNext = NULL;
1265
1266 if( psParent->psChild == NULL )
1267 {
1268 psParent->psChild = psChild;
1269 return;
1270 }
1271
1272 // Insert at head of list if first child is not attribute.
1273 if( psChild->eType == CXT_Attribute
1274 && psParent->psChild->eType != CXT_Attribute )
1275 {
1276 psChild->psNext = psParent->psChild;
1277 psParent->psChild = psChild;
1278 return;
1279 }
1280
1281 // Search for end of list.
1282 for( psSib = psParent->psChild;
1283 psSib->psNext != NULL;
1284 psSib = psSib->psNext )
1285 {
1286 // Insert attributes if the next node is not an attribute.
1287 if( psChild->eType == CXT_Attribute
1288 && psSib->psNext != NULL
1289 && psSib->psNext->eType != CXT_Attribute )
1290 {
1291 psChild->psNext = psSib->psNext;
1292 psSib->psNext = psChild;
1293 return;
1294 }
1295 }
1296
1297 psSib->psNext = psChild;
1298 }
1299
1300 /************************************************************************/
1301 /* CPLAddXMLSibling() */
1302 /************************************************************************/
1303
1304 /**
1305 * Add new sibling.
1306 *
1307 * The passed psNewSibling is added to the end of siblings of the
1308 * psOlderSibling node. That is, it is added to the end of the psNext
1309 * chain. There is no special handling if psNewSibling is an attribute.
1310 * If this is required, use CPLAddXMLChild().
1311 *
1312 * @param psOlderSibling the node to attach the sibling after.
1313 *
1314 * @param psNewSibling the node to add at the end of psOlderSiblings psNext
1315 * chain.
1316 */
1317
CPLAddXMLSibling(CPLXMLNode * psOlderSibling,CPLXMLNode * psNewSibling)1318 void CPLAddXMLSibling( CPLXMLNode *psOlderSibling, CPLXMLNode *psNewSibling )
1319
1320 {
1321 if( psOlderSibling == NULL )
1322 return;
1323
1324 while( psOlderSibling->psNext != NULL )
1325 psOlderSibling = psOlderSibling->psNext;
1326
1327 psOlderSibling->psNext = psNewSibling;
1328 }
1329
1330 /************************************************************************/
1331 /* CPLCreateXMLElementAndValue() */
1332 /************************************************************************/
1333
1334 /**
1335 * Create an element and text value.
1336 *
1337 * This is function is a convenient short form for:
1338 *
1339 * return CPLCreateXMLNode(
1340 * CPLCreateXMLNode( psParent, CXT_Element, pszName ),
1341 * CXT_Text, pszValue );
1342 *
1343 * It creates a CXT_Element node, with a CXT_Text child, and
1344 * attaches the element to the passed parent.
1345 *
1346 * @param psParent the parent node to which the resulting node should
1347 * be attached. May be NULL to keep as freestanding.
1348 *
1349 * @param pszName the element name to create.
1350 * @param pszValue the text to attach to the element. Must not be NULL.
1351 *
1352 * @return the pointer to the new element node.
1353 */
1354
CPLCreateXMLElementAndValue(CPLXMLNode * psParent,const char * pszName,const char * pszValue)1355 CPLXMLNode *CPLCreateXMLElementAndValue( CPLXMLNode *psParent,
1356 const char *pszName,
1357 const char *pszValue )
1358
1359 {
1360 return CPLCreateXMLNode(
1361 CPLCreateXMLNode( psParent, CXT_Element, pszName ),
1362 CXT_Text, pszValue );
1363 }
1364
1365 /************************************************************************/
1366 /* CPLCloneXMLTree() */
1367 /************************************************************************/
1368
1369 /**
1370 * Copy tree.
1371 *
1372 * Creates a deep copy of a CPLXMLNode tree.
1373 *
1374 * @param psTree the tree to duplicate.
1375 *
1376 * @return a copy of the whole tree.
1377 */
1378
CPLCloneXMLTree(CPLXMLNode * psTree)1379 CPLXMLNode *CPLCloneXMLTree( CPLXMLNode *psTree )
1380
1381 {
1382 CPLXMLNode *psPrevious = NULL;
1383 CPLXMLNode *psReturn = NULL;
1384
1385 while( psTree != NULL )
1386 {
1387 CPLXMLNode *psCopy;
1388
1389 psCopy = CPLCreateXMLNode( NULL, psTree->eType, psTree->pszValue );
1390 if( psReturn == NULL )
1391 psReturn = psCopy;
1392 if( psPrevious != NULL )
1393 psPrevious->psNext = psCopy;
1394
1395 if( psTree->psChild != NULL )
1396 psCopy->psChild = CPLCloneXMLTree( psTree->psChild );
1397
1398 psPrevious = psCopy;
1399 psTree = psTree->psNext;
1400 }
1401
1402 return psReturn;
1403 }
1404
1405 /************************************************************************/
1406 /* CPLSetXMLValue() */
1407 /************************************************************************/
1408
1409 /**
1410 * Set element value by path.
1411 *
1412 * Find (or create) the target element or attribute specified in the
1413 * path, and assign it the indicated value.
1414 *
1415 * Any path elements that do not already exist will be created. The target
1416 * nodes value (the first CXT_Text child) will be replaced with the provided
1417 * value.
1418 *
1419 * If the target node is an attribute instead of an element, the last separator
1420 * should be a "#" instead of the normal period path separator.
1421 *
1422 * Example:
1423 * CPLSetXMLValue( "Citation.Id.Description", "DOQ dataset" );
1424 * CPLSetXMLValue( "Citation.Id.Description#name", "doq" );
1425 *
1426 * @param psRoot the subdocument to be updated.
1427 *
1428 * @param pszPath the dot seperated path to the target element/attribute.
1429 *
1430 * @param pszValue the text value to assign.
1431 *
1432 * @return TRUE on success.
1433 */
1434
CPLSetXMLValue(CPLXMLNode * psRoot,const char * pszPath,const char * pszValue)1435 int CPLSetXMLValue( CPLXMLNode *psRoot, const char *pszPath,
1436 const char *pszValue )
1437
1438 {
1439 if( psRoot == NULL )
1440 return FALSE;
1441
1442 char **papszTokens;
1443 int iToken = 0;
1444
1445 papszTokens = CSLTokenizeStringComplex( pszPath, ".", FALSE, FALSE );
1446
1447 while( papszTokens[iToken] != NULL && psRoot != NULL )
1448 {
1449 CPLXMLNode *psChild;
1450 int bIsAttribute = FALSE;
1451 const char *pszName = papszTokens[iToken];
1452
1453 if( pszName[0] == '#' )
1454 {
1455 bIsAttribute = TRUE;
1456 pszName++;
1457 }
1458
1459 if( psRoot->eType != CXT_Element )
1460 return FALSE;
1461
1462 for( psChild = psRoot->psChild; psChild != NULL;
1463 psChild = psChild->psNext )
1464 {
1465 if( psChild->eType != CXT_Text
1466 && EQUAL(pszName,psChild->pszValue) )
1467 break;
1468 }
1469
1470 if( psChild == NULL )
1471 {
1472 if( bIsAttribute )
1473 psChild = CPLCreateXMLNode( psRoot, CXT_Attribute, pszName );
1474 else
1475 psChild = CPLCreateXMLNode( psRoot, CXT_Element, pszName );
1476 }
1477
1478 psRoot = psChild;
1479 iToken++;
1480 }
1481
1482 CSLDestroy( papszTokens );
1483
1484 /* -------------------------------------------------------------------- */
1485 /* Now set a value node under this node. */
1486 /* -------------------------------------------------------------------- */
1487 if( psRoot ){
1488 if( psRoot->psChild == NULL )
1489 CPLCreateXMLNode( psRoot, CXT_Text, pszValue );
1490 else if( psRoot->psChild->eType != CXT_Text )
1491 return FALSE;
1492 else
1493 {
1494 CPLFree( psRoot->psChild->pszValue );
1495 psRoot->psChild->pszValue = CPLStrdup( pszValue );
1496 }
1497
1498 return TRUE;
1499 }
1500 else {
1501 return FALSE;
1502 }
1503 }
1504
1505 /************************************************************************/
1506 /* CPLStripXMLNamespace() */
1507 /************************************************************************/
1508
1509 /**
1510 * Strip indicated namespaces.
1511 *
1512 * The subdocument (psRoot) is recursively examined, and any elements
1513 * with the indicated namespace prefix will have the namespace prefix
1514 * stripped from the element names. If the passed namespace is NULL, then
1515 * all namespace prefixes will be stripped.
1516 *
1517 * Nodes other than elements should remain unaffected. The changes are
1518 * made "in place", and should not alter any node locations, only the
1519 * pszValue field of affected nodes.
1520 *
1521 * @param psRoot the document to operate on.
1522 * @param pszNamespace the name space prefix (not including colon), or NULL.
1523 * @param bRecurse TRUE to recurse over whole document, or FALSE to only
1524 * operate on the passed node.
1525 */
1526
CPLStripXMLNamespace(CPLXMLNode * psRoot,const char * pszNamespace,int bRecurse)1527 void CPLStripXMLNamespace( CPLXMLNode *psRoot,
1528 const char *pszNamespace,
1529 int bRecurse )
1530
1531 {
1532 if( psRoot == NULL )
1533 return;
1534
1535 if( pszNamespace != NULL )
1536 {
1537 if( psRoot->eType == CXT_Element
1538 && EQUALN(pszNamespace,psRoot->pszValue,strlen(pszNamespace))
1539 && psRoot->pszValue[strlen(pszNamespace)] == ':' )
1540 {
1541 char *pszNewValue =
1542 CPLStrdup(psRoot->pszValue+strlen(pszNamespace)+1);
1543
1544 CPLFree( psRoot->pszValue );
1545 psRoot->pszValue = pszNewValue;
1546 }
1547 }
1548 else
1549 {
1550 const char *pszCheck;
1551
1552 for( pszCheck = psRoot->pszValue; *pszCheck != '\0'; pszCheck++ )
1553 {
1554 if( *pszCheck == ':' )
1555 {
1556 char *pszNewValue = CPLStrdup( pszCheck+1 );
1557
1558 CPLFree( psRoot->pszValue );
1559 psRoot->pszValue = pszNewValue;
1560 break;
1561 }
1562 }
1563 }
1564
1565 if( bRecurse )
1566 {
1567 if( psRoot->psChild != NULL )
1568 CPLStripXMLNamespace( psRoot->psChild, pszNamespace, 1 );
1569 if( psRoot->psNext != NULL )
1570 CPLStripXMLNamespace( psRoot->psNext, pszNamespace, 1 );
1571 }
1572 }
1573
1574 /************************************************************************/
1575 /* CPLParseXMLFile() */
1576 /************************************************************************/
1577
1578 /**
1579 * Parse XML file into tree.
1580 *
1581 * The named file is opened, loaded into memory as a big string, and
1582 * parsed with CPLParseXMLString(). Errors in reading the file or parsing
1583 * the XML will be reported by CPLError().
1584 *
1585 * @param pszFilename the file to open.
1586 *
1587 * @return NULL on failure, or the document tree on success.
1588 */
1589
CPLParseXMLFile(const char * pszFilename)1590 CPLXMLNode *CPLParseXMLFile( const char *pszFilename )
1591
1592 {
1593 FILE *fp;
1594 int nLen;
1595 char *pszDoc;
1596 CPLXMLNode *psTree;
1597
1598 /* -------------------------------------------------------------------- */
1599 /* Read the file. */
1600 /* -------------------------------------------------------------------- */
1601 fp = VSIFOpen( pszFilename, "rb" );
1602 if( fp == NULL )
1603 {
1604 CPLError( CE_Failure, CPLE_OpenFailed,
1605 "Failed to open %.500s to read.", pszFilename );
1606 return NULL;
1607 }
1608
1609 VSIFSeek( fp, 0, SEEK_END );
1610 nLen = VSIFTell( fp );
1611 VSIFSeek( fp, 0, SEEK_SET );
1612
1613 pszDoc = (char *) VSIMalloc(nLen+1);
1614 if( pszDoc == NULL )
1615 {
1616 CPLError( CE_Failure, CPLE_OutOfMemory,
1617 "Out of memory allocating space for %d byte buffer in\n"
1618 "CPLParseXMLFile(%.500s).",
1619 nLen+1, pszFilename );
1620 VSIFClose( fp );
1621 return NULL;
1622 }
1623 if( (int) VSIFRead( pszDoc, 1, nLen, fp ) < nLen )
1624 {
1625 CPLError( CE_Failure, CPLE_FileIO,
1626 "VSIFRead() result short of expected %d bytes from %.500s.",
1627 nLen, pszFilename );
1628 pszDoc[0] = '\0';
1629 }
1630 VSIFClose( fp );
1631
1632 pszDoc[nLen] = '\0';
1633
1634 /* -------------------------------------------------------------------- */
1635 /* Parse it. */
1636 /* -------------------------------------------------------------------- */
1637 psTree = CPLParseXMLString( pszDoc );
1638 CPLFree( pszDoc );
1639
1640 return psTree;
1641 }
1642
1643 /************************************************************************/
1644 /* CPLSerializeXMLTreeToFile() */
1645 /************************************************************************/
1646
1647 /**
1648 * Write document tree to a file.
1649 *
1650 * The passed document tree is converted into one big string (with
1651 * CPLSerializeXMLTree()) and then written to the named file. Errors writing
1652 * the file will be reported by CPLError(). The source document tree is
1653 * not altered. If the output file already exists it will be overwritten.
1654 *
1655 * @param psTree the document tree to write.
1656 * @param pszFilename the name of the file to write to.
1657 */
1658
CPLSerializeXMLTreeToFile(CPLXMLNode * psTree,const char * pszFilename)1659 int CPLSerializeXMLTreeToFile( CPLXMLNode *psTree, const char *pszFilename )
1660
1661 {
1662 char *pszDoc;
1663 FILE *fp;
1664 int nLength;
1665
1666 /* -------------------------------------------------------------------- */
1667 /* Serialize document. */
1668 /* -------------------------------------------------------------------- */
1669 pszDoc = CPLSerializeXMLTree( psTree );
1670 if( pszDoc == NULL )
1671 return FALSE;
1672
1673 nLength = strlen(pszDoc);
1674
1675 /* -------------------------------------------------------------------- */
1676 /* Create file. */
1677 /* -------------------------------------------------------------------- */
1678 fp = VSIFOpen( pszFilename, "wt" );
1679 if( fp == NULL )
1680 {
1681 CPLError( CE_Failure, CPLE_OpenFailed,
1682 "Failed to open %.500s to write.", pszFilename );
1683 return FALSE;
1684 }
1685
1686 /* -------------------------------------------------------------------- */
1687 /* Write file. */
1688 /* -------------------------------------------------------------------- */
1689 if( (int) VSIFWrite( pszDoc, 1, nLength, fp ) != nLength )
1690 {
1691 CPLError( CE_Failure, CPLE_FileIO,
1692 "Failed to write whole XML document (%.500s).",
1693 pszFilename );
1694 VSIFClose( fp );
1695 CPLFree( pszDoc );
1696 return FALSE;
1697 }
1698
1699 /* -------------------------------------------------------------------- */
1700 /* Cleanup */
1701 /* -------------------------------------------------------------------- */
1702 VSIFClose( fp );
1703 CPLFree( pszDoc );
1704
1705 return TRUE;
1706 }
1707
1708 /************************************************************************/
1709 /* CPLCleanXMLElementName() */
1710 /************************************************************************/
1711
1712 /**
1713 * Make string into safe XML token.
1714 *
1715 * Modififies a string in place to try and make it into a legal
1716 * XML token that can be used as an element name. This is accomplished
1717 * by changing any characters not legal in a token into an underscore.
1718 *
1719 * NOTE: This function should implement the rules in section 2.3 of
1720 * http://www.w3.org/TR/xml11/ but it doesn't yet do that properly. We
1721 * only do a rough approximation of that.
1722 *
1723 * @param pszTarget the string to be adjusted. It is altered in place.
1724 */
1725
CPLCleanXMLElementName(char * pszTarget)1726 void CPL_DLL CPLCleanXMLElementName( char *pszTarget )
1727
1728 {
1729 if( pszTarget == NULL )
1730 return;
1731
1732 for( ; *pszTarget != '\0'; pszTarget++ )
1733 {
1734 if( (*((unsigned char *) pszTarget) & 0x80) || isalnum( *pszTarget )
1735 || *pszTarget == '_' || *pszTarget == '.' )
1736 {
1737 /* ok */
1738 }
1739 else
1740 {
1741 *pszTarget = '_';
1742 }
1743 }
1744 }
1745