1 /*
2 www.sourceforge.net/projects/tinyxml
3 Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
4 
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any
7 damages arising from the use of this software.
8 
9 Permission is granted to anyone to use this software for any
10 purpose, including commercial applications, and to alter it and
11 redistribute it freely, subject to the following restrictions:
12 
13 1. The origin of this software must not be misrepresented; you must
14 not claim that you wrote the original software. If you use this
15 software in a product, an acknowledgment in the product documentation
16 would be appreciated but is not required.
17 
18 2. Altered source versions must be plainly marked as such, and
19 must not be misrepresented as being the original software.
20 
21 3. This notice may not be removed or altered from any source
22 distribution.
23 */
24 
25 #include "tinyxml/tinyxml.h"
26 
27 #include <ctype.h>
28 
29 //#define DEBUG_PARSER
30 
31 #if defined( _DEBUG ) && defined( _MSC_VER )
32 #include <windows.h>
33 #define TIXML_LOG OutputDebugString
34 #else
35 #define TIXML_LOG printf
36 #endif
37 
38 // Note tha "PutString" hardcodes the same list. This
39 // is less flexible than it appears. Changing the entries
40 // or order will break putstring.
41 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
42 {
43 	{ "&amp;",  5, '&' },
44 	{ "&lt;",   4, '<' },
45 	{ "&gt;",   4, '>' },
46 	{ "&quot;", 6, '\"' },
47 	{ "&apos;", 6, '\'' }
48 };
49 
50 // Bunch of unicode info at:
51 //		http://www.unicode.org/faq/utf_bom.html
// Including the basis of this table, which determines the number of bytes
// in the sequence from the lead byte. 1 is placed for invalid sequences --
// although the result will be junk, pass it through as much as possible.
55 // Beware of the non-characters in UTF-8:
56 //				ef bb bf (Microsoft "lead bytes")
57 //				ef bf be
58 //				ef bf bf
59 
// The three bytes 0xEF 0xBB 0xBF, in order. Stored as plain char so they
// can be compared directly against bytes of the text being parsed.
const char TIXML_UTF_LEAD_0 = static_cast<char>( 0xef );
const char TIXML_UTF_LEAD_1 = static_cast<char>( 0xbb );
const char TIXML_UTF_LEAD_2 = static_cast<char>( 0xbf );
63 
64 const int TiXmlBase::utf8ByteTable[256] =
65 {
66 	//	0	1	2	3	4	5	6	7	8	9	a	b	c	d	e	f
67 		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x00
68 		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x10
69 		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x20
70 		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x30
71 		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x40
72 		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x50
73 		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x60
74 		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x70	End of ASCII range
75 		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x80 0x80 to 0xc1 invalid
76 		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x90
77 		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0xa0
78 		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0xb0
79 		1,	1,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	// 0xc0 0xc2 to 0xdf 2 byte
80 		2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	// 0xd0
81 		3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	// 0xe0 0xe0 to 0xef 3 byte
82 		4,	4,	4,	4,	4,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1	// 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
83 };
84 
85 
ConvertUTF32ToUTF8(unsigned long input,char * output,int * length)86 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
87 {
88 	const unsigned long BYTE_MASK = 0xBF;
89 	const unsigned long BYTE_MARK = 0x80;
90 	const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
91 
92 	if (input < 0x80)
93 		*length = 1;
94 	else if ( input < 0x800 )
95 		*length = 2;
96 	else if ( input < 0x10000 )
97 		*length = 3;
98 	else if ( input < 0x200000 )
99 		*length = 4;
100 	else
101 		{ *length = 0; return; }	// This code won't covert this correctly anyway.
102 
103 	output += *length;
104 
105 	// Scary scary fall throughs.
106 	switch (*length)
107 	{
108 		case 4:
109 			--output;
110 			*output = (char)((input | BYTE_MARK) & BYTE_MASK);
111 			input >>= 6;
112 		case 3:
113 			--output;
114 			*output = (char)((input | BYTE_MARK) & BYTE_MASK);
115 			input >>= 6;
116 		case 2:
117 			--output;
118 			*output = (char)((input | BYTE_MARK) & BYTE_MASK);
119 			input >>= 6;
120 		case 1:
121 			--output;
122 			*output = (char)(input | FIRST_BYTE_MARK[*length]);
123 	}
124 }
125 
126 
IsAlpha(unsigned char anyByte,TiXmlEncoding)127 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding )
128 {
129 	// This will only work for low-ascii, everything else is assumed to be a valid
130 	// letter. I'm not sure this is the best approach, but it is quite tricky trying
131 	// to figure out alhabetical vs. not across encoding. So take a very
132 	// conservative approach.
133 
134 //	if ( encoding == TIXML_ENCODING_UTF8 )
135 //	{
136 		if ( anyByte < 127 )
137 			return isalpha( anyByte );
138 		else
139 			return 1;	// What else to do? The unicode set is huge...get the english ones right.
140 //	}
141 //	else
142 //	{
143 //		return isalpha( anyByte );
144 //	}
145 }
146 
147 
IsAlphaNum(unsigned char anyByte,TiXmlEncoding)148 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding )
149 {
150 	// This will only work for low-ascii, everything else is assumed to be a valid
151 	// letter. I'm not sure this is the best approach, but it is quite tricky trying
152 	// to figure out alhabetical vs. not across encoding. So take a very
153 	// conservative approach.
154 
155 //	if ( encoding == TIXML_ENCODING_UTF8 )
156 //	{
157 		if ( anyByte < 127 )
158 			return isalnum( anyByte );
159 		else
160 			return 1;	// What else to do? The unicode set is huge...get the english ones right.
161 //	}
162 //	else
163 //	{
164 //		return isalnum( anyByte );
165 //	}
166 }
167 
168 
169 class TiXmlParsingData
170 {
171 	friend class TiXmlDocument;
172   public:
173 	void Stamp( const char* now, TiXmlEncoding encoding );
174 
Cursor()175 	const TiXmlCursor& Cursor()	{ return cursor; }
176 
177   private:
178 	// Only used by the document!
TiXmlParsingData(const char * start,int _tabsize,int row,int col)179 	TiXmlParsingData( const char* start, int _tabsize, int row, int col )
180 	{
181 		assert( start );
182 		stamp = start;
183 		tabsize = _tabsize;
184 		cursor.row = row;
185 		cursor.col = col;
186 	}
187 
188 	TiXmlCursor		cursor;
189 	const char*		stamp;
190 	int				tabsize;
191 };
192 
193 
Stamp(const char * now,TiXmlEncoding encoding)194 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
195 {
196 	assert( now );
197 
198 	// Do nothing if the tabsize is 0.
199 	if ( tabsize < 1 )
200 	{
201 		return;
202 	}
203 
204 	// Get the current row, column.
205 	int row = cursor.row;
206 	int col = cursor.col;
207 	const char* p = stamp;
208 	assert( p );
209 
210 	while ( p < now )
211 	{
212 		// Code contributed by Fletcher Dunn: (modified by lee)
213 		switch (*p) {
214 			case 0:
215 				// We *should* never get here, but in case we do, don't
216 				// advance past the terminating null character, ever
217 				return;
218 
219 			case '\r':
220 				// bump down to the next line
221 				++row;
222 				col = 0;
223 				// Eat the character
224 				++p;
225 
226 				// Check for \r\n sequence, and treat this as a single character
227 				if (*p == '\n') {
228 					++p;
229 				}
230 				break;
231 
232 			case '\n':
233 				// bump down to the next line
234 				++row;
235 				col = 0;
236 
237 				// Eat the character
238 				++p;
239 
240 				// Check for \n\r sequence, and treat this as a single
241 				// character.  (Yes, this bizarre thing does occur still
242 				// on some arcane platforms...)
243 				if (*p == '\r') {
244 					++p;
245 				}
246 				break;
247 
248 			case '\t':
249 				// Eat the character
250 				++p;
251 
252 				// Skip to next tab stop
253 				col = (col / tabsize + 1) * tabsize;
254 				break;
255 
256 			case TIXML_UTF_LEAD_0:
257 				if ( encoding == TIXML_ENCODING_UTF8 )
258 				{
259 					if ( *(p+1) && *(p+2) )
260 					{
261 						// In these cases, don't advance the column. These are
262 						// 0-width spaces.
263 						if ( *(p+1)==TIXML_UTF_LEAD_1 && *(p+2)==TIXML_UTF_LEAD_2 )
264 							p += 3;
265 						else if ( *(p+1)==(char)(0xbf) && *(p+2)==(char)(0xbe) )
266 							p += 3;
267 						else if ( *(p+1)==(char)(0xbf) && *(p+2)==(char)(0xbf) )
268 							p += 3;
269 						else
270 							{ p +=3; ++col; }	// A normal character.
271 					}
272 				}
273 				else
274 				{
275 					++p;
276 					++col;
277 				}
278 				break;
279 
280 			default:
281 				if ( encoding == TIXML_ENCODING_UTF8 )
282 				{
283 					// Eat the 1 to 4 byte utf8 character.
284 					int step = TiXmlBase::utf8ByteTable[*((unsigned char*)p)];
285 					if ( step == 0 )
286 						step = 1;		// Error case from bad encoding, but handle gracefully.
287 					p += step;
288 
289 					// Just advance one column, of course.
290 					++col;
291 				}
292 				else
293 				{
294 					++p;
295 					++col;
296 				}
297 				break;
298 		}
299 	}
300 	cursor.row = row;
301 	cursor.col = col;
302 	assert( cursor.row >= -1 );
303 	assert( cursor.col >= -1 );
304 	stamp = p;
305 	assert( stamp );
306 }
307 
308 
SkipWhiteSpace(const char * p,TiXmlEncoding encoding)309 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
310 {
311 	if ( !p || !*p )
312 	{
313 		return 0;
314 	}
315 	if ( encoding == TIXML_ENCODING_UTF8 )
316 	{
317 		while ( *p )
318 		{
319 			// Skip the stupid Microsoft UTF-8 Byte order marks
320 			if (	*(p+0)==TIXML_UTF_LEAD_0
321 				 && *(p+1)==TIXML_UTF_LEAD_1
322 				 && *(p+2)==TIXML_UTF_LEAD_2 )
323 			{
324 				p += 3;
325 				continue;
326 			}
327 			else if(*(p+0)==TIXML_UTF_LEAD_0
328 				 && *(p+1)==(const char) 0xbf
329 				 && *(p+2)==(const char) 0xbe )
330 			{
331 				p += 3;
332 				continue;
333 			}
334 			else if(*(p+0)==TIXML_UTF_LEAD_0
335 				 && *(p+1)==(const char) 0xbf
336 				 && *(p+2)==(const char) 0xbf )
337 			{
338 				p += 3;
339 				continue;
340 			}
341 
342 			if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )		// Still using old rules for white space.
343 				++p;
344 			else
345 				break;
346 		}
347 	}
348 	else
349 	{
350 		while ( *p && ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' ) )
351 			++p;
352 	}
353 
354 	return p;
355 }
356 
357 #ifdef TIXML_USE_STL
StreamWhiteSpace(TIXML_ISTREAM * in,TIXML_STRING * tag)358 /*static*/ bool TiXmlBase::StreamWhiteSpace( TIXML_ISTREAM * in, TIXML_STRING * tag )
359 {
360 	for( ;; )
361 	{
362 		if ( !in->good() ) return false;
363 
364 		int c = in->peek();
365 		// At this scope, we can't get to a document. So fail silently.
366 		if ( !IsWhiteSpace( c ) || c <= 0 )
367 			return true;
368 
369 		*tag += (char) in->get();
370 	}
371 }
372 
StreamTo(TIXML_ISTREAM * in,int character,TIXML_STRING * tag)373 /*static*/ bool TiXmlBase::StreamTo( TIXML_ISTREAM * in, int character, TIXML_STRING * tag )
374 {
375 	//assert( character > 0 && character < 128 );	// else it won't work in utf-8
376 	while ( in->good() )
377 	{
378 		int c = in->peek();
379 		if ( c == character )
380 			return true;
381 		if ( c <= 0 )		// Silent failure: can't get document at this scope
382 			return false;
383 
384 		in->get();
385 		*tag += (char) c;
386 	}
387 	return false;
388 }
389 #endif
390 
ReadName(const char * p,TIXML_STRING * name,TiXmlEncoding encoding)391 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
392 {
393 	*name = "";
394 	assert( p );
395 
396 	// Names start with letters or underscores.
397 	// Of course, in unicode, tinyxml has no idea what a letter *is*. The
398 	// algorithm is generous.
399 	//
400 	// After that, they can be letters, underscores, numbers,
401 	// hyphens, or colons. (Colons are valid ony for namespaces,
402 	// but tinyxml can't tell namespaces from names.)
403 	if (    p && *p
404 		 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
405 	{
406 		while(		p && *p
407 				&&	(		IsAlphaNum( (unsigned char ) *p, encoding )
408 						 || *p == '_'
409 						 || *p == '-'
410 						 || *p == '.'
411 						 || *p == ':' ) )
412 		{
413 			(*name) += *p;
414 			++p;
415 		}
416 		return p;
417 	}
418 	return 0;
419 }
420 
GetEntity(const char * p,char * value,int * length,TiXmlEncoding encoding)421 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
422 {
423 	// Presume an entity, and pull it out.
424     TIXML_STRING ent;
425 	int i;
426 	*length = 0;
427 
428 	if ( *(p+1) && *(p+1) == '#' && *(p+2) )
429 	{
430 		unsigned long ucs = 0;
431 		//*ME:	warning C4244: convert '__w64 int' to 'unsigned'
432 		//*ME:	Use size_t instead of unsigned (pointer-arithmetic)
433 		size_t delta = 0;
434 		unsigned mult = 1;
435 
436 		if ( *(p+2) == 'x' )
437 		{
438 			// Hexadecimal.
439 			if ( !*(p+3) ) return 0;
440 
441 			const char* q = p+3;
442 			q = strchr( q, ';' );
443 
444 			if ( !q || !*q ) return 0;
445 
446 			delta = q-p;
447 			--q;
448 
449 			while ( *q != 'x' )
450 			{
451 				if ( *q >= '0' && *q <= '9' )
452 					ucs += mult * (*q - '0');
453 				else if ( *q >= 'a' && *q <= 'f' )
454 					ucs += mult * (*q - 'a' + 10);
455 				else if ( *q >= 'A' && *q <= 'F' )
456 					ucs += mult * (*q - 'A' + 10 );
457 				else
458 					return 0;
459 				mult *= 16;
460 				--q;
461 			}
462 		}
463 		else
464 		{
465 			// Decimal.
466 			if ( !*(p+2) ) return 0;
467 
468 			const char* q = p+2;
469 			q = strchr( q, ';' );
470 
471 			if ( !q || !*q ) return 0;
472 
473 			delta = q-p;
474 			--q;
475 
476 			while ( *q != '#' )
477 			{
478 				if ( *q >= '0' && *q <= '9' )
479 					ucs += mult * (*q - '0');
480 				else
481 					return 0;
482 				mult *= 10;
483 				--q;
484 			}
485 		}
486 		if ( encoding == TIXML_ENCODING_UTF8 )
487 		{
488 			// convert the UCS to UTF-8
489 			ConvertUTF32ToUTF8( ucs, value, length );
490 		}
491 		else
492 		{
493 			*value = (char)ucs;
494 			*length = 1;
495 		}
496 		return p + delta + 1;
497 	}
498 
499 	// Now try to match it.
500 	for( i=0; i<NUM_ENTITY; ++i )
501 	{
502 		if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
503 		{
504 			assert( strlen( entity[i].str ) == entity[i].strLength );
505 			*value = entity[i].chr;
506 			*length = 1;
507 			return ( p + entity[i].strLength );
508 		}
509 	}
510 
511 	// So it wasn't an entity, its unrecognized, or something like that.
512 	*value = *p;	// Don't put back the last one, since we return it!
513 	return p+1;
514 }
515 
516 
StringEqual(const char * p,const char * tag,bool ignoreCase,TiXmlEncoding encoding)517 bool TiXmlBase::StringEqual( const char* p,
518 							 const char* tag,
519 							 bool ignoreCase,
520 							 TiXmlEncoding encoding )
521 {
522 	assert( p );
523 	assert( tag );
524 	if ( !p || !*p )
525 	{
526 		assert( 0 );
527 		return false;
528 	}
529 
530 	const char* q = p;
531 
532 	if ( ignoreCase )
533 	{
534 		while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
535 		{
536 			++q;
537 			++tag;
538 		}
539 
540 		if ( *tag == 0 )
541 			return true;
542 	}
543 	else
544 	{
545 		while ( *q && *tag && *q == *tag )
546 		{
547 			++q;
548 			++tag;
549 		}
550 
551 		if ( *tag == 0 )		// Have we found the end of the tag, and everything equal?
552 			return true;
553 	}
554 	return false;
555 }
556 
ReadText(const char * p,TIXML_STRING * text,bool trimWhiteSpace,const char * endTag,bool caseInsensitive,TiXmlEncoding encoding)557 const char* TiXmlBase::ReadText(	const char* p,
558 									TIXML_STRING * text,
559 									bool trimWhiteSpace,
560 									const char* endTag,
561 									bool caseInsensitive,
562 									TiXmlEncoding encoding )
563 {
564     *text = "";
565 	if (    !trimWhiteSpace			// certain tags always keep whitespace
566 		 || !condenseWhiteSpace )	// if true, whitespace is always kept
567 	{
568 		// Keep all the white space.
569 		while (	   p && *p
570 				&& !StringEqual( p, endTag, caseInsensitive, encoding )
571 			  )
572 		{
573 			int len;
574 			char cArr[4] = { 0, 0, 0, 0 };
575 			p = GetChar( p, cArr, &len, encoding );
576 			text->append( cArr, len );
577 		}
578 	}
579 	else
580 	{
581 		bool whitespace = false;
582 
583 		// Remove leading white space:
584 		p = SkipWhiteSpace( p, encoding );
585 		while (	   p && *p
586 				&& !StringEqual( p, endTag, caseInsensitive, encoding ) )
587 		{
588 			if ( *p == '\r' || *p == '\n' )
589 			{
590 				whitespace = true;
591 				++p;
592 			}
593 			else if ( IsWhiteSpace( *p ) )
594 			{
595 				whitespace = true;
596 				++p;
597 			}
598 			else
599 			{
600 				// If we've found whitespace, add it before the
601 				// new character. Any whitespace just becomes a space.
602 				if ( whitespace )
603 				{
604 					(*text) += ' ';
605 					whitespace = false;
606 				}
607 				int len;
608 				char cArr[4] = { 0, 0, 0, 0 };
609 				p = GetChar( p, cArr, &len, encoding );
610 				if ( len == 1 )
611 					(*text) += cArr[0];	// more efficient
612 				else
613 					text->append( cArr, len );
614 			}
615 		}
616 	}
617 	return p + strlen( endTag );
618 }
619 
620 #ifdef TIXML_USE_STL
621 
StreamIn(TIXML_ISTREAM * in,TIXML_STRING * tag)622 void TiXmlDocument::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
623 {
624 	// The basic issue with a document is that we don't know what we're
625 	// streaming. Read something presumed to be a tag (and hope), then
626 	// identify it, and call the appropriate stream method on the tag.
627 	//
628 	// This "pre-streaming" will never read the closing ">" so the
629 	// sub-tag can orient itself.
630 
631 	if ( !StreamTo( in, '<', tag ) )
632 	{
633 		SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
634 		return;
635 	}
636 
637 	while ( in->good() )
638 	{
639 		int tagIndex = (int) tag->length();
640 		while ( in->good() && in->peek() != '>' )
641 		{
642 			int c = in->get();
643 			if ( c <= 0 )
644 			{
645 				SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
646 				break;
647 			}
648 			(*tag) += (char) c;
649 		}
650 
651 		if ( in->good() )
652 		{
653 			// We now have something we presume to be a node of
654 			// some sort. Identify it, and call the node to
655 			// continue streaming.
656 			TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
657 
658 			if ( node )
659 			{
660 				node->StreamIn( in, tag );
661 				bool isElement = node->ToElement() != 0;
662 				delete node;
663 				node = 0;
664 
665 				// If this is the root element, we're done. Parsing will be
666 				// done by the >> operator.
667 				if ( isElement )
668 				{
669 					return;
670 				}
671 			}
672 			else
673 			{
674 				SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
675 				return;
676 			}
677 		}
678 	}
679 	// We should have returned sooner.
680 	SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
681 }
682 
683 #endif
684 
Parse(const char * p,TiXmlParsingData * prevData,TiXmlEncoding encoding)685 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
686 {
687 	ClearError();
688 
689 	// Parse away, at the document level. Since a document
690 	// contains nothing but other tags, most of what happens
691 	// here is skipping white space.
692 	if ( !p || !*p )
693 	{
694 		SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
695 		return 0;
696 	}
697 
698 	// Note that, for a document, this needs to come
699 	// before the while space skip, so that parsing
700 	// starts from the pointer we are given.
701 	location.Clear();
702 	if ( prevData )
703 	{
704 		location.row = prevData->cursor.row;
705 		location.col = prevData->cursor.col;
706 	}
707 	else
708 	{
709 		location.row = 0;
710 		location.col = 0;
711 	}
712 	TiXmlParsingData data( p, TabSize(), location.row, location.col );
713 	location = data.Cursor();
714 
715 	if ( encoding == TIXML_ENCODING_UNKNOWN )
716 	{
717 		// Check for the Microsoft UTF-8 lead bytes.
718 		if (	*(p+0) && *(p+0) == TIXML_UTF_LEAD_0
719 			 && *(p+1) && *(p+1) == TIXML_UTF_LEAD_1
720 			 && *(p+2) && *(p+2) == TIXML_UTF_LEAD_2 )
721 		{
722 			encoding = TIXML_ENCODING_UTF8;
723 		}
724 	}
725 
726     p = SkipWhiteSpace( p, encoding );
727 	if ( !p )
728 	{
729 		SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
730 		return 0;
731 	}
732 
733 	while ( p && *p )
734 	{
735 		TiXmlNode* node = Identify( p, encoding );
736 		if ( node )
737 		{
738 			p = node->Parse( p, &data, encoding );
739 			LinkEndChild( node );
740 		}
741 		else
742 		{
743 			break;
744 		}
745 
746 		// Did we get encoding info?
747 		if (    encoding == TIXML_ENCODING_UNKNOWN
748 			 && node->ToDeclaration() )
749 		{
750 			TiXmlDeclaration* dec = node->ToDeclaration();
751 			const char* enc = dec->Encoding();
752 			assert( enc );
753 
754 			if ( *enc == 0 )
755 				encoding = TIXML_ENCODING_UTF8;
756 			else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
757 				encoding = TIXML_ENCODING_UTF8;
758 			else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
759 				encoding = TIXML_ENCODING_UTF8;	// incorrect, but be nice
760 			else
761 				encoding = TIXML_ENCODING_LEGACY;
762 		}
763 
764 		p = SkipWhiteSpace( p, encoding );
765 	}
766 
767 	// Was this empty?
768 	if ( !firstChild ) {
769 		SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
770 		return 0;
771 	}
772 
773 	// All is well.
774 	return p;
775 }
776 
SetError(int err,const char * pError,TiXmlParsingData * data,TiXmlEncoding encoding)777 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
778 {
779 	// The first error in a chain is more accurate - don't set again!
780 	if ( error )
781 		return;
782 
783 	assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
784 	error   = true;
785 	errorId = err;
786 	errorDesc = errorString[ errorId ];
787 
788 	errorLocation.Clear();
789 	if ( pError && data )
790 	{
791 		//TiXmlParsingData data( pError, prevData );
792 		data->Stamp( pError, encoding );
793 		errorLocation = data->Cursor();
794 	}
795 }
796 
797 
Identify(const char * p,TiXmlEncoding encoding)798 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
799 {
800 	TiXmlNode* returnNode = 0;
801 
802 	p = SkipWhiteSpace( p, encoding );
803 	if( !p || !*p || *p != '<' )
804 	{
805 		return 0;
806 	}
807 
808 	TiXmlDocument* doc = GetDocument();
809 	p = SkipWhiteSpace( p, encoding );
810 
811 	if ( !p || !*p )
812 	{
813 		return 0;
814 	}
815 
816 	// What is this thing?
817 	// - Elements start with a letter or underscore, but xml is reserved.
818 	// - Comments: <!--
819 	// - Decleration: <?xml
820 	// - Everthing else is unknown to tinyxml.
821 	//
822 
823 	const char* xmlHeader = { "<?xml" };
824 	const char* commentHeader = { "<!--" };
825 	const char* dtdHeader = { "<!" };
826 
827 	if ( StringEqual( p, xmlHeader, true, encoding ) )
828 	{
829 		#ifdef DEBUG_PARSER
830 			TIXML_LOG( "XML parsing Declaration\n" );
831 		#endif
832 		returnNode = new TiXmlDeclaration();
833 	}
834 	else if ( StringEqual( p, commentHeader, false, encoding ) )
835 	{
836 		#ifdef DEBUG_PARSER
837 			TIXML_LOG( "XML parsing Comment\n" );
838 		#endif
839 		returnNode = new TiXmlComment();
840 	}
841 	else if ( StringEqual( p, dtdHeader, false, encoding ) )
842 	{
843 		#ifdef DEBUG_PARSER
844 			TIXML_LOG( "XML parsing Unknown(1)\n" );
845 		#endif
846 		returnNode = new TiXmlUnknown();
847 	}
848 	else if (    IsAlpha( *(p+1), encoding )
849 			  || *(p+1) == '_' )
850 	{
851 		#ifdef DEBUG_PARSER
852 			TIXML_LOG( "XML parsing Element\n" );
853 		#endif
854 		returnNode = new TiXmlElement( "" );
855 	}
856 	else
857 	{
858 		#ifdef DEBUG_PARSER
859 			TIXML_LOG( "XML parsing Unknown(2)\n" );
860 		#endif
861 		returnNode = new TiXmlUnknown();
862 	}
863 
864 	if ( returnNode )
865 	{
866 		// Set the parent, so it can report errors
867 		returnNode->parent = this;
868 	}
869 	else
870 	{
871 		if ( doc )
872 			doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
873 	}
874 	return returnNode;
875 }
876 
877 #ifdef TIXML_USE_STL
878 
StreamIn(TIXML_ISTREAM * in,TIXML_STRING * tag)879 void TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag)
880 {
881 	// We're called with some amount of pre-parsing. That is, some of "this"
882 	// element is in "tag". Go ahead and stream to the closing ">"
883 	while( in->good() )
884 	{
885 		int c = in->get();
886 		if ( c <= 0 )
887 		{
888 			TiXmlDocument* document = GetDocument();
889 			if ( document )
890 				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
891 			return;
892 		}
893 		(*tag) += (char) c ;
894 
895 		if ( c == '>' )
896 			break;
897 	}
898 
899 	if ( tag->length() < 3 ) return;
900 
901 	// Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
902 	// If not, identify and stream.
903 
904 	if (    tag->at( tag->length() - 1 ) == '>'
905 		 && tag->at( tag->length() - 2 ) == '/' )
906 	{
907 		// All good!
908 		return;
909 	}
910 	else if ( tag->at( tag->length() - 1 ) == '>' )
911 	{
912 		// There is more. Could be:
913 		//		text
914 		//		closing tag
915 		//		another node.
916 		for ( ;; )
917 		{
918 			StreamWhiteSpace( in, tag );
919 
920 			// Do we have text?
921 			if ( in->good() && in->peek() != '<' )
922 			{
923 				// Yep, text.
924 				TiXmlText text( "" );
925 				text.StreamIn( in, tag );
926 
927 				// What follows text is a closing tag or another node.
928 				// Go around again and figure it out.
929 				continue;
930 			}
931 
932 			// We now have either a closing tag...or another node.
933 			// We should be at a "<", regardless.
934 			if ( !in->good() ) return;
935 			assert( in->peek() == '<' );
936 			size_t tagIndex = tag->length();
937 
938 			bool closingTag = false;
939 			bool firstCharFound = false;
940 
941 			for( ;; )
942 			{
943 				if ( !in->good() )
944 					return;
945 
946 				int c = in->peek();
947 				if ( c <= 0 )
948 				{
949 					TiXmlDocument* document = GetDocument();
950 					if ( document )
951 						document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
952 					return;
953 				}
954 
955 				if ( c == '>' )
956 					break;
957 
958 				*tag += (char) c;
959 				in->get();
960 
961 				if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
962 				{
963 					firstCharFound = true;
964 					if ( c == '/' )
965 						closingTag = true;
966 				}
967 			}
968 			// If it was a closing tag, then read in the closing '>' to clean up the input stream.
969 			// If it was not, the streaming will be done by the tag.
970 			if ( closingTag )
971 			{
972 				if ( !in->good() )
973 					return;
974 
975 				int c = in->get();
976 				if ( c <= 0 )
977 				{
978 					TiXmlDocument* document = GetDocument();
979 					if ( document )
980 						document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
981 					return;
982 				}
983 				assert( c == '>' );
984 				*tag += (char) c;
985 
986 				// We are done, once we've found our closing tag.
987 				return;
988 			}
989 			else
990 			{
991 				// If not a closing tag, id it, and stream.
992 				const char* tagloc = tag->c_str() + tagIndex;
993 				TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
994 				if ( !node )
995 					return;
996 				node->StreamIn( in, tag );
997 				delete node;
998 				node = 0;
999 
1000 				// No return: go around from the beginning: text, closing tag, or node.
1001 			}
1002 		}
1003 	}
1004 }
1005 #endif
1006 
Parse(const char * p,TiXmlParsingData * data,TiXmlEncoding encoding)1007 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1008 {
1009 	p = SkipWhiteSpace( p, encoding );
1010 	TiXmlDocument* document = GetDocument();
1011 
1012 	if ( !p || !*p )
1013 	{
1014 		if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
1015 		return 0;
1016 	}
1017 
1018 //	TiXmlParsingData data( p, prevData );
1019 	if ( data )
1020 	{
1021 		data->Stamp( p, encoding );
1022 		location = data->Cursor();
1023 	}
1024 
1025 	if ( *p != '<' )
1026 	{
1027 		if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
1028 		return 0;
1029 	}
1030 
1031 	p = SkipWhiteSpace( p+1, encoding );
1032 
1033 	// Read the name.
1034 	const char* pErr = p;
1035 
1036     p = ReadName( p, &value, encoding );
1037 	if ( !p || !*p )
1038 	{
1039 		if ( document )	document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
1040 		return 0;
1041 	}
1042 
1043     TIXML_STRING endTag ("</");
1044 	endTag += value;
1045 	endTag += ">";
1046 
1047 	// Check for and read attributes. Also look for an empty
1048 	// tag or an end tag.
1049 	while ( p && *p )
1050 	{
1051 		pErr = p;
1052 		p = SkipWhiteSpace( p, encoding );
1053 		if ( !p || !*p )
1054 		{
1055 			if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1056 			return 0;
1057 		}
1058 		if ( *p == '/' )
1059 		{
1060 			++p;
1061 			// Empty tag.
1062 			if ( *p  != '>' )
1063 			{
1064 				if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
1065 				return 0;
1066 			}
1067 			return (p+1);
1068 		}
1069 		else if ( *p == '>' )
1070 		{
1071 			// Done with attributes (if there were any.)
1072 			// Read the value -- which can include other
1073 			// elements -- read the end tag, and return.
1074 			++p;
1075 			p = ReadValue( p, data, encoding );		// Note this is an Element method, and will set the error if one happens.
1076 			if ( !p || !*p )
1077 				return 0;
1078 
1079 			// We should find the end tag now
1080 			if ( StringEqual( p, endTag.c_str(), false, encoding ) )
1081 			{
1082 				p += endTag.length();
1083 				return p;
1084 			}
1085 			else
1086 			{
1087 				if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1088 				return 0;
1089 			}
1090 		}
1091 		else
1092 		{
1093 			// Try to read an attribute:
1094 			TiXmlAttribute* attrib = new TiXmlAttribute();
1095 			if ( !attrib )
1096 			{
1097 				if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
1098 				return 0;
1099 			}
1100 
1101 			attrib->SetDocument( document );
1102 			const char* pErr = p;
1103 			p = attrib->Parse( p, data, encoding );
1104 
1105 			if ( !p || !*p )
1106 			{
1107 				if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1108 				delete attrib;
1109 				return 0;
1110 			}
1111 
1112 			// Handle the strange case of double attributes:
1113 			TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
1114 			if ( node )
1115 			{
1116 				node->SetValue( attrib->Value() );
1117 				delete attrib;
1118 				return 0;
1119 			}
1120 
1121 			attributeSet.Add( attrib );
1122 		}
1123 	}
1124 	return p;
1125 }
1126 
1127 
ReadValue(const char * p,TiXmlParsingData * data,TiXmlEncoding encoding)1128 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1129 {
1130 	TiXmlDocument* document = GetDocument();
1131 
1132 	const char* pWithWhiteSpace = p;
1133 	// Read in text and elements in any order.
1134 	p = SkipWhiteSpace( p, encoding );
1135 	while ( p && *p )
1136 	{
1137 		if ( *p != '<' )
1138 		{
1139 			// Take what we have, make a text element.
1140 			TiXmlText* textNode = new TiXmlText( "" );
1141 
1142 			if ( !textNode )
1143 			{
1144 				if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
1145 				    return 0;
1146 			}
1147 
1148 			if ( TiXmlBase::IsWhiteSpaceCondensed() )
1149 			{
1150 				p = textNode->Parse( p, data, encoding );
1151 			}
1152 			else
1153 			{
1154 				// Special case: we want to keep the white space
1155 				// so that leading spaces aren't removed.
1156 				p = textNode->Parse( pWithWhiteSpace, data, encoding );
1157 			}
1158 
1159 			if ( !textNode->Blank() )
1160 				LinkEndChild( textNode );
1161 			else
1162 				delete textNode;
1163 		}
1164 		else
1165 		{
1166 			// We hit a '<'
1167 			// Have we hit a new element or an end tag?
1168 			if ( StringEqual( p, "</", false, encoding ) )
1169 			{
1170 				return p;
1171 			}
1172 			else
1173 			{
1174 				TiXmlNode* node = Identify( p, encoding );
1175 				if ( node )
1176 				{
1177 					p = node->Parse( p, data, encoding );
1178 					LinkEndChild( node );
1179 				}
1180 				else
1181 				{
1182 					return 0;
1183 				}
1184 			}
1185 		}
1186 		p = SkipWhiteSpace( p, encoding );
1187 	}
1188 
1189 	if ( !p )
1190 	{
1191 		if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
1192 	}
1193 	return p;
1194 }
1195 
1196 
#ifdef TIXML_USE_STL
// Pulls characters from 'in' and appends them to 'tag' until the closing
// '>' has been appended or the stream stops being good().
// A value <= 0 from get() is reported on the owning document (if any) as
// TIXML_ERROR_EMBEDDED_NULL and ends the read.
void TiXmlUnknown::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
{
	while ( in->good() )
	{
		const int ch = in->get();
		if ( ch > 0 )
		{
			(*tag) += (char) ch;
			if ( ch == '>' )
				return;		// Tag is complete.
			continue;
		}

		// Nothing usable was read.
		TiXmlDocument* doc = GetDocument();
		if ( doc )
			doc->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
		return;
	}
}
#endif
1220 
1221 
Parse(const char * p,TiXmlParsingData * data,TiXmlEncoding encoding)1222 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1223 {
1224 	TiXmlDocument* document = GetDocument();
1225 	p = SkipWhiteSpace( p, encoding );
1226 
1227 //	TiXmlParsingData data( p, prevData );
1228 	if ( data )
1229 	{
1230 		data->Stamp( p, encoding );
1231 		location = data->Cursor();
1232 	}
1233 	if ( !p || !*p || *p != '<' )
1234 	{
1235 		if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
1236 		return 0;
1237 	}
1238 	++p;
1239     value = "";
1240 
1241 	while ( p && *p && *p != '>' )
1242 	{
1243 		value += *p;
1244 		++p;
1245 	}
1246 
1247 	if ( !p )
1248 	{
1249 		if ( document )	document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
1250 	}
1251 	if ( *p == '>' )
1252 		return p+1;
1253 	return p;
1254 }
1255 
#ifdef TIXML_USE_STL
// Pulls characters from 'in' and appends them to 'tag' until the text
// accumulated in 'tag' ends with "-->" or the stream stops being good().
// A value <= 0 from get() is reported on the owning document (if any) as
// TIXML_ERROR_EMBEDDED_NULL and ends the read.
void TiXmlComment::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
{
	while ( in->good() )
	{
		int c = in->get();
		if ( c <= 0 )
		{
			TiXmlDocument* document = GetDocument();
			if ( document )
				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
			return;
		}

		(*tag) += (char) c;

		// The length test must come first: with fewer than three characters
		// the index arithmetic below would wrap around and at() would throw.
		if ( c == '>'
			 && tag->length() >= 3
			 && tag->at( tag->length() - 2 ) == '-'
			 && tag->at( tag->length() - 3 ) == '-' )
		{
			// All is well.
			return;
		}
	}
}
#endif
1282 
1283 
Parse(const char * p,TiXmlParsingData * data,TiXmlEncoding encoding)1284 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1285 {
1286 	TiXmlDocument* document = GetDocument();
1287 	value = "";
1288 
1289 	p = SkipWhiteSpace( p, encoding );
1290 
1291 //	TiXmlParsingData data( p, prevData );
1292 	if ( data )
1293 	{
1294 		data->Stamp( p, encoding );
1295 		location = data->Cursor();
1296 	}
1297 	const char* startTag = "<!--";
1298 	const char* endTag   = "-->";
1299 
1300 	if ( !StringEqual( p, startTag, false, encoding ) )
1301 	{
1302 		document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
1303 		return 0;
1304 	}
1305 	p += strlen( startTag );
1306 	p = ReadText( p, &value, false, endTag, false, encoding );
1307 	return p;
1308 }
1309 
1310 
Parse(const char * p,TiXmlParsingData * data,TiXmlEncoding encoding)1311 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1312 {
1313 	p = SkipWhiteSpace( p, encoding );
1314 	if ( !p || !*p ) return 0;
1315 
1316 	int tabsize = 4;
1317 	if ( document )
1318 		tabsize = document->TabSize();
1319 
1320 //	TiXmlParsingData data( p, prevData );
1321 	if ( data )
1322 	{
1323 		data->Stamp( p, encoding );
1324 		location = data->Cursor();
1325 	}
1326 	// Read the name, the '=' and the value.
1327 	const char* pErr = p;
1328 	p = ReadName( p, &name, encoding );
1329 	if ( !p || !*p )
1330 	{
1331 		if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1332 		return 0;
1333 	}
1334 	p = SkipWhiteSpace( p, encoding );
1335 	if ( !p || !*p || *p != '=' )
1336 	{
1337 		if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1338 		return 0;
1339 	}
1340 
1341 	++p;	// skip '='
1342 	p = SkipWhiteSpace( p, encoding );
1343 	if ( !p || !*p )
1344 	{
1345 		if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1346 		return 0;
1347 	}
1348 
1349 	const char* end;
1350 
1351 	if ( *p == '\'' )
1352 	{
1353 		++p;
1354 		end = "\'";
1355 		p = ReadText( p, &value, false, end, false, encoding );
1356 	}
1357 	else if ( *p == '"' )
1358 	{
1359 		++p;
1360 		end = "\"";
1361 		p = ReadText( p, &value, false, end, false, encoding );
1362 	}
1363 	else
1364 	{
1365 		// All attribute values should be in single or double quotes.
1366 		// But this is such a common error that the parser will try
1367 		// its best, even without them.
1368 		value = "";
1369 		while (    p && *p										// existence
1370 				&& !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'	// whitespace
1371 				&& *p != '/' && *p != '>' )						// tag end
1372 		{
1373 			value += *p;
1374 			++p;
1375 		}
1376 	}
1377 	return p;
1378 }
1379 
#ifdef TIXML_USE_STL
// Appends characters from 'in' to 'tag' until the next character would be
// '<' (which is left in the stream) or the stream stops being good().
// A value <= 0 from peek() is reported on the owning document (if any) as
// TIXML_ERROR_EMBEDDED_NULL and ends the read.
void TiXmlText::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
{
	// Each pass peeks at one character; it is only consumed (by the get()
	// in the loop increment) after it has been accepted.
	for ( ; in->good(); in->get() )
	{
		const int next = in->peek();
		if ( next == '<' )
			return;

		if ( next <= 0 )
		{
			TiXmlDocument* doc = GetDocument();
			if ( doc )
				doc->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
			return;
		}

		(*tag) += (char) next;
	}
}
#endif
1401 
Parse(const char * p,TiXmlParsingData * data,TiXmlEncoding encoding)1402 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1403 {
1404 	value = "";
1405 //	TiXmlParsingData data( p, prevData );
1406 	if ( data )
1407 	{
1408 		data->Stamp( p, encoding );
1409 		location = data->Cursor();
1410 	}
1411 	bool ignoreWhite = true;
1412 
1413 	const char* end = "<";
1414 	p = ReadText( p, &value, ignoreWhite, end, false, encoding );
1415 	if ( p )
1416 		return p-1;	// don't truncate the '<'
1417 	return 0;
1418 }
1419 
#ifdef TIXML_USE_STL
// Pulls characters from 'in' and appends them to 'tag' until the closing
// '>' has been appended or the stream stops being good().
// A value <= 0 from get() is reported on the owning document (if any) as
// TIXML_ERROR_EMBEDDED_NULL and ends the read.
void TiXmlDeclaration::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
{
	bool readFailed = false;

	while ( !readFailed && in->good() )
	{
		const int ch = in->get();
		readFailed = ( ch <= 0 );
		if ( !readFailed )
		{
			(*tag) += (char) ch;
			if ( ch == '>' )
				return;		// Declaration is complete.
		}
	}

	// Only a bad read is an error; a stream that simply stopped being
	// good() is not reported here.
	if ( readFailed )
	{
		TiXmlDocument* doc = GetDocument();
		if ( doc )
			doc->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
	}
}
#endif
1443 
Parse(const char * p,TiXmlParsingData * data,TiXmlEncoding _encoding)1444 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
1445 {
1446 	p = SkipWhiteSpace( p, _encoding );
1447 	// Find the beginning, find the end, and look for
1448 	// the stuff in-between.
1449 	TiXmlDocument* document = GetDocument();
1450 	if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
1451 	{
1452 		if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
1453 		return 0;
1454 	}
1455 //	TiXmlParsingData data( p, prevData );
1456 	if ( data )
1457 	{
1458 		data->Stamp( p, _encoding );
1459 		location = data->Cursor();
1460 	}
1461 	p += 5;
1462 
1463 	version = "";
1464 	encoding = "";
1465 	standalone = "";
1466 
1467 	while ( p && *p )
1468 	{
1469 		if ( *p == '>' )
1470 		{
1471 			++p;
1472 			return p;
1473 		}
1474 
1475 		p = SkipWhiteSpace( p, _encoding );
1476 		if ( StringEqual( p, "version", true, _encoding ) )
1477 		{
1478 			TiXmlAttribute attrib;
1479 			p = attrib.Parse( p, data, _encoding );
1480 			version = attrib.Value();
1481 		}
1482 		else if ( StringEqual( p, "encoding", true, _encoding ) )
1483 		{
1484 			TiXmlAttribute attrib;
1485 			p = attrib.Parse( p, data, _encoding );
1486 			encoding = attrib.Value();
1487 		}
1488 		else if ( StringEqual( p, "standalone", true, _encoding ) )
1489 		{
1490 			TiXmlAttribute attrib;
1491 			p = attrib.Parse( p, data, _encoding );
1492 			standalone = attrib.Value();
1493 		}
1494 		else
1495 		{
1496 			// Read over whatever it is.
1497 			while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
1498 				++p;
1499 		}
1500 	}
1501 	return 0;
1502 }
1503 
Blank() const1504 bool TiXmlText::Blank() const
1505 {
1506 	for ( unsigned i=0; i<value.length(); i++ )
1507 		if ( !IsWhiteSpace( value[i] ) )
1508 			return false;
1509 	return true;
1510 }
1511 
1512