1 /**
2  * pugixml parser - version 1.8
3  * --------------------------------------------------------
4  * Copyright (C) 2006-2016, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
5  * Report bugs and download new versions at http://pugixml.org/
6  *
7  * This library is distributed under the MIT License. See notice at the end
8  * of this file.
9  *
10  * This work is based on the pugxml parser, which is:
11  * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
12  */
13 
14 #ifndef SOURCE_PUGIXML_CPP
15 #define SOURCE_PUGIXML_CPP
16 
17 #include "pugixml.hpp"
18 
19 #include <stdlib.h>
20 #include <stdio.h>
21 #include <string.h>
22 #include <assert.h>
23 #include <limits.h>
24 
25 #ifdef PUGIXML_WCHAR_MODE
26 #	include <wchar.h>
27 #endif
28 
29 #ifndef PUGIXML_NO_XPATH
30 #	include <math.h>
31 #	include <float.h>
32 #	ifdef PUGIXML_NO_EXCEPTIONS
33 #		include <setjmp.h>
34 #	endif
35 #endif
36 
37 #ifndef PUGIXML_NO_STL
38 #	include <istream>
39 #	include <ostream>
40 #	include <string>
41 #endif
42 
43 // For placement new
44 #include <new>
45 
46 #ifdef _MSC_VER
47 #	pragma warning(push)
48 #	pragma warning(disable: 4127) // conditional expression is constant
49 #	pragma warning(disable: 4324) // structure was padded due to __declspec(align())
50 #	pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
51 #	pragma warning(disable: 4702) // unreachable code
52 #	pragma warning(disable: 4996) // this function or variable may be unsafe
53 #	pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
54 #endif
55 
56 #ifdef __INTEL_COMPILER
57 #	pragma warning(disable: 177) // function was declared but never referenced
58 #	pragma warning(disable: 279) // controlling expression is constant
59 #	pragma warning(disable: 1478 1786) // function was declared "deprecated"
60 #	pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
61 #endif
62 
63 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
64 #	pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
65 #endif
66 
67 #ifdef __BORLANDC__
68 #	pragma option push
69 #	pragma warn -8008 // condition is always false
70 #	pragma warn -8066 // unreachable code
71 #endif
72 
73 #ifdef __SNC__
74 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
75 #	pragma diag_suppress=178 // function was declared but never referenced
76 #	pragma diag_suppress=237 // controlling expression is constant
77 #endif
78 
79 // Inlining controls
80 #if defined(_MSC_VER) && _MSC_VER >= 1300
81 #	define PUGI__NO_INLINE __declspec(noinline)
82 #elif defined(__GNUC__)
83 #	define PUGI__NO_INLINE __attribute__((noinline))
84 #else
85 #	define PUGI__NO_INLINE
86 #endif
87 
88 // Branch weight controls
89 #if defined(__GNUC__)
90 #	define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
91 #else
92 #	define PUGI__UNLIKELY(cond) (cond)
93 #endif
94 
95 // Simple static assertion
96 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
97 
98 // Digital Mars C++ bug workaround for passing char loaded from memory via stack
99 #ifdef __DMC__
100 #	define PUGI__DMC_VOLATILE volatile
101 #else
102 #	define PUGI__DMC_VOLATILE
103 #endif
104 
105 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
106 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
107 using std::memcpy;
108 using std::memmove;
109 using std::memset;
110 #endif
111 
112 // Some MinGW versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions in strict ANSI mode
113 #if defined(PUGIXML_HAS_LONG_LONG) && defined(__MINGW32__) && defined(__STRICT_ANSI__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX)
114 #	define LLONG_MAX 9223372036854775807LL
115 #	define LLONG_MIN (-LLONG_MAX-1)
116 #	define ULLONG_MAX (2ULL*LLONG_MAX+1)
117 #endif
118 
119 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
120 #if defined(_MSC_VER) && !defined(__S3E__)
121 #	define PUGI__MSVC_CRT_VERSION _MSC_VER
122 #endif
123 
124 #ifdef PUGIXML_HEADER_ONLY
125 #	define PUGI__NS_BEGIN namespace pugi { namespace impl {
126 #	define PUGI__NS_END } }
127 #	define PUGI__FN inline
128 #	define PUGI__FN_NO_INLINE inline
129 #else
130 #	if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
131 #		define PUGI__NS_BEGIN namespace pugi { namespace impl {
132 #		define PUGI__NS_END } }
133 #	else
134 #		define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
135 #		define PUGI__NS_END } } }
136 #	endif
137 #	define PUGI__FN
138 #	define PUGI__FN_NO_INLINE PUGI__NO_INLINE
139 #endif
140 
141 // uintptr_t
142 #if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561)
143 namespace pugi
144 {
145 #	ifndef _UINTPTR_T_DEFINED
146 	typedef size_t uintptr_t;
147 #	endif
148 
149 	typedef unsigned __int8 uint8_t;
150 	typedef unsigned __int16 uint16_t;
151 	typedef unsigned __int32 uint32_t;
152 }
153 #else
154 #	include <stdint.h>
155 #endif
156 
157 // Memory allocation
158 PUGI__NS_BEGIN
default_allocate(size_t size)159 	PUGI__FN void* default_allocate(size_t size)
160 	{
161 		return malloc(size);
162 	}
163 
default_deallocate(void * ptr)164 	PUGI__FN void default_deallocate(void* ptr)
165 	{
166 		free(ptr);
167 	}
168 
169 	template <typename T>
170 	struct xml_memory_management_function_storage
171 	{
172 		static allocation_function allocate;
173 		static deallocation_function deallocate;
174 	};
175 
176 	// Global allocation functions are stored in class statics so that in header mode linker deduplicates them
177 	// Without a template<> we'll get multiple definitions of the same static
178 	template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
179 	template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
180 
181 	typedef xml_memory_management_function_storage<int> xml_memory;
182 PUGI__NS_END
183 
184 // String utilities
185 PUGI__NS_BEGIN
186 	// Get string length
strlength(const char_t * s)187 	PUGI__FN size_t strlength(const char_t* s)
188 	{
189 		assert(s);
190 
191 	#ifdef PUGIXML_WCHAR_MODE
192 		return wcslen(s);
193 	#else
194 		return strlen(s);
195 	#endif
196 	}
197 
198 	// Compare two strings
strequal(const char_t * src,const char_t * dst)199 	PUGI__FN bool strequal(const char_t* src, const char_t* dst)
200 	{
201 		assert(src && dst);
202 
203 	#ifdef PUGIXML_WCHAR_MODE
204 		return wcscmp(src, dst) == 0;
205 	#else
206 		return strcmp(src, dst) == 0;
207 	#endif
208 	}
209 
210 	// Compare lhs with [rhs_begin, rhs_end)
strequalrange(const char_t * lhs,const char_t * rhs,size_t count)211 	PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
212 	{
213 		for (size_t i = 0; i < count; ++i)
214 			if (lhs[i] != rhs[i])
215 				return false;
216 
217 		return lhs[count] == 0;
218 	}
219 
220 	// Get length of wide string, even if CRT lacks wide character support
strlength_wide(const wchar_t * s)221 	PUGI__FN size_t strlength_wide(const wchar_t* s)
222 	{
223 		assert(s);
224 
225 	#ifdef PUGIXML_WCHAR_MODE
226 		return wcslen(s);
227 	#else
228 		const wchar_t* end = s;
229 		while (*end) end++;
230 		return static_cast<size_t>(end - s);
231 	#endif
232 	}
233 PUGI__NS_END
234 
235 // auto_ptr-like object for exception recovery
236 PUGI__NS_BEGIN
// Scope guard that holds a raw pointer together with the function that disposes of it.
// Unless release() is called first, the destructor passes a non-null pointer to the deleter,
// which covers cleanup on early returns and during exception unwinding.
template <typename T> struct auto_deleter
{
	typedef void (*D)(T*);

	T* data;   // guarded pointer; null after release() or when constructed from null
	D deleter; // called with 'data' by the destructor

	auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
	{
	}

	~auto_deleter()
	{
		if (data) deleter(data);
	}

	// Gives up ownership: returns the guarded pointer and disarms the destructor
	T* release()
	{
		T* result = data;
		data = 0;
		return result;
	}

private:
	// Copying is disabled (declared, never defined): a copy would hold the same pointer
	// and both destructors would run the deleter on it
	auto_deleter(const auto_deleter&);
	auto_deleter& operator=(const auto_deleter&);
};
260 PUGI__NS_END
261 
262 #ifdef PUGIXML_COMPACT
263 PUGI__NS_BEGIN
264 	class compact_hash_table
265 	{
266 	public:
compact_hash_table()267 		compact_hash_table(): _items(0), _capacity(0), _count(0)
268 		{
269 		}
270 
clear()271 		void clear()
272 		{
273 			if (_items)
274 			{
275 				xml_memory::deallocate(_items);
276 				_items = 0;
277 				_capacity = 0;
278 				_count = 0;
279 			}
280 		}
281 
find(const void * key)282 		void** find(const void* key)
283 		{
284 			assert(key);
285 
286 			if (_capacity == 0) return 0;
287 
288 			size_t hashmod = _capacity - 1;
289 			size_t bucket = hash(key) & hashmod;
290 
291 			for (size_t probe = 0; probe <= hashmod; ++probe)
292 			{
293 				item_t& probe_item = _items[bucket];
294 
295 				if (probe_item.key == key)
296 					return &probe_item.value;
297 
298 				if (probe_item.key == 0)
299 					return 0;
300 
301 				// hash collision, quadratic probing
302 				bucket = (bucket + probe + 1) & hashmod;
303 			}
304 
305 			assert(false && "Hash table is full");
306 			return 0;
307 		}
308 
insert(const void * key)309 		void** insert(const void* key)
310 		{
311 			assert(key);
312 			assert(_capacity != 0 && _count < _capacity - _capacity / 4);
313 
314 			size_t hashmod = _capacity - 1;
315 			size_t bucket = hash(key) & hashmod;
316 
317 			for (size_t probe = 0; probe <= hashmod; ++probe)
318 			{
319 				item_t& probe_item = _items[bucket];
320 
321 				if (probe_item.key == 0)
322 				{
323 					probe_item.key = key;
324 					_count++;
325 					return &probe_item.value;
326 				}
327 
328 				if (probe_item.key == key)
329 					return &probe_item.value;
330 
331 				// hash collision, quadratic probing
332 				bucket = (bucket + probe + 1) & hashmod;
333 			}
334 
335 			assert(false && "Hash table is full");
336 			return 0;
337 		}
338 
reserve()339 		bool reserve()
340 		{
341 			if (_count + 16 >= _capacity - _capacity / 4)
342 				return rehash();
343 
344 			return true;
345 		}
346 
347 	private:
348 		struct item_t
349 		{
350 			const void* key;
351 			void* value;
352 		};
353 
354 		item_t* _items;
355 		size_t _capacity;
356 
357 		size_t _count;
358 
359 		bool rehash();
360 
hash(const void * key)361 		static unsigned int hash(const void* key)
362 		{
363 			unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
364 
365 			// MurmurHash3 32-bit finalizer
366 			h ^= h >> 16;
367 			h *= 0x85ebca6bu;
368 			h ^= h >> 13;
369 			h *= 0xc2b2ae35u;
370 			h ^= h >> 16;
371 
372 			return h;
373 		}
374 	};
375 
rehash()376 	PUGI__FN_NO_INLINE bool compact_hash_table::rehash()
377 	{
378 		compact_hash_table rt;
379 		rt._capacity = (_capacity == 0) ? 32 : _capacity * 2;
380 		rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * rt._capacity));
381 
382 		if (!rt._items)
383 			return false;
384 
385 		memset(rt._items, 0, sizeof(item_t) * rt._capacity);
386 
387 		for (size_t i = 0; i < _capacity; ++i)
388 			if (_items[i].key)
389 				*rt.insert(_items[i].key) = _items[i].value;
390 
391 		if (_items)
392 			xml_memory::deallocate(_items);
393 
394 		_capacity = rt._capacity;
395 		_items = rt._items;
396 
397 		assert(_count == rt._count);
398 
399 		return true;
400 	}
401 
402 PUGI__NS_END
403 #endif
404 
405 PUGI__NS_BEGIN
// granularity (in bytes) in which block offsets and sizes are encoded
#ifdef PUGIXML_COMPACT
static const uintptr_t xml_memory_block_alignment = 4;
#else
static const uintptr_t xml_memory_block_alignment = sizeof(void*);
#endif

// extra metadata bits; together with the node type they occupy the low byte of a header
static const uintptr_t xml_memory_page_contents_shared_mask = 0x40;
static const uintptr_t xml_memory_page_name_allocated_mask = 0x20;
static const uintptr_t xml_memory_page_value_allocated_mask = 0x10;
static const uintptr_t xml_memory_page_type_mask = 0x0f;

// combined masks for string uniqueness
static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;

#ifdef PUGIXML_COMPACT
	// compact headers are objects that locate their page themselves
	#define PUGI__GETHEADER_IMPL(object, page, flags) // unused
	#define PUGI__GETPAGE_IMPL(header) (header).get_page()
#else
	// the header stores the byte distance from the page to the object in the bits above the low byte
	#define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags))
	// this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
	#define PUGI__GETPAGE_IMPL(header) static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>(reinterpret_cast<const char*>(&header) - (header >> 8))))
#endif

	// page and node type accessors for anything that has a 'header' member
	#define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
	#define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask)
433 
struct xml_allocator;

// Bookkeeping header placed at the start of a memory page; the bytes handed out by the
// allocator follow it directly
struct xml_memory_page
{
	// Treats 'memory' as a page header and clears every field; returns the same address typed as a page
	static xml_memory_page* construct(void* memory)
	{
		xml_memory_page* page = static_cast<xml_memory_page*>(memory);

		page->allocator = 0;

		page->prev = page->next = 0;

		page->busy_size = page->freed_size = 0;

	#ifdef PUGIXML_COMPACT
		page->compact_string_base = 0;
		page->compact_shared_parent = 0;
		page->compact_page_marker = 0;
	#endif

		return page;
	}

	xml_allocator* allocator;

	// neighbours in the doubly linked list of pages
	xml_memory_page* prev;
	xml_memory_page* next;

	size_t busy_size; // bytes handed out from this page
	size_t freed_size; // bytes handed back so far

#ifdef PUGIXML_COMPACT
	// per-page reference points used by the compact encodings
	char_t* compact_string_base;
	void* compact_shared_parent;
	uint32_t* compact_page_marker;
#endif
};
471 
472 	static const size_t xml_memory_page_size =
473 	#ifdef PUGIXML_MEMORY_PAGE_SIZE
474 		(PUGIXML_MEMORY_PAGE_SIZE)
475 	#else
476 		32768
477 	#endif
478 		- sizeof(xml_memory_page);
479 
// Prefix stored immediately before every string handed out by the page allocator.
// Both fields are expressed in units of the block alignment.
struct xml_memory_string_header
{
	// distance of this header from the start of the page data
	uint16_t page_offset;

	// size of the header plus the string; 0 if the string occupies the whole page
	uint16_t full_size;
};
485 
486 	struct xml_allocator
487 	{
xml_allocatorxml_allocator488 		xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
489 		{
490 		#ifdef PUGIXML_COMPACT
491 			_hash = 0;
492 		#endif
493 		}
494 
allocate_pagexml_allocator495 		xml_memory_page* allocate_page(size_t data_size)
496 		{
497 			size_t size = sizeof(xml_memory_page) + data_size;
498 
499 			// allocate block with some alignment, leaving memory for worst-case padding
500 			void* memory = xml_memory::allocate(size);
501 			if (!memory) return 0;
502 
503 			// prepare page structure
504 			xml_memory_page* page = xml_memory_page::construct(memory);
505 			assert(page);
506 
507 			page->allocator = _root->allocator;
508 
509 			return page;
510 		}
511 
deallocate_pagexml_allocator512 		static void deallocate_page(xml_memory_page* page)
513 		{
514 			xml_memory::deallocate(page);
515 		}
516 
517 		void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
518 
allocate_memoryxml_allocator519 		void* allocate_memory(size_t size, xml_memory_page*& out_page)
520 		{
521 			if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
522 				return allocate_memory_oob(size, out_page);
523 
524 			void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;
525 
526 			_busy_size += size;
527 
528 			out_page = _root;
529 
530 			return buf;
531 		}
532 
533 	#ifdef PUGIXML_COMPACT
allocate_objectxml_allocator534 		void* allocate_object(size_t size, xml_memory_page*& out_page)
535 		{
536 			void* result = allocate_memory(size + sizeof(uint32_t), out_page);
537 			if (!result) return 0;
538 
539 			// adjust for marker
540 			ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);
541 
542 			if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment))
543 			{
544 				// insert new marker
545 				uint32_t* marker = static_cast<uint32_t*>(result);
546 
547 				*marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
548 				out_page->compact_page_marker = marker;
549 
550 				// since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
551 				// this will make sure deallocate_memory correctly tracks the size
552 				out_page->freed_size += sizeof(uint32_t);
553 
554 				return marker + 1;
555 			}
556 			else
557 			{
558 				// roll back uint32_t part
559 				_busy_size -= sizeof(uint32_t);
560 
561 				return result;
562 			}
563 		}
564 	#else
allocate_objectxml_allocator565 		void* allocate_object(size_t size, xml_memory_page*& out_page)
566 		{
567 			return allocate_memory(size, out_page);
568 		}
569 	#endif
570 
deallocate_memoryxml_allocator571 		void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
572 		{
573 			if (page == _root) page->busy_size = _busy_size;
574 
575 			assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
576 			(void)!ptr;
577 
578 			page->freed_size += size;
579 			assert(page->freed_size <= page->busy_size);
580 
581 			if (page->freed_size == page->busy_size)
582 			{
583 				if (page->next == 0)
584 				{
585 					assert(_root == page);
586 
587 					// top page freed, just reset sizes
588 					page->busy_size = 0;
589 					page->freed_size = 0;
590 
591 				#ifdef PUGIXML_COMPACT
592 					// reset compact state to maximize efficiency
593 					page->compact_string_base = 0;
594 					page->compact_shared_parent = 0;
595 					page->compact_page_marker = 0;
596 				#endif
597 
598 					_busy_size = 0;
599 				}
600 				else
601 				{
602 					assert(_root != page);
603 					assert(page->prev);
604 
605 					// remove from the list
606 					page->prev->next = page->next;
607 					page->next->prev = page->prev;
608 
609 					// deallocate
610 					deallocate_page(page);
611 				}
612 			}
613 		}
614 
allocate_stringxml_allocator615 		char_t* allocate_string(size_t length)
616 		{
617 			static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;
618 
619 			PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
620 
621 			// allocate memory for string and header block
622 			size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
623 
624 			// round size up to block alignment boundary
625 			size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
626 
627 			xml_memory_page* page;
628 			xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
629 
630 			if (!header) return 0;
631 
632 			// setup header
633 			ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);
634 
635 			assert(page_offset % xml_memory_block_alignment == 0);
636 			assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
637 			header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);
638 
639 			// full_size == 0 for large strings that occupy the whole page
640 			assert(full_size % xml_memory_block_alignment == 0);
641 			assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
642 			header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
643 
644 			// round-trip through void* to avoid 'cast increases required alignment of target type' warning
645 			// header is guaranteed a pointer-sized alignment, which should be enough for char_t
646 			return static_cast<char_t*>(static_cast<void*>(header + 1));
647 		}
648 
deallocate_stringxml_allocator649 		void deallocate_string(char_t* string)
650 		{
651 			// this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
652 			// we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
653 
654 			// get header
655 			xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
656 			assert(header);
657 
658 			// deallocate
659 			size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
660 			xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
661 
662 			// if full_size == 0 then this string occupies the whole page
663 			size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
664 
665 			deallocate_memory(header, full_size, page);
666 		}
667 
reservexml_allocator668 		bool reserve()
669 		{
670 		#ifdef PUGIXML_COMPACT
671 			return _hash->reserve();
672 		#else
673 			return true;
674 		#endif
675 		}
676 
677 		xml_memory_page* _root;
678 		size_t _busy_size;
679 
680 	#ifdef PUGIXML_COMPACT
681 		compact_hash_table* _hash;
682 	#endif
683 	};
684 
allocate_memory_oob(size_t size,xml_memory_page * & out_page)685 	PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
686 	{
687 		const size_t large_allocation_threshold = xml_memory_page_size / 4;
688 
689 		xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
690 		out_page = page;
691 
692 		if (!page) return 0;
693 
694 		if (size <= large_allocation_threshold)
695 		{
696 			_root->busy_size = _busy_size;
697 
698 			// insert page at the end of linked list
699 			page->prev = _root;
700 			_root->next = page;
701 			_root = page;
702 
703 			_busy_size = size;
704 		}
705 		else
706 		{
707 			// insert page before the end of linked list, so that it is deleted as soon as possible
708 			// the last page is not deleted even if it's empty (see deallocate_memory)
709 			assert(_root->prev);
710 
711 			page->prev = _root->prev;
712 			page->next = _root;
713 
714 			_root->prev->next = page;
715 			_root->prev = page;
716 
717 			page->busy_size = size;
718 		}
719 
720 		return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
721 	}
722 PUGI__NS_END
723 
724 #ifdef PUGIXML_COMPACT
725 PUGI__NS_BEGIN
// Unit used by the compact encodings: distances are stored in multiples of compact_alignment
// bytes, i.e. shifted by compact_alignment_log2 bits
static const uintptr_t compact_alignment_log2 = 2;
static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
728 
729 	class compact_header
730 	{
731 	public:
compact_header(xml_memory_page * page,unsigned int flags)732 		compact_header(xml_memory_page* page, unsigned int flags)
733 		{
734 			PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
735 
736 			ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
737 			assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);
738 
739 			_page = static_cast<unsigned char>(offset >> compact_alignment_log2);
740 			_flags = static_cast<unsigned char>(flags);
741 		}
742 
operator &=(uintptr_t mod)743 		void operator&=(uintptr_t mod)
744 		{
745 			_flags &= static_cast<unsigned char>(mod);
746 		}
747 
operator |=(uintptr_t mod)748 		void operator|=(uintptr_t mod)
749 		{
750 			_flags |= static_cast<unsigned char>(mod);
751 		}
752 
operator &(uintptr_t mod) const753 		uintptr_t operator&(uintptr_t mod) const
754 		{
755 			return _flags & mod;
756 		}
757 
get_page() const758 		xml_memory_page* get_page() const
759 		{
760 			// round-trip through void* to silence 'cast increases required alignment of target type' warnings
761 			const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
762 			const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));
763 
764 			return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
765 		}
766 
767 	private:
768 		unsigned char _page;
769 		unsigned char _flags;
770 	};
771 
compact_get_page(const void * object,int header_offset)772 	PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
773 	{
774 		const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset);
775 
776 		return header->get_page();
777 	}
778 
compact_get_value(const void * object)779 	template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object)
780 	{
781 		return static_cast<T*>(*compact_get_page(object, header_offset)->allocator->_hash->find(object));
782 	}
783 
compact_set_value(const void * object,T * value)784 	template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value)
785 	{
786 		*compact_get_page(object, header_offset)->allocator->_hash->insert(object) = value;
787 	}
788 
789 	template <typename T, int header_offset, int start = -126> class compact_pointer
790 	{
791 	public:
compact_pointer()792 		compact_pointer(): _data(0)
793 		{
794 		}
795 
operator =(const compact_pointer & rhs)796 		void operator=(const compact_pointer& rhs)
797 		{
798 			*this = rhs + 0;
799 		}
800 
operator =(T * value)801 		void operator=(T* value)
802 		{
803 			if (value)
804 			{
805 				// value is guaranteed to be compact-aligned; 'this' is not
806 				// our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
807 				// so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
808 				// compensate for arithmetic shift rounding for negative values
809 				ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
810 				ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
811 
812 				if (static_cast<uintptr_t>(offset) <= 253)
813 					_data = static_cast<unsigned char>(offset + 1);
814 				else
815 				{
816 					compact_set_value<header_offset>(this, value);
817 
818 					_data = 255;
819 				}
820 			}
821 			else
822 				_data = 0;
823 		}
824 
operator T*() const825 		operator T*() const
826 		{
827 			if (_data)
828 			{
829 				if (_data < 255)
830 				{
831 					uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
832 
833 					return reinterpret_cast<T*>(base + ((_data - 1 + start) << compact_alignment_log2));
834 				}
835 				else
836 					return compact_get_value<header_offset, T>(this);
837 			}
838 			else
839 				return 0;
840 		}
841 
operator ->() const842 		T* operator->() const
843 		{
844 			return *this;
845 		}
846 
847 	private:
848 		unsigned char _data;
849 	};
850 
851 	template <typename T, int header_offset> class compact_pointer_parent
852 	{
853 	public:
compact_pointer_parent()854 		compact_pointer_parent(): _data(0)
855 		{
856 		}
857 
operator =(const compact_pointer_parent & rhs)858 		void operator=(const compact_pointer_parent& rhs)
859 		{
860 			*this = rhs + 0;
861 		}
862 
operator =(T * value)863 		void operator=(T* value)
864 		{
865 			if (value)
866 			{
867 				// value is guaranteed to be compact-aligned; 'this' is not
868 				// our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
869 				// so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
870 				// compensate for arithmetic shift behavior for negative values
871 				ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
872 				ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
873 
874 				if (static_cast<uintptr_t>(offset) <= 65533)
875 				{
876 					_data = static_cast<unsigned short>(offset + 1);
877 				}
878 				else
879 				{
880 					xml_memory_page* page = compact_get_page(this, header_offset);
881 
882 					if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
883 						page->compact_shared_parent = value;
884 
885 					if (page->compact_shared_parent == value)
886 					{
887 						_data = 65534;
888 					}
889 					else
890 					{
891 						compact_set_value<header_offset>(this, value);
892 
893 						_data = 65535;
894 					}
895 				}
896 			}
897 			else
898 			{
899 				_data = 0;
900 			}
901 		}
902 
operator T*() const903 		operator T*() const
904 		{
905 			if (_data)
906 			{
907 				if (_data < 65534)
908 				{
909 					uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
910 
911 					return reinterpret_cast<T*>(base + ((_data - 1 - 65533) << compact_alignment_log2));
912 				}
913 				else if (_data == 65534)
914 					return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
915 				else
916 					return compact_get_value<header_offset, T>(this);
917 			}
918 			else
919 				return 0;
920 		}
921 
operator ->() const922 		T* operator->() const
923 		{
924 			return *this;
925 		}
926 
927 	private:
928 		uint16_t _data;
929 	};
930 
931 	template <int header_offset, int base_offset> class compact_string
932 	{
933 	public:
compact_string()934 		compact_string(): _data(0)
935 		{
936 		}
937 
operator =(const compact_string & rhs)938 		void operator=(const compact_string& rhs)
939 		{
940 			*this = rhs + 0;
941 		}
942 
operator =(char_t * value)943 		void operator=(char_t* value)
944 		{
945 			if (value)
946 			{
947 				xml_memory_page* page = compact_get_page(this, header_offset);
948 
949 				if (PUGI__UNLIKELY(page->compact_string_base == 0))
950 					page->compact_string_base = value;
951 
952 				ptrdiff_t offset = value - page->compact_string_base;
953 
954 				if (static_cast<uintptr_t>(offset) < (65535 << 7))
955 				{
956 					// round-trip through void* to silence 'cast increases required alignment of target type' warnings
957 					uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));
958 
959 					if (*base == 0)
960 					{
961 						*base = static_cast<uint16_t>((offset >> 7) + 1);
962 						_data = static_cast<unsigned char>((offset & 127) + 1);
963 					}
964 					else
965 					{
966 						ptrdiff_t remainder = offset - ((*base - 1) << 7);
967 
968 						if (static_cast<uintptr_t>(remainder) <= 253)
969 						{
970 							_data = static_cast<unsigned char>(remainder + 1);
971 						}
972 						else
973 						{
974 							compact_set_value<header_offset>(this, value);
975 
976 							_data = 255;
977 						}
978 					}
979 				}
980 				else
981 				{
982 					compact_set_value<header_offset>(this, value);
983 
984 					_data = 255;
985 				}
986 			}
987 			else
988 			{
989 				_data = 0;
990 			}
991 		}
992 
operator char_t*() const993 		operator char_t*() const
994 		{
995 			if (_data)
996 			{
997 				if (_data < 255)
998 				{
999 					xml_memory_page* page = compact_get_page(this, header_offset);
1000 
1001 					// round-trip through void* to silence 'cast increases required alignment of target type' warnings
1002 					const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
1003 					assert(*base);
1004 
1005 					ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
1006 
1007 					return page->compact_string_base + offset;
1008 				}
1009 				else
1010 				{
1011 					return compact_get_value<header_offset, char_t>(this);
1012 				}
1013 			}
1014 			else
1015 				return 0;
1016 		}
1017 
1018 	private:
1019 		unsigned char _data;
1020 	};
1021 PUGI__NS_END
1022 #endif
1023 
1024 #ifdef PUGIXML_COMPACT
1025 namespace pugi
1026 {
	// Attribute record used when PUGIXML_COMPACT is defined.
	// The constructor statically asserts that the whole record is exactly 8 bytes, so the
	// types and order of the members below are part of the contract.
1027 	struct xml_attribute_struct
1028 	{
		// Initializes the header from the owning page (with 0 as the second argument) and clears namevalue_base;
		// the remaining members rely on their own default constructors.
xml_attribute_structpugi::xml_attribute_struct1029 		xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
1030 		{
1031 			PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
1032 		}
1033 
1034 		impl::compact_header header;
1035 
		// NOTE(review): presumably the shared 16-bit base that the name/value compact strings read
		// (compact_string accesses a uint16_t located base_offset bytes before itself) - confirm against
		// the compact_string template parameters.
1036 		uint16_t namevalue_base;
1037 
1038 		impl::compact_string<4, 2> name;
1039 		impl::compact_string<5, 3> value;
1040 
		// Sibling links. append_attribute() makes the first attribute's prev_attribute_c refer to the
		// last attribute, so the "previous" chain is cyclic while next_attribute ends with null.
1041 		impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c;
1042 		impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute;
1043 	};
1044 
	// Node record used when PUGIXML_COMPACT is defined.
	// The constructor statically asserts that the whole record is exactly 12 bytes, so the
	// types and order of the members below are part of the contract.
1045 	struct xml_node_struct
1046 	{
		// Initializes the header from the owning page and the node type and clears namevalue_base;
		// the remaining members rely on their own default constructors.
xml_node_structpugi::xml_node_struct1047 		xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0)
1048 		{
1049 			PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
1050 		}
1051 
1052 		impl::compact_header header;
1053 
		// NOTE(review): presumably the shared 16-bit base that the name/value compact strings read
		// (compact_string accesses a uint16_t located base_offset bytes before itself) - confirm against
		// the compact_string template parameters.
1054 		uint16_t namevalue_base;
1055 
1056 		impl::compact_string<4, 2> name;
1057 		impl::compact_string<5, 3> value;
1058 
1059 		impl::compact_pointer_parent<xml_node_struct, 6> parent;
1060 
1061 		impl::compact_pointer<xml_node_struct, 8, 0> first_child;
1062 
		// Sibling links. append_node() makes the first child's prev_sibling_c refer to the last child,
		// so the "previous" chain is cyclic while next_sibling ends with null.
1063 		impl::compact_pointer<xml_node_struct,  9>    prev_sibling_c;
1064 		impl::compact_pointer<xml_node_struct, 10, 0> next_sibling;
1065 
1066 		impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
1067 	};
1068 }
1069 #else
1070 namespace pugi
1071 {
	// Attribute record used when PUGIXML_COMPACT is not defined: plain pointers plus a header word.
1072 	struct xml_attribute_struct
1073 	{
		// Nulls every pointer member, then computes the header from this object's address, the owning page and flags 0.
1074 		xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0)
1075 		{
1076 			header = PUGI__GETHEADER_IMPL(this, page, 0);
1077 		}
1078 
		// Tested against xml_memory_page_name_allocated_mask / xml_memory_page_value_allocated_mask in
		// destroy_attribute() to decide whether name/value must be released.
1079 		uintptr_t header;
1080 
1081 		char_t*	name;
1082 		char_t*	value;
1083 
		// Sibling links. append_attribute() makes the first attribute's prev_attribute_c refer to the
		// last attribute, so the "previous" chain is cyclic while next_attribute ends with null.
1084 		xml_attribute_struct* prev_attribute_c;
1085 		xml_attribute_struct* next_attribute;
1086 	};
1087 
	// Node record used when PUGIXML_COMPACT is not defined: plain pointers plus a header word.
1088 	struct xml_node_struct
1089 	{
		// Nulls every pointer member, then computes the header from this object's address, the owning page and the node type.
1090 		xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
1091 		{
1092 			header = PUGI__GETHEADER_IMPL(this, page, type);
1093 		}
1094 
		// Tested against xml_memory_page_name_allocated_mask / xml_memory_page_value_allocated_mask in
		// destroy_node() to decide whether name/value must be released.
1095 		uintptr_t header;
1096 
1097 		char_t* name;
1098 		char_t* value;
1099 
1100 		xml_node_struct* parent;
1101 
1102 		xml_node_struct* first_child;
1103 
		// Sibling links. append_node() makes the first child's prev_sibling_c refer to the last child,
		// so the "previous" chain is cyclic while next_sibling ends with null.
1104 		xml_node_struct* prev_sibling_c;
1105 		xml_node_struct* next_sibling;
1106 
1107 		xml_attribute_struct* first_attribute;
1108 	};
1109 }
1110 #endif
1111 
1112 PUGI__NS_BEGIN
	// Node of a singly linked list of character buffers; xml_document_struct::extra_buffers is a pointer to this type.
	// NOTE(review): presumably these are additional buffers owned by the document - confirm at the use sites.
1113 	struct xml_extra_buffer
1114 	{
1115 		char_t* buffer;
1116 		xml_extra_buffer* next;
1117 	};
1118 
	// Document root record: it is both a node (constructed with type node_document) and the allocator.
	// get_document() relies on this by casting a page's allocator pointer back to xml_document_struct.
1119 	struct xml_document_struct: public xml_node_struct, public xml_allocator
1120 	{
		// Forwards the page to both bases and starts with no buffer and no extra buffers.
xml_document_structxml_document_struct1121 		xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
1122 		{
1123 		}
1124 
1125 		const char_t* buffer;
1126 
		// Head of the xml_extra_buffer list (null when empty).
1127 		xml_extra_buffer* extra_buffers;
1128 
		// Only present in compact builds.
1129 	#ifdef PUGIXML_COMPACT
1130 		compact_hash_table hash;
1131 	#endif
1132 	};
1133 
get_allocator(const Object * object)1134 	template <typename Object> inline xml_allocator& get_allocator(const Object* object)
1135 	{
1136 		assert(object);
1137 
1138 		return *PUGI__GETPAGE(object)->allocator;
1139 	}
1140 
get_document(const Object * object)1141 	template <typename Object> inline xml_document_struct& get_document(const Object* object)
1142 	{
1143 		assert(object);
1144 
1145 		return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator);
1146 	}
1147 PUGI__NS_END
1148 
1149 // Low-level DOM operations
1150 PUGI__NS_BEGIN
allocate_attribute(xml_allocator & alloc)1151 	inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
1152 	{
1153 		xml_memory_page* page;
1154 		void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
1155 		if (!memory) return 0;
1156 
1157 		return new (memory) xml_attribute_struct(page);
1158 	}
1159 
allocate_node(xml_allocator & alloc,xml_node_type type)1160 	inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
1161 	{
1162 		xml_memory_page* page;
1163 		void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
1164 		if (!memory) return 0;
1165 
1166 		return new (memory) xml_node_struct(page, type);
1167 	}
1168 
destroy_attribute(xml_attribute_struct * a,xml_allocator & alloc)1169 	inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
1170 	{
1171 		if (a->header & impl::xml_memory_page_name_allocated_mask)
1172 			alloc.deallocate_string(a->name);
1173 
1174 		if (a->header & impl::xml_memory_page_value_allocated_mask)
1175 			alloc.deallocate_string(a->value);
1176 
1177 		alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
1178 	}
1179 
destroy_node(xml_node_struct * n,xml_allocator & alloc)1180 	inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
1181 	{
1182 		if (n->header & impl::xml_memory_page_name_allocated_mask)
1183 			alloc.deallocate_string(n->name);
1184 
1185 		if (n->header & impl::xml_memory_page_value_allocated_mask)
1186 			alloc.deallocate_string(n->value);
1187 
1188 		for (xml_attribute_struct* attr = n->first_attribute; attr; )
1189 		{
1190 			xml_attribute_struct* next = attr->next_attribute;
1191 
1192 			destroy_attribute(attr, alloc);
1193 
1194 			attr = next;
1195 		}
1196 
1197 		for (xml_node_struct* child = n->first_child; child; )
1198 		{
1199 			xml_node_struct* next = child->next_sibling;
1200 
1201 			destroy_node(child, alloc);
1202 
1203 			child = next;
1204 		}
1205 
1206 		alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
1207 	}
1208 
append_node(xml_node_struct * child,xml_node_struct * node)1209 	inline void append_node(xml_node_struct* child, xml_node_struct* node)
1210 	{
1211 		child->parent = node;
1212 
1213 		xml_node_struct* head = node->first_child;
1214 
1215 		if (head)
1216 		{
1217 			xml_node_struct* tail = head->prev_sibling_c;
1218 
1219 			tail->next_sibling = child;
1220 			child->prev_sibling_c = tail;
1221 			head->prev_sibling_c = child;
1222 		}
1223 		else
1224 		{
1225 			node->first_child = child;
1226 			child->prev_sibling_c = child;
1227 		}
1228 	}
1229 
prepend_node(xml_node_struct * child,xml_node_struct * node)1230 	inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
1231 	{
1232 		child->parent = node;
1233 
1234 		xml_node_struct* head = node->first_child;
1235 
1236 		if (head)
1237 		{
1238 			child->prev_sibling_c = head->prev_sibling_c;
1239 			head->prev_sibling_c = child;
1240 		}
1241 		else
1242 			child->prev_sibling_c = child;
1243 
1244 		child->next_sibling = head;
1245 		node->first_child = child;
1246 	}
1247 
insert_node_after(xml_node_struct * child,xml_node_struct * node)1248 	inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
1249 	{
1250 		xml_node_struct* parent = node->parent;
1251 
1252 		child->parent = parent;
1253 
1254 		if (node->next_sibling)
1255 			node->next_sibling->prev_sibling_c = child;
1256 		else
1257 			parent->first_child->prev_sibling_c = child;
1258 
1259 		child->next_sibling = node->next_sibling;
1260 		child->prev_sibling_c = node;
1261 
1262 		node->next_sibling = child;
1263 	}
1264 
insert_node_before(xml_node_struct * child,xml_node_struct * node)1265 	inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
1266 	{
1267 		xml_node_struct* parent = node->parent;
1268 
1269 		child->parent = parent;
1270 
1271 		if (node->prev_sibling_c->next_sibling)
1272 			node->prev_sibling_c->next_sibling = child;
1273 		else
1274 			parent->first_child = child;
1275 
1276 		child->prev_sibling_c = node->prev_sibling_c;
1277 		child->next_sibling = node;
1278 
1279 		node->prev_sibling_c = child;
1280 	}
1281 
remove_node(xml_node_struct * node)1282 	inline void remove_node(xml_node_struct* node)
1283 	{
1284 		xml_node_struct* parent = node->parent;
1285 
1286 		if (node->next_sibling)
1287 			node->next_sibling->prev_sibling_c = node->prev_sibling_c;
1288 		else
1289 			parent->first_child->prev_sibling_c = node->prev_sibling_c;
1290 
1291 		if (node->prev_sibling_c->next_sibling)
1292 			node->prev_sibling_c->next_sibling = node->next_sibling;
1293 		else
1294 			parent->first_child = node->next_sibling;
1295 
1296 		node->parent = 0;
1297 		node->prev_sibling_c = 0;
1298 		node->next_sibling = 0;
1299 	}
1300 
append_attribute(xml_attribute_struct * attr,xml_node_struct * node)1301 	inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1302 	{
1303 		xml_attribute_struct* head = node->first_attribute;
1304 
1305 		if (head)
1306 		{
1307 			xml_attribute_struct* tail = head->prev_attribute_c;
1308 
1309 			tail->next_attribute = attr;
1310 			attr->prev_attribute_c = tail;
1311 			head->prev_attribute_c = attr;
1312 		}
1313 		else
1314 		{
1315 			node->first_attribute = attr;
1316 			attr->prev_attribute_c = attr;
1317 		}
1318 	}
1319 
prepend_attribute(xml_attribute_struct * attr,xml_node_struct * node)1320 	inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1321 	{
1322 		xml_attribute_struct* head = node->first_attribute;
1323 
1324 		if (head)
1325 		{
1326 			attr->prev_attribute_c = head->prev_attribute_c;
1327 			head->prev_attribute_c = attr;
1328 		}
1329 		else
1330 			attr->prev_attribute_c = attr;
1331 
1332 		attr->next_attribute = head;
1333 		node->first_attribute = attr;
1334 	}
1335 
insert_attribute_after(xml_attribute_struct * attr,xml_attribute_struct * place,xml_node_struct * node)1336 	inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1337 	{
1338 		if (place->next_attribute)
1339 			place->next_attribute->prev_attribute_c = attr;
1340 		else
1341 			node->first_attribute->prev_attribute_c = attr;
1342 
1343 		attr->next_attribute = place->next_attribute;
1344 		attr->prev_attribute_c = place;
1345 		place->next_attribute = attr;
1346 	}
1347 
insert_attribute_before(xml_attribute_struct * attr,xml_attribute_struct * place,xml_node_struct * node)1348 	inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1349 	{
1350 		if (place->prev_attribute_c->next_attribute)
1351 			place->prev_attribute_c->next_attribute = attr;
1352 		else
1353 			node->first_attribute = attr;
1354 
1355 		attr->prev_attribute_c = place->prev_attribute_c;
1356 		attr->next_attribute = place;
1357 		place->prev_attribute_c = attr;
1358 	}
1359 
remove_attribute(xml_attribute_struct * attr,xml_node_struct * node)1360 	inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1361 	{
1362 		if (attr->next_attribute)
1363 			attr->next_attribute->prev_attribute_c = attr->prev_attribute_c;
1364 		else
1365 			node->first_attribute->prev_attribute_c = attr->prev_attribute_c;
1366 
1367 		if (attr->prev_attribute_c->next_attribute)
1368 			attr->prev_attribute_c->next_attribute = attr->next_attribute;
1369 		else
1370 			node->first_attribute = attr->next_attribute;
1371 
1372 		attr->prev_attribute_c = 0;
1373 		attr->next_attribute = 0;
1374 	}
1375 
append_new_node(xml_node_struct * node,xml_allocator & alloc,xml_node_type type=node_element)1376 	PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
1377 	{
1378 		if (!alloc.reserve()) return 0;
1379 
1380 		xml_node_struct* child = allocate_node(alloc, type);
1381 		if (!child) return 0;
1382 
1383 		append_node(child, node);
1384 
1385 		return child;
1386 	}
1387 
append_new_attribute(xml_node_struct * node,xml_allocator & alloc)1388 	PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
1389 	{
1390 		if (!alloc.reserve()) return 0;
1391 
1392 		xml_attribute_struct* attr = allocate_attribute(alloc);
1393 		if (!attr) return 0;
1394 
1395 		append_attribute(attr, node);
1396 
1397 		return attr;
1398 	}
1399 PUGI__NS_END
1400 
1401 // Helper classes for code generation
1402 PUGI__NS_BEGIN
	// Tag type carrying a compile-time false (value == 0); used as a template argument,
	// e.g. utf16_decoder<opt_false> reads opt_swap::value to decide whether to byte-swap.
1403 	struct opt_false
1404 	{
1405 		enum { value = 0 };
1406 	};
1407 
	// Tag type carrying a compile-time true (value == 1); counterpart of opt_false for
	// templates that read their argument's ::value.
1408 	struct opt_true
1409 	{
1410 		enum { value = 1 };
1411 	};
1412 PUGI__NS_END
1413 
1414 // Unicode utilities
1415 PUGI__NS_BEGIN
endian_swap(uint16_t value)1416 	inline uint16_t endian_swap(uint16_t value)
1417 	{
1418 		return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8));
1419 	}
1420 
endian_swap(uint32_t value)1421 	inline uint32_t endian_swap(uint32_t value)
1422 	{
1423 		return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24);
1424 	}
1425 
1426 	struct utf8_counter
1427 	{
1428 		typedef size_t value_type;
1429 
lowutf8_counter1430 		static value_type low(value_type result, uint32_t ch)
1431 		{
1432 			// U+0000..U+007F
1433 			if (ch < 0x80) return result + 1;
1434 			// U+0080..U+07FF
1435 			else if (ch < 0x800) return result + 2;
1436 			// U+0800..U+FFFF
1437 			else return result + 3;
1438 		}
1439 
highutf8_counter1440 		static value_type high(value_type result, uint32_t)
1441 		{
1442 			// U+10000..U+10FFFF
1443 			return result + 4;
1444 		}
1445 	};
1446 
1447 	struct utf8_writer
1448 	{
1449 		typedef uint8_t* value_type;
1450 
lowutf8_writer1451 		static value_type low(value_type result, uint32_t ch)
1452 		{
1453 			// U+0000..U+007F
1454 			if (ch < 0x80)
1455 			{
1456 				*result = static_cast<uint8_t>(ch);
1457 				return result + 1;
1458 			}
1459 			// U+0080..U+07FF
1460 			else if (ch < 0x800)
1461 			{
1462 				result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
1463 				result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1464 				return result + 2;
1465 			}
1466 			// U+0800..U+FFFF
1467 			else
1468 			{
1469 				result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
1470 				result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
1471 				result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1472 				return result + 3;
1473 			}
1474 		}
1475 
highutf8_writer1476 		static value_type high(value_type result, uint32_t ch)
1477 		{
1478 			// U+10000..U+10FFFF
1479 			result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
1480 			result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
1481 			result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
1482 			result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1483 			return result + 4;
1484 		}
1485 
anyutf8_writer1486 		static value_type any(value_type result, uint32_t ch)
1487 		{
1488 			return (ch < 0x10000) ? low(result, ch) : high(result, ch);
1489 		}
1490 	};
1491 
1492 	struct utf16_counter
1493 	{
1494 		typedef size_t value_type;
1495 
lowutf16_counter1496 		static value_type low(value_type result, uint32_t)
1497 		{
1498 			return result + 1;
1499 		}
1500 
highutf16_counter1501 		static value_type high(value_type result, uint32_t)
1502 		{
1503 			return result + 2;
1504 		}
1505 	};
1506 
1507 	struct utf16_writer
1508 	{
1509 		typedef uint16_t* value_type;
1510 
lowutf16_writer1511 		static value_type low(value_type result, uint32_t ch)
1512 		{
1513 			*result = static_cast<uint16_t>(ch);
1514 
1515 			return result + 1;
1516 		}
1517 
highutf16_writer1518 		static value_type high(value_type result, uint32_t ch)
1519 		{
1520 			uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10;
1521 			uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff;
1522 
1523 			result[0] = static_cast<uint16_t>(0xD800 + msh);
1524 			result[1] = static_cast<uint16_t>(0xDC00 + lsh);
1525 
1526 			return result + 2;
1527 		}
1528 
anyutf16_writer1529 		static value_type any(value_type result, uint32_t ch)
1530 		{
1531 			return (ch < 0x10000) ? low(result, ch) : high(result, ch);
1532 		}
1533 	};
1534 
1535 	struct utf32_counter
1536 	{
1537 		typedef size_t value_type;
1538 
lowutf32_counter1539 		static value_type low(value_type result, uint32_t)
1540 		{
1541 			return result + 1;
1542 		}
1543 
highutf32_counter1544 		static value_type high(value_type result, uint32_t)
1545 		{
1546 			return result + 1;
1547 		}
1548 	};
1549 
1550 	struct utf32_writer
1551 	{
1552 		typedef uint32_t* value_type;
1553 
lowutf32_writer1554 		static value_type low(value_type result, uint32_t ch)
1555 		{
1556 			*result = ch;
1557 
1558 			return result + 1;
1559 		}
1560 
highutf32_writer1561 		static value_type high(value_type result, uint32_t ch)
1562 		{
1563 			*result = ch;
1564 
1565 			return result + 1;
1566 		}
1567 
anyutf32_writer1568 		static value_type any(value_type result, uint32_t ch)
1569 		{
1570 			*result = ch;
1571 
1572 			return result + 1;
1573 		}
1574 	};
1575 
1576 	struct latin1_writer
1577 	{
1578 		typedef uint8_t* value_type;
1579 
lowlatin1_writer1580 		static value_type low(value_type result, uint32_t ch)
1581 		{
1582 			*result = static_cast<uint8_t>(ch > 255 ? '?' : ch);
1583 
1584 			return result + 1;
1585 		}
1586 
highlatin1_writer1587 		static value_type high(value_type result, uint32_t ch)
1588 		{
1589 			(void)ch;
1590 
1591 			*result = '?';
1592 
1593 			return result + 1;
1594 		}
1595 	};
1596 
1597 	struct utf8_decoder
1598 	{
1599 		typedef uint8_t type;
1600 
processutf8_decoder1601 		template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
1602 		{
1603 			const uint8_t utf8_byte_mask = 0x3f;
1604 
1605 			while (size)
1606 			{
1607 				uint8_t lead = *data;
1608 
1609 				// 0xxxxxxx -> U+0000..U+007F
1610 				if (lead < 0x80)
1611 				{
1612 					result = Traits::low(result, lead);
1613 					data += 1;
1614 					size -= 1;
1615 
1616 					// process aligned single-byte (ascii) blocks
1617 					if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
1618 					{
1619 						// round-trip through void* to silence 'cast increases required alignment of target type' warnings
1620 						while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
1621 						{
1622 							result = Traits::low(result, data[0]);
1623 							result = Traits::low(result, data[1]);
1624 							result = Traits::low(result, data[2]);
1625 							result = Traits::low(result, data[3]);
1626 							data += 4;
1627 							size -= 4;
1628 						}
1629 					}
1630 				}
1631 				// 110xxxxx -> U+0080..U+07FF
1632 				else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
1633 				{
1634 					result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
1635 					data += 2;
1636 					size -= 2;
1637 				}
1638 				// 1110xxxx -> U+0800-U+FFFF
1639 				else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
1640 				{
1641 					result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
1642 					data += 3;
1643 					size -= 3;
1644 				}
1645 				// 11110xxx -> U+10000..U+10FFFF
1646 				else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
1647 				{
1648 					result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
1649 					data += 4;
1650 					size -= 4;
1651 				}
1652 				// 10xxxxxx or 11111xxx -> invalid
1653 				else
1654 				{
1655 					data += 1;
1656 					size -= 1;
1657 				}
1658 			}
1659 
1660 			return result;
1661 		}
1662 	};
1663 
1664 	template <typename opt_swap> struct utf16_decoder
1665 	{
1666 		typedef uint16_t type;
1667 
processutf16_decoder1668 		template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
1669 		{
1670 			while (size)
1671 			{
1672 				uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
1673 
1674 				// U+0000..U+D7FF
1675 				if (lead < 0xD800)
1676 				{
1677 					result = Traits::low(result, lead);
1678 					data += 1;
1679 					size -= 1;
1680 				}
1681 				// U+E000..U+FFFF
1682 				else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
1683 				{
1684 					result = Traits::low(result, lead);
1685 					data += 1;
1686 					size -= 1;
1687 				}
1688 				// surrogate pair lead
1689 				else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2)
1690 				{
1691 					uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
1692 
1693 					if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
1694 					{
1695 						result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
1696 						data += 2;
1697 						size -= 2;
1698 					}
1699 					else
1700 					{
1701 						data += 1;
1702 						size -= 1;
1703 					}
1704 				}
1705 				else
1706 				{
1707 					data += 1;
1708 					size -= 1;
1709 				}
1710 			}
1711 
1712 			return result;
1713 		}
1714 	};
1715 
1716 	template <typename opt_swap> struct utf32_decoder
1717 	{
1718 		typedef uint32_t type;
1719 
processutf32_decoder1720 		template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
1721 		{
1722 			while (size)
1723 			{
1724 				uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
1725 
1726 				// U+0000..U+FFFF
1727 				if (lead < 0x10000)
1728 				{
1729 					result = Traits::low(result, lead);
1730 					data += 1;
1731 					size -= 1;
1732 				}
1733 				// U+10000..U+10FFFF
1734 				else
1735 				{
1736 					result = Traits::high(result, lead);
1737 					data += 1;
1738 					size -= 1;
1739 				}
1740 			}
1741 
1742 			return result;
1743 		}
1744 	};
1745 
1746 	struct latin1_decoder
1747 	{
1748 		typedef uint8_t type;
1749 
processlatin1_decoder1750 		template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
1751 		{
1752 			while (size)
1753 			{
1754 				result = Traits::low(result, *data);
1755 				data += 1;
1756 				size -= 1;
1757 			}
1758 
1759 			return result;
1760 		}
1761 	};
1762 
	// Maps the size of wchar_t to the matching fixed-width unit type and UTF policies.
	// Only sizes 2 and 4 are specialized; the primary template is left undefined, so any other
	// size fails to compile where the selector is used.
1763 	template <size_t size> struct wchar_selector;
1764 
	// 2-byte wchar_t: treated as UTF-16 without byte swapping.
1765 	template <> struct wchar_selector<2>
1766 	{
1767 		typedef uint16_t type;
1768 		typedef utf16_counter counter;
1769 		typedef utf16_writer writer;
1770 		typedef utf16_decoder<opt_false> decoder;
1771 	};
1772 
	// 4-byte wchar_t: treated as UTF-32 without byte swapping.
1773 	template <> struct wchar_selector<4>
1774 	{
1775 		typedef uint32_t type;
1776 		typedef utf32_counter counter;
1777 		typedef utf32_writer writer;
1778 		typedef utf32_decoder<opt_false> decoder;
1779 	};
1780 
	// Counter/writer policies for the platform's wchar_t.
1781 	typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
1782 	typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
1783 
1784 	struct wchar_decoder
1785 	{
1786 		typedef wchar_t type;
1787 
processwchar_decoder1788 		template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
1789 		{
1790 			typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
1791 
1792 			return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
1793 		}
1794 	};
1795 
1796 #ifdef PUGIXML_WCHAR_MODE
convert_wchar_endian_swap(wchar_t * result,const wchar_t * data,size_t length)1797 	PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
1798 	{
1799 		for (size_t i = 0; i < length; ++i)
1800 			result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
1801 	}
1802 #endif
1803 PUGI__NS_END
1804 
1805 PUGI__NS_BEGIN
1806 	enum chartype_t
1807 	{
1808 		ct_parse_pcdata = 1,	// \0, &, \r, <
1809 		ct_parse_attr = 2,		// \0, &, \r, ', "
1810 		ct_parse_attr_ws = 4,	// \0, &, \r, ', ", \n, tab
1811 		ct_space = 8,			// \r, \n, space, tab
1812 		ct_parse_cdata = 16,	// \0, ], >, \r
1813 		ct_parse_comment = 32,	// \0, -, >, \r
1814 		ct_symbol = 64,			// Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
1815 		ct_start_symbol = 128	// Any symbol > 127, a-z, A-Z, _, :
1816 	};
1817 
	// Lookup table indexed by byte value; each entry is the bitwise OR of the chartype_t flags
	// that apply to that character. Examples: index 0 ('\0') holds 55 = ct_parse_pcdata | ct_parse_attr |
	// ct_parse_attr_ws | ct_parse_cdata | ct_parse_comment; letters hold 192 = ct_symbol | ct_start_symbol.
	// All entries from 128 upwards are 192 (wide-character builds read entry 128 for any character >= 128).
1818 	static const unsigned char chartype_table[256] =
1819 	{
1820 		55,  0,   0,   0,   0,   0,   0,   0,      0,   12,  12,  0,   0,   63,  0,   0,   // 0-15
1821 		0,   0,   0,   0,   0,   0,   0,   0,      0,   0,   0,   0,   0,   0,   0,   0,   // 16-31
1822 		8,   0,   6,   0,   0,   0,   7,   6,      0,   0,   0,   0,   0,   96,  64,  0,   // 32-47
1823 		64,  64,  64,  64,  64,  64,  64,  64,     64,  64,  192, 0,   1,   0,   48,  0,   // 48-63
1824 		0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 64-79
1825 		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0,   0,   16,  0,   192, // 80-95
1826 		0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 96-111
1827 		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0, 0, 0, 0, 0,           // 112-127
1828 
1829 		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 128+
1830 		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
1831 		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
1832 		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
1833 		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
1834 		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
1835 		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
1836 		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192
1837 	};
1838 
1839 	enum chartypex_t
1840 	{
1841 		ctx_special_pcdata = 1,   // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
1842 		ctx_special_attr = 2,     // Any symbol >= 0 and < 32 (except \t), &, <, >, "
1843 		ctx_start_symbol = 4,	  // Any symbol > 127, a-z, A-Z, _
1844 		ctx_digit = 8,			  // 0-9
1845 		ctx_symbol = 16			  // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
1846 	};
1847 
	// Lookup table indexed by byte value; each entry is the bitwise OR of the chartypex_t flags
	// that apply to that character. Examples: digits hold 24 = ctx_digit | ctx_symbol; letters hold
	// 20 = ctx_start_symbol | ctx_symbol; most control characters hold 3 = ctx_special_pcdata | ctx_special_attr.
	// All entries from 128 upwards are 20 (wide-character builds read entry 128 for any character >= 128).
1848 	static const unsigned char chartypex_table[256] =
1849 	{
1850 		3,  3,  3,  3,  3,  3,  3,  3,     3,  0,  2,  3,  3,  2,  3,  3,     // 0-15
1851 		3,  3,  3,  3,  3,  3,  3,  3,     3,  3,  3,  3,  3,  3,  3,  3,     // 16-31
1852 		0,  0,  2,  0,  0,  0,  3,  0,     0,  0,  0,  0,  0, 16, 16,  0,     // 32-47
1853 		24, 24, 24, 24, 24, 24, 24, 24,    24, 24, 0,  0,  3,  0,  3,  0,     // 48-63
1854 
1855 		0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 64-79
1856 		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  20,    // 80-95
1857 		0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 96-111
1858 		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  0,     // 112-127
1859 
1860 		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 128+
1861 		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
1862 		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
1863 		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
1864 		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
1865 		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
1866 		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
1867 		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20
1868 	};
1869 
	// Character class test: yields a non-zero value when character c carries any of the flags in ct
	// according to the given 256-entry table.
	// Wide-character builds: characters below 128 index the table directly, all others share entry 128.
	// Narrow builds: the character is converted to unsigned char and used as the index.
1870 #ifdef PUGIXML_WCHAR_MODE
1871 	#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
1872 #else
1873 	#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
1874 #endif
1875 
	// Convenience forms bound to chartype_table (chartype_t flags) and chartypex_table (chartypex_t flags).
1876 	#define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
1877 	#define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
1878 
is_little_endian()1879 	PUGI__FN bool is_little_endian()
1880 	{
1881 		unsigned int ui = 1;
1882 
1883 		return *reinterpret_cast<unsigned char*>(&ui) == 1;
1884 	}
1885 
get_wchar_encoding()1886 	PUGI__FN xml_encoding get_wchar_encoding()
1887 	{
1888 		PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1889 
1890 		if (sizeof(wchar_t) == 2)
1891 			return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1892 		else
1893 			return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1894 	}
1895 
parse_declaration_encoding(const uint8_t * data,size_t size,const uint8_t * & out_encoding,size_t & out_length)1896 	PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length)
1897 	{
1898 	#define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; }
1899 	#define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; }
1900 
1901 		// check if we have a non-empty XML declaration
1902 		if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space)))
1903 			return false;
1904 
1905 		// scan XML declaration until the encoding field
1906 		for (size_t i = 6; i + 1 < size; ++i)
1907 		{
1908 			// declaration can not contain ? in quoted values
1909 			if (data[i] == '?')
1910 				return false;
1911 
1912 			if (data[i] == 'e' && data[i + 1] == 'n')
1913 			{
1914 				size_t offset = i;
1915 
1916 				// encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed
1917 				PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o');
1918 				PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g');
1919 
1920 				// S? = S?
1921 				PUGI__SCANCHARTYPE(ct_space);
1922 				PUGI__SCANCHAR('=');
1923 				PUGI__SCANCHARTYPE(ct_space);
1924 
1925 				// the only two valid delimiters are ' and "
1926 				uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\'';
1927 
1928 				PUGI__SCANCHAR(delimiter);
1929 
1930 				size_t start = offset;
1931 
1932 				out_encoding = data + offset;
1933 
1934 				PUGI__SCANCHARTYPE(ct_symbol);
1935 
1936 				out_length = offset - start;
1937 
1938 				PUGI__SCANCHAR(delimiter);
1939 
1940 				return true;
1941 			}
1942 		}
1943 
1944 		return false;
1945 
1946 	#undef PUGI__SCANCHAR
1947 	#undef PUGI__SCANCHARTYPE
1948 	}
1949 
guess_buffer_encoding(const uint8_t * data,size_t size)1950 	PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size)
1951 	{
1952 		// skip encoding autodetection if input buffer is too small
1953 		if (size < 4) return encoding_utf8;
1954 
1955 		uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
1956 
1957 		// look for BOM in first few bytes
1958 		if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
1959 		if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
1960 		if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
1961 		if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
1962 		if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
1963 
1964 		// look for <, <? or <?xm in various encodings
1965 		if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
1966 		if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
1967 		if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
1968 		if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
1969 
1970 		// look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
1971 		if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
1972 		if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
1973 
1974 		// no known BOM detected; parse declaration
1975 		const uint8_t* enc = 0;
1976 		size_t enc_length = 0;
1977 
1978 		if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length))
1979 		{
1980 			// iso-8859-1 (case-insensitive)
1981 			if (enc_length == 10
1982 				&& (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o'
1983 				&& enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9'
1984 				&& enc[8] == '-' && enc[9] == '1')
1985 				return encoding_latin1;
1986 
1987 			// latin1 (case-insensitive)
1988 			if (enc_length == 6
1989 				&& (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't'
1990 				&& (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n'
1991 				&& enc[5] == '1')
1992 				return encoding_latin1;
1993 		}
1994 
1995 		return encoding_utf8;
1996 	}
1997 
get_buffer_encoding(xml_encoding encoding,const void * contents,size_t size)1998 	PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
1999 	{
2000 		// replace wchar encoding with utf implementation
2001 		if (encoding == encoding_wchar) return get_wchar_encoding();
2002 
2003 		// replace utf16 encoding with utf16 with specific endianness
2004 		if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2005 
2006 		// replace utf32 encoding with utf32 with specific endianness
2007 		if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2008 
2009 		// only do autodetection if no explicit encoding is requested
2010 		if (encoding != encoding_auto) return encoding;
2011 
2012 		// try to guess encoding (based on XML specification, Appendix F.1)
2013 		const uint8_t* data = static_cast<const uint8_t*>(contents);
2014 
2015 		return guess_buffer_encoding(data, size);
2016 	}
2017 
get_mutable_buffer(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2018 	PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2019 	{
2020 		size_t length = size / sizeof(char_t);
2021 
2022 		if (is_mutable)
2023 		{
2024 			out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
2025 			out_length = length;
2026 		}
2027 		else
2028 		{
2029 			char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2030 			if (!buffer) return false;
2031 
2032 			if (contents)
2033 				memcpy(buffer, contents, length * sizeof(char_t));
2034 			else
2035 				assert(length == 0);
2036 
2037 			buffer[length] = 0;
2038 
2039 			out_buffer = buffer;
2040 			out_length = length + 1;
2041 		}
2042 
2043 		return true;
2044 	}
2045 
2046 #ifdef PUGIXML_WCHAR_MODE
need_endian_swap_utf(xml_encoding le,xml_encoding re)2047 	PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
2048 	{
2049 		return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
2050 			   (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
2051 	}
2052 
convert_buffer_endian_swap(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2053 	PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2054 	{
2055 		const char_t* data = static_cast<const char_t*>(contents);
2056 		size_t length = size / sizeof(char_t);
2057 
2058 		if (is_mutable)
2059 		{
2060 			char_t* buffer = const_cast<char_t*>(data);
2061 
2062 			convert_wchar_endian_swap(buffer, data, length);
2063 
2064 			out_buffer = buffer;
2065 			out_length = length;
2066 		}
2067 		else
2068 		{
2069 			char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2070 			if (!buffer) return false;
2071 
2072 			convert_wchar_endian_swap(buffer, data, length);
2073 			buffer[length] = 0;
2074 
2075 			out_buffer = buffer;
2076 			out_length = length + 1;
2077 		}
2078 
2079 		return true;
2080 	}
2081 
convert_buffer_generic(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,D)2082 	template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2083 	{
2084 		const typename D::type* data = static_cast<const typename D::type*>(contents);
2085 		size_t data_length = size / sizeof(typename D::type);
2086 
2087 		// first pass: get length in wchar_t units
2088 		size_t length = D::process(data, data_length, 0, wchar_counter());
2089 
2090 		// allocate buffer of suitable length
2091 		char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2092 		if (!buffer) return false;
2093 
2094 		// second pass: convert utf16 input to wchar_t
2095 		wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
2096 		wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
2097 
2098 		assert(oend == obegin + length);
2099 		*oend = 0;
2100 
2101 		out_buffer = buffer;
2102 		out_length = length + 1;
2103 
2104 		return true;
2105 	}
2106 
convert_buffer(char_t * & out_buffer,size_t & out_length,xml_encoding encoding,const void * contents,size_t size,bool is_mutable)2107 	PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2108 	{
2109 		// get native encoding
2110 		xml_encoding wchar_encoding = get_wchar_encoding();
2111 
2112 		// fast path: no conversion required
2113 		if (encoding == wchar_encoding)
2114 			return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2115 
2116 		// only endian-swapping is required
2117 		if (need_endian_swap_utf(encoding, wchar_encoding))
2118 			return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
2119 
2120 		// source encoding is utf8
2121 		if (encoding == encoding_utf8)
2122 			return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
2123 
2124 		// source encoding is utf16
2125 		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2126 		{
2127 			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2128 
2129 			return (native_encoding == encoding) ?
2130 				convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2131 				convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2132 		}
2133 
2134 		// source encoding is utf32
2135 		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2136 		{
2137 			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2138 
2139 			return (native_encoding == encoding) ?
2140 				convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2141 				convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2142 		}
2143 
2144 		// source encoding is latin1
2145 		if (encoding == encoding_latin1)
2146 			return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
2147 
2148 		assert(false && "Invalid encoding");
2149 		return false;
2150 	}
2151 #else
convert_buffer_generic(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,D)2152 	template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2153 	{
2154 		const typename D::type* data = static_cast<const typename D::type*>(contents);
2155 		size_t data_length = size / sizeof(typename D::type);
2156 
2157 		// first pass: get length in utf8 units
2158 		size_t length = D::process(data, data_length, 0, utf8_counter());
2159 
2160 		// allocate buffer of suitable length
2161 		char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2162 		if (!buffer) return false;
2163 
2164 		// second pass: convert utf16 input to utf8
2165 		uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2166 		uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
2167 
2168 		assert(oend == obegin + length);
2169 		*oend = 0;
2170 
2171 		out_buffer = buffer;
2172 		out_length = length + 1;
2173 
2174 		return true;
2175 	}
2176 
get_latin1_7bit_prefix_length(const uint8_t * data,size_t size)2177 	PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
2178 	{
2179 		for (size_t i = 0; i < size; ++i)
2180 			if (data[i] > 127)
2181 				return i;
2182 
2183 		return size;
2184 	}
2185 
convert_buffer_latin1(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2186 	PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2187 	{
2188 		const uint8_t* data = static_cast<const uint8_t*>(contents);
2189 		size_t data_length = size;
2190 
2191 		// get size of prefix that does not need utf8 conversion
2192 		size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
2193 		assert(prefix_length <= data_length);
2194 
2195 		const uint8_t* postfix = data + prefix_length;
2196 		size_t postfix_length = data_length - prefix_length;
2197 
2198 		// if no conversion is needed, just return the original buffer
2199 		if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2200 
2201 		// first pass: get length in utf8 units
2202 		size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
2203 
2204 		// allocate buffer of suitable length
2205 		char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2206 		if (!buffer) return false;
2207 
2208 		// second pass: convert latin1 input to utf8
2209 		memcpy(buffer, data, prefix_length);
2210 
2211 		uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2212 		uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
2213 
2214 		assert(oend == obegin + length);
2215 		*oend = 0;
2216 
2217 		out_buffer = buffer;
2218 		out_length = length + 1;
2219 
2220 		return true;
2221 	}
2222 
convert_buffer(char_t * & out_buffer,size_t & out_length,xml_encoding encoding,const void * contents,size_t size,bool is_mutable)2223 	PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2224 	{
2225 		// fast path: no conversion required
2226 		if (encoding == encoding_utf8)
2227 			return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2228 
2229 		// source encoding is utf16
2230 		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2231 		{
2232 			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2233 
2234 			return (native_encoding == encoding) ?
2235 				convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2236 				convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2237 		}
2238 
2239 		// source encoding is utf32
2240 		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2241 		{
2242 			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2243 
2244 			return (native_encoding == encoding) ?
2245 				convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2246 				convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2247 		}
2248 
2249 		// source encoding is latin1
2250 		if (encoding == encoding_latin1)
2251 			return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
2252 
2253 		assert(false && "Invalid encoding");
2254 		return false;
2255 	}
2256 #endif
2257 
as_utf8_begin(const wchar_t * str,size_t length)2258 	PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
2259 	{
2260 		// get length in utf8 characters
2261 		return wchar_decoder::process(str, length, 0, utf8_counter());
2262 	}
2263 
as_utf8_end(char * buffer,size_t size,const wchar_t * str,size_t length)2264 	PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
2265 	{
2266 		// convert to utf8
2267 		uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
2268 		uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
2269 
2270 		assert(begin + size == end);
2271 		(void)!end;
2272 		(void)!size;
2273 	}
2274 
2275 #ifndef PUGIXML_NO_STL
as_utf8_impl(const wchar_t * str,size_t length)2276 	PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
2277 	{
2278 		// first pass: get length in utf8 characters
2279 		size_t size = as_utf8_begin(str, length);
2280 
2281 		// allocate resulting string
2282 		std::string result;
2283 		result.resize(size);
2284 
2285 		// second pass: convert to utf8
2286 		if (size > 0) as_utf8_end(&result[0], size, str, length);
2287 
2288 		return result;
2289 	}
2290 
as_wide_impl(const char * str,size_t size)2291 	PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
2292 	{
2293 		const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
2294 
2295 		// first pass: get length in wchar_t units
2296 		size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
2297 
2298 		// allocate resulting string
2299 		std::basic_string<wchar_t> result;
2300 		result.resize(length);
2301 
2302 		// second pass: convert to wchar_t
2303 		if (length > 0)
2304 		{
2305 			wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
2306 			wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
2307 
2308 			assert(begin + length == end);
2309 			(void)!end;
2310 		}
2311 
2312 		return result;
2313 	}
2314 #endif
2315 
2316 	template <typename Header>
strcpy_insitu_allow(size_t length,const Header & header,uintptr_t header_mask,char_t * target)2317 	inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
2318 	{
2319 		// never reuse shared memory
2320 		if (header & xml_memory_page_contents_shared_mask) return false;
2321 
2322 		size_t target_length = strlength(target);
2323 
2324 		// always reuse document buffer memory if possible
2325 		if ((header & header_mask) == 0) return target_length >= length;
2326 
2327 		// reuse heap memory if waste is not too great
2328 		const size_t reuse_threshold = 32;
2329 
2330 		return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
2331 	}
2332 
2333 	template <typename String, typename Header>
strcpy_insitu(String & dest,Header & header,uintptr_t header_mask,const char_t * source,size_t source_length)2334 	PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
2335 	{
2336 		if (source_length == 0)
2337 		{
2338 			// empty string and null pointer are equivalent, so just deallocate old memory
2339 			xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2340 
2341 			if (header & header_mask) alloc->deallocate_string(dest);
2342 
2343 			// mark the string as not allocated
2344 			dest = 0;
2345 			header &= ~header_mask;
2346 
2347 			return true;
2348 		}
2349 		else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
2350 		{
2351 			// we can reuse old buffer, so just copy the new data (including zero terminator)
2352 			memcpy(dest, source, source_length * sizeof(char_t));
2353 			dest[source_length] = 0;
2354 
2355 			return true;
2356 		}
2357 		else
2358 		{
2359 			xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2360 
2361 			if (!alloc->reserve()) return false;
2362 
2363 			// allocate new buffer
2364 			char_t* buf = alloc->allocate_string(source_length + 1);
2365 			if (!buf) return false;
2366 
2367 			// copy the string (including zero terminator)
2368 			memcpy(buf, source, source_length * sizeof(char_t));
2369 			buf[source_length] = 0;
2370 
2371 			// deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
2372 			if (header & header_mask) alloc->deallocate_string(dest);
2373 
2374 			// the string is now allocated, so set the flag
2375 			dest = buf;
2376 			header |= header_mask;
2377 
2378 			return true;
2379 		}
2380 	}
2381 
2382 	struct gap
2383 	{
2384 		char_t* end;
2385 		size_t size;
2386 
gapgap2387 		gap(): end(0), size(0)
2388 		{
2389 		}
2390 
2391 		// Push new gap, move s count bytes further (skipping the gap).
2392 		// Collapse previous gap.
pushgap2393 		void push(char_t*& s, size_t count)
2394 		{
2395 			if (end) // there was a gap already; collapse it
2396 			{
2397 				// Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
2398 				assert(s >= end);
2399 				memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2400 			}
2401 
2402 			s += count; // end of current gap
2403 
2404 			// "merge" two gaps
2405 			end = s;
2406 			size += count;
2407 		}
2408 
2409 		// Collapse all gaps, return past-the-end pointer
flushgap2410 		char_t* flush(char_t* s)
2411 		{
2412 			if (end)
2413 			{
2414 				// Move [old_gap_end, current_pos) to [old_gap_start, ...)
2415 				assert(s >= end);
2416 				memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2417 
2418 				return s - size;
2419 			}
2420 			else return s;
2421 		}
2422 	};
2423 
	// Tries to expand one character/entity reference in place.
	// The switch below inspects s[1], so s is expected to point at the '&' that starts the reference.
	// Recognized forms: &#NNN; (decimal), &#xHHH; (hexadecimal), &amp; &apos; &gt; &lt; &quot;
	//
	// On success the replacement is written at s, the remaining characters of the reference are recorded
	// as a gap in g, and the pointer just past the ';' is returned.
	// On failure nothing is written and the returned pointer is the position where matching stopped,
	// so the text scanned so far stays in the output unchanged.
	PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
	{
		char_t* stre = s + 1;

		switch (*stre)
		{
			case '#':	// &#...
			{
				// code point accumulated from the digits; unsigned arithmetic, so excess digits wrap around
				unsigned int ucsc = 0;

				if (stre[1] == 'x') // &#x... (hex code)
				{
					stre += 2;

					char_t ch = *stre;

					// "&#x;" carries no digits and is not a reference
					if (ch == ';') return stre;

					for (;;)
					{
						// the unsigned compare accepts '0'..'9' only: characters below '0' wrap to large values
						if (static_cast<unsigned int>(ch - '0') <= 9)
							ucsc = 16 * ucsc + (ch - '0');
						// (ch | ' ') folds 'A'..'F' onto 'a'..'f'
						else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
							ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
						else if (ch == ';')
							break;
						else // cancel
							return stre;

						ch = *++stre;
					}

					// step over ';'
					++stre;
				}
				else	// &#... (dec code)
				{
					char_t ch = *++stre;

					// "&#;" carries no digits and is not a reference
					if (ch == ';') return stre;

					for (;;)
					{
						if (static_cast<unsigned int>(static_cast<unsigned int>(ch) - '0') <= 9)
							ucsc = 10 * ucsc + (ch - '0');
						else if (ch == ';')
							break;
						else // cancel
							return stre;

						ch = *++stre;
					}

					// step over ';'
					++stre;
				}

				// encode the code point over the start of the reference; s ends up just past the written units
			#ifdef PUGIXML_WCHAR_MODE
				s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
			#else
				s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
			#endif

				// whatever is left of the reference text becomes the gap
				g.push(s, stre - s);
				return stre;
			}

			case 'a':	// &a
			{
				++stre;

				if (*stre == 'm') // &am
				{
					if (*++stre == 'p' && *++stre == ';') // &amp;
					{
						*s++ = '&';
						++stre;

						g.push(s, stre - s);
						return stre;
					}
				}
				else if (*stre == 'p') // &ap
				{
					if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
					{
						*s++ = '\'';
						++stre;

						g.push(s, stre - s);
						return stre;
					}
				}
				break;
			}

			case 'g': // &g
			{
				if (*++stre == 't' && *++stre == ';') // &gt;
				{
					*s++ = '>';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
				break;
			}

			case 'l': // &l
			{
				if (*++stre == 't' && *++stre == ';') // &lt;
				{
					*s++ = '<';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
				break;
			}

			case 'q': // &q
			{
				if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
				{
					*s++ = '"';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
				break;
			}

			default:
				break;
		}

		// not a recognized reference: resume scanning at the first character that did not match
		return stre;
	}
2563 
2564 	// Parser utilities
	// These macros are not hygienic: they read and modify names from the scope they are expanded in
	// (s, endch, optmsk, cursor, alloc, ch, error_offset, error_status).

	// True if c is e, or if c is the terminating zero and the character saved in `endch` is e.
	#define PUGI__ENDSWITH(c, e)        ((c) == (e) || ((c) == 0 && endch == (e)))
	// Advances s past any whitespace characters.
	#define PUGI__SKIPWS()              { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
	// Non-zero if any bit of OPT is set in optmsk.
	#define PUGI__OPTSET(OPT)           ( optmsk & (OPT) )
	// Appends a new node of the given type under cursor and makes it the cursor; reports status_out_of_memory on failure.
	#define PUGI__PUSHNODE(TYPE)        { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
	// Moves the cursor back to its parent.
	#define PUGI__POPNODE()             { cursor = cursor->parent; }
	// Advances s until X becomes true or the terminating zero is reached.
	#define PUGI__SCANFOR(X)            { while (*s != 0 && !(X)) ++s; }
	// Advances s for as long as X holds; X itself has to stop at the terminating zero.
	#define PUGI__SCANWHILE(X)          { while (X) ++s; }
	// Same as PUGI__SCANWHILE, unrolled four characters at a time; X must test `ss` (the character under inspection), not *s.
	#define PUGI__SCANWHILE_UNROLL(X)   { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
	// Ends the current segment: saves the character at s in ch, overwrites it with a terminator and steps past it.
	#define PUGI__ENDSEG()              { ch = *s; *s = 0; ++s; }
	// Records the error position and status, then returns a null char_t* from the enclosing function.
	#define PUGI__THROW_ERROR(err, m)   return error_offset = m, error_status = err, static_cast<char_t*>(0)
	// Reports err if s has reached the terminating zero.
	#define PUGI__CHECK_ERROR(err, m)   { if (*s == 0) PUGI__THROW_ERROR(err, m); }
2576 
strconv_comment(char_t * s,char_t endch)2577 	PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
2578 	{
2579 		gap g;
2580 
2581 		while (true)
2582 		{
2583 			PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
2584 
2585 			if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2586 			{
2587 				*s++ = '\n'; // replace first one with 0x0a
2588 
2589 				if (*s == '\n') g.push(s, 1);
2590 			}
2591 			else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
2592 			{
2593 				*g.flush(s) = 0;
2594 
2595 				return s + (s[2] == '>' ? 3 : 2);
2596 			}
2597 			else if (*s == 0)
2598 			{
2599 				return 0;
2600 			}
2601 			else ++s;
2602 		}
2603 	}
2604 
strconv_cdata(char_t * s,char_t endch)2605 	PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
2606 	{
2607 		gap g;
2608 
2609 		while (true)
2610 		{
2611 			PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
2612 
2613 			if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2614 			{
2615 				*s++ = '\n'; // replace first one with 0x0a
2616 
2617 				if (*s == '\n') g.push(s, 1);
2618 			}
2619 			else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
2620 			{
2621 				*g.flush(s) = 0;
2622 
2623 				return s + 1;
2624 			}
2625 			else if (*s == 0)
2626 			{
2627 				return 0;
2628 			}
2629 			else ++s;
2630 		}
2631 	}
2632 
2633 	typedef char_t* (*strconv_pcdata_t)(char_t*);
2634 
2635 	template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
2636 	{
parsestrconv_pcdata_impl2637 		static char_t* parse(char_t* s)
2638 		{
2639 			gap g;
2640 
2641 			char_t* begin = s;
2642 
2643 			while (true)
2644 			{
2645 				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));
2646 
2647 				if (*s == '<') // PCDATA ends here
2648 				{
2649 					char_t* end = g.flush(s);
2650 
2651 					if (opt_trim::value)
2652 						while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2653 							--end;
2654 
2655 					*end = 0;
2656 
2657 					return s + 1;
2658 				}
2659 				else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2660 				{
2661 					*s++ = '\n'; // replace first one with 0x0a
2662 
2663 					if (*s == '\n') g.push(s, 1);
2664 				}
2665 				else if (opt_escape::value && *s == '&')
2666 				{
2667 					s = strconv_escape(s, g);
2668 				}
2669 				else if (*s == 0)
2670 				{
2671 					char_t* end = g.flush(s);
2672 
2673 					if (opt_trim::value)
2674 						while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2675 							--end;
2676 
2677 					*end = 0;
2678 
2679 					return s;
2680 				}
2681 				else ++s;
2682 			}
2683 		}
2684 	};
2685 
get_strconv_pcdata(unsigned int optmask)2686 	PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
2687 	{
2688 		PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
2689 
2690 		switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (eol escapes trim)
2691 		{
2692 		case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse;
2693 		case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse;
2694 		case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse;
2695 		case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse;
2696 		case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse;
2697 		case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse;
2698 		case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse;
2699 		case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse;
2700 		default: assert(false); return 0; // should not get here
2701 		}
2702 	}
2703 
2704 	typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
2705 
2706 	template <typename opt_escape> struct strconv_attribute_impl
2707 	{
parse_wnormstrconv_attribute_impl2708 		static char_t* parse_wnorm(char_t* s, char_t end_quote)
2709 		{
2710 			gap g;
2711 
2712 			// trim leading whitespaces
2713 			if (PUGI__IS_CHARTYPE(*s, ct_space))
2714 			{
2715 				char_t* str = s;
2716 
2717 				do ++str;
2718 				while (PUGI__IS_CHARTYPE(*str, ct_space));
2719 
2720 				g.push(s, str - s);
2721 			}
2722 
2723 			while (true)
2724 			{
2725 				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
2726 
2727 				if (*s == end_quote)
2728 				{
2729 					char_t* str = g.flush(s);
2730 
2731 					do *str-- = 0;
2732 					while (PUGI__IS_CHARTYPE(*str, ct_space));
2733 
2734 					return s + 1;
2735 				}
2736 				else if (PUGI__IS_CHARTYPE(*s, ct_space))
2737 				{
2738 					*s++ = ' ';
2739 
2740 					if (PUGI__IS_CHARTYPE(*s, ct_space))
2741 					{
2742 						char_t* str = s + 1;
2743 						while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
2744 
2745 						g.push(s, str - s);
2746 					}
2747 				}
2748 				else if (opt_escape::value && *s == '&')
2749 				{
2750 					s = strconv_escape(s, g);
2751 				}
2752 				else if (!*s)
2753 				{
2754 					return 0;
2755 				}
2756 				else ++s;
2757 			}
2758 		}
2759 
parse_wconvstrconv_attribute_impl2760 		static char_t* parse_wconv(char_t* s, char_t end_quote)
2761 		{
2762 			gap g;
2763 
2764 			while (true)
2765 			{
2766 				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));
2767 
2768 				if (*s == end_quote)
2769 				{
2770 					*g.flush(s) = 0;
2771 
2772 					return s + 1;
2773 				}
2774 				else if (PUGI__IS_CHARTYPE(*s, ct_space))
2775 				{
2776 					if (*s == '\r')
2777 					{
2778 						*s++ = ' ';
2779 
2780 						if (*s == '\n') g.push(s, 1);
2781 					}
2782 					else *s++ = ' ';
2783 				}
2784 				else if (opt_escape::value && *s == '&')
2785 				{
2786 					s = strconv_escape(s, g);
2787 				}
2788 				else if (!*s)
2789 				{
2790 					return 0;
2791 				}
2792 				else ++s;
2793 			}
2794 		}
2795 
parse_eolstrconv_attribute_impl2796 		static char_t* parse_eol(char_t* s, char_t end_quote)
2797 		{
2798 			gap g;
2799 
2800 			while (true)
2801 			{
2802 				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
2803 
2804 				if (*s == end_quote)
2805 				{
2806 					*g.flush(s) = 0;
2807 
2808 					return s + 1;
2809 				}
2810 				else if (*s == '\r')
2811 				{
2812 					*s++ = '\n';
2813 
2814 					if (*s == '\n') g.push(s, 1);
2815 				}
2816 				else if (opt_escape::value && *s == '&')
2817 				{
2818 					s = strconv_escape(s, g);
2819 				}
2820 				else if (!*s)
2821 				{
2822 					return 0;
2823 				}
2824 				else ++s;
2825 			}
2826 		}
2827 
parse_simplestrconv_attribute_impl2828 		static char_t* parse_simple(char_t* s, char_t end_quote)
2829 		{
2830 			gap g;
2831 
2832 			while (true)
2833 			{
2834 				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
2835 
2836 				if (*s == end_quote)
2837 				{
2838 					*g.flush(s) = 0;
2839 
2840 					return s + 1;
2841 				}
2842 				else if (opt_escape::value && *s == '&')
2843 				{
2844 					s = strconv_escape(s, g);
2845 				}
2846 				else if (!*s)
2847 				{
2848 					return 0;
2849 				}
2850 				else ++s;
2851 			}
2852 		}
2853 	};
2854 
get_strconv_attribute(unsigned int optmask)2855 	PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
2856 	{
2857 		PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
2858 
2859 		switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes)
2860 		{
2861 		case 0:  return strconv_attribute_impl<opt_false>::parse_simple;
2862 		case 1:  return strconv_attribute_impl<opt_true>::parse_simple;
2863 		case 2:  return strconv_attribute_impl<opt_false>::parse_eol;
2864 		case 3:  return strconv_attribute_impl<opt_true>::parse_eol;
2865 		case 4:  return strconv_attribute_impl<opt_false>::parse_wconv;
2866 		case 5:  return strconv_attribute_impl<opt_true>::parse_wconv;
2867 		case 6:  return strconv_attribute_impl<opt_false>::parse_wconv;
2868 		case 7:  return strconv_attribute_impl<opt_true>::parse_wconv;
2869 		case 8:  return strconv_attribute_impl<opt_false>::parse_wnorm;
2870 		case 9:  return strconv_attribute_impl<opt_true>::parse_wnorm;
2871 		case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
2872 		case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
2873 		case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
2874 		case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
2875 		case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
2876 		case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
2877 		default: assert(false); return 0; // should not get here
2878 		}
2879 	}
2880 
make_parse_result(xml_parse_status status,ptrdiff_t offset=0)2881 	inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
2882 	{
2883 		xml_parse_result result;
2884 		result.status = status;
2885 		result.offset = offset;
2886 
2887 		return result;
2888 	}
2889 
2890 	struct xml_parser
2891 	{
2892 		xml_allocator* alloc;
2893 		char_t* error_offset;
2894 		xml_parse_status error_status;
2895 
		// Stores the allocator to use and starts with no error recorded (null error_offset, status_ok).
		xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
		{
		}
2899 
2900 		// DOCTYPE consists of nested sections of the following possible types:
2901 		// <!-- ... -->, <? ... ?>, "...", '...'
2902 		// <![...]]>
2903 		// <!...>
2904 		// First group can not contain nested groups
2905 		// Second group can contain nested groups of the same type
2906 		// Third group can contain all other groups
parse_doctype_primitivexml_parser2907 		char_t* parse_doctype_primitive(char_t* s)
2908 		{
2909 			if (*s == '"' || *s == '\'')
2910 			{
2911 				// quoted string
2912 				char_t ch = *s++;
2913 				PUGI__SCANFOR(*s == ch);
2914 				if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2915 
2916 				s++;
2917 			}
2918 			else if (s[0] == '<' && s[1] == '?')
2919 			{
2920 				// <? ... ?>
2921 				s += 2;
2922 				PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
2923 				if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2924 
2925 				s += 2;
2926 			}
2927 			else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
2928 			{
2929 				s += 4;
2930 				PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
2931 				if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2932 
2933 				s += 3;
2934 			}
2935 			else PUGI__THROW_ERROR(status_bad_doctype, s);
2936 
2937 			return s;
2938 		}
2939 
parse_doctype_ignorexml_parser2940 		char_t* parse_doctype_ignore(char_t* s)
2941 		{
2942 			size_t depth = 0;
2943 
2944 			assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
2945 			s += 3;
2946 
2947 			while (*s)
2948 			{
2949 				if (s[0] == '<' && s[1] == '!' && s[2] == '[')
2950 				{
2951 					// nested ignore section
2952 					s += 3;
2953 					depth++;
2954 				}
2955 				else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
2956 				{
2957 					// ignore section end
2958 					s += 3;
2959 
2960 					if (depth == 0)
2961 						return s;
2962 
2963 					depth--;
2964 				}
2965 				else s++;
2966 			}
2967 
2968 			PUGI__THROW_ERROR(status_bad_doctype, s);
2969 		}
2970 
parse_doctype_groupxml_parser2971 		char_t* parse_doctype_group(char_t* s, char_t endch)
2972 		{
2973 			size_t depth = 0;
2974 
2975 			assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
2976 			s += 2;
2977 
2978 			while (*s)
2979 			{
2980 				if (s[0] == '<' && s[1] == '!' && s[2] != '-')
2981 				{
2982 					if (s[2] == '[')
2983 					{
2984 						// ignore
2985 						s = parse_doctype_ignore(s);
2986 						if (!s) return s;
2987 					}
2988 					else
2989 					{
2990 						// some control group
2991 						s += 2;
2992 						depth++;
2993 					}
2994 				}
2995 				else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
2996 				{
2997 					// unknown tag (forbidden), or some primitive group
2998 					s = parse_doctype_primitive(s);
2999 					if (!s) return s;
3000 				}
3001 				else if (*s == '>')
3002 				{
3003 					if (depth == 0)
3004 						return s;
3005 
3006 					depth--;
3007 					s++;
3008 				}
3009 				else s++;
3010 			}
3011 
3012 			if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
3013 
3014 			return s;
3015 		}
3016 
parse_exclamationxml_parser3017 		char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
3018 		{
3019 			// parse node contents, starting with exclamation mark
3020 			++s;
3021 
3022 			if (*s == '-') // '<!-...'
3023 			{
3024 				++s;
3025 
3026 				if (*s == '-') // '<!--...'
3027 				{
3028 					++s;
3029 
3030 					if (PUGI__OPTSET(parse_comments))
3031 					{
3032 						PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
3033 						cursor->value = s; // Save the offset.
3034 					}
3035 
3036 					if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
3037 					{
3038 						s = strconv_comment(s, endch);
3039 
3040 						if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
3041 					}
3042 					else
3043 					{
3044 						// Scan for terminating '-->'.
3045 						PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
3046 						PUGI__CHECK_ERROR(status_bad_comment, s);
3047 
3048 						if (PUGI__OPTSET(parse_comments))
3049 							*s = 0; // Zero-terminate this segment at the first terminating '-'.
3050 
3051 						s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
3052 					}
3053 				}
3054 				else PUGI__THROW_ERROR(status_bad_comment, s);
3055 			}
3056 			else if (*s == '[')
3057 			{
3058 				// '<![CDATA[...'
3059 				if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
3060 				{
3061 					++s;
3062 
3063 					if (PUGI__OPTSET(parse_cdata))
3064 					{
3065 						PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
3066 						cursor->value = s; // Save the offset.
3067 
3068 						if (PUGI__OPTSET(parse_eol))
3069 						{
3070 							s = strconv_cdata(s, endch);
3071 
3072 							if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
3073 						}
3074 						else
3075 						{
3076 							// Scan for terminating ']]>'.
3077 							PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
3078 							PUGI__CHECK_ERROR(status_bad_cdata, s);
3079 
3080 							*s++ = 0; // Zero-terminate this segment.
3081 						}
3082 					}
3083 					else // Flagged for discard, but we still have to scan for the terminator.
3084 					{
3085 						// Scan for terminating ']]>'.
3086 						PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
3087 						PUGI__CHECK_ERROR(status_bad_cdata, s);
3088 
3089 						++s;
3090 					}
3091 
3092 					s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
3093 				}
3094 				else PUGI__THROW_ERROR(status_bad_cdata, s);
3095 			}
3096 			else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
3097 			{
3098 				s -= 2;
3099 
3100 				if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
3101 
3102 				char_t* mark = s + 9;
3103 
3104 				s = parse_doctype_group(s, endch);
3105 				if (!s) return s;
3106 
3107 				assert((*s == 0 && endch == '>') || *s == '>');
3108 				if (*s) *s++ = 0;
3109 
3110 				if (PUGI__OPTSET(parse_doctype))
3111 				{
3112 					while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
3113 
3114 					PUGI__PUSHNODE(node_doctype);
3115 
3116 					cursor->value = mark;
3117 				}
3118 			}
3119 			else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
3120 			else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
3121 			else PUGI__THROW_ERROR(status_unrecognized_tag, s);
3122 
3123 			return s;
3124 		}
3125 
parse_questionxml_parser3126 		char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
3127 		{
3128 			// load into registers
3129 			xml_node_struct* cursor = ref_cursor;
3130 			char_t ch = 0;
3131 
3132 			// parse node contents, starting with question mark
3133 			++s;
3134 
3135 			// read PI target
3136 			char_t* target = s;
3137 
3138 			if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
3139 
3140 			PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
3141 			PUGI__CHECK_ERROR(status_bad_pi, s);
3142 
3143 			// determine node type; stricmp / strcasecmp is not portable
3144 			bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
3145 
3146 			if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
3147 			{
3148 				if (declaration)
3149 				{
3150 					// disallow non top-level declarations
3151 					if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
3152 
3153 					PUGI__PUSHNODE(node_declaration);
3154 				}
3155 				else
3156 				{
3157 					PUGI__PUSHNODE(node_pi);
3158 				}
3159 
3160 				cursor->name = target;
3161 
3162 				PUGI__ENDSEG();
3163 
3164 				// parse value/attributes
3165 				if (ch == '?')
3166 				{
3167 					// empty node
3168 					if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
3169 					s += (*s == '>');
3170 
3171 					PUGI__POPNODE();
3172 				}
3173 				else if (PUGI__IS_CHARTYPE(ch, ct_space))
3174 				{
3175 					PUGI__SKIPWS();
3176 
3177 					// scan for tag end
3178 					char_t* value = s;
3179 
3180 					PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
3181 					PUGI__CHECK_ERROR(status_bad_pi, s);
3182 
3183 					if (declaration)
3184 					{
3185 						// replace ending ? with / so that 'element' terminates properly
3186 						*s = '/';
3187 
3188 						// we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
3189 						s = value;
3190 					}
3191 					else
3192 					{
3193 						// store value and step over >
3194 						cursor->value = value;
3195 
3196 						PUGI__POPNODE();
3197 
3198 						PUGI__ENDSEG();
3199 
3200 						s += (*s == '>');
3201 					}
3202 				}
3203 				else PUGI__THROW_ERROR(status_bad_pi, s);
3204 			}
3205 			else
3206 			{
3207 				// scan for tag end
3208 				PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
3209 				PUGI__CHECK_ERROR(status_bad_pi, s);
3210 
3211 				s += (s[1] == '>' ? 2 : 1);
3212 			}
3213 
3214 			// store from registers
3215 			ref_cursor = cursor;
3216 
3217 			return s;
3218 		}
3219 
parse_treexml_parser3220 		char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
3221 		{
3222 			strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
3223 			strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
3224 
3225 			char_t ch = 0;
3226 			xml_node_struct* cursor = root;
3227 			char_t* mark = s;
3228 
3229 			while (*s != 0)
3230 			{
3231 				if (*s == '<')
3232 				{
3233 					++s;
3234 
3235 				LOC_TAG:
3236 					if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
3237 					{
3238 						PUGI__PUSHNODE(node_element); // Append a new node to the tree.
3239 
3240 						cursor->name = s;
3241 
3242 						PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3243 						PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3244 
3245 						if (ch == '>')
3246 						{
3247 							// end of tag
3248 						}
3249 						else if (PUGI__IS_CHARTYPE(ch, ct_space))
3250 						{
3251 						LOC_ATTRIBUTES:
3252 							while (true)
3253 							{
3254 								PUGI__SKIPWS(); // Eat any whitespace.
3255 
3256 								if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
3257 								{
3258 									xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute.
3259 									if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
3260 
3261 									a->name = s; // Save the offset.
3262 
3263 									PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3264 									PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3265 
3266 									if (PUGI__IS_CHARTYPE(ch, ct_space))
3267 									{
3268 										PUGI__SKIPWS(); // Eat any whitespace.
3269 
3270 										ch = *s;
3271 										++s;
3272 									}
3273 
3274 									if (ch == '=') // '<... #=...'
3275 									{
3276 										PUGI__SKIPWS(); // Eat any whitespace.
3277 
3278 										if (*s == '"' || *s == '\'') // '<... #="...'
3279 										{
3280 											ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
3281 											++s; // Step over the quote.
3282 											a->value = s; // Save the offset.
3283 
3284 											s = strconv_attribute(s, ch);
3285 
3286 											if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
3287 
3288 											// After this line the loop continues from the start;
3289 											// Whitespaces, / and > are ok, symbols and EOF are wrong,
3290 											// everything else will be detected
3291 											if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
3292 										}
3293 										else PUGI__THROW_ERROR(status_bad_attribute, s);
3294 									}
3295 									else PUGI__THROW_ERROR(status_bad_attribute, s);
3296 								}
3297 								else if (*s == '/')
3298 								{
3299 									++s;
3300 
3301 									if (*s == '>')
3302 									{
3303 										PUGI__POPNODE();
3304 										s++;
3305 										break;
3306 									}
3307 									else if (*s == 0 && endch == '>')
3308 									{
3309 										PUGI__POPNODE();
3310 										break;
3311 									}
3312 									else PUGI__THROW_ERROR(status_bad_start_element, s);
3313 								}
3314 								else if (*s == '>')
3315 								{
3316 									++s;
3317 
3318 									break;
3319 								}
3320 								else if (*s == 0 && endch == '>')
3321 								{
3322 									break;
3323 								}
3324 								else PUGI__THROW_ERROR(status_bad_start_element, s);
3325 							}
3326 
3327 							// !!!
3328 						}
3329 						else if (ch == '/') // '<#.../'
3330 						{
3331 							if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
3332 
3333 							PUGI__POPNODE(); // Pop.
3334 
3335 							s += (*s == '>');
3336 						}
3337 						else if (ch == 0)
3338 						{
3339 							// we stepped over null terminator, backtrack & handle closing tag
3340 							--s;
3341 
3342 							if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
3343 						}
3344 						else PUGI__THROW_ERROR(status_bad_start_element, s);
3345 					}
3346 					else if (*s == '/')
3347 					{
3348 						++s;
3349 
3350 						mark = s;
3351 
3352 						char_t* name = cursor->name;
3353 						if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
3354 
3355 						while (PUGI__IS_CHARTYPE(*s, ct_symbol))
3356 						{
3357 							if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
3358 						}
3359 
3360 						if (*name)
3361 						{
3362 							if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
3363 							else PUGI__THROW_ERROR(status_end_element_mismatch, mark);
3364 						}
3365 
3366 						PUGI__POPNODE(); // Pop.
3367 
3368 						PUGI__SKIPWS();
3369 
3370 						if (*s == 0)
3371 						{
3372 							if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3373 						}
3374 						else
3375 						{
3376 							if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3377 							++s;
3378 						}
3379 					}
3380 					else if (*s == '?') // '<?...'
3381 					{
3382 						s = parse_question(s, cursor, optmsk, endch);
3383 						if (!s) return s;
3384 
3385 						assert(cursor);
3386 						if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
3387 					}
3388 					else if (*s == '!') // '<!...'
3389 					{
3390 						s = parse_exclamation(s, cursor, optmsk, endch);
3391 						if (!s) return s;
3392 					}
3393 					else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
3394 					else PUGI__THROW_ERROR(status_unrecognized_tag, s);
3395 				}
3396 				else
3397 				{
3398 					mark = s; // Save this offset while searching for a terminator.
3399 
3400 					PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
3401 
3402 					if (*s == '<' || !*s)
3403 					{
3404 						// We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
3405 						assert(mark != s);
3406 
3407 						if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
3408 						{
3409 							continue;
3410 						}
3411 						else if (PUGI__OPTSET(parse_ws_pcdata_single))
3412 						{
3413 							if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
3414 						}
3415 					}
3416 
3417 					if (!PUGI__OPTSET(parse_trim_pcdata))
3418 						s = mark;
3419 
3420 					if (cursor->parent || PUGI__OPTSET(parse_fragment))
3421 					{
3422 						if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
3423 						{
3424 							cursor->value = s; // Save the offset.
3425 						}
3426 						else
3427 						{
3428 							PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
3429 
3430 							cursor->value = s; // Save the offset.
3431 
3432 							PUGI__POPNODE(); // Pop since this is a standalone.
3433 						}
3434 
3435 						s = strconv_pcdata(s);
3436 
3437 						if (!*s) break;
3438 					}
3439 					else
3440 					{
3441 						PUGI__SCANFOR(*s == '<'); // '...<'
3442 						if (!*s) break;
3443 
3444 						++s;
3445 					}
3446 
3447 					// We're after '<'
3448 					goto LOC_TAG;
3449 				}
3450 			}
3451 
3452 			// check that last tag is closed
3453 			if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
3454 
3455 			return s;
3456 		}
3457 
3458 	#ifdef PUGIXML_WCHAR_MODE
parse_skip_bomxml_parser3459 		static char_t* parse_skip_bom(char_t* s)
3460 		{
3461 			unsigned int bom = 0xfeff;
3462 			return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
3463 		}
3464 	#else
parse_skip_bomxml_parser3465 		static char_t* parse_skip_bom(char_t* s)
3466 		{
3467 			return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
3468 		}
3469 	#endif
3470 
has_element_node_siblingsxml_parser3471 		static bool has_element_node_siblings(xml_node_struct* node)
3472 		{
3473 			while (node)
3474 			{
3475 				if (PUGI__NODETYPE(node) == node_element) return true;
3476 
3477 				node = node->next_sibling;
3478 			}
3479 
3480 			return false;
3481 		}
3482 
parsexml_parser3483 		static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
3484 		{
3485 			// early-out for empty documents
3486 			if (length == 0)
3487 				return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
3488 
3489 			// get last child of the root before parsing
3490 			xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
3491 
3492 			// create parser on stack
3493 			xml_parser parser(static_cast<xml_allocator*>(xmldoc));
3494 
3495 			// save last character and make buffer zero-terminated (speeds up parsing)
3496 			char_t endch = buffer[length - 1];
3497 			buffer[length - 1] = 0;
3498 
3499 			// skip BOM to make sure it does not end up as part of parse output
3500 			char_t* buffer_data = parse_skip_bom(buffer);
3501 
3502 			// perform actual parsing
3503 			parser.parse_tree(buffer_data, root, optmsk, endch);
3504 
3505 			xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
3506 			assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
3507 
3508 			if (result)
3509 			{
3510 				// since we removed last character, we have to handle the only possible false positive (stray <)
3511 				if (endch == '<')
3512 					return make_parse_result(status_unrecognized_tag, length - 1);
3513 
3514 				// check if there are any element nodes parsed
3515 				xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0;
3516 
3517 				if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
3518 					return make_parse_result(status_no_document_element, length - 1);
3519 			}
3520 			else
3521 			{
3522 				// roll back offset if it occurs on a null terminator in the source buffer
3523 				if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
3524 					result.offset--;
3525 			}
3526 
3527 			return result;
3528 		}
3529 	};
3530 
3531 	// Output facilities
get_write_native_encoding()3532 	PUGI__FN xml_encoding get_write_native_encoding()
3533 	{
3534 	#ifdef PUGIXML_WCHAR_MODE
3535 		return get_wchar_encoding();
3536 	#else
3537 		return encoding_utf8;
3538 	#endif
3539 	}
3540 
get_write_encoding(xml_encoding encoding)3541 	PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
3542 	{
3543 		// replace wchar encoding with utf implementation
3544 		if (encoding == encoding_wchar) return get_wchar_encoding();
3545 
3546 		// replace utf16 encoding with utf16 with specific endianness
3547 		if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3548 
3549 		// replace utf32 encoding with utf32 with specific endianness
3550 		if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3551 
3552 		// only do autodetection if no explicit encoding is requested
3553 		if (encoding != encoding_auto) return encoding;
3554 
3555 		// assume utf8 encoding
3556 		return encoding_utf8;
3557 	}
3558 
convert_buffer_output_generic(typename T::value_type dest,const char_t * data,size_t length,D,T)3559 	template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
3560 	{
3561 		PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3562 
3563 		typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3564 
3565 		return static_cast<size_t>(end - dest) * sizeof(*dest);
3566 	}
3567 
convert_buffer_output_generic(typename T::value_type dest,const char_t * data,size_t length,D,T,bool opt_swap)3568 	template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
3569 	{
3570 		PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3571 
3572 		typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3573 
3574 		if (opt_swap)
3575 		{
3576 			for (typename T::value_type i = dest; i != end; ++i)
3577 				*i = endian_swap(*i);
3578 		}
3579 
3580 		return static_cast<size_t>(end - dest) * sizeof(*dest);
3581 	}
3582 
3583 #ifdef PUGIXML_WCHAR_MODE
get_valid_length(const char_t * data,size_t length)3584 	PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3585 	{
3586 		if (length < 1) return 0;
3587 
3588 		// discard last character if it's the lead of a surrogate pair
3589 		return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
3590 	}
3591 
convert_buffer_output(char_t * r_char,uint8_t * r_u8,uint16_t * r_u16,uint32_t * r_u32,const char_t * data,size_t length,xml_encoding encoding)3592 	PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3593 	{
3594 		// only endian-swapping is required
3595 		if (need_endian_swap_utf(encoding, get_wchar_encoding()))
3596 		{
3597 			convert_wchar_endian_swap(r_char, data, length);
3598 
3599 			return length * sizeof(char_t);
3600 		}
3601 
3602 		// convert to utf8
3603 		if (encoding == encoding_utf8)
3604 			return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
3605 
3606 		// convert to utf16
3607 		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3608 		{
3609 			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3610 
3611 			return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
3612 		}
3613 
3614 		// convert to utf32
3615 		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3616 		{
3617 			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3618 
3619 			return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
3620 		}
3621 
3622 		// convert to latin1
3623 		if (encoding == encoding_latin1)
3624 			return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
3625 
3626 		assert(false && "Invalid encoding");
3627 		return 0;
3628 	}
3629 #else
get_valid_length(const char_t * data,size_t length)3630 	PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3631 	{
3632 		if (length < 5) return 0;
3633 
3634 		for (size_t i = 1; i <= 4; ++i)
3635 		{
3636 			uint8_t ch = static_cast<uint8_t>(data[length - i]);
3637 
3638 			// either a standalone character or a leading one
3639 			if ((ch & 0xc0) != 0x80) return length - i;
3640 		}
3641 
3642 		// there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
3643 		return length;
3644 	}
3645 
convert_buffer_output(char_t *,uint8_t * r_u8,uint16_t * r_u16,uint32_t * r_u32,const char_t * data,size_t length,xml_encoding encoding)3646 	PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3647 	{
3648 		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3649 		{
3650 			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3651 
3652 			return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
3653 		}
3654 
3655 		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3656 		{
3657 			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3658 
3659 			return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding);
3660 		}
3661 
3662 		if (encoding == encoding_latin1)
3663 			return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
3664 
3665 		assert(false && "Invalid encoding");
3666 		return 0;
3667 	}
3668 #endif
3669 
3670 	class xml_buffered_writer
3671 	{
3672 		xml_buffered_writer(const xml_buffered_writer&);
3673 		xml_buffered_writer& operator=(const xml_buffered_writer&);
3674 
3675 	public:
xml_buffered_writer(xml_writer & writer_,xml_encoding user_encoding)3676 		xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
3677 		{
3678 			PUGI__STATIC_ASSERT(bufcapacity >= 8);
3679 		}
3680 
flush()3681 		size_t flush()
3682 		{
3683 			flush(buffer, bufsize);
3684 			bufsize = 0;
3685 			return 0;
3686 		}
3687 
flush(const char_t * data,size_t size)3688 		void flush(const char_t* data, size_t size)
3689 		{
3690 			if (size == 0) return;
3691 
3692 			// fast path, just write data
3693 			if (encoding == get_write_native_encoding())
3694 				writer.write(data, size * sizeof(char_t));
3695 			else
3696 			{
3697 				// convert chunk
3698 				size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
3699 				assert(result <= sizeof(scratch));
3700 
3701 				// write data
3702 				writer.write(scratch.data_u8, result);
3703 			}
3704 		}
3705 
write_direct(const char_t * data,size_t length)3706 		void write_direct(const char_t* data, size_t length)
3707 		{
3708 			// flush the remaining buffer contents
3709 			flush();
3710 
3711 			// handle large chunks
3712 			if (length > bufcapacity)
3713 			{
3714 				if (encoding == get_write_native_encoding())
3715 				{
3716 					// fast path, can just write data chunk
3717 					writer.write(data, length * sizeof(char_t));
3718 					return;
3719 				}
3720 
3721 				// need to convert in suitable chunks
3722 				while (length > bufcapacity)
3723 				{
3724 					// get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
3725 					// and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
3726 					size_t chunk_size = get_valid_length(data, bufcapacity);
3727 					assert(chunk_size);
3728 
3729 					// convert chunk and write
3730 					flush(data, chunk_size);
3731 
3732 					// iterate
3733 					data += chunk_size;
3734 					length -= chunk_size;
3735 				}
3736 
3737 				// small tail is copied below
3738 				bufsize = 0;
3739 			}
3740 
3741 			memcpy(buffer + bufsize, data, length * sizeof(char_t));
3742 			bufsize += length;
3743 		}
3744 
write_buffer(const char_t * data,size_t length)3745 		void write_buffer(const char_t* data, size_t length)
3746 		{
3747 			size_t offset = bufsize;
3748 
3749 			if (offset + length <= bufcapacity)
3750 			{
3751 				memcpy(buffer + offset, data, length * sizeof(char_t));
3752 				bufsize = offset + length;
3753 			}
3754 			else
3755 			{
3756 				write_direct(data, length);
3757 			}
3758 		}
3759 
write_string(const char_t * data)3760 		void write_string(const char_t* data)
3761 		{
3762 			// write the part of the string that fits in the buffer
3763 			size_t offset = bufsize;
3764 
3765 			while (*data && offset < bufcapacity)
3766 				buffer[offset++] = *data++;
3767 
3768 			// write the rest
3769 			if (offset < bufcapacity)
3770 			{
3771 				bufsize = offset;
3772 			}
3773 			else
3774 			{
3775 				// backtrack a bit if we have split the codepoint
3776 				size_t length = offset - bufsize;
3777 				size_t extra = length - get_valid_length(data - length, length);
3778 
3779 				bufsize = offset - extra;
3780 
3781 				write_direct(data - extra, strlength(data) + extra);
3782 			}
3783 		}
3784 
write(char_t d0)3785 		void write(char_t d0)
3786 		{
3787 			size_t offset = bufsize;
3788 			if (offset > bufcapacity - 1) offset = flush();
3789 
3790 			buffer[offset + 0] = d0;
3791 			bufsize = offset + 1;
3792 		}
3793 
write(char_t d0,char_t d1)3794 		void write(char_t d0, char_t d1)
3795 		{
3796 			size_t offset = bufsize;
3797 			if (offset > bufcapacity - 2) offset = flush();
3798 
3799 			buffer[offset + 0] = d0;
3800 			buffer[offset + 1] = d1;
3801 			bufsize = offset + 2;
3802 		}
3803 
write(char_t d0,char_t d1,char_t d2)3804 		void write(char_t d0, char_t d1, char_t d2)
3805 		{
3806 			size_t offset = bufsize;
3807 			if (offset > bufcapacity - 3) offset = flush();
3808 
3809 			buffer[offset + 0] = d0;
3810 			buffer[offset + 1] = d1;
3811 			buffer[offset + 2] = d2;
3812 			bufsize = offset + 3;
3813 		}
3814 
write(char_t d0,char_t d1,char_t d2,char_t d3)3815 		void write(char_t d0, char_t d1, char_t d2, char_t d3)
3816 		{
3817 			size_t offset = bufsize;
3818 			if (offset > bufcapacity - 4) offset = flush();
3819 
3820 			buffer[offset + 0] = d0;
3821 			buffer[offset + 1] = d1;
3822 			buffer[offset + 2] = d2;
3823 			buffer[offset + 3] = d3;
3824 			bufsize = offset + 4;
3825 		}
3826 
write(char_t d0,char_t d1,char_t d2,char_t d3,char_t d4)3827 		void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
3828 		{
3829 			size_t offset = bufsize;
3830 			if (offset > bufcapacity - 5) offset = flush();
3831 
3832 			buffer[offset + 0] = d0;
3833 			buffer[offset + 1] = d1;
3834 			buffer[offset + 2] = d2;
3835 			buffer[offset + 3] = d3;
3836 			buffer[offset + 4] = d4;
3837 			bufsize = offset + 5;
3838 		}
3839 
write(char_t d0,char_t d1,char_t d2,char_t d3,char_t d4,char_t d5)3840 		void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
3841 		{
3842 			size_t offset = bufsize;
3843 			if (offset > bufcapacity - 6) offset = flush();
3844 
3845 			buffer[offset + 0] = d0;
3846 			buffer[offset + 1] = d1;
3847 			buffer[offset + 2] = d2;
3848 			buffer[offset + 3] = d3;
3849 			buffer[offset + 4] = d4;
3850 			buffer[offset + 5] = d5;
3851 			bufsize = offset + 6;
3852 		}
3853 
		// Worst-case growth of one buffered unit when it is converted to another encoding:
		// utf8 maximum expansion: x4 (-> utf32)
		// utf16 maximum expansion: x2 (-> utf32)
		// utf32 maximum expansion: x1
		enum
		{
			// total byte budget shared by 'buffer' and 'scratch'; can be overridden at build time via PUGIXML_MEMORY_OUTPUT_STACK
			bufcapacitybytes =
			#ifdef PUGIXML_MEMORY_OUTPUT_STACK
				PUGIXML_MEMORY_OUTPUT_STACK
			#else
				10240
			#endif
			,
			// number of char_t slots: every slot is budgeted as one char_t in 'buffer' plus 4 bytes in 'scratch'
			bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
		};

		// pending characters in char_t form; the write() overloads append here
		char_t buffer[bufcapacity];

		// scratch area sized so that each buffered char_t has 4 bytes available, viewed as 8/16/32-bit units or as char_t
		// NOTE(review): presumably the target of encoding conversion during flush - confirm against flush()
		union
		{
			uint8_t data_u8[4 * bufcapacity];
			uint16_t data_u16[2 * bufcapacity];
			uint32_t data_u32[bufcapacity];
			char_t data_char[bufcapacity];
		} scratch;

		// NOTE(review): presumably the sink that receives flushed data - confirm against flush()
		xml_writer& writer;
		// number of char_t currently held in 'buffer' (the write() overloads advance it by the amount they append)
		size_t bufsize;
		// NOTE(review): presumably the output encoding data is converted to on flush - confirm against flush()
		xml_encoding encoding;
3882 	};
3883 
text_output_escaped(xml_buffered_writer & writer,const char_t * s,chartypex_t type)3884 	PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
3885 	{
3886 		while (*s)
3887 		{
3888 			const char_t* prev = s;
3889 
3890 			// While *s is a usual symbol
3891 			PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
3892 
3893 			writer.write_buffer(prev, static_cast<size_t>(s - prev));
3894 
3895 			switch (*s)
3896 			{
3897 				case 0: break;
3898 				case '&':
3899 					writer.write('&', 'a', 'm', 'p', ';');
3900 					++s;
3901 					break;
3902 				case '<':
3903 					writer.write('&', 'l', 't', ';');
3904 					++s;
3905 					break;
3906 				case '>':
3907 					writer.write('&', 'g', 't', ';');
3908 					++s;
3909 					break;
3910 				case '"':
3911 					writer.write('&', 'q', 'u', 'o', 't', ';');
3912 					++s;
3913 					break;
3914 				default: // s is not a usual symbol
3915 				{
3916 					unsigned int ch = static_cast<unsigned int>(*s++);
3917 					assert(ch < 32);
3918 
3919 					writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
3920 				}
3921 			}
3922 		}
3923 	}
3924 
text_output(xml_buffered_writer & writer,const char_t * s,chartypex_t type,unsigned int flags)3925 	PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3926 	{
3927 		if (flags & format_no_escapes)
3928 			writer.write_string(s);
3929 		else
3930 			text_output_escaped(writer, s, type);
3931 	}
3932 
text_output_cdata(xml_buffered_writer & writer,const char_t * s)3933 	PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
3934 	{
3935 		do
3936 		{
3937 			writer.write('<', '!', '[', 'C', 'D');
3938 			writer.write('A', 'T', 'A', '[');
3939 
3940 			const char_t* prev = s;
3941 
3942 			// look for ]]> sequence - we can't output it as is since it terminates CDATA
3943 			while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
3944 
3945 			// skip ]] if we stopped at ]]>, > will go to the next CDATA section
3946 			if (*s) s += 2;
3947 
3948 			writer.write_buffer(prev, static_cast<size_t>(s - prev));
3949 
3950 			writer.write(']', ']', '>');
3951 		}
3952 		while (*s);
3953 	}
3954 
text_output_indent(xml_buffered_writer & writer,const char_t * indent,size_t indent_length,unsigned int depth)3955 	PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
3956 	{
3957 		switch (indent_length)
3958 		{
3959 		case 1:
3960 		{
3961 			for (unsigned int i = 0; i < depth; ++i)
3962 				writer.write(indent[0]);
3963 			break;
3964 		}
3965 
3966 		case 2:
3967 		{
3968 			for (unsigned int i = 0; i < depth; ++i)
3969 				writer.write(indent[0], indent[1]);
3970 			break;
3971 		}
3972 
3973 		case 3:
3974 		{
3975 			for (unsigned int i = 0; i < depth; ++i)
3976 				writer.write(indent[0], indent[1], indent[2]);
3977 			break;
3978 		}
3979 
3980 		case 4:
3981 		{
3982 			for (unsigned int i = 0; i < depth; ++i)
3983 				writer.write(indent[0], indent[1], indent[2], indent[3]);
3984 			break;
3985 		}
3986 
3987 		default:
3988 		{
3989 			for (unsigned int i = 0; i < depth; ++i)
3990 				writer.write_buffer(indent, indent_length);
3991 		}
3992 		}
3993 	}
3994 
node_output_comment(xml_buffered_writer & writer,const char_t * s)3995 	PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
3996 	{
3997 		writer.write('<', '!', '-', '-');
3998 
3999 		while (*s)
4000 		{
4001 			const char_t* prev = s;
4002 
4003 			// look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
4004 			while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
4005 
4006 			writer.write_buffer(prev, static_cast<size_t>(s - prev));
4007 
4008 			if (*s)
4009 			{
4010 				assert(*s == '-');
4011 
4012 				writer.write('-', ' ');
4013 				++s;
4014 			}
4015 		}
4016 
4017 		writer.write('-', '-', '>');
4018 	}
4019 
node_output_pi_value(xml_buffered_writer & writer,const char_t * s)4020 	PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
4021 	{
4022 		while (*s)
4023 		{
4024 			const char_t* prev = s;
4025 
4026 			// look for ?> sequence - we can't output it since ?> terminates PI
4027 			while (*s && !(s[0] == '?' && s[1] == '>')) ++s;
4028 
4029 			writer.write_buffer(prev, static_cast<size_t>(s - prev));
4030 
4031 			if (*s)
4032 			{
4033 				assert(s[0] == '?' && s[1] == '>');
4034 
4035 				writer.write('?', ' ', '>');
4036 				s += 2;
4037 			}
4038 		}
4039 	}
4040 
node_output_attributes(xml_buffered_writer & writer,xml_node_struct * node,const char_t * indent,size_t indent_length,unsigned int flags,unsigned int depth)4041 	PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4042 	{
4043 		const char_t* default_name = PUGIXML_TEXT(":anonymous");
4044 
4045 		for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4046 		{
4047 			if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes)
4048 			{
4049 				writer.write('\n');
4050 
4051 				text_output_indent(writer, indent, indent_length, depth + 1);
4052 			}
4053 			else
4054 			{
4055 				writer.write(' ');
4056 			}
4057 
4058 			writer.write_string(a->name ? a->name + 0 : default_name);
4059 			writer.write('=', '"');
4060 
4061 			if (a->value)
4062 				text_output(writer, a->value, ctx_special_attr, flags);
4063 
4064 			writer.write('"');
4065 		}
4066 	}
4067 
node_output_start(xml_buffered_writer & writer,xml_node_struct * node,const char_t * indent,size_t indent_length,unsigned int flags,unsigned int depth)4068 	PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4069 	{
4070 		const char_t* default_name = PUGIXML_TEXT(":anonymous");
4071 		const char_t* name = node->name ? node->name + 0 : default_name;
4072 
4073 		writer.write('<');
4074 		writer.write_string(name);
4075 
4076 		if (node->first_attribute)
4077 			node_output_attributes(writer, node, indent, indent_length, flags, depth);
4078 
4079 		// element nodes can have value if parse_embed_pcdata was used
4080 		if (!node->value)
4081 		{
4082 			if (!node->first_child)
4083 			{
4084 				if (flags & format_no_empty_element_tags)
4085 				{
4086 					writer.write('>', '<', '/');
4087 					writer.write_string(name);
4088 					writer.write('>');
4089 
4090 					return false;
4091 				}
4092 				else
4093 				{
4094 					if ((flags & format_raw) == 0)
4095 						writer.write(' ');
4096 
4097 					writer.write('/', '>');
4098 
4099 					return false;
4100 				}
4101 			}
4102 			else
4103 			{
4104 				writer.write('>');
4105 
4106 				return true;
4107 			}
4108 		}
4109 		else
4110 		{
4111 			writer.write('>');
4112 
4113 			text_output(writer, node->value, ctx_special_pcdata, flags);
4114 
4115 			if (!node->first_child)
4116 			{
4117 				writer.write('<', '/');
4118 				writer.write_string(name);
4119 				writer.write('>');
4120 
4121 				return false;
4122 			}
4123 			else
4124 			{
4125 				return true;
4126 			}
4127 		}
4128 	}
4129 
node_output_end(xml_buffered_writer & writer,xml_node_struct * node)4130 	PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
4131 	{
4132 		const char_t* default_name = PUGIXML_TEXT(":anonymous");
4133 		const char_t* name = node->name ? node->name + 0 : default_name;
4134 
4135 		writer.write('<', '/');
4136 		writer.write_string(name);
4137 		writer.write('>');
4138 	}
4139 
node_output_simple(xml_buffered_writer & writer,xml_node_struct * node,unsigned int flags)4140 	PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
4141 	{
4142 		const char_t* default_name = PUGIXML_TEXT(":anonymous");
4143 
4144 		switch (PUGI__NODETYPE(node))
4145 		{
4146 			case node_pcdata:
4147 				text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
4148 				break;
4149 
4150 			case node_cdata:
4151 				text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4152 				break;
4153 
4154 			case node_comment:
4155 				node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4156 				break;
4157 
4158 			case node_pi:
4159 				writer.write('<', '?');
4160 				writer.write_string(node->name ? node->name + 0 : default_name);
4161 
4162 				if (node->value)
4163 				{
4164 					writer.write(' ');
4165 					node_output_pi_value(writer, node->value);
4166 				}
4167 
4168 				writer.write('?', '>');
4169 				break;
4170 
4171 			case node_declaration:
4172 				writer.write('<', '?');
4173 				writer.write_string(node->name ? node->name + 0 : default_name);
4174 				node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
4175 				writer.write('?', '>');
4176 				break;
4177 
4178 			case node_doctype:
4179 				writer.write('<', '!', 'D', 'O', 'C');
4180 				writer.write('T', 'Y', 'P', 'E');
4181 
4182 				if (node->value)
4183 				{
4184 					writer.write(' ');
4185 					writer.write_string(node->value);
4186 				}
4187 
4188 				writer.write('>');
4189 				break;
4190 
4191 			default:
4192 				assert(false && "Invalid node type");
4193 		}
4194 	}
4195 
	// Bit flags tracked by node_output to decide what whitespace precedes the next node or closing tag
	enum indent_flags_t
	{
		// a line break is pending (written only when format_raw is not set)
		indent_newline = 1,
		// indentation is pending (written only when the effective indent length is non-zero)
		indent_indent = 2
	};
4201 
	// Writes the subtree rooted at 'root' to the writer.
	// The tree is walked iteratively through first_child/next_sibling/parent links; 'depth' is the indentation
	// level of the current node and is adjusted as the walk descends into and leaves elements.
	PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
	{
		// indentation is only in effect when some indent flag is set and raw output is not requested
		size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
		// whitespace pending before the next node; the first node is indented but gets no leading newline
		unsigned int indent_flags = indent_indent;

		xml_node_struct* node = root;

		do
		{
			assert(node);

			// begin writing current node
			if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
			{
				node_output_simple(writer, node, flags);

				// no whitespace is written around text, so nothing is pending after it
				indent_flags = 0;
			}
			else
			{
				if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
					writer.write('\n');

				if ((indent_flags & indent_indent) && indent_length)
					text_output_indent(writer, indent, indent_length, depth);

				if (PUGI__NODETYPE(node) == node_element)
				{
					indent_flags = indent_newline | indent_indent;

					// true means the element has children that still have to be written
					if (node_output_start(writer, node, indent, indent_length, flags, depth))
					{
						// element nodes can have value if parse_embed_pcdata was used
						if (node->value)
							indent_flags = 0;

						// descend into the first child
						node = node->first_child;
						depth++;
						continue;
					}
				}
				else if (PUGI__NODETYPE(node) == node_document)
				{
					// the document node writes nothing itself and does not increase depth
					indent_flags = indent_indent;

					if (node->first_child)
					{
						node = node->first_child;
						continue;
					}
				}
				else
				{
					node_output_simple(writer, node, flags);

					indent_flags = indent_newline | indent_indent;
				}
			}

			// continue to the next node
			while (node != root)
			{
				if (node->next_sibling)
				{
					node = node->next_sibling;
					break;
				}

				// no more siblings: go back up, closing the parent if it is an element
				node = node->parent;

				// write closing node
				if (PUGI__NODETYPE(node) == node_element)
				{
					depth--;

					if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
						writer.write('\n');

					if ((indent_flags & indent_indent) && indent_length)
						text_output_indent(writer, indent, indent_length, depth);

					node_output_end(writer, node);

					indent_flags = indent_newline | indent_indent;
				}
			}
		}
		while (node != root);

		// finish the output with a line break if one is still pending
		if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
			writer.write('\n');
	}
4294 
has_declaration(xml_node_struct * node)4295 	PUGI__FN bool has_declaration(xml_node_struct* node)
4296 	{
4297 		for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
4298 		{
4299 			xml_node_type type = PUGI__NODETYPE(child);
4300 
4301 			if (type == node_declaration) return true;
4302 			if (type == node_element) return false;
4303 		}
4304 
4305 		return false;
4306 	}
4307 
is_attribute_of(xml_attribute_struct * attr,xml_node_struct * node)4308 	PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
4309 	{
4310 		for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4311 			if (a == attr)
4312 				return true;
4313 
4314 		return false;
4315 	}
4316 
allow_insert_attribute(xml_node_type parent)4317 	PUGI__FN bool allow_insert_attribute(xml_node_type parent)
4318 	{
4319 		return parent == node_element || parent == node_declaration;
4320 	}
4321 
allow_insert_child(xml_node_type parent,xml_node_type child)4322 	PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
4323 	{
4324 		if (parent != node_document && parent != node_element) return false;
4325 		if (child == node_document || child == node_null) return false;
4326 		if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
4327 
4328 		return true;
4329 	}
4330 
allow_move(xml_node parent,xml_node child)4331 	PUGI__FN bool allow_move(xml_node parent, xml_node child)
4332 	{
4333 		// check that child can be a child of parent
4334 		if (!allow_insert_child(parent.type(), child.type()))
4335 			return false;
4336 
4337 		// check that node is not moved between documents
4338 		if (parent.root() != child.root())
4339 			return false;
4340 
4341 		// check that new parent is not in the child subtree
4342 		xml_node cur = parent;
4343 
4344 		while (cur)
4345 		{
4346 			if (cur == child)
4347 				return false;
4348 
4349 			cur = cur.parent();
4350 		}
4351 
4352 		return true;
4353 	}
4354 
4355 	template <typename String, typename Header>
node_copy_string(String & dest,Header & header,uintptr_t header_mask,char_t * source,Header & source_header,xml_allocator * alloc)4356 	PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
4357 	{
4358 		assert(!dest && (header & header_mask) == 0);
4359 
4360 		if (source)
4361 		{
4362 			if (alloc && (source_header & header_mask) == 0)
4363 			{
4364 				dest = source;
4365 
4366 				// since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
4367 				header |= xml_memory_page_contents_shared_mask;
4368 				source_header |= xml_memory_page_contents_shared_mask;
4369 			}
4370 			else
4371 				strcpy_insitu(dest, header, header_mask, source, strlength(source));
4372 		}
4373 	}
4374 
node_copy_contents(xml_node_struct * dn,xml_node_struct * sn,xml_allocator * shared_alloc)4375 	PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
4376 	{
4377 		node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
4378 		node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
4379 
4380 		for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute)
4381 		{
4382 			xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
4383 
4384 			if (da)
4385 			{
4386 				node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4387 				node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4388 			}
4389 		}
4390 	}
4391 
	// Copies the contents of sn and its entire subtree into dn.
	// The source subtree is walked iteratively with 'sit' while 'dit' tracks the destination node that new copies are appended to.
	PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
	{
		xml_allocator& alloc = get_allocator(dn);
		// non-null only when source and destination use the same allocator; forwarded to node_copy_contents
		xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;

		node_copy_contents(dn, sn, shared_alloc);

		xml_node_struct* dit = dn;
		xml_node_struct* sit = sn->first_child;

		while (sit && sit != sn)
		{
			// the destination node itself is never copied (it can be reached when dn lies inside the subtree of sn)
			if (sit != dn)
			{
				xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));

				// if the node could not be created, its subtree is skipped
				if (copy)
				{
					node_copy_contents(copy, sit, shared_alloc);

					if (sit->first_child)
					{
						// descend: children of sit are appended to the copy that was just created
						dit = copy;
						sit = sit->first_child;
						continue;
					}
				}
			}

			// continue to the next node
			do
			{
				if (sit->next_sibling)
				{
					sit = sit->next_sibling;
					break;
				}

				// no more siblings: move both cursors up one level
				sit = sit->parent;
				dit = dit->parent;
			}
			while (sit != sn);
		}
	}
4436 
node_copy_attribute(xml_attribute_struct * da,xml_attribute_struct * sa)4437 	PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
4438 	{
4439 		xml_allocator& alloc = get_allocator(da);
4440 		xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
4441 
4442 		node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4443 		node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4444 	}
4445 
is_text_node(xml_node_struct * node)4446 	inline bool is_text_node(xml_node_struct* node)
4447 	{
4448 		xml_node_type type = PUGI__NODETYPE(node);
4449 
4450 		return type == node_pcdata || type == node_cdata;
4451 	}
4452 
4453 	// get value with conversion functions
	// Parses an integer from 'value': optional leading whitespace, an optional sign, then either a 0x/0X-prefixed
	// hexadecimal number or a decimal number. Parsing stops at the first character that is not a digit.
	// U is the unsigned type the magnitude is accumulated in; minneg is the largest allowed magnitude of a negative
	// result and maxpos the largest allowed positive result.
	// Out-of-range input is clamped: the function returns 0 - minneg for negative and maxpos for positive numbers.
	template <typename U> U string_to_integer(const char_t* value, U minneg, U maxpos)
	{
		U result = 0;
		const char_t* s = value;

		while (PUGI__IS_CHARTYPE(*s, ct_space))
			s++;

		bool negative = (*s == '-');

		// skip a single sign character, if present
		s += (*s == '+' || *s == '-');

		bool overflow = false;

		// OR-ing with ' ' makes the comparison accept both 'x' and 'X'
		if (s[0] == '0' && (s[1] | ' ') == 'x')
		{
			s += 2;

			// since overflow detection relies on length of the sequence skip leading zeros
			while (*s == '0')
				s++;

			const char_t* start = s;

			for (;;)
			{
				// the unsigned casts turn each range check into a single comparison
				if (static_cast<unsigned>(*s - '0') < 10)
					result = result * 16 + (*s - '0');
				else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
					result = result * 16 + ((*s | ' ') - 'a' + 10);
				else
					break;

				s++;
			}

			size_t digits = static_cast<size_t>(s - start);

			// each byte of U holds exactly two hex digits
			overflow = digits > sizeof(U) * 2;
		}
		else
		{
			// since overflow detection relies on length of the sequence skip leading zeros
			while (*s == '0')
				s++;

			const char_t* start = s;

			for (;;)
			{
				if (static_cast<unsigned>(*s - '0') < 10)
					result = result * 10 + (*s - '0');
				else
					break;

				s++;
			}

			size_t digits = static_cast<size_t>(s - start);

			PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);

			// number of decimal digits and leading digit of the largest value of U (18446744073709551615, 4294967295 or 65535)
			const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
			const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
			const size_t high_bit = sizeof(U) * 8 - 1;

			// fewer digits than the maximum always fit; with exactly the maximum number of digits the value fits if the
			// leading digit is smaller than max_lead, or if it equals max_lead and the accumulated result still has its
			// top bit set (a value that wrapped around ends up with the top bit cleared)
			overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
		}

		// clamp to the requested range; negative results are returned as the two's complement of the magnitude
		if (negative)
			return (overflow || result > minneg) ? 0 - minneg : 0 - result;
		else
			return (overflow || result > maxpos) ? maxpos : result;
	}
4528 
get_value_int(const char_t * value)4529 	PUGI__FN int get_value_int(const char_t* value)
4530 	{
4531 		return string_to_integer<unsigned int>(value, 0 - static_cast<unsigned int>(INT_MIN), INT_MAX);
4532 	}
4533 
get_value_uint(const char_t * value)4534 	PUGI__FN unsigned int get_value_uint(const char_t* value)
4535 	{
4536 		return string_to_integer<unsigned int>(value, 0, UINT_MAX);
4537 	}
4538 
get_value_double(const char_t * value)4539 	PUGI__FN double get_value_double(const char_t* value)
4540 	{
4541 	#ifdef PUGIXML_WCHAR_MODE
4542 		return wcstod(value, 0);
4543 	#else
4544 		return strtod(value, 0);
4545 	#endif
4546 	}
4547 
get_value_float(const char_t * value)4548 	PUGI__FN float get_value_float(const char_t* value)
4549 	{
4550 	#ifdef PUGIXML_WCHAR_MODE
4551 		return static_cast<float>(wcstod(value, 0));
4552 	#else
4553 		return static_cast<float>(strtod(value, 0));
4554 	#endif
4555 	}
4556 
get_value_bool(const char_t * value)4557 	PUGI__FN bool get_value_bool(const char_t* value)
4558 	{
4559 		// only look at first char
4560 		char_t first = *value;
4561 
4562 		// 1*, t* (true), T* (True), y* (yes), Y* (YES)
4563 		return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
4564 	}
4565 
4566 #ifdef PUGIXML_HAS_LONG_LONG
get_value_llong(const char_t * value)4567 	PUGI__FN long long get_value_llong(const char_t* value)
4568 	{
4569 		return string_to_integer<unsigned long long>(value, 0 - static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX);
4570 	}
4571 
get_value_ullong(const char_t * value)4572 	PUGI__FN unsigned long long get_value_ullong(const char_t* value)
4573 	{
4574 		return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX);
4575 	}
4576 #endif
4577 
integer_to_string(char_t * begin,char_t * end,U value,bool negative)4578 	template <typename U> PUGI__FN char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
4579 	{
4580 		char_t* result = end - 1;
4581 		U rest = negative ? 0 - value : value;
4582 
4583 		do
4584 		{
4585 			*result-- = static_cast<char_t>('0' + (rest % 10));
4586 			rest /= 10;
4587 		}
4588 		while (rest);
4589 
4590 		assert(result >= begin);
4591 		(void)begin;
4592 
4593 		*result = '-';
4594 
4595 		return result + !negative;
4596 	}
4597 
4598 	// set value with conversion functions
4599 	template <typename String, typename Header>
set_value_ascii(String & dest,Header & header,uintptr_t header_mask,char * buf)4600 	PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
4601 	{
4602 	#ifdef PUGIXML_WCHAR_MODE
4603 		char_t wbuf[128];
4604 		assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
4605 
4606 		size_t offset = 0;
4607 		for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
4608 
4609 		return strcpy_insitu(dest, header, header_mask, wbuf, offset);
4610 	#else
4611 		return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
4612 	#endif
4613 	}
4614 
4615 	template <typename U, typename String, typename Header>
set_value_integer(String & dest,Header & header,uintptr_t header_mask,U value,bool negative)4616 	PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative)
4617 	{
4618 		char_t buf[64];
4619 		char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
4620 		char_t* begin = integer_to_string(buf, end, value, negative);
4621 
4622 		return strcpy_insitu(dest, header, header_mask, begin, end - begin);
4623 	}
4624 
4625 	template <typename String, typename Header>
set_value_convert(String & dest,Header & header,uintptr_t header_mask,float value)4626 	PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value)
4627 	{
4628 		char buf[128];
4629 		sprintf(buf, "%.9g", value);
4630 
4631 		return set_value_ascii(dest, header, header_mask, buf);
4632 	}
4633 
4634 	template <typename String, typename Header>
set_value_convert(String & dest,Header & header,uintptr_t header_mask,double value)4635 	PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value)
4636 	{
4637 		char buf[128];
4638 		sprintf(buf, "%.17g", value);
4639 
4640 		return set_value_ascii(dest, header, header_mask, buf);
4641 	}
4642 
4643 	template <typename String, typename Header>
set_value_bool(String & dest,Header & header,uintptr_t header_mask,bool value)4644 	PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value)
4645 	{
4646 		return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5);
4647 	}
4648 
load_buffer_impl(xml_document_struct * doc,xml_node_struct * root,void * contents,size_t size,unsigned int options,xml_encoding encoding,bool is_mutable,bool own,char_t ** out_buffer)4649 	PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
4650 	{
4651 		// check input buffer
4652 		if (!contents && size) return make_parse_result(status_io_error);
4653 
4654 		// get actual encoding
4655 		xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
4656 
4657 		// get private buffer
4658 		char_t* buffer = 0;
4659 		size_t length = 0;
4660 
4661 		if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
4662 
4663 		// delete original buffer if we performed a conversion
4664 		if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
4665 
4666 		// grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
4667 		if (own || buffer != contents) *out_buffer = buffer;
4668 
4669 		// store buffer for offset_debug
4670 		doc->buffer = buffer;
4671 
4672 		// parse
4673 		xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
4674 
4675 		// remember encoding
4676 		res.encoding = buffer_encoding;
4677 
4678 		return res;
4679 	}
4680 
4681 	// we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
get_file_size(FILE * file,size_t & out_result)4682 	PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
4683 	{
4684 	#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
4685 		// there are 64-bit versions of fseek/ftell, let's use them
4686 		typedef __int64 length_type;
4687 
4688 		_fseeki64(file, 0, SEEK_END);
4689 		length_type length = _ftelli64(file);
4690 		_fseeki64(file, 0, SEEK_SET);
4691 	#elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
4692 		// there are 64-bit versions of fseek/ftell, let's use them
4693 		typedef off64_t length_type;
4694 
4695 		fseeko64(file, 0, SEEK_END);
4696 		length_type length = ftello64(file);
4697 		fseeko64(file, 0, SEEK_SET);
4698 	#else
4699 		// if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
4700 		typedef long length_type;
4701 
4702 		fseek(file, 0, SEEK_END);
4703 		length_type length = ftell(file);
4704 		fseek(file, 0, SEEK_SET);
4705 	#endif
4706 
4707 		// check for I/O errors
4708 		if (length < 0) return status_io_error;
4709 
4710 		// check for overflow
4711 		size_t result = static_cast<size_t>(length);
4712 
4713 		if (static_cast<length_type>(result) != length) return status_out_of_memory;
4714 
4715 		// finalize
4716 		out_result = result;
4717 
4718 		return status_ok;
4719 	}
4720 
4721 	// This function assumes that buffer has extra sizeof(char_t) writable bytes after size
zero_terminate_buffer(void * buffer,size_t size,xml_encoding encoding)4722 	PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
4723 	{
4724 		// We only need to zero-terminate if encoding conversion does not do it for us
4725 	#ifdef PUGIXML_WCHAR_MODE
4726 		xml_encoding wchar_encoding = get_wchar_encoding();
4727 
4728 		if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
4729 		{
4730 			size_t length = size / sizeof(char_t);
4731 
4732 			static_cast<char_t*>(buffer)[length] = 0;
4733 			return (length + 1) * sizeof(char_t);
4734 		}
4735 	#else
4736 		if (encoding == encoding_utf8)
4737 		{
4738 			static_cast<char*>(buffer)[size] = 0;
4739 			return size + 1;
4740 		}
4741 	#endif
4742 
4743 		return size;
4744 	}
4745 
load_file_impl(xml_document_struct * doc,FILE * file,unsigned int options,xml_encoding encoding,char_t ** out_buffer)4746 	PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4747 	{
4748 		if (!file) return make_parse_result(status_file_not_found);
4749 
4750 		// get file size (can result in I/O errors)
4751 		size_t size = 0;
4752 		xml_parse_status size_status = get_file_size(file, size);
4753 		if (size_status != status_ok) return make_parse_result(size_status);
4754 
4755 		size_t max_suffix_size = sizeof(char_t);
4756 
4757 		// allocate buffer for the whole file
4758 		char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
4759 		if (!contents) return make_parse_result(status_out_of_memory);
4760 
4761 		// read file in memory
4762 		size_t read_size = fread(contents, 1, size, file);
4763 
4764 		if (read_size != size)
4765 		{
4766 			xml_memory::deallocate(contents);
4767 			return make_parse_result(status_io_error);
4768 		}
4769 
4770 		xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
4771 
4772 		return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
4773 	}
4774 
close_file(FILE * file)4775 	PUGI__FN void close_file(FILE* file)
4776 	{
4777 		fclose(file);
4778 	}
4779 
4780 #ifndef PUGIXML_NO_STL
4781 	template <typename T> struct xml_stream_chunk
4782 	{
createxml_stream_chunk4783 		static xml_stream_chunk* create()
4784 		{
4785 			void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
4786 			if (!memory) return 0;
4787 
4788 			return new (memory) xml_stream_chunk();
4789 		}
4790 
destroyxml_stream_chunk4791 		static void destroy(xml_stream_chunk* chunk)
4792 		{
4793 			// free chunk chain
4794 			while (chunk)
4795 			{
4796 				xml_stream_chunk* next_ = chunk->next;
4797 
4798 				xml_memory::deallocate(chunk);
4799 
4800 				chunk = next_;
4801 			}
4802 		}
4803 
xml_stream_chunkxml_stream_chunk4804 		xml_stream_chunk(): next(0), size(0)
4805 		{
4806 		}
4807 
4808 		xml_stream_chunk* next;
4809 		size_t size;
4810 
4811 		T data[xml_memory_page_size / sizeof(T)];
4812 	};
4813 
load_stream_data_noseek(std::basic_istream<T> & stream,void ** out_buffer,size_t * out_size)4814 	template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4815 	{
4816 		auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy);
4817 
4818 		// read file to a chunk list
4819 		size_t total = 0;
4820 		xml_stream_chunk<T>* last = 0;
4821 
4822 		while (!stream.eof())
4823 		{
4824 			// allocate new chunk
4825 			xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
4826 			if (!chunk) return status_out_of_memory;
4827 
4828 			// append chunk to list
4829 			if (last) last = last->next = chunk;
4830 			else chunks.data = last = chunk;
4831 
4832 			// read data to chunk
4833 			stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
4834 			chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
4835 
4836 			// read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
4837 			if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4838 
4839 			// guard against huge files (chunk size is small enough to make this overflow check work)
4840 			if (total + chunk->size < total) return status_out_of_memory;
4841 			total += chunk->size;
4842 		}
4843 
4844 		size_t max_suffix_size = sizeof(char_t);
4845 
4846 		// copy chunk list to a contiguous buffer
4847 		char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
4848 		if (!buffer) return status_out_of_memory;
4849 
4850 		char* write = buffer;
4851 
4852 		for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
4853 		{
4854 			assert(write + chunk->size <= buffer + total);
4855 			memcpy(write, chunk->data, chunk->size);
4856 			write += chunk->size;
4857 		}
4858 
4859 		assert(write == buffer + total);
4860 
4861 		// return buffer
4862 		*out_buffer = buffer;
4863 		*out_size = total;
4864 
4865 		return status_ok;
4866 	}
4867 
load_stream_data_seek(std::basic_istream<T> & stream,void ** out_buffer,size_t * out_size)4868 	template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4869 	{
4870 		// get length of remaining data in stream
4871 		typename std::basic_istream<T>::pos_type pos = stream.tellg();
4872 		stream.seekg(0, std::ios::end);
4873 		std::streamoff length = stream.tellg() - pos;
4874 		stream.seekg(pos);
4875 
4876 		if (stream.fail() || pos < 0) return status_io_error;
4877 
4878 		// guard against huge files
4879 		size_t read_length = static_cast<size_t>(length);
4880 
4881 		if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
4882 
4883 		size_t max_suffix_size = sizeof(char_t);
4884 
4885 		// read stream data into memory (guard against stream exceptions with buffer holder)
4886 		auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
4887 		if (!buffer.data) return status_out_of_memory;
4888 
4889 		stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
4890 
4891 		// read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
4892 		if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4893 
4894 		// return buffer
4895 		size_t actual_length = static_cast<size_t>(stream.gcount());
4896 		assert(actual_length <= read_length);
4897 
4898 		*out_buffer = buffer.release();
4899 		*out_size = actual_length * sizeof(T);
4900 
4901 		return status_ok;
4902 	}
4903 
load_stream_impl(xml_document_struct * doc,std::basic_istream<T> & stream,unsigned int options,xml_encoding encoding,char_t ** out_buffer)4904 	template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4905 	{
4906 		void* buffer = 0;
4907 		size_t size = 0;
4908 		xml_parse_status status = status_ok;
4909 
4910 		// if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
4911 		if (stream.fail()) return make_parse_result(status_io_error);
4912 
4913 		// load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
4914 		if (stream.tellg() < 0)
4915 		{
4916 			stream.clear(); // clear error flags that could be set by a failing tellg
4917 			status = load_stream_data_noseek(stream, &buffer, &size);
4918 		}
4919 		else
4920 			status = load_stream_data_seek(stream, &buffer, &size);
4921 
4922 		if (status != status_ok) return make_parse_result(status);
4923 
4924 		xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
4925 
4926 		return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
4927 	}
4928 #endif
4929 
4930 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
open_file_wide(const wchar_t * path,const wchar_t * mode)4931 	PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4932 	{
4933 		return _wfopen(path, mode);
4934 	}
4935 #else
convert_path_heap(const wchar_t * str)4936 	PUGI__FN char* convert_path_heap(const wchar_t* str)
4937 	{
4938 		assert(str);
4939 
4940 		// first pass: get length in utf8 characters
4941 		size_t length = strlength_wide(str);
4942 		size_t size = as_utf8_begin(str, length);
4943 
4944 		// allocate resulting string
4945 		char* result = static_cast<char*>(xml_memory::allocate(size + 1));
4946 		if (!result) return 0;
4947 
4948 		// second pass: convert to utf8
4949 		as_utf8_end(result, size, str, length);
4950 
4951 		// zero-terminate
4952 		result[size] = 0;
4953 
4954 		return result;
4955 	}
4956 
open_file_wide(const wchar_t * path,const wchar_t * mode)4957 	PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4958 	{
4959 		// there is no standard function to open wide paths, so our best bet is to try utf8 path
4960 		char* path_utf8 = convert_path_heap(path);
4961 		if (!path_utf8) return 0;
4962 
4963 		// convert mode to ASCII (we mirror _wfopen interface)
4964 		char mode_ascii[4] = {0};
4965 		for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
4966 
4967 		// try to open the utf8 path
4968 		FILE* result = fopen(path_utf8, mode_ascii);
4969 
4970 		// free dummy buffer
4971 		xml_memory::deallocate(path_utf8);
4972 
4973 		return result;
4974 	}
4975 #endif
4976 
save_file_impl(const xml_document & doc,FILE * file,const char_t * indent,unsigned int flags,xml_encoding encoding)4977 	PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
4978 	{
4979 		if (!file) return false;
4980 
4981 		xml_writer_file writer(file);
4982 		doc.save(writer, indent, flags, encoding);
4983 
4984 		return ferror(file) == 0;
4985 	}
4986 
4987 	struct name_null_sentry
4988 	{
4989 		xml_node_struct* node;
4990 		char_t* name;
4991 
name_null_sentryname_null_sentry4992 		name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name)
4993 		{
4994 			node->name = 0;
4995 		}
4996 
~name_null_sentryname_null_sentry4997 		~name_null_sentry()
4998 		{
4999 			node->name = name;
5000 		}
5001 	};
5002 PUGI__NS_END
5003 
5004 namespace pugi
5005 {
xml_writer_file(void * file_)5006 	PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
5007 	{
5008 	}
5009 
write(const void * data,size_t size)5010 	PUGI__FN void xml_writer_file::write(const void* data, size_t size)
5011 	{
5012 		size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
5013 		(void)!result; // unfortunately we can't do proper error handling here
5014 	}
5015 
5016 #ifndef PUGIXML_NO_STL
xml_writer_stream(std::basic_ostream<char,std::char_traits<char>> & stream)5017 	PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
5018 	{
5019 	}
5020 
xml_writer_stream(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream)5021 	PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
5022 	{
5023 	}
5024 
write(const void * data,size_t size)5025 	PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
5026 	{
5027 		if (narrow_stream)
5028 		{
5029 			assert(!wide_stream);
5030 			narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
5031 		}
5032 		else
5033 		{
5034 			assert(wide_stream);
5035 			assert(size % sizeof(wchar_t) == 0);
5036 
5037 			wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
5038 		}
5039 	}
5040 #endif
5041 
xml_tree_walker()5042 	PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
5043 	{
5044 	}
5045 
~xml_tree_walker()5046 	PUGI__FN xml_tree_walker::~xml_tree_walker()
5047 	{
5048 	}
5049 
depth() const5050 	PUGI__FN int xml_tree_walker::depth() const
5051 	{
5052 		return _depth;
5053 	}
5054 
begin(xml_node &)5055 	PUGI__FN bool xml_tree_walker::begin(xml_node&)
5056 	{
5057 		return true;
5058 	}
5059 
end(xml_node &)5060 	PUGI__FN bool xml_tree_walker::end(xml_node&)
5061 	{
5062 		return true;
5063 	}
5064 
xml_attribute()5065 	PUGI__FN xml_attribute::xml_attribute(): _attr(0)
5066 	{
5067 	}
5068 
xml_attribute(xml_attribute_struct * attr)5069 	PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
5070 	{
5071 	}
5072 
unspecified_bool_xml_attribute(xml_attribute ***)5073 	PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
5074 	{
5075 	}
5076 
operator xml_attribute::unspecified_bool_type() const5077 	PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
5078 	{
5079 		return _attr ? unspecified_bool_xml_attribute : 0;
5080 	}
5081 
operator !() const5082 	PUGI__FN bool xml_attribute::operator!() const
5083 	{
5084 		return !_attr;
5085 	}
5086 
operator ==(const xml_attribute & r) const5087 	PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
5088 	{
5089 		return (_attr == r._attr);
5090 	}
5091 
operator !=(const xml_attribute & r) const5092 	PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
5093 	{
5094 		return (_attr != r._attr);
5095 	}
5096 
operator <(const xml_attribute & r) const5097 	PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
5098 	{
5099 		return (_attr < r._attr);
5100 	}
5101 
operator >(const xml_attribute & r) const5102 	PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
5103 	{
5104 		return (_attr > r._attr);
5105 	}
5106 
operator <=(const xml_attribute & r) const5107 	PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
5108 	{
5109 		return (_attr <= r._attr);
5110 	}
5111 
operator >=(const xml_attribute & r) const5112 	PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
5113 	{
5114 		return (_attr >= r._attr);
5115 	}
5116 
next_attribute() const5117 	PUGI__FN xml_attribute xml_attribute::next_attribute() const
5118 	{
5119 		return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
5120 	}
5121 
previous_attribute() const5122 	PUGI__FN xml_attribute xml_attribute::previous_attribute() const
5123 	{
5124 		return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
5125 	}
5126 
as_string(const char_t * def) const5127 	PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
5128 	{
5129 		return (_attr && _attr->value) ? _attr->value + 0 : def;
5130 	}
5131 
as_int(int def) const5132 	PUGI__FN int xml_attribute::as_int(int def) const
5133 	{
5134 		return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def;
5135 	}
5136 
as_uint(unsigned int def) const5137 	PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
5138 	{
5139 		return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def;
5140 	}
5141 
as_double(double def) const5142 	PUGI__FN double xml_attribute::as_double(double def) const
5143 	{
5144 		return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def;
5145 	}
5146 
as_float(float def) const5147 	PUGI__FN float xml_attribute::as_float(float def) const
5148 	{
5149 		return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def;
5150 	}
5151 
as_bool(bool def) const5152 	PUGI__FN bool xml_attribute::as_bool(bool def) const
5153 	{
5154 		return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def;
5155 	}
5156 
5157 #ifdef PUGIXML_HAS_LONG_LONG
as_llong(long long def) const5158 	PUGI__FN long long xml_attribute::as_llong(long long def) const
5159 	{
5160 		return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def;
5161 	}
5162 
as_ullong(unsigned long long def) const5163 	PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
5164 	{
5165 		return (_attr && _attr->value) ? impl::get_value_ullong(_attr->value) : def;
5166 	}
5167 #endif
5168 
empty() const5169 	PUGI__FN bool xml_attribute::empty() const
5170 	{
5171 		return !_attr;
5172 	}
5173 
name() const5174 	PUGI__FN const char_t* xml_attribute::name() const
5175 	{
5176 		return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT("");
5177 	}
5178 
value() const5179 	PUGI__FN const char_t* xml_attribute::value() const
5180 	{
5181 		return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT("");
5182 	}
5183 
hash_value() const5184 	PUGI__FN size_t xml_attribute::hash_value() const
5185 	{
5186 		return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
5187 	}
5188 
internal_object() const5189 	PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
5190 	{
5191 		return _attr;
5192 	}
5193 
operator =(const char_t * rhs)5194 	PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
5195 	{
5196 		set_value(rhs);
5197 		return *this;
5198 	}
5199 
operator =(int rhs)5200 	PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
5201 	{
5202 		set_value(rhs);
5203 		return *this;
5204 	}
5205 
operator =(unsigned int rhs)5206 	PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
5207 	{
5208 		set_value(rhs);
5209 		return *this;
5210 	}
5211 
operator =(long rhs)5212 	PUGI__FN xml_attribute& xml_attribute::operator=(long rhs)
5213 	{
5214 		set_value(rhs);
5215 		return *this;
5216 	}
5217 
operator =(unsigned long rhs)5218 	PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs)
5219 	{
5220 		set_value(rhs);
5221 		return *this;
5222 	}
5223 
operator =(double rhs)5224 	PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
5225 	{
5226 		set_value(rhs);
5227 		return *this;
5228 	}
5229 
operator =(float rhs)5230 	PUGI__FN xml_attribute& xml_attribute::operator=(float rhs)
5231 	{
5232 		set_value(rhs);
5233 		return *this;
5234 	}
5235 
operator =(bool rhs)5236 	PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
5237 	{
5238 		set_value(rhs);
5239 		return *this;
5240 	}
5241 
5242 #ifdef PUGIXML_HAS_LONG_LONG
operator =(long long rhs)5243 	PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
5244 	{
5245 		set_value(rhs);
5246 		return *this;
5247 	}
5248 
operator =(unsigned long long rhs)5249 	PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
5250 	{
5251 		set_value(rhs);
5252 		return *this;
5253 	}
5254 #endif
5255 
set_name(const char_t * rhs)5256 	PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
5257 	{
5258 		if (!_attr) return false;
5259 
5260 		return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5261 	}
5262 
set_value(const char_t * rhs)5263 	PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
5264 	{
5265 		if (!_attr) return false;
5266 
5267 		return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5268 	}
5269 
set_value(int rhs)5270 	PUGI__FN bool xml_attribute::set_value(int rhs)
5271 	{
5272 		if (!_attr) return false;
5273 
5274 		return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5275 	}
5276 
set_value(unsigned int rhs)5277 	PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
5278 	{
5279 		if (!_attr) return false;
5280 
5281 		return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5282 	}
5283 
set_value(long rhs)5284 	PUGI__FN bool xml_attribute::set_value(long rhs)
5285 	{
5286 		if (!_attr) return false;
5287 
5288 		return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5289 	}
5290 
set_value(unsigned long rhs)5291 	PUGI__FN bool xml_attribute::set_value(unsigned long rhs)
5292 	{
5293 		if (!_attr) return false;
5294 
5295 		return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5296 	}
5297 
set_value(double rhs)5298 	PUGI__FN bool xml_attribute::set_value(double rhs)
5299 	{
5300 		if (!_attr) return false;
5301 
5302 		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5303 	}
5304 
set_value(float rhs)5305 	PUGI__FN bool xml_attribute::set_value(float rhs)
5306 	{
5307 		if (!_attr) return false;
5308 
5309 		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5310 	}
5311 
set_value(bool rhs)5312 	PUGI__FN bool xml_attribute::set_value(bool rhs)
5313 	{
5314 		if (!_attr) return false;
5315 
5316 		return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5317 	}
5318 
5319 #ifdef PUGIXML_HAS_LONG_LONG
set_value(long long rhs)5320 	PUGI__FN bool xml_attribute::set_value(long long rhs)
5321 	{
5322 		if (!_attr) return false;
5323 
5324 		return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5325 	}
5326 
set_value(unsigned long long rhs)5327 	PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
5328 	{
5329 		if (!_attr) return false;
5330 
5331 		return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5332 	}
5333 #endif
5334 
5335 #ifdef __BORLANDC__
operator &&(const xml_attribute & lhs,bool rhs)5336 	PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
5337 	{
5338 		return (bool)lhs && rhs;
5339 	}
5340 
operator ||(const xml_attribute & lhs,bool rhs)5341 	PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
5342 	{
5343 		return (bool)lhs || rhs;
5344 	}
5345 #endif
5346 
xml_node()5347 	PUGI__FN xml_node::xml_node(): _root(0)
5348 	{
5349 	}
5350 
xml_node(xml_node_struct * p)5351 	PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
5352 	{
5353 	}
5354 
unspecified_bool_xml_node(xml_node ***)5355 	PUGI__FN static void unspecified_bool_xml_node(xml_node***)
5356 	{
5357 	}
5358 
operator xml_node::unspecified_bool_type() const5359 	PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
5360 	{
5361 		return _root ? unspecified_bool_xml_node : 0;
5362 	}
5363 
operator !() const5364 	PUGI__FN bool xml_node::operator!() const
5365 	{
5366 		return !_root;
5367 	}
5368 
begin() const5369 	PUGI__FN xml_node::iterator xml_node::begin() const
5370 	{
5371 		return iterator(_root ? _root->first_child + 0 : 0, _root);
5372 	}
5373 
end() const5374 	PUGI__FN xml_node::iterator xml_node::end() const
5375 	{
5376 		return iterator(0, _root);
5377 	}
5378 
attributes_begin() const5379 	PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
5380 	{
5381 		return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root);
5382 	}
5383 
attributes_end() const5384 	PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
5385 	{
5386 		return attribute_iterator(0, _root);
5387 	}
5388 
children() const5389 	PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
5390 	{
5391 		return xml_object_range<xml_node_iterator>(begin(), end());
5392 	}
5393 
children(const char_t * name_) const5394 	PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
5395 	{
5396 		return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
5397 	}
5398 
attributes() const5399 	PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
5400 	{
5401 		return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
5402 	}
5403 
operator ==(const xml_node & r) const5404 	PUGI__FN bool xml_node::operator==(const xml_node& r) const
5405 	{
5406 		return (_root == r._root);
5407 	}
5408 
operator !=(const xml_node & r) const5409 	PUGI__FN bool xml_node::operator!=(const xml_node& r) const
5410 	{
5411 		return (_root != r._root);
5412 	}
5413 
operator <(const xml_node & r) const5414 	PUGI__FN bool xml_node::operator<(const xml_node& r) const
5415 	{
5416 		return (_root < r._root);
5417 	}
5418 
operator >(const xml_node & r) const5419 	PUGI__FN bool xml_node::operator>(const xml_node& r) const
5420 	{
5421 		return (_root > r._root);
5422 	}
5423 
operator <=(const xml_node & r) const5424 	PUGI__FN bool xml_node::operator<=(const xml_node& r) const
5425 	{
5426 		return (_root <= r._root);
5427 	}
5428 
operator >=(const xml_node & r) const5429 	PUGI__FN bool xml_node::operator>=(const xml_node& r) const
5430 	{
5431 		return (_root >= r._root);
5432 	}
5433 
empty() const5434 	PUGI__FN bool xml_node::empty() const
5435 	{
5436 		return !_root;
5437 	}
5438 
name() const5439 	PUGI__FN const char_t* xml_node::name() const
5440 	{
5441 		return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT("");
5442 	}
5443 
type() const5444 	PUGI__FN xml_node_type xml_node::type() const
5445 	{
5446 		return _root ? PUGI__NODETYPE(_root) : node_null;
5447 	}
5448 
value() const5449 	PUGI__FN const char_t* xml_node::value() const
5450 	{
5451 		return (_root && _root->value) ? _root->value + 0 : PUGIXML_TEXT("");
5452 	}
5453 
child(const char_t * name_) const5454 	PUGI__FN xml_node xml_node::child(const char_t* name_) const
5455 	{
5456 		if (!_root) return xml_node();
5457 
5458 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5459 			if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5460 
5461 		return xml_node();
5462 	}
5463 
attribute(const char_t * name_) const5464 	PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
5465 	{
5466 		if (!_root) return xml_attribute();
5467 
5468 		for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
5469 			if (i->name && impl::strequal(name_, i->name))
5470 				return xml_attribute(i);
5471 
5472 		return xml_attribute();
5473 	}
5474 
next_sibling(const char_t * name_) const5475 	PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
5476 	{
5477 		if (!_root) return xml_node();
5478 
5479 		for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
5480 			if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5481 
5482 		return xml_node();
5483 	}
5484 
next_sibling() const5485 	PUGI__FN xml_node xml_node::next_sibling() const
5486 	{
5487 		return _root ? xml_node(_root->next_sibling) : xml_node();
5488 	}
5489 
previous_sibling(const char_t * name_) const5490 	PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
5491 	{
5492 		if (!_root) return xml_node();
5493 
5494 		for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
5495 			if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5496 
5497 		return xml_node();
5498 	}
5499 
attribute(const char_t * name_,xml_attribute & hint_) const5500 	PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
5501 	{
5502 		xml_attribute_struct* hint = hint_._attr;
5503 
5504 		// if hint is not an attribute of node, behavior is not defined
5505 		assert(!hint || (_root && impl::is_attribute_of(hint, _root)));
5506 
5507 		if (!_root) return xml_attribute();
5508 
5509 		// optimistically search from hint up until the end
5510 		for (xml_attribute_struct* i = hint; i; i = i->next_attribute)
5511 			if (i->name && impl::strequal(name_, i->name))
5512 			{
5513 				// update hint to maximize efficiency of searching for consecutive attributes
5514 				hint_._attr = i->next_attribute;
5515 
5516 				return xml_attribute(i);
5517 			}
5518 
5519 		// wrap around and search from the first attribute until the hint
5520 		// 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails
5521 		for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute)
5522 			if (j->name && impl::strequal(name_, j->name))
5523 			{
5524 				// update hint to maximize efficiency of searching for consecutive attributes
5525 				hint_._attr = j->next_attribute;
5526 
5527 				return xml_attribute(j);
5528 			}
5529 
5530 		return xml_attribute();
5531 	}
5532 
previous_sibling() const5533 	PUGI__FN xml_node xml_node::previous_sibling() const
5534 	{
5535 		if (!_root) return xml_node();
5536 
5537 		if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
5538 		else return xml_node();
5539 	}
5540 
parent() const5541 	PUGI__FN xml_node xml_node::parent() const
5542 	{
5543 		return _root ? xml_node(_root->parent) : xml_node();
5544 	}
5545 
root() const5546 	PUGI__FN xml_node xml_node::root() const
5547 	{
5548 		return _root ? xml_node(&impl::get_document(_root)) : xml_node();
5549 	}
5550 
text() const5551 	PUGI__FN xml_text xml_node::text() const
5552 	{
5553 		return xml_text(_root);
5554 	}
5555 
child_value() const5556 	PUGI__FN const char_t* xml_node::child_value() const
5557 	{
5558 		if (!_root) return PUGIXML_TEXT("");
5559 
5560 		// element nodes can have value if parse_embed_pcdata was used
5561 		if (PUGI__NODETYPE(_root) == node_element && _root->value)
5562 			return _root->value;
5563 
5564 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5565 			if (impl::is_text_node(i) && i->value)
5566 				return i->value;
5567 
5568 		return PUGIXML_TEXT("");
5569 	}
5570 
child_value(const char_t * name_) const5571 	PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
5572 	{
5573 		return child(name_).child_value();
5574 	}
5575 
first_attribute() const5576 	PUGI__FN xml_attribute xml_node::first_attribute() const
5577 	{
5578 		return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
5579 	}
5580 
last_attribute() const5581 	PUGI__FN xml_attribute xml_node::last_attribute() const
5582 	{
5583 		return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
5584 	}
5585 
first_child() const5586 	PUGI__FN xml_node xml_node::first_child() const
5587 	{
5588 		return _root ? xml_node(_root->first_child) : xml_node();
5589 	}
5590 
last_child() const5591 	PUGI__FN xml_node xml_node::last_child() const
5592 	{
5593 		return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
5594 	}
5595 
set_name(const char_t * rhs)5596 	PUGI__FN bool xml_node::set_name(const char_t* rhs)
5597 	{
5598 		xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5599 
5600 		if (type_ != node_element && type_ != node_pi && type_ != node_declaration)
5601 			return false;
5602 
5603 		return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5604 	}
5605 
set_value(const char_t * rhs)5606 	PUGI__FN bool xml_node::set_value(const char_t* rhs)
5607 	{
5608 		xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5609 
5610 		if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
5611 			return false;
5612 
5613 		return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5614 	}
5615 
append_attribute(const char_t * name_)5616 	PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
5617 	{
5618 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5619 
5620 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5621 		if (!alloc.reserve()) return xml_attribute();
5622 
5623 		xml_attribute a(impl::allocate_attribute(alloc));
5624 		if (!a) return xml_attribute();
5625 
5626 		impl::append_attribute(a._attr, _root);
5627 
5628 		a.set_name(name_);
5629 
5630 		return a;
5631 	}
5632 
prepend_attribute(const char_t * name_)5633 	PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
5634 	{
5635 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5636 
5637 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5638 		if (!alloc.reserve()) return xml_attribute();
5639 
5640 		xml_attribute a(impl::allocate_attribute(alloc));
5641 		if (!a) return xml_attribute();
5642 
5643 		impl::prepend_attribute(a._attr, _root);
5644 
5645 		a.set_name(name_);
5646 
5647 		return a;
5648 	}
5649 
insert_attribute_after(const char_t * name_,const xml_attribute & attr)5650 	PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
5651 	{
5652 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5653 		if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5654 
5655 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5656 		if (!alloc.reserve()) return xml_attribute();
5657 
5658 		xml_attribute a(impl::allocate_attribute(alloc));
5659 		if (!a) return xml_attribute();
5660 
5661 		impl::insert_attribute_after(a._attr, attr._attr, _root);
5662 
5663 		a.set_name(name_);
5664 
5665 		return a;
5666 	}
5667 
insert_attribute_before(const char_t * name_,const xml_attribute & attr)5668 	PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
5669 	{
5670 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5671 		if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5672 
5673 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5674 		if (!alloc.reserve()) return xml_attribute();
5675 
5676 		xml_attribute a(impl::allocate_attribute(alloc));
5677 		if (!a) return xml_attribute();
5678 
5679 		impl::insert_attribute_before(a._attr, attr._attr, _root);
5680 
5681 		a.set_name(name_);
5682 
5683 		return a;
5684 	}
5685 
append_copy(const xml_attribute & proto)5686 	PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
5687 	{
5688 		if (!proto) return xml_attribute();
5689 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5690 
5691 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5692 		if (!alloc.reserve()) return xml_attribute();
5693 
5694 		xml_attribute a(impl::allocate_attribute(alloc));
5695 		if (!a) return xml_attribute();
5696 
5697 		impl::append_attribute(a._attr, _root);
5698 		impl::node_copy_attribute(a._attr, proto._attr);
5699 
5700 		return a;
5701 	}
5702 
prepend_copy(const xml_attribute & proto)5703 	PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
5704 	{
5705 		if (!proto) return xml_attribute();
5706 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5707 
5708 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5709 		if (!alloc.reserve()) return xml_attribute();
5710 
5711 		xml_attribute a(impl::allocate_attribute(alloc));
5712 		if (!a) return xml_attribute();
5713 
5714 		impl::prepend_attribute(a._attr, _root);
5715 		impl::node_copy_attribute(a._attr, proto._attr);
5716 
5717 		return a;
5718 	}
5719 
insert_copy_after(const xml_attribute & proto,const xml_attribute & attr)5720 	PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
5721 	{
5722 		if (!proto) return xml_attribute();
5723 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5724 		if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5725 
5726 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5727 		if (!alloc.reserve()) return xml_attribute();
5728 
5729 		xml_attribute a(impl::allocate_attribute(alloc));
5730 		if (!a) return xml_attribute();
5731 
5732 		impl::insert_attribute_after(a._attr, attr._attr, _root);
5733 		impl::node_copy_attribute(a._attr, proto._attr);
5734 
5735 		return a;
5736 	}
5737 
insert_copy_before(const xml_attribute & proto,const xml_attribute & attr)5738 	PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
5739 	{
5740 		if (!proto) return xml_attribute();
5741 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5742 		if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5743 
5744 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5745 		if (!alloc.reserve()) return xml_attribute();
5746 
5747 		xml_attribute a(impl::allocate_attribute(alloc));
5748 		if (!a) return xml_attribute();
5749 
5750 		impl::insert_attribute_before(a._attr, attr._attr, _root);
5751 		impl::node_copy_attribute(a._attr, proto._attr);
5752 
5753 		return a;
5754 	}
5755 
append_child(xml_node_type type_)5756 	PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
5757 	{
5758 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5759 
5760 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5761 		if (!alloc.reserve()) return xml_node();
5762 
5763 		xml_node n(impl::allocate_node(alloc, type_));
5764 		if (!n) return xml_node();
5765 
5766 		impl::append_node(n._root, _root);
5767 
5768 		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5769 
5770 		return n;
5771 	}
5772 
prepend_child(xml_node_type type_)5773 	PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
5774 	{
5775 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5776 
5777 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5778 		if (!alloc.reserve()) return xml_node();
5779 
5780 		xml_node n(impl::allocate_node(alloc, type_));
5781 		if (!n) return xml_node();
5782 
5783 		impl::prepend_node(n._root, _root);
5784 
5785 		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5786 
5787 		return n;
5788 	}
5789 
insert_child_before(xml_node_type type_,const xml_node & node)5790 	PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
5791 	{
5792 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5793 		if (!node._root || node._root->parent != _root) return xml_node();
5794 
5795 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5796 		if (!alloc.reserve()) return xml_node();
5797 
5798 		xml_node n(impl::allocate_node(alloc, type_));
5799 		if (!n) return xml_node();
5800 
5801 		impl::insert_node_before(n._root, node._root);
5802 
5803 		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5804 
5805 		return n;
5806 	}
5807 
insert_child_after(xml_node_type type_,const xml_node & node)5808 	PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
5809 	{
5810 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5811 		if (!node._root || node._root->parent != _root) return xml_node();
5812 
5813 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5814 		if (!alloc.reserve()) return xml_node();
5815 
5816 		xml_node n(impl::allocate_node(alloc, type_));
5817 		if (!n) return xml_node();
5818 
5819 		impl::insert_node_after(n._root, node._root);
5820 
5821 		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5822 
5823 		return n;
5824 	}
5825 
append_child(const char_t * name_)5826 	PUGI__FN xml_node xml_node::append_child(const char_t* name_)
5827 	{
5828 		xml_node result = append_child(node_element);
5829 
5830 		result.set_name(name_);
5831 
5832 		return result;
5833 	}
5834 
prepend_child(const char_t * name_)5835 	PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
5836 	{
5837 		xml_node result = prepend_child(node_element);
5838 
5839 		result.set_name(name_);
5840 
5841 		return result;
5842 	}
5843 
insert_child_after(const char_t * name_,const xml_node & node)5844 	PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
5845 	{
5846 		xml_node result = insert_child_after(node_element, node);
5847 
5848 		result.set_name(name_);
5849 
5850 		return result;
5851 	}
5852 
insert_child_before(const char_t * name_,const xml_node & node)5853 	PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
5854 	{
5855 		xml_node result = insert_child_before(node_element, node);
5856 
5857 		result.set_name(name_);
5858 
5859 		return result;
5860 	}
5861 
append_copy(const xml_node & proto)5862 	PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
5863 	{
5864 		xml_node_type type_ = proto.type();
5865 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5866 
5867 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5868 		if (!alloc.reserve()) return xml_node();
5869 
5870 		xml_node n(impl::allocate_node(alloc, type_));
5871 		if (!n) return xml_node();
5872 
5873 		impl::append_node(n._root, _root);
5874 		impl::node_copy_tree(n._root, proto._root);
5875 
5876 		return n;
5877 	}
5878 
prepend_copy(const xml_node & proto)5879 	PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
5880 	{
5881 		xml_node_type type_ = proto.type();
5882 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5883 
5884 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5885 		if (!alloc.reserve()) return xml_node();
5886 
5887 		xml_node n(impl::allocate_node(alloc, type_));
5888 		if (!n) return xml_node();
5889 
5890 		impl::prepend_node(n._root, _root);
5891 		impl::node_copy_tree(n._root, proto._root);
5892 
5893 		return n;
5894 	}
5895 
insert_copy_after(const xml_node & proto,const xml_node & node)5896 	PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
5897 	{
5898 		xml_node_type type_ = proto.type();
5899 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5900 		if (!node._root || node._root->parent != _root) return xml_node();
5901 
5902 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5903 		if (!alloc.reserve()) return xml_node();
5904 
5905 		xml_node n(impl::allocate_node(alloc, type_));
5906 		if (!n) return xml_node();
5907 
5908 		impl::insert_node_after(n._root, node._root);
5909 		impl::node_copy_tree(n._root, proto._root);
5910 
5911 		return n;
5912 	}
5913 
insert_copy_before(const xml_node & proto,const xml_node & node)5914 	PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
5915 	{
5916 		xml_node_type type_ = proto.type();
5917 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5918 		if (!node._root || node._root->parent != _root) return xml_node();
5919 
5920 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5921 		if (!alloc.reserve()) return xml_node();
5922 
5923 		xml_node n(impl::allocate_node(alloc, type_));
5924 		if (!n) return xml_node();
5925 
5926 		impl::insert_node_before(n._root, node._root);
5927 		impl::node_copy_tree(n._root, proto._root);
5928 
5929 		return n;
5930 	}
5931 
append_move(const xml_node & moved)5932 	PUGI__FN xml_node xml_node::append_move(const xml_node& moved)
5933 	{
5934 		if (!impl::allow_move(*this, moved)) return xml_node();
5935 
5936 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5937 		if (!alloc.reserve()) return xml_node();
5938 
5939 		// disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5940 		impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
5941 
5942 		impl::remove_node(moved._root);
5943 		impl::append_node(moved._root, _root);
5944 
5945 		return moved;
5946 	}
5947 
prepend_move(const xml_node & moved)5948 	PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved)
5949 	{
5950 		if (!impl::allow_move(*this, moved)) return xml_node();
5951 
5952 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5953 		if (!alloc.reserve()) return xml_node();
5954 
5955 		// disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5956 		impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
5957 
5958 		impl::remove_node(moved._root);
5959 		impl::prepend_node(moved._root, _root);
5960 
5961 		return moved;
5962 	}
5963 
insert_move_after(const xml_node & moved,const xml_node & node)5964 	PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
5965 	{
5966 		if (!impl::allow_move(*this, moved)) return xml_node();
5967 		if (!node._root || node._root->parent != _root) return xml_node();
5968 		if (moved._root == node._root) return xml_node();
5969 
5970 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5971 		if (!alloc.reserve()) return xml_node();
5972 
5973 		// disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5974 		impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
5975 
5976 		impl::remove_node(moved._root);
5977 		impl::insert_node_after(moved._root, node._root);
5978 
5979 		return moved;
5980 	}
5981 
insert_move_before(const xml_node & moved,const xml_node & node)5982 	PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
5983 	{
5984 		if (!impl::allow_move(*this, moved)) return xml_node();
5985 		if (!node._root || node._root->parent != _root) return xml_node();
5986 		if (moved._root == node._root) return xml_node();
5987 
5988 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5989 		if (!alloc.reserve()) return xml_node();
5990 
5991 		// disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5992 		impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
5993 
5994 		impl::remove_node(moved._root);
5995 		impl::insert_node_before(moved._root, node._root);
5996 
5997 		return moved;
5998 	}
5999 
remove_attribute(const char_t * name_)6000 	PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
6001 	{
6002 		return remove_attribute(attribute(name_));
6003 	}
6004 
remove_attribute(const xml_attribute & a)6005 	PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
6006 	{
6007 		if (!_root || !a._attr) return false;
6008 		if (!impl::is_attribute_of(a._attr, _root)) return false;
6009 
6010 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6011 		if (!alloc.reserve()) return false;
6012 
6013 		impl::remove_attribute(a._attr, _root);
6014 		impl::destroy_attribute(a._attr, alloc);
6015 
6016 		return true;
6017 	}
6018 
remove_child(const char_t * name_)6019 	PUGI__FN bool xml_node::remove_child(const char_t* name_)
6020 	{
6021 		return remove_child(child(name_));
6022 	}
6023 
remove_child(const xml_node & n)6024 	PUGI__FN bool xml_node::remove_child(const xml_node& n)
6025 	{
6026 		if (!_root || !n._root || n._root->parent != _root) return false;
6027 
6028 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6029 		if (!alloc.reserve()) return false;
6030 
6031 		impl::remove_node(n._root);
6032 		impl::destroy_node(n._root, alloc);
6033 
6034 		return true;
6035 	}
6036 
append_buffer(const void * contents,size_t size,unsigned int options,xml_encoding encoding)6037 	PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
6038 	{
6039 		// append_buffer is only valid for elements/documents
6040 		if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
6041 
6042 		// get document node
6043 		impl::xml_document_struct* doc = &impl::get_document(_root);
6044 
6045 		// disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
6046 		doc->header |= impl::xml_memory_page_contents_shared_mask;
6047 
6048 		// get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
6049 		impl::xml_memory_page* page = 0;
6050 		impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer), page));
6051 		(void)page;
6052 
6053 		if (!extra) return impl::make_parse_result(status_out_of_memory);
6054 
6055 		// add extra buffer to the list
6056 		extra->buffer = 0;
6057 		extra->next = doc->extra_buffers;
6058 		doc->extra_buffers = extra;
6059 
6060 		// name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
6061 		impl::name_null_sentry sentry(_root);
6062 
6063 		return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer);
6064 	}
6065 
find_child_by_attribute(const char_t * name_,const char_t * attr_name,const char_t * attr_value) const6066 	PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
6067 	{
6068 		if (!_root) return xml_node();
6069 
6070 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6071 			if (i->name && impl::strequal(name_, i->name))
6072 			{
6073 				for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6074 					if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
6075 						return xml_node(i);
6076 			}
6077 
6078 		return xml_node();
6079 	}
6080 
find_child_by_attribute(const char_t * attr_name,const char_t * attr_value) const6081 	PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
6082 	{
6083 		if (!_root) return xml_node();
6084 
6085 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6086 			for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6087 				if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
6088 					return xml_node(i);
6089 
6090 		return xml_node();
6091 	}
6092 
6093 #ifndef PUGIXML_NO_STL
path(char_t delimiter) const6094 	PUGI__FN string_t xml_node::path(char_t delimiter) const
6095 	{
6096 		if (!_root) return string_t();
6097 
6098 		size_t offset = 0;
6099 
6100 		for (xml_node_struct* i = _root; i; i = i->parent)
6101 		{
6102 			offset += (i != _root);
6103 			offset += i->name ? impl::strlength(i->name) : 0;
6104 		}
6105 
6106 		string_t result;
6107 		result.resize(offset);
6108 
6109 		for (xml_node_struct* j = _root; j; j = j->parent)
6110 		{
6111 			if (j != _root)
6112 				result[--offset] = delimiter;
6113 
6114 			if (j->name && *j->name)
6115 			{
6116 				size_t length = impl::strlength(j->name);
6117 
6118 				offset -= length;
6119 				memcpy(&result[offset], j->name, length * sizeof(char_t));
6120 			}
6121 		}
6122 
6123 		assert(offset == 0);
6124 
6125 		return result;
6126 	}
6127 #endif
6128 
first_element_by_path(const char_t * path_,char_t delimiter) const6129 	PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
6130 	{
6131 		xml_node found = *this; // Current search context.
6132 
6133 		if (!_root || !path_ || !path_[0]) return found;
6134 
6135 		if (path_[0] == delimiter)
6136 		{
6137 			// Absolute path; e.g. '/foo/bar'
6138 			found = found.root();
6139 			++path_;
6140 		}
6141 
6142 		const char_t* path_segment = path_;
6143 
6144 		while (*path_segment == delimiter) ++path_segment;
6145 
6146 		const char_t* path_segment_end = path_segment;
6147 
6148 		while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
6149 
6150 		if (path_segment == path_segment_end) return found;
6151 
6152 		const char_t* next_segment = path_segment_end;
6153 
6154 		while (*next_segment == delimiter) ++next_segment;
6155 
6156 		if (*path_segment == '.' && path_segment + 1 == path_segment_end)
6157 			return found.first_element_by_path(next_segment, delimiter);
6158 		else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
6159 			return found.parent().first_element_by_path(next_segment, delimiter);
6160 		else
6161 		{
6162 			for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)
6163 			{
6164 				if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
6165 				{
6166 					xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
6167 
6168 					if (subsearch) return subsearch;
6169 				}
6170 			}
6171 
6172 			return xml_node();
6173 		}
6174 	}
6175 
traverse(xml_tree_walker & walker)6176 	PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
6177 	{
6178 		walker._depth = -1;
6179 
6180 		xml_node arg_begin = *this;
6181 		if (!walker.begin(arg_begin)) return false;
6182 
6183 		xml_node cur = first_child();
6184 
6185 		if (cur)
6186 		{
6187 			++walker._depth;
6188 
6189 			do
6190 			{
6191 				xml_node arg_for_each = cur;
6192 				if (!walker.for_each(arg_for_each))
6193 					return false;
6194 
6195 				if (cur.first_child())
6196 				{
6197 					++walker._depth;
6198 					cur = cur.first_child();
6199 				}
6200 				else if (cur.next_sibling())
6201 					cur = cur.next_sibling();
6202 				else
6203 				{
6204 					// Borland C++ workaround
6205 					while (!cur.next_sibling() && cur != *this && !cur.parent().empty())
6206 					{
6207 						--walker._depth;
6208 						cur = cur.parent();
6209 					}
6210 
6211 					if (cur != *this)
6212 						cur = cur.next_sibling();
6213 				}
6214 			}
6215 			while (cur && cur != *this);
6216 		}
6217 
6218 		assert(walker._depth == -1);
6219 
6220 		xml_node arg_end = *this;
6221 		return walker.end(arg_end);
6222 	}
6223 
hash_value() const6224 	PUGI__FN size_t xml_node::hash_value() const
6225 	{
6226 		return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
6227 	}
6228 
internal_object() const6229 	PUGI__FN xml_node_struct* xml_node::internal_object() const
6230 	{
6231 		return _root;
6232 	}
6233 
print(xml_writer & writer,const char_t * indent,unsigned int flags,xml_encoding encoding,unsigned int depth) const6234 	PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6235 	{
6236 		if (!_root) return;
6237 
6238 		impl::xml_buffered_writer buffered_writer(writer, encoding);
6239 
6240 		impl::node_output(buffered_writer, _root, indent, flags, depth);
6241 
6242 		buffered_writer.flush();
6243 	}
6244 
6245 #ifndef PUGIXML_NO_STL
print(std::basic_ostream<char,std::char_traits<char>> & stream,const char_t * indent,unsigned int flags,xml_encoding encoding,unsigned int depth) const6246 	PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6247 	{
6248 		xml_writer_stream writer(stream);
6249 
6250 		print(writer, indent, flags, encoding, depth);
6251 	}
6252 
print(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream,const char_t * indent,unsigned int flags,unsigned int depth) const6253 	PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
6254 	{
6255 		xml_writer_stream writer(stream);
6256 
6257 		print(writer, indent, flags, encoding_wchar, depth);
6258 	}
6259 #endif
6260 
offset_debug() const6261 	PUGI__FN ptrdiff_t xml_node::offset_debug() const
6262 	{
6263 		if (!_root) return -1;
6264 
6265 		impl::xml_document_struct& doc = impl::get_document(_root);
6266 
6267 		// we can determine the offset reliably only if there is exactly once parse buffer
6268 		if (!doc.buffer || doc.extra_buffers) return -1;
6269 
6270 		switch (type())
6271 		{
6272 		case node_document:
6273 			return 0;
6274 
6275 		case node_element:
6276 		case node_declaration:
6277 		case node_pi:
6278 			return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1;
6279 
6280 		case node_pcdata:
6281 		case node_cdata:
6282 		case node_comment:
6283 		case node_doctype:
6284 			return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1;
6285 
6286 		default:
6287 			return -1;
6288 		}
6289 	}
6290 
6291 #ifdef __BORLANDC__
operator &&(const xml_node & lhs,bool rhs)6292 	PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
6293 	{
6294 		return (bool)lhs && rhs;
6295 	}
6296 
operator ||(const xml_node & lhs,bool rhs)6297 	PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
6298 	{
6299 		return (bool)lhs || rhs;
6300 	}
6301 #endif
6302 
xml_text(xml_node_struct * root)6303 	PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
6304 	{
6305 	}
6306 
_data() const6307 	PUGI__FN xml_node_struct* xml_text::_data() const
6308 	{
6309 		if (!_root || impl::is_text_node(_root)) return _root;
6310 
6311 		// element nodes can have value if parse_embed_pcdata was used
6312 		if (PUGI__NODETYPE(_root) == node_element && _root->value)
6313 			return _root;
6314 
6315 		for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
6316 			if (impl::is_text_node(node))
6317 				return node;
6318 
6319 		return 0;
6320 	}
6321 
_data_new()6322 	PUGI__FN xml_node_struct* xml_text::_data_new()
6323 	{
6324 		xml_node_struct* d = _data();
6325 		if (d) return d;
6326 
6327 		return xml_node(_root).append_child(node_pcdata).internal_object();
6328 	}
6329 
xml_text()6330 	PUGI__FN xml_text::xml_text(): _root(0)
6331 	{
6332 	}
6333 
unspecified_bool_xml_text(xml_text ***)6334 	PUGI__FN static void unspecified_bool_xml_text(xml_text***)
6335 	{
6336 	}
6337 
operator xml_text::unspecified_bool_type() const6338 	PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
6339 	{
6340 		return _data() ? unspecified_bool_xml_text : 0;
6341 	}
6342 
operator !() const6343 	PUGI__FN bool xml_text::operator!() const
6344 	{
6345 		return !_data();
6346 	}
6347 
empty() const6348 	PUGI__FN bool xml_text::empty() const
6349 	{
6350 		return _data() == 0;
6351 	}
6352 
get() const6353 	PUGI__FN const char_t* xml_text::get() const
6354 	{
6355 		xml_node_struct* d = _data();
6356 
6357 		return (d && d->value) ? d->value + 0 : PUGIXML_TEXT("");
6358 	}
6359 
as_string(const char_t * def) const6360 	PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
6361 	{
6362 		xml_node_struct* d = _data();
6363 
6364 		return (d && d->value) ? d->value + 0 : def;
6365 	}
6366 
as_int(int def) const6367 	PUGI__FN int xml_text::as_int(int def) const
6368 	{
6369 		xml_node_struct* d = _data();
6370 
6371 		return (d && d->value) ? impl::get_value_int(d->value) : def;
6372 	}
6373 
as_uint(unsigned int def) const6374 	PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
6375 	{
6376 		xml_node_struct* d = _data();
6377 
6378 		return (d && d->value) ? impl::get_value_uint(d->value) : def;
6379 	}
6380 
as_double(double def) const6381 	PUGI__FN double xml_text::as_double(double def) const
6382 	{
6383 		xml_node_struct* d = _data();
6384 
6385 		return (d && d->value) ? impl::get_value_double(d->value) : def;
6386 	}
6387 
as_float(float def) const6388 	PUGI__FN float xml_text::as_float(float def) const
6389 	{
6390 		xml_node_struct* d = _data();
6391 
6392 		return (d && d->value) ? impl::get_value_float(d->value) : def;
6393 	}
6394 
as_bool(bool def) const6395 	PUGI__FN bool xml_text::as_bool(bool def) const
6396 	{
6397 		xml_node_struct* d = _data();
6398 
6399 		return (d && d->value) ? impl::get_value_bool(d->value) : def;
6400 	}
6401 
6402 #ifdef PUGIXML_HAS_LONG_LONG
as_llong(long long def) const6403 	PUGI__FN long long xml_text::as_llong(long long def) const
6404 	{
6405 		xml_node_struct* d = _data();
6406 
6407 		return (d && d->value) ? impl::get_value_llong(d->value) : def;
6408 	}
6409 
as_ullong(unsigned long long def) const6410 	PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
6411 	{
6412 		xml_node_struct* d = _data();
6413 
6414 		return (d && d->value) ? impl::get_value_ullong(d->value) : def;
6415 	}
6416 #endif
6417 
set(const char_t * rhs)6418 	PUGI__FN bool xml_text::set(const char_t* rhs)
6419 	{
6420 		xml_node_struct* dn = _data_new();
6421 
6422 		return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false;
6423 	}
6424 
set(int rhs)6425 	PUGI__FN bool xml_text::set(int rhs)
6426 	{
6427 		xml_node_struct* dn = _data_new();
6428 
6429 		return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6430 	}
6431 
set(unsigned int rhs)6432 	PUGI__FN bool xml_text::set(unsigned int rhs)
6433 	{
6434 		xml_node_struct* dn = _data_new();
6435 
6436 		return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6437 	}
6438 
set(long rhs)6439 	PUGI__FN bool xml_text::set(long rhs)
6440 	{
6441 		xml_node_struct* dn = _data_new();
6442 
6443 		return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6444 	}
6445 
set(unsigned long rhs)6446 	PUGI__FN bool xml_text::set(unsigned long rhs)
6447 	{
6448 		xml_node_struct* dn = _data_new();
6449 
6450 		return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6451 	}
6452 
set(float rhs)6453 	PUGI__FN bool xml_text::set(float rhs)
6454 	{
6455 		xml_node_struct* dn = _data_new();
6456 
6457 		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6458 	}
6459 
set(double rhs)6460 	PUGI__FN bool xml_text::set(double rhs)
6461 	{
6462 		xml_node_struct* dn = _data_new();
6463 
6464 		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6465 	}
6466 
set(bool rhs)6467 	PUGI__FN bool xml_text::set(bool rhs)
6468 	{
6469 		xml_node_struct* dn = _data_new();
6470 
6471 		return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6472 	}
6473 
6474 #ifdef PUGIXML_HAS_LONG_LONG
set(long long rhs)6475 	PUGI__FN bool xml_text::set(long long rhs)
6476 	{
6477 		xml_node_struct* dn = _data_new();
6478 
6479 		return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6480 	}
6481 
set(unsigned long long rhs)6482 	PUGI__FN bool xml_text::set(unsigned long long rhs)
6483 	{
6484 		xml_node_struct* dn = _data_new();
6485 
6486 		return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6487 	}
6488 #endif
6489 
operator =(const char_t * rhs)6490 	PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
6491 	{
6492 		set(rhs);
6493 		return *this;
6494 	}
6495 
operator =(int rhs)6496 	PUGI__FN xml_text& xml_text::operator=(int rhs)
6497 	{
6498 		set(rhs);
6499 		return *this;
6500 	}
6501 
operator =(unsigned int rhs)6502 	PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
6503 	{
6504 		set(rhs);
6505 		return *this;
6506 	}
6507 
operator =(long rhs)6508 	PUGI__FN xml_text& xml_text::operator=(long rhs)
6509 	{
6510 		set(rhs);
6511 		return *this;
6512 	}
6513 
operator =(unsigned long rhs)6514 	PUGI__FN xml_text& xml_text::operator=(unsigned long rhs)
6515 	{
6516 		set(rhs);
6517 		return *this;
6518 	}
6519 
operator =(double rhs)6520 	PUGI__FN xml_text& xml_text::operator=(double rhs)
6521 	{
6522 		set(rhs);
6523 		return *this;
6524 	}
6525 
operator =(float rhs)6526 	PUGI__FN xml_text& xml_text::operator=(float rhs)
6527 	{
6528 		set(rhs);
6529 		return *this;
6530 	}
6531 
operator =(bool rhs)6532 	PUGI__FN xml_text& xml_text::operator=(bool rhs)
6533 	{
6534 		set(rhs);
6535 		return *this;
6536 	}
6537 
6538 #ifdef PUGIXML_HAS_LONG_LONG
operator =(long long rhs)6539 	PUGI__FN xml_text& xml_text::operator=(long long rhs)
6540 	{
6541 		set(rhs);
6542 		return *this;
6543 	}
6544 
operator =(unsigned long long rhs)6545 	PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
6546 	{
6547 		set(rhs);
6548 		return *this;
6549 	}
6550 #endif
6551 
data() const6552 	PUGI__FN xml_node xml_text::data() const
6553 	{
6554 		return xml_node(_data());
6555 	}
6556 
6557 #ifdef __BORLANDC__
operator &&(const xml_text & lhs,bool rhs)6558 	PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
6559 	{
6560 		return (bool)lhs && rhs;
6561 	}
6562 
operator ||(const xml_text & lhs,bool rhs)6563 	PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
6564 	{
6565 		return (bool)lhs || rhs;
6566 	}
6567 #endif
6568 
xml_node_iterator()6569 	PUGI__FN xml_node_iterator::xml_node_iterator()
6570 	{
6571 	}
6572 
xml_node_iterator(const xml_node & node)6573 	PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
6574 	{
6575 	}
6576 
xml_node_iterator(xml_node_struct * ref,xml_node_struct * parent)6577 	PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
6578 	{
6579 	}
6580 
operator ==(const xml_node_iterator & rhs) const6581 	PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
6582 	{
6583 		return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6584 	}
6585 
operator !=(const xml_node_iterator & rhs) const6586 	PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
6587 	{
6588 		return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6589 	}
6590 
operator *() const6591 	PUGI__FN xml_node& xml_node_iterator::operator*() const
6592 	{
6593 		assert(_wrap._root);
6594 		return _wrap;
6595 	}
6596 
operator ->() const6597 	PUGI__FN xml_node* xml_node_iterator::operator->() const
6598 	{
6599 		assert(_wrap._root);
6600 		return const_cast<xml_node*>(&_wrap); // BCC5 workaround
6601 	}
6602 
operator ++()6603 	PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
6604 	{
6605 		assert(_wrap._root);
6606 		_wrap._root = _wrap._root->next_sibling;
6607 		return *this;
6608 	}
6609 
operator ++(int)6610 	PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
6611 	{
6612 		xml_node_iterator temp = *this;
6613 		++*this;
6614 		return temp;
6615 	}
6616 
operator --()6617 	PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
6618 	{
6619 		_wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
6620 		return *this;
6621 	}
6622 
operator --(int)6623 	PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
6624 	{
6625 		xml_node_iterator temp = *this;
6626 		--*this;
6627 		return temp;
6628 	}
6629 
xml_attribute_iterator()6630 	PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
6631 	{
6632 	}
6633 
xml_attribute_iterator(const xml_attribute & attr,const xml_node & parent)6634 	PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
6635 	{
6636 	}
6637 
xml_attribute_iterator(xml_attribute_struct * ref,xml_node_struct * parent)6638 	PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
6639 	{
6640 	}
6641 
operator ==(const xml_attribute_iterator & rhs) const6642 	PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
6643 	{
6644 		return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
6645 	}
6646 
operator !=(const xml_attribute_iterator & rhs) const6647 	PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
6648 	{
6649 		return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
6650 	}
6651 
operator *() const6652 	PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
6653 	{
6654 		assert(_wrap._attr);
6655 		return _wrap;
6656 	}
6657 
operator ->() const6658 	PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
6659 	{
6660 		assert(_wrap._attr);
6661 		return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround
6662 	}
6663 
operator ++()6664 	PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
6665 	{
6666 		assert(_wrap._attr);
6667 		_wrap._attr = _wrap._attr->next_attribute;
6668 		return *this;
6669 	}
6670 
operator ++(int)6671 	PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
6672 	{
6673 		xml_attribute_iterator temp = *this;
6674 		++*this;
6675 		return temp;
6676 	}
6677 
operator --()6678 	PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
6679 	{
6680 		_wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
6681 		return *this;
6682 	}
6683 
operator --(int)6684 	PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
6685 	{
6686 		xml_attribute_iterator temp = *this;
6687 		--*this;
6688 		return temp;
6689 	}
6690 
xml_named_node_iterator()6691 	PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
6692 	{
6693 	}
6694 
xml_named_node_iterator(const xml_node & node,const char_t * name)6695 	PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
6696 	{
6697 	}
6698 
xml_named_node_iterator(xml_node_struct * ref,xml_node_struct * parent,const char_t * name)6699 	PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
6700 	{
6701 	}
6702 
operator ==(const xml_named_node_iterator & rhs) const6703 	PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
6704 	{
6705 		return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6706 	}
6707 
operator !=(const xml_named_node_iterator & rhs) const6708 	PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
6709 	{
6710 		return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6711 	}
6712 
operator *() const6713 	PUGI__FN xml_node& xml_named_node_iterator::operator*() const
6714 	{
6715 		assert(_wrap._root);
6716 		return _wrap;
6717 	}
6718 
operator ->() const6719 	PUGI__FN xml_node* xml_named_node_iterator::operator->() const
6720 	{
6721 		assert(_wrap._root);
6722 		return const_cast<xml_node*>(&_wrap); // BCC5 workaround
6723 	}
6724 
operator ++()6725 	PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
6726 	{
6727 		assert(_wrap._root);
6728 		_wrap = _wrap.next_sibling(_name);
6729 		return *this;
6730 	}
6731 
operator ++(int)6732 	PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
6733 	{
6734 		xml_named_node_iterator temp = *this;
6735 		++*this;
6736 		return temp;
6737 	}
6738 
operator --()6739 	PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--()
6740 	{
6741 		if (_wrap._root)
6742 			_wrap = _wrap.previous_sibling(_name);
6743 		else
6744 		{
6745 			_wrap = _parent.last_child();
6746 
6747 			if (!impl::strequal(_wrap.name(), _name))
6748 				_wrap = _wrap.previous_sibling(_name);
6749 		}
6750 
6751 		return *this;
6752 	}
6753 
operator --(int)6754 	PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
6755 	{
6756 		xml_named_node_iterator temp = *this;
6757 		--*this;
6758 		return temp;
6759 	}
6760 
xml_parse_result()6761 	PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
6762 	{
6763 	}
6764 
operator bool() const6765 	PUGI__FN xml_parse_result::operator bool() const
6766 	{
6767 		return status == status_ok;
6768 	}
6769 
description() const6770 	PUGI__FN const char* xml_parse_result::description() const
6771 	{
6772 		switch (status)
6773 		{
6774 		case status_ok: return "No error";
6775 
6776 		case status_file_not_found: return "File was not found";
6777 		case status_io_error: return "Error reading from file/stream";
6778 		case status_out_of_memory: return "Could not allocate memory";
6779 		case status_internal_error: return "Internal error occurred";
6780 
6781 		case status_unrecognized_tag: return "Could not determine tag type";
6782 
6783 		case status_bad_pi: return "Error parsing document declaration/processing instruction";
6784 		case status_bad_comment: return "Error parsing comment";
6785 		case status_bad_cdata: return "Error parsing CDATA section";
6786 		case status_bad_doctype: return "Error parsing document type declaration";
6787 		case status_bad_pcdata: return "Error parsing PCDATA section";
6788 		case status_bad_start_element: return "Error parsing start element tag";
6789 		case status_bad_attribute: return "Error parsing element attribute";
6790 		case status_bad_end_element: return "Error parsing end element tag";
6791 		case status_end_element_mismatch: return "Start-end tags mismatch";
6792 
6793 		case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
6794 
6795 		case status_no_document_element: return "No document element found";
6796 
6797 		default: return "Unknown error";
6798 		}
6799 	}
6800 
xml_document()6801 	PUGI__FN xml_document::xml_document(): _buffer(0)
6802 	{
6803 		_create();
6804 	}
6805 
~xml_document()6806 	PUGI__FN xml_document::~xml_document()
6807 	{
6808 		_destroy();
6809 	}
6810 
reset()6811 	PUGI__FN void xml_document::reset()
6812 	{
6813 		_destroy();
6814 		_create();
6815 	}
6816 
reset(const xml_document & proto)6817 	PUGI__FN void xml_document::reset(const xml_document& proto)
6818 	{
6819 		reset();
6820 
6821 		for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
6822 			append_copy(cur);
6823 	}
6824 
_create()6825 	PUGI__FN void xml_document::_create()
6826 	{
6827 		assert(!_root);
6828 
6829 	#ifdef PUGIXML_COMPACT
6830 		const size_t page_offset = sizeof(uint32_t);
6831 	#else
6832 		const size_t page_offset = 0;
6833 	#endif
6834 
6835 		// initialize sentinel page
6836 		PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory));
6837 
6838 		// prepare page structure
6839 		impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory);
6840 		assert(page);
6841 
6842 		page->busy_size = impl::xml_memory_page_size;
6843 
6844 		// setup first page marker
6845 	#ifdef PUGIXML_COMPACT
6846 		// round-trip through void* to avoid 'cast increases required alignment of target type' warning
6847 		page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
6848 		*page->compact_page_marker = sizeof(impl::xml_memory_page);
6849 	#endif
6850 
6851 		// allocate new root
6852 		_root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
6853 		_root->prev_sibling_c = _root;
6854 
6855 		// setup sentinel page
6856 		page->allocator = static_cast<impl::xml_document_struct*>(_root);
6857 
6858 		// setup hash table pointer in allocator
6859 	#ifdef PUGIXML_COMPACT
6860 		page->allocator->_hash = &static_cast<impl::xml_document_struct*>(_root)->hash;
6861 	#endif
6862 
6863 		// verify the document allocation
6864 		assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
6865 	}
6866 
_destroy()6867 	PUGI__FN void xml_document::_destroy()
6868 	{
6869 		assert(_root);
6870 
6871 		// destroy static storage
6872 		if (_buffer)
6873 		{
6874 			impl::xml_memory::deallocate(_buffer);
6875 			_buffer = 0;
6876 		}
6877 
6878 		// destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
6879 		for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
6880 		{
6881 			if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
6882 		}
6883 
6884 		// destroy dynamic storage, leave sentinel page (it's in static memory)
6885 		impl::xml_memory_page* root_page = PUGI__GETPAGE(_root);
6886 		assert(root_page && !root_page->prev);
6887 		assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
6888 
6889 		for (impl::xml_memory_page* page = root_page->next; page; )
6890 		{
6891 			impl::xml_memory_page* next = page->next;
6892 
6893 			impl::xml_allocator::deallocate_page(page);
6894 
6895 			page = next;
6896 		}
6897 
6898 	#ifdef PUGIXML_COMPACT
6899 		// destroy hash table
6900 		static_cast<impl::xml_document_struct*>(_root)->hash.clear();
6901 	#endif
6902 
6903 		_root = 0;
6904 	}
6905 
6906 #ifndef PUGIXML_NO_STL
load(std::basic_istream<char,std::char_traits<char>> & stream,unsigned int options,xml_encoding encoding)6907 	PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
6908 	{
6909 		reset();
6910 
6911 		return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer);
6912 	}
6913 
load(std::basic_istream<wchar_t,std::char_traits<wchar_t>> & stream,unsigned int options)6914 	PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
6915 	{
6916 		reset();
6917 
6918 		return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
6919 	}
6920 #endif
6921 
load_string(const char_t * contents,unsigned int options)6922 	PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
6923 	{
6924 		// Force native encoding (skip autodetection)
6925 	#ifdef PUGIXML_WCHAR_MODE
6926 		xml_encoding encoding = encoding_wchar;
6927 	#else
6928 		xml_encoding encoding = encoding_utf8;
6929 	#endif
6930 
6931 		return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
6932 	}
6933 
load(const char_t * contents,unsigned int options)6934 	PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
6935 	{
6936 		return load_string(contents, options);
6937 	}
6938 
load_file(const char * path_,unsigned int options,xml_encoding encoding)6939 	PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
6940 	{
6941 		reset();
6942 
6943 		using impl::auto_deleter; // MSVC7 workaround
6944 		auto_deleter<FILE> file(fopen(path_, "rb"), impl::close_file);
6945 
6946 		return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
6947 	}
6948 
load_file(const wchar_t * path_,unsigned int options,xml_encoding encoding)6949 	PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
6950 	{
6951 		reset();
6952 
6953 		using impl::auto_deleter; // MSVC7 workaround
6954 		auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file);
6955 
6956 		return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
6957 	}
6958 
load_buffer(const void * contents,size_t size,unsigned int options,xml_encoding encoding)6959 	PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
6960 	{
6961 		reset();
6962 
6963 		return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
6964 	}
6965 
load_buffer_inplace(void * contents,size_t size,unsigned int options,xml_encoding encoding)6966 	PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
6967 	{
6968 		reset();
6969 
6970 		return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
6971 	}
6972 
load_buffer_inplace_own(void * contents,size_t size,unsigned int options,xml_encoding encoding)6973 	PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
6974 	{
6975 		reset();
6976 
6977 		return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
6978 	}
6979 
save(xml_writer & writer,const char_t * indent,unsigned int flags,xml_encoding encoding) const6980 	PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
6981 	{
6982 		impl::xml_buffered_writer buffered_writer(writer, encoding);
6983 
6984 		if ((flags & format_write_bom) && encoding != encoding_latin1)
6985 		{
6986 			// BOM always represents the codepoint U+FEFF, so just write it in native encoding
6987 		#ifdef PUGIXML_WCHAR_MODE
6988 			unsigned int bom = 0xfeff;
6989 			buffered_writer.write(static_cast<wchar_t>(bom));
6990 		#else
6991 			buffered_writer.write('\xef', '\xbb', '\xbf');
6992 		#endif
6993 		}
6994 
6995 		if (!(flags & format_no_declaration) && !impl::has_declaration(_root))
6996 		{
6997 			buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
6998 			if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
6999 			buffered_writer.write('?', '>');
7000 			if (!(flags & format_raw)) buffered_writer.write('\n');
7001 		}
7002 
7003 		impl::node_output(buffered_writer, _root, indent, flags, 0);
7004 
7005 		buffered_writer.flush();
7006 	}
7007 
7008 #ifndef PUGIXML_NO_STL
save(std::basic_ostream<char,std::char_traits<char>> & stream,const char_t * indent,unsigned int flags,xml_encoding encoding) const7009 	PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7010 	{
7011 		xml_writer_stream writer(stream);
7012 
7013 		save(writer, indent, flags, encoding);
7014 	}
7015 
save(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream,const char_t * indent,unsigned int flags) const7016 	PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
7017 	{
7018 		xml_writer_stream writer(stream);
7019 
7020 		save(writer, indent, flags, encoding_wchar);
7021 	}
7022 #endif
7023 
save_file(const char * path_,const char_t * indent,unsigned int flags,xml_encoding encoding) const7024 	PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7025 	{
7026 		using impl::auto_deleter; // MSVC7 workaround
7027 		auto_deleter<FILE> file(fopen(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file);
7028 
7029 		return impl::save_file_impl(*this, file.data, indent, flags, encoding);
7030 	}
7031 
save_file(const wchar_t * path_,const char_t * indent,unsigned int flags,xml_encoding encoding) const7032 	PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7033 	{
7034 		using impl::auto_deleter; // MSVC7 workaround
7035 		auto_deleter<FILE> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), impl::close_file);
7036 
7037 		return impl::save_file_impl(*this, file.data, indent, flags, encoding);
7038 	}
7039 
document_element() const7040 	PUGI__FN xml_node xml_document::document_element() const
7041 	{
7042 		assert(_root);
7043 
7044 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
7045 			if (PUGI__NODETYPE(i) == node_element)
7046 				return xml_node(i);
7047 
7048 		return xml_node();
7049 	}
7050 
7051 #ifndef PUGIXML_NO_STL
as_utf8(const wchar_t * str)7052 	PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
7053 	{
7054 		assert(str);
7055 
7056 		return impl::as_utf8_impl(str, impl::strlength_wide(str));
7057 	}
7058 
as_utf8(const std::basic_string<wchar_t> & str)7059 	PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
7060 	{
7061 		return impl::as_utf8_impl(str.c_str(), str.size());
7062 	}
7063 
as_wide(const char * str)7064 	PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
7065 	{
7066 		assert(str);
7067 
7068 		return impl::as_wide_impl(str, strlen(str));
7069 	}
7070 
as_wide(const std::string & str)7071 	PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
7072 	{
7073 		return impl::as_wide_impl(str.c_str(), str.size());
7074 	}
7075 #endif
7076 
set_memory_management_functions(allocation_function allocate,deallocation_function deallocate)7077 	PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
7078 	{
7079 		impl::xml_memory::allocate = allocate;
7080 		impl::xml_memory::deallocate = deallocate;
7081 	}
7082 
get_memory_allocation_function()7083 	PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
7084 	{
7085 		return impl::xml_memory::allocate;
7086 	}
7087 
get_memory_deallocation_function()7088 	PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
7089 	{
7090 		return impl::xml_memory::deallocate;
7091 	}
7092 }
7093 
7094 #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
7095 namespace std
7096 {
7097 	// Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
_Iter_cat(const pugi::xml_node_iterator &)7098 	PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
7099 	{
7100 		return std::bidirectional_iterator_tag();
7101 	}
7102 
_Iter_cat(const pugi::xml_attribute_iterator &)7103 	PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
7104 	{
7105 		return std::bidirectional_iterator_tag();
7106 	}
7107 
_Iter_cat(const pugi::xml_named_node_iterator &)7108 	PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
7109 	{
7110 		return std::bidirectional_iterator_tag();
7111 	}
7112 }
7113 #endif
7114 
7115 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
7116 namespace std
7117 {
7118 	// Workarounds for (non-standard) iterator category detection
__iterator_category(const pugi::xml_node_iterator &)7119 	PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
7120 	{
7121 		return std::bidirectional_iterator_tag();
7122 	}
7123 
__iterator_category(const pugi::xml_attribute_iterator &)7124 	PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
7125 	{
7126 		return std::bidirectional_iterator_tag();
7127 	}
7128 
__iterator_category(const pugi::xml_named_node_iterator &)7129 	PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
7130 	{
7131 		return std::bidirectional_iterator_tag();
7132 	}
7133 }
7134 #endif
7135 
7136 #ifndef PUGIXML_NO_XPATH
7137 // STL replacements
7138 PUGI__NS_BEGIN
7139 	struct equal_to
7140 	{
operator ()equal_to7141 		template <typename T> bool operator()(const T& lhs, const T& rhs) const
7142 		{
7143 			return lhs == rhs;
7144 		}
7145 	};
7146 
7147 	struct not_equal_to
7148 	{
operator ()not_equal_to7149 		template <typename T> bool operator()(const T& lhs, const T& rhs) const
7150 		{
7151 			return lhs != rhs;
7152 		}
7153 	};
7154 
7155 	struct less
7156 	{
operator ()less7157 		template <typename T> bool operator()(const T& lhs, const T& rhs) const
7158 		{
7159 			return lhs < rhs;
7160 		}
7161 	};
7162 
7163 	struct less_equal
7164 	{
operator ()less_equal7165 		template <typename T> bool operator()(const T& lhs, const T& rhs) const
7166 		{
7167 			return lhs <= rhs;
7168 		}
7169 	};
7170 
swap(T & lhs,T & rhs)7171 	template <typename T> void swap(T& lhs, T& rhs)
7172 	{
7173 		T temp = lhs;
7174 		lhs = rhs;
7175 		rhs = temp;
7176 	}
7177 
7178 	template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
7179 	{
7180 		I result = begin;
7181 
7182 		for (I it = begin + 1; it != end; ++it)
7183 			if (pred(*it, *result))
7184 				result = it;
7185 
7186 		return result;
7187 	}
7188 
reverse(I begin,I end)7189 	template <typename I> void reverse(I begin, I end)
7190 	{
7191 		while (end - begin > 1) swap(*begin++, *--end);
7192 	}
7193 
unique(I begin,I end)7194 	template <typename I> I unique(I begin, I end)
7195 	{
7196 		// fast skip head
7197 		while (end - begin > 1 && *begin != *(begin + 1)) begin++;
7198 
7199 		if (begin == end) return begin;
7200 
7201 		// last written element
7202 		I write = begin++;
7203 
7204 		// merge unique elements
7205 		while (begin != end)
7206 		{
7207 			if (*begin != *write)
7208 				*++write = *begin++;
7209 			else
7210 				begin++;
7211 		}
7212 
7213 		// past-the-end (write points to live element)
7214 		return write + 1;
7215 	}
7216 
copy_backwards(I begin,I end,I target)7217 	template <typename I> void copy_backwards(I begin, I end, I target)
7218 	{
7219 		while (begin != end) *--target = *--end;
7220 	}
7221 
insertion_sort(I begin,I end,const Pred & pred,T *)7222 	template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
7223 	{
7224 		assert(begin != end);
7225 
7226 		for (I it = begin + 1; it != end; ++it)
7227 		{
7228 			T val = *it;
7229 
7230 			if (pred(val, *begin))
7231 			{
7232 				// move to front
7233 				copy_backwards(begin, it, it + 1);
7234 				*begin = val;
7235 			}
7236 			else
7237 			{
7238 				I hole = it;
7239 
7240 				// move hole backwards
7241 				while (pred(val, *(hole - 1)))
7242 				{
7243 					*hole = *(hole - 1);
7244 					hole--;
7245 				}
7246 
7247 				// fill hole with element
7248 				*hole = val;
7249 			}
7250 		}
7251 	}
7252 
7253 	// std variant for elements with ==
partition(I begin,I middle,I end,const Pred & pred,I * out_eqbeg,I * out_eqend)7254 	template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
7255 	{
7256 		I eqbeg = middle, eqend = middle + 1;
7257 
7258 		// expand equal range
7259 		while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg;
7260 		while (eqend != end && *eqend == *eqbeg) ++eqend;
7261 
7262 		// process outer elements
7263 		I ltend = eqbeg, gtbeg = eqend;
7264 
7265 		for (;;)
7266 		{
7267 			// find the element from the right side that belongs to the left one
7268 			for (; gtbeg != end; ++gtbeg)
7269 				if (!pred(*eqbeg, *gtbeg))
7270 				{
7271 					if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++);
7272 					else break;
7273 				}
7274 
7275 			// find the element from the left side that belongs to the right one
7276 			for (; ltend != begin; --ltend)
7277 				if (!pred(*(ltend - 1), *eqbeg))
7278 				{
7279 					if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg);
7280 					else break;
7281 				}
7282 
7283 			// scanned all elements
7284 			if (gtbeg == end && ltend == begin)
7285 			{
7286 				*out_eqbeg = eqbeg;
7287 				*out_eqend = eqend;
7288 				return;
7289 			}
7290 
7291 			// make room for elements by moving equal area
7292 			if (gtbeg == end)
7293 			{
7294 				if (--ltend != --eqbeg) swap(*ltend, *eqbeg);
7295 				swap(*eqbeg, *--eqend);
7296 			}
7297 			else if (ltend == begin)
7298 			{
7299 				if (eqend != gtbeg) swap(*eqbeg, *eqend);
7300 				++eqend;
7301 				swap(*gtbeg++, *eqbeg++);
7302 			}
7303 			else swap(*gtbeg++, *--ltend);
7304 		}
7305 	}
7306 
median3(I first,I middle,I last,const Pred & pred)7307 	template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
7308 	{
7309 		if (pred(*middle, *first)) swap(*middle, *first);
7310 		if (pred(*last, *middle)) swap(*last, *middle);
7311 		if (pred(*middle, *first)) swap(*middle, *first);
7312 	}
7313 
median(I first,I middle,I last,const Pred & pred)7314 	template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
7315 	{
7316 		if (last - first <= 40)
7317 		{
7318 			// median of three for small chunks
7319 			median3(first, middle, last, pred);
7320 		}
7321 		else
7322 		{
7323 			// median of nine
7324 			size_t step = (last - first + 1) / 8;
7325 
7326 			median3(first, first + step, first + 2 * step, pred);
7327 			median3(middle - step, middle, middle + step, pred);
7328 			median3(last - 2 * step, last - step, last, pred);
7329 			median3(first + step, middle, last - step, pred);
7330 		}
7331 	}
7332 
sort(I begin,I end,const Pred & pred)7333 	template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
7334 	{
7335 		// sort large chunks
7336 		while (end - begin > 32)
7337 		{
7338 			// find median element
7339 			I middle = begin + (end - begin) / 2;
7340 			median(begin, middle, end - 1, pred);
7341 
7342 			// partition in three chunks (< = >)
7343 			I eqbeg, eqend;
7344 			partition(begin, middle, end, pred, &eqbeg, &eqend);
7345 
7346 			// loop on larger half
7347 			if (eqbeg - begin > end - eqend)
7348 			{
7349 				sort(eqend, end, pred);
7350 				end = eqbeg;
7351 			}
7352 			else
7353 			{
7354 				sort(begin, eqbeg, pred);
7355 				begin = eqend;
7356 			}
7357 		}
7358 
7359 		// insertion sort small chunk
7360 		if (begin != end) insertion_sort(begin, end, pred, &*begin);
7361 	}
7362 PUGI__NS_END
7363 
7364 // Allocator used for AST and evaluation stacks
7365 PUGI__NS_BEGIN
7366 	static const size_t xpath_memory_page_size =
7367 	#ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
7368 		PUGIXML_MEMORY_XPATH_PAGE_SIZE
7369 	#else
7370 		4096
7371 	#endif
7372 		;
7373 
7374 	static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*);
7375 
7376 	struct xpath_memory_block
7377 	{
7378 		xpath_memory_block* next;
7379 		size_t capacity;
7380 
7381 		union
7382 		{
7383 			char data[xpath_memory_page_size];
7384 			double alignment;
7385 		};
7386 	};
7387 
7388 	class xpath_allocator
7389 	{
7390 		xpath_memory_block* _root;
7391 		size_t _root_size;
7392 
7393 	public:
7394 	#ifdef PUGIXML_NO_EXCEPTIONS
7395 		jmp_buf* error_handler;
7396 	#endif
7397 
xpath_allocator(xpath_memory_block * root,size_t root_size=0)7398 		xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size)
7399 		{
7400 		#ifdef PUGIXML_NO_EXCEPTIONS
7401 			error_handler = 0;
7402 		#endif
7403 		}
7404 
allocate_nothrow(size_t size)7405 		void* allocate_nothrow(size_t size)
7406 		{
7407 			// round size up to block alignment boundary
7408 			size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7409 
7410 			if (_root_size + size <= _root->capacity)
7411 			{
7412 				void* buf = &_root->data[0] + _root_size;
7413 				_root_size += size;
7414 				return buf;
7415 			}
7416 			else
7417 			{
7418 				// make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
7419 				size_t block_capacity_base = sizeof(_root->data);
7420 				size_t block_capacity_req = size + block_capacity_base / 4;
7421 				size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;
7422 
7423 				size_t block_size = block_capacity + offsetof(xpath_memory_block, data);
7424 
7425 				xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
7426 				if (!block) return 0;
7427 
7428 				block->next = _root;
7429 				block->capacity = block_capacity;
7430 
7431 				_root = block;
7432 				_root_size = size;
7433 
7434 				return block->data;
7435 			}
7436 		}
7437 
allocate(size_t size)7438 		void* allocate(size_t size)
7439 		{
7440 			void* result = allocate_nothrow(size);
7441 
7442 			if (!result)
7443 			{
7444 			#ifdef PUGIXML_NO_EXCEPTIONS
7445 				assert(error_handler);
7446 				longjmp(*error_handler, 1);
7447 			#else
7448 				throw std::bad_alloc();
7449 			#endif
7450 			}
7451 
7452 			return result;
7453 		}
7454 
		// Resizes the most recently allocated object from old_size to new_size bytes and returns
		// its (possibly different) address. With ptr == 0 this is a plain allocation of new_size.
		// Preconditions (asserted): ptr, if non-null, is the last object in the root page, and when
		// the data has to move to another location new_size is not smaller than old_size.
		// Failure is reported by allocate() (longjmp or std::bad_alloc), never by a null return.
		void* reallocate(void* ptr, size_t old_size, size_t new_size)
		{
			// round size up to block alignment boundary
			// (the mask arithmetic presumes xpath_memory_block_alignment is a power of two)
			old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
			new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);

			// we can only reallocate the last object
			assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);

			// adjust root size so that we have not allocated the object at all
			// (only_object is captured first: true when the old object accounts for every used byte of the root page)
			bool only_object = (_root_size == old_size);

			if (ptr) _root_size -= old_size;

			// allocate a new version (this will obviously reuse the memory if possible)
			void* result = allocate(new_size);
			assert(result);

			// we have a new block
			// (result differs from ptr, so the bytes were not reused in place and must be moved)
			if (result != ptr && ptr)
			{
				// copy old data
				assert(new_size >= old_size);
				memcpy(result, ptr, old_size);

				// free the previous page if it had no other objects
				if (only_object)
				{
					// the new object sits at the start of the current root page, so the page that
					// held the old object is the one linked right behind it
					assert(_root->data == result);
					assert(_root->next);

					xpath_memory_block* next = _root->next->next;

					if (next)
					{
						// deallocate the whole page, unless it was the first one
						// (a page with no successor is the end of the chain and is left alone)
						xml_memory::deallocate(_root->next);
						_root->next = next;
					}
				}
			}

			return result;
		}
7499 
revert(const xpath_allocator & state)7500 		void revert(const xpath_allocator& state)
7501 		{
7502 			// free all new pages
7503 			xpath_memory_block* cur = _root;
7504 
7505 			while (cur != state._root)
7506 			{
7507 				xpath_memory_block* next = cur->next;
7508 
7509 				xml_memory::deallocate(cur);
7510 
7511 				cur = next;
7512 			}
7513 
7514 			// restore state
7515 			_root = state._root;
7516 			_root_size = state._root_size;
7517 		}
7518 
release()7519 		void release()
7520 		{
7521 			xpath_memory_block* cur = _root;
7522 			assert(cur);
7523 
7524 			while (cur->next)
7525 			{
7526 				xpath_memory_block* next = cur->next;
7527 
7528 				xml_memory::deallocate(cur);
7529 
7530 				cur = next;
7531 			}
7532 		}
7533 	};
7534 
7535 	struct xpath_allocator_capture
7536 	{
xpath_allocator_capturexpath_allocator_capture7537 		xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
7538 		{
7539 		}
7540 
~xpath_allocator_capturexpath_allocator_capture7541 		~xpath_allocator_capture()
7542 		{
7543 			_target->revert(_state);
7544 		}
7545 
7546 		xpath_allocator* _target;
7547 		xpath_allocator _state;
7548 	};
7549 
	// Pair of allocator pointers passed around together; xpath_stack_data points them at its
	// own `result` and `temp` allocators.
	struct xpath_stack
	{
		xpath_allocator* result;
		xpath_allocator* temp;
	};
7555 
7556 	struct xpath_stack_data
7557 	{
7558 		xpath_memory_block blocks[2];
7559 		xpath_allocator result;
7560 		xpath_allocator temp;
7561 		xpath_stack stack;
7562 
7563 	#ifdef PUGIXML_NO_EXCEPTIONS
7564 		jmp_buf error_handler;
7565 	#endif
7566 
xpath_stack_dataxpath_stack_data7567 		xpath_stack_data(): result(blocks + 0), temp(blocks + 1)
7568 		{
7569 			blocks[0].next = blocks[1].next = 0;
7570 			blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
7571 
7572 			stack.result = &result;
7573 			stack.temp = &temp;
7574 
7575 		#ifdef PUGIXML_NO_EXCEPTIONS
7576 			result.error_handler = temp.error_handler = &error_handler;
7577 		#endif
7578 		}
7579 
~xpath_stack_dataxpath_stack_data7580 		~xpath_stack_data()
7581 		{
7582 			result.release();
7583 			temp.release();
7584 		}
7585 	};
7586 PUGI__NS_END
7587 
7588 // String class
7589 PUGI__NS_BEGIN
7590 	class xpath_string
7591 	{
7592 		const char_t* _buffer;
7593 		bool _uses_heap;
7594 		size_t _length_heap;
7595 
duplicate_string(const char_t * string,size_t length,xpath_allocator * alloc)7596 		static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
7597 		{
7598 			char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
7599 			assert(result);
7600 
7601 			memcpy(result, string, length * sizeof(char_t));
7602 			result[length] = 0;
7603 
7604 			return result;
7605 		}
7606 
xpath_string(const char_t * buffer,bool uses_heap_,size_t length_heap)7607 		xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap)
7608 		{
7609 		}
7610 
7611 	public:
from_const(const char_t * str)7612 		static xpath_string from_const(const char_t* str)
7613 		{
7614 			return xpath_string(str, false, 0);
7615 		}
7616 
from_heap_preallocated(const char_t * begin,const char_t * end)7617 		static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end)
7618 		{
7619 			assert(begin <= end && *end == 0);
7620 
7621 			return xpath_string(begin, true, static_cast<size_t>(end - begin));
7622 		}
7623 
from_heap(const char_t * begin,const char_t * end,xpath_allocator * alloc)7624 		static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc)
7625 		{
7626 			assert(begin <= end);
7627 
7628 			size_t length = static_cast<size_t>(end - begin);
7629 
7630 			return length == 0 ? xpath_string() : xpath_string(duplicate_string(begin, length, alloc), true, length);
7631 		}
7632 
xpath_string()7633 		xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0)
7634 		{
7635 		}
7636 
append(const xpath_string & o,xpath_allocator * alloc)7637 		void append(const xpath_string& o, xpath_allocator* alloc)
7638 		{
7639 			// skip empty sources
7640 			if (!*o._buffer) return;
7641 
7642 			// fast append for constant empty target and constant source
7643 			if (!*_buffer && !_uses_heap && !o._uses_heap)
7644 			{
7645 				_buffer = o._buffer;
7646 			}
7647 			else
7648 			{
7649 				// need to make heap copy
7650 				size_t target_length = length();
7651 				size_t source_length = o.length();
7652 				size_t result_length = target_length + source_length;
7653 
7654 				// allocate new buffer
7655 				char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
7656 				assert(result);
7657 
7658 				// append first string to the new buffer in case there was no reallocation
7659 				if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
7660 
7661 				// append second string to the new buffer
7662 				memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
7663 				result[result_length] = 0;
7664 
7665 				// finalize
7666 				_buffer = result;
7667 				_uses_heap = true;
7668 				_length_heap = result_length;
7669 			}
7670 		}
7671 
c_str() const7672 		const char_t* c_str() const
7673 		{
7674 			return _buffer;
7675 		}
7676 
length() const7677 		size_t length() const
7678 		{
7679 			return _uses_heap ? _length_heap : strlength(_buffer);
7680 		}
7681 
data(xpath_allocator * alloc)7682 		char_t* data(xpath_allocator* alloc)
7683 		{
7684 			// make private heap copy
7685 			if (!_uses_heap)
7686 			{
7687 				size_t length_ = strlength(_buffer);
7688 
7689 				_buffer = duplicate_string(_buffer, length_, alloc);
7690 				_uses_heap = true;
7691 				_length_heap = length_;
7692 			}
7693 
7694 			return const_cast<char_t*>(_buffer);
7695 		}
7696 
empty() const7697 		bool empty() const
7698 		{
7699 			return *_buffer == 0;
7700 		}
7701 
operator ==(const xpath_string & o) const7702 		bool operator==(const xpath_string& o) const
7703 		{
7704 			return strequal(_buffer, o._buffer);
7705 		}
7706 
operator !=(const xpath_string & o) const7707 		bool operator!=(const xpath_string& o) const
7708 		{
7709 			return !strequal(_buffer, o._buffer);
7710 		}
7711 
uses_heap() const7712 		bool uses_heap() const
7713 		{
7714 			return _uses_heap;
7715 		}
7716 	};
7717 PUGI__NS_END
7718 
7719 PUGI__NS_BEGIN
starts_with(const char_t * string,const char_t * pattern)7720 	PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
7721 	{
7722 		while (*pattern && *string == *pattern)
7723 		{
7724 			string++;
7725 			pattern++;
7726 		}
7727 
7728 		return *pattern == 0;
7729 	}
7730 
find_char(const char_t * s,char_t c)7731 	PUGI__FN const char_t* find_char(const char_t* s, char_t c)
7732 	{
7733 	#ifdef PUGIXML_WCHAR_MODE
7734 		return wcschr(s, c);
7735 	#else
7736 		return strchr(s, c);
7737 	#endif
7738 	}
7739 
find_substring(const char_t * s,const char_t * p)7740 	PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
7741 	{
7742 	#ifdef PUGIXML_WCHAR_MODE
7743 		// MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
7744 		return (*p == 0) ? s : wcsstr(s, p);
7745 	#else
7746 		return strstr(s, p);
7747 	#endif
7748 	}
7749 
7750 	// Converts symbol to lower case, if it is an ASCII one
tolower_ascii(char_t ch)7751 	PUGI__FN char_t tolower_ascii(char_t ch)
7752 	{
7753 		return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
7754 	}
7755 
string_value(const xpath_node & na,xpath_allocator * alloc)7756 	PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
7757 	{
7758 		if (na.attribute())
7759 			return xpath_string::from_const(na.attribute().value());
7760 		else
7761 		{
7762 			xml_node n = na.node();
7763 
7764 			switch (n.type())
7765 			{
7766 			case node_pcdata:
7767 			case node_cdata:
7768 			case node_comment:
7769 			case node_pi:
7770 				return xpath_string::from_const(n.value());
7771 
7772 			case node_document:
7773 			case node_element:
7774 			{
7775 				xpath_string result;
7776 
7777 				// element nodes can have value if parse_embed_pcdata was used
7778 				if (n.value()[0])
7779 					result.append(xpath_string::from_const(n.value()), alloc);
7780 
7781 				xml_node cur = n.first_child();
7782 
7783 				while (cur && cur != n)
7784 				{
7785 					if (cur.type() == node_pcdata || cur.type() == node_cdata)
7786 						result.append(xpath_string::from_const(cur.value()), alloc);
7787 
7788 					if (cur.first_child())
7789 						cur = cur.first_child();
7790 					else if (cur.next_sibling())
7791 						cur = cur.next_sibling();
7792 					else
7793 					{
7794 						while (!cur.next_sibling() && cur != n)
7795 							cur = cur.parent();
7796 
7797 						if (cur != n) cur = cur.next_sibling();
7798 					}
7799 				}
7800 
7801 				return result;
7802 			}
7803 
7804 			default:
7805 				return xpath_string();
7806 			}
7807 		}
7808 	}
7809 
node_is_before_sibling(xml_node_struct * ln,xml_node_struct * rn)7810 	PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
7811 	{
7812 		assert(ln->parent == rn->parent);
7813 
7814 		// there is no common ancestor (the shared parent is null), nodes are from different documents
7815 		if (!ln->parent) return ln < rn;
7816 
7817 		// determine sibling order
7818 		xml_node_struct* ls = ln;
7819 		xml_node_struct* rs = rn;
7820 
7821 		while (ls && rs)
7822 		{
7823 			if (ls == rn) return true;
7824 			if (rs == ln) return false;
7825 
7826 			ls = ls->next_sibling;
7827 			rs = rs->next_sibling;
7828 		}
7829 
7830 		// if rn sibling chain ended ln must be before rn
7831 		return !rs;
7832 	}
7833 
node_is_before(xml_node_struct * ln,xml_node_struct * rn)7834 	PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn)
7835 	{
7836 		// find common ancestor at the same depth, if any
7837 		xml_node_struct* lp = ln;
7838 		xml_node_struct* rp = rn;
7839 
7840 		while (lp && rp && lp->parent != rp->parent)
7841 		{
7842 			lp = lp->parent;
7843 			rp = rp->parent;
7844 		}
7845 
7846 		// parents are the same!
7847 		if (lp && rp) return node_is_before_sibling(lp, rp);
7848 
7849 		// nodes are at different depths, need to normalize heights
7850 		bool left_higher = !lp;
7851 
7852 		while (lp)
7853 		{
7854 			lp = lp->parent;
7855 			ln = ln->parent;
7856 		}
7857 
7858 		while (rp)
7859 		{
7860 			rp = rp->parent;
7861 			rn = rn->parent;
7862 		}
7863 
7864 		// one node is the ancestor of the other
7865 		if (ln == rn) return left_higher;
7866 
7867 		// find common ancestor... again
7868 		while (ln->parent != rn->parent)
7869 		{
7870 			ln = ln->parent;
7871 			rn = rn->parent;
7872 		}
7873 
7874 		return node_is_before_sibling(ln, rn);
7875 	}
7876 
node_is_ancestor(xml_node_struct * parent,xml_node_struct * node)7877 	PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
7878 	{
7879 		while (node && node != parent) node = node->parent;
7880 
7881 		return parent && node == parent;
7882 	}
7883 
document_buffer_order(const xpath_node & xnode)7884 	PUGI__FN const void* document_buffer_order(const xpath_node& xnode)
7885 	{
7886 		xml_node_struct* node = xnode.node().internal_object();
7887 
7888 		if (node)
7889 		{
7890 			if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0)
7891 			{
7892 				if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name;
7893 				if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value;
7894 			}
7895 
7896 			return 0;
7897 		}
7898 
7899 		xml_attribute_struct* attr = xnode.attribute().internal_object();
7900 
7901 		if (attr)
7902 		{
7903 			if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0)
7904 			{
7905 				if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name;
7906 				if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
7907 			}
7908 
7909 			return 0;
7910 		}
7911 
7912 		return 0;
7913 	}
7914 
7915 	struct document_order_comparator
7916 	{
operator ()document_order_comparator7917 		bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
7918 		{
7919 			// optimized document order based check
7920 			const void* lo = document_buffer_order(lhs);
7921 			const void* ro = document_buffer_order(rhs);
7922 
7923 			if (lo && ro) return lo < ro;
7924 
7925 			// slow comparison
7926 			xml_node ln = lhs.node(), rn = rhs.node();
7927 
7928 			// compare attributes
7929 			if (lhs.attribute() && rhs.attribute())
7930 			{
7931 				// shared parent
7932 				if (lhs.parent() == rhs.parent())
7933 				{
7934 					// determine sibling order
7935 					for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
7936 						if (a == rhs.attribute())
7937 							return true;
7938 
7939 					return false;
7940 				}
7941 
7942 				// compare attribute parents
7943 				ln = lhs.parent();
7944 				rn = rhs.parent();
7945 			}
7946 			else if (lhs.attribute())
7947 			{
7948 				// attributes go after the parent element
7949 				if (lhs.parent() == rhs.node()) return false;
7950 
7951 				ln = lhs.parent();
7952 			}
7953 			else if (rhs.attribute())
7954 			{
7955 				// attributes go after the parent element
7956 				if (rhs.parent() == lhs.node()) return true;
7957 
7958 				rn = rhs.parent();
7959 			}
7960 
7961 			if (ln == rn) return false;
7962 
7963 			if (!ln || !rn) return ln < rn;
7964 
7965 			return node_is_before(ln.internal_object(), rn.internal_object());
7966 		}
7967 	};
7968 
7969 	struct duplicate_comparator
7970 	{
operator ()duplicate_comparator7971 		bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
7972 		{
7973 			if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;
7974 			else return rhs.attribute() ? false : lhs.node() < rhs.node();
7975 		}
7976 	};
7977 
	// Returns a NaN value as a double.
	// When the preprocessor check indicates a 32-bit binary float (radix 2, max exponent 128,
	// 24 mantissa digits) or __STDC_IEC_559__ is defined, the bit pattern 0x7fc00000 is stored
	// through a union and read back as a float; otherwise NaN is computed at run time as 0.0 / 0.0.
	PUGI__FN double gen_nan()
	{
	#if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
		// the union below relies on float and uint32_t having the same size
		PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t));
		typedef uint32_t UI; // BCC5 workaround
		union { float f; UI i; } u;
		u.i = 0x7fc00000;
		return u.f;
	#else
		// fallback
		// (volatile presumably keeps the compiler from folding or rejecting the division - TODO confirm)
		const volatile double zero = 0.0;
		return zero / zero;
	#endif
	}
7992 
is_nan(double value)7993 	PUGI__FN bool is_nan(double value)
7994 	{
7995 	#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
7996 		return !!_isnan(value);
7997 	#elif defined(fpclassify) && defined(FP_NAN)
7998 		return fpclassify(value) == FP_NAN;
7999 	#else
8000 		// fallback
8001 		const volatile double v = value;
8002 		return v != v;
8003 	#endif
8004 	}
8005 
convert_number_to_string_special(double value)8006 	PUGI__FN const char_t* convert_number_to_string_special(double value)
8007 	{
8008 	#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
8009 		if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
8010 		if (_isnan(value)) return PUGIXML_TEXT("NaN");
8011 		return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8012 	#elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
8013 		switch (fpclassify(value))
8014 		{
8015 		case FP_NAN:
8016 			return PUGIXML_TEXT("NaN");
8017 
8018 		case FP_INFINITE:
8019 			return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8020 
8021 		case FP_ZERO:
8022 			return PUGIXML_TEXT("0");
8023 
8024 		default:
8025 			return 0;
8026 		}
8027 	#else
8028 		// fallback
8029 		const volatile double v = value;
8030 
8031 		if (v == 0) return PUGIXML_TEXT("0");
8032 		if (v != v) return PUGIXML_TEXT("NaN");
8033 		if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8034 		return 0;
8035 	#endif
8036 	}
8037 
convert_number_to_boolean(double value)8038 	PUGI__FN bool convert_number_to_boolean(double value)
8039 	{
8040 		return (value != 0 && !is_nan(value));
8041 	}
8042 
truncate_zeros(char * begin,char * end)8043 	PUGI__FN void truncate_zeros(char* begin, char* end)
8044 	{
8045 		while (begin != end && end[-1] == '0') end--;
8046 
8047 		*end = 0;
8048 	}
8049 
	// gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
	// Both variants write into `buffer` (of `buffer_size` chars), strip trailing zero digits,
	// store a pointer to the zero-terminated digit string in *out_mantissa and the decimal
	// exponent in *out_exponent. No sign character is part of the returned mantissa.
#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
	PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
	{
		// get base values
		// (requests DBL_DIG + 1 digits; the sign reported by _ecvt_s is not used afterwards)
		int sign, exponent;
		_ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);

		// truncate redundant zeros
		truncate_zeros(buffer, buffer + strlen(buffer));

		// fill results
		*out_mantissa = buffer;
		*out_exponent = exponent;
	}
#else
	PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
	{
		// get a scientific notation value with IEEE DBL_DIG decimals
		// NOTE(review): sprintf is unbounded; the size is only verified after the fact by the assert below
		sprintf(buffer, "%.*e", DBL_DIG, value);
		assert(strlen(buffer) < buffer_size);
		// references buffer_size outside of the assert (presumably to avoid an unused-parameter warning when asserts are disabled)
		(void)!buffer_size;

		// get the exponent (possibly negative)
		char* exponent_string = strchr(buffer, 'e');
		assert(exponent_string);

		int exponent = atoi(exponent_string + 1);

		// extract mantissa string: skip sign
		// (expects the form d.ddd with a non-zero leading digit, as asserted)
		char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
		assert(mantissa[0] != '0' && mantissa[1] == '.');

		// divide mantissa by 10 to eliminate integer part
		// (the leading digit overwrites the '.', the string then starts one char later, and the exponent grows by one)
		mantissa[1] = mantissa[0];
		mantissa++;
		exponent++;

		// remove extra mantissa digits and zero-terminate mantissa
		// (the terminator lands at or before the 'e', cutting the exponent text off)
		truncate_zeros(mantissa, exponent_string);

		// fill results
		*out_mantissa = mantissa;
		*out_exponent = exponent;
	}
#endif
8096 
convert_number_to_string(double value,xpath_allocator * alloc)8097 	PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
8098 	{
8099 		// try special number conversion
8100 		const char_t* special = convert_number_to_string_special(value);
8101 		if (special) return xpath_string::from_const(special);
8102 
8103 		// get mantissa + exponent form
8104 		char mantissa_buffer[32];
8105 
8106 		char* mantissa;
8107 		int exponent;
8108 		convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);
8109 
8110 		// allocate a buffer of suitable length for the number
8111 		size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
8112 		char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
8113 		assert(result);
8114 
8115 		// make the number!
8116 		char_t* s = result;
8117 
8118 		// sign
8119 		if (value < 0) *s++ = '-';
8120 
8121 		// integer part
8122 		if (exponent <= 0)
8123 		{
8124 			*s++ = '0';
8125 		}
8126 		else
8127 		{
8128 			while (exponent > 0)
8129 			{
8130 				assert(*mantissa == 0 || static_cast<unsigned int>(static_cast<unsigned int>(*mantissa) - '0') <= 9);
8131 				*s++ = *mantissa ? *mantissa++ : '0';
8132 				exponent--;
8133 			}
8134 		}
8135 
8136 		// fractional part
8137 		if (*mantissa)
8138 		{
8139 			// decimal point
8140 			*s++ = '.';
8141 
8142 			// extra zeroes from negative exponent
8143 			while (exponent < 0)
8144 			{
8145 				*s++ = '0';
8146 				exponent++;
8147 			}
8148 
8149 			// extra mantissa digits
8150 			while (*mantissa)
8151 			{
8152 				assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
8153 				*s++ = *mantissa++;
8154 			}
8155 		}
8156 
8157 		// zero-terminate
8158 		assert(s < result + result_size);
8159 		*s = 0;
8160 
8161 		return xpath_string::from_heap_preallocated(result, s);
8162 	}
8163 
check_string_to_number_format(const char_t * string)8164 	PUGI__FN bool check_string_to_number_format(const char_t* string)
8165 	{
8166 		// parse leading whitespace
8167 		while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8168 
8169 		// parse sign
8170 		if (*string == '-') ++string;
8171 
8172 		if (!*string) return false;
8173 
8174 		// if there is no integer part, there should be a decimal part with at least one digit
8175 		if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
8176 
8177 		// parse integer part
8178 		while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8179 
8180 		// parse decimal part
8181 		if (*string == '.')
8182 		{
8183 			++string;
8184 
8185 			while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8186 		}
8187 
8188 		// parse trailing whitespace
8189 		while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8190 
8191 		return *string == 0;
8192 	}
8193 
convert_string_to_number(const char_t * string)8194 	PUGI__FN double convert_string_to_number(const char_t* string)
8195 	{
8196 		// check string format
8197 		if (!check_string_to_number_format(string)) return gen_nan();
8198 
8199 		// parse string
8200 	#ifdef PUGIXML_WCHAR_MODE
8201 		return wcstod(string, 0);
8202 	#else
8203 		return strtod(string, 0);
8204 	#endif
8205 	}
8206 
convert_string_to_number_scratch(char_t (& buffer)[32],const char_t * begin,const char_t * end,double * out_result)8207 	PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
8208 	{
8209 		size_t length = static_cast<size_t>(end - begin);
8210 		char_t* scratch = buffer;
8211 
8212 		if (length >= sizeof(buffer) / sizeof(buffer[0]))
8213 		{
8214 			// need to make dummy on-heap copy
8215 			scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8216 			if (!scratch) return false;
8217 		}
8218 
8219 		// copy string to zero-terminated buffer and perform conversion
8220 		memcpy(scratch, begin, length * sizeof(char_t));
8221 		scratch[length] = 0;
8222 
8223 		*out_result = convert_string_to_number(scratch);
8224 
8225 		// free dummy buffer
8226 		if (scratch != buffer) xml_memory::deallocate(scratch);
8227 
8228 		return true;
8229 	}
8230 
round_nearest(double value)8231 	PUGI__FN double round_nearest(double value)
8232 	{
8233 		return floor(value + 0.5);
8234 	}
8235 
round_nearest_nzero(double value)8236 	PUGI__FN double round_nearest_nzero(double value)
8237 	{
8238 		// same as round_nearest, but returns -0 for [-0.5, -0]
8239 		// ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
8240 		return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
8241 	}
8242 
qualified_name(const xpath_node & node)8243 	PUGI__FN const char_t* qualified_name(const xpath_node& node)
8244 	{
8245 		return node.attribute() ? node.attribute().name() : node.node().name();
8246 	}
8247 
local_name(const xpath_node & node)8248 	PUGI__FN const char_t* local_name(const xpath_node& node)
8249 	{
8250 		const char_t* name = qualified_name(node);
8251 		const char_t* p = find_char(name, ':');
8252 
8253 		return p ? p + 1 : name;
8254 	}
8255 
8256 	struct namespace_uri_predicate
8257 	{
8258 		const char_t* prefix;
8259 		size_t prefix_length;
8260 
namespace_uri_predicatenamespace_uri_predicate8261 		namespace_uri_predicate(const char_t* name)
8262 		{
8263 			const char_t* pos = find_char(name, ':');
8264 
8265 			prefix = pos ? name : 0;
8266 			prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
8267 		}
8268 
operator ()namespace_uri_predicate8269 		bool operator()(xml_attribute a) const
8270 		{
8271 			const char_t* name = a.name();
8272 
8273 			if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
8274 
8275 			return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
8276 		}
8277 	};
8278 
namespace_uri(xml_node node)8279 	PUGI__FN const char_t* namespace_uri(xml_node node)
8280 	{
8281 		namespace_uri_predicate pred = node.name();
8282 
8283 		xml_node p = node;
8284 
8285 		while (p)
8286 		{
8287 			xml_attribute a = p.find_attribute(pred);
8288 
8289 			if (a) return a.value();
8290 
8291 			p = p.parent();
8292 		}
8293 
8294 		return PUGIXML_TEXT("");
8295 	}
8296 
namespace_uri(xml_attribute attr,xml_node parent)8297 	PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
8298 	{
8299 		namespace_uri_predicate pred = attr.name();
8300 
8301 		// Default namespace does not apply to attributes
8302 		if (!pred.prefix) return PUGIXML_TEXT("");
8303 
8304 		xml_node p = parent;
8305 
8306 		while (p)
8307 		{
8308 			xml_attribute a = p.find_attribute(pred);
8309 
8310 			if (a) return a.value();
8311 
8312 			p = p.parent();
8313 		}
8314 
8315 		return PUGIXML_TEXT("");
8316 	}
8317 
namespace_uri(const xpath_node & node)8318 	PUGI__FN const char_t* namespace_uri(const xpath_node& node)
8319 	{
8320 		return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
8321 	}
8322 
normalize_space(char_t * buffer)8323 	PUGI__FN char_t* normalize_space(char_t* buffer)
8324 	{
8325 		char_t* write = buffer;
8326 
8327 		for (char_t* it = buffer; *it; )
8328 		{
8329 			char_t ch = *it++;
8330 
8331 			if (PUGI__IS_CHARTYPE(ch, ct_space))
8332 			{
8333 				// replace whitespace sequence with single space
8334 				while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
8335 
8336 				// avoid leading spaces
8337 				if (write != buffer) *write++ = ' ';
8338 			}
8339 			else *write++ = ch;
8340 		}
8341 
8342 		// remove trailing space
8343 		if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
8344 
8345 		// zero-terminate
8346 		*write = 0;
8347 
8348 		return write;
8349 	}
8350 
translate(char_t * buffer,const char_t * from,const char_t * to,size_t to_length)8351 	PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
8352 	{
8353 		char_t* write = buffer;
8354 
8355 		while (*buffer)
8356 		{
8357 			PUGI__DMC_VOLATILE char_t ch = *buffer++;
8358 
8359 			const char_t* pos = find_char(from, ch);
8360 
8361 			if (!pos)
8362 				*write++ = ch; // do not process
8363 			else if (static_cast<size_t>(pos - from) < to_length)
8364 				*write++ = to[pos - from]; // replace
8365 		}
8366 
8367 		// zero-terminate
8368 		*write = 0;
8369 
8370 		return write;
8371 	}
8372 
translate_table_generate(xpath_allocator * alloc,const char_t * from,const char_t * to)8373 	PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
8374 	{
8375 		unsigned char table[128] = {0};
8376 
8377 		while (*from)
8378 		{
8379 			unsigned int fc = static_cast<unsigned int>(*from);
8380 			unsigned int tc = static_cast<unsigned int>(*to);
8381 
8382 			if (fc >= 128 || tc >= 128)
8383 				return 0;
8384 
8385 			// code=128 means "skip character"
8386 			if (!table[fc])
8387 				table[fc] = static_cast<unsigned char>(tc ? tc : 128);
8388 
8389 			from++;
8390 			if (tc) to++;
8391 		}
8392 
8393 		for (int i = 0; i < 128; ++i)
8394 			if (!table[i])
8395 				table[i] = static_cast<unsigned char>(i);
8396 
8397 		void* result = alloc->allocate_nothrow(sizeof(table));
8398 
8399 		if (result)
8400 		{
8401 			memcpy(result, table, sizeof(table));
8402 		}
8403 
8404 		return static_cast<unsigned char*>(result);
8405 	}
8406 
translate_table(char_t * buffer,const unsigned char * table)8407 	PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table)
8408 	{
8409 		char_t* write = buffer;
8410 
8411 		while (*buffer)
8412 		{
8413 			char_t ch = *buffer++;
8414 			unsigned int index = static_cast<unsigned int>(ch);
8415 
8416 			if (index < 128)
8417 			{
8418 				unsigned char code = table[index];
8419 
8420 				// code=128 means "skip character" (table size is 128 so 128 can be a special value)
8421 				// this code skips these characters without extra branches
8422 				*write = static_cast<char_t>(code);
8423 				write += 1 - (code >> 7);
8424 			}
8425 			else
8426 			{
8427 				*write++ = ch;
8428 			}
8429 		}
8430 
8431 		// zero-terminate
8432 		*write = 0;
8433 
8434 		return write;
8435 	}
8436 
is_xpath_attribute(const char_t * name)8437 	inline bool is_xpath_attribute(const char_t* name)
8438 	{
8439 		return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':'));
8440 	}
8441 
	// Boolean XPath variable; starts out as false.
	struct xpath_variable_boolean: xpath_variable
	{
		xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false)
		{
		}

		bool value;
		// first character of the name; new_xpath_variable allocates extra space behind the object
		// and copies the full name here, so this must stay the last member
		char_t name[1];
	};
8451 
	// Numeric XPath variable; starts out as 0.
	struct xpath_variable_number: xpath_variable
	{
		xpath_variable_number(): xpath_variable(xpath_type_number), value(0)
		{
		}

		double value;
		// first character of the name; new_xpath_variable allocates extra space behind the object
		// and copies the full name here, so this must stay the last member
		char_t name[1];
	};
8461 
	// String XPath variable; starts out with a null value pointer.
	struct xpath_variable_string: xpath_variable
	{
		xpath_variable_string(): xpath_variable(xpath_type_string), value(0)
		{
		}

		~xpath_variable_string()
		{
			// a non-null value is released through xml_memory
			if (value) xml_memory::deallocate(value);
		}

		char_t* value;
		// first character of the name; new_xpath_variable allocates extra space behind the object
		// and copies the full name here, so this must stay the last member
		char_t name[1];
	};
8476 
	// Node-set XPath variable; the value is a default-constructed xpath_node_set initially.
	struct xpath_variable_node_set: xpath_variable
	{
		xpath_variable_node_set(): xpath_variable(xpath_type_node_set)
		{
		}

		xpath_node_set value;
		// first character of the name; new_xpath_variable allocates extra space behind the object
		// and copies the full name here, so this must stay the last member
		char_t name[1];
	};
8486 
	// Default-constructed, immutable node set with internal linkage
	// (presumably handed out where a node-set reference is needed but no value exists - TODO confirm against users)
	static const xpath_node_set dummy_node_set;
8488 
hash_string(const char_t * str)8489 	PUGI__FN unsigned int hash_string(const char_t* str)
8490 	{
8491 		// Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
8492 		unsigned int result = 0;
8493 
8494 		while (*str)
8495 		{
8496 			result += static_cast<unsigned int>(*str++);
8497 			result += result << 10;
8498 			result ^= result >> 6;
8499 		}
8500 
8501 		result += result << 3;
8502 		result ^= result >> 11;
8503 		result += result << 15;
8504 
8505 		return result;
8506 	}
8507 
new_xpath_variable(const char_t * name)8508 	template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
8509 	{
8510 		size_t length = strlength(name);
8511 		if (length == 0) return 0; // empty variable names are invalid
8512 
8513 		// $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
8514 		void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
8515 		if (!memory) return 0;
8516 
8517 		T* result = new (memory) T();
8518 
8519 		memcpy(result->name, name, (length + 1) * sizeof(char_t));
8520 
8521 		return result;
8522 	}
8523 
new_xpath_variable(xpath_value_type type,const char_t * name)8524 	PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
8525 	{
8526 		switch (type)
8527 		{
8528 		case xpath_type_node_set:
8529 			return new_xpath_variable<xpath_variable_node_set>(name);
8530 
8531 		case xpath_type_number:
8532 			return new_xpath_variable<xpath_variable_number>(name);
8533 
8534 		case xpath_type_string:
8535 			return new_xpath_variable<xpath_variable_string>(name);
8536 
8537 		case xpath_type_boolean:
8538 			return new_xpath_variable<xpath_variable_boolean>(name);
8539 
8540 		default:
8541 			return 0;
8542 		}
8543 	}
8544 
delete_xpath_variable(T * var)8545 	template <typename T> PUGI__FN void delete_xpath_variable(T* var)
8546 	{
8547 		var->~T();
8548 		xml_memory::deallocate(var);
8549 	}
8550 
delete_xpath_variable(xpath_value_type type,xpath_variable * var)8551 	PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
8552 	{
8553 		switch (type)
8554 		{
8555 		case xpath_type_node_set:
8556 			delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
8557 			break;
8558 
8559 		case xpath_type_number:
8560 			delete_xpath_variable(static_cast<xpath_variable_number*>(var));
8561 			break;
8562 
8563 		case xpath_type_string:
8564 			delete_xpath_variable(static_cast<xpath_variable_string*>(var));
8565 			break;
8566 
8567 		case xpath_type_boolean:
8568 			delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
8569 			break;
8570 
8571 		default:
8572 			assert(false && "Invalid variable type");
8573 		}
8574 	}
8575 
copy_xpath_variable(xpath_variable * lhs,const xpath_variable * rhs)8576 	PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
8577 	{
8578 		switch (rhs->type())
8579 		{
8580 		case xpath_type_node_set:
8581 			return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
8582 
8583 		case xpath_type_number:
8584 			return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
8585 
8586 		case xpath_type_string:
8587 			return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
8588 
8589 		case xpath_type_boolean:
8590 			return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
8591 
8592 		default:
8593 			assert(false && "Invalid variable type");
8594 			return false;
8595 		}
8596 	}
8597 
get_variable_scratch(char_t (& buffer)[32],xpath_variable_set * set,const char_t * begin,const char_t * end,xpath_variable ** out_result)8598 	PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
8599 	{
8600 		size_t length = static_cast<size_t>(end - begin);
8601 		char_t* scratch = buffer;
8602 
8603 		if (length >= sizeof(buffer) / sizeof(buffer[0]))
8604 		{
8605 			// need to make dummy on-heap copy
8606 			scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8607 			if (!scratch) return false;
8608 		}
8609 
8610 		// copy string to zero-terminated buffer and perform lookup
8611 		memcpy(scratch, begin, length * sizeof(char_t));
8612 		scratch[length] = 0;
8613 
8614 		*out_result = set->get(scratch);
8615 
8616 		// free dummy buffer
8617 		if (scratch != buffer) xml_memory::deallocate(scratch);
8618 
8619 		return true;
8620 	}
8621 PUGI__NS_END
8622 
8623 // Internal node set class
8624 PUGI__NS_BEGIN
xpath_get_order(const xpath_node * begin,const xpath_node * end)8625 	PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
8626 	{
8627 		if (end - begin < 2)
8628 			return xpath_node_set::type_sorted;
8629 
8630 		document_order_comparator cmp;
8631 
8632 		bool first = cmp(begin[0], begin[1]);
8633 
8634 		for (const xpath_node* it = begin + 1; it + 1 < end; ++it)
8635 			if (cmp(it[0], it[1]) != first)
8636 				return xpath_node_set::type_unsorted;
8637 
8638 		return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse;
8639 	}
8640 
xpath_sort(xpath_node * begin,xpath_node * end,xpath_node_set::type_t type,bool rev)8641 	PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
8642 	{
8643 		xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
8644 
8645 		if (type == xpath_node_set::type_unsorted)
8646 		{
8647 			xpath_node_set::type_t sorted = xpath_get_order(begin, end);
8648 
8649 			if (sorted == xpath_node_set::type_unsorted)
8650 			{
8651 				sort(begin, end, document_order_comparator());
8652 
8653 				type = xpath_node_set::type_sorted;
8654 			}
8655 			else
8656 				type = sorted;
8657 		}
8658 
8659 		if (type != order) reverse(begin, end);
8660 
8661 		return order;
8662 	}
8663 
xpath_first(const xpath_node * begin,const xpath_node * end,xpath_node_set::type_t type)8664 	PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
8665 	{
8666 		if (begin == end) return xpath_node();
8667 
8668 		switch (type)
8669 		{
8670 		case xpath_node_set::type_sorted:
8671 			return *begin;
8672 
8673 		case xpath_node_set::type_sorted_reverse:
8674 			return *(end - 1);
8675 
8676 		case xpath_node_set::type_unsorted:
8677 			return *min_element(begin, end, document_order_comparator());
8678 
8679 		default:
8680 			assert(false && "Invalid node set type");
8681 			return xpath_node();
8682 		}
8683 	}
8684 
8685 	class xpath_node_set_raw
8686 	{
8687 		xpath_node_set::type_t _type;
8688 
8689 		xpath_node* _begin;
8690 		xpath_node* _end;
8691 		xpath_node* _eos;
8692 
8693 	public:
xpath_node_set_raw()8694 		xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
8695 		{
8696 		}
8697 
begin() const8698 		xpath_node* begin() const
8699 		{
8700 			return _begin;
8701 		}
8702 
end() const8703 		xpath_node* end() const
8704 		{
8705 			return _end;
8706 		}
8707 
empty() const8708 		bool empty() const
8709 		{
8710 			return _begin == _end;
8711 		}
8712 
size() const8713 		size_t size() const
8714 		{
8715 			return static_cast<size_t>(_end - _begin);
8716 		}
8717 
first() const8718 		xpath_node first() const
8719 		{
8720 			return xpath_first(_begin, _end, _type);
8721 		}
8722 
8723 		void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
8724 
push_back(const xpath_node & node,xpath_allocator * alloc)8725 		void push_back(const xpath_node& node, xpath_allocator* alloc)
8726 		{
8727 			if (_end != _eos)
8728 				*_end++ = node;
8729 			else
8730 				push_back_grow(node, alloc);
8731 		}
8732 
append(const xpath_node * begin_,const xpath_node * end_,xpath_allocator * alloc)8733 		void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
8734 		{
8735 			if (begin_ == end_) return;
8736 
8737 			size_t size_ = static_cast<size_t>(_end - _begin);
8738 			size_t capacity = static_cast<size_t>(_eos - _begin);
8739 			size_t count = static_cast<size_t>(end_ - begin_);
8740 
8741 			if (size_ + count > capacity)
8742 			{
8743 				// reallocate the old array or allocate a new one
8744 				xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
8745 				assert(data);
8746 
8747 				// finalize
8748 				_begin = data;
8749 				_end = data + size_;
8750 				_eos = data + size_ + count;
8751 			}
8752 
8753 			memcpy(_end, begin_, count * sizeof(xpath_node));
8754 			_end += count;
8755 		}
8756 
sort_do()8757 		void sort_do()
8758 		{
8759 			_type = xpath_sort(_begin, _end, _type, false);
8760 		}
8761 
truncate(xpath_node * pos)8762 		void truncate(xpath_node* pos)
8763 		{
8764 			assert(_begin <= pos && pos <= _end);
8765 
8766 			_end = pos;
8767 		}
8768 
remove_duplicates()8769 		void remove_duplicates()
8770 		{
8771 			if (_type == xpath_node_set::type_unsorted)
8772 				sort(_begin, _end, duplicate_comparator());
8773 
8774 			_end = unique(_begin, _end);
8775 		}
8776 
type() const8777 		xpath_node_set::type_t type() const
8778 		{
8779 			return _type;
8780 		}
8781 
set_type(xpath_node_set::type_t value)8782 		void set_type(xpath_node_set::type_t value)
8783 		{
8784 			_type = value;
8785 		}
8786 	};
8787 
push_back_grow(const xpath_node & node,xpath_allocator * alloc)8788 	PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
8789 	{
8790 		size_t capacity = static_cast<size_t>(_eos - _begin);
8791 
8792 		// get new capacity (1.5x rule)
8793 		size_t new_capacity = capacity + capacity / 2 + 1;
8794 
8795 		// reallocate the old array or allocate a new one
8796 		xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
8797 		assert(data);
8798 
8799 		// finalize
8800 		_begin = data;
8801 		_end = data + capacity;
8802 		_eos = data + new_capacity;
8803 
8804 		// push
8805 		*_end++ = node;
8806 	}
8807 PUGI__NS_END
8808 
8809 PUGI__NS_BEGIN
8810 	struct xpath_context
8811 	{
8812 		xpath_node n;
8813 		size_t position, size;
8814 
xpath_contextxpath_context8815 		xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
8816 		{
8817 		}
8818 	};
8819 
	// Token kinds produced by xpath_lexer::next()
	enum lexeme_t
	{
		lex_none = 0,				// input that does not form a valid token
		lex_equal,					// =
		lex_not_equal,				// !=
		lex_less,					// <
		lex_greater,				// >
		lex_less_or_equal,			// <=
		lex_greater_or_equal,		// >=
		lex_plus,					// +
		lex_minus,					// -
		lex_multiply,				// *
		lex_union,					// |
		lex_var_ref,				// $name or $prefix:name; contents hold the name without '$'
		lex_open_brace,				// (
		lex_close_brace,			// )
		lex_quoted_string,			// '...' or "..."; contents hold the text between the quotes
		lex_number,					// digits with an optional fractional part, or .digits
		lex_slash,					// /
		lex_double_slash,			// //
		lex_open_square_brace,		// [
		lex_close_square_brace,		// ]
		lex_string,					// name, prefix:name or prefix:*
		lex_comma,					// ,
		lex_axis_attribute,			// @
		lex_dot,					// .
		lex_double_dot,				// ..
		lex_double_colon,			// ::
		lex_eof						// end of the query string
	};
8850 
8851 	struct xpath_lexer_string
8852 	{
8853 		const char_t* begin;
8854 		const char_t* end;
8855 
xpath_lexer_stringxpath_lexer_string8856 		xpath_lexer_string(): begin(0), end(0)
8857 		{
8858 		}
8859 
operator ==xpath_lexer_string8860 		bool operator==(const char_t* other) const
8861 		{
8862 			size_t length = static_cast<size_t>(end - begin);
8863 
8864 			return strequalrange(other, begin, length);
8865 		}
8866 	};
8867 
	// Tokenizer for XPath query strings.
	// Holds exactly one current token; next() replaces it with the token that follows.
	// The query has to be zero-terminated and has to outlive the lexer, since token contents point into it.
	class xpath_lexer
	{
		// position right after the current token
		const char_t* _cur;
		// position where the current token starts (after skipping whitespace)
		const char_t* _cur_lexeme_pos;
		// text of the current token; only updated for tokens that carry text
		xpath_lexer_string _cur_lexeme_contents;

		// kind of the current token
		lexeme_t _cur_lexeme;

	public:
		// Starts tokenizing query and immediately reads the first token
		explicit xpath_lexer(const char_t* query): _cur(query)
		{
			next();
		}

		// Returns the position right after the current token
		const char_t* state() const
		{
			return _cur;
		}

		// Reads the next token.
		// For input that does not form a valid token the current token becomes lex_none.
		void next()
		{
			const char_t* cur = _cur;

			while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;

			// save lexeme position for error reporting
			_cur_lexeme_pos = cur;

			switch (*cur)
			{
			case 0:
				// end of input; the position is not advanced, so further calls keep returning lex_eof
				_cur_lexeme = lex_eof;
				break;

			case '>':
				if (*(cur+1) == '=')
				{
					cur += 2;
					_cur_lexeme = lex_greater_or_equal;
				}
				else
				{
					cur += 1;
					_cur_lexeme = lex_greater;
				}
				break;

			case '<':
				if (*(cur+1) == '=')
				{
					cur += 2;
					_cur_lexeme = lex_less_or_equal;
				}
				else
				{
					cur += 1;
					_cur_lexeme = lex_less;
				}
				break;

			case '!':
				if (*(cur+1) == '=')
				{
					cur += 2;
					_cur_lexeme = lex_not_equal;
				}
				else
				{
					// a lone '!' is not a token; the position stays on it
					_cur_lexeme = lex_none;
				}
				break;

			case '=':
				cur += 1;
				_cur_lexeme = lex_equal;

				break;

			case '+':
				cur += 1;
				_cur_lexeme = lex_plus;

				break;

			case '-':
				cur += 1;
				_cur_lexeme = lex_minus;

				break;

			case '*':
				cur += 1;
				_cur_lexeme = lex_multiply;

				break;

			case '|':
				cur += 1;
				_cur_lexeme = lex_union;

				break;

			case '$':
				// variable reference: '$' followed by a name or a qualified name
				cur += 1;

				if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
				{
					_cur_lexeme_contents.begin = cur;

					while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;

					if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
					{
						cur++; // :

						while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
					}

					_cur_lexeme_contents.end = cur;

					_cur_lexeme = lex_var_ref;
				}
				else
				{
					// '$' without a name after it
					_cur_lexeme = lex_none;
				}

				break;

			case '(':
				cur += 1;
				_cur_lexeme = lex_open_brace;

				break;

			case ')':
				cur += 1;
				_cur_lexeme = lex_close_brace;

				break;

			case '[':
				cur += 1;
				_cur_lexeme = lex_open_square_brace;

				break;

			case ']':
				cur += 1;
				_cur_lexeme = lex_close_square_brace;

				break;

			case ',':
				cur += 1;
				_cur_lexeme = lex_comma;

				break;

			case '/':
				if (*(cur+1) == '/')
				{
					cur += 2;
					_cur_lexeme = lex_double_slash;
				}
				else
				{
					cur += 1;
					_cur_lexeme = lex_slash;
				}
				break;

			case '.':
				// '..', a number that starts with the decimal point, or a single '.'
				if (*(cur+1) == '.')
				{
					cur += 2;
					_cur_lexeme = lex_double_dot;
				}
				else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
				{
					_cur_lexeme_contents.begin = cur; // .

					++cur;

					while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;

					_cur_lexeme_contents.end = cur;

					_cur_lexeme = lex_number;
				}
				else
				{
					cur += 1;
					_cur_lexeme = lex_dot;
				}
				break;

			case '@':
				cur += 1;
				_cur_lexeme = lex_axis_attribute;

				break;

			case '"':
			case '\'':
			{
				// quoted string: runs up to the matching quote character; there is no escape handling
				char_t terminator = *cur;

				++cur;

				_cur_lexeme_contents.begin = cur;
				while (*cur && *cur != terminator) cur++;
				_cur_lexeme_contents.end = cur;

				if (!*cur)
					// closing quote is missing
					_cur_lexeme = lex_none;
				else
				{
					cur += 1;
					_cur_lexeme = lex_quoted_string;
				}

				break;
			}

			case ':':
				if (*(cur+1) == ':')
				{
					cur += 2;
					_cur_lexeme = lex_double_colon;
				}
				else
				{
					// a lone ':' is not a token; the position stays on it
					_cur_lexeme = lex_none;
				}
				break;

			default:
				if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
				{
					// number: digits with an optional '.' and more digits
					_cur_lexeme_contents.begin = cur;

					while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;

					if (*cur == '.')
					{
						cur++;

						while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
					}

					_cur_lexeme_contents.end = cur;

					_cur_lexeme = lex_number;
				}
				else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
				{
					// name, optionally followed by ':*' or ':name'
					_cur_lexeme_contents.begin = cur;

					while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;

					if (cur[0] == ':')
					{
						if (cur[1] == '*') // namespace test ncname:*
						{
							cur += 2; // :*
						}
						else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
						{
							cur++; // :

							while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
						}
					}

					_cur_lexeme_contents.end = cur;

					_cur_lexeme = lex_string;
				}
				else
				{
					// unrecognized character; the position stays on it
					_cur_lexeme = lex_none;
				}
			}

			// commit the new position
			_cur = cur;
		}

		// Returns the kind of the current token
		lexeme_t current() const
		{
			return _cur_lexeme;
		}

		// Returns the position where the current token starts
		const char_t* current_pos() const
		{
			return _cur_lexeme_pos;
		}

		// Returns the text of the current token; only valid for tokens that carry text (see the assertion)
		const xpath_lexer_string& contents() const
		{
			assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);

			return _cur_lexeme_contents;
		}
	};
9173 
	// Node kinds of the XPath expression tree (stored in xpath_ast_node::_type).
	// In the comments below left/right refer to the child expressions of the node.
	enum ast_type_t
	{
		ast_unknown,
		ast_op_or,						// left or right
		ast_op_and,						// left and right
		ast_op_equal,					// left = right
		ast_op_not_equal,				// left != right
		ast_op_less,					// left < right
		ast_op_greater,					// left > right
		ast_op_less_or_equal,			// left <= right
		ast_op_greater_or_equal,		// left >= right
		ast_op_add,						// left + right
		ast_op_subtract,				// left - right
		ast_op_multiply,				// left * right
		ast_op_divide,					// left div right
		ast_op_mod,						// left mod right
		ast_op_negate,					// -left
		ast_op_union,					// left | right
		ast_predicate,					// apply predicate to set; next points to next predicate
		ast_filter,						// select * from left where right
		ast_string_constant,			// string constant
		ast_number_constant,			// number constant
		ast_variable,					// variable
		ast_func_last,					// last()
		ast_func_position,				// position()
		ast_func_count,					// count(left)
		ast_func_id,					// id(left)
		ast_func_local_name_0,			// local-name()
		ast_func_local_name_1,			// local-name(left)
		ast_func_namespace_uri_0,		// namespace-uri()
		ast_func_namespace_uri_1,		// namespace-uri(left)
		ast_func_name_0,				// name()
		ast_func_name_1,				// name(left)
		ast_func_string_0,				// string()
		ast_func_string_1,				// string(left)
		ast_func_concat,				// concat(left, right, siblings)
		ast_func_starts_with,			// starts-with(left, right)
		ast_func_contains,				// contains(left, right)
		ast_func_substring_before,		// substring-before(left, right)
		ast_func_substring_after,		// substring-after(left, right)
		ast_func_substring_2,			// substring(left, right)
		ast_func_substring_3,			// substring(left, right, third)
		ast_func_string_length_0,		// string-length()
		ast_func_string_length_1,		// string-length(left)
		ast_func_normalize_space_0,		// normalize-space()
		ast_func_normalize_space_1,		// normalize-space(left)
		ast_func_translate,				// translate(left, right, third)
		ast_func_boolean,				// boolean(left)
		ast_func_not,					// not(left)
		ast_func_true,					// true()
		ast_func_false,					// false()
		ast_func_lang,					// lang(left)
		ast_func_number_0,				// number()
		ast_func_number_1,				// number(left)
		ast_func_sum,					// sum(left)
		ast_func_floor,					// floor(left)
		ast_func_ceiling,				// ceiling(left)
		ast_func_round,					// round(left)
		ast_step,						// process set left with step
		ast_step_root,					// select root node

		// node kinds that do not correspond to XPath syntax (NOTE(review): presumably introduced by an optimization pass - confirm)
		ast_opt_translate_table,		// translate(left, right, third) where right/third are constants
		ast_opt_compare_attribute		// @name = 'string'
	};
9238 
	// Axis of a location step (stored in xpath_ast_node::_axis for ast_step nodes).
	// The enumerators are named after the XPath axis names.
	enum axis_t
	{
		axis_ancestor,
		axis_ancestor_or_self,
		axis_attribute,
		axis_child,
		axis_descendant,
		axis_descendant_or_self,
		axis_following,
		axis_following_sibling,
		axis_namespace,
		axis_parent,
		axis_preceding,
		axis_preceding_sibling,
		axis_self
	};
9255 
	// Node test of a location step (stored in xpath_ast_node::_test); see step_push for how each test is applied
	enum nodetest_t
	{
		nodetest_none,					// NOTE(review): presumably "no test assigned" - confirm against the parser
		nodetest_name,					// element/attribute whose name equals the stored node test string
		nodetest_type_node,				// any node
		nodetest_type_comment,			// comment nodes
		nodetest_type_pi,				// processing instruction nodes
		nodetest_type_text,				// text nodes (pcdata and cdata)
		nodetest_pi,					// processing instruction whose name equals the stored node test string
		nodetest_all,					// any element (or any attribute when matching attributes)
		nodetest_all_in_namespace		// element/attribute whose name starts with the stored node test string
	};
9268 
	// Predicate classification (stored in xpath_ast_node::_test for ast_filter/ast_predicate nodes).
	// apply_predicate evaluates predicate_constant and predicate_constant_one expressions only once per node set.
	enum predicate_t
	{
		predicate_default,				// no special handling
		predicate_posinv,				// NOTE(review): presumably "result does not depend on position" - confirm against the optimizer
		predicate_constant,				// numeric predicate that is evaluated once for the whole set
		predicate_constant_one			// NOTE(review): presumably a constant predicate equal to 1 - confirm against the optimizer
	};
9276 
	// Tells node set evaluation how much of the result is needed.
	// eval_once shows the effect: nodeset_eval_any always allows stopping after one match,
	// nodeset_eval_first only when the set is in document order, nodeset_eval_all never.
	enum nodeset_eval_t
	{
		nodeset_eval_all,				// the complete node set is required
		nodeset_eval_any,				// NOTE(review): presumably any single node of the result suffices - confirm with callers
		nodeset_eval_first				// NOTE(review): presumably only the first node in document order is required - confirm with callers
	};
9283 
	// Gives every axis_t value its own type, with the value available as the static member axis.
	// NOTE(review): presumably used to select axis-specific code at compile time - confirm at the use sites
	template <axis_t N> struct axis_to_type
	{
		static const axis_t axis;
	};

	// Out-of-class definition of the static member
	template <axis_t N> const axis_t axis_to_type<N>::axis = N;
9290 
9291 	class xpath_ast_node
9292 	{
9293 	private:
9294 		// node type
9295 		char _type;
9296 		char _rettype;
9297 
9298 		// for ast_step
9299 		char _axis;
9300 
9301 		// for ast_step/ast_predicate/ast_filter
9302 		char _test;
9303 
9304 		// tree node structure
9305 		xpath_ast_node* _left;
9306 		xpath_ast_node* _right;
9307 		xpath_ast_node* _next;
9308 
9309 		union
9310 		{
9311 			// value for ast_string_constant
9312 			const char_t* string;
9313 			// value for ast_number_constant
9314 			double number;
9315 			// variable for ast_variable
9316 			xpath_variable* variable;
9317 			// node test for ast_step (node name/namespace/node type/pi target)
9318 			const char_t* nodetest;
9319 			// table for ast_opt_translate_table
9320 			const unsigned char* table;
9321 		} _data;
9322 
9323 		xpath_ast_node(const xpath_ast_node&);
9324 		xpath_ast_node& operator=(const xpath_ast_node&);
9325 
		// Evaluates an equality-style comparison (comp is applied to booleans, numbers or strings) between two expressions.
		// The operand conversion depends on the static result types of lhs and rhs:
		// - neither is a node set: compare as booleans if either is boolean, else as numbers if either is a number, else as strings
		// - both are node sets: true if comp holds for the string values of at least one pair of nodes
		// - exactly one is a node set: true if comp holds between the other operand and at least one node of the set
		// Each xpath_allocator_capture is scoped to a block or loop iteration
		// (NOTE(review): presumably it releases temporaries allocated from stack.result when it goes out of scope - confirm).
		template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
		{
			xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();

			if (lt != xpath_type_node_set && rt != xpath_type_node_set)
			{
				// two primitive values: boolean takes precedence over number, number over string
				if (lt == xpath_type_boolean || rt == xpath_type_boolean)
					return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
				else if (lt == xpath_type_number || rt == xpath_type_number)
					return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
				else if (lt == xpath_type_string || rt == xpath_type_string)
				{
					xpath_allocator_capture cr(stack.result);

					xpath_string ls = lhs->eval_string(c, stack);
					xpath_string rs = rhs->eval_string(c, stack);

					return comp(ls, rs);
				}
			}
			else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
			{
				// two node sets: look for any pair of nodes whose string values satisfy comp
				xpath_allocator_capture cr(stack.result);

				xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
				xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);

				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
					{
						xpath_allocator_capture cri(stack.result);

						if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
							return true;
					}

				return false;
			}
			else
			{
				// exactly one node set: normalize so that lhs is the primitive operand and rhs the node set;
				// as a consequence comp always receives the primitive value as its first argument
				if (lt == xpath_type_node_set)
				{
					swap(lhs, rhs);
					swap(lt, rt);
				}

				if (lt == xpath_type_boolean)
					return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
				else if (lt == xpath_type_number)
				{
					xpath_allocator_capture cr(stack.result);

					double l = lhs->eval_number(c, stack);
					xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);

					// compare the number with the numeric value of each node's string value
					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
					{
						xpath_allocator_capture cri(stack.result);

						if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
							return true;
					}

					return false;
				}
				else if (lt == xpath_type_string)
				{
					xpath_allocator_capture cr(stack.result);

					xpath_string l = lhs->eval_string(c, stack);
					xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);

					// compare the string with each node's string value
					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
					{
						xpath_allocator_capture cri(stack.result);

						if (comp(l, string_value(*ri, stack.result)))
							return true;
					}

					return false;
				}
			}

			// reached only for type combinations that none of the branches above handles
			assert(false && "Wrong types");
			return false;
		}
9413 
eval_once(xpath_node_set::type_t type,nodeset_eval_t eval)9414 		static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval)
9415 		{
9416 			return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any;
9417 		}
9418 
		// Evaluates a relational comparison (comp is applied to two doubles) between two expressions.
		// Operands are always compared as numbers; for a node set operand the comparison is true
		// if it holds for the numeric value of at least one node's string value.
		// Unlike compare_eq, the operand order is preserved when only one side is a node set.
		// Each xpath_allocator_capture is scoped to a block or loop iteration
		// (NOTE(review): presumably it releases temporaries allocated from stack.result when it goes out of scope - confirm).
		template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
		{
			xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();

			if (lt != xpath_type_node_set && rt != xpath_type_node_set)
				return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
			else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
			{
				// two node sets: look for any pair of nodes that satisfies comp
				xpath_allocator_capture cr(stack.result);

				xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
				xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);

				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
				{
					xpath_allocator_capture cri(stack.result);

					// the left value is converted once per left node
					double l = convert_string_to_number(string_value(*li, stack.result).c_str());

					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
					{
						xpath_allocator_capture crii(stack.result);

						if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
							return true;
					}
				}

				return false;
			}
			else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
			{
				// number on the left, node set on the right
				xpath_allocator_capture cr(stack.result);

				double l = lhs->eval_number(c, stack);
				xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);

				for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
				{
					xpath_allocator_capture cri(stack.result);

					if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
						return true;
				}

				return false;
			}
			else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
			{
				// node set on the left, number on the right
				xpath_allocator_capture cr(stack.result);

				xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
				double r = rhs->eval_number(c, stack);

				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
				{
					xpath_allocator_capture cri(stack.result);

					if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
						return true;
				}

				return false;
			}
			else
			{
				// the four branches above cover every combination, so this is not expected to be reached
				assert(false && "Wrong types");
				return false;
			}
		}
9489 
apply_predicate_boolean(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack,bool once)9490 		static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9491 		{
9492 			assert(ns.size() >= first);
9493 			assert(expr->rettype() != xpath_type_number);
9494 
9495 			size_t i = 1;
9496 			size_t size = ns.size() - first;
9497 
9498 			xpath_node* last = ns.begin() + first;
9499 
9500 			// remove_if... or well, sort of
9501 			for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9502 			{
9503 				xpath_context c(*it, i, size);
9504 
9505 				if (expr->eval_boolean(c, stack))
9506 				{
9507 					*last++ = *it;
9508 
9509 					if (once) break;
9510 				}
9511 			}
9512 
9513 			ns.truncate(last);
9514 		}
9515 
apply_predicate_number(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack,bool once)9516 		static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9517 		{
9518 			assert(ns.size() >= first);
9519 			assert(expr->rettype() == xpath_type_number);
9520 
9521 			size_t i = 1;
9522 			size_t size = ns.size() - first;
9523 
9524 			xpath_node* last = ns.begin() + first;
9525 
9526 			// remove_if... or well, sort of
9527 			for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9528 			{
9529 				xpath_context c(*it, i, size);
9530 
9531 				if (expr->eval_number(c, stack) == i)
9532 				{
9533 					*last++ = *it;
9534 
9535 					if (once) break;
9536 				}
9537 			}
9538 
9539 			ns.truncate(last);
9540 		}
9541 
apply_predicate_number_const(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack)9542 		static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
9543 		{
9544 			assert(ns.size() >= first);
9545 			assert(expr->rettype() == xpath_type_number);
9546 
9547 			size_t size = ns.size() - first;
9548 
9549 			xpath_node* last = ns.begin() + first;
9550 
9551 			xpath_context c(xpath_node(), 1, size);
9552 
9553 			double er = expr->eval_number(c, stack);
9554 
9555 			if (er >= 1.0 && er <= size)
9556 			{
9557 				size_t eri = static_cast<size_t>(er);
9558 
9559 				if (er == eri)
9560 				{
9561 					xpath_node r = last[eri - 1];
9562 
9563 					*last++ = r;
9564 				}
9565 			}
9566 
9567 			ns.truncate(last);
9568 		}
9569 
apply_predicate(xpath_node_set_raw & ns,size_t first,const xpath_stack & stack,bool once)9570 		void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once)
9571 		{
9572 			if (ns.size() == first) return;
9573 
9574 			assert(_type == ast_filter || _type == ast_predicate);
9575 
9576 			if (_test == predicate_constant || _test == predicate_constant_one)
9577 				apply_predicate_number_const(ns, first, _right, stack);
9578 			else if (_right->rettype() == xpath_type_number)
9579 				apply_predicate_number(ns, first, _right, stack, once);
9580 			else
9581 				apply_predicate_boolean(ns, first, _right, stack, once);
9582 		}
9583 
apply_predicates(xpath_node_set_raw & ns,size_t first,const xpath_stack & stack,nodeset_eval_t eval)9584 		void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval)
9585 		{
9586 			if (ns.size() == first) return;
9587 
9588 			bool last_once = eval_once(ns.type(), eval);
9589 
9590 			for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
9591 				pred->apply_predicate(ns, first, stack, !pred->_next && last_once);
9592 		}
9593 
step_push(xpath_node_set_raw & ns,xml_attribute_struct * a,xml_node_struct * parent,xpath_allocator * alloc)9594 		bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc)
9595 		{
9596 			assert(a);
9597 
9598 			const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
9599 
9600 			switch (_test)
9601 			{
9602 			case nodetest_name:
9603 				if (strequal(name, _data.nodetest) && is_xpath_attribute(name))
9604 				{
9605 					ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9606 					return true;
9607 				}
9608 				break;
9609 
9610 			case nodetest_type_node:
9611 			case nodetest_all:
9612 				if (is_xpath_attribute(name))
9613 				{
9614 					ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9615 					return true;
9616 				}
9617 				break;
9618 
9619 			case nodetest_all_in_namespace:
9620 				if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
9621 				{
9622 					ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9623 					return true;
9624 				}
9625 				break;
9626 
9627 			default:
9628 				;
9629 			}
9630 
9631 			return false;
9632 		}
9633 
step_push(xpath_node_set_raw & ns,xml_node_struct * n,xpath_allocator * alloc)9634 		bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc)
9635 		{
9636 			assert(n);
9637 
9638 			xml_node_type type = PUGI__NODETYPE(n);
9639 
9640 			switch (_test)
9641 			{
9642 			case nodetest_name:
9643 				if (type == node_element && n->name && strequal(n->name, _data.nodetest))
9644 				{
9645 					ns.push_back(xml_node(n), alloc);
9646 					return true;
9647 				}
9648 				break;
9649 
9650 			case nodetest_type_node:
9651 				ns.push_back(xml_node(n), alloc);
9652 				return true;
9653 
9654 			case nodetest_type_comment:
9655 				if (type == node_comment)
9656 				{
9657 					ns.push_back(xml_node(n), alloc);
9658 					return true;
9659 				}
9660 				break;
9661 
9662 			case nodetest_type_text:
9663 				if (type == node_pcdata || type == node_cdata)
9664 				{
9665 					ns.push_back(xml_node(n), alloc);
9666 					return true;
9667 				}
9668 				break;
9669 
9670 			case nodetest_type_pi:
9671 				if (type == node_pi)
9672 				{
9673 					ns.push_back(xml_node(n), alloc);
9674 					return true;
9675 				}
9676 				break;
9677 
9678 			case nodetest_pi:
9679 				if (type == node_pi && n->name && strequal(n->name, _data.nodetest))
9680 				{
9681 					ns.push_back(xml_node(n), alloc);
9682 					return true;
9683 				}
9684 				break;
9685 
9686 			case nodetest_all:
9687 				if (type == node_element)
9688 				{
9689 					ns.push_back(xml_node(n), alloc);
9690 					return true;
9691 				}
9692 				break;
9693 
9694 			case nodetest_all_in_namespace:
9695 				if (type == node_element && n->name && starts_with(n->name, _data.nodetest))
9696 				{
9697 					ns.push_back(xml_node(n), alloc);
9698 					return true;
9699 				}
9700 				break;
9701 
9702 			default:
9703 				assert(false && "Unknown axis");
9704 			}
9705 
9706 			return false;
9707 		}
9708 
step_fill(xpath_node_set_raw & ns,xml_node_struct * n,xpath_allocator * alloc,bool once,T)9709 		template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T)
9710 		{
9711 			const axis_t axis = T::axis;
9712 
9713 			switch (axis)
9714 			{
9715 			case axis_attribute:
9716 			{
9717 				for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
9718 					if (step_push(ns, a, n, alloc) & once)
9719 						return;
9720 
9721 				break;
9722 			}
9723 
9724 			case axis_child:
9725 			{
9726 				for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
9727 					if (step_push(ns, c, alloc) & once)
9728 						return;
9729 
9730 				break;
9731 			}
9732 
9733 			case axis_descendant:
9734 			case axis_descendant_or_self:
9735 			{
9736 				if (axis == axis_descendant_or_self)
9737 					if (step_push(ns, n, alloc) & once)
9738 						return;
9739 
9740 				xml_node_struct* cur = n->first_child;
9741 
9742 				while (cur)
9743 				{
9744 					if (step_push(ns, cur, alloc) & once)
9745 						return;
9746 
9747 					if (cur->first_child)
9748 						cur = cur->first_child;
9749 					else
9750 					{
9751 						while (!cur->next_sibling)
9752 						{
9753 							cur = cur->parent;
9754 
9755 							if (cur == n) return;
9756 						}
9757 
9758 						cur = cur->next_sibling;
9759 					}
9760 				}
9761 
9762 				break;
9763 			}
9764 
9765 			case axis_following_sibling:
9766 			{
9767 				for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
9768 					if (step_push(ns, c, alloc) & once)
9769 						return;
9770 
9771 				break;
9772 			}
9773 
9774 			case axis_preceding_sibling:
9775 			{
9776 				for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
9777 					if (step_push(ns, c, alloc) & once)
9778 						return;
9779 
9780 				break;
9781 			}
9782 
9783 			case axis_following:
9784 			{
9785 				xml_node_struct* cur = n;
9786 
9787 				// exit from this node so that we don't include descendants
9788 				while (!cur->next_sibling)
9789 				{
9790 					cur = cur->parent;
9791 
9792 					if (!cur) return;
9793 				}
9794 
9795 				cur = cur->next_sibling;
9796 
9797 				while (cur)
9798 				{
9799 					if (step_push(ns, cur, alloc) & once)
9800 						return;
9801 
9802 					if (cur->first_child)
9803 						cur = cur->first_child;
9804 					else
9805 					{
9806 						while (!cur->next_sibling)
9807 						{
9808 							cur = cur->parent;
9809 
9810 							if (!cur) return;
9811 						}
9812 
9813 						cur = cur->next_sibling;
9814 					}
9815 				}
9816 
9817 				break;
9818 			}
9819 
9820 			case axis_preceding:
9821 			{
9822 				xml_node_struct* cur = n;
9823 
9824 				// exit from this node so that we don't include descendants
9825 				while (!cur->prev_sibling_c->next_sibling)
9826 				{
9827 					cur = cur->parent;
9828 
9829 					if (!cur) return;
9830 				}
9831 
9832 				cur = cur->prev_sibling_c;
9833 
9834 				while (cur)
9835 				{
9836 					if (cur->first_child)
9837 						cur = cur->first_child->prev_sibling_c;
9838 					else
9839 					{
9840 						// leaf node, can't be ancestor
9841 						if (step_push(ns, cur, alloc) & once)
9842 							return;
9843 
9844 						while (!cur->prev_sibling_c->next_sibling)
9845 						{
9846 							cur = cur->parent;
9847 
9848 							if (!cur) return;
9849 
9850 							if (!node_is_ancestor(cur, n))
9851 								if (step_push(ns, cur, alloc) & once)
9852 									return;
9853 						}
9854 
9855 						cur = cur->prev_sibling_c;
9856 					}
9857 				}
9858 
9859 				break;
9860 			}
9861 
9862 			case axis_ancestor:
9863 			case axis_ancestor_or_self:
9864 			{
9865 				if (axis == axis_ancestor_or_self)
9866 					if (step_push(ns, n, alloc) & once)
9867 						return;
9868 
9869 				xml_node_struct* cur = n->parent;
9870 
9871 				while (cur)
9872 				{
9873 					if (step_push(ns, cur, alloc) & once)
9874 						return;
9875 
9876 					cur = cur->parent;
9877 				}
9878 
9879 				break;
9880 			}
9881 
9882 			case axis_self:
9883 			{
9884 				step_push(ns, n, alloc);
9885 
9886 				break;
9887 			}
9888 
9889 			case axis_parent:
9890 			{
9891 				if (n->parent)
9892 					step_push(ns, n->parent, alloc);
9893 
9894 				break;
9895 			}
9896 
9897 			default:
9898 				assert(false && "Unimplemented axis");
9899 			}
9900 		}
9901 
step_fill(xpath_node_set_raw & ns,xml_attribute_struct * a,xml_node_struct * p,xpath_allocator * alloc,bool once,T v)9902 		template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v)
9903 		{
9904 			const axis_t axis = T::axis;
9905 
9906 			switch (axis)
9907 			{
9908 			case axis_ancestor:
9909 			case axis_ancestor_or_self:
9910 			{
9911 				if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
9912 					if (step_push(ns, a, p, alloc) & once)
9913 						return;
9914 
9915 				xml_node_struct* cur = p;
9916 
9917 				while (cur)
9918 				{
9919 					if (step_push(ns, cur, alloc) & once)
9920 						return;
9921 
9922 					cur = cur->parent;
9923 				}
9924 
9925 				break;
9926 			}
9927 
9928 			case axis_descendant_or_self:
9929 			case axis_self:
9930 			{
9931 				if (_test == nodetest_type_node) // reject attributes based on principal node type test
9932 					step_push(ns, a, p, alloc);
9933 
9934 				break;
9935 			}
9936 
9937 			case axis_following:
9938 			{
9939 				xml_node_struct* cur = p;
9940 
9941 				while (cur)
9942 				{
9943 					if (cur->first_child)
9944 						cur = cur->first_child;
9945 					else
9946 					{
9947 						while (!cur->next_sibling)
9948 						{
9949 							cur = cur->parent;
9950 
9951 							if (!cur) return;
9952 						}
9953 
9954 						cur = cur->next_sibling;
9955 					}
9956 
9957 					if (step_push(ns, cur, alloc) & once)
9958 						return;
9959 				}
9960 
9961 				break;
9962 			}
9963 
9964 			case axis_parent:
9965 			{
9966 				step_push(ns, p, alloc);
9967 
9968 				break;
9969 			}
9970 
9971 			case axis_preceding:
9972 			{
9973 				// preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
9974 				step_fill(ns, p, alloc, once, v);
9975 				break;
9976 			}
9977 
9978 			default:
9979 				assert(false && "Unimplemented axis");
9980 			}
9981 		}
9982 
step_fill(xpath_node_set_raw & ns,const xpath_node & xn,xpath_allocator * alloc,bool once,T v)9983 		template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v)
9984 		{
9985 			const axis_t axis = T::axis;
9986 			const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
9987 
9988 			if (xn.node())
9989 				step_fill(ns, xn.node().internal_object(), alloc, once, v);
9990 			else if (axis_has_attributes && xn.attribute() && xn.parent())
9991 				step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
9992 		}
9993 
step_do(const xpath_context & c,const xpath_stack & stack,nodeset_eval_t eval,T v)9994 		template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)
9995 		{
9996 			const axis_t axis = T::axis;
9997 			const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
9998 			const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
9999 
10000 			bool once =
10001 				(axis == axis_attribute && _test == nodetest_name) ||
10002 				(!_right && eval_once(axis_type, eval)) ||
10003 				(_right && !_right->_next && _right->_test == predicate_constant_one);
10004 
10005 			xpath_node_set_raw ns;
10006 			ns.set_type(axis_type);
10007 
10008 			if (_left)
10009 			{
10010 				xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
10011 
10012 				// self axis preserves the original order
10013 				if (axis == axis_self) ns.set_type(s.type());
10014 
10015 				for (const xpath_node* it = s.begin(); it != s.end(); ++it)
10016 				{
10017 					size_t size = ns.size();
10018 
10019 					// in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
10020 					if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
10021 
10022 					step_fill(ns, *it, stack.result, once, v);
10023 					if (_right) apply_predicates(ns, size, stack, eval);
10024 				}
10025 			}
10026 			else
10027 			{
10028 				step_fill(ns, c.n, stack.result, once, v);
10029 				if (_right) apply_predicates(ns, 0, stack, eval);
10030 			}
10031 
10032 			// child, attribute and self axes always generate unique set of nodes
10033 			// for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
10034 			if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
10035 				ns.remove_duplicates();
10036 
10037 			return ns;
10038 		}
10039 
10040 	public:
// Creates a string constant node; `value` is stored as-is in _data.string (the pointer is
// kept, not copied).
xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
	_type(static_cast<char>(type)),
	_rettype(static_cast<char>(rettype_)),
	_axis(0),
	_test(0),
	_left(0),
	_right(0),
	_next(0)
{
	// this overload is reserved for string constants
	assert(type == ast_string_constant);

	_data.string = value;
}
10047 
// Creates a number constant node; `value` is stored in _data.number.
xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
	_type(static_cast<char>(type)),
	_rettype(static_cast<char>(rettype_)),
	_axis(0),
	_test(0),
	_left(0),
	_right(0),
	_next(0)
{
	// this overload is reserved for number constants
	assert(type == ast_number_constant);

	_data.number = value;
}
10054 
// Creates a variable reference node; the variable pointer is stored in _data.variable.
xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
	_type(static_cast<char>(type)),
	_rettype(static_cast<char>(rettype_)),
	_axis(0),
	_test(0),
	_left(0),
	_right(0),
	_next(0)
{
	// this overload is reserved for variable references
	assert(type == ast_variable);

	_data.variable = value;
}
10061 
// Creates a generic node of the given type and return type with optional left/right children.
// _data is left untouched by this overload.
xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
	_type(static_cast<char>(type)),
	_rettype(static_cast<char>(rettype_)),
	_axis(0),
	_test(0),
	_left(left),
	_right(right),
	_next(0)
{
}
10066 
// Creates a location step node: `left` is the expression the step is applied to, `axis` and
// `test` select the axis and node test, and `contents` is stored as-is in _data.nodetest.
// The return type of a step is always a node set.
xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
	_type(static_cast<char>(type)),
	_rettype(xpath_type_node_set),
	_axis(static_cast<char>(axis)),
	_test(static_cast<char>(test)),
	_left(left),
	_right(0),
	_next(0)
{
	// this overload is reserved for steps
	assert(type == ast_step);

	_data.nodetest = contents;
}
10073 
// Creates a filter or predicate node with the given children; the predicate classification
// `test` is stored in _test. The return type is always a node set.
xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test):
	_type(static_cast<char>(type)),
	_rettype(xpath_type_node_set),
	_axis(0),
	_test(static_cast<char>(test)),
	_left(left),
	_right(right),
	_next(0)
{
	// this overload is reserved for filters and predicates
	assert(type == ast_filter || type == ast_predicate);
}
10079 
set_next(xpath_ast_node * value)10080 		void set_next(xpath_ast_node* value)
10081 		{
10082 			_next = value;
10083 		}
10084 
set_right(xpath_ast_node * value)10085 		void set_right(xpath_ast_node* value)
10086 		{
10087 			_right = value;
10088 		}
10089 
eval_boolean(const xpath_context & c,const xpath_stack & stack)10090 		bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
10091 		{
10092 			switch (_type)
10093 			{
10094 			case ast_op_or:
10095 				return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
10096 
10097 			case ast_op_and:
10098 				return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
10099 
10100 			case ast_op_equal:
10101 				return compare_eq(_left, _right, c, stack, equal_to());
10102 
10103 			case ast_op_not_equal:
10104 				return compare_eq(_left, _right, c, stack, not_equal_to());
10105 
10106 			case ast_op_less:
10107 				return compare_rel(_left, _right, c, stack, less());
10108 
10109 			case ast_op_greater:
10110 				return compare_rel(_right, _left, c, stack, less());
10111 
10112 			case ast_op_less_or_equal:
10113 				return compare_rel(_left, _right, c, stack, less_equal());
10114 
10115 			case ast_op_greater_or_equal:
10116 				return compare_rel(_right, _left, c, stack, less_equal());
10117 
10118 			case ast_func_starts_with:
10119 			{
10120 				xpath_allocator_capture cr(stack.result);
10121 
10122 				xpath_string lr = _left->eval_string(c, stack);
10123 				xpath_string rr = _right->eval_string(c, stack);
10124 
10125 				return starts_with(lr.c_str(), rr.c_str());
10126 			}
10127 
10128 			case ast_func_contains:
10129 			{
10130 				xpath_allocator_capture cr(stack.result);
10131 
10132 				xpath_string lr = _left->eval_string(c, stack);
10133 				xpath_string rr = _right->eval_string(c, stack);
10134 
10135 				return find_substring(lr.c_str(), rr.c_str()) != 0;
10136 			}
10137 
10138 			case ast_func_boolean:
10139 				return _left->eval_boolean(c, stack);
10140 
10141 			case ast_func_not:
10142 				return !_left->eval_boolean(c, stack);
10143 
10144 			case ast_func_true:
10145 				return true;
10146 
10147 			case ast_func_false:
10148 				return false;
10149 
10150 			case ast_func_lang:
10151 			{
10152 				if (c.n.attribute()) return false;
10153 
10154 				xpath_allocator_capture cr(stack.result);
10155 
10156 				xpath_string lang = _left->eval_string(c, stack);
10157 
10158 				for (xml_node n = c.n.node(); n; n = n.parent())
10159 				{
10160 					xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
10161 
10162 					if (a)
10163 					{
10164 						const char_t* value = a.value();
10165 
10166 						// strnicmp / strncasecmp is not portable
10167 						for (const char_t* lit = lang.c_str(); *lit; ++lit)
10168 						{
10169 							if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
10170 							++value;
10171 						}
10172 
10173 						return *value == 0 || *value == '-';
10174 					}
10175 				}
10176 
10177 				return false;
10178 			}
10179 
10180 			case ast_opt_compare_attribute:
10181 			{
10182 				const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();
10183 
10184 				xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
10185 
10186 				return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
10187 			}
10188 
10189 			case ast_variable:
10190 			{
10191 				assert(_rettype == _data.variable->type());
10192 
10193 				if (_rettype == xpath_type_boolean)
10194 					return _data.variable->get_boolean();
10195 
10196 				// fallthrough to type conversion
10197 			}
10198 
10199 			default:
10200 			{
10201 				switch (_rettype)
10202 				{
10203 				case xpath_type_number:
10204 					return convert_number_to_boolean(eval_number(c, stack));
10205 
10206 				case xpath_type_string:
10207 				{
10208 					xpath_allocator_capture cr(stack.result);
10209 
10210 					return !eval_string(c, stack).empty();
10211 				}
10212 
10213 				case xpath_type_node_set:
10214 				{
10215 					xpath_allocator_capture cr(stack.result);
10216 
10217 					return !eval_node_set(c, stack, nodeset_eval_any).empty();
10218 				}
10219 
10220 				default:
10221 					assert(false && "Wrong expression for return type boolean");
10222 					return false;
10223 				}
10224 			}
10225 			}
10226 		}
10227 
eval_number(const xpath_context & c,const xpath_stack & stack)10228 		double eval_number(const xpath_context& c, const xpath_stack& stack)
10229 		{
10230 			switch (_type)
10231 			{
10232 			case ast_op_add:
10233 				return _left->eval_number(c, stack) + _right->eval_number(c, stack);
10234 
10235 			case ast_op_subtract:
10236 				return _left->eval_number(c, stack) - _right->eval_number(c, stack);
10237 
10238 			case ast_op_multiply:
10239 				return _left->eval_number(c, stack) * _right->eval_number(c, stack);
10240 
10241 			case ast_op_divide:
10242 				return _left->eval_number(c, stack) / _right->eval_number(c, stack);
10243 
10244 			case ast_op_mod:
10245 				return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
10246 
10247 			case ast_op_negate:
10248 				return -_left->eval_number(c, stack);
10249 
10250 			case ast_number_constant:
10251 				return _data.number;
10252 
10253 			case ast_func_last:
10254 				return static_cast<double>(c.size);
10255 
10256 			case ast_func_position:
10257 				return static_cast<double>(c.position);
10258 
10259 			case ast_func_count:
10260 			{
10261 				xpath_allocator_capture cr(stack.result);
10262 
10263 				return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
10264 			}
10265 
10266 			case ast_func_string_length_0:
10267 			{
10268 				xpath_allocator_capture cr(stack.result);
10269 
10270 				return static_cast<double>(string_value(c.n, stack.result).length());
10271 			}
10272 
10273 			case ast_func_string_length_1:
10274 			{
10275 				xpath_allocator_capture cr(stack.result);
10276 
10277 				return static_cast<double>(_left->eval_string(c, stack).length());
10278 			}
10279 
10280 			case ast_func_number_0:
10281 			{
10282 				xpath_allocator_capture cr(stack.result);
10283 
10284 				return convert_string_to_number(string_value(c.n, stack.result).c_str());
10285 			}
10286 
10287 			case ast_func_number_1:
10288 				return _left->eval_number(c, stack);
10289 
10290 			case ast_func_sum:
10291 			{
10292 				xpath_allocator_capture cr(stack.result);
10293 
10294 				double r = 0;
10295 
10296 				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);
10297 
10298 				for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
10299 				{
10300 					xpath_allocator_capture cri(stack.result);
10301 
10302 					r += convert_string_to_number(string_value(*it, stack.result).c_str());
10303 				}
10304 
10305 				return r;
10306 			}
10307 
10308 			case ast_func_floor:
10309 			{
10310 				double r = _left->eval_number(c, stack);
10311 
10312 				return r == r ? floor(r) : r;
10313 			}
10314 
10315 			case ast_func_ceiling:
10316 			{
10317 				double r = _left->eval_number(c, stack);
10318 
10319 				return r == r ? ceil(r) : r;
10320 			}
10321 
10322 			case ast_func_round:
10323 				return round_nearest_nzero(_left->eval_number(c, stack));
10324 
10325 			case ast_variable:
10326 			{
10327 				assert(_rettype == _data.variable->type());
10328 
10329 				if (_rettype == xpath_type_number)
10330 					return _data.variable->get_number();
10331 
10332 				// fallthrough to type conversion
10333 			}
10334 
10335 			default:
10336 			{
10337 				switch (_rettype)
10338 				{
10339 				case xpath_type_boolean:
10340 					return eval_boolean(c, stack) ? 1 : 0;
10341 
10342 				case xpath_type_string:
10343 				{
10344 					xpath_allocator_capture cr(stack.result);
10345 
10346 					return convert_string_to_number(eval_string(c, stack).c_str());
10347 				}
10348 
10349 				case xpath_type_node_set:
10350 				{
10351 					xpath_allocator_capture cr(stack.result);
10352 
10353 					return convert_string_to_number(eval_string(c, stack).c_str());
10354 				}
10355 
10356 				default:
10357 					assert(false && "Wrong expression for return type number");
10358 					return 0;
10359 				}
10360 
10361 			}
10362 			}
10363 		}
10364 
eval_string_concat(const xpath_context & c,const xpath_stack & stack)10365 		xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
10366 		{
10367 			assert(_type == ast_func_concat);
10368 
10369 			xpath_allocator_capture ct(stack.temp);
10370 
10371 			// count the string number
10372 			size_t count = 1;
10373 			for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
10374 
10375 			// gather all strings
10376 			xpath_string static_buffer[4];
10377 			xpath_string* buffer = static_buffer;
10378 
10379 			// allocate on-heap for large concats
10380 			if (count > sizeof(static_buffer) / sizeof(static_buffer[0]))
10381 			{
10382 				buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
10383 				assert(buffer);
10384 			}
10385 
10386 			// evaluate all strings to temporary stack
10387 			xpath_stack swapped_stack = {stack.temp, stack.result};
10388 
10389 			buffer[0] = _left->eval_string(c, swapped_stack);
10390 
10391 			size_t pos = 1;
10392 			for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
10393 			assert(pos == count);
10394 
10395 			// get total length
10396 			size_t length = 0;
10397 			for (size_t i = 0; i < count; ++i) length += buffer[i].length();
10398 
10399 			// create final string
10400 			char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
10401 			assert(result);
10402 
10403 			char_t* ri = result;
10404 
10405 			for (size_t j = 0; j < count; ++j)
10406 				for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
10407 					*ri++ = *bi;
10408 
10409 			*ri = 0;
10410 
10411 			return xpath_string::from_heap_preallocated(result, ri);
10412 		}
10413 
eval_string(const xpath_context & c,const xpath_stack & stack)10414 		xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
10415 		{
10416 			switch (_type)
10417 			{
10418 			case ast_string_constant:
10419 				return xpath_string::from_const(_data.string);
10420 
10421 			case ast_func_local_name_0:
10422 			{
10423 				xpath_node na = c.n;
10424 
10425 				return xpath_string::from_const(local_name(na));
10426 			}
10427 
10428 			case ast_func_local_name_1:
10429 			{
10430 				xpath_allocator_capture cr(stack.result);
10431 
10432 				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10433 				xpath_node na = ns.first();
10434 
10435 				return xpath_string::from_const(local_name(na));
10436 			}
10437 
10438 			case ast_func_name_0:
10439 			{
10440 				xpath_node na = c.n;
10441 
10442 				return xpath_string::from_const(qualified_name(na));
10443 			}
10444 
10445 			case ast_func_name_1:
10446 			{
10447 				xpath_allocator_capture cr(stack.result);
10448 
10449 				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10450 				xpath_node na = ns.first();
10451 
10452 				return xpath_string::from_const(qualified_name(na));
10453 			}
10454 
10455 			case ast_func_namespace_uri_0:
10456 			{
10457 				xpath_node na = c.n;
10458 
10459 				return xpath_string::from_const(namespace_uri(na));
10460 			}
10461 
10462 			case ast_func_namespace_uri_1:
10463 			{
10464 				xpath_allocator_capture cr(stack.result);
10465 
10466 				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10467 				xpath_node na = ns.first();
10468 
10469 				return xpath_string::from_const(namespace_uri(na));
10470 			}
10471 
10472 			case ast_func_string_0:
10473 				return string_value(c.n, stack.result);
10474 
10475 			case ast_func_string_1:
10476 				return _left->eval_string(c, stack);
10477 
10478 			case ast_func_concat:
10479 				return eval_string_concat(c, stack);
10480 
10481 			case ast_func_substring_before:
10482 			{
10483 				xpath_allocator_capture cr(stack.temp);
10484 
10485 				xpath_stack swapped_stack = {stack.temp, stack.result};
10486 
10487 				xpath_string s = _left->eval_string(c, swapped_stack);
10488 				xpath_string p = _right->eval_string(c, swapped_stack);
10489 
10490 				const char_t* pos = find_substring(s.c_str(), p.c_str());
10491 
10492 				return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
10493 			}
10494 
10495 			case ast_func_substring_after:
10496 			{
10497 				xpath_allocator_capture cr(stack.temp);
10498 
10499 				xpath_stack swapped_stack = {stack.temp, stack.result};
10500 
10501 				xpath_string s = _left->eval_string(c, swapped_stack);
10502 				xpath_string p = _right->eval_string(c, swapped_stack);
10503 
10504 				const char_t* pos = find_substring(s.c_str(), p.c_str());
10505 				if (!pos) return xpath_string();
10506 
10507 				const char_t* rbegin = pos + p.length();
10508 				const char_t* rend = s.c_str() + s.length();
10509 
10510 				return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10511 			}
10512 
10513 			case ast_func_substring_2:
10514 			{
10515 				xpath_allocator_capture cr(stack.temp);
10516 
10517 				xpath_stack swapped_stack = {stack.temp, stack.result};
10518 
10519 				xpath_string s = _left->eval_string(c, swapped_stack);
10520 				size_t s_length = s.length();
10521 
10522 				double first = round_nearest(_right->eval_number(c, stack));
10523 
10524 				if (is_nan(first)) return xpath_string(); // NaN
10525 				else if (first >= s_length + 1) return xpath_string();
10526 
10527 				size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10528 				assert(1 <= pos && pos <= s_length + 1);
10529 
10530 				const char_t* rbegin = s.c_str() + (pos - 1);
10531 				const char_t* rend = s.c_str() + s.length();
10532 
10533 				return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10534 			}
10535 
10536 			case ast_func_substring_3:
10537 			{
10538 				xpath_allocator_capture cr(stack.temp);
10539 
10540 				xpath_stack swapped_stack = {stack.temp, stack.result};
10541 
10542 				xpath_string s = _left->eval_string(c, swapped_stack);
10543 				size_t s_length = s.length();
10544 
10545 				double first = round_nearest(_right->eval_number(c, stack));
10546 				double last = first + round_nearest(_right->_next->eval_number(c, stack));
10547 
10548 				if (is_nan(first) || is_nan(last)) return xpath_string();
10549 				else if (first >= s_length + 1) return xpath_string();
10550 				else if (first >= last) return xpath_string();
10551 				else if (last < 1) return xpath_string();
10552 
10553 				size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10554 				size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);
10555 
10556 				assert(1 <= pos && pos <= end && end <= s_length + 1);
10557 				const char_t* rbegin = s.c_str() + (pos - 1);
10558 				const char_t* rend = s.c_str() + (end - 1);
10559 
10560 				return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
10561 			}
10562 
10563 			case ast_func_normalize_space_0:
10564 			{
10565 				xpath_string s = string_value(c.n, stack.result);
10566 
10567 				char_t* begin = s.data(stack.result);
10568 				char_t* end = normalize_space(begin);
10569 
10570 				return xpath_string::from_heap_preallocated(begin, end);
10571 			}
10572 
10573 			case ast_func_normalize_space_1:
10574 			{
10575 				xpath_string s = _left->eval_string(c, stack);
10576 
10577 				char_t* begin = s.data(stack.result);
10578 				char_t* end = normalize_space(begin);
10579 
10580 				return xpath_string::from_heap_preallocated(begin, end);
10581 			}
10582 
10583 			case ast_func_translate:
10584 			{
10585 				xpath_allocator_capture cr(stack.temp);
10586 
10587 				xpath_stack swapped_stack = {stack.temp, stack.result};
10588 
10589 				xpath_string s = _left->eval_string(c, stack);
10590 				xpath_string from = _right->eval_string(c, swapped_stack);
10591 				xpath_string to = _right->_next->eval_string(c, swapped_stack);
10592 
10593 				char_t* begin = s.data(stack.result);
10594 				char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());
10595 
10596 				return xpath_string::from_heap_preallocated(begin, end);
10597 			}
10598 
10599 			case ast_opt_translate_table:
10600 			{
10601 				xpath_string s = _left->eval_string(c, stack);
10602 
10603 				char_t* begin = s.data(stack.result);
10604 				char_t* end = translate_table(begin, _data.table);
10605 
10606 				return xpath_string::from_heap_preallocated(begin, end);
10607 			}
10608 
10609 			case ast_variable:
10610 			{
10611 				assert(_rettype == _data.variable->type());
10612 
10613 				if (_rettype == xpath_type_string)
10614 					return xpath_string::from_const(_data.variable->get_string());
10615 
10616 				// fallthrough to type conversion
10617 			}
10618 
10619 			default:
10620 			{
10621 				switch (_rettype)
10622 				{
10623 				case xpath_type_boolean:
10624 					return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
10625 
10626 				case xpath_type_number:
10627 					return convert_number_to_string(eval_number(c, stack), stack.result);
10628 
10629 				case xpath_type_node_set:
10630 				{
10631 					xpath_allocator_capture cr(stack.temp);
10632 
10633 					xpath_stack swapped_stack = {stack.temp, stack.result};
10634 
10635 					xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
10636 					return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
10637 				}
10638 
10639 				default:
10640 					assert(false && "Wrong expression for return type string");
10641 					return xpath_string();
10642 				}
10643 			}
10644 			}
10645 		}
10646 
		// Evaluates this AST node as a node-set expression for the context c.
		// stack provides the two allocators used below (stack.result and stack.temp); eval is the
		// evaluation mode that is forwarded to child expressions, eval_once() and step_do().
		// Node types that cannot produce a node set reach the assert in the default branch and,
		// when asserts are disabled, yield an empty set.
		xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)
		{
			switch (_type)
			{
			case ast_op_union:
			{
				xpath_allocator_capture cr(stack.temp);

				// the left operand is evaluated with the two allocators exchanged, the right operand with
				// the caller's stack; the left nodes are then appended to the right set using stack.result
				xpath_stack swapped_stack = {stack.temp, stack.result};

				xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval);
				xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval);

				// we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
				rs.set_type(xpath_node_set::type_unsorted);

				rs.append(ls.begin(), ls.end(), stack.result);
				rs.remove_duplicates();

				return rs;
			}

			case ast_filter:
			{
				// when the predicate was classified as the constant 1, only the first node of the input is requested
				xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all);

				// either expression is a number or it contains position() call; sort by document order
				if (_test != predicate_posinv) set.sort_do();

				bool once = eval_once(set.type(), eval);

				apply_predicate(set, 0, stack, once);

				return set;
			}

			case ast_func_id:
				// id() always evaluates to an empty node set in this implementation
				return xpath_node_set_raw();

			case ast_step:
			{
				// dispatch on the runtime axis so that step_do receives a distinct axis_to_type<> tag per axis
				switch (_axis)
				{
				case axis_ancestor:
					return step_do(c, stack, eval, axis_to_type<axis_ancestor>());

				case axis_ancestor_or_self:
					return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());

				case axis_attribute:
					return step_do(c, stack, eval, axis_to_type<axis_attribute>());

				case axis_child:
					return step_do(c, stack, eval, axis_to_type<axis_child>());

				case axis_descendant:
					return step_do(c, stack, eval, axis_to_type<axis_descendant>());

				case axis_descendant_or_self:
					return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());

				case axis_following:
					return step_do(c, stack, eval, axis_to_type<axis_following>());

				case axis_following_sibling:
					return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());

				case axis_namespace:
					// namespaced axis is not supported
					return xpath_node_set_raw();

				case axis_parent:
					return step_do(c, stack, eval, axis_to_type<axis_parent>());

				case axis_preceding:
					return step_do(c, stack, eval, axis_to_type<axis_preceding>());

				case axis_preceding_sibling:
					return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());

				case axis_self:
					return step_do(c, stack, eval, axis_to_type<axis_self>());

				default:
					assert(false && "Unknown axis");
					return xpath_node_set_raw();
				}
			}

			case ast_step_root:
			{
				assert(!_right); // root step can't have any predicates

				xpath_node_set_raw ns;

				ns.set_type(xpath_node_set::type_sorted);

				// the result holds at most one node: the root reached from the context node, or from the
				// parent of the context attribute; it stays empty if the context is neither
				if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
				else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);

				return ns;
			}

			case ast_variable:
			{
				assert(_rettype == _data.variable->type());

				if (_rettype == xpath_type_node_set)
				{
					const xpath_node_set& s = _data.variable->get_node_set();

					// copy the variable's nodes into a fresh set (allocated from stack.result), keeping its type
					xpath_node_set_raw ns;

					ns.set_type(s.type());
					ns.append(s.begin(), s.end(), stack.result);

					return ns;
				}

				// fallthrough to type conversion
			}

			default:
				// no conversion to node set exists, so any other node type is a usage error
				assert(false && "Wrong expression for return type node set");
				return xpath_node_set_raw();
			}
		}
10774 
optimize(xpath_allocator * alloc)10775 		void optimize(xpath_allocator* alloc)
10776 		{
10777 			if (_left)
10778 				_left->optimize(alloc);
10779 
10780 			if (_right)
10781 				_right->optimize(alloc);
10782 
10783 			if (_next)
10784 				_next->optimize(alloc);
10785 
10786 			optimize_self(alloc);
10787 		}
10788 
optimize_self(xpath_allocator * alloc)10789 		void optimize_self(xpath_allocator* alloc)
10790 		{
10791 			// Rewrite [position()=expr] with [expr]
10792 			// Note that this step has to go before classification to recognize [position()=1]
10793 			if ((_type == ast_filter || _type == ast_predicate) &&
10794 				_right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number)
10795 			{
10796 				_right = _right->_right;
10797 			}
10798 
10799 			// Classify filter/predicate ops to perform various optimizations during evaluation
10800 			if (_type == ast_filter || _type == ast_predicate)
10801 			{
10802 				assert(_test == predicate_default);
10803 
10804 				if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
10805 					_test = predicate_constant_one;
10806 				else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
10807 					_test = predicate_constant;
10808 				else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
10809 					_test = predicate_posinv;
10810 			}
10811 
10812 			// Rewrite descendant-or-self::node()/child::foo with descendant::foo
10813 			// The former is a full form of //foo, the latter is much faster since it executes the node test immediately
10814 			// Do a similar kind of rewrite for self/descendant/descendant-or-self axes
10815 			// Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
10816 			if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && _left &&
10817 				_left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
10818 				is_posinv_step())
10819 			{
10820 				if (_axis == axis_child || _axis == axis_descendant)
10821 					_axis = axis_descendant;
10822 				else
10823 					_axis = axis_descendant_or_self;
10824 
10825 				_left = _left->_left;
10826 			}
10827 
10828 			// Use optimized lookup table implementation for translate() with constant arguments
10829 			if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
10830 			{
10831 				unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
10832 
10833 				if (table)
10834 				{
10835 					_type = ast_opt_translate_table;
10836 					_data.table = table;
10837 				}
10838 			}
10839 
10840 			// Use optimized path for @attr = 'value' or @attr = $value
10841 			if (_type == ast_op_equal &&
10842 				_left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
10843 				(_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string)))
10844 			{
10845 				_type = ast_opt_compare_attribute;
10846 			}
10847 		}
10848 
is_posinv_expr() const10849 		bool is_posinv_expr() const
10850 		{
10851 			switch (_type)
10852 			{
10853 			case ast_func_position:
10854 			case ast_func_last:
10855 				return false;
10856 
10857 			case ast_string_constant:
10858 			case ast_number_constant:
10859 			case ast_variable:
10860 				return true;
10861 
10862 			case ast_step:
10863 			case ast_step_root:
10864 				return true;
10865 
10866 			case ast_predicate:
10867 			case ast_filter:
10868 				return true;
10869 
10870 			default:
10871 				if (_left && !_left->is_posinv_expr()) return false;
10872 
10873 				for (xpath_ast_node* n = _right; n; n = n->_next)
10874 					if (!n->is_posinv_expr()) return false;
10875 
10876 				return true;
10877 			}
10878 		}
10879 
is_posinv_step() const10880 		bool is_posinv_step() const
10881 		{
10882 			assert(_type == ast_step);
10883 
10884 			for (xpath_ast_node* n = _right; n; n = n->_next)
10885 			{
10886 				assert(n->_type == ast_predicate);
10887 
10888 				if (n->_test != predicate_posinv)
10889 					return false;
10890 			}
10891 
10892 			return true;
10893 		}
10894 
rettype() const10895 		xpath_value_type rettype() const
10896 		{
10897 			return static_cast<xpath_value_type>(_rettype);
10898 		}
10899 	};
10900 
10901 	struct xpath_parser
10902 	{
10903 		xpath_allocator* _alloc;
10904 		xpath_lexer _lexer;
10905 
10906 		const char_t* _query;
10907 		xpath_variable_set* _variables;
10908 
10909 		xpath_parse_result* _result;
10910 
10911 		char_t _scratch[32];
10912 
10913 	#ifdef PUGIXML_NO_EXCEPTIONS
10914 		jmp_buf _error_handler;
10915 	#endif
10916 
throw_errorxpath_parser10917 		void throw_error(const char* message)
10918 		{
10919 			_result->error = message;
10920 			_result->offset = _lexer.current_pos() - _query;
10921 
10922 		#ifdef PUGIXML_NO_EXCEPTIONS
10923 			longjmp(_error_handler, 1);
10924 		#else
10925 			throw xpath_exception(*_result);
10926 		#endif
10927 		}
10928 
throw_error_oomxpath_parser10929 		void throw_error_oom()
10930 		{
10931 		#ifdef PUGIXML_NO_EXCEPTIONS
10932 			throw_error("Out of memory");
10933 		#else
10934 			throw std::bad_alloc();
10935 		#endif
10936 		}
10937 
alloc_nodexpath_parser10938 		void* alloc_node()
10939 		{
10940 			void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node));
10941 
10942 			if (!result) throw_error_oom();
10943 
10944 			return result;
10945 		}
10946 
alloc_stringxpath_parser10947 		const char_t* alloc_string(const xpath_lexer_string& value)
10948 		{
10949 			if (value.begin)
10950 			{
10951 				size_t length = static_cast<size_t>(value.end - value.begin);
10952 
10953 				char_t* c = static_cast<char_t*>(_alloc->allocate_nothrow((length + 1) * sizeof(char_t)));
10954 				if (!c) throw_error_oom();
10955 				assert(c); // workaround for clang static analysis
10956 
10957 				memcpy(c, value.begin, length * sizeof(char_t));
10958 				c[length] = 0;
10959 
10960 				return c;
10961 			}
10962 			else return 0;
10963 		}
10964 
parse_function_helperxpath_parser10965 		xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2])
10966 		{
10967 			assert(argc <= 1);
10968 
10969 			if (argc == 1 && args[0]->rettype() != xpath_type_node_set)
10970 				throw_error("Function has to be applied to node set");
10971 
10972 			return new (alloc_node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]);
10973 		}
10974 
parse_functionxpath_parser10975 		xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
10976 		{
10977 			switch (name.begin[0])
10978 			{
10979 			case 'b':
10980 				if (name == PUGIXML_TEXT("boolean") && argc == 1)
10981 					return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
10982 
10983 				break;
10984 
10985 			case 'c':
10986 				if (name == PUGIXML_TEXT("count") && argc == 1)
10987 				{
10988 					if (args[0]->rettype() != xpath_type_node_set)
10989 						throw_error("Function has to be applied to node set");
10990 
10991 					return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
10992 				}
10993 				else if (name == PUGIXML_TEXT("contains") && argc == 2)
10994 					return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
10995 				else if (name == PUGIXML_TEXT("concat") && argc >= 2)
10996 					return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
10997 				else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
10998 					return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
10999 
11000 				break;
11001 
11002 			case 'f':
11003 				if (name == PUGIXML_TEXT("false") && argc == 0)
11004 					return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);
11005 				else if (name == PUGIXML_TEXT("floor") && argc == 1)
11006 					return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
11007 
11008 				break;
11009 
11010 			case 'i':
11011 				if (name == PUGIXML_TEXT("id") && argc == 1)
11012 					return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
11013 
11014 				break;
11015 
11016 			case 'l':
11017 				if (name == PUGIXML_TEXT("last") && argc == 0)
11018 					return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);
11019 				else if (name == PUGIXML_TEXT("lang") && argc == 1)
11020 					return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
11021 				else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
11022 					return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args);
11023 
11024 				break;
11025 
11026 			case 'n':
11027 				if (name == PUGIXML_TEXT("name") && argc <= 1)
11028 					return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args);
11029 				else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
11030 					return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
11031 				else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
11032 					return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
11033 				else if (name == PUGIXML_TEXT("not") && argc == 1)
11034 					return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
11035 				else if (name == PUGIXML_TEXT("number") && argc <= 1)
11036 					return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
11037 
11038 				break;
11039 
11040 			case 'p':
11041 				if (name == PUGIXML_TEXT("position") && argc == 0)
11042 					return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);
11043 
11044 				break;
11045 
11046 			case 'r':
11047 				if (name == PUGIXML_TEXT("round") && argc == 1)
11048 					return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
11049 
11050 				break;
11051 
11052 			case 's':
11053 				if (name == PUGIXML_TEXT("string") && argc <= 1)
11054 					return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
11055 				else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
11056 					return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
11057 				else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
11058 					return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
11059 				else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
11060 					return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
11061 				else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
11062 					return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
11063 				else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
11064 					return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
11065 				else if (name == PUGIXML_TEXT("sum") && argc == 1)
11066 				{
11067 					if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
11068 					return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
11069 				}
11070 
11071 				break;
11072 
11073 			case 't':
11074 				if (name == PUGIXML_TEXT("translate") && argc == 3)
11075 					return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
11076 				else if (name == PUGIXML_TEXT("true") && argc == 0)
11077 					return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
11078 
11079 				break;
11080 
11081 			default:
11082 				break;
11083 			}
11084 
11085 			throw_error("Unrecognized function or wrong parameter count");
11086 
11087 			return 0;
11088 		}
11089 
parse_axis_namexpath_parser11090 		axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
11091 		{
11092 			specified = true;
11093 
11094 			switch (name.begin[0])
11095 			{
11096 			case 'a':
11097 				if (name == PUGIXML_TEXT("ancestor"))
11098 					return axis_ancestor;
11099 				else if (name == PUGIXML_TEXT("ancestor-or-self"))
11100 					return axis_ancestor_or_self;
11101 				else if (name == PUGIXML_TEXT("attribute"))
11102 					return axis_attribute;
11103 
11104 				break;
11105 
11106 			case 'c':
11107 				if (name == PUGIXML_TEXT("child"))
11108 					return axis_child;
11109 
11110 				break;
11111 
11112 			case 'd':
11113 				if (name == PUGIXML_TEXT("descendant"))
11114 					return axis_descendant;
11115 				else if (name == PUGIXML_TEXT("descendant-or-self"))
11116 					return axis_descendant_or_self;
11117 
11118 				break;
11119 
11120 			case 'f':
11121 				if (name == PUGIXML_TEXT("following"))
11122 					return axis_following;
11123 				else if (name == PUGIXML_TEXT("following-sibling"))
11124 					return axis_following_sibling;
11125 
11126 				break;
11127 
11128 			case 'n':
11129 				if (name == PUGIXML_TEXT("namespace"))
11130 					return axis_namespace;
11131 
11132 				break;
11133 
11134 			case 'p':
11135 				if (name == PUGIXML_TEXT("parent"))
11136 					return axis_parent;
11137 				else if (name == PUGIXML_TEXT("preceding"))
11138 					return axis_preceding;
11139 				else if (name == PUGIXML_TEXT("preceding-sibling"))
11140 					return axis_preceding_sibling;
11141 
11142 				break;
11143 
11144 			case 's':
11145 				if (name == PUGIXML_TEXT("self"))
11146 					return axis_self;
11147 
11148 				break;
11149 
11150 			default:
11151 				break;
11152 			}
11153 
11154 			specified = false;
11155 			return axis_child;
11156 		}
11157 
parse_node_test_typexpath_parser11158 		nodetest_t parse_node_test_type(const xpath_lexer_string& name)
11159 		{
11160 			switch (name.begin[0])
11161 			{
11162 			case 'c':
11163 				if (name == PUGIXML_TEXT("comment"))
11164 					return nodetest_type_comment;
11165 
11166 				break;
11167 
11168 			case 'n':
11169 				if (name == PUGIXML_TEXT("node"))
11170 					return nodetest_type_node;
11171 
11172 				break;
11173 
11174 			case 'p':
11175 				if (name == PUGIXML_TEXT("processing-instruction"))
11176 					return nodetest_type_pi;
11177 
11178 				break;
11179 
11180 			case 't':
11181 				if (name == PUGIXML_TEXT("text"))
11182 					return nodetest_type_text;
11183 
11184 				break;
11185 
11186 			default:
11187 				break;
11188 			}
11189 
11190 			return nodetest_none;
11191 		}
11192 
11193 		// PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
parse_primary_expressionxpath_parser11194 		xpath_ast_node* parse_primary_expression()
11195 		{
11196 			switch (_lexer.current())
11197 			{
11198 			case lex_var_ref:
11199 			{
11200 				xpath_lexer_string name = _lexer.contents();
11201 
11202 				if (!_variables)
11203 					throw_error("Unknown variable: variable set is not provided");
11204 
11205 				xpath_variable* var = 0;
11206 				if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var))
11207 					throw_error_oom();
11208 
11209 				if (!var)
11210 					throw_error("Unknown variable: variable set does not contain the given name");
11211 
11212 				_lexer.next();
11213 
11214 				return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var);
11215 			}
11216 
11217 			case lex_open_brace:
11218 			{
11219 				_lexer.next();
11220 
11221 				xpath_ast_node* n = parse_expression();
11222 
11223 				if (_lexer.current() != lex_close_brace)
11224 					throw_error("Unmatched braces");
11225 
11226 				_lexer.next();
11227 
11228 				return n;
11229 			}
11230 
11231 			case lex_quoted_string:
11232 			{
11233 				const char_t* value = alloc_string(_lexer.contents());
11234 
11235 				xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value);
11236 				_lexer.next();
11237 
11238 				return n;
11239 			}
11240 
11241 			case lex_number:
11242 			{
11243 				double value = 0;
11244 
11245 				if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
11246 					throw_error_oom();
11247 
11248 				xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value);
11249 				_lexer.next();
11250 
11251 				return n;
11252 			}
11253 
11254 			case lex_string:
11255 			{
11256 				xpath_ast_node* args[2] = {0};
11257 				size_t argc = 0;
11258 
11259 				xpath_lexer_string function = _lexer.contents();
11260 				_lexer.next();
11261 
11262 				xpath_ast_node* last_arg = 0;
11263 
11264 				if (_lexer.current() != lex_open_brace)
11265 					throw_error("Unrecognized function call");
11266 				_lexer.next();
11267 
11268 				if (_lexer.current() != lex_close_brace)
11269 					args[argc++] = parse_expression();
11270 
11271 				while (_lexer.current() != lex_close_brace)
11272 				{
11273 					if (_lexer.current() != lex_comma)
11274 						throw_error("No comma between function arguments");
11275 					_lexer.next();
11276 
11277 					xpath_ast_node* n = parse_expression();
11278 
11279 					if (argc < 2) args[argc] = n;
11280 					else last_arg->set_next(n);
11281 
11282 					argc++;
11283 					last_arg = n;
11284 				}
11285 
11286 				_lexer.next();
11287 
11288 				return parse_function(function, argc, args);
11289 			}
11290 
11291 			default:
11292 				throw_error("Unrecognizable primary expression");
11293 
11294 				return 0;
11295 			}
11296 		}
11297 
11298 		// FilterExpr ::= PrimaryExpr | FilterExpr Predicate
11299 		// Predicate ::= '[' PredicateExpr ']'
11300 		// PredicateExpr ::= Expr
parse_filter_expressionxpath_parser11301 		xpath_ast_node* parse_filter_expression()
11302 		{
11303 			xpath_ast_node* n = parse_primary_expression();
11304 
11305 			while (_lexer.current() == lex_open_square_brace)
11306 			{
11307 				_lexer.next();
11308 
11309 				xpath_ast_node* expr = parse_expression();
11310 
11311 				if (n->rettype() != xpath_type_node_set)
11312 					throw_error("Predicate has to be applied to node set");
11313 
11314 				n = new (alloc_node()) xpath_ast_node(ast_filter, n, expr, predicate_default);
11315 
11316 				if (_lexer.current() != lex_close_square_brace)
11317 					throw_error("Unmatched square brace");
11318 
11319 				_lexer.next();
11320 			}
11321 
11322 			return n;
11323 		}
11324 
11325 		// Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
11326 		// AxisSpecifier ::= AxisName '::' | '@'?
11327 		// NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
11328 		// NameTest ::= '*' | NCName ':' '*' | QName
11329 		// AbbreviatedStep ::= '.' | '..'
		// Parses a single location step applied to `set` (which may be null) and returns the resulting
		// ast_step node, with any predicates attached as a chain starting at the step's right link.
		// Handles the '@' abbreviation, '.' and '..', explicit "axis::" prefixes, '*', node type tests,
		// processing-instruction('literal'), and name tests including the NCName:* form.
		// Malformed input is reported through throw_error.
		xpath_ast_node* parse_step(xpath_ast_node* set)
		{
			if (set && set->rettype() != xpath_type_node_set)
				throw_error("Step has to be applied to node set");

			bool axis_specified = false;
			axis_t axis = axis_child; // implied child axis

			if (_lexer.current() == lex_axis_attribute)
			{
				axis = axis_attribute;
				axis_specified = true;

				_lexer.next();
			}
			else if (_lexer.current() == lex_dot)
			{
				_lexer.next();

				// '.' is a complete step: self::node(), returned without looking for predicates
				return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0);
			}
			else if (_lexer.current() == lex_double_dot)
			{
				_lexer.next();

				// '..' is a complete step: parent::node(), returned without looking for predicates
				return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0);
			}

			nodetest_t nt_type = nodetest_none;
			xpath_lexer_string nt_name;

			if (_lexer.current() == lex_string)
			{
				// node name test
				nt_name = _lexer.contents();
				_lexer.next();

				// was it an axis name?
				if (_lexer.current() == lex_double_colon)
				{
					// parse axis name
					if (axis_specified)
						throw_error("Two axis specifiers in one step");

					axis = parse_axis_name(nt_name, axis_specified);

					if (!axis_specified)
						throw_error("Unknown axis");

					// read actual node test
					_lexer.next();

					if (_lexer.current() == lex_multiply)
					{
						nt_type = nodetest_all;
						nt_name = xpath_lexer_string();
						_lexer.next();
					}
					else if (_lexer.current() == lex_string)
					{
						nt_name = _lexer.contents();
						_lexer.next();
					}
					else throw_error("Unrecognized node test");
				}

				// nt_type is still unset unless "axis::*" was parsed above
				if (nt_type == nodetest_none)
				{
					// node type test or processing-instruction
					if (_lexer.current() == lex_open_brace)
					{
						_lexer.next();

						if (_lexer.current() == lex_close_brace)
						{
							_lexer.next();

							nt_type = parse_node_test_type(nt_name);

							if (nt_type == nodetest_none)
								throw_error("Unrecognized node type");

							// a node type test carries no name
							nt_name = xpath_lexer_string();
						}
						else if (nt_name == PUGIXML_TEXT("processing-instruction"))
						{
							if (_lexer.current() != lex_quoted_string)
								throw_error("Only literals are allowed as arguments to processing-instruction()");

							// the literal argument replaces the name that is stored with the step
							nt_type = nodetest_pi;
							nt_name = _lexer.contents();
							_lexer.next();

							if (_lexer.current() != lex_close_brace)
								throw_error("Unmatched brace near processing-instruction()");
							_lexer.next();
						}
						else
						{
							throw_error("Unmatched brace near node type test");
						}
					}
					// QName or NCName:*
					else
					{
						if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
						{
							nt_name.end--; // erase *

							nt_type = nodetest_all_in_namespace;
						}
						else
						{
							nt_type = nodetest_name;
						}
					}
				}
			}
			else if (_lexer.current() == lex_multiply)
			{
				nt_type = nodetest_all;
				_lexer.next();
			}
			else
			{
				throw_error("Unrecognized node test");
			}

			// the name is copied with alloc_string before it is stored in the node
			const char_t* nt_name_copy = alloc_string(nt_name);
			xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, nt_name_copy);

			// last predicate appended so far; null until the first one is parsed
			xpath_ast_node* last = 0;

			while (_lexer.current() == lex_open_square_brace)
			{
				_lexer.next();

				xpath_ast_node* expr = parse_expression();

				xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, 0, expr, predicate_default);

				if (_lexer.current() != lex_close_square_brace)
					throw_error("Unmatched square brace");
				_lexer.next();

				// the first predicate hangs off the step itself, later ones are chained via set_next
				if (last) last->set_next(pred);
				else n->set_right(pred);

				last = pred;
			}

			return n;
		}
11483 
11484 		// RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
parse_relative_location_pathxpath_parser11485 		xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
11486 		{
11487 			xpath_ast_node* n = parse_step(set);
11488 
11489 			while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11490 			{
11491 				lexeme_t l = _lexer.current();
11492 				_lexer.next();
11493 
11494 				if (l == lex_double_slash)
11495 					n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11496 
11497 				n = parse_step(n);
11498 			}
11499 
11500 			return n;
11501 		}
11502 
11503 		// LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
11504 		// AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
parse_location_pathxpath_parser11505 		xpath_ast_node* parse_location_path()
11506 		{
11507 			if (_lexer.current() == lex_slash)
11508 			{
11509 				_lexer.next();
11510 
11511 				xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
11512 
11513 				// relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
11514 				lexeme_t l = _lexer.current();
11515 
11516 				if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
11517 					return parse_relative_location_path(n);
11518 				else
11519 					return n;
11520 			}
11521 			else if (_lexer.current() == lex_double_slash)
11522 			{
11523 				_lexer.next();
11524 
11525 				xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
11526 				n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11527 
11528 				return parse_relative_location_path(n);
11529 			}
11530 
11531 			// else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
11532 			return parse_relative_location_path(0);
11533 		}
11534 
11535 		// PathExpr ::= LocationPath
11536 		//				| FilterExpr
11537 		//				| FilterExpr '/' RelativeLocationPath
11538 		//				| FilterExpr '//' RelativeLocationPath
11539 		// UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
11540 		// UnaryExpr ::= UnionExpr | '-' UnaryExpr
parse_path_or_unary_expressionxpath_parser11541 		xpath_ast_node* parse_path_or_unary_expression()
11542 		{
11543 			// Clarification.
11544 			// PathExpr begins with either LocationPath or FilterExpr.
11545 			// FilterExpr begins with PrimaryExpr
11546 			// PrimaryExpr begins with '$' in case of it being a variable reference,
11547 			// '(' in case of it being an expression, string literal, number constant or
11548 			// function call.
11549 
11550 			if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
11551 				_lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
11552 				_lexer.current() == lex_string)
11553 			{
11554 				if (_lexer.current() == lex_string)
11555 				{
11556 					// This is either a function call, or not - if not, we shall proceed with location path
11557 					const char_t* state = _lexer.state();
11558 
11559 					while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
11560 
11561 					if (*state != '(') return parse_location_path();
11562 
11563 					// This looks like a function call; however this still can be a node-test. Check it.
11564 					if (parse_node_test_type(_lexer.contents()) != nodetest_none)
11565 						return parse_location_path();
11566 				}
11567 
11568 				xpath_ast_node* n = parse_filter_expression();
11569 
11570 				if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11571 				{
11572 					lexeme_t l = _lexer.current();
11573 					_lexer.next();
11574 
11575 					if (l == lex_double_slash)
11576 					{
11577 						if (n->rettype() != xpath_type_node_set)
11578 							throw_error("Step has to be applied to node set");
11579 
11580 						n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11581 					}
11582 
11583 					// select from location path
11584 					return parse_relative_location_path(n);
11585 				}
11586 
11587 				return n;
11588 			}
11589 			else if (_lexer.current() == lex_minus)
11590 			{
11591 				_lexer.next();
11592 
11593 				// precedence 7+ - only parses union expressions
11594 				xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7);
11595 
11596 				return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
11597 			}
11598 			else
11599 			{
11600 				return parse_location_path();
11601 			}
11602 		}
11603 
11604 		struct binary_op_t
11605 		{
11606 			ast_type_t asttype;
11607 			xpath_value_type rettype;
11608 			int precedence;
11609 
binary_op_txpath_parser::binary_op_t11610 			binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
11611 			{
11612 			}
11613 
binary_op_txpath_parser::binary_op_t11614 			binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
11615 			{
11616 			}
11617 
parsexpath_parser::binary_op_t11618 			static binary_op_t parse(xpath_lexer& lexer)
11619 			{
11620 				switch (lexer.current())
11621 				{
11622 				case lex_string:
11623 					if (lexer.contents() == PUGIXML_TEXT("or"))
11624 						return binary_op_t(ast_op_or, xpath_type_boolean, 1);
11625 					else if (lexer.contents() == PUGIXML_TEXT("and"))
11626 						return binary_op_t(ast_op_and, xpath_type_boolean, 2);
11627 					else if (lexer.contents() == PUGIXML_TEXT("div"))
11628 						return binary_op_t(ast_op_divide, xpath_type_number, 6);
11629 					else if (lexer.contents() == PUGIXML_TEXT("mod"))
11630 						return binary_op_t(ast_op_mod, xpath_type_number, 6);
11631 					else
11632 						return binary_op_t();
11633 
11634 				case lex_equal:
11635 					return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
11636 
11637 				case lex_not_equal:
11638 					return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
11639 
11640 				case lex_less:
11641 					return binary_op_t(ast_op_less, xpath_type_boolean, 4);
11642 
11643 				case lex_greater:
11644 					return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
11645 
11646 				case lex_less_or_equal:
11647 					return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
11648 
11649 				case lex_greater_or_equal:
11650 					return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
11651 
11652 				case lex_plus:
11653 					return binary_op_t(ast_op_add, xpath_type_number, 5);
11654 
11655 				case lex_minus:
11656 					return binary_op_t(ast_op_subtract, xpath_type_number, 5);
11657 
11658 				case lex_multiply:
11659 					return binary_op_t(ast_op_multiply, xpath_type_number, 6);
11660 
11661 				case lex_union:
11662 					return binary_op_t(ast_op_union, xpath_type_node_set, 7);
11663 
11664 				default:
11665 					return binary_op_t();
11666 				}
11667 			}
11668 		};
11669 
		// Operator-precedence parser for binary expressions.
		// lhs is the already parsed left operand; limit is the lowest operator precedence
		// this call is allowed to consume. Returns the root of the combined expression,
		// or lhs unchanged when the current lexeme is not an operator of sufficient precedence.
		xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
		{
			binary_op_t op = binary_op_t::parse(_lexer);

			// fold operators into lhs one at a time, which makes equal-precedence operators left-associative
			while (op.asttype != ast_unknown && op.precedence >= limit)
			{
				// consume the operator lexeme
				_lexer.next();

				xpath_ast_node* rhs = parse_path_or_unary_expression();

				binary_op_t nextop = binary_op_t::parse(_lexer);

				// an operator that binds tighter than op belongs to the right operand, so let it extend rhs first
				while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
				{
					rhs = parse_expression_rec(rhs, nextop.precedence);

					nextop = binary_op_t::parse(_lexer);
				}

				// '|' is only valid between node sets; this is checked on the static result types
				if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
					throw_error("Union operator has to be applied to node sets");

				lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs);

				op = binary_op_t::parse(_lexer);
			}

			return lhs;
		}
11699 
11700 		// Expr ::= OrExpr
11701 		// OrExpr ::= AndExpr | OrExpr 'or' AndExpr
11702 		// AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
11703 		// EqualityExpr ::= RelationalExpr
11704 		//					| EqualityExpr '=' RelationalExpr
11705 		//					| EqualityExpr '!=' RelationalExpr
11706 		// RelationalExpr ::= AdditiveExpr
11707 		//					  | RelationalExpr '<' AdditiveExpr
11708 		//					  | RelationalExpr '>' AdditiveExpr
11709 		//					  | RelationalExpr '<=' AdditiveExpr
11710 		//					  | RelationalExpr '>=' AdditiveExpr
11711 		// AdditiveExpr ::= MultiplicativeExpr
11712 		//					| AdditiveExpr '+' MultiplicativeExpr
11713 		//					| AdditiveExpr '-' MultiplicativeExpr
11714 		// MultiplicativeExpr ::= UnaryExpr
11715 		//						  | MultiplicativeExpr '*' UnaryExpr
11716 		//						  | MultiplicativeExpr 'div' UnaryExpr
11717 		//						  | MultiplicativeExpr 'mod' UnaryExpr
parse_expressionxpath_parser11718 		xpath_ast_node* parse_expression()
11719 		{
11720 			return parse_expression_rec(parse_path_or_unary_expression(), 0);
11721 		}
11722 
xpath_parserxpath_parser11723 		xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)
11724 		{
11725 		}
11726 
parsexpath_parser11727 		xpath_ast_node* parse()
11728 		{
11729 			xpath_ast_node* result = parse_expression();
11730 
11731 			// check if there are unparsed tokens left
11732 			if (_lexer.current() != lex_eof)
11733 				throw_error("Incorrect query");
11734 
11735 			return result;
11736 		}
11737 
parsexpath_parser11738 		static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
11739 		{
11740 			xpath_parser parser(query, variables, alloc, result);
11741 
11742 		#ifdef PUGIXML_NO_EXCEPTIONS
11743 			int error = setjmp(parser._error_handler);
11744 
11745 			return (error == 0) ? parser.parse() : 0;
11746 		#else
11747 			return parser.parse();
11748 		#endif
11749 		}
11750 	};
11751 
11752 	struct xpath_query_impl
11753 	{
createxpath_query_impl11754 		static xpath_query_impl* create()
11755 		{
11756 			void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
11757 			if (!memory) return 0;
11758 
11759 			return new (memory) xpath_query_impl();
11760 		}
11761 
destroyxpath_query_impl11762 		static void destroy(xpath_query_impl* impl)
11763 		{
11764 			// free all allocated pages
11765 			impl->alloc.release();
11766 
11767 			// free allocator memory (with the first page)
11768 			xml_memory::deallocate(impl);
11769 		}
11770 
xpath_query_implxpath_query_impl11771 		xpath_query_impl(): root(0), alloc(&block)
11772 		{
11773 			block.next = 0;
11774 			block.capacity = sizeof(block.data);
11775 		}
11776 
11777 		xpath_ast_node* root;
11778 		xpath_allocator alloc;
11779 		xpath_memory_block block;
11780 	};
11781 
evaluate_string_impl(xpath_query_impl * impl,const xpath_node & n,xpath_stack_data & sd)11782 	PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd)
11783 	{
11784 		if (!impl) return xpath_string();
11785 
11786 	#ifdef PUGIXML_NO_EXCEPTIONS
11787 		if (setjmp(sd.error_handler)) return xpath_string();
11788 	#endif
11789 
11790 		xpath_context c(n, 1, 1);
11791 
11792 		return impl->root->eval_string(c, sd.stack);
11793 	}
11794 
evaluate_node_set_prepare(xpath_query_impl * impl)11795 	PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
11796 	{
11797 		if (!impl) return 0;
11798 
11799 		if (impl->root->rettype() != xpath_type_node_set)
11800 		{
11801 		#ifdef PUGIXML_NO_EXCEPTIONS
11802 			return 0;
11803 		#else
11804 			xpath_parse_result res;
11805 			res.error = "Expression does not evaluate to node set";
11806 
11807 			throw xpath_exception(res);
11808 		#endif
11809 		}
11810 
11811 		return impl->root;
11812 	}
11813 PUGI__NS_END
11814 
11815 namespace pugi
11816 {
11817 #ifndef PUGIXML_NO_EXCEPTIONS
xpath_exception(const xpath_parse_result & result_)11818 	PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
11819 	{
11820 		assert(_result.error);
11821 	}
11822 
	// Returns the error message of the stored parse result.
	PUGI__FN const char* xpath_exception::what() const throw()
	{
		return _result.error;
	}
11827 
	// Returns the parse result this exception was constructed from.
	PUGI__FN const xpath_parse_result& xpath_exception::result() const
	{
		return _result;
	}
11832 #endif
11833 
	// Default constructor: _node and _attribute are default-constructed.
	PUGI__FN xpath_node::xpath_node()
	{
	}
11837 
	// Constructs an XPath node that refers to an XML node; _attribute is default-constructed.
	PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
	{
	}
11841 
	// Constructs an XPath node that refers to an attribute.
	// The parent is only stored when attribute_ tests true; otherwise _node is a default-constructed xml_node.
	PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
	{
	}
11845 
node() const11846 	PUGI__FN xml_node xpath_node::node() const
11847 	{
11848 		return _attribute ? xml_node() : _node;
11849 	}
11850 
	// Returns the stored attribute handle.
	PUGI__FN xml_attribute xpath_node::attribute() const
	{
		return _attribute;
	}
11855 
parent() const11856 	PUGI__FN xml_node xpath_node::parent() const
11857 	{
11858 		return _attribute ? _node : _node.parent();
11859 	}
11860 
	// Empty function whose address is returned by the unspecified_bool_type conversion
	// of xpath_node; it is never meant to do anything.
	PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
	{
	}
11864 
operator xpath_node::unspecified_bool_type() const11865 	PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
11866 	{
11867 		return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
11868 	}
11869 
operator !() const11870 	PUGI__FN bool xpath_node::operator!() const
11871 	{
11872 		return !(_node || _attribute);
11873 	}
11874 
operator ==(const xpath_node & n) const11875 	PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
11876 	{
11877 		return _node == n._node && _attribute == n._attribute;
11878 	}
11879 
operator !=(const xpath_node & n) const11880 	PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
11881 	{
11882 		return _node != n._node || _attribute != n._attribute;
11883 	}
11884 
11885 #ifdef __BORLANDC__
	// Borland C++ only: logical AND of an xpath_node (converted to bool) with a bool.
	PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
	{
		return (bool)lhs && rhs;
	}
11890 
	// Borland C++ only: logical OR of an xpath_node (converted to bool) with a bool.
	PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
	{
		return (bool)lhs || rhs;
	}
11895 #endif
11896 
_assign(const_iterator begin_,const_iterator end_,type_t type_)11897 	PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_)
11898 	{
11899 		assert(begin_ <= end_);
11900 
11901 		size_t size_ = static_cast<size_t>(end_ - begin_);
11902 
11903 		if (size_ <= 1)
11904 		{
11905 			// deallocate old buffer
11906 			if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
11907 
11908 			// use internal buffer
11909 			if (begin_ != end_) _storage = *begin_;
11910 
11911 			_begin = &_storage;
11912 			_end = &_storage + size_;
11913 			_type = type_;
11914 		}
11915 		else
11916 		{
11917 			// make heap copy
11918 			xpath_node* storage = static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
11919 
11920 			if (!storage)
11921 			{
11922 			#ifdef PUGIXML_NO_EXCEPTIONS
11923 				return;
11924 			#else
11925 				throw std::bad_alloc();
11926 			#endif
11927 			}
11928 
11929 			memcpy(storage, begin_, size_ * sizeof(xpath_node));
11930 
11931 			// deallocate old buffer
11932 			if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
11933 
11934 			// finalize
11935 			_begin = storage;
11936 			_end = storage + size_;
11937 			_type = type_;
11938 		}
11939 	}
11940 
11941 #ifdef PUGIXML_HAS_MOVE
_move(xpath_node_set & rhs)11942 	PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs)
11943 	{
11944 		_type = rhs._type;
11945 		_storage = rhs._storage;
11946 		_begin = (rhs._begin == &rhs._storage) ? &_storage : rhs._begin;
11947 		_end = _begin + (rhs._end - rhs._begin);
11948 
11949 		rhs._type = type_unsorted;
11950 		rhs._begin = &rhs._storage;
11951 		rhs._end = rhs._begin;
11952 	}
11953 #endif
11954 
	// Constructs an empty, unsorted set whose range points at the inline _storage slot.
	PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage)
	{
	}
11958 
	// Constructs a set holding a copy of [begin_, end_) with the given type.
	// Starts out empty and delegates the copy to _assign.
	PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(&_storage), _end(&_storage)
	{
		_assign(begin_, end_, type_);
	}
11963 
~xpath_node_set()11964 	PUGI__FN xpath_node_set::~xpath_node_set()
11965 	{
11966 		if (_begin != &_storage)
11967 			impl::xml_memory::deallocate(_begin);
11968 	}
11969 
	// Copy constructor: starts out empty and copies the range and type of ns via _assign.
	PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(&_storage), _end(&_storage)
	{
		_assign(ns._begin, ns._end, ns._type);
	}
11974 
operator =(const xpath_node_set & ns)11975 	PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
11976 	{
11977 		if (this == &ns) return *this;
11978 
11979 		_assign(ns._begin, ns._end, ns._type);
11980 
11981 		return *this;
11982 	}
11983 
11984 #ifdef PUGIXML_HAS_MOVE
	// Move constructor: starts out empty and takes over the contents of rhs via _move.
	PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs): _type(type_unsorted), _begin(&_storage), _end(&_storage)
	{
		_move(rhs);
	}
11989 
operator =(xpath_node_set && rhs)11990 	PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs)
11991 	{
11992 		if (this == &rhs) return *this;
11993 
11994 		if (_begin != &_storage)
11995 			impl::xml_memory::deallocate(_begin);
11996 
11997 		_move(rhs);
11998 
11999 		return *this;
12000 	}
12001 #endif
12002 
	// Returns the stored collection type.
	PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
	{
		return _type;
	}
12007 
size() const12008 	PUGI__FN size_t xpath_node_set::size() const
12009 	{
12010 		return _end - _begin;
12011 	}
12012 
	// Returns true when the set contains no nodes.
	PUGI__FN bool xpath_node_set::empty() const
	{
		return _begin == _end;
	}
12017 
operator [](size_t index) const12018 	PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
12019 	{
12020 		assert(index < size());
12021 		return _begin[index];
12022 	}
12023 
	// Returns an iterator to the first node of the set.
	PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
	{
		return _begin;
	}
12028 
	// Returns the past-the-end iterator of the set.
	PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
	{
		return _end;
	}
12033 
	// Sorts the set via impl::xpath_sort and stores the collection type it reports.
	// reverse is forwarded to xpath_sort to request the opposite order.
	PUGI__FN void xpath_node_set::sort(bool reverse)
	{
		_type = impl::xpath_sort(_begin, _end, _type, reverse);
	}
12038 
	// Returns the node impl::xpath_first selects for the current range and collection type.
	PUGI__FN xpath_node xpath_node_set::first() const
	{
		return impl::xpath_first(_begin, _end, _type);
	}
12043 
	// A default-constructed result carries the message "Internal error" at offset 0,
	// so it converts to false until error is reset to 0.
	PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
	{
	}
12047 
operator bool() const12048 	PUGI__FN xpath_parse_result::operator bool() const
12049 	{
12050 		return error == 0;
12051 	}
12052 
description() const12053 	PUGI__FN const char* xpath_parse_result::description() const
12054 	{
12055 		return error ? error : "No error";
12056 	}
12057 
	// Records the value type of the variable; the variable is not linked to a successor yet.
	PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0)
	{
	}
12061 
name() const12062 	PUGI__FN const char_t* xpath_variable::name() const
12063 	{
12064 		switch (_type)
12065 		{
12066 		case xpath_type_node_set:
12067 			return static_cast<const impl::xpath_variable_node_set*>(this)->name;
12068 
12069 		case xpath_type_number:
12070 			return static_cast<const impl::xpath_variable_number*>(this)->name;
12071 
12072 		case xpath_type_string:
12073 			return static_cast<const impl::xpath_variable_string*>(this)->name;
12074 
12075 		case xpath_type_boolean:
12076 			return static_cast<const impl::xpath_variable_boolean*>(this)->name;
12077 
12078 		default:
12079 			assert(false && "Invalid variable type");
12080 			return 0;
12081 		}
12082 	}
12083 
	// Returns the value type the variable was created with.
	PUGI__FN xpath_value_type xpath_variable::type() const
	{
		return _type;
	}
12088 
get_boolean() const12089 	PUGI__FN bool xpath_variable::get_boolean() const
12090 	{
12091 		return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
12092 	}
12093 
get_number() const12094 	PUGI__FN double xpath_variable::get_number() const
12095 	{
12096 		return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
12097 	}
12098 
get_string() const12099 	PUGI__FN const char_t* xpath_variable::get_string() const
12100 	{
12101 		const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
12102 		return value ? value : PUGIXML_TEXT("");
12103 	}
12104 
get_node_set() const12105 	PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
12106 	{
12107 		return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
12108 	}
12109 
set(bool value)12110 	PUGI__FN bool xpath_variable::set(bool value)
12111 	{
12112 		if (_type != xpath_type_boolean) return false;
12113 
12114 		static_cast<impl::xpath_variable_boolean*>(this)->value = value;
12115 		return true;
12116 	}
12117 
set(double value)12118 	PUGI__FN bool xpath_variable::set(double value)
12119 	{
12120 		if (_type != xpath_type_number) return false;
12121 
12122 		static_cast<impl::xpath_variable_number*>(this)->value = value;
12123 		return true;
12124 	}
12125 
set(const char_t * value)12126 	PUGI__FN bool xpath_variable::set(const char_t* value)
12127 	{
12128 		if (_type != xpath_type_string) return false;
12129 
12130 		impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
12131 
12132 		// duplicate string
12133 		size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
12134 
12135 		char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
12136 		if (!copy) return false;
12137 
12138 		memcpy(copy, value, size);
12139 
12140 		// replace old string
12141 		if (var->value) impl::xml_memory::deallocate(var->value);
12142 		var->value = copy;
12143 
12144 		return true;
12145 	}
12146 
set(const xpath_node_set & value)12147 	PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
12148 	{
12149 		if (_type != xpath_type_node_set) return false;
12150 
12151 		static_cast<impl::xpath_variable_node_set*>(this)->value = value;
12152 		return true;
12153 	}
12154 
xpath_variable_set()12155 	PUGI__FN xpath_variable_set::xpath_variable_set()
12156 	{
12157 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12158 			_data[i] = 0;
12159 	}
12160 
~xpath_variable_set()12161 	PUGI__FN xpath_variable_set::~xpath_variable_set()
12162 	{
12163 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12164 			_destroy(_data[i]);
12165 	}
12166 
xpath_variable_set(const xpath_variable_set & rhs)12167 	PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
12168 	{
12169 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12170 			_data[i] = 0;
12171 
12172 		_assign(rhs);
12173 	}
12174 
operator =(const xpath_variable_set & rhs)12175 	PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
12176 	{
12177 		if (this == &rhs) return *this;
12178 
12179 		_assign(rhs);
12180 
12181 		return *this;
12182 	}
12183 
12184 #ifdef PUGIXML_HAS_MOVE
xpath_variable_set(xpath_variable_set && rhs)12185 	PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs)
12186 	{
12187 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12188 		{
12189 			_data[i] = rhs._data[i];
12190 			rhs._data[i] = 0;
12191 		}
12192 	}
12193 
operator =(xpath_variable_set && rhs)12194 	PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs)
12195 	{
12196 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12197 		{
12198 			_destroy(_data[i]);
12199 
12200 			_data[i] = rhs._data[i];
12201 			rhs._data[i] = 0;
12202 		}
12203 
12204 		return *this;
12205 	}
12206 #endif
12207 
_assign(const xpath_variable_set & rhs)12208 	PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
12209 	{
12210 		xpath_variable_set temp;
12211 
12212 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12213 			if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i]))
12214 				return;
12215 
12216 		_swap(temp);
12217 	}
12218 
_swap(xpath_variable_set & rhs)12219 	PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
12220 	{
12221 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12222 		{
12223 			xpath_variable* chain = _data[i];
12224 
12225 			_data[i] = rhs._data[i];
12226 			rhs._data[i] = chain;
12227 		}
12228 	}
12229 
_find(const char_t * name) const12230 	PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const
12231 	{
12232 		const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12233 		size_t hash = impl::hash_string(name) % hash_size;
12234 
12235 		// look for existing variable
12236 		for (xpath_variable* var = _data[hash]; var; var = var->_next)
12237 			if (impl::strequal(var->name(), name))
12238 				return var;
12239 
12240 		return 0;
12241 	}
12242 
_clone(xpath_variable * var,xpath_variable ** out_result)12243 	PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result)
12244 	{
12245 		xpath_variable* last = 0;
12246 
12247 		while (var)
12248 		{
12249 			// allocate storage for new variable
12250 			xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name());
12251 			if (!nvar) return false;
12252 
12253 			// link the variable to the result immediately to handle failures gracefully
12254 			if (last)
12255 				last->_next = nvar;
12256 			else
12257 				*out_result = nvar;
12258 
12259 			last = nvar;
12260 
12261 			// copy the value; this can fail due to out-of-memory conditions
12262 			if (!impl::copy_xpath_variable(nvar, var)) return false;
12263 
12264 			var = var->_next;
12265 		}
12266 
12267 		return true;
12268 	}
12269 
_destroy(xpath_variable * var)12270 	PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var)
12271 	{
12272 		while (var)
12273 		{
12274 			xpath_variable* next = var->_next;
12275 
12276 			impl::delete_xpath_variable(var->_type, var);
12277 
12278 			var = next;
12279 		}
12280 	}
12281 
add(const char_t * name,xpath_value_type type)12282 	PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
12283 	{
12284 		const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12285 		size_t hash = impl::hash_string(name) % hash_size;
12286 
12287 		// look for existing variable
12288 		for (xpath_variable* var = _data[hash]; var; var = var->_next)
12289 			if (impl::strequal(var->name(), name))
12290 				return var->type() == type ? var : 0;
12291 
12292 		// add new variable
12293 		xpath_variable* result = impl::new_xpath_variable(type, name);
12294 
12295 		if (result)
12296 		{
12297 			result->_next = _data[hash];
12298 
12299 			_data[hash] = result;
12300 		}
12301 
12302 		return result;
12303 	}
12304 
set(const char_t * name,bool value)12305 	PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
12306 	{
12307 		xpath_variable* var = add(name, xpath_type_boolean);
12308 		return var ? var->set(value) : false;
12309 	}
12310 
set(const char_t * name,double value)12311 	PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
12312 	{
12313 		xpath_variable* var = add(name, xpath_type_number);
12314 		return var ? var->set(value) : false;
12315 	}
12316 
set(const char_t * name,const char_t * value)12317 	PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
12318 	{
12319 		xpath_variable* var = add(name, xpath_type_string);
12320 		return var ? var->set(value) : false;
12321 	}
12322 
set(const char_t * name,const xpath_node_set & value)12323 	PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
12324 	{
12325 		xpath_variable* var = add(name, xpath_type_node_set);
12326 		return var ? var->set(value) : false;
12327 	}
12328 
	// Returns the variable with the given name, or 0 if there is none (see _find).
	PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
	{
		return _find(name);
	}
12333 
	// Const overload: returns the variable with the given name, or 0 if there is none (see _find).
	PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
	{
		return _find(name);
	}
12338 
	// Compiles query into an AST. variables is handed to the parser unchanged.
	// On success _impl owns the compiled query and _result.error is 0.
	// If the implementation object cannot be allocated, _result.error is set to
	// "Out of memory" (PUGIXML_NO_EXCEPTIONS) or std::bad_alloc is thrown.
	// If parsing yields no root, _impl stays 0 and the implementation object is destroyed.
	PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
	{
		impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();

		if (!qimpl)
		{
		#ifdef PUGIXML_NO_EXCEPTIONS
			_result.error = "Out of memory";
		#else
			throw std::bad_alloc();
		#endif
		}
		else
		{
			using impl::auto_deleter; // MSVC7 workaround
			// guard that calls xpath_query_impl::destroy on qimpl unless released below;
			// note that the local name 'impl' hides the namespace from here on
			auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy);

			// the parser writes error details into _result
			qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);

			if (qimpl->root)
			{
				qimpl->root->optimize(&qimpl->alloc);

				// success: take ownership away from the guard and clear the error
				_impl = impl.release();
				_result.error = 0;
			}
		}
	}
12367 
	// Constructs a query without a compiled implementation; _result is default-constructed.
	PUGI__FN xpath_query::xpath_query(): _impl(0)
	{
	}
12371 
~xpath_query()12372 	PUGI__FN xpath_query::~xpath_query()
12373 	{
12374 		if (_impl)
12375 			impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12376 	}
12377 
12378 #ifdef PUGIXML_HAS_MOVE
xpath_query(xpath_query && rhs)12379 	PUGI__FN xpath_query::xpath_query(xpath_query&& rhs)
12380 	{
12381 		_impl = rhs._impl;
12382 		_result = rhs._result;
12383 		rhs._impl = 0;
12384 		rhs._result = xpath_parse_result();
12385 	}
12386 
operator =(xpath_query && rhs)12387 	PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs)
12388 	{
12389 		if (this == &rhs) return *this;
12390 
12391 		if (_impl)
12392 			impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12393 
12394 		_impl = rhs._impl;
12395 		_result = rhs._result;
12396 		rhs._impl = 0;
12397 		rhs._result = xpath_parse_result();
12398 
12399 		return *this;
12400 	}
12401 #endif
12402 
return_type() const12403 	PUGI__FN xpath_value_type xpath_query::return_type() const
12404 	{
12405 		if (!_impl) return xpath_type_none;
12406 
12407 		return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
12408 	}
12409 
evaluate_boolean(const xpath_node & n) const12410 	PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
12411 	{
12412 		if (!_impl) return false;
12413 
12414 		impl::xpath_context c(n, 1, 1);
12415 		impl::xpath_stack_data sd;
12416 
12417 	#ifdef PUGIXML_NO_EXCEPTIONS
12418 		if (setjmp(sd.error_handler)) return false;
12419 	#endif
12420 
12421 		return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
12422 	}
12423 
evaluate_number(const xpath_node & n) const12424 	PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
12425 	{
12426 		if (!_impl) return impl::gen_nan();
12427 
12428 		impl::xpath_context c(n, 1, 1);
12429 		impl::xpath_stack_data sd;
12430 
12431 	#ifdef PUGIXML_NO_EXCEPTIONS
12432 		if (setjmp(sd.error_handler)) return impl::gen_nan();
12433 	#endif
12434 
12435 		return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
12436 	}
12437 
12438 #ifndef PUGIXML_NO_STL
evaluate_string(const xpath_node & n) const12439 	PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
12440 	{
12441 		impl::xpath_stack_data sd;
12442 
12443 		impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
12444 
12445 		return string_t(r.c_str(), r.length());
12446 	}
12447 #endif
12448 
evaluate_string(char_t * buffer,size_t capacity,const xpath_node & n) const12449 	PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
12450 	{
12451 		impl::xpath_stack_data sd;
12452 
12453 		impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
12454 
12455 		size_t full_size = r.length() + 1;
12456 
12457 		if (capacity > 0)
12458 		{
12459 			size_t size = (full_size < capacity) ? full_size : capacity;
12460 			assert(size > 0);
12461 
12462 			memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
12463 			buffer[size - 1] = 0;
12464 		}
12465 
12466 		return full_size;
12467 	}
12468 
evaluate_node_set(const xpath_node & n) const12469 	PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
12470 	{
12471 		impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12472 		if (!root) return xpath_node_set();
12473 
12474 		impl::xpath_context c(n, 1, 1);
12475 		impl::xpath_stack_data sd;
12476 
12477 	#ifdef PUGIXML_NO_EXCEPTIONS
12478 		if (setjmp(sd.error_handler)) return xpath_node_set();
12479 	#endif
12480 
12481 		impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
12482 
12483 		return xpath_node_set(r.begin(), r.end(), r.type());
12484 	}
12485 
evaluate_node(const xpath_node & n) const12486 	PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
12487 	{
12488 		impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12489 		if (!root) return xpath_node();
12490 
12491 		impl::xpath_context c(n, 1, 1);
12492 		impl::xpath_stack_data sd;
12493 
12494 	#ifdef PUGIXML_NO_EXCEPTIONS
12495 		if (setjmp(sd.error_handler)) return xpath_node();
12496 	#endif
12497 
12498 		impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
12499 
12500 		return r.first();
12501 	}
12502 
	// Returns the parse result recorded for this query.
	PUGI__FN const xpath_parse_result& xpath_query::result() const
	{
		return _result;
	}
12507 
	// Empty function whose address is returned by the unspecified_bool_type conversion
	// of xpath_query; it is never meant to do anything.
	PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
	{
	}
12511 
operator xpath_query::unspecified_bool_type() const12512 	PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
12513 	{
12514 		return _impl ? unspecified_bool_xpath_query : 0;
12515 	}
12516 
operator !() const12517 	PUGI__FN bool xpath_query::operator!() const
12518 	{
12519 		return !_impl;
12520 	}
12521 
select_node(const char_t * query,xpath_variable_set * variables) const12522 	PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
12523 	{
12524 		xpath_query q(query, variables);
12525 		return select_node(q);
12526 	}
12527 
select_node(const xpath_query & query) const12528 	PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const
12529 	{
12530 		return query.evaluate_node(*this);
12531 	}
12532 
select_nodes(const char_t * query,xpath_variable_set * variables) const12533 	PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
12534 	{
12535 		xpath_query q(query, variables);
12536 		return select_nodes(q);
12537 	}
12538 
select_nodes(const xpath_query & query) const12539 	PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
12540 	{
12541 		return query.evaluate_node_set(*this);
12542 	}
12543 
select_single_node(const char_t * query,xpath_variable_set * variables) const12544 	PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
12545 	{
12546 		xpath_query q(query, variables);
12547 		return select_single_node(q);
12548 	}
12549 
select_single_node(const xpath_query & query) const12550 	PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
12551 	{
12552 		return query.evaluate_node(*this);
12553 	}
12554 }
12555 
12556 #endif
12557 
12558 #ifdef __BORLANDC__
12559 #	pragma option pop
12560 #endif
12561 
12562 // Intel C++ does not properly keep warning state for function templates,
12563 // so popping warning state at the end of translation unit leads to warnings in the middle.
12564 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
12565 #	pragma warning(pop)
12566 #endif
12567 
12568 // Undefine all local macros (makes sure we're not leaking macros in header-only mode)
12569 #undef PUGI__NO_INLINE
12570 #undef PUGI__UNLIKELY
12571 #undef PUGI__STATIC_ASSERT
12572 #undef PUGI__DMC_VOLATILE
12573 #undef PUGI__MSVC_CRT_VERSION
12574 #undef PUGI__NS_BEGIN
12575 #undef PUGI__NS_END
12576 #undef PUGI__FN
12577 #undef PUGI__FN_NO_INLINE
12578 #undef PUGI__GETHEADER_IMPL
12579 #undef PUGI__GETPAGE_IMPL
12580 #undef PUGI__GETPAGE
12581 #undef PUGI__NODETYPE
12582 #undef PUGI__IS_CHARTYPE_IMPL
12583 #undef PUGI__IS_CHARTYPE
12584 #undef PUGI__IS_CHARTYPEX
12585 #undef PUGI__ENDSWITH
12586 #undef PUGI__SKIPWS
12587 #undef PUGI__OPTSET
12588 #undef PUGI__PUSHNODE
12589 #undef PUGI__POPNODE
12590 #undef PUGI__SCANFOR
12591 #undef PUGI__SCANWHILE
12592 #undef PUGI__SCANWHILE_UNROLL
12593 #undef PUGI__ENDSEG
12594 #undef PUGI__THROW_ERROR
12595 #undef PUGI__CHECK_ERROR
12596 
12597 #endif
12598 
12599 /**
12600  * Copyright (c) 2006-2016 Arseny Kapoulkine
12601  *
12602  * Permission is hereby granted, free of charge, to any person
12603  * obtaining a copy of this software and associated documentation
12604  * files (the "Software"), to deal in the Software without
12605  * restriction, including without limitation the rights to use,
12606  * copy, modify, merge, publish, distribute, sublicense, and/or sell
12607  * copies of the Software, and to permit persons to whom the
12608  * Software is furnished to do so, subject to the following
12609  * conditions:
12610  *
12611  * The above copyright notice and this permission notice shall be
12612  * included in all copies or substantial portions of the Software.
12613  *
12614  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
12615  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
12616  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12617  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
12618  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
12619  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
12620  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
12621  * OTHER DEALINGS IN THE SOFTWARE.
12622  */
12623