1 /**
2  * pugixml parser - version 1.9
3  * --------------------------------------------------------
4  * Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
5  * Report bugs and download new versions at http://pugixml.org/
6  *
7  * This library is distributed under the MIT License. See notice at the end
8  * of this file.
9  *
10  * This work is based on the pugxml parser, which is:
11  * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
12  */
13 
14 #ifndef SOURCE_PUGIXML_CPP
15 #define SOURCE_PUGIXML_CPP
16 
17 #include "pugixml.hpp"
18 
19 #include <stdlib.h>
20 #include <stdio.h>
21 #include <string.h>
22 #include <assert.h>
23 #include <limits.h>
24 
25 #ifdef PUGIXML_WCHAR_MODE
26 #	include <wchar.h>
27 #endif
28 
29 #ifndef PUGIXML_NO_XPATH
30 #	include <math.h>
31 #	include <float.h>
32 #endif
33 
34 #ifndef PUGIXML_NO_STL
35 #	include <istream>
36 #	include <ostream>
37 #	include <string>
38 #endif
39 
40 // For placement new
41 #include <new>
42 
43 #ifdef _MSC_VER
44 #	pragma warning(push)
45 #	pragma warning(disable: 4127) // conditional expression is constant
46 #	pragma warning(disable: 4324) // structure was padded due to __declspec(align())
47 #	pragma warning(disable: 4702) // unreachable code
48 #	pragma warning(disable: 4996) // this function or variable may be unsafe
49 #endif
50 
51 #if defined(_MSC_VER) && defined(__c2__)
52 #	pragma clang diagnostic push
53 #	pragma clang diagnostic ignored "-Wdeprecated" // this function or variable may be unsafe
54 #endif
55 
56 #ifdef __INTEL_COMPILER
57 #	pragma warning(disable: 177) // function was declared but never referenced
58 #	pragma warning(disable: 279) // controlling expression is constant
59 #	pragma warning(disable: 1478 1786) // function was declared "deprecated"
60 #	pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
61 #endif
62 
63 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
64 #	pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
65 #endif
66 
67 #ifdef __BORLANDC__
68 #	pragma option push
69 #	pragma warn -8008 // condition is always false
70 #	pragma warn -8066 // unreachable code
71 #endif
72 
73 #ifdef __SNC__
74 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
75 #	pragma diag_suppress=178 // function was declared but never referenced
76 #	pragma diag_suppress=237 // controlling expression is constant
77 #endif
78 
79 #ifdef __TI_COMPILER_VERSION__
80 #	pragma diag_suppress 179 // function was declared but never referenced
81 #endif
82 
// Inlining controls
// PUGI__NO_INLINE expands to the compiler-specific "do not inline" annotation when one is known
// (__declspec(noinline) for _MSC_VER >= 1300, __attribute__((noinline)) for __GNUC__) and to nothing otherwise
#if defined(_MSC_VER) && _MSC_VER >= 1300
#	define PUGI__NO_INLINE __declspec(noinline)
#elif defined(__GNUC__)
#	define PUGI__NO_INLINE __attribute__((noinline))
#else
#	define PUGI__NO_INLINE
#endif
91 
// Branch weight controls
// PUGI__UNLIKELY(cond) tells GCC-compatible compilers (except __c2__) that cond is expected to be false;
// on all other compilers it simply evaluates to cond
#if defined(__GNUC__) && !defined(__c2__)
#	define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
#else
#	define PUGI__UNLIKELY(cond) (cond)
#endif
98 
// Simple static assertion
// Expands to a braced block that declares a char array whose size is -1 (ill-formed) when cond is false;
// because it expands to a block, it can only be used where a statement is allowed, e.g. inside a function body
#define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
101 
102 // Digital Mars C++ bug workaround for passing char loaded from memory via stack
103 #ifdef __DMC__
104 #	define PUGI__DMC_VOLATILE volatile
105 #else
106 #	define PUGI__DMC_VOLATILE
107 #endif
108 
// Integer sanitizer workaround; we only apply this for clang since gcc8 has no_sanitize but not unsigned-integer-overflow and produces "attribute directive ignored" warnings
// PUGI__UNSIGNED_OVERFLOW disables the unsigned-integer-overflow sanitizer check for the function it annotates;
// it expands to nothing when the compiler is not clang or the no_sanitize attribute is not available
#if defined(__clang__) && defined(__has_attribute)
#	if __has_attribute(no_sanitize)
#		define PUGI__UNSIGNED_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow")))
#	else
#		define PUGI__UNSIGNED_OVERFLOW
#	endif
#else
#	define PUGI__UNSIGNED_OVERFLOW
#endif
119 
120 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
121 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
122 using std::memcpy;
123 using std::memmove;
124 using std::memset;
125 #endif
126 
// Some MinGW/GCC versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions from limits.h in some configurations
// The fallbacks are only installed when none of the three macros is defined.
// LLONG_MIN refers to LLONG_MAX, which is defined on the following line; that is fine because macro bodies
// are only expanded at the point of use, by which time all three definitions are visible.
#if defined(PUGIXML_HAS_LONG_LONG) && defined(__GNUC__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX)
#	define LLONG_MIN (-LLONG_MAX - 1LL)
#	define LLONG_MAX __LONG_LONG_MAX__
#	define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL)
#endif
133 
134 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
135 #if defined(_MSC_VER) && !defined(__S3E__)
136 #	define PUGI__MSVC_CRT_VERSION _MSC_VER
137 #endif
138 
// Not all platforms have snprintf; we define a wrapper that uses snprintf if possible. This only works with buffers with a known size.
// In the first two variants buf must be an actual array (not a pointer), because sizeof(buf)/_countof(buf)
// is used as the buffer capacity. The last variant maps to plain sprintf, which does no bounds checking,
// so callers have to size their buffers for the worst case.
#if __cplusplus >= 201103
#	define PUGI__SNPRINTF(buf, ...) snprintf(buf, sizeof(buf), __VA_ARGS__)
#elif defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
#	define PUGI__SNPRINTF(buf, ...) _snprintf_s(buf, _countof(buf), _TRUNCATE, __VA_ARGS__)
#else
#	define PUGI__SNPRINTF sprintf
#endif
147 
// We put implementation details into an anonymous namespace in source mode, but have to keep it in non-anonymous namespace in header-only mode to prevent binary bloat.
// PUGI__NS_BEGIN / PUGI__NS_END open and close the implementation namespace (pugi::impl, plus an anonymous
// namespace in source mode on compilers other than MSVC6).
// PUGI__FN prefixes implementation functions: 'inline' in header-only mode, nothing in source mode.
// PUGI__FN_NO_INLINE is the same, except that in source mode it expands to PUGI__NO_INLINE.
#ifdef PUGIXML_HEADER_ONLY
#	define PUGI__NS_BEGIN namespace pugi { namespace impl {
#	define PUGI__NS_END } }
#	define PUGI__FN inline
#	define PUGI__FN_NO_INLINE inline
#else
#	if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
#		define PUGI__NS_BEGIN namespace pugi { namespace impl {
#		define PUGI__NS_END } }
#	else
#		define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
#		define PUGI__NS_END } } }
#	endif
#	define PUGI__FN
#	define PUGI__FN_NO_INLINE PUGI__NO_INLINE
#endif
165 
// uintptr_t
// For _MSC_VER < 1600 and __BORLANDC__ < 0x561 the fixed-width integer types are declared by hand inside
// namespace pugi (presumably because these toolchains do not ship <stdint.h> - confirm before changing);
// every other compiler gets the types from <stdint.h>.
#if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561)
namespace pugi
{
	// uintptr_t is only declared here when the toolchain headers have not already done so
#	ifndef _UINTPTR_T_DEFINED
	typedef size_t uintptr_t;
#	endif

	typedef unsigned __int8 uint8_t;
	typedef unsigned __int16 uint16_t;
	typedef unsigned __int32 uint32_t;
}
#else
#	include <stdint.h>
#endif
181 
182 // Memory allocation
183 PUGI__NS_BEGIN
default_allocate(size_t size)184 	PUGI__FN void* default_allocate(size_t size)
185 	{
186 		return malloc(size);
187 	}
188 
	// Default deallocation function: releases ptr with free, the counterpart of the malloc call
	// made by default_allocate
	PUGI__FN void default_deallocate(void* ptr)
	{
		free(ptr);
	}
193 
	// Holder for the allocation/deallocation function pointers used by the implementation.
	// The template parameter T is not used by the members; it only exists so that the statics
	// below can be defined as template static members.
	template <typename T>
	struct xml_memory_management_function_storage
	{
		static allocation_function allocate;
		static deallocation_function deallocate;
	};

	// Global allocation functions are stored in class statics so that in header mode linker deduplicates them
	// Without a template<> we'll get multiple definitions of the same static
	// Both pointers initially refer to the default_allocate/default_deallocate functions
	template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
	template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;

	// xml_memory is the instantiation through which the rest of the code reaches the function pointers
	typedef xml_memory_management_function_storage<int> xml_memory;
207 PUGI__NS_END
208 
209 // String utilities
210 PUGI__NS_BEGIN
211 	// Get string length
strlength(const char_t * s)212 	PUGI__FN size_t strlength(const char_t* s)
213 	{
214 		assert(s);
215 
216 	#ifdef PUGIXML_WCHAR_MODE
217 		return wcslen(s);
218 	#else
219 		return strlen(s);
220 	#endif
221 	}
222 
223 	// Compare two strings
strequal(const char_t * src,const char_t * dst)224 	PUGI__FN bool strequal(const char_t* src, const char_t* dst)
225 	{
226 		assert(src && dst);
227 
228 	#ifdef PUGIXML_WCHAR_MODE
229 		return wcscmp(src, dst) == 0;
230 	#else
231 		return strcmp(src, dst) == 0;
232 	#endif
233 	}
234 
235 	// Compare lhs with [rhs_begin, rhs_end)
strequalrange(const char_t * lhs,const char_t * rhs,size_t count)236 	PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
237 	{
238 		for (size_t i = 0; i < count; ++i)
239 			if (lhs[i] != rhs[i])
240 				return false;
241 
242 		return lhs[count] == 0;
243 	}
244 
245 	// Get length of wide string, even if CRT lacks wide character support
strlength_wide(const wchar_t * s)246 	PUGI__FN size_t strlength_wide(const wchar_t* s)
247 	{
248 		assert(s);
249 
250 	#ifdef PUGIXML_WCHAR_MODE
251 		return wcslen(s);
252 	#else
253 		const wchar_t* end = s;
254 		while (*end) end++;
255 		return static_cast<size_t>(end - s);
256 	#endif
257 	}
258 PUGI__NS_END
259 
260 // auto_ptr-like object for exception recovery
261 PUGI__NS_BEGIN
262 	template <typename T> struct auto_deleter
263 	{
264 		typedef void (*D)(T*);
265 
266 		T* data;
267 		D deleter;
268 
auto_deleterauto_deleter269 		auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
270 		{
271 		}
272 
~auto_deleterauto_deleter273 		~auto_deleter()
274 		{
275 			if (data) deleter(data);
276 		}
277 
releaseauto_deleter278 		T* release()
279 		{
280 			T* result = data;
281 			data = 0;
282 			return result;
283 		}
284 	};
285 PUGI__NS_END
286 
287 #ifdef PUGIXML_COMPACT
288 PUGI__NS_BEGIN
289 	class compact_hash_table
290 	{
291 	public:
compact_hash_table()292 		compact_hash_table(): _items(0), _capacity(0), _count(0)
293 		{
294 		}
295 
clear()296 		void clear()
297 		{
298 			if (_items)
299 			{
300 				xml_memory::deallocate(_items);
301 				_items = 0;
302 				_capacity = 0;
303 				_count = 0;
304 			}
305 		}
306 
find(const void * key)307 		void* find(const void* key)
308 		{
309 			if (_capacity == 0) return 0;
310 
311 			item_t* item = get_item(key);
312 			assert(item);
313 			assert(item->key == key || (item->key == 0 && item->value == 0));
314 
315 			return item->value;
316 		}
317 
insert(const void * key,void * value)318 		void insert(const void* key, void* value)
319 		{
320 			assert(_capacity != 0 && _count < _capacity - _capacity / 4);
321 
322 			item_t* item = get_item(key);
323 			assert(item);
324 
325 			if (item->key == 0)
326 			{
327 				_count++;
328 				item->key = key;
329 			}
330 
331 			item->value = value;
332 		}
333 
reserve(size_t extra=16)334 		bool reserve(size_t extra = 16)
335 		{
336 			if (_count + extra >= _capacity - _capacity / 4)
337 				return rehash(_count + extra);
338 
339 			return true;
340 		}
341 
342 	private:
343 		struct item_t
344 		{
345 			const void* key;
346 			void* value;
347 		};
348 
349 		item_t* _items;
350 		size_t _capacity;
351 
352 		size_t _count;
353 
354 		bool rehash(size_t count);
355 
get_item(const void * key)356 		item_t* get_item(const void* key)
357 		{
358 			assert(key);
359 			assert(_capacity > 0);
360 
361 			size_t hashmod = _capacity - 1;
362 			size_t bucket = hash(key) & hashmod;
363 
364 			for (size_t probe = 0; probe <= hashmod; ++probe)
365 			{
366 				item_t& probe_item = _items[bucket];
367 
368 				if (probe_item.key == key || probe_item.key == 0)
369 					return &probe_item;
370 
371 				// hash collision, quadratic probing
372 				bucket = (bucket + probe + 1) & hashmod;
373 			}
374 
375 			assert(false && "Hash table is full"); // unreachable
376 			return 0;
377 		}
378 
hash(const void * key)379 		static PUGI__UNSIGNED_OVERFLOW unsigned int hash(const void* key)
380 		{
381 			unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
382 
383 			// MurmurHash3 32-bit finalizer
384 			h ^= h >> 16;
385 			h *= 0x85ebca6bu;
386 			h ^= h >> 13;
387 			h *= 0xc2b2ae35u;
388 			h ^= h >> 16;
389 
390 			return h;
391 		}
392 	};
393 
rehash(size_t count)394 	PUGI__FN_NO_INLINE bool compact_hash_table::rehash(size_t count)
395 	{
396 		size_t capacity = 32;
397 		while (count >= capacity - capacity / 4)
398 			capacity *= 2;
399 
400 		compact_hash_table rt;
401 		rt._capacity = capacity;
402 		rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * capacity));
403 
404 		if (!rt._items)
405 			return false;
406 
407 		memset(rt._items, 0, sizeof(item_t) * capacity);
408 
409 		for (size_t i = 0; i < _capacity; ++i)
410 			if (_items[i].key)
411 				rt.insert(_items[i].key, _items[i].value);
412 
413 		if (_items)
414 			xml_memory::deallocate(_items);
415 
416 		_capacity = capacity;
417 		_items = rt._items;
418 
419 		assert(_count == rt._count);
420 
421 		return true;
422 	}
423 
424 PUGI__NS_END
425 #endif
426 
427 PUGI__NS_BEGIN
// Granularity used when sizing and addressing string blocks inside a page: 4 bytes in compact mode, pointer size otherwise
#ifdef PUGIXML_COMPACT
	static const uintptr_t xml_memory_block_alignment = 4;
#else
	static const uintptr_t xml_memory_block_alignment = sizeof(void*);
#endif

	// extra metadata bits
	// these live in the low byte of an object's header: bits 0-3 hold the type, bits 4-6 are the flags below
	static const uintptr_t xml_memory_page_contents_shared_mask = 64;
	static const uintptr_t xml_memory_page_name_allocated_mask = 32;
	static const uintptr_t xml_memory_page_value_allocated_mask = 16;
	static const uintptr_t xml_memory_page_type_mask = 15;

	// combined masks for string uniqueness
	static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
	static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;
443 
// Header helpers. In compact mode the header is an object that locates its page itself (get_page);
// otherwise the header is an integer holding the object's byte offset from its page in the bits above
// the low 8 and the flags in the low 8 bits.
#ifdef PUGIXML_COMPACT
	#define PUGI__GETHEADER_IMPL(object, page, flags) // unused
	#define PUGI__GETPAGE_IMPL(header) (header).get_page()
#else
	// builds a header value from the object-to-page distance and the flag bits
	#define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags))
	// this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
	// it recovers the page by subtracting the stored offset (header >> 8) from the address of the header itself
	#define PUGI__GETPAGE_IMPL(header) static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>(reinterpret_cast<const char*>(&header) - (header >> 8))))
#endif

	// page that owns node/attribute n, and the node type stored in the low bits of n's header
	#define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
	#define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask)
455 
456 	struct xml_allocator;
457 
458 	struct xml_memory_page
459 	{
constructxml_memory_page460 		static xml_memory_page* construct(void* memory)
461 		{
462 			xml_memory_page* result = static_cast<xml_memory_page*>(memory);
463 
464 			result->allocator = 0;
465 			result->prev = 0;
466 			result->next = 0;
467 			result->busy_size = 0;
468 			result->freed_size = 0;
469 
470 		#ifdef PUGIXML_COMPACT
471 			result->compact_string_base = 0;
472 			result->compact_shared_parent = 0;
473 			result->compact_page_marker = 0;
474 		#endif
475 
476 			return result;
477 		}
478 
479 		xml_allocator* allocator;
480 
481 		xml_memory_page* prev;
482 		xml_memory_page* next;
483 
484 		size_t busy_size;
485 		size_t freed_size;
486 
487 	#ifdef PUGIXML_COMPACT
488 		char_t* compact_string_base;
489 		void* compact_shared_parent;
490 		uint32_t* compact_page_marker;
491 	#endif
492 	};
493 
	// Number of data bytes in a regular page: the total page size (PUGIXML_MEMORY_PAGE_SIZE when it is
	// defined, 32768 bytes otherwise) minus the xml_memory_page header
	static const size_t xml_memory_page_size =
	#ifdef PUGIXML_MEMORY_PAGE_SIZE
		(PUGIXML_MEMORY_PAGE_SIZE)
	#else
		32768
	#endif
		- sizeof(xml_memory_page);
501 
	// Header stored directly in front of every string handed out by xml_allocator::allocate_string.
	// Both fields are stored in units of xml_memory_block_alignment, not in bytes.
	struct xml_memory_string_header
	{
		uint16_t page_offset; // offset from page->data
		uint16_t full_size; // 0 if string occupies whole page
	};
507 
	// Page-based allocator. Memory is handed out sequentially from the current page (_root); requests that
	// do not fit go through allocate_memory_oob. Freed blocks are not reused individually: each page only
	// counts freed bytes, and a page is released (or reset, for the last page) once all of it has been freed.
	struct xml_allocator
	{
		// root becomes the current page; its busy_size seeds the cached _busy_size counter
		xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
		{
		#ifdef PUGIXML_COMPACT
			_hash = 0;
		#endif
		}

		// Allocates a new page with room for data_size bytes after the page header.
		// Returns 0 if the underlying allocation function fails. The page is not linked into the page list here.
		xml_memory_page* allocate_page(size_t data_size)
		{
			size_t size = sizeof(xml_memory_page) + data_size;

			// allocate a single block that holds the page header followed by the page data
			void* memory = xml_memory::allocate(size);
			if (!memory) return 0;

			// prepare page structure
			xml_memory_page* page = xml_memory_page::construct(memory);
			assert(page);

			// the new page shares the allocator pointer recorded in the current page
			page->allocator = _root->allocator;

			return page;
		}

		// Returns the page memory to the deallocation function
		static void deallocate_page(xml_memory_page* page)
		{
			xml_memory::deallocate(page);
		}

		// Slow path of allocate_memory, defined out of line
		void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);

		// Allocates size bytes; out_page receives the page the memory belongs to.
		// Returns 0 if a new page was needed and could not be allocated.
		void* allocate_memory(size_t size, xml_memory_page*& out_page)
		{
			// not enough room left in the current page: take the slow path
			if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
				return allocate_memory_oob(size, out_page);

			// page data starts right after the page header; _busy_size bytes of it are already taken
			void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;

			_busy_size += size;

			out_page = _root;

			return buf;
		}

	#ifdef PUGIXML_COMPACT
		// Allocates memory for an object whose header locates its page through a nearby marker.
		// Space for a marker is requested up front and given back when the existing marker is close enough.
		void* allocate_object(size_t size, xml_memory_page*& out_page)
		{
			void* result = allocate_memory(size + sizeof(uint32_t), out_page);
			if (!result) return 0;

			// adjust for marker
			ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);

			// the distance to the marker must be below 256 alignment units; the unsigned comparison also
			// catches a marker that lies after the result
			if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment))
			{
				// insert new marker
				uint32_t* marker = static_cast<uint32_t*>(result);

				// the marker stores its own byte distance from the start of the page
				*marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
				out_page->compact_page_marker = marker;

				// since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
				// this will make sure deallocate_memory correctly tracks the size
				out_page->freed_size += sizeof(uint32_t);

				return marker + 1;
			}
			else
			{
				// roll back uint32_t part
				_busy_size -= sizeof(uint32_t);

				return result;
			}
		}
	#else
		// Without compact mode objects need no marker, so this is plain allocate_memory
		void* allocate_object(size_t size, xml_memory_page*& out_page)
		{
			return allocate_memory(size, out_page);
		}
	#endif

		// Records that size bytes at ptr inside page are no longer in use. When the whole page has been
		// freed, the last page in the list is reset for reuse and any other page is unlinked and released.
		void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
		{
			// the current page's busy_size is cached in _busy_size; write it back before using it
			if (page == _root) page->busy_size = _busy_size;

			assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
			// ptr is otherwise only referenced by the assert above; this keeps it used when asserts are compiled out
			(void)!ptr;

			page->freed_size += size;
			assert(page->freed_size <= page->busy_size);

			if (page->freed_size == page->busy_size)
			{
				if (page->next == 0)
				{
					assert(_root == page);

					// top page freed, just reset sizes
					page->busy_size = 0;
					page->freed_size = 0;

				#ifdef PUGIXML_COMPACT
					// reset compact state to maximize efficiency
					page->compact_string_base = 0;
					page->compact_shared_parent = 0;
					page->compact_page_marker = 0;
				#endif

					_busy_size = 0;
				}
				else
				{
					assert(_root != page);
					assert(page->prev);

					// remove from the list
					page->prev->next = page->next;
					page->next->prev = page->prev;

					// deallocate
					deallocate_page(page);
				}
			}
		}

		// Allocates storage for length characters preceded by an xml_memory_string_header.
		// length is taken as given (no room for a terminator is added here). Returns 0 on allocation failure.
		char_t* allocate_string(size_t length)
		{
			// largest byte offset/size that fits the 16-bit header fields, which count in alignment units
			static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;

			PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);

			// allocate memory for string and header block
			size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);

			// round size up to block alignment boundary
			size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);

			xml_memory_page* page;
			xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));

			if (!header) return 0;

			// setup header
			ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);

			assert(page_offset % xml_memory_block_alignment == 0);
			assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
			header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);

			// full_size == 0 for large strings that occupy the whole page
			assert(full_size % xml_memory_block_alignment == 0);
			assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
			header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);

			// round-trip through void* to avoid 'cast increases required alignment of target type' warning
			// header is guaranteed a pointer-sized alignment, which should be enough for char_t
			return static_cast<char_t*>(static_cast<void*>(header + 1));
		}

		// Releases a string obtained from allocate_string, using the header stored in front of it
		// to find the owning page and the size of the block
		void deallocate_string(char_t* string)
		{
			// this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
			// we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string

			// get header
			xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
			assert(header);

			// deallocate
			// the header records its distance from the page data; add the page header size to reach the page itself
			size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
			xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));

			// if full_size == 0 then this string occupies the whole page
			size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;

			deallocate_memory(header, full_size, page);
		}

		// In compact mode, reserves room in the pointer hash table (false if that fails); otherwise always true
		bool reserve()
		{
		#ifdef PUGIXML_COMPACT
			return _hash->reserve();
		#else
			return true;
		#endif
		}

		// current page, and the number of its data bytes already handed out (cached copy of its busy_size)
		xml_memory_page* _root;
		size_t _busy_size;

	#ifdef PUGIXML_COMPACT
		// hash table used by the compact pointer/string classes for values that do not fit their inline encoding
		compact_hash_table* _hash;
	#endif
	};
706 
allocate_memory_oob(size_t size,xml_memory_page * & out_page)707 	PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
708 	{
709 		const size_t large_allocation_threshold = xml_memory_page_size / 4;
710 
711 		xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
712 		out_page = page;
713 
714 		if (!page) return 0;
715 
716 		if (size <= large_allocation_threshold)
717 		{
718 			_root->busy_size = _busy_size;
719 
720 			// insert page at the end of linked list
721 			page->prev = _root;
722 			_root->next = page;
723 			_root = page;
724 
725 			_busy_size = size;
726 		}
727 		else
728 		{
729 			// insert page before the end of linked list, so that it is deleted as soon as possible
730 			// the last page is not deleted even if it's empty (see deallocate_memory)
731 			assert(_root->prev);
732 
733 			page->prev = _root->prev;
734 			page->next = _root;
735 
736 			_root->prev->next = page;
737 			_root->prev = page;
738 
739 			page->busy_size = size;
740 		}
741 
742 		return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
743 	}
744 PUGI__NS_END
745 
746 #ifdef PUGIXML_COMPACT
747 PUGI__NS_BEGIN
	// Alignment unit used by the compact pointer encodings: 1 << 2 = 4 bytes
	static const uintptr_t compact_alignment_log2 = 2;
	static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
750 
751 	class compact_header
752 	{
753 	public:
compact_header(xml_memory_page * page,unsigned int flags)754 		compact_header(xml_memory_page* page, unsigned int flags)
755 		{
756 			PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
757 
758 			ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
759 			assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);
760 
761 			_page = static_cast<unsigned char>(offset >> compact_alignment_log2);
762 			_flags = static_cast<unsigned char>(flags);
763 		}
764 
operator &=(uintptr_t mod)765 		void operator&=(uintptr_t mod)
766 		{
767 			_flags &= static_cast<unsigned char>(mod);
768 		}
769 
operator |=(uintptr_t mod)770 		void operator|=(uintptr_t mod)
771 		{
772 			_flags |= static_cast<unsigned char>(mod);
773 		}
774 
operator &(uintptr_t mod) const775 		uintptr_t operator&(uintptr_t mod) const
776 		{
777 			return _flags & mod;
778 		}
779 
get_page() const780 		xml_memory_page* get_page() const
781 		{
782 			// round-trip through void* to silence 'cast increases required alignment of target type' warnings
783 			const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
784 			const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));
785 
786 			return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
787 		}
788 
789 	private:
790 		unsigned char _page;
791 		unsigned char _flags;
792 	};
793 
compact_get_page(const void * object,int header_offset)794 	PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
795 	{
796 		const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset);
797 
798 		return header->get_page();
799 	}
800 
compact_get_value(const void * object)801 	template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object)
802 	{
803 		return static_cast<T*>(compact_get_page(object, header_offset)->allocator->_hash->find(object));
804 	}
805 
compact_set_value(const void * object,T * value)806 	template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value)
807 	{
808 		compact_get_page(object, header_offset)->allocator->_hash->insert(object, value);
809 	}
810 
	// One-byte pointer. _data encodes the target as follows:
	//   0      - null pointer
	//   1..254 - target lies (_data - 1 + start) alignment units away from 'this' rounded down to compact_alignment
	//   255    - target is stored in the allocator's hash table, keyed by the address of this object
	// header_offset is the byte distance from this member back to the compact_header of the enclosing object;
	// start is the smallest inline offset in alignment units (the default -126 covers -126..127).
	template <typename T, int header_offset, int start = -126> class compact_pointer
	{
	public:
		compact_pointer(): _data(0)
		{
		}

		// The encoding is relative to 'this', so the raw byte cannot be copied; decode rhs to a plain
		// pointer (rhs + 0 goes through operator T*) and encode it again for this location
		void operator=(const compact_pointer& rhs)
		{
			*this = rhs + 0;
		}

		void operator=(T* value)
		{
			if (value)
			{
				// value is guaranteed to be compact-aligned; 'this' is not
				// our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
				// so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
				// compensate for arithmetic shift rounding for negative values
				ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
				ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;

				// the unsigned comparison rejects negative offsets as well as offsets above 253
				if (static_cast<uintptr_t>(offset) <= 253)
					_data = static_cast<unsigned char>(offset + 1);
				else
				{
					// out of inline range: fall back to the hash table
					compact_set_value<header_offset>(this, value);

					_data = 255;
				}
			}
			else
				_data = 0;
		}

		operator T*() const
		{
			if (_data)
			{
				if (_data < 255)
				{
					// inline encoding: offset is relative to 'this' rounded down to compact_alignment
					uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);

					return reinterpret_cast<T*>(base + (_data - 1 + start) * compact_alignment);
				}
				else
					return compact_get_value<header_offset, T>(this);
			}
			else
				return 0;
		}

		T* operator->() const
		{
			return *this;
		}

	private:
		unsigned char _data;
	};
872 
	// Two-byte pointer that can only encode targets at or before its own address inline. _data encodes:
	//   0          - null pointer
	//   1..65533   - target lies (_data - 1 - 65533) alignment units away from 'this' rounded down to
	//                compact_alignment, i.e. a distance of -65533..-1 units
	//   65534      - target is the page's compact_shared_parent
	//   65535      - target is stored in the allocator's hash table, keyed by the address of this object
	// header_offset is the byte distance from this member back to the compact_header of the enclosing object.
	template <typename T, int header_offset> class compact_pointer_parent
	{
	public:
		compact_pointer_parent(): _data(0)
		{
		}

		// The encoding is relative to 'this', so the raw value cannot be copied; decode rhs to a plain
		// pointer (rhs + 0 goes through operator T*) and encode it again for this location
		void operator=(const compact_pointer_parent& rhs)
		{
			*this = rhs + 0;
		}

		void operator=(T* value)
		{
			if (value)
			{
				// value is guaranteed to be compact-aligned; 'this' is not
				// our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
				// so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
				// compensate for arithmetic shift behavior for negative values
				ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
				ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;

				// NOTE(review): offset == 65533 (target within 3 bytes before 'this') would be stored as 65534,
				// which decodes as the shared parent; presumably this cannot occur for real objects - confirm
				if (static_cast<uintptr_t>(offset) <= 65533)
				{
					_data = static_cast<unsigned short>(offset + 1);
				}
				else
				{
					xml_memory_page* page = compact_get_page(this, header_offset);

					// the first out-of-range target seen on a page becomes that page's shared parent
					if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
						page->compact_shared_parent = value;

					if (page->compact_shared_parent == value)
					{
						_data = 65534;
					}
					else
					{
						// neither inline nor the shared parent: fall back to the hash table
						compact_set_value<header_offset>(this, value);

						_data = 65535;
					}
				}
			}
			else
			{
				_data = 0;
			}
		}

		operator T*() const
		{
			if (_data)
			{
				if (_data < 65534)
				{
					// inline encoding: offset is relative to 'this' rounded down to compact_alignment
					uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);

					return reinterpret_cast<T*>(base + (_data - 1 - 65533) * compact_alignment);
				}
				else if (_data == 65534)
					return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
				else
					return compact_get_value<header_offset, T>(this);
			}
			else
				return 0;
		}

		T* operator->() const
		{
			return *this;
		}

	private:
		uint16_t _data;
	};
952 
	// One-byte string pointer. The string's position is expressed as an offset (in char_t units) from the
	// page's compact_string_base, split between a 16-bit base value stored base_offset bytes before this
	// member and the byte in _data:
	//   0      - null pointer
	//   1..254 - string is at compact_string_base + ((*base - 1) << 7) + (_data - 1)
	//   255    - string pointer is stored in the allocator's hash table, keyed by the address of this object
	// header_offset is the byte distance from this member back to the compact_header of the enclosing object.
	template <int header_offset, int base_offset> class compact_string
	{
	public:
		compact_string(): _data(0)
		{
		}

		// The encoding depends on the location of 'this', so the raw byte cannot be copied; decode rhs
		// to a plain pointer (rhs + 0 goes through operator char_t*) and encode it again here
		void operator=(const compact_string& rhs)
		{
			*this = rhs + 0;
		}

		void operator=(char_t* value)
		{
			if (value)
			{
				xml_memory_page* page = compact_get_page(this, header_offset);

				// the first string assigned on a page becomes the reference point for that page
				if (PUGI__UNLIKELY(page->compact_string_base == 0))
					page->compact_string_base = value;

				ptrdiff_t offset = value - page->compact_string_base;

				// the unsigned comparison rejects negative offsets as well as offsets that are too large
				if (static_cast<uintptr_t>(offset) < (65535 << 7))
				{
					// round-trip through void* to silence 'cast increases required alignment of target type' warnings
					uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));

					if (*base == 0)
					{
						// base value not set yet: store the upper bits there and the low 7 bits here
						*base = static_cast<uint16_t>((offset >> 7) + 1);
						_data = static_cast<unsigned char>((offset & 127) + 1);
					}
					else
					{
						// base value already set: the string must lie within 0..253 characters after it
						ptrdiff_t remainder = offset - ((*base - 1) << 7);

						if (static_cast<uintptr_t>(remainder) <= 253)
						{
							_data = static_cast<unsigned char>(remainder + 1);
						}
						else
						{
							compact_set_value<header_offset>(this, value);

							_data = 255;
						}
					}
				}
				else
				{
					// too far from the page's string base: fall back to the hash table
					compact_set_value<header_offset>(this, value);

					_data = 255;
				}
			}
			else
			{
				_data = 0;
			}
		}

		operator char_t*() const
		{
			if (_data)
			{
				if (_data < 255)
				{
					xml_memory_page* page = compact_get_page(this, header_offset);

					// round-trip through void* to silence 'cast increases required alignment of target type' warnings
					const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
					assert(*base);

					// rebuild the offset from the base value (upper part) and _data (lower part)
					ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);

					return page->compact_string_base + offset;
				}
				else
				{
					return compact_get_value<header_offset, char_t>(this);
				}
			}
			else
				return 0;
		}

	private:
		unsigned char _data;
	};
1043 PUGI__NS_END
1044 #endif
1045 
1046 #ifdef PUGIXML_COMPACT
1047 namespace pugi
1048 {
	// Attribute record used when PUGIXML_COMPACT is defined.
	// Member order is layout-critical: the constructor statically asserts a total size of 8 bytes.
	// NOTE(review): the integer template arguments look like (byte offset of the member inside the struct,
	// and for strings the byte distance back to namevalue_base) - confirm against compact_pointer/compact_string.
	struct xml_attribute_struct
	{
		// Builds the header from the owning page with flags 0 and clears the shared string base.
		xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
		{
			PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
		}

		impl::compact_header header;

		// 16-bit value cleared on construction; presumably the base shared by the name and value encodings (see NOTE above)
		uint16_t namevalue_base;

		impl::compact_string<4, 2> name;
		impl::compact_string<5, 3> value;

		// previous attribute; cyclic (the first attribute's prev is the last one, see append_attribute)
		impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c;
		// next attribute; null for the last one
		impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute;
	};
1066 
	// Node record used when PUGIXML_COMPACT is defined.
	// Member order is layout-critical: the constructor statically asserts a total size of 12 bytes.
	// NOTE(review): the integer template arguments look like (byte offset of the member inside the struct,
	// and for strings the byte distance back to namevalue_base) - confirm against compact_pointer/compact_string.
	struct xml_node_struct
	{
		// Builds the header from the owning page and node type and clears the shared string base.
		xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0)
		{
			PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
		}

		impl::compact_header header;

		// 16-bit value cleared on construction; presumably the base shared by the name and value encodings (see NOTE above)
		uint16_t namevalue_base;

		impl::compact_string<4, 2> name;
		impl::compact_string<5, 3> value;

		impl::compact_pointer_parent<xml_node_struct, 6> parent;

		impl::compact_pointer<xml_node_struct, 8, 0> first_child;

		// previous sibling; cyclic (the first child's prev is the last child, see append_node)
		impl::compact_pointer<xml_node_struct,  9>    prev_sibling_c;
		// next sibling; null for the last child
		impl::compact_pointer<xml_node_struct, 10, 0> next_sibling;

		impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
	};
1090 }
1091 #else
1092 namespace pugi
1093 {
1094 	struct xml_attribute_struct
1095 	{
1096 		xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0)
1097 		{
1098 			header = PUGI__GETHEADER_IMPL(this, page, 0);
1099 		}
1100 
1101 		uintptr_t header;
1102 
1103 		char_t*	name;
1104 		char_t*	value;
1105 
1106 		xml_attribute_struct* prev_attribute_c;
1107 		xml_attribute_struct* next_attribute;
1108 	};
1109 
1110 	struct xml_node_struct
1111 	{
1112 		xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
1113 		{
1114 			header = PUGI__GETHEADER_IMPL(this, page, type);
1115 		}
1116 
1117 		uintptr_t header;
1118 
1119 		char_t* name;
1120 		char_t* value;
1121 
1122 		xml_node_struct* parent;
1123 
1124 		xml_node_struct* first_child;
1125 
1126 		xml_node_struct* prev_sibling_c;
1127 		xml_node_struct* next_sibling;
1128 
1129 		xml_attribute_struct* first_attribute;
1130 	};
1131 }
1132 #endif
1133 
1134 PUGI__NS_BEGIN
	// Entry of a singly linked list of additional character buffers;
	// xml_document_struct keeps the head of this list in its extra_buffers member.
	struct xml_extra_buffer
	{
		// buffer referenced by this entry
		char_t* buffer;
		// following entry in the list
		xml_extra_buffer* next;
	};
1140 
	// Document root record: a node of type node_document that is at the same time the allocator
	// for the document (it inherits from both xml_node_struct and xml_allocator).
	struct xml_document_struct: public xml_node_struct, public xml_allocator
	{
		// page is forwarded to both bases; no buffers are attached initially.
		xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
		{
		}

		// NOTE(review): presumably the character buffer the document was parsed from - confirm against the loading code
		const char_t* buffer;

		// head of the xml_extra_buffer list
		xml_extra_buffer* extra_buffers;

	#ifdef PUGIXML_COMPACT
		// only present in compact mode
		compact_hash_table hash;
	#endif
	};
1155 
get_allocator(const Object * object)1156 	template <typename Object> inline xml_allocator& get_allocator(const Object* object)
1157 	{
1158 		assert(object);
1159 
1160 		return *PUGI__GETPAGE(object)->allocator;
1161 	}
1162 
get_document(const Object * object)1163 	template <typename Object> inline xml_document_struct& get_document(const Object* object)
1164 	{
1165 		assert(object);
1166 
1167 		return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator);
1168 	}
1169 PUGI__NS_END
1170 
1171 // Low-level DOM operations
1172 PUGI__NS_BEGIN
allocate_attribute(xml_allocator & alloc)1173 	inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
1174 	{
1175 		xml_memory_page* page;
1176 		void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
1177 		if (!memory) return 0;
1178 
1179 		return new (memory) xml_attribute_struct(page);
1180 	}
1181 
allocate_node(xml_allocator & alloc,xml_node_type type)1182 	inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
1183 	{
1184 		xml_memory_page* page;
1185 		void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
1186 		if (!memory) return 0;
1187 
1188 		return new (memory) xml_node_struct(page, type);
1189 	}
1190 
destroy_attribute(xml_attribute_struct * a,xml_allocator & alloc)1191 	inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
1192 	{
1193 		if (a->header & impl::xml_memory_page_name_allocated_mask)
1194 			alloc.deallocate_string(a->name);
1195 
1196 		if (a->header & impl::xml_memory_page_value_allocated_mask)
1197 			alloc.deallocate_string(a->value);
1198 
1199 		alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
1200 	}
1201 
destroy_node(xml_node_struct * n,xml_allocator & alloc)1202 	inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
1203 	{
1204 		if (n->header & impl::xml_memory_page_name_allocated_mask)
1205 			alloc.deallocate_string(n->name);
1206 
1207 		if (n->header & impl::xml_memory_page_value_allocated_mask)
1208 			alloc.deallocate_string(n->value);
1209 
1210 		for (xml_attribute_struct* attr = n->first_attribute; attr; )
1211 		{
1212 			xml_attribute_struct* next = attr->next_attribute;
1213 
1214 			destroy_attribute(attr, alloc);
1215 
1216 			attr = next;
1217 		}
1218 
1219 		for (xml_node_struct* child = n->first_child; child; )
1220 		{
1221 			xml_node_struct* next = child->next_sibling;
1222 
1223 			destroy_node(child, alloc);
1224 
1225 			child = next;
1226 		}
1227 
1228 		alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
1229 	}
1230 
append_node(xml_node_struct * child,xml_node_struct * node)1231 	inline void append_node(xml_node_struct* child, xml_node_struct* node)
1232 	{
1233 		child->parent = node;
1234 
1235 		xml_node_struct* head = node->first_child;
1236 
1237 		if (head)
1238 		{
1239 			xml_node_struct* tail = head->prev_sibling_c;
1240 
1241 			tail->next_sibling = child;
1242 			child->prev_sibling_c = tail;
1243 			head->prev_sibling_c = child;
1244 		}
1245 		else
1246 		{
1247 			node->first_child = child;
1248 			child->prev_sibling_c = child;
1249 		}
1250 	}
1251 
prepend_node(xml_node_struct * child,xml_node_struct * node)1252 	inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
1253 	{
1254 		child->parent = node;
1255 
1256 		xml_node_struct* head = node->first_child;
1257 
1258 		if (head)
1259 		{
1260 			child->prev_sibling_c = head->prev_sibling_c;
1261 			head->prev_sibling_c = child;
1262 		}
1263 		else
1264 			child->prev_sibling_c = child;
1265 
1266 		child->next_sibling = head;
1267 		node->first_child = child;
1268 	}
1269 
insert_node_after(xml_node_struct * child,xml_node_struct * node)1270 	inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
1271 	{
1272 		xml_node_struct* parent = node->parent;
1273 
1274 		child->parent = parent;
1275 
1276 		if (node->next_sibling)
1277 			node->next_sibling->prev_sibling_c = child;
1278 		else
1279 			parent->first_child->prev_sibling_c = child;
1280 
1281 		child->next_sibling = node->next_sibling;
1282 		child->prev_sibling_c = node;
1283 
1284 		node->next_sibling = child;
1285 	}
1286 
insert_node_before(xml_node_struct * child,xml_node_struct * node)1287 	inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
1288 	{
1289 		xml_node_struct* parent = node->parent;
1290 
1291 		child->parent = parent;
1292 
1293 		if (node->prev_sibling_c->next_sibling)
1294 			node->prev_sibling_c->next_sibling = child;
1295 		else
1296 			parent->first_child = child;
1297 
1298 		child->prev_sibling_c = node->prev_sibling_c;
1299 		child->next_sibling = node;
1300 
1301 		node->prev_sibling_c = child;
1302 	}
1303 
remove_node(xml_node_struct * node)1304 	inline void remove_node(xml_node_struct* node)
1305 	{
1306 		xml_node_struct* parent = node->parent;
1307 
1308 		if (node->next_sibling)
1309 			node->next_sibling->prev_sibling_c = node->prev_sibling_c;
1310 		else
1311 			parent->first_child->prev_sibling_c = node->prev_sibling_c;
1312 
1313 		if (node->prev_sibling_c->next_sibling)
1314 			node->prev_sibling_c->next_sibling = node->next_sibling;
1315 		else
1316 			parent->first_child = node->next_sibling;
1317 
1318 		node->parent = 0;
1319 		node->prev_sibling_c = 0;
1320 		node->next_sibling = 0;
1321 	}
1322 
append_attribute(xml_attribute_struct * attr,xml_node_struct * node)1323 	inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1324 	{
1325 		xml_attribute_struct* head = node->first_attribute;
1326 
1327 		if (head)
1328 		{
1329 			xml_attribute_struct* tail = head->prev_attribute_c;
1330 
1331 			tail->next_attribute = attr;
1332 			attr->prev_attribute_c = tail;
1333 			head->prev_attribute_c = attr;
1334 		}
1335 		else
1336 		{
1337 			node->first_attribute = attr;
1338 			attr->prev_attribute_c = attr;
1339 		}
1340 	}
1341 
prepend_attribute(xml_attribute_struct * attr,xml_node_struct * node)1342 	inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1343 	{
1344 		xml_attribute_struct* head = node->first_attribute;
1345 
1346 		if (head)
1347 		{
1348 			attr->prev_attribute_c = head->prev_attribute_c;
1349 			head->prev_attribute_c = attr;
1350 		}
1351 		else
1352 			attr->prev_attribute_c = attr;
1353 
1354 		attr->next_attribute = head;
1355 		node->first_attribute = attr;
1356 	}
1357 
insert_attribute_after(xml_attribute_struct * attr,xml_attribute_struct * place,xml_node_struct * node)1358 	inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1359 	{
1360 		if (place->next_attribute)
1361 			place->next_attribute->prev_attribute_c = attr;
1362 		else
1363 			node->first_attribute->prev_attribute_c = attr;
1364 
1365 		attr->next_attribute = place->next_attribute;
1366 		attr->prev_attribute_c = place;
1367 		place->next_attribute = attr;
1368 	}
1369 
insert_attribute_before(xml_attribute_struct * attr,xml_attribute_struct * place,xml_node_struct * node)1370 	inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1371 	{
1372 		if (place->prev_attribute_c->next_attribute)
1373 			place->prev_attribute_c->next_attribute = attr;
1374 		else
1375 			node->first_attribute = attr;
1376 
1377 		attr->prev_attribute_c = place->prev_attribute_c;
1378 		attr->next_attribute = place;
1379 		place->prev_attribute_c = attr;
1380 	}
1381 
remove_attribute(xml_attribute_struct * attr,xml_node_struct * node)1382 	inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1383 	{
1384 		if (attr->next_attribute)
1385 			attr->next_attribute->prev_attribute_c = attr->prev_attribute_c;
1386 		else
1387 			node->first_attribute->prev_attribute_c = attr->prev_attribute_c;
1388 
1389 		if (attr->prev_attribute_c->next_attribute)
1390 			attr->prev_attribute_c->next_attribute = attr->next_attribute;
1391 		else
1392 			node->first_attribute = attr->next_attribute;
1393 
1394 		attr->prev_attribute_c = 0;
1395 		attr->next_attribute = 0;
1396 	}
1397 
append_new_node(xml_node_struct * node,xml_allocator & alloc,xml_node_type type=node_element)1398 	PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
1399 	{
1400 		if (!alloc.reserve()) return 0;
1401 
1402 		xml_node_struct* child = allocate_node(alloc, type);
1403 		if (!child) return 0;
1404 
1405 		append_node(child, node);
1406 
1407 		return child;
1408 	}
1409 
append_new_attribute(xml_node_struct * node,xml_allocator & alloc)1410 	PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
1411 	{
1412 		if (!alloc.reserve()) return 0;
1413 
1414 		xml_attribute_struct* attr = allocate_attribute(alloc);
1415 		if (!attr) return 0;
1416 
1417 		append_attribute(attr, node);
1418 
1419 		return attr;
1420 	}
1421 PUGI__NS_END
1422 
1423 // Helper classes for code generation
1424 PUGI__NS_BEGIN
1425 	struct opt_false
1426 	{
1427 		enum { value = 0 };
1428 	};
1429 
1430 	struct opt_true
1431 	{
1432 		enum { value = 1 };
1433 	};
1434 PUGI__NS_END
1435 
1436 // Unicode utilities
1437 PUGI__NS_BEGIN
endian_swap(uint16_t value)1438 	inline uint16_t endian_swap(uint16_t value)
1439 	{
1440 		return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8));
1441 	}
1442 
endian_swap(uint32_t value)1443 	inline uint32_t endian_swap(uint32_t value)
1444 	{
1445 		return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24);
1446 	}
1447 
1448 	struct utf8_counter
1449 	{
1450 		typedef size_t value_type;
1451 
lowutf8_counter1452 		static value_type low(value_type result, uint32_t ch)
1453 		{
1454 			// U+0000..U+007F
1455 			if (ch < 0x80) return result + 1;
1456 			// U+0080..U+07FF
1457 			else if (ch < 0x800) return result + 2;
1458 			// U+0800..U+FFFF
1459 			else return result + 3;
1460 		}
1461 
highutf8_counter1462 		static value_type high(value_type result, uint32_t)
1463 		{
1464 			// U+10000..U+10FFFF
1465 			return result + 4;
1466 		}
1467 	};
1468 
1469 	struct utf8_writer
1470 	{
1471 		typedef uint8_t* value_type;
1472 
lowutf8_writer1473 		static value_type low(value_type result, uint32_t ch)
1474 		{
1475 			// U+0000..U+007F
1476 			if (ch < 0x80)
1477 			{
1478 				*result = static_cast<uint8_t>(ch);
1479 				return result + 1;
1480 			}
1481 			// U+0080..U+07FF
1482 			else if (ch < 0x800)
1483 			{
1484 				result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
1485 				result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1486 				return result + 2;
1487 			}
1488 			// U+0800..U+FFFF
1489 			else
1490 			{
1491 				result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
1492 				result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
1493 				result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1494 				return result + 3;
1495 			}
1496 		}
1497 
highutf8_writer1498 		static value_type high(value_type result, uint32_t ch)
1499 		{
1500 			// U+10000..U+10FFFF
1501 			result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
1502 			result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
1503 			result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
1504 			result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1505 			return result + 4;
1506 		}
1507 
anyutf8_writer1508 		static value_type any(value_type result, uint32_t ch)
1509 		{
1510 			return (ch < 0x10000) ? low(result, ch) : high(result, ch);
1511 		}
1512 	};
1513 
1514 	struct utf16_counter
1515 	{
1516 		typedef size_t value_type;
1517 
lowutf16_counter1518 		static value_type low(value_type result, uint32_t)
1519 		{
1520 			return result + 1;
1521 		}
1522 
highutf16_counter1523 		static value_type high(value_type result, uint32_t)
1524 		{
1525 			return result + 2;
1526 		}
1527 	};
1528 
1529 	struct utf16_writer
1530 	{
1531 		typedef uint16_t* value_type;
1532 
lowutf16_writer1533 		static value_type low(value_type result, uint32_t ch)
1534 		{
1535 			*result = static_cast<uint16_t>(ch);
1536 
1537 			return result + 1;
1538 		}
1539 
highutf16_writer1540 		static value_type high(value_type result, uint32_t ch)
1541 		{
1542 			uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10;
1543 			uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff;
1544 
1545 			result[0] = static_cast<uint16_t>(0xD800 + msh);
1546 			result[1] = static_cast<uint16_t>(0xDC00 + lsh);
1547 
1548 			return result + 2;
1549 		}
1550 
anyutf16_writer1551 		static value_type any(value_type result, uint32_t ch)
1552 		{
1553 			return (ch < 0x10000) ? low(result, ch) : high(result, ch);
1554 		}
1555 	};
1556 
1557 	struct utf32_counter
1558 	{
1559 		typedef size_t value_type;
1560 
lowutf32_counter1561 		static value_type low(value_type result, uint32_t)
1562 		{
1563 			return result + 1;
1564 		}
1565 
highutf32_counter1566 		static value_type high(value_type result, uint32_t)
1567 		{
1568 			return result + 1;
1569 		}
1570 	};
1571 
1572 	struct utf32_writer
1573 	{
1574 		typedef uint32_t* value_type;
1575 
lowutf32_writer1576 		static value_type low(value_type result, uint32_t ch)
1577 		{
1578 			*result = ch;
1579 
1580 			return result + 1;
1581 		}
1582 
highutf32_writer1583 		static value_type high(value_type result, uint32_t ch)
1584 		{
1585 			*result = ch;
1586 
1587 			return result + 1;
1588 		}
1589 
anyutf32_writer1590 		static value_type any(value_type result, uint32_t ch)
1591 		{
1592 			*result = ch;
1593 
1594 			return result + 1;
1595 		}
1596 	};
1597 
1598 	struct latin1_writer
1599 	{
1600 		typedef uint8_t* value_type;
1601 
lowlatin1_writer1602 		static value_type low(value_type result, uint32_t ch)
1603 		{
1604 			*result = static_cast<uint8_t>(ch > 255 ? '?' : ch);
1605 
1606 			return result + 1;
1607 		}
1608 
highlatin1_writer1609 		static value_type high(value_type result, uint32_t ch)
1610 		{
1611 			(void)ch;
1612 
1613 			*result = '?';
1614 
1615 			return result + 1;
1616 		}
1617 	};
1618 
1619 	struct utf8_decoder
1620 	{
1621 		typedef uint8_t type;
1622 
processutf8_decoder1623 		template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
1624 		{
1625 			const uint8_t utf8_byte_mask = 0x3f;
1626 
1627 			while (size)
1628 			{
1629 				uint8_t lead = *data;
1630 
1631 				// 0xxxxxxx -> U+0000..U+007F
1632 				if (lead < 0x80)
1633 				{
1634 					result = Traits::low(result, lead);
1635 					data += 1;
1636 					size -= 1;
1637 
1638 					// process aligned single-byte (ascii) blocks
1639 					if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
1640 					{
1641 						// round-trip through void* to silence 'cast increases required alignment of target type' warnings
1642 						while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
1643 						{
1644 							result = Traits::low(result, data[0]);
1645 							result = Traits::low(result, data[1]);
1646 							result = Traits::low(result, data[2]);
1647 							result = Traits::low(result, data[3]);
1648 							data += 4;
1649 							size -= 4;
1650 						}
1651 					}
1652 				}
1653 				// 110xxxxx -> U+0080..U+07FF
1654 				else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
1655 				{
1656 					result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
1657 					data += 2;
1658 					size -= 2;
1659 				}
1660 				// 1110xxxx -> U+0800-U+FFFF
1661 				else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
1662 				{
1663 					result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
1664 					data += 3;
1665 					size -= 3;
1666 				}
1667 				// 11110xxx -> U+10000..U+10FFFF
1668 				else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
1669 				{
1670 					result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
1671 					data += 4;
1672 					size -= 4;
1673 				}
1674 				// 10xxxxxx or 11111xxx -> invalid
1675 				else
1676 				{
1677 					data += 1;
1678 					size -= 1;
1679 				}
1680 			}
1681 
1682 			return result;
1683 		}
1684 	};
1685 
1686 	template <typename opt_swap> struct utf16_decoder
1687 	{
1688 		typedef uint16_t type;
1689 
processutf16_decoder1690 		template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
1691 		{
1692 			while (size)
1693 			{
1694 				uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
1695 
1696 				// U+0000..U+D7FF
1697 				if (lead < 0xD800)
1698 				{
1699 					result = Traits::low(result, lead);
1700 					data += 1;
1701 					size -= 1;
1702 				}
1703 				// U+E000..U+FFFF
1704 				else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
1705 				{
1706 					result = Traits::low(result, lead);
1707 					data += 1;
1708 					size -= 1;
1709 				}
1710 				// surrogate pair lead
1711 				else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2)
1712 				{
1713 					uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
1714 
1715 					if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
1716 					{
1717 						result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
1718 						data += 2;
1719 						size -= 2;
1720 					}
1721 					else
1722 					{
1723 						data += 1;
1724 						size -= 1;
1725 					}
1726 				}
1727 				else
1728 				{
1729 					data += 1;
1730 					size -= 1;
1731 				}
1732 			}
1733 
1734 			return result;
1735 		}
1736 	};
1737 
1738 	template <typename opt_swap> struct utf32_decoder
1739 	{
1740 		typedef uint32_t type;
1741 
processutf32_decoder1742 		template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
1743 		{
1744 			while (size)
1745 			{
1746 				uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
1747 
1748 				// U+0000..U+FFFF
1749 				if (lead < 0x10000)
1750 				{
1751 					result = Traits::low(result, lead);
1752 					data += 1;
1753 					size -= 1;
1754 				}
1755 				// U+10000..U+10FFFF
1756 				else
1757 				{
1758 					result = Traits::high(result, lead);
1759 					data += 1;
1760 					size -= 1;
1761 				}
1762 			}
1763 
1764 			return result;
1765 		}
1766 	};
1767 
1768 	struct latin1_decoder
1769 	{
1770 		typedef uint8_t type;
1771 
processlatin1_decoder1772 		template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
1773 		{
1774 			while (size)
1775 			{
1776 				result = Traits::low(result, *data);
1777 				data += 1;
1778 				size -= 1;
1779 			}
1780 
1781 			return result;
1782 		}
1783 	};
1784 
	// Maps sizeof(wchar_t) to the matching code unit type and counter/writer/decoder helpers.
	// Only sizes 2 and 4 are specialized; any other size leaves the template undefined.
	template <size_t size> struct wchar_selector;

	// 2-byte wchar_t: handled as UTF-16 (decoder without byte swapping)
	template <> struct wchar_selector<2>
	{
		typedef uint16_t type;
		typedef utf16_counter counter;
		typedef utf16_writer writer;
		typedef utf16_decoder<opt_false> decoder;
	};

	// 4-byte wchar_t: handled as UTF-32 (decoder without byte swapping)
	template <> struct wchar_selector<4>
	{
		typedef uint32_t type;
		typedef utf32_counter counter;
		typedef utf32_writer writer;
		typedef utf32_decoder<opt_false> decoder;
	};

	// counter and writer for the platform's wchar_t
	typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
	typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
1805 
1806 	struct wchar_decoder
1807 	{
1808 		typedef wchar_t type;
1809 
processwchar_decoder1810 		template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
1811 		{
1812 			typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
1813 
1814 			return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
1815 		}
1816 	};
1817 
1818 #ifdef PUGIXML_WCHAR_MODE
convert_wchar_endian_swap(wchar_t * result,const wchar_t * data,size_t length)1819 	PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
1820 	{
1821 		for (size_t i = 0; i < length; ++i)
1822 			result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
1823 	}
1824 #endif
1825 PUGI__NS_END
1826 
1827 PUGI__NS_BEGIN
	// Bit flags stored in chartype_table; each flag marks the characters listed in its comment.
	// Flags are combined per character and tested with PUGI__IS_CHARTYPE.
	enum chartype_t
	{
		ct_parse_pcdata = 1,	// \0, &, \r, <
		ct_parse_attr = 2,		// \0, &, \r, ', "
		ct_parse_attr_ws = 4,	// \0, &, \r, ', ", \n, tab
		ct_space = 8,			// \r, \n, space, tab
		ct_parse_cdata = 16,	// \0, ], >, \r
		ct_parse_comment = 32,	// \0, -, >, \r
		ct_symbol = 64,			// Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
		ct_start_symbol = 128	// Any symbol > 127, a-z, A-Z, _, :
	};
1839 
	// Per-character bitmask of chartype_t flags, indexed by character code (0..255).
	// Entries 128..255 all carry ct_symbol | ct_start_symbol (192).
	static const unsigned char chartype_table[256] =
	{
		55,  0,   0,   0,   0,   0,   0,   0,      0,   12,  12,  0,   0,   63,  0,   0,   // 0-15
		0,   0,   0,   0,   0,   0,   0,   0,      0,   0,   0,   0,   0,   0,   0,   0,   // 16-31
		8,   0,   6,   0,   0,   0,   7,   6,      0,   0,   0,   0,   0,   96,  64,  0,   // 32-47
		64,  64,  64,  64,  64,  64,  64,  64,     64,  64,  192, 0,   1,   0,   48,  0,   // 48-63
		0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 64-79
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0,   0,   16,  0,   192, // 80-95
		0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 96-111
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0, 0, 0, 0, 0,           // 112-127

		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 128+
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192
	};
1860 
	// Bit flags stored in chartypex_table; each flag marks the characters listed in its comment.
	// Flags are combined per character and tested with PUGI__IS_CHARTYPEX.
	enum chartypex_t
	{
		ctx_special_pcdata = 1,   // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
		ctx_special_attr = 2,     // Any symbol >= 0 and < 32 (except \t), &, <, >, "
		ctx_start_symbol = 4,	  // Any symbol > 127, a-z, A-Z, _
		ctx_digit = 8,			  // 0-9
		ctx_symbol = 16			  // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
	};
1869 
	// Per-character bitmask of chartypex_t flags, indexed by character code (0..255).
	// Entries 128..255 all carry ctx_start_symbol | ctx_symbol (20).
	static const unsigned char chartypex_table[256] =
	{
		3,  3,  3,  3,  3,  3,  3,  3,     3,  0,  2,  3,  3,  2,  3,  3,     // 0-15
		3,  3,  3,  3,  3,  3,  3,  3,     3,  3,  3,  3,  3,  3,  3,  3,     // 16-31
		0,  0,  2,  0,  0,  0,  3,  0,     0,  0,  0,  0,  0, 16, 16,  0,     // 32-47
		24, 24, 24, 24, 24, 24, 24, 24,    24, 24, 0,  0,  3,  0,  3,  0,     // 48-63

		0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 64-79
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  20,    // 80-95
		0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 96-111
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  0,     // 112-127

		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 128+
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20
	};
1891 
// PUGI__IS_CHARTYPE_IMPL(c, ct, table) yields the flags of character c masked by ct (non-zero when any match).
// In wchar mode every character with code >= 128 shares the entry table[128];
// otherwise c is narrowed to unsigned char and used as the index directly.
#ifdef PUGIXML_WCHAR_MODE
	#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
#else
	#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
#endif

	// Lookups against chartype_table (chartype_t flags) and chartypex_table (chartypex_t flags)
	#define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
	#define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
1900 
is_little_endian()1901 	PUGI__FN bool is_little_endian()
1902 	{
1903 		unsigned int ui = 1;
1904 
1905 		return *reinterpret_cast<unsigned char*>(&ui) == 1;
1906 	}
1907 
get_wchar_encoding()1908 	PUGI__FN xml_encoding get_wchar_encoding()
1909 	{
1910 		PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1911 
1912 		if (sizeof(wchar_t) == 2)
1913 			return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1914 		else
1915 			return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1916 	}
1917 
	// Locates the value of the 'encoding' field inside an XML declaration at the start of a buffer.
	// data/size describe the raw input bytes; all comparisons are done byte by byte.
	// Returns true when a field of the form encoding="value" (or with ' as delimiter) was found;
	// out_encoding then points into data at the first byte of the value (it is not zero-terminated)
	// and out_length holds the number of value bytes.
	// Returns false when the buffer does not start with "<?xml" followed by a whitespace character,
	// when a '?' is met before the first "en", or when the text starting at "en" is not a complete
	// encoding field. Note: out_encoding/out_length may already have been written when false is
	// returned because the closing delimiter is missing.
	PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length)
	{
	// Consumes exactly one byte equal to ch at 'offset'; makes the enclosing function return false otherwise
	#define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; }
	// Advances 'offset' over a (possibly empty) run of bytes that have chartype ct, never past 'size'
	#define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; }

		// check if we have a non-empty XML declaration
		// (size is tested first, so data[0..5] are readable; the five literal comparisons are combined with non-short-circuit &)
		if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space)))
			return false;

		// scan XML declaration until the encoding field
		// (the loop bound keeps data[i + 1] inside the buffer)
		for (size_t i = 6; i + 1 < size; ++i)
		{
			// declaration can not contain ? in quoted values
			if (data[i] == '?')
				return false;

			if (data[i] == 'e' && data[i + 1] == 'n')
			{
				size_t offset = i;

				// encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed
				PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o');
				PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g');

				// S? = S?
				PUGI__SCANCHARTYPE(ct_space);
				PUGI__SCANCHAR('=');
				PUGI__SCANCHARTYPE(ct_space);

				// the only two valid delimiters are ' and "
				// (anything that is not " is assumed to be '; the scan below rejects other bytes)
				uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\'';

				PUGI__SCANCHAR(delimiter);

				size_t start = offset;

				out_encoding = data + offset;

				// the value is the run of ct_symbol bytes that follows the opening delimiter
				PUGI__SCANCHARTYPE(ct_symbol);

				out_length = offset - start;

				// the value must be closed by the same delimiter that opened it
				PUGI__SCANCHAR(delimiter);

				return true;
			}
		}

		return false;

	#undef PUGI__SCANCHAR
	#undef PUGI__SCANCHARTYPE
	}
1971 
guess_buffer_encoding(const uint8_t * data,size_t size)1972 	PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size)
1973 	{
1974 		// skip encoding autodetection if input buffer is too small
1975 		if (size < 4) return encoding_utf8;
1976 
1977 		uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
1978 
1979 		// look for BOM in first few bytes
1980 		if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
1981 		if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
1982 		if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
1983 		if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
1984 		if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
1985 
1986 		// look for <, <? or <?xm in various encodings
1987 		if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
1988 		if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
1989 		if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
1990 		if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
1991 
1992 		// look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
1993 		if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
1994 		if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
1995 
1996 		// no known BOM detected; parse declaration
1997 		const uint8_t* enc = 0;
1998 		size_t enc_length = 0;
1999 
2000 		if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length))
2001 		{
2002 			// iso-8859-1 (case-insensitive)
2003 			if (enc_length == 10
2004 				&& (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o'
2005 				&& enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9'
2006 				&& enc[8] == '-' && enc[9] == '1')
2007 				return encoding_latin1;
2008 
2009 			// latin1 (case-insensitive)
2010 			if (enc_length == 6
2011 				&& (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't'
2012 				&& (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n'
2013 				&& enc[5] == '1')
2014 				return encoding_latin1;
2015 		}
2016 
2017 		return encoding_utf8;
2018 	}
2019 
get_buffer_encoding(xml_encoding encoding,const void * contents,size_t size)2020 	PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
2021 	{
2022 		// replace wchar encoding with utf implementation
2023 		if (encoding == encoding_wchar) return get_wchar_encoding();
2024 
2025 		// replace utf16 encoding with utf16 with specific endianness
2026 		if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2027 
2028 		// replace utf32 encoding with utf32 with specific endianness
2029 		if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2030 
2031 		// only do autodetection if no explicit encoding is requested
2032 		if (encoding != encoding_auto) return encoding;
2033 
2034 		// try to guess encoding (based on XML specification, Appendix F.1)
2035 		const uint8_t* data = static_cast<const uint8_t*>(contents);
2036 
2037 		return guess_buffer_encoding(data, size);
2038 	}
2039 
get_mutable_buffer(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2040 	PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2041 	{
2042 		size_t length = size / sizeof(char_t);
2043 
2044 		if (is_mutable)
2045 		{
2046 			out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
2047 			out_length = length;
2048 		}
2049 		else
2050 		{
2051 			char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2052 			if (!buffer) return false;
2053 
2054 			if (contents)
2055 				memcpy(buffer, contents, length * sizeof(char_t));
2056 			else
2057 				assert(length == 0);
2058 
2059 			buffer[length] = 0;
2060 
2061 			out_buffer = buffer;
2062 			out_length = length + 1;
2063 		}
2064 
2065 		return true;
2066 	}
2067 
2068 #ifdef PUGIXML_WCHAR_MODE
need_endian_swap_utf(xml_encoding le,xml_encoding re)2069 	PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
2070 	{
2071 		return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
2072 			   (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
2073 	}
2074 
convert_buffer_endian_swap(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2075 	PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2076 	{
2077 		const char_t* data = static_cast<const char_t*>(contents);
2078 		size_t length = size / sizeof(char_t);
2079 
2080 		if (is_mutable)
2081 		{
2082 			char_t* buffer = const_cast<char_t*>(data);
2083 
2084 			convert_wchar_endian_swap(buffer, data, length);
2085 
2086 			out_buffer = buffer;
2087 			out_length = length;
2088 		}
2089 		else
2090 		{
2091 			char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2092 			if (!buffer) return false;
2093 
2094 			convert_wchar_endian_swap(buffer, data, length);
2095 			buffer[length] = 0;
2096 
2097 			out_buffer = buffer;
2098 			out_length = length + 1;
2099 		}
2100 
2101 		return true;
2102 	}
2103 
convert_buffer_generic(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,D)2104 	template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2105 	{
2106 		const typename D::type* data = static_cast<const typename D::type*>(contents);
2107 		size_t data_length = size / sizeof(typename D::type);
2108 
2109 		// first pass: get length in wchar_t units
2110 		size_t length = D::process(data, data_length, 0, wchar_counter());
2111 
2112 		// allocate buffer of suitable length
2113 		char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2114 		if (!buffer) return false;
2115 
2116 		// second pass: convert utf16 input to wchar_t
2117 		wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
2118 		wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
2119 
2120 		assert(oend == obegin + length);
2121 		*oend = 0;
2122 
2123 		out_buffer = buffer;
2124 		out_length = length + 1;
2125 
2126 		return true;
2127 	}
2128 
convert_buffer(char_t * & out_buffer,size_t & out_length,xml_encoding encoding,const void * contents,size_t size,bool is_mutable)2129 	PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2130 	{
2131 		// get native encoding
2132 		xml_encoding wchar_encoding = get_wchar_encoding();
2133 
2134 		// fast path: no conversion required
2135 		if (encoding == wchar_encoding)
2136 			return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2137 
2138 		// only endian-swapping is required
2139 		if (need_endian_swap_utf(encoding, wchar_encoding))
2140 			return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
2141 
2142 		// source encoding is utf8
2143 		if (encoding == encoding_utf8)
2144 			return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
2145 
2146 		// source encoding is utf16
2147 		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2148 		{
2149 			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2150 
2151 			return (native_encoding == encoding) ?
2152 				convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2153 				convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2154 		}
2155 
2156 		// source encoding is utf32
2157 		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2158 		{
2159 			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2160 
2161 			return (native_encoding == encoding) ?
2162 				convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2163 				convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2164 		}
2165 
2166 		// source encoding is latin1
2167 		if (encoding == encoding_latin1)
2168 			return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
2169 
2170 		assert(false && "Invalid encoding"); // unreachable
2171 		return false;
2172 	}
2173 #else
convert_buffer_generic(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,D)2174 	template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2175 	{
2176 		const typename D::type* data = static_cast<const typename D::type*>(contents);
2177 		size_t data_length = size / sizeof(typename D::type);
2178 
2179 		// first pass: get length in utf8 units
2180 		size_t length = D::process(data, data_length, 0, utf8_counter());
2181 
2182 		// allocate buffer of suitable length
2183 		char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2184 		if (!buffer) return false;
2185 
2186 		// second pass: convert utf16 input to utf8
2187 		uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2188 		uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
2189 
2190 		assert(oend == obegin + length);
2191 		*oend = 0;
2192 
2193 		out_buffer = buffer;
2194 		out_length = length + 1;
2195 
2196 		return true;
2197 	}
2198 
get_latin1_7bit_prefix_length(const uint8_t * data,size_t size)2199 	PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
2200 	{
2201 		for (size_t i = 0; i < size; ++i)
2202 			if (data[i] > 127)
2203 				return i;
2204 
2205 		return size;
2206 	}
2207 
convert_buffer_latin1(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2208 	PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2209 	{
2210 		const uint8_t* data = static_cast<const uint8_t*>(contents);
2211 		size_t data_length = size;
2212 
2213 		// get size of prefix that does not need utf8 conversion
2214 		size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
2215 		assert(prefix_length <= data_length);
2216 
2217 		const uint8_t* postfix = data + prefix_length;
2218 		size_t postfix_length = data_length - prefix_length;
2219 
2220 		// if no conversion is needed, just return the original buffer
2221 		if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2222 
2223 		// first pass: get length in utf8 units
2224 		size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
2225 
2226 		// allocate buffer of suitable length
2227 		char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2228 		if (!buffer) return false;
2229 
2230 		// second pass: convert latin1 input to utf8
2231 		memcpy(buffer, data, prefix_length);
2232 
2233 		uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2234 		uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
2235 
2236 		assert(oend == obegin + length);
2237 		*oend = 0;
2238 
2239 		out_buffer = buffer;
2240 		out_length = length + 1;
2241 
2242 		return true;
2243 	}
2244 
convert_buffer(char_t * & out_buffer,size_t & out_length,xml_encoding encoding,const void * contents,size_t size,bool is_mutable)2245 	PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2246 	{
2247 		// fast path: no conversion required
2248 		if (encoding == encoding_utf8)
2249 			return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2250 
2251 		// source encoding is utf16
2252 		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2253 		{
2254 			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2255 
2256 			return (native_encoding == encoding) ?
2257 				convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2258 				convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2259 		}
2260 
2261 		// source encoding is utf32
2262 		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2263 		{
2264 			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2265 
2266 			return (native_encoding == encoding) ?
2267 				convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2268 				convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2269 		}
2270 
2271 		// source encoding is latin1
2272 		if (encoding == encoding_latin1)
2273 			return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
2274 
2275 		assert(false && "Invalid encoding"); // unreachable
2276 		return false;
2277 	}
2278 #endif
2279 
as_utf8_begin(const wchar_t * str,size_t length)2280 	PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
2281 	{
2282 		// get length in utf8 characters
2283 		return wchar_decoder::process(str, length, 0, utf8_counter());
2284 	}
2285 
as_utf8_end(char * buffer,size_t size,const wchar_t * str,size_t length)2286 	PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
2287 	{
2288 		// convert to utf8
2289 		uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
2290 		uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
2291 
2292 		assert(begin + size == end);
2293 		(void)!end;
2294 		(void)!size;
2295 	}
2296 
2297 #ifndef PUGIXML_NO_STL
as_utf8_impl(const wchar_t * str,size_t length)2298 	PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
2299 	{
2300 		// first pass: get length in utf8 characters
2301 		size_t size = as_utf8_begin(str, length);
2302 
2303 		// allocate resulting string
2304 		std::string result;
2305 		result.resize(size);
2306 
2307 		// second pass: convert to utf8
2308 		if (size > 0) as_utf8_end(&result[0], size, str, length);
2309 
2310 		return result;
2311 	}
2312 
as_wide_impl(const char * str,size_t size)2313 	PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
2314 	{
2315 		const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
2316 
2317 		// first pass: get length in wchar_t units
2318 		size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
2319 
2320 		// allocate resulting string
2321 		std::basic_string<wchar_t> result;
2322 		result.resize(length);
2323 
2324 		// second pass: convert to wchar_t
2325 		if (length > 0)
2326 		{
2327 			wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
2328 			wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
2329 
2330 			assert(begin + length == end);
2331 			(void)!end;
2332 		}
2333 
2334 		return result;
2335 	}
2336 #endif
2337 
2338 	template <typename Header>
strcpy_insitu_allow(size_t length,const Header & header,uintptr_t header_mask,char_t * target)2339 	inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
2340 	{
2341 		// never reuse shared memory
2342 		if (header & xml_memory_page_contents_shared_mask) return false;
2343 
2344 		size_t target_length = strlength(target);
2345 
2346 		// always reuse document buffer memory if possible
2347 		if ((header & header_mask) == 0) return target_length >= length;
2348 
2349 		// reuse heap memory if waste is not too great
2350 		const size_t reuse_threshold = 32;
2351 
2352 		return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
2353 	}
2354 
2355 	template <typename String, typename Header>
strcpy_insitu(String & dest,Header & header,uintptr_t header_mask,const char_t * source,size_t source_length)2356 	PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
2357 	{
2358 		if (source_length == 0)
2359 		{
2360 			// empty string and null pointer are equivalent, so just deallocate old memory
2361 			xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2362 
2363 			if (header & header_mask) alloc->deallocate_string(dest);
2364 
2365 			// mark the string as not allocated
2366 			dest = 0;
2367 			header &= ~header_mask;
2368 
2369 			return true;
2370 		}
2371 		else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
2372 		{
2373 			// we can reuse old buffer, so just copy the new data (including zero terminator)
2374 			memcpy(dest, source, source_length * sizeof(char_t));
2375 			dest[source_length] = 0;
2376 
2377 			return true;
2378 		}
2379 		else
2380 		{
2381 			xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2382 
2383 			if (!alloc->reserve()) return false;
2384 
2385 			// allocate new buffer
2386 			char_t* buf = alloc->allocate_string(source_length + 1);
2387 			if (!buf) return false;
2388 
2389 			// copy the string (including zero terminator)
2390 			memcpy(buf, source, source_length * sizeof(char_t));
2391 			buf[source_length] = 0;
2392 
2393 			// deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
2394 			if (header & header_mask) alloc->deallocate_string(dest);
2395 
2396 			// the string is now allocated, so set the flag
2397 			dest = buf;
2398 			header |= header_mask;
2399 
2400 			return true;
2401 		}
2402 	}
2403 
	// Bookkeeping for removing characters from a string that is being rewritten in place.
	// Removed ranges ("gaps") are merged lazily: the text between two gaps is moved down only
	// when the next gap is pushed or when flush() is called.
	struct gap
	{
		// one past the last character of the most recent gap; 0 while no gap has been pushed
		char_t* end;
		// total number of characters (char_t units) removed so far
		size_t size;

		gap(): end(0), size(0)
		{
		}

		// Push new gap of count characters starting at s, move s count characters further (skipping the gap).
		// Collapse previous gap.
		void push(char_t*& s, size_t count)
		{
			if (end) // there was a gap already; collapse it
			{
				// Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
				assert(s >= end);
				// the length argument is in bytes, hence the pointer difference taken through char*
				memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
			}

			s += count; // end of current gap

			// "merge" two gaps
			end = s;
			size += count;
		}

		// Collapse all gaps, return past-the-end pointer
		// (s is the current position; the result is s itself when no gap was ever pushed)
		char_t* flush(char_t* s)
		{
			if (end)
			{
				// Move [old_gap_end, current_pos) to [old_gap_start, ...)
				assert(s >= end);
				memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));

				return s - size;
			}
			else return s;
		}
	};
2445 
	// Decodes one character or entity reference in place.
	// s is expected to point at the '&' that starts the reference (only s[1] onwards is inspected).
	// Recognized forms are &#NNN; &#xHHH; &amp; &apos; &gt; &lt; and &quot;.
	// On success the decoded character is written starting at s, the unused remainder of the
	// reference is registered with g as a gap, and the position right after ';' is returned.
	// If the reference is not recognized the text is left unchanged and the position where
	// matching stopped is returned, so the caller keeps scanning from there.
	PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
	{
		char_t* stre = s + 1;

		switch (*stre)
		{
			case '#':	// &#...
			{
				// code point accumulated from the digits
				unsigned int ucsc = 0;

				if (stre[1] == 'x') // &#x... (hex code)
				{
					stre += 2;

					char_t ch = *stre;

					// reference without any digits is left as is
					if (ch == ';') return stre;

					for (;;)
					{
						// the unsigned comparison accepts exactly '0'..'9'
						if (static_cast<unsigned int>(ch - '0') <= 9)
							ucsc = 16 * ucsc + (ch - '0');
						// ch | ' ' folds 'A'..'F' onto 'a'..'f'
						else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
							ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
						else if (ch == ';')
							break;
						else // cancel
							return stre;

						ch = *++stre;
					}

					// skip ';'
					++stre;
				}
				else	// &#... (dec code)
				{
					char_t ch = *++stre;

					// reference without any digits is left as is
					if (ch == ';') return stre;

					for (;;)
					{
						// the unsigned comparison accepts exactly '0'..'9'
						if (static_cast<unsigned int>(ch - '0') <= 9)
							ucsc = 10 * ucsc + (ch - '0');
						else if (ch == ';')
							break;
						else // cancel
							return stre;

						ch = *++stre;
					}

					// skip ';'
					++stre;
				}

				// write the code point over the start of the reference; s ends up after the written units
			#ifdef PUGIXML_WCHAR_MODE
				s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
			#else
				s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
			#endif

				// whatever remains between the written character and the end of the reference becomes a gap
				g.push(s, stre - s);
				return stre;
			}

			case 'a':	// &a
			{
				++stre;

				if (*stre == 'm') // &am
				{
					if (*++stre == 'p' && *++stre == ';') // &amp;
					{
						*s++ = '&';
						++stre;

						g.push(s, stre - s);
						return stre;
					}
				}
				else if (*stre == 'p') // &ap
				{
					if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
					{
						*s++ = '\'';
						++stre;

						g.push(s, stre - s);
						return stre;
					}
				}
				break;
			}

			case 'g': // &g
			{
				if (*++stre == 't' && *++stre == ';') // &gt;
				{
					*s++ = '>';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
				break;
			}

			case 'l': // &l
			{
				if (*++stre == 't' && *++stre == ';') // &lt;
				{
					*s++ = '<';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
				break;
			}

			case 'q': // &q
			{
				if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
				{
					*s++ = '"';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
				break;
			}

			default:
				break;
		}

		// not a recognized reference: nothing was modified
		return stre;
	}
2585 
	// Parser utilities
	// Most of these macros operate on variables that must exist at the point of use:
	// s (current position), endch, optmsk, cursor, alloc, ch, error_offset and error_status.

	// True if c equals e, or if c is zero and endch equals e
	#define PUGI__ENDSWITH(c, e)        ((c) == (e) || ((c) == 0 && endch == (e)))
	// Advances s over characters of type ct_space
	#define PUGI__SKIPWS()              { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
	// Non-zero if any bit of OPT is set in optmsk
	#define PUGI__OPTSET(OPT)           ( optmsk & (OPT) )
	// Replaces cursor with the result of append_new_node(cursor, *alloc, TYPE); reports status_out_of_memory if that is null
	#define PUGI__PUSHNODE(TYPE)        { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
	// Moves cursor to its parent
	#define PUGI__POPNODE()             { cursor = cursor->parent; }
	// Advances s until X holds or the zero terminator is reached
	#define PUGI__SCANFOR(X)            { while (*s != 0 && !(X)) ++s; }
	// Advances s while X holds (X itself has to fail at the terminator)
	#define PUGI__SCANWHILE(X)          { while (X) ++s; }
	// Same as PUGI__SCANWHILE, unrolled four times; X must test the character through the local variable ss
	#define PUGI__SCANWHILE_UNROLL(X)   { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
	// Saves *s in ch, replaces it with a zero terminator and steps past it
	#define PUGI__ENDSEG()              { ch = *s; *s = 0; ++s; }
	// Records error position m and status err, then returns a null char_t* from the enclosing function
	#define PUGI__THROW_ERROR(err, m)   return error_offset = m, error_status = err, static_cast<char_t*>(0)
	// Reports err at m if s has reached the zero terminator
	#define PUGI__CHECK_ERROR(err, m)   { if (*s == 0) PUGI__THROW_ERROR(err, m); }
2598 
strconv_comment(char_t * s,char_t endch)2599 	PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
2600 	{
2601 		gap g;
2602 
2603 		while (true)
2604 		{
2605 			PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
2606 
2607 			if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2608 			{
2609 				*s++ = '\n'; // replace first one with 0x0a
2610 
2611 				if (*s == '\n') g.push(s, 1);
2612 			}
2613 			else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
2614 			{
2615 				*g.flush(s) = 0;
2616 
2617 				return s + (s[2] == '>' ? 3 : 2);
2618 			}
2619 			else if (*s == 0)
2620 			{
2621 				return 0;
2622 			}
2623 			else ++s;
2624 		}
2625 	}
2626 
strconv_cdata(char_t * s,char_t endch)2627 	PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
2628 	{
2629 		gap g;
2630 
2631 		while (true)
2632 		{
2633 			PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
2634 
2635 			if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2636 			{
2637 				*s++ = '\n'; // replace first one with 0x0a
2638 
2639 				if (*s == '\n') g.push(s, 1);
2640 			}
2641 			else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
2642 			{
2643 				*g.flush(s) = 0;
2644 
2645 				return s + 1;
2646 			}
2647 			else if (*s == 0)
2648 			{
2649 				return 0;
2650 			}
2651 			else ++s;
2652 		}
2653 	}
2654 
	// Signature shared by all PCDATA conversion routines: takes the start of the text, returns the position to continue parsing from
	typedef char_t* (*strconv_pcdata_t)(char_t*);

	// In-place PCDATA conversion, specialized at compile time by three option types:
	// opt_trim   - strip trailing whitespace from the resulting text
	// opt_eol    - normalize \r and \r\n to \n
	// opt_escape - decode character and entity references via strconv_escape
	template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
	{
		// Converts the text starting at s and zero-terminates it.
		// Returns the position after the '<' that ended the text, or the position of the
		// zero terminator if the buffer ended first.
		static char_t* parse(char_t* s)
		{
			gap g;

			// start of the text; lower bound for trailing whitespace trimming
			char_t* begin = s;

			while (true)
			{
				// skip characters that are not flagged ct_parse_pcdata
				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));

				if (*s == '<') // PCDATA ends here
				{
					// close all gaps; end is the logical end of the converted text
					char_t* end = g.flush(s);

					if (opt_trim::value)
						while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
							--end;

					*end = 0;

					return s + 1;
				}
				else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
				{
					*s++ = '\n'; // replace first one with 0x0a

					// drop the 0x0a of a pair
					if (*s == '\n') g.push(s, 1);
				}
				else if (opt_escape::value && *s == '&')
				{
					s = strconv_escape(s, g);
				}
				else if (*s == 0)
				{
					// buffer ended without '<': finish the text the same way, but stay on the terminator
					char_t* end = g.flush(s);

					if (opt_trim::value)
						while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
							--end;

					*end = 0;

					return s;
				}
				else ++s;
			}
		}
	};
2707 
get_strconv_pcdata(unsigned int optmask)2708 	PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
2709 	{
2710 		PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
2711 
2712 		switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (eol escapes trim)
2713 		{
2714 		case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse;
2715 		case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse;
2716 		case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse;
2717 		case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse;
2718 		case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse;
2719 		case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse;
2720 		case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse;
2721 		case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse;
2722 		default: assert(false); return 0; // unreachable
2723 		}
2724 	}
2725 
	// Signature shared by all attribute value conversion routines: takes the start of the value and the quote character that ends it
	typedef char_t* (*strconv_attribute_t)(char_t*, char_t);

	// In-place attribute value conversion routines. opt_escape selects at compile time whether
	// character and entity references are decoded via strconv_escape.
	// Every routine zero-terminates the converted value and returns the position after the
	// closing quote, or 0 if the buffer ends before end_quote is found.
	template <typename opt_escape> struct strconv_attribute_impl
	{
		// Whitespace normalization: leading and trailing whitespace is removed and every inner
		// run of whitespace characters is replaced with a single space.
		static char_t* parse_wnorm(char_t* s, char_t end_quote)
		{
			gap g;

			// trim leading whitespaces
			if (PUGI__IS_CHARTYPE(*s, ct_space))
			{
				char_t* str = s;

				do ++str;
				while (PUGI__IS_CHARTYPE(*str, ct_space));

				g.push(s, str - s);
			}

			while (true)
			{
				// skip characters that are neither flagged ct_parse_attr_ws nor whitespace
				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));

				if (*s == end_quote)
				{
					char_t* str = g.flush(s);

					// write the terminator, then keep overwriting trailing whitespace with zeros while walking backwards
					// NOTE(review): this reads the character before the value start when the value is empty or all whitespace;
					// presumably that is the opening quote, which stops the loop - confirm against the caller
					do *str-- = 0;
					while (PUGI__IS_CHARTYPE(*str, ct_space));

					return s + 1;
				}
				else if (PUGI__IS_CHARTYPE(*s, ct_space))
				{
					// first whitespace character of a run becomes a space
					*s++ = ' ';

					// the rest of the run is removed
					if (PUGI__IS_CHARTYPE(*s, ct_space))
					{
						char_t* str = s + 1;
						while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;

						g.push(s, str - s);
					}
				}
				else if (opt_escape::value && *s == '&')
				{
					s = strconv_escape(s, g);
				}
				else if (!*s)
				{
					return 0;
				}
				else ++s;
			}
		}

		// Whitespace conversion: every whitespace character is replaced with a space;
		// a \r\n pair yields a single space.
		static char_t* parse_wconv(char_t* s, char_t end_quote)
		{
			gap g;

			while (true)
			{
				// skip characters that are not flagged ct_parse_attr_ws
				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));

				if (*s == end_quote)
				{
					*g.flush(s) = 0;

					return s + 1;
				}
				else if (PUGI__IS_CHARTYPE(*s, ct_space))
				{
					if (*s == '\r')
					{
						*s++ = ' ';

						// drop the 0x0a of a pair
						if (*s == '\n') g.push(s, 1);
					}
					else *s++ = ' ';
				}
				else if (opt_escape::value && *s == '&')
				{
					s = strconv_escape(s, g);
				}
				else if (!*s)
				{
					return 0;
				}
				else ++s;
			}
		}

		// End-of-line normalization only: \r and \r\n are replaced with \n.
		static char_t* parse_eol(char_t* s, char_t end_quote)
		{
			gap g;

			while (true)
			{
				// skip characters that are not flagged ct_parse_attr
				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));

				if (*s == end_quote)
				{
					*g.flush(s) = 0;

					return s + 1;
				}
				else if (*s == '\r')
				{
					*s++ = '\n';

					// drop the 0x0a of a pair
					if (*s == '\n') g.push(s, 1);
				}
				else if (opt_escape::value && *s == '&')
				{
					s = strconv_escape(s, g);
				}
				else if (!*s)
				{
					return 0;
				}
				else ++s;
			}
		}

		// No whitespace or end-of-line processing; only references are decoded (if opt_escape is set).
		static char_t* parse_simple(char_t* s, char_t end_quote)
		{
			gap g;

			while (true)
			{
				// skip characters that are not flagged ct_parse_attr
				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));

				if (*s == end_quote)
				{
					*g.flush(s) = 0;

					return s + 1;
				}
				else if (opt_escape::value && *s == '&')
				{
					s = strconv_escape(s, g);
				}
				else if (!*s)
				{
					return 0;
				}
				else ++s;
			}
		}
	};
2876 
get_strconv_attribute(unsigned int optmask)2877 	PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
2878 	{
2879 		PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
2880 
2881 		switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes)
2882 		{
2883 		case 0:  return strconv_attribute_impl<opt_false>::parse_simple;
2884 		case 1:  return strconv_attribute_impl<opt_true>::parse_simple;
2885 		case 2:  return strconv_attribute_impl<opt_false>::parse_eol;
2886 		case 3:  return strconv_attribute_impl<opt_true>::parse_eol;
2887 		case 4:  return strconv_attribute_impl<opt_false>::parse_wconv;
2888 		case 5:  return strconv_attribute_impl<opt_true>::parse_wconv;
2889 		case 6:  return strconv_attribute_impl<opt_false>::parse_wconv;
2890 		case 7:  return strconv_attribute_impl<opt_true>::parse_wconv;
2891 		case 8:  return strconv_attribute_impl<opt_false>::parse_wnorm;
2892 		case 9:  return strconv_attribute_impl<opt_true>::parse_wnorm;
2893 		case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
2894 		case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
2895 		case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
2896 		case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
2897 		case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
2898 		case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
2899 		default: assert(false); return 0; // unreachable
2900 		}
2901 	}
2902 
make_parse_result(xml_parse_status status,ptrdiff_t offset=0)2903 	inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
2904 	{
2905 		xml_parse_result result;
2906 		result.status = status;
2907 		result.offset = offset;
2908 
2909 		return result;
2910 	}
2911 
2912 	struct xml_parser
2913 	{
2914 		xml_allocator* alloc;
2915 		char_t* error_offset;
2916 		xml_parse_status error_status;
2917 
xml_parserxml_parser2918 		xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
2919 		{
2920 		}
2921 
2922 		// DOCTYPE consists of nested sections of the following possible types:
2923 		// <!-- ... -->, <? ... ?>, "...", '...'
2924 		// <![...]]>
2925 		// <!...>
2926 		// First group can not contain nested groups
2927 		// Second group can contain nested groups of the same type
2928 		// Third group can contain all other groups
parse_doctype_primitivexml_parser2929 		char_t* parse_doctype_primitive(char_t* s)
2930 		{
2931 			if (*s == '"' || *s == '\'')
2932 			{
2933 				// quoted string
2934 				char_t ch = *s++;
2935 				PUGI__SCANFOR(*s == ch);
2936 				if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2937 
2938 				s++;
2939 			}
2940 			else if (s[0] == '<' && s[1] == '?')
2941 			{
2942 				// <? ... ?>
2943 				s += 2;
2944 				PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
2945 				if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2946 
2947 				s += 2;
2948 			}
2949 			else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
2950 			{
2951 				s += 4;
2952 				PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
2953 				if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2954 
2955 				s += 3;
2956 			}
2957 			else PUGI__THROW_ERROR(status_bad_doctype, s);
2958 
2959 			return s;
2960 		}
2961 
parse_doctype_ignorexml_parser2962 		char_t* parse_doctype_ignore(char_t* s)
2963 		{
2964 			size_t depth = 0;
2965 
2966 			assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
2967 			s += 3;
2968 
2969 			while (*s)
2970 			{
2971 				if (s[0] == '<' && s[1] == '!' && s[2] == '[')
2972 				{
2973 					// nested ignore section
2974 					s += 3;
2975 					depth++;
2976 				}
2977 				else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
2978 				{
2979 					// ignore section end
2980 					s += 3;
2981 
2982 					if (depth == 0)
2983 						return s;
2984 
2985 					depth--;
2986 				}
2987 				else s++;
2988 			}
2989 
2990 			PUGI__THROW_ERROR(status_bad_doctype, s);
2991 		}
2992 
parse_doctype_groupxml_parser2993 		char_t* parse_doctype_group(char_t* s, char_t endch)
2994 		{
2995 			size_t depth = 0;
2996 
2997 			assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
2998 			s += 2;
2999 
3000 			while (*s)
3001 			{
3002 				if (s[0] == '<' && s[1] == '!' && s[2] != '-')
3003 				{
3004 					if (s[2] == '[')
3005 					{
3006 						// ignore
3007 						s = parse_doctype_ignore(s);
3008 						if (!s) return s;
3009 					}
3010 					else
3011 					{
3012 						// some control group
3013 						s += 2;
3014 						depth++;
3015 					}
3016 				}
3017 				else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
3018 				{
3019 					// unknown tag (forbidden), or some primitive group
3020 					s = parse_doctype_primitive(s);
3021 					if (!s) return s;
3022 				}
3023 				else if (*s == '>')
3024 				{
3025 					if (depth == 0)
3026 						return s;
3027 
3028 					depth--;
3029 					s++;
3030 				}
3031 				else s++;
3032 			}
3033 
3034 			if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
3035 
3036 			return s;
3037 		}
3038 
parse_exclamationxml_parser3039 		char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
3040 		{
			// Parses markup that starts with "<!": comments, CDATA sections and DOCTYPE.
			// s      - points at the '!' that follows '<'
			// cursor - node that receives any new child; it is taken by value, so changes
			//          made to it here are not seen by the caller
			// optmsk - parse option mask (read through PUGI__OPTSET)
			// endch  - the character that originally ended the buffer (see parse())
			// Returns the position after the construct. Callers treat a null result as
			// failure; the PUGI__* macros are defined outside this chunk, and
			// PUGI__THROW_ERROR / PUGI__CHECK_ERROR presumably record the error and
			// return null - confirm against their definitions.
3041 			// parse node contents, starting with exclamation mark
3042 			++s;
3043 
3044 			if (*s == '-') // '<!-...'
3045 			{
3046 				++s;
3047 
3048 				if (*s == '-') // '<!--...'
3049 				{
3050 					++s;
3051 
3052 					if (PUGI__OPTSET(parse_comments))
3053 					{
3054 						PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
3055 						cursor->value = s; // Save the offset.
3056 					}
3057 
					// End-of-line conversion is only needed when the comment text is kept.
3058 					if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
3059 					{
3060 						s = strconv_comment(s, endch);
3061 
3062 						if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
3063 					}
3064 					else
3065 					{
3066 						// Scan for terminating '-->'.
3067 						PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
3068 						PUGI__CHECK_ERROR(status_bad_comment, s);
3069 
3070 						if (PUGI__OPTSET(parse_comments))
3071 							*s = 0; // Zero-terminate this segment at the first terminating '-'.
3072 
						// Only two characters are skipped when s[2] is not '>', i.e. when the
						// closing '>' was the buffer's last character and is now the terminator.
3073 						s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
3074 					}
3075 				}
3076 				else PUGI__THROW_ERROR(status_bad_comment, s);
3077 			}
3078 			else if (*s == '[')
3079 			{
3080 				// '<![CDATA[...'
3081 				if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
3082 				{
3083 					++s;
3084 
3085 					if (PUGI__OPTSET(parse_cdata))
3086 					{
3087 						PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
3088 						cursor->value = s; // Save the offset.
3089 
3090 						if (PUGI__OPTSET(parse_eol))
3091 						{
3092 							s = strconv_cdata(s, endch);
3093 
3094 							if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
3095 						}
3096 						else
3097 						{
3098 							// Scan for terminating ']]>'.
3099 							PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
3100 							PUGI__CHECK_ERROR(status_bad_cdata, s);
3101 
3102 							*s++ = 0; // Zero-terminate this segment.
3103 						}
3104 					}
3105 					else // Flagged for discard, but we still have to scan for the terminator.
3106 					{
3107 						// Scan for terminating ']]>'.
3108 						PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
3109 						PUGI__CHECK_ERROR(status_bad_cdata, s);
3110 
3111 						++s;
3112 					}
3113 
					// In the two scanning branches above s now points at the second ']';
					// the strconv_cdata branch presumably leaves it there too (defined elsewhere).
					// Only one character is skipped when '>' was replaced by the terminator.
3114 					s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
3115 				}
3116 				else PUGI__THROW_ERROR(status_bad_cdata, s);
3117 			}
3118 			else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
3119 			{
				// Rewind from the 'D' back to the '<' so the group parser sees "<!".
3120 				s -= 2;
3121 
				// DOCTYPE is rejected when cursor has a parent, i.e. below the parse root.
3122 				if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
3123 
				// "<!DOCTYPE" is 9 characters long; mark is where the doctype value begins.
3124 				char_t* mark = s + 9;
3125 
3126 				s = parse_doctype_group(s, endch);
3127 				if (!s) return s;
3128 
				// Replace the closing '>' (if it is still in the buffer) with a terminator
				// so the doctype value ends there.
3129 				assert((*s == 0 && endch == '>') || *s == '>');
3130 				if (*s) *s++ = 0;
3131 
3132 				if (PUGI__OPTSET(parse_doctype))
3133 				{
3134 					while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
3135 
3136 					PUGI__PUSHNODE(node_doctype);
3137 
3138 					cursor->value = mark;
3139 				}
3140 			}
			// Input ended right after "<!": the removed last character tells which
			// construct was being opened, which selects the reported status.
3141 			else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
3142 			else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
3143 			else PUGI__THROW_ERROR(status_unrecognized_tag, s);
3144 
3145 			return s;
3146 		}
3147 
parse_questionxml_parser3148 		char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
3149 		{
			// Parses markup that starts with "<?": processing instructions and the
			// XML declaration.
			// s          - points at the '?' that follows '<'
			// ref_cursor - current node; updated on successful return. For a declaration
			//              with attributes it is left at the new declaration node, which
			//              parse_tree uses as the signal to continue with attribute parsing
			// optmsk     - parse option mask (read through PUGI__OPTSET)
			// endch      - the character that originally ended the buffer (see parse())
			// Returns the position to continue parsing from. Callers treat a null result
			// as failure; PUGI__THROW_ERROR / PUGI__CHECK_ERROR are defined outside this
			// chunk and presumably record the error and return null.
3150 			// load into registers
3151 			xml_node_struct* cursor = ref_cursor;
3152 			char_t ch = 0;
3153 
3154 			// parse node contents, starting with question mark
3155 			++s;
3156 
3157 			// read PI target
3158 			char_t* target = s;
3159 
3160 			if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
3161 
3162 			PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
3163 			PUGI__CHECK_ERROR(status_bad_pi, s);
3164 
3165 			// determine node type; stricmp / strcasecmp is not portable
			// (c | ' ') sets bit 0x20, which maps ASCII upper-case letters to lower case, so
			// this matches "xml" in any letter case; target + 3 == s requires the target to
			// be exactly three characters long
3166 			bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
3167 
			// a node is created only when the matching option (declaration or PI) is enabled
3168 			if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
3169 			{
3170 				if (declaration)
3171 				{
3172 					// disallow non top-level declarations
3173 					if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
3174 
3175 					PUGI__PUSHNODE(node_declaration);
3176 				}
3177 				else
3178 				{
3179 					PUGI__PUSHNODE(node_pi);
3180 				}
3181 
3182 				cursor->name = target;
3183 
				// PUGI__ENDSEG is defined elsewhere; per its use in parse_tree it saves the
				// terminating character in 'ch', zero-terminates the name and steps over it
3184 				PUGI__ENDSEG();
3185 
3186 				// parse value/attributes
3187 				if (ch == '?')
3188 				{
3189 					// empty node
3190 					if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
					// advance only if '>' is actually present (it may have been the removed last character)
3191 					s += (*s == '>');
3192 
3193 					PUGI__POPNODE();
3194 				}
3195 				else if (PUGI__IS_CHARTYPE(ch, ct_space))
3196 				{
3197 					PUGI__SKIPWS();
3198 
3199 					// scan for tag end
3200 					char_t* value = s;
3201 
3202 					PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
3203 					PUGI__CHECK_ERROR(status_bad_pi, s);
3204 
3205 					if (declaration)
3206 					{
3207 						// replace ending ? with / so that 'element' terminates properly
3208 						*s = '/';
3209 
3210 						// we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
3211 						s = value;
3212 					}
3213 					else
3214 					{
3215 						// store value and step over >
3216 						cursor->value = value;
3217 
3218 						PUGI__POPNODE();
3219 
3220 						PUGI__ENDSEG();
3221 
3222 						s += (*s == '>');
3223 					}
3224 				}
3225 				else PUGI__THROW_ERROR(status_bad_pi, s);
3226 			}
3227 			else
3228 			{
				// node is not wanted: skip to the end of the instruction without storing anything
3229 				// scan for tag end
3230 				PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
3231 				PUGI__CHECK_ERROR(status_bad_pi, s);
3232 
3233 				s += (s[1] == '>' ? 2 : 1);
3234 			}
3235 
3236 			// store from registers
3237 			ref_cursor = cursor;
3238 
3239 			return s;
3240 		}
3241 
parse_treexml_parser3242 		char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
3243 		{
			// Main parsing loop: walks the zero-terminated buffer starting at s and builds
			// the node tree below root, storing names and values as pointers into the buffer.
			// s      - start of the text to parse
			// root   - node that receives the top-level nodes
			// optmsk - parse option mask (read through PUGI__OPTSET and used to pick the
			//          attribute/PCDATA converters)
			// endch  - the character that originally ended the buffer before parse()
			//          replaced it with a terminator; used to accept constructs whose
			//          final character is that removed one
			// Returns the position where parsing stopped. The helper parsers return null
			// on failure and that is propagated; PUGI__THROW_ERROR is defined outside this
			// chunk and presumably records error_offset/error_status and returns null.
3244 			strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
3245 			strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
3246 
3247 			char_t ch = 0;
3248 			xml_node_struct* cursor = root;
3249 			char_t* mark = s;
3250 
3251 			while (*s != 0)
3252 			{
3253 				if (*s == '<')
3254 				{
3255 					++s;
3256 
					// Reached either by falling through after '<' above, or by the goto at the
					// end of the PCDATA branch, which has already consumed the '<'.
3257 				LOC_TAG:
3258 					if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
3259 					{
3260 						PUGI__PUSHNODE(node_element); // Append a new node to the tree.
3261 
3262 						cursor->name = s;
3263 
3264 						PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3265 						PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3266 
3267 						if (ch == '>')
3268 						{
3269 							// end of tag
3270 						}
3271 						else if (PUGI__IS_CHARTYPE(ch, ct_space))
3272 						{
							// Also entered by goto after parse_question leaves cursor on a
							// declaration node, so declaration attributes use the same loop.
3273 						LOC_ATTRIBUTES:
3274 							while (true)
3275 							{
3276 								PUGI__SKIPWS(); // Eat any whitespace.
3277 
3278 								if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
3279 								{
3280 									xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute.
3281 									if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
3282 
3283 									a->name = s; // Save the offset.
3284 
3285 									PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3286 									PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3287 
									// Whitespace between the attribute name and '=' is allowed: skip it
									// and reload 'ch' with the first non-space character.
3288 									if (PUGI__IS_CHARTYPE(ch, ct_space))
3289 									{
3290 										PUGI__SKIPWS(); // Eat any whitespace.
3291 
3292 										ch = *s;
3293 										++s;
3294 									}
3295 
3296 									if (ch == '=') // '<... #=...'
3297 									{
3298 										PUGI__SKIPWS(); // Eat any whitespace.
3299 
3300 										if (*s == '"' || *s == '\'') // '<... #="...'
3301 										{
3302 											ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
3303 											++s; // Step over the quote.
3304 											a->value = s; // Save the offset.
3305 
3306 											s = strconv_attribute(s, ch);
3307 
3308 											if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
3309 
3310 											// After this line the loop continues from the start;
3311 											// Whitespaces, / and > are ok, symbols and EOF are wrong,
3312 											// everything else will be detected
3313 											if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
3314 										}
3315 										else PUGI__THROW_ERROR(status_bad_attribute, s);
3316 									}
3317 									else PUGI__THROW_ERROR(status_bad_attribute, s);
3318 								}
3319 								else if (*s == '/')
3320 								{
3321 									++s;
3322 
3323 									if (*s == '>')
3324 									{
3325 										PUGI__POPNODE();
3326 										s++;
3327 										break;
3328 									}
									// "/>" where '>' was the removed last character of the buffer
3329 									else if (*s == 0 && endch == '>')
3330 									{
3331 										PUGI__POPNODE();
3332 										break;
3333 									}
3334 									else PUGI__THROW_ERROR(status_bad_start_element, s);
3335 								}
3336 								else if (*s == '>')
3337 								{
3338 									++s;
3339 
3340 									break;
3341 								}
								// tag closed by the removed last character of the buffer
3342 								else if (*s == 0 && endch == '>')
3343 								{
3344 									break;
3345 								}
3346 								else PUGI__THROW_ERROR(status_bad_start_element, s);
3347 							}
3348 
3349 							// !!!
3350 						}
3351 						else if (ch == '/') // '<#.../'
3352 						{
3353 							if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
3354 
3355 							PUGI__POPNODE(); // Pop.
3356 
							// advance only if '>' is actually present in the buffer
3357 							s += (*s == '>');
3358 						}
3359 						else if (ch == 0)
3360 						{
3361 							// we stepped over null terminator, backtrack & handle closing tag
3362 							--s;
3363 
3364 							if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
3365 						}
3366 						else PUGI__THROW_ERROR(status_bad_start_element, s);
3367 					}
3368 					else if (*s == '/')
3369 					{
3370 						++s;
3371 
3372 						mark = s;
3373 
						// The closing tag name must match the name of the node being closed;
						// a cursor without a name cannot be closed by a tag.
3374 						char_t* name = cursor->name;
3375 						if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
3376 
3377 						while (PUGI__IS_CHARTYPE(*s, ct_symbol))
3378 						{
3379 							if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
3380 						}
3381 
						// Leftover characters in 'name' mean the closing tag is shorter than the
						// element name. If the only missing character equals the removed last
						// character, the input was cut off inside the closing tag.
3382 						if (*name)
3383 						{
3384 							if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
3385 							else PUGI__THROW_ERROR(status_end_element_mismatch, mark);
3386 						}
3387 
3388 						PUGI__POPNODE(); // Pop.
3389 
3390 						PUGI__SKIPWS();
3391 
3392 						if (*s == 0)
3393 						{
3394 							if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3395 						}
3396 						else
3397 						{
3398 							if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3399 							++s;
3400 						}
3401 					}
3402 					else if (*s == '?') // '<?...'
3403 					{
3404 						s = parse_question(s, cursor, optmsk, endch);
3405 						if (!s) return s;
3406 
3407 						assert(cursor);
3408 						if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
3409 					}
3410 					else if (*s == '!') // '<!...'
3411 					{
3412 						s = parse_exclamation(s, cursor, optmsk, endch);
3413 						if (!s) return s;
3414 					}
					// Input ended right after '<' and the removed last character was '?'
3415 					else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
3416 					else PUGI__THROW_ERROR(status_unrecognized_tag, s);
3417 				}
3418 				else
3419 				{
3420 					mark = s; // Save this offset while searching for a terminator.
3421 
3422 					PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
3423 
3424 					if (*s == '<' || !*s)
3425 					{
3426 						// We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
3427 						assert(mark != s);
3428 
						// Whitespace-only text is dropped unless a ws_pcdata option asks for it
						// (trimming drops it regardless). With parse_ws_pcdata_single it is kept
						// only directly before a closing tag of a node that has no children yet.
3429 						if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
3430 						{
3431 							continue;
3432 						}
3433 						else if (PUGI__OPTSET(parse_ws_pcdata_single))
3434 						{
3435 							if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
3436 						}
3437 					}
3438 
					// Without trimming, the text starts at the saved mark (leading whitespace included).
3439 					if (!PUGI__OPTSET(parse_trim_pcdata))
3440 						s = mark;
3441 
					// Text outside any element is stored only in fragment mode; otherwise it is skipped.
3442 					if (cursor->parent || PUGI__OPTSET(parse_fragment))
3443 					{
						// With parse_embed_pcdata, the first text of a childless, value-less
						// element is stored in the element itself instead of in a PCDATA child.
3444 						if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
3445 						{
3446 							cursor->value = s; // Save the offset.
3447 						}
3448 						else
3449 						{
3450 							PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
3451 
3452 							cursor->value = s; // Save the offset.
3453 
3454 							PUGI__POPNODE(); // Pop since this is a standalone.
3455 						}
3456 
3457 						s = strconv_pcdata(s);
3458 
3459 						if (!*s) break;
3460 					}
3461 					else
3462 					{
3463 						PUGI__SCANFOR(*s == '<'); // '...<'
3464 						if (!*s) break;
3465 
3466 						++s;
3467 					}
3468 
3469 					// We're after '<'
3470 					goto LOC_TAG;
3471 				}
3472 			}
3473 
3474 			// check that last tag is closed
3475 			if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
3476 
3477 			return s;
3478 		}
3479 
3480 	#ifdef PUGIXML_WCHAR_MODE
parse_skip_bomxml_parser3481 		static char_t* parse_skip_bom(char_t* s)
3482 		{
3483 			unsigned int bom = 0xfeff;
3484 			return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
3485 		}
3486 	#else
parse_skip_bomxml_parser3487 		static char_t* parse_skip_bom(char_t* s)
3488 		{
3489 			return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
3490 		}
3491 	#endif
3492 
has_element_node_siblingsxml_parser3493 		static bool has_element_node_siblings(xml_node_struct* node)
3494 		{
3495 			while (node)
3496 			{
3497 				if (PUGI__NODETYPE(node) == node_element) return true;
3498 
3499 				node = node->next_sibling;
3500 			}
3501 
3502 			return false;
3503 		}
3504 
parsexml_parser3505 		static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
3506 		{
3507 			// early-out for empty documents
3508 			if (length == 0)
3509 				return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
3510 
3511 			// get last child of the root before parsing
3512 			xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
3513 
3514 			// create parser on stack
3515 			xml_parser parser(static_cast<xml_allocator*>(xmldoc));
3516 
3517 			// save last character and make buffer zero-terminated (speeds up parsing)
3518 			char_t endch = buffer[length - 1];
3519 			buffer[length - 1] = 0;
3520 
3521 			// skip BOM to make sure it does not end up as part of parse output
3522 			char_t* buffer_data = parse_skip_bom(buffer);
3523 
3524 			// perform actual parsing
3525 			parser.parse_tree(buffer_data, root, optmsk, endch);
3526 
3527 			xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
3528 			assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
3529 
3530 			if (result)
3531 			{
3532 				// since we removed last character, we have to handle the only possible false positive (stray <)
3533 				if (endch == '<')
3534 					return make_parse_result(status_unrecognized_tag, length - 1);
3535 
3536 				// check if there are any element nodes parsed
3537 				xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0;
3538 
3539 				if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
3540 					return make_parse_result(status_no_document_element, length - 1);
3541 			}
3542 			else
3543 			{
3544 				// roll back offset if it occurs on a null terminator in the source buffer
3545 				if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
3546 					result.offset--;
3547 			}
3548 
3549 			return result;
3550 		}
3551 	};
3552 
3553 	// Output facilities
get_write_native_encoding()3554 	PUGI__FN xml_encoding get_write_native_encoding()
3555 	{
3556 	#ifdef PUGIXML_WCHAR_MODE
3557 		return get_wchar_encoding();
3558 	#else
3559 		return encoding_utf8;
3560 	#endif
3561 	}
3562 
get_write_encoding(xml_encoding encoding)3563 	PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
3564 	{
3565 		// replace wchar encoding with utf implementation
3566 		if (encoding == encoding_wchar) return get_wchar_encoding();
3567 
3568 		// replace utf16 encoding with utf16 with specific endianness
3569 		if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3570 
3571 		// replace utf32 encoding with utf32 with specific endianness
3572 		if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3573 
3574 		// only do autodetection if no explicit encoding is requested
3575 		if (encoding != encoding_auto) return encoding;
3576 
3577 		// assume utf8 encoding
3578 		return encoding_utf8;
3579 	}
3580 
convert_buffer_output_generic(typename T::value_type dest,const char_t * data,size_t length,D,T)3581 	template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
3582 	{
3583 		PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3584 
3585 		typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3586 
3587 		return static_cast<size_t>(end - dest) * sizeof(*dest);
3588 	}
3589 
convert_buffer_output_generic(typename T::value_type dest,const char_t * data,size_t length,D,T,bool opt_swap)3590 	template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
3591 	{
3592 		PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3593 
3594 		typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3595 
3596 		if (opt_swap)
3597 		{
3598 			for (typename T::value_type i = dest; i != end; ++i)
3599 				*i = endian_swap(*i);
3600 		}
3601 
3602 		return static_cast<size_t>(end - dest) * sizeof(*dest);
3603 	}
3604 
3605 #ifdef PUGIXML_WCHAR_MODE
get_valid_length(const char_t * data,size_t length)3606 	PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3607 	{
3608 		if (length < 1) return 0;
3609 
3610 		// discard last character if it's the lead of a surrogate pair
3611 		return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
3612 	}
3613 
convert_buffer_output(char_t * r_char,uint8_t * r_u8,uint16_t * r_u16,uint32_t * r_u32,const char_t * data,size_t length,xml_encoding encoding)3614 	PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3615 	{
3616 		// only endian-swapping is required
3617 		if (need_endian_swap_utf(encoding, get_wchar_encoding()))
3618 		{
3619 			convert_wchar_endian_swap(r_char, data, length);
3620 
3621 			return length * sizeof(char_t);
3622 		}
3623 
3624 		// convert to utf8
3625 		if (encoding == encoding_utf8)
3626 			return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
3627 
3628 		// convert to utf16
3629 		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3630 		{
3631 			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3632 
3633 			return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
3634 		}
3635 
3636 		// convert to utf32
3637 		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3638 		{
3639 			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3640 
3641 			return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
3642 		}
3643 
3644 		// convert to latin1
3645 		if (encoding == encoding_latin1)
3646 			return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
3647 
3648 		assert(false && "Invalid encoding"); // unreachable
3649 		return 0;
3650 	}
3651 #else
get_valid_length(const char_t * data,size_t length)3652 	PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3653 	{
3654 		if (length < 5) return 0;
3655 
3656 		for (size_t i = 1; i <= 4; ++i)
3657 		{
3658 			uint8_t ch = static_cast<uint8_t>(data[length - i]);
3659 
3660 			// either a standalone character or a leading one
3661 			if ((ch & 0xc0) != 0x80) return length - i;
3662 		}
3663 
3664 		// there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
3665 		return length;
3666 	}
3667 
convert_buffer_output(char_t *,uint8_t * r_u8,uint16_t * r_u16,uint32_t * r_u32,const char_t * data,size_t length,xml_encoding encoding)3668 	PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3669 	{
3670 		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3671 		{
3672 			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3673 
3674 			return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
3675 		}
3676 
3677 		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3678 		{
3679 			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3680 
3681 			return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding);
3682 		}
3683 
3684 		if (encoding == encoding_latin1)
3685 			return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
3686 
3687 		assert(false && "Invalid encoding"); // unreachable
3688 		return 0;
3689 	}
3690 #endif
3691 
3692 	class xml_buffered_writer
3693 	{
3694 		xml_buffered_writer(const xml_buffered_writer&);
3695 		xml_buffered_writer& operator=(const xml_buffered_writer&);
3696 
3697 	public:
// Creates a buffered writer on top of writer_ with an empty buffer.
// user_encoding is resolved to a concrete output encoding via get_write_encoding.
xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding)
	: writer(writer_)
	, bufsize(0)
	, encoding(get_write_encoding(user_encoding))
{
	// the buffer must be able to hold at least 8 characters
	PUGI__STATIC_ASSERT(bufcapacity >= 8);
}
3702 
flush()3703 		size_t flush()
3704 		{
3705 			flush(buffer, bufsize);
3706 			bufsize = 0;
3707 			return 0;
3708 		}
3709 
flush(const char_t * data,size_t size)3710 		void flush(const char_t* data, size_t size)
3711 		{
3712 			if (size == 0) return;
3713 
3714 			// fast path, just write data
3715 			if (encoding == get_write_native_encoding())
3716 				writer.write(data, size * sizeof(char_t));
3717 			else
3718 			{
3719 				// convert chunk
3720 				size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
3721 				assert(result <= sizeof(scratch));
3722 
3723 				// write data
3724 				writer.write(scratch.data_u8, result);
3725 			}
3726 		}
3727 
write_direct(const char_t * data,size_t length)3728 		void write_direct(const char_t* data, size_t length)
3729 		{
3730 			// flush the remaining buffer contents
3731 			flush();
3732 
3733 			// handle large chunks
3734 			if (length > bufcapacity)
3735 			{
3736 				if (encoding == get_write_native_encoding())
3737 				{
3738 					// fast path, can just write data chunk
3739 					writer.write(data, length * sizeof(char_t));
3740 					return;
3741 				}
3742 
3743 				// need to convert in suitable chunks
3744 				while (length > bufcapacity)
3745 				{
3746 					// get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
3747 					// and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
3748 					size_t chunk_size = get_valid_length(data, bufcapacity);
3749 					assert(chunk_size);
3750 
3751 					// convert chunk and write
3752 					flush(data, chunk_size);
3753 
3754 					// iterate
3755 					data += chunk_size;
3756 					length -= chunk_size;
3757 				}
3758 
3759 				// small tail is copied below
3760 				bufsize = 0;
3761 			}
3762 
3763 			memcpy(buffer + bufsize, data, length * sizeof(char_t));
3764 			bufsize += length;
3765 		}
3766 
write_buffer(const char_t * data,size_t length)3767 		void write_buffer(const char_t* data, size_t length)
3768 		{
3769 			size_t offset = bufsize;
3770 
3771 			if (offset + length <= bufcapacity)
3772 			{
3773 				memcpy(buffer + offset, data, length * sizeof(char_t));
3774 				bufsize = offset + length;
3775 			}
3776 			else
3777 			{
3778 				write_direct(data, length);
3779 			}
3780 		}
3781 
write_string(const char_t * data)3782 		void write_string(const char_t* data)
3783 		{
3784 			// write the part of the string that fits in the buffer
3785 			size_t offset = bufsize;
3786 
3787 			while (*data && offset < bufcapacity)
3788 				buffer[offset++] = *data++;
3789 
3790 			// write the rest
3791 			if (offset < bufcapacity)
3792 			{
3793 				bufsize = offset;
3794 			}
3795 			else
3796 			{
3797 				// backtrack a bit if we have split the codepoint
3798 				size_t length = offset - bufsize;
3799 				size_t extra = length - get_valid_length(data - length, length);
3800 
3801 				bufsize = offset - extra;
3802 
3803 				write_direct(data - extra, strlength(data) + extra);
3804 			}
3805 		}
3806 
write(char_t d0)3807 		void write(char_t d0)
3808 		{
3809 			size_t offset = bufsize;
3810 			if (offset > bufcapacity - 1) offset = flush();
3811 
3812 			buffer[offset + 0] = d0;
3813 			bufsize = offset + 1;
3814 		}
3815 
write(char_t d0,char_t d1)3816 		void write(char_t d0, char_t d1)
3817 		{
3818 			size_t offset = bufsize;
3819 			if (offset > bufcapacity - 2) offset = flush();
3820 
3821 			buffer[offset + 0] = d0;
3822 			buffer[offset + 1] = d1;
3823 			bufsize = offset + 2;
3824 		}
3825 
write(char_t d0,char_t d1,char_t d2)3826 		void write(char_t d0, char_t d1, char_t d2)
3827 		{
3828 			size_t offset = bufsize;
3829 			if (offset > bufcapacity - 3) offset = flush();
3830 
3831 			buffer[offset + 0] = d0;
3832 			buffer[offset + 1] = d1;
3833 			buffer[offset + 2] = d2;
3834 			bufsize = offset + 3;
3835 		}
3836 
write(char_t d0,char_t d1,char_t d2,char_t d3)3837 		void write(char_t d0, char_t d1, char_t d2, char_t d3)
3838 		{
3839 			size_t offset = bufsize;
3840 			if (offset > bufcapacity - 4) offset = flush();
3841 
3842 			buffer[offset + 0] = d0;
3843 			buffer[offset + 1] = d1;
3844 			buffer[offset + 2] = d2;
3845 			buffer[offset + 3] = d3;
3846 			bufsize = offset + 4;
3847 		}
3848 
write(char_t d0,char_t d1,char_t d2,char_t d3,char_t d4)3849 		void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
3850 		{
3851 			size_t offset = bufsize;
3852 			if (offset > bufcapacity - 5) offset = flush();
3853 
3854 			buffer[offset + 0] = d0;
3855 			buffer[offset + 1] = d1;
3856 			buffer[offset + 2] = d2;
3857 			buffer[offset + 3] = d3;
3858 			buffer[offset + 4] = d4;
3859 			bufsize = offset + 5;
3860 		}
3861 
write(char_t d0,char_t d1,char_t d2,char_t d3,char_t d4,char_t d5)3862 		void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
3863 		{
3864 			size_t offset = bufsize;
3865 			if (offset > bufcapacity - 6) offset = flush();
3866 
3867 			buffer[offset + 0] = d0;
3868 			buffer[offset + 1] = d1;
3869 			buffer[offset + 2] = d2;
3870 			buffer[offset + 3] = d3;
3871 			buffer[offset + 4] = d4;
3872 			buffer[offset + 5] = d5;
3873 			bufsize = offset + 6;
3874 		}
3875 
		// Buffer sizing.
		//
		// The total byte budget (bufcapacitybytes) is split between `buffer` and `scratch`:
		// every buffered character is budgeted sizeof(char_t) bytes in `buffer` plus 4 bytes in
		// `scratch`, which is why the divisor below is (sizeof(char_t) + 4).
		//
		// The factor of 4 comes from the worst-case per-character expansion listed here:
		// utf8 maximum expansion: x4 (-> utf32)
		// utf16 maximum expansion: x2 (-> utf32)
		// utf32 maximum expansion: x1
		enum
		{
			// total byte budget; overridable at build time via PUGIXML_MEMORY_OUTPUT_STACK
			bufcapacitybytes =
			#ifdef PUGIXML_MEMORY_OUTPUT_STACK
				PUGIXML_MEMORY_OUTPUT_STACK
			#else
				10240
			#endif
			,
			// number of char_t elements that `buffer` can hold
			bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
		};

		// pending output characters; the first bufsize elements are valid
		char_t buffer[bufcapacity];

		// scratch storage viewed as 8/16/32-bit units or as char_t; data_u8, data_u16 and data_u32
		// each span 4 * bufcapacity bytes
		// NOTE(review): presumably the destination for encoding conversion when the buffer is
		// flushed - the code using it is outside this section, confirm there
		union
		{
			uint8_t data_u8[4 * bufcapacity];
			uint16_t data_u16[2 * bufcapacity];
			uint32_t data_u32[bufcapacity];
			char_t data_char[bufcapacity];
		} scratch;

		// destination writer
		xml_writer& writer;
		// number of characters currently stored in `buffer`
		size_t bufsize;
		// output encoding
		xml_encoding encoding;
3904 	};
3905 
text_output_escaped(xml_buffered_writer & writer,const char_t * s,chartypex_t type)3906 	PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
3907 	{
3908 		while (*s)
3909 		{
3910 			const char_t* prev = s;
3911 
3912 			// While *s is a usual symbol
3913 			PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
3914 
3915 			writer.write_buffer(prev, static_cast<size_t>(s - prev));
3916 
3917 			switch (*s)
3918 			{
3919 				case 0: break;
3920 				case '&':
3921 					writer.write('&', 'a', 'm', 'p', ';');
3922 					++s;
3923 					break;
3924 				case '<':
3925 					writer.write('&', 'l', 't', ';');
3926 					++s;
3927 					break;
3928 				case '>':
3929 					writer.write('&', 'g', 't', ';');
3930 					++s;
3931 					break;
3932 				case '"':
3933 					writer.write('&', 'q', 'u', 'o', 't', ';');
3934 					++s;
3935 					break;
3936 				default: // s is not a usual symbol
3937 				{
3938 					unsigned int ch = static_cast<unsigned int>(*s++);
3939 					assert(ch < 32);
3940 
3941 					writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
3942 				}
3943 			}
3944 		}
3945 	}
3946 
text_output(xml_buffered_writer & writer,const char_t * s,chartypex_t type,unsigned int flags)3947 	PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3948 	{
3949 		if (flags & format_no_escapes)
3950 			writer.write_string(s);
3951 		else
3952 			text_output_escaped(writer, s, type);
3953 	}
3954 
text_output_cdata(xml_buffered_writer & writer,const char_t * s)3955 	PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
3956 	{
3957 		do
3958 		{
3959 			writer.write('<', '!', '[', 'C', 'D');
3960 			writer.write('A', 'T', 'A', '[');
3961 
3962 			const char_t* prev = s;
3963 
3964 			// look for ]]> sequence - we can't output it as is since it terminates CDATA
3965 			while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
3966 
3967 			// skip ]] if we stopped at ]]>, > will go to the next CDATA section
3968 			if (*s) s += 2;
3969 
3970 			writer.write_buffer(prev, static_cast<size_t>(s - prev));
3971 
3972 			writer.write(']', ']', '>');
3973 		}
3974 		while (*s);
3975 	}
3976 
text_output_indent(xml_buffered_writer & writer,const char_t * indent,size_t indent_length,unsigned int depth)3977 	PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
3978 	{
3979 		switch (indent_length)
3980 		{
3981 		case 1:
3982 		{
3983 			for (unsigned int i = 0; i < depth; ++i)
3984 				writer.write(indent[0]);
3985 			break;
3986 		}
3987 
3988 		case 2:
3989 		{
3990 			for (unsigned int i = 0; i < depth; ++i)
3991 				writer.write(indent[0], indent[1]);
3992 			break;
3993 		}
3994 
3995 		case 3:
3996 		{
3997 			for (unsigned int i = 0; i < depth; ++i)
3998 				writer.write(indent[0], indent[1], indent[2]);
3999 			break;
4000 		}
4001 
4002 		case 4:
4003 		{
4004 			for (unsigned int i = 0; i < depth; ++i)
4005 				writer.write(indent[0], indent[1], indent[2], indent[3]);
4006 			break;
4007 		}
4008 
4009 		default:
4010 		{
4011 			for (unsigned int i = 0; i < depth; ++i)
4012 				writer.write_buffer(indent, indent_length);
4013 		}
4014 		}
4015 	}
4016 
node_output_comment(xml_buffered_writer & writer,const char_t * s)4017 	PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
4018 	{
4019 		writer.write('<', '!', '-', '-');
4020 
4021 		while (*s)
4022 		{
4023 			const char_t* prev = s;
4024 
4025 			// look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
4026 			while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
4027 
4028 			writer.write_buffer(prev, static_cast<size_t>(s - prev));
4029 
4030 			if (*s)
4031 			{
4032 				assert(*s == '-');
4033 
4034 				writer.write('-', ' ');
4035 				++s;
4036 			}
4037 		}
4038 
4039 		writer.write('-', '-', '>');
4040 	}
4041 
node_output_pi_value(xml_buffered_writer & writer,const char_t * s)4042 	PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
4043 	{
4044 		while (*s)
4045 		{
4046 			const char_t* prev = s;
4047 
4048 			// look for ?> sequence - we can't output it since ?> terminates PI
4049 			while (*s && !(s[0] == '?' && s[1] == '>')) ++s;
4050 
4051 			writer.write_buffer(prev, static_cast<size_t>(s - prev));
4052 
4053 			if (*s)
4054 			{
4055 				assert(s[0] == '?' && s[1] == '>');
4056 
4057 				writer.write('?', ' ', '>');
4058 				s += 2;
4059 			}
4060 		}
4061 	}
4062 
node_output_attributes(xml_buffered_writer & writer,xml_node_struct * node,const char_t * indent,size_t indent_length,unsigned int flags,unsigned int depth)4063 	PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4064 	{
4065 		const char_t* default_name = PUGIXML_TEXT(":anonymous");
4066 
4067 		for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4068 		{
4069 			if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes)
4070 			{
4071 				writer.write('\n');
4072 
4073 				text_output_indent(writer, indent, indent_length, depth + 1);
4074 			}
4075 			else
4076 			{
4077 				writer.write(' ');
4078 			}
4079 
4080 			writer.write_string(a->name ? a->name + 0 : default_name);
4081 			writer.write('=', '"');
4082 
4083 			if (a->value)
4084 				text_output(writer, a->value, ctx_special_attr, flags);
4085 
4086 			writer.write('"');
4087 		}
4088 	}
4089 
node_output_start(xml_buffered_writer & writer,xml_node_struct * node,const char_t * indent,size_t indent_length,unsigned int flags,unsigned int depth)4090 	PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4091 	{
4092 		const char_t* default_name = PUGIXML_TEXT(":anonymous");
4093 		const char_t* name = node->name ? node->name + 0 : default_name;
4094 
4095 		writer.write('<');
4096 		writer.write_string(name);
4097 
4098 		if (node->first_attribute)
4099 			node_output_attributes(writer, node, indent, indent_length, flags, depth);
4100 
4101 		// element nodes can have value if parse_embed_pcdata was used
4102 		if (!node->value)
4103 		{
4104 			if (!node->first_child)
4105 			{
4106 				if (flags & format_no_empty_element_tags)
4107 				{
4108 					writer.write('>', '<', '/');
4109 					writer.write_string(name);
4110 					writer.write('>');
4111 
4112 					return false;
4113 				}
4114 				else
4115 				{
4116 					if ((flags & format_raw) == 0)
4117 						writer.write(' ');
4118 
4119 					writer.write('/', '>');
4120 
4121 					return false;
4122 				}
4123 			}
4124 			else
4125 			{
4126 				writer.write('>');
4127 
4128 				return true;
4129 			}
4130 		}
4131 		else
4132 		{
4133 			writer.write('>');
4134 
4135 			text_output(writer, node->value, ctx_special_pcdata, flags);
4136 
4137 			if (!node->first_child)
4138 			{
4139 				writer.write('<', '/');
4140 				writer.write_string(name);
4141 				writer.write('>');
4142 
4143 				return false;
4144 			}
4145 			else
4146 			{
4147 				return true;
4148 			}
4149 		}
4150 	}
4151 
node_output_end(xml_buffered_writer & writer,xml_node_struct * node)4152 	PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
4153 	{
4154 		const char_t* default_name = PUGIXML_TEXT(":anonymous");
4155 		const char_t* name = node->name ? node->name + 0 : default_name;
4156 
4157 		writer.write('<', '/');
4158 		writer.write_string(name);
4159 		writer.write('>');
4160 	}
4161 
node_output_simple(xml_buffered_writer & writer,xml_node_struct * node,unsigned int flags)4162 	PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
4163 	{
4164 		const char_t* default_name = PUGIXML_TEXT(":anonymous");
4165 
4166 		switch (PUGI__NODETYPE(node))
4167 		{
4168 			case node_pcdata:
4169 				text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
4170 				break;
4171 
4172 			case node_cdata:
4173 				text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4174 				break;
4175 
4176 			case node_comment:
4177 				node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4178 				break;
4179 
4180 			case node_pi:
4181 				writer.write('<', '?');
4182 				writer.write_string(node->name ? node->name + 0 : default_name);
4183 
4184 				if (node->value)
4185 				{
4186 					writer.write(' ');
4187 					node_output_pi_value(writer, node->value);
4188 				}
4189 
4190 				writer.write('?', '>');
4191 				break;
4192 
4193 			case node_declaration:
4194 				writer.write('<', '?');
4195 				writer.write_string(node->name ? node->name + 0 : default_name);
4196 				node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
4197 				writer.write('?', '>');
4198 				break;
4199 
4200 			case node_doctype:
4201 				writer.write('<', '!', 'D', 'O', 'C');
4202 				writer.write('T', 'Y', 'P', 'E');
4203 
4204 				if (node->value)
4205 				{
4206 					writer.write(' ');
4207 					writer.write_string(node->value);
4208 				}
4209 
4210 				writer.write('>');
4211 				break;
4212 
4213 			default:
4214 				assert(false && "Invalid node type"); // unreachable
4215 		}
4216 	}
4217 
4218 	enum indent_flags_t
4219 	{
4220 		indent_newline = 1,
4221 		indent_indent = 2
4222 	};
4223 
	// Serializes the subtree rooted at root to the writer without recursion.
	//
	// writer - buffered destination
	// root   - first node to write; the traversal never leaves its subtree
	// indent - indentation string, repeated once per depth level
	// flags  - format_* flags controlling indentation, raw output and escaping
	// depth  - indentation depth at which root is written
	//
	// indent_flags carries the whitespace decision from one node to the next: text nodes clear it
	// so that nothing is inserted next to character data, everything else requests a newline and
	// indentation before the following node.
	PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
	{
		// indentation is only measured (and later written) when an indent flag is set and output is not raw
		size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
		// the first node is indented but not preceded by a line break
		unsigned int indent_flags = indent_indent;

		xml_node_struct* node = root;

		do
		{
			assert(node);

			// begin writing current node
			if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
			{
				node_output_simple(writer, node, flags);

				// no whitespace may be added right after character data
				indent_flags = 0;
			}
			else
			{
				if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
					writer.write('\n');

				if ((indent_flags & indent_indent) && indent_length)
					text_output_indent(writer, indent, indent_length, depth);

				if (PUGI__NODETYPE(node) == node_element)
				{
					indent_flags = indent_newline | indent_indent;

					// true means the element has children and its closing tag is still pending
					if (node_output_start(writer, node, indent, indent_length, flags, depth))
					{
						// element nodes can have value if parse_embed_pcdata was used
						if (node->value)
							indent_flags = 0;

						// descend into the first child
						node = node->first_child;
						depth++;
						continue;
					}
				}
				else if (PUGI__NODETYPE(node) == node_document)
				{
					// the document node produces no output itself and does not increase depth
					indent_flags = indent_indent;

					if (node->first_child)
					{
						node = node->first_child;
						continue;
					}
				}
				else
				{
					node_output_simple(writer, node, flags);

					indent_flags = indent_newline | indent_indent;
				}
			}

			// continue to the next node
			while (node != root)
			{
				if (node->next_sibling)
				{
					node = node->next_sibling;
					break;
				}

				// no more siblings: climb to the parent, which has now been fully written
				node = node->parent;

				// write closing node
				if (PUGI__NODETYPE(node) == node_element)
				{
					depth--;

					if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
						writer.write('\n');

					if ((indent_flags & indent_indent) && indent_length)
						text_output_indent(writer, indent, indent_length, depth);

					node_output_end(writer, node);

					indent_flags = indent_newline | indent_indent;
				}
			}
		}
		while (node != root);

		// terminate the last line unless the output ended with character data or is raw
		if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
			writer.write('\n');
	}
4316 
has_declaration(xml_node_struct * node)4317 	PUGI__FN bool has_declaration(xml_node_struct* node)
4318 	{
4319 		for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
4320 		{
4321 			xml_node_type type = PUGI__NODETYPE(child);
4322 
4323 			if (type == node_declaration) return true;
4324 			if (type == node_element) return false;
4325 		}
4326 
4327 		return false;
4328 	}
4329 
is_attribute_of(xml_attribute_struct * attr,xml_node_struct * node)4330 	PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
4331 	{
4332 		for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4333 			if (a == attr)
4334 				return true;
4335 
4336 		return false;
4337 	}
4338 
allow_insert_attribute(xml_node_type parent)4339 	PUGI__FN bool allow_insert_attribute(xml_node_type parent)
4340 	{
4341 		return parent == node_element || parent == node_declaration;
4342 	}
4343 
allow_insert_child(xml_node_type parent,xml_node_type child)4344 	PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
4345 	{
4346 		if (parent != node_document && parent != node_element) return false;
4347 		if (child == node_document || child == node_null) return false;
4348 		if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
4349 
4350 		return true;
4351 	}
4352 
allow_move(xml_node parent,xml_node child)4353 	PUGI__FN bool allow_move(xml_node parent, xml_node child)
4354 	{
4355 		// check that child can be a child of parent
4356 		if (!allow_insert_child(parent.type(), child.type()))
4357 			return false;
4358 
4359 		// check that node is not moved between documents
4360 		if (parent.root() != child.root())
4361 			return false;
4362 
4363 		// check that new parent is not in the child subtree
4364 		xml_node cur = parent;
4365 
4366 		while (cur)
4367 		{
4368 			if (cur == child)
4369 				return false;
4370 
4371 			cur = cur.parent();
4372 		}
4373 
4374 		return true;
4375 	}
4376 
4377 	template <typename String, typename Header>
node_copy_string(String & dest,Header & header,uintptr_t header_mask,char_t * source,Header & source_header,xml_allocator * alloc)4378 	PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
4379 	{
4380 		assert(!dest && (header & header_mask) == 0);
4381 
4382 		if (source)
4383 		{
4384 			if (alloc && (source_header & header_mask) == 0)
4385 			{
4386 				dest = source;
4387 
4388 				// since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
4389 				header |= xml_memory_page_contents_shared_mask;
4390 				source_header |= xml_memory_page_contents_shared_mask;
4391 			}
4392 			else
4393 				strcpy_insitu(dest, header, header_mask, source, strlength(source));
4394 		}
4395 	}
4396 
node_copy_contents(xml_node_struct * dn,xml_node_struct * sn,xml_allocator * shared_alloc)4397 	PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
4398 	{
4399 		node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
4400 		node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
4401 
4402 		for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute)
4403 		{
4404 			xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
4405 
4406 			if (da)
4407 			{
4408 				node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4409 				node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4410 			}
4411 		}
4412 	}
4413 
	// Copies the contents of sn and its whole subtree into dn without recursion.
	//
	// dn - destination node; receives sn's name, value and attributes, then copies of sn's descendants
	// sn - source node
	//
	// Two cursors move in lockstep: sit walks the source subtree in document order while dit
	// tracks the destination node under which the copy of sit has to be appended.
	PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
	{
		xml_allocator& alloc = get_allocator(dn);
		// non-null only when both nodes use the same allocator; passed on to node_copy_string,
		// which uses it to decide whether string storage may be shared
		xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;

		node_copy_contents(dn, sn, shared_alloc);

		xml_node_struct* dit = dn;
		xml_node_struct* sit = sn->first_child;

		while (sit && sit != sn)
		{
			// when a tree is copied into one of the descendants, we need to skip that subtree to avoid an infinite loop
			if (sit != dn)
			{
				xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));

				// if the node could not be created, its subtree is skipped
				if (copy)
				{
					node_copy_contents(copy, sit, shared_alloc);

					// descend on both sides so that the children are appended to the new copy
					if (sit->first_child)
					{
						dit = copy;
						sit = sit->first_child;
						continue;
					}
				}
			}

			// continue to the next node
			do
			{
				if (sit->next_sibling)
				{
					sit = sit->next_sibling;
					break;
				}

				// no sibling left at this level: climb one level on both sides
				sit = sit->parent;
				dit = dit->parent;
			}
			while (sit != sn);
		}
	}
4459 
node_copy_attribute(xml_attribute_struct * da,xml_attribute_struct * sa)4460 	PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
4461 	{
4462 		xml_allocator& alloc = get_allocator(da);
4463 		xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
4464 
4465 		node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4466 		node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4467 	}
4468 
is_text_node(xml_node_struct * node)4469 	inline bool is_text_node(xml_node_struct* node)
4470 	{
4471 		xml_node_type type = PUGI__NODETYPE(node);
4472 
4473 		return type == node_pcdata || type == node_cdata;
4474 	}
4475 
4476 	// get value with conversion functions
	// Parses an integer from value and clamps it to the [minv, maxv] range.
	//
	// U     - unsigned type used for the arithmetic; signed results are produced by the callers
	//         converting the returned value
	// value - text to parse: optional leading whitespace, an optional '+' or '-', then either
	//         decimal digits or hexadecimal digits after a "0x"/"0X" prefix; parsing stops at
	//         the first character that is not a digit
	// minv  - value returned for negative input that overflows (passed as the unsigned bit
	//         pattern of the smallest representable value)
	// maxv  - value returned for non-negative input that overflows
	//
	// The digits are accumulated with wrap-around arithmetic; overflow is detected afterwards
	// from the number of significant digits instead of being checked on every step.
	template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW U string_to_integer(const char_t* value, U minv, U maxv)
	{
		U result = 0;
		const char_t* s = value;

		// skip leading whitespace
		while (PUGI__IS_CHARTYPE(*s, ct_space))
			s++;

		bool negative = (*s == '-');

		// consume at most one sign character
		s += (*s == '+' || *s == '-');

		bool overflow = false;

		// (c | ' ') maps an ASCII upper-case letter to lower case, so this accepts both 0x and 0X
		if (s[0] == '0' && (s[1] | ' ') == 'x')
		{
			s += 2;

			// since overflow detection relies on length of the sequence skip leading zeros
			while (*s == '0')
				s++;

			const char_t* start = s;

			for (;;)
			{
				// the unsigned comparison rejects characters below '0' as well as above '9'
				if (static_cast<unsigned>(*s - '0') < 10)
					result = result * 16 + (*s - '0');
				else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
					result = result * 16 + ((*s | ' ') - 'a' + 10);
				else
					break;

				s++;
			}

			size_t digits = static_cast<size_t>(s - start);

			// every hex digit is 4 bits, so U holds at most sizeof(U) * 2 significant digits
			overflow = digits > sizeof(U) * 2;
		}
		else
		{
			// since overflow detection relies on length of the sequence skip leading zeros
			while (*s == '0')
				s++;

			const char_t* start = s;

			for (;;)
			{
				if (static_cast<unsigned>(*s - '0') < 10)
					result = result * 10 + (*s - '0');
				else
					break;

				s++;
			}

			size_t digits = static_cast<size_t>(s - start);

			PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);

			// number of decimal digits and leading digit of the largest value of U
			// (18446744073709551615, 4294967295 and 65535 respectively)
			const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
			const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
			const size_t high_bit = sizeof(U) * 8 - 1;

			// fewer digits than the maximum always fit and more never do; with exactly the maximum
			// number of digits the value fits if the leading digit is smaller than max_lead, or if
			// it equals max_lead and the accumulated result still has its top bit set (a value
			// that wrapped around would have lost it)
			overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
		}

		if (negative)
		{
			// 0 - minv is the largest magnitude allowed for negative input; anything larger, or an
			// overflow, saturates to minv, otherwise the result is negated in unsigned arithmetic
			// Workaround for crayc++ CC-3059: Expected no overflow in routine.
		#ifdef _CRAYC
			return (overflow || result > ~minv + 1) ? minv : ~result + 1;
		#else
			return (overflow || result > 0 - minv) ? minv : 0 - result;
		#endif
		}
		else
			// non-negative input saturates to maxv
			return (overflow || result > maxv) ? maxv : result;
	}
4558 
get_value_int(const char_t * value)4559 	PUGI__FN int get_value_int(const char_t* value)
4560 	{
4561 		return string_to_integer<unsigned int>(value, static_cast<unsigned int>(INT_MIN), INT_MAX);
4562 	}
4563 
get_value_uint(const char_t * value)4564 	PUGI__FN unsigned int get_value_uint(const char_t* value)
4565 	{
4566 		return string_to_integer<unsigned int>(value, 0, UINT_MAX);
4567 	}
4568 
get_value_double(const char_t * value)4569 	PUGI__FN double get_value_double(const char_t* value)
4570 	{
4571 	#ifdef PUGIXML_WCHAR_MODE
4572 		return wcstod(value, 0);
4573 	#else
4574 		return strtod(value, 0);
4575 	#endif
4576 	}
4577 
get_value_float(const char_t * value)4578 	PUGI__FN float get_value_float(const char_t* value)
4579 	{
4580 	#ifdef PUGIXML_WCHAR_MODE
4581 		return static_cast<float>(wcstod(value, 0));
4582 	#else
4583 		return static_cast<float>(strtod(value, 0));
4584 	#endif
4585 	}
4586 
get_value_bool(const char_t * value)4587 	PUGI__FN bool get_value_bool(const char_t* value)
4588 	{
4589 		// only look at first char
4590 		char_t first = *value;
4591 
4592 		// 1*, t* (true), T* (True), y* (yes), Y* (YES)
4593 		return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
4594 	}
4595 
4596 #ifdef PUGIXML_HAS_LONG_LONG
get_value_llong(const char_t * value)4597 	PUGI__FN long long get_value_llong(const char_t* value)
4598 	{
4599 		return string_to_integer<unsigned long long>(value, static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX);
4600 	}
4601 
get_value_ullong(const char_t * value)4602 	PUGI__FN unsigned long long get_value_ullong(const char_t* value)
4603 	{
4604 		return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX);
4605 	}
4606 #endif
4607 
integer_to_string(char_t * begin,char_t * end,U value,bool negative)4608 	template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
4609 	{
4610 		char_t* result = end - 1;
4611 		U rest = negative ? 0 - value : value;
4612 
4613 		do
4614 		{
4615 			*result-- = static_cast<char_t>('0' + (rest % 10));
4616 			rest /= 10;
4617 		}
4618 		while (rest);
4619 
4620 		assert(result >= begin);
4621 		(void)begin;
4622 
4623 		*result = '-';
4624 
4625 		return result + !negative;
4626 	}
4627 
4628 	// set value with conversion functions
4629 	template <typename String, typename Header>
set_value_ascii(String & dest,Header & header,uintptr_t header_mask,char * buf)4630 	PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
4631 	{
4632 	#ifdef PUGIXML_WCHAR_MODE
4633 		char_t wbuf[128];
4634 		assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
4635 
4636 		size_t offset = 0;
4637 		for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
4638 
4639 		return strcpy_insitu(dest, header, header_mask, wbuf, offset);
4640 	#else
4641 		return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
4642 	#endif
4643 	}
4644 
4645 	template <typename U, typename String, typename Header>
set_value_integer(String & dest,Header & header,uintptr_t header_mask,U value,bool negative)4646 	PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative)
4647 	{
4648 		char_t buf[64];
4649 		char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
4650 		char_t* begin = integer_to_string(buf, end, value, negative);
4651 
4652 		return strcpy_insitu(dest, header, header_mask, begin, end - begin);
4653 	}
4654 
4655 	template <typename String, typename Header>
set_value_convert(String & dest,Header & header,uintptr_t header_mask,float value)4656 	PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value)
4657 	{
4658 		char buf[128];
4659 		PUGI__SNPRINTF(buf, "%.9g", value);
4660 
4661 		return set_value_ascii(dest, header, header_mask, buf);
4662 	}
4663 
4664 	template <typename String, typename Header>
set_value_convert(String & dest,Header & header,uintptr_t header_mask,double value)4665 	PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value)
4666 	{
4667 		char buf[128];
4668 		PUGI__SNPRINTF(buf, "%.17g", value);
4669 
4670 		return set_value_ascii(dest, header, header_mask, buf);
4671 	}
4672 
4673 	template <typename String, typename Header>
set_value_bool(String & dest,Header & header,uintptr_t header_mask,bool value)4674 	PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value)
4675 	{
4676 		return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5);
4677 	}
4678 
load_buffer_impl(xml_document_struct * doc,xml_node_struct * root,void * contents,size_t size,unsigned int options,xml_encoding encoding,bool is_mutable,bool own,char_t ** out_buffer)4679 	PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
4680 	{
4681 		// check input buffer
4682 		if (!contents && size) return make_parse_result(status_io_error);
4683 
4684 		// get actual encoding
4685 		xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
4686 
4687 		// get private buffer
4688 		char_t* buffer = 0;
4689 		size_t length = 0;
4690 
4691 		if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
4692 
4693 		// delete original buffer if we performed a conversion
4694 		if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
4695 
4696 		// grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
4697 		if (own || buffer != contents) *out_buffer = buffer;
4698 
4699 		// store buffer for offset_debug
4700 		doc->buffer = buffer;
4701 
4702 		// parse
4703 		xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
4704 
4705 		// remember encoding
4706 		res.encoding = buffer_encoding;
4707 
4708 		return res;
4709 	}
4710 
4711 	// we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
get_file_size(FILE * file,size_t & out_result)4712 	PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
4713 	{
4714 	#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
4715 		// there are 64-bit versions of fseek/ftell, let's use them
4716 		typedef __int64 length_type;
4717 
4718 		_fseeki64(file, 0, SEEK_END);
4719 		length_type length = _ftelli64(file);
4720 		_fseeki64(file, 0, SEEK_SET);
4721 	#elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
4722 		// there are 64-bit versions of fseek/ftell, let's use them
4723 		typedef off64_t length_type;
4724 
4725 		fseeko64(file, 0, SEEK_END);
4726 		length_type length = ftello64(file);
4727 		fseeko64(file, 0, SEEK_SET);
4728 	#else
4729 		// if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
4730 		typedef long length_type;
4731 
4732 		fseek(file, 0, SEEK_END);
4733 		length_type length = ftell(file);
4734 		fseek(file, 0, SEEK_SET);
4735 	#endif
4736 
4737 		// check for I/O errors
4738 		if (length < 0) return status_io_error;
4739 
4740 		// check for overflow
4741 		size_t result = static_cast<size_t>(length);
4742 
4743 		if (static_cast<length_type>(result) != length) return status_out_of_memory;
4744 
4745 		// finalize
4746 		out_result = result;
4747 
4748 		return status_ok;
4749 	}
4750 
4751 	// This function assumes that buffer has extra sizeof(char_t) writable bytes after size
zero_terminate_buffer(void * buffer,size_t size,xml_encoding encoding)4752 	PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
4753 	{
4754 		// We only need to zero-terminate if encoding conversion does not do it for us
4755 	#ifdef PUGIXML_WCHAR_MODE
4756 		xml_encoding wchar_encoding = get_wchar_encoding();
4757 
4758 		if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
4759 		{
4760 			size_t length = size / sizeof(char_t);
4761 
4762 			static_cast<char_t*>(buffer)[length] = 0;
4763 			return (length + 1) * sizeof(char_t);
4764 		}
4765 	#else
4766 		if (encoding == encoding_utf8)
4767 		{
4768 			static_cast<char*>(buffer)[size] = 0;
4769 			return size + 1;
4770 		}
4771 	#endif
4772 
4773 		return size;
4774 	}
4775 
load_file_impl(xml_document_struct * doc,FILE * file,unsigned int options,xml_encoding encoding,char_t ** out_buffer)4776 	PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4777 	{
4778 		if (!file) return make_parse_result(status_file_not_found);
4779 
4780 		// get file size (can result in I/O errors)
4781 		size_t size = 0;
4782 		xml_parse_status size_status = get_file_size(file, size);
4783 		if (size_status != status_ok) return make_parse_result(size_status);
4784 
4785 		size_t max_suffix_size = sizeof(char_t);
4786 
4787 		// allocate buffer for the whole file
4788 		char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
4789 		if (!contents) return make_parse_result(status_out_of_memory);
4790 
4791 		// read file in memory
4792 		size_t read_size = fread(contents, 1, size, file);
4793 
4794 		if (read_size != size)
4795 		{
4796 			xml_memory::deallocate(contents);
4797 			return make_parse_result(status_io_error);
4798 		}
4799 
4800 		xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
4801 
4802 		return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
4803 	}
4804 
close_file(FILE * file)4805 	PUGI__FN void close_file(FILE* file)
4806 	{
4807 		fclose(file);
4808 	}
4809 
4810 #ifndef PUGIXML_NO_STL
4811 	template <typename T> struct xml_stream_chunk
4812 	{
createxml_stream_chunk4813 		static xml_stream_chunk* create()
4814 		{
4815 			void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
4816 			if (!memory) return 0;
4817 
4818 			return new (memory) xml_stream_chunk();
4819 		}
4820 
destroyxml_stream_chunk4821 		static void destroy(xml_stream_chunk* chunk)
4822 		{
4823 			// free chunk chain
4824 			while (chunk)
4825 			{
4826 				xml_stream_chunk* next_ = chunk->next;
4827 
4828 				xml_memory::deallocate(chunk);
4829 
4830 				chunk = next_;
4831 			}
4832 		}
4833 
xml_stream_chunkxml_stream_chunk4834 		xml_stream_chunk(): next(0), size(0)
4835 		{
4836 		}
4837 
4838 		xml_stream_chunk* next;
4839 		size_t size;
4840 
4841 		T data[xml_memory_page_size / sizeof(T)];
4842 	};
4843 
load_stream_data_noseek(std::basic_istream<T> & stream,void ** out_buffer,size_t * out_size)4844 	template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4845 	{
4846 		auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy);
4847 
4848 		// read file to a chunk list
4849 		size_t total = 0;
4850 		xml_stream_chunk<T>* last = 0;
4851 
4852 		while (!stream.eof())
4853 		{
4854 			// allocate new chunk
4855 			xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
4856 			if (!chunk) return status_out_of_memory;
4857 
4858 			// append chunk to list
4859 			if (last) last = last->next = chunk;
4860 			else chunks.data = last = chunk;
4861 
4862 			// read data to chunk
4863 			stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
4864 			chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
4865 
4866 			// read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
4867 			if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4868 
4869 			// guard against huge files (chunk size is small enough to make this overflow check work)
4870 			if (total + chunk->size < total) return status_out_of_memory;
4871 			total += chunk->size;
4872 		}
4873 
4874 		size_t max_suffix_size = sizeof(char_t);
4875 
4876 		// copy chunk list to a contiguous buffer
4877 		char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
4878 		if (!buffer) return status_out_of_memory;
4879 
4880 		char* write = buffer;
4881 
4882 		for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
4883 		{
4884 			assert(write + chunk->size <= buffer + total);
4885 			memcpy(write, chunk->data, chunk->size);
4886 			write += chunk->size;
4887 		}
4888 
4889 		assert(write == buffer + total);
4890 
4891 		// return buffer
4892 		*out_buffer = buffer;
4893 		*out_size = total;
4894 
4895 		return status_ok;
4896 	}
4897 
load_stream_data_seek(std::basic_istream<T> & stream,void ** out_buffer,size_t * out_size)4898 	template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4899 	{
4900 		// get length of remaining data in stream
4901 		typename std::basic_istream<T>::pos_type pos = stream.tellg();
4902 		stream.seekg(0, std::ios::end);
4903 		std::streamoff length = stream.tellg() - pos;
4904 		stream.seekg(pos);
4905 
4906 		if (stream.fail() || pos < 0) return status_io_error;
4907 
4908 		// guard against huge files
4909 		size_t read_length = static_cast<size_t>(length);
4910 
4911 		if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
4912 
4913 		size_t max_suffix_size = sizeof(char_t);
4914 
4915 		// read stream data into memory (guard against stream exceptions with buffer holder)
4916 		auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
4917 		if (!buffer.data) return status_out_of_memory;
4918 
4919 		stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
4920 
4921 		// read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
4922 		if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4923 
4924 		// return buffer
4925 		size_t actual_length = static_cast<size_t>(stream.gcount());
4926 		assert(actual_length <= read_length);
4927 
4928 		*out_buffer = buffer.release();
4929 		*out_size = actual_length * sizeof(T);
4930 
4931 		return status_ok;
4932 	}
4933 
load_stream_impl(xml_document_struct * doc,std::basic_istream<T> & stream,unsigned int options,xml_encoding encoding,char_t ** out_buffer)4934 	template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4935 	{
4936 		void* buffer = 0;
4937 		size_t size = 0;
4938 		xml_parse_status status = status_ok;
4939 
4940 		// if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
4941 		if (stream.fail()) return make_parse_result(status_io_error);
4942 
4943 		// load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
4944 		if (stream.tellg() < 0)
4945 		{
4946 			stream.clear(); // clear error flags that could be set by a failing tellg
4947 			status = load_stream_data_noseek(stream, &buffer, &size);
4948 		}
4949 		else
4950 			status = load_stream_data_seek(stream, &buffer, &size);
4951 
4952 		if (status != status_ok) return make_parse_result(status);
4953 
4954 		xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
4955 
4956 		return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
4957 	}
4958 #endif
4959 
4960 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
open_file_wide(const wchar_t * path,const wchar_t * mode)4961 	PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4962 	{
4963 		return _wfopen(path, mode);
4964 	}
4965 #else
convert_path_heap(const wchar_t * str)4966 	PUGI__FN char* convert_path_heap(const wchar_t* str)
4967 	{
4968 		assert(str);
4969 
4970 		// first pass: get length in utf8 characters
4971 		size_t length = strlength_wide(str);
4972 		size_t size = as_utf8_begin(str, length);
4973 
4974 		// allocate resulting string
4975 		char* result = static_cast<char*>(xml_memory::allocate(size + 1));
4976 		if (!result) return 0;
4977 
4978 		// second pass: convert to utf8
4979 		as_utf8_end(result, size, str, length);
4980 
4981 		// zero-terminate
4982 		result[size] = 0;
4983 
4984 		return result;
4985 	}
4986 
open_file_wide(const wchar_t * path,const wchar_t * mode)4987 	PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4988 	{
4989 		// there is no standard function to open wide paths, so our best bet is to try utf8 path
4990 		char* path_utf8 = convert_path_heap(path);
4991 		if (!path_utf8) return 0;
4992 
4993 		// convert mode to ASCII (we mirror _wfopen interface)
4994 		char mode_ascii[4] = {0};
4995 		for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
4996 
4997 		// try to open the utf8 path
4998 		FILE* result = fopen(path_utf8, mode_ascii);
4999 
5000 		// free dummy buffer
5001 		xml_memory::deallocate(path_utf8);
5002 
5003 		return result;
5004 	}
5005 #endif
5006 
save_file_impl(const xml_document & doc,FILE * file,const char_t * indent,unsigned int flags,xml_encoding encoding)5007 	PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
5008 	{
5009 		if (!file) return false;
5010 
5011 		xml_writer_file writer(file);
5012 		doc.save(writer, indent, flags, encoding);
5013 
5014 		return ferror(file) == 0;
5015 	}
5016 
5017 	struct name_null_sentry
5018 	{
5019 		xml_node_struct* node;
5020 		char_t* name;
5021 
name_null_sentryname_null_sentry5022 		name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name)
5023 		{
5024 			node->name = 0;
5025 		}
5026 
~name_null_sentryname_null_sentry5027 		~name_null_sentry()
5028 		{
5029 			node->name = name;
5030 		}
5031 	};
5032 PUGI__NS_END
5033 
5034 namespace pugi
5035 {
xml_writer_file(void * file_)5036 	PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
5037 	{
5038 	}
5039 
write(const void * data,size_t size)5040 	PUGI__FN void xml_writer_file::write(const void* data, size_t size)
5041 	{
5042 		size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
5043 		(void)!result; // unfortunately we can't do proper error handling here
5044 	}
5045 
5046 #ifndef PUGIXML_NO_STL
xml_writer_stream(std::basic_ostream<char,std::char_traits<char>> & stream)5047 	PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
5048 	{
5049 	}
5050 
xml_writer_stream(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream)5051 	PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
5052 	{
5053 	}
5054 
write(const void * data,size_t size)5055 	PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
5056 	{
5057 		if (narrow_stream)
5058 		{
5059 			assert(!wide_stream);
5060 			narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
5061 		}
5062 		else
5063 		{
5064 			assert(wide_stream);
5065 			assert(size % sizeof(wchar_t) == 0);
5066 
5067 			wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
5068 		}
5069 	}
5070 #endif
5071 
xml_tree_walker()5072 	PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
5073 	{
5074 	}
5075 
~xml_tree_walker()5076 	PUGI__FN xml_tree_walker::~xml_tree_walker()
5077 	{
5078 	}
5079 
depth() const5080 	PUGI__FN int xml_tree_walker::depth() const
5081 	{
5082 		return _depth;
5083 	}
5084 
begin(xml_node &)5085 	PUGI__FN bool xml_tree_walker::begin(xml_node&)
5086 	{
5087 		return true;
5088 	}
5089 
end(xml_node &)5090 	PUGI__FN bool xml_tree_walker::end(xml_node&)
5091 	{
5092 		return true;
5093 	}
5094 
xml_attribute()5095 	PUGI__FN xml_attribute::xml_attribute(): _attr(0)
5096 	{
5097 	}
5098 
xml_attribute(xml_attribute_struct * attr)5099 	PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
5100 	{
5101 	}
5102 
unspecified_bool_xml_attribute(xml_attribute ***)5103 	PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
5104 	{
5105 	}
5106 
operator xml_attribute::unspecified_bool_type() const5107 	PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
5108 	{
5109 		return _attr ? unspecified_bool_xml_attribute : 0;
5110 	}
5111 
operator !() const5112 	PUGI__FN bool xml_attribute::operator!() const
5113 	{
5114 		return !_attr;
5115 	}
5116 
operator ==(const xml_attribute & r) const5117 	PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
5118 	{
5119 		return (_attr == r._attr);
5120 	}
5121 
operator !=(const xml_attribute & r) const5122 	PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
5123 	{
5124 		return (_attr != r._attr);
5125 	}
5126 
operator <(const xml_attribute & r) const5127 	PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
5128 	{
5129 		return (_attr < r._attr);
5130 	}
5131 
operator >(const xml_attribute & r) const5132 	PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
5133 	{
5134 		return (_attr > r._attr);
5135 	}
5136 
operator <=(const xml_attribute & r) const5137 	PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
5138 	{
5139 		return (_attr <= r._attr);
5140 	}
5141 
operator >=(const xml_attribute & r) const5142 	PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
5143 	{
5144 		return (_attr >= r._attr);
5145 	}
5146 
next_attribute() const5147 	PUGI__FN xml_attribute xml_attribute::next_attribute() const
5148 	{
5149 		return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
5150 	}
5151 
previous_attribute() const5152 	PUGI__FN xml_attribute xml_attribute::previous_attribute() const
5153 	{
5154 		return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
5155 	}
5156 
as_string(const char_t * def) const5157 	PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
5158 	{
5159 		return (_attr && _attr->value) ? _attr->value + 0 : def;
5160 	}
5161 
as_int(int def) const5162 	PUGI__FN int xml_attribute::as_int(int def) const
5163 	{
5164 		return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def;
5165 	}
5166 
as_uint(unsigned int def) const5167 	PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
5168 	{
5169 		return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def;
5170 	}
5171 
as_double(double def) const5172 	PUGI__FN double xml_attribute::as_double(double def) const
5173 	{
5174 		return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def;
5175 	}
5176 
as_float(float def) const5177 	PUGI__FN float xml_attribute::as_float(float def) const
5178 	{
5179 		return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def;
5180 	}
5181 
as_bool(bool def) const5182 	PUGI__FN bool xml_attribute::as_bool(bool def) const
5183 	{
5184 		return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def;
5185 	}
5186 
5187 #ifdef PUGIXML_HAS_LONG_LONG
as_llong(long long def) const5188 	PUGI__FN long long xml_attribute::as_llong(long long def) const
5189 	{
5190 		return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def;
5191 	}
5192 
as_ullong(unsigned long long def) const5193 	PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
5194 	{
5195 		return (_attr && _attr->value) ? impl::get_value_ullong(_attr->value) : def;
5196 	}
5197 #endif
5198 
empty() const5199 	PUGI__FN bool xml_attribute::empty() const
5200 	{
5201 		return !_attr;
5202 	}
5203 
name() const5204 	PUGI__FN const char_t* xml_attribute::name() const
5205 	{
5206 		return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT("");
5207 	}
5208 
value() const5209 	PUGI__FN const char_t* xml_attribute::value() const
5210 	{
5211 		return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT("");
5212 	}
5213 
hash_value() const5214 	PUGI__FN size_t xml_attribute::hash_value() const
5215 	{
5216 		return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
5217 	}
5218 
internal_object() const5219 	PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
5220 	{
5221 		return _attr;
5222 	}
5223 
operator =(const char_t * rhs)5224 	PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
5225 	{
5226 		set_value(rhs);
5227 		return *this;
5228 	}
5229 
operator =(int rhs)5230 	PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
5231 	{
5232 		set_value(rhs);
5233 		return *this;
5234 	}
5235 
operator =(unsigned int rhs)5236 	PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
5237 	{
5238 		set_value(rhs);
5239 		return *this;
5240 	}
5241 
operator =(long rhs)5242 	PUGI__FN xml_attribute& xml_attribute::operator=(long rhs)
5243 	{
5244 		set_value(rhs);
5245 		return *this;
5246 	}
5247 
operator =(unsigned long rhs)5248 	PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs)
5249 	{
5250 		set_value(rhs);
5251 		return *this;
5252 	}
5253 
operator =(double rhs)5254 	PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
5255 	{
5256 		set_value(rhs);
5257 		return *this;
5258 	}
5259 
operator =(float rhs)5260 	PUGI__FN xml_attribute& xml_attribute::operator=(float rhs)
5261 	{
5262 		set_value(rhs);
5263 		return *this;
5264 	}
5265 
operator =(bool rhs)5266 	PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
5267 	{
5268 		set_value(rhs);
5269 		return *this;
5270 	}
5271 
5272 #ifdef PUGIXML_HAS_LONG_LONG
operator =(long long rhs)5273 	PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
5274 	{
5275 		set_value(rhs);
5276 		return *this;
5277 	}
5278 
operator =(unsigned long long rhs)5279 	PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
5280 	{
5281 		set_value(rhs);
5282 		return *this;
5283 	}
5284 #endif
5285 
set_name(const char_t * rhs)5286 	PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
5287 	{
5288 		if (!_attr) return false;
5289 
5290 		return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5291 	}
5292 
set_value(const char_t * rhs)5293 	PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
5294 	{
5295 		if (!_attr) return false;
5296 
5297 		return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5298 	}
5299 
set_value(int rhs)5300 	PUGI__FN bool xml_attribute::set_value(int rhs)
5301 	{
5302 		if (!_attr) return false;
5303 
5304 		return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5305 	}
5306 
set_value(unsigned int rhs)5307 	PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
5308 	{
5309 		if (!_attr) return false;
5310 
5311 		return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5312 	}
5313 
set_value(long rhs)5314 	PUGI__FN bool xml_attribute::set_value(long rhs)
5315 	{
5316 		if (!_attr) return false;
5317 
5318 		return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5319 	}
5320 
set_value(unsigned long rhs)5321 	PUGI__FN bool xml_attribute::set_value(unsigned long rhs)
5322 	{
5323 		if (!_attr) return false;
5324 
5325 		return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5326 	}
5327 
set_value(double rhs)5328 	PUGI__FN bool xml_attribute::set_value(double rhs)
5329 	{
5330 		if (!_attr) return false;
5331 
5332 		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5333 	}
5334 
set_value(float rhs)5335 	PUGI__FN bool xml_attribute::set_value(float rhs)
5336 	{
5337 		if (!_attr) return false;
5338 
5339 		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5340 	}
5341 
set_value(bool rhs)5342 	PUGI__FN bool xml_attribute::set_value(bool rhs)
5343 	{
5344 		if (!_attr) return false;
5345 
5346 		return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5347 	}
5348 
5349 #ifdef PUGIXML_HAS_LONG_LONG
set_value(long long rhs)5350 	PUGI__FN bool xml_attribute::set_value(long long rhs)
5351 	{
5352 		if (!_attr) return false;
5353 
5354 		return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5355 	}
5356 
set_value(unsigned long long rhs)5357 	PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
5358 	{
5359 		if (!_attr) return false;
5360 
5361 		return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5362 	}
5363 #endif
5364 
5365 #ifdef __BORLANDC__
operator &&(const xml_attribute & lhs,bool rhs)5366 	PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
5367 	{
5368 		return (bool)lhs && rhs;
5369 	}
5370 
operator ||(const xml_attribute & lhs,bool rhs)5371 	PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
5372 	{
5373 		return (bool)lhs || rhs;
5374 	}
5375 #endif
5376 
xml_node()5377 	PUGI__FN xml_node::xml_node(): _root(0)
5378 	{
5379 	}
5380 
xml_node(xml_node_struct * p)5381 	PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
5382 	{
5383 	}
5384 
unspecified_bool_xml_node(xml_node ***)5385 	PUGI__FN static void unspecified_bool_xml_node(xml_node***)
5386 	{
5387 	}
5388 
operator xml_node::unspecified_bool_type() const5389 	PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
5390 	{
5391 		return _root ? unspecified_bool_xml_node : 0;
5392 	}
5393 
operator !() const5394 	PUGI__FN bool xml_node::operator!() const
5395 	{
5396 		return !_root;
5397 	}
5398 
begin() const5399 	PUGI__FN xml_node::iterator xml_node::begin() const
5400 	{
5401 		return iterator(_root ? _root->first_child + 0 : 0, _root);
5402 	}
5403 
end() const5404 	PUGI__FN xml_node::iterator xml_node::end() const
5405 	{
5406 		return iterator(0, _root);
5407 	}
5408 
attributes_begin() const5409 	PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
5410 	{
5411 		return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root);
5412 	}
5413 
attributes_end() const5414 	PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
5415 	{
5416 		return attribute_iterator(0, _root);
5417 	}
5418 
children() const5419 	PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
5420 	{
5421 		return xml_object_range<xml_node_iterator>(begin(), end());
5422 	}
5423 
children(const char_t * name_) const5424 	PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
5425 	{
5426 		return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
5427 	}
5428 
attributes() const5429 	PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
5430 	{
5431 		return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
5432 	}
5433 
operator ==(const xml_node & r) const5434 	PUGI__FN bool xml_node::operator==(const xml_node& r) const
5435 	{
5436 		return (_root == r._root);
5437 	}
5438 
operator !=(const xml_node & r) const5439 	PUGI__FN bool xml_node::operator!=(const xml_node& r) const
5440 	{
5441 		return (_root != r._root);
5442 	}
5443 
operator <(const xml_node & r) const5444 	PUGI__FN bool xml_node::operator<(const xml_node& r) const
5445 	{
5446 		return (_root < r._root);
5447 	}
5448 
operator >(const xml_node & r) const5449 	PUGI__FN bool xml_node::operator>(const xml_node& r) const
5450 	{
5451 		return (_root > r._root);
5452 	}
5453 
operator <=(const xml_node & r) const5454 	PUGI__FN bool xml_node::operator<=(const xml_node& r) const
5455 	{
5456 		return (_root <= r._root);
5457 	}
5458 
operator >=(const xml_node & r) const5459 	PUGI__FN bool xml_node::operator>=(const xml_node& r) const
5460 	{
5461 		return (_root >= r._root);
5462 	}
5463 
empty() const5464 	PUGI__FN bool xml_node::empty() const
5465 	{
5466 		return !_root;
5467 	}
5468 
name() const5469 	PUGI__FN const char_t* xml_node::name() const
5470 	{
5471 		return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT("");
5472 	}
5473 
type() const5474 	PUGI__FN xml_node_type xml_node::type() const
5475 	{
5476 		return _root ? PUGI__NODETYPE(_root) : node_null;
5477 	}
5478 
value() const5479 	PUGI__FN const char_t* xml_node::value() const
5480 	{
5481 		return (_root && _root->value) ? _root->value + 0 : PUGIXML_TEXT("");
5482 	}
5483 
child(const char_t * name_) const5484 	PUGI__FN xml_node xml_node::child(const char_t* name_) const
5485 	{
5486 		if (!_root) return xml_node();
5487 
5488 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5489 			if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5490 
5491 		return xml_node();
5492 	}
5493 
attribute(const char_t * name_) const5494 	PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
5495 	{
5496 		if (!_root) return xml_attribute();
5497 
5498 		for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
5499 			if (i->name && impl::strequal(name_, i->name))
5500 				return xml_attribute(i);
5501 
5502 		return xml_attribute();
5503 	}
5504 
next_sibling(const char_t * name_) const5505 	PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
5506 	{
5507 		if (!_root) return xml_node();
5508 
5509 		for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
5510 			if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5511 
5512 		return xml_node();
5513 	}
5514 
next_sibling() const5515 	PUGI__FN xml_node xml_node::next_sibling() const
5516 	{
5517 		return _root ? xml_node(_root->next_sibling) : xml_node();
5518 	}
5519 
previous_sibling(const char_t * name_) const5520 	PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
5521 	{
5522 		if (!_root) return xml_node();
5523 
5524 		for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
5525 			if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5526 
5527 		return xml_node();
5528 	}
5529 
attribute(const char_t * name_,xml_attribute & hint_) const5530 	PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
5531 	{
5532 		xml_attribute_struct* hint = hint_._attr;
5533 
5534 		// if hint is not an attribute of node, behavior is not defined
5535 		assert(!hint || (_root && impl::is_attribute_of(hint, _root)));
5536 
5537 		if (!_root) return xml_attribute();
5538 
5539 		// optimistically search from hint up until the end
5540 		for (xml_attribute_struct* i = hint; i; i = i->next_attribute)
5541 			if (i->name && impl::strequal(name_, i->name))
5542 			{
5543 				// update hint to maximize efficiency of searching for consecutive attributes
5544 				hint_._attr = i->next_attribute;
5545 
5546 				return xml_attribute(i);
5547 			}
5548 
5549 		// wrap around and search from the first attribute until the hint
5550 		// 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails
5551 		for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute)
5552 			if (j->name && impl::strequal(name_, j->name))
5553 			{
5554 				// update hint to maximize efficiency of searching for consecutive attributes
5555 				hint_._attr = j->next_attribute;
5556 
5557 				return xml_attribute(j);
5558 			}
5559 
5560 		return xml_attribute();
5561 	}
5562 
previous_sibling() const5563 	PUGI__FN xml_node xml_node::previous_sibling() const
5564 	{
5565 		if (!_root) return xml_node();
5566 
5567 		if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
5568 		else return xml_node();
5569 	}
5570 
parent() const5571 	PUGI__FN xml_node xml_node::parent() const
5572 	{
5573 		return _root ? xml_node(_root->parent) : xml_node();
5574 	}
5575 
root() const5576 	PUGI__FN xml_node xml_node::root() const
5577 	{
5578 		return _root ? xml_node(&impl::get_document(_root)) : xml_node();
5579 	}
5580 
text() const5581 	PUGI__FN xml_text xml_node::text() const
5582 	{
5583 		return xml_text(_root);
5584 	}
5585 
child_value() const5586 	PUGI__FN const char_t* xml_node::child_value() const
5587 	{
5588 		if (!_root) return PUGIXML_TEXT("");
5589 
5590 		// element nodes can have value if parse_embed_pcdata was used
5591 		if (PUGI__NODETYPE(_root) == node_element && _root->value)
5592 			return _root->value;
5593 
5594 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5595 			if (impl::is_text_node(i) && i->value)
5596 				return i->value;
5597 
5598 		return PUGIXML_TEXT("");
5599 	}
5600 
child_value(const char_t * name_) const5601 	PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
5602 	{
5603 		return child(name_).child_value();
5604 	}
5605 
first_attribute() const5606 	PUGI__FN xml_attribute xml_node::first_attribute() const
5607 	{
5608 		return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
5609 	}
5610 
last_attribute() const5611 	PUGI__FN xml_attribute xml_node::last_attribute() const
5612 	{
5613 		return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
5614 	}
5615 
first_child() const5616 	PUGI__FN xml_node xml_node::first_child() const
5617 	{
5618 		return _root ? xml_node(_root->first_child) : xml_node();
5619 	}
5620 
last_child() const5621 	PUGI__FN xml_node xml_node::last_child() const
5622 	{
5623 		return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
5624 	}
5625 
set_name(const char_t * rhs)5626 	PUGI__FN bool xml_node::set_name(const char_t* rhs)
5627 	{
5628 		xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5629 
5630 		if (type_ != node_element && type_ != node_pi && type_ != node_declaration)
5631 			return false;
5632 
5633 		return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5634 	}
5635 
set_value(const char_t * rhs)5636 	PUGI__FN bool xml_node::set_value(const char_t* rhs)
5637 	{
5638 		xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5639 
5640 		if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
5641 			return false;
5642 
5643 		return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5644 	}
5645 
append_attribute(const char_t * name_)5646 	PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
5647 	{
5648 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5649 
5650 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5651 		if (!alloc.reserve()) return xml_attribute();
5652 
5653 		xml_attribute a(impl::allocate_attribute(alloc));
5654 		if (!a) return xml_attribute();
5655 
5656 		impl::append_attribute(a._attr, _root);
5657 
5658 		a.set_name(name_);
5659 
5660 		return a;
5661 	}
5662 
prepend_attribute(const char_t * name_)5663 	PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
5664 	{
5665 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5666 
5667 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5668 		if (!alloc.reserve()) return xml_attribute();
5669 
5670 		xml_attribute a(impl::allocate_attribute(alloc));
5671 		if (!a) return xml_attribute();
5672 
5673 		impl::prepend_attribute(a._attr, _root);
5674 
5675 		a.set_name(name_);
5676 
5677 		return a;
5678 	}
5679 
insert_attribute_after(const char_t * name_,const xml_attribute & attr)5680 	PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
5681 	{
5682 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5683 		if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5684 
5685 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5686 		if (!alloc.reserve()) return xml_attribute();
5687 
5688 		xml_attribute a(impl::allocate_attribute(alloc));
5689 		if (!a) return xml_attribute();
5690 
5691 		impl::insert_attribute_after(a._attr, attr._attr, _root);
5692 
5693 		a.set_name(name_);
5694 
5695 		return a;
5696 	}
5697 
insert_attribute_before(const char_t * name_,const xml_attribute & attr)5698 	PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
5699 	{
5700 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5701 		if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5702 
5703 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5704 		if (!alloc.reserve()) return xml_attribute();
5705 
5706 		xml_attribute a(impl::allocate_attribute(alloc));
5707 		if (!a) return xml_attribute();
5708 
5709 		impl::insert_attribute_before(a._attr, attr._attr, _root);
5710 
5711 		a.set_name(name_);
5712 
5713 		return a;
5714 	}
5715 
append_copy(const xml_attribute & proto)5716 	PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
5717 	{
5718 		if (!proto) return xml_attribute();
5719 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5720 
5721 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5722 		if (!alloc.reserve()) return xml_attribute();
5723 
5724 		xml_attribute a(impl::allocate_attribute(alloc));
5725 		if (!a) return xml_attribute();
5726 
5727 		impl::append_attribute(a._attr, _root);
5728 		impl::node_copy_attribute(a._attr, proto._attr);
5729 
5730 		return a;
5731 	}
5732 
prepend_copy(const xml_attribute & proto)5733 	PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
5734 	{
5735 		if (!proto) return xml_attribute();
5736 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5737 
5738 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5739 		if (!alloc.reserve()) return xml_attribute();
5740 
5741 		xml_attribute a(impl::allocate_attribute(alloc));
5742 		if (!a) return xml_attribute();
5743 
5744 		impl::prepend_attribute(a._attr, _root);
5745 		impl::node_copy_attribute(a._attr, proto._attr);
5746 
5747 		return a;
5748 	}
5749 
insert_copy_after(const xml_attribute & proto,const xml_attribute & attr)5750 	PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
5751 	{
5752 		if (!proto) return xml_attribute();
5753 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5754 		if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5755 
5756 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5757 		if (!alloc.reserve()) return xml_attribute();
5758 
5759 		xml_attribute a(impl::allocate_attribute(alloc));
5760 		if (!a) return xml_attribute();
5761 
5762 		impl::insert_attribute_after(a._attr, attr._attr, _root);
5763 		impl::node_copy_attribute(a._attr, proto._attr);
5764 
5765 		return a;
5766 	}
5767 
insert_copy_before(const xml_attribute & proto,const xml_attribute & attr)5768 	PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
5769 	{
5770 		if (!proto) return xml_attribute();
5771 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5772 		if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5773 
5774 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5775 		if (!alloc.reserve()) return xml_attribute();
5776 
5777 		xml_attribute a(impl::allocate_attribute(alloc));
5778 		if (!a) return xml_attribute();
5779 
5780 		impl::insert_attribute_before(a._attr, attr._attr, _root);
5781 		impl::node_copy_attribute(a._attr, proto._attr);
5782 
5783 		return a;
5784 	}
5785 
append_child(xml_node_type type_)5786 	PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
5787 	{
5788 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5789 
5790 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5791 		if (!alloc.reserve()) return xml_node();
5792 
5793 		xml_node n(impl::allocate_node(alloc, type_));
5794 		if (!n) return xml_node();
5795 
5796 		impl::append_node(n._root, _root);
5797 
5798 		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5799 
5800 		return n;
5801 	}
5802 
prepend_child(xml_node_type type_)5803 	PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
5804 	{
5805 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5806 
5807 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5808 		if (!alloc.reserve()) return xml_node();
5809 
5810 		xml_node n(impl::allocate_node(alloc, type_));
5811 		if (!n) return xml_node();
5812 
5813 		impl::prepend_node(n._root, _root);
5814 
5815 		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5816 
5817 		return n;
5818 	}
5819 
insert_child_before(xml_node_type type_,const xml_node & node)5820 	PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
5821 	{
5822 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5823 		if (!node._root || node._root->parent != _root) return xml_node();
5824 
5825 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5826 		if (!alloc.reserve()) return xml_node();
5827 
5828 		xml_node n(impl::allocate_node(alloc, type_));
5829 		if (!n) return xml_node();
5830 
5831 		impl::insert_node_before(n._root, node._root);
5832 
5833 		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5834 
5835 		return n;
5836 	}
5837 
insert_child_after(xml_node_type type_,const xml_node & node)5838 	PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
5839 	{
5840 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5841 		if (!node._root || node._root->parent != _root) return xml_node();
5842 
5843 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5844 		if (!alloc.reserve()) return xml_node();
5845 
5846 		xml_node n(impl::allocate_node(alloc, type_));
5847 		if (!n) return xml_node();
5848 
5849 		impl::insert_node_after(n._root, node._root);
5850 
5851 		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5852 
5853 		return n;
5854 	}
5855 
append_child(const char_t * name_)5856 	PUGI__FN xml_node xml_node::append_child(const char_t* name_)
5857 	{
5858 		xml_node result = append_child(node_element);
5859 
5860 		result.set_name(name_);
5861 
5862 		return result;
5863 	}
5864 
prepend_child(const char_t * name_)5865 	PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
5866 	{
5867 		xml_node result = prepend_child(node_element);
5868 
5869 		result.set_name(name_);
5870 
5871 		return result;
5872 	}
5873 
insert_child_after(const char_t * name_,const xml_node & node)5874 	PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
5875 	{
5876 		xml_node result = insert_child_after(node_element, node);
5877 
5878 		result.set_name(name_);
5879 
5880 		return result;
5881 	}
5882 
insert_child_before(const char_t * name_,const xml_node & node)5883 	PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
5884 	{
5885 		xml_node result = insert_child_before(node_element, node);
5886 
5887 		result.set_name(name_);
5888 
5889 		return result;
5890 	}
5891 
append_copy(const xml_node & proto)5892 	PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
5893 	{
5894 		xml_node_type type_ = proto.type();
5895 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5896 
5897 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5898 		if (!alloc.reserve()) return xml_node();
5899 
5900 		xml_node n(impl::allocate_node(alloc, type_));
5901 		if (!n) return xml_node();
5902 
5903 		impl::append_node(n._root, _root);
5904 		impl::node_copy_tree(n._root, proto._root);
5905 
5906 		return n;
5907 	}
5908 
prepend_copy(const xml_node & proto)5909 	PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
5910 	{
5911 		xml_node_type type_ = proto.type();
5912 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5913 
5914 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5915 		if (!alloc.reserve()) return xml_node();
5916 
5917 		xml_node n(impl::allocate_node(alloc, type_));
5918 		if (!n) return xml_node();
5919 
5920 		impl::prepend_node(n._root, _root);
5921 		impl::node_copy_tree(n._root, proto._root);
5922 
5923 		return n;
5924 	}
5925 
insert_copy_after(const xml_node & proto,const xml_node & node)5926 	PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
5927 	{
5928 		xml_node_type type_ = proto.type();
5929 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5930 		if (!node._root || node._root->parent != _root) return xml_node();
5931 
5932 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5933 		if (!alloc.reserve()) return xml_node();
5934 
5935 		xml_node n(impl::allocate_node(alloc, type_));
5936 		if (!n) return xml_node();
5937 
5938 		impl::insert_node_after(n._root, node._root);
5939 		impl::node_copy_tree(n._root, proto._root);
5940 
5941 		return n;
5942 	}
5943 
insert_copy_before(const xml_node & proto,const xml_node & node)5944 	PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
5945 	{
5946 		xml_node_type type_ = proto.type();
5947 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5948 		if (!node._root || node._root->parent != _root) return xml_node();
5949 
5950 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5951 		if (!alloc.reserve()) return xml_node();
5952 
5953 		xml_node n(impl::allocate_node(alloc, type_));
5954 		if (!n) return xml_node();
5955 
5956 		impl::insert_node_before(n._root, node._root);
5957 		impl::node_copy_tree(n._root, proto._root);
5958 
5959 		return n;
5960 	}
5961 
append_move(const xml_node & moved)5962 	PUGI__FN xml_node xml_node::append_move(const xml_node& moved)
5963 	{
5964 		if (!impl::allow_move(*this, moved)) return xml_node();
5965 
5966 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5967 		if (!alloc.reserve()) return xml_node();
5968 
5969 		// disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5970 		impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
5971 
5972 		impl::remove_node(moved._root);
5973 		impl::append_node(moved._root, _root);
5974 
5975 		return moved;
5976 	}
5977 
prepend_move(const xml_node & moved)5978 	PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved)
5979 	{
5980 		if (!impl::allow_move(*this, moved)) return xml_node();
5981 
5982 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5983 		if (!alloc.reserve()) return xml_node();
5984 
5985 		// disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5986 		impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
5987 
5988 		impl::remove_node(moved._root);
5989 		impl::prepend_node(moved._root, _root);
5990 
5991 		return moved;
5992 	}
5993 
insert_move_after(const xml_node & moved,const xml_node & node)5994 	PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
5995 	{
5996 		if (!impl::allow_move(*this, moved)) return xml_node();
5997 		if (!node._root || node._root->parent != _root) return xml_node();
5998 		if (moved._root == node._root) return xml_node();
5999 
6000 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6001 		if (!alloc.reserve()) return xml_node();
6002 
6003 		// disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6004 		impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6005 
6006 		impl::remove_node(moved._root);
6007 		impl::insert_node_after(moved._root, node._root);
6008 
6009 		return moved;
6010 	}
6011 
insert_move_before(const xml_node & moved,const xml_node & node)6012 	PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
6013 	{
6014 		if (!impl::allow_move(*this, moved)) return xml_node();
6015 		if (!node._root || node._root->parent != _root) return xml_node();
6016 		if (moved._root == node._root) return xml_node();
6017 
6018 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6019 		if (!alloc.reserve()) return xml_node();
6020 
6021 		// disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6022 		impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6023 
6024 		impl::remove_node(moved._root);
6025 		impl::insert_node_before(moved._root, node._root);
6026 
6027 		return moved;
6028 	}
6029 
remove_attribute(const char_t * name_)6030 	PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
6031 	{
6032 		return remove_attribute(attribute(name_));
6033 	}
6034 
remove_attribute(const xml_attribute & a)6035 	PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
6036 	{
6037 		if (!_root || !a._attr) return false;
6038 		if (!impl::is_attribute_of(a._attr, _root)) return false;
6039 
6040 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6041 		if (!alloc.reserve()) return false;
6042 
6043 		impl::remove_attribute(a._attr, _root);
6044 		impl::destroy_attribute(a._attr, alloc);
6045 
6046 		return true;
6047 	}
6048 
remove_child(const char_t * name_)6049 	PUGI__FN bool xml_node::remove_child(const char_t* name_)
6050 	{
6051 		return remove_child(child(name_));
6052 	}
6053 
remove_child(const xml_node & n)6054 	PUGI__FN bool xml_node::remove_child(const xml_node& n)
6055 	{
6056 		if (!_root || !n._root || n._root->parent != _root) return false;
6057 
6058 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6059 		if (!alloc.reserve()) return false;
6060 
6061 		impl::remove_node(n._root);
6062 		impl::destroy_node(n._root, alloc);
6063 
6064 		return true;
6065 	}
6066 
	// Parses the given buffer as a document fragment into this node via impl::load_buffer_impl.
	// contents/size describe the input buffer; options and encoding are forwarded to the parser.
	// Returns status_append_invalid_root if this node can not receive element children,
	// status_out_of_memory if the bookkeeping record for the fragment buffer can not be
	// allocated, and otherwise whatever impl::load_buffer_impl returns.
	PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
	{
		// append_buffer is only valid for elements/documents
		if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);

		// get document node
		impl::xml_document_struct* doc = &impl::get_document(_root);

		// disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
		doc->header |= impl::xml_memory_page_contents_shared_mask;

		// get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
		// the request is padded by sizeof(void*) so the pointer can be re-aligned below in compact mode
		impl::xml_memory_page* page = 0;
		impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer) + sizeof(void*), page));
		(void)page;

		if (!extra) return impl::make_parse_result(status_out_of_memory);

	#ifdef PUGIXML_COMPACT
		// align the memory block to a pointer boundary; this is required for compact mode where memory allocations are only 4b aligned
		// note that this requires up to sizeof(void*)-1 additional memory, which the allocation above takes into account
		extra = reinterpret_cast<impl::xml_extra_buffer*>((reinterpret_cast<uintptr_t>(extra) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1));
	#endif

		// add extra buffer to the list
		// (the record is pushed at the head of doc->extra_buffers; its buffer pointer is handed to the loader below)
		extra->buffer = 0;
		extra->next = doc->extra_buffers;
		doc->extra_buffers = extra;

		// name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
		impl::name_null_sentry sentry(_root);

		return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer);
	}
6101 
find_child_by_attribute(const char_t * name_,const char_t * attr_name,const char_t * attr_value) const6102 	PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
6103 	{
6104 		if (!_root) return xml_node();
6105 
6106 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6107 			if (i->name && impl::strequal(name_, i->name))
6108 			{
6109 				for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6110 					if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
6111 						return xml_node(i);
6112 			}
6113 
6114 		return xml_node();
6115 	}
6116 
find_child_by_attribute(const char_t * attr_name,const char_t * attr_value) const6117 	PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
6118 	{
6119 		if (!_root) return xml_node();
6120 
6121 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6122 			for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6123 				if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
6124 					return xml_node(i);
6125 
6126 		return xml_node();
6127 	}
6128 
6129 #ifndef PUGIXML_NO_STL
	// Builds the path of this node by joining the names of the node and all its ancestors,
	// outermost ancestor first, with delimiter between consecutive levels.
	// Nodes without a name contribute an empty segment. Returns an empty string for a null node.
	PUGI__FN string_t xml_node::path(char_t delimiter) const
	{
		if (!_root) return string_t();

		// first pass: compute the total length (one delimiter per ancestor level plus all name lengths)
		size_t offset = 0;

		for (xml_node_struct* i = _root; i; i = i->parent)
		{
			offset += (i != _root);
			offset += i->name ? impl::strlength(i->name) : 0;
		}

		string_t result;
		result.resize(offset);

		// second pass: fill the string back to front, walking from this node up to the top
		for (xml_node_struct* j = _root; j; j = j->parent)
		{
			// the delimiter that follows this ancestor's name (not written after the node itself)
			if (j != _root)
				result[--offset] = delimiter;

			if (j->name)
			{
				size_t length = impl::strlength(j->name);

				offset -= length;
				memcpy(&result[offset], j->name, length * sizeof(char_t));
			}
		}

		// both passes must agree on the total length
		assert(offset == 0);

		return result;
	}
6163 #endif
6164 
	// Resolves path_ (segments separated by delimiter) relative to this node and returns the
	// first matching node, or a null xml_node if there is no match.
	// A leading delimiter makes the search start from root(); "." keeps the current node,
	// ".." moves to the parent, any other segment is matched against child names.
	// Runs of consecutive delimiters are skipped. A null node or an empty path returns *this.
	PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
	{
		xml_node found = *this; // Current search context.

		if (!_root || !path_[0]) return found;

		if (path_[0] == delimiter)
		{
			// Absolute path; e.g. '/foo/bar'
			found = found.root();
			++path_;
		}

		// [path_segment, path_segment_end) is the first segment after skipping leading delimiters
		const char_t* path_segment = path_;

		while (*path_segment == delimiter) ++path_segment;

		const char_t* path_segment_end = path_segment;

		while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;

		// nothing left to resolve: the current context is the answer
		if (path_segment == path_segment_end) return found;

		// start of the remaining path, passed to the recursive calls below
		const char_t* next_segment = path_segment_end;

		while (*next_segment == delimiter) ++next_segment;

		if (*path_segment == '.' && path_segment + 1 == path_segment_end)
			return found.first_element_by_path(next_segment, delimiter);
		else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
			return found.parent().first_element_by_path(next_segment, delimiter);
		else
		{
			// try every child whose name equals the segment; the first one whose subtree
			// resolves the rest of the path wins
			for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)
			{
				if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
				{
					xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);

					if (subsearch) return subsearch;
				}
			}

			return xml_node();
		}
	}
6211 
	// Walks the subtree below this node without recursion, calling walker.begin() with this
	// node, walker.for_each() for every descendant (a node is visited before its children),
	// and finally walker.end() with this node. walker._depth is -1 during begin()/end() and
	// holds the depth relative to this node's children (0 for direct children) during for_each().
	// Returns false as soon as begin() or for_each() returns false (end() is not called then);
	// otherwise returns the result of end().
	PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
	{
		walker._depth = -1;

		xml_node arg_begin(_root);
		if (!walker.begin(arg_begin)) return false;

		xml_node_struct* cur = _root ? _root->first_child + 0 : 0;

		if (cur)
		{
			++walker._depth;

			do
			{
				xml_node arg_for_each(cur);
				if (!walker.for_each(arg_for_each))
					return false;

				if (cur->first_child)
				{
					// descend into the first child
					++walker._depth;
					cur = cur->first_child;
				}
				else if (cur->next_sibling)
					cur = cur->next_sibling;
				else
				{
					// no child and no sibling: climb until an ancestor has a next sibling,
					// stopping at the traversal root
					while (!cur->next_sibling && cur != _root && cur->parent)
					{
						--walker._depth;
						cur = cur->parent;
					}

					if (cur != _root)
						cur = cur->next_sibling;
				}
			}
			while (cur && cur != _root);
		}

		// every descent must have been matched by a climb
		assert(walker._depth == -1);

		xml_node arg_end(_root);
		return walker.end(arg_end);
	}
6258 
hash_value() const6259 	PUGI__FN size_t xml_node::hash_value() const
6260 	{
6261 		return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
6262 	}
6263 
	// Returns the raw node structure pointer wrapped by this handle (null for a null handle).
	PUGI__FN xml_node_struct* xml_node::internal_object() const
	{
		return _root;
	}
6268 
print(xml_writer & writer,const char_t * indent,unsigned int flags,xml_encoding encoding,unsigned int depth) const6269 	PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6270 	{
6271 		if (!_root) return;
6272 
6273 		impl::xml_buffered_writer buffered_writer(writer, encoding);
6274 
6275 		impl::node_output(buffered_writer, _root, indent, flags, depth);
6276 
6277 		buffered_writer.flush();
6278 	}
6279 
6280 #ifndef PUGIXML_NO_STL
print(std::basic_ostream<char,std::char_traits<char>> & stream,const char_t * indent,unsigned int flags,xml_encoding encoding,unsigned int depth) const6281 	PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6282 	{
6283 		xml_writer_stream writer(stream);
6284 
6285 		print(writer, indent, flags, encoding, depth);
6286 	}
6287 
print(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream,const char_t * indent,unsigned int flags,unsigned int depth) const6288 	PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
6289 	{
6290 		xml_writer_stream writer(stream);
6291 
6292 		print(writer, indent, flags, encoding_wchar, depth);
6293 	}
6294 #endif
6295 
offset_debug() const6296 	PUGI__FN ptrdiff_t xml_node::offset_debug() const
6297 	{
6298 		if (!_root) return -1;
6299 
6300 		impl::xml_document_struct& doc = impl::get_document(_root);
6301 
6302 		// we can determine the offset reliably only if there is exactly once parse buffer
6303 		if (!doc.buffer || doc.extra_buffers) return -1;
6304 
6305 		switch (type())
6306 		{
6307 		case node_document:
6308 			return 0;
6309 
6310 		case node_element:
6311 		case node_declaration:
6312 		case node_pi:
6313 			return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1;
6314 
6315 		case node_pcdata:
6316 		case node_cdata:
6317 		case node_comment:
6318 		case node_doctype:
6319 			return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1;
6320 
6321 		default:
6322 			assert(false && "Invalid node type"); // unreachable
6323 			return -1;
6324 		}
6325 	}
6326 
6327 #ifdef __BORLANDC__
operator &&(const xml_node & lhs,bool rhs)6328 	PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
6329 	{
6330 		return (bool)lhs && rhs;
6331 	}
6332 
operator ||(const xml_node & lhs,bool rhs)6333 	PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
6334 	{
6335 		return (bool)lhs || rhs;
6336 	}
6337 #endif
6338 
	// Constructs a text handle bound to the given node structure (which may be null).
	PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
	{
	}
6342 
_data() const6343 	PUGI__FN xml_node_struct* xml_text::_data() const
6344 	{
6345 		if (!_root || impl::is_text_node(_root)) return _root;
6346 
6347 		// element nodes can have value if parse_embed_pcdata was used
6348 		if (PUGI__NODETYPE(_root) == node_element && _root->value)
6349 			return _root;
6350 
6351 		for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
6352 			if (impl::is_text_node(node))
6353 				return node;
6354 
6355 		return 0;
6356 	}
6357 
_data_new()6358 	PUGI__FN xml_node_struct* xml_text::_data_new()
6359 	{
6360 		xml_node_struct* d = _data();
6361 		if (d) return d;
6362 
6363 		return xml_node(_root).append_child(node_pcdata).internal_object();
6364 	}
6365 
	// Constructs a null text handle (not bound to any node).
	PUGI__FN xml_text::xml_text(): _root(0)
	{
	}
6369 
	// Empty function; the bool-like conversion operator of xml_text returns its address
	// as the non-null ("true") value.
	PUGI__FN static void unspecified_bool_xml_text(xml_text***)
	{
	}
6373 
operator xml_text::unspecified_bool_type() const6374 	PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
6375 	{
6376 		return _data() ? unspecified_bool_xml_text : 0;
6377 	}
6378 
operator !() const6379 	PUGI__FN bool xml_text::operator!() const
6380 	{
6381 		return !_data();
6382 	}
6383 
empty() const6384 	PUGI__FN bool xml_text::empty() const
6385 	{
6386 		return _data() == 0;
6387 	}
6388 
get() const6389 	PUGI__FN const char_t* xml_text::get() const
6390 	{
6391 		xml_node_struct* d = _data();
6392 
6393 		return (d && d->value) ? d->value + 0 : PUGIXML_TEXT("");
6394 	}
6395 
as_string(const char_t * def) const6396 	PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
6397 	{
6398 		xml_node_struct* d = _data();
6399 
6400 		return (d && d->value) ? d->value + 0 : def;
6401 	}
6402 
as_int(int def) const6403 	PUGI__FN int xml_text::as_int(int def) const
6404 	{
6405 		xml_node_struct* d = _data();
6406 
6407 		return (d && d->value) ? impl::get_value_int(d->value) : def;
6408 	}
6409 
as_uint(unsigned int def) const6410 	PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
6411 	{
6412 		xml_node_struct* d = _data();
6413 
6414 		return (d && d->value) ? impl::get_value_uint(d->value) : def;
6415 	}
6416 
as_double(double def) const6417 	PUGI__FN double xml_text::as_double(double def) const
6418 	{
6419 		xml_node_struct* d = _data();
6420 
6421 		return (d && d->value) ? impl::get_value_double(d->value) : def;
6422 	}
6423 
as_float(float def) const6424 	PUGI__FN float xml_text::as_float(float def) const
6425 	{
6426 		xml_node_struct* d = _data();
6427 
6428 		return (d && d->value) ? impl::get_value_float(d->value) : def;
6429 	}
6430 
as_bool(bool def) const6431 	PUGI__FN bool xml_text::as_bool(bool def) const
6432 	{
6433 		xml_node_struct* d = _data();
6434 
6435 		return (d && d->value) ? impl::get_value_bool(d->value) : def;
6436 	}
6437 
6438 #ifdef PUGIXML_HAS_LONG_LONG
as_llong(long long def) const6439 	PUGI__FN long long xml_text::as_llong(long long def) const
6440 	{
6441 		xml_node_struct* d = _data();
6442 
6443 		return (d && d->value) ? impl::get_value_llong(d->value) : def;
6444 	}
6445 
as_ullong(unsigned long long def) const6446 	PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
6447 	{
6448 		xml_node_struct* d = _data();
6449 
6450 		return (d && d->value) ? impl::get_value_ullong(d->value) : def;
6451 	}
6452 #endif
6453 
set(const char_t * rhs)6454 	PUGI__FN bool xml_text::set(const char_t* rhs)
6455 	{
6456 		xml_node_struct* dn = _data_new();
6457 
6458 		return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false;
6459 	}
6460 
set(int rhs)6461 	PUGI__FN bool xml_text::set(int rhs)
6462 	{
6463 		xml_node_struct* dn = _data_new();
6464 
6465 		return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6466 	}
6467 
set(unsigned int rhs)6468 	PUGI__FN bool xml_text::set(unsigned int rhs)
6469 	{
6470 		xml_node_struct* dn = _data_new();
6471 
6472 		return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6473 	}
6474 
set(long rhs)6475 	PUGI__FN bool xml_text::set(long rhs)
6476 	{
6477 		xml_node_struct* dn = _data_new();
6478 
6479 		return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6480 	}
6481 
set(unsigned long rhs)6482 	PUGI__FN bool xml_text::set(unsigned long rhs)
6483 	{
6484 		xml_node_struct* dn = _data_new();
6485 
6486 		return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6487 	}
6488 
set(float rhs)6489 	PUGI__FN bool xml_text::set(float rhs)
6490 	{
6491 		xml_node_struct* dn = _data_new();
6492 
6493 		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6494 	}
6495 
set(double rhs)6496 	PUGI__FN bool xml_text::set(double rhs)
6497 	{
6498 		xml_node_struct* dn = _data_new();
6499 
6500 		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6501 	}
6502 
set(bool rhs)6503 	PUGI__FN bool xml_text::set(bool rhs)
6504 	{
6505 		xml_node_struct* dn = _data_new();
6506 
6507 		return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6508 	}
6509 
6510 #ifdef PUGIXML_HAS_LONG_LONG
set(long long rhs)6511 	PUGI__FN bool xml_text::set(long long rhs)
6512 	{
6513 		xml_node_struct* dn = _data_new();
6514 
6515 		return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6516 	}
6517 
set(unsigned long long rhs)6518 	PUGI__FN bool xml_text::set(unsigned long long rhs)
6519 	{
6520 		xml_node_struct* dn = _data_new();
6521 
6522 		return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6523 	}
6524 #endif
6525 
operator =(const char_t * rhs)6526 	PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
6527 	{
6528 		set(rhs);
6529 		return *this;
6530 	}
6531 
operator =(int rhs)6532 	PUGI__FN xml_text& xml_text::operator=(int rhs)
6533 	{
6534 		set(rhs);
6535 		return *this;
6536 	}
6537 
operator =(unsigned int rhs)6538 	PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
6539 	{
6540 		set(rhs);
6541 		return *this;
6542 	}
6543 
operator =(long rhs)6544 	PUGI__FN xml_text& xml_text::operator=(long rhs)
6545 	{
6546 		set(rhs);
6547 		return *this;
6548 	}
6549 
operator =(unsigned long rhs)6550 	PUGI__FN xml_text& xml_text::operator=(unsigned long rhs)
6551 	{
6552 		set(rhs);
6553 		return *this;
6554 	}
6555 
operator =(double rhs)6556 	PUGI__FN xml_text& xml_text::operator=(double rhs)
6557 	{
6558 		set(rhs);
6559 		return *this;
6560 	}
6561 
operator =(float rhs)6562 	PUGI__FN xml_text& xml_text::operator=(float rhs)
6563 	{
6564 		set(rhs);
6565 		return *this;
6566 	}
6567 
operator =(bool rhs)6568 	PUGI__FN xml_text& xml_text::operator=(bool rhs)
6569 	{
6570 		set(rhs);
6571 		return *this;
6572 	}
6573 
6574 #ifdef PUGIXML_HAS_LONG_LONG
operator =(long long rhs)6575 	PUGI__FN xml_text& xml_text::operator=(long long rhs)
6576 	{
6577 		set(rhs);
6578 		return *this;
6579 	}
6580 
operator =(unsigned long long rhs)6581 	PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
6582 	{
6583 		set(rhs);
6584 		return *this;
6585 	}
6586 #endif
6587 
	// Wraps whatever _data() returns in an xml_node handle and returns it.
	PUGI__FN xml_node xml_text::data() const
	{
		return xml_node(_data());
	}
6592 
6593 #ifdef __BORLANDC__
	// Borland-only overload (compiled under __BORLANDC__): logical AND of an xml_text converted to bool and a plain bool.
	PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
	{
		return (bool)lhs && rhs;
	}
6598 
	// Borland-only overload (compiled under __BORLANDC__): logical OR of an xml_text converted to bool and a plain bool.
	PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
	{
		return (bool)lhs || rhs;
	}
6603 #endif
6604 
	// Default constructor: members are default-constructed, no explicit initialization is performed here.
	PUGI__FN xml_node_iterator::xml_node_iterator()
	{
	}
6608 
	// Builds an iterator positioned at 'node'; the stored parent is obtained from node.parent().
	PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
	{
	}
6612 
	// Builds an iterator directly from a raw node pointer and the raw pointer of its parent.
	PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
	{
	}
6616 
operator ==(const xml_node_iterator & rhs) const6617 	PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
6618 	{
6619 		return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6620 	}
6621 
operator !=(const xml_node_iterator & rhs) const6622 	PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
6623 	{
6624 		return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6625 	}
6626 
	// Returns a reference to the wrapped node; asserts that the iterator currently points at a node.
	PUGI__FN xml_node& xml_node_iterator::operator*() const
	{
		assert(_wrap._root);
		return _wrap;
	}
6632 
operator ->() const6633 	PUGI__FN xml_node* xml_node_iterator::operator->() const
6634 	{
6635 		assert(_wrap._root);
6636 		return const_cast<xml_node*>(&_wrap); // BCC5 workaround
6637 	}
6638 
operator ++()6639 	PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
6640 	{
6641 		assert(_wrap._root);
6642 		_wrap._root = _wrap._root->next_sibling;
6643 		return *this;
6644 	}
6645 
operator ++(int)6646 	PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
6647 	{
6648 		xml_node_iterator temp = *this;
6649 		++*this;
6650 		return temp;
6651 	}
6652 
operator --()6653 	PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
6654 	{
6655 		_wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
6656 		return *this;
6657 	}
6658 
operator --(int)6659 	PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
6660 	{
6661 		xml_node_iterator temp = *this;
6662 		--*this;
6663 		return temp;
6664 	}
6665 
	// Default constructor: members are default-constructed, no explicit initialization is performed here.
	PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
	{
	}
6669 
	// Builds an iterator positioned at 'attr'; the owning node has to be supplied explicitly as 'parent'.
	PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
	{
	}
6673 
	// Builds an iterator directly from a raw attribute pointer and the raw pointer of the owning node.
	PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
	{
	}
6677 
operator ==(const xml_attribute_iterator & rhs) const6678 	PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
6679 	{
6680 		return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
6681 	}
6682 
operator !=(const xml_attribute_iterator & rhs) const6683 	PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
6684 	{
6685 		return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
6686 	}
6687 
	// Returns a reference to the wrapped attribute; asserts that the iterator currently points at an attribute.
	PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
	{
		assert(_wrap._attr);
		return _wrap;
	}
6693 
operator ->() const6694 	PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
6695 	{
6696 		assert(_wrap._attr);
6697 		return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround
6698 	}
6699 
operator ++()6700 	PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
6701 	{
6702 		assert(_wrap._attr);
6703 		_wrap._attr = _wrap._attr->next_attribute;
6704 		return *this;
6705 	}
6706 
operator ++(int)6707 	PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
6708 	{
6709 		xml_attribute_iterator temp = *this;
6710 		++*this;
6711 		return temp;
6712 	}
6713 
operator --()6714 	PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
6715 	{
6716 		_wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
6717 		return *this;
6718 	}
6719 
operator --(int)6720 	PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
6721 	{
6722 		xml_attribute_iterator temp = *this;
6723 		--*this;
6724 		return temp;
6725 	}
6726 
	// Default constructor: the name filter is set to null; the node members are default-constructed.
	PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
	{
	}
6730 
	// Builds an iterator positioned at 'node' that keeps 'name' as its filter; the parent comes from node.parent().
	// Only the pointer is stored for 'name' (no copy is made here).
	PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
	{
	}
6734 
	// Builds an iterator from raw node/parent pointers and a name filter.
	// Only the pointer is stored for 'name' (no copy is made here).
	PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
	{
	}
6738 
operator ==(const xml_named_node_iterator & rhs) const6739 	PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
6740 	{
6741 		return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6742 	}
6743 
operator !=(const xml_named_node_iterator & rhs) const6744 	PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
6745 	{
6746 		return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6747 	}
6748 
	// Returns a reference to the wrapped node; asserts that the iterator currently points at a node.
	PUGI__FN xml_node& xml_named_node_iterator::operator*() const
	{
		assert(_wrap._root);
		return _wrap;
	}
6754 
operator ->() const6755 	PUGI__FN xml_node* xml_named_node_iterator::operator->() const
6756 	{
6757 		assert(_wrap._root);
6758 		return const_cast<xml_node*>(&_wrap); // BCC5 workaround
6759 	}
6760 
operator ++()6761 	PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
6762 	{
6763 		assert(_wrap._root);
6764 		_wrap = _wrap.next_sibling(_name);
6765 		return *this;
6766 	}
6767 
operator ++(int)6768 	PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
6769 	{
6770 		xml_named_node_iterator temp = *this;
6771 		++*this;
6772 		return temp;
6773 	}
6774 
operator --()6775 	PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--()
6776 	{
6777 		if (_wrap._root)
6778 			_wrap = _wrap.previous_sibling(_name);
6779 		else
6780 		{
6781 			_wrap = _parent.last_child();
6782 
6783 			if (!impl::strequal(_wrap.name(), _name))
6784 				_wrap = _wrap.previous_sibling(_name);
6785 		}
6786 
6787 		return *this;
6788 	}
6789 
operator --(int)6790 	PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
6791 	{
6792 		xml_named_node_iterator temp = *this;
6793 		--*this;
6794 		return temp;
6795 	}
6796 
	// Default state: status_internal_error, offset 0, encoding_auto.
	// Note that a default-constructed result therefore converts to false (see operator bool).
	PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
	{
	}
6800 
	// True only when the stored status is status_ok.
	PUGI__FN xml_parse_result::operator bool() const
	{
		return status == status_ok;
	}
6805 
description() const6806 	PUGI__FN const char* xml_parse_result::description() const
6807 	{
6808 		switch (status)
6809 		{
6810 		case status_ok: return "No error";
6811 
6812 		case status_file_not_found: return "File was not found";
6813 		case status_io_error: return "Error reading from file/stream";
6814 		case status_out_of_memory: return "Could not allocate memory";
6815 		case status_internal_error: return "Internal error occurred";
6816 
6817 		case status_unrecognized_tag: return "Could not determine tag type";
6818 
6819 		case status_bad_pi: return "Error parsing document declaration/processing instruction";
6820 		case status_bad_comment: return "Error parsing comment";
6821 		case status_bad_cdata: return "Error parsing CDATA section";
6822 		case status_bad_doctype: return "Error parsing document type declaration";
6823 		case status_bad_pcdata: return "Error parsing PCDATA section";
6824 		case status_bad_start_element: return "Error parsing start element tag";
6825 		case status_bad_attribute: return "Error parsing element attribute";
6826 		case status_bad_end_element: return "Error parsing end element tag";
6827 		case status_end_element_mismatch: return "Start-end tags mismatch";
6828 
6829 		case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
6830 
6831 		case status_no_document_element: return "No document element found";
6832 
6833 		default: return "Unknown error";
6834 		}
6835 	}
6836 
	// Creates an empty document: no owned buffer, then _create() sets up the root structure.
	PUGI__FN xml_document::xml_document(): _buffer(0)
	{
		_create();
	}
6841 
	// Releases everything the document owns via _destroy().
	PUGI__FN xml_document::~xml_document()
	{
		_destroy();
	}
6846 
6847 #ifdef PUGIXML_HAS_MOVE
	// Move constructor: first builds an empty document, then takes over the contents of rhs via _move().
	PUGI__FN xml_document::xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT: _buffer(0)
	{
		_create();
		_move(rhs);
	}
6853 
operator =(xml_document && rhs)6854 	PUGI__FN xml_document& xml_document::operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
6855 	{
6856 		if (this == &rhs) return *this;
6857 
6858 		_destroy();
6859 		_create();
6860 		_move(rhs);
6861 
6862 		return *this;
6863 	}
6864 #endif
6865 
	// Returns the document to the empty state by tearing it down and creating it again.
	PUGI__FN void xml_document::reset()
	{
		_destroy();
		_create();
	}
6871 
reset(const xml_document & proto)6872 	PUGI__FN void xml_document::reset(const xml_document& proto)
6873 	{
6874 		reset();
6875 
6876 		for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
6877 			append_copy(cur);
6878 	}
6879 
	// Initializes the document in place inside the embedded _memory storage:
	// a memory page header is constructed at the start of _memory, followed (after an optional
	// compact-mode marker slot) by the xml_document_struct that becomes _root.
	// Must only be called when there is no current root (asserted).
	PUGI__FN void xml_document::_create()
	{
		assert(!_root);

	#ifdef PUGIXML_COMPACT
		// space for page marker for the first page (uint32_t), rounded up to pointer size; assumes pointers are at least 32-bit
		const size_t page_offset = sizeof(void*);
	#else
		const size_t page_offset = 0;
	#endif

		// initialize sentinel page
		PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory));

		// prepare page structure
		impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory);
		assert(page);

		// the embedded page is marked with a busy size of a whole page
		page->busy_size = impl::xml_memory_page_size;

		// setup first page marker
	#ifdef PUGIXML_COMPACT
		// round-trip through void* to avoid 'cast increases required alignment of target type' warning
		page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
		*page->compact_page_marker = sizeof(impl::xml_memory_page);
	#endif

		// allocate new root
		_root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
		// the root's prev_sibling_c points back at the root itself
		_root->prev_sibling_c = _root;

		// setup sentinel page
		page->allocator = static_cast<impl::xml_document_struct*>(_root);

		// setup hash table pointer in allocator
	#ifdef PUGIXML_COMPACT
		page->allocator->_hash = &static_cast<impl::xml_document_struct*>(_root)->hash;
	#endif

		// verify the document allocation
		assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
	}
6922 
	// Releases every resource owned by the document and clears _root:
	// the owned parse buffer, all extra buffers, every memory page after the embedded one,
	// and (compact mode) the hash table. Requires a current root (asserted).
	PUGI__FN void xml_document::_destroy()
	{
		assert(_root);

		// destroy static storage
		if (_buffer)
		{
			impl::xml_memory::deallocate(_buffer);
			_buffer = 0;
		}

		// destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
		for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
		{
			if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
		}

		// destroy dynamic storage, leave sentinel page (it's in static memory)
		impl::xml_memory_page* root_page = PUGI__GETPAGE(_root);
		assert(root_page && !root_page->prev);
		assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));

		for (impl::xml_memory_page* page = root_page->next; page; )
		{
			// read the link before the page is released
			impl::xml_memory_page* next = page->next;

			impl::xml_allocator::deallocate_page(page);

			page = next;
		}

	#ifdef PUGIXML_COMPACT
		// destroy hash table
		static_cast<impl::xml_document_struct*>(_root)->hash.clear();
	#endif

		_root = 0;
	}
6961 
6962 #ifdef PUGIXML_HAS_MOVE
_move(xml_document & rhs)6963 	PUGI__FN void xml_document::_move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
6964 	{
6965 		impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
6966 		impl::xml_document_struct* other = static_cast<impl::xml_document_struct*>(rhs._root);
6967 
6968 		// save first child pointer for later; this needs hash access
6969 		xml_node_struct* other_first_child = other->first_child;
6970 
6971 	#ifdef PUGIXML_COMPACT
6972 		// reserve space for the hash table up front; this is the only operation that can fail
6973 		// if it does, we have no choice but to throw (if we have exceptions)
6974 		if (other_first_child)
6975 		{
6976 			size_t other_children = 0;
6977 			for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
6978 				other_children++;
6979 
6980 			// in compact mode, each pointer assignment could result in a hash table request
6981 			// during move, we have to relocate document first_child and parents of all children
6982 			// normally there's just one child and its parent has a pointerless encoding but
6983 			// we assume the worst here
6984 			if (!other->_hash->reserve(other_children + 1))
6985 			{
6986 			#ifdef PUGIXML_NO_EXCEPTIONS
6987 				return;
6988 			#else
6989 				throw std::bad_alloc();
6990 			#endif
6991 			}
6992 		}
6993 	#endif
6994 
6995 		// move allocation state
6996 		doc->_root = other->_root;
6997 		doc->_busy_size = other->_busy_size;
6998 
6999 		// move buffer state
7000 		doc->buffer = other->buffer;
7001 		doc->extra_buffers = other->extra_buffers;
7002 		_buffer = rhs._buffer;
7003 
7004 	#ifdef PUGIXML_COMPACT
7005 		// move compact hash; note that the hash table can have pointers to other but they will be "inactive", similarly to nodes removed with remove_child
7006 		doc->hash = other->hash;
7007 		doc->_hash = &doc->hash;
7008 
7009 		// make sure we don't access other hash up until the end when we reinitialize other document
7010 		other->_hash = 0;
7011 	#endif
7012 
7013 		// move page structure
7014 		impl::xml_memory_page* doc_page = PUGI__GETPAGE(doc);
7015 		assert(doc_page && !doc_page->prev && !doc_page->next);
7016 
7017 		impl::xml_memory_page* other_page = PUGI__GETPAGE(other);
7018 		assert(other_page && !other_page->prev);
7019 
7020 		// relink pages since root page is embedded into xml_document
7021 		if (impl::xml_memory_page* page = other_page->next)
7022 		{
7023 			assert(page->prev == other_page);
7024 
7025 			page->prev = doc_page;
7026 
7027 			doc_page->next = page;
7028 			other_page->next = 0;
7029 		}
7030 
7031 		// make sure pages point to the correct document state
7032 		for (impl::xml_memory_page* page = doc_page->next; page; page = page->next)
7033 		{
7034 			assert(page->allocator == other);
7035 
7036 			page->allocator = doc;
7037 
7038 		#ifdef PUGIXML_COMPACT
7039 			// this automatically migrates most children between documents and prevents ->parent assignment from allocating
7040 			if (page->compact_shared_parent == other)
7041 				page->compact_shared_parent = doc;
7042 		#endif
7043 		}
7044 
7045 		// move tree structure
7046 		assert(!doc->first_child);
7047 
7048 		doc->first_child = other_first_child;
7049 
7050 		for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
7051 		{
7052 		#ifdef PUGIXML_COMPACT
7053 			// most children will have migrated when we reassigned compact_shared_parent
7054 			assert(node->parent == other || node->parent == doc);
7055 
7056 			node->parent = doc;
7057 		#else
7058 			assert(node->parent == other);
7059 			node->parent = doc;
7060 		#endif
7061 		}
7062 
7063 		// reset other document
7064 		new (other) impl::xml_document_struct(PUGI__GETPAGE(other));
7065 		rhs._buffer = 0;
7066 	}
7067 #endif
7068 
7069 #ifndef PUGIXML_NO_STL
load(std::basic_istream<char,std::char_traits<char>> & stream,unsigned int options,xml_encoding encoding)7070 	PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
7071 	{
7072 		reset();
7073 
7074 		return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer);
7075 	}
7076 
load(std::basic_istream<wchar_t,std::char_traits<wchar_t>> & stream,unsigned int options)7077 	PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
7078 	{
7079 		reset();
7080 
7081 		return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
7082 	}
7083 #endif
7084 
load_string(const char_t * contents,unsigned int options)7085 	PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
7086 	{
7087 		// Force native encoding (skip autodetection)
7088 	#ifdef PUGIXML_WCHAR_MODE
7089 		xml_encoding encoding = encoding_wchar;
7090 	#else
7091 		xml_encoding encoding = encoding_utf8;
7092 	#endif
7093 
7094 		return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
7095 	}
7096 
	// Thin forwarder to load_string() with the same arguments.
	PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
	{
		return load_string(contents, options);
	}
7101 
load_file(const char * path_,unsigned int options,xml_encoding encoding)7102 	PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
7103 	{
7104 		reset();
7105 
7106 		using impl::auto_deleter; // MSVC7 workaround
7107 		auto_deleter<FILE> file(fopen(path_, "rb"), impl::close_file);
7108 
7109 		return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
7110 	}
7111 
load_file(const wchar_t * path_,unsigned int options,xml_encoding encoding)7112 	PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
7113 	{
7114 		reset();
7115 
7116 		using impl::auto_deleter; // MSVC7 workaround
7117 		auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file);
7118 
7119 		return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
7120 	}
7121 
load_buffer(const void * contents,size_t size,unsigned int options,xml_encoding encoding)7122 	PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
7123 	{
7124 		reset();
7125 
7126 		return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
7127 	}
7128 
load_buffer_inplace(void * contents,size_t size,unsigned int options,xml_encoding encoding)7129 	PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
7130 	{
7131 		reset();
7132 
7133 		return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
7134 	}
7135 
load_buffer_inplace_own(void * contents,size_t size,unsigned int options,xml_encoding encoding)7136 	PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
7137 	{
7138 		reset();
7139 
7140 		return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
7141 	}
7142 
save(xml_writer & writer,const char_t * indent,unsigned int flags,xml_encoding encoding) const7143 	PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7144 	{
7145 		impl::xml_buffered_writer buffered_writer(writer, encoding);
7146 
7147 		if ((flags & format_write_bom) && encoding != encoding_latin1)
7148 		{
7149 			// BOM always represents the codepoint U+FEFF, so just write it in native encoding
7150 		#ifdef PUGIXML_WCHAR_MODE
7151 			unsigned int bom = 0xfeff;
7152 			buffered_writer.write(static_cast<wchar_t>(bom));
7153 		#else
7154 			buffered_writer.write('\xef', '\xbb', '\xbf');
7155 		#endif
7156 		}
7157 
7158 		if (!(flags & format_no_declaration) && !impl::has_declaration(_root))
7159 		{
7160 			buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
7161 			if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
7162 			buffered_writer.write('?', '>');
7163 			if (!(flags & format_raw)) buffered_writer.write('\n');
7164 		}
7165 
7166 		impl::node_output(buffered_writer, _root, indent, flags, 0);
7167 
7168 		buffered_writer.flush();
7169 	}
7170 
7171 #ifndef PUGIXML_NO_STL
save(std::basic_ostream<char,std::char_traits<char>> & stream,const char_t * indent,unsigned int flags,xml_encoding encoding) const7172 	PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7173 	{
7174 		xml_writer_stream writer(stream);
7175 
7176 		save(writer, indent, flags, encoding);
7177 	}
7178 
save(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream,const char_t * indent,unsigned int flags) const7179 	PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
7180 	{
7181 		xml_writer_stream writer(stream);
7182 
7183 		save(writer, indent, flags, encoding_wchar);
7184 	}
7185 #endif
7186 
save_file(const char * path_,const char_t * indent,unsigned int flags,xml_encoding encoding) const7187 	PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7188 	{
7189 		using impl::auto_deleter; // MSVC7 workaround
7190 		auto_deleter<FILE> file(fopen(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file);
7191 
7192 		return impl::save_file_impl(*this, file.data, indent, flags, encoding);
7193 	}
7194 
save_file(const wchar_t * path_,const char_t * indent,unsigned int flags,xml_encoding encoding) const7195 	PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7196 	{
7197 		using impl::auto_deleter; // MSVC7 workaround
7198 		auto_deleter<FILE> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), impl::close_file);
7199 
7200 		return impl::save_file_impl(*this, file.data, indent, flags, encoding);
7201 	}
7202 
document_element() const7203 	PUGI__FN xml_node xml_document::document_element() const
7204 	{
7205 		assert(_root);
7206 
7207 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
7208 			if (PUGI__NODETYPE(i) == node_element)
7209 				return xml_node(i);
7210 
7211 		return xml_node();
7212 	}
7213 
7214 #ifndef PUGIXML_NO_STL
	// Converts a null-terminated wide string to a std::string via impl::as_utf8_impl.
	// 'str' must not be null (asserted).
	PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
	{
		assert(str);

		return impl::as_utf8_impl(str, impl::strlength_wide(str));
	}
7221 
as_utf8(const std::basic_string<wchar_t> & str)7222 	PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
7223 	{
7224 		return impl::as_utf8_impl(str.c_str(), str.size());
7225 	}
7226 
as_wide(const char * str)7227 	PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
7228 	{
7229 		assert(str);
7230 
7231 		return impl::as_wide_impl(str, strlen(str));
7232 	}
7233 
as_wide(const std::string & str)7234 	PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
7235 	{
7236 		return impl::as_wide_impl(str.c_str(), str.size());
7237 	}
7238 #endif
7239 
set_memory_management_functions(allocation_function allocate,deallocation_function deallocate)7240 	PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
7241 	{
7242 		impl::xml_memory::allocate = allocate;
7243 		impl::xml_memory::deallocate = deallocate;
7244 	}
7245 
	// Returns the allocation function pointer currently stored in impl::xml_memory.
	PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
	{
		return impl::xml_memory::allocate;
	}
7250 
	// Returns the deallocation function pointer currently stored in impl::xml_memory.
	PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
	{
		return impl::xml_memory::deallocate;
	}
7255 }
7256 
7257 #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
namespace std
{
	// Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
	// Each overload reports the corresponding pugi iterator as bidirectional.
	PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
	{
		return std::bidirectional_iterator_tag();
	}

	PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
	{
		return std::bidirectional_iterator_tag();
	}

	PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
	{
		return std::bidirectional_iterator_tag();
	}
}
7276 #endif
7277 
7278 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
namespace std
{
	// Workarounds for (non-standard) iterator category detection
	// (compiled only under __SUNPRO_CC); each overload reports the corresponding pugi iterator as bidirectional.
	PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
	{
		return std::bidirectional_iterator_tag();
	}

	PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
	{
		return std::bidirectional_iterator_tag();
	}

	PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
	{
		return std::bidirectional_iterator_tag();
	}
}
7297 #endif
7298 
7299 #ifndef PUGIXML_NO_XPATH
7300 // STL replacements
7301 PUGI__NS_BEGIN
7302 	struct equal_to
7303 	{
operator ()equal_to7304 		template <typename T> bool operator()(const T& lhs, const T& rhs) const
7305 		{
7306 			return lhs == rhs;
7307 		}
7308 	};
7309 
7310 	struct not_equal_to
7311 	{
operator ()not_equal_to7312 		template <typename T> bool operator()(const T& lhs, const T& rhs) const
7313 		{
7314 			return lhs != rhs;
7315 		}
7316 	};
7317 
7318 	struct less
7319 	{
operator ()less7320 		template <typename T> bool operator()(const T& lhs, const T& rhs) const
7321 		{
7322 			return lhs < rhs;
7323 		}
7324 	};
7325 
7326 	struct less_equal
7327 	{
operator ()less_equal7328 		template <typename T> bool operator()(const T& lhs, const T& rhs) const
7329 		{
7330 			return lhs <= rhs;
7331 		}
7332 	};
7333 
swap(T & lhs,T & rhs)7334 	template <typename T> void swap(T& lhs, T& rhs)
7335 	{
7336 		T temp = lhs;
7337 		lhs = rhs;
7338 		rhs = temp;
7339 	}
7340 
7341 	template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
7342 	{
7343 		I result = begin;
7344 
7345 		for (I it = begin + 1; it != end; ++it)
7346 			if (pred(*it, *result))
7347 				result = it;
7348 
7349 		return result;
7350 	}
7351 
reverse(I begin,I end)7352 	template <typename I> void reverse(I begin, I end)
7353 	{
7354 		while (end - begin > 1) swap(*begin++, *--end);
7355 	}
7356 
unique(I begin,I end)7357 	template <typename I> I unique(I begin, I end)
7358 	{
7359 		// fast skip head
7360 		while (end - begin > 1 && *begin != *(begin + 1)) begin++;
7361 
7362 		if (begin == end) return begin;
7363 
7364 		// last written element
7365 		I write = begin++;
7366 
7367 		// merge unique elements
7368 		while (begin != end)
7369 		{
7370 			if (*begin != *write)
7371 				*++write = *begin++;
7372 			else
7373 				begin++;
7374 		}
7375 
7376 		// past-the-end (write points to live element)
7377 		return write + 1;
7378 	}
7379 
insertion_sort(T * begin,T * end,const Pred & pred)7380 	template <typename T, typename Pred> void insertion_sort(T* begin, T* end, const Pred& pred)
7381 	{
7382 		if (begin == end)
7383 			return;
7384 
7385 		for (T* it = begin + 1; it != end; ++it)
7386 		{
7387 			T val = *it;
7388 			T* hole = it;
7389 
7390 			// move hole backwards
7391 			while (hole > begin && pred(val, *(hole - 1)))
7392 			{
7393 				*hole = *(hole - 1);
7394 				hole--;
7395 			}
7396 
7397 			// fill hole with element
7398 			*hole = val;
7399 		}
7400 	}
7401 
7402 	template <typename I, typename Pred> I median3(I first, I middle, I last, const Pred& pred)
7403 	{
7404 		if (pred(*middle, *first)) swap(middle, first);
7405 		if (pred(*last, *middle)) swap(last, middle);
7406 		if (pred(*middle, *first)) swap(middle, first);
7407 
7408 		return middle;
7409 	}
7410 
partition3(T * begin,T * end,T pivot,const Pred & pred,T ** out_eqbeg,T ** out_eqend)7411 	template <typename T, typename Pred> void partition3(T* begin, T* end, T pivot, const Pred& pred, T** out_eqbeg, T** out_eqend)
7412 	{
7413 		// invariant: array is split into 4 groups: = < ? > (each variable denotes the boundary between the groups)
7414 		T* eq = begin;
7415 		T* lt = begin;
7416 		T* gt = end;
7417 
7418 		while (lt < gt)
7419 		{
7420 			if (pred(*lt, pivot))
7421 				lt++;
7422 			else if (*lt == pivot)
7423 				swap(*eq++, *lt++);
7424 			else
7425 				swap(*lt, *--gt);
7426 		}
7427 
7428 		// we now have just 4 groups: = < >; move equal elements to the middle
7429 		T* eqbeg = gt;
7430 
7431 		for (T* it = begin; it != eq; ++it)
7432 			swap(*it, *--eqbeg);
7433 
7434 		*out_eqbeg = eqbeg;
7435 		*out_eqend = gt;
7436 	}
7437 
sort(I begin,I end,const Pred & pred)7438 	template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
7439 	{
7440 		// sort large chunks
7441 		while (end - begin > 16)
7442 		{
7443 			// find median element
7444 			I middle = begin + (end - begin) / 2;
7445 			I median = median3(begin, middle, end - 1, pred);
7446 
7447 			// partition in three chunks (< = >)
7448 			I eqbeg, eqend;
7449 			partition3(begin, end, *median, pred, &eqbeg, &eqend);
7450 
7451 			// loop on larger half
7452 			if (eqbeg - begin > end - eqend)
7453 			{
7454 				sort(eqend, end, pred);
7455 				end = eqbeg;
7456 			}
7457 			else
7458 			{
7459 				sort(begin, eqbeg, pred);
7460 				begin = eqend;
7461 			}
7462 		}
7463 
7464 		// insertion sort small chunk
7465 		insertion_sort(begin, end, pred);
7466 	}
7467 PUGI__NS_END
7468 
7469 // Allocator used for AST and evaluation stacks
7470 PUGI__NS_BEGIN
7471 	static const size_t xpath_memory_page_size =
7472 	#ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
7473 		PUGIXML_MEMORY_XPATH_PAGE_SIZE
7474 	#else
7475 		4096
7476 	#endif
7477 		;
7478 
7479 	static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*);
7480 
7481 	struct xpath_memory_block
7482 	{
7483 		xpath_memory_block* next;
7484 		size_t capacity;
7485 
7486 		union
7487 		{
7488 			char data[xpath_memory_page_size];
7489 			double alignment;
7490 		};
7491 	};
7492 
7493 	struct xpath_allocator
7494 	{
7495 		xpath_memory_block* _root;
7496 		size_t _root_size;
7497 		bool* _error;
7498 
xpath_allocatorxpath_allocator7499 		xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error)
7500 		{
7501 		}
7502 
allocatexpath_allocator7503 		void* allocate(size_t size)
7504 		{
7505 			// round size up to block alignment boundary
7506 			size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7507 
7508 			if (_root_size + size <= _root->capacity)
7509 			{
7510 				void* buf = &_root->data[0] + _root_size;
7511 				_root_size += size;
7512 				return buf;
7513 			}
7514 			else
7515 			{
7516 				// make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
7517 				size_t block_capacity_base = sizeof(_root->data);
7518 				size_t block_capacity_req = size + block_capacity_base / 4;
7519 				size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;
7520 
7521 				size_t block_size = block_capacity + offsetof(xpath_memory_block, data);
7522 
7523 				xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
7524 				if (!block)
7525 				{
7526 					if (_error) *_error = true;
7527 					return 0;
7528 				}
7529 
7530 				block->next = _root;
7531 				block->capacity = block_capacity;
7532 
7533 				_root = block;
7534 				_root_size = size;
7535 
7536 				return block->data;
7537 			}
7538 		}
7539 
reallocatexpath_allocator7540 		void* reallocate(void* ptr, size_t old_size, size_t new_size)
7541 		{
7542 			// round size up to block alignment boundary
7543 			old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7544 			new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7545 
7546 			// we can only reallocate the last object
7547 			assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);
7548 
7549 			// try to reallocate the object inplace
7550 			if (ptr && _root_size - old_size + new_size <= _root->capacity)
7551 			{
7552 				_root_size = _root_size - old_size + new_size;
7553 				return ptr;
7554 			}
7555 
7556 			// allocate a new block
7557 			void* result = allocate(new_size);
7558 			if (!result) return 0;
7559 
7560 			// we have a new block
7561 			if (ptr)
7562 			{
7563 				// copy old data (we only support growing)
7564 				assert(new_size >= old_size);
7565 				memcpy(result, ptr, old_size);
7566 
7567 				// free the previous page if it had no other objects
7568 				assert(_root->data == result);
7569 				assert(_root->next);
7570 
7571 				if (_root->next->data == ptr)
7572 				{
7573 					// deallocate the whole page, unless it was the first one
7574 					xpath_memory_block* next = _root->next->next;
7575 
7576 					if (next)
7577 					{
7578 						xml_memory::deallocate(_root->next);
7579 						_root->next = next;
7580 					}
7581 				}
7582 			}
7583 
7584 			return result;
7585 		}
7586 
revertxpath_allocator7587 		void revert(const xpath_allocator& state)
7588 		{
7589 			// free all new pages
7590 			xpath_memory_block* cur = _root;
7591 
7592 			while (cur != state._root)
7593 			{
7594 				xpath_memory_block* next = cur->next;
7595 
7596 				xml_memory::deallocate(cur);
7597 
7598 				cur = next;
7599 			}
7600 
7601 			// restore state
7602 			_root = state._root;
7603 			_root_size = state._root_size;
7604 		}
7605 
releasexpath_allocator7606 		void release()
7607 		{
7608 			xpath_memory_block* cur = _root;
7609 			assert(cur);
7610 
7611 			while (cur->next)
7612 			{
7613 				xpath_memory_block* next = cur->next;
7614 
7615 				xml_memory::deallocate(cur);
7616 
7617 				cur = next;
7618 			}
7619 		}
7620 	};
7621 
7622 	struct xpath_allocator_capture
7623 	{
xpath_allocator_capturexpath_allocator_capture7624 		xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
7625 		{
7626 		}
7627 
~xpath_allocator_capturexpath_allocator_capture7628 		~xpath_allocator_capture()
7629 		{
7630 			_target->revert(_state);
7631 		}
7632 
7633 		xpath_allocator* _target;
7634 		xpath_allocator _state;
7635 	};
7636 
7637 	struct xpath_stack
7638 	{
7639 		xpath_allocator* result;
7640 		xpath_allocator* temp;
7641 	};
7642 
7643 	struct xpath_stack_data
7644 	{
7645 		xpath_memory_block blocks[2];
7646 		xpath_allocator result;
7647 		xpath_allocator temp;
7648 		xpath_stack stack;
7649 		bool oom;
7650 
xpath_stack_dataxpath_stack_data7651 		xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false)
7652 		{
7653 			blocks[0].next = blocks[1].next = 0;
7654 			blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
7655 
7656 			stack.result = &result;
7657 			stack.temp = &temp;
7658 		}
7659 
~xpath_stack_dataxpath_stack_data7660 		~xpath_stack_data()
7661 		{
7662 			result.release();
7663 			temp.release();
7664 		}
7665 	};
7666 PUGI__NS_END
7667 
7668 // String class
7669 PUGI__NS_BEGIN
7670 	class xpath_string
7671 	{
7672 		const char_t* _buffer;
7673 		bool _uses_heap;
7674 		size_t _length_heap;
7675 
duplicate_string(const char_t * string,size_t length,xpath_allocator * alloc)7676 		static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
7677 		{
7678 			char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
7679 			if (!result) return 0;
7680 
7681 			memcpy(result, string, length * sizeof(char_t));
7682 			result[length] = 0;
7683 
7684 			return result;
7685 		}
7686 
xpath_string(const char_t * buffer,bool uses_heap_,size_t length_heap)7687 		xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap)
7688 		{
7689 		}
7690 
7691 	public:
from_const(const char_t * str)7692 		static xpath_string from_const(const char_t* str)
7693 		{
7694 			return xpath_string(str, false, 0);
7695 		}
7696 
from_heap_preallocated(const char_t * begin,const char_t * end)7697 		static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end)
7698 		{
7699 			assert(begin <= end && *end == 0);
7700 
7701 			return xpath_string(begin, true, static_cast<size_t>(end - begin));
7702 		}
7703 
from_heap(const char_t * begin,const char_t * end,xpath_allocator * alloc)7704 		static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc)
7705 		{
7706 			assert(begin <= end);
7707 
7708 			if (begin == end)
7709 				return xpath_string();
7710 
7711 			size_t length = static_cast<size_t>(end - begin);
7712 			const char_t* data = duplicate_string(begin, length, alloc);
7713 
7714 			return data ? xpath_string(data, true, length) : xpath_string();
7715 		}
7716 
xpath_string()7717 		xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0)
7718 		{
7719 		}
7720 
append(const xpath_string & o,xpath_allocator * alloc)7721 		void append(const xpath_string& o, xpath_allocator* alloc)
7722 		{
7723 			// skip empty sources
7724 			if (!*o._buffer) return;
7725 
7726 			// fast append for constant empty target and constant source
7727 			if (!*_buffer && !_uses_heap && !o._uses_heap)
7728 			{
7729 				_buffer = o._buffer;
7730 			}
7731 			else
7732 			{
7733 				// need to make heap copy
7734 				size_t target_length = length();
7735 				size_t source_length = o.length();
7736 				size_t result_length = target_length + source_length;
7737 
7738 				// allocate new buffer
7739 				char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
7740 				if (!result) return;
7741 
7742 				// append first string to the new buffer in case there was no reallocation
7743 				if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
7744 
7745 				// append second string to the new buffer
7746 				memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
7747 				result[result_length] = 0;
7748 
7749 				// finalize
7750 				_buffer = result;
7751 				_uses_heap = true;
7752 				_length_heap = result_length;
7753 			}
7754 		}
7755 
c_str() const7756 		const char_t* c_str() const
7757 		{
7758 			return _buffer;
7759 		}
7760 
length() const7761 		size_t length() const
7762 		{
7763 			return _uses_heap ? _length_heap : strlength(_buffer);
7764 		}
7765 
data(xpath_allocator * alloc)7766 		char_t* data(xpath_allocator* alloc)
7767 		{
7768 			// make private heap copy
7769 			if (!_uses_heap)
7770 			{
7771 				size_t length_ = strlength(_buffer);
7772 				const char_t* data_ = duplicate_string(_buffer, length_, alloc);
7773 
7774 				if (!data_) return 0;
7775 
7776 				_buffer = data_;
7777 				_uses_heap = true;
7778 				_length_heap = length_;
7779 			}
7780 
7781 			return const_cast<char_t*>(_buffer);
7782 		}
7783 
empty() const7784 		bool empty() const
7785 		{
7786 			return *_buffer == 0;
7787 		}
7788 
operator ==(const xpath_string & o) const7789 		bool operator==(const xpath_string& o) const
7790 		{
7791 			return strequal(_buffer, o._buffer);
7792 		}
7793 
operator !=(const xpath_string & o) const7794 		bool operator!=(const xpath_string& o) const
7795 		{
7796 			return !strequal(_buffer, o._buffer);
7797 		}
7798 
uses_heap() const7799 		bool uses_heap() const
7800 		{
7801 			return _uses_heap;
7802 		}
7803 	};
7804 PUGI__NS_END
7805 
7806 PUGI__NS_BEGIN
starts_with(const char_t * string,const char_t * pattern)7807 	PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
7808 	{
7809 		while (*pattern && *string == *pattern)
7810 		{
7811 			string++;
7812 			pattern++;
7813 		}
7814 
7815 		return *pattern == 0;
7816 	}
7817 
find_char(const char_t * s,char_t c)7818 	PUGI__FN const char_t* find_char(const char_t* s, char_t c)
7819 	{
7820 	#ifdef PUGIXML_WCHAR_MODE
7821 		return wcschr(s, c);
7822 	#else
7823 		return strchr(s, c);
7824 	#endif
7825 	}
7826 
find_substring(const char_t * s,const char_t * p)7827 	PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
7828 	{
7829 	#ifdef PUGIXML_WCHAR_MODE
7830 		// MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
7831 		return (*p == 0) ? s : wcsstr(s, p);
7832 	#else
7833 		return strstr(s, p);
7834 	#endif
7835 	}
7836 
7837 	// Converts symbol to lower case, if it is an ASCII one
tolower_ascii(char_t ch)7838 	PUGI__FN char_t tolower_ascii(char_t ch)
7839 	{
7840 		return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
7841 	}
7842 
string_value(const xpath_node & na,xpath_allocator * alloc)7843 	PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
7844 	{
7845 		if (na.attribute())
7846 			return xpath_string::from_const(na.attribute().value());
7847 		else
7848 		{
7849 			xml_node n = na.node();
7850 
7851 			switch (n.type())
7852 			{
7853 			case node_pcdata:
7854 			case node_cdata:
7855 			case node_comment:
7856 			case node_pi:
7857 				return xpath_string::from_const(n.value());
7858 
7859 			case node_document:
7860 			case node_element:
7861 			{
7862 				xpath_string result;
7863 
7864 				// element nodes can have value if parse_embed_pcdata was used
7865 				if (n.value()[0])
7866 					result.append(xpath_string::from_const(n.value()), alloc);
7867 
7868 				xml_node cur = n.first_child();
7869 
7870 				while (cur && cur != n)
7871 				{
7872 					if (cur.type() == node_pcdata || cur.type() == node_cdata)
7873 						result.append(xpath_string::from_const(cur.value()), alloc);
7874 
7875 					if (cur.first_child())
7876 						cur = cur.first_child();
7877 					else if (cur.next_sibling())
7878 						cur = cur.next_sibling();
7879 					else
7880 					{
7881 						while (!cur.next_sibling() && cur != n)
7882 							cur = cur.parent();
7883 
7884 						if (cur != n) cur = cur.next_sibling();
7885 					}
7886 				}
7887 
7888 				return result;
7889 			}
7890 
7891 			default:
7892 				return xpath_string();
7893 			}
7894 		}
7895 	}
7896 
node_is_before_sibling(xml_node_struct * ln,xml_node_struct * rn)7897 	PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
7898 	{
7899 		assert(ln->parent == rn->parent);
7900 
7901 		// there is no common ancestor (the shared parent is null), nodes are from different documents
7902 		if (!ln->parent) return ln < rn;
7903 
7904 		// determine sibling order
7905 		xml_node_struct* ls = ln;
7906 		xml_node_struct* rs = rn;
7907 
7908 		while (ls && rs)
7909 		{
7910 			if (ls == rn) return true;
7911 			if (rs == ln) return false;
7912 
7913 			ls = ls->next_sibling;
7914 			rs = rs->next_sibling;
7915 		}
7916 
7917 		// if rn sibling chain ended ln must be before rn
7918 		return !rs;
7919 	}
7920 
node_is_before(xml_node_struct * ln,xml_node_struct * rn)7921 	PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn)
7922 	{
7923 		// find common ancestor at the same depth, if any
7924 		xml_node_struct* lp = ln;
7925 		xml_node_struct* rp = rn;
7926 
7927 		while (lp && rp && lp->parent != rp->parent)
7928 		{
7929 			lp = lp->parent;
7930 			rp = rp->parent;
7931 		}
7932 
7933 		// parents are the same!
7934 		if (lp && rp) return node_is_before_sibling(lp, rp);
7935 
7936 		// nodes are at different depths, need to normalize heights
7937 		bool left_higher = !lp;
7938 
7939 		while (lp)
7940 		{
7941 			lp = lp->parent;
7942 			ln = ln->parent;
7943 		}
7944 
7945 		while (rp)
7946 		{
7947 			rp = rp->parent;
7948 			rn = rn->parent;
7949 		}
7950 
7951 		// one node is the ancestor of the other
7952 		if (ln == rn) return left_higher;
7953 
7954 		// find common ancestor... again
7955 		while (ln->parent != rn->parent)
7956 		{
7957 			ln = ln->parent;
7958 			rn = rn->parent;
7959 		}
7960 
7961 		return node_is_before_sibling(ln, rn);
7962 	}
7963 
node_is_ancestor(xml_node_struct * parent,xml_node_struct * node)7964 	PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
7965 	{
7966 		while (node && node != parent) node = node->parent;
7967 
7968 		return parent && node == parent;
7969 	}
7970 
document_buffer_order(const xpath_node & xnode)7971 	PUGI__FN const void* document_buffer_order(const xpath_node& xnode)
7972 	{
7973 		xml_node_struct* node = xnode.node().internal_object();
7974 
7975 		if (node)
7976 		{
7977 			if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0)
7978 			{
7979 				if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name;
7980 				if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value;
7981 			}
7982 
7983 			return 0;
7984 		}
7985 
7986 		xml_attribute_struct* attr = xnode.attribute().internal_object();
7987 
7988 		if (attr)
7989 		{
7990 			if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0)
7991 			{
7992 				if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name;
7993 				if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
7994 			}
7995 
7996 			return 0;
7997 		}
7998 
7999 		return 0;
8000 	}
8001 
8002 	struct document_order_comparator
8003 	{
operator ()document_order_comparator8004 		bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
8005 		{
8006 			// optimized document order based check
8007 			const void* lo = document_buffer_order(lhs);
8008 			const void* ro = document_buffer_order(rhs);
8009 
8010 			if (lo && ro) return lo < ro;
8011 
8012 			// slow comparison
8013 			xml_node ln = lhs.node(), rn = rhs.node();
8014 
8015 			// compare attributes
8016 			if (lhs.attribute() && rhs.attribute())
8017 			{
8018 				// shared parent
8019 				if (lhs.parent() == rhs.parent())
8020 				{
8021 					// determine sibling order
8022 					for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
8023 						if (a == rhs.attribute())
8024 							return true;
8025 
8026 					return false;
8027 				}
8028 
8029 				// compare attribute parents
8030 				ln = lhs.parent();
8031 				rn = rhs.parent();
8032 			}
8033 			else if (lhs.attribute())
8034 			{
8035 				// attributes go after the parent element
8036 				if (lhs.parent() == rhs.node()) return false;
8037 
8038 				ln = lhs.parent();
8039 			}
8040 			else if (rhs.attribute())
8041 			{
8042 				// attributes go after the parent element
8043 				if (rhs.parent() == lhs.node()) return true;
8044 
8045 				rn = rhs.parent();
8046 			}
8047 
8048 			if (ln == rn) return false;
8049 
8050 			if (!ln || !rn) return ln < rn;
8051 
8052 			return node_is_before(ln.internal_object(), rn.internal_object());
8053 		}
8054 	};
8055 
8056 	struct duplicate_comparator
8057 	{
operator ()duplicate_comparator8058 		bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
8059 		{
8060 			if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;
8061 			else return rhs.attribute() ? false : lhs.node() < rhs.node();
8062 		}
8063 	};
8064 
gen_nan()8065 	PUGI__FN double gen_nan()
8066 	{
8067 	#if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
8068 		PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t));
8069 		typedef uint32_t UI; // BCC5 workaround
8070 		union { float f; UI i; } u;
8071 		u.i = 0x7fc00000;
8072 		return u.f;
8073 	#else
8074 		// fallback
8075 		const volatile double zero = 0.0;
8076 		return zero / zero;
8077 	#endif
8078 	}
8079 
is_nan(double value)8080 	PUGI__FN bool is_nan(double value)
8081 	{
8082 	#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
8083 		return !!_isnan(value);
8084 	#elif defined(fpclassify) && defined(FP_NAN)
8085 		return fpclassify(value) == FP_NAN;
8086 	#else
8087 		// fallback
8088 		const volatile double v = value;
8089 		return v != v;
8090 	#endif
8091 	}
8092 
convert_number_to_string_special(double value)8093 	PUGI__FN const char_t* convert_number_to_string_special(double value)
8094 	{
8095 	#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
8096 		if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
8097 		if (_isnan(value)) return PUGIXML_TEXT("NaN");
8098 		return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8099 	#elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
8100 		switch (fpclassify(value))
8101 		{
8102 		case FP_NAN:
8103 			return PUGIXML_TEXT("NaN");
8104 
8105 		case FP_INFINITE:
8106 			return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8107 
8108 		case FP_ZERO:
8109 			return PUGIXML_TEXT("0");
8110 
8111 		default:
8112 			return 0;
8113 		}
8114 	#else
8115 		// fallback
8116 		const volatile double v = value;
8117 
8118 		if (v == 0) return PUGIXML_TEXT("0");
8119 		if (v != v) return PUGIXML_TEXT("NaN");
8120 		if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8121 		return 0;
8122 	#endif
8123 	}
8124 
convert_number_to_boolean(double value)8125 	PUGI__FN bool convert_number_to_boolean(double value)
8126 	{
8127 		return (value != 0 && !is_nan(value));
8128 	}
8129 
truncate_zeros(char * begin,char * end)8130 	PUGI__FN void truncate_zeros(char* begin, char* end)
8131 	{
8132 		while (begin != end && end[-1] == '0') end--;
8133 
8134 		*end = 0;
8135 	}
8136 
8137 	// gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
8138 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
convert_number_to_mantissa_exponent(double value,char (& buffer)[32],char ** out_mantissa,int * out_exponent)8139 	PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
8140 	{
8141 		// get base values
8142 		int sign, exponent;
8143 		_ecvt_s(buffer, sizeof(buffer), value, DBL_DIG + 1, &exponent, &sign);
8144 
8145 		// truncate redundant zeros
8146 		truncate_zeros(buffer, buffer + strlen(buffer));
8147 
8148 		// fill results
8149 		*out_mantissa = buffer;
8150 		*out_exponent = exponent;
8151 	}
8152 #else
convert_number_to_mantissa_exponent(double value,char (& buffer)[32],char ** out_mantissa,int * out_exponent)8153 	PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
8154 	{
8155 		// get a scientific notation value with IEEE DBL_DIG decimals
8156 		PUGI__SNPRINTF(buffer, "%.*e", DBL_DIG, value);
8157 
8158 		// get the exponent (possibly negative)
8159 		char* exponent_string = strchr(buffer, 'e');
8160 		assert(exponent_string);
8161 
8162 		int exponent = atoi(exponent_string + 1);
8163 
8164 		// extract mantissa string: skip sign
8165 		char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
8166 		assert(mantissa[0] != '0' && mantissa[1] == '.');
8167 
8168 		// divide mantissa by 10 to eliminate integer part
8169 		mantissa[1] = mantissa[0];
8170 		mantissa++;
8171 		exponent++;
8172 
8173 		// remove extra mantissa digits and zero-terminate mantissa
8174 		truncate_zeros(mantissa, exponent_string);
8175 
8176 		// fill results
8177 		*out_mantissa = mantissa;
8178 		*out_exponent = exponent;
8179 	}
8180 #endif
8181 
convert_number_to_string(double value,xpath_allocator * alloc)8182 	PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
8183 	{
8184 		// try special number conversion
8185 		const char_t* special = convert_number_to_string_special(value);
8186 		if (special) return xpath_string::from_const(special);
8187 
8188 		// get mantissa + exponent form
8189 		char mantissa_buffer[32];
8190 
8191 		char* mantissa;
8192 		int exponent;
8193 		convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent);
8194 
8195 		// allocate a buffer of suitable length for the number
8196 		size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
8197 		char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
8198 		if (!result) return xpath_string();
8199 
8200 		// make the number!
8201 		char_t* s = result;
8202 
8203 		// sign
8204 		if (value < 0) *s++ = '-';
8205 
8206 		// integer part
8207 		if (exponent <= 0)
8208 		{
8209 			*s++ = '0';
8210 		}
8211 		else
8212 		{
8213 			while (exponent > 0)
8214 			{
8215 				assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9);
8216 				*s++ = *mantissa ? *mantissa++ : '0';
8217 				exponent--;
8218 			}
8219 		}
8220 
8221 		// fractional part
8222 		if (*mantissa)
8223 		{
8224 			// decimal point
8225 			*s++ = '.';
8226 
8227 			// extra zeroes from negative exponent
8228 			while (exponent < 0)
8229 			{
8230 				*s++ = '0';
8231 				exponent++;
8232 			}
8233 
8234 			// extra mantissa digits
8235 			while (*mantissa)
8236 			{
8237 				assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
8238 				*s++ = *mantissa++;
8239 			}
8240 		}
8241 
8242 		// zero-terminate
8243 		assert(s < result + result_size);
8244 		*s = 0;
8245 
8246 		return xpath_string::from_heap_preallocated(result, s);
8247 	}
8248 
check_string_to_number_format(const char_t * string)8249 	PUGI__FN bool check_string_to_number_format(const char_t* string)
8250 	{
8251 		// parse leading whitespace
8252 		while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8253 
8254 		// parse sign
8255 		if (*string == '-') ++string;
8256 
8257 		if (!*string) return false;
8258 
8259 		// if there is no integer part, there should be a decimal part with at least one digit
8260 		if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
8261 
8262 		// parse integer part
8263 		while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8264 
8265 		// parse decimal part
8266 		if (*string == '.')
8267 		{
8268 			++string;
8269 
8270 			while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8271 		}
8272 
8273 		// parse trailing whitespace
8274 		while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8275 
8276 		return *string == 0;
8277 	}
8278 
convert_string_to_number(const char_t * string)8279 	PUGI__FN double convert_string_to_number(const char_t* string)
8280 	{
8281 		// check string format
8282 		if (!check_string_to_number_format(string)) return gen_nan();
8283 
8284 		// parse string
8285 	#ifdef PUGIXML_WCHAR_MODE
8286 		return wcstod(string, 0);
8287 	#else
8288 		return strtod(string, 0);
8289 	#endif
8290 	}
8291 
convert_string_to_number_scratch(char_t (& buffer)[32],const char_t * begin,const char_t * end,double * out_result)8292 	PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
8293 	{
8294 		size_t length = static_cast<size_t>(end - begin);
8295 		char_t* scratch = buffer;
8296 
8297 		if (length >= sizeof(buffer) / sizeof(buffer[0]))
8298 		{
8299 			// need to make dummy on-heap copy
8300 			scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8301 			if (!scratch) return false;
8302 		}
8303 
8304 		// copy string to zero-terminated buffer and perform conversion
8305 		memcpy(scratch, begin, length * sizeof(char_t));
8306 		scratch[length] = 0;
8307 
8308 		*out_result = convert_string_to_number(scratch);
8309 
8310 		// free dummy buffer
8311 		if (scratch != buffer) xml_memory::deallocate(scratch);
8312 
8313 		return true;
8314 	}
8315 
round_nearest(double value)8316 	PUGI__FN double round_nearest(double value)
8317 	{
8318 		return floor(value + 0.5);
8319 	}
8320 
round_nearest_nzero(double value)8321 	PUGI__FN double round_nearest_nzero(double value)
8322 	{
8323 		// same as round_nearest, but returns -0 for [-0.5, -0]
8324 		// ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
8325 		return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
8326 	}
8327 
qualified_name(const xpath_node & node)8328 	PUGI__FN const char_t* qualified_name(const xpath_node& node)
8329 	{
8330 		return node.attribute() ? node.attribute().name() : node.node().name();
8331 	}
8332 
local_name(const xpath_node & node)8333 	PUGI__FN const char_t* local_name(const xpath_node& node)
8334 	{
8335 		const char_t* name = qualified_name(node);
8336 		const char_t* p = find_char(name, ':');
8337 
8338 		return p ? p + 1 : name;
8339 	}
8340 
8341 	struct namespace_uri_predicate
8342 	{
8343 		const char_t* prefix;
8344 		size_t prefix_length;
8345 
namespace_uri_predicatenamespace_uri_predicate8346 		namespace_uri_predicate(const char_t* name)
8347 		{
8348 			const char_t* pos = find_char(name, ':');
8349 
8350 			prefix = pos ? name : 0;
8351 			prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
8352 		}
8353 
operator ()namespace_uri_predicate8354 		bool operator()(xml_attribute a) const
8355 		{
8356 			const char_t* name = a.name();
8357 
8358 			if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
8359 
8360 			return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
8361 		}
8362 	};
8363 
namespace_uri(xml_node node)8364 	PUGI__FN const char_t* namespace_uri(xml_node node)
8365 	{
8366 		namespace_uri_predicate pred = node.name();
8367 
8368 		xml_node p = node;
8369 
8370 		while (p)
8371 		{
8372 			xml_attribute a = p.find_attribute(pred);
8373 
8374 			if (a) return a.value();
8375 
8376 			p = p.parent();
8377 		}
8378 
8379 		return PUGIXML_TEXT("");
8380 	}
8381 
namespace_uri(xml_attribute attr,xml_node parent)8382 	PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
8383 	{
8384 		namespace_uri_predicate pred = attr.name();
8385 
8386 		// Default namespace does not apply to attributes
8387 		if (!pred.prefix) return PUGIXML_TEXT("");
8388 
8389 		xml_node p = parent;
8390 
8391 		while (p)
8392 		{
8393 			xml_attribute a = p.find_attribute(pred);
8394 
8395 			if (a) return a.value();
8396 
8397 			p = p.parent();
8398 		}
8399 
8400 		return PUGIXML_TEXT("");
8401 	}
8402 
namespace_uri(const xpath_node & node)8403 	PUGI__FN const char_t* namespace_uri(const xpath_node& node)
8404 	{
8405 		return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
8406 	}
8407 
normalize_space(char_t * buffer)8408 	PUGI__FN char_t* normalize_space(char_t* buffer)
8409 	{
8410 		char_t* write = buffer;
8411 
8412 		for (char_t* it = buffer; *it; )
8413 		{
8414 			char_t ch = *it++;
8415 
8416 			if (PUGI__IS_CHARTYPE(ch, ct_space))
8417 			{
8418 				// replace whitespace sequence with single space
8419 				while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
8420 
8421 				// avoid leading spaces
8422 				if (write != buffer) *write++ = ' ';
8423 			}
8424 			else *write++ = ch;
8425 		}
8426 
8427 		// remove trailing space
8428 		if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
8429 
8430 		// zero-terminate
8431 		*write = 0;
8432 
8433 		return write;
8434 	}
8435 
translate(char_t * buffer,const char_t * from,const char_t * to,size_t to_length)8436 	PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
8437 	{
8438 		char_t* write = buffer;
8439 
8440 		while (*buffer)
8441 		{
8442 			PUGI__DMC_VOLATILE char_t ch = *buffer++;
8443 
8444 			const char_t* pos = find_char(from, ch);
8445 
8446 			if (!pos)
8447 				*write++ = ch; // do not process
8448 			else if (static_cast<size_t>(pos - from) < to_length)
8449 				*write++ = to[pos - from]; // replace
8450 		}
8451 
8452 		// zero-terminate
8453 		*write = 0;
8454 
8455 		return write;
8456 	}
8457 
translate_table_generate(xpath_allocator * alloc,const char_t * from,const char_t * to)8458 	PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
8459 	{
8460 		unsigned char table[128] = {0};
8461 
8462 		while (*from)
8463 		{
8464 			unsigned int fc = static_cast<unsigned int>(*from);
8465 			unsigned int tc = static_cast<unsigned int>(*to);
8466 
8467 			if (fc >= 128 || tc >= 128)
8468 				return 0;
8469 
8470 			// code=128 means "skip character"
8471 			if (!table[fc])
8472 				table[fc] = static_cast<unsigned char>(tc ? tc : 128);
8473 
8474 			from++;
8475 			if (tc) to++;
8476 		}
8477 
8478 		for (int i = 0; i < 128; ++i)
8479 			if (!table[i])
8480 				table[i] = static_cast<unsigned char>(i);
8481 
8482 		void* result = alloc->allocate(sizeof(table));
8483 		if (!result) return 0;
8484 
8485 		memcpy(result, table, sizeof(table));
8486 
8487 		return static_cast<unsigned char*>(result);
8488 	}
8489 
translate_table(char_t * buffer,const unsigned char * table)8490 	PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table)
8491 	{
8492 		char_t* write = buffer;
8493 
8494 		while (*buffer)
8495 		{
8496 			char_t ch = *buffer++;
8497 			unsigned int index = static_cast<unsigned int>(ch);
8498 
8499 			if (index < 128)
8500 			{
8501 				unsigned char code = table[index];
8502 
8503 				// code=128 means "skip character" (table size is 128 so 128 can be a special value)
8504 				// this code skips these characters without extra branches
8505 				*write = static_cast<char_t>(code);
8506 				write += 1 - (code >> 7);
8507 			}
8508 			else
8509 			{
8510 				*write++ = ch;
8511 			}
8512 		}
8513 
8514 		// zero-terminate
8515 		*write = 0;
8516 
8517 		return write;
8518 	}
8519 
is_xpath_attribute(const char_t * name)8520 	inline bool is_xpath_attribute(const char_t* name)
8521 	{
8522 		return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':'));
8523 	}
8524 
	// Boolean-typed XPath variable; instances are created by new_xpath_variable<T>
	struct xpath_variable_boolean: xpath_variable
	{
		// Starts out as false
		xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false)
		{
		}

		bool value;

		// First character of the zero-terminated variable name; new_xpath_variable<T> allocates
		// extra space past the end of the object and copies the full name here, so this
		// array has to stay the last member
		char_t name[1];
	};
8534 
	// Number-typed XPath variable; instances are created by new_xpath_variable<T>
	struct xpath_variable_number: xpath_variable
	{
		// Starts out as 0
		xpath_variable_number(): xpath_variable(xpath_type_number), value(0)
		{
		}

		double value;

		// First character of the zero-terminated variable name; new_xpath_variable<T> allocates
		// extra space past the end of the object and copies the full name here, so this
		// array has to stay the last member
		char_t name[1];
	};
8544 
	// String-typed XPath variable; instances are created by new_xpath_variable<T>
	struct xpath_variable_string: xpath_variable
	{
		// Starts out without a value (null pointer)
		xpath_variable_string(): xpath_variable(xpath_type_string), value(0)
		{
		}

		// The value buffer is released through xml_memory::deallocate, so whatever assigns it
		// is expected to obtain it from xml_memory as well (the assigning code is not in this struct)
		~xpath_variable_string()
		{
			if (value) xml_memory::deallocate(value);
		}

		// Owned string value, or null if none has been set
		char_t* value;

		// First character of the zero-terminated variable name; new_xpath_variable<T> allocates
		// extra space past the end of the object and copies the full name here, so this
		// array has to stay the last member
		char_t name[1];
	};
8559 
	// Node-set-typed XPath variable; instances are created by new_xpath_variable<T>
	struct xpath_variable_node_set: xpath_variable
	{
		// value is default-constructed
		xpath_variable_node_set(): xpath_variable(xpath_type_node_set)
		{
		}

		xpath_node_set value;

		// First character of the zero-terminated variable name; new_xpath_variable<T> allocates
		// extra space past the end of the object and copies the full name here, so this
		// array has to stay the last member
		char_t name[1];
	};
8569 
	// Shared default-constructed node set.
	// NOTE(review): presumably handed out by reference where a node-set value is requested
	// but none is available - confirm against its users.
	static const xpath_node_set dummy_node_set;
8571 
hash_string(const char_t * str)8572 	PUGI__FN PUGI__UNSIGNED_OVERFLOW unsigned int hash_string(const char_t* str)
8573 	{
8574 		// Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
8575 		unsigned int result = 0;
8576 
8577 		while (*str)
8578 		{
8579 			result += static_cast<unsigned int>(*str++);
8580 			result += result << 10;
8581 			result ^= result >> 6;
8582 		}
8583 
8584 		result += result << 3;
8585 		result ^= result >> 11;
8586 		result += result << 15;
8587 
8588 		return result;
8589 	}
8590 
new_xpath_variable(const char_t * name)8591 	template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
8592 	{
8593 		size_t length = strlength(name);
8594 		if (length == 0) return 0; // empty variable names are invalid
8595 
8596 		// $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
8597 		void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
8598 		if (!memory) return 0;
8599 
8600 		T* result = new (memory) T();
8601 
8602 		memcpy(result->name, name, (length + 1) * sizeof(char_t));
8603 
8604 		return result;
8605 	}
8606 
new_xpath_variable(xpath_value_type type,const char_t * name)8607 	PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
8608 	{
8609 		switch (type)
8610 		{
8611 		case xpath_type_node_set:
8612 			return new_xpath_variable<xpath_variable_node_set>(name);
8613 
8614 		case xpath_type_number:
8615 			return new_xpath_variable<xpath_variable_number>(name);
8616 
8617 		case xpath_type_string:
8618 			return new_xpath_variable<xpath_variable_string>(name);
8619 
8620 		case xpath_type_boolean:
8621 			return new_xpath_variable<xpath_variable_boolean>(name);
8622 
8623 		default:
8624 			return 0;
8625 		}
8626 	}
8627 
delete_xpath_variable(T * var)8628 	template <typename T> PUGI__FN void delete_xpath_variable(T* var)
8629 	{
8630 		var->~T();
8631 		xml_memory::deallocate(var);
8632 	}
8633 
delete_xpath_variable(xpath_value_type type,xpath_variable * var)8634 	PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
8635 	{
8636 		switch (type)
8637 		{
8638 		case xpath_type_node_set:
8639 			delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
8640 			break;
8641 
8642 		case xpath_type_number:
8643 			delete_xpath_variable(static_cast<xpath_variable_number*>(var));
8644 			break;
8645 
8646 		case xpath_type_string:
8647 			delete_xpath_variable(static_cast<xpath_variable_string*>(var));
8648 			break;
8649 
8650 		case xpath_type_boolean:
8651 			delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
8652 			break;
8653 
8654 		default:
8655 			assert(false && "Invalid variable type"); // unreachable
8656 		}
8657 	}
8658 
copy_xpath_variable(xpath_variable * lhs,const xpath_variable * rhs)8659 	PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
8660 	{
8661 		switch (rhs->type())
8662 		{
8663 		case xpath_type_node_set:
8664 			return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
8665 
8666 		case xpath_type_number:
8667 			return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
8668 
8669 		case xpath_type_string:
8670 			return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
8671 
8672 		case xpath_type_boolean:
8673 			return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
8674 
8675 		default:
8676 			assert(false && "Invalid variable type"); // unreachable
8677 			return false;
8678 		}
8679 	}
8680 
get_variable_scratch(char_t (& buffer)[32],xpath_variable_set * set,const char_t * begin,const char_t * end,xpath_variable ** out_result)8681 	PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
8682 	{
8683 		size_t length = static_cast<size_t>(end - begin);
8684 		char_t* scratch = buffer;
8685 
8686 		if (length >= sizeof(buffer) / sizeof(buffer[0]))
8687 		{
8688 			// need to make dummy on-heap copy
8689 			scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8690 			if (!scratch) return false;
8691 		}
8692 
8693 		// copy string to zero-terminated buffer and perform lookup
8694 		memcpy(scratch, begin, length * sizeof(char_t));
8695 		scratch[length] = 0;
8696 
8697 		*out_result = set->get(scratch);
8698 
8699 		// free dummy buffer
8700 		if (scratch != buffer) xml_memory::deallocate(scratch);
8701 
8702 		return true;
8703 	}
8704 PUGI__NS_END
8705 
8706 // Internal node set class
8707 PUGI__NS_BEGIN
xpath_get_order(const xpath_node * begin,const xpath_node * end)8708 	PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
8709 	{
8710 		if (end - begin < 2)
8711 			return xpath_node_set::type_sorted;
8712 
8713 		document_order_comparator cmp;
8714 
8715 		bool first = cmp(begin[0], begin[1]);
8716 
8717 		for (const xpath_node* it = begin + 1; it + 1 < end; ++it)
8718 			if (cmp(it[0], it[1]) != first)
8719 				return xpath_node_set::type_unsorted;
8720 
8721 		return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse;
8722 	}
8723 
xpath_sort(xpath_node * begin,xpath_node * end,xpath_node_set::type_t type,bool rev)8724 	PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
8725 	{
8726 		xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
8727 
8728 		if (type == xpath_node_set::type_unsorted)
8729 		{
8730 			xpath_node_set::type_t sorted = xpath_get_order(begin, end);
8731 
8732 			if (sorted == xpath_node_set::type_unsorted)
8733 			{
8734 				sort(begin, end, document_order_comparator());
8735 
8736 				type = xpath_node_set::type_sorted;
8737 			}
8738 			else
8739 				type = sorted;
8740 		}
8741 
8742 		if (type != order) reverse(begin, end);
8743 
8744 		return order;
8745 	}
8746 
xpath_first(const xpath_node * begin,const xpath_node * end,xpath_node_set::type_t type)8747 	PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
8748 	{
8749 		if (begin == end) return xpath_node();
8750 
8751 		switch (type)
8752 		{
8753 		case xpath_node_set::type_sorted:
8754 			return *begin;
8755 
8756 		case xpath_node_set::type_sorted_reverse:
8757 			return *(end - 1);
8758 
8759 		case xpath_node_set::type_unsorted:
8760 			return *min_element(begin, end, document_order_comparator());
8761 
8762 		default:
8763 			assert(false && "Invalid node set type"); // unreachable
8764 			return xpath_node();
8765 		}
8766 	}
8767 
8768 	class xpath_node_set_raw
8769 	{
8770 		xpath_node_set::type_t _type;
8771 
8772 		xpath_node* _begin;
8773 		xpath_node* _end;
8774 		xpath_node* _eos;
8775 
8776 	public:
xpath_node_set_raw()8777 		xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
8778 		{
8779 		}
8780 
begin() const8781 		xpath_node* begin() const
8782 		{
8783 			return _begin;
8784 		}
8785 
end() const8786 		xpath_node* end() const
8787 		{
8788 			return _end;
8789 		}
8790 
empty() const8791 		bool empty() const
8792 		{
8793 			return _begin == _end;
8794 		}
8795 
size() const8796 		size_t size() const
8797 		{
8798 			return static_cast<size_t>(_end - _begin);
8799 		}
8800 
first() const8801 		xpath_node first() const
8802 		{
8803 			return xpath_first(_begin, _end, _type);
8804 		}
8805 
8806 		void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
8807 
push_back(const xpath_node & node,xpath_allocator * alloc)8808 		void push_back(const xpath_node& node, xpath_allocator* alloc)
8809 		{
8810 			if (_end != _eos)
8811 				*_end++ = node;
8812 			else
8813 				push_back_grow(node, alloc);
8814 		}
8815 
append(const xpath_node * begin_,const xpath_node * end_,xpath_allocator * alloc)8816 		void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
8817 		{
8818 			if (begin_ == end_) return;
8819 
8820 			size_t size_ = static_cast<size_t>(_end - _begin);
8821 			size_t capacity = static_cast<size_t>(_eos - _begin);
8822 			size_t count = static_cast<size_t>(end_ - begin_);
8823 
8824 			if (size_ + count > capacity)
8825 			{
8826 				// reallocate the old array or allocate a new one
8827 				xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
8828 				if (!data) return;
8829 
8830 				// finalize
8831 				_begin = data;
8832 				_end = data + size_;
8833 				_eos = data + size_ + count;
8834 			}
8835 
8836 			memcpy(_end, begin_, count * sizeof(xpath_node));
8837 			_end += count;
8838 		}
8839 
sort_do()8840 		void sort_do()
8841 		{
8842 			_type = xpath_sort(_begin, _end, _type, false);
8843 		}
8844 
truncate(xpath_node * pos)8845 		void truncate(xpath_node* pos)
8846 		{
8847 			assert(_begin <= pos && pos <= _end);
8848 
8849 			_end = pos;
8850 		}
8851 
remove_duplicates()8852 		void remove_duplicates()
8853 		{
8854 			if (_type == xpath_node_set::type_unsorted)
8855 				sort(_begin, _end, duplicate_comparator());
8856 
8857 			_end = unique(_begin, _end);
8858 		}
8859 
type() const8860 		xpath_node_set::type_t type() const
8861 		{
8862 			return _type;
8863 		}
8864 
set_type(xpath_node_set::type_t value)8865 		void set_type(xpath_node_set::type_t value)
8866 		{
8867 			_type = value;
8868 		}
8869 	};
8870 
push_back_grow(const xpath_node & node,xpath_allocator * alloc)8871 	PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
8872 	{
8873 		size_t capacity = static_cast<size_t>(_eos - _begin);
8874 
8875 		// get new capacity (1.5x rule)
8876 		size_t new_capacity = capacity + capacity / 2 + 1;
8877 
8878 		// reallocate the old array or allocate a new one
8879 		xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
8880 		if (!data) return;
8881 
8882 		// finalize
8883 		_begin = data;
8884 		_end = data + capacity;
8885 		_eos = data + new_capacity;
8886 
8887 		// push
8888 		*_end++ = node;
8889 	}
8890 PUGI__NS_END
8891 
8892 PUGI__NS_BEGIN
8893 	struct xpath_context
8894 	{
8895 		xpath_node n;
8896 		size_t position, size;
8897 
xpath_contextxpath_context8898 		xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
8899 		{
8900 		}
8901 	};
8902 
	// Token kinds produced by xpath_lexer::next()
	enum lexeme_t
	{
		lex_none = 0,				// unrecognized input (including unterminated string literals and lone '!', ':' or '$')
		lex_equal,					// =
		lex_not_equal,				// !=
		lex_less,					// <
		lex_greater,				// >
		lex_less_or_equal,			// <=
		lex_greater_or_equal,		// >=
		lex_plus,					// +
		lex_minus,					// -
		lex_multiply,				// *
		lex_union,					// |
		lex_var_ref,				// $name or $prefix:name; contents exclude the '$'
		lex_open_brace,				// (
		lex_close_brace,			// )
		lex_quoted_string,			// '...' or "..."; contents exclude the quotes
		lex_number,					// digits with an optional fractional part, or '.' followed by digits
		lex_slash,					// /
		lex_double_slash,			// //
		lex_open_square_brace,		// [
		lex_close_square_brace,		// ]
		lex_string,					// name, prefix:name or prefix:*
		lex_comma,					// ,
		lex_axis_attribute,			// @
		lex_dot,					// .
		lex_double_dot,				// ..
		lex_double_colon,			// ::
		lex_eof						// end of the query string
	};
8933 
8934 	struct xpath_lexer_string
8935 	{
8936 		const char_t* begin;
8937 		const char_t* end;
8938 
xpath_lexer_stringxpath_lexer_string8939 		xpath_lexer_string(): begin(0), end(0)
8940 		{
8941 		}
8942 
operator ==xpath_lexer_string8943 		bool operator==(const char_t* other) const
8944 		{
8945 			size_t length = static_cast<size_t>(end - begin);
8946 
8947 			return strequalrange(other, begin, length);
8948 		}
8949 	};
8950 
	// Tokenizer for XPath queries.
	// Works directly on the zero-terminated query string without copying it: the lexer always
	// holds exactly one "current" lexeme, and next() advances to the following one.
	class xpath_lexer
	{
		// position in the query right after the current lexeme
		const char_t* _cur;
		// position where the current lexeme starts (leading whitespace already skipped)
		const char_t* _cur_lexeme_pos;
		// text range of the current lexeme; only updated for lexemes that carry contents (see contents())
		xpath_lexer_string _cur_lexeme_contents;

		// kind of the current lexeme
		lexeme_t _cur_lexeme;

	public:
		// Starts tokenizing query and immediately reads the first lexeme
		explicit xpath_lexer(const char_t* query): _cur(query)
		{
			next();
		}

		// Position in the query right after the current lexeme
		const char_t* state() const
		{
			return _cur;
		}

		// Skips whitespace and reads the next lexeme, updating current(), current_pos() and,
		// for lexemes that have them, contents().
		// Input that doesn't form a valid lexeme produces lex_none; reaching the terminating zero
		// produces lex_eof without moving past it.
		void next()
		{
			const char_t* cur = _cur;

			while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;

			// save lexeme position for error reporting
			_cur_lexeme_pos = cur;

			switch (*cur)
			{
			case 0:
				_cur_lexeme = lex_eof;
				break;

			case '>':
				if (*(cur+1) == '=')
				{
					cur += 2;
					_cur_lexeme = lex_greater_or_equal;
				}
				else
				{
					cur += 1;
					_cur_lexeme = lex_greater;
				}
				break;

			case '<':
				if (*(cur+1) == '=')
				{
					cur += 2;
					_cur_lexeme = lex_less_or_equal;
				}
				else
				{
					cur += 1;
					_cur_lexeme = lex_less;
				}
				break;

			case '!':
				// '!' is only valid as part of '!='
				if (*(cur+1) == '=')
				{
					cur += 2;
					_cur_lexeme = lex_not_equal;
				}
				else
				{
					// the position is deliberately left on the offending character
					_cur_lexeme = lex_none;
				}
				break;

			case '=':
				cur += 1;
				_cur_lexeme = lex_equal;

				break;

			case '+':
				cur += 1;
				_cur_lexeme = lex_plus;

				break;

			case '-':
				cur += 1;
				_cur_lexeme = lex_minus;

				break;

			case '*':
				cur += 1;
				_cur_lexeme = lex_multiply;

				break;

			case '|':
				cur += 1;
				_cur_lexeme = lex_union;

				break;

			case '$':
				// variable reference: '$' followed by a name or a prefix:name pair
				cur += 1;

				if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
				{
					// contents start after the '$'
					_cur_lexeme_contents.begin = cur;

					while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;

					if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
					{
						cur++; // :

						while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
					}

					_cur_lexeme_contents.end = cur;

					_cur_lexeme = lex_var_ref;
				}
				else
				{
					_cur_lexeme = lex_none;
				}

				break;

			case '(':
				cur += 1;
				_cur_lexeme = lex_open_brace;

				break;

			case ')':
				cur += 1;
				_cur_lexeme = lex_close_brace;

				break;

			case '[':
				cur += 1;
				_cur_lexeme = lex_open_square_brace;

				break;

			case ']':
				cur += 1;
				_cur_lexeme = lex_close_square_brace;

				break;

			case ',':
				cur += 1;
				_cur_lexeme = lex_comma;

				break;

			case '/':
				if (*(cur+1) == '/')
				{
					cur += 2;
					_cur_lexeme = lex_double_slash;
				}
				else
				{
					cur += 1;
					_cur_lexeme = lex_slash;
				}
				break;

			case '.':
				// '..', a number that starts with the decimal point (e.g. '.5'), or a single '.'
				if (*(cur+1) == '.')
				{
					cur += 2;
					_cur_lexeme = lex_double_dot;
				}
				else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
				{
					_cur_lexeme_contents.begin = cur; // .

					++cur;

					while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;

					_cur_lexeme_contents.end = cur;

					_cur_lexeme = lex_number;
				}
				else
				{
					cur += 1;
					_cur_lexeme = lex_dot;
				}
				break;

			case '@':
				cur += 1;
				_cur_lexeme = lex_axis_attribute;

				break;

			case '"':
			case '\'':
			{
				// string literal; it ends at the next occurrence of the quote that opened it
				char_t terminator = *cur;

				++cur;

				// contents exclude both quotes
				_cur_lexeme_contents.begin = cur;
				while (*cur && *cur != terminator) cur++;
				_cur_lexeme_contents.end = cur;

				// hitting the end of the query before the closing quote is an error
				if (!*cur)
					_cur_lexeme = lex_none;
				else
				{
					cur += 1;
					_cur_lexeme = lex_quoted_string;
				}

				break;
			}

			case ':':
				// ':' is only valid here as part of '::'
				if (*(cur+1) == ':')
				{
					cur += 2;
					_cur_lexeme = lex_double_colon;
				}
				else
				{
					_cur_lexeme = lex_none;
				}
				break;

			default:
				if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
				{
					// number: digits, optionally followed by '.' and more digits (which may be absent, as in '1.')
					_cur_lexeme_contents.begin = cur;

					while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;

					if (*cur == '.')
					{
						cur++;

						while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
					}

					_cur_lexeme_contents.end = cur;

					_cur_lexeme = lex_number;
				}
				else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
				{
					// name, optionally extended with ':*' or ':name'
					_cur_lexeme_contents.begin = cur;

					while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;

					if (cur[0] == ':')
					{
						if (cur[1] == '*') // namespace test ncname:*
						{
							cur += 2; // :*
						}
						else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
						{
							cur++; // :

							while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
						}
						// any other ':' (e.g. the start of '::') is left for the next lexeme
					}

					_cur_lexeme_contents.end = cur;

					_cur_lexeme = lex_string;
				}
				else
				{
					_cur_lexeme = lex_none;
				}
			}

			_cur = cur;
		}

		// Kind of the current lexeme
		lexeme_t current() const
		{
			return _cur_lexeme;
		}

		// Start position of the current lexeme in the query string
		const char_t* current_pos() const
		{
			return _cur_lexeme_pos;
		}

		// Text of the current lexeme; may only be called for the lexeme kinds that record it
		// (variable reference, number, name and quoted string)
		const xpath_lexer_string& contents() const
		{
			assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);

			return _cur_lexeme_contents;
		}
	};
9256 
	// Kinds of XPath AST nodes; the comments use left/right/third for the operands of the node
	enum ast_type_t
	{
		ast_unknown,
		ast_op_or,						// left or right
		ast_op_and,						// left and right
		ast_op_equal,					// left = right
		ast_op_not_equal,				// left != right
		ast_op_less,					// left < right
		ast_op_greater,					// left > right
		ast_op_less_or_equal,			// left <= right
		ast_op_greater_or_equal,		// left >= right
		ast_op_add,						// left + right
		ast_op_subtract,				// left - right
		ast_op_multiply,				// left * right
		ast_op_divide,					// left div right
		ast_op_mod,						// left mod right
		ast_op_negate,					// -left
		ast_op_union,					// left | right
		ast_predicate,					// apply predicate to set; next points to next predicate
		ast_filter,						// select * from left where right
		ast_string_constant,			// string constant
		ast_number_constant,			// number constant
		ast_variable,					// variable
		ast_func_last,					// last()
		ast_func_position,				// position()
		ast_func_count,					// count(left)
		ast_func_id,					// id(left)
		ast_func_local_name_0,			// local-name()
		ast_func_local_name_1,			// local-name(left)
		ast_func_namespace_uri_0,		// namespace-uri()
		ast_func_namespace_uri_1,		// namespace-uri(left)
		ast_func_name_0,				// name()
		ast_func_name_1,				// name(left)
		ast_func_string_0,				// string()
		ast_func_string_1,				// string(left)
		ast_func_concat,				// concat(left, right, siblings)
		ast_func_starts_with,			// starts-with(left, right)
		ast_func_contains,				// contains(left, right)
		ast_func_substring_before,		// substring-before(left, right)
		ast_func_substring_after,		// substring-after(left, right)
		ast_func_substring_2,			// substring(left, right)
		ast_func_substring_3,			// substring(left, right, third)
		ast_func_string_length_0,		// string-length()
		ast_func_string_length_1,		// string-length(left)
		ast_func_normalize_space_0,		// normalize-space()
		ast_func_normalize_space_1,		// normalize-space(left)
		ast_func_translate,				// translate(left, right, third)
		ast_func_boolean,				// boolean(left)
		ast_func_not,					// not(left)
		ast_func_true,					// true()
		ast_func_false,					// false()
		ast_func_lang,					// lang(left)
		ast_func_number_0,				// number()
		ast_func_number_1,				// number(left)
		ast_func_sum,					// sum(left)
		ast_func_floor,					// floor(left)
		ast_func_ceiling,				// ceiling(left)
		ast_func_round,					// round(left)
		ast_step,						// process set left with step
		ast_step_root,					// select root node

		// optimized forms of the nodes above
		ast_opt_translate_table,		// translate(left, right, third) where right/third are constants
		ast_opt_compare_attribute		// @name = 'string'
	};
9321 
	// Axes of a location step; the enumerators are named after the XPath axes
	// (ancestor, ancestor-or-self, attribute, child, ...)
	enum axis_t
	{
		axis_ancestor,
		axis_ancestor_or_self,
		axis_attribute,
		axis_child,
		axis_descendant,
		axis_descendant_or_self,
		axis_following,
		axis_following_sibling,
		axis_namespace,
		axis_parent,
		axis_preceding,
		axis_preceding_sibling,
		axis_self
	};
9338 
	// Kinds of node tests used by location steps.
	// NOTE(review): the per-value notes below are inferred from the enumerator names and
	// XPath node test syntax - confirm against the parser/evaluator.
	enum nodetest_t
	{
		nodetest_none,
		nodetest_name,					// presumably a test by name
		nodetest_type_node,				// presumably node()
		nodetest_type_comment,			// presumably comment()
		nodetest_type_pi,				// presumably processing-instruction()
		nodetest_type_text,				// presumably text()
		nodetest_pi,					// presumably processing-instruction('target')
		nodetest_all,					// presumably *
		nodetest_all_in_namespace		// presumably prefix:*
	};
9351 
	// Classification of predicate expressions.
	// NOTE(review): the code that assigns and consumes these values is not nearby; judging by
	// the names they distinguish general predicates from position-invariant ones and from
	// constant numeric ones (with the constant 1 as a special case) - confirm before relying on this.
	enum predicate_t
	{
		predicate_default,
		predicate_posinv,
		predicate_constant,
		predicate_constant_one
	};
9359 
	// Tells node set evaluation how much of the result the caller needs.
	// eval_once() shows how the modes differ: nodeset_eval_any permits stopping after one match
	// regardless of ordering, nodeset_eval_first only when the set is in document order
	// (type_sorted), and nodeset_eval_all never.
	enum nodeset_eval_t
	{
		nodeset_eval_all,
		nodeset_eval_any,
		nodeset_eval_first
	};
9366 
	// Wraps an axis_t value into a distinct type, so that an axis can be passed as a typed
	// argument while remaining a compile-time constant (available as axis_to_type<N>::axis)
	template <axis_t N> struct axis_to_type
	{
		static const axis_t axis;
	};

	// Out-of-class definition of the static member; its value is the template argument
	template <axis_t N> const axis_t axis_to_type<N>::axis = N;
9373 
9374 	class xpath_ast_node
9375 	{
9376 	private:
9377 		// node type
9378 		char _type;
9379 		char _rettype;
9380 
9381 		// for ast_step
9382 		char _axis;
9383 
9384 		// for ast_step/ast_predicate/ast_filter
9385 		char _test;
9386 
9387 		// tree node structure
9388 		xpath_ast_node* _left;
9389 		xpath_ast_node* _right;
9390 		xpath_ast_node* _next;
9391 
9392 		union
9393 		{
9394 			// value for ast_string_constant
9395 			const char_t* string;
9396 			// value for ast_number_constant
9397 			double number;
9398 			// variable for ast_variable
9399 			xpath_variable* variable;
9400 			// node test for ast_step (node name/namespace/node type/pi target)
9401 			const char_t* nodetest;
9402 			// table for ast_opt_translate_table
9403 			const unsigned char* table;
9404 		} _data;
9405 
9406 		xpath_ast_node(const xpath_ast_node&);
9407 		xpath_ast_node& operator=(const xpath_ast_node&);
9408 
compare_eq(xpath_ast_node * lhs,xpath_ast_node * rhs,const xpath_context & c,const xpath_stack & stack,const Comp & comp)9409 		template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9410 		{
9411 			xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9412 
9413 			if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9414 			{
9415 				if (lt == xpath_type_boolean || rt == xpath_type_boolean)
9416 					return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
9417 				else if (lt == xpath_type_number || rt == xpath_type_number)
9418 					return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9419 				else if (lt == xpath_type_string || rt == xpath_type_string)
9420 				{
9421 					xpath_allocator_capture cr(stack.result);
9422 
9423 					xpath_string ls = lhs->eval_string(c, stack);
9424 					xpath_string rs = rhs->eval_string(c, stack);
9425 
9426 					return comp(ls, rs);
9427 				}
9428 			}
9429 			else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9430 			{
9431 				xpath_allocator_capture cr(stack.result);
9432 
9433 				xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9434 				xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9435 
9436 				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9437 					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9438 					{
9439 						xpath_allocator_capture cri(stack.result);
9440 
9441 						if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
9442 							return true;
9443 					}
9444 
9445 				return false;
9446 			}
9447 			else
9448 			{
9449 				if (lt == xpath_type_node_set)
9450 				{
9451 					swap(lhs, rhs);
9452 					swap(lt, rt);
9453 				}
9454 
9455 				if (lt == xpath_type_boolean)
9456 					return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
9457 				else if (lt == xpath_type_number)
9458 				{
9459 					xpath_allocator_capture cr(stack.result);
9460 
9461 					double l = lhs->eval_number(c, stack);
9462 					xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9463 
9464 					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9465 					{
9466 						xpath_allocator_capture cri(stack.result);
9467 
9468 						if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9469 							return true;
9470 					}
9471 
9472 					return false;
9473 				}
9474 				else if (lt == xpath_type_string)
9475 				{
9476 					xpath_allocator_capture cr(stack.result);
9477 
9478 					xpath_string l = lhs->eval_string(c, stack);
9479 					xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9480 
9481 					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9482 					{
9483 						xpath_allocator_capture cri(stack.result);
9484 
9485 						if (comp(l, string_value(*ri, stack.result)))
9486 							return true;
9487 					}
9488 
9489 					return false;
9490 				}
9491 			}
9492 
9493 			assert(false && "Wrong types"); // unreachable
9494 			return false;
9495 		}
9496 
eval_once(xpath_node_set::type_t type,nodeset_eval_t eval)9497 		static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval)
9498 		{
9499 			return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any;
9500 		}
9501 
compare_rel(xpath_ast_node * lhs,xpath_ast_node * rhs,const xpath_context & c,const xpath_stack & stack,const Comp & comp)9502 		template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9503 		{
9504 			xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9505 
9506 			if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9507 				return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9508 			else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9509 			{
9510 				xpath_allocator_capture cr(stack.result);
9511 
9512 				xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9513 				xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9514 
9515 				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9516 				{
9517 					xpath_allocator_capture cri(stack.result);
9518 
9519 					double l = convert_string_to_number(string_value(*li, stack.result).c_str());
9520 
9521 					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9522 					{
9523 						xpath_allocator_capture crii(stack.result);
9524 
9525 						if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9526 							return true;
9527 					}
9528 				}
9529 
9530 				return false;
9531 			}
9532 			else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
9533 			{
9534 				xpath_allocator_capture cr(stack.result);
9535 
9536 				double l = lhs->eval_number(c, stack);
9537 				xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9538 
9539 				for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9540 				{
9541 					xpath_allocator_capture cri(stack.result);
9542 
9543 					if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9544 						return true;
9545 				}
9546 
9547 				return false;
9548 			}
9549 			else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
9550 			{
9551 				xpath_allocator_capture cr(stack.result);
9552 
9553 				xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9554 				double r = rhs->eval_number(c, stack);
9555 
9556 				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9557 				{
9558 					xpath_allocator_capture cri(stack.result);
9559 
9560 					if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
9561 						return true;
9562 				}
9563 
9564 				return false;
9565 			}
9566 			else
9567 			{
9568 				assert(false && "Wrong types"); // unreachable
9569 				return false;
9570 			}
9571 		}
9572 
apply_predicate_boolean(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack,bool once)9573 		static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9574 		{
9575 			assert(ns.size() >= first);
9576 			assert(expr->rettype() != xpath_type_number);
9577 
9578 			size_t i = 1;
9579 			size_t size = ns.size() - first;
9580 
9581 			xpath_node* last = ns.begin() + first;
9582 
9583 			// remove_if... or well, sort of
9584 			for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9585 			{
9586 				xpath_context c(*it, i, size);
9587 
9588 				if (expr->eval_boolean(c, stack))
9589 				{
9590 					*last++ = *it;
9591 
9592 					if (once) break;
9593 				}
9594 			}
9595 
9596 			ns.truncate(last);
9597 		}
9598 
apply_predicate_number(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack,bool once)9599 		static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9600 		{
9601 			assert(ns.size() >= first);
9602 			assert(expr->rettype() == xpath_type_number);
9603 
9604 			size_t i = 1;
9605 			size_t size = ns.size() - first;
9606 
9607 			xpath_node* last = ns.begin() + first;
9608 
9609 			// remove_if... or well, sort of
9610 			for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9611 			{
9612 				xpath_context c(*it, i, size);
9613 
9614 				if (expr->eval_number(c, stack) == i)
9615 				{
9616 					*last++ = *it;
9617 
9618 					if (once) break;
9619 				}
9620 			}
9621 
9622 			ns.truncate(last);
9623 		}
9624 
apply_predicate_number_const(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack)9625 		static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
9626 		{
9627 			assert(ns.size() >= first);
9628 			assert(expr->rettype() == xpath_type_number);
9629 
9630 			size_t size = ns.size() - first;
9631 
9632 			xpath_node* last = ns.begin() + first;
9633 
9634 			xpath_context c(xpath_node(), 1, size);
9635 
9636 			double er = expr->eval_number(c, stack);
9637 
9638 			if (er >= 1.0 && er <= size)
9639 			{
9640 				size_t eri = static_cast<size_t>(er);
9641 
9642 				if (er == eri)
9643 				{
9644 					xpath_node r = last[eri - 1];
9645 
9646 					*last++ = r;
9647 				}
9648 			}
9649 
9650 			ns.truncate(last);
9651 		}
9652 
apply_predicate(xpath_node_set_raw & ns,size_t first,const xpath_stack & stack,bool once)9653 		void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once)
9654 		{
9655 			if (ns.size() == first) return;
9656 
9657 			assert(_type == ast_filter || _type == ast_predicate);
9658 
9659 			if (_test == predicate_constant || _test == predicate_constant_one)
9660 				apply_predicate_number_const(ns, first, _right, stack);
9661 			else if (_right->rettype() == xpath_type_number)
9662 				apply_predicate_number(ns, first, _right, stack, once);
9663 			else
9664 				apply_predicate_boolean(ns, first, _right, stack, once);
9665 		}
9666 
apply_predicates(xpath_node_set_raw & ns,size_t first,const xpath_stack & stack,nodeset_eval_t eval)9667 		void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval)
9668 		{
9669 			if (ns.size() == first) return;
9670 
9671 			bool last_once = eval_once(ns.type(), eval);
9672 
9673 			for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
9674 				pred->apply_predicate(ns, first, stack, !pred->_next && last_once);
9675 		}
9676 
step_push(xpath_node_set_raw & ns,xml_attribute_struct * a,xml_node_struct * parent,xpath_allocator * alloc)9677 		bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc)
9678 		{
9679 			assert(a);
9680 
9681 			const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
9682 
9683 			switch (_test)
9684 			{
9685 			case nodetest_name:
9686 				if (strequal(name, _data.nodetest) && is_xpath_attribute(name))
9687 				{
9688 					ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9689 					return true;
9690 				}
9691 				break;
9692 
9693 			case nodetest_type_node:
9694 			case nodetest_all:
9695 				if (is_xpath_attribute(name))
9696 				{
9697 					ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9698 					return true;
9699 				}
9700 				break;
9701 
9702 			case nodetest_all_in_namespace:
9703 				if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
9704 				{
9705 					ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9706 					return true;
9707 				}
9708 				break;
9709 
9710 			default:
9711 				;
9712 			}
9713 
9714 			return false;
9715 		}
9716 
step_push(xpath_node_set_raw & ns,xml_node_struct * n,xpath_allocator * alloc)9717 		bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc)
9718 		{
9719 			assert(n);
9720 
9721 			xml_node_type type = PUGI__NODETYPE(n);
9722 
9723 			switch (_test)
9724 			{
9725 			case nodetest_name:
9726 				if (type == node_element && n->name && strequal(n->name, _data.nodetest))
9727 				{
9728 					ns.push_back(xml_node(n), alloc);
9729 					return true;
9730 				}
9731 				break;
9732 
9733 			case nodetest_type_node:
9734 				ns.push_back(xml_node(n), alloc);
9735 				return true;
9736 
9737 			case nodetest_type_comment:
9738 				if (type == node_comment)
9739 				{
9740 					ns.push_back(xml_node(n), alloc);
9741 					return true;
9742 				}
9743 				break;
9744 
9745 			case nodetest_type_text:
9746 				if (type == node_pcdata || type == node_cdata)
9747 				{
9748 					ns.push_back(xml_node(n), alloc);
9749 					return true;
9750 				}
9751 				break;
9752 
9753 			case nodetest_type_pi:
9754 				if (type == node_pi)
9755 				{
9756 					ns.push_back(xml_node(n), alloc);
9757 					return true;
9758 				}
9759 				break;
9760 
9761 			case nodetest_pi:
9762 				if (type == node_pi && n->name && strequal(n->name, _data.nodetest))
9763 				{
9764 					ns.push_back(xml_node(n), alloc);
9765 					return true;
9766 				}
9767 				break;
9768 
9769 			case nodetest_all:
9770 				if (type == node_element)
9771 				{
9772 					ns.push_back(xml_node(n), alloc);
9773 					return true;
9774 				}
9775 				break;
9776 
9777 			case nodetest_all_in_namespace:
9778 				if (type == node_element && n->name && starts_with(n->name, _data.nodetest))
9779 				{
9780 					ns.push_back(xml_node(n), alloc);
9781 					return true;
9782 				}
9783 				break;
9784 
9785 			default:
9786 				assert(false && "Unknown axis"); // unreachable
9787 			}
9788 
9789 			return false;
9790 		}
9791 
		// Collects into ns the nodes reachable from node n along the axis selected at compile time by T::axis.
		// Every candidate goes through step_push, so only nodes accepted by this step's node test are stored.
		// When once is true the function returns right after the first candidate that step_push accepted;
		// the self and parent axes ignore once since they offer at most one candidate.
		// The last parameter is an unnamed tag object: only its type is used.
		template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T)
		{
			const axis_t axis = T::axis;

			switch (axis)
			{
			case axis_attribute:
			{
				// attributes of n, in list order
				for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
					if (step_push(ns, a, n, alloc) & once)
						return;

				break;
			}

			case axis_child:
			{
				// direct children of n, first to last
				for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
					if (step_push(ns, c, alloc) & once)
						return;

				break;
			}

			case axis_descendant:
			case axis_descendant_or_self:
			{
				// the "or-self" flavour offers n itself before its subtree
				if (axis == axis_descendant_or_self)
					if (step_push(ns, n, alloc) & once)
						return;

				xml_node_struct* cur = n->first_child;

				// iterative pre-order walk of the subtree below n
				while (cur)
				{
					if (step_push(ns, cur, alloc) & once)
						return;

					if (cur->first_child)
						cur = cur->first_child;
					else
					{
						// no children: climb until a next sibling exists, stopping once we are back at n
						while (!cur->next_sibling)
						{
							cur = cur->parent;

							if (cur == n) return;
						}

						cur = cur->next_sibling;
					}
				}

				break;
			}

			case axis_following_sibling:
			{
				// siblings after n, nearest first
				for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
					if (step_push(ns, c, alloc) & once)
						return;

				break;
			}

			case axis_preceding_sibling:
			{
				// siblings before n, nearest first
				// NOTE(review): the loop condition implies prev_sibling_c of the first sibling wraps around to the
				// last sibling (the only one whose next_sibling is null), which is what ends the loop - confirm against the node layout
				for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
					if (step_push(ns, c, alloc) & once)
						return;

				break;
			}

			case axis_following:
			{
				xml_node_struct* cur = n;

				// exit from this node so that we don't include descendants
				while (!cur->next_sibling)
				{
					cur = cur->parent;

					if (!cur) return;
				}

				cur = cur->next_sibling;

				// pre-order walk over everything that comes after n's subtree, up to the end of the tree
				while (cur)
				{
					if (step_push(ns, cur, alloc) & once)
						return;

					if (cur->first_child)
						cur = cur->first_child;
					else
					{
						// no children: climb until a next sibling exists, or stop when the root has been passed
						while (!cur->next_sibling)
						{
							cur = cur->parent;

							if (!cur) return;
						}

						cur = cur->next_sibling;
					}
				}

				break;
			}

			case axis_preceding:
			{
				xml_node_struct* cur = n;

				// exit from this node so that we don't include descendants
				while (!cur->prev_sibling_c->next_sibling)
				{
					cur = cur->parent;

					if (!cur) return;
				}

				cur = cur->prev_sibling_c;

				// backwards walk: children are entered through first_child->prev_sibling_c before their parent is offered
				while (cur)
				{
					if (cur->first_child)
						cur = cur->first_child->prev_sibling_c;
					else
					{
						// leaf node, can't be ancestor
						if (step_push(ns, cur, alloc) & once)
							return;

						// no previous sibling left at this level: climb, offering each parent on the way
						// unless it is an ancestor of n (ancestors are not part of this axis)
						while (!cur->prev_sibling_c->next_sibling)
						{
							cur = cur->parent;

							if (!cur) return;

							if (!node_is_ancestor(cur, n))
								if (step_push(ns, cur, alloc) & once)
									return;
						}

						cur = cur->prev_sibling_c;
					}
				}

				break;
			}

			case axis_ancestor:
			case axis_ancestor_or_self:
			{
				// the "or-self" flavour offers n itself before its ancestors
				if (axis == axis_ancestor_or_self)
					if (step_push(ns, n, alloc) & once)
						return;

				xml_node_struct* cur = n->parent;

				// follow the parent chain until it ends
				while (cur)
				{
					if (step_push(ns, cur, alloc) & once)
						return;

					cur = cur->parent;
				}

				break;
			}

			case axis_self:
			{
				// single candidate: n itself
				step_push(ns, n, alloc);

				break;
			}

			case axis_parent:
			{
				// single candidate: the parent, if there is one
				if (n->parent)
					step_push(ns, n->parent, alloc);

				break;
			}

			default:
				assert(false && "Unimplemented axis"); // unreachable
			}
		}
9984 
step_fill(xpath_node_set_raw & ns,xml_attribute_struct * a,xml_node_struct * p,xpath_allocator * alloc,bool once,T v)9985 		template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v)
9986 		{
9987 			const axis_t axis = T::axis;
9988 
9989 			switch (axis)
9990 			{
9991 			case axis_ancestor:
9992 			case axis_ancestor_or_self:
9993 			{
9994 				if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
9995 					if (step_push(ns, a, p, alloc) & once)
9996 						return;
9997 
9998 				xml_node_struct* cur = p;
9999 
10000 				while (cur)
10001 				{
10002 					if (step_push(ns, cur, alloc) & once)
10003 						return;
10004 
10005 					cur = cur->parent;
10006 				}
10007 
10008 				break;
10009 			}
10010 
10011 			case axis_descendant_or_self:
10012 			case axis_self:
10013 			{
10014 				if (_test == nodetest_type_node) // reject attributes based on principal node type test
10015 					step_push(ns, a, p, alloc);
10016 
10017 				break;
10018 			}
10019 
10020 			case axis_following:
10021 			{
10022 				xml_node_struct* cur = p;
10023 
10024 				while (cur)
10025 				{
10026 					if (cur->first_child)
10027 						cur = cur->first_child;
10028 					else
10029 					{
10030 						while (!cur->next_sibling)
10031 						{
10032 							cur = cur->parent;
10033 
10034 							if (!cur) return;
10035 						}
10036 
10037 						cur = cur->next_sibling;
10038 					}
10039 
10040 					if (step_push(ns, cur, alloc) & once)
10041 						return;
10042 				}
10043 
10044 				break;
10045 			}
10046 
10047 			case axis_parent:
10048 			{
10049 				step_push(ns, p, alloc);
10050 
10051 				break;
10052 			}
10053 
10054 			case axis_preceding:
10055 			{
10056 				// preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
10057 				step_fill(ns, p, alloc, once, v);
10058 				break;
10059 			}
10060 
10061 			default:
10062 				assert(false && "Unimplemented axis"); // unreachable
10063 			}
10064 		}
10065 
step_fill(xpath_node_set_raw & ns,const xpath_node & xn,xpath_allocator * alloc,bool once,T v)10066 		template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v)
10067 		{
10068 			const axis_t axis = T::axis;
10069 			const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
10070 
10071 			if (xn.node())
10072 				step_fill(ns, xn.node().internal_object(), alloc, once, v);
10073 			else if (axis_has_attributes && xn.attribute() && xn.parent())
10074 				step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
10075 		}
10076 
step_do(const xpath_context & c,const xpath_stack & stack,nodeset_eval_t eval,T v)10077 		template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)
10078 		{
10079 			const axis_t axis = T::axis;
10080 			const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
10081 			const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
10082 
10083 			bool once =
10084 				(axis == axis_attribute && _test == nodetest_name) ||
10085 				(!_right && eval_once(axis_type, eval)) ||
10086 				(_right && !_right->_next && _right->_test == predicate_constant_one);
10087 
10088 			xpath_node_set_raw ns;
10089 			ns.set_type(axis_type);
10090 
10091 			if (_left)
10092 			{
10093 				xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
10094 
10095 				// self axis preserves the original order
10096 				if (axis == axis_self) ns.set_type(s.type());
10097 
10098 				for (const xpath_node* it = s.begin(); it != s.end(); ++it)
10099 				{
10100 					size_t size = ns.size();
10101 
10102 					// in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
10103 					if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
10104 
10105 					step_fill(ns, *it, stack.result, once, v);
10106 					if (_right) apply_predicates(ns, size, stack, eval);
10107 				}
10108 			}
10109 			else
10110 			{
10111 				step_fill(ns, c.n, stack.result, once, v);
10112 				if (_right) apply_predicates(ns, 0, stack, eval);
10113 			}
10114 
10115 			// child, attribute and self axes always generate unique set of nodes
10116 			// for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
10117 			if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
10118 				ns.remove_duplicates();
10119 
10120 			return ns;
10121 		}
10122 
10123 	public:
xpath_ast_node(ast_type_t type,xpath_value_type rettype_,const char_t * value)10124 		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
10125 			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10126 		{
10127 			assert(type == ast_string_constant);
10128 			_data.string = value;
10129 		}
10130 
xpath_ast_node(ast_type_t type,xpath_value_type rettype_,double value)10131 		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
10132 			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10133 		{
10134 			assert(type == ast_number_constant);
10135 			_data.number = value;
10136 		}
10137 
xpath_ast_node(ast_type_t type,xpath_value_type rettype_,xpath_variable * value)10138 		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
10139 			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10140 		{
10141 			assert(type == ast_variable);
10142 			_data.variable = value;
10143 		}
10144 
xpath_ast_node(ast_type_t type,xpath_value_type rettype_,xpath_ast_node * left=0,xpath_ast_node * right=0)10145 		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
10146 			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
10147 		{
10148 		}
10149 
xpath_ast_node(ast_type_t type,xpath_ast_node * left,axis_t axis,nodetest_t test,const char_t * contents)10150 		xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
10151 			_type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
10152 		{
10153 			assert(type == ast_step);
10154 			_data.nodetest = contents;
10155 		}
10156 
xpath_ast_node(ast_type_t type,xpath_ast_node * left,xpath_ast_node * right,predicate_t test)10157 		xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test):
10158 			_type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0)
10159 		{
10160 			assert(type == ast_filter || type == ast_predicate);
10161 		}
10162 
set_next(xpath_ast_node * value)10163 		void set_next(xpath_ast_node* value)
10164 		{
10165 			_next = value;
10166 		}
10167 
set_right(xpath_ast_node * value)10168 		void set_right(xpath_ast_node* value)
10169 		{
10170 			_right = value;
10171 		}
10172 
		// Evaluates this node in context c and returns the result as a boolean.
		// Node types with a native boolean result are handled directly; every other node type is evaluated
		// according to its declared return type (_rettype) and the result is converted to boolean.
		bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
		{
			switch (_type)
			{
			case ast_op_or:
				// built-in || : the right operand is skipped when the left one is already true
				return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);

			case ast_op_and:
				// built-in && : the right operand is skipped when the left one is already false
				return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);

			case ast_op_equal:
				return compare_eq(_left, _right, c, stack, equal_to());

			case ast_op_not_equal:
				return compare_eq(_left, _right, c, stack, not_equal_to());

			case ast_op_less:
				return compare_rel(_left, _right, c, stack, less());

			case ast_op_greater:
				// a > b is computed as b < a, hence the swapped operands
				return compare_rel(_right, _left, c, stack, less());

			case ast_op_less_or_equal:
				return compare_rel(_left, _right, c, stack, less_equal());

			case ast_op_greater_or_equal:
				// a >= b is computed as b <= a, hence the swapped operands
				return compare_rel(_right, _left, c, stack, less_equal());

			case ast_func_starts_with:
			{
				xpath_allocator_capture cr(stack.result);

				xpath_string lr = _left->eval_string(c, stack);
				xpath_string rr = _right->eval_string(c, stack);

				return starts_with(lr.c_str(), rr.c_str());
			}

			case ast_func_contains:
			{
				xpath_allocator_capture cr(stack.result);

				xpath_string lr = _left->eval_string(c, stack);
				xpath_string rr = _right->eval_string(c, stack);

				return find_substring(lr.c_str(), rr.c_str()) != 0;
			}

			case ast_func_boolean:
				return _left->eval_boolean(c, stack);

			case ast_func_not:
				return !_left->eval_boolean(c, stack);

			case ast_func_true:
				return true;

			case ast_func_false:
				return false;

			case ast_func_lang:
			{
				// attribute context nodes never match
				if (c.n.attribute()) return false;

				xpath_allocator_capture cr(stack.result);

				xpath_string lang = _left->eval_string(c, stack);

				// the nearest node (context node first, then its ancestors) carrying xml:lang decides the result
				for (xml_node n = c.n.node(); n; n = n.parent())
				{
					xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));

					if (a)
					{
						const char_t* value = a.value();

						// strnicmp / strncasecmp is not portable
						for (const char_t* lit = lang.c_str(); *lit; ++lit)
						{
							if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
							++value;
						}

						// the requested language must be the whole attribute value or be followed by a '-' suffix
						return *value == 0 || *value == '-';
					}
				}

				return false;
			}

			case ast_opt_compare_attribute:
			{
				// the right operand is either a string constant or a variable whose string value is used
				const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();

				// the attribute is looked up by name on the context node
				xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);

				return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
			}

			case ast_variable:
			{
				assert(_rettype == _data.variable->type());

				if (_rettype == xpath_type_boolean)
					return _data.variable->get_boolean();
			}

			// fallthrough
			// (non-boolean variables are converted below like any other expression)
			default:
			{
				switch (_rettype)
				{
				case xpath_type_number:
					return convert_number_to_boolean(eval_number(c, stack));

				case xpath_type_string:
				{
					xpath_allocator_capture cr(stack.result);

					// a string is true when it is not empty
					return !eval_string(c, stack).empty();
				}

				case xpath_type_node_set:
				{
					xpath_allocator_capture cr(stack.result);

					// a node set is true when it is not empty; nodeset_eval_any is requested since only emptiness matters
					return !eval_node_set(c, stack, nodeset_eval_any).empty();
				}

				default:
					assert(false && "Wrong expression for return type boolean"); // unreachable
					return false;
				}
			}
			}
		}
10309 
		// Evaluates this node in context c and returns the result as a number.
		// Node types with a native numeric result are handled directly; every other node type is evaluated
		// according to its declared return type (_rettype) and the result is converted to a number.
		double eval_number(const xpath_context& c, const xpath_stack& stack)
		{
			switch (_type)
			{
			case ast_op_add:
				return _left->eval_number(c, stack) + _right->eval_number(c, stack);

			case ast_op_subtract:
				return _left->eval_number(c, stack) - _right->eval_number(c, stack);

			case ast_op_multiply:
				return _left->eval_number(c, stack) * _right->eval_number(c, stack);

			case ast_op_divide:
				return _left->eval_number(c, stack) / _right->eval_number(c, stack);

			case ast_op_mod:
				// floating-point remainder via fmod
				return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));

			case ast_op_negate:
				return -_left->eval_number(c, stack);

			case ast_number_constant:
				return _data.number;

			case ast_func_last:
				// last() is the context size
				return static_cast<double>(c.size);

			case ast_func_position:
				// position() is the context position
				return static_cast<double>(c.position);

			case ast_func_count:
			{
				xpath_allocator_capture cr(stack.result);

				// the whole node set is needed to count it
				return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
			}

			case ast_func_string_length_0:
			{
				xpath_allocator_capture cr(stack.result);

				// no argument: measures the string value of the context node
				return static_cast<double>(string_value(c.n, stack.result).length());
			}

			case ast_func_string_length_1:
			{
				xpath_allocator_capture cr(stack.result);

				return static_cast<double>(_left->eval_string(c, stack).length());
			}

			case ast_func_number_0:
			{
				xpath_allocator_capture cr(stack.result);

				// no argument: converts the string value of the context node
				return convert_string_to_number(string_value(c.n, stack.result).c_str());
			}

			case ast_func_number_1:
				return _left->eval_number(c, stack);

			case ast_func_sum:
			{
				xpath_allocator_capture cr(stack.result);

				double r = 0;

				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);

				// adds up the string value of each node converted to a number
				for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
				{
					// per-iteration capture scoped to the temporary string value of this node
					xpath_allocator_capture cri(stack.result);

					r += convert_string_to_number(string_value(*it, stack.result).c_str());
				}

				return r;
			}

			case ast_func_floor:
			{
				double r = _left->eval_number(c, stack);

				// r == r is false only for NaN, which is returned unchanged
				return r == r ? floor(r) : r;
			}

			case ast_func_ceiling:
			{
				double r = _left->eval_number(c, stack);

				// r == r is false only for NaN, which is returned unchanged
				return r == r ? ceil(r) : r;
			}

			case ast_func_round:
				return round_nearest_nzero(_left->eval_number(c, stack));

			case ast_variable:
			{
				assert(_rettype == _data.variable->type());

				if (_rettype == xpath_type_number)
					return _data.variable->get_number();
			}

			// fallthrough
			// (non-numeric variables are converted below like any other expression)
			default:
			{
				switch (_rettype)
				{
				case xpath_type_boolean:
					// true maps to 1, false to 0
					return eval_boolean(c, stack) ? 1 : 0;

				case xpath_type_string:
				{
					xpath_allocator_capture cr(stack.result);

					return convert_string_to_number(eval_string(c, stack).c_str());
				}

				case xpath_type_node_set:
				{
					xpath_allocator_capture cr(stack.result);

					// node sets go through their string conversion first, then that string is parsed as a number
					return convert_string_to_number(eval_string(c, stack).c_str());
				}

				default:
					assert(false && "Wrong expression for return type number"); // unreachable
					return 0;
				}

			}
			}
		}
10445 
eval_string_concat(const xpath_context & c,const xpath_stack & stack)10446 		xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
10447 		{
10448 			assert(_type == ast_func_concat);
10449 
10450 			xpath_allocator_capture ct(stack.temp);
10451 
10452 			// count the string number
10453 			size_t count = 1;
10454 			for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
10455 
10456 			// allocate a buffer for temporary string objects
10457 			xpath_string* buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
10458 			if (!buffer) return xpath_string();
10459 
10460 			// evaluate all strings to temporary stack
10461 			xpath_stack swapped_stack = {stack.temp, stack.result};
10462 
10463 			buffer[0] = _left->eval_string(c, swapped_stack);
10464 
10465 			size_t pos = 1;
10466 			for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
10467 			assert(pos == count);
10468 
10469 			// get total length
10470 			size_t length = 0;
10471 			for (size_t i = 0; i < count; ++i) length += buffer[i].length();
10472 
10473 			// create final string
10474 			char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
10475 			if (!result) return xpath_string();
10476 
10477 			char_t* ri = result;
10478 
10479 			for (size_t j = 0; j < count; ++j)
10480 				for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
10481 					*ri++ = *bi;
10482 
10483 			*ri = 0;
10484 
10485 			return xpath_string::from_heap_preallocated(result, ri);
10486 		}
10487 
eval_string(const xpath_context & c,const xpath_stack & stack)10488 		xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
10489 		{
10490 			switch (_type)
10491 			{
10492 			case ast_string_constant:
10493 				return xpath_string::from_const(_data.string);
10494 
10495 			case ast_func_local_name_0:
10496 			{
10497 				xpath_node na = c.n;
10498 
10499 				return xpath_string::from_const(local_name(na));
10500 			}
10501 
10502 			case ast_func_local_name_1:
10503 			{
10504 				xpath_allocator_capture cr(stack.result);
10505 
10506 				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10507 				xpath_node na = ns.first();
10508 
10509 				return xpath_string::from_const(local_name(na));
10510 			}
10511 
10512 			case ast_func_name_0:
10513 			{
10514 				xpath_node na = c.n;
10515 
10516 				return xpath_string::from_const(qualified_name(na));
10517 			}
10518 
10519 			case ast_func_name_1:
10520 			{
10521 				xpath_allocator_capture cr(stack.result);
10522 
10523 				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10524 				xpath_node na = ns.first();
10525 
10526 				return xpath_string::from_const(qualified_name(na));
10527 			}
10528 
10529 			case ast_func_namespace_uri_0:
10530 			{
10531 				xpath_node na = c.n;
10532 
10533 				return xpath_string::from_const(namespace_uri(na));
10534 			}
10535 
10536 			case ast_func_namespace_uri_1:
10537 			{
10538 				xpath_allocator_capture cr(stack.result);
10539 
10540 				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10541 				xpath_node na = ns.first();
10542 
10543 				return xpath_string::from_const(namespace_uri(na));
10544 			}
10545 
10546 			case ast_func_string_0:
10547 				return string_value(c.n, stack.result);
10548 
10549 			case ast_func_string_1:
10550 				return _left->eval_string(c, stack);
10551 
10552 			case ast_func_concat:
10553 				return eval_string_concat(c, stack);
10554 
10555 			case ast_func_substring_before:
10556 			{
10557 				xpath_allocator_capture cr(stack.temp);
10558 
10559 				xpath_stack swapped_stack = {stack.temp, stack.result};
10560 
10561 				xpath_string s = _left->eval_string(c, swapped_stack);
10562 				xpath_string p = _right->eval_string(c, swapped_stack);
10563 
10564 				const char_t* pos = find_substring(s.c_str(), p.c_str());
10565 
10566 				return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
10567 			}
10568 
10569 			case ast_func_substring_after:
10570 			{
10571 				xpath_allocator_capture cr(stack.temp);
10572 
10573 				xpath_stack swapped_stack = {stack.temp, stack.result};
10574 
10575 				xpath_string s = _left->eval_string(c, swapped_stack);
10576 				xpath_string p = _right->eval_string(c, swapped_stack);
10577 
10578 				const char_t* pos = find_substring(s.c_str(), p.c_str());
10579 				if (!pos) return xpath_string();
10580 
10581 				const char_t* rbegin = pos + p.length();
10582 				const char_t* rend = s.c_str() + s.length();
10583 
10584 				return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10585 			}
10586 
10587 			case ast_func_substring_2:
10588 			{
10589 				xpath_allocator_capture cr(stack.temp);
10590 
10591 				xpath_stack swapped_stack = {stack.temp, stack.result};
10592 
10593 				xpath_string s = _left->eval_string(c, swapped_stack);
10594 				size_t s_length = s.length();
10595 
10596 				double first = round_nearest(_right->eval_number(c, stack));
10597 
10598 				if (is_nan(first)) return xpath_string(); // NaN
10599 				else if (first >= s_length + 1) return xpath_string();
10600 
10601 				size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10602 				assert(1 <= pos && pos <= s_length + 1);
10603 
10604 				const char_t* rbegin = s.c_str() + (pos - 1);
10605 				const char_t* rend = s.c_str() + s.length();
10606 
10607 				return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10608 			}
10609 
10610 			case ast_func_substring_3:
10611 			{
10612 				xpath_allocator_capture cr(stack.temp);
10613 
10614 				xpath_stack swapped_stack = {stack.temp, stack.result};
10615 
10616 				xpath_string s = _left->eval_string(c, swapped_stack);
10617 				size_t s_length = s.length();
10618 
10619 				double first = round_nearest(_right->eval_number(c, stack));
10620 				double last = first + round_nearest(_right->_next->eval_number(c, stack));
10621 
10622 				if (is_nan(first) || is_nan(last)) return xpath_string();
10623 				else if (first >= s_length + 1) return xpath_string();
10624 				else if (first >= last) return xpath_string();
10625 				else if (last < 1) return xpath_string();
10626 
10627 				size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10628 				size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);
10629 
10630 				assert(1 <= pos && pos <= end && end <= s_length + 1);
10631 				const char_t* rbegin = s.c_str() + (pos - 1);
10632 				const char_t* rend = s.c_str() + (end - 1);
10633 
10634 				return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
10635 			}
10636 
10637 			case ast_func_normalize_space_0:
10638 			{
10639 				xpath_string s = string_value(c.n, stack.result);
10640 
10641 				char_t* begin = s.data(stack.result);
10642 				if (!begin) return xpath_string();
10643 
10644 				char_t* end = normalize_space(begin);
10645 
10646 				return xpath_string::from_heap_preallocated(begin, end);
10647 			}
10648 
10649 			case ast_func_normalize_space_1:
10650 			{
10651 				xpath_string s = _left->eval_string(c, stack);
10652 
10653 				char_t* begin = s.data(stack.result);
10654 				if (!begin) return xpath_string();
10655 
10656 				char_t* end = normalize_space(begin);
10657 
10658 				return xpath_string::from_heap_preallocated(begin, end);
10659 			}
10660 
10661 			case ast_func_translate:
10662 			{
10663 				xpath_allocator_capture cr(stack.temp);
10664 
10665 				xpath_stack swapped_stack = {stack.temp, stack.result};
10666 
10667 				xpath_string s = _left->eval_string(c, stack);
10668 				xpath_string from = _right->eval_string(c, swapped_stack);
10669 				xpath_string to = _right->_next->eval_string(c, swapped_stack);
10670 
10671 				char_t* begin = s.data(stack.result);
10672 				if (!begin) return xpath_string();
10673 
10674 				char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());
10675 
10676 				return xpath_string::from_heap_preallocated(begin, end);
10677 			}
10678 
10679 			case ast_opt_translate_table:
10680 			{
10681 				xpath_string s = _left->eval_string(c, stack);
10682 
10683 				char_t* begin = s.data(stack.result);
10684 				if (!begin) return xpath_string();
10685 
10686 				char_t* end = translate_table(begin, _data.table);
10687 
10688 				return xpath_string::from_heap_preallocated(begin, end);
10689 			}
10690 
10691 			case ast_variable:
10692 			{
10693 				assert(_rettype == _data.variable->type());
10694 
10695 				if (_rettype == xpath_type_string)
10696 					return xpath_string::from_const(_data.variable->get_string());
10697 			}
10698 
10699 			// fallthrough
10700 			default:
10701 			{
10702 				switch (_rettype)
10703 				{
10704 				case xpath_type_boolean:
10705 					return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
10706 
10707 				case xpath_type_number:
10708 					return convert_number_to_string(eval_number(c, stack), stack.result);
10709 
10710 				case xpath_type_node_set:
10711 				{
10712 					xpath_allocator_capture cr(stack.temp);
10713 
10714 					xpath_stack swapped_stack = {stack.temp, stack.result};
10715 
10716 					xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
10717 					return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
10718 				}
10719 
10720 				default:
10721 					assert(false && "Wrong expression for return type string"); // unreachable
10722 					return xpath_string();
10723 				}
10724 			}
10725 			}
10726 		}
10727 
eval_node_set(const xpath_context & c,const xpath_stack & stack,nodeset_eval_t eval)10728 		xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)
10729 		{
10730 			switch (_type)
10731 			{
10732 			case ast_op_union:
10733 			{
10734 				xpath_allocator_capture cr(stack.temp);
10735 
10736 				xpath_stack swapped_stack = {stack.temp, stack.result};
10737 
10738 				xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval);
10739 				xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval);
10740 
10741 				// we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
10742 				rs.set_type(xpath_node_set::type_unsorted);
10743 
10744 				rs.append(ls.begin(), ls.end(), stack.result);
10745 				rs.remove_duplicates();
10746 
10747 				return rs;
10748 			}
10749 
10750 			case ast_filter:
10751 			{
10752 				xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all);
10753 
10754 				// either expression is a number or it contains position() call; sort by document order
10755 				if (_test != predicate_posinv) set.sort_do();
10756 
10757 				bool once = eval_once(set.type(), eval);
10758 
10759 				apply_predicate(set, 0, stack, once);
10760 
10761 				return set;
10762 			}
10763 
10764 			case ast_func_id:
10765 				return xpath_node_set_raw();
10766 
10767 			case ast_step:
10768 			{
10769 				switch (_axis)
10770 				{
10771 				case axis_ancestor:
10772 					return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
10773 
10774 				case axis_ancestor_or_self:
10775 					return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
10776 
10777 				case axis_attribute:
10778 					return step_do(c, stack, eval, axis_to_type<axis_attribute>());
10779 
10780 				case axis_child:
10781 					return step_do(c, stack, eval, axis_to_type<axis_child>());
10782 
10783 				case axis_descendant:
10784 					return step_do(c, stack, eval, axis_to_type<axis_descendant>());
10785 
10786 				case axis_descendant_or_self:
10787 					return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
10788 
10789 				case axis_following:
10790 					return step_do(c, stack, eval, axis_to_type<axis_following>());
10791 
10792 				case axis_following_sibling:
10793 					return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
10794 
10795 				case axis_namespace:
10796 					// namespaced axis is not supported
10797 					return xpath_node_set_raw();
10798 
10799 				case axis_parent:
10800 					return step_do(c, stack, eval, axis_to_type<axis_parent>());
10801 
10802 				case axis_preceding:
10803 					return step_do(c, stack, eval, axis_to_type<axis_preceding>());
10804 
10805 				case axis_preceding_sibling:
10806 					return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
10807 
10808 				case axis_self:
10809 					return step_do(c, stack, eval, axis_to_type<axis_self>());
10810 
10811 				default:
10812 					assert(false && "Unknown axis"); // unreachable
10813 					return xpath_node_set_raw();
10814 				}
10815 			}
10816 
10817 			case ast_step_root:
10818 			{
10819 				assert(!_right); // root step can't have any predicates
10820 
10821 				xpath_node_set_raw ns;
10822 
10823 				ns.set_type(xpath_node_set::type_sorted);
10824 
10825 				if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
10826 				else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
10827 
10828 				return ns;
10829 			}
10830 
10831 			case ast_variable:
10832 			{
10833 				assert(_rettype == _data.variable->type());
10834 
10835 				if (_rettype == xpath_type_node_set)
10836 				{
10837 					const xpath_node_set& s = _data.variable->get_node_set();
10838 
10839 					xpath_node_set_raw ns;
10840 
10841 					ns.set_type(s.type());
10842 					ns.append(s.begin(), s.end(), stack.result);
10843 
10844 					return ns;
10845 				}
10846 			}
10847 
10848 			// fallthrough
10849 			default:
10850 				assert(false && "Wrong expression for return type node set"); // unreachable
10851 				return xpath_node_set_raw();
10852 			}
10853 		}
10854 
optimize(xpath_allocator * alloc)10855 		void optimize(xpath_allocator* alloc)
10856 		{
10857 			if (_left)
10858 				_left->optimize(alloc);
10859 
10860 			if (_right)
10861 				_right->optimize(alloc);
10862 
10863 			if (_next)
10864 				_next->optimize(alloc);
10865 
10866 			optimize_self(alloc);
10867 		}
10868 
optimize_self(xpath_allocator * alloc)10869 		void optimize_self(xpath_allocator* alloc)
10870 		{
10871 			// Rewrite [position()=expr] with [expr]
10872 			// Note that this step has to go before classification to recognize [position()=1]
10873 			if ((_type == ast_filter || _type == ast_predicate) &&
10874 				_right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number)
10875 			{
10876 				_right = _right->_right;
10877 			}
10878 
10879 			// Classify filter/predicate ops to perform various optimizations during evaluation
10880 			if (_type == ast_filter || _type == ast_predicate)
10881 			{
10882 				assert(_test == predicate_default);
10883 
10884 				if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
10885 					_test = predicate_constant_one;
10886 				else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
10887 					_test = predicate_constant;
10888 				else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
10889 					_test = predicate_posinv;
10890 			}
10891 
10892 			// Rewrite descendant-or-self::node()/child::foo with descendant::foo
10893 			// The former is a full form of //foo, the latter is much faster since it executes the node test immediately
10894 			// Do a similar kind of rewrite for self/descendant/descendant-or-self axes
10895 			// Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
10896 			if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && _left &&
10897 				_left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
10898 				is_posinv_step())
10899 			{
10900 				if (_axis == axis_child || _axis == axis_descendant)
10901 					_axis = axis_descendant;
10902 				else
10903 					_axis = axis_descendant_or_self;
10904 
10905 				_left = _left->_left;
10906 			}
10907 
10908 			// Use optimized lookup table implementation for translate() with constant arguments
10909 			if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
10910 			{
10911 				unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
10912 
10913 				if (table)
10914 				{
10915 					_type = ast_opt_translate_table;
10916 					_data.table = table;
10917 				}
10918 			}
10919 
10920 			// Use optimized path for @attr = 'value' or @attr = $value
10921 			if (_type == ast_op_equal &&
10922 				_left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
10923 				(_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string)))
10924 			{
10925 				_type = ast_opt_compare_attribute;
10926 			}
10927 		}
10928 
is_posinv_expr() const10929 		bool is_posinv_expr() const
10930 		{
10931 			switch (_type)
10932 			{
10933 			case ast_func_position:
10934 			case ast_func_last:
10935 				return false;
10936 
10937 			case ast_string_constant:
10938 			case ast_number_constant:
10939 			case ast_variable:
10940 				return true;
10941 
10942 			case ast_step:
10943 			case ast_step_root:
10944 				return true;
10945 
10946 			case ast_predicate:
10947 			case ast_filter:
10948 				return true;
10949 
10950 			default:
10951 				if (_left && !_left->is_posinv_expr()) return false;
10952 
10953 				for (xpath_ast_node* n = _right; n; n = n->_next)
10954 					if (!n->is_posinv_expr()) return false;
10955 
10956 				return true;
10957 			}
10958 		}
10959 
is_posinv_step() const10960 		bool is_posinv_step() const
10961 		{
10962 			assert(_type == ast_step);
10963 
10964 			for (xpath_ast_node* n = _right; n; n = n->_next)
10965 			{
10966 				assert(n->_type == ast_predicate);
10967 
10968 				if (n->_test != predicate_posinv)
10969 					return false;
10970 			}
10971 
10972 			return true;
10973 		}
10974 
rettype() const10975 		xpath_value_type rettype() const
10976 		{
10977 			return static_cast<xpath_value_type>(_rettype);
10978 		}
10979 	};
10980 
10981 	struct xpath_parser
10982 	{
		// Allocator used for AST nodes and string copies; error_oom() raises the flag behind its _error pointer
		xpath_allocator* _alloc;
		// Tokenizer over the query; the parser inspects current()/contents() and advances it with next()
		xpath_lexer _lexer;

		// Start of the query text; error() subtracts it from the lexer position to compute the error offset
		const char_t* _query;
		// Variable set used to resolve variable references; may be null, in which case any reference is an error
		xpath_variable_set* _variables;

		// Receives the error message and offset recorded by error()
		xpath_parse_result* _result;

		// Scratch buffer handed to get_variable_scratch() and convert_string_to_number_scratch()
		char_t _scratch[32];
10992 
errorxpath_parser10993 		xpath_ast_node* error(const char* message)
10994 		{
10995 			_result->error = message;
10996 			_result->offset = _lexer.current_pos() - _query;
10997 
10998 			return 0;
10999 		}
11000 
error_oomxpath_parser11001 		xpath_ast_node* error_oom()
11002 		{
11003 			assert(_alloc->_error);
11004 			*_alloc->_error = true;
11005 
11006 			return 0;
11007 		}
11008 
alloc_nodexpath_parser11009 		void* alloc_node()
11010 		{
11011 			return _alloc->allocate(sizeof(xpath_ast_node));
11012 		}
11013 
alloc_nodexpath_parser11014 		xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value)
11015 		{
11016 			void* memory = alloc_node();
11017 			return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11018 		}
11019 
alloc_nodexpath_parser11020 		xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value)
11021 		{
11022 			void* memory = alloc_node();
11023 			return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11024 		}
11025 
alloc_nodexpath_parser11026 		xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value)
11027 		{
11028 			void* memory = alloc_node();
11029 			return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11030 		}
11031 
alloc_nodexpath_parser11032 		xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0)
11033 		{
11034 			void* memory = alloc_node();
11035 			return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0;
11036 		}
11037 
alloc_nodexpath_parser11038 		xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents)
11039 		{
11040 			void* memory = alloc_node();
11041 			return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0;
11042 		}
11043 
alloc_nodexpath_parser11044 		xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test)
11045 		{
11046 			void* memory = alloc_node();
11047 			return memory ? new (memory) xpath_ast_node(type, left, right, test) : 0;
11048 		}
11049 
alloc_stringxpath_parser11050 		const char_t* alloc_string(const xpath_lexer_string& value)
11051 		{
11052 			if (!value.begin)
11053 				return PUGIXML_TEXT("");
11054 
11055 			size_t length = static_cast<size_t>(value.end - value.begin);
11056 
11057 			char_t* c = static_cast<char_t*>(_alloc->allocate((length + 1) * sizeof(char_t)));
11058 			if (!c) return 0;
11059 
11060 			memcpy(c, value.begin, length * sizeof(char_t));
11061 			c[length] = 0;
11062 
11063 			return c;
11064 		}
11065 
parse_functionxpath_parser11066 		xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
11067 		{
11068 			switch (name.begin[0])
11069 			{
11070 			case 'b':
11071 				if (name == PUGIXML_TEXT("boolean") && argc == 1)
11072 					return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]);
11073 
11074 				break;
11075 
11076 			case 'c':
11077 				if (name == PUGIXML_TEXT("count") && argc == 1)
11078 				{
11079 					if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11080 					return alloc_node(ast_func_count, xpath_type_number, args[0]);
11081 				}
11082 				else if (name == PUGIXML_TEXT("contains") && argc == 2)
11083 					return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
11084 				else if (name == PUGIXML_TEXT("concat") && argc >= 2)
11085 					return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]);
11086 				else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
11087 					return alloc_node(ast_func_ceiling, xpath_type_number, args[0]);
11088 
11089 				break;
11090 
11091 			case 'f':
11092 				if (name == PUGIXML_TEXT("false") && argc == 0)
11093 					return alloc_node(ast_func_false, xpath_type_boolean);
11094 				else if (name == PUGIXML_TEXT("floor") && argc == 1)
11095 					return alloc_node(ast_func_floor, xpath_type_number, args[0]);
11096 
11097 				break;
11098 
11099 			case 'i':
11100 				if (name == PUGIXML_TEXT("id") && argc == 1)
11101 					return alloc_node(ast_func_id, xpath_type_node_set, args[0]);
11102 
11103 				break;
11104 
11105 			case 'l':
11106 				if (name == PUGIXML_TEXT("last") && argc == 0)
11107 					return alloc_node(ast_func_last, xpath_type_number);
11108 				else if (name == PUGIXML_TEXT("lang") && argc == 1)
11109 					return alloc_node(ast_func_lang, xpath_type_boolean, args[0]);
11110 				else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
11111 				{
11112 					if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11113 					return alloc_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]);
11114 				}
11115 
11116 				break;
11117 
11118 			case 'n':
11119 				if (name == PUGIXML_TEXT("name") && argc <= 1)
11120 				{
11121 					if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11122 					return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]);
11123 				}
11124 				else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
11125 				{
11126 					if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11127 					return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]);
11128 				}
11129 				else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
11130 					return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
11131 				else if (name == PUGIXML_TEXT("not") && argc == 1)
11132 					return alloc_node(ast_func_not, xpath_type_boolean, args[0]);
11133 				else if (name == PUGIXML_TEXT("number") && argc <= 1)
11134 					return alloc_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
11135 
11136 				break;
11137 
11138 			case 'p':
11139 				if (name == PUGIXML_TEXT("position") && argc == 0)
11140 					return alloc_node(ast_func_position, xpath_type_number);
11141 
11142 				break;
11143 
11144 			case 'r':
11145 				if (name == PUGIXML_TEXT("round") && argc == 1)
11146 					return alloc_node(ast_func_round, xpath_type_number, args[0]);
11147 
11148 				break;
11149 
11150 			case 's':
11151 				if (name == PUGIXML_TEXT("string") && argc <= 1)
11152 					return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
11153 				else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
11154 					return alloc_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
11155 				else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
11156 					return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
11157 				else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
11158 					return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
11159 				else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
11160 					return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
11161 				else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
11162 					return alloc_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
11163 				else if (name == PUGIXML_TEXT("sum") && argc == 1)
11164 				{
11165 					if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11166 					return alloc_node(ast_func_sum, xpath_type_number, args[0]);
11167 				}
11168 
11169 				break;
11170 
11171 			case 't':
11172 				if (name == PUGIXML_TEXT("translate") && argc == 3)
11173 					return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]);
11174 				else if (name == PUGIXML_TEXT("true") && argc == 0)
11175 					return alloc_node(ast_func_true, xpath_type_boolean);
11176 
11177 				break;
11178 
11179 			default:
11180 				break;
11181 			}
11182 
11183 			return error("Unrecognized function or wrong parameter count");
11184 		}
11185 
parse_axis_namexpath_parser11186 		axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
11187 		{
11188 			specified = true;
11189 
11190 			switch (name.begin[0])
11191 			{
11192 			case 'a':
11193 				if (name == PUGIXML_TEXT("ancestor"))
11194 					return axis_ancestor;
11195 				else if (name == PUGIXML_TEXT("ancestor-or-self"))
11196 					return axis_ancestor_or_self;
11197 				else if (name == PUGIXML_TEXT("attribute"))
11198 					return axis_attribute;
11199 
11200 				break;
11201 
11202 			case 'c':
11203 				if (name == PUGIXML_TEXT("child"))
11204 					return axis_child;
11205 
11206 				break;
11207 
11208 			case 'd':
11209 				if (name == PUGIXML_TEXT("descendant"))
11210 					return axis_descendant;
11211 				else if (name == PUGIXML_TEXT("descendant-or-self"))
11212 					return axis_descendant_or_self;
11213 
11214 				break;
11215 
11216 			case 'f':
11217 				if (name == PUGIXML_TEXT("following"))
11218 					return axis_following;
11219 				else if (name == PUGIXML_TEXT("following-sibling"))
11220 					return axis_following_sibling;
11221 
11222 				break;
11223 
11224 			case 'n':
11225 				if (name == PUGIXML_TEXT("namespace"))
11226 					return axis_namespace;
11227 
11228 				break;
11229 
11230 			case 'p':
11231 				if (name == PUGIXML_TEXT("parent"))
11232 					return axis_parent;
11233 				else if (name == PUGIXML_TEXT("preceding"))
11234 					return axis_preceding;
11235 				else if (name == PUGIXML_TEXT("preceding-sibling"))
11236 					return axis_preceding_sibling;
11237 
11238 				break;
11239 
11240 			case 's':
11241 				if (name == PUGIXML_TEXT("self"))
11242 					return axis_self;
11243 
11244 				break;
11245 
11246 			default:
11247 				break;
11248 			}
11249 
11250 			specified = false;
11251 			return axis_child;
11252 		}
11253 
parse_node_test_typexpath_parser11254 		nodetest_t parse_node_test_type(const xpath_lexer_string& name)
11255 		{
11256 			switch (name.begin[0])
11257 			{
11258 			case 'c':
11259 				if (name == PUGIXML_TEXT("comment"))
11260 					return nodetest_type_comment;
11261 
11262 				break;
11263 
11264 			case 'n':
11265 				if (name == PUGIXML_TEXT("node"))
11266 					return nodetest_type_node;
11267 
11268 				break;
11269 
11270 			case 'p':
11271 				if (name == PUGIXML_TEXT("processing-instruction"))
11272 					return nodetest_type_pi;
11273 
11274 				break;
11275 
11276 			case 't':
11277 				if (name == PUGIXML_TEXT("text"))
11278 					return nodetest_type_text;
11279 
11280 				break;
11281 
11282 			default:
11283 				break;
11284 			}
11285 
11286 			return nodetest_none;
11287 		}
11288 
11289 		// PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
parse_primary_expressionxpath_parser11290 		xpath_ast_node* parse_primary_expression()
11291 		{
11292 			switch (_lexer.current())
11293 			{
11294 			case lex_var_ref:
11295 			{
11296 				xpath_lexer_string name = _lexer.contents();
11297 
11298 				if (!_variables)
11299 					return error("Unknown variable: variable set is not provided");
11300 
11301 				xpath_variable* var = 0;
11302 				if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var))
11303 					return error_oom();
11304 
11305 				if (!var)
11306 					return error("Unknown variable: variable set does not contain the given name");
11307 
11308 				_lexer.next();
11309 
11310 				return alloc_node(ast_variable, var->type(), var);
11311 			}
11312 
11313 			case lex_open_brace:
11314 			{
11315 				_lexer.next();
11316 
11317 				xpath_ast_node* n = parse_expression();
11318 				if (!n) return 0;
11319 
11320 				if (_lexer.current() != lex_close_brace)
11321 					return error("Expected ')' to match an opening '('");
11322 
11323 				_lexer.next();
11324 
11325 				return n;
11326 			}
11327 
11328 			case lex_quoted_string:
11329 			{
11330 				const char_t* value = alloc_string(_lexer.contents());
11331 				if (!value) return 0;
11332 
11333 				_lexer.next();
11334 
11335 				return alloc_node(ast_string_constant, xpath_type_string, value);
11336 			}
11337 
11338 			case lex_number:
11339 			{
11340 				double value = 0;
11341 
11342 				if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
11343 					return error_oom();
11344 
11345 				_lexer.next();
11346 
11347 				return alloc_node(ast_number_constant, xpath_type_number, value);
11348 			}
11349 
11350 			case lex_string:
11351 			{
11352 				xpath_ast_node* args[2] = {0};
11353 				size_t argc = 0;
11354 
11355 				xpath_lexer_string function = _lexer.contents();
11356 				_lexer.next();
11357 
11358 				xpath_ast_node* last_arg = 0;
11359 
11360 				if (_lexer.current() != lex_open_brace)
11361 					return error("Unrecognized function call");
11362 				_lexer.next();
11363 
11364 				while (_lexer.current() != lex_close_brace)
11365 				{
11366 					if (argc > 0)
11367 					{
11368 						if (_lexer.current() != lex_comma)
11369 							return error("No comma between function arguments");
11370 						_lexer.next();
11371 					}
11372 
11373 					xpath_ast_node* n = parse_expression();
11374 					if (!n) return 0;
11375 
11376 					if (argc < 2) args[argc] = n;
11377 					else last_arg->set_next(n);
11378 
11379 					argc++;
11380 					last_arg = n;
11381 				}
11382 
11383 				_lexer.next();
11384 
11385 				return parse_function(function, argc, args);
11386 			}
11387 
11388 			default:
11389 				return error("Unrecognizable primary expression");
11390 			}
11391 		}
11392 
11393 		// FilterExpr ::= PrimaryExpr | FilterExpr Predicate
11394 		// Predicate ::= '[' PredicateExpr ']'
11395 		// PredicateExpr ::= Expr
parse_filter_expressionxpath_parser11396 		xpath_ast_node* parse_filter_expression()
11397 		{
11398 			xpath_ast_node* n = parse_primary_expression();
11399 			if (!n) return 0;
11400 
11401 			while (_lexer.current() == lex_open_square_brace)
11402 			{
11403 				_lexer.next();
11404 
11405 				if (n->rettype() != xpath_type_node_set)
11406 					return error("Predicate has to be applied to node set");
11407 
11408 				xpath_ast_node* expr = parse_expression();
11409 				if (!expr) return 0;
11410 
11411 				n = alloc_node(ast_filter, n, expr, predicate_default);
11412 				if (!n) return 0;
11413 
11414 				if (_lexer.current() != lex_close_square_brace)
11415 					return error("Expected ']' to match an opening '['");
11416 
11417 				_lexer.next();
11418 			}
11419 
11420 			return n;
11421 		}
11422 
11423 		// Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
11424 		// AxisSpecifier ::= AxisName '::' | '@'?
11425 		// NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
11426 		// NameTest ::= '*' | NCName ':' '*' | QName
11427 		// AbbreviatedStep ::= '.' | '..'
parse_stepxpath_parser11428 		xpath_ast_node* parse_step(xpath_ast_node* set)
11429 		{
11430 			if (set && set->rettype() != xpath_type_node_set)
11431 				return error("Step has to be applied to node set");
11432 
11433 			bool axis_specified = false;
11434 			axis_t axis = axis_child; // implied child axis
11435 
11436 			if (_lexer.current() == lex_axis_attribute)
11437 			{
11438 				axis = axis_attribute;
11439 				axis_specified = true;
11440 
11441 				_lexer.next();
11442 			}
11443 			else if (_lexer.current() == lex_dot)
11444 			{
11445 				_lexer.next();
11446 
11447 				if (_lexer.current() == lex_open_square_brace)
11448 					return error("Predicates are not allowed after an abbreviated step");
11449 
11450 				return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0);
11451 			}
11452 			else if (_lexer.current() == lex_double_dot)
11453 			{
11454 				_lexer.next();
11455 
11456 				if (_lexer.current() == lex_open_square_brace)
11457 					return error("Predicates are not allowed after an abbreviated step");
11458 
11459 				return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0);
11460 			}
11461 
11462 			nodetest_t nt_type = nodetest_none;
11463 			xpath_lexer_string nt_name;
11464 
11465 			if (_lexer.current() == lex_string)
11466 			{
11467 				// node name test
11468 				nt_name = _lexer.contents();
11469 				_lexer.next();
11470 
11471 				// was it an axis name?
11472 				if (_lexer.current() == lex_double_colon)
11473 				{
11474 					// parse axis name
11475 					if (axis_specified)
11476 						return error("Two axis specifiers in one step");
11477 
11478 					axis = parse_axis_name(nt_name, axis_specified);
11479 
11480 					if (!axis_specified)
11481 						return error("Unknown axis");
11482 
11483 					// read actual node test
11484 					_lexer.next();
11485 
11486 					if (_lexer.current() == lex_multiply)
11487 					{
11488 						nt_type = nodetest_all;
11489 						nt_name = xpath_lexer_string();
11490 						_lexer.next();
11491 					}
11492 					else if (_lexer.current() == lex_string)
11493 					{
11494 						nt_name = _lexer.contents();
11495 						_lexer.next();
11496 					}
11497 					else
11498 					{
11499 						return error("Unrecognized node test");
11500 					}
11501 				}
11502 
11503 				if (nt_type == nodetest_none)
11504 				{
11505 					// node type test or processing-instruction
11506 					if (_lexer.current() == lex_open_brace)
11507 					{
11508 						_lexer.next();
11509 
11510 						if (_lexer.current() == lex_close_brace)
11511 						{
11512 							_lexer.next();
11513 
11514 							nt_type = parse_node_test_type(nt_name);
11515 
11516 							if (nt_type == nodetest_none)
11517 								return error("Unrecognized node type");
11518 
11519 							nt_name = xpath_lexer_string();
11520 						}
11521 						else if (nt_name == PUGIXML_TEXT("processing-instruction"))
11522 						{
11523 							if (_lexer.current() != lex_quoted_string)
11524 								return error("Only literals are allowed as arguments to processing-instruction()");
11525 
11526 							nt_type = nodetest_pi;
11527 							nt_name = _lexer.contents();
11528 							_lexer.next();
11529 
11530 							if (_lexer.current() != lex_close_brace)
11531 								return error("Unmatched brace near processing-instruction()");
11532 							_lexer.next();
11533 						}
11534 						else
11535 						{
11536 							return error("Unmatched brace near node type test");
11537 						}
11538 					}
11539 					// QName or NCName:*
11540 					else
11541 					{
11542 						if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
11543 						{
11544 							nt_name.end--; // erase *
11545 
11546 							nt_type = nodetest_all_in_namespace;
11547 						}
11548 						else
11549 						{
11550 							nt_type = nodetest_name;
11551 						}
11552 					}
11553 				}
11554 			}
11555 			else if (_lexer.current() == lex_multiply)
11556 			{
11557 				nt_type = nodetest_all;
11558 				_lexer.next();
11559 			}
11560 			else
11561 			{
11562 				return error("Unrecognized node test");
11563 			}
11564 
11565 			const char_t* nt_name_copy = alloc_string(nt_name);
11566 			if (!nt_name_copy) return 0;
11567 
11568 			xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy);
11569 			if (!n) return 0;
11570 
11571 			xpath_ast_node* last = 0;
11572 
11573 			while (_lexer.current() == lex_open_square_brace)
11574 			{
11575 				_lexer.next();
11576 
11577 				xpath_ast_node* expr = parse_expression();
11578 				if (!expr) return 0;
11579 
11580 				xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default);
11581 				if (!pred) return 0;
11582 
11583 				if (_lexer.current() != lex_close_square_brace)
11584 					return error("Expected ']' to match an opening '['");
11585 				_lexer.next();
11586 
11587 				if (last) last->set_next(pred);
11588 				else n->set_right(pred);
11589 
11590 				last = pred;
11591 			}
11592 
11593 			return n;
11594 		}
11595 
11596 		// RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
parse_relative_location_pathxpath_parser11597 		xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
11598 		{
11599 			xpath_ast_node* n = parse_step(set);
11600 			if (!n) return 0;
11601 
11602 			while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11603 			{
11604 				lexeme_t l = _lexer.current();
11605 				_lexer.next();
11606 
11607 				if (l == lex_double_slash)
11608 				{
11609 					n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11610 					if (!n) return 0;
11611 				}
11612 
11613 				n = parse_step(n);
11614 				if (!n) return 0;
11615 			}
11616 
11617 			return n;
11618 		}
11619 
11620 		// LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
11621 		// AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
parse_location_pathxpath_parser11622 		xpath_ast_node* parse_location_path()
11623 		{
11624 			if (_lexer.current() == lex_slash)
11625 			{
11626 				_lexer.next();
11627 
11628 				xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
11629 				if (!n) return 0;
11630 
11631 				// relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
11632 				lexeme_t l = _lexer.current();
11633 
11634 				if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
11635 					return parse_relative_location_path(n);
11636 				else
11637 					return n;
11638 			}
11639 			else if (_lexer.current() == lex_double_slash)
11640 			{
11641 				_lexer.next();
11642 
11643 				xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
11644 				if (!n) return 0;
11645 
11646 				n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11647 				if (!n) return 0;
11648 
11649 				return parse_relative_location_path(n);
11650 			}
11651 
11652 			// else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
11653 			return parse_relative_location_path(0);
11654 		}
11655 
11656 		// PathExpr ::= LocationPath
11657 		//				| FilterExpr
11658 		//				| FilterExpr '/' RelativeLocationPath
11659 		//				| FilterExpr '//' RelativeLocationPath
11660 		// UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
11661 		// UnaryExpr ::= UnionExpr | '-' UnaryExpr
parse_path_or_unary_expressionxpath_parser11662 		xpath_ast_node* parse_path_or_unary_expression()
11663 		{
11664 			// Clarification.
11665 			// PathExpr begins with either LocationPath or FilterExpr.
11666 			// FilterExpr begins with PrimaryExpr
11667 			// PrimaryExpr begins with '$' in case of it being a variable reference,
11668 			// '(' in case of it being an expression, string literal, number constant or
11669 			// function call.
11670 			if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
11671 				_lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
11672 				_lexer.current() == lex_string)
11673 			{
11674 				if (_lexer.current() == lex_string)
11675 				{
11676 					// This is either a function call, or not - if not, we shall proceed with location path
11677 					const char_t* state = _lexer.state();
11678 
11679 					while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
11680 
11681 					if (*state != '(')
11682 						return parse_location_path();
11683 
11684 					// This looks like a function call; however this still can be a node-test. Check it.
11685 					if (parse_node_test_type(_lexer.contents()) != nodetest_none)
11686 						return parse_location_path();
11687 				}
11688 
11689 				xpath_ast_node* n = parse_filter_expression();
11690 				if (!n) return 0;
11691 
11692 				if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11693 				{
11694 					lexeme_t l = _lexer.current();
11695 					_lexer.next();
11696 
11697 					if (l == lex_double_slash)
11698 					{
11699 						if (n->rettype() != xpath_type_node_set)
11700 							return error("Step has to be applied to node set");
11701 
11702 						n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11703 						if (!n) return 0;
11704 					}
11705 
11706 					// select from location path
11707 					return parse_relative_location_path(n);
11708 				}
11709 
11710 				return n;
11711 			}
11712 			else if (_lexer.current() == lex_minus)
11713 			{
11714 				_lexer.next();
11715 
11716 				// precedence 7+ - only parses union expressions
11717 				xpath_ast_node* n = parse_expression(7);
11718 				if (!n) return 0;
11719 
11720 				return alloc_node(ast_op_negate, xpath_type_number, n);
11721 			}
11722 			else
11723 			{
11724 				return parse_location_path();
11725 			}
11726 		}
11727 
11728 		struct binary_op_t
11729 		{
11730 			ast_type_t asttype;
11731 			xpath_value_type rettype;
11732 			int precedence;
11733 
binary_op_txpath_parser::binary_op_t11734 			binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
11735 			{
11736 			}
11737 
binary_op_txpath_parser::binary_op_t11738 			binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
11739 			{
11740 			}
11741 
parsexpath_parser::binary_op_t11742 			static binary_op_t parse(xpath_lexer& lexer)
11743 			{
11744 				switch (lexer.current())
11745 				{
11746 				case lex_string:
11747 					if (lexer.contents() == PUGIXML_TEXT("or"))
11748 						return binary_op_t(ast_op_or, xpath_type_boolean, 1);
11749 					else if (lexer.contents() == PUGIXML_TEXT("and"))
11750 						return binary_op_t(ast_op_and, xpath_type_boolean, 2);
11751 					else if (lexer.contents() == PUGIXML_TEXT("div"))
11752 						return binary_op_t(ast_op_divide, xpath_type_number, 6);
11753 					else if (lexer.contents() == PUGIXML_TEXT("mod"))
11754 						return binary_op_t(ast_op_mod, xpath_type_number, 6);
11755 					else
11756 						return binary_op_t();
11757 
11758 				case lex_equal:
11759 					return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
11760 
11761 				case lex_not_equal:
11762 					return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
11763 
11764 				case lex_less:
11765 					return binary_op_t(ast_op_less, xpath_type_boolean, 4);
11766 
11767 				case lex_greater:
11768 					return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
11769 
11770 				case lex_less_or_equal:
11771 					return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
11772 
11773 				case lex_greater_or_equal:
11774 					return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
11775 
11776 				case lex_plus:
11777 					return binary_op_t(ast_op_add, xpath_type_number, 5);
11778 
11779 				case lex_minus:
11780 					return binary_op_t(ast_op_subtract, xpath_type_number, 5);
11781 
11782 				case lex_multiply:
11783 					return binary_op_t(ast_op_multiply, xpath_type_number, 6);
11784 
11785 				case lex_union:
11786 					return binary_op_t(ast_op_union, xpath_type_node_set, 7);
11787 
11788 				default:
11789 					return binary_op_t();
11790 				}
11791 			}
11792 		};
11793 
parse_expression_recxpath_parser11794 		xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
11795 		{
11796 			binary_op_t op = binary_op_t::parse(_lexer);
11797 
11798 			while (op.asttype != ast_unknown && op.precedence >= limit)
11799 			{
11800 				_lexer.next();
11801 
11802 				xpath_ast_node* rhs = parse_path_or_unary_expression();
11803 				if (!rhs) return 0;
11804 
11805 				binary_op_t nextop = binary_op_t::parse(_lexer);
11806 
11807 				while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
11808 				{
11809 					rhs = parse_expression_rec(rhs, nextop.precedence);
11810 					if (!rhs) return 0;
11811 
11812 					nextop = binary_op_t::parse(_lexer);
11813 				}
11814 
11815 				if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
11816 					return error("Union operator has to be applied to node sets");
11817 
11818 				lhs = alloc_node(op.asttype, op.rettype, lhs, rhs);
11819 				if (!lhs) return 0;
11820 
11821 				op = binary_op_t::parse(_lexer);
11822 			}
11823 
11824 			return lhs;
11825 		}
11826 
11827 		// Expr ::= OrExpr
11828 		// OrExpr ::= AndExpr | OrExpr 'or' AndExpr
11829 		// AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
11830 		// EqualityExpr ::= RelationalExpr
11831 		//					| EqualityExpr '=' RelationalExpr
11832 		//					| EqualityExpr '!=' RelationalExpr
11833 		// RelationalExpr ::= AdditiveExpr
11834 		//					  | RelationalExpr '<' AdditiveExpr
11835 		//					  | RelationalExpr '>' AdditiveExpr
11836 		//					  | RelationalExpr '<=' AdditiveExpr
11837 		//					  | RelationalExpr '>=' AdditiveExpr
11838 		// AdditiveExpr ::= MultiplicativeExpr
11839 		//					| AdditiveExpr '+' MultiplicativeExpr
11840 		//					| AdditiveExpr '-' MultiplicativeExpr
11841 		// MultiplicativeExpr ::= UnaryExpr
11842 		//						  | MultiplicativeExpr '*' UnaryExpr
11843 		//						  | MultiplicativeExpr 'div' UnaryExpr
11844 		//						  | MultiplicativeExpr 'mod' UnaryExpr
parse_expressionxpath_parser11845 		xpath_ast_node* parse_expression(int limit = 0)
11846 		{
11847 			xpath_ast_node* n = parse_path_or_unary_expression();
11848 			if (!n) return 0;
11849 
11850 			return parse_expression_rec(n, limit);
11851 		}
11852 
xpath_parserxpath_parser11853 		xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)
11854 		{
11855 		}
11856 
parsexpath_parser11857 		xpath_ast_node* parse()
11858 		{
11859 			xpath_ast_node* n = parse_expression();
11860 			if (!n) return 0;
11861 
11862 			// check if there are unparsed tokens left
11863 			if (_lexer.current() != lex_eof)
11864 				return error("Incorrect query");
11865 
11866 			return n;
11867 		}
11868 
parsexpath_parser11869 		static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
11870 		{
11871 			xpath_parser parser(query, variables, alloc, result);
11872 
11873 			return parser.parse();
11874 		}
11875 	};
11876 
11877 	struct xpath_query_impl
11878 	{
createxpath_query_impl11879 		static xpath_query_impl* create()
11880 		{
11881 			void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
11882 			if (!memory) return 0;
11883 
11884 			return new (memory) xpath_query_impl();
11885 		}
11886 
destroyxpath_query_impl11887 		static void destroy(xpath_query_impl* impl)
11888 		{
11889 			// free all allocated pages
11890 			impl->alloc.release();
11891 
11892 			// free allocator memory (with the first page)
11893 			xml_memory::deallocate(impl);
11894 		}
11895 
xpath_query_implxpath_query_impl11896 		xpath_query_impl(): root(0), alloc(&block, &oom), oom(false)
11897 		{
11898 			block.next = 0;
11899 			block.capacity = sizeof(block.data);
11900 		}
11901 
11902 		xpath_ast_node* root;
11903 		xpath_allocator alloc;
11904 		xpath_memory_block block;
11905 		bool oom;
11906 	};
11907 
evaluate_node_set_prepare(xpath_query_impl * impl)11908 	PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
11909 	{
11910 		if (!impl) return 0;
11911 
11912 		if (impl->root->rettype() != xpath_type_node_set)
11913 		{
11914 		#ifdef PUGIXML_NO_EXCEPTIONS
11915 			return 0;
11916 		#else
11917 			xpath_parse_result res;
11918 			res.error = "Expression does not evaluate to node set";
11919 
11920 			throw xpath_exception(res);
11921 		#endif
11922 		}
11923 
11924 		return impl->root;
11925 	}
11926 PUGI__NS_END
11927 
11928 namespace pugi
11929 {
11930 #ifndef PUGIXML_NO_EXCEPTIONS
xpath_exception(const xpath_parse_result & result_)11931 	PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
11932 	{
11933 		assert(_result.error);
11934 	}
11935 
	// Returns the error message stored in the wrapped parse result.
	PUGI__FN const char* xpath_exception::what() const throw()
	{
		return _result.error;
	}
11940 
	// Returns the parse result this exception was constructed from.
	PUGI__FN const xpath_parse_result& xpath_exception::result() const
	{
		return _result;
	}
11945 #endif
11946 
	// Default constructor: both the node and the attribute members are default-constructed.
	PUGI__FN xpath_node::xpath_node()
	{
	}
11950 
xpath_node(const xml_node & node_)11951 	PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
11952 	{
11953 	}
11954 
xpath_node(const xml_attribute & attribute_,const xml_node & parent_)11955 	PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
11956 	{
11957 	}
11958 
node() const11959 	PUGI__FN xml_node xpath_node::node() const
11960 	{
11961 		return _attribute ? xml_node() : _node;
11962 	}
11963 
	// Returns the stored attribute handle.
	PUGI__FN xml_attribute xpath_node::attribute() const
	{
		return _attribute;
	}
11968 
parent() const11969 	PUGI__FN xml_node xpath_node::parent() const
11970 	{
11971 		return _attribute ? _node : _node.parent();
11972 	}
11973 
	// Intentionally empty function; its address is the non-null value returned by xpath_node's unspecified_bool_type conversion.
	PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
	{
	}
11977 
operator xpath_node::unspecified_bool_type() const11978 	PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
11979 	{
11980 		return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
11981 	}
11982 
operator !() const11983 	PUGI__FN bool xpath_node::operator!() const
11984 	{
11985 		return !(_node || _attribute);
11986 	}
11987 
operator ==(const xpath_node & n) const11988 	PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
11989 	{
11990 		return _node == n._node && _attribute == n._attribute;
11991 	}
11992 
operator !=(const xpath_node & n) const11993 	PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
11994 	{
11995 		return _node != n._node || _attribute != n._attribute;
11996 	}
11997 
11998 #ifdef __BORLANDC__
	// Borland-only overload (compiled under __BORLANDC__): combines the boolean value of an xpath_node with a bool.
	PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
	{
		return (bool)lhs && rhs;
	}
12003 
	// Borland-only overload (compiled under __BORLANDC__): combines the boolean value of an xpath_node with a bool.
	PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
	{
		return (bool)lhs || rhs;
	}
12008 #endif
12009 
_assign(const_iterator begin_,const_iterator end_,type_t type_)12010 	PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_)
12011 	{
12012 		assert(begin_ <= end_);
12013 
12014 		size_t size_ = static_cast<size_t>(end_ - begin_);
12015 
12016 		if (size_ <= 1)
12017 		{
12018 			// deallocate old buffer
12019 			if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
12020 
12021 			// use internal buffer
12022 			if (begin_ != end_) _storage = *begin_;
12023 
12024 			_begin = &_storage;
12025 			_end = &_storage + size_;
12026 			_type = type_;
12027 		}
12028 		else
12029 		{
12030 			// make heap copy
12031 			xpath_node* storage = static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
12032 
12033 			if (!storage)
12034 			{
12035 			#ifdef PUGIXML_NO_EXCEPTIONS
12036 				return;
12037 			#else
12038 				throw std::bad_alloc();
12039 			#endif
12040 			}
12041 
12042 			memcpy(storage, begin_, size_ * sizeof(xpath_node));
12043 
12044 			// deallocate old buffer
12045 			if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
12046 
12047 			// finalize
12048 			_begin = storage;
12049 			_end = storage + size_;
12050 			_type = type_;
12051 		}
12052 	}
12053 
12054 #ifdef PUGIXML_HAS_MOVE
_move(xpath_node_set & rhs)12055 	PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) PUGIXML_NOEXCEPT
12056 	{
12057 		_type = rhs._type;
12058 		_storage = rhs._storage;
12059 		_begin = (rhs._begin == &rhs._storage) ? &_storage : rhs._begin;
12060 		_end = _begin + (rhs._end - rhs._begin);
12061 
12062 		rhs._type = type_unsorted;
12063 		rhs._begin = &rhs._storage;
12064 		rhs._end = rhs._begin;
12065 	}
12066 #endif
12067 
xpath_node_set()12068 	PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage)
12069 	{
12070 	}
12071 
xpath_node_set(const_iterator begin_,const_iterator end_,type_t type_)12072 	PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(&_storage), _end(&_storage)
12073 	{
12074 		_assign(begin_, end_, type_);
12075 	}
12076 
~xpath_node_set()12077 	PUGI__FN xpath_node_set::~xpath_node_set()
12078 	{
12079 		if (_begin != &_storage)
12080 			impl::xml_memory::deallocate(_begin);
12081 	}
12082 
xpath_node_set(const xpath_node_set & ns)12083 	PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(&_storage), _end(&_storage)
12084 	{
12085 		_assign(ns._begin, ns._end, ns._type);
12086 	}
12087 
operator =(const xpath_node_set & ns)12088 	PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
12089 	{
12090 		if (this == &ns) return *this;
12091 
12092 		_assign(ns._begin, ns._end, ns._type);
12093 
12094 		return *this;
12095 	}
12096 
12097 #ifdef PUGIXML_HAS_MOVE
xpath_node_set(xpath_node_set && rhs)12098 	PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT: _type(type_unsorted), _begin(&_storage), _end(&_storage)
12099 	{
12100 		_move(rhs);
12101 	}
12102 
operator =(xpath_node_set && rhs)12103 	PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT
12104 	{
12105 		if (this == &rhs) return *this;
12106 
12107 		if (_begin != &_storage)
12108 			impl::xml_memory::deallocate(_begin);
12109 
12110 		_move(rhs);
12111 
12112 		return *this;
12113 	}
12114 #endif
12115 
	// Returns the stored collection type (ordering state) of the set.
	PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
	{
		return _type;
	}
12120 
size() const12121 	PUGI__FN size_t xpath_node_set::size() const
12122 	{
12123 		return _end - _begin;
12124 	}
12125 
empty() const12126 	PUGI__FN bool xpath_node_set::empty() const
12127 	{
12128 		return _begin == _end;
12129 	}
12130 
operator [](size_t index) const12131 	PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
12132 	{
12133 		assert(index < size());
12134 		return _begin[index];
12135 	}
12136 
	// Returns an iterator to the first node of the set.
	PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
	{
		return _begin;
	}
12141 
	// Returns the past-the-end iterator of the set.
	PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
	{
		return _end;
	}
12146 
sort(bool reverse)12147 	PUGI__FN void xpath_node_set::sort(bool reverse)
12148 	{
12149 		_type = impl::xpath_sort(_begin, _end, _type, reverse);
12150 	}
12151 
	// Returns the result of impl::xpath_first for the current range and collection type.
	PUGI__FN xpath_node xpath_node_set::first() const
	{
		return impl::xpath_first(_begin, _end, _type);
	}
12156 
xpath_parse_result()12157 	PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
12158 	{
12159 	}
12160 
operator bool() const12161 	PUGI__FN xpath_parse_result::operator bool() const
12162 	{
12163 		return error == 0;
12164 	}
12165 
description() const12166 	PUGI__FN const char* xpath_parse_result::description() const
12167 	{
12168 		return error ? error : "No error";
12169 	}
12170 
xpath_variable(xpath_value_type type_)12171 	PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0)
12172 	{
12173 	}
12174 
name() const12175 	PUGI__FN const char_t* xpath_variable::name() const
12176 	{
12177 		switch (_type)
12178 		{
12179 		case xpath_type_node_set:
12180 			return static_cast<const impl::xpath_variable_node_set*>(this)->name;
12181 
12182 		case xpath_type_number:
12183 			return static_cast<const impl::xpath_variable_number*>(this)->name;
12184 
12185 		case xpath_type_string:
12186 			return static_cast<const impl::xpath_variable_string*>(this)->name;
12187 
12188 		case xpath_type_boolean:
12189 			return static_cast<const impl::xpath_variable_boolean*>(this)->name;
12190 
12191 		default:
12192 			assert(false && "Invalid variable type"); // unreachable
12193 			return 0;
12194 		}
12195 	}
12196 
	// Returns the value type this variable was created with.
	PUGI__FN xpath_value_type xpath_variable::type() const
	{
		return _type;
	}
12201 
get_boolean() const12202 	PUGI__FN bool xpath_variable::get_boolean() const
12203 	{
12204 		return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
12205 	}
12206 
get_number() const12207 	PUGI__FN double xpath_variable::get_number() const
12208 	{
12209 		return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
12210 	}
12211 
get_string() const12212 	PUGI__FN const char_t* xpath_variable::get_string() const
12213 	{
12214 		const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
12215 		return value ? value : PUGIXML_TEXT("");
12216 	}
12217 
get_node_set() const12218 	PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
12219 	{
12220 		return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
12221 	}
12222 
set(bool value)12223 	PUGI__FN bool xpath_variable::set(bool value)
12224 	{
12225 		if (_type != xpath_type_boolean) return false;
12226 
12227 		static_cast<impl::xpath_variable_boolean*>(this)->value = value;
12228 		return true;
12229 	}
12230 
set(double value)12231 	PUGI__FN bool xpath_variable::set(double value)
12232 	{
12233 		if (_type != xpath_type_number) return false;
12234 
12235 		static_cast<impl::xpath_variable_number*>(this)->value = value;
12236 		return true;
12237 	}
12238 
set(const char_t * value)12239 	PUGI__FN bool xpath_variable::set(const char_t* value)
12240 	{
12241 		if (_type != xpath_type_string) return false;
12242 
12243 		impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
12244 
12245 		// duplicate string
12246 		size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
12247 
12248 		char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
12249 		if (!copy) return false;
12250 
12251 		memcpy(copy, value, size);
12252 
12253 		// replace old string
12254 		if (var->value) impl::xml_memory::deallocate(var->value);
12255 		var->value = copy;
12256 
12257 		return true;
12258 	}
12259 
set(const xpath_node_set & value)12260 	PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
12261 	{
12262 		if (_type != xpath_type_node_set) return false;
12263 
12264 		static_cast<impl::xpath_variable_node_set*>(this)->value = value;
12265 		return true;
12266 	}
12267 
xpath_variable_set()12268 	PUGI__FN xpath_variable_set::xpath_variable_set()
12269 	{
12270 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12271 			_data[i] = 0;
12272 	}
12273 
~xpath_variable_set()12274 	PUGI__FN xpath_variable_set::~xpath_variable_set()
12275 	{
12276 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12277 			_destroy(_data[i]);
12278 	}
12279 
xpath_variable_set(const xpath_variable_set & rhs)12280 	PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
12281 	{
12282 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12283 			_data[i] = 0;
12284 
12285 		_assign(rhs);
12286 	}
12287 
operator =(const xpath_variable_set & rhs)12288 	PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
12289 	{
12290 		if (this == &rhs) return *this;
12291 
12292 		_assign(rhs);
12293 
12294 		return *this;
12295 	}
12296 
12297 #ifdef PUGIXML_HAS_MOVE
xpath_variable_set(xpath_variable_set && rhs)12298 	PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
12299 	{
12300 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12301 		{
12302 			_data[i] = rhs._data[i];
12303 			rhs._data[i] = 0;
12304 		}
12305 	}
12306 
operator =(xpath_variable_set && rhs)12307 	PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
12308 	{
12309 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12310 		{
12311 			_destroy(_data[i]);
12312 
12313 			_data[i] = rhs._data[i];
12314 			rhs._data[i] = 0;
12315 		}
12316 
12317 		return *this;
12318 	}
12319 #endif
12320 
_assign(const xpath_variable_set & rhs)12321 	PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
12322 	{
12323 		xpath_variable_set temp;
12324 
12325 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12326 			if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i]))
12327 				return;
12328 
12329 		_swap(temp);
12330 	}
12331 
_swap(xpath_variable_set & rhs)12332 	PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
12333 	{
12334 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12335 		{
12336 			xpath_variable* chain = _data[i];
12337 
12338 			_data[i] = rhs._data[i];
12339 			rhs._data[i] = chain;
12340 		}
12341 	}
12342 
_find(const char_t * name) const12343 	PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const
12344 	{
12345 		const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12346 		size_t hash = impl::hash_string(name) % hash_size;
12347 
12348 		// look for existing variable
12349 		for (xpath_variable* var = _data[hash]; var; var = var->_next)
12350 			if (impl::strequal(var->name(), name))
12351 				return var;
12352 
12353 		return 0;
12354 	}
12355 
_clone(xpath_variable * var,xpath_variable ** out_result)12356 	PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result)
12357 	{
12358 		xpath_variable* last = 0;
12359 
12360 		while (var)
12361 		{
12362 			// allocate storage for new variable
12363 			xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name());
12364 			if (!nvar) return false;
12365 
12366 			// link the variable to the result immediately to handle failures gracefully
12367 			if (last)
12368 				last->_next = nvar;
12369 			else
12370 				*out_result = nvar;
12371 
12372 			last = nvar;
12373 
12374 			// copy the value; this can fail due to out-of-memory conditions
12375 			if (!impl::copy_xpath_variable(nvar, var)) return false;
12376 
12377 			var = var->_next;
12378 		}
12379 
12380 		return true;
12381 	}
12382 
_destroy(xpath_variable * var)12383 	PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var)
12384 	{
12385 		while (var)
12386 		{
12387 			xpath_variable* next = var->_next;
12388 
12389 			impl::delete_xpath_variable(var->_type, var);
12390 
12391 			var = next;
12392 		}
12393 	}
12394 
add(const char_t * name,xpath_value_type type)12395 	PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
12396 	{
12397 		const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12398 		size_t hash = impl::hash_string(name) % hash_size;
12399 
12400 		// look for existing variable
12401 		for (xpath_variable* var = _data[hash]; var; var = var->_next)
12402 			if (impl::strequal(var->name(), name))
12403 				return var->type() == type ? var : 0;
12404 
12405 		// add new variable
12406 		xpath_variable* result = impl::new_xpath_variable(type, name);
12407 
12408 		if (result)
12409 		{
12410 			result->_next = _data[hash];
12411 
12412 			_data[hash] = result;
12413 		}
12414 
12415 		return result;
12416 	}
12417 
set(const char_t * name,bool value)12418 	PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
12419 	{
12420 		xpath_variable* var = add(name, xpath_type_boolean);
12421 		return var ? var->set(value) : false;
12422 	}
12423 
set(const char_t * name,double value)12424 	PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
12425 	{
12426 		xpath_variable* var = add(name, xpath_type_number);
12427 		return var ? var->set(value) : false;
12428 	}
12429 
set(const char_t * name,const char_t * value)12430 	PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
12431 	{
12432 		xpath_variable* var = add(name, xpath_type_string);
12433 		return var ? var->set(value) : false;
12434 	}
12435 
set(const char_t * name,const xpath_node_set & value)12436 	PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
12437 	{
12438 		xpath_variable* var = add(name, xpath_type_node_set);
12439 		return var ? var->set(value) : false;
12440 	}
12441 
	// Returns the variable with the given name, or 0 if the set has no such variable.
	PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
	{
		return _find(name);
	}
12446 
	// Const overload: returns the variable with the given name, or 0 if the set has no such variable.
	PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
	{
		return _find(name);
	}
12451 
xpath_query(const char_t * query,xpath_variable_set * variables)12452 	PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
12453 	{
12454 		impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
12455 
12456 		if (!qimpl)
12457 		{
12458 		#ifdef PUGIXML_NO_EXCEPTIONS
12459 			_result.error = "Out of memory";
12460 		#else
12461 			throw std::bad_alloc();
12462 		#endif
12463 		}
12464 		else
12465 		{
12466 			using impl::auto_deleter; // MSVC7 workaround
12467 			auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy);
12468 
12469 			qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
12470 
12471 			if (qimpl->root)
12472 			{
12473 				qimpl->root->optimize(&qimpl->alloc);
12474 
12475 				_impl = impl.release();
12476 				_result.error = 0;
12477 			}
12478 			else
12479 			{
12480 			#ifdef PUGIXML_NO_EXCEPTIONS
12481 				if (qimpl->oom) _result.error = "Out of memory";
12482 			#else
12483 				if (qimpl->oom) throw std::bad_alloc();
12484 				throw xpath_exception(_result);
12485 			#endif
12486 			}
12487 		}
12488 	}
12489 
xpath_query()12490 	PUGI__FN xpath_query::xpath_query(): _impl(0)
12491 	{
12492 	}
12493 
~xpath_query()12494 	PUGI__FN xpath_query::~xpath_query()
12495 	{
12496 		if (_impl)
12497 			impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12498 	}
12499 
12500 #ifdef PUGIXML_HAS_MOVE
xpath_query(xpath_query && rhs)12501 	PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT
12502 	{
12503 		_impl = rhs._impl;
12504 		_result = rhs._result;
12505 		rhs._impl = 0;
12506 		rhs._result = xpath_parse_result();
12507 	}
12508 
operator =(xpath_query && rhs)12509 	PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT
12510 	{
12511 		if (this == &rhs) return *this;
12512 
12513 		if (_impl)
12514 			impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12515 
12516 		_impl = rhs._impl;
12517 		_result = rhs._result;
12518 		rhs._impl = 0;
12519 		rhs._result = xpath_parse_result();
12520 
12521 		return *this;
12522 	}
12523 #endif
12524 
return_type() const12525 	PUGI__FN xpath_value_type xpath_query::return_type() const
12526 	{
12527 		if (!_impl) return xpath_type_none;
12528 
12529 		return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
12530 	}
12531 
evaluate_boolean(const xpath_node & n) const12532 	PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
12533 	{
12534 		if (!_impl) return false;
12535 
12536 		impl::xpath_context c(n, 1, 1);
12537 		impl::xpath_stack_data sd;
12538 
12539 		bool r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
12540 
12541 		if (sd.oom)
12542 		{
12543 		#ifdef PUGIXML_NO_EXCEPTIONS
12544 			return false;
12545 		#else
12546 			throw std::bad_alloc();
12547 		#endif
12548 		}
12549 
12550 		return r;
12551 	}
12552 
evaluate_number(const xpath_node & n) const12553 	PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
12554 	{
12555 		if (!_impl) return impl::gen_nan();
12556 
12557 		impl::xpath_context c(n, 1, 1);
12558 		impl::xpath_stack_data sd;
12559 
12560 		double r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
12561 
12562 		if (sd.oom)
12563 		{
12564 		#ifdef PUGIXML_NO_EXCEPTIONS
12565 			return impl::gen_nan();
12566 		#else
12567 			throw std::bad_alloc();
12568 		#endif
12569 		}
12570 
12571 		return r;
12572 	}
12573 
12574 #ifndef PUGIXML_NO_STL
evaluate_string(const xpath_node & n) const12575 	PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
12576 	{
12577 		if (!_impl) return string_t();
12578 
12579 		impl::xpath_context c(n, 1, 1);
12580 		impl::xpath_stack_data sd;
12581 
12582 		impl::xpath_string r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack);
12583 
12584 		if (sd.oom)
12585 		{
12586 		#ifdef PUGIXML_NO_EXCEPTIONS
12587 			return string_t();
12588 		#else
12589 			throw std::bad_alloc();
12590 		#endif
12591 		}
12592 
12593 		return string_t(r.c_str(), r.length());
12594 	}
12595 #endif
12596 
evaluate_string(char_t * buffer,size_t capacity,const xpath_node & n) const12597 	PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
12598 	{
12599 		impl::xpath_context c(n, 1, 1);
12600 		impl::xpath_stack_data sd;
12601 
12602 		impl::xpath_string r = _impl ? static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack) : impl::xpath_string();
12603 
12604 		if (sd.oom)
12605 		{
12606 		#ifdef PUGIXML_NO_EXCEPTIONS
12607 			r = impl::xpath_string();
12608 		#else
12609 			throw std::bad_alloc();
12610 		#endif
12611 		}
12612 
12613 		size_t full_size = r.length() + 1;
12614 
12615 		if (capacity > 0)
12616 		{
12617 			size_t size = (full_size < capacity) ? full_size : capacity;
12618 			assert(size > 0);
12619 
12620 			memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
12621 			buffer[size - 1] = 0;
12622 		}
12623 
12624 		return full_size;
12625 	}
12626 
evaluate_node_set(const xpath_node & n) const12627 	PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
12628 	{
12629 		impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12630 		if (!root) return xpath_node_set();
12631 
12632 		impl::xpath_context c(n, 1, 1);
12633 		impl::xpath_stack_data sd;
12634 
12635 		impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
12636 
12637 		if (sd.oom)
12638 		{
12639 		#ifdef PUGIXML_NO_EXCEPTIONS
12640 			return xpath_node_set();
12641 		#else
12642 			throw std::bad_alloc();
12643 		#endif
12644 		}
12645 
12646 		return xpath_node_set(r.begin(), r.end(), r.type());
12647 	}
12648 
evaluate_node(const xpath_node & n) const12649 	PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
12650 	{
12651 		impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12652 		if (!root) return xpath_node();
12653 
12654 		impl::xpath_context c(n, 1, 1);
12655 		impl::xpath_stack_data sd;
12656 
12657 		impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
12658 
12659 		if (sd.oom)
12660 		{
12661 		#ifdef PUGIXML_NO_EXCEPTIONS
12662 			return xpath_node();
12663 		#else
12664 			throw std::bad_alloc();
12665 		#endif
12666 		}
12667 
12668 		return r.first();
12669 	}
12670 
result() const12671 	PUGI__FN const xpath_parse_result& xpath_query::result() const
12672 	{
12673 		return _result;
12674 	}
12675 
unspecified_bool_xpath_query(xpath_query ***)12676 	PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
12677 	{
12678 	}
12679 
operator xpath_query::unspecified_bool_type() const12680 	PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
12681 	{
12682 		return _impl ? unspecified_bool_xpath_query : 0;
12683 	}
12684 
operator !() const12685 	PUGI__FN bool xpath_query::operator!() const
12686 	{
12687 		return !_impl;
12688 	}
12689 
select_node(const char_t * query,xpath_variable_set * variables) const12690 	PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
12691 	{
12692 		xpath_query q(query, variables);
12693 		return q.evaluate_node(*this);
12694 	}
12695 
select_node(const xpath_query & query) const12696 	PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const
12697 	{
12698 		return query.evaluate_node(*this);
12699 	}
12700 
select_nodes(const char_t * query,xpath_variable_set * variables) const12701 	PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
12702 	{
12703 		xpath_query q(query, variables);
12704 		return q.evaluate_node_set(*this);
12705 	}
12706 
select_nodes(const xpath_query & query) const12707 	PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
12708 	{
12709 		return query.evaluate_node_set(*this);
12710 	}
12711 
select_single_node(const char_t * query,xpath_variable_set * variables) const12712 	PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
12713 	{
12714 		xpath_query q(query, variables);
12715 		return q.evaluate_node(*this);
12716 	}
12717 
select_single_node(const xpath_query & query) const12718 	PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
12719 	{
12720 		return query.evaluate_node(*this);
12721 	}
12722 }
12723 
12724 #endif
12725 
12726 #ifdef __BORLANDC__
12727 #	pragma option pop
12728 #endif
12729 
12730 // Intel C++ does not properly keep warning state for function templates,
12731 // so popping warning state at the end of translation unit leads to warnings in the middle.
12732 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
12733 #	pragma warning(pop)
12734 #endif
12735 
12736 #if defined(_MSC_VER) && defined(__c2__)
12737 #	pragma clang diagnostic pop
12738 #endif
12739 
12740 // Undefine all local macros (makes sure we're not leaking macros in header-only mode)
12741 #undef PUGI__NO_INLINE
12742 #undef PUGI__UNLIKELY
12743 #undef PUGI__STATIC_ASSERT
12744 #undef PUGI__DMC_VOLATILE
12745 #undef PUGI__UNSIGNED_OVERFLOW
12746 #undef PUGI__MSVC_CRT_VERSION
12747 #undef PUGI__SNPRINTF
12748 #undef PUGI__NS_BEGIN
12749 #undef PUGI__NS_END
12750 #undef PUGI__FN
12751 #undef PUGI__FN_NO_INLINE
12752 #undef PUGI__GETHEADER_IMPL
12753 #undef PUGI__GETPAGE_IMPL
12754 #undef PUGI__GETPAGE
12755 #undef PUGI__NODETYPE
12756 #undef PUGI__IS_CHARTYPE_IMPL
12757 #undef PUGI__IS_CHARTYPE
12758 #undef PUGI__IS_CHARTYPEX
12759 #undef PUGI__ENDSWITH
12760 #undef PUGI__SKIPWS
12761 #undef PUGI__OPTSET
12762 #undef PUGI__PUSHNODE
12763 #undef PUGI__POPNODE
12764 #undef PUGI__SCANFOR
12765 #undef PUGI__SCANWHILE
12766 #undef PUGI__SCANWHILE_UNROLL
12767 #undef PUGI__ENDSEG
12768 #undef PUGI__THROW_ERROR
12769 #undef PUGI__CHECK_ERROR
12770 
12771 #endif
12772 
12773 /**
12774  * Copyright (c) 2006-2018 Arseny Kapoulkine
12775  *
12776  * Permission is hereby granted, free of charge, to any person
12777  * obtaining a copy of this software and associated documentation
12778  * files (the "Software"), to deal in the Software without
12779  * restriction, including without limitation the rights to use,
12780  * copy, modify, merge, publish, distribute, sublicense, and/or sell
12781  * copies of the Software, and to permit persons to whom the
12782  * Software is furnished to do so, subject to the following
12783  * conditions:
12784  *
12785  * The above copyright notice and this permission notice shall be
12786  * included in all copies or substantial portions of the Software.
12787  *
12788  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
12789  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
12790  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12791  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
12792  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
12793  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
12794  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
12795  * OTHER DEALINGS IN THE SOFTWARE.
12796  */
12797