1 /**
2  * pugixml parser - version 1.11
3  * --------------------------------------------------------
4  * Copyright (C) 2006-2020, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
5  * Report bugs and download new versions at https://pugixml.org/
6  *
7  * This library is distributed under the MIT License. See notice at the end
8  * of this file.
9  *
10  * This work is based on the pugxml parser, which is:
11  * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
12  */
13 
14 #ifndef SOURCE_PUGIXML_CPP
15 #define SOURCE_PUGIXML_CPP
16 
17 #include "pugixml.hpp"
18 
19 #include <stdlib.h>
20 #include <stdio.h>
21 #include <string.h>
22 #include <assert.h>
23 #include <limits.h>
24 
25 #ifdef PUGIXML_WCHAR_MODE
26 #	include <wchar.h>
27 #endif
28 
29 #ifndef PUGIXML_NO_XPATH
30 #	include <math.h>
31 #	include <float.h>
32 #endif
33 
34 #ifndef PUGIXML_NO_STL
35 #	include <istream>
36 #	include <ostream>
37 #	include <string>
38 #endif
39 
40 // For placement new
41 #include <new>
42 
43 #ifdef _MSC_VER
44 #	pragma warning(push)
45 #	pragma warning(disable: 4127) // conditional expression is constant
46 #	pragma warning(disable: 4324) // structure was padded due to __declspec(align())
47 #	pragma warning(disable: 4702) // unreachable code
48 #	pragma warning(disable: 4996) // this function or variable may be unsafe
49 #endif
50 
51 #if defined(_MSC_VER) && defined(__c2__)
52 #	pragma clang diagnostic push
53 #	pragma clang diagnostic ignored "-Wdeprecated" // this function or variable may be unsafe
54 #endif
55 
56 #ifdef __INTEL_COMPILER
57 #	pragma warning(disable: 177) // function was declared but never referenced
58 #	pragma warning(disable: 279) // controlling expression is constant
59 #	pragma warning(disable: 1478 1786) // function was declared "deprecated"
60 #	pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
61 #endif
62 
63 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
64 #	pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
65 #endif
66 
67 #ifdef __BORLANDC__
68 #	pragma option push
69 #	pragma warn -8008 // condition is always false
70 #	pragma warn -8066 // unreachable code
71 #endif
72 
73 #ifdef __SNC__
74 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
75 #	pragma diag_suppress=178 // function was declared but never referenced
76 #	pragma diag_suppress=237 // controlling expression is constant
77 #endif
78 
79 #ifdef __TI_COMPILER_VERSION__
80 #	pragma diag_suppress 179 // function was declared but never referenced
81 #endif
82 
83 // Inlining controls
84 #if defined(_MSC_VER) && _MSC_VER >= 1300
85 #	define PUGI__NO_INLINE __declspec(noinline)
86 #elif defined(__GNUC__)
87 #	define PUGI__NO_INLINE __attribute__((noinline))
88 #else
89 #	define PUGI__NO_INLINE
90 #endif
91 
92 // Branch weight controls
93 #if defined(__GNUC__) && !defined(__c2__)
94 #	define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
95 #else
96 #	define PUGI__UNLIKELY(cond) (cond)
97 #endif
98 
99 // Simple static assertion
100 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
101 
102 // Digital Mars C++ bug workaround for passing char loaded from memory via stack
103 #ifdef __DMC__
104 #	define PUGI__DMC_VOLATILE volatile
105 #else
106 #	define PUGI__DMC_VOLATILE
107 #endif
108 
109 // Integer sanitizer workaround; we only apply this for clang since gcc8 has no_sanitize but not unsigned-integer-overflow and produces "attribute directive ignored" warnings
110 #if defined(__clang__) && defined(__has_attribute)
111 #	if __has_attribute(no_sanitize)
112 #		define PUGI__UNSIGNED_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow")))
113 #	else
114 #		define PUGI__UNSIGNED_OVERFLOW
115 #	endif
116 #else
117 #	define PUGI__UNSIGNED_OVERFLOW
118 #endif
119 
120 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
121 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
122 using std::memcpy;
123 using std::memmove;
124 using std::memset;
125 #endif
126 
127 // Some MinGW/GCC versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions from limits.h in some configurations
128 #if defined(PUGIXML_HAS_LONG_LONG) && defined(__GNUC__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX)
129 #	define LLONG_MIN (-LLONG_MAX - 1LL)
130 #	define LLONG_MAX __LONG_LONG_MAX__
131 #	define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL)
132 #endif
133 
134 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
135 #if defined(_MSC_VER) && !defined(__S3E__)
136 #	define PUGI__MSVC_CRT_VERSION _MSC_VER
137 #endif
138 
139 // Not all platforms have snprintf; we define a wrapper that uses snprintf if possible. This only works with buffers with a known size.
140 #if __cplusplus >= 201103
141 #	define PUGI__SNPRINTF(buf, ...) snprintf(buf, sizeof(buf), __VA_ARGS__)
142 #elif defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
143 #	define PUGI__SNPRINTF(buf, ...) _snprintf_s(buf, _countof(buf), _TRUNCATE, __VA_ARGS__)
144 #else
145 #	define PUGI__SNPRINTF sprintf
146 #endif
147 
148 // We put implementation details into an anonymous namespace in source mode, but have to keep it in non-anonymous namespace in header-only mode to prevent binary bloat.
149 #ifdef PUGIXML_HEADER_ONLY
150 #	define PUGI__NS_BEGIN namespace pugi { namespace impl {
151 #	define PUGI__NS_END } }
152 #	define PUGI__FN inline
153 #	define PUGI__FN_NO_INLINE inline
154 #else
155 #	if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
156 #		define PUGI__NS_BEGIN namespace pugi { namespace impl {
157 #		define PUGI__NS_END } }
158 #	else
159 #		define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
160 #		define PUGI__NS_END } } }
161 #	endif
162 #	define PUGI__FN
163 #	define PUGI__FN_NO_INLINE PUGI__NO_INLINE
164 #endif
165 
// uintptr_t and fixed-width integer types: old MSVC/Borland get local typedefs, everything else uses <stdint.h>
167 #if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561)
168 namespace pugi
169 {
170 #	ifndef _UINTPTR_T_DEFINED
171 	typedef size_t uintptr_t;
172 #	endif
173 
174 	typedef unsigned __int8 uint8_t;
175 	typedef unsigned __int16 uint16_t;
176 	typedef unsigned __int32 uint32_t;
177 }
178 #else
179 #	include <stdint.h>
180 #endif
181 
182 // Memory allocation
183 PUGI__NS_BEGIN
default_allocate(size_t size)184 	PUGI__FN void* default_allocate(size_t size)
185 	{
186 		return malloc(size);
187 	}
188 
default_deallocate(void * ptr)189 	PUGI__FN void default_deallocate(void* ptr)
190 	{
191 		free(ptr);
192 	}
193 
194 	template <typename T>
195 	struct xml_memory_management_function_storage
196 	{
197 		static allocation_function allocate;
198 		static deallocation_function deallocate;
199 	};
200 
201 	// Global allocation functions are stored in class statics so that in header mode linker deduplicates them
202 	// Without a template<> we'll get multiple definitions of the same static
203 	template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
204 	template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
205 
206 	typedef xml_memory_management_function_storage<int> xml_memory;
207 PUGI__NS_END
208 
209 // String utilities
210 PUGI__NS_BEGIN
211 	// Get string length
strlength(const char_t * s)212 	PUGI__FN size_t strlength(const char_t* s)
213 	{
214 		assert(s);
215 
216 	#ifdef PUGIXML_WCHAR_MODE
217 		return wcslen(s);
218 	#else
219 		return strlen(s);
220 	#endif
221 	}
222 
223 	// Compare two strings
strequal(const char_t * src,const char_t * dst)224 	PUGI__FN bool strequal(const char_t* src, const char_t* dst)
225 	{
226 		assert(src && dst);
227 
228 	#ifdef PUGIXML_WCHAR_MODE
229 		return wcscmp(src, dst) == 0;
230 	#else
231 		return strcmp(src, dst) == 0;
232 	#endif
233 	}
234 
235 	// Compare lhs with [rhs_begin, rhs_end)
strequalrange(const char_t * lhs,const char_t * rhs,size_t count)236 	PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
237 	{
238 		for (size_t i = 0; i < count; ++i)
239 			if (lhs[i] != rhs[i])
240 				return false;
241 
242 		return lhs[count] == 0;
243 	}
244 
245 	// Get length of wide string, even if CRT lacks wide character support
strlength_wide(const wchar_t * s)246 	PUGI__FN size_t strlength_wide(const wchar_t* s)
247 	{
248 		assert(s);
249 
250 	#ifdef PUGIXML_WCHAR_MODE
251 		return wcslen(s);
252 	#else
253 		const wchar_t* end = s;
254 		while (*end) end++;
255 		return static_cast<size_t>(end - s);
256 	#endif
257 	}
258 PUGI__NS_END
259 
260 // auto_ptr-like object for exception recovery
261 PUGI__NS_BEGIN
// Minimal scope guard used for exception recovery (in the spirit of std::auto_ptr).
// Stores a pointer together with the function that disposes of it; the destructor calls that function
// unless ownership was handed back to the caller through release().
template <typename T> struct auto_deleter
{
	typedef void (*D)(T*);

	T* data;
	D deleter;

	// Takes ownership of data_; deleter_ is called on it at scope exit (it is never called for a null pointer)
	auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
	{
	}

	~auto_deleter()
	{
		if (data) deleter(data);
	}

	// Gives up ownership: returns the held pointer and empties the guard so that the destructor does nothing
	T* release()
	{
		T* result = data;
		data = 0;
		return result;
	}

private:
	// A copy would leave two guards owning the same pointer and run the deleter twice, so the copy
	// operations are declared private and intentionally left undefined (pre-C++11 non-copyable idiom)
	auto_deleter(const auto_deleter&);
	auto_deleter& operator=(const auto_deleter&);
};
285 PUGI__NS_END
286 
287 #ifdef PUGIXML_COMPACT
288 PUGI__NS_BEGIN
289 	class compact_hash_table
290 	{
291 	public:
compact_hash_table()292 		compact_hash_table(): _items(0), _capacity(0), _count(0)
293 		{
294 		}
295 
clear()296 		void clear()
297 		{
298 			if (_items)
299 			{
300 				xml_memory::deallocate(_items);
301 				_items = 0;
302 				_capacity = 0;
303 				_count = 0;
304 			}
305 		}
306 
find(const void * key)307 		void* find(const void* key)
308 		{
309 			if (_capacity == 0) return 0;
310 
311 			item_t* item = get_item(key);
312 			assert(item);
313 			assert(item->key == key || (item->key == 0 && item->value == 0));
314 
315 			return item->value;
316 		}
317 
insert(const void * key,void * value)318 		void insert(const void* key, void* value)
319 		{
320 			assert(_capacity != 0 && _count < _capacity - _capacity / 4);
321 
322 			item_t* item = get_item(key);
323 			assert(item);
324 
325 			if (item->key == 0)
326 			{
327 				_count++;
328 				item->key = key;
329 			}
330 
331 			item->value = value;
332 		}
333 
reserve(size_t extra=16)334 		bool reserve(size_t extra = 16)
335 		{
336 			if (_count + extra >= _capacity - _capacity / 4)
337 				return rehash(_count + extra);
338 
339 			return true;
340 		}
341 
342 	private:
343 		struct item_t
344 		{
345 			const void* key;
346 			void* value;
347 		};
348 
349 		item_t* _items;
350 		size_t _capacity;
351 
352 		size_t _count;
353 
354 		bool rehash(size_t count);
355 
get_item(const void * key)356 		item_t* get_item(const void* key)
357 		{
358 			assert(key);
359 			assert(_capacity > 0);
360 
361 			size_t hashmod = _capacity - 1;
362 			size_t bucket = hash(key) & hashmod;
363 
364 			for (size_t probe = 0; probe <= hashmod; ++probe)
365 			{
366 				item_t& probe_item = _items[bucket];
367 
368 				if (probe_item.key == key || probe_item.key == 0)
369 					return &probe_item;
370 
371 				// hash collision, quadratic probing
372 				bucket = (bucket + probe + 1) & hashmod;
373 			}
374 
375 			assert(false && "Hash table is full"); // unreachable
376 			return 0;
377 		}
378 
hash(const void * key)379 		static PUGI__UNSIGNED_OVERFLOW unsigned int hash(const void* key)
380 		{
381 			unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key) & 0xffffffff);
382 
383 			// MurmurHash3 32-bit finalizer
384 			h ^= h >> 16;
385 			h *= 0x85ebca6bu;
386 			h ^= h >> 13;
387 			h *= 0xc2b2ae35u;
388 			h ^= h >> 16;
389 
390 			return h;
391 		}
392 	};
393 
rehash(size_t count)394 	PUGI__FN_NO_INLINE bool compact_hash_table::rehash(size_t count)
395 	{
396 		size_t capacity = 32;
397 		while (count >= capacity - capacity / 4)
398 			capacity *= 2;
399 
400 		compact_hash_table rt;
401 		rt._capacity = capacity;
402 		rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * capacity));
403 
404 		if (!rt._items)
405 			return false;
406 
407 		memset(rt._items, 0, sizeof(item_t) * capacity);
408 
409 		for (size_t i = 0; i < _capacity; ++i)
410 			if (_items[i].key)
411 				rt.insert(_items[i].key, _items[i].value);
412 
413 		if (_items)
414 			xml_memory::deallocate(_items);
415 
416 		_capacity = capacity;
417 		_items = rt._items;
418 
419 		assert(_count == rt._count);
420 
421 		return true;
422 	}
423 
424 PUGI__NS_END
425 #endif
426 
427 PUGI__NS_BEGIN
// Granularity of the page allocator's bookkeeping: string headers store offsets and sizes in these units
#ifdef PUGIXML_COMPACT
static const uintptr_t xml_memory_block_alignment = 4;
#else
static const uintptr_t xml_memory_block_alignment = sizeof(void*);
#endif

// Flag bits stored in the low byte of an object header; the low four bits hold the node type
static const uintptr_t xml_memory_page_contents_shared_mask = 0x40;
static const uintptr_t xml_memory_page_name_allocated_mask = 0x20;
static const uintptr_t xml_memory_page_value_allocated_mask = 0x10;
static const uintptr_t xml_memory_page_type_mask = 0x0f;

// combined masks for string uniqueness
static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_contents_shared_mask | xml_memory_page_name_allocated_mask;
static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_contents_shared_mask | xml_memory_page_value_allocated_mask;

#ifdef PUGIXML_COMPACT
	// compact headers are objects that locate their page themselves, so no header value is ever composed
	#define PUGI__GETHEADER_IMPL(object, page, flags) // unused
	#define PUGI__GETPAGE_IMPL(header) (header).get_page()
#else
	// the header keeps the byte distance from the page start in the bits above the low byte, and the flags in the low byte
	#define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags))
	// this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
	#define PUGI__GETPAGE_IMPL(header) static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>(reinterpret_cast<const char*>(&header) - (header >> 8))))
#endif

	// page that owns object n / node type stored in the header of object n
	#define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
	#define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask)
455 
struct xml_allocator;

// Header located at the start of every memory page; the page payload follows it directly.
// Pages form a doubly linked list and count how many payload bytes were handed out and freed.
struct xml_memory_page
{
	// Turns raw memory into an empty, unlinked page header and returns it.
	// memory must be large enough and suitably aligned for xml_memory_page.
	static xml_memory_page* construct(void* memory)
	{
		xml_memory_page* page = static_cast<xml_memory_page*>(memory);

		page->allocator = 0;

		page->prev = 0;
		page->next = 0;

		page->busy_size = 0;
		page->freed_size = 0;

	#ifdef PUGIXML_COMPACT
		page->compact_string_base = 0;
		page->compact_shared_parent = 0;
		page->compact_page_marker = 0;
	#endif

		return page;
	}

	// allocator that this page belongs to
	xml_allocator* allocator;

	// neighbours in the page list
	xml_memory_page* prev;
	xml_memory_page* next;

	// payload bytes handed out so far / payload bytes returned so far
	size_t busy_size;
	size_t freed_size;

#ifdef PUGIXML_COMPACT
	// per-page anchors used by the compact string, parent pointer and header encodings
	char_t* compact_string_base;
	void* compact_shared_parent;
	uint32_t* compact_page_marker;
#endif
};
493 
494 	static const size_t xml_memory_page_size =
495 	#ifdef PUGIXML_MEMORY_PAGE_SIZE
496 		(PUGIXML_MEMORY_PAGE_SIZE)
497 	#else
498 		32768
499 	#endif
500 		- sizeof(xml_memory_page);
501 
// Bookkeeping record stored immediately in front of every string allocated from a page.
// Both fields are expressed in units of xml_memory_block_alignment.
struct xml_memory_string_header
{
	// distance of this header from the start of the page data (page->data)
	uint16_t page_offset;

	// size of header plus string, rounded up to the block alignment; 0 if string occupies whole page
	uint16_t full_size;
};
507 
508 	struct xml_allocator
509 	{
xml_allocatorxml_allocator510 		xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
511 		{
512 		#ifdef PUGIXML_COMPACT
513 			_hash = 0;
514 		#endif
515 		}
516 
allocate_pagexml_allocator517 		xml_memory_page* allocate_page(size_t data_size)
518 		{
519 			size_t size = sizeof(xml_memory_page) + data_size;
520 
521 			// allocate block with some alignment, leaving memory for worst-case padding
522 			void* memory = xml_memory::allocate(size);
523 			if (!memory) return 0;
524 
525 			// prepare page structure
526 			xml_memory_page* page = xml_memory_page::construct(memory);
527 			assert(page);
528 
529 			page->allocator = _root->allocator;
530 
531 			return page;
532 		}
533 
deallocate_pagexml_allocator534 		static void deallocate_page(xml_memory_page* page)
535 		{
536 			xml_memory::deallocate(page);
537 		}
538 
539 		void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
540 
allocate_memoryxml_allocator541 		void* allocate_memory(size_t size, xml_memory_page*& out_page)
542 		{
543 			if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
544 				return allocate_memory_oob(size, out_page);
545 
546 			void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;
547 
548 			_busy_size += size;
549 
550 			out_page = _root;
551 
552 			return buf;
553 		}
554 
555 	#ifdef PUGIXML_COMPACT
allocate_objectxml_allocator556 		void* allocate_object(size_t size, xml_memory_page*& out_page)
557 		{
558 			void* result = allocate_memory(size + sizeof(uint32_t), out_page);
559 			if (!result) return 0;
560 
561 			// adjust for marker
562 			ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);
563 
564 			if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment))
565 			{
566 				// insert new marker
567 				uint32_t* marker = static_cast<uint32_t*>(result);
568 
569 				*marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
570 				out_page->compact_page_marker = marker;
571 
572 				// since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
573 				// this will make sure deallocate_memory correctly tracks the size
574 				out_page->freed_size += sizeof(uint32_t);
575 
576 				return marker + 1;
577 			}
578 			else
579 			{
580 				// roll back uint32_t part
581 				_busy_size -= sizeof(uint32_t);
582 
583 				return result;
584 			}
585 		}
586 	#else
allocate_objectxml_allocator587 		void* allocate_object(size_t size, xml_memory_page*& out_page)
588 		{
589 			return allocate_memory(size, out_page);
590 		}
591 	#endif
592 
deallocate_memoryxml_allocator593 		void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
594 		{
595 			if (page == _root) page->busy_size = _busy_size;
596 
597 			assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
598 			(void)!ptr;
599 
600 			page->freed_size += size;
601 			assert(page->freed_size <= page->busy_size);
602 
603 			if (page->freed_size == page->busy_size)
604 			{
605 				if (page->next == 0)
606 				{
607 					assert(_root == page);
608 
609 					// top page freed, just reset sizes
610 					page->busy_size = 0;
611 					page->freed_size = 0;
612 
613 				#ifdef PUGIXML_COMPACT
614 					// reset compact state to maximize efficiency
615 					page->compact_string_base = 0;
616 					page->compact_shared_parent = 0;
617 					page->compact_page_marker = 0;
618 				#endif
619 
620 					_busy_size = 0;
621 				}
622 				else
623 				{
624 					assert(_root != page);
625 					assert(page->prev);
626 
627 					// remove from the list
628 					page->prev->next = page->next;
629 					page->next->prev = page->prev;
630 
631 					// deallocate
632 					deallocate_page(page);
633 				}
634 			}
635 		}
636 
allocate_stringxml_allocator637 		char_t* allocate_string(size_t length)
638 		{
639 			static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;
640 
641 			PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
642 
643 			// allocate memory for string and header block
644 			size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
645 
646 			// round size up to block alignment boundary
647 			size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
648 
649 			xml_memory_page* page;
650 			xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
651 
652 			if (!header) return 0;
653 
654 			// setup header
655 			ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);
656 
657 			assert(page_offset % xml_memory_block_alignment == 0);
658 			assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
659 			header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);
660 
661 			// full_size == 0 for large strings that occupy the whole page
662 			assert(full_size % xml_memory_block_alignment == 0);
663 			assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
664 			header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
665 
666 			// round-trip through void* to avoid 'cast increases required alignment of target type' warning
667 			// header is guaranteed a pointer-sized alignment, which should be enough for char_t
668 			return static_cast<char_t*>(static_cast<void*>(header + 1));
669 		}
670 
deallocate_stringxml_allocator671 		void deallocate_string(char_t* string)
672 		{
673 			// this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
674 			// we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
675 
676 			// get header
677 			xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
678 			assert(header);
679 
680 			// deallocate
681 			size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
682 			xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
683 
684 			// if full_size == 0 then this string occupies the whole page
685 			size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
686 
687 			deallocate_memory(header, full_size, page);
688 		}
689 
reservexml_allocator690 		bool reserve()
691 		{
692 		#ifdef PUGIXML_COMPACT
693 			return _hash->reserve();
694 		#else
695 			return true;
696 		#endif
697 		}
698 
699 		xml_memory_page* _root;
700 		size_t _busy_size;
701 
702 	#ifdef PUGIXML_COMPACT
703 		compact_hash_table* _hash;
704 	#endif
705 	};
706 
allocate_memory_oob(size_t size,xml_memory_page * & out_page)707 	PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
708 	{
709 		const size_t large_allocation_threshold = xml_memory_page_size / 4;
710 
711 		xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
712 		out_page = page;
713 
714 		if (!page) return 0;
715 
716 		if (size <= large_allocation_threshold)
717 		{
718 			_root->busy_size = _busy_size;
719 
720 			// insert page at the end of linked list
721 			page->prev = _root;
722 			_root->next = page;
723 			_root = page;
724 
725 			_busy_size = size;
726 		}
727 		else
728 		{
729 			// insert page before the end of linked list, so that it is deleted as soon as possible
730 			// the last page is not deleted even if it's empty (see deallocate_memory)
731 			assert(_root->prev);
732 
733 			page->prev = _root->prev;
734 			page->next = _root;
735 
736 			_root->prev->next = page;
737 			_root->prev = page;
738 
739 			page->busy_size = size;
740 		}
741 
742 		return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
743 	}
744 PUGI__NS_END
745 
746 #ifdef PUGIXML_COMPACT
747 PUGI__NS_BEGIN
// Compact pointers and headers store distances in units of compact_alignment bytes (1 << 2 == 4)
static const uintptr_t compact_alignment_log2 = 2;
static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
750 
751 	class compact_header
752 	{
753 	public:
compact_header(xml_memory_page * page,unsigned int flags)754 		compact_header(xml_memory_page* page, unsigned int flags)
755 		{
756 			PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
757 
758 			ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
759 			assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);
760 
761 			_page = static_cast<unsigned char>(offset >> compact_alignment_log2);
762 			_flags = static_cast<unsigned char>(flags);
763 		}
764 
operator &=(uintptr_t mod)765 		void operator&=(uintptr_t mod)
766 		{
767 			_flags &= static_cast<unsigned char>(mod);
768 		}
769 
operator |=(uintptr_t mod)770 		void operator|=(uintptr_t mod)
771 		{
772 			_flags |= static_cast<unsigned char>(mod);
773 		}
774 
operator &(uintptr_t mod) const775 		uintptr_t operator&(uintptr_t mod) const
776 		{
777 			return _flags & mod;
778 		}
779 
get_page() const780 		xml_memory_page* get_page() const
781 		{
782 			// round-trip through void* to silence 'cast increases required alignment of target type' warnings
783 			const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
784 			const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));
785 
786 			return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
787 		}
788 
789 	private:
790 		unsigned char _page;
791 		unsigned char _flags;
792 	};
793 
compact_get_page(const void * object,int header_offset)794 	PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
795 	{
796 		const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset);
797 
798 		return header->get_page();
799 	}
800 
compact_get_value(const void * object)801 	template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object)
802 	{
803 		return static_cast<T*>(compact_get_page(object, header_offset)->allocator->_hash->find(object));
804 	}
805 
compact_set_value(const void * object,T * value)806 	template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value)
807 	{
808 		compact_get_page(object, header_offset)->allocator->_hash->insert(object, value);
809 	}
810 
811 	template <typename T, int header_offset, int start = -126> class compact_pointer
812 	{
813 	public:
compact_pointer()814 		compact_pointer(): _data(0)
815 		{
816 		}
817 
operator =(const compact_pointer & rhs)818 		void operator=(const compact_pointer& rhs)
819 		{
820 			*this = rhs + 0;
821 		}
822 
operator =(T * value)823 		void operator=(T* value)
824 		{
825 			if (value)
826 			{
827 				// value is guaranteed to be compact-aligned; 'this' is not
828 				// our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
829 				// so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
830 				// compensate for arithmetic shift rounding for negative values
831 				ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
832 				ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
833 
834 				if (static_cast<uintptr_t>(offset) <= 253)
835 					_data = static_cast<unsigned char>(offset + 1);
836 				else
837 				{
838 					compact_set_value<header_offset>(this, value);
839 
840 					_data = 255;
841 				}
842 			}
843 			else
844 				_data = 0;
845 		}
846 
operator T*() const847 		operator T*() const
848 		{
849 			if (_data)
850 			{
851 				if (_data < 255)
852 				{
853 					uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
854 
855 					return reinterpret_cast<T*>(base + (_data - 1 + start) * compact_alignment);
856 				}
857 				else
858 					return compact_get_value<header_offset, T>(this);
859 			}
860 			else
861 				return 0;
862 		}
863 
operator ->() const864 		T* operator->() const
865 		{
866 			return *this;
867 		}
868 
869 	private:
870 		unsigned char _data;
871 	};
872 
873 	template <typename T, int header_offset> class compact_pointer_parent
874 	{
875 	public:
compact_pointer_parent()876 		compact_pointer_parent(): _data(0)
877 		{
878 		}
879 
operator =(const compact_pointer_parent & rhs)880 		void operator=(const compact_pointer_parent& rhs)
881 		{
882 			*this = rhs + 0;
883 		}
884 
operator =(T * value)885 		void operator=(T* value)
886 		{
887 			if (value)
888 			{
889 				// value is guaranteed to be compact-aligned; 'this' is not
890 				// our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
891 				// so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
892 				// compensate for arithmetic shift behavior for negative values
893 				ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
894 				ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
895 
896 				if (static_cast<uintptr_t>(offset) <= 65533)
897 				{
898 					_data = static_cast<unsigned short>(offset + 1);
899 				}
900 				else
901 				{
902 					xml_memory_page* page = compact_get_page(this, header_offset);
903 
904 					if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
905 						page->compact_shared_parent = value;
906 
907 					if (page->compact_shared_parent == value)
908 					{
909 						_data = 65534;
910 					}
911 					else
912 					{
913 						compact_set_value<header_offset>(this, value);
914 
915 						_data = 65535;
916 					}
917 				}
918 			}
919 			else
920 			{
921 				_data = 0;
922 			}
923 		}
924 
operator T*() const925 		operator T*() const
926 		{
927 			if (_data)
928 			{
929 				if (_data < 65534)
930 				{
931 					uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
932 
933 					return reinterpret_cast<T*>(base + (_data - 1 - 65533) * compact_alignment);
934 				}
935 				else if (_data == 65534)
936 					return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
937 				else
938 					return compact_get_value<header_offset, T>(this);
939 			}
940 			else
941 				return 0;
942 		}
943 
operator ->() const944 		T* operator->() const
945 		{
946 			return *this;
947 		}
948 
949 	private:
950 		uint16_t _data;
951 	};
952 
953 	template <int header_offset, int base_offset> class compact_string
954 	{
955 	public:
compact_string()956 		compact_string(): _data(0)
957 		{
958 		}
959 
operator =(const compact_string & rhs)960 		void operator=(const compact_string& rhs)
961 		{
962 			*this = rhs + 0;
963 		}
964 
operator =(char_t * value)965 		void operator=(char_t* value)
966 		{
967 			if (value)
968 			{
969 				xml_memory_page* page = compact_get_page(this, header_offset);
970 
971 				if (PUGI__UNLIKELY(page->compact_string_base == 0))
972 					page->compact_string_base = value;
973 
974 				ptrdiff_t offset = value - page->compact_string_base;
975 
976 				if (static_cast<uintptr_t>(offset) < (65535 << 7))
977 				{
978 					// round-trip through void* to silence 'cast increases required alignment of target type' warnings
979 					uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));
980 
981 					if (*base == 0)
982 					{
983 						*base = static_cast<uint16_t>((offset >> 7) + 1);
984 						_data = static_cast<unsigned char>((offset & 127) + 1);
985 					}
986 					else
987 					{
988 						ptrdiff_t remainder = offset - ((*base - 1) << 7);
989 
990 						if (static_cast<uintptr_t>(remainder) <= 253)
991 						{
992 							_data = static_cast<unsigned char>(remainder + 1);
993 						}
994 						else
995 						{
996 							compact_set_value<header_offset>(this, value);
997 
998 							_data = 255;
999 						}
1000 					}
1001 				}
1002 				else
1003 				{
1004 					compact_set_value<header_offset>(this, value);
1005 
1006 					_data = 255;
1007 				}
1008 			}
1009 			else
1010 			{
1011 				_data = 0;
1012 			}
1013 		}
1014 
operator char_t*() const1015 		operator char_t*() const
1016 		{
1017 			if (_data)
1018 			{
1019 				if (_data < 255)
1020 				{
1021 					xml_memory_page* page = compact_get_page(this, header_offset);
1022 
1023 					// round-trip through void* to silence 'cast increases required alignment of target type' warnings
1024 					const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
1025 					assert(*base);
1026 
1027 					ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
1028 
1029 					return page->compact_string_base + offset;
1030 				}
1031 				else
1032 				{
1033 					return compact_get_value<header_offset, char_t>(this);
1034 				}
1035 			}
1036 			else
1037 				return 0;
1038 		}
1039 
1040 	private:
1041 		unsigned char _data;
1042 	};
1043 PUGI__NS_END
1044 #endif
1045 
1046 #ifdef PUGIXML_COMPACT
1047 namespace pugi
1048 {
	// Attribute record used when PUGIXML_COMPACT is defined.
	// The constructor statically asserts that the record occupies exactly 8 bytes, so the member order
	// and the numeric template arguments below have to stay in sync with each other.
	struct xml_attribute_struct
	{
		xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
		{
			PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
		}

		impl::compact_header header;

		// base value shared by name and value; compact_string finds it by stepping base_offset bytes back from its own address
		uint16_t namevalue_base;

		// template arguments are <header_offset, base_offset>
		impl::compact_string<4, 2> name;
		impl::compact_string<5, 3> value;

		// NOTE(review): the numeric argument is presumably the member's byte offset within this struct - confirm against compact_pointer
		impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c; // for the first attribute this refers to the last one (see append_attribute)
		impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute; // null for the last attribute
	};
1066 
	// Node record used when PUGIXML_COMPACT is defined.
	// The constructor statically asserts that the record occupies exactly 12 bytes, so the member order
	// and the numeric template arguments below have to stay in sync with each other.
	struct xml_node_struct
	{
		xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0)
		{
			PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
		}

		impl::compact_header header;

		// base value shared by name and value; compact_string finds it by stepping base_offset bytes back from its own address
		uint16_t namevalue_base;

		// template arguments are <header_offset, base_offset>
		impl::compact_string<4, 2> name;
		impl::compact_string<5, 3> value;

		// NOTE(review): the numeric arguments below are presumably byte offsets of the members within this struct - confirm against compact_pointer
		impl::compact_pointer_parent<xml_node_struct, 6> parent;

		impl::compact_pointer<xml_node_struct, 8, 0> first_child;

		// sibling list: next_sibling is null for the last child, prev_sibling_c of the first child refers to the last child (see append_node)
		impl::compact_pointer<xml_node_struct,  9>    prev_sibling_c;
		impl::compact_pointer<xml_node_struct, 10, 0> next_sibling;

		impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
	};
1090 }
1091 #else
1092 namespace pugi
1093 {
	// Attribute record used when PUGIXML_COMPACT is not defined; all links are plain pointers.
	struct xml_attribute_struct
	{
		// All pointers start out null; header is computed by PUGI__GETHEADER_IMPL from this object, its page and flags 0
		xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0)
		{
			header = PUGI__GETHEADER_IMPL(this, page, 0);
		}

		// also carries the name/value allocated flags tested by destroy_attribute
		uintptr_t header;

		char_t*	name;
		char_t*	value;

		xml_attribute_struct* prev_attribute_c; // for the first attribute this refers to the last one (see append_attribute)
		xml_attribute_struct* next_attribute; // null for the last attribute
	};
1109 
	// Node record used when PUGIXML_COMPACT is not defined; all links are plain pointers.
	struct xml_node_struct
	{
		// All pointers start out null; header is computed by PUGI__GETHEADER_IMPL from this object, its page and the node type
		xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
		{
			header = PUGI__GETHEADER_IMPL(this, page, type);
		}

		// also carries the name/value allocated flags tested by destroy_node
		uintptr_t header;

		char_t* name;
		char_t* value;

		xml_node_struct* parent;

		xml_node_struct* first_child;

		// sibling list: next_sibling is null for the last child, prev_sibling_c of the first child refers to the last child (see append_node)
		xml_node_struct* prev_sibling_c;
		xml_node_struct* next_sibling;

		xml_attribute_struct* first_attribute;
	};
1131 }
1132 #endif
1133 
1134 PUGI__NS_BEGIN
	// Singly linked list node referring to one character buffer; xml_document_struct::extra_buffers holds the list head.
	// NOTE(review): buffer ownership and release are not visible in this part of the file - confirm at the use sites.
	struct xml_extra_buffer
	{
		char_t* buffer;
		xml_extra_buffer* next; // next list node
	};
1140 
	// Document root: it is both the node of type node_document and the allocator (see get_document, which
	// downcasts the page's allocator pointer to this type).
	struct xml_document_struct: public xml_node_struct, public xml_allocator
	{
		xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
		{
		}

		// NOTE(review): presumably the buffer the document was parsed from - confirm at the use sites
		const char_t* buffer;

		// head of the xml_extra_buffer list; starts out empty
		xml_extra_buffer* extra_buffers;

	#ifdef PUGIXML_COMPACT
		// only present in compact builds
		compact_hash_table hash;
	#endif
	};
1155 
get_allocator(const Object * object)1156 	template <typename Object> inline xml_allocator& get_allocator(const Object* object)
1157 	{
1158 		assert(object);
1159 
1160 		return *PUGI__GETPAGE(object)->allocator;
1161 	}
1162 
get_document(const Object * object)1163 	template <typename Object> inline xml_document_struct& get_document(const Object* object)
1164 	{
1165 		assert(object);
1166 
1167 		return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator);
1168 	}
1169 PUGI__NS_END
1170 
1171 // Low-level DOM operations
1172 PUGI__NS_BEGIN
allocate_attribute(xml_allocator & alloc)1173 	inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
1174 	{
1175 		xml_memory_page* page;
1176 		void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
1177 		if (!memory) return 0;
1178 
1179 		return new (memory) xml_attribute_struct(page);
1180 	}
1181 
allocate_node(xml_allocator & alloc,xml_node_type type)1182 	inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
1183 	{
1184 		xml_memory_page* page;
1185 		void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
1186 		if (!memory) return 0;
1187 
1188 		return new (memory) xml_node_struct(page, type);
1189 	}
1190 
destroy_attribute(xml_attribute_struct * a,xml_allocator & alloc)1191 	inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
1192 	{
1193 		if (a->header & impl::xml_memory_page_name_allocated_mask)
1194 			alloc.deallocate_string(a->name);
1195 
1196 		if (a->header & impl::xml_memory_page_value_allocated_mask)
1197 			alloc.deallocate_string(a->value);
1198 
1199 		alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
1200 	}
1201 
destroy_node(xml_node_struct * n,xml_allocator & alloc)1202 	inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
1203 	{
1204 		if (n->header & impl::xml_memory_page_name_allocated_mask)
1205 			alloc.deallocate_string(n->name);
1206 
1207 		if (n->header & impl::xml_memory_page_value_allocated_mask)
1208 			alloc.deallocate_string(n->value);
1209 
1210 		for (xml_attribute_struct* attr = n->first_attribute; attr; )
1211 		{
1212 			xml_attribute_struct* next = attr->next_attribute;
1213 
1214 			destroy_attribute(attr, alloc);
1215 
1216 			attr = next;
1217 		}
1218 
1219 		for (xml_node_struct* child = n->first_child; child; )
1220 		{
1221 			xml_node_struct* next = child->next_sibling;
1222 
1223 			destroy_node(child, alloc);
1224 
1225 			child = next;
1226 		}
1227 
1228 		alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
1229 	}
1230 
append_node(xml_node_struct * child,xml_node_struct * node)1231 	inline void append_node(xml_node_struct* child, xml_node_struct* node)
1232 	{
1233 		child->parent = node;
1234 
1235 		xml_node_struct* head = node->first_child;
1236 
1237 		if (head)
1238 		{
1239 			xml_node_struct* tail = head->prev_sibling_c;
1240 
1241 			tail->next_sibling = child;
1242 			child->prev_sibling_c = tail;
1243 			head->prev_sibling_c = child;
1244 		}
1245 		else
1246 		{
1247 			node->first_child = child;
1248 			child->prev_sibling_c = child;
1249 		}
1250 	}
1251 
prepend_node(xml_node_struct * child,xml_node_struct * node)1252 	inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
1253 	{
1254 		child->parent = node;
1255 
1256 		xml_node_struct* head = node->first_child;
1257 
1258 		if (head)
1259 		{
1260 			child->prev_sibling_c = head->prev_sibling_c;
1261 			head->prev_sibling_c = child;
1262 		}
1263 		else
1264 			child->prev_sibling_c = child;
1265 
1266 		child->next_sibling = head;
1267 		node->first_child = child;
1268 	}
1269 
insert_node_after(xml_node_struct * child,xml_node_struct * node)1270 	inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
1271 	{
1272 		xml_node_struct* parent = node->parent;
1273 
1274 		child->parent = parent;
1275 
1276 		if (node->next_sibling)
1277 			node->next_sibling->prev_sibling_c = child;
1278 		else
1279 			parent->first_child->prev_sibling_c = child;
1280 
1281 		child->next_sibling = node->next_sibling;
1282 		child->prev_sibling_c = node;
1283 
1284 		node->next_sibling = child;
1285 	}
1286 
insert_node_before(xml_node_struct * child,xml_node_struct * node)1287 	inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
1288 	{
1289 		xml_node_struct* parent = node->parent;
1290 
1291 		child->parent = parent;
1292 
1293 		if (node->prev_sibling_c->next_sibling)
1294 			node->prev_sibling_c->next_sibling = child;
1295 		else
1296 			parent->first_child = child;
1297 
1298 		child->prev_sibling_c = node->prev_sibling_c;
1299 		child->next_sibling = node;
1300 
1301 		node->prev_sibling_c = child;
1302 	}
1303 
remove_node(xml_node_struct * node)1304 	inline void remove_node(xml_node_struct* node)
1305 	{
1306 		xml_node_struct* parent = node->parent;
1307 
1308 		if (node->next_sibling)
1309 			node->next_sibling->prev_sibling_c = node->prev_sibling_c;
1310 		else
1311 			parent->first_child->prev_sibling_c = node->prev_sibling_c;
1312 
1313 		if (node->prev_sibling_c->next_sibling)
1314 			node->prev_sibling_c->next_sibling = node->next_sibling;
1315 		else
1316 			parent->first_child = node->next_sibling;
1317 
1318 		node->parent = 0;
1319 		node->prev_sibling_c = 0;
1320 		node->next_sibling = 0;
1321 	}
1322 
append_attribute(xml_attribute_struct * attr,xml_node_struct * node)1323 	inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1324 	{
1325 		xml_attribute_struct* head = node->first_attribute;
1326 
1327 		if (head)
1328 		{
1329 			xml_attribute_struct* tail = head->prev_attribute_c;
1330 
1331 			tail->next_attribute = attr;
1332 			attr->prev_attribute_c = tail;
1333 			head->prev_attribute_c = attr;
1334 		}
1335 		else
1336 		{
1337 			node->first_attribute = attr;
1338 			attr->prev_attribute_c = attr;
1339 		}
1340 	}
1341 
prepend_attribute(xml_attribute_struct * attr,xml_node_struct * node)1342 	inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1343 	{
1344 		xml_attribute_struct* head = node->first_attribute;
1345 
1346 		if (head)
1347 		{
1348 			attr->prev_attribute_c = head->prev_attribute_c;
1349 			head->prev_attribute_c = attr;
1350 		}
1351 		else
1352 			attr->prev_attribute_c = attr;
1353 
1354 		attr->next_attribute = head;
1355 		node->first_attribute = attr;
1356 	}
1357 
insert_attribute_after(xml_attribute_struct * attr,xml_attribute_struct * place,xml_node_struct * node)1358 	inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1359 	{
1360 		if (place->next_attribute)
1361 			place->next_attribute->prev_attribute_c = attr;
1362 		else
1363 			node->first_attribute->prev_attribute_c = attr;
1364 
1365 		attr->next_attribute = place->next_attribute;
1366 		attr->prev_attribute_c = place;
1367 		place->next_attribute = attr;
1368 	}
1369 
insert_attribute_before(xml_attribute_struct * attr,xml_attribute_struct * place,xml_node_struct * node)1370 	inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1371 	{
1372 		if (place->prev_attribute_c->next_attribute)
1373 			place->prev_attribute_c->next_attribute = attr;
1374 		else
1375 			node->first_attribute = attr;
1376 
1377 		attr->prev_attribute_c = place->prev_attribute_c;
1378 		attr->next_attribute = place;
1379 		place->prev_attribute_c = attr;
1380 	}
1381 
remove_attribute(xml_attribute_struct * attr,xml_node_struct * node)1382 	inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1383 	{
1384 		if (attr->next_attribute)
1385 			attr->next_attribute->prev_attribute_c = attr->prev_attribute_c;
1386 		else
1387 			node->first_attribute->prev_attribute_c = attr->prev_attribute_c;
1388 
1389 		if (attr->prev_attribute_c->next_attribute)
1390 			attr->prev_attribute_c->next_attribute = attr->next_attribute;
1391 		else
1392 			node->first_attribute = attr->next_attribute;
1393 
1394 		attr->prev_attribute_c = 0;
1395 		attr->next_attribute = 0;
1396 	}
1397 
append_new_node(xml_node_struct * node,xml_allocator & alloc,xml_node_type type=node_element)1398 	PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
1399 	{
1400 		if (!alloc.reserve()) return 0;
1401 
1402 		xml_node_struct* child = allocate_node(alloc, type);
1403 		if (!child) return 0;
1404 
1405 		append_node(child, node);
1406 
1407 		return child;
1408 	}
1409 
append_new_attribute(xml_node_struct * node,xml_allocator & alloc)1410 	PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
1411 	{
1412 		if (!alloc.reserve()) return 0;
1413 
1414 		xml_attribute_struct* attr = allocate_attribute(alloc);
1415 		if (!attr) return 0;
1416 
1417 		append_attribute(attr, node);
1418 
1419 		return attr;
1420 	}
1421 PUGI__NS_END
1422 
1423 // Helper classes for code generation
1424 PUGI__NS_BEGIN
	// Compile-time "false" tag; templates such as utf16_decoder/utf32_decoder read it through opt_swap::value
	struct opt_false
	{
		enum { value = 0 };
	};
1429 
	// Compile-time "true" tag; templates such as utf16_decoder/utf32_decoder read it through opt_swap::value
	struct opt_true
	{
		enum { value = 1 };
	};
1434 PUGI__NS_END
1435 
1436 // Unicode utilities
1437 PUGI__NS_BEGIN
endian_swap(uint16_t value)1438 	inline uint16_t endian_swap(uint16_t value)
1439 	{
1440 		return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8));
1441 	}
1442 
endian_swap(uint32_t value)1443 	inline uint32_t endian_swap(uint32_t value)
1444 	{
1445 		return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24);
1446 	}
1447 
1448 	struct utf8_counter
1449 	{
1450 		typedef size_t value_type;
1451 
lowutf8_counter1452 		static value_type low(value_type result, uint32_t ch)
1453 		{
1454 			// U+0000..U+007F
1455 			if (ch < 0x80) return result + 1;
1456 			// U+0080..U+07FF
1457 			else if (ch < 0x800) return result + 2;
1458 			// U+0800..U+FFFF
1459 			else return result + 3;
1460 		}
1461 
highutf8_counter1462 		static value_type high(value_type result, uint32_t)
1463 		{
1464 			// U+10000..U+10FFFF
1465 			return result + 4;
1466 		}
1467 	};
1468 
1469 	struct utf8_writer
1470 	{
1471 		typedef uint8_t* value_type;
1472 
lowutf8_writer1473 		static value_type low(value_type result, uint32_t ch)
1474 		{
1475 			// U+0000..U+007F
1476 			if (ch < 0x80)
1477 			{
1478 				*result = static_cast<uint8_t>(ch);
1479 				return result + 1;
1480 			}
1481 			// U+0080..U+07FF
1482 			else if (ch < 0x800)
1483 			{
1484 				result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
1485 				result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1486 				return result + 2;
1487 			}
1488 			// U+0800..U+FFFF
1489 			else
1490 			{
1491 				result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
1492 				result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
1493 				result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1494 				return result + 3;
1495 			}
1496 		}
1497 
highutf8_writer1498 		static value_type high(value_type result, uint32_t ch)
1499 		{
1500 			// U+10000..U+10FFFF
1501 			result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
1502 			result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
1503 			result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
1504 			result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1505 			return result + 4;
1506 		}
1507 
anyutf8_writer1508 		static value_type any(value_type result, uint32_t ch)
1509 		{
1510 			return (ch < 0x10000) ? low(result, ch) : high(result, ch);
1511 		}
1512 	};
1513 
1514 	struct utf16_counter
1515 	{
1516 		typedef size_t value_type;
1517 
lowutf16_counter1518 		static value_type low(value_type result, uint32_t)
1519 		{
1520 			return result + 1;
1521 		}
1522 
highutf16_counter1523 		static value_type high(value_type result, uint32_t)
1524 		{
1525 			return result + 2;
1526 		}
1527 	};
1528 
1529 	struct utf16_writer
1530 	{
1531 		typedef uint16_t* value_type;
1532 
lowutf16_writer1533 		static value_type low(value_type result, uint32_t ch)
1534 		{
1535 			*result = static_cast<uint16_t>(ch);
1536 
1537 			return result + 1;
1538 		}
1539 
highutf16_writer1540 		static value_type high(value_type result, uint32_t ch)
1541 		{
1542 			uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10;
1543 			uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff;
1544 
1545 			result[0] = static_cast<uint16_t>(0xD800 + msh);
1546 			result[1] = static_cast<uint16_t>(0xDC00 + lsh);
1547 
1548 			return result + 2;
1549 		}
1550 
anyutf16_writer1551 		static value_type any(value_type result, uint32_t ch)
1552 		{
1553 			return (ch < 0x10000) ? low(result, ch) : high(result, ch);
1554 		}
1555 	};
1556 
1557 	struct utf32_counter
1558 	{
1559 		typedef size_t value_type;
1560 
lowutf32_counter1561 		static value_type low(value_type result, uint32_t)
1562 		{
1563 			return result + 1;
1564 		}
1565 
highutf32_counter1566 		static value_type high(value_type result, uint32_t)
1567 		{
1568 			return result + 1;
1569 		}
1570 	};
1571 
1572 	struct utf32_writer
1573 	{
1574 		typedef uint32_t* value_type;
1575 
lowutf32_writer1576 		static value_type low(value_type result, uint32_t ch)
1577 		{
1578 			*result = ch;
1579 
1580 			return result + 1;
1581 		}
1582 
highutf32_writer1583 		static value_type high(value_type result, uint32_t ch)
1584 		{
1585 			*result = ch;
1586 
1587 			return result + 1;
1588 		}
1589 
anyutf32_writer1590 		static value_type any(value_type result, uint32_t ch)
1591 		{
1592 			*result = ch;
1593 
1594 			return result + 1;
1595 		}
1596 	};
1597 
1598 	struct latin1_writer
1599 	{
1600 		typedef uint8_t* value_type;
1601 
lowlatin1_writer1602 		static value_type low(value_type result, uint32_t ch)
1603 		{
1604 			*result = static_cast<uint8_t>(ch > 255 ? '?' : ch);
1605 
1606 			return result + 1;
1607 		}
1608 
highlatin1_writer1609 		static value_type high(value_type result, uint32_t ch)
1610 		{
1611 			(void)ch;
1612 
1613 			*result = '?';
1614 
1615 			return result + 1;
1616 		}
1617 	};
1618 
1619 	struct utf8_decoder
1620 	{
1621 		typedef uint8_t type;
1622 
processutf8_decoder1623 		template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
1624 		{
1625 			const uint8_t utf8_byte_mask = 0x3f;
1626 
1627 			while (size)
1628 			{
1629 				uint8_t lead = *data;
1630 
1631 				// 0xxxxxxx -> U+0000..U+007F
1632 				if (lead < 0x80)
1633 				{
1634 					result = Traits::low(result, lead);
1635 					data += 1;
1636 					size -= 1;
1637 
1638 					// process aligned single-byte (ascii) blocks
1639 					if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
1640 					{
1641 						// round-trip through void* to silence 'cast increases required alignment of target type' warnings
1642 						while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
1643 						{
1644 							result = Traits::low(result, data[0]);
1645 							result = Traits::low(result, data[1]);
1646 							result = Traits::low(result, data[2]);
1647 							result = Traits::low(result, data[3]);
1648 							data += 4;
1649 							size -= 4;
1650 						}
1651 					}
1652 				}
1653 				// 110xxxxx -> U+0080..U+07FF
1654 				else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
1655 				{
1656 					result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
1657 					data += 2;
1658 					size -= 2;
1659 				}
1660 				// 1110xxxx -> U+0800-U+FFFF
1661 				else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
1662 				{
1663 					result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
1664 					data += 3;
1665 					size -= 3;
1666 				}
1667 				// 11110xxx -> U+10000..U+10FFFF
1668 				else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
1669 				{
1670 					result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
1671 					data += 4;
1672 					size -= 4;
1673 				}
1674 				// 10xxxxxx or 11111xxx -> invalid
1675 				else
1676 				{
1677 					data += 1;
1678 					size -= 1;
1679 				}
1680 			}
1681 
1682 			return result;
1683 		}
1684 	};
1685 
1686 	template <typename opt_swap> struct utf16_decoder
1687 	{
1688 		typedef uint16_t type;
1689 
processutf16_decoder1690 		template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
1691 		{
1692 			while (size)
1693 			{
1694 				uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
1695 
1696 				// U+0000..U+D7FF
1697 				if (lead < 0xD800)
1698 				{
1699 					result = Traits::low(result, lead);
1700 					data += 1;
1701 					size -= 1;
1702 				}
1703 				// U+E000..U+FFFF
1704 				else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
1705 				{
1706 					result = Traits::low(result, lead);
1707 					data += 1;
1708 					size -= 1;
1709 				}
1710 				// surrogate pair lead
1711 				else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2)
1712 				{
1713 					uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
1714 
1715 					if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
1716 					{
1717 						result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
1718 						data += 2;
1719 						size -= 2;
1720 					}
1721 					else
1722 					{
1723 						data += 1;
1724 						size -= 1;
1725 					}
1726 				}
1727 				else
1728 				{
1729 					data += 1;
1730 					size -= 1;
1731 				}
1732 			}
1733 
1734 			return result;
1735 		}
1736 	};
1737 
1738 	template <typename opt_swap> struct utf32_decoder
1739 	{
1740 		typedef uint32_t type;
1741 
processutf32_decoder1742 		template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
1743 		{
1744 			while (size)
1745 			{
1746 				uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
1747 
1748 				// U+0000..U+FFFF
1749 				if (lead < 0x10000)
1750 				{
1751 					result = Traits::low(result, lead);
1752 					data += 1;
1753 					size -= 1;
1754 				}
1755 				// U+10000..U+10FFFF
1756 				else
1757 				{
1758 					result = Traits::high(result, lead);
1759 					data += 1;
1760 					size -= 1;
1761 				}
1762 			}
1763 
1764 			return result;
1765 		}
1766 	};
1767 
1768 	struct latin1_decoder
1769 	{
1770 		typedef uint8_t type;
1771 
processlatin1_decoder1772 		template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
1773 		{
1774 			while (size)
1775 			{
1776 				result = Traits::low(result, *data);
1777 				data += 1;
1778 				size -= 1;
1779 			}
1780 
1781 			return result;
1782 		}
1783 	};
1784 
	// Maps the size of wchar_t in bytes to the matching code unit type and UTF counter/writer/decoder.
	// Only sizes 2 and 4 are specialized; any other size fails to compile because the primary template is left undefined.
	template <size_t size> struct wchar_selector;

	// 2-byte wchar_t: UTF-16 without byte swapping
	template <> struct wchar_selector<2>
	{
		typedef uint16_t type;
		typedef utf16_counter counter;
		typedef utf16_writer writer;
		typedef utf16_decoder<opt_false> decoder;
	};

	// 4-byte wchar_t: UTF-32 without byte swapping
	template <> struct wchar_selector<4>
	{
		typedef uint32_t type;
		typedef utf32_counter counter;
		typedef utf32_writer writer;
		typedef utf32_decoder<opt_false> decoder;
	};

	// counter/writer for the wchar_t of the current platform
	typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
	typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
1805 
1806 	struct wchar_decoder
1807 	{
1808 		typedef wchar_t type;
1809 
processwchar_decoder1810 		template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
1811 		{
1812 			typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
1813 
1814 			return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
1815 		}
1816 	};
1817 
1818 #ifdef PUGIXML_WCHAR_MODE
convert_wchar_endian_swap(wchar_t * result,const wchar_t * data,size_t length)1819 	PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
1820 	{
1821 		for (size_t i = 0; i < length; ++i)
1822 			result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
1823 	}
1824 #endif
1825 PUGI__NS_END
1826 
1827 PUGI__NS_BEGIN
	// Character class flags stored in chartype_table.
	// Every value is a distinct bit, so a table entry is the bitwise OR of all classes its character belongs to
	// and membership is tested with a bitwise AND (see PUGI__IS_CHARTYPE).
	enum chartype_t
	{
		ct_parse_pcdata = 1,	// \0, &, \r, <
		ct_parse_attr = 2,		// \0, &, \r, ', "
		ct_parse_attr_ws = 4,	// \0, &, \r, ', ", \n, tab
		ct_space = 8,			// \r, \n, space, tab
		ct_parse_cdata = 16,	// \0, ], >, \r
		ct_parse_comment = 32,	// \0, -, >, \r
		ct_symbol = 64,			// Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
		ct_start_symbol = 128	// Any symbol > 127, a-z, A-Z, _, :
	};
1839 
	// Lookup table indexed by character value; every entry is a bitwise OR of chartype_t flags.
	// All entries for 128 and above hold 192 (ct_symbol | ct_start_symbol).
	static const unsigned char chartype_table[256] =
	{
		55,  0,   0,   0,   0,   0,   0,   0,      0,   12,  12,  0,   0,   63,  0,   0,   // 0-15
		0,   0,   0,   0,   0,   0,   0,   0,      0,   0,   0,   0,   0,   0,   0,   0,   // 16-31
		8,   0,   6,   0,   0,   0,   7,   6,      0,   0,   0,   0,   0,   96,  64,  0,   // 32-47
		64,  64,  64,  64,  64,  64,  64,  64,     64,  64,  192, 0,   1,   0,   48,  0,   // 48-63
		0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 64-79
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0,   0,   16,  0,   192, // 80-95
		0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 96-111
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0, 0, 0, 0, 0,           // 112-127

		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 128+
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192
	};
1860 
	// Character class flags stored in chartypex_table.
	// Every value is a distinct bit, so a table entry is the bitwise OR of all classes its character belongs to
	// and membership is tested with a bitwise AND (see PUGI__IS_CHARTYPEX).
	enum chartypex_t
	{
		ctx_special_pcdata = 1,   // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
		ctx_special_attr = 2,     // Any symbol >= 0 and < 32, &, <, ", '
		ctx_start_symbol = 4,	  // Any symbol > 127, a-z, A-Z, _
		ctx_digit = 8,			  // 0-9
		ctx_symbol = 16			  // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
	};
1869 
	// Lookup table indexed by character value; every entry is a bitwise OR of chartypex_t flags.
	// All entries for 128 and above hold 20 (ctx_start_symbol | ctx_symbol).
	static const unsigned char chartypex_table[256] =
	{
		3,  3,  3,  3,  3,  3,  3,  3,     3,  2,  2,  3,  3,  2,  3,  3,     // 0-15
		3,  3,  3,  3,  3,  3,  3,  3,     3,  3,  3,  3,  3,  3,  3,  3,     // 16-31
		0,  0,  2,  0,  0,  0,  3,  2,     0,  0,  0,  0,  0, 16, 16,  0,     // 32-47
		24, 24, 24, 24, 24, 24, 24, 24,    24, 24, 0,  0,  3,  0,  1,  0,     // 48-63

		0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 64-79
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  20,    // 80-95
		0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 96-111
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  0,     // 112-127

		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 128+
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20
	};
1891 
#ifdef PUGIXML_WCHAR_MODE
	// Wide characters: values below 128 index the table directly, every other value uses the entry at index 128
	#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
#else
	// Narrow characters: the value is converted to unsigned char so that it indexes the 256-entry table
	#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
#endif

	// Non-zero if character c has any of the flags ct set in chartype_table / chartypex_table respectively
	#define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
	#define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
1900 
is_little_endian()1901 	PUGI__FN bool is_little_endian()
1902 	{
1903 		unsigned int ui = 1;
1904 
1905 		return *reinterpret_cast<unsigned char*>(&ui) == 1;
1906 	}
1907 
get_wchar_encoding()1908 	PUGI__FN xml_encoding get_wchar_encoding()
1909 	{
1910 		PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1911 
1912 		if (sizeof(wchar_t) == 2)
1913 			return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1914 		else
1915 			return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1916 	}
1917 
	// Looks for the encoding name inside an XML declaration located at the start of 'data' (which holds 'size' bytes).
	// Returns true if a declaration starting with "<?xml" plus whitespace contains encoding=<quote>name<quote>;
	// out_encoding then points at the first character of the name inside 'data' and out_length is the name's length.
	// Returns false otherwise. Note that out_encoding/out_length may already have been written when false is
	// returned because the closing quote is missing, so callers must only use them on success.
	PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length)
	{
	// consumes exactly one character 'ch' at 'offset'; makes the enclosing function return false on mismatch or end of input
	#define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; }
	// skips zero or more characters of class 'ct'
	#define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; }

		// check if we have a non-empty XML declaration
		if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space)))
			return false;

		// scan XML declaration until the encoding field
		for (size_t i = 6; i + 1 < size; ++i)
		{
			// declaration can not contain ? in quoted values
			if (data[i] == '?')
				return false;

			if (data[i] == 'e' && data[i + 1] == 'n')
			{
				size_t offset = i;

				// encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed
				PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o');
				PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g');

				// S? = S?
				PUGI__SCANCHARTYPE(ct_space);
				PUGI__SCANCHAR('=');
				PUGI__SCANCHARTYPE(ct_space);

				// the only two valid delimiters are ' and "
				// (anything other than " is treated as ', so the scan below fails unless the character really is ')
				uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\'';

				PUGI__SCANCHAR(delimiter);

				size_t start = offset;

				out_encoding = data + offset;

				// the encoding name is the longest run of ct_symbol characters after the opening quote
				PUGI__SCANCHARTYPE(ct_symbol);

				out_length = offset - start;

				// the name has to be followed directly by the matching closing quote
				PUGI__SCANCHAR(delimiter);

				return true;
			}
		}

		return false;

	#undef PUGI__SCANCHAR
	#undef PUGI__SCANCHARTYPE
	}
1971 
guess_buffer_encoding(const uint8_t * data,size_t size)1972 	PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size)
1973 	{
1974 		// skip encoding autodetection if input buffer is too small
1975 		if (size < 4) return encoding_utf8;
1976 
1977 		uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
1978 
1979 		// look for BOM in first few bytes
1980 		if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
1981 		if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
1982 		if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
1983 		if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
1984 		if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
1985 
1986 		// look for <, <? or <?xm in various encodings
1987 		if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
1988 		if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
1989 		if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
1990 		if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
1991 
1992 		// look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
1993 		if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
1994 		if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
1995 
1996 		// no known BOM detected; parse declaration
1997 		const uint8_t* enc = 0;
1998 		size_t enc_length = 0;
1999 
2000 		if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length))
2001 		{
2002 			// iso-8859-1 (case-insensitive)
2003 			if (enc_length == 10
2004 				&& (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o'
2005 				&& enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9'
2006 				&& enc[8] == '-' && enc[9] == '1')
2007 				return encoding_latin1;
2008 
2009 			// latin1 (case-insensitive)
2010 			if (enc_length == 6
2011 				&& (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't'
2012 				&& (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n'
2013 				&& enc[5] == '1')
2014 				return encoding_latin1;
2015 		}
2016 
2017 		return encoding_utf8;
2018 	}
2019 
get_buffer_encoding(xml_encoding encoding,const void * contents,size_t size)2020 	PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
2021 	{
2022 		// replace wchar encoding with utf implementation
2023 		if (encoding == encoding_wchar) return get_wchar_encoding();
2024 
2025 		// replace utf16 encoding with utf16 with specific endianness
2026 		if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2027 
2028 		// replace utf32 encoding with utf32 with specific endianness
2029 		if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2030 
2031 		// only do autodetection if no explicit encoding is requested
2032 		if (encoding != encoding_auto) return encoding;
2033 
2034 		// try to guess encoding (based on XML specification, Appendix F.1)
2035 		const uint8_t* data = static_cast<const uint8_t*>(contents);
2036 
2037 		return guess_buffer_encoding(data, size);
2038 	}
2039 
get_mutable_buffer(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2040 	PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2041 	{
2042 		size_t length = size / sizeof(char_t);
2043 
2044 		if (is_mutable)
2045 		{
2046 			out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
2047 			out_length = length;
2048 		}
2049 		else
2050 		{
2051 			char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2052 			if (!buffer) return false;
2053 
2054 			if (contents)
2055 				memcpy(buffer, contents, length * sizeof(char_t));
2056 			else
2057 				assert(length == 0);
2058 
2059 			buffer[length] = 0;
2060 
2061 			out_buffer = buffer;
2062 			out_length = length + 1;
2063 		}
2064 
2065 		return true;
2066 	}
2067 
2068 #ifdef PUGIXML_WCHAR_MODE
need_endian_swap_utf(xml_encoding le,xml_encoding re)2069 	PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
2070 	{
2071 		return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
2072 			   (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
2073 	}
2074 
convert_buffer_endian_swap(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2075 	PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2076 	{
2077 		const char_t* data = static_cast<const char_t*>(contents);
2078 		size_t length = size / sizeof(char_t);
2079 
2080 		if (is_mutable)
2081 		{
2082 			char_t* buffer = const_cast<char_t*>(data);
2083 
2084 			convert_wchar_endian_swap(buffer, data, length);
2085 
2086 			out_buffer = buffer;
2087 			out_length = length;
2088 		}
2089 		else
2090 		{
2091 			char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2092 			if (!buffer) return false;
2093 
2094 			convert_wchar_endian_swap(buffer, data, length);
2095 			buffer[length] = 0;
2096 
2097 			out_buffer = buffer;
2098 			out_length = length + 1;
2099 		}
2100 
2101 		return true;
2102 	}
2103 
convert_buffer_generic(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,D)2104 	template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2105 	{
2106 		const typename D::type* data = static_cast<const typename D::type*>(contents);
2107 		size_t data_length = size / sizeof(typename D::type);
2108 
2109 		// first pass: get length in wchar_t units
2110 		size_t length = D::process(data, data_length, 0, wchar_counter());
2111 
2112 		// allocate buffer of suitable length
2113 		char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2114 		if (!buffer) return false;
2115 
2116 		// second pass: convert utf16 input to wchar_t
2117 		wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
2118 		wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
2119 
2120 		assert(oend == obegin + length);
2121 		*oend = 0;
2122 
2123 		out_buffer = buffer;
2124 		out_length = length + 1;
2125 
2126 		return true;
2127 	}
2128 
convert_buffer(char_t * & out_buffer,size_t & out_length,xml_encoding encoding,const void * contents,size_t size,bool is_mutable)2129 	PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2130 	{
2131 		// get native encoding
2132 		xml_encoding wchar_encoding = get_wchar_encoding();
2133 
2134 		// fast path: no conversion required
2135 		if (encoding == wchar_encoding)
2136 			return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2137 
2138 		// only endian-swapping is required
2139 		if (need_endian_swap_utf(encoding, wchar_encoding))
2140 			return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
2141 
2142 		// source encoding is utf8
2143 		if (encoding == encoding_utf8)
2144 			return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
2145 
2146 		// source encoding is utf16
2147 		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2148 		{
2149 			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2150 
2151 			return (native_encoding == encoding) ?
2152 				convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2153 				convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2154 		}
2155 
2156 		// source encoding is utf32
2157 		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2158 		{
2159 			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2160 
2161 			return (native_encoding == encoding) ?
2162 				convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2163 				convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2164 		}
2165 
2166 		// source encoding is latin1
2167 		if (encoding == encoding_latin1)
2168 			return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
2169 
2170 		assert(false && "Invalid encoding"); // unreachable
2171 		return false;
2172 	}
2173 #else
convert_buffer_generic(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,D)2174 	template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2175 	{
2176 		const typename D::type* data = static_cast<const typename D::type*>(contents);
2177 		size_t data_length = size / sizeof(typename D::type);
2178 
2179 		// first pass: get length in utf8 units
2180 		size_t length = D::process(data, data_length, 0, utf8_counter());
2181 
2182 		// allocate buffer of suitable length
2183 		char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2184 		if (!buffer) return false;
2185 
2186 		// second pass: convert utf16 input to utf8
2187 		uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2188 		uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
2189 
2190 		assert(oend == obegin + length);
2191 		*oend = 0;
2192 
2193 		out_buffer = buffer;
2194 		out_length = length + 1;
2195 
2196 		return true;
2197 	}
2198 
get_latin1_7bit_prefix_length(const uint8_t * data,size_t size)2199 	PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
2200 	{
2201 		for (size_t i = 0; i < size; ++i)
2202 			if (data[i] > 127)
2203 				return i;
2204 
2205 		return size;
2206 	}
2207 
convert_buffer_latin1(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2208 	PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2209 	{
2210 		const uint8_t* data = static_cast<const uint8_t*>(contents);
2211 		size_t data_length = size;
2212 
2213 		// get size of prefix that does not need utf8 conversion
2214 		size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
2215 		assert(prefix_length <= data_length);
2216 
2217 		const uint8_t* postfix = data + prefix_length;
2218 		size_t postfix_length = data_length - prefix_length;
2219 
2220 		// if no conversion is needed, just return the original buffer
2221 		if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2222 
2223 		// first pass: get length in utf8 units
2224 		size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
2225 
2226 		// allocate buffer of suitable length
2227 		char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2228 		if (!buffer) return false;
2229 
2230 		// second pass: convert latin1 input to utf8
2231 		memcpy(buffer, data, prefix_length);
2232 
2233 		uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2234 		uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
2235 
2236 		assert(oend == obegin + length);
2237 		*oend = 0;
2238 
2239 		out_buffer = buffer;
2240 		out_length = length + 1;
2241 
2242 		return true;
2243 	}
2244 
convert_buffer(char_t * & out_buffer,size_t & out_length,xml_encoding encoding,const void * contents,size_t size,bool is_mutable)2245 	PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2246 	{
2247 		// fast path: no conversion required
2248 		if (encoding == encoding_utf8)
2249 			return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2250 
2251 		// source encoding is utf16
2252 		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2253 		{
2254 			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2255 
2256 			return (native_encoding == encoding) ?
2257 				convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2258 				convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2259 		}
2260 
2261 		// source encoding is utf32
2262 		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2263 		{
2264 			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2265 
2266 			return (native_encoding == encoding) ?
2267 				convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2268 				convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2269 		}
2270 
2271 		// source encoding is latin1
2272 		if (encoding == encoding_latin1)
2273 			return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
2274 
2275 		assert(false && "Invalid encoding"); // unreachable
2276 		return false;
2277 	}
2278 #endif
2279 
as_utf8_begin(const wchar_t * str,size_t length)2280 	PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
2281 	{
2282 		// get length in utf8 characters
2283 		return wchar_decoder::process(str, length, 0, utf8_counter());
2284 	}
2285 
as_utf8_end(char * buffer,size_t size,const wchar_t * str,size_t length)2286 	PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
2287 	{
2288 		// convert to utf8
2289 		uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
2290 		uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
2291 
2292 		assert(begin + size == end);
2293 		(void)!end;
2294 		(void)!size;
2295 	}
2296 
2297 #ifndef PUGIXML_NO_STL
as_utf8_impl(const wchar_t * str,size_t length)2298 	PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
2299 	{
2300 		// first pass: get length in utf8 characters
2301 		size_t size = as_utf8_begin(str, length);
2302 
2303 		// allocate resulting string
2304 		std::string result;
2305 		result.resize(size);
2306 
2307 		// second pass: convert to utf8
2308 		if (size > 0) as_utf8_end(&result[0], size, str, length);
2309 
2310 		return result;
2311 	}
2312 
as_wide_impl(const char * str,size_t size)2313 	PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
2314 	{
2315 		const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
2316 
2317 		// first pass: get length in wchar_t units
2318 		size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
2319 
2320 		// allocate resulting string
2321 		std::basic_string<wchar_t> result;
2322 		result.resize(length);
2323 
2324 		// second pass: convert to wchar_t
2325 		if (length > 0)
2326 		{
2327 			wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
2328 			wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
2329 
2330 			assert(begin + length == end);
2331 			(void)!end;
2332 		}
2333 
2334 		return result;
2335 	}
2336 #endif
2337 
2338 	template <typename Header>
strcpy_insitu_allow(size_t length,const Header & header,uintptr_t header_mask,char_t * target)2339 	inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
2340 	{
2341 		// never reuse shared memory
2342 		if (header & xml_memory_page_contents_shared_mask) return false;
2343 
2344 		size_t target_length = strlength(target);
2345 
2346 		// always reuse document buffer memory if possible
2347 		if ((header & header_mask) == 0) return target_length >= length;
2348 
2349 		// reuse heap memory if waste is not too great
2350 		const size_t reuse_threshold = 32;
2351 
2352 		return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
2353 	}
2354 
2355 	template <typename String, typename Header>
strcpy_insitu(String & dest,Header & header,uintptr_t header_mask,const char_t * source,size_t source_length)2356 	PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
2357 	{
2358 		if (source_length == 0)
2359 		{
2360 			// empty string and null pointer are equivalent, so just deallocate old memory
2361 			xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2362 
2363 			if (header & header_mask) alloc->deallocate_string(dest);
2364 
2365 			// mark the string as not allocated
2366 			dest = 0;
2367 			header &= ~header_mask;
2368 
2369 			return true;
2370 		}
2371 		else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
2372 		{
2373 			// we can reuse old buffer, so just copy the new data (including zero terminator)
2374 			memcpy(dest, source, source_length * sizeof(char_t));
2375 			dest[source_length] = 0;
2376 
2377 			return true;
2378 		}
2379 		else
2380 		{
2381 			xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2382 
2383 			if (!alloc->reserve()) return false;
2384 
2385 			// allocate new buffer
2386 			char_t* buf = alloc->allocate_string(source_length + 1);
2387 			if (!buf) return false;
2388 
2389 			// copy the string (including zero terminator)
2390 			memcpy(buf, source, source_length * sizeof(char_t));
2391 			buf[source_length] = 0;
2392 
2393 			// deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
2394 			if (header & header_mask) alloc->deallocate_string(dest);
2395 
2396 			// the string is now allocated, so set the flag
2397 			dest = buf;
2398 			header |= header_mask;
2399 
2400 			return true;
2401 		}
2402 	}
2403 
2404 	struct gap
2405 	{
2406 		char_t* end;
2407 		size_t size;
2408 
gapgap2409 		gap(): end(0), size(0)
2410 		{
2411 		}
2412 
2413 		// Push new gap, move s count bytes further (skipping the gap).
2414 		// Collapse previous gap.
pushgap2415 		void push(char_t*& s, size_t count)
2416 		{
2417 			if (end) // there was a gap already; collapse it
2418 			{
2419 				// Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
2420 				assert(s >= end);
2421 				memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2422 			}
2423 
2424 			s += count; // end of current gap
2425 
2426 			// "merge" two gaps
2427 			end = s;
2428 			size += count;
2429 		}
2430 
2431 		// Collapse all gaps, return past-the-end pointer
flushgap2432 		char_t* flush(char_t* s)
2433 		{
2434 			if (end)
2435 			{
2436 				// Move [old_gap_end, current_pos) to [old_gap_start, ...)
2437 				assert(s >= end);
2438 				memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2439 
2440 				return s - size;
2441 			}
2442 			else return s;
2443 		}
2444 	};
2445 
	// Decodes the character or entity reference that starts at s and returns the position from which
	// the caller should continue scanning. The callers in this file invoke it only when *s == '&'.
	//
	// Recognized forms: &#NNN; (decimal), &#xHHH; (hex), &amp; &apos; &gt; &lt; &quot;
	//
	// When a reference is recognized, its replacement is written over the beginning of the reference,
	// the rest of the reference text is registered in g as a gap (to be closed up later) and the result
	// points right after the terminating ';'.
	// When the text is not a recognized reference nothing is written and the result points at the
	// character where matching stopped, so the text is kept verbatim.
	PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
	{
		// stre is the read cursor inside the reference; s stays at '&' until a replacement is written
		char_t* stre = s + 1;

		switch (*stre)
		{
			case '#':	// &#...
			{
				// code point accumulated from the digits; there is no overflow check, the value wraps around
				unsigned int ucsc = 0;

				if (stre[1] == 'x') // &#x... (hex code)
				{
					stre += 2;

					char_t ch = *stre;

					// "&#x;" carries no digits and is not treated as a reference
					if (ch == ';') return stre;

					for (;;)
					{
						// the unsigned comparison is a range check for '0'..'9' (smaller characters wrap to huge values)
						if (static_cast<unsigned int>(ch - '0') <= 9)
							ucsc = 16 * ucsc + (ch - '0');
						// (ch | ' ') folds ASCII upper case to lower case, so this accepts a-f and A-F
						else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
							ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
						else if (ch == ';')
							break;
						else // cancel
							return stre;

						ch = *++stre;
					}

					// step over ';'
					++stre;
				}
				else	// &#... (dec code)
				{
					char_t ch = *++stre;

					// "&#;" carries no digits and is not treated as a reference
					if (ch == ';') return stre;

					for (;;)
					{
						// the unsigned comparison is a range check for '0'..'9' (smaller characters wrap to huge values)
						if (static_cast<unsigned int>(ch - '0') <= 9)
							ucsc = 10 * ucsc + (ch - '0');
						else if (ch == ';')
							break;
						else // cancel
							return stre;

						ch = *++stre;
					}

					// step over ';'
					++stre;
				}

				// encode the code point over the start of the reference; s ends up right after the written units
			#ifdef PUGIXML_WCHAR_MODE
				s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
			#else
				s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
			#endif

				// whatever is left of the reference text between s and stre becomes a gap
				g.push(s, stre - s);
				return stre;
			}

			case 'a':	// &a
			{
				++stre;

				if (*stre == 'm') // &am
				{
					if (*++stre == 'p' && *++stre == ';') // &amp;
					{
						// replacement goes over '&', the remaining "amp;" becomes a gap
						*s++ = '&';
						++stre;

						g.push(s, stre - s);
						return stre;
					}
				}
				else if (*stre == 'p') // &ap
				{
					if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
					{
						// replacement goes over '&', the remaining "apos;" becomes a gap
						*s++ = '\'';
						++stre;

						g.push(s, stre - s);
						return stre;
					}
				}
				break;
			}

			case 'g': // &g
			{
				if (*++stre == 't' && *++stre == ';') // &gt;
				{
					// replacement goes over '&', the remaining "gt;" becomes a gap
					*s++ = '>';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
				break;
			}

			case 'l': // &l
			{
				if (*++stre == 't' && *++stre == ';') // &lt;
				{
					// replacement goes over '&', the remaining "lt;" becomes a gap
					*s++ = '<';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
				break;
			}

			case 'q': // &q
			{
				if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
				{
					// replacement goes over '&', the remaining "quot;" becomes a gap
					*s++ = '"';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
				break;
			}

			default:
				break;
		}

		// not a recognized reference: the text is left as it is, scanning resumes where matching stopped
		return stre;
	}
2585 
	// Parser utilities
	// These macros are meant to be expanded inside parsing code and refer to names of the expansion
	// site: s (current position), endch, optmsk, cursor, alloc, ch, error_offset and error_status.

	// True if c equals e, or if c is the zero terminator and the character stored in endch equals e
	#define PUGI__ENDSWITH(c, e)        ((c) == (e) || ((c) == 0 && endch == (e)))
	// Advances s past whitespace characters
	#define PUGI__SKIPWS()              { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
	// Non-zero if any of the option bits OPT is set in optmsk
	#define PUGI__OPTSET(OPT)           ( optmsk & (OPT) )
	// Appends a new node of type TYPE under cursor and makes it the cursor; reports status_out_of_memory on failure
	#define PUGI__PUSHNODE(TYPE)        { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
	// Moves cursor to its parent node
	#define PUGI__POPNODE()             { cursor = cursor->parent; }
	// Advances s until condition X holds or the zero terminator is reached
	#define PUGI__SCANFOR(X)            { while (*s != 0 && !(X)) ++s; }
	// Advances s for as long as condition X holds; X is responsible for stopping at the terminator
	#define PUGI__SCANWHILE(X)          { while (X) ++s; }
	// Same as PUGI__SCANWHILE, processing four characters per iteration; X must test the current character through ss
	#define PUGI__SCANWHILE_UNROLL(X)   { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
	// Ends the current segment: saves *s in ch, replaces it with a zero terminator and steps past it
	#define PUGI__ENDSEG()              { ch = *s; *s = 0; ++s; }
	// Records the error position m and status err, then returns a null char_t* from the enclosing function
	#define PUGI__THROW_ERROR(err, m)   return error_offset = m, error_status = err, static_cast<char_t*>(0)
	// Reports error err at position m if s is at the zero terminator
	#define PUGI__CHECK_ERROR(err, m)   { if (*s == 0) PUGI__THROW_ERROR(err, m); }
2598 
strconv_comment(char_t * s,char_t endch)2599 	PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
2600 	{
2601 		gap g;
2602 
2603 		while (true)
2604 		{
2605 			PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
2606 
2607 			if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2608 			{
2609 				*s++ = '\n'; // replace first one with 0x0a
2610 
2611 				if (*s == '\n') g.push(s, 1);
2612 			}
2613 			else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
2614 			{
2615 				*g.flush(s) = 0;
2616 
2617 				return s + (s[2] == '>' ? 3 : 2);
2618 			}
2619 			else if (*s == 0)
2620 			{
2621 				return 0;
2622 			}
2623 			else ++s;
2624 		}
2625 	}
2626 
strconv_cdata(char_t * s,char_t endch)2627 	PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
2628 	{
2629 		gap g;
2630 
2631 		while (true)
2632 		{
2633 			PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
2634 
2635 			if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2636 			{
2637 				*s++ = '\n'; // replace first one with 0x0a
2638 
2639 				if (*s == '\n') g.push(s, 1);
2640 			}
2641 			else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
2642 			{
2643 				*g.flush(s) = 0;
2644 
2645 				return s + 1;
2646 			}
2647 			else if (*s == 0)
2648 			{
2649 				return 0;
2650 			}
2651 			else ++s;
2652 		}
2653 	}
2654 
2655 	typedef char_t* (*strconv_pcdata_t)(char_t*);
2656 
2657 	template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
2658 	{
parsestrconv_pcdata_impl2659 		static char_t* parse(char_t* s)
2660 		{
2661 			gap g;
2662 
2663 			char_t* begin = s;
2664 
2665 			while (true)
2666 			{
2667 				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));
2668 
2669 				if (*s == '<') // PCDATA ends here
2670 				{
2671 					char_t* end = g.flush(s);
2672 
2673 					if (opt_trim::value)
2674 						while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2675 							--end;
2676 
2677 					*end = 0;
2678 
2679 					return s + 1;
2680 				}
2681 				else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2682 				{
2683 					*s++ = '\n'; // replace first one with 0x0a
2684 
2685 					if (*s == '\n') g.push(s, 1);
2686 				}
2687 				else if (opt_escape::value && *s == '&')
2688 				{
2689 					s = strconv_escape(s, g);
2690 				}
2691 				else if (*s == 0)
2692 				{
2693 					char_t* end = g.flush(s);
2694 
2695 					if (opt_trim::value)
2696 						while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2697 							--end;
2698 
2699 					*end = 0;
2700 
2701 					return s;
2702 				}
2703 				else ++s;
2704 			}
2705 		}
2706 	};
2707 
get_strconv_pcdata(unsigned int optmask)2708 	PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
2709 	{
2710 		PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
2711 
2712 		switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (trim eol escapes); this simultaneously checks 3 options from assertion above
2713 		{
2714 		case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse;
2715 		case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse;
2716 		case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse;
2717 		case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse;
2718 		case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse;
2719 		case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse;
2720 		case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse;
2721 		case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse;
2722 		default: assert(false); return 0; // unreachable
2723 		}
2724 	}
2725 
	// Signature shared by all attribute value converters: (start of the value, closing quote character)
	typedef char_t* (*strconv_attribute_t)(char_t*, char_t);

	// In-place attribute value converters. Every function processes the text starting at s up to the
	// first occurrence of end_quote, zero-terminates the converted value and returns the position right
	// after the closing quote; null is returned if the zero terminator is reached before the quote.
	// When opt_escape::value is set, character and entity references are decoded via strconv_escape.
	template <typename opt_escape> struct strconv_attribute_impl
	{
		// Whitespace normalization: leading and trailing whitespace is removed and every other
		// run of whitespace characters is replaced with a single space.
		static char_t* parse_wnorm(char_t* s, char_t end_quote)
		{
			gap g;

			// trim leading whitespaces
			if (PUGI__IS_CHARTYPE(*s, ct_space))
			{
				char_t* str = s;

				do ++str;
				while (PUGI__IS_CHARTYPE(*str, ct_space));

				// the whole leading run becomes a gap
				g.push(s, str - s);
			}

			while (true)
			{
				// fast-forward to the next whitespace or otherwise special character
				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));

				if (*s == end_quote)
				{
					char_t* str = g.flush(s);

					// terminate the value, then walk backwards zeroing a trailing space if there is one.
					// NOTE(review): for an empty value this reads the character in front of s; presumably
					// that is the opening quote written by the caller - confirm against the call site.
					do *str-- = 0;
					while (PUGI__IS_CHARTYPE(*str, ct_space));

					return s + 1;
				}
				else if (PUGI__IS_CHARTYPE(*s, ct_space))
				{
					// the first whitespace character of a run is kept as a plain space
					*s++ = ' ';

					// the remaining characters of the run are dropped
					if (PUGI__IS_CHARTYPE(*s, ct_space))
					{
						char_t* str = s + 1;
						while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;

						g.push(s, str - s);
					}
				}
				else if (opt_escape::value && *s == '&')
				{
					s = strconv_escape(s, g);
				}
				else if (!*s)
				{
					// input ended before the closing quote
					return 0;
				}
				else ++s;
			}
		}

		// Whitespace conversion: every whitespace character is replaced with a space;
		// a \r\n pair produces a single space.
		static char_t* parse_wconv(char_t* s, char_t end_quote)
		{
			gap g;

			while (true)
			{
				// fast-forward to the next character that needs attention
				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));

				if (*s == end_quote)
				{
					// close the gaps and terminate the value
					*g.flush(s) = 0;

					return s + 1;
				}
				else if (PUGI__IS_CHARTYPE(*s, ct_space))
				{
					if (*s == '\r')
					{
						*s++ = ' ';

						// the \n of a \r\n pair is dropped
						if (*s == '\n') g.push(s, 1);
					}
					else *s++ = ' ';
				}
				else if (opt_escape::value && *s == '&')
				{
					s = strconv_escape(s, g);
				}
				else if (!*s)
				{
					// input ended before the closing quote
					return 0;
				}
				else ++s;
			}
		}

		// End-of-line normalization only: \r\n and lone \r are replaced with \n.
		static char_t* parse_eol(char_t* s, char_t end_quote)
		{
			gap g;

			while (true)
			{
				// fast-forward to the next character that needs attention
				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));

				if (*s == end_quote)
				{
					// close the gaps and terminate the value
					*g.flush(s) = 0;

					return s + 1;
				}
				else if (*s == '\r')
				{
					*s++ = '\n';

					// the \n of a \r\n pair is dropped
					if (*s == '\n') g.push(s, 1);
				}
				else if (opt_escape::value && *s == '&')
				{
					s = strconv_escape(s, g);
				}
				else if (!*s)
				{
					// input ended before the closing quote
					return 0;
				}
				else ++s;
			}
		}

		// No whitespace or end-of-line processing; only reference decoding (if enabled) is applied.
		static char_t* parse_simple(char_t* s, char_t end_quote)
		{
			gap g;

			while (true)
			{
				// fast-forward to the next character that needs attention
				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));

				if (*s == end_quote)
				{
					// close the gaps and terminate the value
					*g.flush(s) = 0;

					return s + 1;
				}
				else if (opt_escape::value && *s == '&')
				{
					s = strconv_escape(s, g);
				}
				else if (!*s)
				{
					// input ended before the closing quote
					return 0;
				}
				else ++s;
			}
		}
	};
2876 
get_strconv_attribute(unsigned int optmask)2877 	PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
2878 	{
2879 		PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
2880 
2881 		switch ((optmask >> 4) & 15) // get bitmask for flags (wnorm wconv eol escapes); this simultaneously checks 4 options from assertion above
2882 		{
2883 		case 0:  return strconv_attribute_impl<opt_false>::parse_simple;
2884 		case 1:  return strconv_attribute_impl<opt_true>::parse_simple;
2885 		case 2:  return strconv_attribute_impl<opt_false>::parse_eol;
2886 		case 3:  return strconv_attribute_impl<opt_true>::parse_eol;
2887 		case 4:  return strconv_attribute_impl<opt_false>::parse_wconv;
2888 		case 5:  return strconv_attribute_impl<opt_true>::parse_wconv;
2889 		case 6:  return strconv_attribute_impl<opt_false>::parse_wconv;
2890 		case 7:  return strconv_attribute_impl<opt_true>::parse_wconv;
2891 		case 8:  return strconv_attribute_impl<opt_false>::parse_wnorm;
2892 		case 9:  return strconv_attribute_impl<opt_true>::parse_wnorm;
2893 		case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
2894 		case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
2895 		case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
2896 		case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
2897 		case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
2898 		case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
2899 		default: assert(false); return 0; // unreachable
2900 		}
2901 	}
2902 
make_parse_result(xml_parse_status status,ptrdiff_t offset=0)2903 	inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
2904 	{
2905 		xml_parse_result result;
2906 		result.status = status;
2907 		result.offset = offset;
2908 
2909 		return result;
2910 	}
2911 
	// XML parser that works in place on a mutable, zero-terminated character buffer:
	// names and values stored in the tree point into the buffer, and terminators are
	// written over the markup that follows them.
	//
	// The parse_* member functions return the position after the construct they consumed.
	// Callers in this struct treat a null return as failure (see the `if (!s) return s;`
	// checks). Errors are reported through PUGI__THROW_ERROR / PUGI__CHECK_ERROR, macros
	// defined outside this section; presumably they fill error_offset / error_status and
	// return 0 from the enclosing function - confirm against the macro definitions.
	struct xml_parser
	{
		xml_allocator* alloc; // allocator used for new attributes (see append_new_attribute call in parse_tree)
		char_t* error_offset; // buffer position of the error, 0 if none; read by parse() to compute the result offset
		xml_parse_status error_status; // status reported by parse(); starts as status_ok

		// Creates a parser bound to the given allocator with no error recorded.
		xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
		{
		}

		// DOCTYPE consists of nested sections of the following possible types:
		// <!-- ... -->, <? ... ?>, "...", '...'
		// <![...]]>
		// <!...>
		// First group can not contain nested groups
		// Second group can contain nested groups of the same type
		// Third group can contain all other groups

		// Skips one primitive DOCTYPE item starting at s: a quoted string, a <? ... ?> section
		// or a <!-- ... --> comment. Returns the position right after the item.
		// Reports status_bad_doctype if the item is not terminated before the end of the buffer
		// or if s does not start any of these items.
		char_t* parse_doctype_primitive(char_t* s)
		{
			if (*s == '"' || *s == '\'')
			{
				// quoted string
				char_t ch = *s++; // remember which quote opened the string; it must also close it
				PUGI__SCANFOR(*s == ch);
				if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);

				s++; // step over the closing quote
			}
			else if (s[0] == '<' && s[1] == '?')
			{
				// <? ... ?>
				s += 2;
				PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
				if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);

				s += 2; // step over '?>'
			}
			else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
			{
				// <!-- ... -->
				s += 4;
				PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
				if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);

				s += 3; // step over '-->'
			}
			else PUGI__THROW_ERROR(status_bad_doctype, s);

			return s;
		}

		// Skips a <![ ... ]]> section, including sections of the same kind nested inside it.
		// s must point at the opening '<![' (asserted). Returns the position right after the
		// ']]>' that closes the outermost section; reports status_bad_doctype if the buffer
		// ends first.
		char_t* parse_doctype_ignore(char_t* s)
		{
			size_t depth = 0; // number of nested '<![' sections that are currently open

			assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
			s += 3;

			while (*s)
			{
				if (s[0] == '<' && s[1] == '!' && s[2] == '[')
				{
					// nested ignore section
					s += 3;
					depth++;
				}
				else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
				{
					// ignore section end
					s += 3;

					if (depth == 0)
						return s;

					depth--;
				}
				else s++;
			}

			// end of buffer reached with the section still open; there is no return after the macro,
			// so it is expected to leave the function
			PUGI__THROW_ERROR(status_bad_doctype, s);
		}

		// Skips a <! ... > group together with everything nested in it.
		// s[1] must be '!' and s[0] must be '<' or 0 (asserted); endch is the character that
		// parse() replaced with the terminating zero.
		// On success returns the position OF the closing '>' (not after it), or the position of
		// the terminating zero when the buffer ends at depth 0 and endch is '>'.
		// Returns null if a nested section fails; reports status_bad_doctype if the buffer ends
		// with groups still open or with endch other than '>'.
		char_t* parse_doctype_group(char_t* s, char_t endch)
		{
			size_t depth = 0; // number of nested '<!' groups that are currently open

			assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
			s += 2;

			while (*s)
			{
				if (s[0] == '<' && s[1] == '!' && s[2] != '-')
				{
					if (s[2] == '[')
					{
						// ignore
						s = parse_doctype_ignore(s);
						if (!s) return s;
					}
					else
					{
						// some control group
						s += 2;
						depth++;
					}
				}
				else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
				{
					// unknown tag (forbidden), or some primitive group
					s = parse_doctype_primitive(s);
					if (!s) return s;
				}
				else if (*s == '>')
				{
					// the outermost '>' is left for the caller to consume
					if (depth == 0)
						return s;

					depth--;
					s++;
				}
				else s++;
			}

			// the buffer ended: this is only acceptable if the removed last character was the closing '>'
			if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);

			return s;
		}

		// Parses a construct that starts with '<!': a comment, a CDATA section or a DOCTYPE.
		// s points at the '!'; cursor is the node that receives any new child; optmsk holds the
		// parse options tested with PUGI__OPTSET; endch is the character that parse() replaced
		// with the terminating zero.
		// Returns the position after the construct, or null if doctype parsing failed.
		// NOTE(review): PUGI__PUSHNODE is defined outside this section; the code below relies on it
		// making `cursor` refer to the new node, which is then local to this function because
		// cursor is passed by value - confirm against the macro definition.
		char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
		{
			// parse node contents, starting with exclamation mark
			++s;

			if (*s == '-') // '<!-...'
			{
				++s;

				if (*s == '-') // '<!--...'
				{
					++s;

					if (PUGI__OPTSET(parse_comments))
					{
						PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
						cursor->value = s; // Save the offset.
					}

					// End-of-line conversion is only needed when the comment text is actually kept
					if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
					{
						s = strconv_comment(s, endch);

						if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
					}
					else
					{
						// Scan for terminating '-->'.
						PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
						PUGI__CHECK_ERROR(status_bad_comment, s);

						if (PUGI__OPTSET(parse_comments))
							*s = 0; // Zero-terminate this segment at the first terminating '-'.

						s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
					}
				}
				else PUGI__THROW_ERROR(status_bad_comment, s);
			}
			else if (*s == '[')
			{
				// '<![CDATA[...'
				if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
				{
					++s;

					if (PUGI__OPTSET(parse_cdata))
					{
						PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
						cursor->value = s; // Save the offset.

						if (PUGI__OPTSET(parse_eol))
						{
							s = strconv_cdata(s, endch);

							if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
						}
						else
						{
							// Scan for terminating ']]>'.
							PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
							PUGI__CHECK_ERROR(status_bad_cdata, s);

							*s++ = 0; // Zero-terminate this segment.
						}
					}
					else // Flagged for discard, but we still have to scan for the terminator.
					{
						// Scan for terminating ']]>'.
						PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
						PUGI__CHECK_ERROR(status_bad_cdata, s);

						++s;
					}

					s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
				}
				else PUGI__THROW_ERROR(status_bad_cdata, s);
			}
			else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
			{
				// s points at 'D'; move back over '!' and the character before it so that
				// parse_doctype_group sees the start of the '<!' group
				s -= 2;

				// DOCTYPE is rejected when the current node has a parent
				if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);

				// position right after the 9 characters of '<!DOCTYPE'
				char_t* mark = s + 9;

				s = parse_doctype_group(s, endch);
				if (!s) return s;

				// parse_doctype_group stops at the closing '>' or at the end of the buffer;
				// overwrite '>' to terminate the doctype value and step over it
				assert((*s == 0 && endch == '>') || *s == '>');
				if (*s) *s++ = 0;

				if (PUGI__OPTSET(parse_doctype))
				{
					// the stored value starts at the first non-space character after '<!DOCTYPE'
					while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;

					PUGI__PUSHNODE(node_doctype);

					cursor->value = mark;
				}
			}
			// the buffer ended right after '<!': use the removed last character to choose the error
			else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
			else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
			else PUGI__THROW_ERROR(status_unrecognized_tag, s);

			return s;
		}

		// Parses a construct that starts with '<?': a processing instruction or a declaration.
		// s points at the '?'; ref_cursor is the current node and is updated on return;
		// optmsk holds the parse options; endch is the character that parse() replaced with
		// the terminating zero.
		// Returns the position after the construct. For a declaration that has content after
		// its target, returns the start of that content with the cursor left at the declaration
		// node, so that parse_tree continues at LOC_ATTRIBUTES.
		char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
		{
			// load into registers
			xml_node_struct* cursor = ref_cursor;
			char_t ch = 0;

			// parse node contents, starting with question mark
			++s;

			// read PI target
			char_t* target = s;

			if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);

			PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
			PUGI__CHECK_ERROR(status_bad_pi, s);

			// determine node type; stricmp / strcasecmp is not portable
			// (OR-ing with ' ' sets the 0x20 bit, which maps ASCII upper-case letters to lower-case; the target must be exactly 3 characters long)
			bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;

			if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
			{
				if (declaration)
				{
					// disallow non top-level declarations
					if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);

					PUGI__PUSHNODE(node_declaration);
				}
				else
				{
					PUGI__PUSHNODE(node_pi);
				}

				cursor->name = target;

				// NOTE(review): per the comments on its uses in parse_tree, PUGI__ENDSEG saves the
				// current character in 'ch', terminates the segment and steps over it
				PUGI__ENDSEG();

				// parse value/attributes
				if (ch == '?')
				{
					// empty node
					if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
					s += (*s == '>'); // step over '>' unless it was the removed last character

					PUGI__POPNODE();
				}
				else if (PUGI__IS_CHARTYPE(ch, ct_space))
				{
					PUGI__SKIPWS();

					// scan for tag end
					char_t* value = s;

					PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
					PUGI__CHECK_ERROR(status_bad_pi, s);

					if (declaration)
					{
						// replace ending ? with / so that 'element' terminates properly
						*s = '/';

						// we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
						s = value;
					}
					else
					{
						// store value and step over >
						cursor->value = value;

						PUGI__POPNODE();

						PUGI__ENDSEG();

						s += (*s == '>');
					}
				}
				else PUGI__THROW_ERROR(status_bad_pi, s);
			}
			else
			{
				// this kind of node is not requested by the options: skip it without creating a node
				// scan for tag end
				PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
				PUGI__CHECK_ERROR(status_bad_pi, s);

				s += (s[1] == '>' ? 2 : 1);
			}

			// store from registers
			ref_cursor = cursor;

			return s;
		}

		// Main parsing loop: walks the zero-terminated buffer starting at s and attaches the
		// parsed nodes and attributes below root.
		// optmsk holds the parse options; endch is the character that parse() replaced with the
		// terminating zero, used to decide whether a construct cut off by the end of the buffer
		// is still well-formed.
		// Returns the final position on success. Errors are reported through PUGI__THROW_ERROR or
		// by propagating a null return from parse_question / parse_exclamation.
		// Control flow uses two labels: LOC_TAG (s is right after a '<') and LOC_ATTRIBUTES
		// (s is inside a start tag, after its name).
		char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
		{
			// select the conversion routines once, based on the parse options
			strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
			strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);

			char_t ch = 0;
			xml_node_struct* cursor = root; // node that is currently open
			char_t* mark = s; // start of the construct being examined (closing tag name or PCDATA)

			while (*s != 0)
			{
				if (*s == '<')
				{
					++s;

				LOC_TAG:
					if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
					{
						PUGI__PUSHNODE(node_element); // Append a new node to the tree.

						cursor->name = s;

						PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
						PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.

						if (ch == '>')
						{
							// end of tag
						}
						else if (PUGI__IS_CHARTYPE(ch, ct_space))
						{
						LOC_ATTRIBUTES:
							while (true)
							{
								PUGI__SKIPWS(); // Eat any whitespace.

								if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
								{
									xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute.
									if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);

									a->name = s; // Save the offset.

									PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
									PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.

									if (PUGI__IS_CHARTYPE(ch, ct_space))
									{
										PUGI__SKIPWS(); // Eat any whitespace.

										// take the first non-space character after the name as the separator
										ch = *s;
										++s;
									}

									if (ch == '=') // '<... #=...'
									{
										PUGI__SKIPWS(); // Eat any whitespace.

										if (*s == '"' || *s == '\'') // '<... #="...'
										{
											ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
											++s; // Step over the quote.
											a->value = s; // Save the offset.

											s = strconv_attribute(s, ch);

											if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);

											// After this line the loop continues from the start;
											// Whitespaces, / and > are ok, symbols and EOF are wrong,
											// everything else will be detected
											if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
										}
										else PUGI__THROW_ERROR(status_bad_attribute, s);
									}
									else PUGI__THROW_ERROR(status_bad_attribute, s);
								}
								else if (*s == '/')
								{
									++s;

									if (*s == '>')
									{
										// '/>' closes the node immediately
										PUGI__POPNODE();
										s++;
										break;
									}
									else if (*s == 0 && endch == '>')
									{
										// the '>' of '/>' was the removed last character of the buffer
										PUGI__POPNODE();
										break;
									}
									else PUGI__THROW_ERROR(status_bad_start_element, s);
								}
								else if (*s == '>')
								{
									++s;

									break;
								}
								else if (*s == 0 && endch == '>')
								{
									// the closing '>' was the removed last character of the buffer
									break;
								}
								else PUGI__THROW_ERROR(status_bad_start_element, s);
							}

							// !!!
						}
						else if (ch == '/') // '<#.../'
						{
							if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);

							PUGI__POPNODE(); // Pop.

							s += (*s == '>');
						}
						else if (ch == 0)
						{
							// we stepped over null terminator, backtrack & handle closing tag
							--s;

							if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
						}
						else PUGI__THROW_ERROR(status_bad_start_element, s);
					}
					else if (*s == '/')
					{
						// closing tag: its name must match the name of the node that is currently open
						++s;

						mark = s;

						char_t* name = cursor->name;
						if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark);

						while (PUGI__IS_CHARTYPE(*s, ct_symbol))
						{
							if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
						}

						if (*name)
						{
							// the tag name ended before the node name did; if only the removed last character
							// is missing, the closing tag is incomplete rather than mismatched
							if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
							else PUGI__THROW_ERROR(status_end_element_mismatch, mark);
						}

						PUGI__POPNODE(); // Pop.

						PUGI__SKIPWS();

						if (*s == 0)
						{
							// the buffer ended: acceptable only if the removed last character was '>'
							if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
						}
						else
						{
							if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
							++s;
						}
					}
					else if (*s == '?') // '<?...'
					{
						s = parse_question(s, cursor, optmsk, endch);
						if (!s) return s;

						// a cursor left at a declaration node means its attributes still have to be parsed
						assert(cursor);
						if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
					}
					else if (*s == '!') // '<!...'
					{
						s = parse_exclamation(s, cursor, optmsk, endch);
						if (!s) return s;
					}
					else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
					else PUGI__THROW_ERROR(status_unrecognized_tag, s);
				}
				else
				{
					mark = s; // Save this offset while searching for a terminator.

					PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.

					if (*s == '<' || !*s)
					{
						// We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
						assert(mark != s);

						// whitespace-only text: it is kept only if the options ask for it
						if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
						{
							continue;
						}
						else if (PUGI__OPTSET(parse_ws_pcdata_single))
						{
							// keep it only when a closing tag follows and the current node has no children yet
							if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
						}
					}

					// unless trimming is requested, the text starts at the saved offset (leading whitespace included)
					if (!PUGI__OPTSET(parse_trim_pcdata))
						s = mark;

					if (cursor->parent || PUGI__OPTSET(parse_fragment))
					{
						if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
						{
							// store the text directly as the value of the current node instead of adding a child
							cursor->value = s; // Save the offset.
						}
						else
						{
							PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.

							cursor->value = s; // Save the offset.

							PUGI__POPNODE(); // Pop since this is a standalone.
						}

						s = strconv_pcdata(s);

						if (!*s) break;
					}
					else
					{
						// text where the current node has no parent and parse_fragment is not set: skip it without creating a node
						PUGI__SCANFOR(*s == '<'); // '...<'
						if (!*s) break;

						++s;
					}

					// We're after '<'
					goto LOC_TAG;
				}
			}

			// check that last tag is closed
			if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);

			return s;
		}

	#ifdef PUGIXML_WCHAR_MODE
		// Returns s advanced past a leading byte order mark (the single character 0xfeff), if there is one.
		static char_t* parse_skip_bom(char_t* s)
		{
			unsigned int bom = 0xfeff;
			return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
		}
	#else
		// Returns s advanced past a leading byte order mark (the bytes EF BB BF), if there is one.
		static char_t* parse_skip_bom(char_t* s)
		{
			return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
		}
	#endif

		// Returns true if node or any sibling that follows it (via next_sibling) is an element node;
		// returns false for a null node.
		static bool has_element_node_siblings(xml_node_struct* node)
		{
			while (node)
			{
				if (PUGI__NODETYPE(node) == node_element) return true;

				node = node->next_sibling;
			}

			return false;
		}

		// Entry point: parses `length` characters of `buffer` and attaches the resulting nodes below root.
		// The buffer is modified: its last character is replaced with a zero terminator before parsing.
		// xmldoc is used as the allocator for the parser; optmsk holds the parse options.
		// Returns the parse status together with the offset (relative to buffer) the status refers to.
		static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
		{
			// early-out for empty documents
			if (length == 0)
				return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);

			// get last child of the root before parsing
			// NOTE(review): first_child->prev_sibling_c is used here as the last child of root - presumably the sibling list is cyclic in that direction; confirm against xml_node_struct
			xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;

			// create parser on stack
			xml_parser parser(static_cast<xml_allocator*>(xmldoc));

			// save last character and make buffer zero-terminated (speeds up parsing)
			char_t endch = buffer[length - 1];
			buffer[length - 1] = 0;

			// skip BOM to make sure it does not end up as part of parse output
			char_t* buffer_data = parse_skip_bom(buffer);

			// perform actual parsing
			// (the returned pointer is not used; the outcome is read from the parser's error fields)
			parser.parse_tree(buffer_data, root, optmsk, endch);

			xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
			assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);

			if (result)
			{
				// since we removed last character, we have to handle the only possible false positive (stray <)
				if (endch == '<')
					return make_parse_result(status_unrecognized_tag, length - 1);

				// check if there are any element nodes parsed
				// (only nodes added by this call are examined: those after the previous last child of root)
				xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0;

				if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
					return make_parse_result(status_no_document_element, length - 1);
			}
			else
			{
				// roll back offset if it occurs on a null terminator in the source buffer
				if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
					result.offset--;
			}

			return result;
		}
	};
3552 
3553 	// Output facilities
get_write_native_encoding()3554 	PUGI__FN xml_encoding get_write_native_encoding()
3555 	{
3556 	#ifdef PUGIXML_WCHAR_MODE
3557 		return get_wchar_encoding();
3558 	#else
3559 		return encoding_utf8;
3560 	#endif
3561 	}
3562 
get_write_encoding(xml_encoding encoding)3563 	PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
3564 	{
3565 		// replace wchar encoding with utf implementation
3566 		if (encoding == encoding_wchar) return get_wchar_encoding();
3567 
3568 		// replace utf16 encoding with utf16 with specific endianness
3569 		if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3570 
3571 		// replace utf32 encoding with utf32 with specific endianness
3572 		if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3573 
3574 		// only do autodetection if no explicit encoding is requested
3575 		if (encoding != encoding_auto) return encoding;
3576 
3577 		// assume utf8 encoding
3578 		return encoding_utf8;
3579 	}
3580 
convert_buffer_output_generic(typename T::value_type dest,const char_t * data,size_t length,D,T)3581 	template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
3582 	{
3583 		PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3584 
3585 		typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3586 
3587 		return static_cast<size_t>(end - dest) * sizeof(*dest);
3588 	}
3589 
convert_buffer_output_generic(typename T::value_type dest,const char_t * data,size_t length,D,T,bool opt_swap)3590 	template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
3591 	{
3592 		PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3593 
3594 		typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3595 
3596 		if (opt_swap)
3597 		{
3598 			for (typename T::value_type i = dest; i != end; ++i)
3599 				*i = endian_swap(*i);
3600 		}
3601 
3602 		return static_cast<size_t>(end - dest) * sizeof(*dest);
3603 	}
3604 
3605 #ifdef PUGIXML_WCHAR_MODE
get_valid_length(const char_t * data,size_t length)3606 	PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3607 	{
3608 		if (length < 1) return 0;
3609 
3610 		// discard last character if it's the lead of a surrogate pair
3611 		return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
3612 	}
3613 
convert_buffer_output(char_t * r_char,uint8_t * r_u8,uint16_t * r_u16,uint32_t * r_u32,const char_t * data,size_t length,xml_encoding encoding)3614 	PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3615 	{
3616 		// only endian-swapping is required
3617 		if (need_endian_swap_utf(encoding, get_wchar_encoding()))
3618 		{
3619 			convert_wchar_endian_swap(r_char, data, length);
3620 
3621 			return length * sizeof(char_t);
3622 		}
3623 
3624 		// convert to utf8
3625 		if (encoding == encoding_utf8)
3626 			return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
3627 
3628 		// convert to utf16
3629 		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3630 		{
3631 			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3632 
3633 			return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
3634 		}
3635 
3636 		// convert to utf32
3637 		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3638 		{
3639 			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3640 
3641 			return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
3642 		}
3643 
3644 		// convert to latin1
3645 		if (encoding == encoding_latin1)
3646 			return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
3647 
3648 		assert(false && "Invalid encoding"); // unreachable
3649 		return 0;
3650 	}
3651 #else
get_valid_length(const char_t * data,size_t length)3652 	PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3653 	{
3654 		if (length < 5) return 0;
3655 
3656 		for (size_t i = 1; i <= 4; ++i)
3657 		{
3658 			uint8_t ch = static_cast<uint8_t>(data[length - i]);
3659 
3660 			// either a standalone character or a leading one
3661 			if ((ch & 0xc0) != 0x80) return length - i;
3662 		}
3663 
3664 		// there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
3665 		return length;
3666 	}
3667 
convert_buffer_output(char_t *,uint8_t * r_u8,uint16_t * r_u16,uint32_t * r_u32,const char_t * data,size_t length,xml_encoding encoding)3668 	PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3669 	{
3670 		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3671 		{
3672 			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3673 
3674 			return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
3675 		}
3676 
3677 		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3678 		{
3679 			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3680 
3681 			return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding);
3682 		}
3683 
3684 		if (encoding == encoding_latin1)
3685 			return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
3686 
3687 		assert(false && "Invalid encoding"); // unreachable
3688 		return 0;
3689 	}
3690 #endif
3691 
3692 	class xml_buffered_writer
3693 	{
3694 		xml_buffered_writer(const xml_buffered_writer&);
3695 		xml_buffered_writer& operator=(const xml_buffered_writer&);
3696 
3697 	public:
xml_buffered_writer(xml_writer & writer_,xml_encoding user_encoding)3698 		xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
3699 		{
3700 			PUGI__STATIC_ASSERT(bufcapacity >= 8);
3701 		}
3702 
flush()3703 		size_t flush()
3704 		{
3705 			flush(buffer, bufsize);
3706 			bufsize = 0;
3707 			return 0;
3708 		}
3709 
flush(const char_t * data,size_t size)3710 		void flush(const char_t* data, size_t size)
3711 		{
3712 			if (size == 0) return;
3713 
3714 			// fast path, just write data
3715 			if (encoding == get_write_native_encoding())
3716 				writer.write(data, size * sizeof(char_t));
3717 			else
3718 			{
3719 				// convert chunk
3720 				size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
3721 				assert(result <= sizeof(scratch));
3722 
3723 				// write data
3724 				writer.write(scratch.data_u8, result);
3725 			}
3726 		}
3727 
write_direct(const char_t * data,size_t length)3728 		void write_direct(const char_t* data, size_t length)
3729 		{
3730 			// flush the remaining buffer contents
3731 			flush();
3732 
3733 			// handle large chunks
3734 			if (length > bufcapacity)
3735 			{
3736 				if (encoding == get_write_native_encoding())
3737 				{
3738 					// fast path, can just write data chunk
3739 					writer.write(data, length * sizeof(char_t));
3740 					return;
3741 				}
3742 
3743 				// need to convert in suitable chunks
3744 				while (length > bufcapacity)
3745 				{
3746 					// get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
3747 					// and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
3748 					size_t chunk_size = get_valid_length(data, bufcapacity);
3749 					assert(chunk_size);
3750 
3751 					// convert chunk and write
3752 					flush(data, chunk_size);
3753 
3754 					// iterate
3755 					data += chunk_size;
3756 					length -= chunk_size;
3757 				}
3758 
3759 				// small tail is copied below
3760 				bufsize = 0;
3761 			}
3762 
3763 			memcpy(buffer + bufsize, data, length * sizeof(char_t));
3764 			bufsize += length;
3765 		}
3766 
write_buffer(const char_t * data,size_t length)3767 		void write_buffer(const char_t* data, size_t length)
3768 		{
3769 			size_t offset = bufsize;
3770 
3771 			if (offset + length <= bufcapacity)
3772 			{
3773 				memcpy(buffer + offset, data, length * sizeof(char_t));
3774 				bufsize = offset + length;
3775 			}
3776 			else
3777 			{
3778 				write_direct(data, length);
3779 			}
3780 		}
3781 
write_string(const char_t * data)3782 		void write_string(const char_t* data)
3783 		{
3784 			// write the part of the string that fits in the buffer
3785 			size_t offset = bufsize;
3786 
3787 			while (*data && offset < bufcapacity)
3788 				buffer[offset++] = *data++;
3789 
3790 			// write the rest
3791 			if (offset < bufcapacity)
3792 			{
3793 				bufsize = offset;
3794 			}
3795 			else
3796 			{
3797 				// backtrack a bit if we have split the codepoint
3798 				size_t length = offset - bufsize;
3799 				size_t extra = length - get_valid_length(data - length, length);
3800 
3801 				bufsize = offset - extra;
3802 
3803 				write_direct(data - extra, strlength(data) + extra);
3804 			}
3805 		}
3806 
write(char_t d0)3807 		void write(char_t d0)
3808 		{
3809 			size_t offset = bufsize;
3810 			if (offset > bufcapacity - 1) offset = flush();
3811 
3812 			buffer[offset + 0] = d0;
3813 			bufsize = offset + 1;
3814 		}
3815 
write(char_t d0,char_t d1)3816 		void write(char_t d0, char_t d1)
3817 		{
3818 			size_t offset = bufsize;
3819 			if (offset > bufcapacity - 2) offset = flush();
3820 
3821 			buffer[offset + 0] = d0;
3822 			buffer[offset + 1] = d1;
3823 			bufsize = offset + 2;
3824 		}
3825 
write(char_t d0,char_t d1,char_t d2)3826 		void write(char_t d0, char_t d1, char_t d2)
3827 		{
3828 			size_t offset = bufsize;
3829 			if (offset > bufcapacity - 3) offset = flush();
3830 
3831 			buffer[offset + 0] = d0;
3832 			buffer[offset + 1] = d1;
3833 			buffer[offset + 2] = d2;
3834 			bufsize = offset + 3;
3835 		}
3836 
write(char_t d0,char_t d1,char_t d2,char_t d3)3837 		void write(char_t d0, char_t d1, char_t d2, char_t d3)
3838 		{
3839 			size_t offset = bufsize;
3840 			if (offset > bufcapacity - 4) offset = flush();
3841 
3842 			buffer[offset + 0] = d0;
3843 			buffer[offset + 1] = d1;
3844 			buffer[offset + 2] = d2;
3845 			buffer[offset + 3] = d3;
3846 			bufsize = offset + 4;
3847 		}
3848 
write(char_t d0,char_t d1,char_t d2,char_t d3,char_t d4)3849 		void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
3850 		{
3851 			size_t offset = bufsize;
3852 			if (offset > bufcapacity - 5) offset = flush();
3853 
3854 			buffer[offset + 0] = d0;
3855 			buffer[offset + 1] = d1;
3856 			buffer[offset + 2] = d2;
3857 			buffer[offset + 3] = d3;
3858 			buffer[offset + 4] = d4;
3859 			bufsize = offset + 5;
3860 		}
3861 
write(char_t d0,char_t d1,char_t d2,char_t d3,char_t d4,char_t d5)3862 		void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
3863 		{
3864 			size_t offset = bufsize;
3865 			if (offset > bufcapacity - 6) offset = flush();
3866 
3867 			buffer[offset + 0] = d0;
3868 			buffer[offset + 1] = d1;
3869 			buffer[offset + 2] = d2;
3870 			buffer[offset + 3] = d3;
3871 			buffer[offset + 4] = d4;
3872 			buffer[offset + 5] = d5;
3873 			bufsize = offset + 6;
3874 		}
3875 
		// Buffer sizing constants and storage.
		// The scratch union below is dimensioned from these expansion factors:
		// utf8 maximum expansion: x4 (-> utf32)
		// utf16 maximum expansion: x2 (-> utf32)
		// utf32 maximum expansion: x1
		enum
		{
			// total byte budget: PUGIXML_MEMORY_OUTPUT_STACK when that macro is defined, 10240 otherwise
			bufcapacitybytes =
			#ifdef PUGIXML_MEMORY_OUTPUT_STACK
				PUGIXML_MEMORY_OUTPUT_STACK
			#else
				10240
			#endif
			,
			// number of char_t slots in buffer: the byte budget divided by sizeof(char_t) plus 4 bytes per slot
			bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
		};

		// pending characters; the write() overloads store at index bufsize and call flush() when room runs out
		char_t buffer[bufcapacity];

		// scratch storage viewable as 8/16/32-bit units or as char_t
		// NOTE(review): presumably the destination of encoding conversion when the buffer is flushed - confirm against flush()
		union
		{
			uint8_t data_u8[4 * bufcapacity];
			uint16_t data_u16[2 * bufcapacity];
			uint32_t data_u32[bufcapacity];
			char_t data_char[bufcapacity];
		} scratch;

		// NOTE(review): presumably the sink that receives flushed data - its use is not visible in this part of the file
		xml_writer& writer;
		// number of characters currently held in buffer (index of the next free slot)
		size_t bufsize;
		// NOTE(review): presumably the encoding output is converted to - confirm against flush()
		xml_encoding encoding;
3904 	};
3905 
text_output_escaped(xml_buffered_writer & writer,const char_t * s,chartypex_t type,unsigned int flags)3906 	PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3907 	{
3908 		while (*s)
3909 		{
3910 			const char_t* prev = s;
3911 
3912 			// While *s is a usual symbol
3913 			PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
3914 
3915 			writer.write_buffer(prev, static_cast<size_t>(s - prev));
3916 
3917 			switch (*s)
3918 			{
3919 				case 0: break;
3920 				case '&':
3921 					writer.write('&', 'a', 'm', 'p', ';');
3922 					++s;
3923 					break;
3924 				case '<':
3925 					writer.write('&', 'l', 't', ';');
3926 					++s;
3927 					break;
3928 				case '>':
3929 					writer.write('&', 'g', 't', ';');
3930 					++s;
3931 					break;
3932 				case '"':
3933 					if (flags & format_attribute_single_quote)
3934 						writer.write('"');
3935 					else
3936 						writer.write('&', 'q', 'u', 'o', 't', ';');
3937 					++s;
3938 					break;
3939 				case '\'':
3940 					if (flags & format_attribute_single_quote)
3941 						writer.write('&', 'a', 'p', 'o', 's', ';');
3942 					else
3943 						writer.write('\'');
3944 					++s;
3945 					break;
3946 				default: // s is not a usual symbol
3947 				{
3948 					unsigned int ch = static_cast<unsigned int>(*s++);
3949 					assert(ch < 32);
3950 
3951 					if (!(flags & format_skip_control_chars))
3952 						writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
3953 				}
3954 			}
3955 		}
3956 	}
3957 
text_output(xml_buffered_writer & writer,const char_t * s,chartypex_t type,unsigned int flags)3958 	PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3959 	{
3960 		if (flags & format_no_escapes)
3961 			writer.write_string(s);
3962 		else
3963 			text_output_escaped(writer, s, type, flags);
3964 	}
3965 
text_output_cdata(xml_buffered_writer & writer,const char_t * s)3966 	PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
3967 	{
3968 		do
3969 		{
3970 			writer.write('<', '!', '[', 'C', 'D');
3971 			writer.write('A', 'T', 'A', '[');
3972 
3973 			const char_t* prev = s;
3974 
3975 			// look for ]]> sequence - we can't output it as is since it terminates CDATA
3976 			while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
3977 
3978 			// skip ]] if we stopped at ]]>, > will go to the next CDATA section
3979 			if (*s) s += 2;
3980 
3981 			writer.write_buffer(prev, static_cast<size_t>(s - prev));
3982 
3983 			writer.write(']', ']', '>');
3984 		}
3985 		while (*s);
3986 	}
3987 
text_output_indent(xml_buffered_writer & writer,const char_t * indent,size_t indent_length,unsigned int depth)3988 	PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
3989 	{
3990 		switch (indent_length)
3991 		{
3992 		case 1:
3993 		{
3994 			for (unsigned int i = 0; i < depth; ++i)
3995 				writer.write(indent[0]);
3996 			break;
3997 		}
3998 
3999 		case 2:
4000 		{
4001 			for (unsigned int i = 0; i < depth; ++i)
4002 				writer.write(indent[0], indent[1]);
4003 			break;
4004 		}
4005 
4006 		case 3:
4007 		{
4008 			for (unsigned int i = 0; i < depth; ++i)
4009 				writer.write(indent[0], indent[1], indent[2]);
4010 			break;
4011 		}
4012 
4013 		case 4:
4014 		{
4015 			for (unsigned int i = 0; i < depth; ++i)
4016 				writer.write(indent[0], indent[1], indent[2], indent[3]);
4017 			break;
4018 		}
4019 
4020 		default:
4021 		{
4022 			for (unsigned int i = 0; i < depth; ++i)
4023 				writer.write_buffer(indent, indent_length);
4024 		}
4025 		}
4026 	}
4027 
node_output_comment(xml_buffered_writer & writer,const char_t * s)4028 	PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
4029 	{
4030 		writer.write('<', '!', '-', '-');
4031 
4032 		while (*s)
4033 		{
4034 			const char_t* prev = s;
4035 
4036 			// look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
4037 			while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
4038 
4039 			writer.write_buffer(prev, static_cast<size_t>(s - prev));
4040 
4041 			if (*s)
4042 			{
4043 				assert(*s == '-');
4044 
4045 				writer.write('-', ' ');
4046 				++s;
4047 			}
4048 		}
4049 
4050 		writer.write('-', '-', '>');
4051 	}
4052 
node_output_pi_value(xml_buffered_writer & writer,const char_t * s)4053 	PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
4054 	{
4055 		while (*s)
4056 		{
4057 			const char_t* prev = s;
4058 
4059 			// look for ?> sequence - we can't output it since ?> terminates PI
4060 			while (*s && !(s[0] == '?' && s[1] == '>')) ++s;
4061 
4062 			writer.write_buffer(prev, static_cast<size_t>(s - prev));
4063 
4064 			if (*s)
4065 			{
4066 				assert(s[0] == '?' && s[1] == '>');
4067 
4068 				writer.write('?', ' ', '>');
4069 				s += 2;
4070 			}
4071 		}
4072 	}
4073 
node_output_attributes(xml_buffered_writer & writer,xml_node_struct * node,const char_t * indent,size_t indent_length,unsigned int flags,unsigned int depth)4074 	PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4075 	{
4076 		const char_t* default_name = PUGIXML_TEXT(":anonymous");
4077 		const char_t enquotation_char = (flags & format_attribute_single_quote) ? '\'' : '"';
4078 
4079 		for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4080 		{
4081 			if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes)
4082 			{
4083 				writer.write('\n');
4084 
4085 				text_output_indent(writer, indent, indent_length, depth + 1);
4086 			}
4087 			else
4088 			{
4089 				writer.write(' ');
4090 			}
4091 
4092 			writer.write_string(a->name ? a->name + 0 : default_name);
4093 			writer.write('=', enquotation_char);
4094 
4095 			if (a->value)
4096 				text_output(writer, a->value, ctx_special_attr, flags);
4097 
4098 			writer.write(enquotation_char);
4099 		}
4100 	}
4101 
node_output_start(xml_buffered_writer & writer,xml_node_struct * node,const char_t * indent,size_t indent_length,unsigned int flags,unsigned int depth)4102 	PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4103 	{
4104 		const char_t* default_name = PUGIXML_TEXT(":anonymous");
4105 		const char_t* name = node->name ? node->name + 0 : default_name;
4106 
4107 		writer.write('<');
4108 		writer.write_string(name);
4109 
4110 		if (node->first_attribute)
4111 			node_output_attributes(writer, node, indent, indent_length, flags, depth);
4112 
4113 		// element nodes can have value if parse_embed_pcdata was used
4114 		if (!node->value)
4115 		{
4116 			if (!node->first_child)
4117 			{
4118 				if (flags & format_no_empty_element_tags)
4119 				{
4120 					writer.write('>', '<', '/');
4121 					writer.write_string(name);
4122 					writer.write('>');
4123 
4124 					return false;
4125 				}
4126 				else
4127 				{
4128 					if ((flags & format_raw) == 0)
4129 						writer.write(' ');
4130 
4131 					writer.write('/', '>');
4132 
4133 					return false;
4134 				}
4135 			}
4136 			else
4137 			{
4138 				writer.write('>');
4139 
4140 				return true;
4141 			}
4142 		}
4143 		else
4144 		{
4145 			writer.write('>');
4146 
4147 			text_output(writer, node->value, ctx_special_pcdata, flags);
4148 
4149 			if (!node->first_child)
4150 			{
4151 				writer.write('<', '/');
4152 				writer.write_string(name);
4153 				writer.write('>');
4154 
4155 				return false;
4156 			}
4157 			else
4158 			{
4159 				return true;
4160 			}
4161 		}
4162 	}
4163 
node_output_end(xml_buffered_writer & writer,xml_node_struct * node)4164 	PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
4165 	{
4166 		const char_t* default_name = PUGIXML_TEXT(":anonymous");
4167 		const char_t* name = node->name ? node->name + 0 : default_name;
4168 
4169 		writer.write('<', '/');
4170 		writer.write_string(name);
4171 		writer.write('>');
4172 	}
4173 
node_output_simple(xml_buffered_writer & writer,xml_node_struct * node,unsigned int flags)4174 	PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
4175 	{
4176 		const char_t* default_name = PUGIXML_TEXT(":anonymous");
4177 
4178 		switch (PUGI__NODETYPE(node))
4179 		{
4180 			case node_pcdata:
4181 				text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
4182 				break;
4183 
4184 			case node_cdata:
4185 				text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4186 				break;
4187 
4188 			case node_comment:
4189 				node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4190 				break;
4191 
4192 			case node_pi:
4193 				writer.write('<', '?');
4194 				writer.write_string(node->name ? node->name + 0 : default_name);
4195 
4196 				if (node->value)
4197 				{
4198 					writer.write(' ');
4199 					node_output_pi_value(writer, node->value);
4200 				}
4201 
4202 				writer.write('?', '>');
4203 				break;
4204 
4205 			case node_declaration:
4206 				writer.write('<', '?');
4207 				writer.write_string(node->name ? node->name + 0 : default_name);
4208 				node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
4209 				writer.write('?', '>');
4210 				break;
4211 
4212 			case node_doctype:
4213 				writer.write('<', '!', 'D', 'O', 'C');
4214 				writer.write('T', 'Y', 'P', 'E');
4215 
4216 				if (node->value)
4217 				{
4218 					writer.write(' ');
4219 					writer.write_string(node->value);
4220 				}
4221 
4222 				writer.write('>');
4223 				break;
4224 
4225 			default:
4226 				assert(false && "Invalid node type"); // unreachable
4227 		}
4228 	}
4229 
	// Bit flags used by node_output to decide what precedes the next tag it writes
	enum indent_flags_t
	{
		// write a '\n' before the next tag (suppressed when format_raw is set)
		indent_newline = 1,
		// write the indentation string before the next tag (only when the indent length is non-zero)
		indent_indent = 2
	};
4235 
	// Serializes the subtree rooted at root to writer.
	// The traversal is iterative: it descends through first_child, advances through next_sibling and climbs
	// back through parent, writing closing tags for elements on the way up.
	// indent is the indentation unit; depth is the nesting level used for root and is adjusted as elements are entered and left.
	PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
	{
		// indentation is only measured when some form of indenting is requested and format_raw is not set; 0 disables it
		size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
		// the first node is indented but not preceded by a newline
		unsigned int indent_flags = indent_indent;

		xml_node_struct* node = root;

		do
		{
			assert(node);

			// begin writing current node
			if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
			{
				node_output_simple(writer, node, flags);

				// text is written inline: whatever follows gets neither a newline nor indentation
				indent_flags = 0;
			}
			else
			{
				if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
					writer.write('\n');

				if ((indent_flags & indent_indent) && indent_length)
					text_output_indent(writer, indent, indent_length, depth);

				if (PUGI__NODETYPE(node) == node_element)
				{
					indent_flags = indent_newline | indent_indent;

					// true means the element has children that still need to be written
					if (node_output_start(writer, node, indent, indent_length, flags, depth))
					{
						// element nodes can have value if parse_embed_pcdata was used
						if (node->value)
							indent_flags = 0;

						// descend; continue jumps to the loop condition, which holds because node is now below root
						node = node->first_child;
						depth++;
						continue;
					}
				}
				else if (PUGI__NODETYPE(node) == node_document)
				{
					// the document node itself produces no output and does not increase depth
					indent_flags = indent_indent;

					if (node->first_child)
					{
						node = node->first_child;
						continue;
					}
				}
				else
				{
					node_output_simple(writer, node, flags);

					indent_flags = indent_newline | indent_indent;
				}
			}

			// continue to the next node
			while (node != root)
			{
				if (node->next_sibling)
				{
					node = node->next_sibling;
					break;
				}

				// no more siblings: climb to the parent, which has now been written completely
				node = node->parent;

				// write closing node
				if (PUGI__NODETYPE(node) == node_element)
				{
					depth--;

					if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
						writer.write('\n');

					if ((indent_flags & indent_indent) && indent_length)
						text_output_indent(writer, indent, indent_length, depth);

					node_output_end(writer, node);

					indent_flags = indent_newline | indent_indent;
				}
			}
		}
		while (node != root);

		// trailing newline, unless the last thing written was inline text or format_raw is set
		if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
			writer.write('\n');
	}
4328 
has_declaration(xml_node_struct * node)4329 	PUGI__FN bool has_declaration(xml_node_struct* node)
4330 	{
4331 		for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
4332 		{
4333 			xml_node_type type = PUGI__NODETYPE(child);
4334 
4335 			if (type == node_declaration) return true;
4336 			if (type == node_element) return false;
4337 		}
4338 
4339 		return false;
4340 	}
4341 
is_attribute_of(xml_attribute_struct * attr,xml_node_struct * node)4342 	PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
4343 	{
4344 		for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4345 			if (a == attr)
4346 				return true;
4347 
4348 		return false;
4349 	}
4350 
allow_insert_attribute(xml_node_type parent)4351 	PUGI__FN bool allow_insert_attribute(xml_node_type parent)
4352 	{
4353 		return parent == node_element || parent == node_declaration;
4354 	}
4355 
allow_insert_child(xml_node_type parent,xml_node_type child)4356 	PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
4357 	{
4358 		if (parent != node_document && parent != node_element) return false;
4359 		if (child == node_document || child == node_null) return false;
4360 		if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
4361 
4362 		return true;
4363 	}
4364 
allow_move(xml_node parent,xml_node child)4365 	PUGI__FN bool allow_move(xml_node parent, xml_node child)
4366 	{
4367 		// check that child can be a child of parent
4368 		if (!allow_insert_child(parent.type(), child.type()))
4369 			return false;
4370 
4371 		// check that node is not moved between documents
4372 		if (parent.root() != child.root())
4373 			return false;
4374 
4375 		// check that new parent is not in the child subtree
4376 		xml_node cur = parent;
4377 
4378 		while (cur)
4379 		{
4380 			if (cur == child)
4381 				return false;
4382 
4383 			cur = cur.parent();
4384 		}
4385 
4386 		return true;
4387 	}
4388 
4389 	template <typename String, typename Header>
node_copy_string(String & dest,Header & header,uintptr_t header_mask,char_t * source,Header & source_header,xml_allocator * alloc)4390 	PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
4391 	{
4392 		assert(!dest && (header & header_mask) == 0);
4393 
4394 		if (source)
4395 		{
4396 			if (alloc && (source_header & header_mask) == 0)
4397 			{
4398 				dest = source;
4399 
4400 				// since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
4401 				header |= xml_memory_page_contents_shared_mask;
4402 				source_header |= xml_memory_page_contents_shared_mask;
4403 			}
4404 			else
4405 				strcpy_insitu(dest, header, header_mask, source, strlength(source));
4406 		}
4407 	}
4408 
node_copy_contents(xml_node_struct * dn,xml_node_struct * sn,xml_allocator * shared_alloc)4409 	PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
4410 	{
4411 		node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
4412 		node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
4413 
4414 		for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute)
4415 		{
4416 			xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
4417 
4418 			if (da)
4419 			{
4420 				node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4421 				node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4422 			}
4423 		}
4424 	}
4425 
	// Copies the contents of sn and its whole subtree into dn.
	// The source tree is walked iteratively (first_child / next_sibling / parent) while a second cursor
	// tracks the matching position in the destination tree.
	PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
	{
		xml_allocator& alloc = get_allocator(dn);
		// non-null only when source and destination use the same allocator; node_copy_string shares strings only in that case
		xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;

		node_copy_contents(dn, sn, shared_alloc);

		// dit: destination node that receives new children; sit: source node being copied
		xml_node_struct* dit = dn;
		xml_node_struct* sit = sn->first_child;

		while (sit && sit != sn)
		{
			// loop invariant: dit is inside the subtree rooted at dn
			assert(dit);

			// when a tree is copied into one of the descendants, we need to skip that subtree to avoid an infinite loop
			if (sit != dn)
			{
				xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));

				// a failed allocation skips this source node together with its subtree
				if (copy)
				{
					node_copy_contents(copy, sit, shared_alloc);

					if (sit->first_child)
					{
						// descend in both trees in lockstep
						dit = copy;
						sit = sit->first_child;
						continue;
					}
				}
			}

			// continue to the next node
			do
			{
				if (sit->next_sibling)
				{
					sit = sit->next_sibling;
					break;
				}

				// no more siblings: climb one level in both trees
				sit = sit->parent;
				dit = dit->parent;

				// loop invariant: dit is inside the subtree rooted at dn while sit is inside sn
				assert(sit == sn || dit);
			}
			while (sit != sn);
		}

		// either sn had no children, or both cursors climbed back out of their subtrees together
		assert(!sit || dit == dn->parent);
	}
4479 
node_copy_attribute(xml_attribute_struct * da,xml_attribute_struct * sa)4480 	PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
4481 	{
4482 		xml_allocator& alloc = get_allocator(da);
4483 		xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
4484 
4485 		node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4486 		node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4487 	}
4488 
is_text_node(xml_node_struct * node)4489 	inline bool is_text_node(xml_node_struct* node)
4490 	{
4491 		xml_node_type type = PUGI__NODETYPE(node);
4492 
4493 		return type == node_pcdata || type == node_cdata;
4494 	}
4495 
4496 	// get value with conversion functions
	// Parses an integer from value into the unsigned type U and clamps it to [minv, maxv].
	// Accepted form: leading whitespace, an optional '+' or '-', then either 0x/0X followed by hex digits
	// or a run of decimal digits; parsing stops at the first character that is not a digit.
	// For negative input, minv is the lower bound expressed in U's two's complement form and the result is
	// the negated magnitude in the same form. On overflow the result saturates to minv or maxv.
	template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW U string_to_integer(const char_t* value, U minv, U maxv)
	{
		U result = 0;
		const char_t* s = value;

		while (PUGI__IS_CHARTYPE(*s, ct_space))
			s++;

		bool negative = (*s == '-');

		// skip a single sign character if present
		s += (*s == '+' || *s == '-');

		bool overflow = false;

		// OR-ing with ' ' sets bit 0x20, so the comparison matches both 'x' and 'X'
		if (s[0] == '0' && (s[1] | ' ') == 'x')
		{
			s += 2;

			// since overflow detection relies on length of the sequence skip leading zeros
			while (*s == '0')
				s++;

			const char_t* start = s;

			for (;;)
			{
				// the unsigned comparison rejects characters below '0' as well as those above '9'
				if (static_cast<unsigned>(*s - '0') < 10)
					result = result * 16 + (*s - '0');
				else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
					result = result * 16 + ((*s | ' ') - 'a' + 10);
				else
					break;

				s++;
			}

			size_t digits = static_cast<size_t>(s - start);

			// every hex digit contributes 4 bits, so U holds at most two digits per byte
			overflow = digits > sizeof(U) * 2;
		}
		else
		{
			// since overflow detection relies on length of the sequence skip leading zeros
			while (*s == '0')
				s++;

			const char_t* start = s;

			for (;;)
			{
				if (static_cast<unsigned>(*s - '0') < 10)
					result = result * 10 + (*s - '0');
				else
					break;

				s++;
			}

			size_t digits = static_cast<size_t>(s - start);

			PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);

			// digit count and leading digit of the largest value of U (18446744073709551615 / 4294967295 / 65535)
			const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
			const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
			const size_t high_bit = sizeof(U) * 8 - 1;

			// fewer digits than the maximum always fit, more never do; with exactly max_digits10 digits the value fits
			// when the leading digit is below max_lead, or equals it and the accumulated result still has its top bit set
			// (a result that wrapped around would have the top bit clear)
			overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
		}

		if (negative)
		{
			// 0 - minv is the magnitude of the lower bound in unsigned arithmetic; larger magnitudes saturate to minv
			// Workaround for crayc++ CC-3059: Expected no overflow in routine.
		#ifdef _CRAYC
			return (overflow || result > ~minv + 1) ? minv : ~result + 1;
		#else
			return (overflow || result > 0 - minv) ? minv : 0 - result;
		#endif
		}
		else
			return (overflow || result > maxv) ? maxv : result;
	}
4578 
get_value_int(const char_t * value)4579 	PUGI__FN int get_value_int(const char_t* value)
4580 	{
4581 		return string_to_integer<unsigned int>(value, static_cast<unsigned int>(INT_MIN), INT_MAX);
4582 	}
4583 
get_value_uint(const char_t * value)4584 	PUGI__FN unsigned int get_value_uint(const char_t* value)
4585 	{
4586 		return string_to_integer<unsigned int>(value, 0, UINT_MAX);
4587 	}
4588 
get_value_double(const char_t * value)4589 	PUGI__FN double get_value_double(const char_t* value)
4590 	{
4591 	#ifdef PUGIXML_WCHAR_MODE
4592 		return wcstod(value, 0);
4593 	#else
4594 		return strtod(value, 0);
4595 	#endif
4596 	}
4597 
get_value_float(const char_t * value)4598 	PUGI__FN float get_value_float(const char_t* value)
4599 	{
4600 	#ifdef PUGIXML_WCHAR_MODE
4601 		return static_cast<float>(wcstod(value, 0));
4602 	#else
4603 		return static_cast<float>(strtod(value, 0));
4604 	#endif
4605 	}
4606 
get_value_bool(const char_t * value)4607 	PUGI__FN bool get_value_bool(const char_t* value)
4608 	{
4609 		// only look at first char
4610 		char_t first = *value;
4611 
4612 		// 1*, t* (true), T* (True), y* (yes), Y* (YES)
4613 		return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
4614 	}
4615 
4616 #ifdef PUGIXML_HAS_LONG_LONG
get_value_llong(const char_t * value)4617 	PUGI__FN long long get_value_llong(const char_t* value)
4618 	{
4619 		return string_to_integer<unsigned long long>(value, static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX);
4620 	}
4621 
get_value_ullong(const char_t * value)4622 	PUGI__FN unsigned long long get_value_ullong(const char_t* value)
4623 	{
4624 		return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX);
4625 	}
4626 #endif
4627 
integer_to_string(char_t * begin,char_t * end,U value,bool negative)4628 	template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
4629 	{
4630 		char_t* result = end - 1;
4631 		U rest = negative ? 0 - value : value;
4632 
4633 		do
4634 		{
4635 			*result-- = static_cast<char_t>('0' + (rest % 10));
4636 			rest /= 10;
4637 		}
4638 		while (rest);
4639 
4640 		assert(result >= begin);
4641 		(void)begin;
4642 
4643 		*result = '-';
4644 
4645 		return result + !negative;
4646 	}
4647 
4648 	// set value with conversion functions
4649 	template <typename String, typename Header>
set_value_ascii(String & dest,Header & header,uintptr_t header_mask,char * buf)4650 	PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
4651 	{
4652 	#ifdef PUGIXML_WCHAR_MODE
4653 		char_t wbuf[128];
4654 		assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
4655 
4656 		size_t offset = 0;
4657 		for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
4658 
4659 		return strcpy_insitu(dest, header, header_mask, wbuf, offset);
4660 	#else
4661 		return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
4662 	#endif
4663 	}
4664 
4665 	template <typename U, typename String, typename Header>
set_value_integer(String & dest,Header & header,uintptr_t header_mask,U value,bool negative)4666 	PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative)
4667 	{
4668 		char_t buf[64];
4669 		char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
4670 		char_t* begin = integer_to_string(buf, end, value, negative);
4671 
4672 		return strcpy_insitu(dest, header, header_mask, begin, end - begin);
4673 	}
4674 
4675 	template <typename String, typename Header>
set_value_convert(String & dest,Header & header,uintptr_t header_mask,float value,int precision)4676 	PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value, int precision)
4677 	{
4678 		char buf[128];
4679 		PUGI__SNPRINTF(buf, "%.*g", precision, double(value));
4680 
4681 		return set_value_ascii(dest, header, header_mask, buf);
4682 	}
4683 
4684 	template <typename String, typename Header>
set_value_convert(String & dest,Header & header,uintptr_t header_mask,double value,int precision)4685 	PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value, int precision)
4686 	{
4687 		char buf[128];
4688 		PUGI__SNPRINTF(buf, "%.*g", precision, value);
4689 
4690 		return set_value_ascii(dest, header, header_mask, buf);
4691 	}
4692 
4693 	template <typename String, typename Header>
set_value_bool(String & dest,Header & header,uintptr_t header_mask,bool value)4694 	PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value)
4695 	{
4696 		return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5);
4697 	}
4698 
load_buffer_impl(xml_document_struct * doc,xml_node_struct * root,void * contents,size_t size,unsigned int options,xml_encoding encoding,bool is_mutable,bool own,char_t ** out_buffer)4699 	PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
4700 	{
4701 		// check input buffer
4702 		if (!contents && size) return make_parse_result(status_io_error);
4703 
4704 		// get actual encoding
4705 		xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
4706 
4707 		// get private buffer
4708 		char_t* buffer = 0;
4709 		size_t length = 0;
4710 
4711 		// coverity[var_deref_model]
4712 		if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
4713 
4714 		// delete original buffer if we performed a conversion
4715 		if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
4716 
4717 		// grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
4718 		if (own || buffer != contents) *out_buffer = buffer;
4719 
4720 		// store buffer for offset_debug
4721 		doc->buffer = buffer;
4722 
4723 		// parse
4724 		xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
4725 
4726 		// remember encoding
4727 		res.encoding = buffer_encoding;
4728 
4729 		return res;
4730 	}
4731 
4732 	// we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
get_file_size(FILE * file,size_t & out_result)4733 	PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
4734 	{
4735 	#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
4736 		// there are 64-bit versions of fseek/ftell, let's use them
4737 		typedef __int64 length_type;
4738 
4739 		_fseeki64(file, 0, SEEK_END);
4740 		length_type length = _ftelli64(file);
4741 		_fseeki64(file, 0, SEEK_SET);
4742 	#elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
4743 		// there are 64-bit versions of fseek/ftell, let's use them
4744 		typedef off64_t length_type;
4745 
4746 		fseeko64(file, 0, SEEK_END);
4747 		length_type length = ftello64(file);
4748 		fseeko64(file, 0, SEEK_SET);
4749 	#else
4750 		// if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
4751 		typedef long length_type;
4752 
4753 		fseek(file, 0, SEEK_END);
4754 		length_type length = ftell(file);
4755 		fseek(file, 0, SEEK_SET);
4756 	#endif
4757 
4758 		// check for I/O errors
4759 		if (length < 0) return status_io_error;
4760 
4761 		// check for overflow
4762 		size_t result = static_cast<size_t>(length);
4763 
4764 		if (static_cast<length_type>(result) != length) return status_out_of_memory;
4765 
4766 		// finalize
4767 		out_result = result;
4768 
4769 		return status_ok;
4770 	}
4771 
4772 	// This function assumes that buffer has extra sizeof(char_t) writable bytes after size
zero_terminate_buffer(void * buffer,size_t size,xml_encoding encoding)4773 	PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
4774 	{
4775 		// We only need to zero-terminate if encoding conversion does not do it for us
4776 	#ifdef PUGIXML_WCHAR_MODE
4777 		xml_encoding wchar_encoding = get_wchar_encoding();
4778 
4779 		if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
4780 		{
4781 			size_t length = size / sizeof(char_t);
4782 
4783 			static_cast<char_t*>(buffer)[length] = 0;
4784 			return (length + 1) * sizeof(char_t);
4785 		}
4786 	#else
4787 		if (encoding == encoding_utf8)
4788 		{
4789 			static_cast<char*>(buffer)[size] = 0;
4790 			return size + 1;
4791 		}
4792 	#endif
4793 
4794 		return size;
4795 	}
4796 
load_file_impl(xml_document_struct * doc,FILE * file,unsigned int options,xml_encoding encoding,char_t ** out_buffer)4797 	PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4798 	{
4799 		if (!file) return make_parse_result(status_file_not_found);
4800 
4801 		// get file size (can result in I/O errors)
4802 		size_t size = 0;
4803 		xml_parse_status size_status = get_file_size(file, size);
4804 		if (size_status != status_ok) return make_parse_result(size_status);
4805 
4806 		size_t max_suffix_size = sizeof(char_t);
4807 
4808 		// allocate buffer for the whole file
4809 		char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
4810 		if (!contents) return make_parse_result(status_out_of_memory);
4811 
4812 		// read file in memory
4813 		size_t read_size = fread(contents, 1, size, file);
4814 
4815 		if (read_size != size)
4816 		{
4817 			xml_memory::deallocate(contents);
4818 			return make_parse_result(status_io_error);
4819 		}
4820 
4821 		xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
4822 
4823 		return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
4824 	}
4825 
	// Closes the given stdio stream; the result of fclose is ignored.
	PUGI__FN void close_file(FILE* file)
	{
		fclose(file);
	}
4830 
4831 #ifndef PUGIXML_NO_STL
4832 	template <typename T> struct xml_stream_chunk
4833 	{
createxml_stream_chunk4834 		static xml_stream_chunk* create()
4835 		{
4836 			void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
4837 			if (!memory) return 0;
4838 
4839 			return new (memory) xml_stream_chunk();
4840 		}
4841 
destroyxml_stream_chunk4842 		static void destroy(xml_stream_chunk* chunk)
4843 		{
4844 			// free chunk chain
4845 			while (chunk)
4846 			{
4847 				xml_stream_chunk* next_ = chunk->next;
4848 
4849 				xml_memory::deallocate(chunk);
4850 
4851 				chunk = next_;
4852 			}
4853 		}
4854 
xml_stream_chunkxml_stream_chunk4855 		xml_stream_chunk(): next(0), size(0)
4856 		{
4857 		}
4858 
4859 		xml_stream_chunk* next;
4860 		size_t size;
4861 
4862 		T data[xml_memory_page_size / sizeof(T)];
4863 	};
4864 
	// Reads a stream until EOF without seeking: data is gathered into a list of chunks and then copied into one buffer.
	// On success stores the buffer in *out_buffer and its size in bytes in *out_size and returns status_ok;
	// returns status_out_of_memory on allocation failure or size overflow, and status_io_error on a stream error.
	// The buffer is allocated with sizeof(char_t) spare bytes after the data.
	template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
	{
		// holder for the chunk list; constructed with xml_stream_chunk<T>::destroy (presumably invoked on scope exit - auto_deleter is defined elsewhere)
		auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy);

		// read file to a chunk list
		size_t total = 0;
		xml_stream_chunk<T>* last = 0;

		while (!stream.eof())
		{
			// allocate new chunk
			xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
			if (!chunk) return status_out_of_memory;

			// append chunk to list
			if (last) last = last->next = chunk;
			else chunks.data = last = chunk;

			// read data to chunk
			stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
			// gcount() is in units of T; chunk sizes are tracked in bytes
			chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);

			// read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
			if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;

			// guard against huge files (chunk size is small enough to make this overflow check work)
			if (total + chunk->size < total) return status_out_of_memory;
			total += chunk->size;
		}

		// spare room after the data, one char_t wide
		size_t max_suffix_size = sizeof(char_t);

		// copy chunk list to a contiguous buffer
		char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
		if (!buffer) return status_out_of_memory;

		char* write = buffer;

		for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
		{
			assert(write + chunk->size <= buffer + total);
			memcpy(write, chunk->data, chunk->size);
			write += chunk->size;
		}

		assert(write == buffer + total);

		// return buffer
		*out_buffer = buffer;
		*out_size = total;

		return status_ok;
	}
4918 
load_stream_data_seek(std::basic_istream<T> & stream,void ** out_buffer,size_t * out_size)4919 	template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4920 	{
4921 		// get length of remaining data in stream
4922 		typename std::basic_istream<T>::pos_type pos = stream.tellg();
4923 		stream.seekg(0, std::ios::end);
4924 		std::streamoff length = stream.tellg() - pos;
4925 		stream.seekg(pos);
4926 
4927 		if (stream.fail() || pos < 0) return status_io_error;
4928 
4929 		// guard against huge files
4930 		size_t read_length = static_cast<size_t>(length);
4931 
4932 		if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
4933 
4934 		size_t max_suffix_size = sizeof(char_t);
4935 
4936 		// read stream data into memory (guard against stream exceptions with buffer holder)
4937 		auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
4938 		if (!buffer.data) return status_out_of_memory;
4939 
4940 		stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
4941 
4942 		// read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
4943 		if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4944 
4945 		// return buffer
4946 		size_t actual_length = static_cast<size_t>(stream.gcount());
4947 		assert(actual_length <= read_length);
4948 
4949 		*out_buffer = buffer.release();
4950 		*out_size = actual_length * sizeof(T);
4951 
4952 		return status_ok;
4953 	}
4954 
load_stream_impl(xml_document_struct * doc,std::basic_istream<T> & stream,unsigned int options,xml_encoding encoding,char_t ** out_buffer)4955 	template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4956 	{
4957 		void* buffer = 0;
4958 		size_t size = 0;
4959 		xml_parse_status status = status_ok;
4960 
4961 		// if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
4962 		if (stream.fail()) return make_parse_result(status_io_error);
4963 
4964 		// load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
4965 		if (stream.tellg() < 0)
4966 		{
4967 			stream.clear(); // clear error flags that could be set by a failing tellg
4968 			status = load_stream_data_noseek(stream, &buffer, &size);
4969 		}
4970 		else
4971 			status = load_stream_data_seek(stream, &buffer, &size);
4972 
4973 		if (status != status_ok) return make_parse_result(status);
4974 
4975 		xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
4976 
4977 		return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
4978 	}
4979 #endif
4980 
4981 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
open_file_wide(const wchar_t * path,const wchar_t * mode)4982 	PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4983 	{
4984 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
4985 		FILE* file = 0;
4986 		return _wfopen_s(&file, path, mode) == 0 ? file : 0;
4987 #else
4988 		return _wfopen(path, mode);
4989 #endif
4990 	}
4991 #else
convert_path_heap(const wchar_t * str)4992 	PUGI__FN char* convert_path_heap(const wchar_t* str)
4993 	{
4994 		assert(str);
4995 
4996 		// first pass: get length in utf8 characters
4997 		size_t length = strlength_wide(str);
4998 		size_t size = as_utf8_begin(str, length);
4999 
5000 		// allocate resulting string
5001 		char* result = static_cast<char*>(xml_memory::allocate(size + 1));
5002 		if (!result) return 0;
5003 
5004 		// second pass: convert to utf8
5005 		as_utf8_end(result, size, str, length);
5006 
5007 		// zero-terminate
5008 		result[size] = 0;
5009 
5010 		return result;
5011 	}
5012 
open_file_wide(const wchar_t * path,const wchar_t * mode)5013 	PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
5014 	{
5015 		// there is no standard function to open wide paths, so our best bet is to try utf8 path
5016 		char* path_utf8 = convert_path_heap(path);
5017 		if (!path_utf8) return 0;
5018 
5019 		// convert mode to ASCII (we mirror _wfopen interface)
5020 		char mode_ascii[4] = {0};
5021 		for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
5022 
5023 		// try to open the utf8 path
5024 		FILE* result = fopen(path_utf8, mode_ascii);
5025 
5026 		// free dummy buffer
5027 		xml_memory::deallocate(path_utf8);
5028 
5029 		return result;
5030 	}
5031 #endif
5032 
open_file(const char * path,const char * mode)5033 	PUGI__FN FILE* open_file(const char* path, const char* mode)
5034 	{
5035 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
5036 		FILE* file = 0;
5037 		return fopen_s(&file, path, mode) == 0 ? file : 0;
5038 #else
5039 		return fopen(path, mode);
5040 #endif
5041 	}
5042 
save_file_impl(const xml_document & doc,FILE * file,const char_t * indent,unsigned int flags,xml_encoding encoding)5043 	PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
5044 	{
5045 		if (!file) return false;
5046 
5047 		xml_writer_file writer(file);
5048 		doc.save(writer, indent, flags, encoding);
5049 
5050 		return ferror(file) == 0;
5051 	}
5052 
	// Scope guard that clears a node's name on construction and puts the saved name back on destruction.
	struct name_null_sentry
	{
		xml_node_struct* node; // node whose name is temporarily cleared
		char_t* name; // name saved from the node at construction

		// Saves node_->name and sets the node's name to null; node_ must not be null (it is dereferenced)
		name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name)
		{
			node->name = 0;
		}

		// Restores the saved name
		~name_null_sentry()
		{
			node->name = name;
		}
	};
5068 PUGI__NS_END
5069 
5070 namespace pugi
5071 {
	// Stores the target file handle; write() casts it back to FILE*.
	PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
	{
	}
5075 
	// Writes size bytes from data to the stored FILE*; the number of bytes written is not reported to the caller.
	PUGI__FN void xml_writer_file::write(const void* data, size_t size)
	{
		size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
		// the result is deliberately consumed without acting on it
		(void)!result; // unfortunately we can't do proper error handling here
	}
5081 
5082 #ifndef PUGIXML_NO_STL
	// Targets a narrow (char) output stream; the wide stream pointer is left null.
	PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
	{
	}
5086 
	// Targets a wide (wchar_t) output stream; the narrow stream pointer is left null.
	PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
	{
	}
5090 
write(const void * data,size_t size)5091 	PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
5092 	{
5093 		if (narrow_stream)
5094 		{
5095 			assert(!wide_stream);
5096 			narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
5097 		}
5098 		else
5099 		{
5100 			assert(wide_stream);
5101 			assert(size % sizeof(wchar_t) == 0);
5102 
5103 			wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
5104 		}
5105 	}
5106 #endif
5107 
	// Creates a walker with its depth counter set to zero.
	PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
	{
	}
5111 
	// Destructor; there is nothing to release.
	PUGI__FN xml_tree_walker::~xml_tree_walker()
	{
	}
5115 
	// Returns the current value of the walker's depth counter.
	PUGI__FN int xml_tree_walker::depth() const
	{
		return _depth;
	}
5120 
	// Default implementation: ignores the node and returns true.
	PUGI__FN bool xml_tree_walker::begin(xml_node&)
	{
		return true;
	}
5125 
	// Default implementation: ignores the node and returns true.
	PUGI__FN bool xml_tree_walker::end(xml_node&)
	{
		return true;
	}
5130 
	// Creates an empty (null) attribute handle.
	PUGI__FN xml_attribute::xml_attribute(): _attr(0)
	{
	}
5134 
	// Wraps an existing attribute structure; a null pointer gives an empty handle.
	PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
	{
	}
5138 
	// Empty function whose address serves as the non-null value of xml_attribute's boolean conversion.
	PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
	{
	}
5142 
operator xml_attribute::unspecified_bool_type() const5143 	PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
5144 	{
5145 		return _attr ? unspecified_bool_xml_attribute : 0;
5146 	}
5147 
operator !() const5148 	PUGI__FN bool xml_attribute::operator!() const
5149 	{
5150 		return !_attr;
5151 	}
5152 
	// Handles are equal when they wrap the same attribute structure pointer; names and values are not compared.
	PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
	{
		return (_attr == r._attr);
	}
5157 
	// Handles differ when they wrap different attribute structure pointers.
	PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
	{
		return (_attr != r._attr);
	}
5162 
	// Orders handles by the wrapped pointer value.
	PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
	{
		return (_attr < r._attr);
	}
5167 
	// Orders handles by the wrapped pointer value.
	PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
	{
		return (_attr > r._attr);
	}
5172 
	// Orders handles by the wrapped pointer value.
	PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
	{
		return (_attr <= r._attr);
	}
5177 
	// Orders handles by the wrapped pointer value.
	PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
	{
		return (_attr >= r._attr);
	}
5182 
next_attribute() const5183 	PUGI__FN xml_attribute xml_attribute::next_attribute() const
5184 	{
5185 		return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
5186 	}
5187 
previous_attribute() const5188 	PUGI__FN xml_attribute xml_attribute::previous_attribute() const
5189 	{
5190 		return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
5191 	}
5192 
as_string(const char_t * def) const5193 	PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
5194 	{
5195 		return (_attr && _attr->value) ? _attr->value + 0 : def;
5196 	}
5197 
as_int(int def) const5198 	PUGI__FN int xml_attribute::as_int(int def) const
5199 	{
5200 		return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def;
5201 	}
5202 
as_uint(unsigned int def) const5203 	PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
5204 	{
5205 		return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def;
5206 	}
5207 
as_double(double def) const5208 	PUGI__FN double xml_attribute::as_double(double def) const
5209 	{
5210 		return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def;
5211 	}
5212 
as_float(float def) const5213 	PUGI__FN float xml_attribute::as_float(float def) const
5214 	{
5215 		return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def;
5216 	}
5217 
as_bool(bool def) const5218 	PUGI__FN bool xml_attribute::as_bool(bool def) const
5219 	{
5220 		return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def;
5221 	}
5222 
5223 #ifdef PUGIXML_HAS_LONG_LONG
as_llong(long long def) const5224 	PUGI__FN long long xml_attribute::as_llong(long long def) const
5225 	{
5226 		return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def;
5227 	}
5228 
as_ullong(unsigned long long def) const5229 	PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
5230 	{
5231 		return (_attr && _attr->value) ? impl::get_value_ullong(_attr->value) : def;
5232 	}
5233 #endif
5234 
empty() const5235 	PUGI__FN bool xml_attribute::empty() const
5236 	{
5237 		return !_attr;
5238 	}
5239 
name() const5240 	PUGI__FN const char_t* xml_attribute::name() const
5241 	{
5242 		return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT("");
5243 	}
5244 
value() const5245 	PUGI__FN const char_t* xml_attribute::value() const
5246 	{
5247 		return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT("");
5248 	}
5249 
hash_value() const5250 	PUGI__FN size_t xml_attribute::hash_value() const
5251 	{
5252 		return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
5253 	}
5254 
	// Returns the wrapped attribute structure pointer (null for an empty handle).
	PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
	{
		return _attr;
	}
5259 
	// Assigns a string value via set_value; the success flag of set_value is discarded.
	PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
	{
		set_value(rhs);
		return *this;
	}
5265 
	// Assigns an int value via set_value; the success flag of set_value is discarded.
	PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
	{
		set_value(rhs);
		return *this;
	}
5271 
	// Assigns an unsigned int value via set_value; the success flag of set_value is discarded.
	PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
	{
		set_value(rhs);
		return *this;
	}
5277 
	// Assigns a long value via set_value; the success flag of set_value is discarded.
	PUGI__FN xml_attribute& xml_attribute::operator=(long rhs)
	{
		set_value(rhs);
		return *this;
	}
5283 
	// Assigns an unsigned long value via set_value; the success flag of set_value is discarded.
	PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs)
	{
		set_value(rhs);
		return *this;
	}
5289 
	// Assigns a double value via set_value; the success flag of set_value is discarded.
	PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
	{
		set_value(rhs);
		return *this;
	}
5295 
	// Assigns a float value via set_value; the success flag of set_value is discarded.
	PUGI__FN xml_attribute& xml_attribute::operator=(float rhs)
	{
		set_value(rhs);
		return *this;
	}
5301 
	// Assigns a bool value via set_value; the success flag of set_value is discarded.
	PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
	{
		set_value(rhs);
		return *this;
	}
5307 
5308 #ifdef PUGIXML_HAS_LONG_LONG
	// Assigns a long long value via set_value; the success flag of set_value is discarded.
	PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
	{
		set_value(rhs);
		return *this;
	}
5314 
	// Assigns an unsigned long long value via set_value; the success flag of set_value is discarded.
	PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
	{
		set_value(rhs);
		return *this;
	}
5320 #endif
5321 
set_name(const char_t * rhs)5322 	PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
5323 	{
5324 		if (!_attr) return false;
5325 
5326 		return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5327 	}
5328 
set_value(const char_t * rhs)5329 	PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
5330 	{
5331 		if (!_attr) return false;
5332 
5333 		return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5334 	}
5335 
set_value(int rhs)5336 	PUGI__FN bool xml_attribute::set_value(int rhs)
5337 	{
5338 		if (!_attr) return false;
5339 
5340 		return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5341 	}
5342 
set_value(unsigned int rhs)5343 	PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
5344 	{
5345 		if (!_attr) return false;
5346 
5347 		return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5348 	}
5349 
set_value(long rhs)5350 	PUGI__FN bool xml_attribute::set_value(long rhs)
5351 	{
5352 		if (!_attr) return false;
5353 
5354 		return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5355 	}
5356 
set_value(unsigned long rhs)5357 	PUGI__FN bool xml_attribute::set_value(unsigned long rhs)
5358 	{
5359 		if (!_attr) return false;
5360 
5361 		return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5362 	}
5363 
set_value(double rhs)5364 	PUGI__FN bool xml_attribute::set_value(double rhs)
5365 	{
5366 		if (!_attr) return false;
5367 
5368 		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision);
5369 	}
5370 
set_value(double rhs,int precision)5371 	PUGI__FN bool xml_attribute::set_value(double rhs, int precision)
5372 	{
5373 		if (!_attr) return false;
5374 
5375 		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision);
5376 	}
5377 
set_value(float rhs)5378 	PUGI__FN bool xml_attribute::set_value(float rhs)
5379 	{
5380 		if (!_attr) return false;
5381 
5382 		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision);
5383 	}
5384 
set_value(float rhs,int precision)5385 	PUGI__FN bool xml_attribute::set_value(float rhs, int precision)
5386 	{
5387 		if (!_attr) return false;
5388 
5389 		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision);
5390 	}
5391 
set_value(bool rhs)5392 	PUGI__FN bool xml_attribute::set_value(bool rhs)
5393 	{
5394 		if (!_attr) return false;
5395 
5396 		return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5397 	}
5398 
5399 #ifdef PUGIXML_HAS_LONG_LONG
set_value(long long rhs)5400 	PUGI__FN bool xml_attribute::set_value(long long rhs)
5401 	{
5402 		if (!_attr) return false;
5403 
5404 		return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5405 	}
5406 
set_value(unsigned long long rhs)5407 	PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
5408 	{
5409 		if (!_attr) return false;
5410 
5411 		return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5412 	}
5413 #endif
5414 
5415 #ifdef __BORLANDC__
	// Borland-only overload: logical AND of the attribute's boolean conversion and rhs.
	PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
	{
		return (bool)lhs && rhs;
	}
5420 
	// Borland-only overload: logical OR of the attribute's boolean conversion and rhs.
	PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
	{
		return (bool)lhs || rhs;
	}
5425 #endif
5426 
	// Creates an empty (null) node handle.
	PUGI__FN xml_node::xml_node(): _root(0)
	{
	}
5430 
	// Wraps an existing node structure; a null pointer gives an empty handle.
	PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
	{
	}
5434 
	// Empty function whose address serves as the non-null value of xml_node's boolean conversion.
	PUGI__FN static void unspecified_bool_xml_node(xml_node***)
	{
	}
5438 
operator xml_node::unspecified_bool_type() const5439 	PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
5440 	{
5441 		return _root ? unspecified_bool_xml_node : 0;
5442 	}
5443 
operator !() const5444 	PUGI__FN bool xml_node::operator!() const
5445 	{
5446 		return !_root;
5447 	}
5448 
begin() const5449 	PUGI__FN xml_node::iterator xml_node::begin() const
5450 	{
5451 		return iterator(_root ? _root->first_child + 0 : 0, _root);
5452 	}
5453 
	// Returns the past-the-end child iterator: a null position paired with this node as parent.
	PUGI__FN xml_node::iterator xml_node::end() const
	{
		return iterator(0, _root);
	}
5458 
attributes_begin() const5459 	PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
5460 	{
5461 		return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root);
5462 	}
5463 
	// Returns the past-the-end attribute iterator: a null position paired with this node as parent.
	PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
	{
		return attribute_iterator(0, _root);
	}
5468 
children() const5469 	PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
5470 	{
5471 		return xml_object_range<xml_node_iterator>(begin(), end());
5472 	}
5473 
children(const char_t * name_) const5474 	PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
5475 	{
5476 		return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
5477 	}
5478 
attributes() const5479 	PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
5480 	{
5481 		return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
5482 	}
5483 
	// Handles are equal when they wrap the same node structure pointer; node contents are not compared.
	PUGI__FN bool xml_node::operator==(const xml_node& r) const
	{
		return (_root == r._root);
	}
5488 
	// Handles differ when they wrap different node structure pointers.
	PUGI__FN bool xml_node::operator!=(const xml_node& r) const
	{
		return (_root != r._root);
	}
5493 
	// Orders handles by the wrapped pointer value.
	PUGI__FN bool xml_node::operator<(const xml_node& r) const
	{
		return (_root < r._root);
	}
5498 
	// Orders handles by the wrapped pointer value.
	PUGI__FN bool xml_node::operator>(const xml_node& r) const
	{
		return (_root > r._root);
	}
5503 
	// Orders handles by the wrapped pointer value.
	PUGI__FN bool xml_node::operator<=(const xml_node& r) const
	{
		return (_root <= r._root);
	}
5508 
	// Orders handles by the wrapped pointer value.
	PUGI__FN bool xml_node::operator>=(const xml_node& r) const
	{
		return (_root >= r._root);
	}
5513 
empty() const5514 	PUGI__FN bool xml_node::empty() const
5515 	{
5516 		return !_root;
5517 	}
5518 
name() const5519 	PUGI__FN const char_t* xml_node::name() const
5520 	{
5521 		return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT("");
5522 	}
5523 
type() const5524 	PUGI__FN xml_node_type xml_node::type() const
5525 	{
5526 		return _root ? PUGI__NODETYPE(_root) : node_null;
5527 	}
5528 
value() const5529 	PUGI__FN const char_t* xml_node::value() const
5530 	{
5531 		return (_root && _root->value) ? _root->value + 0 : PUGIXML_TEXT("");
5532 	}
5533 
child(const char_t * name_) const5534 	PUGI__FN xml_node xml_node::child(const char_t* name_) const
5535 	{
5536 		if (!_root) return xml_node();
5537 
5538 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5539 			if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5540 
5541 		return xml_node();
5542 	}
5543 
attribute(const char_t * name_) const5544 	PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
5545 	{
5546 		if (!_root) return xml_attribute();
5547 
5548 		for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
5549 			if (i->name && impl::strequal(name_, i->name))
5550 				return xml_attribute(i);
5551 
5552 		return xml_attribute();
5553 	}
5554 
next_sibling(const char_t * name_) const5555 	PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
5556 	{
5557 		if (!_root) return xml_node();
5558 
5559 		for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
5560 			if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5561 
5562 		return xml_node();
5563 	}
5564 
next_sibling() const5565 	PUGI__FN xml_node xml_node::next_sibling() const
5566 	{
5567 		return _root ? xml_node(_root->next_sibling) : xml_node();
5568 	}
5569 
previous_sibling(const char_t * name_) const5570 	PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
5571 	{
5572 		if (!_root) return xml_node();
5573 
5574 		for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
5575 			if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5576 
5577 		return xml_node();
5578 	}
5579 
	// Finds an attribute named name_, starting the search at hint_ instead of the first attribute.
	// On a match, hint_ is updated to the attribute following the match and the match is returned;
	// if nothing matches, hint_ is left unchanged and an empty handle is returned.
	PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
	{
		xml_attribute_struct* hint = hint_._attr;

		// if hint is not an attribute of node, behavior is not defined
		assert(!hint || (_root && impl::is_attribute_of(hint, _root)));

		if (!_root) return xml_attribute();

		// optimistically search from hint up until the end
		for (xml_attribute_struct* i = hint; i; i = i->next_attribute)
			if (i->name && impl::strequal(name_, i->name))
			{
				// update hint to maximize efficiency of searching for consecutive attributes
				hint_._attr = i->next_attribute;

				return xml_attribute(i);
			}

		// wrap around and search from the first attribute until the hint
		// 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails
		for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute)
			if (j->name && impl::strequal(name_, j->name))
			{
				// update hint to maximize efficiency of searching for consecutive attributes
				hint_._attr = j->next_attribute;

				return xml_attribute(j);
			}

		return xml_attribute();
	}
5612 
previous_sibling() const5613 	PUGI__FN xml_node xml_node::previous_sibling() const
5614 	{
5615 		if (!_root) return xml_node();
5616 
5617 		if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
5618 		else return xml_node();
5619 	}
5620 
parent() const5621 	PUGI__FN xml_node xml_node::parent() const
5622 	{
5623 		return _root ? xml_node(_root->parent) : xml_node();
5624 	}
5625 
root() const5626 	PUGI__FN xml_node xml_node::root() const
5627 	{
5628 		return _root ? xml_node(&impl::get_document(_root)) : xml_node();
5629 	}
5630 
	// Returns an xml_text object constructed from this node's structure pointer.
	PUGI__FN xml_text xml_node::text() const
	{
		return xml_text(_root);
	}
5635 
child_value() const5636 	PUGI__FN const char_t* xml_node::child_value() const
5637 	{
5638 		if (!_root) return PUGIXML_TEXT("");
5639 
5640 		// element nodes can have value if parse_embed_pcdata was used
5641 		if (PUGI__NODETYPE(_root) == node_element && _root->value)
5642 			return _root->value;
5643 
5644 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5645 			if (impl::is_text_node(i) && i->value)
5646 				return i->value;
5647 
5648 		return PUGIXML_TEXT("");
5649 	}
5650 
	// Returns child_value() of the first child named name_ (an empty string when there is no such child).
	PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
	{
		return child(name_).child_value();
	}
5655 
first_attribute() const5656 	PUGI__FN xml_attribute xml_node::first_attribute() const
5657 	{
5658 		return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
5659 	}
5660 
last_attribute() const5661 	PUGI__FN xml_attribute xml_node::last_attribute() const
5662 	{
5663 		return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
5664 	}
5665 
first_child() const5666 	PUGI__FN xml_node xml_node::first_child() const
5667 	{
5668 		return _root ? xml_node(_root->first_child) : xml_node();
5669 	}
5670 
last_child() const5671 	PUGI__FN xml_node xml_node::last_child() const
5672 	{
5673 		return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
5674 	}
5675 
set_name(const char_t * rhs)5676 	PUGI__FN bool xml_node::set_name(const char_t* rhs)
5677 	{
5678 		xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5679 
5680 		if (type_ != node_element && type_ != node_pi && type_ != node_declaration)
5681 			return false;
5682 
5683 		return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5684 	}
5685 
set_value(const char_t * rhs)5686 	PUGI__FN bool xml_node::set_value(const char_t* rhs)
5687 	{
5688 		xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5689 
5690 		if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
5691 			return false;
5692 
5693 		return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5694 	}
5695 
append_attribute(const char_t * name_)5696 	PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
5697 	{
5698 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5699 
5700 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5701 		if (!alloc.reserve()) return xml_attribute();
5702 
5703 		xml_attribute a(impl::allocate_attribute(alloc));
5704 		if (!a) return xml_attribute();
5705 
5706 		impl::append_attribute(a._attr, _root);
5707 
5708 		a.set_name(name_);
5709 
5710 		return a;
5711 	}
5712 
prepend_attribute(const char_t * name_)5713 	PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
5714 	{
5715 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5716 
5717 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5718 		if (!alloc.reserve()) return xml_attribute();
5719 
5720 		xml_attribute a(impl::allocate_attribute(alloc));
5721 		if (!a) return xml_attribute();
5722 
5723 		impl::prepend_attribute(a._attr, _root);
5724 
5725 		a.set_name(name_);
5726 
5727 		return a;
5728 	}
5729 
insert_attribute_after(const char_t * name_,const xml_attribute & attr)5730 	PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
5731 	{
5732 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5733 		if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5734 
5735 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5736 		if (!alloc.reserve()) return xml_attribute();
5737 
5738 		xml_attribute a(impl::allocate_attribute(alloc));
5739 		if (!a) return xml_attribute();
5740 
5741 		impl::insert_attribute_after(a._attr, attr._attr, _root);
5742 
5743 		a.set_name(name_);
5744 
5745 		return a;
5746 	}
5747 
insert_attribute_before(const char_t * name_,const xml_attribute & attr)5748 	PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
5749 	{
5750 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5751 		if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5752 
5753 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5754 		if (!alloc.reserve()) return xml_attribute();
5755 
5756 		xml_attribute a(impl::allocate_attribute(alloc));
5757 		if (!a) return xml_attribute();
5758 
5759 		impl::insert_attribute_before(a._attr, attr._attr, _root);
5760 
5761 		a.set_name(name_);
5762 
5763 		return a;
5764 	}
5765 
append_copy(const xml_attribute & proto)5766 	PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
5767 	{
5768 		if (!proto) return xml_attribute();
5769 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5770 
5771 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5772 		if (!alloc.reserve()) return xml_attribute();
5773 
5774 		xml_attribute a(impl::allocate_attribute(alloc));
5775 		if (!a) return xml_attribute();
5776 
5777 		impl::append_attribute(a._attr, _root);
5778 		impl::node_copy_attribute(a._attr, proto._attr);
5779 
5780 		return a;
5781 	}
5782 
prepend_copy(const xml_attribute & proto)5783 	PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
5784 	{
5785 		if (!proto) return xml_attribute();
5786 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5787 
5788 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5789 		if (!alloc.reserve()) return xml_attribute();
5790 
5791 		xml_attribute a(impl::allocate_attribute(alloc));
5792 		if (!a) return xml_attribute();
5793 
5794 		impl::prepend_attribute(a._attr, _root);
5795 		impl::node_copy_attribute(a._attr, proto._attr);
5796 
5797 		return a;
5798 	}
5799 
insert_copy_after(const xml_attribute & proto,const xml_attribute & attr)5800 	PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
5801 	{
5802 		if (!proto) return xml_attribute();
5803 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5804 		if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5805 
5806 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5807 		if (!alloc.reserve()) return xml_attribute();
5808 
5809 		xml_attribute a(impl::allocate_attribute(alloc));
5810 		if (!a) return xml_attribute();
5811 
5812 		impl::insert_attribute_after(a._attr, attr._attr, _root);
5813 		impl::node_copy_attribute(a._attr, proto._attr);
5814 
5815 		return a;
5816 	}
5817 
insert_copy_before(const xml_attribute & proto,const xml_attribute & attr)5818 	PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
5819 	{
5820 		if (!proto) return xml_attribute();
5821 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5822 		if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5823 
5824 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5825 		if (!alloc.reserve()) return xml_attribute();
5826 
5827 		xml_attribute a(impl::allocate_attribute(alloc));
5828 		if (!a) return xml_attribute();
5829 
5830 		impl::insert_attribute_before(a._attr, attr._attr, _root);
5831 		impl::node_copy_attribute(a._attr, proto._attr);
5832 
5833 		return a;
5834 	}
5835 
append_child(xml_node_type type_)5836 	PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
5837 	{
5838 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5839 
5840 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5841 		if (!alloc.reserve()) return xml_node();
5842 
5843 		xml_node n(impl::allocate_node(alloc, type_));
5844 		if (!n) return xml_node();
5845 
5846 		impl::append_node(n._root, _root);
5847 
5848 		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5849 
5850 		return n;
5851 	}
5852 
prepend_child(xml_node_type type_)5853 	PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
5854 	{
5855 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5856 
5857 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5858 		if (!alloc.reserve()) return xml_node();
5859 
5860 		xml_node n(impl::allocate_node(alloc, type_));
5861 		if (!n) return xml_node();
5862 
5863 		impl::prepend_node(n._root, _root);
5864 
5865 		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5866 
5867 		return n;
5868 	}
5869 
insert_child_before(xml_node_type type_,const xml_node & node)5870 	PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
5871 	{
5872 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5873 		if (!node._root || node._root->parent != _root) return xml_node();
5874 
5875 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5876 		if (!alloc.reserve()) return xml_node();
5877 
5878 		xml_node n(impl::allocate_node(alloc, type_));
5879 		if (!n) return xml_node();
5880 
5881 		impl::insert_node_before(n._root, node._root);
5882 
5883 		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5884 
5885 		return n;
5886 	}
5887 
insert_child_after(xml_node_type type_,const xml_node & node)5888 	PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
5889 	{
5890 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5891 		if (!node._root || node._root->parent != _root) return xml_node();
5892 
5893 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5894 		if (!alloc.reserve()) return xml_node();
5895 
5896 		xml_node n(impl::allocate_node(alloc, type_));
5897 		if (!n) return xml_node();
5898 
5899 		impl::insert_node_after(n._root, node._root);
5900 
5901 		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5902 
5903 		return n;
5904 	}
5905 
append_child(const char_t * name_)5906 	PUGI__FN xml_node xml_node::append_child(const char_t* name_)
5907 	{
5908 		xml_node result = append_child(node_element);
5909 
5910 		result.set_name(name_);
5911 
5912 		return result;
5913 	}
5914 
prepend_child(const char_t * name_)5915 	PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
5916 	{
5917 		xml_node result = prepend_child(node_element);
5918 
5919 		result.set_name(name_);
5920 
5921 		return result;
5922 	}
5923 
insert_child_after(const char_t * name_,const xml_node & node)5924 	PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
5925 	{
5926 		xml_node result = insert_child_after(node_element, node);
5927 
5928 		result.set_name(name_);
5929 
5930 		return result;
5931 	}
5932 
insert_child_before(const char_t * name_,const xml_node & node)5933 	PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
5934 	{
5935 		xml_node result = insert_child_before(node_element, node);
5936 
5937 		result.set_name(name_);
5938 
5939 		return result;
5940 	}
5941 
append_copy(const xml_node & proto)5942 	PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
5943 	{
5944 		xml_node_type type_ = proto.type();
5945 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5946 
5947 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5948 		if (!alloc.reserve()) return xml_node();
5949 
5950 		xml_node n(impl::allocate_node(alloc, type_));
5951 		if (!n) return xml_node();
5952 
5953 		impl::append_node(n._root, _root);
5954 		impl::node_copy_tree(n._root, proto._root);
5955 
5956 		return n;
5957 	}
5958 
prepend_copy(const xml_node & proto)5959 	PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
5960 	{
5961 		xml_node_type type_ = proto.type();
5962 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5963 
5964 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5965 		if (!alloc.reserve()) return xml_node();
5966 
5967 		xml_node n(impl::allocate_node(alloc, type_));
5968 		if (!n) return xml_node();
5969 
5970 		impl::prepend_node(n._root, _root);
5971 		impl::node_copy_tree(n._root, proto._root);
5972 
5973 		return n;
5974 	}
5975 
insert_copy_after(const xml_node & proto,const xml_node & node)5976 	PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
5977 	{
5978 		xml_node_type type_ = proto.type();
5979 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5980 		if (!node._root || node._root->parent != _root) return xml_node();
5981 
5982 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5983 		if (!alloc.reserve()) return xml_node();
5984 
5985 		xml_node n(impl::allocate_node(alloc, type_));
5986 		if (!n) return xml_node();
5987 
5988 		impl::insert_node_after(n._root, node._root);
5989 		impl::node_copy_tree(n._root, proto._root);
5990 
5991 		return n;
5992 	}
5993 
insert_copy_before(const xml_node & proto,const xml_node & node)5994 	PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
5995 	{
5996 		xml_node_type type_ = proto.type();
5997 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5998 		if (!node._root || node._root->parent != _root) return xml_node();
5999 
6000 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6001 		if (!alloc.reserve()) return xml_node();
6002 
6003 		xml_node n(impl::allocate_node(alloc, type_));
6004 		if (!n) return xml_node();
6005 
6006 		impl::insert_node_before(n._root, node._root);
6007 		impl::node_copy_tree(n._root, proto._root);
6008 
6009 		return n;
6010 	}
6011 
append_move(const xml_node & moved)6012 	PUGI__FN xml_node xml_node::append_move(const xml_node& moved)
6013 	{
6014 		if (!impl::allow_move(*this, moved)) return xml_node();
6015 
6016 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6017 		if (!alloc.reserve()) return xml_node();
6018 
6019 		// disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6020 		impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6021 
6022 		impl::remove_node(moved._root);
6023 		impl::append_node(moved._root, _root);
6024 
6025 		return moved;
6026 	}
6027 
prepend_move(const xml_node & moved)6028 	PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved)
6029 	{
6030 		if (!impl::allow_move(*this, moved)) return xml_node();
6031 
6032 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6033 		if (!alloc.reserve()) return xml_node();
6034 
6035 		// disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6036 		impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6037 
6038 		impl::remove_node(moved._root);
6039 		impl::prepend_node(moved._root, _root);
6040 
6041 		return moved;
6042 	}
6043 
insert_move_after(const xml_node & moved,const xml_node & node)6044 	PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
6045 	{
6046 		if (!impl::allow_move(*this, moved)) return xml_node();
6047 		if (!node._root || node._root->parent != _root) return xml_node();
6048 		if (moved._root == node._root) return xml_node();
6049 
6050 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6051 		if (!alloc.reserve()) return xml_node();
6052 
6053 		// disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6054 		impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6055 
6056 		impl::remove_node(moved._root);
6057 		impl::insert_node_after(moved._root, node._root);
6058 
6059 		return moved;
6060 	}
6061 
insert_move_before(const xml_node & moved,const xml_node & node)6062 	PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
6063 	{
6064 		if (!impl::allow_move(*this, moved)) return xml_node();
6065 		if (!node._root || node._root->parent != _root) return xml_node();
6066 		if (moved._root == node._root) return xml_node();
6067 
6068 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6069 		if (!alloc.reserve()) return xml_node();
6070 
6071 		// disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6072 		impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6073 
6074 		impl::remove_node(moved._root);
6075 		impl::insert_node_before(moved._root, node._root);
6076 
6077 		return moved;
6078 	}
6079 
remove_attribute(const char_t * name_)6080 	PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
6081 	{
6082 		return remove_attribute(attribute(name_));
6083 	}
6084 
remove_attribute(const xml_attribute & a)6085 	PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
6086 	{
6087 		if (!_root || !a._attr) return false;
6088 		if (!impl::is_attribute_of(a._attr, _root)) return false;
6089 
6090 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6091 		if (!alloc.reserve()) return false;
6092 
6093 		impl::remove_attribute(a._attr, _root);
6094 		impl::destroy_attribute(a._attr, alloc);
6095 
6096 		return true;
6097 	}
6098 
remove_attributes()6099 	PUGI__FN bool xml_node::remove_attributes()
6100 	{
6101 		if (!_root) return false;
6102 
6103 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6104 		if (!alloc.reserve()) return false;
6105 
6106 		for (xml_attribute_struct* attr = _root->first_attribute; attr; )
6107 		{
6108 			xml_attribute_struct* next = attr->next_attribute;
6109 
6110 			impl::destroy_attribute(attr, alloc);
6111 
6112 			attr = next;
6113 		}
6114 
6115 		_root->first_attribute = 0;
6116 
6117 		return true;
6118 	}
6119 
remove_child(const char_t * name_)6120 	PUGI__FN bool xml_node::remove_child(const char_t* name_)
6121 	{
6122 		return remove_child(child(name_));
6123 	}
6124 
remove_child(const xml_node & n)6125 	PUGI__FN bool xml_node::remove_child(const xml_node& n)
6126 	{
6127 		if (!_root || !n._root || n._root->parent != _root) return false;
6128 
6129 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6130 		if (!alloc.reserve()) return false;
6131 
6132 		impl::remove_node(n._root);
6133 		impl::destroy_node(n._root, alloc);
6134 
6135 		return true;
6136 	}
6137 
remove_children()6138 	PUGI__FN bool xml_node::remove_children()
6139 	{
6140 		if (!_root) return false;
6141 
6142 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6143 		if (!alloc.reserve()) return false;
6144 
6145 		for (xml_node_struct* cur = _root->first_child; cur; )
6146 		{
6147 			xml_node_struct* next = cur->next_sibling;
6148 
6149 			impl::destroy_node(cur, alloc);
6150 
6151 			cur = next;
6152 		}
6153 
6154 		_root->first_child = 0;
6155 
6156 		return true;
6157 	}
6158 
append_buffer(const void * contents,size_t size,unsigned int options,xml_encoding encoding)6159 	PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
6160 	{
6161 		// append_buffer is only valid for elements/documents
6162 		if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
6163 
6164 		// get document node
6165 		impl::xml_document_struct* doc = &impl::get_document(_root);
6166 
6167 		// disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
6168 		doc->header |= impl::xml_memory_page_contents_shared_mask;
6169 
6170 		// get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
6171 		impl::xml_memory_page* page = 0;
6172 		impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer) + sizeof(void*), page));
6173 		(void)page;
6174 
6175 		if (!extra) return impl::make_parse_result(status_out_of_memory);
6176 
6177 	#ifdef PUGIXML_COMPACT
6178 		// align the memory block to a pointer boundary; this is required for compact mode where memory allocations are only 4b aligned
6179 		// note that this requires up to sizeof(void*)-1 additional memory, which the allocation above takes into account
6180 		extra = reinterpret_cast<impl::xml_extra_buffer*>((reinterpret_cast<uintptr_t>(extra) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1));
6181 	#endif
6182 
6183 		// add extra buffer to the list
6184 		extra->buffer = 0;
6185 		extra->next = doc->extra_buffers;
6186 		doc->extra_buffers = extra;
6187 
6188 		// name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
6189 		impl::name_null_sentry sentry(_root);
6190 
6191 		return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer);
6192 	}
6193 
find_child_by_attribute(const char_t * name_,const char_t * attr_name,const char_t * attr_value) const6194 	PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
6195 	{
6196 		if (!_root) return xml_node();
6197 
6198 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6199 			if (i->name && impl::strequal(name_, i->name))
6200 			{
6201 				for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6202 					if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
6203 						return xml_node(i);
6204 			}
6205 
6206 		return xml_node();
6207 	}
6208 
find_child_by_attribute(const char_t * attr_name,const char_t * attr_value) const6209 	PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
6210 	{
6211 		if (!_root) return xml_node();
6212 
6213 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6214 			for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6215 				if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
6216 					return xml_node(i);
6217 
6218 		return xml_node();
6219 	}
6220 
6221 #ifndef PUGIXML_NO_STL
path(char_t delimiter) const6222 	PUGI__FN string_t xml_node::path(char_t delimiter) const
6223 	{
6224 		if (!_root) return string_t();
6225 
6226 		size_t offset = 0;
6227 
6228 		for (xml_node_struct* i = _root; i; i = i->parent)
6229 		{
6230 			offset += (i != _root);
6231 			offset += i->name ? impl::strlength(i->name) : 0;
6232 		}
6233 
6234 		string_t result;
6235 		result.resize(offset);
6236 
6237 		for (xml_node_struct* j = _root; j; j = j->parent)
6238 		{
6239 			if (j != _root)
6240 				result[--offset] = delimiter;
6241 
6242 			if (j->name)
6243 			{
6244 				size_t length = impl::strlength(j->name);
6245 
6246 				offset -= length;
6247 				memcpy(&result[offset], j->name, length * sizeof(char_t));
6248 			}
6249 		}
6250 
6251 		assert(offset == 0);
6252 
6253 		return result;
6254 	}
6255 #endif
6256 
first_element_by_path(const char_t * path_,char_t delimiter) const6257 	PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
6258 	{
6259 		xml_node context = path_[0] == delimiter ? root() : *this;
6260 
6261 		if (!context._root) return xml_node();
6262 
6263 		const char_t* path_segment = path_;
6264 
6265 		while (*path_segment == delimiter) ++path_segment;
6266 
6267 		const char_t* path_segment_end = path_segment;
6268 
6269 		while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
6270 
6271 		if (path_segment == path_segment_end) return context;
6272 
6273 		const char_t* next_segment = path_segment_end;
6274 
6275 		while (*next_segment == delimiter) ++next_segment;
6276 
6277 		if (*path_segment == '.' && path_segment + 1 == path_segment_end)
6278 			return context.first_element_by_path(next_segment, delimiter);
6279 		else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
6280 			return context.parent().first_element_by_path(next_segment, delimiter);
6281 		else
6282 		{
6283 			for (xml_node_struct* j = context._root->first_child; j; j = j->next_sibling)
6284 			{
6285 				if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
6286 				{
6287 					xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
6288 
6289 					if (subsearch) return subsearch;
6290 				}
6291 			}
6292 
6293 			return xml_node();
6294 		}
6295 	}
6296 
traverse(xml_tree_walker & walker)6297 	PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
6298 	{
6299 		walker._depth = -1;
6300 
6301 		xml_node arg_begin(_root);
6302 		if (!walker.begin(arg_begin)) return false;
6303 
6304 		xml_node_struct* cur = _root ? _root->first_child + 0 : 0;
6305 
6306 		if (cur)
6307 		{
6308 			++walker._depth;
6309 
6310 			do
6311 			{
6312 				xml_node arg_for_each(cur);
6313 				if (!walker.for_each(arg_for_each))
6314 					return false;
6315 
6316 				if (cur->first_child)
6317 				{
6318 					++walker._depth;
6319 					cur = cur->first_child;
6320 				}
6321 				else if (cur->next_sibling)
6322 					cur = cur->next_sibling;
6323 				else
6324 				{
6325 					while (!cur->next_sibling && cur != _root && cur->parent)
6326 					{
6327 						--walker._depth;
6328 						cur = cur->parent;
6329 					}
6330 
6331 					if (cur != _root)
6332 						cur = cur->next_sibling;
6333 				}
6334 			}
6335 			while (cur && cur != _root);
6336 		}
6337 
6338 		assert(walker._depth == -1);
6339 
6340 		xml_node arg_end(_root);
6341 		return walker.end(arg_end);
6342 	}
6343 
hash_value() const6344 	PUGI__FN size_t xml_node::hash_value() const
6345 	{
6346 		return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
6347 	}
6348 
internal_object() const6349 	PUGI__FN xml_node_struct* xml_node::internal_object() const
6350 	{
6351 		return _root;
6352 	}
6353 
print(xml_writer & writer,const char_t * indent,unsigned int flags,xml_encoding encoding,unsigned int depth) const6354 	PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6355 	{
6356 		if (!_root) return;
6357 
6358 		impl::xml_buffered_writer buffered_writer(writer, encoding);
6359 
6360 		impl::node_output(buffered_writer, _root, indent, flags, depth);
6361 
6362 		buffered_writer.flush();
6363 	}
6364 
6365 #ifndef PUGIXML_NO_STL
print(std::basic_ostream<char,std::char_traits<char>> & stream,const char_t * indent,unsigned int flags,xml_encoding encoding,unsigned int depth) const6366 	PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6367 	{
6368 		xml_writer_stream writer(stream);
6369 
6370 		print(writer, indent, flags, encoding, depth);
6371 	}
6372 
print(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream,const char_t * indent,unsigned int flags,unsigned int depth) const6373 	PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
6374 	{
6375 		xml_writer_stream writer(stream);
6376 
6377 		print(writer, indent, flags, encoding_wchar, depth);
6378 	}
6379 #endif
6380 
offset_debug() const6381 	PUGI__FN ptrdiff_t xml_node::offset_debug() const
6382 	{
6383 		if (!_root) return -1;
6384 
6385 		impl::xml_document_struct& doc = impl::get_document(_root);
6386 
6387 		// we can determine the offset reliably only if there is exactly once parse buffer
6388 		if (!doc.buffer || doc.extra_buffers) return -1;
6389 
6390 		switch (type())
6391 		{
6392 		case node_document:
6393 			return 0;
6394 
6395 		case node_element:
6396 		case node_declaration:
6397 		case node_pi:
6398 			return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1;
6399 
6400 		case node_pcdata:
6401 		case node_cdata:
6402 		case node_comment:
6403 		case node_doctype:
6404 			return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1;
6405 
6406 		default:
6407 			assert(false && "Invalid node type"); // unreachable
6408 			return -1;
6409 		}
6410 	}
6411 
6412 #ifdef __BORLANDC__
operator &&(const xml_node & lhs,bool rhs)6413 	PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
6414 	{
6415 		return (bool)lhs && rhs;
6416 	}
6417 
operator ||(const xml_node & lhs,bool rhs)6418 	PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
6419 	{
6420 		return (bool)lhs || rhs;
6421 	}
6422 #endif
6423 
xml_text(xml_node_struct * root)6424 	PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
6425 	{
6426 	}
6427 
_data() const6428 	PUGI__FN xml_node_struct* xml_text::_data() const
6429 	{
6430 		if (!_root || impl::is_text_node(_root)) return _root;
6431 
6432 		// element nodes can have value if parse_embed_pcdata was used
6433 		if (PUGI__NODETYPE(_root) == node_element && _root->value)
6434 			return _root;
6435 
6436 		for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
6437 			if (impl::is_text_node(node))
6438 				return node;
6439 
6440 		return 0;
6441 	}
6442 
_data_new()6443 	PUGI__FN xml_node_struct* xml_text::_data_new()
6444 	{
6445 		xml_node_struct* d = _data();
6446 		if (d) return d;
6447 
6448 		return xml_node(_root).append_child(node_pcdata).internal_object();
6449 	}
6450 
xml_text()6451 	PUGI__FN xml_text::xml_text(): _root(0)
6452 	{
6453 	}
6454 
unspecified_bool_xml_text(xml_text ***)6455 	PUGI__FN static void unspecified_bool_xml_text(xml_text***)
6456 	{
6457 	}
6458 
operator xml_text::unspecified_bool_type() const6459 	PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
6460 	{
6461 		return _data() ? unspecified_bool_xml_text : 0;
6462 	}
6463 
operator !() const6464 	PUGI__FN bool xml_text::operator!() const
6465 	{
6466 		return !_data();
6467 	}
6468 
empty() const6469 	PUGI__FN bool xml_text::empty() const
6470 	{
6471 		return _data() == 0;
6472 	}
6473 
get() const6474 	PUGI__FN const char_t* xml_text::get() const
6475 	{
6476 		xml_node_struct* d = _data();
6477 
6478 		return (d && d->value) ? d->value + 0 : PUGIXML_TEXT("");
6479 	}
6480 
as_string(const char_t * def) const6481 	PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
6482 	{
6483 		xml_node_struct* d = _data();
6484 
6485 		return (d && d->value) ? d->value + 0 : def;
6486 	}
6487 
as_int(int def) const6488 	PUGI__FN int xml_text::as_int(int def) const
6489 	{
6490 		xml_node_struct* d = _data();
6491 
6492 		return (d && d->value) ? impl::get_value_int(d->value) : def;
6493 	}
6494 
as_uint(unsigned int def) const6495 	PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
6496 	{
6497 		xml_node_struct* d = _data();
6498 
6499 		return (d && d->value) ? impl::get_value_uint(d->value) : def;
6500 	}
6501 
as_double(double def) const6502 	PUGI__FN double xml_text::as_double(double def) const
6503 	{
6504 		xml_node_struct* d = _data();
6505 
6506 		return (d && d->value) ? impl::get_value_double(d->value) : def;
6507 	}
6508 
as_float(float def) const6509 	PUGI__FN float xml_text::as_float(float def) const
6510 	{
6511 		xml_node_struct* d = _data();
6512 
6513 		return (d && d->value) ? impl::get_value_float(d->value) : def;
6514 	}
6515 
as_bool(bool def) const6516 	PUGI__FN bool xml_text::as_bool(bool def) const
6517 	{
6518 		xml_node_struct* d = _data();
6519 
6520 		return (d && d->value) ? impl::get_value_bool(d->value) : def;
6521 	}
6522 
6523 #ifdef PUGIXML_HAS_LONG_LONG
as_llong(long long def) const6524 	PUGI__FN long long xml_text::as_llong(long long def) const
6525 	{
6526 		xml_node_struct* d = _data();
6527 
6528 		return (d && d->value) ? impl::get_value_llong(d->value) : def;
6529 	}
6530 
as_ullong(unsigned long long def) const6531 	PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
6532 	{
6533 		xml_node_struct* d = _data();
6534 
6535 		return (d && d->value) ? impl::get_value_ullong(d->value) : def;
6536 	}
6537 #endif
6538 
set(const char_t * rhs)6539 	PUGI__FN bool xml_text::set(const char_t* rhs)
6540 	{
6541 		xml_node_struct* dn = _data_new();
6542 
6543 		return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false;
6544 	}
6545 
set(int rhs)6546 	PUGI__FN bool xml_text::set(int rhs)
6547 	{
6548 		xml_node_struct* dn = _data_new();
6549 
6550 		return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6551 	}
6552 
set(unsigned int rhs)6553 	PUGI__FN bool xml_text::set(unsigned int rhs)
6554 	{
6555 		xml_node_struct* dn = _data_new();
6556 
6557 		return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6558 	}
6559 
set(long rhs)6560 	PUGI__FN bool xml_text::set(long rhs)
6561 	{
6562 		xml_node_struct* dn = _data_new();
6563 
6564 		return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6565 	}
6566 
set(unsigned long rhs)6567 	PUGI__FN bool xml_text::set(unsigned long rhs)
6568 	{
6569 		xml_node_struct* dn = _data_new();
6570 
6571 		return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6572 	}
6573 
set(float rhs)6574 	PUGI__FN bool xml_text::set(float rhs)
6575 	{
6576 		xml_node_struct* dn = _data_new();
6577 
6578 		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision) : false;
6579 	}
6580 
set(float rhs,int precision)6581 	PUGI__FN bool xml_text::set(float rhs, int precision)
6582 	{
6583 		xml_node_struct* dn = _data_new();
6584 
6585 		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false;
6586 	}
6587 
set(double rhs)6588 	PUGI__FN bool xml_text::set(double rhs)
6589 	{
6590 		xml_node_struct* dn = _data_new();
6591 
6592 		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision) : false;
6593 	}
6594 
set(double rhs,int precision)6595 	PUGI__FN bool xml_text::set(double rhs, int precision)
6596 	{
6597 		xml_node_struct* dn = _data_new();
6598 
6599 		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false;
6600 	}
6601 
set(bool rhs)6602 	PUGI__FN bool xml_text::set(bool rhs)
6603 	{
6604 		xml_node_struct* dn = _data_new();
6605 
6606 		return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6607 	}
6608 
6609 #ifdef PUGIXML_HAS_LONG_LONG
set(long long rhs)6610 	PUGI__FN bool xml_text::set(long long rhs)
6611 	{
6612 		xml_node_struct* dn = _data_new();
6613 
6614 		return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6615 	}
6616 
set(unsigned long long rhs)6617 	PUGI__FN bool xml_text::set(unsigned long long rhs)
6618 	{
6619 		xml_node_struct* dn = _data_new();
6620 
6621 		return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6622 	}
6623 #endif
6624 
operator =(const char_t * rhs)6625 	PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
6626 	{
6627 		set(rhs);
6628 		return *this;
6629 	}
6630 
operator =(int rhs)6631 	PUGI__FN xml_text& xml_text::operator=(int rhs)
6632 	{
6633 		set(rhs);
6634 		return *this;
6635 	}
6636 
operator =(unsigned int rhs)6637 	PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
6638 	{
6639 		set(rhs);
6640 		return *this;
6641 	}
6642 
operator =(long rhs)6643 	PUGI__FN xml_text& xml_text::operator=(long rhs)
6644 	{
6645 		set(rhs);
6646 		return *this;
6647 	}
6648 
operator =(unsigned long rhs)6649 	PUGI__FN xml_text& xml_text::operator=(unsigned long rhs)
6650 	{
6651 		set(rhs);
6652 		return *this;
6653 	}
6654 
operator =(double rhs)6655 	PUGI__FN xml_text& xml_text::operator=(double rhs)
6656 	{
6657 		set(rhs);
6658 		return *this;
6659 	}
6660 
operator =(float rhs)6661 	PUGI__FN xml_text& xml_text::operator=(float rhs)
6662 	{
6663 		set(rhs);
6664 		return *this;
6665 	}
6666 
operator =(bool rhs)6667 	PUGI__FN xml_text& xml_text::operator=(bool rhs)
6668 	{
6669 		set(rhs);
6670 		return *this;
6671 	}
6672 
6673 #ifdef PUGIXML_HAS_LONG_LONG
operator =(long long rhs)6674 	PUGI__FN xml_text& xml_text::operator=(long long rhs)
6675 	{
6676 		set(rhs);
6677 		return *this;
6678 	}
6679 
operator =(unsigned long long rhs)6680 	PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
6681 	{
6682 		set(rhs);
6683 		return *this;
6684 	}
6685 #endif
6686 
data() const6687 	PUGI__FN xml_node xml_text::data() const
6688 	{
6689 		return xml_node(_data());
6690 	}
6691 
6692 #ifdef __BORLANDC__
operator &&(const xml_text & lhs,bool rhs)6693 	PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
6694 	{
6695 		return (bool)lhs && rhs;
6696 	}
6697 
operator ||(const xml_text & lhs,bool rhs)6698 	PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
6699 	{
6700 		return (bool)lhs || rhs;
6701 	}
6702 #endif
6703 
xml_node_iterator()6704 	PUGI__FN xml_node_iterator::xml_node_iterator()
6705 	{
6706 	}
6707 
xml_node_iterator(const xml_node & node)6708 	PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
6709 	{
6710 	}
6711 
xml_node_iterator(xml_node_struct * ref,xml_node_struct * parent)6712 	PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
6713 	{
6714 	}
6715 
operator ==(const xml_node_iterator & rhs) const6716 	PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
6717 	{
6718 		return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6719 	}
6720 
operator !=(const xml_node_iterator & rhs) const6721 	PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
6722 	{
6723 		return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6724 	}
6725 
operator *() const6726 	PUGI__FN xml_node& xml_node_iterator::operator*() const
6727 	{
6728 		assert(_wrap._root);
6729 		return _wrap;
6730 	}
6731 
operator ->() const6732 	PUGI__FN xml_node* xml_node_iterator::operator->() const
6733 	{
6734 		assert(_wrap._root);
6735 		return const_cast<xml_node*>(&_wrap); // BCC5 workaround
6736 	}
6737 
operator ++()6738 	PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
6739 	{
6740 		assert(_wrap._root);
6741 		_wrap._root = _wrap._root->next_sibling;
6742 		return *this;
6743 	}
6744 
operator ++(int)6745 	PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
6746 	{
6747 		xml_node_iterator temp = *this;
6748 		++*this;
6749 		return temp;
6750 	}
6751 
operator --()6752 	PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
6753 	{
6754 		_wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
6755 		return *this;
6756 	}
6757 
operator --(int)6758 	PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
6759 	{
6760 		xml_node_iterator temp = *this;
6761 		--*this;
6762 		return temp;
6763 	}
6764 
xml_attribute_iterator()6765 	PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
6766 	{
6767 	}
6768 
xml_attribute_iterator(const xml_attribute & attr,const xml_node & parent)6769 	PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
6770 	{
6771 	}
6772 
xml_attribute_iterator(xml_attribute_struct * ref,xml_node_struct * parent)6773 	PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
6774 	{
6775 	}
6776 
operator ==(const xml_attribute_iterator & rhs) const6777 	PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
6778 	{
6779 		return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
6780 	}
6781 
operator !=(const xml_attribute_iterator & rhs) const6782 	PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
6783 	{
6784 		return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
6785 	}
6786 
operator *() const6787 	PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
6788 	{
6789 		assert(_wrap._attr);
6790 		return _wrap;
6791 	}
6792 
operator ->() const6793 	PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
6794 	{
6795 		assert(_wrap._attr);
6796 		return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround
6797 	}
6798 
operator ++()6799 	PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
6800 	{
6801 		assert(_wrap._attr);
6802 		_wrap._attr = _wrap._attr->next_attribute;
6803 		return *this;
6804 	}
6805 
operator ++(int)6806 	PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
6807 	{
6808 		xml_attribute_iterator temp = *this;
6809 		++*this;
6810 		return temp;
6811 	}
6812 
operator --()6813 	PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
6814 	{
6815 		_wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
6816 		return *this;
6817 	}
6818 
operator --(int)6819 	PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
6820 	{
6821 		xml_attribute_iterator temp = *this;
6822 		--*this;
6823 		return temp;
6824 	}
6825 
xml_named_node_iterator()6826 	PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
6827 	{
6828 	}
6829 
xml_named_node_iterator(const xml_node & node,const char_t * name)6830 	PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
6831 	{
6832 	}
6833 
xml_named_node_iterator(xml_node_struct * ref,xml_node_struct * parent,const char_t * name)6834 	PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
6835 	{
6836 	}
6837 
operator ==(const xml_named_node_iterator & rhs) const6838 	PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
6839 	{
6840 		return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6841 	}
6842 
operator !=(const xml_named_node_iterator & rhs) const6843 	PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
6844 	{
6845 		return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6846 	}
6847 
operator *() const6848 	PUGI__FN xml_node& xml_named_node_iterator::operator*() const
6849 	{
6850 		assert(_wrap._root);
6851 		return _wrap;
6852 	}
6853 
operator ->() const6854 	PUGI__FN xml_node* xml_named_node_iterator::operator->() const
6855 	{
6856 		assert(_wrap._root);
6857 		return const_cast<xml_node*>(&_wrap); // BCC5 workaround
6858 	}
6859 
operator ++()6860 	PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
6861 	{
6862 		assert(_wrap._root);
6863 		_wrap = _wrap.next_sibling(_name);
6864 		return *this;
6865 	}
6866 
operator ++(int)6867 	PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
6868 	{
6869 		xml_named_node_iterator temp = *this;
6870 		++*this;
6871 		return temp;
6872 	}
6873 
operator --()6874 	PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--()
6875 	{
6876 		if (_wrap._root)
6877 			_wrap = _wrap.previous_sibling(_name);
6878 		else
6879 		{
6880 			_wrap = _parent.last_child();
6881 
6882 			if (!impl::strequal(_wrap.name(), _name))
6883 				_wrap = _wrap.previous_sibling(_name);
6884 		}
6885 
6886 		return *this;
6887 	}
6888 
operator --(int)6889 	PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
6890 	{
6891 		xml_named_node_iterator temp = *this;
6892 		--*this;
6893 		return temp;
6894 	}
6895 
xml_parse_result()6896 	PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
6897 	{
6898 	}
6899 
operator bool() const6900 	PUGI__FN xml_parse_result::operator bool() const
6901 	{
6902 		return status == status_ok;
6903 	}
6904 
description() const6905 	PUGI__FN const char* xml_parse_result::description() const
6906 	{
6907 		switch (status)
6908 		{
6909 		case status_ok: return "No error";
6910 
6911 		case status_file_not_found: return "File was not found";
6912 		case status_io_error: return "Error reading from file/stream";
6913 		case status_out_of_memory: return "Could not allocate memory";
6914 		case status_internal_error: return "Internal error occurred";
6915 
6916 		case status_unrecognized_tag: return "Could not determine tag type";
6917 
6918 		case status_bad_pi: return "Error parsing document declaration/processing instruction";
6919 		case status_bad_comment: return "Error parsing comment";
6920 		case status_bad_cdata: return "Error parsing CDATA section";
6921 		case status_bad_doctype: return "Error parsing document type declaration";
6922 		case status_bad_pcdata: return "Error parsing PCDATA section";
6923 		case status_bad_start_element: return "Error parsing start element tag";
6924 		case status_bad_attribute: return "Error parsing element attribute";
6925 		case status_bad_end_element: return "Error parsing end element tag";
6926 		case status_end_element_mismatch: return "Start-end tags mismatch";
6927 
6928 		case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
6929 
6930 		case status_no_document_element: return "No document element found";
6931 
6932 		default: return "Unknown error";
6933 		}
6934 	}
6935 
xml_document()6936 	PUGI__FN xml_document::xml_document(): _buffer(0)
6937 	{
6938 		_create();
6939 	}
6940 
~xml_document()6941 	PUGI__FN xml_document::~xml_document()
6942 	{
6943 		_destroy();
6944 	}
6945 
6946 #ifdef PUGIXML_HAS_MOVE
xml_document(xml_document && rhs)6947 	PUGI__FN xml_document::xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT: _buffer(0)
6948 	{
6949 		_create();
6950 		_move(rhs);
6951 	}
6952 
operator =(xml_document && rhs)6953 	PUGI__FN xml_document& xml_document::operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
6954 	{
6955 		if (this == &rhs) return *this;
6956 
6957 		_destroy();
6958 		_create();
6959 		_move(rhs);
6960 
6961 		return *this;
6962 	}
6963 #endif
6964 
reset()6965 	PUGI__FN void xml_document::reset()
6966 	{
6967 		_destroy();
6968 		_create();
6969 	}
6970 
reset(const xml_document & proto)6971 	PUGI__FN void xml_document::reset(const xml_document& proto)
6972 	{
6973 		reset();
6974 
6975 		impl::node_copy_tree(_root, proto._root);
6976 	}
6977 
_create()6978 	PUGI__FN void xml_document::_create()
6979 	{
6980 		assert(!_root);
6981 
6982 	#ifdef PUGIXML_COMPACT
6983 		// space for page marker for the first page (uint32_t), rounded up to pointer size; assumes pointers are at least 32-bit
6984 		const size_t page_offset = sizeof(void*);
6985 	#else
6986 		const size_t page_offset = 0;
6987 	#endif
6988 
6989 		// initialize sentinel page
6990 		PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory));
6991 
6992 		// prepare page structure
6993 		impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory);
6994 		assert(page);
6995 
6996 		page->busy_size = impl::xml_memory_page_size;
6997 
6998 		// setup first page marker
6999 	#ifdef PUGIXML_COMPACT
7000 		// round-trip through void* to avoid 'cast increases required alignment of target type' warning
7001 		page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
7002 		*page->compact_page_marker = sizeof(impl::xml_memory_page);
7003 	#endif
7004 
7005 		// allocate new root
7006 		_root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
7007 		_root->prev_sibling_c = _root;
7008 
7009 		// setup sentinel page
7010 		page->allocator = static_cast<impl::xml_document_struct*>(_root);
7011 
7012 		// setup hash table pointer in allocator
7013 	#ifdef PUGIXML_COMPACT
7014 		page->allocator->_hash = &static_cast<impl::xml_document_struct*>(_root)->hash;
7015 	#endif
7016 
7017 		// verify the document allocation
7018 		assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
7019 	}
7020 
_destroy()7021 	PUGI__FN void xml_document::_destroy()
7022 	{
7023 		assert(_root);
7024 
7025 		// destroy static storage
7026 		if (_buffer)
7027 		{
7028 			impl::xml_memory::deallocate(_buffer);
7029 			_buffer = 0;
7030 		}
7031 
7032 		// destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
7033 		for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
7034 		{
7035 			if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
7036 		}
7037 
7038 		// destroy dynamic storage, leave sentinel page (it's in static memory)
7039 		impl::xml_memory_page* root_page = PUGI__GETPAGE(_root);
7040 		assert(root_page && !root_page->prev);
7041 		assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
7042 
7043 		for (impl::xml_memory_page* page = root_page->next; page; )
7044 		{
7045 			impl::xml_memory_page* next = page->next;
7046 
7047 			impl::xml_allocator::deallocate_page(page);
7048 
7049 			page = next;
7050 		}
7051 
7052 	#ifdef PUGIXML_COMPACT
7053 		// destroy hash table
7054 		static_cast<impl::xml_document_struct*>(_root)->hash.clear();
7055 	#endif
7056 
7057 		_root = 0;
7058 	}
7059 
7060 #ifdef PUGIXML_HAS_MOVE
// Moves the contents of rhs into this document: allocator state, buffers, memory pages
// after the first one, and the child list. Afterwards rhs's document structure is
// re-constructed in place and rhs._buffer is cleared.
// The asserts below require this document to have a lone root page and no children.
_move(xml_document & rhs)7061 	PUGI__FN void xml_document::_move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
7062 	{
7063 		impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
7064 		impl::xml_document_struct* other = static_cast<impl::xml_document_struct*>(rhs._root);
7065 
7066 		// save first child pointer for later; this needs hash access
7067 		xml_node_struct* other_first_child = other->first_child;
7068 
7069 	#ifdef PUGIXML_COMPACT
7070 		// reserve space for the hash table up front; this is the only operation that can fail
7071 		// if it does, we have no choice but to throw (if we have exceptions)
7072 		if (other_first_child)
7073 		{
7074 			size_t other_children = 0;
7075 			for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
7076 				other_children++;
7077 
7078 			// in compact mode, each pointer assignment could result in a hash table request
7079 			// during move, we have to relocate document first_child and parents of all children
7080 			// normally there's just one child and its parent has a pointerless encoding but
7081 			// we assume the worst here
7082 			if (!other->_hash->reserve(other_children + 1))
7083 			{
7084 			#ifdef PUGIXML_NO_EXCEPTIONS
				// without exceptions the move is abandoned; nothing has been transferred from rhs at this point
7085 				return;
7086 			#else
7087 				throw std::bad_alloc();
7088 			#endif
7089 			}
7090 		}
7091 	#endif
7092 
7093 		// move allocation state
7094 		doc->_root = other->_root;
7095 		doc->_busy_size = other->_busy_size;
7096 
7097 		// move buffer state
7098 		doc->buffer = other->buffer;
7099 		doc->extra_buffers = other->extra_buffers;
7100 		_buffer = rhs._buffer;
7101 
7102 	#ifdef PUGIXML_COMPACT
7103 		// move compact hash; note that the hash table can have pointers to other but they will be "inactive", similarly to nodes removed with remove_child
7104 		doc->hash = other->hash;
7105 		doc->_hash = &doc->hash;
7106 
7107 		// make sure we don't access other hash up until the end when we reinitialize other document
7108 		other->_hash = 0;
7109 	#endif
7110 
7111 		// move page structure
7112 		impl::xml_memory_page* doc_page = PUGI__GETPAGE(doc);
7113 		assert(doc_page && !doc_page->prev && !doc_page->next);
7114 
7115 		impl::xml_memory_page* other_page = PUGI__GETPAGE(other);
7116 		assert(other_page && !other_page->prev);
7117 
7118 		// relink pages since root page is embedded into xml_document
7119 		if (impl::xml_memory_page* page = other_page->next)
7120 		{
7121 			assert(page->prev == other_page);
7122 
7123 			page->prev = doc_page;
7124 
7125 			doc_page->next = page;
7126 			other_page->next = 0;
7127 		}
7128 
7129 		// make sure pages point to the correct document state
7130 		for (impl::xml_memory_page* page = doc_page->next; page; page = page->next)
7131 		{
7132 			assert(page->allocator == other);
7133 
7134 			page->allocator = doc;
7135 
7136 		#ifdef PUGIXML_COMPACT
7137 			// this automatically migrates most children between documents and prevents ->parent assignment from allocating
7138 			if (page->compact_shared_parent == other)
7139 				page->compact_shared_parent = doc;
7140 		#endif
7141 		}
7142 
7143 		// move tree structure
7144 		assert(!doc->first_child);
7145 
7146 		doc->first_child = other_first_child;
7147 
		// every top-level child must now name this document as its parent
7148 		for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
7149 		{
7150 		#ifdef PUGIXML_COMPACT
7151 			// most children will have migrated when we reassigned compact_shared_parent
7152 			assert(node->parent == other || node->parent == doc);
7153 
7154 			node->parent = doc;
7155 		#else
7156 			assert(node->parent == other);
7157 			node->parent = doc;
7158 		#endif
7159 		}
7160 
7161 		// reset other document
		// placement new re-runs the document structure constructor on rhs's existing storage
7162 		new (other) impl::xml_document_struct(PUGI__GETPAGE(other));
7163 		rhs._buffer = 0;
7164 	}
7165 #endif
7166 
7167 #ifndef PUGIXML_NO_STL
load(std::basic_istream<char,std::char_traits<char>> & stream,unsigned int options,xml_encoding encoding)7168 	PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
7169 	{
7170 		reset();
7171 
7172 		return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer);
7173 	}
7174 
load(std::basic_istream<wchar_t,std::char_traits<wchar_t>> & stream,unsigned int options)7175 	PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
7176 	{
7177 		reset();
7178 
7179 		return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
7180 	}
7181 #endif
7182 
load_string(const char_t * contents,unsigned int options)7183 	PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
7184 	{
7185 		// Force native encoding (skip autodetection)
7186 	#ifdef PUGIXML_WCHAR_MODE
7187 		xml_encoding encoding = encoding_wchar;
7188 	#else
7189 		xml_encoding encoding = encoding_utf8;
7190 	#endif
7191 
7192 		return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
7193 	}
7194 
load(const char_t * contents,unsigned int options)7195 	PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
7196 	{
7197 		return load_string(contents, options);
7198 	}
7199 
load_file(const char * path_,unsigned int options,xml_encoding encoding)7200 	PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
7201 	{
7202 		reset();
7203 
7204 		using impl::auto_deleter; // MSVC7 workaround
7205 		auto_deleter<FILE> file(impl::open_file(path_, "rb"), impl::close_file);
7206 
7207 		return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
7208 	}
7209 
load_file(const wchar_t * path_,unsigned int options,xml_encoding encoding)7210 	PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
7211 	{
7212 		reset();
7213 
7214 		using impl::auto_deleter; // MSVC7 workaround
7215 		auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file);
7216 
7217 		return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
7218 	}
7219 
load_buffer(const void * contents,size_t size,unsigned int options,xml_encoding encoding)7220 	PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
7221 	{
7222 		reset();
7223 
7224 		return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
7225 	}
7226 
load_buffer_inplace(void * contents,size_t size,unsigned int options,xml_encoding encoding)7227 	PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
7228 	{
7229 		reset();
7230 
7231 		return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
7232 	}
7233 
load_buffer_inplace_own(void * contents,size_t size,unsigned int options,xml_encoding encoding)7234 	PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
7235 	{
7236 		reset();
7237 
7238 		return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
7239 	}
7240 
save(xml_writer & writer,const char_t * indent,unsigned int flags,xml_encoding encoding) const7241 	PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7242 	{
7243 		impl::xml_buffered_writer buffered_writer(writer, encoding);
7244 
7245 		if ((flags & format_write_bom) && encoding != encoding_latin1)
7246 		{
7247 			// BOM always represents the codepoint U+FEFF, so just write it in native encoding
7248 		#ifdef PUGIXML_WCHAR_MODE
7249 			unsigned int bom = 0xfeff;
7250 			buffered_writer.write(static_cast<wchar_t>(bom));
7251 		#else
7252 			buffered_writer.write('\xef', '\xbb', '\xbf');
7253 		#endif
7254 		}
7255 
7256 		if (!(flags & format_no_declaration) && !impl::has_declaration(_root))
7257 		{
7258 			buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
7259 			if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
7260 			buffered_writer.write('?', '>');
7261 			if (!(flags & format_raw)) buffered_writer.write('\n');
7262 		}
7263 
7264 		impl::node_output(buffered_writer, _root, indent, flags, 0);
7265 
7266 		buffered_writer.flush();
7267 	}
7268 
7269 #ifndef PUGIXML_NO_STL
save(std::basic_ostream<char,std::char_traits<char>> & stream,const char_t * indent,unsigned int flags,xml_encoding encoding) const7270 	PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7271 	{
7272 		xml_writer_stream writer(stream);
7273 
7274 		save(writer, indent, flags, encoding);
7275 	}
7276 
save(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream,const char_t * indent,unsigned int flags) const7277 	PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
7278 	{
7279 		xml_writer_stream writer(stream);
7280 
7281 		save(writer, indent, flags, encoding_wchar);
7282 	}
7283 #endif
7284 
save_file(const char * path_,const char_t * indent,unsigned int flags,xml_encoding encoding) const7285 	PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7286 	{
7287 		using impl::auto_deleter; // MSVC7 workaround
7288 		auto_deleter<FILE> file(impl::open_file(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file);
7289 
7290 		return impl::save_file_impl(*this, file.data, indent, flags, encoding);
7291 	}
7292 
save_file(const wchar_t * path_,const char_t * indent,unsigned int flags,xml_encoding encoding) const7293 	PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7294 	{
7295 		using impl::auto_deleter; // MSVC7 workaround
7296 		auto_deleter<FILE> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), impl::close_file);
7297 
7298 		return impl::save_file_impl(*this, file.data, indent, flags, encoding);
7299 	}
7300 
document_element() const7301 	PUGI__FN xml_node xml_document::document_element() const
7302 	{
7303 		assert(_root);
7304 
7305 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
7306 			if (PUGI__NODETYPE(i) == node_element)
7307 				return xml_node(i);
7308 
7309 		return xml_node();
7310 	}
7311 
7312 #ifndef PUGIXML_NO_STL
as_utf8(const wchar_t * str)7313 	PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
7314 	{
7315 		assert(str);
7316 
7317 		return impl::as_utf8_impl(str, impl::strlength_wide(str));
7318 	}
7319 
as_utf8(const std::basic_string<wchar_t> & str)7320 	PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
7321 	{
7322 		return impl::as_utf8_impl(str.c_str(), str.size());
7323 	}
7324 
as_wide(const char * str)7325 	PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
7326 	{
7327 		assert(str);
7328 
7329 		return impl::as_wide_impl(str, strlen(str));
7330 	}
7331 
as_wide(const std::string & str)7332 	PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
7333 	{
7334 		return impl::as_wide_impl(str.c_str(), str.size());
7335 	}
7336 #endif
7337 
set_memory_management_functions(allocation_function allocate,deallocation_function deallocate)7338 	PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
7339 	{
7340 		impl::xml_memory::allocate = allocate;
7341 		impl::xml_memory::deallocate = deallocate;
7342 	}
7343 
get_memory_allocation_function()7344 	PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
7345 	{
7346 		return impl::xml_memory::allocate;
7347 	}
7348 
get_memory_deallocation_function()7349 	PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
7350 	{
7351 		return impl::xml_memory::deallocate;
7352 	}
7353 }
7354 
7355 #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
7356 namespace std
7357 {
7358 	// Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
_Iter_cat(const pugi::xml_node_iterator &)7359 	PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
7360 	{
7361 		return std::bidirectional_iterator_tag();
7362 	}
7363 
_Iter_cat(const pugi::xml_attribute_iterator &)7364 	PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
7365 	{
7366 		return std::bidirectional_iterator_tag();
7367 	}
7368 
_Iter_cat(const pugi::xml_named_node_iterator &)7369 	PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
7370 	{
7371 		return std::bidirectional_iterator_tag();
7372 	}
7373 }
7374 #endif
7375 
7376 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
7377 namespace std
7378 {
7379 	// Workarounds for (non-standard) iterator category detection
__iterator_category(const pugi::xml_node_iterator &)7380 	PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
7381 	{
7382 		return std::bidirectional_iterator_tag();
7383 	}
7384 
__iterator_category(const pugi::xml_attribute_iterator &)7385 	PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
7386 	{
7387 		return std::bidirectional_iterator_tag();
7388 	}
7389 
__iterator_category(const pugi::xml_named_node_iterator &)7390 	PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
7391 	{
7392 		return std::bidirectional_iterator_tag();
7393 	}
7394 }
7395 #endif
7396 
7397 #ifndef PUGIXML_NO_XPATH
7398 // STL replacements
7399 PUGI__NS_BEGIN
// Function object that compares two values of the same type with operator==.
struct equal_to
{
	template <typename T>
	bool operator()(const T& lhs, const T& rhs) const
	{
		const bool outcome = (lhs == rhs);
		return outcome;
	}
};
7407 
// Function object that compares two values of the same type with operator!=.
struct not_equal_to
{
	template <typename T>
	bool operator()(const T& lhs, const T& rhs) const
	{
		const bool outcome = (lhs != rhs);
		return outcome;
	}
};
7415 
// Function object that compares two values of the same type with operator<.
struct less
{
	template <typename T>
	bool operator()(const T& lhs, const T& rhs) const
	{
		const bool outcome = (lhs < rhs);
		return outcome;
	}
};
7423 
// Function object that compares two values of the same type with operator<=.
struct less_equal
{
	template <typename T>
	bool operator()(const T& lhs, const T& rhs) const
	{
		const bool outcome = (lhs <= rhs);
		return outcome;
	}
};
7431 
swap(T & lhs,T & rhs)7432 	template <typename T> inline void swap(T& lhs, T& rhs)
7433 	{
7434 		T temp = lhs;
7435 		lhs = rhs;
7436 		rhs = temp;
7437 	}
7438 
min_element(I begin,I end,const Pred & pred)7439 	template <typename I, typename Pred> PUGI__FN I min_element(I begin, I end, const Pred& pred)
7440 	{
7441 		I result = begin;
7442 
7443 		for (I it = begin + 1; it != end; ++it)
7444 			if (pred(*it, *result))
7445 				result = it;
7446 
7447 		return result;
7448 	}
7449 
reverse(I begin,I end)7450 	template <typename I> PUGI__FN void reverse(I begin, I end)
7451 	{
7452 		while (end - begin > 1)
7453 			swap(*begin++, *--end);
7454 	}
7455 
unique(I begin,I end)7456 	template <typename I> PUGI__FN I unique(I begin, I end)
7457 	{
7458 		// fast skip head
7459 		while (end - begin > 1 && *begin != *(begin + 1))
7460 			begin++;
7461 
7462 		if (begin == end)
7463 			return begin;
7464 
7465 		// last written element
7466 		I write = begin++;
7467 
7468 		// merge unique elements
7469 		while (begin != end)
7470 		{
7471 			if (*begin != *write)
7472 				*++write = *begin++;
7473 			else
7474 				begin++;
7475 		}
7476 
7477 		// past-the-end (write points to live element)
7478 		return write + 1;
7479 	}
7480 
insertion_sort(T * begin,T * end,const Pred & pred)7481 	template <typename T, typename Pred> PUGI__FN void insertion_sort(T* begin, T* end, const Pred& pred)
7482 	{
7483 		if (begin == end)
7484 			return;
7485 
7486 		for (T* it = begin + 1; it != end; ++it)
7487 		{
7488 			T val = *it;
7489 			T* hole = it;
7490 
7491 			// move hole backwards
7492 			while (hole > begin && pred(val, *(hole - 1)))
7493 			{
7494 				*hole = *(hole - 1);
7495 				hole--;
7496 			}
7497 
7498 			// fill hole with element
7499 			*hole = val;
7500 		}
7501 	}
7502 
median3(I first,I middle,I last,const Pred & pred)7503 	template <typename I, typename Pred> inline I median3(I first, I middle, I last, const Pred& pred)
7504 	{
7505 		if (pred(*middle, *first))
7506 			swap(middle, first);
7507 		if (pred(*last, *middle))
7508 			swap(last, middle);
7509 		if (pred(*middle, *first))
7510 			swap(middle, first);
7511 
7512 		return middle;
7513 	}
7514 
partition3(T * begin,T * end,T pivot,const Pred & pred,T ** out_eqbeg,T ** out_eqend)7515 	template <typename T, typename Pred> PUGI__FN void partition3(T* begin, T* end, T pivot, const Pred& pred, T** out_eqbeg, T** out_eqend)
7516 	{
7517 		// invariant: array is split into 4 groups: = < ? > (each variable denotes the boundary between the groups)
7518 		T* eq = begin;
7519 		T* lt = begin;
7520 		T* gt = end;
7521 
7522 		while (lt < gt)
7523 		{
7524 			if (pred(*lt, pivot))
7525 				lt++;
7526 			else if (*lt == pivot)
7527 				swap(*eq++, *lt++);
7528 			else
7529 				swap(*lt, *--gt);
7530 		}
7531 
7532 		// we now have just 4 groups: = < >; move equal elements to the middle
7533 		T* eqbeg = gt;
7534 
7535 		for (T* it = begin; it != eq; ++it)
7536 			swap(*it, *--eqbeg);
7537 
7538 		*out_eqbeg = eqbeg;
7539 		*out_eqend = gt;
7540 	}
7541 
sort(I begin,I end,const Pred & pred)7542 	template <typename I, typename Pred> PUGI__FN void sort(I begin, I end, const Pred& pred)
7543 	{
7544 		// sort large chunks
7545 		while (end - begin > 16)
7546 		{
7547 			// find median element
7548 			I middle = begin + (end - begin) / 2;
7549 			I median = median3(begin, middle, end - 1, pred);
7550 
7551 			// partition in three chunks (< = >)
7552 			I eqbeg, eqend;
7553 			partition3(begin, end, *median, pred, &eqbeg, &eqend);
7554 
7555 			// loop on larger half
7556 			if (eqbeg - begin > end - eqend)
7557 			{
7558 				sort(eqend, end, pred);
7559 				end = eqbeg;
7560 			}
7561 			else
7562 			{
7563 				sort(begin, eqbeg, pred);
7564 				begin = eqend;
7565 			}
7566 		}
7567 
7568 		// insertion sort small chunk
7569 		insertion_sort(begin, end, pred);
7570 	}
7571 
hash_insert(const void ** table,size_t size,const void * key)7572 	PUGI__FN bool hash_insert(const void** table, size_t size, const void* key)
7573 	{
7574 		assert(key);
7575 
7576 		unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
7577 
7578 		// MurmurHash3 32-bit finalizer
7579 		h ^= h >> 16;
7580 		h *= 0x85ebca6bu;
7581 		h ^= h >> 13;
7582 		h *= 0xc2b2ae35u;
7583 		h ^= h >> 16;
7584 
7585 		size_t hashmod = size - 1;
7586 		size_t bucket = h & hashmod;
7587 
7588 		for (size_t probe = 0; probe <= hashmod; ++probe)
7589 		{
7590 			if (table[bucket] == 0)
7591 			{
7592 				table[bucket] = key;
7593 				return true;
7594 			}
7595 
7596 			if (table[bucket] == key)
7597 				return false;
7598 
7599 			// hash collision, quadratic probing
7600 			bucket = (bucket + probe + 1) & hashmod;
7601 		}
7602 
7603 		assert(false && "Hash table is full"); // unreachable
7604 		return false;
7605 	}
7606 PUGI__NS_END
7607 
7608 // Allocator used for AST and evaluation stacks
7609 PUGI__NS_BEGIN
	// Nominal size in bytes of the data area of one XPath allocator page.
	// Defaults to 4096; define PUGIXML_MEMORY_XPATH_PAGE_SIZE at build time to override.
	static const size_t xpath_memory_page_size =
	#ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
		PUGIXML_MEMORY_XPATH_PAGE_SIZE
	#else
		4096
	#endif
		;
7617 
	// Allocation granularity in bytes: the larger of sizeof(double) and sizeof(void*)
	static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*);
7619 
	// One page of XPath allocator memory; pages form a singly linked chain through 'next'
	struct xpath_memory_block
	{
		// next page in the chain
		xpath_memory_block* next;
		// number of usable bytes in 'data'
		size_t capacity;

		union
		{
			// start of the payload area (xpath_memory_page_size is the nominal size)
			char data[xpath_memory_page_size];
			// not accessed by the code in this section; presumably only forces double alignment of the payload
			double alignment;
		};
	};
7631 
	// Bump allocator over a chain of xpath_memory_block pages.
	// Memory is handed out sequentially from the current (root) page; a new page is obtained from
	// xml_memory::allocate when the current one is exhausted. Individual objects are never freed;
	// memory is reclaimed by revert() (back to a saved state) or release() (everything but the last page).
	struct xpath_allocator
	{
		// page that allocations are currently served from (head of the page chain)
		xpath_memory_block* _root;
		// number of bytes already used in _root
		size_t _root_size;
		// optional out-of-memory flag, set to true when a page allocation fails (may be null)
		bool* _error;

		// Starts allocating from the beginning of 'root'; 'error', if given, receives allocation failures
		xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error)
		{
		}

		// Returns 'size' bytes (rounded up to xpath_memory_block_alignment) or 0 if a new page could not be allocated
		void* allocate(size_t size)
		{
			// round size up to block alignment boundary
			size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);

			if (_root_size + size <= _root->capacity)
			{
				// fast path: the request fits into the current page
				void* buf = &_root->data[0] + _root_size;
				_root_size += size;
				return buf;
			}
			else
			{
				// make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
				size_t block_capacity_base = sizeof(_root->data);
				size_t block_capacity_req = size + block_capacity_base / 4;
				size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;

				// the page header precedes the payload, so only the bytes up to 'data' are added
				size_t block_size = block_capacity + offsetof(xpath_memory_block, data);

				xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
				if (!block)
				{
					// report the failure through the shared flag, if one was provided
					if (_error) *_error = true;
					return 0;
				}

				// link the new page in front of the chain
				block->next = _root;
				block->capacity = block_capacity;

				// the new page becomes current; the request occupies its first 'size' bytes
				_root = block;
				_root_size = size;

				return block->data;
			}
		}

		// Resizes the most recently allocated object from old_size to new_size bytes.
		// ptr may be 0, in which case this behaves like allocate(new_size).
		// Returns the (possibly moved) object, or 0 if a new page could not be allocated.
		void* reallocate(void* ptr, size_t old_size, size_t new_size)
		{
			// round size up to block alignment boundary
			old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
			new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);

			// we can only reallocate the last object
			assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);

			// try to reallocate the object inplace
			if (ptr && _root_size - old_size + new_size <= _root->capacity)
			{
				_root_size = _root_size - old_size + new_size;
				return ptr;
			}

			// allocate a new block
			void* result = allocate(new_size);
			if (!result) return 0;

			// we have a new block
			if (ptr)
			{
				// copy old data (we only support growing)
				assert(new_size >= old_size);
				memcpy(result, ptr, old_size);

				// free the previous page if it had no other objects
				assert(_root->data == result);
				assert(_root->next);

				// the object started at the very beginning of the previous page, so it was the only one there
				if (_root->next->data == ptr)
				{
					// deallocate the whole page, unless it was the first one
					xpath_memory_block* next = _root->next->next;

					if (next)
					{
						xml_memory::deallocate(_root->next);
						_root->next = next;
					}
				}
			}

			return result;
		}

		// Rolls the allocator back to a previously copied state, freeing every page added since then
		void revert(const xpath_allocator& state)
		{
			// free all new pages
			xpath_memory_block* cur = _root;

			while (cur != state._root)
			{
				xpath_memory_block* next = cur->next;

				xml_memory::deallocate(cur);

				cur = next;
			}

			// restore state
			_root = state._root;
			_root_size = state._root_size;
		}

		// Frees every page of the chain except the last one (the page whose 'next' is null);
		// _root is left unchanged, so the allocator must not be used afterwards
		void release()
		{
			xpath_memory_block* cur = _root;
			assert(cur);

			while (cur->next)
			{
				xpath_memory_block* next = cur->next;

				xml_memory::deallocate(cur);

				cur = next;
			}
		}
	};
7760 
	// Scope guard for an xpath_allocator: copies the allocator state on construction and
	// reverts the allocator to that state on destruction, releasing everything allocated in between
	struct xpath_allocator_capture
	{
		// Remembers the allocator and takes a copy of its current state; alloc must not be null (it is dereferenced)
		xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
		{
		}

		// Restores the captured state
		~xpath_allocator_capture()
		{
			_target->revert(_state);
		}

		// allocator being guarded
		xpath_allocator* _target;
		// copy of the allocator taken at construction time
		xpath_allocator _state;
	};
7775 
	// Pair of allocators passed around together; judging by the names, 'result' holds data that
	// is returned to the caller and 'temp' holds scratch data (usage is outside this section)
	struct xpath_stack
	{
		xpath_allocator* result;
		xpath_allocator* temp;
	};
7781 
7782 	struct xpath_stack_data
7783 	{
7784 		xpath_memory_block blocks[2];
7785 		xpath_allocator result;
7786 		xpath_allocator temp;
7787 		xpath_stack stack;
7788 		bool oom;
7789 
xpath_stack_dataxpath_stack_data7790 		xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false)
7791 		{
7792 			blocks[0].next = blocks[1].next = 0;
7793 			blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
7794 
7795 			stack.result = &result;
7796 			stack.temp = &temp;
7797 		}
7798 
~xpath_stack_dataxpath_stack_data7799 		~xpath_stack_data()
7800 		{
7801 			result.release();
7802 			temp.release();
7803 		}
7804 	};
7805 PUGI__NS_END
7806 
7807 // String class
7808 PUGI__NS_BEGIN
7809 	class xpath_string
7810 	{
7811 		const char_t* _buffer;
7812 		bool _uses_heap;
7813 		size_t _length_heap;
7814 
duplicate_string(const char_t * string,size_t length,xpath_allocator * alloc)7815 		static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
7816 		{
7817 			char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
7818 			if (!result) return 0;
7819 
7820 			memcpy(result, string, length * sizeof(char_t));
7821 			result[length] = 0;
7822 
7823 			return result;
7824 		}
7825 
xpath_string(const char_t * buffer,bool uses_heap_,size_t length_heap)7826 		xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap)
7827 		{
7828 		}
7829 
7830 	public:
from_const(const char_t * str)7831 		static xpath_string from_const(const char_t* str)
7832 		{
7833 			return xpath_string(str, false, 0);
7834 		}
7835 
from_heap_preallocated(const char_t * begin,const char_t * end)7836 		static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end)
7837 		{
7838 			assert(begin <= end && *end == 0);
7839 
7840 			return xpath_string(begin, true, static_cast<size_t>(end - begin));
7841 		}
7842 
from_heap(const char_t * begin,const char_t * end,xpath_allocator * alloc)7843 		static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc)
7844 		{
7845 			assert(begin <= end);
7846 
7847 			if (begin == end)
7848 				return xpath_string();
7849 
7850 			size_t length = static_cast<size_t>(end - begin);
7851 			const char_t* data = duplicate_string(begin, length, alloc);
7852 
7853 			return data ? xpath_string(data, true, length) : xpath_string();
7854 		}
7855 
xpath_string()7856 		xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0)
7857 		{
7858 		}
7859 
append(const xpath_string & o,xpath_allocator * alloc)7860 		void append(const xpath_string& o, xpath_allocator* alloc)
7861 		{
7862 			// skip empty sources
7863 			if (!*o._buffer) return;
7864 
7865 			// fast append for constant empty target and constant source
7866 			if (!*_buffer && !_uses_heap && !o._uses_heap)
7867 			{
7868 				_buffer = o._buffer;
7869 			}
7870 			else
7871 			{
7872 				// need to make heap copy
7873 				size_t target_length = length();
7874 				size_t source_length = o.length();
7875 				size_t result_length = target_length + source_length;
7876 
7877 				// allocate new buffer
7878 				char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
7879 				if (!result) return;
7880 
7881 				// append first string to the new buffer in case there was no reallocation
7882 				if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
7883 
7884 				// append second string to the new buffer
7885 				memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
7886 				result[result_length] = 0;
7887 
7888 				// finalize
7889 				_buffer = result;
7890 				_uses_heap = true;
7891 				_length_heap = result_length;
7892 			}
7893 		}
7894 
c_str() const7895 		const char_t* c_str() const
7896 		{
7897 			return _buffer;
7898 		}
7899 
length() const7900 		size_t length() const
7901 		{
7902 			return _uses_heap ? _length_heap : strlength(_buffer);
7903 		}
7904 
data(xpath_allocator * alloc)7905 		char_t* data(xpath_allocator* alloc)
7906 		{
7907 			// make private heap copy
7908 			if (!_uses_heap)
7909 			{
7910 				size_t length_ = strlength(_buffer);
7911 				const char_t* data_ = duplicate_string(_buffer, length_, alloc);
7912 
7913 				if (!data_) return 0;
7914 
7915 				_buffer = data_;
7916 				_uses_heap = true;
7917 				_length_heap = length_;
7918 			}
7919 
7920 			return const_cast<char_t*>(_buffer);
7921 		}
7922 
empty() const7923 		bool empty() const
7924 		{
7925 			return *_buffer == 0;
7926 		}
7927 
operator ==(const xpath_string & o) const7928 		bool operator==(const xpath_string& o) const
7929 		{
7930 			return strequal(_buffer, o._buffer);
7931 		}
7932 
operator !=(const xpath_string & o) const7933 		bool operator!=(const xpath_string& o) const
7934 		{
7935 			return !strequal(_buffer, o._buffer);
7936 		}
7937 
uses_heap() const7938 		bool uses_heap() const
7939 		{
7940 			return _uses_heap;
7941 		}
7942 	};
7943 PUGI__NS_END
7944 
7945 PUGI__NS_BEGIN
starts_with(const char_t * string,const char_t * pattern)7946 	PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
7947 	{
7948 		while (*pattern && *string == *pattern)
7949 		{
7950 			string++;
7951 			pattern++;
7952 		}
7953 
7954 		return *pattern == 0;
7955 	}
7956 
find_char(const char_t * s,char_t c)7957 	PUGI__FN const char_t* find_char(const char_t* s, char_t c)
7958 	{
7959 	#ifdef PUGIXML_WCHAR_MODE
7960 		return wcschr(s, c);
7961 	#else
7962 		return strchr(s, c);
7963 	#endif
7964 	}
7965 
find_substring(const char_t * s,const char_t * p)7966 	PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
7967 	{
7968 	#ifdef PUGIXML_WCHAR_MODE
7969 		// MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
7970 		return (*p == 0) ? s : wcsstr(s, p);
7971 	#else
7972 		return strstr(s, p);
7973 	#endif
7974 	}
7975 
7976 	// Converts symbol to lower case, if it is an ASCII one
tolower_ascii(char_t ch)7977 	PUGI__FN char_t tolower_ascii(char_t ch)
7978 	{
7979 		return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
7980 	}
7981 
string_value(const xpath_node & na,xpath_allocator * alloc)7982 	PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
7983 	{
7984 		if (na.attribute())
7985 			return xpath_string::from_const(na.attribute().value());
7986 		else
7987 		{
7988 			xml_node n = na.node();
7989 
7990 			switch (n.type())
7991 			{
7992 			case node_pcdata:
7993 			case node_cdata:
7994 			case node_comment:
7995 			case node_pi:
7996 				return xpath_string::from_const(n.value());
7997 
7998 			case node_document:
7999 			case node_element:
8000 			{
8001 				xpath_string result;
8002 
8003 				// element nodes can have value if parse_embed_pcdata was used
8004 				if (n.value()[0])
8005 					result.append(xpath_string::from_const(n.value()), alloc);
8006 
8007 				xml_node cur = n.first_child();
8008 
8009 				while (cur && cur != n)
8010 				{
8011 					if (cur.type() == node_pcdata || cur.type() == node_cdata)
8012 						result.append(xpath_string::from_const(cur.value()), alloc);
8013 
8014 					if (cur.first_child())
8015 						cur = cur.first_child();
8016 					else if (cur.next_sibling())
8017 						cur = cur.next_sibling();
8018 					else
8019 					{
8020 						while (!cur.next_sibling() && cur != n)
8021 							cur = cur.parent();
8022 
8023 						if (cur != n) cur = cur.next_sibling();
8024 					}
8025 				}
8026 
8027 				return result;
8028 			}
8029 
8030 			default:
8031 				return xpath_string();
8032 			}
8033 		}
8034 	}
8035 
node_is_before_sibling(xml_node_struct * ln,xml_node_struct * rn)8036 	PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
8037 	{
8038 		assert(ln->parent == rn->parent);
8039 
8040 		// there is no common ancestor (the shared parent is null), nodes are from different documents
8041 		if (!ln->parent) return ln < rn;
8042 
8043 		// determine sibling order
8044 		xml_node_struct* ls = ln;
8045 		xml_node_struct* rs = rn;
8046 
8047 		while (ls && rs)
8048 		{
8049 			if (ls == rn) return true;
8050 			if (rs == ln) return false;
8051 
8052 			ls = ls->next_sibling;
8053 			rs = rs->next_sibling;
8054 		}
8055 
8056 		// if rn sibling chain ended ln must be before rn
8057 		return !rs;
8058 	}
8059 
node_is_before(xml_node_struct * ln,xml_node_struct * rn)8060 	PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn)
8061 	{
8062 		// find common ancestor at the same depth, if any
8063 		xml_node_struct* lp = ln;
8064 		xml_node_struct* rp = rn;
8065 
8066 		while (lp && rp && lp->parent != rp->parent)
8067 		{
8068 			lp = lp->parent;
8069 			rp = rp->parent;
8070 		}
8071 
8072 		// parents are the same!
8073 		if (lp && rp) return node_is_before_sibling(lp, rp);
8074 
8075 		// nodes are at different depths, need to normalize heights
8076 		bool left_higher = !lp;
8077 
8078 		while (lp)
8079 		{
8080 			lp = lp->parent;
8081 			ln = ln->parent;
8082 		}
8083 
8084 		while (rp)
8085 		{
8086 			rp = rp->parent;
8087 			rn = rn->parent;
8088 		}
8089 
8090 		// one node is the ancestor of the other
8091 		if (ln == rn) return left_higher;
8092 
8093 		// find common ancestor... again
8094 		while (ln->parent != rn->parent)
8095 		{
8096 			ln = ln->parent;
8097 			rn = rn->parent;
8098 		}
8099 
8100 		return node_is_before_sibling(ln, rn);
8101 	}
8102 
node_is_ancestor(xml_node_struct * parent,xml_node_struct * node)8103 	PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
8104 	{
8105 		while (node && node != parent) node = node->parent;
8106 
8107 		return parent && node == parent;
8108 	}
8109 
document_buffer_order(const xpath_node & xnode)8110 	PUGI__FN const void* document_buffer_order(const xpath_node& xnode)
8111 	{
8112 		xml_node_struct* node = xnode.node().internal_object();
8113 
8114 		if (node)
8115 		{
8116 			if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0)
8117 			{
8118 				if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name;
8119 				if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value;
8120 			}
8121 
8122 			return 0;
8123 		}
8124 
8125 		xml_attribute_struct* attr = xnode.attribute().internal_object();
8126 
8127 		if (attr)
8128 		{
8129 			if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0)
8130 			{
8131 				if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name;
8132 				if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
8133 			}
8134 
8135 			return 0;
8136 		}
8137 
8138 		return 0;
8139 	}
8140 
8141 	struct document_order_comparator
8142 	{
operator ()document_order_comparator8143 		bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
8144 		{
8145 			// optimized document order based check
8146 			const void* lo = document_buffer_order(lhs);
8147 			const void* ro = document_buffer_order(rhs);
8148 
8149 			if (lo && ro) return lo < ro;
8150 
8151 			// slow comparison
8152 			xml_node ln = lhs.node(), rn = rhs.node();
8153 
8154 			// compare attributes
8155 			if (lhs.attribute() && rhs.attribute())
8156 			{
8157 				// shared parent
8158 				if (lhs.parent() == rhs.parent())
8159 				{
8160 					// determine sibling order
8161 					for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
8162 						if (a == rhs.attribute())
8163 							return true;
8164 
8165 					return false;
8166 				}
8167 
8168 				// compare attribute parents
8169 				ln = lhs.parent();
8170 				rn = rhs.parent();
8171 			}
8172 			else if (lhs.attribute())
8173 			{
8174 				// attributes go after the parent element
8175 				if (lhs.parent() == rhs.node()) return false;
8176 
8177 				ln = lhs.parent();
8178 			}
8179 			else if (rhs.attribute())
8180 			{
8181 				// attributes go after the parent element
8182 				if (rhs.parent() == lhs.node()) return true;
8183 
8184 				rn = rhs.parent();
8185 			}
8186 
8187 			if (ln == rn) return false;
8188 
8189 			if (!ln || !rn) return ln < rn;
8190 
8191 			return node_is_before(ln.internal_object(), rn.internal_object());
8192 		}
8193 	};
8194 
	// Returns a NaN value as a double
	PUGI__FN double gen_nan()
	{
	#if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
		// float has the IEEE single precision layout here: build the NaN from its bit pattern
		PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t));
		typedef uint32_t UI; // BCC5 workaround
		union { float f; UI i; } u;
		// exponent bits all set plus the top mantissa bit: a quiet NaN
		u.i = 0x7fc00000;
		return double(u.f);
	#else
		// fallback: compute 0/0 at run time; volatile keeps the division from being evaluated at compile time
		const volatile double zero = 0.0;
		return zero / zero;
	#endif
	}
8209 
is_nan(double value)8210 	PUGI__FN bool is_nan(double value)
8211 	{
8212 	#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
8213 		return !!_isnan(value);
8214 	#elif defined(fpclassify) && defined(FP_NAN)
8215 		return fpclassify(value) == FP_NAN;
8216 	#else
8217 		// fallback
8218 		const volatile double v = value;
8219 		return v != v;
8220 	#endif
8221 	}
8222 
convert_number_to_string_special(double value)8223 	PUGI__FN const char_t* convert_number_to_string_special(double value)
8224 	{
8225 	#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
8226 		if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
8227 		if (_isnan(value)) return PUGIXML_TEXT("NaN");
8228 		return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8229 	#elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
8230 		switch (fpclassify(value))
8231 		{
8232 		case FP_NAN:
8233 			return PUGIXML_TEXT("NaN");
8234 
8235 		case FP_INFINITE:
8236 			return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8237 
8238 		case FP_ZERO:
8239 			return PUGIXML_TEXT("0");
8240 
8241 		default:
8242 			return 0;
8243 		}
8244 	#else
8245 		// fallback
8246 		const volatile double v = value;
8247 
8248 		if (v == 0) return PUGIXML_TEXT("0");
8249 		if (v != v) return PUGIXML_TEXT("NaN");
8250 		if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8251 		return 0;
8252 	#endif
8253 	}
8254 
convert_number_to_boolean(double value)8255 	PUGI__FN bool convert_number_to_boolean(double value)
8256 	{
8257 		return (value != 0 && !is_nan(value));
8258 	}
8259 
truncate_zeros(char * begin,char * end)8260 	PUGI__FN void truncate_zeros(char* begin, char* end)
8261 	{
8262 		while (begin != end && end[-1] == '0') end--;
8263 
8264 		*end = 0;
8265 	}
8266 
	// gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
	// value: number to convert; buffer: 32-char scratch storage that receives the digits;
	// out_mantissa: receives a pointer into buffer to the zero-terminated digit string (no sign, no decimal point);
	// out_exponent: receives the decimal exponent, so that |value| == 0.<mantissa> * 10^exponent
#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
	PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
	{
		// get base values
		// (the sign is retrieved but not used here)
		int sign, exponent;
		_ecvt_s(buffer, sizeof(buffer), value, DBL_DIG + 1, &exponent, &sign);

		// truncate redundant zeros
		truncate_zeros(buffer, buffer + strlen(buffer));

		// fill results
		*out_mantissa = buffer;
		*out_exponent = exponent;
	}
#else
	PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
	{
		// get a scientific notation value with IEEE DBL_DIG decimals
		// (the text has the shape [-]d.ddd...e[+-]xx)
		PUGI__SNPRINTF(buffer, "%.*e", DBL_DIG, value);

		// get the exponent (possibly negative)
		char* exponent_string = strchr(buffer, 'e');
		assert(exponent_string);

		int exponent = atoi(exponent_string + 1);

		// extract mantissa string: skip sign
		char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
		// NOTE(review): the second condition assumes '.' is the decimal separator of the active locale - confirm
		assert(mantissa[0] != '0' && mantissa[1] == '.');

		// divide mantissa by 10 to eliminate integer part
		// (the leading digit overwrites the decimal point, and the mantissa then starts one character later)
		mantissa[1] = mantissa[0];
		mantissa++;
		exponent++;

		// remove extra mantissa digits and zero-terminate mantissa
		// (this overwrites the 'e' if no trailing zeros are present)
		truncate_zeros(mantissa, exponent_string);

		// fill results
		*out_mantissa = mantissa;
		*out_exponent = exponent;
	}
#endif
8311 
convert_number_to_string(double value,xpath_allocator * alloc)8312 	PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
8313 	{
8314 		// try special number conversion
8315 		const char_t* special = convert_number_to_string_special(value);
8316 		if (special) return xpath_string::from_const(special);
8317 
8318 		// get mantissa + exponent form
8319 		char mantissa_buffer[32];
8320 
8321 		char* mantissa;
8322 		int exponent;
8323 		convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent);
8324 
8325 		// allocate a buffer of suitable length for the number
8326 		size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
8327 		char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
8328 		if (!result) return xpath_string();
8329 
8330 		// make the number!
8331 		char_t* s = result;
8332 
8333 		// sign
8334 		if (value < 0) *s++ = '-';
8335 
8336 		// integer part
8337 		if (exponent <= 0)
8338 		{
8339 			*s++ = '0';
8340 		}
8341 		else
8342 		{
8343 			while (exponent > 0)
8344 			{
8345 				assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9);
8346 				*s++ = *mantissa ? *mantissa++ : '0';
8347 				exponent--;
8348 			}
8349 		}
8350 
8351 		// fractional part
8352 		if (*mantissa)
8353 		{
8354 			// decimal point
8355 			*s++ = '.';
8356 
8357 			// extra zeroes from negative exponent
8358 			while (exponent < 0)
8359 			{
8360 				*s++ = '0';
8361 				exponent++;
8362 			}
8363 
8364 			// extra mantissa digits
8365 			while (*mantissa)
8366 			{
8367 				assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
8368 				*s++ = *mantissa++;
8369 			}
8370 		}
8371 
8372 		// zero-terminate
8373 		assert(s < result + result_size);
8374 		*s = 0;
8375 
8376 		return xpath_string::from_heap_preallocated(result, s);
8377 	}
8378 
check_string_to_number_format(const char_t * string)8379 	PUGI__FN bool check_string_to_number_format(const char_t* string)
8380 	{
8381 		// parse leading whitespace
8382 		while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8383 
8384 		// parse sign
8385 		if (*string == '-') ++string;
8386 
8387 		if (!*string) return false;
8388 
8389 		// if there is no integer part, there should be a decimal part with at least one digit
8390 		if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
8391 
8392 		// parse integer part
8393 		while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8394 
8395 		// parse decimal part
8396 		if (*string == '.')
8397 		{
8398 			++string;
8399 
8400 			while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8401 		}
8402 
8403 		// parse trailing whitespace
8404 		while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8405 
8406 		return *string == 0;
8407 	}
8408 
convert_string_to_number(const char_t * string)8409 	PUGI__FN double convert_string_to_number(const char_t* string)
8410 	{
8411 		// check string format
8412 		if (!check_string_to_number_format(string)) return gen_nan();
8413 
8414 		// parse string
8415 	#ifdef PUGIXML_WCHAR_MODE
8416 		return wcstod(string, 0);
8417 	#else
8418 		return strtod(string, 0);
8419 	#endif
8420 	}
8421 
convert_string_to_number_scratch(char_t (& buffer)[32],const char_t * begin,const char_t * end,double * out_result)8422 	PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
8423 	{
8424 		size_t length = static_cast<size_t>(end - begin);
8425 		char_t* scratch = buffer;
8426 
8427 		if (length >= sizeof(buffer) / sizeof(buffer[0]))
8428 		{
8429 			// need to make dummy on-heap copy
8430 			scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8431 			if (!scratch) return false;
8432 		}
8433 
8434 		// copy string to zero-terminated buffer and perform conversion
8435 		memcpy(scratch, begin, length * sizeof(char_t));
8436 		scratch[length] = 0;
8437 
8438 		*out_result = convert_string_to_number(scratch);
8439 
8440 		// free dummy buffer
8441 		if (scratch != buffer) xml_memory::deallocate(scratch);
8442 
8443 		return true;
8444 	}
8445 
round_nearest(double value)8446 	PUGI__FN double round_nearest(double value)
8447 	{
8448 		return floor(value + 0.5);
8449 	}
8450 
round_nearest_nzero(double value)8451 	PUGI__FN double round_nearest_nzero(double value)
8452 	{
8453 		// same as round_nearest, but returns -0 for [-0.5, -0]
8454 		// ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
8455 		return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
8456 	}
8457 
qualified_name(const xpath_node & node)8458 	PUGI__FN const char_t* qualified_name(const xpath_node& node)
8459 	{
8460 		return node.attribute() ? node.attribute().name() : node.node().name();
8461 	}
8462 
local_name(const xpath_node & node)8463 	PUGI__FN const char_t* local_name(const xpath_node& node)
8464 	{
8465 		const char_t* name = qualified_name(node);
8466 		const char_t* p = find_char(name, ':');
8467 
8468 		return p ? p + 1 : name;
8469 	}
8470 
8471 	struct namespace_uri_predicate
8472 	{
8473 		const char_t* prefix;
8474 		size_t prefix_length;
8475 
namespace_uri_predicatenamespace_uri_predicate8476 		namespace_uri_predicate(const char_t* name)
8477 		{
8478 			const char_t* pos = find_char(name, ':');
8479 
8480 			prefix = pos ? name : 0;
8481 			prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
8482 		}
8483 
operator ()namespace_uri_predicate8484 		bool operator()(xml_attribute a) const
8485 		{
8486 			const char_t* name = a.name();
8487 
8488 			if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
8489 
8490 			return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
8491 		}
8492 	};
8493 
namespace_uri(xml_node node)8494 	PUGI__FN const char_t* namespace_uri(xml_node node)
8495 	{
8496 		namespace_uri_predicate pred = node.name();
8497 
8498 		xml_node p = node;
8499 
8500 		while (p)
8501 		{
8502 			xml_attribute a = p.find_attribute(pred);
8503 
8504 			if (a) return a.value();
8505 
8506 			p = p.parent();
8507 		}
8508 
8509 		return PUGIXML_TEXT("");
8510 	}
8511 
namespace_uri(xml_attribute attr,xml_node parent)8512 	PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
8513 	{
8514 		namespace_uri_predicate pred = attr.name();
8515 
8516 		// Default namespace does not apply to attributes
8517 		if (!pred.prefix) return PUGIXML_TEXT("");
8518 
8519 		xml_node p = parent;
8520 
8521 		while (p)
8522 		{
8523 			xml_attribute a = p.find_attribute(pred);
8524 
8525 			if (a) return a.value();
8526 
8527 			p = p.parent();
8528 		}
8529 
8530 		return PUGIXML_TEXT("");
8531 	}
8532 
namespace_uri(const xpath_node & node)8533 	PUGI__FN const char_t* namespace_uri(const xpath_node& node)
8534 	{
8535 		return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
8536 	}
8537 
normalize_space(char_t * buffer)8538 	PUGI__FN char_t* normalize_space(char_t* buffer)
8539 	{
8540 		char_t* write = buffer;
8541 
8542 		for (char_t* it = buffer; *it; )
8543 		{
8544 			char_t ch = *it++;
8545 
8546 			if (PUGI__IS_CHARTYPE(ch, ct_space))
8547 			{
8548 				// replace whitespace sequence with single space
8549 				while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
8550 
8551 				// avoid leading spaces
8552 				if (write != buffer) *write++ = ' ';
8553 			}
8554 			else *write++ = ch;
8555 		}
8556 
8557 		// remove trailing space
8558 		if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
8559 
8560 		// zero-terminate
8561 		*write = 0;
8562 
8563 		return write;
8564 	}
8565 
translate(char_t * buffer,const char_t * from,const char_t * to,size_t to_length)8566 	PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
8567 	{
8568 		char_t* write = buffer;
8569 
8570 		while (*buffer)
8571 		{
8572 			PUGI__DMC_VOLATILE char_t ch = *buffer++;
8573 
8574 			const char_t* pos = find_char(from, ch);
8575 
8576 			if (!pos)
8577 				*write++ = ch; // do not process
8578 			else if (static_cast<size_t>(pos - from) < to_length)
8579 				*write++ = to[pos - from]; // replace
8580 		}
8581 
8582 		// zero-terminate
8583 		*write = 0;
8584 
8585 		return write;
8586 	}
8587 
translate_table_generate(xpath_allocator * alloc,const char_t * from,const char_t * to)8588 	PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
8589 	{
8590 		unsigned char table[128] = {0};
8591 
8592 		while (*from)
8593 		{
8594 			unsigned int fc = static_cast<unsigned int>(*from);
8595 			unsigned int tc = static_cast<unsigned int>(*to);
8596 
8597 			if (fc >= 128 || tc >= 128)
8598 				return 0;
8599 
8600 			// code=128 means "skip character"
8601 			if (!table[fc])
8602 				table[fc] = static_cast<unsigned char>(tc ? tc : 128);
8603 
8604 			from++;
8605 			if (tc) to++;
8606 		}
8607 
8608 		for (int i = 0; i < 128; ++i)
8609 			if (!table[i])
8610 				table[i] = static_cast<unsigned char>(i);
8611 
8612 		void* result = alloc->allocate(sizeof(table));
8613 		if (!result) return 0;
8614 
8615 		memcpy(result, table, sizeof(table));
8616 
8617 		return static_cast<unsigned char*>(result);
8618 	}
8619 
translate_table(char_t * buffer,const unsigned char * table)8620 	PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table)
8621 	{
8622 		char_t* write = buffer;
8623 
8624 		while (*buffer)
8625 		{
8626 			char_t ch = *buffer++;
8627 			unsigned int index = static_cast<unsigned int>(ch);
8628 
8629 			if (index < 128)
8630 			{
8631 				unsigned char code = table[index];
8632 
8633 				// code=128 means "skip character" (table size is 128 so 128 can be a special value)
8634 				// this code skips these characters without extra branches
8635 				*write = static_cast<char_t>(code);
8636 				write += 1 - (code >> 7);
8637 			}
8638 			else
8639 			{
8640 				*write++ = ch;
8641 			}
8642 		}
8643 
8644 		// zero-terminate
8645 		*write = 0;
8646 
8647 		return write;
8648 	}
8649 
is_xpath_attribute(const char_t * name)8650 	inline bool is_xpath_attribute(const char_t* name)
8651 	{
8652 		return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':'));
8653 	}
8654 
	// XPath variable holding a boolean: extends xpath_variable with the value and the variable name
	struct xpath_variable_boolean: xpath_variable
	{
		// Tags the base as xpath_type_boolean; the value starts out false
		xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false)
		{
		}

		bool value;
		// Start of the zero-terminated name: new_xpath_variable allocates extra characters past the end
		// of the object and copies the whole name here, so this has to stay the last member
		char_t name[1];
	};
8664 
	// XPath variable holding a number: extends xpath_variable with the value and the variable name
	struct xpath_variable_number: xpath_variable
	{
		// Tags the base as xpath_type_number; the value starts out as 0
		xpath_variable_number(): xpath_variable(xpath_type_number), value(0)
		{
		}

		double value;
		// Start of the zero-terminated name: new_xpath_variable allocates extra characters past the end
		// of the object and copies the whole name here, so this has to stay the last member
		char_t name[1];
	};
8674 
	// XPath variable holding a string: extends xpath_variable with the value and the variable name
	struct xpath_variable_string: xpath_variable
	{
		// Tags the base as xpath_type_string; the value starts out as a null pointer
		xpath_variable_string(): xpath_variable(xpath_type_string), value(0)
		{
		}

		// Releases the value through xml_memory::deallocate, i.e. the variable owns the buffer
		// NOTE(review): presumably the buffer is obtained from xml_memory::allocate by the setter - confirm
		~xpath_variable_string()
		{
			if (value) xml_memory::deallocate(value);
		}

		char_t* value;
		// Start of the zero-terminated name: new_xpath_variable allocates extra characters past the end
		// of the object and copies the whole name here, so this has to stay the last member
		char_t name[1];
	};
8689 
	// XPath variable holding a node set: extends xpath_variable with the value and the variable name
	struct xpath_variable_node_set: xpath_variable
	{
		// Tags the base as xpath_type_node_set; the value is a default-constructed xpath_node_set
		xpath_variable_node_set(): xpath_variable(xpath_type_node_set)
		{
		}

		xpath_node_set value;
		// Start of the zero-terminated name: new_xpath_variable allocates extra characters past the end
		// of the object and copies the whole name here, so this has to stay the last member
		char_t name[1];
	};
8699 
	// Default-constructed, immutable node set with internal linkage
	// NOTE(review): presumably used as a shared empty result where a node set reference is needed - confirm at use sites
	static const xpath_node_set dummy_node_set;
8701 
hash_string(const char_t * str)8702 	PUGI__FN PUGI__UNSIGNED_OVERFLOW unsigned int hash_string(const char_t* str)
8703 	{
8704 		// Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
8705 		unsigned int result = 0;
8706 
8707 		while (*str)
8708 		{
8709 			result += static_cast<unsigned int>(*str++);
8710 			result += result << 10;
8711 			result ^= result >> 6;
8712 		}
8713 
8714 		result += result << 3;
8715 		result ^= result >> 11;
8716 		result += result << 15;
8717 
8718 		return result;
8719 	}
8720 
new_xpath_variable(const char_t * name)8721 	template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
8722 	{
8723 		size_t length = strlength(name);
8724 		if (length == 0) return 0; // empty variable names are invalid
8725 
8726 		// $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
8727 		void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
8728 		if (!memory) return 0;
8729 
8730 		T* result = new (memory) T();
8731 
8732 		memcpy(result->name, name, (length + 1) * sizeof(char_t));
8733 
8734 		return result;
8735 	}
8736 
new_xpath_variable(xpath_value_type type,const char_t * name)8737 	PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
8738 	{
8739 		switch (type)
8740 		{
8741 		case xpath_type_node_set:
8742 			return new_xpath_variable<xpath_variable_node_set>(name);
8743 
8744 		case xpath_type_number:
8745 			return new_xpath_variable<xpath_variable_number>(name);
8746 
8747 		case xpath_type_string:
8748 			return new_xpath_variable<xpath_variable_string>(name);
8749 
8750 		case xpath_type_boolean:
8751 			return new_xpath_variable<xpath_variable_boolean>(name);
8752 
8753 		default:
8754 			return 0;
8755 		}
8756 	}
8757 
delete_xpath_variable(T * var)8758 	template <typename T> PUGI__FN void delete_xpath_variable(T* var)
8759 	{
8760 		var->~T();
8761 		xml_memory::deallocate(var);
8762 	}
8763 
delete_xpath_variable(xpath_value_type type,xpath_variable * var)8764 	PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
8765 	{
8766 		switch (type)
8767 		{
8768 		case xpath_type_node_set:
8769 			delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
8770 			break;
8771 
8772 		case xpath_type_number:
8773 			delete_xpath_variable(static_cast<xpath_variable_number*>(var));
8774 			break;
8775 
8776 		case xpath_type_string:
8777 			delete_xpath_variable(static_cast<xpath_variable_string*>(var));
8778 			break;
8779 
8780 		case xpath_type_boolean:
8781 			delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
8782 			break;
8783 
8784 		default:
8785 			assert(false && "Invalid variable type"); // unreachable
8786 		}
8787 	}
8788 
copy_xpath_variable(xpath_variable * lhs,const xpath_variable * rhs)8789 	PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
8790 	{
8791 		switch (rhs->type())
8792 		{
8793 		case xpath_type_node_set:
8794 			return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
8795 
8796 		case xpath_type_number:
8797 			return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
8798 
8799 		case xpath_type_string:
8800 			return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
8801 
8802 		case xpath_type_boolean:
8803 			return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
8804 
8805 		default:
8806 			assert(false && "Invalid variable type"); // unreachable
8807 			return false;
8808 		}
8809 	}
8810 
get_variable_scratch(char_t (& buffer)[32],xpath_variable_set * set,const char_t * begin,const char_t * end,xpath_variable ** out_result)8811 	PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
8812 	{
8813 		size_t length = static_cast<size_t>(end - begin);
8814 		char_t* scratch = buffer;
8815 
8816 		if (length >= sizeof(buffer) / sizeof(buffer[0]))
8817 		{
8818 			// need to make dummy on-heap copy
8819 			scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8820 			if (!scratch) return false;
8821 		}
8822 
8823 		// copy string to zero-terminated buffer and perform lookup
8824 		memcpy(scratch, begin, length * sizeof(char_t));
8825 		scratch[length] = 0;
8826 
8827 		*out_result = set->get(scratch);
8828 
8829 		// free dummy buffer
8830 		if (scratch != buffer) xml_memory::deallocate(scratch);
8831 
8832 		return true;
8833 	}
8834 PUGI__NS_END
8835 
8836 // Internal node set class
8837 PUGI__NS_BEGIN
xpath_get_order(const xpath_node * begin,const xpath_node * end)8838 	PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
8839 	{
8840 		if (end - begin < 2)
8841 			return xpath_node_set::type_sorted;
8842 
8843 		document_order_comparator cmp;
8844 
8845 		bool first = cmp(begin[0], begin[1]);
8846 
8847 		for (const xpath_node* it = begin + 1; it + 1 < end; ++it)
8848 			if (cmp(it[0], it[1]) != first)
8849 				return xpath_node_set::type_unsorted;
8850 
8851 		return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse;
8852 	}
8853 
xpath_sort(xpath_node * begin,xpath_node * end,xpath_node_set::type_t type,bool rev)8854 	PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
8855 	{
8856 		xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
8857 
8858 		if (type == xpath_node_set::type_unsorted)
8859 		{
8860 			xpath_node_set::type_t sorted = xpath_get_order(begin, end);
8861 
8862 			if (sorted == xpath_node_set::type_unsorted)
8863 			{
8864 				sort(begin, end, document_order_comparator());
8865 
8866 				type = xpath_node_set::type_sorted;
8867 			}
8868 			else
8869 				type = sorted;
8870 		}
8871 
8872 		if (type != order) reverse(begin, end);
8873 
8874 		return order;
8875 	}
8876 
xpath_first(const xpath_node * begin,const xpath_node * end,xpath_node_set::type_t type)8877 	PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
8878 	{
8879 		if (begin == end) return xpath_node();
8880 
8881 		switch (type)
8882 		{
8883 		case xpath_node_set::type_sorted:
8884 			return *begin;
8885 
8886 		case xpath_node_set::type_sorted_reverse:
8887 			return *(end - 1);
8888 
8889 		case xpath_node_set::type_unsorted:
8890 			return *min_element(begin, end, document_order_comparator());
8891 
8892 		default:
8893 			assert(false && "Invalid node set type"); // unreachable
8894 			return xpath_node();
8895 		}
8896 	}
8897 
8898 	class xpath_node_set_raw
8899 	{
8900 		xpath_node_set::type_t _type;
8901 
8902 		xpath_node* _begin;
8903 		xpath_node* _end;
8904 		xpath_node* _eos;
8905 
8906 	public:
xpath_node_set_raw()8907 		xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
8908 		{
8909 		}
8910 
begin() const8911 		xpath_node* begin() const
8912 		{
8913 			return _begin;
8914 		}
8915 
end() const8916 		xpath_node* end() const
8917 		{
8918 			return _end;
8919 		}
8920 
empty() const8921 		bool empty() const
8922 		{
8923 			return _begin == _end;
8924 		}
8925 
size() const8926 		size_t size() const
8927 		{
8928 			return static_cast<size_t>(_end - _begin);
8929 		}
8930 
first() const8931 		xpath_node first() const
8932 		{
8933 			return xpath_first(_begin, _end, _type);
8934 		}
8935 
8936 		void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
8937 
push_back(const xpath_node & node,xpath_allocator * alloc)8938 		void push_back(const xpath_node& node, xpath_allocator* alloc)
8939 		{
8940 			if (_end != _eos)
8941 				*_end++ = node;
8942 			else
8943 				push_back_grow(node, alloc);
8944 		}
8945 
append(const xpath_node * begin_,const xpath_node * end_,xpath_allocator * alloc)8946 		void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
8947 		{
8948 			if (begin_ == end_) return;
8949 
8950 			size_t size_ = static_cast<size_t>(_end - _begin);
8951 			size_t capacity = static_cast<size_t>(_eos - _begin);
8952 			size_t count = static_cast<size_t>(end_ - begin_);
8953 
8954 			if (size_ + count > capacity)
8955 			{
8956 				// reallocate the old array or allocate a new one
8957 				xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
8958 				if (!data) return;
8959 
8960 				// finalize
8961 				_begin = data;
8962 				_end = data + size_;
8963 				_eos = data + size_ + count;
8964 			}
8965 
8966 			memcpy(_end, begin_, count * sizeof(xpath_node));
8967 			_end += count;
8968 		}
8969 
sort_do()8970 		void sort_do()
8971 		{
8972 			_type = xpath_sort(_begin, _end, _type, false);
8973 		}
8974 
truncate(xpath_node * pos)8975 		void truncate(xpath_node* pos)
8976 		{
8977 			assert(_begin <= pos && pos <= _end);
8978 
8979 			_end = pos;
8980 		}
8981 
remove_duplicates(xpath_allocator * alloc)8982 		void remove_duplicates(xpath_allocator* alloc)
8983 		{
8984 			if (_type == xpath_node_set::type_unsorted && _end - _begin > 2)
8985 			{
8986 				xpath_allocator_capture cr(alloc);
8987 
8988 				size_t size_ = static_cast<size_t>(_end - _begin);
8989 
8990 				size_t hash_size = 1;
8991 				while (hash_size < size_ + size_ / 2) hash_size *= 2;
8992 
8993 				const void** hash_data = static_cast<const void**>(alloc->allocate(hash_size * sizeof(void**)));
8994 				if (!hash_data) return;
8995 
8996 				memset(hash_data, 0, hash_size * sizeof(const void**));
8997 
8998 				xpath_node* write = _begin;
8999 
9000 				for (xpath_node* it = _begin; it != _end; ++it)
9001 				{
9002 					const void* attr = it->attribute().internal_object();
9003 					const void* node = it->node().internal_object();
9004 					const void* key = attr ? attr : node;
9005 
9006 					if (key && hash_insert(hash_data, hash_size, key))
9007 					{
9008 						*write++ = *it;
9009 					}
9010 				}
9011 
9012 				_end = write;
9013 			}
9014 			else
9015 			{
9016 				_end = unique(_begin, _end);
9017 			}
9018 		}
9019 
type() const9020 		xpath_node_set::type_t type() const
9021 		{
9022 			return _type;
9023 		}
9024 
set_type(xpath_node_set::type_t value)9025 		void set_type(xpath_node_set::type_t value)
9026 		{
9027 			_type = value;
9028 		}
9029 	};
9030 
push_back_grow(const xpath_node & node,xpath_allocator * alloc)9031 	PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
9032 	{
9033 		size_t capacity = static_cast<size_t>(_eos - _begin);
9034 
9035 		// get new capacity (1.5x rule)
9036 		size_t new_capacity = capacity + capacity / 2 + 1;
9037 
9038 		// reallocate the old array or allocate a new one
9039 		xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
9040 		if (!data) return;
9041 
9042 		// finalize
9043 		_begin = data;
9044 		_end = data + capacity;
9045 		_eos = data + new_capacity;
9046 
9047 		// push
9048 		*_end++ = node;
9049 	}
9050 PUGI__NS_END
9051 
9052 PUGI__NS_BEGIN
	// Bundle of a node and two counters handed to the evaluation functions
	// NOTE(review): presumably the XPath context node, context position and context size - confirm at use sites
	struct xpath_context
	{
		xpath_node n;
		size_t position, size;

		// Stores the three values as given
		xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
		{
		}
	};
9062 
	// Token kinds produced by xpath_lexer::next()
	enum lexeme_t
	{
		lex_none = 0, // input that does not form a token (e.g. lone '!' or ':', unterminated quoted string)
		lex_equal, // =
		lex_not_equal, // !=
		lex_less, // <
		lex_greater, // >
		lex_less_or_equal, // <=
		lex_greater_or_equal, // >=
		lex_plus, // +
		lex_minus, // -
		lex_multiply, // *
		lex_union, // |
		lex_var_ref, // $name or $prefix:name
		lex_open_brace, // (
		lex_close_brace, // )
		lex_quoted_string, // '...' or "..."
		lex_number, // digits with an optional fraction, or a fraction starting with '.'
		lex_slash, // /
		lex_double_slash, // //
		lex_open_square_brace, // [
		lex_close_square_brace, // ]
		lex_string, // name, prefix:name or prefix:*
		lex_comma, // ,
		lex_axis_attribute, // @
		lex_dot, // .
		lex_double_dot, // ..
		lex_double_colon, // ::
		lex_eof // end of the query string
	};
9093 
9094 	struct xpath_lexer_string
9095 	{
9096 		const char_t* begin;
9097 		const char_t* end;
9098 
xpath_lexer_stringxpath_lexer_string9099 		xpath_lexer_string(): begin(0), end(0)
9100 		{
9101 		}
9102 
operator ==xpath_lexer_string9103 		bool operator==(const char_t* other) const
9104 		{
9105 			size_t length = static_cast<size_t>(end - begin);
9106 
9107 			return strequalrange(other, begin, length);
9108 		}
9109 	};
9110 
	// Tokenizer for XPath query strings. It keeps exactly one current lexeme: next() skips
	// whitespace, classifies the token at the read position and advances past it. The query
	// string is only read, never copied, so lexeme contents point into the caller's buffer.
	class xpath_lexer
	{
		// read position: first character after the current lexeme
		const char_t* _cur;
		// start of the current lexeme (after leading whitespace)
		const char_t* _cur_lexeme_pos;
		// text of the current lexeme; only updated for lexemes that carry text
		xpath_lexer_string _cur_lexeme_contents;

		lexeme_t _cur_lexeme;

	public:
		// Starts at the beginning of the zero-terminated query and scans the first lexeme right away
		explicit xpath_lexer(const char_t* query): _cur(query)
		{
			next();
		}

		// Current read position, i.e. where the next call to next() starts scanning
		const char_t* state() const
		{
			return _cur;
		}

		// Scans the next lexeme. When the input does not form a token the lexeme becomes lex_none;
		// in most of those cases the read position stays on the offending character.
		void next()
		{
			const char_t* cur = _cur;

			while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;

			// save lexeme position for error reporting
			_cur_lexeme_pos = cur;

			switch (*cur)
			{
			case 0:
				// end of query; the position is not advanced, so further calls keep yielding lex_eof
				_cur_lexeme = lex_eof;
				break;

			case '>':
				if (*(cur+1) == '=')
				{
					cur += 2;
					_cur_lexeme = lex_greater_or_equal;
				}
				else
				{
					cur += 1;
					_cur_lexeme = lex_greater;
				}
				break;

			case '<':
				if (*(cur+1) == '=')
				{
					cur += 2;
					_cur_lexeme = lex_less_or_equal;
				}
				else
				{
					cur += 1;
					_cur_lexeme = lex_less;
				}
				break;

			case '!':
				if (*(cur+1) == '=')
				{
					cur += 2;
					_cur_lexeme = lex_not_equal;
				}
				else
				{
					// a lone '!' is not a token
					_cur_lexeme = lex_none;
				}
				break;

			case '=':
				cur += 1;
				_cur_lexeme = lex_equal;

				break;

			case '+':
				cur += 1;
				_cur_lexeme = lex_plus;

				break;

			case '-':
				cur += 1;
				_cur_lexeme = lex_minus;

				break;

			case '*':
				cur += 1;
				_cur_lexeme = lex_multiply;

				break;

			case '|':
				cur += 1;
				_cur_lexeme = lex_union;

				break;

			case '$':
				cur += 1;

				// variable reference: the contents are the name without the leading '$'
				if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
				{
					_cur_lexeme_contents.begin = cur;

					while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;

					if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
					{
						cur++; // :

						while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
					}

					_cur_lexeme_contents.end = cur;

					_cur_lexeme = lex_var_ref;
				}
				else
				{
					// '$' must be followed by a name; the '$' itself has already been consumed here
					_cur_lexeme = lex_none;
				}

				break;

			case '(':
				cur += 1;
				_cur_lexeme = lex_open_brace;

				break;

			case ')':
				cur += 1;
				_cur_lexeme = lex_close_brace;

				break;

			case '[':
				cur += 1;
				_cur_lexeme = lex_open_square_brace;

				break;

			case ']':
				cur += 1;
				_cur_lexeme = lex_close_square_brace;

				break;

			case ',':
				cur += 1;
				_cur_lexeme = lex_comma;

				break;

			case '/':
				if (*(cur+1) == '/')
				{
					cur += 2;
					_cur_lexeme = lex_double_slash;
				}
				else
				{
					cur += 1;
					_cur_lexeme = lex_slash;
				}
				break;

			case '.':
				// "..", a number that starts with its fraction (".5"), or a single "."
				if (*(cur+1) == '.')
				{
					cur += 2;
					_cur_lexeme = lex_double_dot;
				}
				else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
				{
					_cur_lexeme_contents.begin = cur; // .

					++cur;

					while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;

					_cur_lexeme_contents.end = cur;

					_cur_lexeme = lex_number;
				}
				else
				{
					cur += 1;
					_cur_lexeme = lex_dot;
				}
				break;

			case '@':
				cur += 1;
				_cur_lexeme = lex_axis_attribute;

				break;

			case '"':
			case '\'':
			{
				// quoted string: runs up to the matching quote character; the contents exclude both quotes
				char_t terminator = *cur;

				++cur;

				_cur_lexeme_contents.begin = cur;
				while (*cur && *cur != terminator) cur++;
				_cur_lexeme_contents.end = cur;

				if (!*cur)
					// the query ended before the closing quote
					_cur_lexeme = lex_none;
				else
				{
					cur += 1;
					_cur_lexeme = lex_quoted_string;
				}

				break;
			}

			case ':':
				if (*(cur+1) == ':')
				{
					cur += 2;
					_cur_lexeme = lex_double_colon;
				}
				else
				{
					// a lone ':' is not a token
					_cur_lexeme = lex_none;
				}
				break;

			default:
				if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
				{
					// number: digits, optionally followed by '.' and more digits (a trailing '.' is part of the number)
					_cur_lexeme_contents.begin = cur;

					while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;

					if (*cur == '.')
					{
						cur++;

						while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
					}

					_cur_lexeme_contents.end = cur;

					_cur_lexeme = lex_number;
				}
				else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
				{
					// name: ncname, ncname:* or ncname:ncname; a ':' followed by anything else is left for the next lexeme
					_cur_lexeme_contents.begin = cur;

					while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;

					if (cur[0] == ':')
					{
						if (cur[1] == '*') // namespace test ncname:*
						{
							cur += 2; // :*
						}
						else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
						{
							cur++; // :

							while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
						}
					}

					_cur_lexeme_contents.end = cur;

					_cur_lexeme = lex_string;
				}
				else
				{
					// unrecognized character
					_cur_lexeme = lex_none;
				}
			}

			_cur = cur;
		}

		// Kind of the current lexeme
		lexeme_t current() const
		{
			return _cur_lexeme;
		}

		// Start of the current lexeme in the query string
		const char_t* current_pos() const
		{
			return _cur_lexeme_pos;
		}

		// Text of the current lexeme; may only be called for the lexeme kinds that carry text (asserted)
		const xpath_lexer_string& contents() const
		{
			assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);

			return _cur_lexeme_contents;
		}
	};
9416 
	// Kinds of nodes in the XPath expression tree; the comments show the XPath construct and
	// which child (left/right/third/siblings) supplies each operand
	enum ast_type_t
	{
		ast_unknown,
		ast_op_or,						// left or right
		ast_op_and,						// left and right
		ast_op_equal,					// left = right
		ast_op_not_equal,				// left != right
		ast_op_less,					// left < right
		ast_op_greater,					// left > right
		ast_op_less_or_equal,			// left <= right
		ast_op_greater_or_equal,		// left >= right
		ast_op_add,						// left + right
		ast_op_subtract,				// left - right
		ast_op_multiply,				// left * right
		ast_op_divide,					// left div right
		ast_op_mod,						// left mod right
		ast_op_negate,					// - left (unary minus)
		ast_op_union,					// left | right
		ast_predicate,					// apply predicate to set; next points to next predicate
		ast_filter,						// select * from left where right
		ast_string_constant,			// string constant
		ast_number_constant,			// number constant
		ast_variable,					// variable
		ast_func_last,					// last()
		ast_func_position,				// position()
		ast_func_count,					// count(left)
		ast_func_id,					// id(left)
		ast_func_local_name_0,			// local-name()
		ast_func_local_name_1,			// local-name(left)
		ast_func_namespace_uri_0,		// namespace-uri()
		ast_func_namespace_uri_1,		// namespace-uri(left)
		ast_func_name_0,				// name()
		ast_func_name_1,				// name(left)
		ast_func_string_0,				// string()
		ast_func_string_1,				// string(left)
		ast_func_concat,				// concat(left, right, siblings)
		ast_func_starts_with,			// starts-with(left, right)
		ast_func_contains,				// contains(left, right)
		ast_func_substring_before,		// substring-before(left, right)
		ast_func_substring_after,		// substring-after(left, right)
		ast_func_substring_2,			// substring(left, right)
		ast_func_substring_3,			// substring(left, right, third)
		ast_func_string_length_0,		// string-length()
		ast_func_string_length_1,		// string-length(left)
		ast_func_normalize_space_0,		// normalize-space()
		ast_func_normalize_space_1,		// normalize-space(left)
		ast_func_translate,				// translate(left, right, third)
		ast_func_boolean,				// boolean(left)
		ast_func_not,					// not(left)
		ast_func_true,					// true()
		ast_func_false,					// false()
		ast_func_lang,					// lang(left)
		ast_func_number_0,				// number()
		ast_func_number_1,				// number(left)
		ast_func_sum,					// sum(left)
		ast_func_floor,					// floor(left)
		ast_func_ceiling,				// ceiling(left)
		ast_func_round,					// round(left)
		ast_step,						// process set left with step
		ast_step_root,					// select root node

		// node types that do not correspond to XPath syntax directly (optimized forms)
		ast_opt_translate_table,		// translate(left, right, third) where right/third are constants
		ast_opt_compare_attribute		// @name = 'string'
	};
9481 
	// Axis of a location step
	// NOTE(review): the enumerator names mirror the thirteen XPath 1.0 axis names (with '-' written as '_') - confirm the mapping in the parser
	enum axis_t
	{
		axis_ancestor,
		axis_ancestor_or_self,
		axis_attribute,
		axis_child,
		axis_descendant,
		axis_descendant_or_self,
		axis_following,
		axis_following_sibling,
		axis_namespace,
		axis_parent,
		axis_preceding,
		axis_preceding_sibling,
		axis_self
	};
9498 
	// Kind of node test attached to a step
	// NOTE(review): the per-value notes below are inferred from the names - confirm against the parser/evaluator
	enum nodetest_t
	{
		nodetest_none,
		nodetest_name, // presumably a test against a specific name
		nodetest_type_node, // presumably node()
		nodetest_type_comment, // presumably comment()
		nodetest_type_pi, // presumably processing-instruction()
		nodetest_type_text, // presumably text()
		nodetest_pi, // presumably processing-instruction('target')
		nodetest_all, // presumably *
		nodetest_all_in_namespace // presumably prefix:*
	};
9511 
	// Classification of a predicate expression
	// NOTE(review): judging by the names this selects between a general evaluation path and cheaper special cases
	// (position-invariant, constant, constant equal to one) - confirm where the values are assigned and consumed
	enum predicate_t
	{
		predicate_default,
		predicate_posinv,
		predicate_constant,
		predicate_constant_one
	};
9519 
	// Evaluation mode passed to node set evaluation (see the eval_node_set calls and xpath_ast_node::eval_once)
	// NOTE(review): presumably tells the evaluator how much of the result the caller needs - every node,
	// any single node, or the first node - confirm in eval_node_set
	enum nodeset_eval_t
	{
		nodeset_eval_all,
		nodeset_eval_any,
		nodeset_eval_first
	};
9526 
	// Turns an axis_t value into a distinct type: each instantiation carries its axis as a static constant
	template <axis_t N> struct axis_to_type
	{
		static const axis_t axis;
	};

	// Out-of-class definition of the constant; its value is the template argument
	template <axis_t N> const axis_t axis_to_type<N>::axis = N;
9533 
9534 	class xpath_ast_node
9535 	{
9536 	private:
9537 		// node type
9538 		char _type;
9539 		char _rettype;
9540 
9541 		// for ast_step
9542 		char _axis;
9543 
9544 		// for ast_step/ast_predicate/ast_filter
9545 		char _test;
9546 
9547 		// tree node structure
9548 		xpath_ast_node* _left;
9549 		xpath_ast_node* _right;
9550 		xpath_ast_node* _next;
9551 
9552 		union
9553 		{
9554 			// value for ast_string_constant
9555 			const char_t* string;
9556 			// value for ast_number_constant
9557 			double number;
9558 			// variable for ast_variable
9559 			xpath_variable* variable;
9560 			// node test for ast_step (node name/namespace/node type/pi target)
9561 			const char_t* nodetest;
9562 			// table for ast_opt_translate_table
9563 			const unsigned char* table;
9564 		} _data;
9565 
9566 		xpath_ast_node(const xpath_ast_node&);
9567 		xpath_ast_node& operator=(const xpath_ast_node&);
9568 
		// Evaluates lhs and rhs in context c and combines them with comp, choosing the comparison
		// domain from the static return types of both expressions:
		// - neither is a node set: compare as booleans if either side is boolean, else as numbers
		//   if either side is a number, else as strings;
		// - both are node sets: true if comp holds for the string values of at least one pair of nodes;
		// - exactly one is a node set: compare as booleans if the other side is boolean; otherwise true
		//   if comp holds between the other side's value and the (number-converted or string) value of
		//   at least one node.
		// Temporary allocations made from stack.result are scoped by xpath_allocator_capture objects.
		template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
		{
			xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();

			if (lt != xpath_type_node_set && rt != xpath_type_node_set)
			{
				// two scalar values: boolean takes precedence over number, number over string
				if (lt == xpath_type_boolean || rt == xpath_type_boolean)
					return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
				else if (lt == xpath_type_number || rt == xpath_type_number)
					return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
				else if (lt == xpath_type_string || rt == xpath_type_string)
				{
					xpath_allocator_capture cr(stack.result);

					xpath_string ls = lhs->eval_string(c, stack);
					xpath_string rs = rhs->eval_string(c, stack);

					return comp(ls, rs);
				}
			}
			else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
			{
				xpath_allocator_capture cr(stack.result);

				xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
				xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);

				// try every pair of nodes; one match is enough
				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
					{
						// scopes the two string values computed for this pair
						xpath_allocator_capture cri(stack.result);

						if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
							return true;
					}

				return false;
			}
			else
			{
				// exactly one side is a node set: make it the right-hand side
				// (the operands are therefore passed to comp in swapped order when the swap happens)
				if (lt == xpath_type_node_set)
				{
					swap(lhs, rhs);
					swap(lt, rt);
				}

				if (lt == xpath_type_boolean)
					return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
				else if (lt == xpath_type_number)
				{
					xpath_allocator_capture cr(stack.result);

					double l = lhs->eval_number(c, stack);
					xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);

					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
					{
						// scopes the string value computed for this node
						xpath_allocator_capture cri(stack.result);

						// each node contributes its string value converted to a number
						if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
							return true;
					}

					return false;
				}
				else if (lt == xpath_type_string)
				{
					xpath_allocator_capture cr(stack.result);

					xpath_string l = lhs->eval_string(c, stack);
					xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);

					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
					{
						// scopes the string value computed for this node
						xpath_allocator_capture cri(stack.result);

						if (comp(l, string_value(*ri, stack.result)))
							return true;
					}

					return false;
				}
			}

			// reached only when a return type is none of boolean/number/string/node set
			assert(false && "Wrong types"); // unreachable
			return false;
		}
9656 
eval_once(xpath_node_set::type_t type,nodeset_eval_t eval)9657 		static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval)
9658 		{
9659 			return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any;
9660 		}
9661 
compare_rel(xpath_ast_node * lhs,xpath_ast_node * rhs,const xpath_context & c,const xpath_stack & stack,const Comp & comp)9662 		template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9663 		{
9664 			xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9665 
9666 			if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9667 				return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9668 			else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9669 			{
9670 				xpath_allocator_capture cr(stack.result);
9671 
9672 				xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9673 				xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9674 
9675 				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9676 				{
9677 					xpath_allocator_capture cri(stack.result);
9678 
9679 					double l = convert_string_to_number(string_value(*li, stack.result).c_str());
9680 
9681 					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9682 					{
9683 						xpath_allocator_capture crii(stack.result);
9684 
9685 						if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9686 							return true;
9687 					}
9688 				}
9689 
9690 				return false;
9691 			}
9692 			else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
9693 			{
9694 				xpath_allocator_capture cr(stack.result);
9695 
9696 				double l = lhs->eval_number(c, stack);
9697 				xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9698 
9699 				for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9700 				{
9701 					xpath_allocator_capture cri(stack.result);
9702 
9703 					if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9704 						return true;
9705 				}
9706 
9707 				return false;
9708 			}
9709 			else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
9710 			{
9711 				xpath_allocator_capture cr(stack.result);
9712 
9713 				xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9714 				double r = rhs->eval_number(c, stack);
9715 
9716 				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9717 				{
9718 					xpath_allocator_capture cri(stack.result);
9719 
9720 					if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
9721 						return true;
9722 				}
9723 
9724 				return false;
9725 			}
9726 			else
9727 			{
9728 				assert(false && "Wrong types"); // unreachable
9729 				return false;
9730 			}
9731 		}
9732 
apply_predicate_boolean(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack,bool once)9733 		static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9734 		{
9735 			assert(ns.size() >= first);
9736 			assert(expr->rettype() != xpath_type_number);
9737 
9738 			size_t i = 1;
9739 			size_t size = ns.size() - first;
9740 
9741 			xpath_node* last = ns.begin() + first;
9742 
9743 			// remove_if... or well, sort of
9744 			for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9745 			{
9746 				xpath_context c(*it, i, size);
9747 
9748 				if (expr->eval_boolean(c, stack))
9749 				{
9750 					*last++ = *it;
9751 
9752 					if (once) break;
9753 				}
9754 			}
9755 
9756 			ns.truncate(last);
9757 		}
9758 
apply_predicate_number(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack,bool once)9759 		static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9760 		{
9761 			assert(ns.size() >= first);
9762 			assert(expr->rettype() == xpath_type_number);
9763 
9764 			size_t i = 1;
9765 			size_t size = ns.size() - first;
9766 
9767 			xpath_node* last = ns.begin() + first;
9768 
9769 			// remove_if... or well, sort of
9770 			for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9771 			{
9772 				xpath_context c(*it, i, size);
9773 
9774 				if (expr->eval_number(c, stack) == static_cast<double>(i))
9775 				{
9776 					*last++ = *it;
9777 
9778 					if (once) break;
9779 				}
9780 			}
9781 
9782 			ns.truncate(last);
9783 		}
9784 
apply_predicate_number_const(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack)9785 		static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
9786 		{
9787 			assert(ns.size() >= first);
9788 			assert(expr->rettype() == xpath_type_number);
9789 
9790 			size_t size = ns.size() - first;
9791 
9792 			xpath_node* last = ns.begin() + first;
9793 
9794 			xpath_context c(xpath_node(), 1, size);
9795 
9796 			double er = expr->eval_number(c, stack);
9797 
9798 			if (er >= 1.0 && er <= static_cast<double>(size))
9799 			{
9800 				size_t eri = static_cast<size_t>(er);
9801 
9802 				if (er == static_cast<double>(eri))
9803 				{
9804 					xpath_node r = last[eri - 1];
9805 
9806 					*last++ = r;
9807 				}
9808 			}
9809 
9810 			ns.truncate(last);
9811 		}
9812 
apply_predicate(xpath_node_set_raw & ns,size_t first,const xpath_stack & stack,bool once)9813 		void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once)
9814 		{
9815 			if (ns.size() == first) return;
9816 
9817 			assert(_type == ast_filter || _type == ast_predicate);
9818 
9819 			if (_test == predicate_constant || _test == predicate_constant_one)
9820 				apply_predicate_number_const(ns, first, _right, stack);
9821 			else if (_right->rettype() == xpath_type_number)
9822 				apply_predicate_number(ns, first, _right, stack, once);
9823 			else
9824 				apply_predicate_boolean(ns, first, _right, stack, once);
9825 		}
9826 
apply_predicates(xpath_node_set_raw & ns,size_t first,const xpath_stack & stack,nodeset_eval_t eval)9827 		void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval)
9828 		{
9829 			if (ns.size() == first) return;
9830 
9831 			bool last_once = eval_once(ns.type(), eval);
9832 
9833 			for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
9834 				pred->apply_predicate(ns, first, stack, !pred->_next && last_once);
9835 		}
9836 
step_push(xpath_node_set_raw & ns,xml_attribute_struct * a,xml_node_struct * parent,xpath_allocator * alloc)9837 		bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc)
9838 		{
9839 			assert(a);
9840 
9841 			const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
9842 
9843 			switch (_test)
9844 			{
9845 			case nodetest_name:
9846 				if (strequal(name, _data.nodetest) && is_xpath_attribute(name))
9847 				{
9848 					ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9849 					return true;
9850 				}
9851 				break;
9852 
9853 			case nodetest_type_node:
9854 			case nodetest_all:
9855 				if (is_xpath_attribute(name))
9856 				{
9857 					ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9858 					return true;
9859 				}
9860 				break;
9861 
9862 			case nodetest_all_in_namespace:
9863 				if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
9864 				{
9865 					ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9866 					return true;
9867 				}
9868 				break;
9869 
9870 			default:
9871 				;
9872 			}
9873 
9874 			return false;
9875 		}
9876 
step_push(xpath_node_set_raw & ns,xml_node_struct * n,xpath_allocator * alloc)9877 		bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc)
9878 		{
9879 			assert(n);
9880 
9881 			xml_node_type type = PUGI__NODETYPE(n);
9882 
9883 			switch (_test)
9884 			{
9885 			case nodetest_name:
9886 				if (type == node_element && n->name && strequal(n->name, _data.nodetest))
9887 				{
9888 					ns.push_back(xml_node(n), alloc);
9889 					return true;
9890 				}
9891 				break;
9892 
9893 			case nodetest_type_node:
9894 				ns.push_back(xml_node(n), alloc);
9895 				return true;
9896 
9897 			case nodetest_type_comment:
9898 				if (type == node_comment)
9899 				{
9900 					ns.push_back(xml_node(n), alloc);
9901 					return true;
9902 				}
9903 				break;
9904 
9905 			case nodetest_type_text:
9906 				if (type == node_pcdata || type == node_cdata)
9907 				{
9908 					ns.push_back(xml_node(n), alloc);
9909 					return true;
9910 				}
9911 				break;
9912 
9913 			case nodetest_type_pi:
9914 				if (type == node_pi)
9915 				{
9916 					ns.push_back(xml_node(n), alloc);
9917 					return true;
9918 				}
9919 				break;
9920 
9921 			case nodetest_pi:
9922 				if (type == node_pi && n->name && strequal(n->name, _data.nodetest))
9923 				{
9924 					ns.push_back(xml_node(n), alloc);
9925 					return true;
9926 				}
9927 				break;
9928 
9929 			case nodetest_all:
9930 				if (type == node_element)
9931 				{
9932 					ns.push_back(xml_node(n), alloc);
9933 					return true;
9934 				}
9935 				break;
9936 
9937 			case nodetest_all_in_namespace:
9938 				if (type == node_element && n->name && starts_with(n->name, _data.nodetest))
9939 				{
9940 					ns.push_back(xml_node(n), alloc);
9941 					return true;
9942 				}
9943 				break;
9944 
9945 			default:
9946 				assert(false && "Unknown axis"); // unreachable
9947 			}
9948 
9949 			return false;
9950 		}
9951 
		// Collects into ns the nodes reachable from node n along the axis given by T::axis,
		// offering each candidate to step_push (which applies the node test).
		// 'once' requests an early exit as soon as one node has been pushed; the checks use
		// bitwise '&' on two bools, so step_push is always called before 'once' is consulted.
		// The unnamed T parameter is used only to select the axis at compile time.
		template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T)
		{
			const axis_t axis = T::axis;

			switch (axis)
			{
			case axis_attribute:
			{
				// walk the attribute list of n
				for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
					if (step_push(ns, a, n, alloc) & once)
						return;

				break;
			}

			case axis_child:
			{
				// walk the direct children of n
				for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
					if (step_push(ns, c, alloc) & once)
						return;

				break;
			}

			case axis_descendant:
			case axis_descendant_or_self:
			{
				if (axis == axis_descendant_or_self)
					if (step_push(ns, n, alloc) & once)
						return;

				xml_node_struct* cur = n->first_child;

				// non-recursive traversal of the subtree below n: visit a node, then descend to its first child
				while (cur)
				{
					if (step_push(ns, cur, alloc) & once)
						return;

					if (cur->first_child)
						cur = cur->first_child;
					else
					{
						// no children: climb until a next sibling exists, stopping once we are back at n
						while (!cur->next_sibling)
						{
							cur = cur->parent;

							if (cur == n) return;
						}

						cur = cur->next_sibling;
					}
				}

				break;
			}

			case axis_following_sibling:
			{
				for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
					if (step_push(ns, c, alloc) & once)
						return;

				break;
			}

			case axis_preceding_sibling:
			{
				// NOTE(review): the loop relies on prev_sibling_c being a cyclic link (the first sibling's
				// prev_sibling_c presumably points at the last sibling, whose next_sibling is null) - confirm
				for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
					if (step_push(ns, c, alloc) & once)
						return;

				break;
			}

			case axis_following:
			{
				xml_node_struct* cur = n;

				// exit from this node so that we don't include descendants
				while (!cur->next_sibling)
				{
					cur = cur->parent;

					if (!cur) return;
				}

				cur = cur->next_sibling;

				// from here on: visit a node, descend to its first child, otherwise climb to the next sibling
				while (cur)
				{
					if (step_push(ns, cur, alloc) & once)
						return;

					if (cur->first_child)
						cur = cur->first_child;
					else
					{
						while (!cur->next_sibling)
						{
							cur = cur->parent;

							if (!cur) return;
						}

						cur = cur->next_sibling;
					}
				}

				break;
			}

			case axis_preceding:
			{
				xml_node_struct* cur = n;

				// exit from this node so that we don't include descendants
				while (!cur->prev_sibling_c->next_sibling)
				{
					cur = cur->parent;

					if (!cur) return;
				}

				cur = cur->prev_sibling_c;

				// backwards traversal: descend to the last child first (first_child->prev_sibling_c),
				// push leaves immediately and push inner nodes when climbing back up through them
				while (cur)
				{
					if (cur->first_child)
						cur = cur->first_child->prev_sibling_c;
					else
					{
						// leaf node, can't be ancestor
						if (step_push(ns, cur, alloc) & once)
							return;

						while (!cur->prev_sibling_c->next_sibling)
						{
							cur = cur->parent;

							if (!cur) return;

							// ancestors of n are skipped while climbing
							if (!node_is_ancestor(cur, n))
								if (step_push(ns, cur, alloc) & once)
									return;
						}

						cur = cur->prev_sibling_c;
					}
				}

				break;
			}

			case axis_ancestor:
			case axis_ancestor_or_self:
			{
				if (axis == axis_ancestor_or_self)
					if (step_push(ns, n, alloc) & once)
						return;

				xml_node_struct* cur = n->parent;

				// follow the parent chain until it ends
				while (cur)
				{
					if (step_push(ns, cur, alloc) & once)
						return;

					cur = cur->parent;
				}

				break;
			}

			case axis_self:
			{
				// at most one node can be pushed, so 'once' is not consulted
				step_push(ns, n, alloc);

				break;
			}

			case axis_parent:
			{
				// at most one node can be pushed, so 'once' is not consulted
				if (n->parent)
					step_push(ns, n->parent, alloc);

				break;
			}

			default:
				assert(false && "Unimplemented axis"); // unreachable
			}
		}
10144 
step_fill(xpath_node_set_raw & ns,xml_attribute_struct * a,xml_node_struct * p,xpath_allocator * alloc,bool once,T v)10145 		template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v)
10146 		{
10147 			const axis_t axis = T::axis;
10148 
10149 			switch (axis)
10150 			{
10151 			case axis_ancestor:
10152 			case axis_ancestor_or_self:
10153 			{
10154 				if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
10155 					if (step_push(ns, a, p, alloc) & once)
10156 						return;
10157 
10158 				xml_node_struct* cur = p;
10159 
10160 				while (cur)
10161 				{
10162 					if (step_push(ns, cur, alloc) & once)
10163 						return;
10164 
10165 					cur = cur->parent;
10166 				}
10167 
10168 				break;
10169 			}
10170 
10171 			case axis_descendant_or_self:
10172 			case axis_self:
10173 			{
10174 				if (_test == nodetest_type_node) // reject attributes based on principal node type test
10175 					step_push(ns, a, p, alloc);
10176 
10177 				break;
10178 			}
10179 
10180 			case axis_following:
10181 			{
10182 				xml_node_struct* cur = p;
10183 
10184 				while (cur)
10185 				{
10186 					if (cur->first_child)
10187 						cur = cur->first_child;
10188 					else
10189 					{
10190 						while (!cur->next_sibling)
10191 						{
10192 							cur = cur->parent;
10193 
10194 							if (!cur) return;
10195 						}
10196 
10197 						cur = cur->next_sibling;
10198 					}
10199 
10200 					if (step_push(ns, cur, alloc) & once)
10201 						return;
10202 				}
10203 
10204 				break;
10205 			}
10206 
10207 			case axis_parent:
10208 			{
10209 				step_push(ns, p, alloc);
10210 
10211 				break;
10212 			}
10213 
10214 			case axis_preceding:
10215 			{
10216 				// preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
10217 				step_fill(ns, p, alloc, once, v);
10218 				break;
10219 			}
10220 
10221 			default:
10222 				assert(false && "Unimplemented axis"); // unreachable
10223 			}
10224 		}
10225 
step_fill(xpath_node_set_raw & ns,const xpath_node & xn,xpath_allocator * alloc,bool once,T v)10226 		template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v)
10227 		{
10228 			const axis_t axis = T::axis;
10229 			const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
10230 
10231 			if (xn.node())
10232 				step_fill(ns, xn.node().internal_object(), alloc, once, v);
10233 			else if (axis_has_attributes && xn.attribute() && xn.parent())
10234 				step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
10235 		}
10236 
step_do(const xpath_context & c,const xpath_stack & stack,nodeset_eval_t eval,T v)10237 		template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)
10238 		{
10239 			const axis_t axis = T::axis;
10240 			const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
10241 			const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
10242 
10243 			bool once =
10244 				(axis == axis_attribute && _test == nodetest_name) ||
10245 				(!_right && eval_once(axis_type, eval)) ||
10246 			    // coverity[mixed_enums]
10247 				(_right && !_right->_next && _right->_test == predicate_constant_one);
10248 
10249 			xpath_node_set_raw ns;
10250 			ns.set_type(axis_type);
10251 
10252 			if (_left)
10253 			{
10254 				xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
10255 
10256 				// self axis preserves the original order
10257 				if (axis == axis_self) ns.set_type(s.type());
10258 
10259 				for (const xpath_node* it = s.begin(); it != s.end(); ++it)
10260 				{
10261 					size_t size = ns.size();
10262 
10263 					// in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
10264 					if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
10265 
10266 					step_fill(ns, *it, stack.result, once, v);
10267 					if (_right) apply_predicates(ns, size, stack, eval);
10268 				}
10269 			}
10270 			else
10271 			{
10272 				step_fill(ns, c.n, stack.result, once, v);
10273 				if (_right) apply_predicates(ns, 0, stack, eval);
10274 			}
10275 
10276 			// child, attribute and self axes always generate unique set of nodes
10277 			// for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
10278 			if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
10279 				ns.remove_duplicates(stack.temp);
10280 
10281 			return ns;
10282 		}
10283 
10284 	public:
xpath_ast_node(ast_type_t type,xpath_value_type rettype_,const char_t * value)10285 		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
10286 			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10287 		{
10288 			assert(type == ast_string_constant);
10289 			_data.string = value;
10290 		}
10291 
xpath_ast_node(ast_type_t type,xpath_value_type rettype_,double value)10292 		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
10293 			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10294 		{
10295 			assert(type == ast_number_constant);
10296 			_data.number = value;
10297 		}
10298 
xpath_ast_node(ast_type_t type,xpath_value_type rettype_,xpath_variable * value)10299 		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
10300 			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10301 		{
10302 			assert(type == ast_variable);
10303 			_data.variable = value;
10304 		}
10305 
xpath_ast_node(ast_type_t type,xpath_value_type rettype_,xpath_ast_node * left=0,xpath_ast_node * right=0)10306 		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
10307 			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
10308 		{
10309 		}
10310 
xpath_ast_node(ast_type_t type,xpath_ast_node * left,axis_t axis,nodetest_t test,const char_t * contents)10311 		xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
10312 			_type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
10313 		{
10314 			assert(type == ast_step);
10315 			_data.nodetest = contents;
10316 		}
10317 
xpath_ast_node(ast_type_t type,xpath_ast_node * left,xpath_ast_node * right,predicate_t test)10318 		xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test):
10319 			_type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0)
10320 		{
10321 			assert(type == ast_filter || type == ast_predicate);
10322 		}
10323 
set_next(xpath_ast_node * value)10324 		void set_next(xpath_ast_node* value)
10325 		{
10326 			_next = value;
10327 		}
10328 
set_right(xpath_ast_node * value)10329 		void set_right(xpath_ast_node* value)
10330 		{
10331 			_right = value;
10332 		}
10333 
eval_boolean(const xpath_context & c,const xpath_stack & stack)10334 		bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
10335 		{
10336 			switch (_type)
10337 			{
10338 			case ast_op_or:
10339 				return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
10340 
10341 			case ast_op_and:
10342 				return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
10343 
10344 			case ast_op_equal:
10345 				return compare_eq(_left, _right, c, stack, equal_to());
10346 
10347 			case ast_op_not_equal:
10348 				return compare_eq(_left, _right, c, stack, not_equal_to());
10349 
10350 			case ast_op_less:
10351 				return compare_rel(_left, _right, c, stack, less());
10352 
10353 			case ast_op_greater:
10354 				return compare_rel(_right, _left, c, stack, less());
10355 
10356 			case ast_op_less_or_equal:
10357 				return compare_rel(_left, _right, c, stack, less_equal());
10358 
10359 			case ast_op_greater_or_equal:
10360 				return compare_rel(_right, _left, c, stack, less_equal());
10361 
10362 			case ast_func_starts_with:
10363 			{
10364 				xpath_allocator_capture cr(stack.result);
10365 
10366 				xpath_string lr = _left->eval_string(c, stack);
10367 				xpath_string rr = _right->eval_string(c, stack);
10368 
10369 				return starts_with(lr.c_str(), rr.c_str());
10370 			}
10371 
10372 			case ast_func_contains:
10373 			{
10374 				xpath_allocator_capture cr(stack.result);
10375 
10376 				xpath_string lr = _left->eval_string(c, stack);
10377 				xpath_string rr = _right->eval_string(c, stack);
10378 
10379 				return find_substring(lr.c_str(), rr.c_str()) != 0;
10380 			}
10381 
10382 			case ast_func_boolean:
10383 				return _left->eval_boolean(c, stack);
10384 
10385 			case ast_func_not:
10386 				return !_left->eval_boolean(c, stack);
10387 
10388 			case ast_func_true:
10389 				return true;
10390 
10391 			case ast_func_false:
10392 				return false;
10393 
10394 			case ast_func_lang:
10395 			{
10396 				if (c.n.attribute()) return false;
10397 
10398 				xpath_allocator_capture cr(stack.result);
10399 
10400 				xpath_string lang = _left->eval_string(c, stack);
10401 
10402 				for (xml_node n = c.n.node(); n; n = n.parent())
10403 				{
10404 					xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
10405 
10406 					if (a)
10407 					{
10408 						const char_t* value = a.value();
10409 
10410 						// strnicmp / strncasecmp is not portable
10411 						for (const char_t* lit = lang.c_str(); *lit; ++lit)
10412 						{
10413 							if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
10414 							++value;
10415 						}
10416 
10417 						return *value == 0 || *value == '-';
10418 					}
10419 				}
10420 
10421 				return false;
10422 			}
10423 
10424 			case ast_opt_compare_attribute:
10425 			{
10426 				const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();
10427 
10428 				xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
10429 
10430 				return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
10431 			}
10432 
10433 			case ast_variable:
10434 			{
10435 				assert(_rettype == _data.variable->type());
10436 
10437 				if (_rettype == xpath_type_boolean)
10438 					return _data.variable->get_boolean();
10439 
10440 				// variable needs to be converted to the correct type, this is handled by the fallthrough block below
10441 				break;
10442 			}
10443 
10444 			default:
10445 				;
10446 			}
10447 
10448 			// none of the ast types that return the value directly matched, we need to perform type conversion
10449 			switch (_rettype)
10450 			{
10451 			case xpath_type_number:
10452 				return convert_number_to_boolean(eval_number(c, stack));
10453 
10454 			case xpath_type_string:
10455 			{
10456 				xpath_allocator_capture cr(stack.result);
10457 
10458 				return !eval_string(c, stack).empty();
10459 			}
10460 
10461 			case xpath_type_node_set:
10462 			{
10463 				xpath_allocator_capture cr(stack.result);
10464 
10465 				return !eval_node_set(c, stack, nodeset_eval_any).empty();
10466 			}
10467 
10468 			default:
10469 				assert(false && "Wrong expression for return type boolean"); // unreachable
10470 				return false;
10471 			}
10472 		}
10473 
eval_number(const xpath_context & c,const xpath_stack & stack)10474 		double eval_number(const xpath_context& c, const xpath_stack& stack)
10475 		{
10476 			switch (_type)
10477 			{
10478 			case ast_op_add:
10479 				return _left->eval_number(c, stack) + _right->eval_number(c, stack);
10480 
10481 			case ast_op_subtract:
10482 				return _left->eval_number(c, stack) - _right->eval_number(c, stack);
10483 
10484 			case ast_op_multiply:
10485 				return _left->eval_number(c, stack) * _right->eval_number(c, stack);
10486 
10487 			case ast_op_divide:
10488 				return _left->eval_number(c, stack) / _right->eval_number(c, stack);
10489 
10490 			case ast_op_mod:
10491 				return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
10492 
10493 			case ast_op_negate:
10494 				return -_left->eval_number(c, stack);
10495 
10496 			case ast_number_constant:
10497 				return _data.number;
10498 
10499 			case ast_func_last:
10500 				return static_cast<double>(c.size);
10501 
10502 			case ast_func_position:
10503 				return static_cast<double>(c.position);
10504 
10505 			case ast_func_count:
10506 			{
10507 				xpath_allocator_capture cr(stack.result);
10508 
10509 				return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
10510 			}
10511 
10512 			case ast_func_string_length_0:
10513 			{
10514 				xpath_allocator_capture cr(stack.result);
10515 
10516 				return static_cast<double>(string_value(c.n, stack.result).length());
10517 			}
10518 
10519 			case ast_func_string_length_1:
10520 			{
10521 				xpath_allocator_capture cr(stack.result);
10522 
10523 				return static_cast<double>(_left->eval_string(c, stack).length());
10524 			}
10525 
10526 			case ast_func_number_0:
10527 			{
10528 				xpath_allocator_capture cr(stack.result);
10529 
10530 				return convert_string_to_number(string_value(c.n, stack.result).c_str());
10531 			}
10532 
10533 			case ast_func_number_1:
10534 				return _left->eval_number(c, stack);
10535 
10536 			case ast_func_sum:
10537 			{
10538 				xpath_allocator_capture cr(stack.result);
10539 
10540 				double r = 0;
10541 
10542 				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);
10543 
10544 				for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
10545 				{
10546 					xpath_allocator_capture cri(stack.result);
10547 
10548 					r += convert_string_to_number(string_value(*it, stack.result).c_str());
10549 				}
10550 
10551 				return r;
10552 			}
10553 
10554 			case ast_func_floor:
10555 			{
10556 				double r = _left->eval_number(c, stack);
10557 
10558 				return r == r ? floor(r) : r;
10559 			}
10560 
10561 			case ast_func_ceiling:
10562 			{
10563 				double r = _left->eval_number(c, stack);
10564 
10565 				return r == r ? ceil(r) : r;
10566 			}
10567 
10568 			case ast_func_round:
10569 				return round_nearest_nzero(_left->eval_number(c, stack));
10570 
10571 			case ast_variable:
10572 			{
10573 				assert(_rettype == _data.variable->type());
10574 
10575 				if (_rettype == xpath_type_number)
10576 					return _data.variable->get_number();
10577 
10578 				// variable needs to be converted to the correct type, this is handled by the fallthrough block below
10579 				break;
10580 			}
10581 
10582 			default:
10583 				;
10584 			}
10585 
10586 			// none of the ast types that return the value directly matched, we need to perform type conversion
10587 			switch (_rettype)
10588 			{
10589 			case xpath_type_boolean:
10590 				return eval_boolean(c, stack) ? 1 : 0;
10591 
10592 			case xpath_type_string:
10593 			{
10594 				xpath_allocator_capture cr(stack.result);
10595 
10596 				return convert_string_to_number(eval_string(c, stack).c_str());
10597 			}
10598 
10599 			case xpath_type_node_set:
10600 			{
10601 				xpath_allocator_capture cr(stack.result);
10602 
10603 				return convert_string_to_number(eval_string(c, stack).c_str());
10604 			}
10605 
10606 			default:
10607 				assert(false && "Wrong expression for return type number"); // unreachable
10608 				return 0;
10609 			}
10610 		}
10611 
eval_string_concat(const xpath_context & c,const xpath_stack & stack)10612 		xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
10613 		{
10614 			assert(_type == ast_func_concat);
10615 
10616 			xpath_allocator_capture ct(stack.temp);
10617 
10618 			// count the string number
10619 			size_t count = 1;
10620 			for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
10621 
10622 			// allocate a buffer for temporary string objects
10623 			xpath_string* buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
10624 			if (!buffer) return xpath_string();
10625 
10626 			// evaluate all strings to temporary stack
10627 			xpath_stack swapped_stack = {stack.temp, stack.result};
10628 
10629 			buffer[0] = _left->eval_string(c, swapped_stack);
10630 
10631 			size_t pos = 1;
10632 			for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
10633 			assert(pos == count);
10634 
10635 			// get total length
10636 			size_t length = 0;
10637 			for (size_t i = 0; i < count; ++i) length += buffer[i].length();
10638 
10639 			// create final string
10640 			char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
10641 			if (!result) return xpath_string();
10642 
10643 			char_t* ri = result;
10644 
10645 			for (size_t j = 0; j < count; ++j)
10646 				for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
10647 					*ri++ = *bi;
10648 
10649 			*ri = 0;
10650 
10651 			return xpath_string::from_heap_preallocated(result, ri);
10652 		}
10653 
eval_string(const xpath_context & c,const xpath_stack & stack)10654 		xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
10655 		{
10656 			switch (_type)
10657 			{
10658 			case ast_string_constant:
10659 				return xpath_string::from_const(_data.string);
10660 
10661 			case ast_func_local_name_0:
10662 			{
10663 				xpath_node na = c.n;
10664 
10665 				return xpath_string::from_const(local_name(na));
10666 			}
10667 
10668 			case ast_func_local_name_1:
10669 			{
10670 				xpath_allocator_capture cr(stack.result);
10671 
10672 				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10673 				xpath_node na = ns.first();
10674 
10675 				return xpath_string::from_const(local_name(na));
10676 			}
10677 
10678 			case ast_func_name_0:
10679 			{
10680 				xpath_node na = c.n;
10681 
10682 				return xpath_string::from_const(qualified_name(na));
10683 			}
10684 
10685 			case ast_func_name_1:
10686 			{
10687 				xpath_allocator_capture cr(stack.result);
10688 
10689 				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10690 				xpath_node na = ns.first();
10691 
10692 				return xpath_string::from_const(qualified_name(na));
10693 			}
10694 
10695 			case ast_func_namespace_uri_0:
10696 			{
10697 				xpath_node na = c.n;
10698 
10699 				return xpath_string::from_const(namespace_uri(na));
10700 			}
10701 
10702 			case ast_func_namespace_uri_1:
10703 			{
10704 				xpath_allocator_capture cr(stack.result);
10705 
10706 				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10707 				xpath_node na = ns.first();
10708 
10709 				return xpath_string::from_const(namespace_uri(na));
10710 			}
10711 
10712 			case ast_func_string_0:
10713 				return string_value(c.n, stack.result);
10714 
10715 			case ast_func_string_1:
10716 				return _left->eval_string(c, stack);
10717 
10718 			case ast_func_concat:
10719 				return eval_string_concat(c, stack);
10720 
10721 			case ast_func_substring_before:
10722 			{
10723 				xpath_allocator_capture cr(stack.temp);
10724 
10725 				xpath_stack swapped_stack = {stack.temp, stack.result};
10726 
10727 				xpath_string s = _left->eval_string(c, swapped_stack);
10728 				xpath_string p = _right->eval_string(c, swapped_stack);
10729 
10730 				const char_t* pos = find_substring(s.c_str(), p.c_str());
10731 
10732 				return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
10733 			}
10734 
10735 			case ast_func_substring_after:
10736 			{
10737 				xpath_allocator_capture cr(stack.temp);
10738 
10739 				xpath_stack swapped_stack = {stack.temp, stack.result};
10740 
10741 				xpath_string s = _left->eval_string(c, swapped_stack);
10742 				xpath_string p = _right->eval_string(c, swapped_stack);
10743 
10744 				const char_t* pos = find_substring(s.c_str(), p.c_str());
10745 				if (!pos) return xpath_string();
10746 
10747 				const char_t* rbegin = pos + p.length();
10748 				const char_t* rend = s.c_str() + s.length();
10749 
10750 				return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10751 			}
10752 
10753 			case ast_func_substring_2:
10754 			{
10755 				xpath_allocator_capture cr(stack.temp);
10756 
10757 				xpath_stack swapped_stack = {stack.temp, stack.result};
10758 
10759 				xpath_string s = _left->eval_string(c, swapped_stack);
10760 				size_t s_length = s.length();
10761 
10762 				double first = round_nearest(_right->eval_number(c, stack));
10763 
10764 				if (is_nan(first)) return xpath_string(); // NaN
10765 				else if (first >= static_cast<double>(s_length + 1)) return xpath_string();
10766 
10767 				size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10768 				assert(1 <= pos && pos <= s_length + 1);
10769 
10770 				const char_t* rbegin = s.c_str() + (pos - 1);
10771 				const char_t* rend = s.c_str() + s.length();
10772 
10773 				return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10774 			}
10775 
10776 			case ast_func_substring_3:
10777 			{
10778 				xpath_allocator_capture cr(stack.temp);
10779 
10780 				xpath_stack swapped_stack = {stack.temp, stack.result};
10781 
10782 				xpath_string s = _left->eval_string(c, swapped_stack);
10783 				size_t s_length = s.length();
10784 
10785 				double first = round_nearest(_right->eval_number(c, stack));
10786 				double last = first + round_nearest(_right->_next->eval_number(c, stack));
10787 
10788 				if (is_nan(first) || is_nan(last)) return xpath_string();
10789 				else if (first >= static_cast<double>(s_length + 1)) return xpath_string();
10790 				else if (first >= last) return xpath_string();
10791 				else if (last < 1) return xpath_string();
10792 
10793 				size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10794 				size_t end = last >= static_cast<double>(s_length + 1) ? s_length + 1 : static_cast<size_t>(last);
10795 
10796 				assert(1 <= pos && pos <= end && end <= s_length + 1);
10797 				const char_t* rbegin = s.c_str() + (pos - 1);
10798 				const char_t* rend = s.c_str() + (end - 1);
10799 
10800 				return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
10801 			}
10802 
10803 			case ast_func_normalize_space_0:
10804 			{
10805 				xpath_string s = string_value(c.n, stack.result);
10806 
10807 				char_t* begin = s.data(stack.result);
10808 				if (!begin) return xpath_string();
10809 
10810 				char_t* end = normalize_space(begin);
10811 
10812 				return xpath_string::from_heap_preallocated(begin, end);
10813 			}
10814 
10815 			case ast_func_normalize_space_1:
10816 			{
10817 				xpath_string s = _left->eval_string(c, stack);
10818 
10819 				char_t* begin = s.data(stack.result);
10820 				if (!begin) return xpath_string();
10821 
10822 				char_t* end = normalize_space(begin);
10823 
10824 				return xpath_string::from_heap_preallocated(begin, end);
10825 			}
10826 
10827 			case ast_func_translate:
10828 			{
10829 				xpath_allocator_capture cr(stack.temp);
10830 
10831 				xpath_stack swapped_stack = {stack.temp, stack.result};
10832 
10833 				xpath_string s = _left->eval_string(c, stack);
10834 				xpath_string from = _right->eval_string(c, swapped_stack);
10835 				xpath_string to = _right->_next->eval_string(c, swapped_stack);
10836 
10837 				char_t* begin = s.data(stack.result);
10838 				if (!begin) return xpath_string();
10839 
10840 				char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());
10841 
10842 				return xpath_string::from_heap_preallocated(begin, end);
10843 			}
10844 
10845 			case ast_opt_translate_table:
10846 			{
10847 				xpath_string s = _left->eval_string(c, stack);
10848 
10849 				char_t* begin = s.data(stack.result);
10850 				if (!begin) return xpath_string();
10851 
10852 				char_t* end = translate_table(begin, _data.table);
10853 
10854 				return xpath_string::from_heap_preallocated(begin, end);
10855 			}
10856 
10857 			case ast_variable:
10858 			{
10859 				assert(_rettype == _data.variable->type());
10860 
10861 				if (_rettype == xpath_type_string)
10862 					return xpath_string::from_const(_data.variable->get_string());
10863 
10864 				// variable needs to be converted to the correct type, this is handled by the fallthrough block below
10865 				break;
10866 			}
10867 
10868 			default:
10869 				;
10870 			}
10871 
10872 			// none of the ast types that return the value directly matched, we need to perform type conversion
10873 			switch (_rettype)
10874 			{
10875 			case xpath_type_boolean:
10876 				return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
10877 
10878 			case xpath_type_number:
10879 				return convert_number_to_string(eval_number(c, stack), stack.result);
10880 
10881 			case xpath_type_node_set:
10882 			{
10883 				xpath_allocator_capture cr(stack.temp);
10884 
10885 				xpath_stack swapped_stack = {stack.temp, stack.result};
10886 
10887 				xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
10888 				return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
10889 			}
10890 
10891 			default:
10892 				assert(false && "Wrong expression for return type string"); // unreachable
10893 				return xpath_string();
10894 			}
10895 		}
10896 
eval_node_set(const xpath_context & c,const xpath_stack & stack,nodeset_eval_t eval)10897 		xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)
10898 		{
10899 			switch (_type)
10900 			{
10901 			case ast_op_union:
10902 			{
10903 				xpath_allocator_capture cr(stack.temp);
10904 
10905 				xpath_stack swapped_stack = {stack.temp, stack.result};
10906 
10907 				xpath_node_set_raw ls = _left->eval_node_set(c, stack, eval);
10908 				xpath_node_set_raw rs = _right->eval_node_set(c, swapped_stack, eval);
10909 
10910 				// we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
10911 				ls.set_type(xpath_node_set::type_unsorted);
10912 
10913 				ls.append(rs.begin(), rs.end(), stack.result);
10914 				ls.remove_duplicates(stack.temp);
10915 
10916 				return ls;
10917 			}
10918 
10919 			case ast_filter:
10920 			{
10921 				xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all);
10922 
10923 				// either expression is a number or it contains position() call; sort by document order
10924 				if (_test != predicate_posinv) set.sort_do();
10925 
10926 				bool once = eval_once(set.type(), eval);
10927 
10928 				apply_predicate(set, 0, stack, once);
10929 
10930 				return set;
10931 			}
10932 
10933 			case ast_func_id:
10934 				return xpath_node_set_raw();
10935 
10936 			case ast_step:
10937 			{
10938 				switch (_axis)
10939 				{
10940 				case axis_ancestor:
10941 					return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
10942 
10943 				case axis_ancestor_or_self:
10944 					return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
10945 
10946 				case axis_attribute:
10947 					return step_do(c, stack, eval, axis_to_type<axis_attribute>());
10948 
10949 				case axis_child:
10950 					return step_do(c, stack, eval, axis_to_type<axis_child>());
10951 
10952 				case axis_descendant:
10953 					return step_do(c, stack, eval, axis_to_type<axis_descendant>());
10954 
10955 				case axis_descendant_or_self:
10956 					return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
10957 
10958 				case axis_following:
10959 					return step_do(c, stack, eval, axis_to_type<axis_following>());
10960 
10961 				case axis_following_sibling:
10962 					return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
10963 
10964 				case axis_namespace:
10965 					// namespaced axis is not supported
10966 					return xpath_node_set_raw();
10967 
10968 				case axis_parent:
10969 					return step_do(c, stack, eval, axis_to_type<axis_parent>());
10970 
10971 				case axis_preceding:
10972 					return step_do(c, stack, eval, axis_to_type<axis_preceding>());
10973 
10974 				case axis_preceding_sibling:
10975 					return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
10976 
10977 				case axis_self:
10978 					return step_do(c, stack, eval, axis_to_type<axis_self>());
10979 
10980 				default:
10981 					assert(false && "Unknown axis"); // unreachable
10982 					return xpath_node_set_raw();
10983 				}
10984 			}
10985 
10986 			case ast_step_root:
10987 			{
10988 				assert(!_right); // root step can't have any predicates
10989 
10990 				xpath_node_set_raw ns;
10991 
10992 				ns.set_type(xpath_node_set::type_sorted);
10993 
10994 				if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
10995 				else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
10996 
10997 				return ns;
10998 			}
10999 
11000 			case ast_variable:
11001 			{
11002 				assert(_rettype == _data.variable->type());
11003 
11004 				if (_rettype == xpath_type_node_set)
11005 				{
11006 					const xpath_node_set& s = _data.variable->get_node_set();
11007 
11008 					xpath_node_set_raw ns;
11009 
11010 					ns.set_type(s.type());
11011 					ns.append(s.begin(), s.end(), stack.result);
11012 
11013 					return ns;
11014 				}
11015 
11016 				// variable needs to be converted to the correct type, this is handled by the fallthrough block below
11017 				break;
11018 			}
11019 
11020 			default:
11021 				;
11022 			}
11023 
11024 			// none of the ast types that return the value directly matched, but conversions to node set are invalid
11025 			assert(false && "Wrong expression for return type node set"); // unreachable
11026 			return xpath_node_set_raw();
11027 		}
11028 
optimize(xpath_allocator * alloc)11029 		void optimize(xpath_allocator* alloc)
11030 		{
11031 			if (_left)
11032 				_left->optimize(alloc);
11033 
11034 			if (_right)
11035 				_right->optimize(alloc);
11036 
11037 			if (_next)
11038 				_next->optimize(alloc);
11039 
11040 			// coverity[var_deref_model]
11041 			optimize_self(alloc);
11042 		}
11043 
optimize_self(xpath_allocator * alloc)11044 		void optimize_self(xpath_allocator* alloc)
11045 		{
11046 			// Rewrite [position()=expr] with [expr]
11047 			// Note that this step has to go before classification to recognize [position()=1]
11048 			if ((_type == ast_filter || _type == ast_predicate) &&
11049 				_right && // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate)
11050 				_right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number)
11051 			{
11052 				_right = _right->_right;
11053 			}
11054 
11055 			// Classify filter/predicate ops to perform various optimizations during evaluation
11056 			if ((_type == ast_filter || _type == ast_predicate) && _right) // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate)
11057 			{
11058 				assert(_test == predicate_default);
11059 
11060 				if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
11061 					_test = predicate_constant_one;
11062 				else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
11063 					_test = predicate_constant;
11064 				else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
11065 					_test = predicate_posinv;
11066 			}
11067 
11068 			// Rewrite descendant-or-self::node()/child::foo with descendant::foo
11069 			// The former is a full form of //foo, the latter is much faster since it executes the node test immediately
11070 			// Do a similar kind of rewrite for self/descendant/descendant-or-self axes
11071 			// Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
11072 			if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) &&
11073 				_left && _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
11074 				is_posinv_step())
11075 			{
11076 				if (_axis == axis_child || _axis == axis_descendant)
11077 					_axis = axis_descendant;
11078 				else
11079 					_axis = axis_descendant_or_self;
11080 
11081 				_left = _left->_left;
11082 			}
11083 
11084 			// Use optimized lookup table implementation for translate() with constant arguments
11085 			if (_type == ast_func_translate &&
11086 				_right && // workaround for clang static analyzer (_right is never null for ast_func_translate)
11087 				_right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
11088 			{
11089 				unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
11090 
11091 				if (table)
11092 				{
11093 					_type = ast_opt_translate_table;
11094 					_data.table = table;
11095 				}
11096 			}
11097 
11098 			// Use optimized path for @attr = 'value' or @attr = $value
11099 			if (_type == ast_op_equal &&
11100 				_left && _right && // workaround for clang static analyzer and Coverity (_left and _right are never null for ast_op_equal)
11101                 // coverity[mixed_enums]
11102 				_left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
11103 				(_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string)))
11104 			{
11105 				_type = ast_opt_compare_attribute;
11106 			}
11107 		}
11108 
is_posinv_expr() const11109 		bool is_posinv_expr() const
11110 		{
11111 			switch (_type)
11112 			{
11113 			case ast_func_position:
11114 			case ast_func_last:
11115 				return false;
11116 
11117 			case ast_string_constant:
11118 			case ast_number_constant:
11119 			case ast_variable:
11120 				return true;
11121 
11122 			case ast_step:
11123 			case ast_step_root:
11124 				return true;
11125 
11126 			case ast_predicate:
11127 			case ast_filter:
11128 				return true;
11129 
11130 			default:
11131 				if (_left && !_left->is_posinv_expr()) return false;
11132 
11133 				for (xpath_ast_node* n = _right; n; n = n->_next)
11134 					if (!n->is_posinv_expr()) return false;
11135 
11136 				return true;
11137 			}
11138 		}
11139 
is_posinv_step() const11140 		bool is_posinv_step() const
11141 		{
11142 			assert(_type == ast_step);
11143 
11144 			for (xpath_ast_node* n = _right; n; n = n->_next)
11145 			{
11146 				assert(n->_type == ast_predicate);
11147 
11148 				if (n->_test != predicate_posinv)
11149 					return false;
11150 			}
11151 
11152 			return true;
11153 		}
11154 
rettype() const11155 		xpath_value_type rettype() const
11156 		{
11157 			return static_cast<xpath_value_type>(_rettype);
11158 		}
11159 	};
11160 
11161 	static const size_t xpath_ast_depth_limit =
11162 	#ifdef PUGIXML_XPATH_DEPTH_LIMIT
11163 		PUGIXML_XPATH_DEPTH_LIMIT
11164 	#else
11165 		1024
11166 	#endif
11167 		;
11168 
11169 	struct xpath_parser
11170 	{
11171 		xpath_allocator* _alloc;
11172 		xpath_lexer _lexer;
11173 
11174 		const char_t* _query;
11175 		xpath_variable_set* _variables;
11176 
11177 		xpath_parse_result* _result;
11178 
11179 		char_t _scratch[32];
11180 
11181 		size_t _depth;
11182 
errorxpath_parser11183 		xpath_ast_node* error(const char* message)
11184 		{
11185 			_result->error = message;
11186 			_result->offset = _lexer.current_pos() - _query;
11187 
11188 			return 0;
11189 		}
11190 
error_oomxpath_parser11191 		xpath_ast_node* error_oom()
11192 		{
11193 			assert(_alloc->_error);
11194 			*_alloc->_error = true;
11195 
11196 			return 0;
11197 		}
11198 
error_recxpath_parser11199 		xpath_ast_node* error_rec()
11200 		{
11201 			return error("Exceeded maximum allowed query depth");
11202 		}
11203 
alloc_nodexpath_parser11204 		void* alloc_node()
11205 		{
11206 			return _alloc->allocate(sizeof(xpath_ast_node));
11207 		}
11208 
alloc_nodexpath_parser11209 		xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value)
11210 		{
11211 			void* memory = alloc_node();
11212 			return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11213 		}
11214 
alloc_nodexpath_parser11215 		xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value)
11216 		{
11217 			void* memory = alloc_node();
11218 			return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11219 		}
11220 
alloc_nodexpath_parser11221 		xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value)
11222 		{
11223 			void* memory = alloc_node();
11224 			return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11225 		}
11226 
alloc_nodexpath_parser11227 		xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0)
11228 		{
11229 			void* memory = alloc_node();
11230 			return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0;
11231 		}
11232 
alloc_nodexpath_parser11233 		xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents)
11234 		{
11235 			void* memory = alloc_node();
11236 			return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0;
11237 		}
11238 
alloc_nodexpath_parser11239 		xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test)
11240 		{
11241 			void* memory = alloc_node();
11242 			return memory ? new (memory) xpath_ast_node(type, left, right, test) : 0;
11243 		}
11244 
alloc_stringxpath_parser11245 		const char_t* alloc_string(const xpath_lexer_string& value)
11246 		{
11247 			if (!value.begin)
11248 				return PUGIXML_TEXT("");
11249 
11250 			size_t length = static_cast<size_t>(value.end - value.begin);
11251 
11252 			char_t* c = static_cast<char_t*>(_alloc->allocate((length + 1) * sizeof(char_t)));
11253 			if (!c) return 0;
11254 
11255 			memcpy(c, value.begin, length * sizeof(char_t));
11256 			c[length] = 0;
11257 
11258 			return c;
11259 		}
11260 
parse_functionxpath_parser11261 		xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
11262 		{
11263 			switch (name.begin[0])
11264 			{
11265 			case 'b':
11266 				if (name == PUGIXML_TEXT("boolean") && argc == 1)
11267 					return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]);
11268 
11269 				break;
11270 
11271 			case 'c':
11272 				if (name == PUGIXML_TEXT("count") && argc == 1)
11273 				{
11274 					if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11275 					return alloc_node(ast_func_count, xpath_type_number, args[0]);
11276 				}
11277 				else if (name == PUGIXML_TEXT("contains") && argc == 2)
11278 					return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
11279 				else if (name == PUGIXML_TEXT("concat") && argc >= 2)
11280 					return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]);
11281 				else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
11282 					return alloc_node(ast_func_ceiling, xpath_type_number, args[0]);
11283 
11284 				break;
11285 
11286 			case 'f':
11287 				if (name == PUGIXML_TEXT("false") && argc == 0)
11288 					return alloc_node(ast_func_false, xpath_type_boolean);
11289 				else if (name == PUGIXML_TEXT("floor") && argc == 1)
11290 					return alloc_node(ast_func_floor, xpath_type_number, args[0]);
11291 
11292 				break;
11293 
11294 			case 'i':
11295 				if (name == PUGIXML_TEXT("id") && argc == 1)
11296 					return alloc_node(ast_func_id, xpath_type_node_set, args[0]);
11297 
11298 				break;
11299 
11300 			case 'l':
11301 				if (name == PUGIXML_TEXT("last") && argc == 0)
11302 					return alloc_node(ast_func_last, xpath_type_number);
11303 				else if (name == PUGIXML_TEXT("lang") && argc == 1)
11304 					return alloc_node(ast_func_lang, xpath_type_boolean, args[0]);
11305 				else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
11306 				{
11307 					if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11308 					return alloc_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]);
11309 				}
11310 
11311 				break;
11312 
11313 			case 'n':
11314 				if (name == PUGIXML_TEXT("name") && argc <= 1)
11315 				{
11316 					if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11317 					return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]);
11318 				}
11319 				else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
11320 				{
11321 					if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11322 					return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]);
11323 				}
11324 				else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
11325 					return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
11326 				else if (name == PUGIXML_TEXT("not") && argc == 1)
11327 					return alloc_node(ast_func_not, xpath_type_boolean, args[0]);
11328 				else if (name == PUGIXML_TEXT("number") && argc <= 1)
11329 					return alloc_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
11330 
11331 				break;
11332 
11333 			case 'p':
11334 				if (name == PUGIXML_TEXT("position") && argc == 0)
11335 					return alloc_node(ast_func_position, xpath_type_number);
11336 
11337 				break;
11338 
11339 			case 'r':
11340 				if (name == PUGIXML_TEXT("round") && argc == 1)
11341 					return alloc_node(ast_func_round, xpath_type_number, args[0]);
11342 
11343 				break;
11344 
11345 			case 's':
11346 				if (name == PUGIXML_TEXT("string") && argc <= 1)
11347 					return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
11348 				else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
11349 					return alloc_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
11350 				else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
11351 					return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
11352 				else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
11353 					return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
11354 				else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
11355 					return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
11356 				else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
11357 					return alloc_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
11358 				else if (name == PUGIXML_TEXT("sum") && argc == 1)
11359 				{
11360 					if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11361 					return alloc_node(ast_func_sum, xpath_type_number, args[0]);
11362 				}
11363 
11364 				break;
11365 
11366 			case 't':
11367 				if (name == PUGIXML_TEXT("translate") && argc == 3)
11368 					return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]);
11369 				else if (name == PUGIXML_TEXT("true") && argc == 0)
11370 					return alloc_node(ast_func_true, xpath_type_boolean);
11371 
11372 				break;
11373 
11374 			default:
11375 				break;
11376 			}
11377 
11378 			return error("Unrecognized function or wrong parameter count");
11379 		}
11380 
parse_axis_namexpath_parser11381 		axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
11382 		{
11383 			specified = true;
11384 
11385 			switch (name.begin[0])
11386 			{
11387 			case 'a':
11388 				if (name == PUGIXML_TEXT("ancestor"))
11389 					return axis_ancestor;
11390 				else if (name == PUGIXML_TEXT("ancestor-or-self"))
11391 					return axis_ancestor_or_self;
11392 				else if (name == PUGIXML_TEXT("attribute"))
11393 					return axis_attribute;
11394 
11395 				break;
11396 
11397 			case 'c':
11398 				if (name == PUGIXML_TEXT("child"))
11399 					return axis_child;
11400 
11401 				break;
11402 
11403 			case 'd':
11404 				if (name == PUGIXML_TEXT("descendant"))
11405 					return axis_descendant;
11406 				else if (name == PUGIXML_TEXT("descendant-or-self"))
11407 					return axis_descendant_or_self;
11408 
11409 				break;
11410 
11411 			case 'f':
11412 				if (name == PUGIXML_TEXT("following"))
11413 					return axis_following;
11414 				else if (name == PUGIXML_TEXT("following-sibling"))
11415 					return axis_following_sibling;
11416 
11417 				break;
11418 
11419 			case 'n':
11420 				if (name == PUGIXML_TEXT("namespace"))
11421 					return axis_namespace;
11422 
11423 				break;
11424 
11425 			case 'p':
11426 				if (name == PUGIXML_TEXT("parent"))
11427 					return axis_parent;
11428 				else if (name == PUGIXML_TEXT("preceding"))
11429 					return axis_preceding;
11430 				else if (name == PUGIXML_TEXT("preceding-sibling"))
11431 					return axis_preceding_sibling;
11432 
11433 				break;
11434 
11435 			case 's':
11436 				if (name == PUGIXML_TEXT("self"))
11437 					return axis_self;
11438 
11439 				break;
11440 
11441 			default:
11442 				break;
11443 			}
11444 
11445 			specified = false;
11446 			return axis_child;
11447 		}
11448 
parse_node_test_typexpath_parser11449 		nodetest_t parse_node_test_type(const xpath_lexer_string& name)
11450 		{
11451 			switch (name.begin[0])
11452 			{
11453 			case 'c':
11454 				if (name == PUGIXML_TEXT("comment"))
11455 					return nodetest_type_comment;
11456 
11457 				break;
11458 
11459 			case 'n':
11460 				if (name == PUGIXML_TEXT("node"))
11461 					return nodetest_type_node;
11462 
11463 				break;
11464 
11465 			case 'p':
11466 				if (name == PUGIXML_TEXT("processing-instruction"))
11467 					return nodetest_type_pi;
11468 
11469 				break;
11470 
11471 			case 't':
11472 				if (name == PUGIXML_TEXT("text"))
11473 					return nodetest_type_text;
11474 
11475 				break;
11476 
11477 			default:
11478 				break;
11479 			}
11480 
11481 			return nodetest_none;
11482 		}
11483 
11484 		// PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
parse_primary_expressionxpath_parser11485 		xpath_ast_node* parse_primary_expression()
11486 		{
11487 			switch (_lexer.current())
11488 			{
11489 			case lex_var_ref:
11490 			{
11491 				xpath_lexer_string name = _lexer.contents();
11492 
11493 				if (!_variables)
11494 					return error("Unknown variable: variable set is not provided");
11495 
11496 				xpath_variable* var = 0;
11497 				if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var))
11498 					return error_oom();
11499 
11500 				if (!var)
11501 					return error("Unknown variable: variable set does not contain the given name");
11502 
11503 				_lexer.next();
11504 
11505 				return alloc_node(ast_variable, var->type(), var);
11506 			}
11507 
11508 			case lex_open_brace:
11509 			{
11510 				_lexer.next();
11511 
11512 				xpath_ast_node* n = parse_expression();
11513 				if (!n) return 0;
11514 
11515 				if (_lexer.current() != lex_close_brace)
11516 					return error("Expected ')' to match an opening '('");
11517 
11518 				_lexer.next();
11519 
11520 				return n;
11521 			}
11522 
11523 			case lex_quoted_string:
11524 			{
11525 				const char_t* value = alloc_string(_lexer.contents());
11526 				if (!value) return 0;
11527 
11528 				_lexer.next();
11529 
11530 				return alloc_node(ast_string_constant, xpath_type_string, value);
11531 			}
11532 
11533 			case lex_number:
11534 			{
11535 				double value = 0;
11536 
11537 				if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
11538 					return error_oom();
11539 
11540 				_lexer.next();
11541 
11542 				return alloc_node(ast_number_constant, xpath_type_number, value);
11543 			}
11544 
11545 			case lex_string:
11546 			{
11547 				xpath_ast_node* args[2] = {0};
11548 				size_t argc = 0;
11549 
11550 				xpath_lexer_string function = _lexer.contents();
11551 				_lexer.next();
11552 
11553 				xpath_ast_node* last_arg = 0;
11554 
11555 				if (_lexer.current() != lex_open_brace)
11556 					return error("Unrecognized function call");
11557 				_lexer.next();
11558 
11559 				size_t old_depth = _depth;
11560 
11561 				while (_lexer.current() != lex_close_brace)
11562 				{
11563 					if (argc > 0)
11564 					{
11565 						if (_lexer.current() != lex_comma)
11566 							return error("No comma between function arguments");
11567 						_lexer.next();
11568 					}
11569 
11570 					if (++_depth > xpath_ast_depth_limit)
11571 						return error_rec();
11572 
11573 					xpath_ast_node* n = parse_expression();
11574 					if (!n) return 0;
11575 
11576 					if (argc < 2) args[argc] = n;
11577 					else last_arg->set_next(n);
11578 
11579 					argc++;
11580 					last_arg = n;
11581 				}
11582 
11583 				_lexer.next();
11584 
11585 				_depth = old_depth;
11586 
11587 				return parse_function(function, argc, args);
11588 			}
11589 
11590 			default:
11591 				return error("Unrecognizable primary expression");
11592 			}
11593 		}
11594 
11595 		// FilterExpr ::= PrimaryExpr | FilterExpr Predicate
11596 		// Predicate ::= '[' PredicateExpr ']'
11597 		// PredicateExpr ::= Expr
parse_filter_expressionxpath_parser11598 		xpath_ast_node* parse_filter_expression()
11599 		{
11600 			xpath_ast_node* n = parse_primary_expression();
11601 			if (!n) return 0;
11602 
11603 			size_t old_depth = _depth;
11604 
11605 			while (_lexer.current() == lex_open_square_brace)
11606 			{
11607 				_lexer.next();
11608 
11609 				if (++_depth > xpath_ast_depth_limit)
11610 					return error_rec();
11611 
11612 				if (n->rettype() != xpath_type_node_set)
11613 					return error("Predicate has to be applied to node set");
11614 
11615 				xpath_ast_node* expr = parse_expression();
11616 				if (!expr) return 0;
11617 
11618 				n = alloc_node(ast_filter, n, expr, predicate_default);
11619 				if (!n) return 0;
11620 
11621 				if (_lexer.current() != lex_close_square_brace)
11622 					return error("Expected ']' to match an opening '['");
11623 
11624 				_lexer.next();
11625 			}
11626 
11627 			_depth = old_depth;
11628 
11629 			return n;
11630 		}
11631 
11632 		// Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
11633 		// AxisSpecifier ::= AxisName '::' | '@'?
11634 		// NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
11635 		// NameTest ::= '*' | NCName ':' '*' | QName
11636 		// AbbreviatedStep ::= '.' | '..'
parse_stepxpath_parser11637 		xpath_ast_node* parse_step(xpath_ast_node* set)
11638 		{
11639 			if (set && set->rettype() != xpath_type_node_set)
11640 				return error("Step has to be applied to node set");
11641 
11642 			bool axis_specified = false;
11643 			axis_t axis = axis_child; // implied child axis
11644 
11645 			if (_lexer.current() == lex_axis_attribute)
11646 			{
11647 				axis = axis_attribute;
11648 				axis_specified = true;
11649 
11650 				_lexer.next();
11651 			}
11652 			else if (_lexer.current() == lex_dot)
11653 			{
11654 				_lexer.next();
11655 
11656 				if (_lexer.current() == lex_open_square_brace)
11657 					return error("Predicates are not allowed after an abbreviated step");
11658 
11659 				return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0);
11660 			}
11661 			else if (_lexer.current() == lex_double_dot)
11662 			{
11663 				_lexer.next();
11664 
11665 				if (_lexer.current() == lex_open_square_brace)
11666 					return error("Predicates are not allowed after an abbreviated step");
11667 
11668 				return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0);
11669 			}
11670 
11671 			nodetest_t nt_type = nodetest_none;
11672 			xpath_lexer_string nt_name;
11673 
11674 			if (_lexer.current() == lex_string)
11675 			{
11676 				// node name test
11677 				nt_name = _lexer.contents();
11678 				_lexer.next();
11679 
11680 				// was it an axis name?
11681 				if (_lexer.current() == lex_double_colon)
11682 				{
11683 					// parse axis name
11684 					if (axis_specified)
11685 						return error("Two axis specifiers in one step");
11686 
11687 					axis = parse_axis_name(nt_name, axis_specified);
11688 
11689 					if (!axis_specified)
11690 						return error("Unknown axis");
11691 
11692 					// read actual node test
11693 					_lexer.next();
11694 
11695 					if (_lexer.current() == lex_multiply)
11696 					{
11697 						nt_type = nodetest_all;
11698 						nt_name = xpath_lexer_string();
11699 						_lexer.next();
11700 					}
11701 					else if (_lexer.current() == lex_string)
11702 					{
11703 						nt_name = _lexer.contents();
11704 						_lexer.next();
11705 					}
11706 					else
11707 					{
11708 						return error("Unrecognized node test");
11709 					}
11710 				}
11711 
11712 				if (nt_type == nodetest_none)
11713 				{
11714 					// node type test or processing-instruction
11715 					if (_lexer.current() == lex_open_brace)
11716 					{
11717 						_lexer.next();
11718 
11719 						if (_lexer.current() == lex_close_brace)
11720 						{
11721 							_lexer.next();
11722 
11723 							nt_type = parse_node_test_type(nt_name);
11724 
11725 							if (nt_type == nodetest_none)
11726 								return error("Unrecognized node type");
11727 
11728 							nt_name = xpath_lexer_string();
11729 						}
11730 						else if (nt_name == PUGIXML_TEXT("processing-instruction"))
11731 						{
11732 							if (_lexer.current() != lex_quoted_string)
11733 								return error("Only literals are allowed as arguments to processing-instruction()");
11734 
11735 							nt_type = nodetest_pi;
11736 							nt_name = _lexer.contents();
11737 							_lexer.next();
11738 
11739 							if (_lexer.current() != lex_close_brace)
11740 								return error("Unmatched brace near processing-instruction()");
11741 							_lexer.next();
11742 						}
11743 						else
11744 						{
11745 							return error("Unmatched brace near node type test");
11746 						}
11747 					}
11748 					// QName or NCName:*
11749 					else
11750 					{
11751 						if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
11752 						{
11753 							nt_name.end--; // erase *
11754 
11755 							nt_type = nodetest_all_in_namespace;
11756 						}
11757 						else
11758 						{
11759 							nt_type = nodetest_name;
11760 						}
11761 					}
11762 				}
11763 			}
11764 			else if (_lexer.current() == lex_multiply)
11765 			{
11766 				nt_type = nodetest_all;
11767 				_lexer.next();
11768 			}
11769 			else
11770 			{
11771 				return error("Unrecognized node test");
11772 			}
11773 
11774 			const char_t* nt_name_copy = alloc_string(nt_name);
11775 			if (!nt_name_copy) return 0;
11776 
11777 			xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy);
11778 			if (!n) return 0;
11779 
11780 			size_t old_depth = _depth;
11781 
11782 			xpath_ast_node* last = 0;
11783 
11784 			while (_lexer.current() == lex_open_square_brace)
11785 			{
11786 				_lexer.next();
11787 
11788 				if (++_depth > xpath_ast_depth_limit)
11789 					return error_rec();
11790 
11791 				xpath_ast_node* expr = parse_expression();
11792 				if (!expr) return 0;
11793 
11794 				xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default);
11795 				if (!pred) return 0;
11796 
11797 				if (_lexer.current() != lex_close_square_brace)
11798 					return error("Expected ']' to match an opening '['");
11799 				_lexer.next();
11800 
11801 				if (last) last->set_next(pred);
11802 				else n->set_right(pred);
11803 
11804 				last = pred;
11805 			}
11806 
11807 			_depth = old_depth;
11808 
11809 			return n;
11810 		}
11811 
11812 		// RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
parse_relative_location_pathxpath_parser11813 		xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
11814 		{
11815 			xpath_ast_node* n = parse_step(set);
11816 			if (!n) return 0;
11817 
11818 			size_t old_depth = _depth;
11819 
11820 			while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11821 			{
11822 				lexeme_t l = _lexer.current();
11823 				_lexer.next();
11824 
11825 				if (++_depth > xpath_ast_depth_limit)
11826 					return error_rec();
11827 
11828 				if (l == lex_double_slash)
11829 				{
11830 					n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11831 					if (!n) return 0;
11832 				}
11833 
11834 				n = parse_step(n);
11835 				if (!n) return 0;
11836 			}
11837 
11838 			_depth = old_depth;
11839 
11840 			return n;
11841 		}
11842 
11843 		// LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
11844 		// AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
parse_location_pathxpath_parser11845 		xpath_ast_node* parse_location_path()
11846 		{
11847 			if (_lexer.current() == lex_slash)
11848 			{
11849 				_lexer.next();
11850 
11851 				xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
11852 				if (!n) return 0;
11853 
11854 				// relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
11855 				lexeme_t l = _lexer.current();
11856 
11857 				if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
11858 					return parse_relative_location_path(n);
11859 				else
11860 					return n;
11861 			}
11862 			else if (_lexer.current() == lex_double_slash)
11863 			{
11864 				_lexer.next();
11865 
11866 				xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
11867 				if (!n) return 0;
11868 
11869 				n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11870 				if (!n) return 0;
11871 
11872 				return parse_relative_location_path(n);
11873 			}
11874 
11875 			// else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
11876 			return parse_relative_location_path(0);
11877 		}
11878 
11879 		// PathExpr ::= LocationPath
11880 		//				| FilterExpr
11881 		//				| FilterExpr '/' RelativeLocationPath
11882 		//				| FilterExpr '//' RelativeLocationPath
11883 		// UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
11884 		// UnaryExpr ::= UnionExpr | '-' UnaryExpr
parse_path_or_unary_expressionxpath_parser11885 		xpath_ast_node* parse_path_or_unary_expression()
11886 		{
11887 			// Clarification.
11888 			// PathExpr begins with either LocationPath or FilterExpr.
11889 			// FilterExpr begins with PrimaryExpr
11890 			// PrimaryExpr begins with '$' in case of it being a variable reference,
11891 			// '(' in case of it being an expression, string literal, number constant or
11892 			// function call.
11893 			if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
11894 				_lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
11895 				_lexer.current() == lex_string)
11896 			{
11897 				if (_lexer.current() == lex_string)
11898 				{
11899 					// This is either a function call, or not - if not, we shall proceed with location path
11900 					const char_t* state = _lexer.state();
11901 
11902 					while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
11903 
11904 					if (*state != '(')
11905 						return parse_location_path();
11906 
11907 					// This looks like a function call; however this still can be a node-test. Check it.
11908 					if (parse_node_test_type(_lexer.contents()) != nodetest_none)
11909 						return parse_location_path();
11910 				}
11911 
11912 				xpath_ast_node* n = parse_filter_expression();
11913 				if (!n) return 0;
11914 
11915 				if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11916 				{
11917 					lexeme_t l = _lexer.current();
11918 					_lexer.next();
11919 
11920 					if (l == lex_double_slash)
11921 					{
11922 						if (n->rettype() != xpath_type_node_set)
11923 							return error("Step has to be applied to node set");
11924 
11925 						n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11926 						if (!n) return 0;
11927 					}
11928 
11929 					// select from location path
11930 					return parse_relative_location_path(n);
11931 				}
11932 
11933 				return n;
11934 			}
11935 			else if (_lexer.current() == lex_minus)
11936 			{
11937 				_lexer.next();
11938 
11939 				// precedence 7+ - only parses union expressions
11940 				xpath_ast_node* n = parse_expression(7);
11941 				if (!n) return 0;
11942 
11943 				return alloc_node(ast_op_negate, xpath_type_number, n);
11944 			}
11945 			else
11946 			{
11947 				return parse_location_path();
11948 			}
11949 		}
11950 
11951 		struct binary_op_t
11952 		{
11953 			ast_type_t asttype;
11954 			xpath_value_type rettype;
11955 			int precedence;
11956 
binary_op_txpath_parser::binary_op_t11957 			binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
11958 			{
11959 			}
11960 
binary_op_txpath_parser::binary_op_t11961 			binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
11962 			{
11963 			}
11964 
parsexpath_parser::binary_op_t11965 			static binary_op_t parse(xpath_lexer& lexer)
11966 			{
11967 				switch (lexer.current())
11968 				{
11969 				case lex_string:
11970 					if (lexer.contents() == PUGIXML_TEXT("or"))
11971 						return binary_op_t(ast_op_or, xpath_type_boolean, 1);
11972 					else if (lexer.contents() == PUGIXML_TEXT("and"))
11973 						return binary_op_t(ast_op_and, xpath_type_boolean, 2);
11974 					else if (lexer.contents() == PUGIXML_TEXT("div"))
11975 						return binary_op_t(ast_op_divide, xpath_type_number, 6);
11976 					else if (lexer.contents() == PUGIXML_TEXT("mod"))
11977 						return binary_op_t(ast_op_mod, xpath_type_number, 6);
11978 					else
11979 						return binary_op_t();
11980 
11981 				case lex_equal:
11982 					return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
11983 
11984 				case lex_not_equal:
11985 					return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
11986 
11987 				case lex_less:
11988 					return binary_op_t(ast_op_less, xpath_type_boolean, 4);
11989 
11990 				case lex_greater:
11991 					return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
11992 
11993 				case lex_less_or_equal:
11994 					return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
11995 
11996 				case lex_greater_or_equal:
11997 					return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
11998 
11999 				case lex_plus:
12000 					return binary_op_t(ast_op_add, xpath_type_number, 5);
12001 
12002 				case lex_minus:
12003 					return binary_op_t(ast_op_subtract, xpath_type_number, 5);
12004 
12005 				case lex_multiply:
12006 					return binary_op_t(ast_op_multiply, xpath_type_number, 6);
12007 
12008 				case lex_union:
12009 					return binary_op_t(ast_op_union, xpath_type_node_set, 7);
12010 
12011 				default:
12012 					return binary_op_t();
12013 				}
12014 			}
12015 		};
12016 
parse_expression_recxpath_parser12017 		xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
12018 		{
12019 			binary_op_t op = binary_op_t::parse(_lexer);
12020 
12021 			while (op.asttype != ast_unknown && op.precedence >= limit)
12022 			{
12023 				_lexer.next();
12024 
12025 				if (++_depth > xpath_ast_depth_limit)
12026 					return error_rec();
12027 
12028 				xpath_ast_node* rhs = parse_path_or_unary_expression();
12029 				if (!rhs) return 0;
12030 
12031 				binary_op_t nextop = binary_op_t::parse(_lexer);
12032 
12033 				while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
12034 				{
12035 					rhs = parse_expression_rec(rhs, nextop.precedence);
12036 					if (!rhs) return 0;
12037 
12038 					nextop = binary_op_t::parse(_lexer);
12039 				}
12040 
12041 				if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
12042 					return error("Union operator has to be applied to node sets");
12043 
12044 				lhs = alloc_node(op.asttype, op.rettype, lhs, rhs);
12045 				if (!lhs) return 0;
12046 
12047 				op = binary_op_t::parse(_lexer);
12048 			}
12049 
12050 			return lhs;
12051 		}
12052 
12053 		// Expr ::= OrExpr
12054 		// OrExpr ::= AndExpr | OrExpr 'or' AndExpr
12055 		// AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
12056 		// EqualityExpr ::= RelationalExpr
12057 		//					| EqualityExpr '=' RelationalExpr
12058 		//					| EqualityExpr '!=' RelationalExpr
12059 		// RelationalExpr ::= AdditiveExpr
12060 		//					  | RelationalExpr '<' AdditiveExpr
12061 		//					  | RelationalExpr '>' AdditiveExpr
12062 		//					  | RelationalExpr '<=' AdditiveExpr
12063 		//					  | RelationalExpr '>=' AdditiveExpr
12064 		// AdditiveExpr ::= MultiplicativeExpr
12065 		//					| AdditiveExpr '+' MultiplicativeExpr
12066 		//					| AdditiveExpr '-' MultiplicativeExpr
12067 		// MultiplicativeExpr ::= UnaryExpr
12068 		//						  | MultiplicativeExpr '*' UnaryExpr
12069 		//						  | MultiplicativeExpr 'div' UnaryExpr
12070 		//						  | MultiplicativeExpr 'mod' UnaryExpr
parse_expressionxpath_parser12071 		xpath_ast_node* parse_expression(int limit = 0)
12072 		{
12073 			size_t old_depth = _depth;
12074 
12075 			if (++_depth > xpath_ast_depth_limit)
12076 				return error_rec();
12077 
12078 			xpath_ast_node* n = parse_path_or_unary_expression();
12079 			if (!n) return 0;
12080 
12081 			n = parse_expression_rec(n, limit);
12082 
12083 			_depth = old_depth;
12084 
12085 			return n;
12086 		}
12087 
xpath_parserxpath_parser12088 		xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result), _depth(0)
12089 		{
12090 		}
12091 
parsexpath_parser12092 		xpath_ast_node* parse()
12093 		{
12094 			xpath_ast_node* n = parse_expression();
12095 			if (!n) return 0;
12096 
12097 			assert(_depth == 0);
12098 
12099 			// check if there are unparsed tokens left
12100 			if (_lexer.current() != lex_eof)
12101 				return error("Incorrect query");
12102 
12103 			return n;
12104 		}
12105 
parsexpath_parser12106 		static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
12107 		{
12108 			xpath_parser parser(query, variables, alloc, result);
12109 
12110 			return parser.parse();
12111 		}
12112 	};
12113 
12114 	struct xpath_query_impl
12115 	{
createxpath_query_impl12116 		static xpath_query_impl* create()
12117 		{
12118 			void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
12119 			if (!memory) return 0;
12120 
12121 			return new (memory) xpath_query_impl();
12122 		}
12123 
destroyxpath_query_impl12124 		static void destroy(xpath_query_impl* impl)
12125 		{
12126 			// free all allocated pages
12127 			impl->alloc.release();
12128 
12129 			// free allocator memory (with the first page)
12130 			xml_memory::deallocate(impl);
12131 		}
12132 
xpath_query_implxpath_query_impl12133 		xpath_query_impl(): root(0), alloc(&block, &oom), oom(false)
12134 		{
12135 			block.next = 0;
12136 			block.capacity = sizeof(block.data);
12137 		}
12138 
12139 		xpath_ast_node* root;
12140 		xpath_allocator alloc;
12141 		xpath_memory_block block;
12142 		bool oom;
12143 	};
12144 
evaluate_node_set_prepare(xpath_query_impl * impl)12145 	PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
12146 	{
12147 		if (!impl) return 0;
12148 
12149 		if (impl->root->rettype() != xpath_type_node_set)
12150 		{
12151 		#ifdef PUGIXML_NO_EXCEPTIONS
12152 			return 0;
12153 		#else
12154 			xpath_parse_result res;
12155 			res.error = "Expression does not evaluate to node set";
12156 
12157 			throw xpath_exception(res);
12158 		#endif
12159 		}
12160 
12161 		return impl->root;
12162 	}
12163 PUGI__NS_END
12164 
12165 namespace pugi
12166 {
12167 #ifndef PUGIXML_NO_EXCEPTIONS
xpath_exception(const xpath_parse_result & result_)12168 	PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
12169 	{
12170 		assert(_result.error);
12171 	}
12172 
what() const12173 	PUGI__FN const char* xpath_exception::what() const throw()
12174 	{
12175 		return _result.error;
12176 	}
12177 
result() const12178 	PUGI__FN const xpath_parse_result& xpath_exception::result() const
12179 	{
12180 		return _result;
12181 	}
12182 #endif
12183 
xpath_node()12184 	PUGI__FN xpath_node::xpath_node()
12185 	{
12186 	}
12187 
xpath_node(const xml_node & node_)12188 	PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
12189 	{
12190 	}
12191 
xpath_node(const xml_attribute & attribute_,const xml_node & parent_)12192 	PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
12193 	{
12194 	}
12195 
node() const12196 	PUGI__FN xml_node xpath_node::node() const
12197 	{
12198 		return _attribute ? xml_node() : _node;
12199 	}
12200 
attribute() const12201 	PUGI__FN xml_attribute xpath_node::attribute() const
12202 	{
12203 		return _attribute;
12204 	}
12205 
parent() const12206 	PUGI__FN xml_node xpath_node::parent() const
12207 	{
12208 		return _attribute ? _node : _node.parent();
12209 	}
12210 
unspecified_bool_xpath_node(xpath_node ***)12211 	PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
12212 	{
12213 	}
12214 
operator xpath_node::unspecified_bool_type() const12215 	PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
12216 	{
12217 		return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
12218 	}
12219 
operator !() const12220 	PUGI__FN bool xpath_node::operator!() const
12221 	{
12222 		return !(_node || _attribute);
12223 	}
12224 
operator ==(const xpath_node & n) const12225 	PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
12226 	{
12227 		return _node == n._node && _attribute == n._attribute;
12228 	}
12229 
operator !=(const xpath_node & n) const12230 	PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
12231 	{
12232 		return _node != n._node || _attribute != n._attribute;
12233 	}
12234 
12235 #ifdef __BORLANDC__
operator &&(const xpath_node & lhs,bool rhs)12236 	PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
12237 	{
12238 		return (bool)lhs && rhs;
12239 	}
12240 
operator ||(const xpath_node & lhs,bool rhs)12241 	PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
12242 	{
12243 		return (bool)lhs || rhs;
12244 	}
12245 #endif
12246 
_assign(const_iterator begin_,const_iterator end_,type_t type_)12247 	PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_)
12248 	{
12249 		assert(begin_ <= end_);
12250 
12251 		size_t size_ = static_cast<size_t>(end_ - begin_);
12252 
12253 		// use internal buffer for 0 or 1 elements, heap buffer otherwise
12254 		xpath_node* storage = (size_ <= 1) ? _storage : static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
12255 
12256 		if (!storage)
12257 		{
12258 		#ifdef PUGIXML_NO_EXCEPTIONS
12259 			return;
12260 		#else
12261 			throw std::bad_alloc();
12262 		#endif
12263 		}
12264 
12265 		// deallocate old buffer
12266 		if (_begin != _storage)
12267 			impl::xml_memory::deallocate(_begin);
12268 
12269 		// size check is necessary because for begin_ = end_ = nullptr, memcpy is UB
12270 		if (size_)
12271 			memcpy(storage, begin_, size_ * sizeof(xpath_node));
12272 
12273 		_begin = storage;
12274 		_end = storage + size_;
12275 		_type = type_;
12276 	}
12277 
12278 #ifdef PUGIXML_HAS_MOVE
_move(xpath_node_set & rhs)12279 	PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) PUGIXML_NOEXCEPT
12280 	{
12281 		_type = rhs._type;
12282 		_storage[0] = rhs._storage[0];
12283 		_begin = (rhs._begin == rhs._storage) ? _storage : rhs._begin;
12284 		_end = _begin + (rhs._end - rhs._begin);
12285 
12286 		rhs._type = type_unsorted;
12287 		rhs._begin = rhs._storage;
12288 		rhs._end = rhs._storage;
12289 	}
12290 #endif
12291 
xpath_node_set()12292 	PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(_storage), _end(_storage)
12293 	{
12294 	}
12295 
xpath_node_set(const_iterator begin_,const_iterator end_,type_t type_)12296 	PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(_storage), _end(_storage)
12297 	{
12298 		_assign(begin_, end_, type_);
12299 	}
12300 
~xpath_node_set()12301 	PUGI__FN xpath_node_set::~xpath_node_set()
12302 	{
12303 		if (_begin != _storage)
12304 			impl::xml_memory::deallocate(_begin);
12305 	}
12306 
xpath_node_set(const xpath_node_set & ns)12307 	PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(_storage), _end(_storage)
12308 	{
12309 		_assign(ns._begin, ns._end, ns._type);
12310 	}
12311 
operator =(const xpath_node_set & ns)12312 	PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
12313 	{
12314 		if (this == &ns) return *this;
12315 
12316 		_assign(ns._begin, ns._end, ns._type);
12317 
12318 		return *this;
12319 	}
12320 
12321 #ifdef PUGIXML_HAS_MOVE
xpath_node_set(xpath_node_set && rhs)12322 	PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT: _type(type_unsorted), _begin(_storage), _end(_storage)
12323 	{
12324 		_move(rhs);
12325 	}
12326 
operator =(xpath_node_set && rhs)12327 	PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT
12328 	{
12329 		if (this == &rhs) return *this;
12330 
12331 		if (_begin != _storage)
12332 			impl::xml_memory::deallocate(_begin);
12333 
12334 		_move(rhs);
12335 
12336 		return *this;
12337 	}
12338 #endif
12339 
type() const12340 	PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
12341 	{
12342 		return _type;
12343 	}
12344 
size() const12345 	PUGI__FN size_t xpath_node_set::size() const
12346 	{
12347 		return _end - _begin;
12348 	}
12349 
empty() const12350 	PUGI__FN bool xpath_node_set::empty() const
12351 	{
12352 		return _begin == _end;
12353 	}
12354 
operator [](size_t index) const12355 	PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
12356 	{
12357 		assert(index < size());
12358 		return _begin[index];
12359 	}
12360 
begin() const12361 	PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
12362 	{
12363 		return _begin;
12364 	}
12365 
end() const12366 	PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
12367 	{
12368 		return _end;
12369 	}
12370 
sort(bool reverse)12371 	PUGI__FN void xpath_node_set::sort(bool reverse)
12372 	{
12373 		_type = impl::xpath_sort(_begin, _end, _type, reverse);
12374 	}
12375 
first() const12376 	PUGI__FN xpath_node xpath_node_set::first() const
12377 	{
12378 		return impl::xpath_first(_begin, _end, _type);
12379 	}
12380 
xpath_parse_result()12381 	PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
12382 	{
12383 	}
12384 
operator bool() const12385 	PUGI__FN xpath_parse_result::operator bool() const
12386 	{
12387 		return error == 0;
12388 	}
12389 
description() const12390 	PUGI__FN const char* xpath_parse_result::description() const
12391 	{
12392 		return error ? error : "No error";
12393 	}
12394 
xpath_variable(xpath_value_type type_)12395 	PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0)
12396 	{
12397 	}
12398 
name() const12399 	PUGI__FN const char_t* xpath_variable::name() const
12400 	{
12401 		switch (_type)
12402 		{
12403 		case xpath_type_node_set:
12404 			return static_cast<const impl::xpath_variable_node_set*>(this)->name;
12405 
12406 		case xpath_type_number:
12407 			return static_cast<const impl::xpath_variable_number*>(this)->name;
12408 
12409 		case xpath_type_string:
12410 			return static_cast<const impl::xpath_variable_string*>(this)->name;
12411 
12412 		case xpath_type_boolean:
12413 			return static_cast<const impl::xpath_variable_boolean*>(this)->name;
12414 
12415 		default:
12416 			assert(false && "Invalid variable type"); // unreachable
12417 			return 0;
12418 		}
12419 	}
12420 
type() const12421 	PUGI__FN xpath_value_type xpath_variable::type() const
12422 	{
12423 		return _type;
12424 	}
12425 
get_boolean() const12426 	PUGI__FN bool xpath_variable::get_boolean() const
12427 	{
12428 		return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
12429 	}
12430 
get_number() const12431 	PUGI__FN double xpath_variable::get_number() const
12432 	{
12433 		return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
12434 	}
12435 
get_string() const12436 	PUGI__FN const char_t* xpath_variable::get_string() const
12437 	{
12438 		const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
12439 		return value ? value : PUGIXML_TEXT("");
12440 	}
12441 
get_node_set() const12442 	PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
12443 	{
12444 		return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
12445 	}
12446 
set(bool value)12447 	PUGI__FN bool xpath_variable::set(bool value)
12448 	{
12449 		if (_type != xpath_type_boolean) return false;
12450 
12451 		static_cast<impl::xpath_variable_boolean*>(this)->value = value;
12452 		return true;
12453 	}
12454 
set(double value)12455 	PUGI__FN bool xpath_variable::set(double value)
12456 	{
12457 		if (_type != xpath_type_number) return false;
12458 
12459 		static_cast<impl::xpath_variable_number*>(this)->value = value;
12460 		return true;
12461 	}
12462 
set(const char_t * value)12463 	PUGI__FN bool xpath_variable::set(const char_t* value)
12464 	{
12465 		if (_type != xpath_type_string) return false;
12466 
12467 		impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
12468 
12469 		// duplicate string
12470 		size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
12471 
12472 		char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
12473 		if (!copy) return false;
12474 
12475 		memcpy(copy, value, size);
12476 
12477 		// replace old string
12478 		if (var->value) impl::xml_memory::deallocate(var->value);
12479 		var->value = copy;
12480 
12481 		return true;
12482 	}
12483 
set(const xpath_node_set & value)12484 	PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
12485 	{
12486 		if (_type != xpath_type_node_set) return false;
12487 
12488 		static_cast<impl::xpath_variable_node_set*>(this)->value = value;
12489 		return true;
12490 	}
12491 
xpath_variable_set()12492 	PUGI__FN xpath_variable_set::xpath_variable_set()
12493 	{
12494 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12495 			_data[i] = 0;
12496 	}
12497 
~xpath_variable_set()12498 	PUGI__FN xpath_variable_set::~xpath_variable_set()
12499 	{
12500 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12501 			_destroy(_data[i]);
12502 	}
12503 
xpath_variable_set(const xpath_variable_set & rhs)12504 	PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
12505 	{
12506 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12507 			_data[i] = 0;
12508 
12509 		_assign(rhs);
12510 	}
12511 
operator =(const xpath_variable_set & rhs)12512 	PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
12513 	{
12514 		if (this == &rhs) return *this;
12515 
12516 		_assign(rhs);
12517 
12518 		return *this;
12519 	}
12520 
12521 #ifdef PUGIXML_HAS_MOVE
xpath_variable_set(xpath_variable_set && rhs)12522 	PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
12523 	{
12524 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12525 		{
12526 			_data[i] = rhs._data[i];
12527 			rhs._data[i] = 0;
12528 		}
12529 	}
12530 
operator =(xpath_variable_set && rhs)12531 	PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
12532 	{
12533 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12534 		{
12535 			_destroy(_data[i]);
12536 
12537 			_data[i] = rhs._data[i];
12538 			rhs._data[i] = 0;
12539 		}
12540 
12541 		return *this;
12542 	}
12543 #endif
12544 
_assign(const xpath_variable_set & rhs)12545 	PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
12546 	{
12547 		xpath_variable_set temp;
12548 
12549 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12550 			if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i]))
12551 				return;
12552 
12553 		_swap(temp);
12554 	}
12555 
_swap(xpath_variable_set & rhs)12556 	PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
12557 	{
12558 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12559 		{
12560 			xpath_variable* chain = _data[i];
12561 
12562 			_data[i] = rhs._data[i];
12563 			rhs._data[i] = chain;
12564 		}
12565 	}
12566 
_find(const char_t * name) const12567 	PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const
12568 	{
12569 		const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12570 		size_t hash = impl::hash_string(name) % hash_size;
12571 
12572 		// look for existing variable
12573 		for (xpath_variable* var = _data[hash]; var; var = var->_next)
12574 			if (impl::strequal(var->name(), name))
12575 				return var;
12576 
12577 		return 0;
12578 	}
12579 
_clone(xpath_variable * var,xpath_variable ** out_result)12580 	PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result)
12581 	{
12582 		xpath_variable* last = 0;
12583 
12584 		while (var)
12585 		{
12586 			// allocate storage for new variable
12587 			xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name());
12588 			if (!nvar) return false;
12589 
12590 			// link the variable to the result immediately to handle failures gracefully
12591 			if (last)
12592 				last->_next = nvar;
12593 			else
12594 				*out_result = nvar;
12595 
12596 			last = nvar;
12597 
12598 			// copy the value; this can fail due to out-of-memory conditions
12599 			if (!impl::copy_xpath_variable(nvar, var)) return false;
12600 
12601 			var = var->_next;
12602 		}
12603 
12604 		return true;
12605 	}
12606 
_destroy(xpath_variable * var)12607 	PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var)
12608 	{
12609 		while (var)
12610 		{
12611 			xpath_variable* next = var->_next;
12612 
12613 			impl::delete_xpath_variable(var->_type, var);
12614 
12615 			var = next;
12616 		}
12617 	}
12618 
add(const char_t * name,xpath_value_type type)12619 	PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
12620 	{
12621 		const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12622 		size_t hash = impl::hash_string(name) % hash_size;
12623 
12624 		// look for existing variable
12625 		for (xpath_variable* var = _data[hash]; var; var = var->_next)
12626 			if (impl::strequal(var->name(), name))
12627 				return var->type() == type ? var : 0;
12628 
12629 		// add new variable
12630 		xpath_variable* result = impl::new_xpath_variable(type, name);
12631 
12632 		if (result)
12633 		{
12634 			result->_next = _data[hash];
12635 
12636 			_data[hash] = result;
12637 		}
12638 
12639 		return result;
12640 	}
12641 
set(const char_t * name,bool value)12642 	PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
12643 	{
12644 		xpath_variable* var = add(name, xpath_type_boolean);
12645 		return var ? var->set(value) : false;
12646 	}
12647 
set(const char_t * name,double value)12648 	PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
12649 	{
12650 		xpath_variable* var = add(name, xpath_type_number);
12651 		return var ? var->set(value) : false;
12652 	}
12653 
set(const char_t * name,const char_t * value)12654 	PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
12655 	{
12656 		xpath_variable* var = add(name, xpath_type_string);
12657 		return var ? var->set(value) : false;
12658 	}
12659 
set(const char_t * name,const xpath_node_set & value)12660 	PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
12661 	{
12662 		xpath_variable* var = add(name, xpath_type_node_set);
12663 		return var ? var->set(value) : false;
12664 	}
12665 
get(const char_t * name)12666 	PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
12667 	{
12668 		return _find(name);
12669 	}
12670 
get(const char_t * name) const12671 	PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
12672 	{
12673 		return _find(name);
12674 	}
12675 
xpath_query(const char_t * query,xpath_variable_set * variables)12676 	PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
12677 	{
12678 		impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
12679 
12680 		if (!qimpl)
12681 		{
12682 		#ifdef PUGIXML_NO_EXCEPTIONS
12683 			_result.error = "Out of memory";
12684 		#else
12685 			throw std::bad_alloc();
12686 		#endif
12687 		}
12688 		else
12689 		{
12690 			using impl::auto_deleter; // MSVC7 workaround
12691 			auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy);
12692 
12693 			qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
12694 
12695 			if (qimpl->root)
12696 			{
12697 				qimpl->root->optimize(&qimpl->alloc);
12698 
12699 				_impl = impl.release();
12700 				_result.error = 0;
12701 			}
12702 			else
12703 			{
12704 			#ifdef PUGIXML_NO_EXCEPTIONS
12705 				if (qimpl->oom) _result.error = "Out of memory";
12706 			#else
12707 				if (qimpl->oom) throw std::bad_alloc();
12708 				throw xpath_exception(_result);
12709 			#endif
12710 			}
12711 		}
12712 	}
12713 
xpath_query()12714 	PUGI__FN xpath_query::xpath_query(): _impl(0)
12715 	{
12716 	}
12717 
~xpath_query()12718 	PUGI__FN xpath_query::~xpath_query()
12719 	{
12720 		if (_impl)
12721 			impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12722 	}
12723 
12724 #ifdef PUGIXML_HAS_MOVE
xpath_query(xpath_query && rhs)12725 	PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT
12726 	{
12727 		_impl = rhs._impl;
12728 		_result = rhs._result;
12729 		rhs._impl = 0;
12730 		rhs._result = xpath_parse_result();
12731 	}
12732 
operator =(xpath_query && rhs)12733 	PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT
12734 	{
12735 		if (this == &rhs) return *this;
12736 
12737 		if (_impl)
12738 			impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12739 
12740 		_impl = rhs._impl;
12741 		_result = rhs._result;
12742 		rhs._impl = 0;
12743 		rhs._result = xpath_parse_result();
12744 
12745 		return *this;
12746 	}
12747 #endif
12748 
return_type() const12749 	PUGI__FN xpath_value_type xpath_query::return_type() const
12750 	{
12751 		if (!_impl) return xpath_type_none;
12752 
12753 		return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
12754 	}
12755 
evaluate_boolean(const xpath_node & n) const12756 	PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
12757 	{
12758 		if (!_impl) return false;
12759 
12760 		impl::xpath_context c(n, 1, 1);
12761 		impl::xpath_stack_data sd;
12762 
12763 		bool r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
12764 
12765 		if (sd.oom)
12766 		{
12767 		#ifdef PUGIXML_NO_EXCEPTIONS
12768 			return false;
12769 		#else
12770 			throw std::bad_alloc();
12771 		#endif
12772 		}
12773 
12774 		return r;
12775 	}
12776 
evaluate_number(const xpath_node & n) const12777 	PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
12778 	{
12779 		if (!_impl) return impl::gen_nan();
12780 
12781 		impl::xpath_context c(n, 1, 1);
12782 		impl::xpath_stack_data sd;
12783 
12784 		double r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
12785 
12786 		if (sd.oom)
12787 		{
12788 		#ifdef PUGIXML_NO_EXCEPTIONS
12789 			return impl::gen_nan();
12790 		#else
12791 			throw std::bad_alloc();
12792 		#endif
12793 		}
12794 
12795 		return r;
12796 	}
12797 
12798 #ifndef PUGIXML_NO_STL
evaluate_string(const xpath_node & n) const12799 	PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
12800 	{
12801 		if (!_impl) return string_t();
12802 
12803 		impl::xpath_context c(n, 1, 1);
12804 		impl::xpath_stack_data sd;
12805 
12806 		impl::xpath_string r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack);
12807 
12808 		if (sd.oom)
12809 		{
12810 		#ifdef PUGIXML_NO_EXCEPTIONS
12811 			return string_t();
12812 		#else
12813 			throw std::bad_alloc();
12814 		#endif
12815 		}
12816 
12817 		return string_t(r.c_str(), r.length());
12818 	}
12819 #endif
12820 
evaluate_string(char_t * buffer,size_t capacity,const xpath_node & n) const12821 	PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
12822 	{
12823 		impl::xpath_context c(n, 1, 1);
12824 		impl::xpath_stack_data sd;
12825 
12826 		impl::xpath_string r = _impl ? static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack) : impl::xpath_string();
12827 
12828 		if (sd.oom)
12829 		{
12830 		#ifdef PUGIXML_NO_EXCEPTIONS
12831 			r = impl::xpath_string();
12832 		#else
12833 			throw std::bad_alloc();
12834 		#endif
12835 		}
12836 
12837 		size_t full_size = r.length() + 1;
12838 
12839 		if (capacity > 0)
12840 		{
12841 			size_t size = (full_size < capacity) ? full_size : capacity;
12842 			assert(size > 0);
12843 
12844 			memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
12845 			buffer[size - 1] = 0;
12846 		}
12847 
12848 		return full_size;
12849 	}
12850 
evaluate_node_set(const xpath_node & n) const12851 	PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
12852 	{
12853 		impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12854 		if (!root) return xpath_node_set();
12855 
12856 		impl::xpath_context c(n, 1, 1);
12857 		impl::xpath_stack_data sd;
12858 
12859 		impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
12860 
12861 		if (sd.oom)
12862 		{
12863 		#ifdef PUGIXML_NO_EXCEPTIONS
12864 			return xpath_node_set();
12865 		#else
12866 			throw std::bad_alloc();
12867 		#endif
12868 		}
12869 
12870 		return xpath_node_set(r.begin(), r.end(), r.type());
12871 	}
12872 
evaluate_node(const xpath_node & n) const12873 	PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
12874 	{
12875 		impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12876 		if (!root) return xpath_node();
12877 
12878 		impl::xpath_context c(n, 1, 1);
12879 		impl::xpath_stack_data sd;
12880 
12881 		impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
12882 
12883 		if (sd.oom)
12884 		{
12885 		#ifdef PUGIXML_NO_EXCEPTIONS
12886 			return xpath_node();
12887 		#else
12888 			throw std::bad_alloc();
12889 		#endif
12890 		}
12891 
12892 		return r.first();
12893 	}
12894 
result() const12895 	PUGI__FN const xpath_parse_result& xpath_query::result() const
12896 	{
12897 		return _result;
12898 	}
12899 
unspecified_bool_xpath_query(xpath_query ***)12900 	PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
12901 	{
12902 	}
12903 
operator xpath_query::unspecified_bool_type() const12904 	PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
12905 	{
12906 		return _impl ? unspecified_bool_xpath_query : 0;
12907 	}
12908 
operator !() const12909 	PUGI__FN bool xpath_query::operator!() const
12910 	{
12911 		return !_impl;
12912 	}
12913 
select_node(const char_t * query,xpath_variable_set * variables) const12914 	PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
12915 	{
12916 		xpath_query q(query, variables);
12917 		return q.evaluate_node(*this);
12918 	}
12919 
select_node(const xpath_query & query) const12920 	PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const
12921 	{
12922 		return query.evaluate_node(*this);
12923 	}
12924 
select_nodes(const char_t * query,xpath_variable_set * variables) const12925 	PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
12926 	{
12927 		xpath_query q(query, variables);
12928 		return q.evaluate_node_set(*this);
12929 	}
12930 
select_nodes(const xpath_query & query) const12931 	PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
12932 	{
12933 		return query.evaluate_node_set(*this);
12934 	}
12935 
select_single_node(const char_t * query,xpath_variable_set * variables) const12936 	PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
12937 	{
12938 		xpath_query q(query, variables);
12939 		return q.evaluate_node(*this);
12940 	}
12941 
select_single_node(const xpath_query & query) const12942 	PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
12943 	{
12944 		return query.evaluate_node(*this);
12945 	}
12946 }
12947 
12948 #endif
12949 
12950 #ifdef __BORLANDC__
12951 #	pragma option pop
12952 #endif
12953 
12954 // Intel C++ does not properly keep warning state for function templates,
12955 // so popping warning state at the end of translation unit leads to warnings in the middle.
12956 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
12957 #	pragma warning(pop)
12958 #endif
12959 
12960 #if defined(_MSC_VER) && defined(__c2__)
12961 #	pragma clang diagnostic pop
12962 #endif
12963 
12964 // Undefine all local macros (makes sure we're not leaking macros in header-only mode)
12965 #undef PUGI__NO_INLINE
12966 #undef PUGI__UNLIKELY
12967 #undef PUGI__STATIC_ASSERT
12968 #undef PUGI__DMC_VOLATILE
12969 #undef PUGI__UNSIGNED_OVERFLOW
12970 #undef PUGI__MSVC_CRT_VERSION
12971 #undef PUGI__SNPRINTF
12972 #undef PUGI__NS_BEGIN
12973 #undef PUGI__NS_END
12974 #undef PUGI__FN
12975 #undef PUGI__FN_NO_INLINE
12976 #undef PUGI__GETHEADER_IMPL
12977 #undef PUGI__GETPAGE_IMPL
12978 #undef PUGI__GETPAGE
12979 #undef PUGI__NODETYPE
12980 #undef PUGI__IS_CHARTYPE_IMPL
12981 #undef PUGI__IS_CHARTYPE
12982 #undef PUGI__IS_CHARTYPEX
12983 #undef PUGI__ENDSWITH
12984 #undef PUGI__SKIPWS
12985 #undef PUGI__OPTSET
12986 #undef PUGI__PUSHNODE
12987 #undef PUGI__POPNODE
12988 #undef PUGI__SCANFOR
12989 #undef PUGI__SCANWHILE
12990 #undef PUGI__SCANWHILE_UNROLL
12991 #undef PUGI__ENDSEG
12992 #undef PUGI__THROW_ERROR
12993 #undef PUGI__CHECK_ERROR
12994 
12995 #endif
12996 
12997 /**
12998  * Copyright (c) 2006-2020 Arseny Kapoulkine
12999  *
13000  * Permission is hereby granted, free of charge, to any person
13001  * obtaining a copy of this software and associated documentation
13002  * files (the "Software"), to deal in the Software without
13003  * restriction, including without limitation the rights to use,
13004  * copy, modify, merge, publish, distribute, sublicense, and/or sell
13005  * copies of the Software, and to permit persons to whom the
13006  * Software is furnished to do so, subject to the following
13007  * conditions:
13008  *
13009  * The above copyright notice and this permission notice shall be
13010  * included in all copies or substantial portions of the Software.
13011  *
13012  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13013  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
13014  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
13015  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
13016  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
13017  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
13018  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
13019  * OTHER DEALINGS IN THE SOFTWARE.
13020  */
13021