1 /**
2  * pugixml parser - version 1.11
3  * --------------------------------------------------------
4  * Copyright (C) 2006-2020, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
5  * Report bugs and download new versions at https://pugixml.org/
6  *
7  * This library is distributed under the MIT License. See notice at the end
8  * of this file.
9  *
10  * This work is based on the pugxml parser, which is:
11  * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
12  */
13 
14 #ifndef SOURCE_PUGIXML_CPP
15 #define SOURCE_PUGIXML_CPP
16 
17 #include "pugixml.hpp"
18 
19 #include <stdlib.h>
20 #include <stdio.h>
21 #include <string.h>
22 #include <assert.h>
23 #include <limits.h>
24 
25 #ifdef PUGIXML_WCHAR_MODE
26 #	include <wchar.h>
27 #endif
28 
29 #ifndef PUGIXML_NO_XPATH
30 #	include <math.h>
31 #	include <float.h>
32 #endif
33 
34 #ifndef PUGIXML_NO_STL
35 #	include <istream>
36 #	include <ostream>
37 #	include <string>
38 #endif
39 
40 // For placement new
41 #include <new>
42 
43 #ifdef _MSC_VER
44 #	pragma warning(push)
45 #	pragma warning(disable: 4127) // conditional expression is constant
46 #	pragma warning(disable: 4324) // structure was padded due to __declspec(align())
47 #	pragma warning(disable: 4702) // unreachable code
48 #	pragma warning(disable: 4996) // this function or variable may be unsafe
49 #endif
50 
51 #if defined(_MSC_VER) && defined(__c2__)
52 #	pragma clang diagnostic push
53 #	pragma clang diagnostic ignored "-Wdeprecated" // this function or variable may be unsafe
54 #endif
55 
56 #ifdef __INTEL_COMPILER
57 #	pragma warning(disable: 177) // function was declared but never referenced
58 #	pragma warning(disable: 279) // controlling expression is constant
59 #	pragma warning(disable: 1478 1786) // function was declared "deprecated"
60 #	pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
61 #endif
62 
63 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
64 #	pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
65 #endif
66 
67 #ifdef __BORLANDC__
68 #	pragma option push
69 #	pragma warn -8008 // condition is always false
70 #	pragma warn -8066 // unreachable code
71 #endif
72 
73 #ifdef __SNC__
74 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
75 #	pragma diag_suppress=178 // function was declared but never referenced
76 #	pragma diag_suppress=237 // controlling expression is constant
77 #endif
78 
79 #ifdef __TI_COMPILER_VERSION__
80 #	pragma diag_suppress 179 // function was declared but never referenced
81 #endif
82 
83 // Inlining controls
84 #if defined(_MSC_VER) && _MSC_VER >= 1300
85 #	define PUGI__NO_INLINE __declspec(noinline)
86 #elif defined(__GNUC__)
87 #	define PUGI__NO_INLINE __attribute__((noinline))
88 #else
89 #	define PUGI__NO_INLINE
90 #endif
91 
92 // Branch weight controls
93 #if defined(__GNUC__) && !defined(__c2__)
94 #	define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
95 #else
96 #	define PUGI__UNLIKELY(cond) (cond)
97 #endif
98 
99 // Simple static assertion
100 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
101 
102 // Digital Mars C++ bug workaround for passing char loaded from memory via stack
103 #ifdef __DMC__
104 #	define PUGI__DMC_VOLATILE volatile
105 #else
106 #	define PUGI__DMC_VOLATILE
107 #endif
108 
109 // Integer sanitizer workaround; we only apply this for clang since gcc8 has no_sanitize but not unsigned-integer-overflow and produces "attribute directive ignored" warnings
110 #if defined(__clang__) && defined(__has_attribute)
111 #	if __has_attribute(no_sanitize)
112 #		define PUGI__UNSIGNED_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow")))
113 #	else
114 #		define PUGI__UNSIGNED_OVERFLOW
115 #	endif
116 #else
117 #	define PUGI__UNSIGNED_OVERFLOW
118 #endif
119 
120 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
121 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
122 using std::memcpy;
123 using std::memmove;
124 using std::memset;
125 #endif
126 
127 // Some MinGW/GCC versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions from limits.h in some configurations
128 #if defined(PUGIXML_HAS_LONG_LONG) && defined(__GNUC__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX)
129 #	define LLONG_MIN (-LLONG_MAX - 1LL)
130 #	define LLONG_MAX __LONG_LONG_MAX__
131 #	define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL)
132 #endif
133 
134 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
135 #if defined(_MSC_VER) && !defined(__S3E__)
136 #	define PUGI__MSVC_CRT_VERSION _MSC_VER
137 #endif
138 
139 // Not all platforms have snprintf; we define a wrapper that uses snprintf if possible. This only works with buffers with a known size.
140 #if __cplusplus >= 201103
141 #	define PUGI__SNPRINTF(buf, ...) snprintf(buf, sizeof(buf), __VA_ARGS__)
142 #elif defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
143 #	define PUGI__SNPRINTF(buf, ...) _snprintf_s(buf, _countof(buf), _TRUNCATE, __VA_ARGS__)
144 #else
145 #	define PUGI__SNPRINTF sprintf
146 #endif
147 
148 // We put implementation details into an anonymous namespace in source mode, but have to keep it in non-anonymous namespace in header-only mode to prevent binary bloat.
149 #ifdef PUGIXML_HEADER_ONLY
150 #	define PUGI__NS_BEGIN namespace pugi { namespace impl {
151 #	define PUGI__NS_END } }
152 #	define PUGI__FN inline
153 #	define PUGI__FN_NO_INLINE inline
154 #else
155 #	if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
156 #		define PUGI__NS_BEGIN namespace pugi { namespace impl {
157 #		define PUGI__NS_END } }
158 #	else
159 #		define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
160 #		define PUGI__NS_END } } }
161 #	endif
162 #	define PUGI__FN
163 #	define PUGI__FN_NO_INLINE PUGI__NO_INLINE
164 #endif
165 
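// uintptr_t and fixed-width integer types: older MSVC/Borland get local typedefs in namespace pugi, everything else uses <stdint.h>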
167 #if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561)
168 namespace pugi
169 {
170 #	ifndef _UINTPTR_T_DEFINED
171 	typedef size_t uintptr_t;
172 #	endif
173 
174 	typedef unsigned __int8 uint8_t;
175 	typedef unsigned __int16 uint16_t;
176 	typedef unsigned __int32 uint32_t;
177 }
178 #else
179 #	include <stdint.h>
180 #endif
181 
182 // Memory allocation
183 PUGI__NS_BEGIN
default_allocate(size_t size)184 	PUGI__FN void* default_allocate(size_t size)
185 	{
186 		return malloc(size);
187 	}
188 
default_deallocate(void * ptr)189 	PUGI__FN void default_deallocate(void* ptr)
190 	{
191 		free(ptr);
192 	}
193 
	// Static storage for the allocation/deallocation function pointers.
	// The template parameter T is not used by any member; the type is a template only so that
	// the static members can be defined here without causing multiple definitions (see below).
	template <typename T>
	struct xml_memory_management_function_storage
	{
		static allocation_function allocate; // initialized to default_allocate below
		static deallocation_function deallocate; // initialized to default_deallocate below
	};

	// Global allocation functions are stored in class statics so that in header mode linker deduplicates them
	// Without a template<> we'll get multiple definitions of the same static
	template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
	template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;

	// The instantiation used by the rest of this file; T is unused, so int is presumably an arbitrary choice
	typedef xml_memory_management_function_storage<int> xml_memory;
207 PUGI__NS_END
208 
209 // String utilities
210 PUGI__NS_BEGIN
211 	// Get string length
strlength(const char_t * s)212 	PUGI__FN size_t strlength(const char_t* s)
213 	{
214 		assert(s);
215 
216 	#ifdef PUGIXML_WCHAR_MODE
217 		return wcslen(s);
218 	#else
219 		return strlen(s);
220 	#endif
221 	}
222 
223 	// Compare two strings
strequal(const char_t * src,const char_t * dst)224 	PUGI__FN bool strequal(const char_t* src, const char_t* dst)
225 	{
226 		assert(src && dst);
227 
228 	#ifdef PUGIXML_WCHAR_MODE
229 		return wcscmp(src, dst) == 0;
230 	#else
231 		return strcmp(src, dst) == 0;
232 	#endif
233 	}
234 
	// Compare null-terminated lhs with the character range [rhs, rhs + count)
strequalrange(const char_t * lhs,const char_t * rhs,size_t count)236 	PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
237 	{
238 		for (size_t i = 0; i < count; ++i)
239 			if (lhs[i] != rhs[i])
240 				return false;
241 
242 		return lhs[count] == 0;
243 	}
244 
245 	// Get length of wide string, even if CRT lacks wide character support
strlength_wide(const wchar_t * s)246 	PUGI__FN size_t strlength_wide(const wchar_t* s)
247 	{
248 		assert(s);
249 
250 	#ifdef PUGIXML_WCHAR_MODE
251 		return wcslen(s);
252 	#else
253 		const wchar_t* end = s;
254 		while (*end) end++;
255 		return static_cast<size_t>(end - s);
256 	#endif
257 	}
258 PUGI__NS_END
259 
260 // auto_ptr-like object for exception recovery
261 PUGI__NS_BEGIN
262 	template <typename T> struct auto_deleter
263 	{
264 		typedef void (*D)(T*);
265 
266 		T* data;
267 		D deleter;
268 
auto_deleterauto_deleter269 		auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
270 		{
271 		}
272 
~auto_deleterauto_deleter273 		~auto_deleter()
274 		{
275 			if (data) deleter(data);
276 		}
277 
releaseauto_deleter278 		T* release()
279 		{
280 			T* result = data;
281 			data = 0;
282 			return result;
283 		}
284 	};
285 PUGI__NS_END
286 
287 #ifdef PUGIXML_COMPACT
288 PUGI__NS_BEGIN
	// Open-addressing hash table that maps a non-null pointer key to a pointer value.
	// A zero key marks an empty slot, so null keys cannot be stored (get_item asserts on them).
	// The bucket index is computed by masking with _capacity - 1; rehash() only produces capacities of 32 * 2^k.
	// The class has no destructor: the bucket array is released only by clear() (or replaced by rehash()).
	class compact_hash_table
	{
	public:
		compact_hash_table(): _items(0), _capacity(0), _count(0)
		{
		}

		// Releases the bucket array (if any) and returns the table to the empty state
		void clear()
		{
			if (_items)
			{
				xml_memory::deallocate(_items);
				_items = 0;
				_capacity = 0;
				_count = 0;
			}
		}

		// Returns the value stored for key, or 0 if the table is empty or the key is absent
		// (the slot returned by get_item for an absent key is an empty one whose value is 0)
		void* find(const void* key)
		{
			if (_capacity == 0) return 0;

			item_t* item = get_item(key);
			assert(item);
			assert(item->key == key || (item->key == 0 && item->value == 0));

			return item->value;
		}

		// Stores value for key, overwriting any previous value.
		// The caller must have made room beforehand via reserve(); the assert checks the table is below 3/4 load.
		void insert(const void* key, void* value)
		{
			assert(_capacity != 0 && _count < _capacity - _capacity / 4);

			item_t* item = get_item(key);
			assert(item);

			// an empty slot means the key is new
			if (item->key == 0)
			{
				_count++;
				item->key = key;
			}

			item->value = value;
		}

		// Makes sure 'extra' more insertions keep the table below 3/4 load, growing it if needed.
		// Returns false if growing failed (see rehash), true otherwise.
		bool reserve(size_t extra = 16)
		{
			if (_count + extra >= _capacity - _capacity / 4)
				return rehash(_count + extra);

			return true;
		}

	private:
		struct item_t
		{
			const void* key; // 0 for an empty slot
			void* value;
		};

		item_t* _items; // bucket array with _capacity entries, or 0
		size_t _capacity;

		size_t _count; // number of occupied slots

		// Reallocates the bucket array so that 'count' entries fit; defined out of line
		bool rehash(size_t count);

		// Returns the slot that holds key, or the first empty slot on its probe sequence if key is absent.
		// Requires a non-null key and a non-empty table (asserted).
		item_t* get_item(const void* key)
		{
			assert(key);
			assert(_capacity > 0);

			size_t hashmod = _capacity - 1;
			size_t bucket = hash(key) & hashmod;

			for (size_t probe = 0; probe <= hashmod; ++probe)
			{
				item_t& probe_item = _items[bucket];

				if (probe_item.key == key || probe_item.key == 0)
					return &probe_item;

				// hash collision, quadratic probing
				// (the step grows by one each iteration: +1, +2, +3, ...)
				bucket = (bucket + probe + 1) & hashmod;
			}

			assert(false && "Hash table is full"); // unreachable
			return 0;
		}

		// Hashes the low 32 bits of the key's address
		static PUGI__UNSIGNED_OVERFLOW unsigned int hash(const void* key)
		{
			unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key) & 0xffffffff);

			// MurmurHash3 32-bit finalizer
			h ^= h >> 16;
			h *= 0x85ebca6bu;
			h ^= h >> 13;
			h *= 0xc2b2ae35u;
			h ^= h >> 16;

			return h;
		}
	};
393 
rehash(size_t count)394 	PUGI__FN_NO_INLINE bool compact_hash_table::rehash(size_t count)
395 	{
396 		size_t capacity = 32;
397 		while (count >= capacity - capacity / 4)
398 			capacity *= 2;
399 
400 		compact_hash_table rt;
401 		rt._capacity = capacity;
402 		rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * capacity));
403 
404 		if (!rt._items)
405 			return false;
406 
407 		memset(rt._items, 0, sizeof(item_t) * capacity);
408 
409 		for (size_t i = 0; i < _capacity; ++i)
410 			if (_items[i].key)
411 				rt.insert(_items[i].key, _items[i].value);
412 
413 		if (_items)
414 			xml_memory::deallocate(_items);
415 
416 		_capacity = capacity;
417 		_items = rt._items;
418 
419 		assert(_count == rt._count);
420 
421 		return true;
422 	}
423 
424 PUGI__NS_END
425 #endif
426 
427 PUGI__NS_BEGIN
// Granularity used when rounding string allocations and encoding offsets inside a page:
// 4 bytes in compact mode, pointer size otherwise
#ifdef PUGIXML_COMPACT
	static const uintptr_t xml_memory_block_alignment = 4;
#else
	static const uintptr_t xml_memory_block_alignment = sizeof(void*);
#endif

	// extra metadata bits
	// (flags kept in the low bits of an object's header; the low four bits hold the node type, see PUGI__NODETYPE)
	static const uintptr_t xml_memory_page_contents_shared_mask = 64;
	static const uintptr_t xml_memory_page_name_allocated_mask = 32;
	static const uintptr_t xml_memory_page_value_allocated_mask = 16;
	static const uintptr_t xml_memory_page_type_mask = 15;

	// combined masks for string uniqueness
	static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
	static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;
443 
// Header encoding helpers.
// Non-compact mode: the header is an integer whose low 8 bits are flags and whose remaining bits hold the
// byte distance from the owning page to the object, so the page is recovered by subtracting (header >> 8)
// from the address of the header field itself.
// Compact mode: the header is an object that locates its page on its own via get_page().
#ifdef PUGIXML_COMPACT
	#define PUGI__GETHEADER_IMPL(object, page, flags) // unused
	#define PUGI__GETPAGE_IMPL(header) (header).get_page()
#else
	#define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags))
	// this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
	#define PUGI__GETPAGE_IMPL(header) static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>(reinterpret_cast<const char*>(&header) - (header >> 8))))
#endif

	// PUGI__GETPAGE(n): page that owns the object n points to (n must have a 'header' member)
	// PUGI__NODETYPE(n): node type stored in the low four header bits of n
	#define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
	#define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask)
455 
456 	struct xml_allocator;
457 
458 	struct xml_memory_page
459 	{
constructxml_memory_page460 		static xml_memory_page* construct(void* memory)
461 		{
462 			xml_memory_page* result = static_cast<xml_memory_page*>(memory);
463 
464 			result->allocator = 0;
465 			result->prev = 0;
466 			result->next = 0;
467 			result->busy_size = 0;
468 			result->freed_size = 0;
469 
470 		#ifdef PUGIXML_COMPACT
471 			result->compact_string_base = 0;
472 			result->compact_shared_parent = 0;
473 			result->compact_page_marker = 0;
474 		#endif
475 
476 			return result;
477 		}
478 
479 		xml_allocator* allocator;
480 
481 		xml_memory_page* prev;
482 		xml_memory_page* next;
483 
484 		size_t busy_size;
485 		size_t freed_size;
486 
487 	#ifdef PUGIXML_COMPACT
488 		char_t* compact_string_base;
489 		void* compact_shared_parent;
490 		uint32_t* compact_page_marker;
491 	#endif
492 	};
493 
	// Number of usable bytes in a regular page: the total page allocation
	// (PUGIXML_MEMORY_PAGE_SIZE when defined, 32768 bytes otherwise) minus the xml_memory_page header
	static const size_t xml_memory_page_size =
	#ifdef PUGIXML_MEMORY_PAGE_SIZE
		(PUGIXML_MEMORY_PAGE_SIZE)
	#else
		32768
	#endif
		- sizeof(xml_memory_page);
501 
	// Bookkeeping record stored immediately before every string returned by xml_allocator::allocate_string.
	// Both fields are expressed in units of xml_memory_block_alignment bytes.
	struct xml_memory_string_header
	{
		uint16_t page_offset; // offset of this header from the end of the owning page's xml_memory_page header
		uint16_t full_size; // size of header plus string storage; 0 if string occupies whole page
	};
507 
	// Page-based allocator: memory is handed out sequentially from the current page (_root), and a page is
	// released once everything allocated from it has been deallocated (the last page is reset instead).
	// _busy_size caches the number of bytes used in _root; it is written back to the page when the root
	// changes (allocate_memory_oob) or when memory of the root page is deallocated.
	struct xml_allocator
	{
		// Starts allocating from 'root', continuing after whatever the page already reports as busy
		xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
		{
		#ifdef PUGIXML_COMPACT
			_hash = 0;
		#endif
		}

		// Allocates a page with data_size usable bytes and binds it to this allocator.
		// The page is not linked into the page list here. Returns 0 if the allocation fails.
		xml_memory_page* allocate_page(size_t data_size)
		{
			size_t size = sizeof(xml_memory_page) + data_size;

			// allocate the page header and its data as a single block
			void* memory = xml_memory::allocate(size);
			if (!memory) return 0;

			// prepare page structure
			xml_memory_page* page = xml_memory_page::construct(memory);
			assert(page);

			assert(this == _root->allocator);
			page->allocator = this;

			return page;
		}

		// Returns the page's memory to the global deallocation function
		static void deallocate_page(xml_memory_page* page)
		{
			xml_memory::deallocate(page);
		}

		// Slow path of allocate_memory for requests that do not fit into the root page; defined out of line
		void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);

		// Allocates 'size' bytes and stores the owning page in out_page.
		// The fast path takes the next free bytes of the root page; otherwise allocate_memory_oob is used,
		// which returns 0 on failure.
		void* allocate_memory(size_t size, xml_memory_page*& out_page)
		{
			if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
				return allocate_memory_oob(size, out_page);

			void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;

			_busy_size += size;

			out_page = _root;

			return buf;
		}

	#ifdef PUGIXML_COMPACT
		// Allocates an object of 'size' bytes, making sure a page marker lies less than
		// 256 * xml_memory_block_alignment bytes before it.
		// Space for a uint32_t marker is requested up front and given back if no new marker is needed.
		void* allocate_object(size_t size, xml_memory_page*& out_page)
		{
			void* result = allocate_memory(size + sizeof(uint32_t), out_page);
			if (!result) return 0;

			// adjust for marker
			// NOTE(review): a freshly constructed page has a null compact_page_marker, so this distance is
			// presumably always out of range for the first object, forcing a marker to be written - confirm
			ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);

			if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment))
			{
				// insert new marker
				// (the marker stores its own byte offset from the start of the page)
				uint32_t* marker = static_cast<uint32_t*>(result);

				*marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
				out_page->compact_page_marker = marker;

				// since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
				// this will make sure deallocate_memory correctly tracks the size
				out_page->freed_size += sizeof(uint32_t);

				return marker + 1;
			}
			else
			{
				// roll back uint32_t part
				_busy_size -= sizeof(uint32_t);

				return result;
			}
		}
	#else
		// Allocates an object of 'size' bytes; without compact mode this is plain allocate_memory
		void* allocate_object(size_t size, xml_memory_page*& out_page)
		{
			return allocate_memory(size, out_page);
		}
	#endif

		// Marks 'size' bytes at ptr, which must lie inside 'page', as freed.
		// Once the whole busy part of a page has been freed, the page is either reset for reuse
		// (if it is the last page, i.e. the root) or unlinked and deallocated.
		void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
		{
			// bring the root page's counter up to date before it is used below
			if (page == _root) page->busy_size = _busy_size;

			assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
			// keeps ptr referenced when the assert above is compiled out
			(void)!ptr;

			page->freed_size += size;
			assert(page->freed_size <= page->busy_size);

			if (page->freed_size == page->busy_size)
			{
				if (page->next == 0)
				{
					assert(_root == page);

					// top page freed, just reset sizes
					page->busy_size = 0;
					page->freed_size = 0;

				#ifdef PUGIXML_COMPACT
					// reset compact state to maximize efficiency
					page->compact_string_base = 0;
					page->compact_shared_parent = 0;
					page->compact_page_marker = 0;
				#endif

					_busy_size = 0;
				}
				else
				{
					assert(_root != page);
					assert(page->prev);

					// remove from the list
					page->prev->next = page->next;
					page->next->prev = page->prev;

					// deallocate
					deallocate_page(page);
				}
			}
		}

		// Allocates storage for 'length' char_t units preceded by an xml_memory_string_header.
		// 'length' is used as given; no extra room for a terminator is added here.
		// Returns a pointer to the first character, or 0 if the allocation fails.
		char_t* allocate_string(size_t length)
		{
			// largest byte offset/size representable by a 16-bit header field counted in alignment units
			static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;

			PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);

			// allocate memory for string and header block
			size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);

			// round size up to block alignment boundary
			size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);

			xml_memory_page* page;
			xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));

			if (!header) return 0;

			// setup header
			ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);

			assert(page_offset % xml_memory_block_alignment == 0);
			assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
			header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);

			// full_size == 0 for large strings that occupy the whole page
			assert(full_size % xml_memory_block_alignment == 0);
			assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
			header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);

			// round-trip through void* to avoid 'cast increases required alignment of target type' warning
			// header is guaranteed a pointer-sized alignment, which should be enough for char_t
			return static_cast<char_t*>(static_cast<void*>(header + 1));
		}

		// Frees a string obtained from allocate_string, using the header stored in front of it
		// to find the owning page and the size of the allocation
		void deallocate_string(char_t* string)
		{
			// this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
			// we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string

			// get header
			xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
			assert(header);

			// deallocate
			// (page_offset here is the byte distance from the start of the page to the header)
			size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
			xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));

			// if full_size == 0 then this string occupies the whole page
			size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;

			deallocate_memory(header, full_size, page);
		}

		// In compact mode, makes room in the hash table (compact_hash_table::reserve with its default argument)
		// and returns false if that fails; _hash must have been set by then. Otherwise always returns true.
		bool reserve()
		{
		#ifdef PUGIXML_COMPACT
			return _hash->reserve();
		#else
			return true;
		#endif
		}

		xml_memory_page* _root; // page currently used for sequential allocation (last page of the list)
		size_t _busy_size; // bytes used in _root; may be ahead of _root->busy_size

	#ifdef PUGIXML_COMPACT
		compact_hash_table* _hash; // storage for compact values that do not fit their inline encoding; 0 until assigned
	#endif
	};
707 
allocate_memory_oob(size_t size,xml_memory_page * & out_page)708 	PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
709 	{
710 		const size_t large_allocation_threshold = xml_memory_page_size / 4;
711 
712 		xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
713 		out_page = page;
714 
715 		if (!page) return 0;
716 
717 		if (size <= large_allocation_threshold)
718 		{
719 			_root->busy_size = _busy_size;
720 
721 			// insert page at the end of linked list
722 			page->prev = _root;
723 			_root->next = page;
724 			_root = page;
725 
726 			_busy_size = size;
727 		}
728 		else
729 		{
730 			// insert page before the end of linked list, so that it is deleted as soon as possible
731 			// the last page is not deleted even if it's empty (see deallocate_memory)
732 			assert(_root->prev);
733 
734 			page->prev = _root->prev;
735 			page->next = _root;
736 
737 			_root->prev->next = page;
738 			_root->prev = page;
739 
740 			page->busy_size = size;
741 		}
742 
743 		return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
744 	}
745 PUGI__NS_END
746 
747 #ifdef PUGIXML_COMPACT
748 PUGI__NS_BEGIN
	// Granularity (4 bytes, stored as its base-2 logarithm and as the value itself) in which the
	// compact pointer and header encodings below measure distances
	static const uintptr_t compact_alignment_log2 = 2;
	static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
751 
	// Two-byte object header for compact mode.
	// _page holds the distance from the owning page's marker to this header in units of compact_alignment;
	// _flags holds the flag/type bits. The marker in turn stores its own byte offset from the start of
	// the page (see get_page), which is how the page is found without a full pointer.
	class compact_header
	{
	public:
		// Records the distance to the page's current marker and the initial flags.
		// The header must be compact-aligned relative to the marker and closer than
		// 256 * compact_alignment bytes to it (asserted).
		compact_header(xml_memory_page* page, unsigned int flags)
		{
			PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);

			ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
			assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);

			_page = static_cast<unsigned char>(offset >> compact_alignment_log2);
			_flags = static_cast<unsigned char>(flags);
		}

		// The operators below act on the flag bits only; 'mod' is truncated to unsigned char by &= and |=
		void operator&=(uintptr_t mod)
		{
			_flags &= static_cast<unsigned char>(mod);
		}

		void operator|=(uintptr_t mod)
		{
			_flags |= static_cast<unsigned char>(mod);
		}

		uintptr_t operator&(uintptr_t mod) const
		{
			return _flags & mod;
		}

		// Recovers the owning page: step back from this header to the marker, then step back from the
		// marker by the byte offset stored in it
		xml_memory_page* get_page() const
		{
			// round-trip through void* to silence 'cast increases required alignment of target type' warnings
			const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
			const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));

			return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
		}

	private:
		unsigned char _page; // distance to the page marker, in compact_alignment units
		unsigned char _flags;
	};
794 
compact_get_page(const void * object,int header_offset)795 	PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
796 	{
797 		const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset);
798 
799 		return header->get_page();
800 	}
801 
compact_get_value(const void * object)802 	template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object)
803 	{
804 		return static_cast<T*>(compact_get_page(object, header_offset)->allocator->_hash->find(object));
805 	}
806 
compact_set_value(const void * object,T * value)807 	template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value)
808 	{
809 		compact_get_page(object, header_offset)->allocator->_hash->insert(object, value);
810 	}
811 
	// Pointer stored in a single byte.
	// Encoding of _data:
	//   0       - null pointer
	//   1..254  - target lies (_data - 1 + start) * compact_alignment bytes from 'this' rounded down to
	//             compact alignment
	//   255     - target is out of range; the real pointer is kept in the allocator's hash table, keyed by 'this'
	// header_offset is the distance in bytes from this member back to the compact_header of the enclosing object.
	template <typename T, int header_offset, int start = -126> class compact_pointer
	{
	public:
		compact_pointer(): _data(0)
		{
		}

		// Copies by value rather than by encoding: 'rhs + 0' converts rhs to T* (via operator T*),
		// so the assignment below re-encodes the pointer relative to this object's own address
		void operator=(const compact_pointer& rhs)
		{
			*this = rhs + 0;
		}

		void operator=(T* value)
		{
			if (value)
			{
				// value is guaranteed to be compact-aligned; 'this' is not
				// our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
				// so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
				// compensate for arithmetic shift rounding for negative values
				ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
				ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;

				// the unsigned comparison also rejects negative offsets
				if (static_cast<uintptr_t>(offset) <= 253)
					_data = static_cast<unsigned char>(offset + 1);
				else
				{
					compact_set_value<header_offset>(this, value);

					_data = 255;
				}
			}
			else
				_data = 0;
		}

		// Decodes the stored pointer (see the encoding table on the class)
		operator T*() const
		{
			if (_data)
			{
				if (_data < 255)
				{
					uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);

					return reinterpret_cast<T*>(base + (_data - 1 + start) * compact_alignment);
				}
				else
					return compact_get_value<header_offset, T>(this);
			}
			else
				return 0;
		}

		T* operator->() const
		{
			return *this;
		}

	private:
		unsigned char _data;
	};
873 
	// Pointer stored in 16 bits; only targets at or below 'this' can be encoded inline.
	// Encoding of _data:
	//   0          - null pointer
	//   1..65533   - target lies (_data - 1 - 65533) * compact_alignment bytes from 'this' rounded down to
	//                compact alignment (i.e. at a non-positive distance)
	//   65534      - target equals the owning page's compact_shared_parent
	//   65535      - the real pointer is kept in the allocator's hash table, keyed by 'this'
	// header_offset is the distance in bytes from this member back to the compact_header of the enclosing object.
	template <typename T, int header_offset> class compact_pointer_parent
	{
	public:
		compact_pointer_parent(): _data(0)
		{
		}

		// Copies by value rather than by encoding: 'rhs + 0' converts rhs to T* (via operator T*),
		// so the assignment below re-encodes the pointer relative to this object's own address
		void operator=(const compact_pointer_parent& rhs)
		{
			*this = rhs + 0;
		}

		void operator=(T* value)
		{
			if (value)
			{
				// value is guaranteed to be compact-aligned; 'this' is not
				// our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
				// so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
				// compensate for arithmetic shift behavior for negative values
				ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
				ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;

				// NOTE(review): offset == 65533 would produce _data == 65534, which decodes as the shared-parent
				// code; presumably unreachable because the target never lies within compact_alignment bytes
				// at or below this member - confirm
				if (static_cast<uintptr_t>(offset) <= 65533)
				{
					_data = static_cast<unsigned short>(offset + 1);
				}
				else
				{
					xml_memory_page* page = compact_get_page(this, header_offset);

					// the first out-of-range target seen on a page becomes that page's shared parent
					if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
						page->compact_shared_parent = value;

					if (page->compact_shared_parent == value)
					{
						_data = 65534;
					}
					else
					{
						compact_set_value<header_offset>(this, value);

						_data = 65535;
					}
				}
			}
			else
			{
				_data = 0;
			}
		}

		// Decodes the stored pointer (see the encoding table on the class)
		operator T*() const
		{
			if (_data)
			{
				if (_data < 65534)
				{
					uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);

					return reinterpret_cast<T*>(base + (_data - 1 - 65533) * compact_alignment);
				}
				else if (_data == 65534)
					return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
				else
					return compact_get_value<header_offset, T>(this);
			}
			else
				return 0;
		}

		T* operator->() const
		{
			return *this;
		}

	private:
		uint16_t _data;
	};
953 
	// String pointer stored in a single byte plus a 16-bit base located base_offset bytes before this member.
	// Strings are addressed as a char_t offset from the owning page's compact_string_base:
	// the 16-bit base holds (offset >> 7) + 1, or 0 when it has not been set yet, and _data holds the rest.
	// Encoding of _data:
	//   0       - null pointer
	//   1..254  - string is at compact_string_base + ((base - 1) << 7) + (_data - 1)
	//   255     - the real pointer is kept in the allocator's hash table, keyed by 'this'
	// header_offset is the distance in bytes from this member back to the compact_header of the enclosing object.
	template <int header_offset, int base_offset> class compact_string
	{
	public:
		compact_string(): _data(0)
		{
		}

		// Copies by value rather than by encoding: 'rhs + 0' converts rhs to char_t* (via operator char_t*),
		// so the assignment below re-encodes the pointer for this object
		void operator=(const compact_string& rhs)
		{
			*this = rhs + 0;
		}

		void operator=(char_t* value)
		{
			if (value)
			{
				xml_memory_page* page = compact_get_page(this, header_offset);

				// the first string assigned on a page becomes the reference point for that page
				if (PUGI__UNLIKELY(page->compact_string_base == 0))
					page->compact_string_base = value;

				ptrdiff_t offset = value - page->compact_string_base;

				// the unsigned comparison also rejects strings located before the page's string base
				if (static_cast<uintptr_t>(offset) < (65535 << 7))
				{
					// round-trip through void* to silence 'cast increases required alignment of target type' warnings
					uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));

					if (*base == 0)
					{
						// base not set yet: split the offset into a high part (base) and the low 7 bits (_data)
						*base = static_cast<uint16_t>((offset >> 7) + 1);
						_data = static_cast<unsigned char>((offset & 127) + 1);
					}
					else
					{
						// base already set: the string must lie within 253 char_t units after it
						ptrdiff_t remainder = offset - ((*base - 1) << 7);

						if (static_cast<uintptr_t>(remainder) <= 253)
						{
							_data = static_cast<unsigned char>(remainder + 1);
						}
						else
						{
							compact_set_value<header_offset>(this, value);

							_data = 255;
						}
					}
				}
				else
				{
					compact_set_value<header_offset>(this, value);

					_data = 255;
				}
			}
			else
			{
				_data = 0;
			}
		}

		// Decodes the stored pointer (see the encoding table on the class)
		operator char_t*() const
		{
			if (_data)
			{
				if (_data < 255)
				{
					xml_memory_page* page = compact_get_page(this, header_offset);

					// round-trip through void* to silence 'cast increases required alignment of target type' warnings
					const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
					assert(*base);

					ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);

					return page->compact_string_base + offset;
				}
				else
				{
					return compact_get_value<header_offset, char_t>(this);
				}
			}
			else
				return 0;
		}

	private:
		unsigned char _data;
	};
1044 PUGI__NS_END
1045 #endif
1046 
1047 #ifdef PUGIXML_COMPACT
1048 namespace pugi
1049 {
	// Attribute record for PUGIXML_COMPACT builds; the constructor statically asserts that it is exactly 8 bytes.
	// Member order is part of the encoding: the integer template arguments are tied to where each member sits.
	// NOTE(review): the first integer argument looks like the member's byte offset from the start of the struct - confirm against compact_header's size.
	struct xml_attribute_struct
	{
		xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
		{
			PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
		}

		impl::compact_header header;

		// 16-bit base shared by name and value: each compact_string steps back its base_offset (2 and 3) to reach this field
		uint16_t namevalue_base;

		impl::compact_string<4, 2> name;
		impl::compact_string<5, 3> value;

		// the first attribute's prev link refers to the last attribute (see append_attribute)
		impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c;
		impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute;
	};
1067 
	// Node record for PUGIXML_COMPACT builds; the constructor statically asserts that it is exactly 12 bytes.
	// Member order is part of the encoding: the integer template arguments are tied to where each member sits.
	// NOTE(review): the first integer argument looks like the member's byte offset from the start of the struct - confirm against compact_header's size.
	struct xml_node_struct
	{
		xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0)
		{
			PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
		}

		impl::compact_header header;

		// 16-bit base shared by name and value: each compact_string steps back its base_offset (2 and 3) to reach this field
		uint16_t namevalue_base;

		impl::compact_string<4, 2> name;
		impl::compact_string<5, 3> value;

		impl::compact_pointer_parent<xml_node_struct, 6> parent;

		impl::compact_pointer<xml_node_struct, 8, 0> first_child;

		// the first child's prev link refers to the last child (see append_node)
		impl::compact_pointer<xml_node_struct,  9>    prev_sibling_c;
		impl::compact_pointer<xml_node_struct, 10, 0> next_sibling;

		impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
	};
1091 }
1092 #else
1093 namespace pugi
1094 {
	// Attribute record for regular (non-compact) builds: plain pointers, all zeroed on construction.
	struct xml_attribute_struct
	{
		xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0)
		{
			// header is derived from this object's address, its page and a flags value of 0
			header = PUGI__GETHEADER_IMPL(this, page, 0);
		}

		// page locator plus flag bits; destroy_attribute tests the name/value "allocated" masks against it
		uintptr_t header;

		char_t*	name;
		char_t*	value;

		// the first attribute's prev link refers to the last attribute (see append_attribute)
		xml_attribute_struct* prev_attribute_c;
		// null for the last attribute
		xml_attribute_struct* next_attribute;
	};
1110 
	// Node record for regular (non-compact) builds: plain pointers, all zeroed on construction.
	struct xml_node_struct
	{
		xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
		{
			// header is derived from this object's address, its page and the node type
			header = PUGI__GETHEADER_IMPL(this, page, type);
		}

		// page locator plus type/flag bits; destroy_node tests the name/value "allocated" masks against it
		uintptr_t header;

		char_t* name;
		char_t* value;

		xml_node_struct* parent;

		xml_node_struct* first_child;

		// the first child's prev link refers to the last child (see append_node)
		xml_node_struct* prev_sibling_c;
		// null for the last child
		xml_node_struct* next_sibling;

		xml_attribute_struct* first_attribute;
	};
1132 }
1133 #endif
1134 
1135 PUGI__NS_BEGIN
	// Singly linked list entry that records one additional character buffer.
	// NOTE(review): ownership and release of 'buffer' are decided by the code that walks this list - not visible here.
	struct xml_extra_buffer
	{
		char_t* buffer;
		xml_extra_buffer* next;
	};
1141 
	// Root record of a document: a node of type node_document that is also the xml_allocator for its pages
	// (get_document relies on this when it casts a page's allocator pointer to xml_document_struct).
	struct xml_document_struct: public xml_node_struct, public xml_allocator
	{
		xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
		{
		}

		// NOTE(review): presumably the buffer the document was parsed from - confirm at the use sites
		const char_t* buffer;

		// head of the xml_extra_buffer list (entries are chained through xml_extra_buffer::next)
		xml_extra_buffer* extra_buffers;

	#ifdef PUGIXML_COMPACT
		// only present in compact builds
		compact_hash_table hash;
	#endif
	};
1156 
get_allocator(const Object * object)1157 	template <typename Object> inline xml_allocator& get_allocator(const Object* object)
1158 	{
1159 		assert(object);
1160 
1161 		return *PUGI__GETPAGE(object)->allocator;
1162 	}
1163 
get_document(const Object * object)1164 	template <typename Object> inline xml_document_struct& get_document(const Object* object)
1165 	{
1166 		assert(object);
1167 
1168 		return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator);
1169 	}
1170 PUGI__NS_END
1171 
1172 // Low-level DOM operations
1173 PUGI__NS_BEGIN
allocate_attribute(xml_allocator & alloc)1174 	inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
1175 	{
1176 		xml_memory_page* page;
1177 		void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
1178 		if (!memory) return 0;
1179 
1180 		return new (memory) xml_attribute_struct(page);
1181 	}
1182 
allocate_node(xml_allocator & alloc,xml_node_type type)1183 	inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
1184 	{
1185 		xml_memory_page* page;
1186 		void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
1187 		if (!memory) return 0;
1188 
1189 		return new (memory) xml_node_struct(page, type);
1190 	}
1191 
destroy_attribute(xml_attribute_struct * a,xml_allocator & alloc)1192 	inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
1193 	{
1194 		if (a->header & impl::xml_memory_page_name_allocated_mask)
1195 			alloc.deallocate_string(a->name);
1196 
1197 		if (a->header & impl::xml_memory_page_value_allocated_mask)
1198 			alloc.deallocate_string(a->value);
1199 
1200 		alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
1201 	}
1202 
destroy_node(xml_node_struct * n,xml_allocator & alloc)1203 	inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
1204 	{
1205 		if (n->header & impl::xml_memory_page_name_allocated_mask)
1206 			alloc.deallocate_string(n->name);
1207 
1208 		if (n->header & impl::xml_memory_page_value_allocated_mask)
1209 			alloc.deallocate_string(n->value);
1210 
1211 		for (xml_attribute_struct* attr = n->first_attribute; attr; )
1212 		{
1213 			xml_attribute_struct* next = attr->next_attribute;
1214 
1215 			destroy_attribute(attr, alloc);
1216 
1217 			attr = next;
1218 		}
1219 
1220 		for (xml_node_struct* child = n->first_child; child; )
1221 		{
1222 			xml_node_struct* next = child->next_sibling;
1223 
1224 			destroy_node(child, alloc);
1225 
1226 			child = next;
1227 		}
1228 
1229 		alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
1230 	}
1231 
append_node(xml_node_struct * child,xml_node_struct * node)1232 	inline void append_node(xml_node_struct* child, xml_node_struct* node)
1233 	{
1234 		child->parent = node;
1235 
1236 		xml_node_struct* head = node->first_child;
1237 
1238 		if (head)
1239 		{
1240 			xml_node_struct* tail = head->prev_sibling_c;
1241 
1242 			tail->next_sibling = child;
1243 			child->prev_sibling_c = tail;
1244 			head->prev_sibling_c = child;
1245 		}
1246 		else
1247 		{
1248 			node->first_child = child;
1249 			child->prev_sibling_c = child;
1250 		}
1251 	}
1252 
prepend_node(xml_node_struct * child,xml_node_struct * node)1253 	inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
1254 	{
1255 		child->parent = node;
1256 
1257 		xml_node_struct* head = node->first_child;
1258 
1259 		if (head)
1260 		{
1261 			child->prev_sibling_c = head->prev_sibling_c;
1262 			head->prev_sibling_c = child;
1263 		}
1264 		else
1265 			child->prev_sibling_c = child;
1266 
1267 		child->next_sibling = head;
1268 		node->first_child = child;
1269 	}
1270 
insert_node_after(xml_node_struct * child,xml_node_struct * node)1271 	inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
1272 	{
1273 		xml_node_struct* parent = node->parent;
1274 
1275 		child->parent = parent;
1276 
1277 		if (node->next_sibling)
1278 			node->next_sibling->prev_sibling_c = child;
1279 		else
1280 			parent->first_child->prev_sibling_c = child;
1281 
1282 		child->next_sibling = node->next_sibling;
1283 		child->prev_sibling_c = node;
1284 
1285 		node->next_sibling = child;
1286 	}
1287 
insert_node_before(xml_node_struct * child,xml_node_struct * node)1288 	inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
1289 	{
1290 		xml_node_struct* parent = node->parent;
1291 
1292 		child->parent = parent;
1293 
1294 		if (node->prev_sibling_c->next_sibling)
1295 			node->prev_sibling_c->next_sibling = child;
1296 		else
1297 			parent->first_child = child;
1298 
1299 		child->prev_sibling_c = node->prev_sibling_c;
1300 		child->next_sibling = node;
1301 
1302 		node->prev_sibling_c = child;
1303 	}
1304 
remove_node(xml_node_struct * node)1305 	inline void remove_node(xml_node_struct* node)
1306 	{
1307 		xml_node_struct* parent = node->parent;
1308 
1309 		if (node->next_sibling)
1310 			node->next_sibling->prev_sibling_c = node->prev_sibling_c;
1311 		else
1312 			parent->first_child->prev_sibling_c = node->prev_sibling_c;
1313 
1314 		if (node->prev_sibling_c->next_sibling)
1315 			node->prev_sibling_c->next_sibling = node->next_sibling;
1316 		else
1317 			parent->first_child = node->next_sibling;
1318 
1319 		node->parent = 0;
1320 		node->prev_sibling_c = 0;
1321 		node->next_sibling = 0;
1322 	}
1323 
append_attribute(xml_attribute_struct * attr,xml_node_struct * node)1324 	inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1325 	{
1326 		xml_attribute_struct* head = node->first_attribute;
1327 
1328 		if (head)
1329 		{
1330 			xml_attribute_struct* tail = head->prev_attribute_c;
1331 
1332 			tail->next_attribute = attr;
1333 			attr->prev_attribute_c = tail;
1334 			head->prev_attribute_c = attr;
1335 		}
1336 		else
1337 		{
1338 			node->first_attribute = attr;
1339 			attr->prev_attribute_c = attr;
1340 		}
1341 	}
1342 
prepend_attribute(xml_attribute_struct * attr,xml_node_struct * node)1343 	inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1344 	{
1345 		xml_attribute_struct* head = node->first_attribute;
1346 
1347 		if (head)
1348 		{
1349 			attr->prev_attribute_c = head->prev_attribute_c;
1350 			head->prev_attribute_c = attr;
1351 		}
1352 		else
1353 			attr->prev_attribute_c = attr;
1354 
1355 		attr->next_attribute = head;
1356 		node->first_attribute = attr;
1357 	}
1358 
insert_attribute_after(xml_attribute_struct * attr,xml_attribute_struct * place,xml_node_struct * node)1359 	inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1360 	{
1361 		if (place->next_attribute)
1362 			place->next_attribute->prev_attribute_c = attr;
1363 		else
1364 			node->first_attribute->prev_attribute_c = attr;
1365 
1366 		attr->next_attribute = place->next_attribute;
1367 		attr->prev_attribute_c = place;
1368 		place->next_attribute = attr;
1369 	}
1370 
insert_attribute_before(xml_attribute_struct * attr,xml_attribute_struct * place,xml_node_struct * node)1371 	inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1372 	{
1373 		if (place->prev_attribute_c->next_attribute)
1374 			place->prev_attribute_c->next_attribute = attr;
1375 		else
1376 			node->first_attribute = attr;
1377 
1378 		attr->prev_attribute_c = place->prev_attribute_c;
1379 		attr->next_attribute = place;
1380 		place->prev_attribute_c = attr;
1381 	}
1382 
remove_attribute(xml_attribute_struct * attr,xml_node_struct * node)1383 	inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1384 	{
1385 		if (attr->next_attribute)
1386 			attr->next_attribute->prev_attribute_c = attr->prev_attribute_c;
1387 		else
1388 			node->first_attribute->prev_attribute_c = attr->prev_attribute_c;
1389 
1390 		if (attr->prev_attribute_c->next_attribute)
1391 			attr->prev_attribute_c->next_attribute = attr->next_attribute;
1392 		else
1393 			node->first_attribute = attr->next_attribute;
1394 
1395 		attr->prev_attribute_c = 0;
1396 		attr->next_attribute = 0;
1397 	}
1398 
append_new_node(xml_node_struct * node,xml_allocator & alloc,xml_node_type type=node_element)1399 	PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
1400 	{
1401 		if (!alloc.reserve()) return 0;
1402 
1403 		xml_node_struct* child = allocate_node(alloc, type);
1404 		if (!child) return 0;
1405 
1406 		append_node(child, node);
1407 
1408 		return child;
1409 	}
1410 
append_new_attribute(xml_node_struct * node,xml_allocator & alloc)1411 	PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
1412 	{
1413 		if (!alloc.reserve()) return 0;
1414 
1415 		xml_attribute_struct* attr = allocate_attribute(alloc);
1416 		if (!attr) return 0;
1417 
1418 		append_attribute(attr, node);
1419 
1420 		return attr;
1421 	}
1422 PUGI__NS_END
1423 
1424 // Helper classes for code generation
1425 PUGI__NS_BEGIN
	// Compile-time "false" tag: exposes value == 0 so templates can branch on a type parameter
	// (used in this file as the opt_swap argument of utf16_decoder / utf32_decoder)
	struct opt_false
	{
		enum { value = 0 };
	};
1430 
	// Compile-time "true" tag: exposes value == 1 so templates can branch on a type parameter
	struct opt_true
	{
		enum { value = 1 };
	};
1435 PUGI__NS_END
1436 
1437 // Unicode utilities
1438 PUGI__NS_BEGIN
endian_swap(uint16_t value)1439 	inline uint16_t endian_swap(uint16_t value)
1440 	{
1441 		return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8));
1442 	}
1443 
endian_swap(uint32_t value)1444 	inline uint32_t endian_swap(uint32_t value)
1445 	{
1446 		return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24);
1447 	}
1448 
1449 	struct utf8_counter
1450 	{
1451 		typedef size_t value_type;
1452 
lowutf8_counter1453 		static value_type low(value_type result, uint32_t ch)
1454 		{
1455 			// U+0000..U+007F
1456 			if (ch < 0x80) return result + 1;
1457 			// U+0080..U+07FF
1458 			else if (ch < 0x800) return result + 2;
1459 			// U+0800..U+FFFF
1460 			else return result + 3;
1461 		}
1462 
highutf8_counter1463 		static value_type high(value_type result, uint32_t)
1464 		{
1465 			// U+10000..U+10FFFF
1466 			return result + 4;
1467 		}
1468 	};
1469 
1470 	struct utf8_writer
1471 	{
1472 		typedef uint8_t* value_type;
1473 
lowutf8_writer1474 		static value_type low(value_type result, uint32_t ch)
1475 		{
1476 			// U+0000..U+007F
1477 			if (ch < 0x80)
1478 			{
1479 				*result = static_cast<uint8_t>(ch);
1480 				return result + 1;
1481 			}
1482 			// U+0080..U+07FF
1483 			else if (ch < 0x800)
1484 			{
1485 				result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
1486 				result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1487 				return result + 2;
1488 			}
1489 			// U+0800..U+FFFF
1490 			else
1491 			{
1492 				result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
1493 				result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
1494 				result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1495 				return result + 3;
1496 			}
1497 		}
1498 
highutf8_writer1499 		static value_type high(value_type result, uint32_t ch)
1500 		{
1501 			// U+10000..U+10FFFF
1502 			result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
1503 			result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
1504 			result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
1505 			result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1506 			return result + 4;
1507 		}
1508 
anyutf8_writer1509 		static value_type any(value_type result, uint32_t ch)
1510 		{
1511 			return (ch < 0x10000) ? low(result, ch) : high(result, ch);
1512 		}
1513 	};
1514 
1515 	struct utf16_counter
1516 	{
1517 		typedef size_t value_type;
1518 
lowutf16_counter1519 		static value_type low(value_type result, uint32_t)
1520 		{
1521 			return result + 1;
1522 		}
1523 
highutf16_counter1524 		static value_type high(value_type result, uint32_t)
1525 		{
1526 			return result + 2;
1527 		}
1528 	};
1529 
1530 	struct utf16_writer
1531 	{
1532 		typedef uint16_t* value_type;
1533 
lowutf16_writer1534 		static value_type low(value_type result, uint32_t ch)
1535 		{
1536 			*result = static_cast<uint16_t>(ch);
1537 
1538 			return result + 1;
1539 		}
1540 
highutf16_writer1541 		static value_type high(value_type result, uint32_t ch)
1542 		{
1543 			uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10;
1544 			uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff;
1545 
1546 			result[0] = static_cast<uint16_t>(0xD800 + msh);
1547 			result[1] = static_cast<uint16_t>(0xDC00 + lsh);
1548 
1549 			return result + 2;
1550 		}
1551 
anyutf16_writer1552 		static value_type any(value_type result, uint32_t ch)
1553 		{
1554 			return (ch < 0x10000) ? low(result, ch) : high(result, ch);
1555 		}
1556 	};
1557 
1558 	struct utf32_counter
1559 	{
1560 		typedef size_t value_type;
1561 
lowutf32_counter1562 		static value_type low(value_type result, uint32_t)
1563 		{
1564 			return result + 1;
1565 		}
1566 
highutf32_counter1567 		static value_type high(value_type result, uint32_t)
1568 		{
1569 			return result + 1;
1570 		}
1571 	};
1572 
1573 	struct utf32_writer
1574 	{
1575 		typedef uint32_t* value_type;
1576 
lowutf32_writer1577 		static value_type low(value_type result, uint32_t ch)
1578 		{
1579 			*result = ch;
1580 
1581 			return result + 1;
1582 		}
1583 
highutf32_writer1584 		static value_type high(value_type result, uint32_t ch)
1585 		{
1586 			*result = ch;
1587 
1588 			return result + 1;
1589 		}
1590 
anyutf32_writer1591 		static value_type any(value_type result, uint32_t ch)
1592 		{
1593 			*result = ch;
1594 
1595 			return result + 1;
1596 		}
1597 	};
1598 
1599 	struct latin1_writer
1600 	{
1601 		typedef uint8_t* value_type;
1602 
lowlatin1_writer1603 		static value_type low(value_type result, uint32_t ch)
1604 		{
1605 			*result = static_cast<uint8_t>(ch > 255 ? '?' : ch);
1606 
1607 			return result + 1;
1608 		}
1609 
highlatin1_writer1610 		static value_type high(value_type result, uint32_t ch)
1611 		{
1612 			(void)ch;
1613 
1614 			*result = '?';
1615 
1616 			return result + 1;
1617 		}
1618 	};
1619 
1620 	struct utf8_decoder
1621 	{
1622 		typedef uint8_t type;
1623 
processutf8_decoder1624 		template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
1625 		{
1626 			const uint8_t utf8_byte_mask = 0x3f;
1627 
1628 			while (size)
1629 			{
1630 				uint8_t lead = *data;
1631 
1632 				// 0xxxxxxx -> U+0000..U+007F
1633 				if (lead < 0x80)
1634 				{
1635 					result = Traits::low(result, lead);
1636 					data += 1;
1637 					size -= 1;
1638 
1639 					// process aligned single-byte (ascii) blocks
1640 					if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
1641 					{
1642 						// round-trip through void* to silence 'cast increases required alignment of target type' warnings
1643 						while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
1644 						{
1645 							result = Traits::low(result, data[0]);
1646 							result = Traits::low(result, data[1]);
1647 							result = Traits::low(result, data[2]);
1648 							result = Traits::low(result, data[3]);
1649 							data += 4;
1650 							size -= 4;
1651 						}
1652 					}
1653 				}
1654 				// 110xxxxx -> U+0080..U+07FF
1655 				else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
1656 				{
1657 					result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
1658 					data += 2;
1659 					size -= 2;
1660 				}
1661 				// 1110xxxx -> U+0800-U+FFFF
1662 				else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
1663 				{
1664 					result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
1665 					data += 3;
1666 					size -= 3;
1667 				}
1668 				// 11110xxx -> U+10000..U+10FFFF
1669 				else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
1670 				{
1671 					result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
1672 					data += 4;
1673 					size -= 4;
1674 				}
1675 				// 10xxxxxx or 11111xxx -> invalid
1676 				else
1677 				{
1678 					data += 1;
1679 					size -= 1;
1680 				}
1681 			}
1682 
1683 			return result;
1684 		}
1685 	};
1686 
1687 	template <typename opt_swap> struct utf16_decoder
1688 	{
1689 		typedef uint16_t type;
1690 
processutf16_decoder1691 		template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
1692 		{
1693 			while (size)
1694 			{
1695 				uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
1696 
1697 				// U+0000..U+D7FF
1698 				if (lead < 0xD800)
1699 				{
1700 					result = Traits::low(result, lead);
1701 					data += 1;
1702 					size -= 1;
1703 				}
1704 				// U+E000..U+FFFF
1705 				else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
1706 				{
1707 					result = Traits::low(result, lead);
1708 					data += 1;
1709 					size -= 1;
1710 				}
1711 				// surrogate pair lead
1712 				else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2)
1713 				{
1714 					uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
1715 
1716 					if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
1717 					{
1718 						result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
1719 						data += 2;
1720 						size -= 2;
1721 					}
1722 					else
1723 					{
1724 						data += 1;
1725 						size -= 1;
1726 					}
1727 				}
1728 				else
1729 				{
1730 					data += 1;
1731 					size -= 1;
1732 				}
1733 			}
1734 
1735 			return result;
1736 		}
1737 	};
1738 
1739 	template <typename opt_swap> struct utf32_decoder
1740 	{
1741 		typedef uint32_t type;
1742 
processutf32_decoder1743 		template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
1744 		{
1745 			while (size)
1746 			{
1747 				uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
1748 
1749 				// U+0000..U+FFFF
1750 				if (lead < 0x10000)
1751 				{
1752 					result = Traits::low(result, lead);
1753 					data += 1;
1754 					size -= 1;
1755 				}
1756 				// U+10000..U+10FFFF
1757 				else
1758 				{
1759 					result = Traits::high(result, lead);
1760 					data += 1;
1761 					size -= 1;
1762 				}
1763 			}
1764 
1765 			return result;
1766 		}
1767 	};
1768 
1769 	struct latin1_decoder
1770 	{
1771 		typedef uint8_t type;
1772 
processlatin1_decoder1773 		template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
1774 		{
1775 			while (size)
1776 			{
1777 				result = Traits::low(result, *data);
1778 				data += 1;
1779 				size -= 1;
1780 			}
1781 
1782 			return result;
1783 		}
1784 	};
1785 
	// Maps a wchar_t size in bytes to the matching code unit type and counter/writer/decoder traits.
	// Only sizes 2 and 4 are specialized; the primary template is left undefined.
	template <size_t size> struct wchar_selector;

	// 2-byte wchar_t is handled as native-endian UTF-16 (decoder instantiated with opt_false, i.e. no byte swap)
	template <> struct wchar_selector<2>
	{
		typedef uint16_t type;
		typedef utf16_counter counter;
		typedef utf16_writer writer;
		typedef utf16_decoder<opt_false> decoder;
	};

	// 4-byte wchar_t is handled as native-endian UTF-32 (decoder instantiated with opt_false, i.e. no byte swap)
	template <> struct wchar_selector<4>
	{
		typedef uint32_t type;
		typedef utf32_counter counter;
		typedef utf32_writer writer;
		typedef utf32_decoder<opt_false> decoder;
	};
1803 
	// Counter and writer traits selected for this platform's wchar_t width
	typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
	typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
1806 
1807 	struct wchar_decoder
1808 	{
1809 		typedef wchar_t type;
1810 
processwchar_decoder1811 		template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
1812 		{
1813 			typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
1814 
1815 			return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
1816 		}
1817 	};
1818 
1819 #ifdef PUGIXML_WCHAR_MODE
convert_wchar_endian_swap(wchar_t * result,const wchar_t * data,size_t length)1820 	PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
1821 	{
1822 		for (size_t i = 0; i < length; ++i)
1823 			result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
1824 	}
1825 #endif
1826 PUGI__NS_END
1827 
1828 PUGI__NS_BEGIN
	// Bit flags stored in chartype_table; each flag names a character class and the trailing comment lists its members.
	// Flags are tested with PUGI__IS_CHARTYPE.
	enum chartype_t
	{
		ct_parse_pcdata = 1,	// \0, &, \r, <
		ct_parse_attr = 2,		// \0, &, \r, ', "
		ct_parse_attr_ws = 4,	// \0, &, \r, ', ", \n, tab
		ct_space = 8,			// \r, \n, space, tab
		ct_parse_cdata = 16,	// \0, ], >, \r
		ct_parse_comment = 32,	// \0, -, >, \r
		ct_symbol = 64,			// Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
		ct_start_symbol = 128	// Any symbol > 127, a-z, A-Z, _, :
	};
1840 
	// Per-character bitmask of chartype_t flags, indexed by character value (0..255).
	// Every entry from 128 up is 192 (ct_symbol | ct_start_symbol).
	static const unsigned char chartype_table[256] =
	{
		55,  0,   0,   0,   0,   0,   0,   0,      0,   12,  12,  0,   0,   63,  0,   0,   // 0-15
		0,   0,   0,   0,   0,   0,   0,   0,      0,   0,   0,   0,   0,   0,   0,   0,   // 16-31
		8,   0,   6,   0,   0,   0,   7,   6,      0,   0,   0,   0,   0,   96,  64,  0,   // 32-47
		64,  64,  64,  64,  64,  64,  64,  64,     64,  64,  192, 0,   1,   0,   48,  0,   // 48-63
		0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 64-79
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0,   0,   16,  0,   192, // 80-95
		0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 96-111
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0, 0, 0, 0, 0,           // 112-127

		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 128+
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192
	};
1861 
	// Bit flags stored in chartypex_table, a second set of character classes; the trailing comment lists each class's members.
	// Flags are tested with PUGI__IS_CHARTYPEX.
	enum chartypex_t
	{
		ctx_special_pcdata = 1,   // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
		ctx_special_attr = 2,     // Any symbol >= 0 and < 32, &, <, ", '
		ctx_start_symbol = 4,	  // Any symbol > 127, a-z, A-Z, _
		ctx_digit = 8,			  // 0-9
		ctx_symbol = 16			  // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
	};
1870 
	// Per-character bitmask of chartypex_t flags, indexed by character value (0..255).
	// Every entry from 128 up is 20 (ctx_start_symbol | ctx_symbol).
	static const unsigned char chartypex_table[256] =
	{
		3,  3,  3,  3,  3,  3,  3,  3,     3,  2,  2,  3,  3,  2,  3,  3,     // 0-15
		3,  3,  3,  3,  3,  3,  3,  3,     3,  3,  3,  3,  3,  3,  3,  3,     // 16-31
		0,  0,  2,  0,  0,  0,  3,  2,     0,  0,  0,  0,  0, 16, 16,  0,     // 32-47
		24, 24, 24, 24, 24, 24, 24, 24,    24, 24, 0,  0,  3,  0,  1,  0,     // 48-63

		0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 64-79
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  20,    // 80-95
		0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 96-111
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  0,     // 112-127

		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 128+
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20
	};
1892 
// Character class tests: the result is non-zero when character c has any of the ct bits set in the given table.
// In wchar mode characters with a value of 128 or more all use the entry at index 128;
// otherwise the character is cast to unsigned char and used as the index directly.
#ifdef PUGIXML_WCHAR_MODE
	#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
#else
	#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
#endif

	// PUGI__IS_CHARTYPE takes chartype_t flags, PUGI__IS_CHARTYPEX takes chartypex_t flags
	#define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
	#define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
1901 
is_little_endian()1902 	PUGI__FN bool is_little_endian()
1903 	{
1904 		unsigned int ui = 1;
1905 
1906 		return *reinterpret_cast<unsigned char*>(&ui) == 1;
1907 	}
1908 
get_wchar_encoding()1909 	PUGI__FN xml_encoding get_wchar_encoding()
1910 	{
1911 		PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1912 
1913 		if (sizeof(wchar_t) == 2)
1914 			return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1915 		else
1916 			return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1917 	}
1918 
	// Locates the value of the encoding pseudo-attribute inside an XML declaration held in a byte buffer.
	// data/size: the buffer to inspect; it must begin with "<?xml" followed by a whitespace character.
	// out_encoding/out_length: on success, the start and length of the encoding name (the text between the quotes).
	// Returns true only when encoding S? = S? <quote> name <quote> was matched in full.
	// Note that out_encoding and out_length are assigned before the closing quote is checked, so they
	// may have been modified even when the function returns false.
	PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length)
	{
	// PUGI__SCANCHAR: require character ch at the current offset and step over it; otherwise return false from the function
	// PUGI__SCANCHARTYPE: step over any run (possibly empty) of characters that belong to class ct
	#define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; }
	#define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; }

		// check if we have a non-empty XML declaration
		if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space)))
			return false;

		// scan XML declaration until the encoding field
		for (size_t i = 6; i + 1 < size; ++i)
		{
			// declaration can not contain ? in quoted values
			// (so reaching '?' means the declaration ended without an encoding field)
			if (data[i] == '?')
				return false;

			if (data[i] == 'e' && data[i + 1] == 'n')
			{
				size_t offset = i;

				// encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed
				PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o');
				PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g');

				// S? = S?
				PUGI__SCANCHARTYPE(ct_space);
				PUGI__SCANCHAR('=');
				PUGI__SCANCHARTYPE(ct_space);

				// the only two valid delimiters are ' and "
				// (anything other than " is treated as ', and the scan below then fails unless the character really is ')
				uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\'';

				PUGI__SCANCHAR(delimiter);

				size_t start = offset;

				out_encoding = data + offset;

				// the encoding name is the run of ct_symbol characters after the opening quote
				PUGI__SCANCHARTYPE(ct_symbol);

				out_length = offset - start;

				// the name must be terminated by the same quote that opened it
				PUGI__SCANCHAR(delimiter);

				return true;
			}
		}

		return false;

	#undef PUGI__SCANCHAR
	#undef PUGI__SCANCHARTYPE
	}
1972 
guess_buffer_encoding(const uint8_t * data,size_t size)1973 	PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size)
1974 	{
1975 		// skip encoding autodetection if input buffer is too small
1976 		if (size < 4) return encoding_utf8;
1977 
1978 		uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
1979 
1980 		// look for BOM in first few bytes
1981 		if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
1982 		if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
1983 		if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
1984 		if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
1985 		if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
1986 
1987 		// look for <, <? or <?xm in various encodings
1988 		if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
1989 		if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
1990 		if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
1991 		if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
1992 
1993 		// look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
1994 		if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
1995 		if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
1996 
1997 		// no known BOM detected; parse declaration
1998 		const uint8_t* enc = 0;
1999 		size_t enc_length = 0;
2000 
2001 		if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length))
2002 		{
2003 			// iso-8859-1 (case-insensitive)
2004 			if (enc_length == 10
2005 				&& (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o'
2006 				&& enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9'
2007 				&& enc[8] == '-' && enc[9] == '1')
2008 				return encoding_latin1;
2009 
2010 			// latin1 (case-insensitive)
2011 			if (enc_length == 6
2012 				&& (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't'
2013 				&& (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n'
2014 				&& enc[5] == '1')
2015 				return encoding_latin1;
2016 		}
2017 
2018 		return encoding_utf8;
2019 	}
2020 
get_buffer_encoding(xml_encoding encoding,const void * contents,size_t size)2021 	PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
2022 	{
2023 		// replace wchar encoding with utf implementation
2024 		if (encoding == encoding_wchar) return get_wchar_encoding();
2025 
2026 		// replace utf16 encoding with utf16 with specific endianness
2027 		if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2028 
2029 		// replace utf32 encoding with utf32 with specific endianness
2030 		if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2031 
2032 		// only do autodetection if no explicit encoding is requested
2033 		if (encoding != encoding_auto) return encoding;
2034 
2035 		// try to guess encoding (based on XML specification, Appendix F.1)
2036 		const uint8_t* data = static_cast<const uint8_t*>(contents);
2037 
2038 		return guess_buffer_encoding(data, size);
2039 	}
2040 
get_mutable_buffer(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2041 	PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2042 	{
2043 		size_t length = size / sizeof(char_t);
2044 
2045 		if (is_mutable)
2046 		{
2047 			out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
2048 			out_length = length;
2049 		}
2050 		else
2051 		{
2052 			char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2053 			if (!buffer) return false;
2054 
2055 			if (contents)
2056 				memcpy(buffer, contents, length * sizeof(char_t));
2057 			else
2058 				assert(length == 0);
2059 
2060 			buffer[length] = 0;
2061 
2062 			out_buffer = buffer;
2063 			out_length = length + 1;
2064 		}
2065 
2066 		return true;
2067 	}
2068 
2069 #ifdef PUGIXML_WCHAR_MODE
need_endian_swap_utf(xml_encoding le,xml_encoding re)2070 	PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
2071 	{
2072 		return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
2073 			   (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
2074 	}
2075 
convert_buffer_endian_swap(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2076 	PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2077 	{
2078 		const char_t* data = static_cast<const char_t*>(contents);
2079 		size_t length = size / sizeof(char_t);
2080 
2081 		if (is_mutable)
2082 		{
2083 			char_t* buffer = const_cast<char_t*>(data);
2084 
2085 			convert_wchar_endian_swap(buffer, data, length);
2086 
2087 			out_buffer = buffer;
2088 			out_length = length;
2089 		}
2090 		else
2091 		{
2092 			char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2093 			if (!buffer) return false;
2094 
2095 			convert_wchar_endian_swap(buffer, data, length);
2096 			buffer[length] = 0;
2097 
2098 			out_buffer = buffer;
2099 			out_length = length + 1;
2100 		}
2101 
2102 		return true;
2103 	}
2104 
convert_buffer_generic(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,D)2105 	template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2106 	{
2107 		const typename D::type* data = static_cast<const typename D::type*>(contents);
2108 		size_t data_length = size / sizeof(typename D::type);
2109 
2110 		// first pass: get length in wchar_t units
2111 		size_t length = D::process(data, data_length, 0, wchar_counter());
2112 
2113 		// allocate buffer of suitable length
2114 		char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2115 		if (!buffer) return false;
2116 
2117 		// second pass: convert utf16 input to wchar_t
2118 		wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
2119 		wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
2120 
2121 		assert(oend == obegin + length);
2122 		*oend = 0;
2123 
2124 		out_buffer = buffer;
2125 		out_length = length + 1;
2126 
2127 		return true;
2128 	}
2129 
convert_buffer(char_t * & out_buffer,size_t & out_length,xml_encoding encoding,const void * contents,size_t size,bool is_mutable)2130 	PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2131 	{
2132 		// get native encoding
2133 		xml_encoding wchar_encoding = get_wchar_encoding();
2134 
2135 		// fast path: no conversion required
2136 		if (encoding == wchar_encoding)
2137 			return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2138 
2139 		// only endian-swapping is required
2140 		if (need_endian_swap_utf(encoding, wchar_encoding))
2141 			return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
2142 
2143 		// source encoding is utf8
2144 		if (encoding == encoding_utf8)
2145 			return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
2146 
2147 		// source encoding is utf16
2148 		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2149 		{
2150 			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2151 
2152 			return (native_encoding == encoding) ?
2153 				convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2154 				convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2155 		}
2156 
2157 		// source encoding is utf32
2158 		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2159 		{
2160 			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2161 
2162 			return (native_encoding == encoding) ?
2163 				convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2164 				convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2165 		}
2166 
2167 		// source encoding is latin1
2168 		if (encoding == encoding_latin1)
2169 			return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
2170 
2171 		assert(false && "Invalid encoding"); // unreachable
2172 		return false;
2173 	}
2174 #else
convert_buffer_generic(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,D)2175 	template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2176 	{
2177 		const typename D::type* data = static_cast<const typename D::type*>(contents);
2178 		size_t data_length = size / sizeof(typename D::type);
2179 
2180 		// first pass: get length in utf8 units
2181 		size_t length = D::process(data, data_length, 0, utf8_counter());
2182 
2183 		// allocate buffer of suitable length
2184 		char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2185 		if (!buffer) return false;
2186 
2187 		// second pass: convert utf16 input to utf8
2188 		uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2189 		uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
2190 
2191 		assert(oend == obegin + length);
2192 		*oend = 0;
2193 
2194 		out_buffer = buffer;
2195 		out_length = length + 1;
2196 
2197 		return true;
2198 	}
2199 
get_latin1_7bit_prefix_length(const uint8_t * data,size_t size)2200 	PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
2201 	{
2202 		for (size_t i = 0; i < size; ++i)
2203 			if (data[i] > 127)
2204 				return i;
2205 
2206 		return size;
2207 	}
2208 
convert_buffer_latin1(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2209 	PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2210 	{
2211 		const uint8_t* data = static_cast<const uint8_t*>(contents);
2212 		size_t data_length = size;
2213 
2214 		// get size of prefix that does not need utf8 conversion
2215 		size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
2216 		assert(prefix_length <= data_length);
2217 
2218 		const uint8_t* postfix = data + prefix_length;
2219 		size_t postfix_length = data_length - prefix_length;
2220 
2221 		// if no conversion is needed, just return the original buffer
2222 		if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2223 
2224 		// first pass: get length in utf8 units
2225 		size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
2226 
2227 		// allocate buffer of suitable length
2228 		char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2229 		if (!buffer) return false;
2230 
2231 		// second pass: convert latin1 input to utf8
2232 		memcpy(buffer, data, prefix_length);
2233 
2234 		uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2235 		uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
2236 
2237 		assert(oend == obegin + length);
2238 		*oend = 0;
2239 
2240 		out_buffer = buffer;
2241 		out_length = length + 1;
2242 
2243 		return true;
2244 	}
2245 
convert_buffer(char_t * & out_buffer,size_t & out_length,xml_encoding encoding,const void * contents,size_t size,bool is_mutable)2246 	PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2247 	{
2248 		// fast path: no conversion required
2249 		if (encoding == encoding_utf8)
2250 			return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2251 
2252 		// source encoding is utf16
2253 		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2254 		{
2255 			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2256 
2257 			return (native_encoding == encoding) ?
2258 				convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2259 				convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2260 		}
2261 
2262 		// source encoding is utf32
2263 		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2264 		{
2265 			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2266 
2267 			return (native_encoding == encoding) ?
2268 				convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2269 				convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2270 		}
2271 
2272 		// source encoding is latin1
2273 		if (encoding == encoding_latin1)
2274 			return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
2275 
2276 		assert(false && "Invalid encoding"); // unreachable
2277 		return false;
2278 	}
2279 #endif
2280 
as_utf8_begin(const wchar_t * str,size_t length)2281 	PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
2282 	{
2283 		// get length in utf8 characters
2284 		return wchar_decoder::process(str, length, 0, utf8_counter());
2285 	}
2286 
as_utf8_end(char * buffer,size_t size,const wchar_t * str,size_t length)2287 	PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
2288 	{
2289 		// convert to utf8
2290 		uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
2291 		uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
2292 
2293 		assert(begin + size == end);
2294 		(void)!end;
2295 		(void)!size;
2296 	}
2297 
2298 #ifndef PUGIXML_NO_STL
as_utf8_impl(const wchar_t * str,size_t length)2299 	PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
2300 	{
2301 		// first pass: get length in utf8 characters
2302 		size_t size = as_utf8_begin(str, length);
2303 
2304 		// allocate resulting string
2305 		std::string result;
2306 		result.resize(size);
2307 
2308 		// second pass: convert to utf8
2309 		if (size > 0) as_utf8_end(&result[0], size, str, length);
2310 
2311 		return result;
2312 	}
2313 
as_wide_impl(const char * str,size_t size)2314 	PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
2315 	{
2316 		const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
2317 
2318 		// first pass: get length in wchar_t units
2319 		size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
2320 
2321 		// allocate resulting string
2322 		std::basic_string<wchar_t> result;
2323 		result.resize(length);
2324 
2325 		// second pass: convert to wchar_t
2326 		if (length > 0)
2327 		{
2328 			wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
2329 			wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
2330 
2331 			assert(begin + length == end);
2332 			(void)!end;
2333 		}
2334 
2335 		return result;
2336 	}
2337 #endif
2338 
2339 	template <typename Header>
strcpy_insitu_allow(size_t length,const Header & header,uintptr_t header_mask,char_t * target)2340 	inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
2341 	{
2342 		// never reuse shared memory
2343 		if (header & xml_memory_page_contents_shared_mask) return false;
2344 
2345 		size_t target_length = strlength(target);
2346 
2347 		// always reuse document buffer memory if possible
2348 		if ((header & header_mask) == 0) return target_length >= length;
2349 
2350 		// reuse heap memory if waste is not too great
2351 		const size_t reuse_threshold = 32;
2352 
2353 		return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
2354 	}
2355 
2356 	template <typename String, typename Header>
strcpy_insitu(String & dest,Header & header,uintptr_t header_mask,const char_t * source,size_t source_length)2357 	PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
2358 	{
2359 		if (source_length == 0)
2360 		{
2361 			// empty string and null pointer are equivalent, so just deallocate old memory
2362 			xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2363 
2364 			if (header & header_mask) alloc->deallocate_string(dest);
2365 
2366 			// mark the string as not allocated
2367 			dest = 0;
2368 			header &= ~header_mask;
2369 
2370 			return true;
2371 		}
2372 		else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
2373 		{
2374 			// we can reuse old buffer, so just copy the new data (including zero terminator)
2375 			memcpy(dest, source, source_length * sizeof(char_t));
2376 			dest[source_length] = 0;
2377 
2378 			return true;
2379 		}
2380 		else
2381 		{
2382 			xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2383 
2384 			if (!alloc->reserve()) return false;
2385 
2386 			// allocate new buffer
2387 			char_t* buf = alloc->allocate_string(source_length + 1);
2388 			if (!buf) return false;
2389 
2390 			// copy the string (including zero terminator)
2391 			memcpy(buf, source, source_length * sizeof(char_t));
2392 			buf[source_length] = 0;
2393 
2394 			// deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
2395 			if (header & header_mask) alloc->deallocate_string(dest);
2396 
2397 			// the string is now allocated, so set the flag
2398 			dest = buf;
2399 			header |= header_mask;
2400 
2401 			return true;
2402 		}
2403 	}
2404 
2405 	struct gap
2406 	{
2407 		char_t* end;
2408 		size_t size;
2409 
gapgap2410 		gap(): end(0), size(0)
2411 		{
2412 		}
2413 
2414 		// Push new gap, move s count bytes further (skipping the gap).
2415 		// Collapse previous gap.
pushgap2416 		void push(char_t*& s, size_t count)
2417 		{
2418 			if (end) // there was a gap already; collapse it
2419 			{
2420 				// Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
2421 				assert(s >= end);
2422 				memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2423 			}
2424 
2425 			s += count; // end of current gap
2426 
2427 			// "merge" two gaps
2428 			end = s;
2429 			size += count;
2430 		}
2431 
2432 		// Collapse all gaps, return past-the-end pointer
flushgap2433 		char_t* flush(char_t* s)
2434 		{
2435 			if (end)
2436 			{
2437 				// Move [old_gap_end, current_pos) to [old_gap_start, ...)
2438 				assert(s >= end);
2439 				memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2440 
2441 				return s - size;
2442 			}
2443 			else return s;
2444 		}
2445 	};
2446 
	// Decodes a single character reference or predefined entity in place.
	// s points at the character that introduces the reference (the callers in this file invoke it when *s == '&');
	// g records the hole that appears because the decoded text is shorter than its escaped form.
	// Recognized forms: &#DDD; (decimal), &#xHHH; (hex), &amp; &apos; &gt; &lt; &quot;
	// On a match the decoded character is written starting at s, the leftover characters are registered with g.push,
	// and the returned pointer is just past the terminating ';'.
	// On a mismatch the buffer is left unmodified and the returned pointer is the position at which matching stopped
	// (always past s).
	PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
	{
		// stre is the read cursor; s stays at the start of the reference until something is written
		char_t* stre = s + 1;

		switch (*stre)
		{
			case '#':	// &#...
			{
				// accumulated code point; unsigned arithmetic, so overlong digit sequences wrap instead of being rejected
				unsigned int ucsc = 0;

				if (stre[1] == 'x') // &#x... (hex code)
				{
					stre += 2;

					char_t ch = *stre;

					// "&#x;" has no digits: not a reference
					if (ch == ';') return stre;

					for (;;)
					{
						// the unsigned cast turns characters below '0' into large values, so one comparison checks both bounds
						if (static_cast<unsigned int>(ch - '0') <= 9)
							ucsc = 16 * ucsc + (ch - '0');
						// (ch | ' ') maps 'A'..'F' onto 'a'..'f'
						else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
							ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
						else if (ch == ';')
							break;
						else // cancel
							return stre;

						ch = *++stre;
					}

					// step over ';'
					++stre;
				}
				else	// &#... (dec code)
				{
					char_t ch = *++stre;

					// "&#;" has no digits: not a reference
					if (ch == ';') return stre;

					for (;;)
					{
						if (static_cast<unsigned int>(ch - '0') <= 9)
							ucsc = 10 * ucsc + (ch - '0');
						else if (ch == ';')
							break;
						else // cancel
							return stre;

						ch = *++stre;
					}

					// step over ';'
					++stre;
				}

				// write the code point at the start of the reference; s is moved to the pointer the writer returns
				// (presumably the position just past the written units - confirm against the writer implementations)
			#ifdef PUGIXML_WCHAR_MODE
				s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
			#else
				s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
			#endif

				// everything between the written output and the end of the reference is now a hole
				g.push(s, stre - s);
				return stre;
			}

			case 'a':	// &a
			{
				++stre;

				if (*stre == 'm') // &am
				{
					if (*++stre == 'p' && *++stre == ';') // &amp;
					{
						*s++ = '&';
						++stre;

						g.push(s, stre - s);
						return stre;
					}
				}
				else if (*stre == 'p') // &ap
				{
					if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
					{
						*s++ = '\'';
						++stre;

						g.push(s, stre - s);
						return stre;
					}
				}
				break;
			}

			case 'g': // &g
			{
				if (*++stre == 't' && *++stre == ';') // &gt;
				{
					*s++ = '>';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
				break;
			}

			case 'l': // &l
			{
				if (*++stre == 't' && *++stre == ';') // &lt;
				{
					*s++ = '<';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
				break;
			}

			case 'q': // &q
			{
				if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
				{
					*s++ = '"';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
				break;
			}

			default:
				break;
		}

		// not a recognized entity: leave the text as is and resume where matching stopped
		return stre;
	}
2586 
	// Parser utilities
	// These macros expand in place and refer to names that must exist at the expansion site
	// (s, endch, optmsk, cursor, alloc, ch, error_offset, error_status).
	// PUGI__ENDSWITH:         true if c equals e, or if c is 0 and endch equals e
	// PUGI__SKIPWS:           advances s past characters classified as ct_space
	// PUGI__OPTSET:           non-zero if any bit of OPT is set in optmsk
	// PUGI__PUSHNODE:         appends a new node of TYPE under cursor and makes it the cursor; reports status_out_of_memory if that fails
	// PUGI__POPNODE:          moves cursor to its parent
	// PUGI__SCANFOR:          advances s until X holds or the zero terminator is reached
	// PUGI__SCANWHILE:        advances s while X holds (X itself has to stop at the terminator)
	// PUGI__SCANWHILE_UNROLL: same as PUGI__SCANWHILE with X written in terms of ss (the character under test), checked four characters per iteration
	// PUGI__ENDSEG:           saves *s in ch, overwrites it with 0 and advances s
	// PUGI__THROW_ERROR:      records m as error_offset and err as error_status, then returns a null char_t* from the enclosing function
	// PUGI__CHECK_ERROR:      reports err at m if s is at the zero terminator
	#define PUGI__ENDSWITH(c, e)        ((c) == (e) || ((c) == 0 && endch == (e)))
	#define PUGI__SKIPWS()              { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
	#define PUGI__OPTSET(OPT)           ( optmsk & (OPT) )
	#define PUGI__PUSHNODE(TYPE)        { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
	#define PUGI__POPNODE()             { cursor = cursor->parent; }
	#define PUGI__SCANFOR(X)            { while (*s != 0 && !(X)) ++s; }
	#define PUGI__SCANWHILE(X)          { while (X) ++s; }
	#define PUGI__SCANWHILE_UNROLL(X)   { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
	#define PUGI__ENDSEG()              { ch = *s; *s = 0; ++s; }
	#define PUGI__THROW_ERROR(err, m)   return error_offset = m, error_status = err, static_cast<char_t*>(0)
	#define PUGI__CHECK_ERROR(err, m)   { if (*s == 0) PUGI__THROW_ERROR(err, m); }
2599 
strconv_comment(char_t * s,char_t endch)2600 	PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
2601 	{
2602 		gap g;
2603 
2604 		while (true)
2605 		{
2606 			PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
2607 
2608 			if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2609 			{
2610 				*s++ = '\n'; // replace first one with 0x0a
2611 
2612 				if (*s == '\n') g.push(s, 1);
2613 			}
2614 			else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
2615 			{
2616 				*g.flush(s) = 0;
2617 
2618 				return s + (s[2] == '>' ? 3 : 2);
2619 			}
2620 			else if (*s == 0)
2621 			{
2622 				return 0;
2623 			}
2624 			else ++s;
2625 		}
2626 	}
2627 
strconv_cdata(char_t * s,char_t endch)2628 	PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
2629 	{
2630 		gap g;
2631 
2632 		while (true)
2633 		{
2634 			PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
2635 
2636 			if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2637 			{
2638 				*s++ = '\n'; // replace first one with 0x0a
2639 
2640 				if (*s == '\n') g.push(s, 1);
2641 			}
2642 			else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
2643 			{
2644 				*g.flush(s) = 0;
2645 
2646 				return s + 1;
2647 			}
2648 			else if (*s == 0)
2649 			{
2650 				return 0;
2651 			}
2652 			else ++s;
2653 		}
2654 	}
2655 
2656 	typedef char_t* (*strconv_pcdata_t)(char_t*);
2657 
2658 	template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
2659 	{
parsestrconv_pcdata_impl2660 		static char_t* parse(char_t* s)
2661 		{
2662 			gap g;
2663 
2664 			char_t* begin = s;
2665 
2666 			while (true)
2667 			{
2668 				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));
2669 
2670 				if (*s == '<') // PCDATA ends here
2671 				{
2672 					char_t* end = g.flush(s);
2673 
2674 					if (opt_trim::value)
2675 						while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2676 							--end;
2677 
2678 					*end = 0;
2679 
2680 					return s + 1;
2681 				}
2682 				else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2683 				{
2684 					*s++ = '\n'; // replace first one with 0x0a
2685 
2686 					if (*s == '\n') g.push(s, 1);
2687 				}
2688 				else if (opt_escape::value && *s == '&')
2689 				{
2690 					s = strconv_escape(s, g);
2691 				}
2692 				else if (*s == 0)
2693 				{
2694 					char_t* end = g.flush(s);
2695 
2696 					if (opt_trim::value)
2697 						while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2698 							--end;
2699 
2700 					*end = 0;
2701 
2702 					return s;
2703 				}
2704 				else ++s;
2705 			}
2706 		}
2707 	};
2708 
get_strconv_pcdata(unsigned int optmask)2709 	PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
2710 	{
2711 		PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
2712 
2713 		switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (trim eol escapes); this simultaneously checks 3 options from assertion above
2714 		{
2715 		case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse;
2716 		case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse;
2717 		case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse;
2718 		case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse;
2719 		case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse;
2720 		case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse;
2721 		case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse;
2722 		case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse;
2723 		default: assert(false); return 0; // unreachable
2724 		}
2725 	}
2726 
	// Signature shared by all attribute value converters: process the value at the given position in place,
	// stopping at the given closing quote character.
	typedef char_t* (*strconv_attribute_t)(char_t*, char_t);

	// Attribute value converters; opt_escape selects at compile time whether '&' references are decoded.
	// Every converter rewrites the value in place, zero-terminates it, and returns the position after the closing
	// quote, or 0 if the input ends before the closing quote is found.
	template <typename opt_escape> struct strconv_attribute_impl
	{
		// Whitespace normalization: leading and trailing ct_space characters are removed and every inner run of them
		// is replaced by a single ' '.
		static char_t* parse_wnorm(char_t* s, char_t end_quote)
		{
			gap g;

			// trim leading whitespaces
			if (PUGI__IS_CHARTYPE(*s, ct_space))
			{
				char_t* str = s;

				do ++str;
				while (PUGI__IS_CHARTYPE(*str, ct_space));

				// the whole leading run becomes a hole; s is advanced past it
				g.push(s, str - s);
			}

			while (true)
			{
				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));

				if (*s == end_quote)
				{
					char_t* str = g.flush(s);

					// terminate, then keep moving the terminator back while the preceding character is whitespace
					// NOTE(review): the backward scan has no lower bound of its own; it presumably relies on a
					// non-space character (such as the opening quote) preceding the value - confirm against the caller
					do *str-- = 0;
					while (PUGI__IS_CHARTYPE(*str, ct_space));

					return s + 1;
				}
				else if (PUGI__IS_CHARTYPE(*s, ct_space))
				{
					// the first whitespace character of a run is kept as a plain space
					*s++ = ' ';

					// the rest of the run is dropped through the gap
					if (PUGI__IS_CHARTYPE(*s, ct_space))
					{
						char_t* str = s + 1;
						while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;

						g.push(s, str - s);
					}
				}
				else if (opt_escape::value && *s == '&')
				{
					s = strconv_escape(s, g);
				}
				else if (!*s)
				{
					return 0;
				}
				else ++s;
			}
		}

		// Whitespace conversion: every ct_space character becomes ' '; a \r\n pair becomes a single ' '.
		static char_t* parse_wconv(char_t* s, char_t end_quote)
		{
			gap g;

			while (true)
			{
				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));

				if (*s == end_quote)
				{
					*g.flush(s) = 0;

					return s + 1;
				}
				else if (PUGI__IS_CHARTYPE(*s, ct_space))
				{
					if (*s == '\r')
					{
						*s++ = ' ';

						// the 0x0a of a 0x0d 0x0a pair is dropped through the gap
						if (*s == '\n') g.push(s, 1);
					}
					else *s++ = ' ';
				}
				else if (opt_escape::value && *s == '&')
				{
					s = strconv_escape(s, g);
				}
				else if (!*s)
				{
					return 0;
				}
				else ++s;
			}
		}

		// End-of-line normalization only: \r and \r\n become \n, other characters are kept.
		static char_t* parse_eol(char_t* s, char_t end_quote)
		{
			gap g;

			while (true)
			{
				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));

				if (*s == end_quote)
				{
					*g.flush(s) = 0;

					return s + 1;
				}
				else if (*s == '\r')
				{
					*s++ = '\n';

					// the 0x0a of a 0x0d 0x0a pair is dropped through the gap
					if (*s == '\n') g.push(s, 1);
				}
				else if (opt_escape::value && *s == '&')
				{
					s = strconv_escape(s, g);
				}
				else if (!*s)
				{
					return 0;
				}
				else ++s;
			}
		}

		// No whitespace or end-of-line processing: only reference decoding (when enabled) and termination.
		static char_t* parse_simple(char_t* s, char_t end_quote)
		{
			gap g;

			while (true)
			{
				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));

				if (*s == end_quote)
				{
					*g.flush(s) = 0;

					return s + 1;
				}
				else if (opt_escape::value && *s == '&')
				{
					s = strconv_escape(s, g);
				}
				else if (!*s)
				{
					return 0;
				}
				else ++s;
			}
		}
	};
2877 
get_strconv_attribute(unsigned int optmask)2878 	PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
2879 	{
2880 		PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
2881 
2882 		switch ((optmask >> 4) & 15) // get bitmask for flags (wnorm wconv eol escapes); this simultaneously checks 4 options from assertion above
2883 		{
2884 		case 0:  return strconv_attribute_impl<opt_false>::parse_simple;
2885 		case 1:  return strconv_attribute_impl<opt_true>::parse_simple;
2886 		case 2:  return strconv_attribute_impl<opt_false>::parse_eol;
2887 		case 3:  return strconv_attribute_impl<opt_true>::parse_eol;
2888 		case 4:  return strconv_attribute_impl<opt_false>::parse_wconv;
2889 		case 5:  return strconv_attribute_impl<opt_true>::parse_wconv;
2890 		case 6:  return strconv_attribute_impl<opt_false>::parse_wconv;
2891 		case 7:  return strconv_attribute_impl<opt_true>::parse_wconv;
2892 		case 8:  return strconv_attribute_impl<opt_false>::parse_wnorm;
2893 		case 9:  return strconv_attribute_impl<opt_true>::parse_wnorm;
2894 		case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
2895 		case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
2896 		case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
2897 		case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
2898 		case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
2899 		case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
2900 		default: assert(false); return 0; // unreachable
2901 		}
2902 	}
2903 
make_parse_result(xml_parse_status status,ptrdiff_t offset=0)2904 	inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
2905 	{
2906 		xml_parse_result result;
2907 		result.status = status;
2908 		result.offset = offset;
2909 
2910 		return result;
2911 	}
2912 
2913 	struct xml_parser
2914 	{
2915 		xml_allocator* alloc;
2916 		char_t* error_offset;
2917 		xml_parse_status error_status;
2918 
xml_parserxml_parser2919 		xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
2920 		{
2921 		}
2922 
2923 		// DOCTYPE consists of nested sections of the following possible types:
2924 		// <!-- ... -->, <? ... ?>, "...", '...'
2925 		// <![...]]>
2926 		// <!...>
2927 		// First group can not contain nested groups
2928 		// Second group can contain nested groups of the same type
2929 		// Third group can contain all other groups
parse_doctype_primitivexml_parser2930 		char_t* parse_doctype_primitive(char_t* s)
2931 		{
2932 			if (*s == '"' || *s == '\'')
2933 			{
2934 				// quoted string
2935 				char_t ch = *s++;
2936 				PUGI__SCANFOR(*s == ch);
2937 				if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2938 
2939 				s++;
2940 			}
2941 			else if (s[0] == '<' && s[1] == '?')
2942 			{
2943 				// <? ... ?>
2944 				s += 2;
2945 				PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
2946 				if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2947 
2948 				s += 2;
2949 			}
2950 			else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
2951 			{
2952 				s += 4;
2953 				PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
2954 				if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2955 
2956 				s += 3;
2957 			}
2958 			else PUGI__THROW_ERROR(status_bad_doctype, s);
2959 
2960 			return s;
2961 		}
2962 
parse_doctype_ignorexml_parser2963 		char_t* parse_doctype_ignore(char_t* s)
2964 		{
2965 			size_t depth = 0;
2966 
2967 			assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
2968 			s += 3;
2969 
2970 			while (*s)
2971 			{
2972 				if (s[0] == '<' && s[1] == '!' && s[2] == '[')
2973 				{
2974 					// nested ignore section
2975 					s += 3;
2976 					depth++;
2977 				}
2978 				else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
2979 				{
2980 					// ignore section end
2981 					s += 3;
2982 
2983 					if (depth == 0)
2984 						return s;
2985 
2986 					depth--;
2987 				}
2988 				else s++;
2989 			}
2990 
2991 			PUGI__THROW_ERROR(status_bad_doctype, s);
2992 		}
2993 
parse_doctype_groupxml_parser2994 		char_t* parse_doctype_group(char_t* s, char_t endch)
2995 		{
2996 			size_t depth = 0;
2997 
2998 			assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
2999 			s += 2;
3000 
3001 			while (*s)
3002 			{
3003 				if (s[0] == '<' && s[1] == '!' && s[2] != '-')
3004 				{
3005 					if (s[2] == '[')
3006 					{
3007 						// ignore
3008 						s = parse_doctype_ignore(s);
3009 						if (!s) return s;
3010 					}
3011 					else
3012 					{
3013 						// some control group
3014 						s += 2;
3015 						depth++;
3016 					}
3017 				}
3018 				else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
3019 				{
3020 					// unknown tag (forbidden), or some primitive group
3021 					s = parse_doctype_primitive(s);
3022 					if (!s) return s;
3023 				}
3024 				else if (*s == '>')
3025 				{
3026 					if (depth == 0)
3027 						return s;
3028 
3029 					depth--;
3030 					s++;
3031 				}
3032 				else s++;
3033 			}
3034 
3035 			if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
3036 
3037 			return s;
3038 		}
3039 
		// Parses a construct that starts with '<!': a comment, a CDATA section or a DOCTYPE declaration.
		// s points at the '!' on entry (parse_tree calls this when *s == '!').
		// cursor is the node that new comment/cdata/doctype nodes are pushed under; it is passed by value,
		// so whatever PUGI__PUSHNODE does to it stays local to this function.
		// optmsk is the parse option mask consulted through PUGI__OPTSET.
		// endch is the original last character of the buffer, which parse() replaced with a null terminator.
		// Returns the position after the construct; callers treat a null return as a parse failure.
		char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
		{
			// parse node contents, starting with exclamation mark
			++s;

			if (*s == '-') // '<!-...'
			{
				++s;

				if (*s == '-') // '<!--...'
				{
					++s;

					if (PUGI__OPTSET(parse_comments))
					{
						PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
						cursor->value = s; // Save the offset.
					}

					// end-of-line conversion is only needed when the comment text is actually kept
					if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
					{
						s = strconv_comment(s, endch);

						if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
					}
					else
					{
						// Scan for terminating '-->'.
						PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
						PUGI__CHECK_ERROR(status_bad_comment, s);

						if (PUGI__OPTSET(parse_comments))
							*s = 0; // Zero-terminate this segment at the first terminating '-'.

						// when the final '>' was the removed last character, only two characters remain to skip
						s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
					}
				}
				else PUGI__THROW_ERROR(status_bad_comment, s);
			}
			else if (*s == '[')
			{
				// '<![CDATA[...'
				if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
				{
					++s;

					if (PUGI__OPTSET(parse_cdata))
					{
						PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
						cursor->value = s; // Save the offset.

						if (PUGI__OPTSET(parse_eol))
						{
							s = strconv_cdata(s, endch);

							if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
						}
						else
						{
							// Scan for terminating ']]>'.
							PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
							PUGI__CHECK_ERROR(status_bad_cdata, s);

							*s++ = 0; // Zero-terminate this segment.
						}
					}
					else // Flagged for discard, but we still have to scan for the terminator.
					{
						// Scan for terminating ']]>'.
						PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
						PUGI__CHECK_ERROR(status_bad_cdata, s);

						++s;
					}

					// both scan branches above leave s on the second ']' of the terminator
					// NOTE(review): this assumes strconv_cdata returns the same position - confirm against its definition
					s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
				}
				else PUGI__THROW_ERROR(status_bad_cdata, s);
			}
			else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
			{
				// step back to the '<' of '<!DOCTYPE', which is what parse_doctype_group expects
				s -= 2;

				// DOCTYPE is only accepted when the cursor has no parent
				if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);

				// '<!DOCTYPE' is 9 characters long, so mark points right after the keyword
				char_t* mark = s + 9;

				s = parse_doctype_group(s, endch);
				if (!s) return s;

				// terminate the doctype text at the closing '>' (unless we are already at the end of the buffer)
				assert((*s == 0 && endch == '>') || *s == '>');
				if (*s) *s++ = 0;

				if (PUGI__OPTSET(parse_doctype))
				{
					// skip whitespace between the keyword and the doctype contents
					while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;

					PUGI__PUSHNODE(node_doctype);

					cursor->value = mark;
				}
			}
			// input ended right after '<!': use the removed last character to choose the most specific error
			else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
			else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
			else PUGI__THROW_ERROR(status_unrecognized_tag, s);

			return s;
		}
3148 
		// Parses a construct that starts with '<?': a processing instruction or an XML declaration.
		// s points at the '?' on entry (parse_tree calls this when *s == '?').
		// ref_cursor is copied into a local on entry and written back before the normal return.
		// optmsk is the parse option mask consulted through PUGI__OPTSET.
		// endch is the original last character of the buffer, which parse() replaced with a null terminator.
		// Returns the position to continue parsing from; callers treat a null return as a parse failure.
		// For a declaration with contents, the returned position is the start of those contents and the cursor
		// is left at the declaration node so that parse_tree continues with attribute parsing.
		char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
		{
			// load into registers
			xml_node_struct* cursor = ref_cursor;
			char_t ch = 0;

			// parse node contents, starting with question mark
			++s;

			// read PI target
			char_t* target = s;

			if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);

			PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
			PUGI__CHECK_ERROR(status_bad_pi, s);

			// determine node type; stricmp / strcasecmp is not portable
			// (| ' ' sets bit 0x20, which maps ASCII upper-case letters to lower case; the target must be exactly 3 characters long)
			bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;

			// only build a node when the corresponding option is enabled; otherwise the construct is skipped below
			if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
			{
				if (declaration)
				{
					// disallow non top-level declarations
					if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);

					PUGI__PUSHNODE(node_declaration);
				}
				else
				{
					PUGI__PUSHNODE(node_pi);
				}

				cursor->name = target;

				// the code below relies on this leaving the character that followed the target in 'ch'
				PUGI__ENDSEG();

				// parse value/attributes
				if (ch == '?')
				{
					// empty node
					if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
					s += (*s == '>');

					PUGI__POPNODE();
				}
				else if (PUGI__IS_CHARTYPE(ch, ct_space))
				{
					PUGI__SKIPWS();

					// scan for tag end
					char_t* value = s;

					PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
					PUGI__CHECK_ERROR(status_bad_pi, s);

					if (declaration)
					{
						// replace ending ? with / so that 'element' terminates properly
						*s = '/';

						// we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
						s = value;
					}
					else
					{
						// store value and step over >
						cursor->value = value;

						PUGI__POPNODE();

						PUGI__ENDSEG();

						s += (*s == '>');
					}
				}
				else PUGI__THROW_ERROR(status_bad_pi, s);
			}
			else
			{
				// scan for tag end
				PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
				PUGI__CHECK_ERROR(status_bad_pi, s);

				// step over '?>' (or only '?' when the '>' was the removed last character)
				s += (s[1] == '>' ? 2 : 1);
			}

			// store from registers
			ref_cursor = cursor;

			return s;
		}
3242 
		// Main parsing loop: walks the zero-terminated buffer starting at s and builds nodes under root.
		// Element start tags, attributes, end tags and PCDATA are handled inline; '<?' and '<!' constructs
		// are delegated to parse_question and parse_exclamation.
		// optmsk is the parse option mask (it selects the attribute/PCDATA conversion routines and is read by PUGI__OPTSET).
		// endch is the original last character of the buffer, which parse() replaced with a null terminator;
		// it is consulted wherever the input may legitimately end one character early.
		// Returns the final position on success; parse() ignores the return value and reads error_status / error_offset instead.
		char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
		{
			// resolve the conversion routines once, up front, instead of testing the options per value
			strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
			strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);

			char_t ch = 0;
			xml_node_struct* cursor = root;
			char_t* mark = s;

			while (*s != 0)
			{
				if (*s == '<')
				{
					++s;

				// also entered via goto from the PCDATA branch below, once that branch has stepped over a '<'
				LOC_TAG:
					if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
					{
						PUGI__PUSHNODE(node_element); // Append a new node to the tree.

						cursor->name = s;

						PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
						PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.

						if (ch == '>')
						{
							// end of tag
						}
						else if (PUGI__IS_CHARTYPE(ch, ct_space))
						{
						// also entered via goto after parse_question leaves the cursor at a declaration node
						LOC_ATTRIBUTES:
							while (true)
							{
								PUGI__SKIPWS(); // Eat any whitespace.

								if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
								{
									xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute.
									if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);

									a->name = s; // Save the offset.

									PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
									PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.

									// whitespace between the attribute name and '=': skip it and re-read the next character into 'ch'
									if (PUGI__IS_CHARTYPE(ch, ct_space))
									{
										PUGI__SKIPWS(); // Eat any whitespace.

										ch = *s;
										++s;
									}

									if (ch == '=') // '<... #=...'
									{
										PUGI__SKIPWS(); // Eat any whitespace.

										if (*s == '"' || *s == '\'') // '<... #="...'
										{
											ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
											++s; // Step over the quote.
											a->value = s; // Save the offset.

											s = strconv_attribute(s, ch);

											if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);

											// After this line the loop continues from the start;
											// Whitespaces, / and > are ok, symbols and EOF are wrong,
											// everything else will be detected
											if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
										}
										else PUGI__THROW_ERROR(status_bad_attribute, s);
									}
									else PUGI__THROW_ERROR(status_bad_attribute, s);
								}
								else if (*s == '/')
								{
									++s;

									if (*s == '>')
									{
										PUGI__POPNODE();
										s++;
										break;
									}
									else if (*s == 0 && endch == '>')
									{
										// the closing '>' was the removed last character of the buffer
										PUGI__POPNODE();
										break;
									}
									else PUGI__THROW_ERROR(status_bad_start_element, s);
								}
								else if (*s == '>')
								{
									++s;

									break;
								}
								else if (*s == 0 && endch == '>')
								{
									// the closing '>' was the removed last character of the buffer
									break;
								}
								else PUGI__THROW_ERROR(status_bad_start_element, s);
							}

							// !!!
						}
						else if (ch == '/') // '<#.../'
						{
							if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);

							PUGI__POPNODE(); // Pop.

							s += (*s == '>');
						}
						else if (ch == 0)
						{
							// we stepped over null terminator, backtrack & handle closing tag
							--s;

							if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
						}
						else PUGI__THROW_ERROR(status_bad_start_element, s);
					}
					else if (*s == '/')
					{
						++s;

						mark = s;

						// the closing tag must repeat the name of the node the cursor is currently at
						char_t* name = cursor->name;
						if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark);

						while (PUGI__IS_CHARTYPE(*s, ct_symbol))
						{
							if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
						}

						if (*name)
						{
							// if the only missing part of the name is the removed last character, the tag is truncated rather than mismatched
							if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
							else PUGI__THROW_ERROR(status_end_element_mismatch, mark);
						}

						PUGI__POPNODE(); // Pop.

						PUGI__SKIPWS();

						if (*s == 0)
						{
							if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
						}
						else
						{
							if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
							++s;
						}
					}
					else if (*s == '?') // '<?...'
					{
						s = parse_question(s, cursor, optmsk, endch);
						if (!s) return s;

						assert(cursor);
						if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
					}
					else if (*s == '!') // '<!...'
					{
						s = parse_exclamation(s, cursor, optmsk, endch);
						if (!s) return s;
					}
					else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
					else PUGI__THROW_ERROR(status_unrecognized_tag, s);
				}
				else
				{
					mark = s; // Save this offset while searching for a terminator.

					PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.

					if (*s == '<' || !*s)
					{
						// We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
						assert(mark != s);

						if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
						{
							continue;
						}
						else if (PUGI__OPTSET(parse_ws_pcdata_single))
						{
							// keep whitespace-only text only when it is immediately followed by '</' and the current node has no children yet
							if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
						}
					}

					// unless trimming was requested, the text starts at the original position, including leading whitespace
					if (!PUGI__OPTSET(parse_trim_pcdata))
						s = mark;

					if (cursor->parent || PUGI__OPTSET(parse_fragment))
					{
						if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
						{
							cursor->value = s; // Save the offset.
						}
						else
						{
							PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.

							cursor->value = s; // Save the offset.

							PUGI__POPNODE(); // Pop since this is a standalone.
						}

						s = strconv_pcdata(s);

						if (!*s) break;
					}
					else
					{
						// text at document level without parse_fragment is skipped, not stored
						PUGI__SCANFOR(*s == '<'); // '...<'
						if (!*s) break;

						++s;
					}

					// We're after '<'
					goto LOC_TAG;
				}
			}

			// check that last tag is closed
			if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);

			return s;
		}
3480 
3481 	#ifdef PUGIXML_WCHAR_MODE
parse_skip_bomxml_parser3482 		static char_t* parse_skip_bom(char_t* s)
3483 		{
3484 			unsigned int bom = 0xfeff;
3485 			return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
3486 		}
3487 	#else
parse_skip_bomxml_parser3488 		static char_t* parse_skip_bom(char_t* s)
3489 		{
3490 			return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
3491 		}
3492 	#endif
3493 
has_element_node_siblingsxml_parser3494 		static bool has_element_node_siblings(xml_node_struct* node)
3495 		{
3496 			while (node)
3497 			{
3498 				if (PUGI__NODETYPE(node) == node_element) return true;
3499 
3500 				node = node->next_sibling;
3501 			}
3502 
3503 			return false;
3504 		}
3505 
parsexml_parser3506 		static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
3507 		{
3508 			// early-out for empty documents
3509 			if (length == 0)
3510 				return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
3511 
3512 			// get last child of the root before parsing
3513 			xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
3514 
3515 			// create parser on stack
3516 			xml_parser parser(static_cast<xml_allocator*>(xmldoc));
3517 
3518 			// save last character and make buffer zero-terminated (speeds up parsing)
3519 			char_t endch = buffer[length - 1];
3520 			buffer[length - 1] = 0;
3521 
3522 			// skip BOM to make sure it does not end up as part of parse output
3523 			char_t* buffer_data = parse_skip_bom(buffer);
3524 
3525 			// perform actual parsing
3526 			parser.parse_tree(buffer_data, root, optmsk, endch);
3527 
3528 			xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
3529 			assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
3530 
3531 			if (result)
3532 			{
3533 				// since we removed last character, we have to handle the only possible false positive (stray <)
3534 				if (endch == '<')
3535 					return make_parse_result(status_unrecognized_tag, length - 1);
3536 
3537 				// check if there are any element nodes parsed
3538 				xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0;
3539 
3540 				if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
3541 					return make_parse_result(status_no_document_element, length - 1);
3542 			}
3543 			else
3544 			{
3545 				// roll back offset if it occurs on a null terminator in the source buffer
3546 				if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
3547 					result.offset--;
3548 			}
3549 
3550 			return result;
3551 		}
3552 	};
3553 
3554 	// Output facilities
get_write_native_encoding()3555 	PUGI__FN xml_encoding get_write_native_encoding()
3556 	{
3557 	#ifdef PUGIXML_WCHAR_MODE
3558 		return get_wchar_encoding();
3559 	#else
3560 		return encoding_utf8;
3561 	#endif
3562 	}
3563 
get_write_encoding(xml_encoding encoding)3564 	PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
3565 	{
3566 		// replace wchar encoding with utf implementation
3567 		if (encoding == encoding_wchar) return get_wchar_encoding();
3568 
3569 		// replace utf16 encoding with utf16 with specific endianness
3570 		if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3571 
3572 		// replace utf32 encoding with utf32 with specific endianness
3573 		if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3574 
3575 		// only do autodetection if no explicit encoding is requested
3576 		if (encoding != encoding_auto) return encoding;
3577 
3578 		// assume utf8 encoding
3579 		return encoding_utf8;
3580 	}
3581 
convert_buffer_output_generic(typename T::value_type dest,const char_t * data,size_t length,D,T)3582 	template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
3583 	{
3584 		PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3585 
3586 		typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3587 
3588 		return static_cast<size_t>(end - dest) * sizeof(*dest);
3589 	}
3590 
convert_buffer_output_generic(typename T::value_type dest,const char_t * data,size_t length,D,T,bool opt_swap)3591 	template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
3592 	{
3593 		PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3594 
3595 		typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3596 
3597 		if (opt_swap)
3598 		{
3599 			for (typename T::value_type i = dest; i != end; ++i)
3600 				*i = endian_swap(*i);
3601 		}
3602 
3603 		return static_cast<size_t>(end - dest) * sizeof(*dest);
3604 	}
3605 
3606 #ifdef PUGIXML_WCHAR_MODE
get_valid_length(const char_t * data,size_t length)3607 	PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3608 	{
3609 		if (length < 1) return 0;
3610 
3611 		// discard last character if it's the lead of a surrogate pair
3612 		return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
3613 	}
3614 
convert_buffer_output(char_t * r_char,uint8_t * r_u8,uint16_t * r_u16,uint32_t * r_u32,const char_t * data,size_t length,xml_encoding encoding)3615 	PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3616 	{
3617 		// only endian-swapping is required
3618 		if (need_endian_swap_utf(encoding, get_wchar_encoding()))
3619 		{
3620 			convert_wchar_endian_swap(r_char, data, length);
3621 
3622 			return length * sizeof(char_t);
3623 		}
3624 
3625 		// convert to utf8
3626 		if (encoding == encoding_utf8)
3627 			return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
3628 
3629 		// convert to utf16
3630 		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3631 		{
3632 			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3633 
3634 			return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
3635 		}
3636 
3637 		// convert to utf32
3638 		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3639 		{
3640 			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3641 
3642 			return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
3643 		}
3644 
3645 		// convert to latin1
3646 		if (encoding == encoding_latin1)
3647 			return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
3648 
3649 		assert(false && "Invalid encoding"); // unreachable
3650 		return 0;
3651 	}
3652 #else
get_valid_length(const char_t * data,size_t length)3653 	PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3654 	{
3655 		if (length < 5) return 0;
3656 
3657 		for (size_t i = 1; i <= 4; ++i)
3658 		{
3659 			uint8_t ch = static_cast<uint8_t>(data[length - i]);
3660 
3661 			// either a standalone character or a leading one
3662 			if ((ch & 0xc0) != 0x80) return length - i;
3663 		}
3664 
3665 		// there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
3666 		return length;
3667 	}
3668 
convert_buffer_output(char_t *,uint8_t * r_u8,uint16_t * r_u16,uint32_t * r_u32,const char_t * data,size_t length,xml_encoding encoding)3669 	PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3670 	{
3671 		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3672 		{
3673 			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3674 
3675 			return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
3676 		}
3677 
3678 		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3679 		{
3680 			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3681 
3682 			return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding);
3683 		}
3684 
3685 		if (encoding == encoding_latin1)
3686 			return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
3687 
3688 		assert(false && "Invalid encoding"); // unreachable
3689 		return 0;
3690 	}
3691 #endif
3692 
3693 	class xml_buffered_writer
3694 	{
3695 		xml_buffered_writer(const xml_buffered_writer&);
3696 		xml_buffered_writer& operator=(const xml_buffered_writer&);
3697 
3698 	public:
xml_buffered_writer(xml_writer & writer_,xml_encoding user_encoding)3699 		xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
3700 		{
3701 			PUGI__STATIC_ASSERT(bufcapacity >= 8);
3702 		}
3703 
flush()3704 		size_t flush()
3705 		{
3706 			flush(buffer, bufsize);
3707 			bufsize = 0;
3708 			return 0;
3709 		}
3710 
flush(const char_t * data,size_t size)3711 		void flush(const char_t* data, size_t size)
3712 		{
3713 			if (size == 0) return;
3714 
3715 			// fast path, just write data
3716 			if (encoding == get_write_native_encoding())
3717 				writer.write(data, size * sizeof(char_t));
3718 			else
3719 			{
3720 				// convert chunk
3721 				size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
3722 				assert(result <= sizeof(scratch));
3723 
3724 				// write data
3725 				writer.write(scratch.data_u8, result);
3726 			}
3727 		}
3728 
write_direct(const char_t * data,size_t length)3729 		void write_direct(const char_t* data, size_t length)
3730 		{
3731 			// flush the remaining buffer contents
3732 			flush();
3733 
3734 			// handle large chunks
3735 			if (length > bufcapacity)
3736 			{
3737 				if (encoding == get_write_native_encoding())
3738 				{
3739 					// fast path, can just write data chunk
3740 					writer.write(data, length * sizeof(char_t));
3741 					return;
3742 				}
3743 
3744 				// need to convert in suitable chunks
3745 				while (length > bufcapacity)
3746 				{
3747 					// get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
3748 					// and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
3749 					size_t chunk_size = get_valid_length(data, bufcapacity);
3750 					assert(chunk_size);
3751 
3752 					// convert chunk and write
3753 					flush(data, chunk_size);
3754 
3755 					// iterate
3756 					data += chunk_size;
3757 					length -= chunk_size;
3758 				}
3759 
3760 				// small tail is copied below
3761 				bufsize = 0;
3762 			}
3763 
3764 			memcpy(buffer + bufsize, data, length * sizeof(char_t));
3765 			bufsize += length;
3766 		}
3767 
write_buffer(const char_t * data,size_t length)3768 		void write_buffer(const char_t* data, size_t length)
3769 		{
3770 			size_t offset = bufsize;
3771 
3772 			if (offset + length <= bufcapacity)
3773 			{
3774 				memcpy(buffer + offset, data, length * sizeof(char_t));
3775 				bufsize = offset + length;
3776 			}
3777 			else
3778 			{
3779 				write_direct(data, length);
3780 			}
3781 		}
3782 
write_string(const char_t * data)3783 		void write_string(const char_t* data)
3784 		{
3785 			// write the part of the string that fits in the buffer
3786 			size_t offset = bufsize;
3787 
3788 			while (*data && offset < bufcapacity)
3789 				buffer[offset++] = *data++;
3790 
3791 			// write the rest
3792 			if (offset < bufcapacity)
3793 			{
3794 				bufsize = offset;
3795 			}
3796 			else
3797 			{
3798 				// backtrack a bit if we have split the codepoint
3799 				size_t length = offset - bufsize;
3800 				size_t extra = length - get_valid_length(data - length, length);
3801 
3802 				bufsize = offset - extra;
3803 
3804 				write_direct(data - extra, strlength(data) + extra);
3805 			}
3806 		}
3807 
write(char_t d0)3808 		void write(char_t d0)
3809 		{
3810 			size_t offset = bufsize;
3811 			if (offset > bufcapacity - 1) offset = flush();
3812 
3813 			buffer[offset + 0] = d0;
3814 			bufsize = offset + 1;
3815 		}
3816 
write(char_t d0,char_t d1)3817 		void write(char_t d0, char_t d1)
3818 		{
3819 			size_t offset = bufsize;
3820 			if (offset > bufcapacity - 2) offset = flush();
3821 
3822 			buffer[offset + 0] = d0;
3823 			buffer[offset + 1] = d1;
3824 			bufsize = offset + 2;
3825 		}
3826 
write(char_t d0,char_t d1,char_t d2)3827 		void write(char_t d0, char_t d1, char_t d2)
3828 		{
3829 			size_t offset = bufsize;
3830 			if (offset > bufcapacity - 3) offset = flush();
3831 
3832 			buffer[offset + 0] = d0;
3833 			buffer[offset + 1] = d1;
3834 			buffer[offset + 2] = d2;
3835 			bufsize = offset + 3;
3836 		}
3837 
write(char_t d0,char_t d1,char_t d2,char_t d3)3838 		void write(char_t d0, char_t d1, char_t d2, char_t d3)
3839 		{
3840 			size_t offset = bufsize;
3841 			if (offset > bufcapacity - 4) offset = flush();
3842 
3843 			buffer[offset + 0] = d0;
3844 			buffer[offset + 1] = d1;
3845 			buffer[offset + 2] = d2;
3846 			buffer[offset + 3] = d3;
3847 			bufsize = offset + 4;
3848 		}
3849 
write(char_t d0,char_t d1,char_t d2,char_t d3,char_t d4)3850 		void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
3851 		{
3852 			size_t offset = bufsize;
3853 			if (offset > bufcapacity - 5) offset = flush();
3854 
3855 			buffer[offset + 0] = d0;
3856 			buffer[offset + 1] = d1;
3857 			buffer[offset + 2] = d2;
3858 			buffer[offset + 3] = d3;
3859 			buffer[offset + 4] = d4;
3860 			bufsize = offset + 5;
3861 		}
3862 
write(char_t d0,char_t d1,char_t d2,char_t d3,char_t d4,char_t d5)3863 		void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
3864 		{
3865 			size_t offset = bufsize;
3866 			if (offset > bufcapacity - 6) offset = flush();
3867 
3868 			buffer[offset + 0] = d0;
3869 			buffer[offset + 1] = d1;
3870 			buffer[offset + 2] = d2;
3871 			buffer[offset + 3] = d3;
3872 			buffer[offset + 4] = d4;
3873 			buffer[offset + 5] = d5;
3874 			bufsize = offset + 6;
3875 		}
3876 
		// utf8 maximum expansion: x4 (-> utf32)
		// utf16 maximum expansion: x2 (-> utf32)
		// utf32 maximum expansion: x1
		// The byte budget (PUGIXML_MEMORY_OUTPUT_STACK when defined, 10240 otherwise) is divided by
		// sizeof(char_t) + 4: each buffered character costs one char_t in 'buffer' plus up to 4 bytes in 'scratch'.
		enum
		{
			bufcapacitybytes =
			#ifdef PUGIXML_MEMORY_OUTPUT_STACK
				PUGIXML_MEMORY_OUTPUT_STACK
			#else
				10240
			#endif
			,
			bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
		};

		// characters accumulated by the write* methods; the first bufsize entries are in use
		char_t buffer[bufcapacity];

		// scratch storage sized for bufcapacity characters at the maximum expansion listed above
		// NOTE(review): presumably the destination of encoding conversion when the buffer is flushed - confirm against flush()
		union
		{
			uint8_t data_u8[4 * bufcapacity];
			uint16_t data_u16[2 * bufcapacity];
			uint32_t data_u32[bufcapacity];
			char_t data_char[bufcapacity];
		} scratch;

		xml_writer& writer;
		size_t bufsize; // number of characters currently stored in buffer
		xml_encoding encoding;
3905 	};
3906 
	// Writes s to the writer, replacing characters that cannot appear literally with entity or character references.
	// type selects which chartypex class interrupts the plain-text scan; flags adjusts quote handling
	// (format_attribute_single_quote) and whether control characters are emitted at all (format_skip_control_chars).
	PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
	{
		while (*s)
		{
			const char_t* prev = s;

			// While *s is a usual symbol
			PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));

			// write the characters between prev and s in a single call
			writer.write_buffer(prev, static_cast<size_t>(s - prev));

			switch (*s)
			{
				case 0: break;
				case '&':
					writer.write('&', 'a', 'm', 'p', ';');
					++s;
					break;
				case '<':
					writer.write('&', 'l', 't', ';');
					++s;
					break;
				case '>':
					writer.write('&', 'g', 't', ';');
					++s;
					break;
				case '"':
					// a double quote is written as is when single quotes are requested via format_attribute_single_quote
					if (flags & format_attribute_single_quote)
						writer.write('"');
					else
						writer.write('&', 'q', 'u', 'o', 't', ';');
					++s;
					break;
				case '\'':
					// mirror image of the case above: the apostrophe is only escaped when format_attribute_single_quote is set
					if (flags & format_attribute_single_quote)
						writer.write('&', 'a', 'p', 'o', 's', ';');
					else
						writer.write('\'');
					++s;
					break;
				default: // s is not a usual symbol
				{
					// anything else reaching this point is expected to be a control character (code below 32)
					unsigned int ch = static_cast<unsigned int>(*s++);
					assert(ch < 32);

					// emit a two-digit decimal character reference (&#NN;) unless control characters are being skipped
					if (!(flags & format_skip_control_chars))
						writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
				}
			}
		}
	}
3958 
text_output(xml_buffered_writer & writer,const char_t * s,chartypex_t type,unsigned int flags)3959 	PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3960 	{
3961 		if (flags & format_no_escapes)
3962 			writer.write_string(s);
3963 		else
3964 			text_output_escaped(writer, s, type, flags);
3965 	}
3966 
text_output_cdata(xml_buffered_writer & writer,const char_t * s)3967 	PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
3968 	{
3969 		do
3970 		{
3971 			writer.write('<', '!', '[', 'C', 'D');
3972 			writer.write('A', 'T', 'A', '[');
3973 
3974 			const char_t* prev = s;
3975 
3976 			// look for ]]> sequence - we can't output it as is since it terminates CDATA
3977 			while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
3978 
3979 			// skip ]] if we stopped at ]]>, > will go to the next CDATA section
3980 			if (*s) s += 2;
3981 
3982 			writer.write_buffer(prev, static_cast<size_t>(s - prev));
3983 
3984 			writer.write(']', ']', '>');
3985 		}
3986 		while (*s);
3987 	}
3988 
text_output_indent(xml_buffered_writer & writer,const char_t * indent,size_t indent_length,unsigned int depth)3989 	PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
3990 	{
3991 		switch (indent_length)
3992 		{
3993 		case 1:
3994 		{
3995 			for (unsigned int i = 0; i < depth; ++i)
3996 				writer.write(indent[0]);
3997 			break;
3998 		}
3999 
4000 		case 2:
4001 		{
4002 			for (unsigned int i = 0; i < depth; ++i)
4003 				writer.write(indent[0], indent[1]);
4004 			break;
4005 		}
4006 
4007 		case 3:
4008 		{
4009 			for (unsigned int i = 0; i < depth; ++i)
4010 				writer.write(indent[0], indent[1], indent[2]);
4011 			break;
4012 		}
4013 
4014 		case 4:
4015 		{
4016 			for (unsigned int i = 0; i < depth; ++i)
4017 				writer.write(indent[0], indent[1], indent[2], indent[3]);
4018 			break;
4019 		}
4020 
4021 		default:
4022 		{
4023 			for (unsigned int i = 0; i < depth; ++i)
4024 				writer.write_buffer(indent, indent_length);
4025 		}
4026 		}
4027 	}
4028 
node_output_comment(xml_buffered_writer & writer,const char_t * s)4029 	PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
4030 	{
4031 		writer.write('<', '!', '-', '-');
4032 
4033 		while (*s)
4034 		{
4035 			const char_t* prev = s;
4036 
4037 			// look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
4038 			while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
4039 
4040 			writer.write_buffer(prev, static_cast<size_t>(s - prev));
4041 
4042 			if (*s)
4043 			{
4044 				assert(*s == '-');
4045 
4046 				writer.write('-', ' ');
4047 				++s;
4048 			}
4049 		}
4050 
4051 		writer.write('-', '-', '>');
4052 	}
4053 
node_output_pi_value(xml_buffered_writer & writer,const char_t * s)4054 	PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
4055 	{
4056 		while (*s)
4057 		{
4058 			const char_t* prev = s;
4059 
4060 			// look for ?> sequence - we can't output it since ?> terminates PI
4061 			while (*s && !(s[0] == '?' && s[1] == '>')) ++s;
4062 
4063 			writer.write_buffer(prev, static_cast<size_t>(s - prev));
4064 
4065 			if (*s)
4066 			{
4067 				assert(s[0] == '?' && s[1] == '>');
4068 
4069 				writer.write('?', ' ', '>');
4070 				s += 2;
4071 			}
4072 		}
4073 	}
4074 
node_output_attributes(xml_buffered_writer & writer,xml_node_struct * node,const char_t * indent,size_t indent_length,unsigned int flags,unsigned int depth)4075 	PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4076 	{
4077 		const char_t* default_name = PUGIXML_TEXT(":anonymous");
4078 		const char_t enquotation_char = (flags & format_attribute_single_quote) ? '\'' : '"';
4079 
4080 		for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4081 		{
4082 			if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes)
4083 			{
4084 				writer.write('\n');
4085 
4086 				text_output_indent(writer, indent, indent_length, depth + 1);
4087 			}
4088 			else
4089 			{
4090 				writer.write(' ');
4091 			}
4092 
4093 			writer.write_string(a->name ? a->name + 0 : default_name);
4094 			writer.write('=', enquotation_char);
4095 
4096 			if (a->value)
4097 				text_output(writer, a->value, ctx_special_attr, flags);
4098 
4099 			writer.write(enquotation_char);
4100 		}
4101 	}
4102 
node_output_start(xml_buffered_writer & writer,xml_node_struct * node,const char_t * indent,size_t indent_length,unsigned int flags,unsigned int depth)4103 	PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4104 	{
4105 		const char_t* default_name = PUGIXML_TEXT(":anonymous");
4106 		const char_t* name = node->name ? node->name + 0 : default_name;
4107 
4108 		writer.write('<');
4109 		writer.write_string(name);
4110 
4111 		if (node->first_attribute)
4112 			node_output_attributes(writer, node, indent, indent_length, flags, depth);
4113 
4114 		// element nodes can have value if parse_embed_pcdata was used
4115 		if (!node->value)
4116 		{
4117 			if (!node->first_child)
4118 			{
4119 				if (flags & format_no_empty_element_tags)
4120 				{
4121 					writer.write('>', '<', '/');
4122 					writer.write_string(name);
4123 					writer.write('>');
4124 
4125 					return false;
4126 				}
4127 				else
4128 				{
4129 					if ((flags & format_raw) == 0)
4130 						writer.write(' ');
4131 
4132 					writer.write('/', '>');
4133 
4134 					return false;
4135 				}
4136 			}
4137 			else
4138 			{
4139 				writer.write('>');
4140 
4141 				return true;
4142 			}
4143 		}
4144 		else
4145 		{
4146 			writer.write('>');
4147 
4148 			text_output(writer, node->value, ctx_special_pcdata, flags);
4149 
4150 			if (!node->first_child)
4151 			{
4152 				writer.write('<', '/');
4153 				writer.write_string(name);
4154 				writer.write('>');
4155 
4156 				return false;
4157 			}
4158 			else
4159 			{
4160 				return true;
4161 			}
4162 		}
4163 	}
4164 
node_output_end(xml_buffered_writer & writer,xml_node_struct * node)4165 	PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
4166 	{
4167 		const char_t* default_name = PUGIXML_TEXT(":anonymous");
4168 		const char_t* name = node->name ? node->name + 0 : default_name;
4169 
4170 		writer.write('<', '/');
4171 		writer.write_string(name);
4172 		writer.write('>');
4173 	}
4174 
node_output_simple(xml_buffered_writer & writer,xml_node_struct * node,unsigned int flags)4175 	PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
4176 	{
4177 		const char_t* default_name = PUGIXML_TEXT(":anonymous");
4178 
4179 		switch (PUGI__NODETYPE(node))
4180 		{
4181 			case node_pcdata:
4182 				text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
4183 				break;
4184 
4185 			case node_cdata:
4186 				text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4187 				break;
4188 
4189 			case node_comment:
4190 				node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4191 				break;
4192 
4193 			case node_pi:
4194 				writer.write('<', '?');
4195 				writer.write_string(node->name ? node->name + 0 : default_name);
4196 
4197 				if (node->value)
4198 				{
4199 					writer.write(' ');
4200 					node_output_pi_value(writer, node->value);
4201 				}
4202 
4203 				writer.write('?', '>');
4204 				break;
4205 
4206 			case node_declaration:
4207 				writer.write('<', '?');
4208 				writer.write_string(node->name ? node->name + 0 : default_name);
4209 				node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
4210 				writer.write('?', '>');
4211 				break;
4212 
4213 			case node_doctype:
4214 				writer.write('<', '!', 'D', 'O', 'C');
4215 				writer.write('T', 'Y', 'P', 'E');
4216 
4217 				if (node->value)
4218 				{
4219 					writer.write(' ');
4220 					writer.write_string(node->value);
4221 				}
4222 
4223 				writer.write('>');
4224 				break;
4225 
4226 			default:
4227 				assert(false && "Invalid node type"); // unreachable
4228 		}
4229 	}
4230 
4231 	enum indent_flags_t
4232 	{
4233 		indent_newline = 1,
4234 		indent_indent = 2
4235 	};
4236 
	// Writes the subtree rooted at root to the writer.
	// The tree is walked iteratively using the first_child / next_sibling / parent links instead of recursion.
	// indent_flags carries the whitespace (newline and/or indentation) that is pending before the next piece of output;
	// depth is the indentation level applied to root.
	PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
	{
		// indentation is only active when requested and not overridden by format_raw; a zero length disables it
		size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
		unsigned int indent_flags = indent_indent;

		xml_node_struct* node = root;

		do
		{
			assert(node);

			// begin writing current node
			if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
			{
				node_output_simple(writer, node, flags);

				// no whitespace is added right after text content
				indent_flags = 0;
			}
			else
			{
				if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
					writer.write('\n');

				if ((indent_flags & indent_indent) && indent_length)
					text_output_indent(writer, indent, indent_length, depth);

				if (PUGI__NODETYPE(node) == node_element)
				{
					indent_flags = indent_newline | indent_indent;

					// node_output_start returns true when there are children left to write
					if (node_output_start(writer, node, indent, indent_length, flags, depth))
					{
						// element nodes can have value if parse_embed_pcdata was used
						if (node->value)
							indent_flags = 0;

						// descend into the first child
						node = node->first_child;
						depth++;
						continue;
					}
				}
				else if (PUGI__NODETYPE(node) == node_document)
				{
					indent_flags = indent_indent;

					// the document node produces no output itself, so depth is not increased for its children
					if (node->first_child)
					{
						node = node->first_child;
						continue;
					}
				}
				else
				{
					node_output_simple(writer, node, flags);

					indent_flags = indent_newline | indent_indent;
				}
			}

			// continue to the next node
			while (node != root)
			{
				if (node->next_sibling)
				{
					node = node->next_sibling;
					break;
				}

				// no more siblings: go back up, closing elements along the way
				node = node->parent;

				// write closing node
				if (PUGI__NODETYPE(node) == node_element)
				{
					depth--;

					if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
						writer.write('\n');

					if ((indent_flags & indent_indent) && indent_length)
						text_output_indent(writer, indent, indent_length, depth);

					node_output_end(writer, node);

					indent_flags = indent_newline | indent_indent;
				}
			}
		}
		while (node != root);

		// terminate the output with a line break if one is still pending
		if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
			writer.write('\n');
	}
4329 
has_declaration(xml_node_struct * node)4330 	PUGI__FN bool has_declaration(xml_node_struct* node)
4331 	{
4332 		for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
4333 		{
4334 			xml_node_type type = PUGI__NODETYPE(child);
4335 
4336 			if (type == node_declaration) return true;
4337 			if (type == node_element) return false;
4338 		}
4339 
4340 		return false;
4341 	}
4342 
is_attribute_of(xml_attribute_struct * attr,xml_node_struct * node)4343 	PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
4344 	{
4345 		for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4346 			if (a == attr)
4347 				return true;
4348 
4349 		return false;
4350 	}
4351 
allow_insert_attribute(xml_node_type parent)4352 	PUGI__FN bool allow_insert_attribute(xml_node_type parent)
4353 	{
4354 		return parent == node_element || parent == node_declaration;
4355 	}
4356 
allow_insert_child(xml_node_type parent,xml_node_type child)4357 	PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
4358 	{
4359 		if (parent != node_document && parent != node_element) return false;
4360 		if (child == node_document || child == node_null) return false;
4361 		if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
4362 
4363 		return true;
4364 	}
4365 
allow_move(xml_node parent,xml_node child)4366 	PUGI__FN bool allow_move(xml_node parent, xml_node child)
4367 	{
4368 		// check that child can be a child of parent
4369 		if (!allow_insert_child(parent.type(), child.type()))
4370 			return false;
4371 
4372 		// check that node is not moved between documents
4373 		if (parent.root() != child.root())
4374 			return false;
4375 
4376 		// check that new parent is not in the child subtree
4377 		xml_node cur = parent;
4378 
4379 		while (cur)
4380 		{
4381 			if (cur == child)
4382 				return false;
4383 
4384 			cur = cur.parent();
4385 		}
4386 
4387 		return true;
4388 	}
4389 
4390 	template <typename String, typename Header>
node_copy_string(String & dest,Header & header,uintptr_t header_mask,char_t * source,Header & source_header,xml_allocator * alloc)4391 	PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
4392 	{
4393 		assert(!dest && (header & header_mask) == 0);
4394 
4395 		if (source)
4396 		{
4397 			if (alloc && (source_header & header_mask) == 0)
4398 			{
4399 				dest = source;
4400 
4401 				// since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
4402 				header |= xml_memory_page_contents_shared_mask;
4403 				source_header |= xml_memory_page_contents_shared_mask;
4404 			}
4405 			else
4406 				strcpy_insitu(dest, header, header_mask, source, strlength(source));
4407 		}
4408 	}
4409 
node_copy_contents(xml_node_struct * dn,xml_node_struct * sn,xml_allocator * shared_alloc)4410 	PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
4411 	{
4412 		node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
4413 		node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
4414 
4415 		for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute)
4416 		{
4417 			xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
4418 
4419 			if (da)
4420 			{
4421 				node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4422 				node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4423 			}
4424 		}
4425 	}
4426 
	// Copies the contents of sn and its whole subtree into dn.
	// The source tree is walked iteratively with sit while dit tracks the matching parent in the destination tree.
	PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
	{
		// strings are only shared (see node_copy_string) when both nodes use the same allocator
		xml_allocator& alloc = get_allocator(dn);
		xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;

		node_copy_contents(dn, sn, shared_alloc);

		xml_node_struct* dit = dn;
		xml_node_struct* sit = sn->first_child;

		while (sit && sit != sn)
		{
			// loop invariant: dit is inside the subtree rooted at dn
			assert(dit);

			// when a tree is copied into one of the descendants, we need to skip that subtree to avoid an infinite loop
			if (sit != dn)
			{
				xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));

				// if the node could not be created, its subtree is skipped and copying continues with the next node
				if (copy)
				{
					node_copy_contents(copy, sit, shared_alloc);

					// descend in both trees at the same time
					if (sit->first_child)
					{
						dit = copy;
						sit = sit->first_child;
						continue;
					}
				}
			}

			// continue to the next node
			do
			{
				if (sit->next_sibling)
				{
					sit = sit->next_sibling;
					break;
				}

				// no more siblings: move up one level in both trees
				sit = sit->parent;
				dit = dit->parent;

				// loop invariant: dit is inside the subtree rooted at dn while sit is inside sn
				assert(sit == sn || dit);
			}
			while (sit != sn);
		}

		// if the walk returned to sn, dit must have climbed one level above dn
		assert(!sit || dit == dn->parent);
	}
4480 
node_copy_attribute(xml_attribute_struct * da,xml_attribute_struct * sa)4481 	PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
4482 	{
4483 		xml_allocator& alloc = get_allocator(da);
4484 		xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
4485 
4486 		node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4487 		node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4488 	}
4489 
is_text_node(xml_node_struct * node)4490 	inline bool is_text_node(xml_node_struct* node)
4491 	{
4492 		xml_node_type type = PUGI__NODETYPE(node);
4493 
4494 		return type == node_pcdata || type == node_cdata;
4495 	}
4496 
4497 	// get value with conversion functions
	// Parses an integer from value: leading whitespace is skipped, an optional sign is accepted,
	// and the digits are read as hexadecimal after a 0x/0X prefix or as decimal otherwise.
	// Parsing stops at the first character that is not a digit. The magnitude is accumulated in the unsigned type U;
	// negative input is clamped to minv and non-negative input to maxv, both on overflow and when out of range.
	template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW U string_to_integer(const char_t* value, U minv, U maxv)
	{
		U result = 0;
		const char_t* s = value;

		while (PUGI__IS_CHARTYPE(*s, ct_space))
			s++;

		bool negative = (*s == '-');

		// skip the sign character, if any
		s += (*s == '+' || *s == '-');

		bool overflow = false;

		// OR-ing with ' ' folds 'X' to 'x'
		if (s[0] == '0' && (s[1] | ' ') == 'x')
		{
			s += 2;

			// since overflow detection relies on length of the sequence skip leading zeros
			while (*s == '0')
				s++;

			const char_t* start = s;

			for (;;)
			{
				if (static_cast<unsigned>(*s - '0') < 10)
					result = result * 16 + (*s - '0');
				else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
					result = result * 16 + ((*s | ' ') - 'a' + 10);
				else
					break;

				s++;
			}

			size_t digits = static_cast<size_t>(s - start);

			// every hex digit is 4 bits, so U can hold at most two digits per byte
			overflow = digits > sizeof(U) * 2;
		}
		else
		{
			// since overflow detection relies on length of the sequence skip leading zeros
			while (*s == '0')
				s++;

			const char_t* start = s;

			for (;;)
			{
				if (static_cast<unsigned>(*s - '0') < 10)
					result = result * 10 + (*s - '0');
				else
					break;

				s++;
			}

			size_t digits = static_cast<size_t>(s - start);

			PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);

			// max_digits10 is the digit count of the largest value of U and max_lead is the leading digit of that value
			const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
			const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
			const size_t high_bit = sizeof(U) * 8 - 1;

			// a number with max_digits10 digits fits if its leading digit is below max_lead, or equals max_lead
			// while the accumulated result still has its top bit set (a result that wrapped around would not)
			overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
		}

		if (negative)
		{
			// minv is the two's complement bit pattern of the smallest value, so 0 - minv is its magnitude
			// Workaround for crayc++ CC-3059: Expected no overflow in routine.
		#ifdef _CRAYC
			return (overflow || result > ~minv + 1) ? minv : ~result + 1;
		#else
			return (overflow || result > 0 - minv) ? minv : 0 - result;
		#endif
		}
		else
			return (overflow || result > maxv) ? maxv : result;
	}
4579 
get_value_int(const char_t * value)4580 	PUGI__FN int get_value_int(const char_t* value)
4581 	{
4582 		return string_to_integer<unsigned int>(value, static_cast<unsigned int>(INT_MIN), INT_MAX);
4583 	}
4584 
get_value_uint(const char_t * value)4585 	PUGI__FN unsigned int get_value_uint(const char_t* value)
4586 	{
4587 		return string_to_integer<unsigned int>(value, 0, UINT_MAX);
4588 	}
4589 
get_value_double(const char_t * value)4590 	PUGI__FN double get_value_double(const char_t* value)
4591 	{
4592 	#ifdef PUGIXML_WCHAR_MODE
4593 		return wcstod(value, 0);
4594 	#else
4595 		return strtod(value, 0);
4596 	#endif
4597 	}
4598 
get_value_float(const char_t * value)4599 	PUGI__FN float get_value_float(const char_t* value)
4600 	{
4601 	#ifdef PUGIXML_WCHAR_MODE
4602 		return static_cast<float>(wcstod(value, 0));
4603 	#else
4604 		return static_cast<float>(strtod(value, 0));
4605 	#endif
4606 	}
4607 
get_value_bool(const char_t * value)4608 	PUGI__FN bool get_value_bool(const char_t* value)
4609 	{
4610 		// only look at first char
4611 		char_t first = *value;
4612 
4613 		// 1*, t* (true), T* (True), y* (yes), Y* (YES)
4614 		return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
4615 	}
4616 
4617 #ifdef PUGIXML_HAS_LONG_LONG
get_value_llong(const char_t * value)4618 	PUGI__FN long long get_value_llong(const char_t* value)
4619 	{
4620 		return string_to_integer<unsigned long long>(value, static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX);
4621 	}
4622 
get_value_ullong(const char_t * value)4623 	PUGI__FN unsigned long long get_value_ullong(const char_t* value)
4624 	{
4625 		return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX);
4626 	}
4627 #endif
4628 
integer_to_string(char_t * begin,char_t * end,U value,bool negative)4629 	template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
4630 	{
4631 		char_t* result = end - 1;
4632 		U rest = negative ? 0 - value : value;
4633 
4634 		do
4635 		{
4636 			*result-- = static_cast<char_t>('0' + (rest % 10));
4637 			rest /= 10;
4638 		}
4639 		while (rest);
4640 
4641 		assert(result >= begin);
4642 		(void)begin;
4643 
4644 		*result = '-';
4645 
4646 		return result + !negative;
4647 	}
4648 
4649 	// set value with conversion functions
4650 	template <typename String, typename Header>
set_value_ascii(String & dest,Header & header,uintptr_t header_mask,char * buf)4651 	PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
4652 	{
4653 	#ifdef PUGIXML_WCHAR_MODE
4654 		char_t wbuf[128];
4655 		assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
4656 
4657 		size_t offset = 0;
4658 		for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
4659 
4660 		return strcpy_insitu(dest, header, header_mask, wbuf, offset);
4661 	#else
4662 		return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
4663 	#endif
4664 	}
4665 
4666 	template <typename U, typename String, typename Header>
set_value_integer(String & dest,Header & header,uintptr_t header_mask,U value,bool negative)4667 	PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative)
4668 	{
4669 		char_t buf[64];
4670 		char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
4671 		char_t* begin = integer_to_string(buf, end, value, negative);
4672 
4673 		return strcpy_insitu(dest, header, header_mask, begin, end - begin);
4674 	}
4675 
4676 	template <typename String, typename Header>
set_value_convert(String & dest,Header & header,uintptr_t header_mask,float value,int precision)4677 	PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value, int precision)
4678 	{
4679 		char buf[128];
4680 		PUGI__SNPRINTF(buf, "%.*g", precision, double(value));
4681 
4682 		return set_value_ascii(dest, header, header_mask, buf);
4683 	}
4684 
4685 	template <typename String, typename Header>
set_value_convert(String & dest,Header & header,uintptr_t header_mask,double value,int precision)4686 	PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value, int precision)
4687 	{
4688 		char buf[128];
4689 		PUGI__SNPRINTF(buf, "%.*g", precision, value);
4690 
4691 		return set_value_ascii(dest, header, header_mask, buf);
4692 	}
4693 
4694 	template <typename String, typename Header>
set_value_bool(String & dest,Header & header,uintptr_t header_mask,bool value)4695 	PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value)
4696 	{
4697 		return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5);
4698 	}
4699 
load_buffer_impl(xml_document_struct * doc,xml_node_struct * root,void * contents,size_t size,unsigned int options,xml_encoding encoding,bool is_mutable,bool own,char_t ** out_buffer)4700 	PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
4701 	{
4702 		// check input buffer
4703 		if (!contents && size) return make_parse_result(status_io_error);
4704 
4705 		// get actual encoding
4706 		xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
4707 
4708 		// get private buffer
4709 		char_t* buffer = 0;
4710 		size_t length = 0;
4711 
4712 		// coverity[var_deref_model]
4713 		if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
4714 
4715 		// delete original buffer if we performed a conversion
4716 		if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
4717 
4718 		// grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
4719 		if (own || buffer != contents) *out_buffer = buffer;
4720 
4721 		// store buffer for offset_debug
4722 		doc->buffer = buffer;
4723 
4724 		// parse
4725 		xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
4726 
4727 		// remember encoding
4728 		res.encoding = buffer_encoding;
4729 
4730 		return res;
4731 	}
4732 
4733 	// we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
get_file_size(FILE * file,size_t & out_result)4734 	PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
4735 	{
4736 	#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
4737 		// there are 64-bit versions of fseek/ftell, let's use them
4738 		typedef __int64 length_type;
4739 
4740 		_fseeki64(file, 0, SEEK_END);
4741 		length_type length = _ftelli64(file);
4742 		_fseeki64(file, 0, SEEK_SET);
4743 	#elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
4744 		// there are 64-bit versions of fseek/ftell, let's use them
4745 		typedef off64_t length_type;
4746 
4747 		fseeko64(file, 0, SEEK_END);
4748 		length_type length = ftello64(file);
4749 		fseeko64(file, 0, SEEK_SET);
4750 	#else
4751 		// if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
4752 		typedef long length_type;
4753 
4754 		fseek(file, 0, SEEK_END);
4755 		length_type length = ftell(file);
4756 		fseek(file, 0, SEEK_SET);
4757 	#endif
4758 
4759 		// check for I/O errors
4760 		if (length < 0) return status_io_error;
4761 
4762 		// check for overflow
4763 		size_t result = static_cast<size_t>(length);
4764 
4765 		if (static_cast<length_type>(result) != length) return status_out_of_memory;
4766 
4767 		// finalize
4768 		out_result = result;
4769 
4770 		return status_ok;
4771 	}
4772 
4773 	// This function assumes that buffer has extra sizeof(char_t) writable bytes after size
zero_terminate_buffer(void * buffer,size_t size,xml_encoding encoding)4774 	PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
4775 	{
4776 		// We only need to zero-terminate if encoding conversion does not do it for us
4777 	#ifdef PUGIXML_WCHAR_MODE
4778 		xml_encoding wchar_encoding = get_wchar_encoding();
4779 
4780 		if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
4781 		{
4782 			size_t length = size / sizeof(char_t);
4783 
4784 			static_cast<char_t*>(buffer)[length] = 0;
4785 			return (length + 1) * sizeof(char_t);
4786 		}
4787 	#else
4788 		if (encoding == encoding_utf8)
4789 		{
4790 			static_cast<char*>(buffer)[size] = 0;
4791 			return size + 1;
4792 		}
4793 	#endif
4794 
4795 		return size;
4796 	}
4797 
load_file_impl(xml_document_struct * doc,FILE * file,unsigned int options,xml_encoding encoding,char_t ** out_buffer)4798 	PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4799 	{
4800 		if (!file) return make_parse_result(status_file_not_found);
4801 
4802 		// get file size (can result in I/O errors)
4803 		size_t size = 0;
4804 		xml_parse_status size_status = get_file_size(file, size);
4805 		if (size_status != status_ok) return make_parse_result(size_status);
4806 
4807 		size_t max_suffix_size = sizeof(char_t);
4808 
4809 		// allocate buffer for the whole file
4810 		char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
4811 		if (!contents) return make_parse_result(status_out_of_memory);
4812 
4813 		// read file in memory
4814 		size_t read_size = fread(contents, 1, size, file);
4815 
4816 		if (read_size != size)
4817 		{
4818 			xml_memory::deallocate(contents);
4819 			return make_parse_result(status_io_error);
4820 		}
4821 
4822 		xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
4823 
4824 		return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
4825 	}
4826 
close_file(FILE * file)4827 	PUGI__FN void close_file(FILE* file)
4828 	{
4829 		fclose(file);
4830 	}
4831 
4832 #ifndef PUGIXML_NO_STL
4833 	template <typename T> struct xml_stream_chunk
4834 	{
createxml_stream_chunk4835 		static xml_stream_chunk* create()
4836 		{
4837 			void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
4838 			if (!memory) return 0;
4839 
4840 			return new (memory) xml_stream_chunk();
4841 		}
4842 
destroyxml_stream_chunk4843 		static void destroy(xml_stream_chunk* chunk)
4844 		{
4845 			// free chunk chain
4846 			while (chunk)
4847 			{
4848 				xml_stream_chunk* next_ = chunk->next;
4849 
4850 				xml_memory::deallocate(chunk);
4851 
4852 				chunk = next_;
4853 			}
4854 		}
4855 
xml_stream_chunkxml_stream_chunk4856 		xml_stream_chunk(): next(0), size(0)
4857 		{
4858 		}
4859 
4860 		xml_stream_chunk* next;
4861 		size_t size;
4862 
4863 		T data[xml_memory_page_size / sizeof(T)];
4864 	};
4865 
load_stream_data_noseek(std::basic_istream<T> & stream,void ** out_buffer,size_t * out_size)4866 	template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4867 	{
4868 		auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy);
4869 
4870 		// read file to a chunk list
4871 		size_t total = 0;
4872 		xml_stream_chunk<T>* last = 0;
4873 
4874 		while (!stream.eof())
4875 		{
4876 			// allocate new chunk
4877 			xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
4878 			if (!chunk) return status_out_of_memory;
4879 
4880 			// append chunk to list
4881 			if (last) last = last->next = chunk;
4882 			else chunks.data = last = chunk;
4883 
4884 			// read data to chunk
4885 			stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
4886 			chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
4887 
4888 			// read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
4889 			if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4890 
4891 			// guard against huge files (chunk size is small enough to make this overflow check work)
4892 			if (total + chunk->size < total) return status_out_of_memory;
4893 			total += chunk->size;
4894 		}
4895 
4896 		size_t max_suffix_size = sizeof(char_t);
4897 
4898 		// copy chunk list to a contiguous buffer
4899 		char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
4900 		if (!buffer) return status_out_of_memory;
4901 
4902 		char* write = buffer;
4903 
4904 		for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
4905 		{
4906 			assert(write + chunk->size <= buffer + total);
4907 			memcpy(write, chunk->data, chunk->size);
4908 			write += chunk->size;
4909 		}
4910 
4911 		assert(write == buffer + total);
4912 
4913 		// return buffer
4914 		*out_buffer = buffer;
4915 		*out_size = total;
4916 
4917 		return status_ok;
4918 	}
4919 
load_stream_data_seek(std::basic_istream<T> & stream,void ** out_buffer,size_t * out_size)4920 	template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4921 	{
4922 		// get length of remaining data in stream
4923 		typename std::basic_istream<T>::pos_type pos = stream.tellg();
4924 		stream.seekg(0, std::ios::end);
4925 		std::streamoff length = stream.tellg() - pos;
4926 		stream.seekg(pos);
4927 
4928 		if (stream.fail() || pos < 0) return status_io_error;
4929 
4930 		// guard against huge files
4931 		size_t read_length = static_cast<size_t>(length);
4932 
4933 		if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
4934 
4935 		size_t max_suffix_size = sizeof(char_t);
4936 
4937 		// read stream data into memory (guard against stream exceptions with buffer holder)
4938 		auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
4939 		if (!buffer.data) return status_out_of_memory;
4940 
4941 		stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
4942 
4943 		// read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
4944 		if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4945 
4946 		// return buffer
4947 		size_t actual_length = static_cast<size_t>(stream.gcount());
4948 		assert(actual_length <= read_length);
4949 
4950 		*out_buffer = buffer.release();
4951 		*out_size = actual_length * sizeof(T);
4952 
4953 		return status_ok;
4954 	}
4955 
load_stream_impl(xml_document_struct * doc,std::basic_istream<T> & stream,unsigned int options,xml_encoding encoding,char_t ** out_buffer)4956 	template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4957 	{
4958 		void* buffer = 0;
4959 		size_t size = 0;
4960 		xml_parse_status status = status_ok;
4961 
4962 		// if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
4963 		if (stream.fail()) return make_parse_result(status_io_error);
4964 
4965 		// load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
4966 		if (stream.tellg() < 0)
4967 		{
4968 			stream.clear(); // clear error flags that could be set by a failing tellg
4969 			status = load_stream_data_noseek(stream, &buffer, &size);
4970 		}
4971 		else
4972 			status = load_stream_data_seek(stream, &buffer, &size);
4973 
4974 		if (status != status_ok) return make_parse_result(status);
4975 
4976 		xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
4977 
4978 		return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
4979 	}
4980 #endif
4981 
4982 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
open_file_wide(const wchar_t * path,const wchar_t * mode)4983 	PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4984 	{
4985 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
4986 		FILE* file = 0;
4987 		return _wfopen_s(&file, path, mode) == 0 ? file : 0;
4988 #else
4989 		return _wfopen(path, mode);
4990 #endif
4991 	}
4992 #else
convert_path_heap(const wchar_t * str)4993 	PUGI__FN char* convert_path_heap(const wchar_t* str)
4994 	{
4995 		assert(str);
4996 
4997 		// first pass: get length in utf8 characters
4998 		size_t length = strlength_wide(str);
4999 		size_t size = as_utf8_begin(str, length);
5000 
5001 		// allocate resulting string
5002 		char* result = static_cast<char*>(xml_memory::allocate(size + 1));
5003 		if (!result) return 0;
5004 
5005 		// second pass: convert to utf8
5006 		as_utf8_end(result, size, str, length);
5007 
5008 		// zero-terminate
5009 		result[size] = 0;
5010 
5011 		return result;
5012 	}
5013 
open_file_wide(const wchar_t * path,const wchar_t * mode)5014 	PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
5015 	{
5016 		// there is no standard function to open wide paths, so our best bet is to try utf8 path
5017 		char* path_utf8 = convert_path_heap(path);
5018 		if (!path_utf8) return 0;
5019 
5020 		// convert mode to ASCII (we mirror _wfopen interface)
5021 		char mode_ascii[4] = {0};
5022 		for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
5023 
5024 		// try to open the utf8 path
5025 		FILE* result = fopen(path_utf8, mode_ascii);
5026 
5027 		// free dummy buffer
5028 		xml_memory::deallocate(path_utf8);
5029 
5030 		return result;
5031 	}
5032 #endif
5033 
open_file(const char * path,const char * mode)5034 	PUGI__FN FILE* open_file(const char* path, const char* mode)
5035 	{
5036 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
5037 		FILE* file = 0;
5038 		return fopen_s(&file, path, mode) == 0 ? file : 0;
5039 #else
5040 		return fopen(path, mode);
5041 #endif
5042 	}
5043 
save_file_impl(const xml_document & doc,FILE * file,const char_t * indent,unsigned int flags,xml_encoding encoding)5044 	PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
5045 	{
5046 		if (!file) return false;
5047 
5048 		xml_writer_file writer(file);
5049 		doc.save(writer, indent, flags, encoding);
5050 
5051 		return ferror(file) == 0;
5052 	}
5053 
5054 	struct name_null_sentry
5055 	{
5056 		xml_node_struct* node;
5057 		char_t* name;
5058 
name_null_sentryname_null_sentry5059 		name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name)
5060 		{
5061 			node->name = 0;
5062 		}
5063 
~name_null_sentryname_null_sentry5064 		~name_null_sentry()
5065 		{
5066 			node->name = name;
5067 		}
5068 	};
5069 PUGI__NS_END
5070 
5071 namespace pugi
5072 {
xml_writer_file(void * file_)5073 	PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
5074 	{
5075 	}
5076 
write(const void * data,size_t size)5077 	PUGI__FN void xml_writer_file::write(const void* data, size_t size)
5078 	{
5079 		size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
5080 		(void)!result; // unfortunately we can't do proper error handling here
5081 	}
5082 
5083 #ifndef PUGIXML_NO_STL
xml_writer_stream(std::basic_ostream<char,std::char_traits<char>> & stream)5084 	PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
5085 	{
5086 	}
5087 
xml_writer_stream(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream)5088 	PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
5089 	{
5090 	}
5091 
write(const void * data,size_t size)5092 	PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
5093 	{
5094 		if (narrow_stream)
5095 		{
5096 			assert(!wide_stream);
5097 			narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
5098 		}
5099 		else
5100 		{
5101 			assert(wide_stream);
5102 			assert(size % sizeof(wchar_t) == 0);
5103 
5104 			wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
5105 		}
5106 	}
5107 #endif
5108 
xml_tree_walker()5109 	PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
5110 	{
5111 	}
5112 
~xml_tree_walker()5113 	PUGI__FN xml_tree_walker::~xml_tree_walker()
5114 	{
5115 	}
5116 
depth() const5117 	PUGI__FN int xml_tree_walker::depth() const
5118 	{
5119 		return _depth;
5120 	}
5121 
begin(xml_node &)5122 	PUGI__FN bool xml_tree_walker::begin(xml_node&)
5123 	{
5124 		return true;
5125 	}
5126 
end(xml_node &)5127 	PUGI__FN bool xml_tree_walker::end(xml_node&)
5128 	{
5129 		return true;
5130 	}
5131 
xml_attribute()5132 	PUGI__FN xml_attribute::xml_attribute(): _attr(0)
5133 	{
5134 	}
5135 
xml_attribute(xml_attribute_struct * attr)5136 	PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
5137 	{
5138 	}
5139 
unspecified_bool_xml_attribute(xml_attribute ***)5140 	PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
5141 	{
5142 	}
5143 
operator xml_attribute::unspecified_bool_type() const5144 	PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
5145 	{
5146 		return _attr ? unspecified_bool_xml_attribute : 0;
5147 	}
5148 
operator !() const5149 	PUGI__FN bool xml_attribute::operator!() const
5150 	{
5151 		return !_attr;
5152 	}
5153 
operator ==(const xml_attribute & r) const5154 	PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
5155 	{
5156 		return (_attr == r._attr);
5157 	}
5158 
operator !=(const xml_attribute & r) const5159 	PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
5160 	{
5161 		return (_attr != r._attr);
5162 	}
5163 
operator <(const xml_attribute & r) const5164 	PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
5165 	{
5166 		return (_attr < r._attr);
5167 	}
5168 
operator >(const xml_attribute & r) const5169 	PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
5170 	{
5171 		return (_attr > r._attr);
5172 	}
5173 
operator <=(const xml_attribute & r) const5174 	PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
5175 	{
5176 		return (_attr <= r._attr);
5177 	}
5178 
operator >=(const xml_attribute & r) const5179 	PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
5180 	{
5181 		return (_attr >= r._attr);
5182 	}
5183 
next_attribute() const5184 	PUGI__FN xml_attribute xml_attribute::next_attribute() const
5185 	{
5186 		return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
5187 	}
5188 
previous_attribute() const5189 	PUGI__FN xml_attribute xml_attribute::previous_attribute() const
5190 	{
5191 		return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
5192 	}
5193 
as_string(const char_t * def) const5194 	PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
5195 	{
5196 		return (_attr && _attr->value) ? _attr->value + 0 : def;
5197 	}
5198 
as_int(int def) const5199 	PUGI__FN int xml_attribute::as_int(int def) const
5200 	{
5201 		return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def;
5202 	}
5203 
as_uint(unsigned int def) const5204 	PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
5205 	{
5206 		return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def;
5207 	}
5208 
as_double(double def) const5209 	PUGI__FN double xml_attribute::as_double(double def) const
5210 	{
5211 		return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def;
5212 	}
5213 
as_float(float def) const5214 	PUGI__FN float xml_attribute::as_float(float def) const
5215 	{
5216 		return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def;
5217 	}
5218 
as_bool(bool def) const5219 	PUGI__FN bool xml_attribute::as_bool(bool def) const
5220 	{
5221 		return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def;
5222 	}
5223 
5224 #ifdef PUGIXML_HAS_LONG_LONG
as_llong(long long def) const5225 	PUGI__FN long long xml_attribute::as_llong(long long def) const
5226 	{
5227 		return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def;
5228 	}
5229 
as_ullong(unsigned long long def) const5230 	PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
5231 	{
5232 		return (_attr && _attr->value) ? impl::get_value_ullong(_attr->value) : def;
5233 	}
5234 #endif
5235 
empty() const5236 	PUGI__FN bool xml_attribute::empty() const
5237 	{
5238 		return !_attr;
5239 	}
5240 
name() const5241 	PUGI__FN const char_t* xml_attribute::name() const
5242 	{
5243 		return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT("");
5244 	}
5245 
value() const5246 	PUGI__FN const char_t* xml_attribute::value() const
5247 	{
5248 		return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT("");
5249 	}
5250 
hash_value() const5251 	PUGI__FN size_t xml_attribute::hash_value() const
5252 	{
5253 		return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
5254 	}
5255 
internal_object() const5256 	PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
5257 	{
5258 		return _attr;
5259 	}
5260 
operator =(const char_t * rhs)5261 	PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
5262 	{
5263 		set_value(rhs);
5264 		return *this;
5265 	}
5266 
operator =(int rhs)5267 	PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
5268 	{
5269 		set_value(rhs);
5270 		return *this;
5271 	}
5272 
operator =(unsigned int rhs)5273 	PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
5274 	{
5275 		set_value(rhs);
5276 		return *this;
5277 	}
5278 
operator =(long rhs)5279 	PUGI__FN xml_attribute& xml_attribute::operator=(long rhs)
5280 	{
5281 		set_value(rhs);
5282 		return *this;
5283 	}
5284 
operator =(unsigned long rhs)5285 	PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs)
5286 	{
5287 		set_value(rhs);
5288 		return *this;
5289 	}
5290 
operator =(double rhs)5291 	PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
5292 	{
5293 		set_value(rhs);
5294 		return *this;
5295 	}
5296 
operator =(float rhs)5297 	PUGI__FN xml_attribute& xml_attribute::operator=(float rhs)
5298 	{
5299 		set_value(rhs);
5300 		return *this;
5301 	}
5302 
operator =(bool rhs)5303 	PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
5304 	{
5305 		set_value(rhs);
5306 		return *this;
5307 	}
5308 
5309 #ifdef PUGIXML_HAS_LONG_LONG
operator =(long long rhs)5310 	PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
5311 	{
5312 		set_value(rhs);
5313 		return *this;
5314 	}
5315 
operator =(unsigned long long rhs)5316 	PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
5317 	{
5318 		set_value(rhs);
5319 		return *this;
5320 	}
5321 #endif
5322 
set_name(const char_t * rhs)5323 	PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
5324 	{
5325 		if (!_attr) return false;
5326 
5327 		return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5328 	}
5329 
set_value(const char_t * rhs)5330 	PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
5331 	{
5332 		if (!_attr) return false;
5333 
5334 		return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5335 	}
5336 
set_value(int rhs)5337 	PUGI__FN bool xml_attribute::set_value(int rhs)
5338 	{
5339 		if (!_attr) return false;
5340 
5341 		return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5342 	}
5343 
set_value(unsigned int rhs)5344 	PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
5345 	{
5346 		if (!_attr) return false;
5347 
5348 		return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5349 	}
5350 
set_value(long rhs)5351 	PUGI__FN bool xml_attribute::set_value(long rhs)
5352 	{
5353 		if (!_attr) return false;
5354 
5355 		return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5356 	}
5357 
set_value(unsigned long rhs)5358 	PUGI__FN bool xml_attribute::set_value(unsigned long rhs)
5359 	{
5360 		if (!_attr) return false;
5361 
5362 		return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5363 	}
5364 
set_value(double rhs)5365 	PUGI__FN bool xml_attribute::set_value(double rhs)
5366 	{
5367 		if (!_attr) return false;
5368 
5369 		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision);
5370 	}
5371 
set_value(double rhs,int precision)5372 	PUGI__FN bool xml_attribute::set_value(double rhs, int precision)
5373 	{
5374 		if (!_attr) return false;
5375 
5376 		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision);
5377 	}
5378 
set_value(float rhs)5379 	PUGI__FN bool xml_attribute::set_value(float rhs)
5380 	{
5381 		if (!_attr) return false;
5382 
5383 		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision);
5384 	}
5385 
set_value(float rhs,int precision)5386 	PUGI__FN bool xml_attribute::set_value(float rhs, int precision)
5387 	{
5388 		if (!_attr) return false;
5389 
5390 		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision);
5391 	}
5392 
set_value(bool rhs)5393 	PUGI__FN bool xml_attribute::set_value(bool rhs)
5394 	{
5395 		if (!_attr) return false;
5396 
5397 		return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5398 	}
5399 
5400 #ifdef PUGIXML_HAS_LONG_LONG
set_value(long long rhs)5401 	PUGI__FN bool xml_attribute::set_value(long long rhs)
5402 	{
5403 		if (!_attr) return false;
5404 
5405 		return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5406 	}
5407 
set_value(unsigned long long rhs)5408 	PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
5409 	{
5410 		if (!_attr) return false;
5411 
5412 		return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5413 	}
5414 #endif
5415 
5416 #ifdef __BORLANDC__
operator &&(const xml_attribute & lhs,bool rhs)5417 	PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
5418 	{
5419 		return (bool)lhs && rhs;
5420 	}
5421 
operator ||(const xml_attribute & lhs,bool rhs)5422 	PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
5423 	{
5424 		return (bool)lhs || rhs;
5425 	}
5426 #endif
5427 
xml_node()5428 	PUGI__FN xml_node::xml_node(): _root(0)
5429 	{
5430 	}
5431 
xml_node(xml_node_struct * p)5432 	PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
5433 	{
5434 	}
5435 
unspecified_bool_xml_node(xml_node ***)5436 	PUGI__FN static void unspecified_bool_xml_node(xml_node***)
5437 	{
5438 	}
5439 
operator xml_node::unspecified_bool_type() const5440 	PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
5441 	{
5442 		return _root ? unspecified_bool_xml_node : 0;
5443 	}
5444 
operator !() const5445 	PUGI__FN bool xml_node::operator!() const
5446 	{
5447 		return !_root;
5448 	}
5449 
begin() const5450 	PUGI__FN xml_node::iterator xml_node::begin() const
5451 	{
5452 		return iterator(_root ? _root->first_child + 0 : 0, _root);
5453 	}
5454 
end() const5455 	PUGI__FN xml_node::iterator xml_node::end() const
5456 	{
5457 		return iterator(0, _root);
5458 	}
5459 
attributes_begin() const5460 	PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
5461 	{
5462 		return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root);
5463 	}
5464 
attributes_end() const5465 	PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
5466 	{
5467 		return attribute_iterator(0, _root);
5468 	}
5469 
children() const5470 	PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
5471 	{
5472 		return xml_object_range<xml_node_iterator>(begin(), end());
5473 	}
5474 
children(const char_t * name_) const5475 	PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
5476 	{
5477 		return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
5478 	}
5479 
attributes() const5480 	PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
5481 	{
5482 		return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
5483 	}
5484 
operator ==(const xml_node & r) const5485 	PUGI__FN bool xml_node::operator==(const xml_node& r) const
5486 	{
5487 		return (_root == r._root);
5488 	}
5489 
operator !=(const xml_node & r) const5490 	PUGI__FN bool xml_node::operator!=(const xml_node& r) const
5491 	{
5492 		return (_root != r._root);
5493 	}
5494 
operator <(const xml_node & r) const5495 	PUGI__FN bool xml_node::operator<(const xml_node& r) const
5496 	{
5497 		return (_root < r._root);
5498 	}
5499 
operator >(const xml_node & r) const5500 	PUGI__FN bool xml_node::operator>(const xml_node& r) const
5501 	{
5502 		return (_root > r._root);
5503 	}
5504 
operator <=(const xml_node & r) const5505 	PUGI__FN bool xml_node::operator<=(const xml_node& r) const
5506 	{
5507 		return (_root <= r._root);
5508 	}
5509 
operator >=(const xml_node & r) const5510 	PUGI__FN bool xml_node::operator>=(const xml_node& r) const
5511 	{
5512 		return (_root >= r._root);
5513 	}
5514 
empty() const5515 	PUGI__FN bool xml_node::empty() const
5516 	{
5517 		return !_root;
5518 	}
5519 
name() const5520 	PUGI__FN const char_t* xml_node::name() const
5521 	{
5522 		return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT("");
5523 	}
5524 
type() const5525 	PUGI__FN xml_node_type xml_node::type() const
5526 	{
5527 		return _root ? PUGI__NODETYPE(_root) : node_null;
5528 	}
5529 
value() const5530 	PUGI__FN const char_t* xml_node::value() const
5531 	{
5532 		return (_root && _root->value) ? _root->value + 0 : PUGIXML_TEXT("");
5533 	}
5534 
child(const char_t * name_) const5535 	PUGI__FN xml_node xml_node::child(const char_t* name_) const
5536 	{
5537 		if (!_root) return xml_node();
5538 
5539 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5540 			if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5541 
5542 		return xml_node();
5543 	}
5544 
attribute(const char_t * name_) const5545 	PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
5546 	{
5547 		if (!_root) return xml_attribute();
5548 
5549 		for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
5550 			if (i->name && impl::strequal(name_, i->name))
5551 				return xml_attribute(i);
5552 
5553 		return xml_attribute();
5554 	}
5555 
next_sibling(const char_t * name_) const5556 	PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
5557 	{
5558 		if (!_root) return xml_node();
5559 
5560 		for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
5561 			if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5562 
5563 		return xml_node();
5564 	}
5565 
next_sibling() const5566 	PUGI__FN xml_node xml_node::next_sibling() const
5567 	{
5568 		return _root ? xml_node(_root->next_sibling) : xml_node();
5569 	}
5570 
previous_sibling(const char_t * name_) const5571 	PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
5572 	{
5573 		if (!_root) return xml_node();
5574 
5575 		for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
5576 			if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5577 
5578 		return xml_node();
5579 	}
5580 
attribute(const char_t * name_,xml_attribute & hint_) const5581 	PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
5582 	{
5583 		xml_attribute_struct* hint = hint_._attr;
5584 
5585 		// if hint is not an attribute of node, behavior is not defined
5586 		assert(!hint || (_root && impl::is_attribute_of(hint, _root)));
5587 
5588 		if (!_root) return xml_attribute();
5589 
5590 		// optimistically search from hint up until the end
5591 		for (xml_attribute_struct* i = hint; i; i = i->next_attribute)
5592 			if (i->name && impl::strequal(name_, i->name))
5593 			{
5594 				// update hint to maximize efficiency of searching for consecutive attributes
5595 				hint_._attr = i->next_attribute;
5596 
5597 				return xml_attribute(i);
5598 			}
5599 
5600 		// wrap around and search from the first attribute until the hint
5601 		// 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails
5602 		for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute)
5603 			if (j->name && impl::strequal(name_, j->name))
5604 			{
5605 				// update hint to maximize efficiency of searching for consecutive attributes
5606 				hint_._attr = j->next_attribute;
5607 
5608 				return xml_attribute(j);
5609 			}
5610 
5611 		return xml_attribute();
5612 	}
5613 
previous_sibling() const5614 	PUGI__FN xml_node xml_node::previous_sibling() const
5615 	{
5616 		if (!_root) return xml_node();
5617 
5618 		if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
5619 		else return xml_node();
5620 	}
5621 
parent() const5622 	PUGI__FN xml_node xml_node::parent() const
5623 	{
5624 		return _root ? xml_node(_root->parent) : xml_node();
5625 	}
5626 
root() const5627 	PUGI__FN xml_node xml_node::root() const
5628 	{
5629 		return _root ? xml_node(&impl::get_document(_root)) : xml_node();
5630 	}
5631 
text() const5632 	PUGI__FN xml_text xml_node::text() const
5633 	{
5634 		return xml_text(_root);
5635 	}
5636 
child_value() const5637 	PUGI__FN const char_t* xml_node::child_value() const
5638 	{
5639 		if (!_root) return PUGIXML_TEXT("");
5640 
5641 		// element nodes can have value if parse_embed_pcdata was used
5642 		if (PUGI__NODETYPE(_root) == node_element && _root->value)
5643 			return _root->value;
5644 
5645 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5646 			if (impl::is_text_node(i) && i->value)
5647 				return i->value;
5648 
5649 		return PUGIXML_TEXT("");
5650 	}
5651 
child_value(const char_t * name_) const5652 	PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
5653 	{
5654 		return child(name_).child_value();
5655 	}
5656 
first_attribute() const5657 	PUGI__FN xml_attribute xml_node::first_attribute() const
5658 	{
5659 		return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
5660 	}
5661 
last_attribute() const5662 	PUGI__FN xml_attribute xml_node::last_attribute() const
5663 	{
5664 		return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
5665 	}
5666 
first_child() const5667 	PUGI__FN xml_node xml_node::first_child() const
5668 	{
5669 		return _root ? xml_node(_root->first_child) : xml_node();
5670 	}
5671 
last_child() const5672 	PUGI__FN xml_node xml_node::last_child() const
5673 	{
5674 		return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
5675 	}
5676 
set_name(const char_t * rhs)5677 	PUGI__FN bool xml_node::set_name(const char_t* rhs)
5678 	{
5679 		xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5680 
5681 		if (type_ != node_element && type_ != node_pi && type_ != node_declaration)
5682 			return false;
5683 
5684 		return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5685 	}
5686 
set_value(const char_t * rhs)5687 	PUGI__FN bool xml_node::set_value(const char_t* rhs)
5688 	{
5689 		xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5690 
5691 		if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
5692 			return false;
5693 
5694 		return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5695 	}
5696 
append_attribute(const char_t * name_)5697 	PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
5698 	{
5699 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5700 
5701 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5702 		if (!alloc.reserve()) return xml_attribute();
5703 
5704 		xml_attribute a(impl::allocate_attribute(alloc));
5705 		if (!a) return xml_attribute();
5706 
5707 		impl::append_attribute(a._attr, _root);
5708 
5709 		a.set_name(name_);
5710 
5711 		return a;
5712 	}
5713 
prepend_attribute(const char_t * name_)5714 	PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
5715 	{
5716 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5717 
5718 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5719 		if (!alloc.reserve()) return xml_attribute();
5720 
5721 		xml_attribute a(impl::allocate_attribute(alloc));
5722 		if (!a) return xml_attribute();
5723 
5724 		impl::prepend_attribute(a._attr, _root);
5725 
5726 		a.set_name(name_);
5727 
5728 		return a;
5729 	}
5730 
insert_attribute_after(const char_t * name_,const xml_attribute & attr)5731 	PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
5732 	{
5733 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5734 		if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5735 
5736 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5737 		if (!alloc.reserve()) return xml_attribute();
5738 
5739 		xml_attribute a(impl::allocate_attribute(alloc));
5740 		if (!a) return xml_attribute();
5741 
5742 		impl::insert_attribute_after(a._attr, attr._attr, _root);
5743 
5744 		a.set_name(name_);
5745 
5746 		return a;
5747 	}
5748 
insert_attribute_before(const char_t * name_,const xml_attribute & attr)5749 	PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
5750 	{
5751 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5752 		if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5753 
5754 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5755 		if (!alloc.reserve()) return xml_attribute();
5756 
5757 		xml_attribute a(impl::allocate_attribute(alloc));
5758 		if (!a) return xml_attribute();
5759 
5760 		impl::insert_attribute_before(a._attr, attr._attr, _root);
5761 
5762 		a.set_name(name_);
5763 
5764 		return a;
5765 	}
5766 
append_copy(const xml_attribute & proto)5767 	PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
5768 	{
5769 		if (!proto) return xml_attribute();
5770 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5771 
5772 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5773 		if (!alloc.reserve()) return xml_attribute();
5774 
5775 		xml_attribute a(impl::allocate_attribute(alloc));
5776 		if (!a) return xml_attribute();
5777 
5778 		impl::append_attribute(a._attr, _root);
5779 		impl::node_copy_attribute(a._attr, proto._attr);
5780 
5781 		return a;
5782 	}
5783 
prepend_copy(const xml_attribute & proto)5784 	PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
5785 	{
5786 		if (!proto) return xml_attribute();
5787 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5788 
5789 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5790 		if (!alloc.reserve()) return xml_attribute();
5791 
5792 		xml_attribute a(impl::allocate_attribute(alloc));
5793 		if (!a) return xml_attribute();
5794 
5795 		impl::prepend_attribute(a._attr, _root);
5796 		impl::node_copy_attribute(a._attr, proto._attr);
5797 
5798 		return a;
5799 	}
5800 
insert_copy_after(const xml_attribute & proto,const xml_attribute & attr)5801 	PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
5802 	{
5803 		if (!proto) return xml_attribute();
5804 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5805 		if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5806 
5807 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5808 		if (!alloc.reserve()) return xml_attribute();
5809 
5810 		xml_attribute a(impl::allocate_attribute(alloc));
5811 		if (!a) return xml_attribute();
5812 
5813 		impl::insert_attribute_after(a._attr, attr._attr, _root);
5814 		impl::node_copy_attribute(a._attr, proto._attr);
5815 
5816 		return a;
5817 	}
5818 
insert_copy_before(const xml_attribute & proto,const xml_attribute & attr)5819 	PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
5820 	{
5821 		if (!proto) return xml_attribute();
5822 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5823 		if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5824 
5825 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5826 		if (!alloc.reserve()) return xml_attribute();
5827 
5828 		xml_attribute a(impl::allocate_attribute(alloc));
5829 		if (!a) return xml_attribute();
5830 
5831 		impl::insert_attribute_before(a._attr, attr._attr, _root);
5832 		impl::node_copy_attribute(a._attr, proto._attr);
5833 
5834 		return a;
5835 	}
5836 
append_child(xml_node_type type_)5837 	PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
5838 	{
5839 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5840 
5841 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5842 		if (!alloc.reserve()) return xml_node();
5843 
5844 		xml_node n(impl::allocate_node(alloc, type_));
5845 		if (!n) return xml_node();
5846 
5847 		impl::append_node(n._root, _root);
5848 
5849 		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5850 
5851 		return n;
5852 	}
5853 
prepend_child(xml_node_type type_)5854 	PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
5855 	{
5856 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5857 
5858 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5859 		if (!alloc.reserve()) return xml_node();
5860 
5861 		xml_node n(impl::allocate_node(alloc, type_));
5862 		if (!n) return xml_node();
5863 
5864 		impl::prepend_node(n._root, _root);
5865 
5866 		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5867 
5868 		return n;
5869 	}
5870 
insert_child_before(xml_node_type type_,const xml_node & node)5871 	PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
5872 	{
5873 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5874 		if (!node._root || node._root->parent != _root) return xml_node();
5875 
5876 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5877 		if (!alloc.reserve()) return xml_node();
5878 
5879 		xml_node n(impl::allocate_node(alloc, type_));
5880 		if (!n) return xml_node();
5881 
5882 		impl::insert_node_before(n._root, node._root);
5883 
5884 		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5885 
5886 		return n;
5887 	}
5888 
insert_child_after(xml_node_type type_,const xml_node & node)5889 	PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
5890 	{
5891 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5892 		if (!node._root || node._root->parent != _root) return xml_node();
5893 
5894 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5895 		if (!alloc.reserve()) return xml_node();
5896 
5897 		xml_node n(impl::allocate_node(alloc, type_));
5898 		if (!n) return xml_node();
5899 
5900 		impl::insert_node_after(n._root, node._root);
5901 
5902 		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5903 
5904 		return n;
5905 	}
5906 
append_child(const char_t * name_)5907 	PUGI__FN xml_node xml_node::append_child(const char_t* name_)
5908 	{
5909 		xml_node result = append_child(node_element);
5910 
5911 		result.set_name(name_);
5912 
5913 		return result;
5914 	}
5915 
prepend_child(const char_t * name_)5916 	PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
5917 	{
5918 		xml_node result = prepend_child(node_element);
5919 
5920 		result.set_name(name_);
5921 
5922 		return result;
5923 	}
5924 
insert_child_after(const char_t * name_,const xml_node & node)5925 	PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
5926 	{
5927 		xml_node result = insert_child_after(node_element, node);
5928 
5929 		result.set_name(name_);
5930 
5931 		return result;
5932 	}
5933 
insert_child_before(const char_t * name_,const xml_node & node)5934 	PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
5935 	{
5936 		xml_node result = insert_child_before(node_element, node);
5937 
5938 		result.set_name(name_);
5939 
5940 		return result;
5941 	}
5942 
append_copy(const xml_node & proto)5943 	PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
5944 	{
5945 		xml_node_type type_ = proto.type();
5946 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5947 
5948 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5949 		if (!alloc.reserve()) return xml_node();
5950 
5951 		xml_node n(impl::allocate_node(alloc, type_));
5952 		if (!n) return xml_node();
5953 
5954 		impl::append_node(n._root, _root);
5955 		impl::node_copy_tree(n._root, proto._root);
5956 
5957 		return n;
5958 	}
5959 
prepend_copy(const xml_node & proto)5960 	PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
5961 	{
5962 		xml_node_type type_ = proto.type();
5963 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5964 
5965 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5966 		if (!alloc.reserve()) return xml_node();
5967 
5968 		xml_node n(impl::allocate_node(alloc, type_));
5969 		if (!n) return xml_node();
5970 
5971 		impl::prepend_node(n._root, _root);
5972 		impl::node_copy_tree(n._root, proto._root);
5973 
5974 		return n;
5975 	}
5976 
insert_copy_after(const xml_node & proto,const xml_node & node)5977 	PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
5978 	{
5979 		xml_node_type type_ = proto.type();
5980 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5981 		if (!node._root || node._root->parent != _root) return xml_node();
5982 
5983 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5984 		if (!alloc.reserve()) return xml_node();
5985 
5986 		xml_node n(impl::allocate_node(alloc, type_));
5987 		if (!n) return xml_node();
5988 
5989 		impl::insert_node_after(n._root, node._root);
5990 		impl::node_copy_tree(n._root, proto._root);
5991 
5992 		return n;
5993 	}
5994 
insert_copy_before(const xml_node & proto,const xml_node & node)5995 	PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
5996 	{
5997 		xml_node_type type_ = proto.type();
5998 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5999 		if (!node._root || node._root->parent != _root) return xml_node();
6000 
6001 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6002 		if (!alloc.reserve()) return xml_node();
6003 
6004 		xml_node n(impl::allocate_node(alloc, type_));
6005 		if (!n) return xml_node();
6006 
6007 		impl::insert_node_before(n._root, node._root);
6008 		impl::node_copy_tree(n._root, proto._root);
6009 
6010 		return n;
6011 	}
6012 
append_move(const xml_node & moved)6013 	PUGI__FN xml_node xml_node::append_move(const xml_node& moved)
6014 	{
6015 		if (!impl::allow_move(*this, moved)) return xml_node();
6016 
6017 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6018 		if (!alloc.reserve()) return xml_node();
6019 
6020 		// disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6021 		impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6022 
6023 		impl::remove_node(moved._root);
6024 		impl::append_node(moved._root, _root);
6025 
6026 		return moved;
6027 	}
6028 
prepend_move(const xml_node & moved)6029 	PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved)
6030 	{
6031 		if (!impl::allow_move(*this, moved)) return xml_node();
6032 
6033 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6034 		if (!alloc.reserve()) return xml_node();
6035 
6036 		// disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6037 		impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6038 
6039 		impl::remove_node(moved._root);
6040 		impl::prepend_node(moved._root, _root);
6041 
6042 		return moved;
6043 	}
6044 
insert_move_after(const xml_node & moved,const xml_node & node)6045 	PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
6046 	{
6047 		if (!impl::allow_move(*this, moved)) return xml_node();
6048 		if (!node._root || node._root->parent != _root) return xml_node();
6049 		if (moved._root == node._root) return xml_node();
6050 
6051 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6052 		if (!alloc.reserve()) return xml_node();
6053 
6054 		// disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6055 		impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6056 
6057 		impl::remove_node(moved._root);
6058 		impl::insert_node_after(moved._root, node._root);
6059 
6060 		return moved;
6061 	}
6062 
insert_move_before(const xml_node & moved,const xml_node & node)6063 	PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
6064 	{
6065 		if (!impl::allow_move(*this, moved)) return xml_node();
6066 		if (!node._root || node._root->parent != _root) return xml_node();
6067 		if (moved._root == node._root) return xml_node();
6068 
6069 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6070 		if (!alloc.reserve()) return xml_node();
6071 
6072 		// disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6073 		impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6074 
6075 		impl::remove_node(moved._root);
6076 		impl::insert_node_before(moved._root, node._root);
6077 
6078 		return moved;
6079 	}
6080 
remove_attribute(const char_t * name_)6081 	PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
6082 	{
6083 		return remove_attribute(attribute(name_));
6084 	}
6085 
remove_attribute(const xml_attribute & a)6086 	PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
6087 	{
6088 		if (!_root || !a._attr) return false;
6089 		if (!impl::is_attribute_of(a._attr, _root)) return false;
6090 
6091 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6092 		if (!alloc.reserve()) return false;
6093 
6094 		impl::remove_attribute(a._attr, _root);
6095 		impl::destroy_attribute(a._attr, alloc);
6096 
6097 		return true;
6098 	}
6099 
remove_attributes()6100 	PUGI__FN bool xml_node::remove_attributes()
6101 	{
6102 		if (!_root) return false;
6103 
6104 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6105 		if (!alloc.reserve()) return false;
6106 
6107 		for (xml_attribute_struct* attr = _root->first_attribute; attr; )
6108 		{
6109 			xml_attribute_struct* next = attr->next_attribute;
6110 
6111 			impl::destroy_attribute(attr, alloc);
6112 
6113 			attr = next;
6114 		}
6115 
6116 		_root->first_attribute = 0;
6117 
6118 		return true;
6119 	}
6120 
remove_child(const char_t * name_)6121 	PUGI__FN bool xml_node::remove_child(const char_t* name_)
6122 	{
6123 		return remove_child(child(name_));
6124 	}
6125 
remove_child(const xml_node & n)6126 	PUGI__FN bool xml_node::remove_child(const xml_node& n)
6127 	{
6128 		if (!_root || !n._root || n._root->parent != _root) return false;
6129 
6130 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6131 		if (!alloc.reserve()) return false;
6132 
6133 		impl::remove_node(n._root);
6134 		impl::destroy_node(n._root, alloc);
6135 
6136 		return true;
6137 	}
6138 
remove_children()6139 	PUGI__FN bool xml_node::remove_children()
6140 	{
6141 		if (!_root) return false;
6142 
6143 		impl::xml_allocator& alloc = impl::get_allocator(_root);
6144 		if (!alloc.reserve()) return false;
6145 
6146 		for (xml_node_struct* cur = _root->first_child; cur; )
6147 		{
6148 			xml_node_struct* next = cur->next_sibling;
6149 
6150 			impl::destroy_node(cur, alloc);
6151 
6152 			cur = next;
6153 		}
6154 
6155 		_root->first_child = 0;
6156 
6157 		return true;
6158 	}
6159 
	// Parses `contents` (`size` bytes, interpreted per `options` and `encoding`) with this node as the parse root
	// by delegating to impl::load_buffer_impl, and returns the resulting parse result.
	// Early exits:
	//  - status_append_invalid_root when impl::allow_insert_child(type(), node_element) is false;
	//  - status_out_of_memory when the bookkeeping record for the fragment buffer cannot be allocated.
	// Side effects visible here: the document's contents_shared flag is set, and a new xml_extra_buffer record is
	// pushed onto doc->extra_buffers before parsing starts (it stays linked regardless of the parse outcome).
	PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
	{
		// append_buffer is only valid for elements/documents
		if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);

		// get document node
		impl::xml_document_struct* doc = &impl::get_document(_root);

		// disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
		doc->header |= impl::xml_memory_page_contents_shared_mask;

		// get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
		// sizeof(void*) extra bytes are requested so the record can be re-aligned in compact mode (see below)
		impl::xml_memory_page* page = 0;
		impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer) + sizeof(void*), page));
		(void)page;

		if (!extra) return impl::make_parse_result(status_out_of_memory);

	#ifdef PUGIXML_COMPACT
		// align the memory block to a pointer boundary; this is required for compact mode where memory allocations are only 4b aligned
		// note that this requires up to sizeof(void*)-1 additional memory, which the allocation above takes into account
		extra = reinterpret_cast<impl::xml_extra_buffer*>((reinterpret_cast<uintptr_t>(extra) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1));
	#endif

		// add extra buffer to the list
		extra->buffer = 0;
		extra->next = doc->extra_buffers;
		doc->extra_buffers = extra;

		// name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
		// NOTE(review): the sentry presumably restores the name when it goes out of scope - confirm against impl::name_null_sentry
		impl::name_null_sentry sentry(_root);

		// &extra->buffer is handed to the loader as an output slot for the fragment buffer
		// NOTE(review): the two `false` arguments presumably mean the input is neither mutable nor owned - confirm against impl::load_buffer_impl
		return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer);
	}
6194 
find_child_by_attribute(const char_t * name_,const char_t * attr_name,const char_t * attr_value) const6195 	PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
6196 	{
6197 		if (!_root) return xml_node();
6198 
6199 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6200 			if (i->name && impl::strequal(name_, i->name))
6201 			{
6202 				for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6203 					if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
6204 						return xml_node(i);
6205 			}
6206 
6207 		return xml_node();
6208 	}
6209 
find_child_by_attribute(const char_t * attr_name,const char_t * attr_value) const6210 	PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
6211 	{
6212 		if (!_root) return xml_node();
6213 
6214 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6215 			for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6216 				if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
6217 					return xml_node(i);
6218 
6219 		return xml_node();
6220 	}
6221 
6222 #ifndef PUGIXML_NO_STL
	// Builds a path string for this node by walking the parent chain: every ancestor contributes its name
	// (nothing if the name is null) followed by `delimiter`, and this node's own name comes last.
	// Returns an empty string for a null handle.
	PUGI__FN string_t xml_node::path(char_t delimiter) const
	{
		if (!_root) return string_t();

		// first pass: compute the total length (one delimiter per ancestor plus all name lengths)
		size_t offset = 0;

		for (xml_node_struct* i = _root; i; i = i->parent)
		{
			offset += (i != _root);
			offset += i->name ? impl::strlength(i->name) : 0;
		}

		string_t result;
		result.resize(offset);

		// second pass: fill the string back to front, starting with this node and moving towards the topmost ancestor
		for (xml_node_struct* j = _root; j; j = j->parent)
		{
			// the delimiter goes after the ancestor's name, so it is written before the name when filling backwards
			if (j != _root)
				result[--offset] = delimiter;

			if (j->name)
			{
				size_t length = impl::strlength(j->name);

				offset -= length;
				memcpy(&result[offset], j->name, length * sizeof(char_t));
			}
		}

		// both passes must agree on the length, so the write position ends at the start of the string
		assert(offset == 0);

		return result;
	}
6256 #endif
6257 
	// Resolves `path_`, a sequence of segments separated by `delimiter`, to a node.
	//  - A path starting with the delimiter is resolved from root(), otherwise from this node.
	//  - Runs of consecutive delimiters are skipped as a whole.
	//  - An empty remaining path resolves to the current context node.
	//  - A "." segment keeps the context, a ".." segment moves to the parent.
	//  - Any other segment is matched against child names; each matching child is searched recursively and
	//    the first non-null result is returned.
	// Returns a null handle when the context is null or nothing matches.
	// path_ is dereferenced unconditionally, so it must not be null.
	PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
	{
		xml_node context = path_[0] == delimiter ? root() : *this;

		if (!context._root) return xml_node();

		const char_t* path_segment = path_;

		// skip leading delimiters
		while (*path_segment == delimiter) ++path_segment;

		const char_t* path_segment_end = path_segment;

		// find the end of the current segment (next delimiter or end of string)
		while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;

		// nothing left to resolve
		if (path_segment == path_segment_end) return context;

		const char_t* next_segment = path_segment_end;

		// skip the delimiters that separate this segment from the next one
		while (*next_segment == delimiter) ++next_segment;

		if (*path_segment == '.' && path_segment + 1 == path_segment_end)
			return context.first_element_by_path(next_segment, delimiter);
		else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
			return context.parent().first_element_by_path(next_segment, delimiter);
		else
		{
			for (xml_node_struct* j = context._root->first_child; j; j = j->next_sibling)
			{
				if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
				{
					xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);

					// keep scanning siblings with the same name if this subtree has no match
					if (subsearch) return subsearch;
				}
			}

			return xml_node();
		}
	}
6297 
	// Walks the subtree below this node iteratively, visiting a node before its children.
	// walker.begin() is called with this node first and walker.end() with this node last.
	// walker.for_each() is called for every descendant, with walker._depth kept at the descendant's depth
	// (0 for direct children).
	// Returns false as soon as begin() or for_each() returns false (end() is not called in that case);
	// otherwise returns the result of end().
	PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
	{
		walker._depth = -1;

		xml_node arg_begin(_root);
		if (!walker.begin(arg_begin)) return false;

		xml_node_struct* cur = _root ? _root->first_child + 0 : 0;

		if (cur)
		{
			++walker._depth;

			do
			{
				xml_node arg_for_each(cur);
				if (!walker.for_each(arg_for_each))
					return false;

				if (cur->first_child)
				{
					// descend into the first child
					++walker._depth;
					cur = cur->first_child;
				}
				else if (cur->next_sibling)
					cur = cur->next_sibling;
				else
				{
					// leaf without a following sibling: climb until a node with a next sibling (or the start node) is found
					while (!cur->next_sibling && cur != _root && cur->parent)
					{
						--walker._depth;
						cur = cur->parent;
					}

					if (cur != _root)
						cur = cur->next_sibling;
				}
			}
			while (cur && cur != _root);
		}

		// every descent must have been matched by an ascent
		assert(walker._depth == -1);

		xml_node arg_end(_root);
		return walker.end(arg_end);
	}
6344 
hash_value() const6345 	PUGI__FN size_t xml_node::hash_value() const
6346 	{
6347 		return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
6348 	}
6349 
	// Exposes the raw internal node pointer held by this handle (null for a null handle).
	PUGI__FN xml_node_struct* xml_node::internal_object() const
	{
		return _root;
	}
6354 
print(xml_writer & writer,const char_t * indent,unsigned int flags,xml_encoding encoding,unsigned int depth) const6355 	PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6356 	{
6357 		if (!_root) return;
6358 
6359 		impl::xml_buffered_writer buffered_writer(writer, encoding);
6360 
6361 		impl::node_output(buffered_writer, _root, indent, flags, depth);
6362 
6363 		buffered_writer.flush();
6364 	}
6365 
6366 #ifndef PUGIXML_NO_STL
print(std::basic_ostream<char,std::char_traits<char>> & stream,const char_t * indent,unsigned int flags,xml_encoding encoding,unsigned int depth) const6367 	PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6368 	{
6369 		xml_writer_stream writer(stream);
6370 
6371 		print(writer, indent, flags, encoding, depth);
6372 	}
6373 
print(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream,const char_t * indent,unsigned int flags,unsigned int depth) const6374 	PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
6375 	{
6376 		xml_writer_stream writer(stream);
6377 
6378 		print(writer, indent, flags, encoding_wchar, depth);
6379 	}
6380 #endif
6381 
offset_debug() const6382 	PUGI__FN ptrdiff_t xml_node::offset_debug() const
6383 	{
6384 		if (!_root) return -1;
6385 
6386 		impl::xml_document_struct& doc = impl::get_document(_root);
6387 
6388 		// we can determine the offset reliably only if there is exactly once parse buffer
6389 		if (!doc.buffer || doc.extra_buffers) return -1;
6390 
6391 		switch (type())
6392 		{
6393 		case node_document:
6394 			return 0;
6395 
6396 		case node_element:
6397 		case node_declaration:
6398 		case node_pi:
6399 			return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1;
6400 
6401 		case node_pcdata:
6402 		case node_cdata:
6403 		case node_comment:
6404 		case node_doctype:
6405 			return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1;
6406 
6407 		default:
6408 			assert(false && "Invalid node type"); // unreachable
6409 			return -1;
6410 		}
6411 	}
6412 
6413 #ifdef __BORLANDC__
	// Borland-only overload (compiled under __BORLANDC__): logical AND of a node handle's boolean state and rhs.
	PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
	{
		return (bool)lhs && rhs;
	}
6418 
	// Borland-only overload (compiled under __BORLANDC__): logical OR of a node handle's boolean state and rhs.
	PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
	{
		return (bool)lhs || rhs;
	}
6423 #endif
6424 
	// Constructs a text accessor bound to the given internal node (which may be null).
	PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
	{
	}
6428 
_data() const6429 	PUGI__FN xml_node_struct* xml_text::_data() const
6430 	{
6431 		if (!_root || impl::is_text_node(_root)) return _root;
6432 
6433 		// element nodes can have value if parse_embed_pcdata was used
6434 		if (PUGI__NODETYPE(_root) == node_element && _root->value)
6435 			return _root;
6436 
6437 		for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
6438 			if (impl::is_text_node(node))
6439 				return node;
6440 
6441 		return 0;
6442 	}
6443 
_data_new()6444 	PUGI__FN xml_node_struct* xml_text::_data_new()
6445 	{
6446 		xml_node_struct* d = _data();
6447 		if (d) return d;
6448 
6449 		return xml_node(_root).append_child(node_pcdata).internal_object();
6450 	}
6451 
	// Constructs an unbound text accessor (no underlying node).
	PUGI__FN xml_text::xml_text(): _root(0)
	{
	}
6455 
	// Intentionally empty: only the address of this function is used, as the non-null value returned by
	// xml_text's conversion to unspecified_bool_type.
	PUGI__FN static void unspecified_bool_xml_text(xml_text***)
	{
	}
6459 
operator xml_text::unspecified_bool_type() const6460 	PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
6461 	{
6462 		return _data() ? unspecified_bool_xml_text : 0;
6463 	}
6464 
operator !() const6465 	PUGI__FN bool xml_text::operator!() const
6466 	{
6467 		return !_data();
6468 	}
6469 
empty() const6470 	PUGI__FN bool xml_text::empty() const
6471 	{
6472 		return _data() == 0;
6473 	}
6474 
get() const6475 	PUGI__FN const char_t* xml_text::get() const
6476 	{
6477 		xml_node_struct* d = _data();
6478 
6479 		return (d && d->value) ? d->value + 0 : PUGIXML_TEXT("");
6480 	}
6481 
as_string(const char_t * def) const6482 	PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
6483 	{
6484 		xml_node_struct* d = _data();
6485 
6486 		return (d && d->value) ? d->value + 0 : def;
6487 	}
6488 
as_int(int def) const6489 	PUGI__FN int xml_text::as_int(int def) const
6490 	{
6491 		xml_node_struct* d = _data();
6492 
6493 		return (d && d->value) ? impl::get_value_int(d->value) : def;
6494 	}
6495 
as_uint(unsigned int def) const6496 	PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
6497 	{
6498 		xml_node_struct* d = _data();
6499 
6500 		return (d && d->value) ? impl::get_value_uint(d->value) : def;
6501 	}
6502 
as_double(double def) const6503 	PUGI__FN double xml_text::as_double(double def) const
6504 	{
6505 		xml_node_struct* d = _data();
6506 
6507 		return (d && d->value) ? impl::get_value_double(d->value) : def;
6508 	}
6509 
as_float(float def) const6510 	PUGI__FN float xml_text::as_float(float def) const
6511 	{
6512 		xml_node_struct* d = _data();
6513 
6514 		return (d && d->value) ? impl::get_value_float(d->value) : def;
6515 	}
6516 
as_bool(bool def) const6517 	PUGI__FN bool xml_text::as_bool(bool def) const
6518 	{
6519 		xml_node_struct* d = _data();
6520 
6521 		return (d && d->value) ? impl::get_value_bool(d->value) : def;
6522 	}
6523 
6524 #ifdef PUGIXML_HAS_LONG_LONG
as_llong(long long def) const6525 	PUGI__FN long long xml_text::as_llong(long long def) const
6526 	{
6527 		xml_node_struct* d = _data();
6528 
6529 		return (d && d->value) ? impl::get_value_llong(d->value) : def;
6530 	}
6531 
as_ullong(unsigned long long def) const6532 	PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
6533 	{
6534 		xml_node_struct* d = _data();
6535 
6536 		return (d && d->value) ? impl::get_value_ullong(d->value) : def;
6537 	}
6538 #endif
6539 
set(const char_t * rhs)6540 	PUGI__FN bool xml_text::set(const char_t* rhs)
6541 	{
6542 		xml_node_struct* dn = _data_new();
6543 
6544 		return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false;
6545 	}
6546 
set(int rhs)6547 	PUGI__FN bool xml_text::set(int rhs)
6548 	{
6549 		xml_node_struct* dn = _data_new();
6550 
6551 		return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6552 	}
6553 
set(unsigned int rhs)6554 	PUGI__FN bool xml_text::set(unsigned int rhs)
6555 	{
6556 		xml_node_struct* dn = _data_new();
6557 
6558 		return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6559 	}
6560 
set(long rhs)6561 	PUGI__FN bool xml_text::set(long rhs)
6562 	{
6563 		xml_node_struct* dn = _data_new();
6564 
6565 		return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6566 	}
6567 
set(unsigned long rhs)6568 	PUGI__FN bool xml_text::set(unsigned long rhs)
6569 	{
6570 		xml_node_struct* dn = _data_new();
6571 
6572 		return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6573 	}
6574 
set(float rhs)6575 	PUGI__FN bool xml_text::set(float rhs)
6576 	{
6577 		xml_node_struct* dn = _data_new();
6578 
6579 		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision) : false;
6580 	}
6581 
set(float rhs,int precision)6582 	PUGI__FN bool xml_text::set(float rhs, int precision)
6583 	{
6584 		xml_node_struct* dn = _data_new();
6585 
6586 		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false;
6587 	}
6588 
set(double rhs)6589 	PUGI__FN bool xml_text::set(double rhs)
6590 	{
6591 		xml_node_struct* dn = _data_new();
6592 
6593 		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision) : false;
6594 	}
6595 
set(double rhs,int precision)6596 	PUGI__FN bool xml_text::set(double rhs, int precision)
6597 	{
6598 		xml_node_struct* dn = _data_new();
6599 
6600 		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false;
6601 	}
6602 
set(bool rhs)6603 	PUGI__FN bool xml_text::set(bool rhs)
6604 	{
6605 		xml_node_struct* dn = _data_new();
6606 
6607 		return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6608 	}
6609 
6610 #ifdef PUGIXML_HAS_LONG_LONG
set(long long rhs)6611 	PUGI__FN bool xml_text::set(long long rhs)
6612 	{
6613 		xml_node_struct* dn = _data_new();
6614 
6615 		return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6616 	}
6617 
set(unsigned long long rhs)6618 	PUGI__FN bool xml_text::set(unsigned long long rhs)
6619 	{
6620 		xml_node_struct* dn = _data_new();
6621 
6622 		return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6623 	}
6624 #endif
6625 
operator =(const char_t * rhs)6626 	PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
6627 	{
6628 		set(rhs);
6629 		return *this;
6630 	}
6631 
operator =(int rhs)6632 	PUGI__FN xml_text& xml_text::operator=(int rhs)
6633 	{
6634 		set(rhs);
6635 		return *this;
6636 	}
6637 
operator =(unsigned int rhs)6638 	PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
6639 	{
6640 		set(rhs);
6641 		return *this;
6642 	}
6643 
operator =(long rhs)6644 	PUGI__FN xml_text& xml_text::operator=(long rhs)
6645 	{
6646 		set(rhs);
6647 		return *this;
6648 	}
6649 
operator =(unsigned long rhs)6650 	PUGI__FN xml_text& xml_text::operator=(unsigned long rhs)
6651 	{
6652 		set(rhs);
6653 		return *this;
6654 	}
6655 
operator =(double rhs)6656 	PUGI__FN xml_text& xml_text::operator=(double rhs)
6657 	{
6658 		set(rhs);
6659 		return *this;
6660 	}
6661 
operator =(float rhs)6662 	PUGI__FN xml_text& xml_text::operator=(float rhs)
6663 	{
6664 		set(rhs);
6665 		return *this;
6666 	}
6667 
operator =(bool rhs)6668 	PUGI__FN xml_text& xml_text::operator=(bool rhs)
6669 	{
6670 		set(rhs);
6671 		return *this;
6672 	}
6673 
6674 #ifdef PUGIXML_HAS_LONG_LONG
operator =(long long rhs)6675 	PUGI__FN xml_text& xml_text::operator=(long long rhs)
6676 	{
6677 		set(rhs);
6678 		return *this;
6679 	}
6680 
operator =(unsigned long long rhs)6681 	PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
6682 	{
6683 		set(rhs);
6684 		return *this;
6685 	}
6686 #endif
6687 
data() const6688 	PUGI__FN xml_node xml_text::data() const
6689 	{
6690 		return xml_node(_data());
6691 	}
6692 
6693 #ifdef __BORLANDC__
operator &&(const xml_text & lhs,bool rhs)6694 	PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
6695 	{
6696 		return (bool)lhs && rhs;
6697 	}
6698 
operator ||(const xml_text & lhs,bool rhs)6699 	PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
6700 	{
6701 		return (bool)lhs || rhs;
6702 	}
6703 #endif
6704 
xml_node_iterator()6705 	PUGI__FN xml_node_iterator::xml_node_iterator()
6706 	{
6707 	}
6708 
xml_node_iterator(const xml_node & node)6709 	PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
6710 	{
6711 	}
6712 
xml_node_iterator(xml_node_struct * ref,xml_node_struct * parent)6713 	PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
6714 	{
6715 	}
6716 
operator ==(const xml_node_iterator & rhs) const6717 	PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
6718 	{
6719 		return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6720 	}
6721 
operator !=(const xml_node_iterator & rhs) const6722 	PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
6723 	{
6724 		return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6725 	}
6726 
operator *() const6727 	PUGI__FN xml_node& xml_node_iterator::operator*() const
6728 	{
6729 		assert(_wrap._root);
6730 		return _wrap;
6731 	}
6732 
operator ->() const6733 	PUGI__FN xml_node* xml_node_iterator::operator->() const
6734 	{
6735 		assert(_wrap._root);
6736 		return const_cast<xml_node*>(&_wrap); // BCC5 workaround
6737 	}
6738 
operator ++()6739 	PUGI__FN xml_node_iterator& xml_node_iterator::operator++()
6740 	{
6741 		assert(_wrap._root);
6742 		_wrap._root = _wrap._root->next_sibling;
6743 		return *this;
6744 	}
6745 
operator ++(int)6746 	PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
6747 	{
6748 		xml_node_iterator temp = *this;
6749 		++*this;
6750 		return temp;
6751 	}
6752 
operator --()6753 	PUGI__FN xml_node_iterator& xml_node_iterator::operator--()
6754 	{
6755 		_wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
6756 		return *this;
6757 	}
6758 
operator --(int)6759 	PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
6760 	{
6761 		xml_node_iterator temp = *this;
6762 		--*this;
6763 		return temp;
6764 	}
6765 
xml_attribute_iterator()6766 	PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
6767 	{
6768 	}
6769 
xml_attribute_iterator(const xml_attribute & attr,const xml_node & parent)6770 	PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
6771 	{
6772 	}
6773 
xml_attribute_iterator(xml_attribute_struct * ref,xml_node_struct * parent)6774 	PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
6775 	{
6776 	}
6777 
operator ==(const xml_attribute_iterator & rhs) const6778 	PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
6779 	{
6780 		return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
6781 	}
6782 
operator !=(const xml_attribute_iterator & rhs) const6783 	PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
6784 	{
6785 		return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
6786 	}
6787 
operator *() const6788 	PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
6789 	{
6790 		assert(_wrap._attr);
6791 		return _wrap;
6792 	}
6793 
operator ->() const6794 	PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
6795 	{
6796 		assert(_wrap._attr);
6797 		return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround
6798 	}
6799 
operator ++()6800 	PUGI__FN xml_attribute_iterator& xml_attribute_iterator::operator++()
6801 	{
6802 		assert(_wrap._attr);
6803 		_wrap._attr = _wrap._attr->next_attribute;
6804 		return *this;
6805 	}
6806 
operator ++(int)6807 	PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
6808 	{
6809 		xml_attribute_iterator temp = *this;
6810 		++*this;
6811 		return temp;
6812 	}
6813 
operator --()6814 	PUGI__FN xml_attribute_iterator& xml_attribute_iterator::operator--()
6815 	{
6816 		_wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
6817 		return *this;
6818 	}
6819 
operator --(int)6820 	PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
6821 	{
6822 		xml_attribute_iterator temp = *this;
6823 		--*this;
6824 		return temp;
6825 	}
6826 
xml_named_node_iterator()6827 	PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
6828 	{
6829 	}
6830 
xml_named_node_iterator(const xml_node & node,const char_t * name)6831 	PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
6832 	{
6833 	}
6834 
xml_named_node_iterator(xml_node_struct * ref,xml_node_struct * parent,const char_t * name)6835 	PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
6836 	{
6837 	}
6838 
operator ==(const xml_named_node_iterator & rhs) const6839 	PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
6840 	{
6841 		return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6842 	}
6843 
operator !=(const xml_named_node_iterator & rhs) const6844 	PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
6845 	{
6846 		return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6847 	}
6848 
operator *() const6849 	PUGI__FN xml_node& xml_named_node_iterator::operator*() const
6850 	{
6851 		assert(_wrap._root);
6852 		return _wrap;
6853 	}
6854 
operator ->() const6855 	PUGI__FN xml_node* xml_named_node_iterator::operator->() const
6856 	{
6857 		assert(_wrap._root);
6858 		return const_cast<xml_node*>(&_wrap); // BCC5 workaround
6859 	}
6860 
operator ++()6861 	PUGI__FN xml_named_node_iterator& xml_named_node_iterator::operator++()
6862 	{
6863 		assert(_wrap._root);
6864 		_wrap = _wrap.next_sibling(_name);
6865 		return *this;
6866 	}
6867 
operator ++(int)6868 	PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
6869 	{
6870 		xml_named_node_iterator temp = *this;
6871 		++*this;
6872 		return temp;
6873 	}
6874 
operator --()6875 	PUGI__FN xml_named_node_iterator& xml_named_node_iterator::operator--()
6876 	{
6877 		if (_wrap._root)
6878 			_wrap = _wrap.previous_sibling(_name);
6879 		else
6880 		{
6881 			_wrap = _parent.last_child();
6882 
6883 			if (!impl::strequal(_wrap.name(), _name))
6884 				_wrap = _wrap.previous_sibling(_name);
6885 		}
6886 
6887 		return *this;
6888 	}
6889 
operator --(int)6890 	PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
6891 	{
6892 		xml_named_node_iterator temp = *this;
6893 		--*this;
6894 		return temp;
6895 	}
6896 
xml_parse_result()6897 	PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
6898 	{
6899 	}
6900 
operator bool() const6901 	PUGI__FN xml_parse_result::operator bool() const
6902 	{
6903 		return status == status_ok;
6904 	}
6905 
description() const6906 	PUGI__FN const char* xml_parse_result::description() const
6907 	{
6908 		switch (status)
6909 		{
6910 		case status_ok: return "No error";
6911 
6912 		case status_file_not_found: return "File was not found";
6913 		case status_io_error: return "Error reading from file/stream";
6914 		case status_out_of_memory: return "Could not allocate memory";
6915 		case status_internal_error: return "Internal error occurred";
6916 
6917 		case status_unrecognized_tag: return "Could not determine tag type";
6918 
6919 		case status_bad_pi: return "Error parsing document declaration/processing instruction";
6920 		case status_bad_comment: return "Error parsing comment";
6921 		case status_bad_cdata: return "Error parsing CDATA section";
6922 		case status_bad_doctype: return "Error parsing document type declaration";
6923 		case status_bad_pcdata: return "Error parsing PCDATA section";
6924 		case status_bad_start_element: return "Error parsing start element tag";
6925 		case status_bad_attribute: return "Error parsing element attribute";
6926 		case status_bad_end_element: return "Error parsing end element tag";
6927 		case status_end_element_mismatch: return "Start-end tags mismatch";
6928 
6929 		case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
6930 
6931 		case status_no_document_element: return "No document element found";
6932 
6933 		default: return "Unknown error";
6934 		}
6935 	}
6936 
xml_document()6937 	PUGI__FN xml_document::xml_document(): _buffer(0)
6938 	{
6939 		_create();
6940 	}
6941 
~xml_document()6942 	PUGI__FN xml_document::~xml_document()
6943 	{
6944 		_destroy();
6945 	}
6946 
6947 #ifdef PUGIXML_HAS_MOVE
xml_document(xml_document && rhs)6948 	PUGI__FN xml_document::xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT: _buffer(0)
6949 	{
6950 		_create();
6951 		_move(rhs);
6952 	}
6953 
operator =(xml_document && rhs)6954 	PUGI__FN xml_document& xml_document::operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
6955 	{
6956 		if (this == &rhs) return *this;
6957 
6958 		_destroy();
6959 		_create();
6960 		_move(rhs);
6961 
6962 		return *this;
6963 	}
6964 #endif
6965 
reset()6966 	PUGI__FN void xml_document::reset()
6967 	{
6968 		_destroy();
6969 		_create();
6970 	}
6971 
reset(const xml_document & proto)6972 	PUGI__FN void xml_document::reset(const xml_document& proto)
6973 	{
6974 		reset();
6975 
6976 		impl::node_copy_tree(_root, proto._root);
6977 	}
6978 
	// Sets up an empty document inside the embedded _memory storage.
	// A memory page header is constructed at the start of _memory, the document
	// struct is placement-constructed right after it (after an extra pointer-sized
	// slot in compact mode), and the two are linked to each other.
	// Precondition (asserted): _root is null, i.e. no document currently exists.
	PUGI__FN void xml_document::_create()
	{
		assert(!_root);

	#ifdef PUGIXML_COMPACT
		// space for page marker for the first page (uint32_t), rounded up to pointer size; assumes pointers are at least 32-bit
		const size_t page_offset = sizeof(void*);
	#else
		const size_t page_offset = 0;
	#endif

		// initialize sentinel page
		// compile-time check that page header + document struct (+ marker slot) fit into _memory
		PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory));

		// prepare page structure
		impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory);
		assert(page);

		// the embedded page is recorded as fully used
		page->busy_size = impl::xml_memory_page_size;

		// setup first page marker
	#ifdef PUGIXML_COMPACT
		// round-trip through void* to avoid 'cast increases required alignment of target type' warning
		page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
		*page->compact_page_marker = sizeof(impl::xml_memory_page);
	#endif

		// allocate new root
		// placement new: the document struct lives inside _memory, directly after the page header (and marker slot)
		_root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
		// the root's prev_sibling_c refers back to the root itself
		_root->prev_sibling_c = _root;

		// setup sentinel page
		page->allocator = static_cast<impl::xml_document_struct*>(_root);

		// setup hash table pointer in allocator
	#ifdef PUGIXML_COMPACT
		page->allocator->_hash = &static_cast<impl::xml_document_struct*>(_root)->hash;
	#endif

		// verify the document allocation
		// runtime counterpart of the static assert above: the struct must end within _memory
		assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
	}
7021 
_destroy()7022 	PUGI__FN void xml_document::_destroy()
7023 	{
7024 		assert(_root);
7025 
7026 		// destroy static storage
7027 		if (_buffer)
7028 		{
7029 			impl::xml_memory::deallocate(_buffer);
7030 			_buffer = 0;
7031 		}
7032 
7033 		// destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
7034 		for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
7035 		{
7036 			if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
7037 		}
7038 
7039 		// destroy dynamic storage, leave sentinel page (it's in static memory)
7040 		impl::xml_memory_page* root_page = PUGI__GETPAGE(_root);
7041 		assert(root_page && !root_page->prev);
7042 		assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
7043 
7044 		for (impl::xml_memory_page* page = root_page->next; page; )
7045 		{
7046 			impl::xml_memory_page* next = page->next;
7047 
7048 			impl::xml_allocator::deallocate_page(page);
7049 
7050 			page = next;
7051 		}
7052 
7053 	#ifdef PUGIXML_COMPACT
7054 		// destroy hash table
7055 		static_cast<impl::xml_document_struct*>(_root)->hash.clear();
7056 	#endif
7057 
7058 		_root = 0;
7059 	}
7060 
7061 #ifdef PUGIXML_HAS_MOVE
	// Transfers the contents of rhs into this document and leaves rhs empty.
	// Steps, in order: (compact mode only) reserve hash capacity up front, copy the
	// allocator state and buffer pointers, relink the dynamic page list behind this
	// document's embedded page, re-point every moved page and top-level child at this
	// document, and finally re-construct rhs's document struct in place.
	// Preconditions (asserted): this document has no pages besides its embedded one
	// and no children.
	// In compact mode a failed hash reservation either returns without moving anything
	// (PUGIXML_NO_EXCEPTIONS) or throws std::bad_alloc.
	PUGI__FN void xml_document::_move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
	{
		impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
		impl::xml_document_struct* other = static_cast<impl::xml_document_struct*>(rhs._root);

		// save first child pointer for later; this needs hash access
		xml_node_struct* other_first_child = other->first_child;

	#ifdef PUGIXML_COMPACT
		// reserve space for the hash table up front; this is the only operation that can fail
		// if it does, we have no choice but to throw (if we have exceptions)
		if (other_first_child)
		{
			// count the top-level children of the source document
			size_t other_children = 0;
			for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
				other_children++;

			// in compact mode, each pointer assignment could result in a hash table request
			// during move, we have to relocate document first_child and parents of all children
			// normally there's just one child and its parent has a pointerless encoding but
			// we assume the worst here
			if (!other->_hash->reserve(other_children + 1))
			{
			#ifdef PUGIXML_NO_EXCEPTIONS
				return;
			#else
				throw std::bad_alloc();
			#endif
			}
		}
	#endif

		// move allocation state
		// note that other->_root may point to the embedded document page, in which case we should keep original (empty) state
		if (other->_root != PUGI__GETPAGE(other))
		{
			doc->_root = other->_root;
			doc->_busy_size = other->_busy_size;
		}

		// move buffer state
		doc->buffer = other->buffer;
		doc->extra_buffers = other->extra_buffers;
		_buffer = rhs._buffer;

	#ifdef PUGIXML_COMPACT
		// move compact hash; note that the hash table can have pointers to other but they will be "inactive", similarly to nodes removed with remove_child
		doc->hash = other->hash;
		doc->_hash = &doc->hash;

		// make sure we don't access other hash up until the end when we reinitialize other document
		other->_hash = 0;
	#endif

		// move page structure
		impl::xml_memory_page* doc_page = PUGI__GETPAGE(doc);
		assert(doc_page && !doc_page->prev && !doc_page->next);

		impl::xml_memory_page* other_page = PUGI__GETPAGE(other);
		assert(other_page && !other_page->prev);

		// relink pages since root page is embedded into xml_document
		if (impl::xml_memory_page* page = other_page->next)
		{
			assert(page->prev == other_page);

			// splice the source's dynamic pages in after this document's embedded page
			page->prev = doc_page;

			doc_page->next = page;
			other_page->next = 0;
		}

		// make sure pages point to the correct document state
		for (impl::xml_memory_page* page = doc_page->next; page; page = page->next)
		{
			assert(page->allocator == other);

			page->allocator = doc;

		#ifdef PUGIXML_COMPACT
			// this automatically migrates most children between documents and prevents ->parent assignment from allocating
			if (page->compact_shared_parent == other)
				page->compact_shared_parent = doc;
		#endif
		}

		// move tree structure
		assert(!doc->first_child);

		doc->first_child = other_first_child;

		// re-parent every top-level child to this document
		for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
		{
		#ifdef PUGIXML_COMPACT
			// most children will have migrated when we reassigned compact_shared_parent
			assert(node->parent == other || node->parent == doc);

			node->parent = doc;
		#else
			assert(node->parent == other);
			node->parent = doc;
		#endif
		}

		// reset other document
		// placement new re-constructs the source's document struct over its existing storage
		new (other) impl::xml_document_struct(PUGI__GETPAGE(other));
		rhs._buffer = 0;
	}
7170 #endif
7171 
7172 #ifndef PUGIXML_NO_STL
load(std::basic_istream<char,std::char_traits<char>> & stream,unsigned int options,xml_encoding encoding)7173 	PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
7174 	{
7175 		reset();
7176 
7177 		return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer);
7178 	}
7179 
load(std::basic_istream<wchar_t,std::char_traits<wchar_t>> & stream,unsigned int options)7180 	PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
7181 	{
7182 		reset();
7183 
7184 		return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
7185 	}
7186 #endif
7187 
load_string(const char_t * contents,unsigned int options)7188 	PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
7189 	{
7190 		// Force native encoding (skip autodetection)
7191 	#ifdef PUGIXML_WCHAR_MODE
7192 		xml_encoding encoding = encoding_wchar;
7193 	#else
7194 		xml_encoding encoding = encoding_utf8;
7195 	#endif
7196 
7197 		return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
7198 	}
7199 
load(const char_t * contents,unsigned int options)7200 	PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
7201 	{
7202 		return load_string(contents, options);
7203 	}
7204 
load_file(const char * path_,unsigned int options,xml_encoding encoding)7205 	PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
7206 	{
7207 		reset();
7208 
7209 		using impl::auto_deleter; // MSVC7 workaround
7210 		auto_deleter<FILE> file(impl::open_file(path_, "rb"), impl::close_file);
7211 
7212 		return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
7213 	}
7214 
load_file(const wchar_t * path_,unsigned int options,xml_encoding encoding)7215 	PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
7216 	{
7217 		reset();
7218 
7219 		using impl::auto_deleter; // MSVC7 workaround
7220 		auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file);
7221 
7222 		return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
7223 	}
7224 
load_buffer(const void * contents,size_t size,unsigned int options,xml_encoding encoding)7225 	PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
7226 	{
7227 		reset();
7228 
7229 		return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
7230 	}
7231 
load_buffer_inplace(void * contents,size_t size,unsigned int options,xml_encoding encoding)7232 	PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
7233 	{
7234 		reset();
7235 
7236 		return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
7237 	}
7238 
load_buffer_inplace_own(void * contents,size_t size,unsigned int options,xml_encoding encoding)7239 	PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
7240 	{
7241 		reset();
7242 
7243 		return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
7244 	}
7245 
save(xml_writer & writer,const char_t * indent,unsigned int flags,xml_encoding encoding) const7246 	PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7247 	{
7248 		impl::xml_buffered_writer buffered_writer(writer, encoding);
7249 
7250 		if ((flags & format_write_bom) && encoding != encoding_latin1)
7251 		{
7252 			// BOM always represents the codepoint U+FEFF, so just write it in native encoding
7253 		#ifdef PUGIXML_WCHAR_MODE
7254 			unsigned int bom = 0xfeff;
7255 			buffered_writer.write(static_cast<wchar_t>(bom));
7256 		#else
7257 			buffered_writer.write('\xef', '\xbb', '\xbf');
7258 		#endif
7259 		}
7260 
7261 		if (!(flags & format_no_declaration) && !impl::has_declaration(_root))
7262 		{
7263 			buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
7264 			if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
7265 			buffered_writer.write('?', '>');
7266 			if (!(flags & format_raw)) buffered_writer.write('\n');
7267 		}
7268 
7269 		impl::node_output(buffered_writer, _root, indent, flags, 0);
7270 
7271 		buffered_writer.flush();
7272 	}
7273 
7274 #ifndef PUGIXML_NO_STL
save(std::basic_ostream<char,std::char_traits<char>> & stream,const char_t * indent,unsigned int flags,xml_encoding encoding) const7275 	PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7276 	{
7277 		xml_writer_stream writer(stream);
7278 
7279 		save(writer, indent, flags, encoding);
7280 	}
7281 
save(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream,const char_t * indent,unsigned int flags) const7282 	PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
7283 	{
7284 		xml_writer_stream writer(stream);
7285 
7286 		save(writer, indent, flags, encoding_wchar);
7287 	}
7288 #endif
7289 
save_file(const char * path_,const char_t * indent,unsigned int flags,xml_encoding encoding) const7290 	PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7291 	{
7292 		using impl::auto_deleter; // MSVC7 workaround
7293 		auto_deleter<FILE> file(impl::open_file(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file);
7294 
7295 		return impl::save_file_impl(*this, file.data, indent, flags, encoding);
7296 	}
7297 
save_file(const wchar_t * path_,const char_t * indent,unsigned int flags,xml_encoding encoding) const7298 	PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7299 	{
7300 		using impl::auto_deleter; // MSVC7 workaround
7301 		auto_deleter<FILE> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), impl::close_file);
7302 
7303 		return impl::save_file_impl(*this, file.data, indent, flags, encoding);
7304 	}
7305 
document_element() const7306 	PUGI__FN xml_node xml_document::document_element() const
7307 	{
7308 		assert(_root);
7309 
7310 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
7311 			if (PUGI__NODETYPE(i) == node_element)
7312 				return xml_node(i);
7313 
7314 		return xml_node();
7315 	}
7316 
7317 #ifndef PUGIXML_NO_STL
as_utf8(const wchar_t * str)7318 	PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
7319 	{
7320 		assert(str);
7321 
7322 		return impl::as_utf8_impl(str, impl::strlength_wide(str));
7323 	}
7324 
as_utf8(const std::basic_string<wchar_t> & str)7325 	PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
7326 	{
7327 		return impl::as_utf8_impl(str.c_str(), str.size());
7328 	}
7329 
as_wide(const char * str)7330 	PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
7331 	{
7332 		assert(str);
7333 
7334 		return impl::as_wide_impl(str, strlen(str));
7335 	}
7336 
as_wide(const std::string & str)7337 	PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
7338 	{
7339 		return impl::as_wide_impl(str.c_str(), str.size());
7340 	}
7341 #endif
7342 
set_memory_management_functions(allocation_function allocate,deallocation_function deallocate)7343 	PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
7344 	{
7345 		impl::xml_memory::allocate = allocate;
7346 		impl::xml_memory::deallocate = deallocate;
7347 	}
7348 
get_memory_allocation_function()7349 	PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
7350 	{
7351 		return impl::xml_memory::allocate;
7352 	}
7353 
get_memory_deallocation_function()7354 	PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
7355 	{
7356 		return impl::xml_memory::deallocate;
7357 	}
7358 }
7359 
7360 #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
7361 namespace std
7362 {
7363 	// Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
_Iter_cat(const pugi::xml_node_iterator &)7364 	PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
7365 	{
7366 		return std::bidirectional_iterator_tag();
7367 	}
7368 
_Iter_cat(const pugi::xml_attribute_iterator &)7369 	PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
7370 	{
7371 		return std::bidirectional_iterator_tag();
7372 	}
7373 
_Iter_cat(const pugi::xml_named_node_iterator &)7374 	PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
7375 	{
7376 		return std::bidirectional_iterator_tag();
7377 	}
7378 }
7379 #endif
7380 
7381 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
7382 namespace std
7383 {
7384 	// Workarounds for (non-standard) iterator category detection
__iterator_category(const pugi::xml_node_iterator &)7385 	PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
7386 	{
7387 		return std::bidirectional_iterator_tag();
7388 	}
7389 
__iterator_category(const pugi::xml_attribute_iterator &)7390 	PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
7391 	{
7392 		return std::bidirectional_iterator_tag();
7393 	}
7394 
__iterator_category(const pugi::xml_named_node_iterator &)7395 	PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
7396 	{
7397 		return std::bidirectional_iterator_tag();
7398 	}
7399 }
7400 #endif
7401 
7402 #ifndef PUGIXML_NO_XPATH
7403 // STL replacements
7404 PUGI__NS_BEGIN
7405 	struct equal_to
7406 	{
operator ()equal_to7407 		template <typename T> bool operator()(const T& lhs, const T& rhs) const
7408 		{
7409 			return lhs == rhs;
7410 		}
7411 	};
7412 
7413 	struct not_equal_to
7414 	{
operator ()not_equal_to7415 		template <typename T> bool operator()(const T& lhs, const T& rhs) const
7416 		{
7417 			return lhs != rhs;
7418 		}
7419 	};
7420 
7421 	struct less
7422 	{
operator ()less7423 		template <typename T> bool operator()(const T& lhs, const T& rhs) const
7424 		{
7425 			return lhs < rhs;
7426 		}
7427 	};
7428 
7429 	struct less_equal
7430 	{
operator ()less_equal7431 		template <typename T> bool operator()(const T& lhs, const T& rhs) const
7432 		{
7433 			return lhs <= rhs;
7434 		}
7435 	};
7436 
swap(T & lhs,T & rhs)7437 	template <typename T> inline void swap(T& lhs, T& rhs)
7438 	{
7439 		T temp = lhs;
7440 		lhs = rhs;
7441 		rhs = temp;
7442 	}
7443 
min_element(I begin,I end,const Pred & pred)7444 	template <typename I, typename Pred> PUGI__FN I min_element(I begin, I end, const Pred& pred)
7445 	{
7446 		I result = begin;
7447 
7448 		for (I it = begin + 1; it != end; ++it)
7449 			if (pred(*it, *result))
7450 				result = it;
7451 
7452 		return result;
7453 	}
7454 
reverse(I begin,I end)7455 	template <typename I> PUGI__FN void reverse(I begin, I end)
7456 	{
7457 		while (end - begin > 1)
7458 			swap(*begin++, *--end);
7459 	}
7460 
unique(I begin,I end)7461 	template <typename I> PUGI__FN I unique(I begin, I end)
7462 	{
7463 		// fast skip head
7464 		while (end - begin > 1 && *begin != *(begin + 1))
7465 			begin++;
7466 
7467 		if (begin == end)
7468 			return begin;
7469 
7470 		// last written element
7471 		I write = begin++;
7472 
7473 		// merge unique elements
7474 		while (begin != end)
7475 		{
7476 			if (*begin != *write)
7477 				*++write = *begin++;
7478 			else
7479 				begin++;
7480 		}
7481 
7482 		// past-the-end (write points to live element)
7483 		return write + 1;
7484 	}
7485 
insertion_sort(T * begin,T * end,const Pred & pred)7486 	template <typename T, typename Pred> PUGI__FN void insertion_sort(T* begin, T* end, const Pred& pred)
7487 	{
7488 		if (begin == end)
7489 			return;
7490 
7491 		for (T* it = begin + 1; it != end; ++it)
7492 		{
7493 			T val = *it;
7494 			T* hole = it;
7495 
7496 			// move hole backwards
7497 			while (hole > begin && pred(val, *(hole - 1)))
7498 			{
7499 				*hole = *(hole - 1);
7500 				hole--;
7501 			}
7502 
7503 			// fill hole with element
7504 			*hole = val;
7505 		}
7506 	}
7507 
median3(I first,I middle,I last,const Pred & pred)7508 	template <typename I, typename Pred> inline I median3(I first, I middle, I last, const Pred& pred)
7509 	{
7510 		if (pred(*middle, *first))
7511 			swap(middle, first);
7512 		if (pred(*last, *middle))
7513 			swap(last, middle);
7514 		if (pred(*middle, *first))
7515 			swap(middle, first);
7516 
7517 		return middle;
7518 	}
7519 
partition3(T * begin,T * end,T pivot,const Pred & pred,T ** out_eqbeg,T ** out_eqend)7520 	template <typename T, typename Pred> PUGI__FN void partition3(T* begin, T* end, T pivot, const Pred& pred, T** out_eqbeg, T** out_eqend)
7521 	{
7522 		// invariant: array is split into 4 groups: = < ? > (each variable denotes the boundary between the groups)
7523 		T* eq = begin;
7524 		T* lt = begin;
7525 		T* gt = end;
7526 
7527 		while (lt < gt)
7528 		{
7529 			if (pred(*lt, pivot))
7530 				lt++;
7531 			else if (*lt == pivot)
7532 				swap(*eq++, *lt++);
7533 			else
7534 				swap(*lt, *--gt);
7535 		}
7536 
7537 		// we now have just 4 groups: = < >; move equal elements to the middle
7538 		T* eqbeg = gt;
7539 
7540 		for (T* it = begin; it != eq; ++it)
7541 			swap(*it, *--eqbeg);
7542 
7543 		*out_eqbeg = eqbeg;
7544 		*out_eqend = gt;
7545 	}
7546 
sort(I begin,I end,const Pred & pred)7547 	template <typename I, typename Pred> PUGI__FN void sort(I begin, I end, const Pred& pred)
7548 	{
7549 		// sort large chunks
7550 		while (end - begin > 16)
7551 		{
7552 			// find median element
7553 			I middle = begin + (end - begin) / 2;
7554 			I median = median3(begin, middle, end - 1, pred);
7555 
7556 			// partition in three chunks (< = >)
7557 			I eqbeg, eqend;
7558 			partition3(begin, end, *median, pred, &eqbeg, &eqend);
7559 
7560 			// loop on larger half
7561 			if (eqbeg - begin > end - eqend)
7562 			{
7563 				sort(eqend, end, pred);
7564 				end = eqbeg;
7565 			}
7566 			else
7567 			{
7568 				sort(begin, eqbeg, pred);
7569 				begin = eqend;
7570 			}
7571 		}
7572 
7573 		// insertion sort small chunk
7574 		insertion_sort(begin, end, pred);
7575 	}
7576 
hash_insert(const void ** table,size_t size,const void * key)7577 	PUGI__FN bool hash_insert(const void** table, size_t size, const void* key)
7578 	{
7579 		assert(key);
7580 
7581 		unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
7582 
7583 		// MurmurHash3 32-bit finalizer
7584 		h ^= h >> 16;
7585 		h *= 0x85ebca6bu;
7586 		h ^= h >> 13;
7587 		h *= 0xc2b2ae35u;
7588 		h ^= h >> 16;
7589 
7590 		size_t hashmod = size - 1;
7591 		size_t bucket = h & hashmod;
7592 
7593 		for (size_t probe = 0; probe <= hashmod; ++probe)
7594 		{
7595 			if (table[bucket] == 0)
7596 			{
7597 				table[bucket] = key;
7598 				return true;
7599 			}
7600 
7601 			if (table[bucket] == key)
7602 				return false;
7603 
7604 			// hash collision, quadratic probing
7605 			bucket = (bucket + probe + 1) & hashmod;
7606 		}
7607 
7608 		assert(false && "Hash table is full"); // unreachable
7609 		return false;
7610 	}
7611 PUGI__NS_END
7612 
7613 // Allocator used for AST and evaluation stacks
7614 PUGI__NS_BEGIN
7615 	static const size_t xpath_memory_page_size =
7616 	#ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
7617 		PUGIXML_MEMORY_XPATH_PAGE_SIZE
7618 	#else
7619 		4096
7620 	#endif
7621 		;
7622 
7623 	static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*);
7624 
	// One page of XPath allocator memory; pages form a singly linked list through next.
	// xpath_allocator::allocate may create pages whose usable size exceeds the declared data array,
	// which is why the usable size is tracked separately in capacity.
	struct xpath_memory_block
	{
		// next page in the chain, or 0 for the last one
		xpath_memory_block* next;
		// number of bytes available in data for this page
		size_t capacity;

		union
		{
			// start of the page payload
			char data[xpath_memory_page_size];
			// not accessed by the allocator code; presumably present only to give data the alignment of double
			double alignment;
		};
	};
7636 
7637 	struct xpath_allocator
7638 	{
7639 		xpath_memory_block* _root;
7640 		size_t _root_size;
7641 		bool* _error;
7642 
xpath_allocatorxpath_allocator7643 		xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error)
7644 		{
7645 		}
7646 
allocatexpath_allocator7647 		void* allocate(size_t size)
7648 		{
7649 			// round size up to block alignment boundary
7650 			size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7651 
7652 			if (_root_size + size <= _root->capacity)
7653 			{
7654 				void* buf = &_root->data[0] + _root_size;
7655 				_root_size += size;
7656 				return buf;
7657 			}
7658 			else
7659 			{
7660 				// make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
7661 				size_t block_capacity_base = sizeof(_root->data);
7662 				size_t block_capacity_req = size + block_capacity_base / 4;
7663 				size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;
7664 
7665 				size_t block_size = block_capacity + offsetof(xpath_memory_block, data);
7666 
7667 				xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
7668 				if (!block)
7669 				{
7670 					if (_error) *_error = true;
7671 					return 0;
7672 				}
7673 
7674 				block->next = _root;
7675 				block->capacity = block_capacity;
7676 
7677 				_root = block;
7678 				_root_size = size;
7679 
7680 				return block->data;
7681 			}
7682 		}
7683 
reallocatexpath_allocator7684 		void* reallocate(void* ptr, size_t old_size, size_t new_size)
7685 		{
7686 			// round size up to block alignment boundary
7687 			old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7688 			new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7689 
7690 			// we can only reallocate the last object
7691 			assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);
7692 
7693 			// try to reallocate the object inplace
7694 			if (ptr && _root_size - old_size + new_size <= _root->capacity)
7695 			{
7696 				_root_size = _root_size - old_size + new_size;
7697 				return ptr;
7698 			}
7699 
7700 			// allocate a new block
7701 			void* result = allocate(new_size);
7702 			if (!result) return 0;
7703 
7704 			// we have a new block
7705 			if (ptr)
7706 			{
7707 				// copy old data (we only support growing)
7708 				assert(new_size >= old_size);
7709 				memcpy(result, ptr, old_size);
7710 
7711 				// free the previous page if it had no other objects
7712 				assert(_root->data == result);
7713 				assert(_root->next);
7714 
7715 				if (_root->next->data == ptr)
7716 				{
7717 					// deallocate the whole page, unless it was the first one
7718 					xpath_memory_block* next = _root->next->next;
7719 
7720 					if (next)
7721 					{
7722 						xml_memory::deallocate(_root->next);
7723 						_root->next = next;
7724 					}
7725 				}
7726 			}
7727 
7728 			return result;
7729 		}
7730 
revertxpath_allocator7731 		void revert(const xpath_allocator& state)
7732 		{
7733 			// free all new pages
7734 			xpath_memory_block* cur = _root;
7735 
7736 			while (cur != state._root)
7737 			{
7738 				xpath_memory_block* next = cur->next;
7739 
7740 				xml_memory::deallocate(cur);
7741 
7742 				cur = next;
7743 			}
7744 
7745 			// restore state
7746 			_root = state._root;
7747 			_root_size = state._root_size;
7748 		}
7749 
releasexpath_allocator7750 		void release()
7751 		{
7752 			xpath_memory_block* cur = _root;
7753 			assert(cur);
7754 
7755 			while (cur->next)
7756 			{
7757 				xpath_memory_block* next = cur->next;
7758 
7759 				xml_memory::deallocate(cur);
7760 
7761 				cur = next;
7762 			}
7763 		}
7764 	};
7765 
7766 	struct xpath_allocator_capture
7767 	{
xpath_allocator_capturexpath_allocator_capture7768 		xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
7769 		{
7770 		}
7771 
~xpath_allocator_capturexpath_allocator_capture7772 		~xpath_allocator_capture()
7773 		{
7774 			_target->revert(_state);
7775 		}
7776 
7777 		xpath_allocator* _target;
7778 		xpath_allocator _state;
7779 	};
7780 
	// Pair of allocators passed around together; xpath_stack_data points them at its own
	// result and temp allocators
	struct xpath_stack
	{
		// NOTE(review): presumably used for values that outlive the current evaluation step - confirm against callers
		xpath_allocator* result;
		// NOTE(review): presumably used for short-lived scratch values - confirm against callers
		xpath_allocator* temp;
	};
7786 
7787 	struct xpath_stack_data
7788 	{
7789 		xpath_memory_block blocks[2];
7790 		xpath_allocator result;
7791 		xpath_allocator temp;
7792 		xpath_stack stack;
7793 		bool oom;
7794 
xpath_stack_dataxpath_stack_data7795 		xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false)
7796 		{
7797 			blocks[0].next = blocks[1].next = 0;
7798 			blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
7799 
7800 			stack.result = &result;
7801 			stack.temp = &temp;
7802 		}
7803 
~xpath_stack_dataxpath_stack_data7804 		~xpath_stack_data()
7805 		{
7806 			result.release();
7807 			temp.release();
7808 		}
7809 	};
7810 PUGI__NS_END
7811 
7812 // String class
7813 PUGI__NS_BEGIN
7814 	class xpath_string
7815 	{
7816 		const char_t* _buffer;
7817 		bool _uses_heap;
7818 		size_t _length_heap;
7819 
duplicate_string(const char_t * string,size_t length,xpath_allocator * alloc)7820 		static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
7821 		{
7822 			char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
7823 			if (!result) return 0;
7824 
7825 			memcpy(result, string, length * sizeof(char_t));
7826 			result[length] = 0;
7827 
7828 			return result;
7829 		}
7830 
xpath_string(const char_t * buffer,bool uses_heap_,size_t length_heap)7831 		xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap)
7832 		{
7833 		}
7834 
7835 	public:
from_const(const char_t * str)7836 		static xpath_string from_const(const char_t* str)
7837 		{
7838 			return xpath_string(str, false, 0);
7839 		}
7840 
from_heap_preallocated(const char_t * begin,const char_t * end)7841 		static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end)
7842 		{
7843 			assert(begin <= end && *end == 0);
7844 
7845 			return xpath_string(begin, true, static_cast<size_t>(end - begin));
7846 		}
7847 
from_heap(const char_t * begin,const char_t * end,xpath_allocator * alloc)7848 		static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc)
7849 		{
7850 			assert(begin <= end);
7851 
7852 			if (begin == end)
7853 				return xpath_string();
7854 
7855 			size_t length = static_cast<size_t>(end - begin);
7856 			const char_t* data = duplicate_string(begin, length, alloc);
7857 
7858 			return data ? xpath_string(data, true, length) : xpath_string();
7859 		}
7860 
xpath_string()7861 		xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0)
7862 		{
7863 		}
7864 
append(const xpath_string & o,xpath_allocator * alloc)7865 		void append(const xpath_string& o, xpath_allocator* alloc)
7866 		{
7867 			// skip empty sources
7868 			if (!*o._buffer) return;
7869 
7870 			// fast append for constant empty target and constant source
7871 			if (!*_buffer && !_uses_heap && !o._uses_heap)
7872 			{
7873 				_buffer = o._buffer;
7874 			}
7875 			else
7876 			{
7877 				// need to make heap copy
7878 				size_t target_length = length();
7879 				size_t source_length = o.length();
7880 				size_t result_length = target_length + source_length;
7881 
7882 				// allocate new buffer
7883 				char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
7884 				if (!result) return;
7885 
7886 				// append first string to the new buffer in case there was no reallocation
7887 				if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
7888 
7889 				// append second string to the new buffer
7890 				memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
7891 				result[result_length] = 0;
7892 
7893 				// finalize
7894 				_buffer = result;
7895 				_uses_heap = true;
7896 				_length_heap = result_length;
7897 			}
7898 		}
7899 
c_str() const7900 		const char_t* c_str() const
7901 		{
7902 			return _buffer;
7903 		}
7904 
length() const7905 		size_t length() const
7906 		{
7907 			return _uses_heap ? _length_heap : strlength(_buffer);
7908 		}
7909 
data(xpath_allocator * alloc)7910 		char_t* data(xpath_allocator* alloc)
7911 		{
7912 			// make private heap copy
7913 			if (!_uses_heap)
7914 			{
7915 				size_t length_ = strlength(_buffer);
7916 				const char_t* data_ = duplicate_string(_buffer, length_, alloc);
7917 
7918 				if (!data_) return 0;
7919 
7920 				_buffer = data_;
7921 				_uses_heap = true;
7922 				_length_heap = length_;
7923 			}
7924 
7925 			return const_cast<char_t*>(_buffer);
7926 		}
7927 
empty() const7928 		bool empty() const
7929 		{
7930 			return *_buffer == 0;
7931 		}
7932 
operator ==(const xpath_string & o) const7933 		bool operator==(const xpath_string& o) const
7934 		{
7935 			return strequal(_buffer, o._buffer);
7936 		}
7937 
operator !=(const xpath_string & o) const7938 		bool operator!=(const xpath_string& o) const
7939 		{
7940 			return !strequal(_buffer, o._buffer);
7941 		}
7942 
uses_heap() const7943 		bool uses_heap() const
7944 		{
7945 			return _uses_heap;
7946 		}
7947 	};
7948 PUGI__NS_END
7949 
7950 PUGI__NS_BEGIN
starts_with(const char_t * string,const char_t * pattern)7951 	PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
7952 	{
7953 		while (*pattern && *string == *pattern)
7954 		{
7955 			string++;
7956 			pattern++;
7957 		}
7958 
7959 		return *pattern == 0;
7960 	}
7961 
find_char(const char_t * s,char_t c)7962 	PUGI__FN const char_t* find_char(const char_t* s, char_t c)
7963 	{
7964 	#ifdef PUGIXML_WCHAR_MODE
7965 		return wcschr(s, c);
7966 	#else
7967 		return strchr(s, c);
7968 	#endif
7969 	}
7970 
find_substring(const char_t * s,const char_t * p)7971 	PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
7972 	{
7973 	#ifdef PUGIXML_WCHAR_MODE
7974 		// MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
7975 		return (*p == 0) ? s : wcsstr(s, p);
7976 	#else
7977 		return strstr(s, p);
7978 	#endif
7979 	}
7980 
7981 	// Converts symbol to lower case, if it is an ASCII one
tolower_ascii(char_t ch)7982 	PUGI__FN char_t tolower_ascii(char_t ch)
7983 	{
7984 		return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
7985 	}
7986 
string_value(const xpath_node & na,xpath_allocator * alloc)7987 	PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
7988 	{
7989 		if (na.attribute())
7990 			return xpath_string::from_const(na.attribute().value());
7991 		else
7992 		{
7993 			xml_node n = na.node();
7994 
7995 			switch (n.type())
7996 			{
7997 			case node_pcdata:
7998 			case node_cdata:
7999 			case node_comment:
8000 			case node_pi:
8001 				return xpath_string::from_const(n.value());
8002 
8003 			case node_document:
8004 			case node_element:
8005 			{
8006 				xpath_string result;
8007 
8008 				// element nodes can have value if parse_embed_pcdata was used
8009 				if (n.value()[0])
8010 					result.append(xpath_string::from_const(n.value()), alloc);
8011 
8012 				xml_node cur = n.first_child();
8013 
8014 				while (cur && cur != n)
8015 				{
8016 					if (cur.type() == node_pcdata || cur.type() == node_cdata)
8017 						result.append(xpath_string::from_const(cur.value()), alloc);
8018 
8019 					if (cur.first_child())
8020 						cur = cur.first_child();
8021 					else if (cur.next_sibling())
8022 						cur = cur.next_sibling();
8023 					else
8024 					{
8025 						while (!cur.next_sibling() && cur != n)
8026 							cur = cur.parent();
8027 
8028 						if (cur != n) cur = cur.next_sibling();
8029 					}
8030 				}
8031 
8032 				return result;
8033 			}
8034 
8035 			default:
8036 				return xpath_string();
8037 			}
8038 		}
8039 	}
8040 
node_is_before_sibling(xml_node_struct * ln,xml_node_struct * rn)8041 	PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
8042 	{
8043 		assert(ln->parent == rn->parent);
8044 
8045 		// there is no common ancestor (the shared parent is null), nodes are from different documents
8046 		if (!ln->parent) return ln < rn;
8047 
8048 		// determine sibling order
8049 		xml_node_struct* ls = ln;
8050 		xml_node_struct* rs = rn;
8051 
8052 		while (ls && rs)
8053 		{
8054 			if (ls == rn) return true;
8055 			if (rs == ln) return false;
8056 
8057 			ls = ls->next_sibling;
8058 			rs = rs->next_sibling;
8059 		}
8060 
8061 		// if rn sibling chain ended ln must be before rn
8062 		return !rs;
8063 	}
8064 
node_is_before(xml_node_struct * ln,xml_node_struct * rn)8065 	PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn)
8066 	{
8067 		// find common ancestor at the same depth, if any
8068 		xml_node_struct* lp = ln;
8069 		xml_node_struct* rp = rn;
8070 
8071 		while (lp && rp && lp->parent != rp->parent)
8072 		{
8073 			lp = lp->parent;
8074 			rp = rp->parent;
8075 		}
8076 
8077 		// parents are the same!
8078 		if (lp && rp) return node_is_before_sibling(lp, rp);
8079 
8080 		// nodes are at different depths, need to normalize heights
8081 		bool left_higher = !lp;
8082 
8083 		while (lp)
8084 		{
8085 			lp = lp->parent;
8086 			ln = ln->parent;
8087 		}
8088 
8089 		while (rp)
8090 		{
8091 			rp = rp->parent;
8092 			rn = rn->parent;
8093 		}
8094 
8095 		// one node is the ancestor of the other
8096 		if (ln == rn) return left_higher;
8097 
8098 		// find common ancestor... again
8099 		while (ln->parent != rn->parent)
8100 		{
8101 			ln = ln->parent;
8102 			rn = rn->parent;
8103 		}
8104 
8105 		return node_is_before_sibling(ln, rn);
8106 	}
8107 
node_is_ancestor(xml_node_struct * parent,xml_node_struct * node)8108 	PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
8109 	{
8110 		while (node && node != parent) node = node->parent;
8111 
8112 		return parent && node == parent;
8113 	}
8114 
document_buffer_order(const xpath_node & xnode)8115 	PUGI__FN const void* document_buffer_order(const xpath_node& xnode)
8116 	{
8117 		xml_node_struct* node = xnode.node().internal_object();
8118 
8119 		if (node)
8120 		{
8121 			if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0)
8122 			{
8123 				if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name;
8124 				if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value;
8125 			}
8126 
8127 			return 0;
8128 		}
8129 
8130 		xml_attribute_struct* attr = xnode.attribute().internal_object();
8131 
8132 		if (attr)
8133 		{
8134 			if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0)
8135 			{
8136 				if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name;
8137 				if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
8138 			}
8139 
8140 			return 0;
8141 		}
8142 
8143 		return 0;
8144 	}
8145 
8146 	struct document_order_comparator
8147 	{
operator ()document_order_comparator8148 		bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
8149 		{
8150 			// optimized document order based check
8151 			const void* lo = document_buffer_order(lhs);
8152 			const void* ro = document_buffer_order(rhs);
8153 
8154 			if (lo && ro) return lo < ro;
8155 
8156 			// slow comparison
8157 			xml_node ln = lhs.node(), rn = rhs.node();
8158 
8159 			// compare attributes
8160 			if (lhs.attribute() && rhs.attribute())
8161 			{
8162 				// shared parent
8163 				if (lhs.parent() == rhs.parent())
8164 				{
8165 					// determine sibling order
8166 					for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
8167 						if (a == rhs.attribute())
8168 							return true;
8169 
8170 					return false;
8171 				}
8172 
8173 				// compare attribute parents
8174 				ln = lhs.parent();
8175 				rn = rhs.parent();
8176 			}
8177 			else if (lhs.attribute())
8178 			{
8179 				// attributes go after the parent element
8180 				if (lhs.parent() == rhs.node()) return false;
8181 
8182 				ln = lhs.parent();
8183 			}
8184 			else if (rhs.attribute())
8185 			{
8186 				// attributes go after the parent element
8187 				if (rhs.parent() == lhs.node()) return true;
8188 
8189 				rn = rhs.parent();
8190 			}
8191 
8192 			if (ln == rn) return false;
8193 
8194 			if (!ln || !rn) return ln < rn;
8195 
8196 			return node_is_before(ln.internal_object(), rn.internal_object());
8197 		}
8198 	};
8199 
gen_nan()8200 	PUGI__FN double gen_nan()
8201 	{
8202 	#if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
8203 		PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t));
8204 		typedef uint32_t UI; // BCC5 workaround
8205 		union { float f; UI i; } u;
8206 		u.i = 0x7fc00000;
8207 		return double(u.f);
8208 	#else
8209 		// fallback
8210 		const volatile double zero = 0.0;
8211 		return zero / zero;
8212 	#endif
8213 	}
8214 
is_nan(double value)8215 	PUGI__FN bool is_nan(double value)
8216 	{
8217 	#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
8218 		return !!_isnan(value);
8219 	#elif defined(fpclassify) && defined(FP_NAN)
8220 		return fpclassify(value) == FP_NAN;
8221 	#else
8222 		// fallback
8223 		const volatile double v = value;
8224 		return v != v;
8225 	#endif
8226 	}
8227 
convert_number_to_string_special(double value)8228 	PUGI__FN const char_t* convert_number_to_string_special(double value)
8229 	{
8230 	#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
8231 		if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
8232 		if (_isnan(value)) return PUGIXML_TEXT("NaN");
8233 		return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8234 	#elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
8235 		switch (fpclassify(value))
8236 		{
8237 		case FP_NAN:
8238 			return PUGIXML_TEXT("NaN");
8239 
8240 		case FP_INFINITE:
8241 			return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8242 
8243 		case FP_ZERO:
8244 			return PUGIXML_TEXT("0");
8245 
8246 		default:
8247 			return 0;
8248 		}
8249 	#else
8250 		// fallback
8251 		const volatile double v = value;
8252 
8253 		if (v == 0) return PUGIXML_TEXT("0");
8254 		if (v != v) return PUGIXML_TEXT("NaN");
8255 		if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8256 		return 0;
8257 	#endif
8258 	}
8259 
convert_number_to_boolean(double value)8260 	PUGI__FN bool convert_number_to_boolean(double value)
8261 	{
8262 		return (value != 0 && !is_nan(value));
8263 	}
8264 
truncate_zeros(char * begin,char * end)8265 	PUGI__FN void truncate_zeros(char* begin, char* end)
8266 	{
8267 		while (begin != end && end[-1] == '0') end--;
8268 
8269 		*end = 0;
8270 	}
8271 
8272 	// gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
8273 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
convert_number_to_mantissa_exponent(double value,char (& buffer)[32],char ** out_mantissa,int * out_exponent)8274 	PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
8275 	{
8276 		// get base values
8277 		int sign, exponent;
8278 		_ecvt_s(buffer, sizeof(buffer), value, DBL_DIG + 1, &exponent, &sign);
8279 
8280 		// truncate redundant zeros
8281 		truncate_zeros(buffer, buffer + strlen(buffer));
8282 
8283 		// fill results
8284 		*out_mantissa = buffer;
8285 		*out_exponent = exponent;
8286 	}
8287 #else
convert_number_to_mantissa_exponent(double value,char (& buffer)[32],char ** out_mantissa,int * out_exponent)8288 	PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
8289 	{
8290 		// get a scientific notation value with IEEE DBL_DIG decimals
8291 		PUGI__SNPRINTF(buffer, "%.*e", DBL_DIG, value);
8292 
8293 		// get the exponent (possibly negative)
8294 		char* exponent_string = strchr(buffer, 'e');
8295 		assert(exponent_string);
8296 
8297 		int exponent = atoi(exponent_string + 1);
8298 
8299 		// extract mantissa string: skip sign
8300 		char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
8301 		assert(mantissa[0] != '0' && mantissa[1] == '.');
8302 
8303 		// divide mantissa by 10 to eliminate integer part
8304 		mantissa[1] = mantissa[0];
8305 		mantissa++;
8306 		exponent++;
8307 
8308 		// remove extra mantissa digits and zero-terminate mantissa
8309 		truncate_zeros(mantissa, exponent_string);
8310 
8311 		// fill results
8312 		*out_mantissa = mantissa;
8313 		*out_exponent = exponent;
8314 	}
8315 #endif
8316 
convert_number_to_string(double value,xpath_allocator * alloc)8317 	PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
8318 	{
8319 		// try special number conversion
8320 		const char_t* special = convert_number_to_string_special(value);
8321 		if (special) return xpath_string::from_const(special);
8322 
8323 		// get mantissa + exponent form
8324 		char mantissa_buffer[32];
8325 
8326 		char* mantissa;
8327 		int exponent;
8328 		convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent);
8329 
8330 		// allocate a buffer of suitable length for the number
8331 		size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
8332 		char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
8333 		if (!result) return xpath_string();
8334 
8335 		// make the number!
8336 		char_t* s = result;
8337 
8338 		// sign
8339 		if (value < 0) *s++ = '-';
8340 
8341 		// integer part
8342 		if (exponent <= 0)
8343 		{
8344 			*s++ = '0';
8345 		}
8346 		else
8347 		{
8348 			while (exponent > 0)
8349 			{
8350 				assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9);
8351 				*s++ = *mantissa ? *mantissa++ : '0';
8352 				exponent--;
8353 			}
8354 		}
8355 
8356 		// fractional part
8357 		if (*mantissa)
8358 		{
8359 			// decimal point
8360 			*s++ = '.';
8361 
8362 			// extra zeroes from negative exponent
8363 			while (exponent < 0)
8364 			{
8365 				*s++ = '0';
8366 				exponent++;
8367 			}
8368 
8369 			// extra mantissa digits
8370 			while (*mantissa)
8371 			{
8372 				assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
8373 				*s++ = *mantissa++;
8374 			}
8375 		}
8376 
8377 		// zero-terminate
8378 		assert(s < result + result_size);
8379 		*s = 0;
8380 
8381 		return xpath_string::from_heap_preallocated(result, s);
8382 	}
8383 
check_string_to_number_format(const char_t * string)8384 	PUGI__FN bool check_string_to_number_format(const char_t* string)
8385 	{
8386 		// parse leading whitespace
8387 		while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8388 
8389 		// parse sign
8390 		if (*string == '-') ++string;
8391 
8392 		if (!*string) return false;
8393 
8394 		// if there is no integer part, there should be a decimal part with at least one digit
8395 		if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
8396 
8397 		// parse integer part
8398 		while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8399 
8400 		// parse decimal part
8401 		if (*string == '.')
8402 		{
8403 			++string;
8404 
8405 			while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8406 		}
8407 
8408 		// parse trailing whitespace
8409 		while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8410 
8411 		return *string == 0;
8412 	}
8413 
convert_string_to_number(const char_t * string)8414 	PUGI__FN double convert_string_to_number(const char_t* string)
8415 	{
8416 		// check string format
8417 		if (!check_string_to_number_format(string)) return gen_nan();
8418 
8419 		// parse string
8420 	#ifdef PUGIXML_WCHAR_MODE
8421 		return wcstod(string, 0);
8422 	#else
8423 		return strtod(string, 0);
8424 	#endif
8425 	}
8426 
convert_string_to_number_scratch(char_t (& buffer)[32],const char_t * begin,const char_t * end,double * out_result)8427 	PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
8428 	{
8429 		size_t length = static_cast<size_t>(end - begin);
8430 		char_t* scratch = buffer;
8431 
8432 		if (length >= sizeof(buffer) / sizeof(buffer[0]))
8433 		{
8434 			// need to make dummy on-heap copy
8435 			scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8436 			if (!scratch) return false;
8437 		}
8438 
8439 		// copy string to zero-terminated buffer and perform conversion
8440 		memcpy(scratch, begin, length * sizeof(char_t));
8441 		scratch[length] = 0;
8442 
8443 		*out_result = convert_string_to_number(scratch);
8444 
8445 		// free dummy buffer
8446 		if (scratch != buffer) xml_memory::deallocate(scratch);
8447 
8448 		return true;
8449 	}
8450 
round_nearest(double value)8451 	PUGI__FN double round_nearest(double value)
8452 	{
8453 		return floor(value + 0.5);
8454 	}
8455 
round_nearest_nzero(double value)8456 	PUGI__FN double round_nearest_nzero(double value)
8457 	{
8458 		// same as round_nearest, but returns -0 for [-0.5, -0]
8459 		// ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
8460 		return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
8461 	}
8462 
qualified_name(const xpath_node & node)8463 	PUGI__FN const char_t* qualified_name(const xpath_node& node)
8464 	{
8465 		return node.attribute() ? node.attribute().name() : node.node().name();
8466 	}
8467 
local_name(const xpath_node & node)8468 	PUGI__FN const char_t* local_name(const xpath_node& node)
8469 	{
8470 		const char_t* name = qualified_name(node);
8471 		const char_t* p = find_char(name, ':');
8472 
8473 		return p ? p + 1 : name;
8474 	}
8475 
8476 	struct namespace_uri_predicate
8477 	{
8478 		const char_t* prefix;
8479 		size_t prefix_length;
8480 
namespace_uri_predicatenamespace_uri_predicate8481 		namespace_uri_predicate(const char_t* name)
8482 		{
8483 			const char_t* pos = find_char(name, ':');
8484 
8485 			prefix = pos ? name : 0;
8486 			prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
8487 		}
8488 
operator ()namespace_uri_predicate8489 		bool operator()(xml_attribute a) const
8490 		{
8491 			const char_t* name = a.name();
8492 
8493 			if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
8494 
8495 			return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
8496 		}
8497 	};
8498 
namespace_uri(xml_node node)8499 	PUGI__FN const char_t* namespace_uri(xml_node node)
8500 	{
8501 		namespace_uri_predicate pred = node.name();
8502 
8503 		xml_node p = node;
8504 
8505 		while (p)
8506 		{
8507 			xml_attribute a = p.find_attribute(pred);
8508 
8509 			if (a) return a.value();
8510 
8511 			p = p.parent();
8512 		}
8513 
8514 		return PUGIXML_TEXT("");
8515 	}
8516 
namespace_uri(xml_attribute attr,xml_node parent)8517 	PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
8518 	{
8519 		namespace_uri_predicate pred = attr.name();
8520 
8521 		// Default namespace does not apply to attributes
8522 		if (!pred.prefix) return PUGIXML_TEXT("");
8523 
8524 		xml_node p = parent;
8525 
8526 		while (p)
8527 		{
8528 			xml_attribute a = p.find_attribute(pred);
8529 
8530 			if (a) return a.value();
8531 
8532 			p = p.parent();
8533 		}
8534 
8535 		return PUGIXML_TEXT("");
8536 	}
8537 
namespace_uri(const xpath_node & node)8538 	PUGI__FN const char_t* namespace_uri(const xpath_node& node)
8539 	{
8540 		return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
8541 	}
8542 
normalize_space(char_t * buffer)8543 	PUGI__FN char_t* normalize_space(char_t* buffer)
8544 	{
8545 		char_t* write = buffer;
8546 
8547 		for (char_t* it = buffer; *it; )
8548 		{
8549 			char_t ch = *it++;
8550 
8551 			if (PUGI__IS_CHARTYPE(ch, ct_space))
8552 			{
8553 				// replace whitespace sequence with single space
8554 				while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
8555 
8556 				// avoid leading spaces
8557 				if (write != buffer) *write++ = ' ';
8558 			}
8559 			else *write++ = ch;
8560 		}
8561 
8562 		// remove trailing space
8563 		if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
8564 
8565 		// zero-terminate
8566 		*write = 0;
8567 
8568 		return write;
8569 	}
8570 
translate(char_t * buffer,const char_t * from,const char_t * to,size_t to_length)8571 	PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
8572 	{
8573 		char_t* write = buffer;
8574 
8575 		while (*buffer)
8576 		{
8577 			PUGI__DMC_VOLATILE char_t ch = *buffer++;
8578 
8579 			const char_t* pos = find_char(from, ch);
8580 
8581 			if (!pos)
8582 				*write++ = ch; // do not process
8583 			else if (static_cast<size_t>(pos - from) < to_length)
8584 				*write++ = to[pos - from]; // replace
8585 		}
8586 
8587 		// zero-terminate
8588 		*write = 0;
8589 
8590 		return write;
8591 	}
8592 
translate_table_generate(xpath_allocator * alloc,const char_t * from,const char_t * to)8593 	PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
8594 	{
8595 		unsigned char table[128] = {0};
8596 
8597 		while (*from)
8598 		{
8599 			unsigned int fc = static_cast<unsigned int>(*from);
8600 			unsigned int tc = static_cast<unsigned int>(*to);
8601 
8602 			if (fc >= 128 || tc >= 128)
8603 				return 0;
8604 
8605 			// code=128 means "skip character"
8606 			if (!table[fc])
8607 				table[fc] = static_cast<unsigned char>(tc ? tc : 128);
8608 
8609 			from++;
8610 			if (tc) to++;
8611 		}
8612 
8613 		for (int i = 0; i < 128; ++i)
8614 			if (!table[i])
8615 				table[i] = static_cast<unsigned char>(i);
8616 
8617 		void* result = alloc->allocate(sizeof(table));
8618 		if (!result) return 0;
8619 
8620 		memcpy(result, table, sizeof(table));
8621 
8622 		return static_cast<unsigned char*>(result);
8623 	}
8624 
translate_table(char_t * buffer,const unsigned char * table)8625 	PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table)
8626 	{
8627 		char_t* write = buffer;
8628 
8629 		while (*buffer)
8630 		{
8631 			char_t ch = *buffer++;
8632 			unsigned int index = static_cast<unsigned int>(ch);
8633 
8634 			if (index < 128)
8635 			{
8636 				unsigned char code = table[index];
8637 
8638 				// code=128 means "skip character" (table size is 128 so 128 can be a special value)
8639 				// this code skips these characters without extra branches
8640 				*write = static_cast<char_t>(code);
8641 				write += 1 - (code >> 7);
8642 			}
8643 			else
8644 			{
8645 				*write++ = ch;
8646 			}
8647 		}
8648 
8649 		// zero-terminate
8650 		*write = 0;
8651 
8652 		return write;
8653 	}
8654 
is_xpath_attribute(const char_t * name)8655 	inline bool is_xpath_attribute(const char_t* name)
8656 	{
8657 		return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':'));
8658 	}
8659 
	// Storage for an XPath variable of boolean type.
	// 'name' has to remain the last member: new_xpath_variable allocates extra characters past
	// the end of the struct and copies the whole zero-terminated variable name starting at name[0].
	struct xpath_variable_boolean: xpath_variable
	{
		xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false)
		{
		}

		bool value; // starts out as false
		char_t name[1]; // first element of the variable-length name
	};
8669 
	// Storage for an XPath variable of number type.
	// 'name' has to remain the last member: new_xpath_variable allocates extra characters past
	// the end of the struct and copies the whole zero-terminated variable name starting at name[0].
	struct xpath_variable_number: xpath_variable
	{
		xpath_variable_number(): xpath_variable(xpath_type_number), value(0)
		{
		}

		double value; // starts out as 0
		char_t name[1]; // first element of the variable-length name
	};
8679 
	// Storage for an XPath variable of string type.
	// 'name' has to remain the last member: new_xpath_variable allocates extra characters past
	// the end of the struct and copies the whole zero-terminated variable name starting at name[0].
	struct xpath_variable_string: xpath_variable
	{
		xpath_variable_string(): xpath_variable(xpath_type_string), value(0)
		{
		}

		// The stored string is owned by the variable and released through xml_memory.
		~xpath_variable_string()
		{
			if (value) xml_memory::deallocate(value);
		}

		char_t* value; // null until a value is stored
		char_t name[1]; // first element of the variable-length name
	};
8694 
	// Storage for an XPath variable of node set type.
	// 'name' has to remain the last member: new_xpath_variable allocates extra characters past
	// the end of the struct and copies the whole zero-terminated variable name starting at name[0].
	struct xpath_variable_node_set: xpath_variable
	{
		xpath_variable_node_set(): xpath_variable(xpath_type_node_set)
		{
		}

		xpath_node_set value; // default-constructed until a value is stored
		char_t name[1]; // first element of the variable-length name
	};
8704 
	// Default-constructed node set with static storage; its users are outside this part of the file.
	// NOTE(review): presumably the shared "no nodes" value handed out where a node set reference
	// is required but none is available - confirm at the use sites.
	static const xpath_node_set dummy_node_set;
8706 
hash_string(const char_t * str)8707 	PUGI__FN PUGI__UNSIGNED_OVERFLOW unsigned int hash_string(const char_t* str)
8708 	{
8709 		// Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
8710 		unsigned int result = 0;
8711 
8712 		while (*str)
8713 		{
8714 			result += static_cast<unsigned int>(*str++);
8715 			result += result << 10;
8716 			result ^= result >> 6;
8717 		}
8718 
8719 		result += result << 3;
8720 		result ^= result >> 11;
8721 		result += result << 15;
8722 
8723 		return result;
8724 	}
8725 
new_xpath_variable(const char_t * name)8726 	template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
8727 	{
8728 		size_t length = strlength(name);
8729 		if (length == 0) return 0; // empty variable names are invalid
8730 
8731 		// $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
8732 		void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
8733 		if (!memory) return 0;
8734 
8735 		T* result = new (memory) T();
8736 
8737 		memcpy(result->name, name, (length + 1) * sizeof(char_t));
8738 
8739 		return result;
8740 	}
8741 
new_xpath_variable(xpath_value_type type,const char_t * name)8742 	PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
8743 	{
8744 		switch (type)
8745 		{
8746 		case xpath_type_node_set:
8747 			return new_xpath_variable<xpath_variable_node_set>(name);
8748 
8749 		case xpath_type_number:
8750 			return new_xpath_variable<xpath_variable_number>(name);
8751 
8752 		case xpath_type_string:
8753 			return new_xpath_variable<xpath_variable_string>(name);
8754 
8755 		case xpath_type_boolean:
8756 			return new_xpath_variable<xpath_variable_boolean>(name);
8757 
8758 		default:
8759 			return 0;
8760 		}
8761 	}
8762 
	// Destroys a variable of storage type T: the destructor is invoked explicitly and the
	// memory is then handed back to xml_memory, mirroring the placement construction on
	// xml_memory storage done by new_xpath_variable<T>.
	template <typename T> PUGI__FN void delete_xpath_variable(T* var)
	{
		var->~T();
		xml_memory::deallocate(var);
	}
8768 
delete_xpath_variable(xpath_value_type type,xpath_variable * var)8769 	PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
8770 	{
8771 		switch (type)
8772 		{
8773 		case xpath_type_node_set:
8774 			delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
8775 			break;
8776 
8777 		case xpath_type_number:
8778 			delete_xpath_variable(static_cast<xpath_variable_number*>(var));
8779 			break;
8780 
8781 		case xpath_type_string:
8782 			delete_xpath_variable(static_cast<xpath_variable_string*>(var));
8783 			break;
8784 
8785 		case xpath_type_boolean:
8786 			delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
8787 			break;
8788 
8789 		default:
8790 			assert(false && "Invalid variable type"); // unreachable
8791 		}
8792 	}
8793 
copy_xpath_variable(xpath_variable * lhs,const xpath_variable * rhs)8794 	PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
8795 	{
8796 		switch (rhs->type())
8797 		{
8798 		case xpath_type_node_set:
8799 			return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
8800 
8801 		case xpath_type_number:
8802 			return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
8803 
8804 		case xpath_type_string:
8805 			return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
8806 
8807 		case xpath_type_boolean:
8808 			return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
8809 
8810 		default:
8811 			assert(false && "Invalid variable type"); // unreachable
8812 			return false;
8813 		}
8814 	}
8815 
get_variable_scratch(char_t (& buffer)[32],xpath_variable_set * set,const char_t * begin,const char_t * end,xpath_variable ** out_result)8816 	PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
8817 	{
8818 		size_t length = static_cast<size_t>(end - begin);
8819 		char_t* scratch = buffer;
8820 
8821 		if (length >= sizeof(buffer) / sizeof(buffer[0]))
8822 		{
8823 			// need to make dummy on-heap copy
8824 			scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8825 			if (!scratch) return false;
8826 		}
8827 
8828 		// copy string to zero-terminated buffer and perform lookup
8829 		memcpy(scratch, begin, length * sizeof(char_t));
8830 		scratch[length] = 0;
8831 
8832 		*out_result = set->get(scratch);
8833 
8834 		// free dummy buffer
8835 		if (scratch != buffer) xml_memory::deallocate(scratch);
8836 
8837 		return true;
8838 	}
8839 PUGI__NS_END
8840 
8841 // Internal node set class
8842 PUGI__NS_BEGIN
xpath_get_order(const xpath_node * begin,const xpath_node * end)8843 	PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
8844 	{
8845 		if (end - begin < 2)
8846 			return xpath_node_set::type_sorted;
8847 
8848 		document_order_comparator cmp;
8849 
8850 		bool first = cmp(begin[0], begin[1]);
8851 
8852 		for (const xpath_node* it = begin + 1; it + 1 < end; ++it)
8853 			if (cmp(it[0], it[1]) != first)
8854 				return xpath_node_set::type_unsorted;
8855 
8856 		return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse;
8857 	}
8858 
xpath_sort(xpath_node * begin,xpath_node * end,xpath_node_set::type_t type,bool rev)8859 	PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
8860 	{
8861 		xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
8862 
8863 		if (type == xpath_node_set::type_unsorted)
8864 		{
8865 			xpath_node_set::type_t sorted = xpath_get_order(begin, end);
8866 
8867 			if (sorted == xpath_node_set::type_unsorted)
8868 			{
8869 				sort(begin, end, document_order_comparator());
8870 
8871 				type = xpath_node_set::type_sorted;
8872 			}
8873 			else
8874 				type = sorted;
8875 		}
8876 
8877 		if (type != order) reverse(begin, end);
8878 
8879 		return order;
8880 	}
8881 
xpath_first(const xpath_node * begin,const xpath_node * end,xpath_node_set::type_t type)8882 	PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
8883 	{
8884 		if (begin == end) return xpath_node();
8885 
8886 		switch (type)
8887 		{
8888 		case xpath_node_set::type_sorted:
8889 			return *begin;
8890 
8891 		case xpath_node_set::type_sorted_reverse:
8892 			return *(end - 1);
8893 
8894 		case xpath_node_set::type_unsorted:
8895 			return *min_element(begin, end, document_order_comparator());
8896 
8897 		default:
8898 			assert(false && "Invalid node set type"); // unreachable
8899 			return xpath_node();
8900 		}
8901 	}
8902 
8903 	class xpath_node_set_raw
8904 	{
8905 		xpath_node_set::type_t _type;
8906 
8907 		xpath_node* _begin;
8908 		xpath_node* _end;
8909 		xpath_node* _eos;
8910 
8911 	public:
xpath_node_set_raw()8912 		xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
8913 		{
8914 		}
8915 
begin() const8916 		xpath_node* begin() const
8917 		{
8918 			return _begin;
8919 		}
8920 
end() const8921 		xpath_node* end() const
8922 		{
8923 			return _end;
8924 		}
8925 
empty() const8926 		bool empty() const
8927 		{
8928 			return _begin == _end;
8929 		}
8930 
size() const8931 		size_t size() const
8932 		{
8933 			return static_cast<size_t>(_end - _begin);
8934 		}
8935 
first() const8936 		xpath_node first() const
8937 		{
8938 			return xpath_first(_begin, _end, _type);
8939 		}
8940 
8941 		void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
8942 
push_back(const xpath_node & node,xpath_allocator * alloc)8943 		void push_back(const xpath_node& node, xpath_allocator* alloc)
8944 		{
8945 			if (_end != _eos)
8946 				*_end++ = node;
8947 			else
8948 				push_back_grow(node, alloc);
8949 		}
8950 
append(const xpath_node * begin_,const xpath_node * end_,xpath_allocator * alloc)8951 		void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
8952 		{
8953 			if (begin_ == end_) return;
8954 
8955 			size_t size_ = static_cast<size_t>(_end - _begin);
8956 			size_t capacity = static_cast<size_t>(_eos - _begin);
8957 			size_t count = static_cast<size_t>(end_ - begin_);
8958 
8959 			if (size_ + count > capacity)
8960 			{
8961 				// reallocate the old array or allocate a new one
8962 				xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
8963 				if (!data) return;
8964 
8965 				// finalize
8966 				_begin = data;
8967 				_end = data + size_;
8968 				_eos = data + size_ + count;
8969 			}
8970 
8971 			memcpy(_end, begin_, count * sizeof(xpath_node));
8972 			_end += count;
8973 		}
8974 
sort_do()8975 		void sort_do()
8976 		{
8977 			_type = xpath_sort(_begin, _end, _type, false);
8978 		}
8979 
truncate(xpath_node * pos)8980 		void truncate(xpath_node* pos)
8981 		{
8982 			assert(_begin <= pos && pos <= _end);
8983 
8984 			_end = pos;
8985 		}
8986 
remove_duplicates(xpath_allocator * alloc)8987 		void remove_duplicates(xpath_allocator* alloc)
8988 		{
8989 			if (_type == xpath_node_set::type_unsorted && _end - _begin > 2)
8990 			{
8991 				xpath_allocator_capture cr(alloc);
8992 
8993 				size_t size_ = static_cast<size_t>(_end - _begin);
8994 
8995 				size_t hash_size = 1;
8996 				while (hash_size < size_ + size_ / 2) hash_size *= 2;
8997 
8998 				const void** hash_data = static_cast<const void**>(alloc->allocate(hash_size * sizeof(void**)));
8999 				if (!hash_data) return;
9000 
9001 				memset(hash_data, 0, hash_size * sizeof(const void**));
9002 
9003 				xpath_node* write = _begin;
9004 
9005 				for (xpath_node* it = _begin; it != _end; ++it)
9006 				{
9007 					const void* attr = it->attribute().internal_object();
9008 					const void* node = it->node().internal_object();
9009 					const void* key = attr ? attr : node;
9010 
9011 					if (key && hash_insert(hash_data, hash_size, key))
9012 					{
9013 						*write++ = *it;
9014 					}
9015 				}
9016 
9017 				_end = write;
9018 			}
9019 			else
9020 			{
9021 				_end = unique(_begin, _end);
9022 			}
9023 		}
9024 
type() const9025 		xpath_node_set::type_t type() const
9026 		{
9027 			return _type;
9028 		}
9029 
set_type(xpath_node_set::type_t value)9030 		void set_type(xpath_node_set::type_t value)
9031 		{
9032 			_type = value;
9033 		}
9034 	};
9035 
push_back_grow(const xpath_node & node,xpath_allocator * alloc)9036 	PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
9037 	{
9038 		size_t capacity = static_cast<size_t>(_eos - _begin);
9039 
9040 		// get new capacity (1.5x rule)
9041 		size_t new_capacity = capacity + capacity / 2 + 1;
9042 
9043 		// reallocate the old array or allocate a new one
9044 		xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
9045 		if (!data) return;
9046 
9047 		// finalize
9048 		_begin = data;
9049 		_end = data + capacity;
9050 		_eos = data + new_capacity;
9051 
9052 		// push
9053 		*_end++ = node;
9054 	}
9055 PUGI__NS_END
9056 
9057 PUGI__NS_BEGIN
	// Bundle of values passed to expression evaluation as one context argument.
	// NOTE(review): 'position' and 'size' presumably are the XPath context position and
	// context size of 'n' - confirm against the evaluation code.
	struct xpath_context
	{
		xpath_node n; // context node
		size_t position, size;

		xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
		{
		}
	};
9067 
	// Lexeme types produced by xpath_lexer; the spelling recognized by the lexer is given for each.
	enum lexeme_t
	{
		lex_none = 0, // unrecognized or malformed input (e.g. lone '!' or ':', '$' without a name, unterminated string literal)
		lex_equal, // =
		lex_not_equal, // !=
		lex_less, // <
		lex_greater, // >
		lex_less_or_equal, // <=
		lex_greater_or_equal, // >=
		lex_plus, // +
		lex_minus, // -
		lex_multiply, // *
		lex_union, // |
		lex_var_ref, // $name or $prefix:name; contents exclude the '$'
		lex_open_brace, // (
		lex_close_brace, // )
		lex_quoted_string, // "..." or '...'; contents exclude the quotes
		lex_number, // digits with an optional fractional part, or '.' followed by digits
		lex_slash, // /
		lex_double_slash, // //
		lex_open_square_brace, // [
		lex_close_square_brace, // ]
		lex_string, // name, prefix:name or prefix:*
		lex_comma, // ,
		lex_axis_attribute, // @
		lex_dot, // .
		lex_double_dot, // ..
		lex_double_colon, // ::
		lex_eof // end of the query string
	};
9098 
9099 	struct xpath_lexer_string
9100 	{
9101 		const char_t* begin;
9102 		const char_t* end;
9103 
xpath_lexer_stringxpath_lexer_string9104 		xpath_lexer_string(): begin(0), end(0)
9105 		{
9106 		}
9107 
operator ==xpath_lexer_string9108 		bool operator==(const char_t* other) const
9109 		{
9110 			size_t length = static_cast<size_t>(end - begin);
9111 
9112 			return strequalrange(other, begin, length);
9113 		}
9114 	};
9115 
	// Tokenizer for XPath query strings.
	// Exposes one lexeme at a time: the constructor scans the first lexeme and next() advances
	// to the following one. The query has to be zero-terminated; the terminator yields lex_eof.
	class xpath_lexer
	{
		const char_t* _cur; // position at which the next scan resumes
		const char_t* _cur_lexeme_pos; // start of the current lexeme (leading whitespace skipped)
		xpath_lexer_string _cur_lexeme_contents; // text of the current lexeme; only updated for lexemes that carry text

		lexeme_t _cur_lexeme; // type of the current lexeme

	public:
		// Starts lexing at 'query' and immediately scans the first lexeme.
		explicit xpath_lexer(const char_t* query): _cur(query)
		{
			next();
		}

		// Returns the position at which the next scan resumes.
		const char_t* state() const
		{
			return _cur;
		}

		// Scans the next lexeme: skips whitespace, records the lexeme start and classifies the
		// input by its first character. For malformed input the lexeme becomes lex_none.
		void next()
		{
			const char_t* cur = _cur;

			while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;

			// save lexeme position for error reporting
			_cur_lexeme_pos = cur;

			switch (*cur)
			{
			case 0:
				_cur_lexeme = lex_eof;
				break;

			case '>':
				if (*(cur+1) == '=')
				{
					cur += 2;
					_cur_lexeme = lex_greater_or_equal;
				}
				else
				{
					cur += 1;
					_cur_lexeme = lex_greater;
				}
				break;

			case '<':
				if (*(cur+1) == '=')
				{
					cur += 2;
					_cur_lexeme = lex_less_or_equal;
				}
				else
				{
					cur += 1;
					_cur_lexeme = lex_less;
				}
				break;

			case '!':
				// '!' is only valid as part of '!='; a lone '!' is not consumed
				if (*(cur+1) == '=')
				{
					cur += 2;
					_cur_lexeme = lex_not_equal;
				}
				else
				{
					_cur_lexeme = lex_none;
				}
				break;

			case '=':
				cur += 1;
				_cur_lexeme = lex_equal;

				break;

			case '+':
				cur += 1;
				_cur_lexeme = lex_plus;

				break;

			case '-':
				cur += 1;
				_cur_lexeme = lex_minus;

				break;

			case '*':
				cur += 1;
				_cur_lexeme = lex_multiply;

				break;

			case '|':
				cur += 1;
				_cur_lexeme = lex_union;

				break;

			case '$':
				// variable reference: '$' followed by a name or a prefix:name pair
				cur += 1;

				if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
				{
					_cur_lexeme_contents.begin = cur;

					while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;

					if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
					{
						cur++; // :

						while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
					}

					_cur_lexeme_contents.end = cur;

					_cur_lexeme = lex_var_ref;
				}
				else
				{
					_cur_lexeme = lex_none;
				}

				break;

			case '(':
				cur += 1;
				_cur_lexeme = lex_open_brace;

				break;

			case ')':
				cur += 1;
				_cur_lexeme = lex_close_brace;

				break;

			case '[':
				cur += 1;
				_cur_lexeme = lex_open_square_brace;

				break;

			case ']':
				cur += 1;
				_cur_lexeme = lex_close_square_brace;

				break;

			case ',':
				cur += 1;
				_cur_lexeme = lex_comma;

				break;

			case '/':
				if (*(cur+1) == '/')
				{
					cur += 2;
					_cur_lexeme = lex_double_slash;
				}
				else
				{
					cur += 1;
					_cur_lexeme = lex_slash;
				}
				break;

			case '.':
				// '..', a number that starts with its fractional part, or a single '.'
				if (*(cur+1) == '.')
				{
					cur += 2;
					_cur_lexeme = lex_double_dot;
				}
				else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
				{
					_cur_lexeme_contents.begin = cur; // .

					++cur;

					while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;

					_cur_lexeme_contents.end = cur;

					_cur_lexeme = lex_number;
				}
				else
				{
					cur += 1;
					_cur_lexeme = lex_dot;
				}
				break;

			case '@':
				cur += 1;
				_cur_lexeme = lex_axis_attribute;

				break;

			case '"':
			case '\'':
			{
				// string literal: runs up to the matching quote; the contents exclude both quotes
				char_t terminator = *cur;

				++cur;

				_cur_lexeme_contents.begin = cur;
				while (*cur && *cur != terminator) cur++;
				_cur_lexeme_contents.end = cur;

				// reaching the end of the query first means the literal is unterminated
				if (!*cur)
					_cur_lexeme = lex_none;
				else
				{
					cur += 1;
					_cur_lexeme = lex_quoted_string;
				}

				break;
			}

			case ':':
				// ':' is only valid here as part of '::'; a lone ':' is not consumed
				if (*(cur+1) == ':')
				{
					cur += 2;
					_cur_lexeme = lex_double_colon;
				}
				else
				{
					_cur_lexeme = lex_none;
				}
				break;

			default:
				if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
				{
					// number: digits with an optional '.' and further digits
					_cur_lexeme_contents.begin = cur;

					while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;

					if (*cur == '.')
					{
						cur++;

						while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
					}

					_cur_lexeme_contents.end = cur;

					_cur_lexeme = lex_number;
				}
				else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
				{
					// name, optionally followed by ':*' or ':name'
					_cur_lexeme_contents.begin = cur;

					while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;

					if (cur[0] == ':')
					{
						if (cur[1] == '*') // namespace test ncname:*
						{
							cur += 2; // :*
						}
						else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
						{
							cur++; // :

							while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
						}
					}

					_cur_lexeme_contents.end = cur;

					_cur_lexeme = lex_string;
				}
				else
				{
					_cur_lexeme = lex_none;
				}
			}

			_cur = cur;
		}

		// Type of the current lexeme.
		lexeme_t current() const
		{
			return _cur_lexeme;
		}

		// Start of the current lexeme within the query string.
		const char_t* current_pos() const
		{
			return _cur_lexeme_pos;
		}

		// Text of the current lexeme; only valid for the lexeme types that carry text.
		const xpath_lexer_string& contents() const
		{
			assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);

			return _cur_lexeme_contents;
		}
	};
9421 
	// Node types of the XPath expression tree; the comments show the expression each type stands for.
	enum ast_type_t
	{
		ast_unknown,
		ast_op_or,						// left or right
		ast_op_and,						// left and right
		ast_op_equal,					// left = right
		ast_op_not_equal,				// left != right
		ast_op_less,					// left < right
		ast_op_greater,					// left > right
		ast_op_less_or_equal,			// left <= right
		ast_op_greater_or_equal,		// left >= right
		ast_op_add,						// left + right
		ast_op_subtract,				// left - right
		ast_op_multiply,				// left * right
		ast_op_divide,					// left div right
		ast_op_mod,						// left mod right
		ast_op_negate,					// - left
		ast_op_union,					// left | right
		ast_predicate,					// apply predicate to set; next points to next predicate
		ast_filter,						// select * from left where right
		ast_string_constant,			// string constant
		ast_number_constant,			// number constant
		ast_variable,					// variable
		ast_func_last,					// last()
		ast_func_position,				// position()
		ast_func_count,					// count(left)
		ast_func_id,					// id(left)
		ast_func_local_name_0,			// local-name()
		ast_func_local_name_1,			// local-name(left)
		ast_func_namespace_uri_0,		// namespace-uri()
		ast_func_namespace_uri_1,		// namespace-uri(left)
		ast_func_name_0,				// name()
		ast_func_name_1,				// name(left)
		ast_func_string_0,				// string()
		ast_func_string_1,				// string(left)
		ast_func_concat,				// concat(left, right, siblings)
		ast_func_starts_with,			// starts-with(left, right)
		ast_func_contains,				// contains(left, right)
		ast_func_substring_before,		// substring-before(left, right)
		ast_func_substring_after,		// substring-after(left, right)
		ast_func_substring_2,			// substring(left, right)
		ast_func_substring_3,			// substring(left, right, third)
		ast_func_string_length_0,		// string-length()
		ast_func_string_length_1,		// string-length(left)
		ast_func_normalize_space_0,		// normalize-space()
		ast_func_normalize_space_1,		// normalize-space(left)
		ast_func_translate,				// translate(left, right, third)
		ast_func_boolean,				// boolean(left)
		ast_func_not,					// not(left)
		ast_func_true,					// true()
		ast_func_false,					// false()
		ast_func_lang,					// lang(left)
		ast_func_number_0,				// number()
		ast_func_number_1,				// number(left)
		ast_func_sum,					// sum(left)
		ast_func_floor,					// floor(left)
		ast_func_ceiling,				// ceiling(left)
		ast_func_round,					// round(left)
		ast_step,						// process set left with step
		ast_step_root,					// select root node

		// node types that do not correspond to query syntax directly (note the ast_opt_ prefix)
		ast_opt_translate_table,		// translate(left, right, third) where right/third are constants
		ast_opt_compare_attribute		// @name = 'string'
	};
9486 
	// Axis of a location step; the enumerator names mirror the XPath 1.0 axis names
	// (ancestor, ancestor-or-self, attribute, ...).
	enum axis_t
	{
		axis_ancestor,
		axis_ancestor_or_self,
		axis_attribute,
		axis_child,
		axis_descendant,
		axis_descendant_or_self,
		axis_following,
		axis_following_sibling,
		axis_namespace,
		axis_parent,
		axis_preceding,
		axis_preceding_sibling,
		axis_self
	};
9503 
	// Kind of node test attached to a step.
	// NOTE(review): the per-value notes below are inferred from the enumerator names and from
	// the 'ncname:*' handling in the lexer - confirm against the step evaluation code.
	enum nodetest_t
	{
		nodetest_none,
		nodetest_name, // presumably: test by name
		nodetest_type_node, // presumably: node()
		nodetest_type_comment, // presumably: comment()
		nodetest_type_pi, // presumably: processing-instruction()
		nodetest_type_text, // presumably: text()
		nodetest_pi, // presumably: processing-instruction('target')
		nodetest_all, // presumably: *
		nodetest_all_in_namespace // presumably: prefix:*
	};
9516 
	// Classification of a predicate expression.
	// NOTE(review): the values are assigned and consumed outside this part of the file;
	// judging by the names they separate general predicates from position-invariant ones,
	// constant ones and the constant 1 - confirm where they are used.
	enum predicate_t
	{
		predicate_default,
		predicate_posinv,
		predicate_constant,
		predicate_constant_one
	};
9524 
	// Mode argument passed to xpath_ast_node::eval_node_set.
	// NOTE(review): judging by the names it tells the evaluation whether all nodes, any single
	// node or only the first node is needed - confirm in eval_node_set.
	enum nodeset_eval_t
	{
		nodeset_eval_all,
		nodeset_eval_any,
		nodeset_eval_first
	};
9531 
	// Wraps an axis_t value in a distinct type: axis_to_type<N>::axis equals N.
	// NOTE(review): presumably used to pass an axis as a compile-time tag - confirm at the use sites.
	template <axis_t N> struct axis_to_type
	{
		static const axis_t axis;
	};

	// out-of-class definition of the static member
	template <axis_t N> const axis_t axis_to_type<N>::axis = N;
9538 
9539 	class xpath_ast_node
9540 	{
9541 	private:
9542 		// node type
9543 		char _type;
9544 		char _rettype;
9545 
9546 		// for ast_step
9547 		char _axis;
9548 
9549 		// for ast_step/ast_predicate/ast_filter
9550 		char _test;
9551 
9552 		// tree node structure
9553 		xpath_ast_node* _left;
9554 		xpath_ast_node* _right;
9555 		xpath_ast_node* _next;
9556 
9557 		union
9558 		{
9559 			// value for ast_string_constant
9560 			const char_t* string;
9561 			// value for ast_number_constant
9562 			double number;
9563 			// variable for ast_variable
9564 			xpath_variable* variable;
9565 			// node test for ast_step (node name/namespace/node type/pi target)
9566 			const char_t* nodetest;
9567 			// table for ast_opt_translate_table
9568 			const unsigned char* table;
9569 		} _data;
9570 
9571 		xpath_ast_node(const xpath_ast_node&);
9572 		xpath_ast_node& operator=(const xpath_ast_node&);
9573 
		// Evaluates a comparison between two expressions using the functor comp (invoked from eval_boolean with equal_to / not_equal_to).
		// The strategy is chosen from the return types reported by lhs->rettype() and rhs->rettype():
		//  - neither operand is a node set: both are evaluated as boolean, number or string (tested in that order) and compared once;
		//  - both operands are node sets: the result is true if comp holds for the string values of at least one (left node, right node) pair;
		//  - exactly one operand is a node set: the other operand is compared with the node set converted to boolean, or with each node's string value.
		// In the mixed case the operands are swapped if necessary so that the node set ends up in rhs, which means comp is always called as
		// comp(scalar, node value) regardless of the order the operands were passed in.
		template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
		{
			xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();

			if (lt != xpath_type_node_set && rt != xpath_type_node_set)
			{
				// no node sets involved: pick a common type; boolean is preferred over number, number over string
				if (lt == xpath_type_boolean || rt == xpath_type_boolean)
					return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
				else if (lt == xpath_type_number || rt == xpath_type_number)
					return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
				else if (lt == xpath_type_string || rt == xpath_type_string)
				{
					// NOTE(review): the capture presumably rolls back allocations made from stack.result when it goes out of scope - confirm against xpath_allocator_capture
					xpath_allocator_capture cr(stack.result);

					xpath_string ls = lhs->eval_string(c, stack);
					xpath_string rs = rhs->eval_string(c, stack);

					return comp(ls, rs);
				}
			}
			else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
			{
				xpath_allocator_capture cr(stack.result);

				// both node sets are fully evaluated, then every pair of nodes is tested
				xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
				xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);

				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
					{
						// separate capture per pair, scoped to the string values computed for this iteration
						xpath_allocator_capture cri(stack.result);

						if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
							return true;
					}

				return false;
			}
			else
			{
				// exactly one operand is a node set; normalize so that lhs is the scalar and rhs is the node set
				if (lt == xpath_type_node_set)
				{
					swap(lhs, rhs);
					swap(lt, rt);
				}

				if (lt == xpath_type_boolean)
					// the node set is converted to boolean as a whole via eval_boolean
					return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
				else if (lt == xpath_type_number)
				{
					xpath_allocator_capture cr(stack.result);

					double l = lhs->eval_number(c, stack);
					xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);

					// true as soon as one node's string value, converted to a number, satisfies comp
					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
					{
						xpath_allocator_capture cri(stack.result);

						if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
							return true;
					}

					return false;
				}
				else if (lt == xpath_type_string)
				{
					xpath_allocator_capture cr(stack.result);

					xpath_string l = lhs->eval_string(c, stack);
					xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);

					// true as soon as one node's string value satisfies comp
					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
					{
						xpath_allocator_capture cri(stack.result);

						if (comp(l, string_value(*ri, stack.result)))
							return true;
					}

					return false;
				}
			}

			// reached only if an operand reports a type not handled by any branch above
			assert(false && "Wrong types"); // unreachable
			return false;
		}
9661 
eval_once(xpath_node_set::type_t type,nodeset_eval_t eval)9662 		static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval)
9663 		{
9664 			return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any;
9665 		}
9666 
compare_rel(xpath_ast_node * lhs,xpath_ast_node * rhs,const xpath_context & c,const xpath_stack & stack,const Comp & comp)9667 		template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9668 		{
9669 			xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9670 
9671 			if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9672 				return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9673 			else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9674 			{
9675 				xpath_allocator_capture cr(stack.result);
9676 
9677 				xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9678 				xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9679 
9680 				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9681 				{
9682 					xpath_allocator_capture cri(stack.result);
9683 
9684 					double l = convert_string_to_number(string_value(*li, stack.result).c_str());
9685 
9686 					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9687 					{
9688 						xpath_allocator_capture crii(stack.result);
9689 
9690 						if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9691 							return true;
9692 					}
9693 				}
9694 
9695 				return false;
9696 			}
9697 			else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
9698 			{
9699 				xpath_allocator_capture cr(stack.result);
9700 
9701 				double l = lhs->eval_number(c, stack);
9702 				xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9703 
9704 				for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9705 				{
9706 					xpath_allocator_capture cri(stack.result);
9707 
9708 					if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9709 						return true;
9710 				}
9711 
9712 				return false;
9713 			}
9714 			else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
9715 			{
9716 				xpath_allocator_capture cr(stack.result);
9717 
9718 				xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9719 				double r = rhs->eval_number(c, stack);
9720 
9721 				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9722 				{
9723 					xpath_allocator_capture cri(stack.result);
9724 
9725 					if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
9726 						return true;
9727 				}
9728 
9729 				return false;
9730 			}
9731 			else
9732 			{
9733 				assert(false && "Wrong types"); // unreachable
9734 				return false;
9735 			}
9736 		}
9737 
apply_predicate_boolean(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack,bool once)9738 		static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9739 		{
9740 			assert(ns.size() >= first);
9741 			assert(expr->rettype() != xpath_type_number);
9742 
9743 			size_t i = 1;
9744 			size_t size = ns.size() - first;
9745 
9746 			xpath_node* last = ns.begin() + first;
9747 
9748 			// remove_if... or well, sort of
9749 			for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9750 			{
9751 				xpath_context c(*it, i, size);
9752 
9753 				if (expr->eval_boolean(c, stack))
9754 				{
9755 					*last++ = *it;
9756 
9757 					if (once) break;
9758 				}
9759 			}
9760 
9761 			ns.truncate(last);
9762 		}
9763 
apply_predicate_number(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack,bool once)9764 		static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9765 		{
9766 			assert(ns.size() >= first);
9767 			assert(expr->rettype() == xpath_type_number);
9768 
9769 			size_t i = 1;
9770 			size_t size = ns.size() - first;
9771 
9772 			xpath_node* last = ns.begin() + first;
9773 
9774 			// remove_if... or well, sort of
9775 			for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9776 			{
9777 				xpath_context c(*it, i, size);
9778 
9779 				if (expr->eval_number(c, stack) == static_cast<double>(i))
9780 				{
9781 					*last++ = *it;
9782 
9783 					if (once) break;
9784 				}
9785 			}
9786 
9787 			ns.truncate(last);
9788 		}
9789 
apply_predicate_number_const(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack)9790 		static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
9791 		{
9792 			assert(ns.size() >= first);
9793 			assert(expr->rettype() == xpath_type_number);
9794 
9795 			size_t size = ns.size() - first;
9796 
9797 			xpath_node* last = ns.begin() + first;
9798 
9799 			xpath_context c(xpath_node(), 1, size);
9800 
9801 			double er = expr->eval_number(c, stack);
9802 
9803 			if (er >= 1.0 && er <= static_cast<double>(size))
9804 			{
9805 				size_t eri = static_cast<size_t>(er);
9806 
9807 				if (er == static_cast<double>(eri))
9808 				{
9809 					xpath_node r = last[eri - 1];
9810 
9811 					*last++ = r;
9812 				}
9813 			}
9814 
9815 			ns.truncate(last);
9816 		}
9817 
apply_predicate(xpath_node_set_raw & ns,size_t first,const xpath_stack & stack,bool once)9818 		void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once)
9819 		{
9820 			if (ns.size() == first) return;
9821 
9822 			assert(_type == ast_filter || _type == ast_predicate);
9823 
9824 			if (_test == predicate_constant || _test == predicate_constant_one)
9825 				apply_predicate_number_const(ns, first, _right, stack);
9826 			else if (_right->rettype() == xpath_type_number)
9827 				apply_predicate_number(ns, first, _right, stack, once);
9828 			else
9829 				apply_predicate_boolean(ns, first, _right, stack, once);
9830 		}
9831 
apply_predicates(xpath_node_set_raw & ns,size_t first,const xpath_stack & stack,nodeset_eval_t eval)9832 		void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval)
9833 		{
9834 			if (ns.size() == first) return;
9835 
9836 			bool last_once = eval_once(ns.type(), eval);
9837 
9838 			for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
9839 				pred->apply_predicate(ns, first, stack, !pred->_next && last_once);
9840 		}
9841 
step_push(xpath_node_set_raw & ns,xml_attribute_struct * a,xml_node_struct * parent,xpath_allocator * alloc)9842 		bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc)
9843 		{
9844 			assert(a);
9845 
9846 			const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
9847 
9848 			switch (_test)
9849 			{
9850 			case nodetest_name:
9851 				if (strequal(name, _data.nodetest) && is_xpath_attribute(name))
9852 				{
9853 					ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9854 					return true;
9855 				}
9856 				break;
9857 
9858 			case nodetest_type_node:
9859 			case nodetest_all:
9860 				if (is_xpath_attribute(name))
9861 				{
9862 					ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9863 					return true;
9864 				}
9865 				break;
9866 
9867 			case nodetest_all_in_namespace:
9868 				if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
9869 				{
9870 					ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9871 					return true;
9872 				}
9873 				break;
9874 
9875 			default:
9876 				;
9877 			}
9878 
9879 			return false;
9880 		}
9881 
step_push(xpath_node_set_raw & ns,xml_node_struct * n,xpath_allocator * alloc)9882 		bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc)
9883 		{
9884 			assert(n);
9885 
9886 			xml_node_type type = PUGI__NODETYPE(n);
9887 
9888 			switch (_test)
9889 			{
9890 			case nodetest_name:
9891 				if (type == node_element && n->name && strequal(n->name, _data.nodetest))
9892 				{
9893 					ns.push_back(xml_node(n), alloc);
9894 					return true;
9895 				}
9896 				break;
9897 
9898 			case nodetest_type_node:
9899 				ns.push_back(xml_node(n), alloc);
9900 				return true;
9901 
9902 			case nodetest_type_comment:
9903 				if (type == node_comment)
9904 				{
9905 					ns.push_back(xml_node(n), alloc);
9906 					return true;
9907 				}
9908 				break;
9909 
9910 			case nodetest_type_text:
9911 				if (type == node_pcdata || type == node_cdata)
9912 				{
9913 					ns.push_back(xml_node(n), alloc);
9914 					return true;
9915 				}
9916 				break;
9917 
9918 			case nodetest_type_pi:
9919 				if (type == node_pi)
9920 				{
9921 					ns.push_back(xml_node(n), alloc);
9922 					return true;
9923 				}
9924 				break;
9925 
9926 			case nodetest_pi:
9927 				if (type == node_pi && n->name && strequal(n->name, _data.nodetest))
9928 				{
9929 					ns.push_back(xml_node(n), alloc);
9930 					return true;
9931 				}
9932 				break;
9933 
9934 			case nodetest_all:
9935 				if (type == node_element)
9936 				{
9937 					ns.push_back(xml_node(n), alloc);
9938 					return true;
9939 				}
9940 				break;
9941 
9942 			case nodetest_all_in_namespace:
9943 				if (type == node_element && n->name && starts_with(n->name, _data.nodetest))
9944 				{
9945 					ns.push_back(xml_node(n), alloc);
9946 					return true;
9947 				}
9948 				break;
9949 
9950 			default:
9951 				assert(false && "Unknown axis"); // unreachable
9952 			}
9953 
9954 			return false;
9955 		}
9956 
		// Collects into ns the nodes reachable from node n along the axis selected by T::axis, filtered by step_push.
		// If once is true, collection stops as soon as a single node has been pushed.
		// The last (unnamed) parameter is only used to select the template instantiation.
		// Note: step_push(...) & once uses bitwise & on two bools; step_push is always called, and the result is true only if a node was pushed and once is set.
		template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T)
		{
			const axis_t axis = T::axis;

			switch (axis)
			{
			case axis_attribute:
			{
				// all attributes of n, in list order
				for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
					if (step_push(ns, a, n, alloc) & once)
						return;

				break;
			}

			case axis_child:
			{
				// direct children of n, in list order
				for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
					if (step_push(ns, c, alloc) & once)
						return;

				break;
			}

			case axis_descendant:
			case axis_descendant_or_self:
			{
				if (axis == axis_descendant_or_self)
					if (step_push(ns, n, alloc) & once)
						return;

				// depth-first traversal of the subtree below n without recursion
				xml_node_struct* cur = n->first_child;

				while (cur)
				{
					if (step_push(ns, cur, alloc) & once)
						return;

					if (cur->first_child)
						cur = cur->first_child;
					else
					{
						// climb until a node with a next sibling is found; reaching n again means the subtree is exhausted
						while (!cur->next_sibling)
						{
							cur = cur->parent;

							if (cur == n) return;
						}

						cur = cur->next_sibling;
					}
				}

				break;
			}

			case axis_following_sibling:
			{
				for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
					if (step_push(ns, c, alloc) & once)
						return;

				break;
			}

			case axis_preceding_sibling:
			{
				// walks backwards through prev_sibling_c and stops at a node whose next_sibling is null
				// NOTE(review): this relies on prev_sibling_c of the first sibling pointing at the last sibling (cyclic link) - confirm against xml_node_struct
				for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
					if (step_push(ns, c, alloc) & once)
						return;

				break;
			}

			case axis_following:
			{
				xml_node_struct* cur = n;

				// exit from this node so that we don't include descendants
				while (!cur->next_sibling)
				{
					cur = cur->parent;

					if (!cur) return;
				}

				cur = cur->next_sibling;

				// depth-first traversal of everything after the starting point, up to the end of the tree
				while (cur)
				{
					if (step_push(ns, cur, alloc) & once)
						return;

					if (cur->first_child)
						cur = cur->first_child;
					else
					{
						while (!cur->next_sibling)
						{
							cur = cur->parent;

							if (!cur) return;
						}

						cur = cur->next_sibling;
					}
				}

				break;
			}

			case axis_preceding:
			{
				xml_node_struct* cur = n;

				// exit from this node so that we don't include descendants
				while (!cur->prev_sibling_c->next_sibling)
				{
					cur = cur->parent;

					if (!cur) return;
				}

				cur = cur->prev_sibling_c;

				// reverse traversal: descend to the last child first, push leaves, and push inner nodes when leaving them
				while (cur)
				{
					if (cur->first_child)
						// NOTE(review): first_child->prev_sibling_c is presumably the last child (cyclic link) - confirm against xml_node_struct
						cur = cur->first_child->prev_sibling_c;
					else
					{
						// leaf node, can't be ancestor
						if (step_push(ns, cur, alloc) & once)
							return;

						while (!cur->prev_sibling_c->next_sibling)
						{
							cur = cur->parent;

							if (!cur) return;

							// ancestors of n are skipped; other parents are pushed after their children
							if (!node_is_ancestor(cur, n))
								if (step_push(ns, cur, alloc) & once)
									return;
						}

						cur = cur->prev_sibling_c;
					}
				}

				break;
			}

			case axis_ancestor:
			case axis_ancestor_or_self:
			{
				if (axis == axis_ancestor_or_self)
					if (step_push(ns, n, alloc) & once)
						return;

				// follow the parent chain until it ends
				xml_node_struct* cur = n->parent;

				while (cur)
				{
					if (step_push(ns, cur, alloc) & once)
						return;

					cur = cur->parent;
				}

				break;
			}

			case axis_self:
			{
				step_push(ns, n, alloc);

				break;
			}

			case axis_parent:
			{
				if (n->parent)
					step_push(ns, n->parent, alloc);

				break;
			}

			default:
				assert(false && "Unimplemented axis"); // unreachable
			}
		}
10149 
		// Collects into ns the nodes reachable from attribute a (whose owning node is p) along the axis selected by T::axis.
		// Only the axes listed in the switch are handled; any other axis hits the assert in the default case.
		// If once is true, collection stops as soon as a single node has been pushed.
		template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v)
		{
			const axis_t axis = T::axis;

			switch (axis)
			{
			case axis_ancestor:
			case axis_ancestor_or_self:
			{
				if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
					if (step_push(ns, a, p, alloc) & once)
						return;

				// the owning node and all of its ancestors
				xml_node_struct* cur = p;

				while (cur)
				{
					if (step_push(ns, cur, alloc) & once)
						return;

					cur = cur->parent;
				}

				break;
			}

			case axis_descendant_or_self:
			case axis_self:
			{
				// only the attribute itself can be produced here
				if (_test == nodetest_type_node) // reject attributes based on principal node type test
					step_push(ns, a, p, alloc);

				break;
			}

			case axis_following:
			{
				xml_node_struct* cur = p;

				// depth-first traversal starting from the owning node; cur is advanced before it is pushed, so p itself is never pushed
				while (cur)
				{
					if (cur->first_child)
						cur = cur->first_child;
					else
					{
						while (!cur->next_sibling)
						{
							cur = cur->parent;

							if (!cur) return;
						}

						cur = cur->next_sibling;
					}

					if (step_push(ns, cur, alloc) & once)
						return;
				}

				break;
			}

			case axis_parent:
			{
				// the parent of an attribute is its owning node
				step_push(ns, p, alloc);

				break;
			}

			case axis_preceding:
			{
				// preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
				step_fill(ns, p, alloc, once, v);
				break;
			}

			default:
				assert(false && "Unimplemented axis"); // unreachable
			}
		}
10230 
step_fill(xpath_node_set_raw & ns,const xpath_node & xn,xpath_allocator * alloc,bool once,T v)10231 		template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v)
10232 		{
10233 			const axis_t axis = T::axis;
10234 			const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
10235 
10236 			if (xn.node())
10237 				step_fill(ns, xn.node().internal_object(), alloc, once, v);
10238 			else if (axis_has_attributes && xn.attribute() && xn.parent())
10239 				step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
10240 		}
10241 
		// Evaluates a location step for the axis selected by T::axis and returns the resulting node set.
		// The step is applied either to every node produced by _left (if present) or to the context node c.n;
		// predicates chained from _right are applied to the nodes contributed by each source node separately.
		template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)
		{
			const axis_t axis = T::axis;
			// these axes are treated as producing nodes in reverse order; all others produce a forward-sorted set
			const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
			const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;

			// traversal may stop after the first pushed node when:
			//  - the attribute axis is combined with a name test,
			//  - there are no predicates and eval_once allows stopping for this set type and evaluation mode,
			//  - there is exactly one predicate and it is marked predicate_constant_one
			bool once =
				(axis == axis_attribute && _test == nodetest_name) ||
				(!_right && eval_once(axis_type, eval)) ||
			    // coverity[mixed_enums]
				(_right && !_right->_next && _right->_test == predicate_constant_one);

			xpath_node_set_raw ns;
			ns.set_type(axis_type);

			if (_left)
			{
				// the source set is always evaluated in full
				xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);

				// self axis preserves the original order
				if (axis == axis_self) ns.set_type(s.type());

				for (const xpath_node* it = s.begin(); it != s.end(); ++it)
				{
					// remember where the nodes for this source node start, so predicates only see them
					size_t size = ns.size();

					// in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
					if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);

					step_fill(ns, *it, stack.result, once, v);
					if (_right) apply_predicates(ns, size, stack, eval);
				}
			}
			else
			{
				// no left-hand expression: the step is relative to the context node
				step_fill(ns, c.n, stack.result, once, v);
				if (_right) apply_predicates(ns, 0, stack, eval);
			}

			// child, attribute and self axes always generate unique set of nodes
			// for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
			if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
				ns.remove_duplicates(stack.temp);

			return ns;
		}
10288 
10289 	public:
xpath_ast_node(ast_type_t type,xpath_value_type rettype_,const char_t * value)10290 		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
10291 			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10292 		{
10293 			assert(type == ast_string_constant);
10294 			_data.string = value;
10295 		}
10296 
xpath_ast_node(ast_type_t type,xpath_value_type rettype_,double value)10297 		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
10298 			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10299 		{
10300 			assert(type == ast_number_constant);
10301 			_data.number = value;
10302 		}
10303 
xpath_ast_node(ast_type_t type,xpath_value_type rettype_,xpath_variable * value)10304 		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
10305 			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10306 		{
10307 			assert(type == ast_variable);
10308 			_data.variable = value;
10309 		}
10310 
xpath_ast_node(ast_type_t type,xpath_value_type rettype_,xpath_ast_node * left=0,xpath_ast_node * right=0)10311 		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
10312 			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
10313 		{
10314 		}
10315 
xpath_ast_node(ast_type_t type,xpath_ast_node * left,axis_t axis,nodetest_t test,const char_t * contents)10316 		xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
10317 			_type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
10318 		{
10319 			assert(type == ast_step);
10320 			_data.nodetest = contents;
10321 		}
10322 
xpath_ast_node(ast_type_t type,xpath_ast_node * left,xpath_ast_node * right,predicate_t test)10323 		xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test):
10324 			_type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0)
10325 		{
10326 			assert(type == ast_filter || type == ast_predicate);
10327 		}
10328 
set_next(xpath_ast_node * value)10329 		void set_next(xpath_ast_node* value)
10330 		{
10331 			_next = value;
10332 		}
10333 
set_right(xpath_ast_node * value)10334 		void set_right(xpath_ast_node* value)
10335 		{
10336 			_right = value;
10337 		}
10338 
		// Evaluates this expression in context c and returns the result as a boolean.
		// Node types that produce a boolean directly are handled in the first switch; for every other node type
		// the expression is evaluated according to _rettype and the result is converted to boolean in the second switch.
		bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
		{
			switch (_type)
			{
			case ast_op_or:
				// || short-circuits, so the right operand is only evaluated if the left one is false
				return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);

			case ast_op_and:
				// && short-circuits, so the right operand is only evaluated if the left one is true
				return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);

			case ast_op_equal:
				return compare_eq(_left, _right, c, stack, equal_to());

			case ast_op_not_equal:
				return compare_eq(_left, _right, c, stack, not_equal_to());

			case ast_op_less:
				return compare_rel(_left, _right, c, stack, less());

			case ast_op_greater:
				// a > b is evaluated as b < a by swapping the operands
				return compare_rel(_right, _left, c, stack, less());

			case ast_op_less_or_equal:
				return compare_rel(_left, _right, c, stack, less_equal());

			case ast_op_greater_or_equal:
				// a >= b is evaluated as b <= a by swapping the operands
				return compare_rel(_right, _left, c, stack, less_equal());

			case ast_func_starts_with:
			{
				xpath_allocator_capture cr(stack.result);

				xpath_string lr = _left->eval_string(c, stack);
				xpath_string rr = _right->eval_string(c, stack);

				return starts_with(lr.c_str(), rr.c_str());
			}

			case ast_func_contains:
			{
				xpath_allocator_capture cr(stack.result);

				xpath_string lr = _left->eval_string(c, stack);
				xpath_string rr = _right->eval_string(c, stack);

				return find_substring(lr.c_str(), rr.c_str()) != 0;
			}

			case ast_func_boolean:
				return _left->eval_boolean(c, stack);

			case ast_func_not:
				return !_left->eval_boolean(c, stack);

			case ast_func_true:
				return true;

			case ast_func_false:
				return false;

			case ast_func_lang:
			{
				// attribute context nodes always yield false
				if (c.n.attribute()) return false;

				xpath_allocator_capture cr(stack.result);

				xpath_string lang = _left->eval_string(c, stack);

				// look for the nearest xml:lang attribute on the context node or its ancestors
				for (xml_node n = c.n.node(); n; n = n.parent())
				{
					xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));

					if (a)
					{
						const char_t* value = a.value();

						// strnicmp / strncasecmp is not portable
						for (const char_t* lit = lang.c_str(); *lit; ++lit)
						{
							if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
							++value;
						}

						// the attribute value must either end here or continue with a '-' separated suffix
						return *value == 0 || *value == '-';
					}
				}

				return false;
			}

			case ast_opt_compare_attribute:
			{
				// the value to compare against comes from the right child: a string constant or the string value of a variable
				const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();

				// the attribute is looked up on the context node by the name stored in the left child's node test
				xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);

				return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
			}

			case ast_variable:
			{
				assert(_rettype == _data.variable->type());

				if (_rettype == xpath_type_boolean)
					return _data.variable->get_boolean();

				// variable needs to be converted to the correct type, this is handled by the fallthrough block below
				break;
			}

			default:
				;
			}

			// none of the ast types that return the value directly matched, we need to perform type conversion
			switch (_rettype)
			{
			case xpath_type_number:
				return convert_number_to_boolean(eval_number(c, stack));

			case xpath_type_string:
			{
				xpath_allocator_capture cr(stack.result);

				// a string is true if it is not empty
				return !eval_string(c, stack).empty();
			}

			case xpath_type_node_set:
			{
				xpath_allocator_capture cr(stack.result);

				// a node set is true if it is not empty; nodeset_eval_any is requested since only emptiness matters
				return !eval_node_set(c, stack, nodeset_eval_any).empty();
			}

			default:
				assert(false && "Wrong expression for return type boolean"); // unreachable
				return false;
			}
		}
10478 
		// Evaluates this expression in context c and returns the result as a number.
		// Node types that produce a number directly are handled in the first switch; for every other node type
		// the expression is evaluated according to _rettype and the result is converted to a number in the second switch.
		double eval_number(const xpath_context& c, const xpath_stack& stack)
		{
			switch (_type)
			{
			case ast_op_add:
				return _left->eval_number(c, stack) + _right->eval_number(c, stack);

			case ast_op_subtract:
				return _left->eval_number(c, stack) - _right->eval_number(c, stack);

			case ast_op_multiply:
				return _left->eval_number(c, stack) * _right->eval_number(c, stack);

			case ast_op_divide:
				return _left->eval_number(c, stack) / _right->eval_number(c, stack);

			case ast_op_mod:
				return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));

			case ast_op_negate:
				return -_left->eval_number(c, stack);

			case ast_number_constant:
				return _data.number;

			case ast_func_last:
				// context size
				return static_cast<double>(c.size);

			case ast_func_position:
				// context position
				return static_cast<double>(c.position);

			case ast_func_count:
			{
				xpath_allocator_capture cr(stack.result);

				// the full node set is required to count its elements
				return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
			}

			case ast_func_string_length_0:
			{
				xpath_allocator_capture cr(stack.result);

				// no argument: length of the context node's string value
				return static_cast<double>(string_value(c.n, stack.result).length());
			}

			case ast_func_string_length_1:
			{
				xpath_allocator_capture cr(stack.result);

				return static_cast<double>(_left->eval_string(c, stack).length());
			}

			case ast_func_number_0:
			{
				xpath_allocator_capture cr(stack.result);

				// no argument: the context node's string value converted to a number
				return convert_string_to_number(string_value(c.n, stack.result).c_str());
			}

			case ast_func_number_1:
				return _left->eval_number(c, stack);

			case ast_func_sum:
			{
				xpath_allocator_capture cr(stack.result);

				double r = 0;

				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);

				// add up the numeric value of every node's string value
				for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
				{
					xpath_allocator_capture cri(stack.result);

					r += convert_string_to_number(string_value(*it, stack.result).c_str());
				}

				return r;
			}

			case ast_func_floor:
			{
				double r = _left->eval_number(c, stack);

				// r == r is false only for NaN, which is returned unchanged without calling floor
				return r == r ? floor(r) : r;
			}

			case ast_func_ceiling:
			{
				double r = _left->eval_number(c, stack);

				// r == r is false only for NaN, which is returned unchanged without calling ceil
				return r == r ? ceil(r) : r;
			}

			case ast_func_round:
				return round_nearest_nzero(_left->eval_number(c, stack));

			case ast_variable:
			{
				assert(_rettype == _data.variable->type());

				if (_rettype == xpath_type_number)
					return _data.variable->get_number();

				// variable needs to be converted to the correct type, this is handled by the fallthrough block below
				break;
			}

			default:
				;
			}

			// none of the ast types that return the value directly matched, we need to perform type conversion
			switch (_rettype)
			{
			case xpath_type_boolean:
				// true converts to 1, false to 0
				return eval_boolean(c, stack) ? 1 : 0;

			case xpath_type_string:
			{
				xpath_allocator_capture cr(stack.result);

				return convert_string_to_number(eval_string(c, stack).c_str());
			}

			case xpath_type_node_set:
			{
				xpath_allocator_capture cr(stack.result);

				// node sets are converted through their string form, same as the string case above
				return convert_string_to_number(eval_string(c, stack).c_str());
			}

			default:
				assert(false && "Wrong expression for return type number"); // unreachable
				return 0;
			}
		}
10616 
eval_string_concat(const xpath_context & c,const xpath_stack & stack)10617 		xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
10618 		{
10619 			assert(_type == ast_func_concat);
10620 
10621 			xpath_allocator_capture ct(stack.temp);
10622 
10623 			// count the string number
10624 			size_t count = 1;
10625 			for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
10626 
10627 			// allocate a buffer for temporary string objects
10628 			xpath_string* buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
10629 			if (!buffer) return xpath_string();
10630 
10631 			// evaluate all strings to temporary stack
10632 			xpath_stack swapped_stack = {stack.temp, stack.result};
10633 
10634 			buffer[0] = _left->eval_string(c, swapped_stack);
10635 
10636 			size_t pos = 1;
10637 			for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
10638 			assert(pos == count);
10639 
10640 			// get total length
10641 			size_t length = 0;
10642 			for (size_t i = 0; i < count; ++i) length += buffer[i].length();
10643 
10644 			// create final string
10645 			char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
10646 			if (!result) return xpath_string();
10647 
10648 			char_t* ri = result;
10649 
10650 			for (size_t j = 0; j < count; ++j)
10651 				for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
10652 					*ri++ = *bi;
10653 
10654 			*ri = 0;
10655 
10656 			return xpath_string::from_heap_preallocated(result, ri);
10657 		}
10658 
eval_string(const xpath_context & c,const xpath_stack & stack)10659 		xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
10660 		{
10661 			switch (_type)
10662 			{
10663 			case ast_string_constant:
10664 				return xpath_string::from_const(_data.string);
10665 
10666 			case ast_func_local_name_0:
10667 			{
10668 				xpath_node na = c.n;
10669 
10670 				return xpath_string::from_const(local_name(na));
10671 			}
10672 
10673 			case ast_func_local_name_1:
10674 			{
10675 				xpath_allocator_capture cr(stack.result);
10676 
10677 				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10678 				xpath_node na = ns.first();
10679 
10680 				return xpath_string::from_const(local_name(na));
10681 			}
10682 
10683 			case ast_func_name_0:
10684 			{
10685 				xpath_node na = c.n;
10686 
10687 				return xpath_string::from_const(qualified_name(na));
10688 			}
10689 
10690 			case ast_func_name_1:
10691 			{
10692 				xpath_allocator_capture cr(stack.result);
10693 
10694 				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10695 				xpath_node na = ns.first();
10696 
10697 				return xpath_string::from_const(qualified_name(na));
10698 			}
10699 
10700 			case ast_func_namespace_uri_0:
10701 			{
10702 				xpath_node na = c.n;
10703 
10704 				return xpath_string::from_const(namespace_uri(na));
10705 			}
10706 
10707 			case ast_func_namespace_uri_1:
10708 			{
10709 				xpath_allocator_capture cr(stack.result);
10710 
10711 				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10712 				xpath_node na = ns.first();
10713 
10714 				return xpath_string::from_const(namespace_uri(na));
10715 			}
10716 
10717 			case ast_func_string_0:
10718 				return string_value(c.n, stack.result);
10719 
10720 			case ast_func_string_1:
10721 				return _left->eval_string(c, stack);
10722 
10723 			case ast_func_concat:
10724 				return eval_string_concat(c, stack);
10725 
10726 			case ast_func_substring_before:
10727 			{
10728 				xpath_allocator_capture cr(stack.temp);
10729 
10730 				xpath_stack swapped_stack = {stack.temp, stack.result};
10731 
10732 				xpath_string s = _left->eval_string(c, swapped_stack);
10733 				xpath_string p = _right->eval_string(c, swapped_stack);
10734 
10735 				const char_t* pos = find_substring(s.c_str(), p.c_str());
10736 
10737 				return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
10738 			}
10739 
10740 			case ast_func_substring_after:
10741 			{
10742 				xpath_allocator_capture cr(stack.temp);
10743 
10744 				xpath_stack swapped_stack = {stack.temp, stack.result};
10745 
10746 				xpath_string s = _left->eval_string(c, swapped_stack);
10747 				xpath_string p = _right->eval_string(c, swapped_stack);
10748 
10749 				const char_t* pos = find_substring(s.c_str(), p.c_str());
10750 				if (!pos) return xpath_string();
10751 
10752 				const char_t* rbegin = pos + p.length();
10753 				const char_t* rend = s.c_str() + s.length();
10754 
10755 				return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10756 			}
10757 
10758 			case ast_func_substring_2:
10759 			{
10760 				xpath_allocator_capture cr(stack.temp);
10761 
10762 				xpath_stack swapped_stack = {stack.temp, stack.result};
10763 
10764 				xpath_string s = _left->eval_string(c, swapped_stack);
10765 				size_t s_length = s.length();
10766 
10767 				double first = round_nearest(_right->eval_number(c, stack));
10768 
10769 				if (is_nan(first)) return xpath_string(); // NaN
10770 				else if (first >= static_cast<double>(s_length + 1)) return xpath_string();
10771 
10772 				size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10773 				assert(1 <= pos && pos <= s_length + 1);
10774 
10775 				const char_t* rbegin = s.c_str() + (pos - 1);
10776 				const char_t* rend = s.c_str() + s.length();
10777 
10778 				return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10779 			}
10780 
10781 			case ast_func_substring_3:
10782 			{
10783 				xpath_allocator_capture cr(stack.temp);
10784 
10785 				xpath_stack swapped_stack = {stack.temp, stack.result};
10786 
10787 				xpath_string s = _left->eval_string(c, swapped_stack);
10788 				size_t s_length = s.length();
10789 
10790 				double first = round_nearest(_right->eval_number(c, stack));
10791 				double last = first + round_nearest(_right->_next->eval_number(c, stack));
10792 
10793 				if (is_nan(first) || is_nan(last)) return xpath_string();
10794 				else if (first >= static_cast<double>(s_length + 1)) return xpath_string();
10795 				else if (first >= last) return xpath_string();
10796 				else if (last < 1) return xpath_string();
10797 
10798 				size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10799 				size_t end = last >= static_cast<double>(s_length + 1) ? s_length + 1 : static_cast<size_t>(last);
10800 
10801 				assert(1 <= pos && pos <= end && end <= s_length + 1);
10802 				const char_t* rbegin = s.c_str() + (pos - 1);
10803 				const char_t* rend = s.c_str() + (end - 1);
10804 
10805 				return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
10806 			}
10807 
10808 			case ast_func_normalize_space_0:
10809 			{
10810 				xpath_string s = string_value(c.n, stack.result);
10811 
10812 				char_t* begin = s.data(stack.result);
10813 				if (!begin) return xpath_string();
10814 
10815 				char_t* end = normalize_space(begin);
10816 
10817 				return xpath_string::from_heap_preallocated(begin, end);
10818 			}
10819 
10820 			case ast_func_normalize_space_1:
10821 			{
10822 				xpath_string s = _left->eval_string(c, stack);
10823 
10824 				char_t* begin = s.data(stack.result);
10825 				if (!begin) return xpath_string();
10826 
10827 				char_t* end = normalize_space(begin);
10828 
10829 				return xpath_string::from_heap_preallocated(begin, end);
10830 			}
10831 
10832 			case ast_func_translate:
10833 			{
10834 				xpath_allocator_capture cr(stack.temp);
10835 
10836 				xpath_stack swapped_stack = {stack.temp, stack.result};
10837 
10838 				xpath_string s = _left->eval_string(c, stack);
10839 				xpath_string from = _right->eval_string(c, swapped_stack);
10840 				xpath_string to = _right->_next->eval_string(c, swapped_stack);
10841 
10842 				char_t* begin = s.data(stack.result);
10843 				if (!begin) return xpath_string();
10844 
10845 				char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());
10846 
10847 				return xpath_string::from_heap_preallocated(begin, end);
10848 			}
10849 
10850 			case ast_opt_translate_table:
10851 			{
10852 				xpath_string s = _left->eval_string(c, stack);
10853 
10854 				char_t* begin = s.data(stack.result);
10855 				if (!begin) return xpath_string();
10856 
10857 				char_t* end = translate_table(begin, _data.table);
10858 
10859 				return xpath_string::from_heap_preallocated(begin, end);
10860 			}
10861 
10862 			case ast_variable:
10863 			{
10864 				assert(_rettype == _data.variable->type());
10865 
10866 				if (_rettype == xpath_type_string)
10867 					return xpath_string::from_const(_data.variable->get_string());
10868 
10869 				// variable needs to be converted to the correct type, this is handled by the fallthrough block below
10870 				break;
10871 			}
10872 
10873 			default:
10874 				;
10875 			}
10876 
10877 			// none of the ast types that return the value directly matched, we need to perform type conversion
10878 			switch (_rettype)
10879 			{
10880 			case xpath_type_boolean:
10881 				return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
10882 
10883 			case xpath_type_number:
10884 				return convert_number_to_string(eval_number(c, stack), stack.result);
10885 
10886 			case xpath_type_node_set:
10887 			{
10888 				xpath_allocator_capture cr(stack.temp);
10889 
10890 				xpath_stack swapped_stack = {stack.temp, stack.result};
10891 
10892 				xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
10893 				return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
10894 			}
10895 
10896 			default:
10897 				assert(false && "Wrong expression for return type string"); // unreachable
10898 				return xpath_string();
10899 			}
10900 		}
10901 
eval_node_set(const xpath_context & c,const xpath_stack & stack,nodeset_eval_t eval)10902 		xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)
10903 		{
10904 			switch (_type)
10905 			{
10906 			case ast_op_union:
10907 			{
10908 				xpath_allocator_capture cr(stack.temp);
10909 
10910 				xpath_stack swapped_stack = {stack.temp, stack.result};
10911 
10912 				xpath_node_set_raw ls = _left->eval_node_set(c, stack, eval);
10913 				xpath_node_set_raw rs = _right->eval_node_set(c, swapped_stack, eval);
10914 
10915 				// we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
10916 				ls.set_type(xpath_node_set::type_unsorted);
10917 
10918 				ls.append(rs.begin(), rs.end(), stack.result);
10919 				ls.remove_duplicates(stack.temp);
10920 
10921 				return ls;
10922 			}
10923 
10924 			case ast_filter:
10925 			{
10926 				xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all);
10927 
10928 				// either expression is a number or it contains position() call; sort by document order
10929 				if (_test != predicate_posinv) set.sort_do();
10930 
10931 				bool once = eval_once(set.type(), eval);
10932 
10933 				apply_predicate(set, 0, stack, once);
10934 
10935 				return set;
10936 			}
10937 
10938 			case ast_func_id:
10939 				return xpath_node_set_raw();
10940 
10941 			case ast_step:
10942 			{
10943 				switch (_axis)
10944 				{
10945 				case axis_ancestor:
10946 					return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
10947 
10948 				case axis_ancestor_or_self:
10949 					return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
10950 
10951 				case axis_attribute:
10952 					return step_do(c, stack, eval, axis_to_type<axis_attribute>());
10953 
10954 				case axis_child:
10955 					return step_do(c, stack, eval, axis_to_type<axis_child>());
10956 
10957 				case axis_descendant:
10958 					return step_do(c, stack, eval, axis_to_type<axis_descendant>());
10959 
10960 				case axis_descendant_or_self:
10961 					return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
10962 
10963 				case axis_following:
10964 					return step_do(c, stack, eval, axis_to_type<axis_following>());
10965 
10966 				case axis_following_sibling:
10967 					return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
10968 
10969 				case axis_namespace:
10970 					// namespaced axis is not supported
10971 					return xpath_node_set_raw();
10972 
10973 				case axis_parent:
10974 					return step_do(c, stack, eval, axis_to_type<axis_parent>());
10975 
10976 				case axis_preceding:
10977 					return step_do(c, stack, eval, axis_to_type<axis_preceding>());
10978 
10979 				case axis_preceding_sibling:
10980 					return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
10981 
10982 				case axis_self:
10983 					return step_do(c, stack, eval, axis_to_type<axis_self>());
10984 
10985 				default:
10986 					assert(false && "Unknown axis"); // unreachable
10987 					return xpath_node_set_raw();
10988 				}
10989 			}
10990 
10991 			case ast_step_root:
10992 			{
10993 				assert(!_right); // root step can't have any predicates
10994 
10995 				xpath_node_set_raw ns;
10996 
10997 				ns.set_type(xpath_node_set::type_sorted);
10998 
10999 				if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
11000 				else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
11001 
11002 				return ns;
11003 			}
11004 
11005 			case ast_variable:
11006 			{
11007 				assert(_rettype == _data.variable->type());
11008 
11009 				if (_rettype == xpath_type_node_set)
11010 				{
11011 					const xpath_node_set& s = _data.variable->get_node_set();
11012 
11013 					xpath_node_set_raw ns;
11014 
11015 					ns.set_type(s.type());
11016 					ns.append(s.begin(), s.end(), stack.result);
11017 
11018 					return ns;
11019 				}
11020 
11021 				// variable needs to be converted to the correct type, this is handled by the fallthrough block below
11022 				break;
11023 			}
11024 
11025 			default:
11026 				;
11027 			}
11028 
11029 			// none of the ast types that return the value directly matched, but conversions to node set are invalid
11030 			assert(false && "Wrong expression for return type node set"); // unreachable
11031 			return xpath_node_set_raw();
11032 		}
11033 
optimize(xpath_allocator * alloc)11034 		void optimize(xpath_allocator* alloc)
11035 		{
11036 			if (_left)
11037 				_left->optimize(alloc);
11038 
11039 			if (_right)
11040 				_right->optimize(alloc);
11041 
11042 			if (_next)
11043 				_next->optimize(alloc);
11044 
11045 			// coverity[var_deref_model]
11046 			optimize_self(alloc);
11047 		}
11048 
optimize_self(xpath_allocator * alloc)11049 		void optimize_self(xpath_allocator* alloc)
11050 		{
11051 			// Rewrite [position()=expr] with [expr]
11052 			// Note that this step has to go before classification to recognize [position()=1]
11053 			if ((_type == ast_filter || _type == ast_predicate) &&
11054 				_right && // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate)
11055 				_right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number)
11056 			{
11057 				_right = _right->_right;
11058 			}
11059 
11060 			// Classify filter/predicate ops to perform various optimizations during evaluation
11061 			if ((_type == ast_filter || _type == ast_predicate) && _right) // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate)
11062 			{
11063 				assert(_test == predicate_default);
11064 
11065 				if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
11066 					_test = predicate_constant_one;
11067 				else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
11068 					_test = predicate_constant;
11069 				else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
11070 					_test = predicate_posinv;
11071 			}
11072 
11073 			// Rewrite descendant-or-self::node()/child::foo with descendant::foo
11074 			// The former is a full form of //foo, the latter is much faster since it executes the node test immediately
11075 			// Do a similar kind of rewrite for self/descendant/descendant-or-self axes
11076 			// Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
11077 			if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) &&
11078 				_left && _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
11079 				is_posinv_step())
11080 			{
11081 				if (_axis == axis_child || _axis == axis_descendant)
11082 					_axis = axis_descendant;
11083 				else
11084 					_axis = axis_descendant_or_self;
11085 
11086 				_left = _left->_left;
11087 			}
11088 
11089 			// Use optimized lookup table implementation for translate() with constant arguments
11090 			if (_type == ast_func_translate &&
11091 				_right && // workaround for clang static analyzer (_right is never null for ast_func_translate)
11092 				_right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
11093 			{
11094 				unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
11095 
11096 				if (table)
11097 				{
11098 					_type = ast_opt_translate_table;
11099 					_data.table = table;
11100 				}
11101 			}
11102 
11103 			// Use optimized path for @attr = 'value' or @attr = $value
11104 			if (_type == ast_op_equal &&
11105 				_left && _right && // workaround for clang static analyzer and Coverity (_left and _right are never null for ast_op_equal)
11106                 // coverity[mixed_enums]
11107 				_left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
11108 				(_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string)))
11109 			{
11110 				_type = ast_opt_compare_attribute;
11111 			}
11112 		}
11113 
is_posinv_expr() const11114 		bool is_posinv_expr() const
11115 		{
11116 			switch (_type)
11117 			{
11118 			case ast_func_position:
11119 			case ast_func_last:
11120 				return false;
11121 
11122 			case ast_string_constant:
11123 			case ast_number_constant:
11124 			case ast_variable:
11125 				return true;
11126 
11127 			case ast_step:
11128 			case ast_step_root:
11129 				return true;
11130 
11131 			case ast_predicate:
11132 			case ast_filter:
11133 				return true;
11134 
11135 			default:
11136 				if (_left && !_left->is_posinv_expr()) return false;
11137 
11138 				for (xpath_ast_node* n = _right; n; n = n->_next)
11139 					if (!n->is_posinv_expr()) return false;
11140 
11141 				return true;
11142 			}
11143 		}
11144 
is_posinv_step() const11145 		bool is_posinv_step() const
11146 		{
11147 			assert(_type == ast_step);
11148 
11149 			for (xpath_ast_node* n = _right; n; n = n->_next)
11150 			{
11151 				assert(n->_type == ast_predicate);
11152 
11153 				if (n->_test != predicate_posinv)
11154 					return false;
11155 			}
11156 
11157 			return true;
11158 		}
11159 
rettype() const11160 		xpath_value_type rettype() const
11161 		{
11162 			return static_cast<xpath_value_type>(_rettype);
11163 		}
11164 	};
11165 
// XPath AST depth limit: PUGIXML_XPATH_DEPTH_LIMIT when that macro is defined, otherwise 1024.
#ifdef PUGIXML_XPATH_DEPTH_LIMIT
static const size_t xpath_ast_depth_limit = PUGIXML_XPATH_DEPTH_LIMIT;
#else
static const size_t xpath_ast_depth_limit = 1024;
#endif
11173 
11174 	struct xpath_parser
11175 	{
11176 		xpath_allocator* _alloc;
11177 		xpath_lexer _lexer;
11178 
11179 		const char_t* _query;
11180 		xpath_variable_set* _variables;
11181 
11182 		xpath_parse_result* _result;
11183 
11184 		char_t _scratch[32];
11185 
11186 		size_t _depth;
11187 
errorxpath_parser11188 		xpath_ast_node* error(const char* message)
11189 		{
11190 			_result->error = message;
11191 			_result->offset = _lexer.current_pos() - _query;
11192 
11193 			return 0;
11194 		}
11195 
error_oomxpath_parser11196 		xpath_ast_node* error_oom()
11197 		{
11198 			assert(_alloc->_error);
11199 			*_alloc->_error = true;
11200 
11201 			return 0;
11202 		}
11203 
error_recxpath_parser11204 		xpath_ast_node* error_rec()
11205 		{
11206 			return error("Exceeded maximum allowed query depth");
11207 		}
11208 
alloc_nodexpath_parser11209 		void* alloc_node()
11210 		{
11211 			return _alloc->allocate(sizeof(xpath_ast_node));
11212 		}
11213 
alloc_nodexpath_parser11214 		xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value)
11215 		{
11216 			void* memory = alloc_node();
11217 			return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11218 		}
11219 
alloc_nodexpath_parser11220 		xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value)
11221 		{
11222 			void* memory = alloc_node();
11223 			return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11224 		}
11225 
alloc_nodexpath_parser11226 		xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value)
11227 		{
11228 			void* memory = alloc_node();
11229 			return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11230 		}
11231 
alloc_nodexpath_parser11232 		xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0)
11233 		{
11234 			void* memory = alloc_node();
11235 			return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0;
11236 		}
11237 
alloc_nodexpath_parser11238 		xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents)
11239 		{
11240 			void* memory = alloc_node();
11241 			return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0;
11242 		}
11243 
alloc_nodexpath_parser11244 		xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test)
11245 		{
11246 			void* memory = alloc_node();
11247 			return memory ? new (memory) xpath_ast_node(type, left, right, test) : 0;
11248 		}
11249 
alloc_stringxpath_parser11250 		const char_t* alloc_string(const xpath_lexer_string& value)
11251 		{
11252 			if (!value.begin)
11253 				return PUGIXML_TEXT("");
11254 
11255 			size_t length = static_cast<size_t>(value.end - value.begin);
11256 
11257 			char_t* c = static_cast<char_t*>(_alloc->allocate((length + 1) * sizeof(char_t)));
11258 			if (!c) return 0;
11259 
11260 			memcpy(c, value.begin, length * sizeof(char_t));
11261 			c[length] = 0;
11262 
11263 			return c;
11264 		}
11265 
parse_functionxpath_parser11266 		xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
11267 		{
11268 			switch (name.begin[0])
11269 			{
11270 			case 'b':
11271 				if (name == PUGIXML_TEXT("boolean") && argc == 1)
11272 					return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]);
11273 
11274 				break;
11275 
11276 			case 'c':
11277 				if (name == PUGIXML_TEXT("count") && argc == 1)
11278 				{
11279 					if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11280 					return alloc_node(ast_func_count, xpath_type_number, args[0]);
11281 				}
11282 				else if (name == PUGIXML_TEXT("contains") && argc == 2)
11283 					return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
11284 				else if (name == PUGIXML_TEXT("concat") && argc >= 2)
11285 					return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]);
11286 				else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
11287 					return alloc_node(ast_func_ceiling, xpath_type_number, args[0]);
11288 
11289 				break;
11290 
11291 			case 'f':
11292 				if (name == PUGIXML_TEXT("false") && argc == 0)
11293 					return alloc_node(ast_func_false, xpath_type_boolean);
11294 				else if (name == PUGIXML_TEXT("floor") && argc == 1)
11295 					return alloc_node(ast_func_floor, xpath_type_number, args[0]);
11296 
11297 				break;
11298 
11299 			case 'i':
11300 				if (name == PUGIXML_TEXT("id") && argc == 1)
11301 					return alloc_node(ast_func_id, xpath_type_node_set, args[0]);
11302 
11303 				break;
11304 
11305 			case 'l':
11306 				if (name == PUGIXML_TEXT("last") && argc == 0)
11307 					return alloc_node(ast_func_last, xpath_type_number);
11308 				else if (name == PUGIXML_TEXT("lang") && argc == 1)
11309 					return alloc_node(ast_func_lang, xpath_type_boolean, args[0]);
11310 				else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
11311 				{
11312 					if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11313 					return alloc_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]);
11314 				}
11315 
11316 				break;
11317 
11318 			case 'n':
11319 				if (name == PUGIXML_TEXT("name") && argc <= 1)
11320 				{
11321 					if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11322 					return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]);
11323 				}
11324 				else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
11325 				{
11326 					if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11327 					return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]);
11328 				}
11329 				else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
11330 					return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
11331 				else if (name == PUGIXML_TEXT("not") && argc == 1)
11332 					return alloc_node(ast_func_not, xpath_type_boolean, args[0]);
11333 				else if (name == PUGIXML_TEXT("number") && argc <= 1)
11334 					return alloc_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
11335 
11336 				break;
11337 
11338 			case 'p':
11339 				if (name == PUGIXML_TEXT("position") && argc == 0)
11340 					return alloc_node(ast_func_position, xpath_type_number);
11341 
11342 				break;
11343 
11344 			case 'r':
11345 				if (name == PUGIXML_TEXT("round") && argc == 1)
11346 					return alloc_node(ast_func_round, xpath_type_number, args[0]);
11347 
11348 				break;
11349 
11350 			case 's':
11351 				if (name == PUGIXML_TEXT("string") && argc <= 1)
11352 					return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
11353 				else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
11354 					return alloc_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
11355 				else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
11356 					return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
11357 				else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
11358 					return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
11359 				else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
11360 					return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
11361 				else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
11362 					return alloc_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
11363 				else if (name == PUGIXML_TEXT("sum") && argc == 1)
11364 				{
11365 					if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11366 					return alloc_node(ast_func_sum, xpath_type_number, args[0]);
11367 				}
11368 
11369 				break;
11370 
11371 			case 't':
11372 				if (name == PUGIXML_TEXT("translate") && argc == 3)
11373 					return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]);
11374 				else if (name == PUGIXML_TEXT("true") && argc == 0)
11375 					return alloc_node(ast_func_true, xpath_type_boolean);
11376 
11377 				break;
11378 
11379 			default:
11380 				break;
11381 			}
11382 
11383 			return error("Unrecognized function or wrong parameter count");
11384 		}
11385 
parse_axis_namexpath_parser11386 		axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
11387 		{
11388 			specified = true;
11389 
11390 			switch (name.begin[0])
11391 			{
11392 			case 'a':
11393 				if (name == PUGIXML_TEXT("ancestor"))
11394 					return axis_ancestor;
11395 				else if (name == PUGIXML_TEXT("ancestor-or-self"))
11396 					return axis_ancestor_or_self;
11397 				else if (name == PUGIXML_TEXT("attribute"))
11398 					return axis_attribute;
11399 
11400 				break;
11401 
11402 			case 'c':
11403 				if (name == PUGIXML_TEXT("child"))
11404 					return axis_child;
11405 
11406 				break;
11407 
11408 			case 'd':
11409 				if (name == PUGIXML_TEXT("descendant"))
11410 					return axis_descendant;
11411 				else if (name == PUGIXML_TEXT("descendant-or-self"))
11412 					return axis_descendant_or_self;
11413 
11414 				break;
11415 
11416 			case 'f':
11417 				if (name == PUGIXML_TEXT("following"))
11418 					return axis_following;
11419 				else if (name == PUGIXML_TEXT("following-sibling"))
11420 					return axis_following_sibling;
11421 
11422 				break;
11423 
11424 			case 'n':
11425 				if (name == PUGIXML_TEXT("namespace"))
11426 					return axis_namespace;
11427 
11428 				break;
11429 
11430 			case 'p':
11431 				if (name == PUGIXML_TEXT("parent"))
11432 					return axis_parent;
11433 				else if (name == PUGIXML_TEXT("preceding"))
11434 					return axis_preceding;
11435 				else if (name == PUGIXML_TEXT("preceding-sibling"))
11436 					return axis_preceding_sibling;
11437 
11438 				break;
11439 
11440 			case 's':
11441 				if (name == PUGIXML_TEXT("self"))
11442 					return axis_self;
11443 
11444 				break;
11445 
11446 			default:
11447 				break;
11448 			}
11449 
11450 			specified = false;
11451 			return axis_child;
11452 		}
11453 
parse_node_test_typexpath_parser11454 		nodetest_t parse_node_test_type(const xpath_lexer_string& name)
11455 		{
11456 			switch (name.begin[0])
11457 			{
11458 			case 'c':
11459 				if (name == PUGIXML_TEXT("comment"))
11460 					return nodetest_type_comment;
11461 
11462 				break;
11463 
11464 			case 'n':
11465 				if (name == PUGIXML_TEXT("node"))
11466 					return nodetest_type_node;
11467 
11468 				break;
11469 
11470 			case 'p':
11471 				if (name == PUGIXML_TEXT("processing-instruction"))
11472 					return nodetest_type_pi;
11473 
11474 				break;
11475 
11476 			case 't':
11477 				if (name == PUGIXML_TEXT("text"))
11478 					return nodetest_type_text;
11479 
11480 				break;
11481 
11482 			default:
11483 				break;
11484 			}
11485 
11486 			return nodetest_none;
11487 		}
11488 
11489 		// PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
parse_primary_expressionxpath_parser11490 		xpath_ast_node* parse_primary_expression()
11491 		{
11492 			switch (_lexer.current())
11493 			{
11494 			case lex_var_ref:
11495 			{
11496 				xpath_lexer_string name = _lexer.contents();
11497 
11498 				if (!_variables)
11499 					return error("Unknown variable: variable set is not provided");
11500 
11501 				xpath_variable* var = 0;
11502 				if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var))
11503 					return error_oom();
11504 
11505 				if (!var)
11506 					return error("Unknown variable: variable set does not contain the given name");
11507 
11508 				_lexer.next();
11509 
11510 				return alloc_node(ast_variable, var->type(), var);
11511 			}
11512 
11513 			case lex_open_brace:
11514 			{
11515 				_lexer.next();
11516 
11517 				xpath_ast_node* n = parse_expression();
11518 				if (!n) return 0;
11519 
11520 				if (_lexer.current() != lex_close_brace)
11521 					return error("Expected ')' to match an opening '('");
11522 
11523 				_lexer.next();
11524 
11525 				return n;
11526 			}
11527 
11528 			case lex_quoted_string:
11529 			{
11530 				const char_t* value = alloc_string(_lexer.contents());
11531 				if (!value) return 0;
11532 
11533 				_lexer.next();
11534 
11535 				return alloc_node(ast_string_constant, xpath_type_string, value);
11536 			}
11537 
11538 			case lex_number:
11539 			{
11540 				double value = 0;
11541 
11542 				if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
11543 					return error_oom();
11544 
11545 				_lexer.next();
11546 
11547 				return alloc_node(ast_number_constant, xpath_type_number, value);
11548 			}
11549 
11550 			case lex_string:
11551 			{
11552 				xpath_ast_node* args[2] = {0};
11553 				size_t argc = 0;
11554 
11555 				xpath_lexer_string function = _lexer.contents();
11556 				_lexer.next();
11557 
11558 				xpath_ast_node* last_arg = 0;
11559 
11560 				if (_lexer.current() != lex_open_brace)
11561 					return error("Unrecognized function call");
11562 				_lexer.next();
11563 
11564 				size_t old_depth = _depth;
11565 
11566 				while (_lexer.current() != lex_close_brace)
11567 				{
11568 					if (argc > 0)
11569 					{
11570 						if (_lexer.current() != lex_comma)
11571 							return error("No comma between function arguments");
11572 						_lexer.next();
11573 					}
11574 
11575 					if (++_depth > xpath_ast_depth_limit)
11576 						return error_rec();
11577 
11578 					xpath_ast_node* n = parse_expression();
11579 					if (!n) return 0;
11580 
11581 					if (argc < 2) args[argc] = n;
11582 					else last_arg->set_next(n);
11583 
11584 					argc++;
11585 					last_arg = n;
11586 				}
11587 
11588 				_lexer.next();
11589 
11590 				_depth = old_depth;
11591 
11592 				return parse_function(function, argc, args);
11593 			}
11594 
11595 			default:
11596 				return error("Unrecognizable primary expression");
11597 			}
11598 		}
11599 
11600 		// FilterExpr ::= PrimaryExpr | FilterExpr Predicate
11601 		// Predicate ::= '[' PredicateExpr ']'
11602 		// PredicateExpr ::= Expr
parse_filter_expressionxpath_parser11603 		xpath_ast_node* parse_filter_expression()
11604 		{
11605 			xpath_ast_node* n = parse_primary_expression();
11606 			if (!n) return 0;
11607 
11608 			size_t old_depth = _depth;
11609 
11610 			while (_lexer.current() == lex_open_square_brace)
11611 			{
11612 				_lexer.next();
11613 
11614 				if (++_depth > xpath_ast_depth_limit)
11615 					return error_rec();
11616 
11617 				if (n->rettype() != xpath_type_node_set)
11618 					return error("Predicate has to be applied to node set");
11619 
11620 				xpath_ast_node* expr = parse_expression();
11621 				if (!expr) return 0;
11622 
11623 				n = alloc_node(ast_filter, n, expr, predicate_default);
11624 				if (!n) return 0;
11625 
11626 				if (_lexer.current() != lex_close_square_brace)
11627 					return error("Expected ']' to match an opening '['");
11628 
11629 				_lexer.next();
11630 			}
11631 
11632 			_depth = old_depth;
11633 
11634 			return n;
11635 		}
11636 
11637 		// Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
11638 		// AxisSpecifier ::= AxisName '::' | '@'?
11639 		// NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
11640 		// NameTest ::= '*' | NCName ':' '*' | QName
11641 		// AbbreviatedStep ::= '.' | '..'
		// Parses a single location step and returns an ast_step node whose left operand is 'set'.
		// 'set' is the expression the step is applied to; it may be 0 (the relative path entry point
		// passes 0), otherwise it has to be of node set type.
		// The step consists of an optional axis, a node test and any number of predicates, which are
		// attached to the step as a chain of ast_predicate nodes.
		xpath_ast_node* parse_step(xpath_ast_node* set)
		{
			if (set && set->rettype() != xpath_type_node_set)
				return error("Step has to be applied to node set");

			bool axis_specified = false;
			axis_t axis = axis_child; // implied child axis

			if (_lexer.current() == lex_axis_attribute)
			{
				// abbreviated attribute axis; the node test follows
				axis = axis_attribute;
				axis_specified = true;

				_lexer.next();
			}
			else if (_lexer.current() == lex_dot)
			{
				// abbreviated step: becomes a self-axis step that accepts any node
				_lexer.next();

				if (_lexer.current() == lex_open_square_brace)
					return error("Predicates are not allowed after an abbreviated step");

				return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0);
			}
			else if (_lexer.current() == lex_double_dot)
			{
				// abbreviated step: becomes a parent-axis step that accepts any node
				_lexer.next();

				if (_lexer.current() == lex_open_square_brace)
					return error("Predicates are not allowed after an abbreviated step");

				return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0);
			}

			// nt_type stays nodetest_none until the kind of node test has been determined
			nodetest_t nt_type = nodetest_none;
			xpath_lexer_string nt_name;

			if (_lexer.current() == lex_string)
			{
				// node name test
				nt_name = _lexer.contents();
				_lexer.next();

				// was it an axis name?
				if (_lexer.current() == lex_double_colon)
				{
					// parse axis name
					if (axis_specified)
						return error("Two axis specifiers in one step");

					// parse_axis_name reports through axis_specified whether the name was recognized
					axis = parse_axis_name(nt_name, axis_specified);

					if (!axis_specified)
						return error("Unknown axis");

					// read actual node test
					_lexer.next();

					if (_lexer.current() == lex_multiply)
					{
						nt_type = nodetest_all;
						nt_name = xpath_lexer_string();
						_lexer.next();
					}
					else if (_lexer.current() == lex_string)
					{
						// the test kind is decided below, together with the axis-less case
						nt_name = _lexer.contents();
						_lexer.next();
					}
					else
					{
						return error("Unrecognized node test");
					}
				}

				if (nt_type == nodetest_none)
				{
					// node type test or processing-instruction
					if (_lexer.current() == lex_open_brace)
					{
						_lexer.next();

						if (_lexer.current() == lex_close_brace)
						{
							// 'name()' with no argument: name has to be one of the known node types
							_lexer.next();

							nt_type = parse_node_test_type(nt_name);

							if (nt_type == nodetest_none)
								return error("Unrecognized node type");

							// node type tests carry no name
							nt_name = xpath_lexer_string();
						}
						else if (nt_name == PUGIXML_TEXT("processing-instruction"))
						{
							if (_lexer.current() != lex_quoted_string)
								return error("Only literals are allowed as arguments to processing-instruction()");

							// the literal argument becomes the name stored in the step
							nt_type = nodetest_pi;
							nt_name = _lexer.contents();
							_lexer.next();

							if (_lexer.current() != lex_close_brace)
								return error("Unmatched brace near processing-instruction()");
							_lexer.next();
						}
						else
						{
							return error("Unmatched brace near node type test");
						}
					}
					// QName or NCName:*
					else
					{
						if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
						{
							nt_name.end--; // erase *

							nt_type = nodetest_all_in_namespace;
						}
						else
						{
							nt_type = nodetest_name;
						}
					}
				}
			}
			else if (_lexer.current() == lex_multiply)
			{
				// '*' without an explicit axis name; nt_name stays default-constructed
				nt_type = nodetest_all;
				_lexer.next();
			}
			else
			{
				return error("Unrecognized node test");
			}

			// the step node stores a copy of the name made by alloc_string
			const char_t* nt_name_copy = alloc_string(nt_name);
			if (!nt_name_copy) return 0;

			xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy);
			if (!n) return 0;

			// depth grows by one per predicate and is restored after the predicate list
			size_t old_depth = _depth;

			// last predicate parsed so far; 0 until the first predicate has been attached
			xpath_ast_node* last = 0;

			while (_lexer.current() == lex_open_square_brace)
			{
				_lexer.next();

				if (++_depth > xpath_ast_depth_limit)
					return error_rec();

				xpath_ast_node* expr = parse_expression();
				if (!expr) return 0;

				xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default);
				if (!pred) return 0;

				if (_lexer.current() != lex_close_square_brace)
					return error("Expected ']' to match an opening '['");
				_lexer.next();

				// the first predicate hangs off the step's right link, later ones off the previous predicate
				if (last) last->set_next(pred);
				else n->set_right(pred);

				last = pred;
			}

			_depth = old_depth;

			return n;
		}
11816 
11817 		// RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
parse_relative_location_pathxpath_parser11818 		xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
11819 		{
11820 			xpath_ast_node* n = parse_step(set);
11821 			if (!n) return 0;
11822 
11823 			size_t old_depth = _depth;
11824 
11825 			while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11826 			{
11827 				lexeme_t l = _lexer.current();
11828 				_lexer.next();
11829 
11830 				if (l == lex_double_slash)
11831 				{
11832 					n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11833 					if (!n) return 0;
11834 
11835 					++_depth;
11836 				}
11837 
11838 				if (++_depth > xpath_ast_depth_limit)
11839 					return error_rec();
11840 
11841 				n = parse_step(n);
11842 				if (!n) return 0;
11843 			}
11844 
11845 			_depth = old_depth;
11846 
11847 			return n;
11848 		}
11849 
11850 		// LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
11851 		// AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
parse_location_pathxpath_parser11852 		xpath_ast_node* parse_location_path()
11853 		{
11854 			if (_lexer.current() == lex_slash)
11855 			{
11856 				_lexer.next();
11857 
11858 				xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
11859 				if (!n) return 0;
11860 
11861 				// relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
11862 				lexeme_t l = _lexer.current();
11863 
11864 				if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
11865 					return parse_relative_location_path(n);
11866 				else
11867 					return n;
11868 			}
11869 			else if (_lexer.current() == lex_double_slash)
11870 			{
11871 				_lexer.next();
11872 
11873 				xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
11874 				if (!n) return 0;
11875 
11876 				n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11877 				if (!n) return 0;
11878 
11879 				return parse_relative_location_path(n);
11880 			}
11881 
11882 			// else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
11883 			return parse_relative_location_path(0);
11884 		}
11885 
11886 		// PathExpr ::= LocationPath
11887 		//				| FilterExpr
11888 		//				| FilterExpr '/' RelativeLocationPath
11889 		//				| FilterExpr '//' RelativeLocationPath
11890 		// UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
11891 		// UnaryExpr ::= UnionExpr | '-' UnaryExpr
parse_path_or_unary_expressionxpath_parser11892 		xpath_ast_node* parse_path_or_unary_expression()
11893 		{
11894 			// Clarification.
11895 			// PathExpr begins with either LocationPath or FilterExpr.
11896 			// FilterExpr begins with PrimaryExpr
11897 			// PrimaryExpr begins with '$' in case of it being a variable reference,
11898 			// '(' in case of it being an expression, string literal, number constant or
11899 			// function call.
11900 			if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
11901 				_lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
11902 				_lexer.current() == lex_string)
11903 			{
11904 				if (_lexer.current() == lex_string)
11905 				{
11906 					// This is either a function call, or not - if not, we shall proceed with location path
11907 					const char_t* state = _lexer.state();
11908 
11909 					while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
11910 
11911 					if (*state != '(')
11912 						return parse_location_path();
11913 
11914 					// This looks like a function call; however this still can be a node-test. Check it.
11915 					if (parse_node_test_type(_lexer.contents()) != nodetest_none)
11916 						return parse_location_path();
11917 				}
11918 
11919 				xpath_ast_node* n = parse_filter_expression();
11920 				if (!n) return 0;
11921 
11922 				if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11923 				{
11924 					lexeme_t l = _lexer.current();
11925 					_lexer.next();
11926 
11927 					if (l == lex_double_slash)
11928 					{
11929 						if (n->rettype() != xpath_type_node_set)
11930 							return error("Step has to be applied to node set");
11931 
11932 						n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11933 						if (!n) return 0;
11934 					}
11935 
11936 					// select from location path
11937 					return parse_relative_location_path(n);
11938 				}
11939 
11940 				return n;
11941 			}
11942 			else if (_lexer.current() == lex_minus)
11943 			{
11944 				_lexer.next();
11945 
11946 				// precedence 7+ - only parses union expressions
11947 				xpath_ast_node* n = parse_expression(7);
11948 				if (!n) return 0;
11949 
11950 				return alloc_node(ast_op_negate, xpath_type_number, n);
11951 			}
11952 			else
11953 			{
11954 				return parse_location_path();
11955 			}
11956 		}
11957 
11958 		struct binary_op_t
11959 		{
11960 			ast_type_t asttype;
11961 			xpath_value_type rettype;
11962 			int precedence;
11963 
binary_op_txpath_parser::binary_op_t11964 			binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
11965 			{
11966 			}
11967 
binary_op_txpath_parser::binary_op_t11968 			binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
11969 			{
11970 			}
11971 
parsexpath_parser::binary_op_t11972 			static binary_op_t parse(xpath_lexer& lexer)
11973 			{
11974 				switch (lexer.current())
11975 				{
11976 				case lex_string:
11977 					if (lexer.contents() == PUGIXML_TEXT("or"))
11978 						return binary_op_t(ast_op_or, xpath_type_boolean, 1);
11979 					else if (lexer.contents() == PUGIXML_TEXT("and"))
11980 						return binary_op_t(ast_op_and, xpath_type_boolean, 2);
11981 					else if (lexer.contents() == PUGIXML_TEXT("div"))
11982 						return binary_op_t(ast_op_divide, xpath_type_number, 6);
11983 					else if (lexer.contents() == PUGIXML_TEXT("mod"))
11984 						return binary_op_t(ast_op_mod, xpath_type_number, 6);
11985 					else
11986 						return binary_op_t();
11987 
11988 				case lex_equal:
11989 					return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
11990 
11991 				case lex_not_equal:
11992 					return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
11993 
11994 				case lex_less:
11995 					return binary_op_t(ast_op_less, xpath_type_boolean, 4);
11996 
11997 				case lex_greater:
11998 					return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
11999 
12000 				case lex_less_or_equal:
12001 					return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
12002 
12003 				case lex_greater_or_equal:
12004 					return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
12005 
12006 				case lex_plus:
12007 					return binary_op_t(ast_op_add, xpath_type_number, 5);
12008 
12009 				case lex_minus:
12010 					return binary_op_t(ast_op_subtract, xpath_type_number, 5);
12011 
12012 				case lex_multiply:
12013 					return binary_op_t(ast_op_multiply, xpath_type_number, 6);
12014 
12015 				case lex_union:
12016 					return binary_op_t(ast_op_union, xpath_type_node_set, 7);
12017 
12018 				default:
12019 					return binary_op_t();
12020 				}
12021 			}
12022 		};
12023 
parse_expression_recxpath_parser12024 		xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
12025 		{
12026 			binary_op_t op = binary_op_t::parse(_lexer);
12027 
12028 			while (op.asttype != ast_unknown && op.precedence >= limit)
12029 			{
12030 				_lexer.next();
12031 
12032 				if (++_depth > xpath_ast_depth_limit)
12033 					return error_rec();
12034 
12035 				xpath_ast_node* rhs = parse_path_or_unary_expression();
12036 				if (!rhs) return 0;
12037 
12038 				binary_op_t nextop = binary_op_t::parse(_lexer);
12039 
12040 				while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
12041 				{
12042 					rhs = parse_expression_rec(rhs, nextop.precedence);
12043 					if (!rhs) return 0;
12044 
12045 					nextop = binary_op_t::parse(_lexer);
12046 				}
12047 
12048 				if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
12049 					return error("Union operator has to be applied to node sets");
12050 
12051 				lhs = alloc_node(op.asttype, op.rettype, lhs, rhs);
12052 				if (!lhs) return 0;
12053 
12054 				op = binary_op_t::parse(_lexer);
12055 			}
12056 
12057 			return lhs;
12058 		}
12059 
12060 		// Expr ::= OrExpr
12061 		// OrExpr ::= AndExpr | OrExpr 'or' AndExpr
12062 		// AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
12063 		// EqualityExpr ::= RelationalExpr
12064 		//					| EqualityExpr '=' RelationalExpr
12065 		//					| EqualityExpr '!=' RelationalExpr
12066 		// RelationalExpr ::= AdditiveExpr
12067 		//					  | RelationalExpr '<' AdditiveExpr
12068 		//					  | RelationalExpr '>' AdditiveExpr
12069 		//					  | RelationalExpr '<=' AdditiveExpr
12070 		//					  | RelationalExpr '>=' AdditiveExpr
12071 		// AdditiveExpr ::= MultiplicativeExpr
12072 		//					| AdditiveExpr '+' MultiplicativeExpr
12073 		//					| AdditiveExpr '-' MultiplicativeExpr
12074 		// MultiplicativeExpr ::= UnaryExpr
12075 		//						  | MultiplicativeExpr '*' UnaryExpr
12076 		//						  | MultiplicativeExpr 'div' UnaryExpr
12077 		//						  | MultiplicativeExpr 'mod' UnaryExpr
parse_expressionxpath_parser12078 		xpath_ast_node* parse_expression(int limit = 0)
12079 		{
12080 			size_t old_depth = _depth;
12081 
12082 			if (++_depth > xpath_ast_depth_limit)
12083 				return error_rec();
12084 
12085 			xpath_ast_node* n = parse_path_or_unary_expression();
12086 			if (!n) return 0;
12087 
12088 			n = parse_expression_rec(n, limit);
12089 
12090 			_depth = old_depth;
12091 
12092 			return n;
12093 		}
12094 
xpath_parserxpath_parser12095 		xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result), _depth(0)
12096 		{
12097 		}
12098 
parsexpath_parser12099 		xpath_ast_node* parse()
12100 		{
12101 			xpath_ast_node* n = parse_expression();
12102 			if (!n) return 0;
12103 
12104 			assert(_depth == 0);
12105 
12106 			// check if there are unparsed tokens left
12107 			if (_lexer.current() != lex_eof)
12108 				return error("Incorrect query");
12109 
12110 			return n;
12111 		}
12112 
parsexpath_parser12113 		static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
12114 		{
12115 			xpath_parser parser(query, variables, alloc, result);
12116 
12117 			return parser.parse();
12118 		}
12119 	};
12120 
12121 	struct xpath_query_impl
12122 	{
createxpath_query_impl12123 		static xpath_query_impl* create()
12124 		{
12125 			void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
12126 			if (!memory) return 0;
12127 
12128 			return new (memory) xpath_query_impl();
12129 		}
12130 
destroyxpath_query_impl12131 		static void destroy(xpath_query_impl* impl)
12132 		{
12133 			// free all allocated pages
12134 			impl->alloc.release();
12135 
12136 			// free allocator memory (with the first page)
12137 			xml_memory::deallocate(impl);
12138 		}
12139 
xpath_query_implxpath_query_impl12140 		xpath_query_impl(): root(0), alloc(&block, &oom), oom(false)
12141 		{
12142 			block.next = 0;
12143 			block.capacity = sizeof(block.data);
12144 		}
12145 
12146 		xpath_ast_node* root;
12147 		xpath_allocator alloc;
12148 		xpath_memory_block block;
12149 		bool oom;
12150 	};
12151 
evaluate_node_set_prepare(xpath_query_impl * impl)12152 	PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
12153 	{
12154 		if (!impl) return 0;
12155 
12156 		if (impl->root->rettype() != xpath_type_node_set)
12157 		{
12158 		#ifdef PUGIXML_NO_EXCEPTIONS
12159 			return 0;
12160 		#else
12161 			xpath_parse_result res;
12162 			res.error = "Expression does not evaluate to node set";
12163 
12164 			throw xpath_exception(res);
12165 		#endif
12166 		}
12167 
12168 		return impl->root;
12169 	}
12170 PUGI__NS_END
12171 
12172 namespace pugi
12173 {
12174 #ifndef PUGIXML_NO_EXCEPTIONS
	// Constructs the exception from a parse result, which is copied into _result.
	// The result is expected to describe a failure, i.e. to carry a non-null error message.
	PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
	{
		assert(_result.error);
	}
12179 
	// Returns the error message of the stored parse result
	PUGI__FN const char* xpath_exception::what() const throw()
	{
		return _result.error;
	}
12184 
	// Returns the stored parse result by reference
	PUGI__FN const xpath_parse_result& xpath_exception::result() const
	{
		return _result;
	}
12189 #endif
12190 
	// Default constructor: _node and _attribute are default-constructed
	PUGI__FN xpath_node::xpath_node()
	{
	}
12194 
	// Constructs an xpath_node from a node; _attribute is default-constructed
	PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
	{
	}
12198 
	// Constructs an xpath_node from an attribute and its parent node.
	// The parent is stored only when the attribute converts to true; otherwise a
	// default-constructed xml_node is stored in its place.
	PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
	{
	}
12202 
node() const12203 	PUGI__FN xml_node xpath_node::node() const
12204 	{
12205 		return _attribute ? xml_node() : _node;
12206 	}
12207 
	// Returns the stored attribute
	PUGI__FN xml_attribute xpath_node::attribute() const
	{
		return _attribute;
	}
12212 
parent() const12213 	PUGI__FN xml_node xpath_node::parent() const
12214 	{
12215 		return _attribute ? _node : _node.parent();
12216 	}
12217 
	// Intentionally empty: only the address of this function is used, as the non-null value
	// produced by xpath_node's conversion to unspecified_bool_type
	PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
	{
	}
12221 
operator xpath_node::unspecified_bool_type() const12222 	PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
12223 	{
12224 		return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
12225 	}
12226 
operator !() const12227 	PUGI__FN bool xpath_node::operator!() const
12228 	{
12229 		return !(_node || _attribute);
12230 	}
12231 
operator ==(const xpath_node & n) const12232 	PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
12233 	{
12234 		return _node == n._node && _attribute == n._attribute;
12235 	}
12236 
operator !=(const xpath_node & n) const12237 	PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
12238 	{
12239 		return _node != n._node || _attribute != n._attribute;
12240 	}
12241 
12242 #ifdef __BORLANDC__
	// Compiled for Borland C++ only (see the enclosing #ifdef): logical AND of an xpath_node,
	// converted to bool, with a bool
	PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
	{
		return (bool)lhs && rhs;
	}
12247 
	// Compiled for Borland C++ only (see the enclosing #ifdef): logical OR of an xpath_node,
	// converted to bool, with a bool
	PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
	{
		return (bool)lhs || rhs;
	}
12252 #endif
12253 
_assign(const_iterator begin_,const_iterator end_,type_t type_)12254 	PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_)
12255 	{
12256 		assert(begin_ <= end_);
12257 
12258 		size_t size_ = static_cast<size_t>(end_ - begin_);
12259 
12260 		// use internal buffer for 0 or 1 elements, heap buffer otherwise
12261 		xpath_node* storage = (size_ <= 1) ? _storage : static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
12262 
12263 		if (!storage)
12264 		{
12265 		#ifdef PUGIXML_NO_EXCEPTIONS
12266 			return;
12267 		#else
12268 			throw std::bad_alloc();
12269 		#endif
12270 		}
12271 
12272 		// deallocate old buffer
12273 		if (_begin != _storage)
12274 			impl::xml_memory::deallocate(_begin);
12275 
12276 		// size check is necessary because for begin_ = end_ = nullptr, memcpy is UB
12277 		if (size_)
12278 			memcpy(storage, begin_, size_ * sizeof(xpath_node));
12279 
12280 		_begin = storage;
12281 		_end = storage + size_;
12282 		_type = type_;
12283 	}
12284 
12285 #ifdef PUGIXML_HAS_MOVE
_move(xpath_node_set & rhs)12286 	PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) PUGIXML_NOEXCEPT
12287 	{
12288 		_type = rhs._type;
12289 		_storage[0] = rhs._storage[0];
12290 		_begin = (rhs._begin == rhs._storage) ? _storage : rhs._begin;
12291 		_end = _begin + (rhs._end - rhs._begin);
12292 
12293 		rhs._type = type_unsorted;
12294 		rhs._begin = rhs._storage;
12295 		rhs._end = rhs._storage;
12296 	}
12297 #endif
12298 
	// Creates an empty unsorted set; the (empty) range points at the internal storage
	PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(_storage), _end(_storage)
	{
	}
12302 
	// Creates a set holding a copy of [begin_, end_) with the given type.
	// The members are first put into the empty state so that _assign sees a valid set.
	PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(_storage), _end(_storage)
	{
		_assign(begin_, end_, type_);
	}
12307 
~xpath_node_set()12308 	PUGI__FN xpath_node_set::~xpath_node_set()
12309 	{
12310 		if (_begin != _storage)
12311 			impl::xml_memory::deallocate(_begin);
12312 	}
12313 
	// Copy constructor: starts from the empty state and then copies the elements and type of ns
	PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(_storage), _end(_storage)
	{
		_assign(ns._begin, ns._end, ns._type);
	}
12318 
operator =(const xpath_node_set & ns)12319 	PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
12320 	{
12321 		if (this == &ns) return *this;
12322 
12323 		_assign(ns._begin, ns._end, ns._type);
12324 
12325 		return *this;
12326 	}
12327 
12328 #ifdef PUGIXML_HAS_MOVE
	// Move constructor: starts from the empty state and then takes over the contents of rhs
	PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT: _type(type_unsorted), _begin(_storage), _end(_storage)
	{
		_move(rhs);
	}
12333 
operator =(xpath_node_set && rhs)12334 	PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT
12335 	{
12336 		if (this == &rhs) return *this;
12337 
12338 		if (_begin != _storage)
12339 			impl::xml_memory::deallocate(_begin);
12340 
12341 		_move(rhs);
12342 
12343 		return *this;
12344 	}
12345 #endif
12346 
	// Returns the stored collection type
	PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
	{
		return _type;
	}
12351 
size() const12352 	PUGI__FN size_t xpath_node_set::size() const
12353 	{
12354 		return _end - _begin;
12355 	}
12356 
	// Returns true when the set holds no elements
	PUGI__FN bool xpath_node_set::empty() const
	{
		return _begin == _end;
	}
12361 
	// Returns the element at the given index; the index has to be less than size()
	// (checked by assert only)
	PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
	{
		assert(index < size());
		return _begin[index];
	}
12367 
	// Returns an iterator to the first element
	PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
	{
		return _begin;
	}
12372 
	// Returns an iterator one past the last element
	PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
	{
		return _end;
	}
12377 
	// Sorts the elements in place via impl::xpath_sort, passing the current type and the
	// requested direction, and stores the type that xpath_sort returns
	PUGI__FN void xpath_node_set::sort(bool reverse)
	{
		_type = impl::xpath_sort(_begin, _end, _type, reverse);
	}
12382 
	// Returns the node selected by impl::xpath_first for the current range and type
	PUGI__FN xpath_node xpath_node_set::first() const
	{
		return impl::xpath_first(_begin, _end, _type);
	}
12387 
	// A default-constructed result describes a failure ("Internal error") at offset 0
	// until it is overwritten
	PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
	{
	}
12391 
operator bool() const12392 	PUGI__FN xpath_parse_result::operator bool() const
12393 	{
12394 		return error == 0;
12395 	}
12396 
description() const12397 	PUGI__FN const char* xpath_parse_result::description() const
12398 	{
12399 		return error ? error : "No error";
12400 	}
12401 
	// Records the value type of the variable; the _next link starts out null
	PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0)
	{
	}
12405 
name() const12406 	PUGI__FN const char_t* xpath_variable::name() const
12407 	{
12408 		switch (_type)
12409 		{
12410 		case xpath_type_node_set:
12411 			return static_cast<const impl::xpath_variable_node_set*>(this)->name;
12412 
12413 		case xpath_type_number:
12414 			return static_cast<const impl::xpath_variable_number*>(this)->name;
12415 
12416 		case xpath_type_string:
12417 			return static_cast<const impl::xpath_variable_string*>(this)->name;
12418 
12419 		case xpath_type_boolean:
12420 			return static_cast<const impl::xpath_variable_boolean*>(this)->name;
12421 
12422 		default:
12423 			assert(false && "Invalid variable type"); // unreachable
12424 			return 0;
12425 		}
12426 	}
12427 
	// Returns the value type the variable was created with
	PUGI__FN xpath_value_type xpath_variable::type() const
	{
		return _type;
	}
12432 
get_boolean() const12433 	PUGI__FN bool xpath_variable::get_boolean() const
12434 	{
12435 		return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
12436 	}
12437 
get_number() const12438 	PUGI__FN double xpath_variable::get_number() const
12439 	{
12440 		return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
12441 	}
12442 
get_string() const12443 	PUGI__FN const char_t* xpath_variable::get_string() const
12444 	{
12445 		const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
12446 		return value ? value : PUGIXML_TEXT("");
12447 	}
12448 
get_node_set() const12449 	PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
12450 	{
12451 		return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
12452 	}
12453 
set(bool value)12454 	PUGI__FN bool xpath_variable::set(bool value)
12455 	{
12456 		if (_type != xpath_type_boolean) return false;
12457 
12458 		static_cast<impl::xpath_variable_boolean*>(this)->value = value;
12459 		return true;
12460 	}
12461 
set(double value)12462 	PUGI__FN bool xpath_variable::set(double value)
12463 	{
12464 		if (_type != xpath_type_number) return false;
12465 
12466 		static_cast<impl::xpath_variable_number*>(this)->value = value;
12467 		return true;
12468 	}
12469 
set(const char_t * value)12470 	PUGI__FN bool xpath_variable::set(const char_t* value)
12471 	{
12472 		if (_type != xpath_type_string) return false;
12473 
12474 		impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
12475 
12476 		// duplicate string
12477 		size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
12478 
12479 		char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
12480 		if (!copy) return false;
12481 
12482 		memcpy(copy, value, size);
12483 
12484 		// replace old string
12485 		if (var->value) impl::xml_memory::deallocate(var->value);
12486 		var->value = copy;
12487 
12488 		return true;
12489 	}
12490 
set(const xpath_node_set & value)12491 	PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
12492 	{
12493 		if (_type != xpath_type_node_set) return false;
12494 
12495 		static_cast<impl::xpath_variable_node_set*>(this)->value = value;
12496 		return true;
12497 	}
12498 
xpath_variable_set()12499 	PUGI__FN xpath_variable_set::xpath_variable_set()
12500 	{
12501 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12502 			_data[i] = 0;
12503 	}
12504 
~xpath_variable_set()12505 	PUGI__FN xpath_variable_set::~xpath_variable_set()
12506 	{
12507 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12508 			_destroy(_data[i]);
12509 	}
12510 
xpath_variable_set(const xpath_variable_set & rhs)12511 	PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
12512 	{
12513 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12514 			_data[i] = 0;
12515 
12516 		_assign(rhs);
12517 	}
12518 
operator =(const xpath_variable_set & rhs)12519 	PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
12520 	{
12521 		if (this == &rhs) return *this;
12522 
12523 		_assign(rhs);
12524 
12525 		return *this;
12526 	}
12527 
12528 #ifdef PUGIXML_HAS_MOVE
xpath_variable_set(xpath_variable_set && rhs)12529 	PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
12530 	{
12531 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12532 		{
12533 			_data[i] = rhs._data[i];
12534 			rhs._data[i] = 0;
12535 		}
12536 	}
12537 
operator =(xpath_variable_set && rhs)12538 	PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
12539 	{
12540 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12541 		{
12542 			_destroy(_data[i]);
12543 
12544 			_data[i] = rhs._data[i];
12545 			rhs._data[i] = 0;
12546 		}
12547 
12548 		return *this;
12549 	}
12550 #endif
12551 
_assign(const xpath_variable_set & rhs)12552 	PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
12553 	{
12554 		xpath_variable_set temp;
12555 
12556 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12557 			if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i]))
12558 				return;
12559 
12560 		_swap(temp);
12561 	}
12562 
_swap(xpath_variable_set & rhs)12563 	PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
12564 	{
12565 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12566 		{
12567 			xpath_variable* chain = _data[i];
12568 
12569 			_data[i] = rhs._data[i];
12570 			rhs._data[i] = chain;
12571 		}
12572 	}
12573 
_find(const char_t * name) const12574 	PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const
12575 	{
12576 		const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12577 		size_t hash = impl::hash_string(name) % hash_size;
12578 
12579 		// look for existing variable
12580 		for (xpath_variable* var = _data[hash]; var; var = var->_next)
12581 			if (impl::strequal(var->name(), name))
12582 				return var;
12583 
12584 		return 0;
12585 	}
12586 
_clone(xpath_variable * var,xpath_variable ** out_result)12587 	PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result)
12588 	{
12589 		xpath_variable* last = 0;
12590 
12591 		while (var)
12592 		{
12593 			// allocate storage for new variable
12594 			xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name());
12595 			if (!nvar) return false;
12596 
12597 			// link the variable to the result immediately to handle failures gracefully
12598 			if (last)
12599 				last->_next = nvar;
12600 			else
12601 				*out_result = nvar;
12602 
12603 			last = nvar;
12604 
12605 			// copy the value; this can fail due to out-of-memory conditions
12606 			if (!impl::copy_xpath_variable(nvar, var)) return false;
12607 
12608 			var = var->_next;
12609 		}
12610 
12611 		return true;
12612 	}
12613 
_destroy(xpath_variable * var)12614 	PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var)
12615 	{
12616 		while (var)
12617 		{
12618 			xpath_variable* next = var->_next;
12619 
12620 			impl::delete_xpath_variable(var->_type, var);
12621 
12622 			var = next;
12623 		}
12624 	}
12625 
add(const char_t * name,xpath_value_type type)12626 	PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
12627 	{
12628 		const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12629 		size_t hash = impl::hash_string(name) % hash_size;
12630 
12631 		// look for existing variable
12632 		for (xpath_variable* var = _data[hash]; var; var = var->_next)
12633 			if (impl::strequal(var->name(), name))
12634 				return var->type() == type ? var : 0;
12635 
12636 		// add new variable
12637 		xpath_variable* result = impl::new_xpath_variable(type, name);
12638 
12639 		if (result)
12640 		{
12641 			result->_next = _data[hash];
12642 
12643 			_data[hash] = result;
12644 		}
12645 
12646 		return result;
12647 	}
12648 
set(const char_t * name,bool value)12649 	PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
12650 	{
12651 		xpath_variable* var = add(name, xpath_type_boolean);
12652 		return var ? var->set(value) : false;
12653 	}
12654 
set(const char_t * name,double value)12655 	PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
12656 	{
12657 		xpath_variable* var = add(name, xpath_type_number);
12658 		return var ? var->set(value) : false;
12659 	}
12660 
set(const char_t * name,const char_t * value)12661 	PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
12662 	{
12663 		xpath_variable* var = add(name, xpath_type_string);
12664 		return var ? var->set(value) : false;
12665 	}
12666 
set(const char_t * name,const xpath_node_set & value)12667 	PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
12668 	{
12669 		xpath_variable* var = add(name, xpath_type_node_set);
12670 		return var ? var->set(value) : false;
12671 	}
12672 
get(const char_t * name)12673 	PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
12674 	{
12675 		return _find(name);
12676 	}
12677 
get(const char_t * name) const12678 	PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
12679 	{
12680 		return _find(name);
12681 	}
12682 
xpath_query(const char_t * query,xpath_variable_set * variables)12683 	PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
12684 	{
12685 		impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
12686 
12687 		if (!qimpl)
12688 		{
12689 		#ifdef PUGIXML_NO_EXCEPTIONS
12690 			_result.error = "Out of memory";
12691 		#else
12692 			throw std::bad_alloc();
12693 		#endif
12694 		}
12695 		else
12696 		{
12697 			using impl::auto_deleter; // MSVC7 workaround
12698 			auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy);
12699 
12700 			qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
12701 
12702 			if (qimpl->root)
12703 			{
12704 				qimpl->root->optimize(&qimpl->alloc);
12705 
12706 				_impl = impl.release();
12707 				_result.error = 0;
12708 			}
12709 			else
12710 			{
12711 			#ifdef PUGIXML_NO_EXCEPTIONS
12712 				if (qimpl->oom) _result.error = "Out of memory";
12713 			#else
12714 				if (qimpl->oom) throw std::bad_alloc();
12715 				throw xpath_exception(_result);
12716 			#endif
12717 			}
12718 		}
12719 	}
12720 
xpath_query()12721 	PUGI__FN xpath_query::xpath_query(): _impl(0)
12722 	{
12723 	}
12724 
~xpath_query()12725 	PUGI__FN xpath_query::~xpath_query()
12726 	{
12727 		if (_impl)
12728 			impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12729 	}
12730 
12731 #ifdef PUGIXML_HAS_MOVE
xpath_query(xpath_query && rhs)12732 	PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT
12733 	{
12734 		_impl = rhs._impl;
12735 		_result = rhs._result;
12736 		rhs._impl = 0;
12737 		rhs._result = xpath_parse_result();
12738 	}
12739 
operator =(xpath_query && rhs)12740 	PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT
12741 	{
12742 		if (this == &rhs) return *this;
12743 
12744 		if (_impl)
12745 			impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12746 
12747 		_impl = rhs._impl;
12748 		_result = rhs._result;
12749 		rhs._impl = 0;
12750 		rhs._result = xpath_parse_result();
12751 
12752 		return *this;
12753 	}
12754 #endif
12755 
return_type() const12756 	PUGI__FN xpath_value_type xpath_query::return_type() const
12757 	{
12758 		if (!_impl) return xpath_type_none;
12759 
12760 		return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
12761 	}
12762 
evaluate_boolean(const xpath_node & n) const12763 	PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
12764 	{
12765 		if (!_impl) return false;
12766 
12767 		impl::xpath_context c(n, 1, 1);
12768 		impl::xpath_stack_data sd;
12769 
12770 		bool r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
12771 
12772 		if (sd.oom)
12773 		{
12774 		#ifdef PUGIXML_NO_EXCEPTIONS
12775 			return false;
12776 		#else
12777 			throw std::bad_alloc();
12778 		#endif
12779 		}
12780 
12781 		return r;
12782 	}
12783 
evaluate_number(const xpath_node & n) const12784 	PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
12785 	{
12786 		if (!_impl) return impl::gen_nan();
12787 
12788 		impl::xpath_context c(n, 1, 1);
12789 		impl::xpath_stack_data sd;
12790 
12791 		double r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
12792 
12793 		if (sd.oom)
12794 		{
12795 		#ifdef PUGIXML_NO_EXCEPTIONS
12796 			return impl::gen_nan();
12797 		#else
12798 			throw std::bad_alloc();
12799 		#endif
12800 		}
12801 
12802 		return r;
12803 	}
12804 
12805 #ifndef PUGIXML_NO_STL
evaluate_string(const xpath_node & n) const12806 	PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
12807 	{
12808 		if (!_impl) return string_t();
12809 
12810 		impl::xpath_context c(n, 1, 1);
12811 		impl::xpath_stack_data sd;
12812 
12813 		impl::xpath_string r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack);
12814 
12815 		if (sd.oom)
12816 		{
12817 		#ifdef PUGIXML_NO_EXCEPTIONS
12818 			return string_t();
12819 		#else
12820 			throw std::bad_alloc();
12821 		#endif
12822 		}
12823 
12824 		return string_t(r.c_str(), r.length());
12825 	}
12826 #endif
12827 
evaluate_string(char_t * buffer,size_t capacity,const xpath_node & n) const12828 	PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
12829 	{
12830 		impl::xpath_context c(n, 1, 1);
12831 		impl::xpath_stack_data sd;
12832 
12833 		impl::xpath_string r = _impl ? static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack) : impl::xpath_string();
12834 
12835 		if (sd.oom)
12836 		{
12837 		#ifdef PUGIXML_NO_EXCEPTIONS
12838 			r = impl::xpath_string();
12839 		#else
12840 			throw std::bad_alloc();
12841 		#endif
12842 		}
12843 
12844 		size_t full_size = r.length() + 1;
12845 
12846 		if (capacity > 0)
12847 		{
12848 			size_t size = (full_size < capacity) ? full_size : capacity;
12849 			assert(size > 0);
12850 
12851 			memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
12852 			buffer[size - 1] = 0;
12853 		}
12854 
12855 		return full_size;
12856 	}
12857 
evaluate_node_set(const xpath_node & n) const12858 	PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
12859 	{
12860 		impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12861 		if (!root) return xpath_node_set();
12862 
12863 		impl::xpath_context c(n, 1, 1);
12864 		impl::xpath_stack_data sd;
12865 
12866 		impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
12867 
12868 		if (sd.oom)
12869 		{
12870 		#ifdef PUGIXML_NO_EXCEPTIONS
12871 			return xpath_node_set();
12872 		#else
12873 			throw std::bad_alloc();
12874 		#endif
12875 		}
12876 
12877 		return xpath_node_set(r.begin(), r.end(), r.type());
12878 	}
12879 
evaluate_node(const xpath_node & n) const12880 	PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
12881 	{
12882 		impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12883 		if (!root) return xpath_node();
12884 
12885 		impl::xpath_context c(n, 1, 1);
12886 		impl::xpath_stack_data sd;
12887 
12888 		impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
12889 
12890 		if (sd.oom)
12891 		{
12892 		#ifdef PUGIXML_NO_EXCEPTIONS
12893 			return xpath_node();
12894 		#else
12895 			throw std::bad_alloc();
12896 		#endif
12897 		}
12898 
12899 		return r.first();
12900 	}
12901 
result() const12902 	PUGI__FN const xpath_parse_result& xpath_query::result() const
12903 	{
12904 		return _result;
12905 	}
12906 
unspecified_bool_xpath_query(xpath_query ***)12907 	PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
12908 	{
12909 	}
12910 
operator xpath_query::unspecified_bool_type() const12911 	PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
12912 	{
12913 		return _impl ? unspecified_bool_xpath_query : 0;
12914 	}
12915 
operator !() const12916 	PUGI__FN bool xpath_query::operator!() const
12917 	{
12918 		return !_impl;
12919 	}
12920 
select_node(const char_t * query,xpath_variable_set * variables) const12921 	PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
12922 	{
12923 		xpath_query q(query, variables);
12924 		return q.evaluate_node(*this);
12925 	}
12926 
select_node(const xpath_query & query) const12927 	PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const
12928 	{
12929 		return query.evaluate_node(*this);
12930 	}
12931 
select_nodes(const char_t * query,xpath_variable_set * variables) const12932 	PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
12933 	{
12934 		xpath_query q(query, variables);
12935 		return q.evaluate_node_set(*this);
12936 	}
12937 
select_nodes(const xpath_query & query) const12938 	PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
12939 	{
12940 		return query.evaluate_node_set(*this);
12941 	}
12942 
select_single_node(const char_t * query,xpath_variable_set * variables) const12943 	PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
12944 	{
12945 		xpath_query q(query, variables);
12946 		return q.evaluate_node(*this);
12947 	}
12948 
select_single_node(const xpath_query & query) const12949 	PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
12950 	{
12951 		return query.evaluate_node(*this);
12952 	}
12953 }
12954 
12955 #endif
12956 
12957 #ifdef __BORLANDC__
12958 #	pragma option pop
12959 #endif
12960 
12961 // Intel C++ does not properly keep warning state for function templates,
12962 // so popping warning state at the end of translation unit leads to warnings in the middle.
12963 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
12964 #	pragma warning(pop)
12965 #endif
12966 
12967 #if defined(_MSC_VER) && defined(__c2__)
12968 #	pragma clang diagnostic pop
12969 #endif
12970 
12971 // Undefine all local macros (makes sure we're not leaking macros in header-only mode)
12972 #undef PUGI__NO_INLINE
12973 #undef PUGI__UNLIKELY
12974 #undef PUGI__STATIC_ASSERT
12975 #undef PUGI__DMC_VOLATILE
12976 #undef PUGI__UNSIGNED_OVERFLOW
12977 #undef PUGI__MSVC_CRT_VERSION
12978 #undef PUGI__SNPRINTF
12979 #undef PUGI__NS_BEGIN
12980 #undef PUGI__NS_END
12981 #undef PUGI__FN
12982 #undef PUGI__FN_NO_INLINE
12983 #undef PUGI__GETHEADER_IMPL
12984 #undef PUGI__GETPAGE_IMPL
12985 #undef PUGI__GETPAGE
12986 #undef PUGI__NODETYPE
12987 #undef PUGI__IS_CHARTYPE_IMPL
12988 #undef PUGI__IS_CHARTYPE
12989 #undef PUGI__IS_CHARTYPEX
12990 #undef PUGI__ENDSWITH
12991 #undef PUGI__SKIPWS
12992 #undef PUGI__OPTSET
12993 #undef PUGI__PUSHNODE
12994 #undef PUGI__POPNODE
12995 #undef PUGI__SCANFOR
12996 #undef PUGI__SCANWHILE
12997 #undef PUGI__SCANWHILE_UNROLL
12998 #undef PUGI__ENDSEG
12999 #undef PUGI__THROW_ERROR
13000 #undef PUGI__CHECK_ERROR
13001 
13002 #endif
13003 
13004 /**
13005  * Copyright (c) 2006-2020 Arseny Kapoulkine
13006  *
13007  * Permission is hereby granted, free of charge, to any person
13008  * obtaining a copy of this software and associated documentation
13009  * files (the "Software"), to deal in the Software without
13010  * restriction, including without limitation the rights to use,
13011  * copy, modify, merge, publish, distribute, sublicense, and/or sell
13012  * copies of the Software, and to permit persons to whom the
13013  * Software is furnished to do so, subject to the following
13014  * conditions:
13015  *
13016  * The above copyright notice and this permission notice shall be
13017  * included in all copies or substantial portions of the Software.
13018  *
13019  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13020  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
13021  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
13022  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
13023  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
13024  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
13025  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
13026  * OTHER DEALINGS IN THE SOFTWARE.
13027  */
13028