1 /**
2  * pugixml parser - version 1.7
3  * --------------------------------------------------------
4  * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
5  * Report bugs and download new versions at http://pugixml.org/
6  *
7  * This library is distributed under the MIT License. See notice at the end
8  * of this file.
9  *
10  * This work is based on the pugxml parser, which is:
11  * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
12  */
13 
14 #ifndef SOURCE_PUGIXML_CPP
15 #define SOURCE_PUGIXML_CPP
16 
17 #include "pugixml.hpp"
18 
19 #include <stdlib.h>
20 #include <stdio.h>
21 #include <string.h>
22 #include <assert.h>
23 #include <limits.h>
24 
25 #ifdef PUGIXML_WCHAR_MODE
26 #	include <wchar.h>
27 #endif
28 
29 #ifndef PUGIXML_NO_XPATH
30 #	include <math.h>
31 #	include <float.h>
32 #	ifdef PUGIXML_NO_EXCEPTIONS
33 #		include <setjmp.h>
34 #	endif
35 #endif
36 
37 #ifndef PUGIXML_NO_STL
38 #	include <istream>
39 #	include <ostream>
40 #	include <string>
41 #endif
42 
43 // For placement new
44 #include <new>
45 
46 #ifdef _MSC_VER
47 #	pragma warning(push)
48 #	pragma warning(disable: 4127) // conditional expression is constant
49 #	pragma warning(disable: 4324) // structure was padded due to __declspec(align())
50 #	pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
51 #	pragma warning(disable: 4702) // unreachable code
52 #	pragma warning(disable: 4996) // this function or variable may be unsafe
53 #	pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
54 #endif
55 
56 #ifdef __INTEL_COMPILER
57 #	pragma warning(disable: 177) // function was declared but never referenced
58 #	pragma warning(disable: 279) // controlling expression is constant
59 #	pragma warning(disable: 1478 1786) // function was declared "deprecated"
60 #	pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
61 #endif
62 
63 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
64 #	pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
65 #endif
66 
67 #ifdef __BORLANDC__
68 #	pragma option push
69 #	pragma warn -8008 // condition is always false
70 #	pragma warn -8066 // unreachable code
71 #endif
72 
73 #ifdef __SNC__
74 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
75 #	pragma diag_suppress=178 // function was declared but never referenced
76 #	pragma diag_suppress=237 // controlling expression is constant
77 #endif
78 
// Inlining controls
// PUGI__NO_INLINE expands to the compiler-specific "do not inline" annotation when one is known
// (MSVC with _MSC_VER >= 1300, GCC-compatible compilers) and to nothing on other compilers
#if defined(_MSC_VER) && _MSC_VER >= 1300
#	define PUGI__NO_INLINE __declspec(noinline)
#elif defined(__GNUC__)
#	define PUGI__NO_INLINE __attribute__((noinline))
#else
#	define PUGI__NO_INLINE
#endif
87 
// Branch weight controls
// PUGI__UNLIKELY(cond) yields the value of cond; on GCC-compatible compilers it additionally tells
// the optimizer that cond is expected to be false
#if defined(__GNUC__)
#	define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
#else
#	define PUGI__UNLIKELY(cond) (cond)
#endif
94 
// Simple static assertion
// Declares an array whose size is -1 when cond is false, which makes the translation unit ill-formed
// The macro expands to a braced block, so it can only be used where a statement is allowed
#define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
97 
// Digital Mars C++ bug workaround for passing char loaded from memory via stack
// PUGI__DMC_VOLATILE expands to 'volatile' when __DMC__ is defined and to nothing otherwise
#ifdef __DMC__
#	define PUGI__DMC_VOLATILE volatile
#else
#	define PUGI__DMC_VOLATILE
#endif
104 
// Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
// The using-declarations make the unqualified names used throughout this file resolve to the std:: versions
#if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
using std::memcpy;
using std::memmove;
using std::memset;
#endif
111 
// In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
// PUGI__MSVC_CRT_VERSION is therefore only defined (to _MSC_VER) when building with MSVC and __S3E__ is not defined
#if defined(_MSC_VER) && !defined(__S3E__)
#	define PUGI__MSVC_CRT_VERSION _MSC_VER
#endif
116 
// Namespace and linkage controls for implementation details
// - PUGI__NS_BEGIN/PUGI__NS_END open and close the namespace that hosts internal code: pugi::impl in header-only
//   mode (and for _MSC_VER < 1300), otherwise an anonymous namespace nested inside pugi::impl
// - PUGI__FN prefixes internal functions: 'inline' in header-only mode, empty otherwise
// - PUGI__FN_NO_INLINE prefixes internal functions that should stay out of line: 'inline' in header-only mode,
//   PUGI__NO_INLINE otherwise
#ifdef PUGIXML_HEADER_ONLY
#	define PUGI__NS_BEGIN namespace pugi { namespace impl {
#	define PUGI__NS_END } }
#	define PUGI__FN inline
#	define PUGI__FN_NO_INLINE inline
#else
#	if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
#		define PUGI__NS_BEGIN namespace pugi { namespace impl {
#		define PUGI__NS_END } }
#	else
#		define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
#		define PUGI__NS_END } } }
#	endif
#	define PUGI__FN
#	define PUGI__FN_NO_INLINE PUGI__NO_INLINE
#endif
133 
// uintptr_t
// Compilers other than MSVC, and MSVC with _MSC_VER >= 1600, take the fixed-width types from <stdint.h>
// For older MSVC the types used by this file are declared by hand inside namespace pugi
#if !defined(_MSC_VER) || _MSC_VER >= 1600
#	include <stdint.h>
#else
namespace pugi
{
	// skip the typedef if uintptr_t was already declared (signalled by _UINTPTR_T_DEFINED)
#	ifndef _UINTPTR_T_DEFINED
	typedef size_t uintptr_t;
#	endif

	typedef unsigned __int8 uint8_t;
	typedef unsigned __int16 uint16_t;
	typedef unsigned __int32 uint32_t;
}
#endif
149 
150 // Memory allocation
151 PUGI__NS_BEGIN
default_allocate(size_t size)152 	PUGI__FN void* default_allocate(size_t size)
153 	{
154 		return malloc(size);
155 	}
156 
default_deallocate(void * ptr)157 	PUGI__FN void default_deallocate(void* ptr)
158 	{
159 		free(ptr);
160 	}
161 
	// Holder for the pair of function pointers used for all raw memory allocation in this file
	// The template parameter is not used by the members; see the comment below for why this is a template
	template <typename T>
	struct xml_memory_management_function_storage
	{
		static allocation_function allocate; // initialized to default_allocate
		static deallocation_function deallocate; // initialized to default_deallocate
	};

	// Global allocation functions are stored in class statics so that in header mode linker deduplicates them
	// Without a template<> we'll get multiple definitions of the same static
	template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
	template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;

	// Shorthand for the single instantiation that the rest of the code goes through
	typedef xml_memory_management_function_storage<int> xml_memory;
175 PUGI__NS_END
176 
177 // String utilities
178 PUGI__NS_BEGIN
179 	// Get string length
strlength(const char_t * s)180 	PUGI__FN size_t strlength(const char_t* s)
181 	{
182 		assert(s);
183 
184 	#ifdef PUGIXML_WCHAR_MODE
185 		return wcslen(s);
186 	#else
187 		return strlen(s);
188 	#endif
189 	}
190 
191 	// Compare two strings
strequal(const char_t * src,const char_t * dst)192 	PUGI__FN bool strequal(const char_t* src, const char_t* dst)
193 	{
194 		assert(src && dst);
195 
196 	#ifdef PUGIXML_WCHAR_MODE
197 		return wcscmp(src, dst) == 0;
198 	#else
199 		return strcmp(src, dst) == 0;
200 	#endif
201 	}
202 
203 	// Compare lhs with [rhs_begin, rhs_end)
strequalrange(const char_t * lhs,const char_t * rhs,size_t count)204 	PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
205 	{
206 		for (size_t i = 0; i < count; ++i)
207 			if (lhs[i] != rhs[i])
208 				return false;
209 
210 		return lhs[count] == 0;
211 	}
212 
213 	// Get length of wide string, even if CRT lacks wide character support
strlength_wide(const wchar_t * s)214 	PUGI__FN size_t strlength_wide(const wchar_t* s)
215 	{
216 		assert(s);
217 
218 	#ifdef PUGIXML_WCHAR_MODE
219 		return wcslen(s);
220 	#else
221 		const wchar_t* end = s;
222 		while (*end) end++;
223 		return static_cast<size_t>(end - s);
224 	#endif
225 	}
226 PUGI__NS_END
227 
228 // auto_ptr-like object for exception recovery
229 PUGI__NS_BEGIN
230 	template <typename T, typename D = void(*)(T*)> struct auto_deleter
231 	{
232 		T* data;
233 		D deleter;
234 
auto_deleterauto_deleter235 		auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
236 		{
237 		}
238 
~auto_deleterauto_deleter239 		~auto_deleter()
240 		{
241 			if (data) deleter(data);
242 		}
243 
releaseauto_deleter244 		T* release()
245 		{
246 			T* result = data;
247 			data = 0;
248 			return result;
249 		}
250 	};
251 PUGI__NS_END
252 
253 #ifdef PUGIXML_COMPACT
254 PUGI__NS_BEGIN
255 	class compact_hash_table
256 	{
257 	public:
compact_hash_table()258 		compact_hash_table(): _items(0), _capacity(0), _count(0)
259 		{
260 		}
261 
clear()262 		void clear()
263 		{
264 			if (_items)
265 			{
266 				xml_memory::deallocate(_items);
267 				_items = 0;
268 				_capacity = 0;
269 				_count = 0;
270 			}
271 		}
272 
find(const void * key)273 		void** find(const void* key)
274 		{
275 			assert(key);
276 
277 			if (_capacity == 0) return 0;
278 
279 			size_t hashmod = _capacity - 1;
280 			size_t bucket = hash(key) & hashmod;
281 
282 			for (size_t probe = 0; probe <= hashmod; ++probe)
283 			{
284 				item_t& probe_item = _items[bucket];
285 
286 				if (probe_item.key == key)
287 					return &probe_item.value;
288 
289 				if (probe_item.key == 0)
290 					return 0;
291 
292 				// hash collision, quadratic probing
293 				bucket = (bucket + probe + 1) & hashmod;
294 			}
295 
296 			assert(!"Hash table is full");
297 			return 0;
298 		}
299 
insert(const void * key)300 		void** insert(const void* key)
301 		{
302 			assert(key);
303 			assert(_capacity != 0 && _count < _capacity - _capacity / 4);
304 
305 			size_t hashmod = _capacity - 1;
306 			size_t bucket = hash(key) & hashmod;
307 
308 			for (size_t probe = 0; probe <= hashmod; ++probe)
309 			{
310 				item_t& probe_item = _items[bucket];
311 
312 				if (probe_item.key == 0)
313 				{
314 					probe_item.key = key;
315 					_count++;
316 					return &probe_item.value;
317 				}
318 
319 				if (probe_item.key == key)
320 					return &probe_item.value;
321 
322 				// hash collision, quadratic probing
323 				bucket = (bucket + probe + 1) & hashmod;
324 			}
325 
326 			assert(!"Hash table is full");
327 			return 0;
328 		}
329 
reserve()330 		bool reserve()
331 		{
332 			if (_count + 16 >= _capacity - _capacity / 4)
333 				return rehash();
334 
335 			return true;
336 		}
337 
338 	private:
339 		struct item_t
340 		{
341 			const void* key;
342 			void* value;
343 		};
344 
345 		item_t* _items;
346 		size_t _capacity;
347 
348 		size_t _count;
349 
350 		bool rehash();
351 
hash(const void * key)352 		static unsigned int hash(const void* key)
353 		{
354 			unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
355 
356 			// MurmurHash3 32-bit finalizer
357 			h ^= h >> 16;
358 			h *= 0x85ebca6bu;
359 			h ^= h >> 13;
360 			h *= 0xc2b2ae35u;
361 			h ^= h >> 16;
362 
363 			return h;
364 		}
365 	};
366 
rehash()367 	PUGI__FN_NO_INLINE bool compact_hash_table::rehash()
368 	{
369 		compact_hash_table rt;
370 		rt._capacity = (_capacity == 0) ? 32 : _capacity * 2;
371 		rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * rt._capacity));
372 
373 		if (!rt._items)
374 			return false;
375 
376 		memset(rt._items, 0, sizeof(item_t) * rt._capacity);
377 
378 		for (size_t i = 0; i < _capacity; ++i)
379 			if (_items[i].key)
380 				*rt.insert(_items[i].key) = _items[i].value;
381 
382 		if (_items)
383 			xml_memory::deallocate(_items);
384 
385 		_capacity = rt._capacity;
386 		_items = rt._items;
387 
388 		assert(_count == rt._count);
389 
390 		return true;
391 	}
392 
393 PUGI__NS_END
394 #endif
395 
396 PUGI__NS_BEGIN
	// Size in bytes of the data area of a regular memory page; can be overridden by defining PUGIXML_MEMORY_PAGE_SIZE
	static const size_t xml_memory_page_size =
	#ifdef PUGIXML_MEMORY_PAGE_SIZE
		PUGIXML_MEMORY_PAGE_SIZE
	#else
		32768
	#endif
		;

	// xml_memory_block_alignment: granularity used when sizing and addressing string blocks inside a page
	// xml_memory_page_alignment: alignment of the page structure itself
#ifdef PUGIXML_COMPACT
	static const uintptr_t xml_memory_block_alignment = 4;

	static const uintptr_t xml_memory_page_alignment = sizeof(void*);
#else
	static const uintptr_t xml_memory_block_alignment = sizeof(void*);

	static const uintptr_t xml_memory_page_alignment = 64;
	// clears the low bits of a header value, leaving the address of the (aligned) page
	static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 1);
#endif

	// extra metadata bits
	// these are stored in the 'header' field of nodes/attributes alongside the page reference
	static const uintptr_t xml_memory_page_contents_shared_mask = 32;
	static const uintptr_t xml_memory_page_name_allocated_mask = 16;
	static const uintptr_t xml_memory_page_value_allocated_mask = 8;
	static const uintptr_t xml_memory_page_type_mask = 7;

	// combined masks for string uniqueness
	static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
	static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;
425 
// PUGI__GETPAGE_IMPL(header) recovers the owning xml_memory_page from a header value:
// in compact mode the header object computes it, otherwise the page address is the header with the flag bits masked off
#ifdef PUGIXML_COMPACT
	#define PUGI__GETPAGE_IMPL(header) (header).get_page()
#else
	#define PUGI__GETPAGE_IMPL(header) reinterpret_cast<impl::xml_memory_page*>((header) & impl::xml_memory_page_pointer_mask)
#endif

	// PUGI__GETPAGE(n): page that owns the object pointed to by n (anything with a 'header' member)
	// PUGI__NODETYPE(n): node type of n; the header stores the type minus one in the bits covered by xml_memory_page_type_mask
	#define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
	#define PUGI__NODETYPE(n) static_cast<xml_node_type>(((n)->header & impl::xml_memory_page_type_mask) + 1)
434 
435 	struct xml_allocator;
436 
437 	struct xml_memory_page
438 	{
constructxml_memory_page439 		static xml_memory_page* construct(void* memory)
440 		{
441 			xml_memory_page* result = static_cast<xml_memory_page*>(memory);
442 
443 			result->allocator = 0;
444 			result->prev = 0;
445 			result->next = 0;
446 			result->busy_size = 0;
447 			result->freed_size = 0;
448 
449 		#ifdef PUGIXML_COMPACT
450 			result->compact_string_base = 0;
451 			result->compact_shared_parent = 0;
452 			result->compact_page_marker = 0;
453 		#endif
454 
455 			return result;
456 		}
457 
458 		xml_allocator* allocator;
459 
460 		xml_memory_page* prev;
461 		xml_memory_page* next;
462 
463 		size_t busy_size;
464 		size_t freed_size;
465 
466 	#ifdef PUGIXML_COMPACT
467 		char_t* compact_string_base;
468 		void* compact_shared_parent;
469 		uint32_t* compact_page_marker;
470 	#endif
471 	};
472 
	// Bookkeeping record that xml_allocator::allocate_string places immediately before the string data
	// Both fields are stored in units of xml_memory_block_alignment
	struct xml_memory_string_header
	{
		uint16_t page_offset; // offset of this header from the start of the page data area
		uint16_t full_size; // size of header + string; 0 if string occupies whole page
	};
478 
	// Page-based allocator
	// Memory is handed out sequentially from the current page (_root, the last page of a doubly linked list);
	// requests that do not fit go through allocate_memory_oob, which brings in a new page.
	// Freed blocks are not reused individually: each page only counts its freed bytes, and is released
	// (or reset, if it is the last page) once everything allocated from it has been freed.
	struct xml_allocator
	{
		// root is the page to allocate from initially; its busy_size is picked up as the starting fill level
		xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
		{
		#ifdef PUGIXML_COMPACT
			_hash = 0;
		#endif
		}

		// Allocate and initialize a page with data_size bytes of data area; the page is not linked into the list
		// Returns 0 if the underlying allocation fails
		xml_memory_page* allocate_page(size_t data_size)
		{
			size_t size = sizeof(xml_memory_page) + data_size;

			// allocate block with some alignment, leaving memory for worst-case padding
			void* memory = xml_memory::allocate(size + xml_memory_page_alignment);
			if (!memory) return 0;

			// align to next page boundary (note: this guarantees at least 1 usable byte before the page)
			char* page_memory = reinterpret_cast<char*>((reinterpret_cast<uintptr_t>(memory) + xml_memory_page_alignment) & ~(xml_memory_page_alignment - 1));

			// prepare page structure
			xml_memory_page* page = xml_memory_page::construct(page_memory);
			assert(page);

			// the new page reports the same owner as the current root page
			page->allocator = _root->allocator;

			// record the offset for freeing the memory block
			assert(page_memory > memory && page_memory - static_cast<char*>(memory) <= 127);
			page_memory[-1] = static_cast<char>(page_memory - static_cast<char*>(memory));

			return page;
		}

		// Release a page obtained from allocate_page
		// The byte stored just before the page holds the distance back to the start of the original block
		static void deallocate_page(xml_memory_page* page)
		{
			char* page_memory = reinterpret_cast<char*>(page);

			xml_memory::deallocate(page_memory - page_memory[-1]);
		}

		// Slow path of allocate_memory, used when the request does not fit into the current page
		void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);

		// Allocate size bytes; out_page receives the page the block was taken from
		// Returns 0 if a new page was required and could not be allocated
		void* allocate_memory(size_t size, xml_memory_page*& out_page)
		{
			if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
				return allocate_memory_oob(size, out_page);

			// the data area starts right after the page header; _busy_size is the fill level of the root page
			void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;

			_busy_size += size;

			out_page = _root;

			return buf;
		}

	#ifdef PUGIXML_COMPACT
		// Allocate memory for an object that starts with a compact header
		// Compact headers locate their page through a uint32_t marker that stores its own offset from the page start;
		// a new marker is written in front of the object whenever the most recent one is too far away to be referenced
		void* allocate_object(size_t size, xml_memory_page*& out_page)
		{
			// speculatively reserve room for a marker in front of the object
			void* result = allocate_memory(size + sizeof(uint32_t), out_page);
			if (!result) return 0;

			// adjust for marker
			ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);

			// the unsigned comparison also takes this branch when the object precedes the marker or the page has no marker yet
			if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment))
			{
				// insert new marker
				uint32_t* marker = static_cast<uint32_t*>(result);

				*marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
				out_page->compact_page_marker = marker;

				// since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
				// this will make sure deallocate_memory correctly tracks the size
				out_page->freed_size += sizeof(uint32_t);

				return marker + 1;
			}
			else
			{
				// roll back uint32_t part
				_busy_size -= sizeof(uint32_t);

				return result;
			}
		}
	#else
		// Without compact mode objects need no extra bookkeeping and are allocated like any other block
		void* allocate_object(size_t size, xml_memory_page*& out_page)
		{
			return allocate_memory(size, out_page);
		}
	#endif

		// Return a block of size bytes that was allocated from page
		// The memory is not reused; the page is reset or released once all of its allocated bytes have been returned
		void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
		{
			// the fill level of the root page lives in _busy_size; sync it so that the checks below see the current value
			if (page == _root) page->busy_size = _busy_size;

			assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
			// NOTE(review): ptr is otherwise only used by the assert above; presumably this silences unused-parameter warnings - confirm
			(void)!ptr;

			page->freed_size += size;
			assert(page->freed_size <= page->busy_size);

			if (page->freed_size == page->busy_size)
			{
				if (page->next == 0)
				{
					assert(_root == page);

					// top page freed, just reset sizes
					page->busy_size = 0;
					page->freed_size = 0;

				#ifdef PUGIXML_COMPACT
					// reset compact state to maximize efficiency
					page->compact_string_base = 0;
					page->compact_shared_parent = 0;
					page->compact_page_marker = 0;
				#endif

					_busy_size = 0;
				}
				else
				{
					assert(_root != page);
					assert(page->prev);

					// remove from the list
					page->prev->next = page->next;
					page->next->prev = page->prev;

					// deallocate
					deallocate_page(page);
				}
			}
		}

		// Allocate storage for length char_t elements, preceded by an xml_memory_string_header
		// Returns a pointer to the string storage (just past the header), or 0 on allocation failure
		char_t* allocate_string(size_t length)
		{
			// largest byte offset/size that fits into the 16-bit header fields once divided by the block alignment
			static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;

			PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);

			// allocate memory for string and header block
			size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);

			// round size up to block alignment boundary
			size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);

			xml_memory_page* page;
			xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));

			if (!header) return 0;

			// setup header
			ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);

			assert(page_offset % xml_memory_block_alignment == 0);
			assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
			header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);

			// full_size == 0 for large strings that occupy the whole page
			assert(full_size % xml_memory_block_alignment == 0);
			assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
			header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);

			// round-trip through void* to avoid 'cast increases required alignment of target type' warning
			// header is guaranteed a pointer-sized alignment, which should be enough for char_t
			return static_cast<char_t*>(static_cast<void*>(header + 1));
		}

		// Free a string that was returned by allocate_string
		// The owning page and the block size are recovered from the header stored in front of the string
		void deallocate_string(char_t* string)
		{
			// this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
			// we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string

			// get header
			xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
			assert(header);

			// deallocate
			// (page_offset here is the distance in bytes from the page structure to the header)
			size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
			xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));

			// if full_size == 0 then this string occupies the whole page
			size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;

			deallocate_memory(header, full_size, page);
		}

		// In compact mode, make sure the pointer hash table has room for upcoming insertions (see compact_hash_table::reserve);
		// otherwise there is nothing to prepare and the call always succeeds
		bool reserve()
		{
		#ifdef PUGIXML_COMPACT
			return _hash->reserve();
		#else
			return true;
		#endif
		}

		xml_memory_page* _root; // page that allocate_memory currently allocates from
		size_t _busy_size; // number of bytes already handed out from _root

	#ifdef PUGIXML_COMPACT
		compact_hash_table* _hash; // table for values that the compact encodings can't represent; set to 0 by the constructor
	#endif
	};
686 
allocate_memory_oob(size_t size,xml_memory_page * & out_page)687 	PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
688 	{
689 		const size_t large_allocation_threshold = xml_memory_page_size / 4;
690 
691 		xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
692 		out_page = page;
693 
694 		if (!page) return 0;
695 
696 		if (size <= large_allocation_threshold)
697 		{
698 			_root->busy_size = _busy_size;
699 
700 			// insert page at the end of linked list
701 			page->prev = _root;
702 			_root->next = page;
703 			_root = page;
704 
705 			_busy_size = size;
706 		}
707 		else
708 		{
709 			// insert page before the end of linked list, so that it is deleted as soon as possible
710 			// the last page is not deleted even if it's empty (see deallocate_memory)
711 			assert(_root->prev);
712 
713 			page->prev = _root->prev;
714 			page->next = _root;
715 
716 			_root->prev->next = page;
717 			_root->prev = page;
718 
719 			page->busy_size = size;
720 		}
721 
722 		return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
723 	}
724 PUGI__NS_END
725 
726 #ifdef PUGIXML_COMPACT
727 PUGI__NS_BEGIN
	// Alignment assumed by the compact encodings, as a power-of-two exponent and in bytes
	// Relative offsets are stored in units of compact_alignment bytes
	static const uintptr_t compact_alignment_log2 = 2;
	static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
730 
731 	class compact_header
732 	{
733 	public:
compact_header(xml_memory_page * page,unsigned int flags)734 		compact_header(xml_memory_page* page, unsigned int flags)
735 		{
736 			PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
737 
738 			ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
739 			assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);
740 
741 			_page = static_cast<unsigned char>(offset >> compact_alignment_log2);
742 			_flags = static_cast<unsigned char>(flags);
743 		}
744 
operator &=(uintptr_t mod)745 		void operator&=(uintptr_t mod)
746 		{
747 			_flags &= static_cast<unsigned char>(mod);
748 		}
749 
operator |=(uintptr_t mod)750 		void operator|=(uintptr_t mod)
751 		{
752 			_flags |= static_cast<unsigned char>(mod);
753 		}
754 
operator &(uintptr_t mod) const755 		uintptr_t operator&(uintptr_t mod) const
756 		{
757 			return _flags & mod;
758 		}
759 
get_page() const760 		xml_memory_page* get_page() const
761 		{
762 			// round-trip through void* to silence 'cast increases required alignment of target type' warnings
763 			const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
764 			const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));
765 
766 			return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
767 		}
768 
769 	private:
770 		unsigned char _page;
771 		unsigned char _flags;
772 	};
773 
compact_get_page(const void * object,int header_offset)774 	PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
775 	{
776 		const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset);
777 
778 		return header->get_page();
779 	}
780 
compact_get_value(const void * object)781 	template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object)
782 	{
783 		return static_cast<T*>(*compact_get_page(object, header_offset)->allocator->_hash->find(object));
784 	}
785 
compact_set_value(const void * object,T * value)786 	template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value)
787 	{
788 		*compact_get_page(object, header_offset)->allocator->_hash->insert(object) = value;
789 	}
790 
791 	template <typename T, int header_offset, int start = -126> class compact_pointer
792 	{
793 	public:
compact_pointer()794 		compact_pointer(): _data(0)
795 		{
796 		}
797 
operator =(const compact_pointer & rhs)798 		void operator=(const compact_pointer& rhs)
799 		{
800 			*this = rhs + 0;
801 		}
802 
operator =(T * value)803 		void operator=(T* value)
804 		{
805 			if (value)
806 			{
807 				// value is guaranteed to be compact-aligned; 'this' is not
808 				// our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
809 				// so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
810 				// compensate for arithmetic shift rounding for negative values
811 				ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
812 				ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
813 
814 				if (static_cast<uintptr_t>(offset) <= 253)
815 					_data = static_cast<unsigned char>(offset + 1);
816 				else
817 				{
818 					compact_set_value<header_offset>(this, value);
819 
820 					_data = 255;
821 				}
822 			}
823 			else
824 				_data = 0;
825 		}
826 
operator T*() const827 		operator T*() const
828 		{
829 			if (_data)
830 			{
831 				if (_data < 255)
832 				{
833 					uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
834 
835 					return reinterpret_cast<T*>(base + ((_data - 1 + start) << compact_alignment_log2));
836 				}
837 				else
838 					return compact_get_value<header_offset, T>(this);
839 			}
840 			else
841 				return 0;
842 		}
843 
operator ->() const844 		T* operator->() const
845 		{
846 			return *this;
847 		}
848 
849 	private:
850 		unsigned char _data;
851 	};
852 
853 	template <typename T, int header_offset> class compact_pointer_parent
854 	{
855 	public:
compact_pointer_parent()856 		compact_pointer_parent(): _data(0)
857 		{
858 		}
859 
operator =(const compact_pointer_parent & rhs)860 		void operator=(const compact_pointer_parent& rhs)
861 		{
862 			*this = rhs + 0;
863 		}
864 
operator =(T * value)865 		void operator=(T* value)
866 		{
867 			if (value)
868 			{
869 				// value is guaranteed to be compact-aligned; 'this' is not
870 				// our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
871 				// so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
872 				// compensate for arithmetic shift behavior for negative values
873 				ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
874 				ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
875 
876 				if (static_cast<uintptr_t>(offset) <= 65533)
877 				{
878 					_data = static_cast<unsigned short>(offset + 1);
879 				}
880 				else
881 				{
882 					xml_memory_page* page = compact_get_page(this, header_offset);
883 
884 					if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
885 						page->compact_shared_parent = value;
886 
887 					if (page->compact_shared_parent == value)
888 					{
889 						_data = 65534;
890 					}
891 					else
892 					{
893 						compact_set_value<header_offset>(this, value);
894 
895 						_data = 65535;
896 					}
897 				}
898 			}
899 			else
900 			{
901 				_data = 0;
902 			}
903 		}
904 
operator T*() const905 		operator T*() const
906 		{
907 			if (_data)
908 			{
909 				if (_data < 65534)
910 				{
911 					uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
912 
913 					return reinterpret_cast<T*>(base + ((_data - 1 - 65533) << compact_alignment_log2));
914 				}
915 				else if (_data == 65534)
916 					return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
917 				else
918 					return compact_get_value<header_offset, T>(this);
919 			}
920 			else
921 				return 0;
922 		}
923 
operator ->() const924 		T* operator->() const
925 		{
926 			return *this;
927 		}
928 
929 	private:
930 		uint16_t _data;
931 	};
932 
933 	template <int header_offset, int base_offset> class compact_string
934 	{
935 	public:
compact_string()936 		compact_string(): _data(0)
937 		{
938 		}
939 
operator =(const compact_string & rhs)940 		void operator=(const compact_string& rhs)
941 		{
942 			*this = rhs + 0;
943 		}
944 
operator =(char_t * value)945 		void operator=(char_t* value)
946 		{
947 			if (value)
948 			{
949 				xml_memory_page* page = compact_get_page(this, header_offset);
950 
951 				if (PUGI__UNLIKELY(page->compact_string_base == 0))
952 					page->compact_string_base = value;
953 
954 				ptrdiff_t offset = value - page->compact_string_base;
955 
956 				if (static_cast<uintptr_t>(offset) < (65535 << 7))
957 				{
958 					// round-trip through void* to silence 'cast increases required alignment of target type' warnings
959 					uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));
960 
961 					if (*base == 0)
962 					{
963 						*base = static_cast<uint16_t>((offset >> 7) + 1);
964 						_data = static_cast<unsigned char>((offset & 127) + 1);
965 					}
966 					else
967 					{
968 						ptrdiff_t remainder = offset - ((*base - 1) << 7);
969 
970 						if (static_cast<uintptr_t>(remainder) <= 253)
971 						{
972 							_data = static_cast<unsigned char>(remainder + 1);
973 						}
974 						else
975 						{
976 							compact_set_value<header_offset>(this, value);
977 
978 							_data = 255;
979 						}
980 					}
981 				}
982 				else
983 				{
984 					compact_set_value<header_offset>(this, value);
985 
986 					_data = 255;
987 				}
988 			}
989 			else
990 			{
991 				_data = 0;
992 			}
993 		}
994 
operator char_t*() const995 		operator char_t*() const
996 		{
997 			if (_data)
998 			{
999 				if (_data < 255)
1000 				{
1001 					xml_memory_page* page = compact_get_page(this, header_offset);
1002 
1003 					// round-trip through void* to silence 'cast increases required alignment of target type' warnings
1004 					const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
1005 					assert(*base);
1006 
1007 					ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
1008 
1009 					return page->compact_string_base + offset;
1010 				}
1011 				else
1012 				{
1013 					return compact_get_value<header_offset, char_t>(this);
1014 				}
1015 			}
1016 			else
1017 				return 0;
1018 		}
1019 
1020 	private:
1021 		unsigned char _data;
1022 	};
1023 PUGI__NS_END
1024 #endif
1025 
1026 #ifdef PUGIXML_COMPACT
1027 namespace pugi
1028 {
1029 	struct xml_attribute_struct
1030 	{
xml_attribute_structpugi::xml_attribute_struct1031 		xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
1032 		{
1033 			PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
1034 		}
1035 
1036 		impl::compact_header header;
1037 
1038 		uint16_t namevalue_base;
1039 
1040 		impl::compact_string<4, 2> name;
1041 		impl::compact_string<5, 3> value;
1042 
1043 		impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c;
1044 		impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute;
1045 	};
1046 
1047 	struct xml_node_struct
1048 	{
xml_node_structpugi::xml_node_struct1049 		xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type - 1), namevalue_base(0)
1050 		{
1051 			PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
1052 		}
1053 
1054 		impl::compact_header header;
1055 
1056 		uint16_t namevalue_base;
1057 
1058 		impl::compact_string<4, 2> name;
1059 		impl::compact_string<5, 3> value;
1060 
1061 		impl::compact_pointer_parent<xml_node_struct, 6> parent;
1062 
1063 		impl::compact_pointer<xml_node_struct, 8, 0> first_child;
1064 
1065 		impl::compact_pointer<xml_node_struct,  9>    prev_sibling_c;
1066 		impl::compact_pointer<xml_node_struct, 10, 0> next_sibling;
1067 
1068 		impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
1069 	};
1070 }
1071 #else
1072 namespace pugi
1073 {
1074 	struct xml_attribute_struct
1075 	{
1076 		xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0)
1077 		{
1078 		}
1079 
1080 		uintptr_t header;
1081 
1082 		char_t*	name;
1083 		char_t*	value;
1084 
1085 		xml_attribute_struct* prev_attribute_c;
1086 		xml_attribute_struct* next_attribute;
1087 	};
1088 
1089 	struct xml_node_struct
1090 	{
1091 		xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
1092 		{
1093 		}
1094 
1095 		uintptr_t header;
1096 
1097 		char_t* name;
1098 		char_t* value;
1099 
1100 		xml_node_struct* parent;
1101 
1102 		xml_node_struct* first_child;
1103 
1104 		xml_node_struct* prev_sibling_c;
1105 		xml_node_struct* next_sibling;
1106 
1107 		xml_attribute_struct* first_attribute;
1108 	};
1109 }
1110 #endif
1111 
1112 PUGI__NS_BEGIN
	// Entry of a singly linked list of character buffers (chained through 'next');
	// xml_document_struct keeps the list head in its extra_buffers member.
	struct xml_extra_buffer
	{
		char_t* buffer;
		xml_extra_buffer* next;
	};
1118 
1119 	struct xml_document_struct: public xml_node_struct, public xml_allocator
1120 	{
xml_document_structxml_document_struct1121 		xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
1122 		{
1123 		#ifdef PUGIXML_COMPACT
1124 			_hash = &hash;
1125 		#endif
1126 		}
1127 
1128 		const char_t* buffer;
1129 
1130 		xml_extra_buffer* extra_buffers;
1131 
1132 	#ifdef PUGIXML_COMPACT
1133 		compact_hash_table hash;
1134 	#endif
1135 	};
1136 
get_allocator(const Object * object)1137 	template <typename Object> inline xml_allocator& get_allocator(const Object* object)
1138 	{
1139 		assert(object);
1140 
1141 		return *PUGI__GETPAGE(object)->allocator;
1142 	}
1143 
get_document(const Object * object)1144 	template <typename Object> inline xml_document_struct& get_document(const Object* object)
1145 	{
1146 		assert(object);
1147 
1148 		return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator);
1149 	}
1150 PUGI__NS_END
1151 
1152 // Low-level DOM operations
1153 PUGI__NS_BEGIN
allocate_attribute(xml_allocator & alloc)1154 	inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
1155 	{
1156 		xml_memory_page* page;
1157 		void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
1158 		if (!memory) return 0;
1159 
1160 		return new (memory) xml_attribute_struct(page);
1161 	}
1162 
allocate_node(xml_allocator & alloc,xml_node_type type)1163 	inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
1164 	{
1165 		xml_memory_page* page;
1166 		void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
1167 		if (!memory) return 0;
1168 
1169 		return new (memory) xml_node_struct(page, type);
1170 	}
1171 
destroy_attribute(xml_attribute_struct * a,xml_allocator & alloc)1172 	inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
1173 	{
1174 		if (a->header & impl::xml_memory_page_name_allocated_mask)
1175 			alloc.deallocate_string(a->name);
1176 
1177 		if (a->header & impl::xml_memory_page_value_allocated_mask)
1178 			alloc.deallocate_string(a->value);
1179 
1180 		alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
1181 	}
1182 
destroy_node(xml_node_struct * n,xml_allocator & alloc)1183 	inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
1184 	{
1185 		if (n->header & impl::xml_memory_page_name_allocated_mask)
1186 			alloc.deallocate_string(n->name);
1187 
1188 		if (n->header & impl::xml_memory_page_value_allocated_mask)
1189 			alloc.deallocate_string(n->value);
1190 
1191 		for (xml_attribute_struct* attr = n->first_attribute; attr; )
1192 		{
1193 			xml_attribute_struct* next = attr->next_attribute;
1194 
1195 			destroy_attribute(attr, alloc);
1196 
1197 			attr = next;
1198 		}
1199 
1200 		for (xml_node_struct* child = n->first_child; child; )
1201 		{
1202 			xml_node_struct* next = child->next_sibling;
1203 
1204 			destroy_node(child, alloc);
1205 
1206 			child = next;
1207 		}
1208 
1209 		alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
1210 	}
1211 
append_node(xml_node_struct * child,xml_node_struct * node)1212 	inline void append_node(xml_node_struct* child, xml_node_struct* node)
1213 	{
1214 		child->parent = node;
1215 
1216 		xml_node_struct* head = node->first_child;
1217 
1218 		if (head)
1219 		{
1220 			xml_node_struct* tail = head->prev_sibling_c;
1221 
1222 			tail->next_sibling = child;
1223 			child->prev_sibling_c = tail;
1224 			head->prev_sibling_c = child;
1225 		}
1226 		else
1227 		{
1228 			node->first_child = child;
1229 			child->prev_sibling_c = child;
1230 		}
1231 	}
1232 
prepend_node(xml_node_struct * child,xml_node_struct * node)1233 	inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
1234 	{
1235 		child->parent = node;
1236 
1237 		xml_node_struct* head = node->first_child;
1238 
1239 		if (head)
1240 		{
1241 			child->prev_sibling_c = head->prev_sibling_c;
1242 			head->prev_sibling_c = child;
1243 		}
1244 		else
1245 			child->prev_sibling_c = child;
1246 
1247 		child->next_sibling = head;
1248 		node->first_child = child;
1249 	}
1250 
insert_node_after(xml_node_struct * child,xml_node_struct * node)1251 	inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
1252 	{
1253 		xml_node_struct* parent = node->parent;
1254 
1255 		child->parent = parent;
1256 
1257 		if (node->next_sibling)
1258 			node->next_sibling->prev_sibling_c = child;
1259 		else
1260 			parent->first_child->prev_sibling_c = child;
1261 
1262 		child->next_sibling = node->next_sibling;
1263 		child->prev_sibling_c = node;
1264 
1265 		node->next_sibling = child;
1266 	}
1267 
insert_node_before(xml_node_struct * child,xml_node_struct * node)1268 	inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
1269 	{
1270 		xml_node_struct* parent = node->parent;
1271 
1272 		child->parent = parent;
1273 
1274 		if (node->prev_sibling_c->next_sibling)
1275 			node->prev_sibling_c->next_sibling = child;
1276 		else
1277 			parent->first_child = child;
1278 
1279 		child->prev_sibling_c = node->prev_sibling_c;
1280 		child->next_sibling = node;
1281 
1282 		node->prev_sibling_c = child;
1283 	}
1284 
remove_node(xml_node_struct * node)1285 	inline void remove_node(xml_node_struct* node)
1286 	{
1287 		xml_node_struct* parent = node->parent;
1288 
1289 		if (node->next_sibling)
1290 			node->next_sibling->prev_sibling_c = node->prev_sibling_c;
1291 		else
1292 			parent->first_child->prev_sibling_c = node->prev_sibling_c;
1293 
1294 		if (node->prev_sibling_c->next_sibling)
1295 			node->prev_sibling_c->next_sibling = node->next_sibling;
1296 		else
1297 			parent->first_child = node->next_sibling;
1298 
1299 		node->parent = 0;
1300 		node->prev_sibling_c = 0;
1301 		node->next_sibling = 0;
1302 	}
1303 
append_attribute(xml_attribute_struct * attr,xml_node_struct * node)1304 	inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1305 	{
1306 		xml_attribute_struct* head = node->first_attribute;
1307 
1308 		if (head)
1309 		{
1310 			xml_attribute_struct* tail = head->prev_attribute_c;
1311 
1312 			tail->next_attribute = attr;
1313 			attr->prev_attribute_c = tail;
1314 			head->prev_attribute_c = attr;
1315 		}
1316 		else
1317 		{
1318 			node->first_attribute = attr;
1319 			attr->prev_attribute_c = attr;
1320 		}
1321 	}
1322 
prepend_attribute(xml_attribute_struct * attr,xml_node_struct * node)1323 	inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1324 	{
1325 		xml_attribute_struct* head = node->first_attribute;
1326 
1327 		if (head)
1328 		{
1329 			attr->prev_attribute_c = head->prev_attribute_c;
1330 			head->prev_attribute_c = attr;
1331 		}
1332 		else
1333 			attr->prev_attribute_c = attr;
1334 
1335 		attr->next_attribute = head;
1336 		node->first_attribute = attr;
1337 	}
1338 
insert_attribute_after(xml_attribute_struct * attr,xml_attribute_struct * place,xml_node_struct * node)1339 	inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1340 	{
1341 		if (place->next_attribute)
1342 			place->next_attribute->prev_attribute_c = attr;
1343 		else
1344 			node->first_attribute->prev_attribute_c = attr;
1345 
1346 		attr->next_attribute = place->next_attribute;
1347 		attr->prev_attribute_c = place;
1348 		place->next_attribute = attr;
1349 	}
1350 
insert_attribute_before(xml_attribute_struct * attr,xml_attribute_struct * place,xml_node_struct * node)1351 	inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1352 	{
1353 		if (place->prev_attribute_c->next_attribute)
1354 			place->prev_attribute_c->next_attribute = attr;
1355 		else
1356 			node->first_attribute = attr;
1357 
1358 		attr->prev_attribute_c = place->prev_attribute_c;
1359 		attr->next_attribute = place;
1360 		place->prev_attribute_c = attr;
1361 	}
1362 
remove_attribute(xml_attribute_struct * attr,xml_node_struct * node)1363 	inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1364 	{
1365 		if (attr->next_attribute)
1366 			attr->next_attribute->prev_attribute_c = attr->prev_attribute_c;
1367 		else
1368 			node->first_attribute->prev_attribute_c = attr->prev_attribute_c;
1369 
1370 		if (attr->prev_attribute_c->next_attribute)
1371 			attr->prev_attribute_c->next_attribute = attr->next_attribute;
1372 		else
1373 			node->first_attribute = attr->next_attribute;
1374 
1375 		attr->prev_attribute_c = 0;
1376 		attr->next_attribute = 0;
1377 	}
1378 
append_new_node(xml_node_struct * node,xml_allocator & alloc,xml_node_type type=node_element)1379 	PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
1380 	{
1381 		if (!alloc.reserve()) return 0;
1382 
1383 		xml_node_struct* child = allocate_node(alloc, type);
1384 		if (!child) return 0;
1385 
1386 		append_node(child, node);
1387 
1388 		return child;
1389 	}
1390 
append_new_attribute(xml_node_struct * node,xml_allocator & alloc)1391 	PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
1392 	{
1393 		if (!alloc.reserve()) return 0;
1394 
1395 		xml_attribute_struct* attr = allocate_attribute(alloc);
1396 		if (!attr) return 0;
1397 
1398 		append_attribute(attr, node);
1399 
1400 		return attr;
1401 	}
1402 PUGI__NS_END
1403 
1404 // Helper classes for code generation
1405 PUGI__NS_BEGIN
	// Compile-time "off" switch: a type whose nested constant 'value' is 0 (passed as the opt_swap argument of the decoders).
	struct opt_false
	{
		enum { value = 0 };
	};
1410 
	// Compile-time "on" switch: a type whose nested constant 'value' is 1.
	struct opt_true
	{
		enum { value = 1 };
	};
1415 PUGI__NS_END
1416 
1417 // Unicode utilities
1418 PUGI__NS_BEGIN
endian_swap(uint16_t value)1419 	inline uint16_t endian_swap(uint16_t value)
1420 	{
1421 		return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8));
1422 	}
1423 
endian_swap(uint32_t value)1424 	inline uint32_t endian_swap(uint32_t value)
1425 	{
1426 		return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24);
1427 	}
1428 
1429 	struct utf8_counter
1430 	{
1431 		typedef size_t value_type;
1432 
lowutf8_counter1433 		static value_type low(value_type result, uint32_t ch)
1434 		{
1435 			// U+0000..U+007F
1436 			if (ch < 0x80) return result + 1;
1437 			// U+0080..U+07FF
1438 			else if (ch < 0x800) return result + 2;
1439 			// U+0800..U+FFFF
1440 			else return result + 3;
1441 		}
1442 
highutf8_counter1443 		static value_type high(value_type result, uint32_t)
1444 		{
1445 			// U+10000..U+10FFFF
1446 			return result + 4;
1447 		}
1448 	};
1449 
1450 	struct utf8_writer
1451 	{
1452 		typedef uint8_t* value_type;
1453 
lowutf8_writer1454 		static value_type low(value_type result, uint32_t ch)
1455 		{
1456 			// U+0000..U+007F
1457 			if (ch < 0x80)
1458 			{
1459 				*result = static_cast<uint8_t>(ch);
1460 				return result + 1;
1461 			}
1462 			// U+0080..U+07FF
1463 			else if (ch < 0x800)
1464 			{
1465 				result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
1466 				result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1467 				return result + 2;
1468 			}
1469 			// U+0800..U+FFFF
1470 			else
1471 			{
1472 				result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
1473 				result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
1474 				result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1475 				return result + 3;
1476 			}
1477 		}
1478 
highutf8_writer1479 		static value_type high(value_type result, uint32_t ch)
1480 		{
1481 			// U+10000..U+10FFFF
1482 			result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
1483 			result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
1484 			result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
1485 			result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
1486 			return result + 4;
1487 		}
1488 
anyutf8_writer1489 		static value_type any(value_type result, uint32_t ch)
1490 		{
1491 			return (ch < 0x10000) ? low(result, ch) : high(result, ch);
1492 		}
1493 	};
1494 
1495 	struct utf16_counter
1496 	{
1497 		typedef size_t value_type;
1498 
lowutf16_counter1499 		static value_type low(value_type result, uint32_t)
1500 		{
1501 			return result + 1;
1502 		}
1503 
highutf16_counter1504 		static value_type high(value_type result, uint32_t)
1505 		{
1506 			return result + 2;
1507 		}
1508 	};
1509 
1510 	struct utf16_writer
1511 	{
1512 		typedef uint16_t* value_type;
1513 
lowutf16_writer1514 		static value_type low(value_type result, uint32_t ch)
1515 		{
1516 			*result = static_cast<uint16_t>(ch);
1517 
1518 			return result + 1;
1519 		}
1520 
highutf16_writer1521 		static value_type high(value_type result, uint32_t ch)
1522 		{
1523 			uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10;
1524 			uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff;
1525 
1526 			result[0] = static_cast<uint16_t>(0xD800 + msh);
1527 			result[1] = static_cast<uint16_t>(0xDC00 + lsh);
1528 
1529 			return result + 2;
1530 		}
1531 
anyutf16_writer1532 		static value_type any(value_type result, uint32_t ch)
1533 		{
1534 			return (ch < 0x10000) ? low(result, ch) : high(result, ch);
1535 		}
1536 	};
1537 
1538 	struct utf32_counter
1539 	{
1540 		typedef size_t value_type;
1541 
lowutf32_counter1542 		static value_type low(value_type result, uint32_t)
1543 		{
1544 			return result + 1;
1545 		}
1546 
highutf32_counter1547 		static value_type high(value_type result, uint32_t)
1548 		{
1549 			return result + 1;
1550 		}
1551 	};
1552 
1553 	struct utf32_writer
1554 	{
1555 		typedef uint32_t* value_type;
1556 
lowutf32_writer1557 		static value_type low(value_type result, uint32_t ch)
1558 		{
1559 			*result = ch;
1560 
1561 			return result + 1;
1562 		}
1563 
highutf32_writer1564 		static value_type high(value_type result, uint32_t ch)
1565 		{
1566 			*result = ch;
1567 
1568 			return result + 1;
1569 		}
1570 
anyutf32_writer1571 		static value_type any(value_type result, uint32_t ch)
1572 		{
1573 			*result = ch;
1574 
1575 			return result + 1;
1576 		}
1577 	};
1578 
1579 	struct latin1_writer
1580 	{
1581 		typedef uint8_t* value_type;
1582 
lowlatin1_writer1583 		static value_type low(value_type result, uint32_t ch)
1584 		{
1585 			*result = static_cast<uint8_t>(ch > 255 ? '?' : ch);
1586 
1587 			return result + 1;
1588 		}
1589 
highlatin1_writer1590 		static value_type high(value_type result, uint32_t ch)
1591 		{
1592 			(void)ch;
1593 
1594 			*result = '?';
1595 
1596 			return result + 1;
1597 		}
1598 	};
1599 
1600 	struct utf8_decoder
1601 	{
1602 		typedef uint8_t type;
1603 
processutf8_decoder1604 		template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
1605 		{
1606 			const uint8_t utf8_byte_mask = 0x3f;
1607 
1608 			while (size)
1609 			{
1610 				uint8_t lead = *data;
1611 
1612 				// 0xxxxxxx -> U+0000..U+007F
1613 				if (lead < 0x80)
1614 				{
1615 					result = Traits::low(result, lead);
1616 					data += 1;
1617 					size -= 1;
1618 
1619 					// process aligned single-byte (ascii) blocks
1620 					if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
1621 					{
1622 						// round-trip through void* to silence 'cast increases required alignment of target type' warnings
1623 						while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
1624 						{
1625 							result = Traits::low(result, data[0]);
1626 							result = Traits::low(result, data[1]);
1627 							result = Traits::low(result, data[2]);
1628 							result = Traits::low(result, data[3]);
1629 							data += 4;
1630 							size -= 4;
1631 						}
1632 					}
1633 				}
1634 				// 110xxxxx -> U+0080..U+07FF
1635 				else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
1636 				{
1637 					result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
1638 					data += 2;
1639 					size -= 2;
1640 				}
1641 				// 1110xxxx -> U+0800-U+FFFF
1642 				else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
1643 				{
1644 					result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
1645 					data += 3;
1646 					size -= 3;
1647 				}
1648 				// 11110xxx -> U+10000..U+10FFFF
1649 				else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
1650 				{
1651 					result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
1652 					data += 4;
1653 					size -= 4;
1654 				}
1655 				// 10xxxxxx or 11111xxx -> invalid
1656 				else
1657 				{
1658 					data += 1;
1659 					size -= 1;
1660 				}
1661 			}
1662 
1663 			return result;
1664 		}
1665 	};
1666 
1667 	template <typename opt_swap> struct utf16_decoder
1668 	{
1669 		typedef uint16_t type;
1670 
processutf16_decoder1671 		template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
1672 		{
1673 			while (size)
1674 			{
1675 				uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
1676 
1677 				// U+0000..U+D7FF
1678 				if (lead < 0xD800)
1679 				{
1680 					result = Traits::low(result, lead);
1681 					data += 1;
1682 					size -= 1;
1683 				}
1684 				// U+E000..U+FFFF
1685 				else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
1686 				{
1687 					result = Traits::low(result, lead);
1688 					data += 1;
1689 					size -= 1;
1690 				}
1691 				// surrogate pair lead
1692 				else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2)
1693 				{
1694 					uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
1695 
1696 					if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
1697 					{
1698 						result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
1699 						data += 2;
1700 						size -= 2;
1701 					}
1702 					else
1703 					{
1704 						data += 1;
1705 						size -= 1;
1706 					}
1707 				}
1708 				else
1709 				{
1710 					data += 1;
1711 					size -= 1;
1712 				}
1713 			}
1714 
1715 			return result;
1716 		}
1717 	};
1718 
1719 	template <typename opt_swap> struct utf32_decoder
1720 	{
1721 		typedef uint32_t type;
1722 
processutf32_decoder1723 		template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
1724 		{
1725 			while (size)
1726 			{
1727 				uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
1728 
1729 				// U+0000..U+FFFF
1730 				if (lead < 0x10000)
1731 				{
1732 					result = Traits::low(result, lead);
1733 					data += 1;
1734 					size -= 1;
1735 				}
1736 				// U+10000..U+10FFFF
1737 				else
1738 				{
1739 					result = Traits::high(result, lead);
1740 					data += 1;
1741 					size -= 1;
1742 				}
1743 			}
1744 
1745 			return result;
1746 		}
1747 	};
1748 
1749 	struct latin1_decoder
1750 	{
1751 		typedef uint8_t type;
1752 
processlatin1_decoder1753 		template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
1754 		{
1755 			while (size)
1756 			{
1757 				result = Traits::low(result, *data);
1758 				data += 1;
1759 				size -= 1;
1760 			}
1761 
1762 			return result;
1763 		}
1764 	};
1765 
	// Maps the byte size of wchar_t to the matching code unit type, counter, writer and decoder.
	// Only the 2-byte and 4-byte specializations exist, so any other size fails to compile.
	template <size_t size> struct wchar_selector;

	// 2-byte wchar_t: handled as UTF-16 without byte swapping
	template <> struct wchar_selector<2>
	{
		typedef uint16_t type;
		typedef utf16_counter counter;
		typedef utf16_writer writer;
		typedef utf16_decoder<opt_false> decoder;
	};

	// 4-byte wchar_t: handled as UTF-32 without byte swapping
	template <> struct wchar_selector<4>
	{
		typedef uint32_t type;
		typedef utf32_counter counter;
		typedef utf32_writer writer;
		typedef utf32_decoder<opt_false> decoder;
	};

	// Counter and writer traits for the platform's wchar_t
	typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
	typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
1786 
1787 	struct wchar_decoder
1788 	{
1789 		typedef wchar_t type;
1790 
processwchar_decoder1791 		template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
1792 		{
1793 			typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
1794 
1795 			return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
1796 		}
1797 	};
1798 
1799 #ifdef PUGIXML_WCHAR_MODE
convert_wchar_endian_swap(wchar_t * result,const wchar_t * data,size_t length)1800 	PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
1801 	{
1802 		for (size_t i = 0; i < length; ++i)
1803 			result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
1804 	}
1805 #endif
1806 PUGI__NS_END
1807 
1808 PUGI__NS_BEGIN
1809 	enum chartype_t
1810 	{
1811 		ct_parse_pcdata = 1,	// \0, &, \r, <
1812 		ct_parse_attr = 2,		// \0, &, \r, ', "
1813 		ct_parse_attr_ws = 4,	// \0, &, \r, ', ", \n, tab
1814 		ct_space = 8,			// \r, \n, space, tab
1815 		ct_parse_cdata = 16,	// \0, ], >, \r
1816 		ct_parse_comment = 32,	// \0, -, >, \r
1817 		ct_symbol = 64,			// Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
1818 		ct_start_symbol = 128	// Any symbol > 127, a-z, A-Z, _, :
1819 	};
1820 
	// Per-character classification table: entry i is the OR of the chartype_t flags that apply to character code i.
	// For example '<' (60) holds 1 (ct_parse_pcdata) and ':' (58) holds 192 (ct_symbol | ct_start_symbol).
	static const unsigned char chartype_table[256] =
	{
		55,  0,   0,   0,   0,   0,   0,   0,      0,   12,  12,  0,   0,   63,  0,   0,   // 0-15
		0,   0,   0,   0,   0,   0,   0,   0,      0,   0,   0,   0,   0,   0,   0,   0,   // 16-31
		8,   0,   6,   0,   0,   0,   7,   6,      0,   0,   0,   0,   0,   96,  64,  0,   // 32-47
		64,  64,  64,  64,  64,  64,  64,  64,     64,  64,  192, 0,   1,   0,   48,  0,   // 48-63
		0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 64-79
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0,   0,   16,  0,   192, // 80-95
		0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 96-111
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0, 0, 0, 0, 0,           // 112-127

		// every code above 127 counts as both a symbol and a start symbol
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 128+
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
		192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192
	};
1841 
1842 	enum chartypex_t
1843 	{
1844 		ctx_special_pcdata = 1,   // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
1845 		ctx_special_attr = 2,     // Any symbol >= 0 and < 32 (except \t), &, <, >, "
1846 		ctx_start_symbol = 4,	  // Any symbol > 127, a-z, A-Z, _
1847 		ctx_digit = 8,			  // 0-9
1848 		ctx_symbol = 16			  // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
1849 	};
1850 
	// Per-character classification table: entry i is the OR of the chartypex_t flags that apply to character code i.
	// For example the digits (48-57) hold 24 (ctx_digit | ctx_symbol) and the letters hold 20 (ctx_start_symbol | ctx_symbol).
	static const unsigned char chartypex_table[256] =
	{
		3,  3,  3,  3,  3,  3,  3,  3,     3,  0,  2,  3,  3,  2,  3,  3,     // 0-15
		3,  3,  3,  3,  3,  3,  3,  3,     3,  3,  3,  3,  3,  3,  3,  3,     // 16-31
		0,  0,  2,  0,  0,  0,  3,  0,     0,  0,  0,  0,  0, 16, 16,  0,     // 32-47
		24, 24, 24, 24, 24, 24, 24, 24,    24, 24, 0,  0,  3,  0,  3,  0,     // 48-63

		0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 64-79
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  20,    // 80-95
		0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 96-111
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  0,     // 112-127

		// every code above 127 counts as both a start symbol and a symbol
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 128+
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
		20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20
	};
1872 
#ifdef PUGIXML_WCHAR_MODE
	// Wide-character mode: codes below 128 index the table directly, every other code shares entry 128
	#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
#else
	// Narrow mode: the character is converted to unsigned char and used as the table index
	#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
#endif

	// Nonzero when character c carries any of the flags in ct: chartype_t flags for PUGI__IS_CHARTYPE, chartypex_t flags for PUGI__IS_CHARTYPEX
	#define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
	#define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
1881 
is_little_endian()1882 	PUGI__FN bool is_little_endian()
1883 	{
1884 		unsigned int ui = 1;
1885 
1886 		return *reinterpret_cast<unsigned char*>(&ui) == 1;
1887 	}
1888 
get_wchar_encoding()1889 	PUGI__FN xml_encoding get_wchar_encoding()
1890 	{
1891 		PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1892 
1893 		if (sizeof(wchar_t) == 2)
1894 			return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1895 		else
1896 			return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1897 	}
1898 
guess_buffer_encoding(uint8_t d0,uint8_t d1,uint8_t d2,uint8_t d3)1899 	PUGI__FN xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3)
1900 	{
1901 		// look for BOM in first few bytes
1902 		if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
1903 		if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
1904 		if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
1905 		if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
1906 		if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
1907 
1908 		// look for <, <? or <?xm in various encodings
1909 		if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
1910 		if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
1911 		if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
1912 		if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
1913 		if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return encoding_utf8;
1914 
1915 		// look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
1916 		if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
1917 		if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
1918 
1919 		// no known BOM detected, assume utf8
1920 		return encoding_utf8;
1921 	}
1922 
get_buffer_encoding(xml_encoding encoding,const void * contents,size_t size)1923 	PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
1924 	{
1925 		// replace wchar encoding with utf implementation
1926 		if (encoding == encoding_wchar) return get_wchar_encoding();
1927 
1928 		// replace utf16 encoding with utf16 with specific endianness
1929 		if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1930 
1931 		// replace utf32 encoding with utf32 with specific endianness
1932 		if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1933 
1934 		// only do autodetection if no explicit encoding is requested
1935 		if (encoding != encoding_auto) return encoding;
1936 
1937 		// skip encoding autodetection if input buffer is too small
1938 		if (size < 4) return encoding_utf8;
1939 
1940 		// try to guess encoding (based on XML specification, Appendix F.1)
1941 		const uint8_t* data = static_cast<const uint8_t*>(contents);
1942 
1943 		PUGI__DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
1944 
1945 		return guess_buffer_encoding(d0, d1, d2, d3);
1946 	}
1947 
get_mutable_buffer(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)1948 	PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
1949 	{
1950 		size_t length = size / sizeof(char_t);
1951 
1952 		if (is_mutable)
1953 		{
1954 			out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
1955 			out_length = length;
1956 		}
1957 		else
1958 		{
1959 			char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
1960 			if (!buffer) return false;
1961 
1962 			if (contents)
1963 				memcpy(buffer, contents, length * sizeof(char_t));
1964 			else
1965 				assert(length == 0);
1966 
1967 			buffer[length] = 0;
1968 
1969 			out_buffer = buffer;
1970 			out_length = length + 1;
1971 		}
1972 
1973 		return true;
1974 	}
1975 
1976 #ifdef PUGIXML_WCHAR_MODE
need_endian_swap_utf(xml_encoding le,xml_encoding re)1977 	PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
1978 	{
1979 		return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
1980 			   (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
1981 	}
1982 
convert_buffer_endian_swap(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)1983 	PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
1984 	{
1985 		const char_t* data = static_cast<const char_t*>(contents);
1986 		size_t length = size / sizeof(char_t);
1987 
1988 		if (is_mutable)
1989 		{
1990 			char_t* buffer = const_cast<char_t*>(data);
1991 
1992 			convert_wchar_endian_swap(buffer, data, length);
1993 
1994 			out_buffer = buffer;
1995 			out_length = length;
1996 		}
1997 		else
1998 		{
1999 			char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2000 			if (!buffer) return false;
2001 
2002 			convert_wchar_endian_swap(buffer, data, length);
2003 			buffer[length] = 0;
2004 
2005 			out_buffer = buffer;
2006 			out_length = length + 1;
2007 		}
2008 
2009 		return true;
2010 	}
2011 
convert_buffer_generic(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,D)2012 	template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2013 	{
2014 		const typename D::type* data = static_cast<const typename D::type*>(contents);
2015 		size_t data_length = size / sizeof(typename D::type);
2016 
2017 		// first pass: get length in wchar_t units
2018 		size_t length = D::process(data, data_length, 0, wchar_counter());
2019 
2020 		// allocate buffer of suitable length
2021 		char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2022 		if (!buffer) return false;
2023 
2024 		// second pass: convert utf16 input to wchar_t
2025 		wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
2026 		wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
2027 
2028 		assert(oend == obegin + length);
2029 		*oend = 0;
2030 
2031 		out_buffer = buffer;
2032 		out_length = length + 1;
2033 
2034 		return true;
2035 	}
2036 
convert_buffer(char_t * & out_buffer,size_t & out_length,xml_encoding encoding,const void * contents,size_t size,bool is_mutable)2037 	PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2038 	{
2039 		// get native encoding
2040 		xml_encoding wchar_encoding = get_wchar_encoding();
2041 
2042 		// fast path: no conversion required
2043 		if (encoding == wchar_encoding)
2044 			return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2045 
2046 		// only endian-swapping is required
2047 		if (need_endian_swap_utf(encoding, wchar_encoding))
2048 			return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
2049 
2050 		// source encoding is utf8
2051 		if (encoding == encoding_utf8)
2052 			return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
2053 
2054 		// source encoding is utf16
2055 		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2056 		{
2057 			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2058 
2059 			return (native_encoding == encoding) ?
2060 				convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2061 				convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2062 		}
2063 
2064 		// source encoding is utf32
2065 		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2066 		{
2067 			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2068 
2069 			return (native_encoding == encoding) ?
2070 				convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2071 				convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2072 		}
2073 
2074 		// source encoding is latin1
2075 		if (encoding == encoding_latin1)
2076 			return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
2077 
2078 		assert(!"Invalid encoding");
2079 		return false;
2080 	}
2081 #else
convert_buffer_generic(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,D)2082 	template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2083 	{
2084 		const typename D::type* data = static_cast<const typename D::type*>(contents);
2085 		size_t data_length = size / sizeof(typename D::type);
2086 
2087 		// first pass: get length in utf8 units
2088 		size_t length = D::process(data, data_length, 0, utf8_counter());
2089 
2090 		// allocate buffer of suitable length
2091 		char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2092 		if (!buffer) return false;
2093 
2094 		// second pass: convert utf16 input to utf8
2095 		uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2096 		uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
2097 
2098 		assert(oend == obegin + length);
2099 		*oend = 0;
2100 
2101 		out_buffer = buffer;
2102 		out_length = length + 1;
2103 
2104 		return true;
2105 	}
2106 
get_latin1_7bit_prefix_length(const uint8_t * data,size_t size)2107 	PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
2108 	{
2109 		for (size_t i = 0; i < size; ++i)
2110 			if (data[i] > 127)
2111 				return i;
2112 
2113 		return size;
2114 	}
2115 
convert_buffer_latin1(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2116 	PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2117 	{
2118 		const uint8_t* data = static_cast<const uint8_t*>(contents);
2119 		size_t data_length = size;
2120 
2121 		// get size of prefix that does not need utf8 conversion
2122 		size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
2123 		assert(prefix_length <= data_length);
2124 
2125 		const uint8_t* postfix = data + prefix_length;
2126 		size_t postfix_length = data_length - prefix_length;
2127 
2128 		// if no conversion is needed, just return the original buffer
2129 		if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2130 
2131 		// first pass: get length in utf8 units
2132 		size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
2133 
2134 		// allocate buffer of suitable length
2135 		char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2136 		if (!buffer) return false;
2137 
2138 		// second pass: convert latin1 input to utf8
2139 		memcpy(buffer, data, prefix_length);
2140 
2141 		uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2142 		uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
2143 
2144 		assert(oend == obegin + length);
2145 		*oend = 0;
2146 
2147 		out_buffer = buffer;
2148 		out_length = length + 1;
2149 
2150 		return true;
2151 	}
2152 
convert_buffer(char_t * & out_buffer,size_t & out_length,xml_encoding encoding,const void * contents,size_t size,bool is_mutable)2153 	PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2154 	{
2155 		// fast path: no conversion required
2156 		if (encoding == encoding_utf8)
2157 			return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2158 
2159 		// source encoding is utf16
2160 		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2161 		{
2162 			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2163 
2164 			return (native_encoding == encoding) ?
2165 				convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2166 				convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2167 		}
2168 
2169 		// source encoding is utf32
2170 		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2171 		{
2172 			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2173 
2174 			return (native_encoding == encoding) ?
2175 				convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2176 				convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2177 		}
2178 
2179 		// source encoding is latin1
2180 		if (encoding == encoding_latin1)
2181 			return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
2182 
2183 		assert(!"Invalid encoding");
2184 		return false;
2185 	}
2186 #endif
2187 
as_utf8_begin(const wchar_t * str,size_t length)2188 	PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
2189 	{
2190 		// get length in utf8 characters
2191 		return wchar_decoder::process(str, length, 0, utf8_counter());
2192 	}
2193 
as_utf8_end(char * buffer,size_t size,const wchar_t * str,size_t length)2194 	PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
2195 	{
2196 		// convert to utf8
2197 		uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
2198 		uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
2199 
2200 		assert(begin + size == end);
2201 		(void)!end;
2202 		(void)!size;
2203 	}
2204 
2205 #ifndef PUGIXML_NO_STL
as_utf8_impl(const wchar_t * str,size_t length)2206 	PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
2207 	{
2208 		// first pass: get length in utf8 characters
2209 		size_t size = as_utf8_begin(str, length);
2210 
2211 		// allocate resulting string
2212 		std::string result;
2213 		result.resize(size);
2214 
2215 		// second pass: convert to utf8
2216 		if (size > 0) as_utf8_end(&result[0], size, str, length);
2217 
2218 		return result;
2219 	}
2220 
as_wide_impl(const char * str,size_t size)2221 	PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
2222 	{
2223 		const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
2224 
2225 		// first pass: get length in wchar_t units
2226 		size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
2227 
2228 		// allocate resulting string
2229 		std::basic_string<wchar_t> result;
2230 		result.resize(length);
2231 
2232 		// second pass: convert to wchar_t
2233 		if (length > 0)
2234 		{
2235 			wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
2236 			wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
2237 
2238 			assert(begin + length == end);
2239 			(void)!end;
2240 		}
2241 
2242 		return result;
2243 	}
2244 #endif
2245 
2246 	template <typename Header>
strcpy_insitu_allow(size_t length,const Header & header,uintptr_t header_mask,char_t * target)2247 	inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
2248 	{
2249 		// never reuse shared memory
2250 		if (header & xml_memory_page_contents_shared_mask) return false;
2251 
2252 		size_t target_length = strlength(target);
2253 
2254 		// always reuse document buffer memory if possible
2255 		if ((header & header_mask) == 0) return target_length >= length;
2256 
2257 		// reuse heap memory if waste is not too great
2258 		const size_t reuse_threshold = 32;
2259 
2260 		return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
2261 	}
2262 
2263 	template <typename String, typename Header>
strcpy_insitu(String & dest,Header & header,uintptr_t header_mask,const char_t * source,size_t source_length)2264 	PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
2265 	{
2266 		if (source_length == 0)
2267 		{
2268 			// empty string and null pointer are equivalent, so just deallocate old memory
2269 			xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2270 
2271 			if (header & header_mask) alloc->deallocate_string(dest);
2272 
2273 			// mark the string as not allocated
2274 			dest = 0;
2275 			header &= ~header_mask;
2276 
2277 			return true;
2278 		}
2279 		else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
2280 		{
2281 			// we can reuse old buffer, so just copy the new data (including zero terminator)
2282 			memcpy(dest, source, source_length * sizeof(char_t));
2283 			dest[source_length] = 0;
2284 
2285 			return true;
2286 		}
2287 		else
2288 		{
2289 			xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2290 
2291 			if (!alloc->reserve()) return false;
2292 
2293 			// allocate new buffer
2294 			char_t* buf = alloc->allocate_string(source_length + 1);
2295 			if (!buf) return false;
2296 
2297 			// copy the string (including zero terminator)
2298 			memcpy(buf, source, source_length * sizeof(char_t));
2299 			buf[source_length] = 0;
2300 
2301 			// deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
2302 			if (header & header_mask) alloc->deallocate_string(dest);
2303 
2304 			// the string is now allocated, so set the flag
2305 			dest = buf;
2306 			header |= header_mask;
2307 
2308 			return true;
2309 		}
2310 	}
2311 
2312 	struct gap
2313 	{
2314 		char_t* end;
2315 		size_t size;
2316 
gapgap2317 		gap(): end(0), size(0)
2318 		{
2319 		}
2320 
2321 		// Push new gap, move s count bytes further (skipping the gap).
2322 		// Collapse previous gap.
pushgap2323 		void push(char_t*& s, size_t count)
2324 		{
2325 			if (end) // there was a gap already; collapse it
2326 			{
2327 				// Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
2328 				assert(s >= end);
2329 				memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2330 			}
2331 
2332 			s += count; // end of current gap
2333 
2334 			// "merge" two gaps
2335 			end = s;
2336 			size += count;
2337 		}
2338 
2339 		// Collapse all gaps, return past-the-end pointer
flushgap2340 		char_t* flush(char_t* s)
2341 		{
2342 			if (end)
2343 			{
2344 				// Move [old_gap_end, current_pos) to [old_gap_start, ...)
2345 				assert(s >= end);
2346 				memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2347 
2348 				return s - size;
2349 			}
2350 			else return s;
2351 		}
2352 	};
2353 
	// Decodes, in place, the character or entity reference that starts at s.
	// s is expected to point at '&' (the callers in this file invoke it when *s == '&').
	// Recognised forms: numeric references (&#N; decimal, &#xH; hexadecimal) and the
	// predefined entities &amp; &apos; &gt; &lt; &quot;.
	// On success the decoded character is written at s, the remaining characters of the
	// reference are registered as a gap in g, and the position just past ';' is returned.
	// If the reference is not recognised nothing is written and the position where
	// recognition stopped is returned, so the caller continues scanning from there.
	PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
	{
		// stre scans ahead through the reference; s stays at '&' until something is written
		char_t* stre = s + 1;

		switch (*stre)
		{
			case '#':	// &#...
			{
				// accumulated code point value
				unsigned int ucsc = 0;

				if (stre[1] == 'x') // &#x... (hex code)
				{
					stre += 2;

					char_t ch = *stre;

					// "&#x;" carries no digits: leave it untouched
					if (ch == ';') return stre;

					for (;;)
					{
						if (static_cast<unsigned int>(ch - '0') <= 9)
							ucsc = 16 * ucsc + (ch - '0');
						// (ch | ' ') sets bit 0x20, mapping 'A'..'F' onto 'a'..'f'
						else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
							ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
						else if (ch == ';')
							break;
						else // cancel
							return stre;

						ch = *++stre;
					}

					// step over ';'
					++stre;
				}
				else	// &#... (dec code)
				{
					char_t ch = *++stre;

					// "&#;" carries no digits: leave it untouched
					if (ch == ';') return stre;

					for (;;)
					{
						if (static_cast<unsigned int>(static_cast<unsigned int>(ch) - '0') <= 9)
							ucsc = 10 * ucsc + (ch - '0');
						else if (ch == ';')
							break;
						else // cancel
							return stre;

						ch = *++stre;
					}

					// step over ';'
					++stre;
				}

				// write the code point at s in the native encoding; s is advanced past what was written
			#ifdef PUGIXML_WCHAR_MODE
				s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
			#else
				s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
			#endif

				// everything between the written character(s) and the end of the reference becomes a gap
				g.push(s, stre - s);
				return stre;
			}

			case 'a':	// &a
			{
				++stre;

				if (*stre == 'm') // &am
				{
					if (*++stre == 'p' && *++stre == ';') // &amp;
					{
						*s++ = '&';
						++stre;

						g.push(s, stre - s);
						return stre;
					}
				}
				else if (*stre == 'p') // &ap
				{
					if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
					{
						*s++ = '\'';
						++stre;

						g.push(s, stre - s);
						return stre;
					}
				}
				break;
			}

			case 'g': // &g
			{
				if (*++stre == 't' && *++stre == ';') // &gt;
				{
					*s++ = '>';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
				break;
			}

			case 'l': // &l
			{
				if (*++stre == 't' && *++stre == ';') // &lt;
				{
					*s++ = '<';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
				break;
			}

			case 'q': // &q
			{
				if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
				{
					*s++ = '"';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
				break;
			}

			default:
				break;
		}

		// not a recognised reference: resume scanning at the first character that did not match
		return stre;
	}
2493 
	// Parser utilities
	// These macros expand in place and refer to names of the enclosing scope
	// (s, endch, optmsk, cursor, alloc, ch, error_offset, error_status).

	// True if c equals e, or if c is the zero terminator and endch equals e
	// (endch is presumably the buffer's last character saved by the parser - confirm at the call sites)
	#define PUGI__ENDSWITH(c, e)        ((c) == (e) || ((c) == 0 && endch == (e)))
	// Advances s past characters classified as ct_space
	#define PUGI__SKIPWS()              { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
	// Non-zero if any of the bits in OPT is set in optmsk
	#define PUGI__OPTSET(OPT)           ( optmsk & (OPT) )
	// Appends a new node of TYPE under cursor and makes it the cursor; raises status_out_of_memory on failure
	#define PUGI__PUSHNODE(TYPE)        { cursor = append_new_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
	// Moves cursor to its parent
	#define PUGI__POPNODE()             { cursor = cursor->parent; }
	// Advances s until X holds or the zero terminator is reached
	#define PUGI__SCANFOR(X)            { while (*s != 0 && !(X)) ++s; }
	// Advances s while X holds
	#define PUGI__SCANWHILE(X)          { while (X) ++s; }
	// Same as PUGI__SCANWHILE but tests four characters per iteration; X must refer to the current character as ss
	#define PUGI__SCANWHILE_UNROLL(X)   { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
	// Saves *s in ch, replaces it with a zero terminator and steps past it
	#define PUGI__ENDSEG()              { ch = *s; *s = 0; ++s; }
	// Records the error position and status, then returns a null char_t* from the enclosing function
	#define PUGI__THROW_ERROR(err, m)   return error_offset = m, error_status = err, static_cast<char_t*>(0)
	// Raises err at position m if s is at the zero terminator
	#define PUGI__CHECK_ERROR(err, m)   { if (*s == 0) PUGI__THROW_ERROR(err, m); }
2506 
strconv_comment(char_t * s,char_t endch)2507 	PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
2508 	{
2509 		gap g;
2510 
2511 		while (true)
2512 		{
2513 			PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
2514 
2515 			if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2516 			{
2517 				*s++ = '\n'; // replace first one with 0x0a
2518 
2519 				if (*s == '\n') g.push(s, 1);
2520 			}
2521 			else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
2522 			{
2523 				*g.flush(s) = 0;
2524 
2525 				return s + (s[2] == '>' ? 3 : 2);
2526 			}
2527 			else if (*s == 0)
2528 			{
2529 				return 0;
2530 			}
2531 			else ++s;
2532 		}
2533 	}
2534 
strconv_cdata(char_t * s,char_t endch)2535 	PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
2536 	{
2537 		gap g;
2538 
2539 		while (true)
2540 		{
2541 			PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
2542 
2543 			if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2544 			{
2545 				*s++ = '\n'; // replace first one with 0x0a
2546 
2547 				if (*s == '\n') g.push(s, 1);
2548 			}
2549 			else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
2550 			{
2551 				*g.flush(s) = 0;
2552 
2553 				return s + 1;
2554 			}
2555 			else if (*s == 0)
2556 			{
2557 				return 0;
2558 			}
2559 			else ++s;
2560 		}
2561 	}
2562 
2563 	typedef char_t* (*strconv_pcdata_t)(char_t*);
2564 
2565 	template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
2566 	{
parsestrconv_pcdata_impl2567 		static char_t* parse(char_t* s)
2568 		{
2569 			gap g;
2570 
2571 			char_t* begin = s;
2572 
2573 			while (true)
2574 			{
2575 				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));
2576 
2577 				if (*s == '<') // PCDATA ends here
2578 				{
2579 					char_t* end = g.flush(s);
2580 
2581 					if (opt_trim::value)
2582 						while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2583 							--end;
2584 
2585 					*end = 0;
2586 
2587 					return s + 1;
2588 				}
2589 				else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2590 				{
2591 					*s++ = '\n'; // replace first one with 0x0a
2592 
2593 					if (*s == '\n') g.push(s, 1);
2594 				}
2595 				else if (opt_escape::value && *s == '&')
2596 				{
2597 					s = strconv_escape(s, g);
2598 				}
2599 				else if (*s == 0)
2600 				{
2601 					char_t* end = g.flush(s);
2602 
2603 					if (opt_trim::value)
2604 						while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2605 							--end;
2606 
2607 					*end = 0;
2608 
2609 					return s;
2610 				}
2611 				else ++s;
2612 			}
2613 		}
2614 	};
2615 
get_strconv_pcdata(unsigned int optmask)2616 	PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
2617 	{
2618 		PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
2619 
2620 		switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (eol escapes trim)
2621 		{
2622 		case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse;
2623 		case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse;
2624 		case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse;
2625 		case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse;
2626 		case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse;
2627 		case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse;
2628 		case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse;
2629 		case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse;
2630 		default: assert(false); return 0; // should not get here
2631 		}
2632 	}
2633 
2634 	typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
2635 
2636 	template <typename opt_escape> struct strconv_attribute_impl
2637 	{
parse_wnormstrconv_attribute_impl2638 		static char_t* parse_wnorm(char_t* s, char_t end_quote)
2639 		{
2640 			gap g;
2641 
2642 			// trim leading whitespaces
2643 			if (PUGI__IS_CHARTYPE(*s, ct_space))
2644 			{
2645 				char_t* str = s;
2646 
2647 				do ++str;
2648 				while (PUGI__IS_CHARTYPE(*str, ct_space));
2649 
2650 				g.push(s, str - s);
2651 			}
2652 
2653 			while (true)
2654 			{
2655 				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
2656 
2657 				if (*s == end_quote)
2658 				{
2659 					char_t* str = g.flush(s);
2660 
2661 					do *str-- = 0;
2662 					while (PUGI__IS_CHARTYPE(*str, ct_space));
2663 
2664 					return s + 1;
2665 				}
2666 				else if (PUGI__IS_CHARTYPE(*s, ct_space))
2667 				{
2668 					*s++ = ' ';
2669 
2670 					if (PUGI__IS_CHARTYPE(*s, ct_space))
2671 					{
2672 						char_t* str = s + 1;
2673 						while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
2674 
2675 						g.push(s, str - s);
2676 					}
2677 				}
2678 				else if (opt_escape::value && *s == '&')
2679 				{
2680 					s = strconv_escape(s, g);
2681 				}
2682 				else if (!*s)
2683 				{
2684 					return 0;
2685 				}
2686 				else ++s;
2687 			}
2688 		}
2689 
parse_wconvstrconv_attribute_impl2690 		static char_t* parse_wconv(char_t* s, char_t end_quote)
2691 		{
2692 			gap g;
2693 
2694 			while (true)
2695 			{
2696 				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));
2697 
2698 				if (*s == end_quote)
2699 				{
2700 					*g.flush(s) = 0;
2701 
2702 					return s + 1;
2703 				}
2704 				else if (PUGI__IS_CHARTYPE(*s, ct_space))
2705 				{
2706 					if (*s == '\r')
2707 					{
2708 						*s++ = ' ';
2709 
2710 						if (*s == '\n') g.push(s, 1);
2711 					}
2712 					else *s++ = ' ';
2713 				}
2714 				else if (opt_escape::value && *s == '&')
2715 				{
2716 					s = strconv_escape(s, g);
2717 				}
2718 				else if (!*s)
2719 				{
2720 					return 0;
2721 				}
2722 				else ++s;
2723 			}
2724 		}
2725 
parse_eolstrconv_attribute_impl2726 		static char_t* parse_eol(char_t* s, char_t end_quote)
2727 		{
2728 			gap g;
2729 
2730 			while (true)
2731 			{
2732 				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
2733 
2734 				if (*s == end_quote)
2735 				{
2736 					*g.flush(s) = 0;
2737 
2738 					return s + 1;
2739 				}
2740 				else if (*s == '\r')
2741 				{
2742 					*s++ = '\n';
2743 
2744 					if (*s == '\n') g.push(s, 1);
2745 				}
2746 				else if (opt_escape::value && *s == '&')
2747 				{
2748 					s = strconv_escape(s, g);
2749 				}
2750 				else if (!*s)
2751 				{
2752 					return 0;
2753 				}
2754 				else ++s;
2755 			}
2756 		}
2757 
parse_simplestrconv_attribute_impl2758 		static char_t* parse_simple(char_t* s, char_t end_quote)
2759 		{
2760 			gap g;
2761 
2762 			while (true)
2763 			{
2764 				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
2765 
2766 				if (*s == end_quote)
2767 				{
2768 					*g.flush(s) = 0;
2769 
2770 					return s + 1;
2771 				}
2772 				else if (opt_escape::value && *s == '&')
2773 				{
2774 					s = strconv_escape(s, g);
2775 				}
2776 				else if (!*s)
2777 				{
2778 					return 0;
2779 				}
2780 				else ++s;
2781 			}
2782 		}
2783 	};
2784 
get_strconv_attribute(unsigned int optmask)2785 	PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
2786 	{
2787 		PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
2788 
2789 		switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes)
2790 		{
2791 		case 0:  return strconv_attribute_impl<opt_false>::parse_simple;
2792 		case 1:  return strconv_attribute_impl<opt_true>::parse_simple;
2793 		case 2:  return strconv_attribute_impl<opt_false>::parse_eol;
2794 		case 3:  return strconv_attribute_impl<opt_true>::parse_eol;
2795 		case 4:  return strconv_attribute_impl<opt_false>::parse_wconv;
2796 		case 5:  return strconv_attribute_impl<opt_true>::parse_wconv;
2797 		case 6:  return strconv_attribute_impl<opt_false>::parse_wconv;
2798 		case 7:  return strconv_attribute_impl<opt_true>::parse_wconv;
2799 		case 8:  return strconv_attribute_impl<opt_false>::parse_wnorm;
2800 		case 9:  return strconv_attribute_impl<opt_true>::parse_wnorm;
2801 		case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
2802 		case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
2803 		case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
2804 		case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
2805 		case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
2806 		case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
2807 		default: assert(false); return 0; // should not get here
2808 		}
2809 	}
2810 
make_parse_result(xml_parse_status status,ptrdiff_t offset=0)2811 	inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
2812 	{
2813 		xml_parse_result result;
2814 		result.status = status;
2815 		result.offset = offset;
2816 
2817 		return result;
2818 	}
2819 
2820 	struct xml_parser
2821 	{
2822 		xml_allocator alloc;
2823 		xml_allocator* alloc_state;
2824 		char_t* error_offset;
2825 		xml_parse_status error_status;
2826 
		// Copies the state of *alloc_ into the parser's own allocator and remembers alloc_,
		// to which the destructor writes that state back. Starts with no error recorded.
		xml_parser(xml_allocator* alloc_): alloc(*alloc_), alloc_state(alloc_), error_offset(0), error_status(status_ok)
		{
		}
2830 
		// Writes the parser's allocator state back to the allocator passed to the constructor.
		~xml_parser()
		{
			*alloc_state = alloc;
		}
2835 
2836 		// DOCTYPE consists of nested sections of the following possible types:
2837 		// <!-- ... -->, <? ... ?>, "...", '...'
2838 		// <![...]]>
2839 		// <!...>
2840 		// First group can not contain nested groups
2841 		// Second group can contain nested groups of the same type
2842 		// Third group can contain all other groups
parse_doctype_primitivexml_parser2843 		char_t* parse_doctype_primitive(char_t* s)
2844 		{
2845 			if (*s == '"' || *s == '\'')
2846 			{
2847 				// quoted string
2848 				char_t ch = *s++;
2849 				PUGI__SCANFOR(*s == ch);
2850 				if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2851 
2852 				s++;
2853 			}
2854 			else if (s[0] == '<' && s[1] == '?')
2855 			{
2856 				// <? ... ?>
2857 				s += 2;
2858 				PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
2859 				if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2860 
2861 				s += 2;
2862 			}
2863 			else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
2864 			{
2865 				s += 4;
2866 				PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
2867 				if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2868 
2869 				s += 3;
2870 			}
2871 			else PUGI__THROW_ERROR(status_bad_doctype, s);
2872 
2873 			return s;
2874 		}
2875 
parse_doctype_ignorexml_parser2876 		char_t* parse_doctype_ignore(char_t* s)
2877 		{
2878 			size_t depth = 0;
2879 
2880 			assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
2881 			s += 3;
2882 
2883 			while (*s)
2884 			{
2885 				if (s[0] == '<' && s[1] == '!' && s[2] == '[')
2886 				{
2887 					// nested ignore section
2888 					s += 3;
2889 					depth++;
2890 				}
2891 				else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
2892 				{
2893 					// ignore section end
2894 					s += 3;
2895 
2896 					if (depth == 0)
2897 						return s;
2898 
2899 					depth--;
2900 				}
2901 				else s++;
2902 			}
2903 
2904 			PUGI__THROW_ERROR(status_bad_doctype, s);
2905 		}
2906 
parse_doctype_groupxml_parser2907 		char_t* parse_doctype_group(char_t* s, char_t endch)
2908 		{
2909 			size_t depth = 0;
2910 
2911 			assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
2912 			s += 2;
2913 
2914 			while (*s)
2915 			{
2916 				if (s[0] == '<' && s[1] == '!' && s[2] != '-')
2917 				{
2918 					if (s[2] == '[')
2919 					{
2920 						// ignore
2921 						s = parse_doctype_ignore(s);
2922 						if (!s) return s;
2923 					}
2924 					else
2925 					{
2926 						// some control group
2927 						s += 2;
2928 						depth++;
2929 					}
2930 				}
2931 				else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
2932 				{
2933 					// unknown tag (forbidden), or some primitive group
2934 					s = parse_doctype_primitive(s);
2935 					if (!s) return s;
2936 				}
2937 				else if (*s == '>')
2938 				{
2939 					if (depth == 0)
2940 						return s;
2941 
2942 					depth--;
2943 					s++;
2944 				}
2945 				else s++;
2946 			}
2947 
2948 			if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
2949 
2950 			return s;
2951 		}
2952 
		// Parses a construct that starts with '<!': a comment, a CDATA section or a DOCTYPE declaration.
		//
		// s      - points at the '!' that follows '<' (parse_tree calls this when *s == '!')
		// cursor - node that new nodes are pushed under; taken by value, so the caller's cursor is left where it was
		// optmsk - parse option mask, tested through PUGI__OPTSET
		// endch  - character handed down from parse(), which stores the buffer's last character there before
		//          overwriting it with a null terminator
		//
		// Returns the position right after the construct; the caller treats a null return as failure.
		// NOTE(review): PUGI__THROW_ERROR / PUGI__CHECK_ERROR / PUGI__ENDSWITH are defined outside this chunk;
		// presumably the first two record status and offset and return null, and ENDSWITH also accepts the
		// terminator when endch is the expected character - confirm against the macro definitions.
		char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
		{
			// parse node contents, starting with exclamation mark
			++s;

			if (*s == '-') // '<!-...'
			{
				++s;

				if (*s == '-') // '<!--...'
				{
					++s;

					if (PUGI__OPTSET(parse_comments))
					{
						PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
						cursor->value = s; // Save the offset.
					}

					// strconv_comment is only used when the comment is kept and parse_eol is requested;
					// otherwise the terminator is located with a plain scan
					if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
					{
						s = strconv_comment(s, endch);

						if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
					}
					else
					{
						// Scan for terminating '-->'.
						PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
						PUGI__CHECK_ERROR(status_bad_comment, s);

						if (PUGI__OPTSET(parse_comments))
							*s = 0; // Zero-terminate this segment at the first terminating '-'.

						// only two characters are stepped over when s[2] is not '>'
						s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
					}
				}
				else PUGI__THROW_ERROR(status_bad_comment, s);
			}
			else if (*s == '[')
			{
				// '<![CDATA[...'
				// each comparison pre-increments s, so on a full match s ends on the second '['
				if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
				{
					++s;

					if (PUGI__OPTSET(parse_cdata))
					{
						PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
						cursor->value = s; // Save the offset.

						if (PUGI__OPTSET(parse_eol))
						{
							s = strconv_cdata(s, endch);

							if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
						}
						else
						{
							// Scan for terminating ']]>'.
							PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
							PUGI__CHECK_ERROR(status_bad_cdata, s);

							*s++ = 0; // Zero-terminate this segment.
						}
					}
					else // Flagged for discard, but we still have to scan for the terminator.
					{
						// Scan for terminating ']]>'.
						PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
						PUGI__CHECK_ERROR(status_bad_cdata, s);

						++s;
					}

					// only one character is stepped over when s[1] is not '>'
					s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
				}
				else PUGI__THROW_ERROR(status_bad_cdata, s);
			}
			else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
			{
				// step back from 'D' to the '<' of '<!DOCTYPE'; parse_doctype_group asserts on the leading '<!'
				s -= 2;

				// a doctype is rejected when the current node has a parent
				if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);

				// '<!DOCTYPE' is 9 characters long, so mark points right after the keyword
				char_t* mark = s + 9;

				s = parse_doctype_group(s, endch);
				if (!s) return s;

				// cut the doctype text at its closing '>' (nothing to cut if the scan stopped at the terminator)
				assert((*s == 0 && endch == '>') || *s == '>');
				if (*s) *s++ = 0;

				if (PUGI__OPTSET(parse_doctype))
				{
					// the stored value starts at the first non-space character after the keyword
					while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;

					PUGI__PUSHNODE(node_doctype);

					cursor->value = mark;
				}
			}
			// the buffer ended right after '<!': choose the status from the character parse() removed from the end
			else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
			else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
			else PUGI__THROW_ERROR(status_unrecognized_tag, s);

			return s;
		}
3061 
		// Parses a construct that starts with '<?': an XML declaration or a processing instruction.
		//
		// s          - points at the '?' that follows '<' (parse_tree calls this when *s == '?')
		// ref_cursor - current node, in/out: copied to a local on entry and written back before the final return
		// optmsk     - parse option mask, tested through PUGI__OPTSET
		// endch      - character handed down from parse(), which stores the buffer's last character there before
		//              overwriting it with a null terminator
		//
		// Returns the position to continue parsing from; the caller treats a null return as failure.
		// For a kept declaration with content the cursor stays on the declaration node and the returned position
		// is the start of that content, which parse_tree uses to continue at LOC_ATTRIBUTES.
		// NOTE(review): the PUGI__* macros are defined outside this chunk; the error macros presumably record
		// status and offset and return null - confirm against the macro definitions.
		char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
		{
			// load into registers
			xml_node_struct* cursor = ref_cursor;
			char_t ch = 0;

			// parse node contents, starting with question mark
			++s;

			// read PI target
			char_t* target = s;

			if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);

			PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
			PUGI__CHECK_ERROR(status_bad_pi, s);

			// determine node type; stricmp / strcasecmp is not portable
			// OR-ing with ' ' folds ASCII upper case to lower case; 'target + 3 == s' requires the name to be exactly three characters
			bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;

			// the node is only created when the matching option (parse_declaration or parse_pi) is set
			if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
			{
				if (declaration)
				{
					// disallow non top-level declarations
					if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);

					PUGI__PUSHNODE(node_declaration);
				}
				else
				{
					PUGI__PUSHNODE(node_pi);
				}

				cursor->name = target;

				// terminates the target name; the character that followed it is examined through 'ch' below
				PUGI__ENDSEG();

				// parse value/attributes
				if (ch == '?')
				{
					// empty node
					if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
					// advance only if '>' is really there
					s += (*s == '>');

					PUGI__POPNODE();
				}
				else if (PUGI__IS_CHARTYPE(ch, ct_space))
				{
					PUGI__SKIPWS();

					// scan for tag end
					char_t* value = s;

					PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
					PUGI__CHECK_ERROR(status_bad_pi, s);

					if (declaration)
					{
						// replace ending ? with / so that 'element' terminates properly
						*s = '/';

						// we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
						s = value;
					}
					else
					{
						// store value and step over >
						cursor->value = value;

						PUGI__POPNODE();

						// terminates the value at the '?' found by the scan above
						PUGI__ENDSEG();

						s += (*s == '>');
					}
				}
				else PUGI__THROW_ERROR(status_bad_pi, s);
			}
			else
			{
				// the node is not wanted, but its end still has to be found
				// scan for tag end
				PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
				PUGI__CHECK_ERROR(status_bad_pi, s);

				// only one character is stepped over when s[1] is not '>'
				s += (s[1] == '>' ? 2 : 1);
			}

			// store from registers
			ref_cursor = cursor;

			return s;
		}
3155 
		// Main parsing loop: walks the null-terminated buffer starting at s and builds nodes and attributes under root.
		//
		// s      - start of the data to parse (parse() passes the buffer position after an optional BOM)
		// root   - node that parsed content is attached to; the cursor starts here and must be back here at the end
		// optmsk - parse option mask, tested through PUGI__OPTSET and used to select the string conversion functions
		// endch  - character that parse() removed from the end of the buffer when it wrote the null terminator;
		//          several branches accept the terminator in place of '>' when endch is '>'
		//
		// Returns the final position; callers of the helpers used here treat a null return as failure.
		// NOTE(review): the PUGI__* macros are defined outside this chunk; they use the locals s, ch, cursor,
		// optmsk and endch by name, and the error macros presumably record status and offset and return null.
		char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
		{
			strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
			strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);

			char_t ch = 0;
			xml_node_struct* cursor = root;
			char_t* mark = s;

			while (*s != 0)
			{
				if (*s == '<')
				{
					++s;

				// also entered by goto from the PCDATA branch below, once it has stepped over a '<'
				LOC_TAG:
					if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
					{
						PUGI__PUSHNODE(node_element); // Append a new node to the tree.

						cursor->name = s;

						PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
						PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.

						if (ch == '>')
						{
							// end of tag
						}
						else if (PUGI__IS_CHARTYPE(ch, ct_space))
						{
						// also entered by goto after parse_question leaves the cursor on a declaration node
						LOC_ATTRIBUTES:
							while (true)
							{
								PUGI__SKIPWS(); // Eat any whitespace.

								if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
								{
									xml_attribute_struct* a = append_new_attribute(cursor, alloc); // Make space for this attribute.
									if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);

									a->name = s; // Save the offset.

									PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
									PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.

									// whitespace between the attribute name and '=': skip it and re-read 'ch'
									if (PUGI__IS_CHARTYPE(ch, ct_space))
									{
										PUGI__SKIPWS(); // Eat any whitespace.

										ch = *s;
										++s;
									}

									if (ch == '=') // '<... #=...'
									{
										PUGI__SKIPWS(); // Eat any whitespace.

										if (*s == '"' || *s == '\'') // '<... #="...'
										{
											ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
											++s; // Step over the quote.
											a->value = s; // Save the offset.

											s = strconv_attribute(s, ch);

											if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);

											// After this line the loop continues from the start;
											// Whitespaces, / and > are ok, symbols and EOF are wrong,
											// everything else will be detected
											if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
										}
										else PUGI__THROW_ERROR(status_bad_attribute, s);
									}
									else PUGI__THROW_ERROR(status_bad_attribute, s);
								}
								else if (*s == '/')
								{
									++s;

									if (*s == '>')
									{
										PUGI__POPNODE();
										s++;
										break;
									}
									else if (*s == 0 && endch == '>')
									{
										// the closing '>' was the buffer's last character
										PUGI__POPNODE();
										break;
									}
									else PUGI__THROW_ERROR(status_bad_start_element, s);
								}
								else if (*s == '>')
								{
									++s;

									break;
								}
								else if (*s == 0 && endch == '>')
								{
									// the closing '>' was the buffer's last character
									break;
								}
								else PUGI__THROW_ERROR(status_bad_start_element, s);
							}

							// !!!
						}
						else if (ch == '/') // '<#.../'
						{
							if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);

							PUGI__POPNODE(); // Pop.

							// advance only if '>' is really there
							s += (*s == '>');
						}
						else if (ch == 0)
						{
							// we stepped over null terminator, backtrack & handle closing tag
							--s;

							if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
						}
						else PUGI__THROW_ERROR(status_bad_start_element, s);
					}
					else if (*s == '/')
					{
						++s;

						// the closing tag is compared against the name of the current node, character by character
						char_t* name = cursor->name;
						if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s);

						while (PUGI__IS_CHARTYPE(*s, ct_symbol))
						{
							if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s);
						}

						// part of the node name was not matched
						if (*name)
						{
							// the only unmatched character is the one parse() removed from the end of the buffer
							if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
							else PUGI__THROW_ERROR(status_end_element_mismatch, s);
						}

						PUGI__POPNODE(); // Pop.

						PUGI__SKIPWS();

						if (*s == 0)
						{
							if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
						}
						else
						{
							if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
							++s;
						}
					}
					else if (*s == '?') // '<?...'
					{
						s = parse_question(s, cursor, optmsk, endch);
						if (!s) return s;

						assert(cursor);
						// a cursor left on a declaration node means its content still has to be parsed as attributes
						if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
					}
					else if (*s == '!') // '<!...'
					{
						s = parse_exclamation(s, cursor, optmsk, endch);
						if (!s) return s;
					}
					else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
					else PUGI__THROW_ERROR(status_unrecognized_tag, s);
				}
				else
				{
					mark = s; // Save this offset while searching for a terminator.

					PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.

					if (*s == '<' || !*s)
					{
						// We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
						assert(mark != s);

						// whitespace-only text: dropped unless a parse_ws_pcdata* option asks for it (parse_trim_pcdata always drops it)
						if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
						{
							continue;
						}
						else if (PUGI__OPTSET(parse_ws_pcdata_single))
						{
							// kept only when a closing tag follows and the current node has no children yet
							if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
						}
					}

					// unless trimming is requested, the text starts at the saved offset, leading whitespace included
					if (!PUGI__OPTSET(parse_trim_pcdata))
						s = mark;

					// a text node is only created below a parent node or when parsing a fragment
					if (cursor->parent || PUGI__OPTSET(parse_fragment))
					{
						PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
						cursor->value = s; // Save the offset.

						s = strconv_pcdata(s);

						PUGI__POPNODE(); // Pop since this is a standalone.

						if (!*s) break;
					}
					else
					{
						// text that is not stored is skipped up to the next '<'
						PUGI__SCANFOR(*s == '<'); // '...<'
						if (!*s) break;

						++s;
					}

					// We're after '<'
					goto LOC_TAG;
				}
			}

			// check that last tag is closed
			if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);

			return s;
		}
3383 
3384 	#ifdef PUGIXML_WCHAR_MODE
parse_skip_bomxml_parser3385 		static char_t* parse_skip_bom(char_t* s)
3386 		{
3387 			unsigned int bom = 0xfeff;
3388 			return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
3389 		}
3390 	#else
parse_skip_bomxml_parser3391 		static char_t* parse_skip_bom(char_t* s)
3392 		{
3393 			return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
3394 		}
3395 	#endif
3396 
has_element_node_siblingsxml_parser3397 		static bool has_element_node_siblings(xml_node_struct* node)
3398 		{
3399 			while (node)
3400 			{
3401 				if (PUGI__NODETYPE(node) == node_element) return true;
3402 
3403 				node = node->next_sibling;
3404 			}
3405 
3406 			return false;
3407 		}
3408 
parsexml_parser3409 		static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
3410 		{
3411 			// early-out for empty documents
3412 			if (length == 0)
3413 				return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
3414 
3415 			// get last child of the root before parsing
3416 			xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
3417 
3418 			// create parser on stack
3419 			xml_parser parser(static_cast<xml_allocator*>(xmldoc));
3420 
3421 			// save last character and make buffer zero-terminated (speeds up parsing)
3422 			char_t endch = buffer[length - 1];
3423 			buffer[length - 1] = 0;
3424 
3425 			// skip BOM to make sure it does not end up as part of parse output
3426 			char_t* buffer_data = parse_skip_bom(buffer);
3427 
3428 			// perform actual parsing
3429 			parser.parse_tree(buffer_data, root, optmsk, endch);
3430 
3431 			xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
3432 			assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
3433 
3434 			if (result)
3435 			{
3436 				// since we removed last character, we have to handle the only possible false positive (stray <)
3437 				if (endch == '<')
3438 					return make_parse_result(status_unrecognized_tag, length - 1);
3439 
3440 				// check if there are any element nodes parsed
3441 				xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0;
3442 
3443 				if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
3444 					return make_parse_result(status_no_document_element, length - 1);
3445 			}
3446 			else
3447 			{
3448 				// roll back offset if it occurs on a null terminator in the source buffer
3449 				if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
3450 					result.offset--;
3451 			}
3452 
3453 			return result;
3454 		}
3455 	};
3456 
3457 	// Output facilities
get_write_native_encoding()3458 	PUGI__FN xml_encoding get_write_native_encoding()
3459 	{
3460 	#ifdef PUGIXML_WCHAR_MODE
3461 		return get_wchar_encoding();
3462 	#else
3463 		return encoding_utf8;
3464 	#endif
3465 	}
3466 
get_write_encoding(xml_encoding encoding)3467 	PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
3468 	{
3469 		// replace wchar encoding with utf implementation
3470 		if (encoding == encoding_wchar) return get_wchar_encoding();
3471 
3472 		// replace utf16 encoding with utf16 with specific endianness
3473 		if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3474 
3475 		// replace utf32 encoding with utf32 with specific endianness
3476 		if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3477 
3478 		// only do autodetection if no explicit encoding is requested
3479 		if (encoding != encoding_auto) return encoding;
3480 
3481 		// assume utf8 encoding
3482 		return encoding_utf8;
3483 	}
3484 
convert_buffer_output_generic(typename T::value_type dest,const char_t * data,size_t length,D,T)3485 	template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
3486 	{
3487 		PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3488 
3489 		typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3490 
3491 		return static_cast<size_t>(end - dest) * sizeof(*dest);
3492 	}
3493 
convert_buffer_output_generic(typename T::value_type dest,const char_t * data,size_t length,D,T,bool opt_swap)3494 	template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
3495 	{
3496 		PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3497 
3498 		typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3499 
3500 		if (opt_swap)
3501 		{
3502 			for (typename T::value_type i = dest; i != end; ++i)
3503 				*i = endian_swap(*i);
3504 		}
3505 
3506 		return static_cast<size_t>(end - dest) * sizeof(*dest);
3507 	}
3508 
3509 #ifdef PUGIXML_WCHAR_MODE
get_valid_length(const char_t * data,size_t length)3510 	PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3511 	{
3512 		if (length < 1) return 0;
3513 
3514 		// discard last character if it's the lead of a surrogate pair
3515 		return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
3516 	}
3517 
convert_buffer_output(char_t * r_char,uint8_t * r_u8,uint16_t * r_u16,uint32_t * r_u32,const char_t * data,size_t length,xml_encoding encoding)3518 	PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3519 	{
3520 		// only endian-swapping is required
3521 		if (need_endian_swap_utf(encoding, get_wchar_encoding()))
3522 		{
3523 			convert_wchar_endian_swap(r_char, data, length);
3524 
3525 			return length * sizeof(char_t);
3526 		}
3527 
3528 		// convert to utf8
3529 		if (encoding == encoding_utf8)
3530 			return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
3531 
3532 		// convert to utf16
3533 		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3534 		{
3535 			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3536 
3537 			return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
3538 		}
3539 
3540 		// convert to utf32
3541 		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3542 		{
3543 			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3544 
3545 			return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
3546 		}
3547 
3548 		// convert to latin1
3549 		if (encoding == encoding_latin1)
3550 			return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
3551 
3552 		assert(!"Invalid encoding");
3553 		return 0;
3554 	}
3555 #else
get_valid_length(const char_t * data,size_t length)3556 	PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3557 	{
3558 		if (length < 5) return 0;
3559 
3560 		for (size_t i = 1; i <= 4; ++i)
3561 		{
3562 			uint8_t ch = static_cast<uint8_t>(data[length - i]);
3563 
3564 			// either a standalone character or a leading one
3565 			if ((ch & 0xc0) != 0x80) return length - i;
3566 		}
3567 
3568 		// there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
3569 		return length;
3570 	}
3571 
convert_buffer_output(char_t *,uint8_t * r_u8,uint16_t * r_u16,uint32_t * r_u32,const char_t * data,size_t length,xml_encoding encoding)3572 	PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3573 	{
3574 		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3575 		{
3576 			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3577 
3578 			return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
3579 		}
3580 
3581 		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3582 		{
3583 			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3584 
3585 			return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding);
3586 		}
3587 
3588 		if (encoding == encoding_latin1)
3589 			return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
3590 
3591 		assert(!"Invalid encoding");
3592 		return 0;
3593 	}
3594 #endif
3595 
3596 	class xml_buffered_writer
3597 	{
3598 		xml_buffered_writer(const xml_buffered_writer&);
3599 		xml_buffered_writer& operator=(const xml_buffered_writer&);
3600 
3601 	public:
xml_buffered_writer(xml_writer & writer_,xml_encoding user_encoding)3602 		xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
3603 		{
3604 			PUGI__STATIC_ASSERT(bufcapacity >= 8);
3605 		}
3606 
flush()3607 		size_t flush()
3608 		{
3609 			flush(buffer, bufsize);
3610 			bufsize = 0;
3611 			return 0;
3612 		}
3613 
flush(const char_t * data,size_t size)3614 		void flush(const char_t* data, size_t size)
3615 		{
3616 			if (size == 0) return;
3617 
3618 			// fast path, just write data
3619 			if (encoding == get_write_native_encoding())
3620 				writer.write(data, size * sizeof(char_t));
3621 			else
3622 			{
3623 				// convert chunk
3624 				size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
3625 				assert(result <= sizeof(scratch));
3626 
3627 				// write data
3628 				writer.write(scratch.data_u8, result);
3629 			}
3630 		}
3631 
write_direct(const char_t * data,size_t length)3632 		void write_direct(const char_t* data, size_t length)
3633 		{
3634 			// flush the remaining buffer contents
3635 			flush();
3636 
3637 			// handle large chunks
3638 			if (length > bufcapacity)
3639 			{
3640 				if (encoding == get_write_native_encoding())
3641 				{
3642 					// fast path, can just write data chunk
3643 					writer.write(data, length * sizeof(char_t));
3644 					return;
3645 				}
3646 
3647 				// need to convert in suitable chunks
3648 				while (length > bufcapacity)
3649 				{
3650 					// get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
3651 					// and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
3652 					size_t chunk_size = get_valid_length(data, bufcapacity);
3653 					assert(chunk_size);
3654 
3655 					// convert chunk and write
3656 					flush(data, chunk_size);
3657 
3658 					// iterate
3659 					data += chunk_size;
3660 					length -= chunk_size;
3661 				}
3662 
3663 				// small tail is copied below
3664 				bufsize = 0;
3665 			}
3666 
3667 			memcpy(buffer + bufsize, data, length * sizeof(char_t));
3668 			bufsize += length;
3669 		}
3670 
write_buffer(const char_t * data,size_t length)3671 		void write_buffer(const char_t* data, size_t length)
3672 		{
3673 			size_t offset = bufsize;
3674 
3675 			if (offset + length <= bufcapacity)
3676 			{
3677 				memcpy(buffer + offset, data, length * sizeof(char_t));
3678 				bufsize = offset + length;
3679 			}
3680 			else
3681 			{
3682 				write_direct(data, length);
3683 			}
3684 		}
3685 
write_string(const char_t * data)3686 		void write_string(const char_t* data)
3687 		{
3688 			// write the part of the string that fits in the buffer
3689 			size_t offset = bufsize;
3690 
3691 			while (*data && offset < bufcapacity)
3692 				buffer[offset++] = *data++;
3693 
3694 			// write the rest
3695 			if (offset < bufcapacity)
3696 			{
3697 				bufsize = offset;
3698 			}
3699 			else
3700 			{
3701 				// backtrack a bit if we have split the codepoint
3702 				size_t length = offset - bufsize;
3703 				size_t extra = length - get_valid_length(data - length, length);
3704 
3705 				bufsize = offset - extra;
3706 
3707 				write_direct(data - extra, strlength(data) + extra);
3708 			}
3709 		}
3710 
write(char_t d0)3711 		void write(char_t d0)
3712 		{
3713 			size_t offset = bufsize;
3714 			if (offset > bufcapacity - 1) offset = flush();
3715 
3716 			buffer[offset + 0] = d0;
3717 			bufsize = offset + 1;
3718 		}
3719 
write(char_t d0,char_t d1)3720 		void write(char_t d0, char_t d1)
3721 		{
3722 			size_t offset = bufsize;
3723 			if (offset > bufcapacity - 2) offset = flush();
3724 
3725 			buffer[offset + 0] = d0;
3726 			buffer[offset + 1] = d1;
3727 			bufsize = offset + 2;
3728 		}
3729 
write(char_t d0,char_t d1,char_t d2)3730 		void write(char_t d0, char_t d1, char_t d2)
3731 		{
3732 			size_t offset = bufsize;
3733 			if (offset > bufcapacity - 3) offset = flush();
3734 
3735 			buffer[offset + 0] = d0;
3736 			buffer[offset + 1] = d1;
3737 			buffer[offset + 2] = d2;
3738 			bufsize = offset + 3;
3739 		}
3740 
write(char_t d0,char_t d1,char_t d2,char_t d3)3741 		void write(char_t d0, char_t d1, char_t d2, char_t d3)
3742 		{
3743 			size_t offset = bufsize;
3744 			if (offset > bufcapacity - 4) offset = flush();
3745 
3746 			buffer[offset + 0] = d0;
3747 			buffer[offset + 1] = d1;
3748 			buffer[offset + 2] = d2;
3749 			buffer[offset + 3] = d3;
3750 			bufsize = offset + 4;
3751 		}
3752 
write(char_t d0,char_t d1,char_t d2,char_t d3,char_t d4)3753 		void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
3754 		{
3755 			size_t offset = bufsize;
3756 			if (offset > bufcapacity - 5) offset = flush();
3757 
3758 			buffer[offset + 0] = d0;
3759 			buffer[offset + 1] = d1;
3760 			buffer[offset + 2] = d2;
3761 			buffer[offset + 3] = d3;
3762 			buffer[offset + 4] = d4;
3763 			bufsize = offset + 5;
3764 		}
3765 
write(char_t d0,char_t d1,char_t d2,char_t d3,char_t d4,char_t d5)3766 		void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
3767 		{
3768 			size_t offset = bufsize;
3769 			if (offset > bufcapacity - 6) offset = flush();
3770 
3771 			buffer[offset + 0] = d0;
3772 			buffer[offset + 1] = d1;
3773 			buffer[offset + 2] = d2;
3774 			buffer[offset + 3] = d3;
3775 			buffer[offset + 4] = d4;
3776 			buffer[offset + 5] = d5;
3777 			bufsize = offset + 6;
3778 		}
3779 
3780 		// utf8 maximum expansion: x4 (-> utf32)
3781 		// utf16 maximum expansion: x2 (-> utf32)
3782 		// utf32 maximum expansion: x1
3783 		enum
3784 		{
3785 			bufcapacitybytes =
3786 			#ifdef PUGIXML_MEMORY_OUTPUT_STACK
3787 				PUGIXML_MEMORY_OUTPUT_STACK
3788 			#else
3789 				10240
3790 			#endif
3791 			,
3792 			bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
3793 		};
3794 
3795 		char_t buffer[bufcapacity];
3796 
3797 		union
3798 		{
3799 			uint8_t data_u8[4 * bufcapacity];
3800 			uint16_t data_u16[2 * bufcapacity];
3801 			uint32_t data_u32[bufcapacity];
3802 			char_t data_char[bufcapacity];
3803 		} scratch;
3804 
3805 		xml_writer& writer;
3806 		size_t bufsize;
3807 		xml_encoding encoding;
3808 	};
3809 
text_output_escaped(xml_buffered_writer & writer,const char_t * s,chartypex_t type)3810 	PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
3811 	{
3812 		while (*s)
3813 		{
3814 			const char_t* prev = s;
3815 
3816 			// While *s is a usual symbol
3817 			PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
3818 
3819 			writer.write_buffer(prev, static_cast<size_t>(s - prev));
3820 
3821 			switch (*s)
3822 			{
3823 				case 0: break;
3824 				case '&':
3825 					writer.write('&', 'a', 'm', 'p', ';');
3826 					++s;
3827 					break;
3828 				case '<':
3829 					writer.write('&', 'l', 't', ';');
3830 					++s;
3831 					break;
3832 				case '>':
3833 					writer.write('&', 'g', 't', ';');
3834 					++s;
3835 					break;
3836 				case '"':
3837 					writer.write('&', 'q', 'u', 'o', 't', ';');
3838 					++s;
3839 					break;
3840 				default: // s is not a usual symbol
3841 				{
3842 					unsigned int ch = static_cast<unsigned int>(*s++);
3843 					assert(ch < 32);
3844 
3845 					writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
3846 				}
3847 			}
3848 		}
3849 	}
3850 
text_output(xml_buffered_writer & writer,const char_t * s,chartypex_t type,unsigned int flags)3851 	PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3852 	{
3853 		if (flags & format_no_escapes)
3854 			writer.write_string(s);
3855 		else
3856 			text_output_escaped(writer, s, type);
3857 	}
3858 
text_output_cdata(xml_buffered_writer & writer,const char_t * s)3859 	PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
3860 	{
3861 		do
3862 		{
3863 			writer.write('<', '!', '[', 'C', 'D');
3864 			writer.write('A', 'T', 'A', '[');
3865 
3866 			const char_t* prev = s;
3867 
3868 			// look for ]]> sequence - we can't output it as is since it terminates CDATA
3869 			while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
3870 
3871 			// skip ]] if we stopped at ]]>, > will go to the next CDATA section
3872 			if (*s) s += 2;
3873 
3874 			writer.write_buffer(prev, static_cast<size_t>(s - prev));
3875 
3876 			writer.write(']', ']', '>');
3877 		}
3878 		while (*s);
3879 	}
3880 
text_output_indent(xml_buffered_writer & writer,const char_t * indent,size_t indent_length,unsigned int depth)3881 	PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
3882 	{
3883 		switch (indent_length)
3884 		{
3885 		case 1:
3886 		{
3887 			for (unsigned int i = 0; i < depth; ++i)
3888 				writer.write(indent[0]);
3889 			break;
3890 		}
3891 
3892 		case 2:
3893 		{
3894 			for (unsigned int i = 0; i < depth; ++i)
3895 				writer.write(indent[0], indent[1]);
3896 			break;
3897 		}
3898 
3899 		case 3:
3900 		{
3901 			for (unsigned int i = 0; i < depth; ++i)
3902 				writer.write(indent[0], indent[1], indent[2]);
3903 			break;
3904 		}
3905 
3906 		case 4:
3907 		{
3908 			for (unsigned int i = 0; i < depth; ++i)
3909 				writer.write(indent[0], indent[1], indent[2], indent[3]);
3910 			break;
3911 		}
3912 
3913 		default:
3914 		{
3915 			for (unsigned int i = 0; i < depth; ++i)
3916 				writer.write_buffer(indent, indent_length);
3917 		}
3918 		}
3919 	}
3920 
node_output_comment(xml_buffered_writer & writer,const char_t * s)3921 	PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
3922 	{
3923 		writer.write('<', '!', '-', '-');
3924 
3925 		while (*s)
3926 		{
3927 			const char_t* prev = s;
3928 
3929 			// look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
3930 			while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
3931 
3932 			writer.write_buffer(prev, static_cast<size_t>(s - prev));
3933 
3934 			if (*s)
3935 			{
3936 				assert(*s == '-');
3937 
3938 				writer.write('-', ' ');
3939 				++s;
3940 			}
3941 		}
3942 
3943 		writer.write('-', '-', '>');
3944 	}
3945 
node_output_pi_value(xml_buffered_writer & writer,const char_t * s)3946 	PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
3947 	{
3948 		while (*s)
3949 		{
3950 			const char_t* prev = s;
3951 
3952 			// look for ?> sequence - we can't output it since ?> terminates PI
3953 			while (*s && !(s[0] == '?' && s[1] == '>')) ++s;
3954 
3955 			writer.write_buffer(prev, static_cast<size_t>(s - prev));
3956 
3957 			if (*s)
3958 			{
3959 				assert(s[0] == '?' && s[1] == '>');
3960 
3961 				writer.write('?', ' ', '>');
3962 				s += 2;
3963 			}
3964 		}
3965 	}
3966 
node_output_attributes(xml_buffered_writer & writer,xml_node_struct * node,const char_t * indent,size_t indent_length,unsigned int flags,unsigned int depth)3967 	PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
3968 	{
3969 		const char_t* default_name = PUGIXML_TEXT(":anonymous");
3970 
3971 		for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
3972 		{
3973 			if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes)
3974 			{
3975 				writer.write('\n');
3976 
3977 				text_output_indent(writer, indent, indent_length, depth + 1);
3978 			}
3979 			else
3980 			{
3981 				writer.write(' ');
3982 			}
3983 
3984 			writer.write_string(a->name ? a->name + 0 : default_name);
3985 			writer.write('=', '"');
3986 
3987 			if (a->value)
3988 				text_output(writer, a->value, ctx_special_attr, flags);
3989 
3990 			writer.write('"');
3991 		}
3992 	}
3993 
node_output_start(xml_buffered_writer & writer,xml_node_struct * node,const char_t * indent,size_t indent_length,unsigned int flags,unsigned int depth)3994 	PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
3995 	{
3996 		const char_t* default_name = PUGIXML_TEXT(":anonymous");
3997 		const char_t* name = node->name ? node->name + 0 : default_name;
3998 
3999 		writer.write('<');
4000 		writer.write_string(name);
4001 
4002 		if (node->first_attribute)
4003 			node_output_attributes(writer, node, indent, indent_length, flags, depth);
4004 
4005 		if (!node->first_child)
4006 		{
4007 			writer.write(' ', '/', '>');
4008 
4009 			return false;
4010 		}
4011 		else
4012 		{
4013 			writer.write('>');
4014 
4015 			return true;
4016 		}
4017 	}
4018 
node_output_end(xml_buffered_writer & writer,xml_node_struct * node)4019 	PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
4020 	{
4021 		const char_t* default_name = PUGIXML_TEXT(":anonymous");
4022 		const char_t* name = node->name ? node->name + 0 : default_name;
4023 
4024 		writer.write('<', '/');
4025 		writer.write_string(name);
4026 		writer.write('>');
4027 	}
4028 
node_output_simple(xml_buffered_writer & writer,xml_node_struct * node,unsigned int flags)4029 	PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
4030 	{
4031 		const char_t* default_name = PUGIXML_TEXT(":anonymous");
4032 
4033 		switch (PUGI__NODETYPE(node))
4034 		{
4035 			case node_pcdata:
4036 				text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
4037 				break;
4038 
4039 			case node_cdata:
4040 				text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4041 				break;
4042 
4043 			case node_comment:
4044 				node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4045 				break;
4046 
4047 			case node_pi:
4048 				writer.write('<', '?');
4049 				writer.write_string(node->name ? node->name + 0 : default_name);
4050 
4051 				if (node->value)
4052 				{
4053 					writer.write(' ');
4054 					node_output_pi_value(writer, node->value);
4055 				}
4056 
4057 				writer.write('?', '>');
4058 				break;
4059 
4060 			case node_declaration:
4061 				writer.write('<', '?');
4062 				writer.write_string(node->name ? node->name + 0 : default_name);
4063 				node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
4064 				writer.write('?', '>');
4065 				break;
4066 
4067 			case node_doctype:
4068 				writer.write('<', '!', 'D', 'O', 'C');
4069 				writer.write('T', 'Y', 'P', 'E');
4070 
4071 				if (node->value)
4072 				{
4073 					writer.write(' ');
4074 					writer.write_string(node->value);
4075 				}
4076 
4077 				writer.write('>');
4078 				break;
4079 
4080 			default:
4081 				assert(!"Invalid node type");
4082 		}
4083 	}
4084 
4085 	enum indent_flags_t
4086 	{
4087 		indent_newline = 1,
4088 		indent_indent = 2
4089 	};
4090 
	// Writes the subtree rooted at root to writer without recursion.
	// indent is the indentation string; it is only used when format_indent or format_indent_attributes is set and format_raw is not.
	// depth is the indentation level of root; it is incremented when descending into an element and decremented when closing one.
	// indent_flags tracks whether a line break and/or indentation must be written before the next node or closing tag.
	PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
	{
		// an indent length of 0 disables all indentation output below
		size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
		unsigned int indent_flags = indent_indent;

		xml_node_struct* node = root;

		do
		{
			assert(node);

			// begin writing current node
			if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
			{
				// text is written in place, with no line break or indentation before it
				node_output_simple(writer, node, flags);

				// ...and nothing is inserted before whatever follows the text either
				indent_flags = 0;
			}
			else
			{
				if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
					writer.write('\n');

				if ((indent_flags & indent_indent) && indent_length)
					text_output_indent(writer, indent, indent_length, depth);

				if (PUGI__NODETYPE(node) == node_element)
				{
					indent_flags = indent_newline | indent_indent;

					// node_output_start returns true when the element has children; descend into them
					if (node_output_start(writer, node, indent, indent_length, flags, depth))
					{
						node = node->first_child;
						depth++;
						continue;
					}
				}
				else if (PUGI__NODETYPE(node) == node_document)
				{
					// the document node itself produces no output, so no line break is needed before its first child
					indent_flags = indent_indent;

					if (node->first_child)
					{
						node = node->first_child;
						continue;
					}
				}
				else
				{
					node_output_simple(writer, node, flags);

					indent_flags = indent_newline | indent_indent;
				}
			}

			// continue to the next node
			while (node != root)
			{
				if (node->next_sibling)
				{
					node = node->next_sibling;
					break;
				}

				// no more siblings: go up one level and close the parent if it is an element
				node = node->parent;

				// write closing node
				if (PUGI__NODETYPE(node) == node_element)
				{
					depth--;

					if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
						writer.write('\n');

					if ((indent_flags & indent_indent) && indent_length)
						text_output_indent(writer, indent, indent_length, depth);

					node_output_end(writer, node);

					indent_flags = indent_newline | indent_indent;
				}
			}
		}
		while (node != root);

		// terminate the last line unless the output ended with text or raw formatting is requested
		if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
			writer.write('\n');
	}
4179 
has_declaration(xml_node_struct * node)4180 	PUGI__FN bool has_declaration(xml_node_struct* node)
4181 	{
4182 		for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
4183 		{
4184 			xml_node_type type = PUGI__NODETYPE(child);
4185 
4186 			if (type == node_declaration) return true;
4187 			if (type == node_element) return false;
4188 		}
4189 
4190 		return false;
4191 	}
4192 
is_attribute_of(xml_attribute_struct * attr,xml_node_struct * node)4193 	PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
4194 	{
4195 		for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4196 			if (a == attr)
4197 				return true;
4198 
4199 		return false;
4200 	}
4201 
allow_insert_attribute(xml_node_type parent)4202 	PUGI__FN bool allow_insert_attribute(xml_node_type parent)
4203 	{
4204 		return parent == node_element || parent == node_declaration;
4205 	}
4206 
allow_insert_child(xml_node_type parent,xml_node_type child)4207 	PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
4208 	{
4209 		if (parent != node_document && parent != node_element) return false;
4210 		if (child == node_document || child == node_null) return false;
4211 		if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
4212 
4213 		return true;
4214 	}
4215 
allow_move(xml_node parent,xml_node child)4216 	PUGI__FN bool allow_move(xml_node parent, xml_node child)
4217 	{
4218 		// check that child can be a child of parent
4219 		if (!allow_insert_child(parent.type(), child.type()))
4220 			return false;
4221 
4222 		// check that node is not moved between documents
4223 		if (parent.root() != child.root())
4224 			return false;
4225 
4226 		// check that new parent is not in the child subtree
4227 		xml_node cur = parent;
4228 
4229 		while (cur)
4230 		{
4231 			if (cur == child)
4232 				return false;
4233 
4234 			cur = cur.parent();
4235 		}
4236 
4237 		return true;
4238 	}
4239 
4240 	template <typename String, typename Header>
node_copy_string(String & dest,Header & header,uintptr_t header_mask,char_t * source,Header & source_header,xml_allocator * alloc)4241 	PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
4242 	{
4243 		assert(!dest && (header & header_mask) == 0);
4244 
4245 		if (source)
4246 		{
4247 			if (alloc && (source_header & header_mask) == 0)
4248 			{
4249 				dest = source;
4250 
4251 				// since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
4252 				header |= xml_memory_page_contents_shared_mask;
4253 				source_header |= xml_memory_page_contents_shared_mask;
4254 			}
4255 			else
4256 				strcpy_insitu(dest, header, header_mask, source, strlength(source));
4257 		}
4258 	}
4259 
node_copy_contents(xml_node_struct * dn,xml_node_struct * sn,xml_allocator * shared_alloc)4260 	PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
4261 	{
4262 		node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
4263 		node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
4264 
4265 		for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute)
4266 		{
4267 			xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
4268 
4269 			if (da)
4270 			{
4271 				node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4272 				node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4273 			}
4274 		}
4275 	}
4276 
	// Copies the contents of sn into dn and then deep-copies all descendants of sn under dn without recursion.
	// sit walks the source subtree while dit tracks the destination node that receives the copies of sit's siblings.
	PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
	{
		xml_allocator& alloc = get_allocator(dn);
		// non-null only if source and destination use the same allocator; node_copy_string uses it to decide whether strings can be shared
		xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;

		node_copy_contents(dn, sn, shared_alloc);

		xml_node_struct* dit = dn;
		xml_node_struct* sit = sn->first_child;

		while (sit && sit != sn)
		{
			// if the traversal reaches the destination node itself (dn lies inside the sn subtree), it is neither copied nor descended into
			if (sit != dn)
			{
				xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));

				// if the node could not be allocated, its children are skipped as well
				if (copy)
				{
					node_copy_contents(copy, sit, shared_alloc);

					if (sit->first_child)
					{
						// descend: children of sit are appended to the copy that was just made
						dit = copy;
						sit = sit->first_child;
						continue;
					}
				}
			}

			// continue to the next node
			do
			{
				if (sit->next_sibling)
				{
					sit = sit->next_sibling;
					break;
				}

				// no more siblings: move both cursors up one level
				sit = sit->parent;
				dit = dit->parent;
			}
			while (sit != sn);
		}
	}
4321 
node_copy_attribute(xml_attribute_struct * da,xml_attribute_struct * sa)4322 	PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
4323 	{
4324 		xml_allocator& alloc = get_allocator(da);
4325 		xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
4326 
4327 		node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4328 		node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4329 	}
4330 
is_text_node(xml_node_struct * node)4331 	inline bool is_text_node(xml_node_struct* node)
4332 	{
4333 		xml_node_type type = PUGI__NODETYPE(node);
4334 
4335 		return type == node_pcdata || type == node_cdata;
4336 	}
4337 
4338 	// get value with conversion functions
	// Parses an integer from value: skips leading ct_space characters, accepts an optional '+' or '-' sign, then reads
	// either hexadecimal digits (after a 0x/0X prefix) or decimal digits, stopping at the first other character.
	// The magnitude is accumulated in U (the callers in this file instantiate U with unsigned types) and the result saturates:
	// - with a leading '-': magnitudes that overflow U or exceed minneg yield 0 - minneg, otherwise 0 - magnitude is returned
	// - otherwise: magnitudes that overflow U or exceed maxpos yield maxpos, otherwise the magnitude is returned
	// A string without any digits yields 0.
	template <typename U> U string_to_integer(const char_t* value, U minneg, U maxpos)
	{
		U result = 0;
		const char_t* s = value;

		while (PUGI__IS_CHARTYPE(*s, ct_space))
			s++;

		bool negative = (*s == '-');

		// skip at most one sign character
		s += (*s == '+' || *s == '-');

		bool overflow = false;

		// (s[1] | ' ') maps 'X' to 'x', so both 0x and 0X prefixes are accepted
		if (s[0] == '0' && (s[1] | ' ') == 'x')
		{
			s += 2;

			// since overflow detection relies on length of the sequence skip leading zeros
			while (*s == '0')
				s++;

			const char_t* start = s;

			for (;;)
			{
				if (static_cast<unsigned>(*s - '0') < 10)
					result = result * 16 + (*s - '0');
				else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
					result = result * 16 + ((*s | ' ') - 'a' + 10);
				else
					break;

				s++;
			}

			size_t digits = static_cast<size_t>(s - start);

			// each hex digit is 4 bits, so U holds at most sizeof(U) * 2 significant digits
			overflow = digits > sizeof(U) * 2;
		}
		else
		{
			// since overflow detection relies on length of the sequence skip leading zeros
			while (*s == '0')
				s++;

			const char_t* start = s;

			for (;;)
			{
				if (static_cast<unsigned>(*s - '0') < 10)
					result = result * 10 + (*s - '0');
				else
					break;

				s++;
			}

			size_t digits = static_cast<size_t>(s - start);

			PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);

			// max_digits10 is the digit count of the largest value of U and max_lead is the leading digit of that value
			const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
			const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
			const size_t high_bit = sizeof(U) * 8 - 1;

			// fewer digits than max_digits10 always fit and more digits never fit; with exactly max_digits10 digits the value fits
			// if the leading digit is below max_lead, or equals max_lead and the accumulated result still has its top bit set
			// (a value that wrapped around is small enough to have the top bit clear)
			overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
		}

		// negation is done as 0 - x in U so that it is well defined for unsigned types
		if (negative)
			return (overflow || result > minneg) ? 0 - minneg : 0 - result;
		else
			return (overflow || result > maxpos) ? maxpos : result;
	}
4413 
get_value_int(const char_t * value)4414 	PUGI__FN int get_value_int(const char_t* value)
4415 	{
4416 		return string_to_integer<unsigned int>(value, 0 - static_cast<unsigned int>(INT_MIN), INT_MAX);
4417 	}
4418 
get_value_uint(const char_t * value)4419 	PUGI__FN unsigned int get_value_uint(const char_t* value)
4420 	{
4421 		return string_to_integer<unsigned int>(value, 0, UINT_MAX);
4422 	}
4423 
get_value_double(const char_t * value)4424 	PUGI__FN double get_value_double(const char_t* value)
4425 	{
4426 	#ifdef PUGIXML_WCHAR_MODE
4427 		return wcstod(value, 0);
4428 	#else
4429 		return strtod(value, 0);
4430 	#endif
4431 	}
4432 
get_value_float(const char_t * value)4433 	PUGI__FN float get_value_float(const char_t* value)
4434 	{
4435 	#ifdef PUGIXML_WCHAR_MODE
4436 		return static_cast<float>(wcstod(value, 0));
4437 	#else
4438 		return static_cast<float>(strtod(value, 0));
4439 	#endif
4440 	}
4441 
get_value_bool(const char_t * value)4442 	PUGI__FN bool get_value_bool(const char_t* value)
4443 	{
4444 		// only look at first char
4445 		char_t first = *value;
4446 
4447 		// 1*, t* (true), T* (True), y* (yes), Y* (YES)
4448 		return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
4449 	}
4450 
4451 #ifdef PUGIXML_HAS_LONG_LONG
get_value_llong(const char_t * value)4452 	PUGI__FN long long get_value_llong(const char_t* value)
4453 	{
4454 		return string_to_integer<unsigned long long>(value, 0 - static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX);
4455 	}
4456 
get_value_ullong(const char_t * value)4457 	PUGI__FN unsigned long long get_value_ullong(const char_t* value)
4458 	{
4459 		return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX);
4460 	}
4461 #endif
4462 
4463 	template <typename U>
integer_to_string(char_t * begin,char_t * end,U value,bool negative)4464 	PUGI__FN char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
4465 	{
4466 		char_t* result = end - 1;
4467 		U rest = negative ? 0 - value : value;
4468 
4469 		do
4470 		{
4471 			*result-- = static_cast<char_t>('0' + (rest % 10));
4472 			rest /= 10;
4473 		}
4474 		while (rest);
4475 
4476 		assert(result >= begin);
4477 		(void)begin;
4478 
4479 		*result = '-';
4480 
4481 		return result + !negative;
4482 	}
4483 
4484 	// set value with conversion functions
4485 	template <typename String, typename Header>
set_value_ascii(String & dest,Header & header,uintptr_t header_mask,char * buf)4486 	PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
4487 	{
4488 	#ifdef PUGIXML_WCHAR_MODE
4489 		char_t wbuf[128];
4490 		assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
4491 
4492 		size_t offset = 0;
4493 		for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
4494 
4495 		return strcpy_insitu(dest, header, header_mask, wbuf, offset);
4496 	#else
4497 		return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
4498 	#endif
4499 	}
4500 
4501 	template <typename String, typename Header>
set_value_convert(String & dest,Header & header,uintptr_t header_mask,int value)4502 	PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, int value)
4503 	{
4504 		char_t buf[64];
4505 		char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
4506 		char_t* begin = integer_to_string<unsigned int>(buf, end, value, value < 0);
4507 
4508 		return strcpy_insitu(dest, header, header_mask, begin, end - begin);
4509 	}
4510 
4511 	template <typename String, typename Header>
set_value_convert(String & dest,Header & header,uintptr_t header_mask,unsigned int value)4512 	PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned int value)
4513 	{
4514 		char_t buf[64];
4515 		char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
4516 		char_t* begin = integer_to_string<unsigned int>(buf, end, value, false);
4517 
4518 		return strcpy_insitu(dest, header, header_mask, begin, end - begin);
4519 	}
4520 
4521 	template <typename String, typename Header>
set_value_convert(String & dest,Header & header,uintptr_t header_mask,float value)4522 	PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value)
4523 	{
4524 		char buf[128];
4525 		sprintf(buf, "%.9g", value);
4526 
4527 		return set_value_ascii(dest, header, header_mask, buf);
4528 	}
4529 
4530 	template <typename String, typename Header>
set_value_convert(String & dest,Header & header,uintptr_t header_mask,double value)4531 	PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value)
4532 	{
4533 		char buf[128];
4534 		sprintf(buf, "%.17g", value);
4535 
4536 		return set_value_ascii(dest, header, header_mask, buf);
4537 	}
4538 
4539 	template <typename String, typename Header>
set_value_convert(String & dest,Header & header,uintptr_t header_mask,bool value)4540 	PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, bool value)
4541 	{
4542 		return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5);
4543 	}
4544 
4545 #ifdef PUGIXML_HAS_LONG_LONG
4546 	template <typename String, typename Header>
set_value_convert(String & dest,Header & header,uintptr_t header_mask,long long value)4547 	PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, long long value)
4548 	{
4549 		char_t buf[64];
4550 		char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
4551 		char_t* begin = integer_to_string<unsigned long long>(buf, end, value, value < 0);
4552 
4553 		return strcpy_insitu(dest, header, header_mask, begin, end - begin);
4554 	}
4555 
4556 	template <typename String, typename Header>
set_value_convert(String & dest,Header & header,uintptr_t header_mask,unsigned long long value)4557 	PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned long long value)
4558 	{
4559 		char_t buf[64];
4560 		char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
4561 		char_t* begin = integer_to_string<unsigned long long>(buf, end, value, false);
4562 
4563 		return strcpy_insitu(dest, header, header_mask, begin, end - begin);
4564 	}
4565 #endif
4566 
load_buffer_impl(xml_document_struct * doc,xml_node_struct * root,void * contents,size_t size,unsigned int options,xml_encoding encoding,bool is_mutable,bool own,char_t ** out_buffer)4567 	PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
4568 	{
4569 		// check input buffer
4570 		if (!contents && size) return make_parse_result(status_io_error);
4571 
4572 		// get actual encoding
4573 		xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
4574 
4575 		// get private buffer
4576 		char_t* buffer = 0;
4577 		size_t length = 0;
4578 
4579 		if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
4580 
4581 		// delete original buffer if we performed a conversion
4582 		if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
4583 
4584 		// grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
4585 		if (own || buffer != contents) *out_buffer = buffer;
4586 
4587 		// store buffer for offset_debug
4588 		doc->buffer = buffer;
4589 
4590 		// parse
4591 		xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
4592 
4593 		// remember encoding
4594 		res.encoding = buffer_encoding;
4595 
4596 		return res;
4597 	}
4598 
	// we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
	// The file is sought to its end, the position is read, and the file is then rewound to its beginning (not to the position it had on entry).
	// On success out_result receives the size and status_ok is returned; status_io_error is returned when the tell call reports a negative
	// position and status_out_of_memory when the length does not fit into size_t. Return values of the seek calls are not checked.
	PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
	{
	#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
		// there are 64-bit versions of fseek/ftell, let's use them
		typedef __int64 length_type;

		_fseeki64(file, 0, SEEK_END);
		length_type length = _ftelli64(file);
		_fseeki64(file, 0, SEEK_SET);
	#elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
		// there are 64-bit versions of fseek/ftell, let's use them
		typedef off64_t length_type;

		fseeko64(file, 0, SEEK_END);
		length_type length = ftello64(file);
		fseeko64(file, 0, SEEK_SET);
	#else
		// if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
		typedef long length_type;

		fseek(file, 0, SEEK_END);
		length_type length = ftell(file);
		fseek(file, 0, SEEK_SET);
	#endif

		// check for I/O errors
		if (length < 0) return status_io_error;

		// check for overflow
		size_t result = static_cast<size_t>(length);

		// if converting back does not reproduce length, the file is too large to be addressed with size_t
		if (static_cast<length_type>(result) != length) return status_out_of_memory;

		// finalize
		out_result = result;

		return status_ok;
	}
4638 
4639 	// This function assumes that buffer has extra sizeof(char_t) writable bytes after size
zero_terminate_buffer(void * buffer,size_t size,xml_encoding encoding)4640 	PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
4641 	{
4642 		// We only need to zero-terminate if encoding conversion does not do it for us
4643 	#ifdef PUGIXML_WCHAR_MODE
4644 		xml_encoding wchar_encoding = get_wchar_encoding();
4645 
4646 		if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
4647 		{
4648 			size_t length = size / sizeof(char_t);
4649 
4650 			static_cast<char_t*>(buffer)[length] = 0;
4651 			return (length + 1) * sizeof(char_t);
4652 		}
4653 	#else
4654 		if (encoding == encoding_utf8)
4655 		{
4656 			static_cast<char*>(buffer)[size] = 0;
4657 			return size + 1;
4658 		}
4659 	#endif
4660 
4661 		return size;
4662 	}
4663 
load_file_impl(xml_document_struct * doc,FILE * file,unsigned int options,xml_encoding encoding,char_t ** out_buffer)4664 	PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4665 	{
4666 		if (!file) return make_parse_result(status_file_not_found);
4667 
4668 		// get file size (can result in I/O errors)
4669 		size_t size = 0;
4670 		xml_parse_status size_status = get_file_size(file, size);
4671 		if (size_status != status_ok) return make_parse_result(size_status);
4672 
4673 		size_t max_suffix_size = sizeof(char_t);
4674 
4675 		// allocate buffer for the whole file
4676 		char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
4677 		if (!contents) return make_parse_result(status_out_of_memory);
4678 
4679 		// read file in memory
4680 		size_t read_size = fread(contents, 1, size, file);
4681 
4682 		if (read_size != size)
4683 		{
4684 			xml_memory::deallocate(contents);
4685 			return make_parse_result(status_io_error);
4686 		}
4687 
4688 		xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
4689 
4690 		return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
4691 	}
4692 
4693 #ifndef PUGIXML_NO_STL
4694 	template <typename T> struct xml_stream_chunk
4695 	{
createxml_stream_chunk4696 		static xml_stream_chunk* create()
4697 		{
4698 			void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
4699 			if (!memory) return 0;
4700 
4701 			return new (memory) xml_stream_chunk();
4702 		}
4703 
destroyxml_stream_chunk4704 		static void destroy(xml_stream_chunk* chunk)
4705 		{
4706 			// free chunk chain
4707 			while (chunk)
4708 			{
4709 				xml_stream_chunk* next_ = chunk->next;
4710 
4711 				xml_memory::deallocate(chunk);
4712 
4713 				chunk = next_;
4714 			}
4715 		}
4716 
xml_stream_chunkxml_stream_chunk4717 		xml_stream_chunk(): next(0), size(0)
4718 		{
4719 		}
4720 
4721 		xml_stream_chunk* next;
4722 		size_t size;
4723 
4724 		T data[xml_memory_page_size / sizeof(T)];
4725 	};
4726 
load_stream_data_noseek(std::basic_istream<T> & stream,void ** out_buffer,size_t * out_size)4727 	template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4728 	{
4729 		auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy);
4730 
4731 		// read file to a chunk list
4732 		size_t total = 0;
4733 		xml_stream_chunk<T>* last = 0;
4734 
4735 		while (!stream.eof())
4736 		{
4737 			// allocate new chunk
4738 			xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
4739 			if (!chunk) return status_out_of_memory;
4740 
4741 			// append chunk to list
4742 			if (last) last = last->next = chunk;
4743 			else chunks.data = last = chunk;
4744 
4745 			// read data to chunk
4746 			stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
4747 			chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
4748 
4749 			// read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
4750 			if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4751 
4752 			// guard against huge files (chunk size is small enough to make this overflow check work)
4753 			if (total + chunk->size < total) return status_out_of_memory;
4754 			total += chunk->size;
4755 		}
4756 
4757 		size_t max_suffix_size = sizeof(char_t);
4758 
4759 		// copy chunk list to a contiguous buffer
4760 		char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
4761 		if (!buffer) return status_out_of_memory;
4762 
4763 		char* write = buffer;
4764 
4765 		for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
4766 		{
4767 			assert(write + chunk->size <= buffer + total);
4768 			memcpy(write, chunk->data, chunk->size);
4769 			write += chunk->size;
4770 		}
4771 
4772 		assert(write == buffer + total);
4773 
4774 		// return buffer
4775 		*out_buffer = buffer;
4776 		*out_size = total;
4777 
4778 		return status_ok;
4779 	}
4780 
load_stream_data_seek(std::basic_istream<T> & stream,void ** out_buffer,size_t * out_size)4781 	template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4782 	{
4783 		// get length of remaining data in stream
4784 		typename std::basic_istream<T>::pos_type pos = stream.tellg();
4785 		stream.seekg(0, std::ios::end);
4786 		std::streamoff length = stream.tellg() - pos;
4787 		stream.seekg(pos);
4788 
4789 		if (stream.fail() || pos < 0) return status_io_error;
4790 
4791 		// guard against huge files
4792 		size_t read_length = static_cast<size_t>(length);
4793 
4794 		if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
4795 
4796 		size_t max_suffix_size = sizeof(char_t);
4797 
4798 		// read stream data into memory (guard against stream exceptions with buffer holder)
4799 		auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
4800 		if (!buffer.data) return status_out_of_memory;
4801 
4802 		stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
4803 
4804 		// read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
4805 		if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4806 
4807 		// return buffer
4808 		size_t actual_length = static_cast<size_t>(stream.gcount());
4809 		assert(actual_length <= read_length);
4810 
4811 		*out_buffer = buffer.release();
4812 		*out_size = actual_length * sizeof(T);
4813 
4814 		return status_ok;
4815 	}
4816 
load_stream_impl(xml_document_struct * doc,std::basic_istream<T> & stream,unsigned int options,xml_encoding encoding,char_t ** out_buffer)4817 	template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4818 	{
4819 		void* buffer = 0;
4820 		size_t size = 0;
4821 		xml_parse_status status = status_ok;
4822 
4823 		// if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
4824 		if (stream.fail()) return make_parse_result(status_io_error);
4825 
4826 		// load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
4827 		if (stream.tellg() < 0)
4828 		{
4829 			stream.clear(); // clear error flags that could be set by a failing tellg
4830 			status = load_stream_data_noseek(stream, &buffer, &size);
4831 		}
4832 		else
4833 			status = load_stream_data_seek(stream, &buffer, &size);
4834 
4835 		if (status != status_ok) return make_parse_result(status);
4836 
4837 		xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
4838 
4839 		return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
4840 	}
4841 #endif
4842 
4843 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
open_file_wide(const wchar_t * path,const wchar_t * mode)4844 	PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4845 	{
4846 		return _wfopen(path, mode);
4847 	}
4848 #else
convert_path_heap(const wchar_t * str)4849 	PUGI__FN char* convert_path_heap(const wchar_t* str)
4850 	{
4851 		assert(str);
4852 
4853 		// first pass: get length in utf8 characters
4854 		size_t length = strlength_wide(str);
4855 		size_t size = as_utf8_begin(str, length);
4856 
4857 		// allocate resulting string
4858 		char* result = static_cast<char*>(xml_memory::allocate(size + 1));
4859 		if (!result) return 0;
4860 
4861 		// second pass: convert to utf8
4862 		as_utf8_end(result, size, str, length);
4863 
4864 		// zero-terminate
4865 		result[size] = 0;
4866 
4867 		return result;
4868 	}
4869 
open_file_wide(const wchar_t * path,const wchar_t * mode)4870 	PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
4871 	{
4872 		// there is no standard function to open wide paths, so our best bet is to try utf8 path
4873 		char* path_utf8 = convert_path_heap(path);
4874 		if (!path_utf8) return 0;
4875 
4876 		// convert mode to ASCII (we mirror _wfopen interface)
4877 		char mode_ascii[4] = {0};
4878 		for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
4879 
4880 		// try to open the utf8 path
4881 		FILE* result = fopen(path_utf8, mode_ascii);
4882 
4883 		// free dummy buffer
4884 		xml_memory::deallocate(path_utf8);
4885 
4886 		return result;
4887 	}
4888 #endif
4889 
save_file_impl(const xml_document & doc,FILE * file,const char_t * indent,unsigned int flags,xml_encoding encoding)4890 	PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
4891 	{
4892 		if (!file) return false;
4893 
4894 		xml_writer_file writer(file);
4895 		doc.save(writer, indent, flags, encoding);
4896 
4897 		return ferror(file) == 0;
4898 	}
4899 
4900 	struct name_null_sentry
4901 	{
4902 		xml_node_struct* node;
4903 		char_t* name;
4904 
name_null_sentryname_null_sentry4905 		name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name)
4906 		{
4907 			node->name = 0;
4908 		}
4909 
~name_null_sentryname_null_sentry4910 		~name_null_sentry()
4911 		{
4912 			node->name = name;
4913 		}
4914 	};
4915 PUGI__NS_END
4916 
4917 namespace pugi
4918 {
xml_writer_file(void * file_)4919 	PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
4920 	{
4921 	}
4922 
write(const void * data,size_t size)4923 	PUGI__FN void xml_writer_file::write(const void* data, size_t size)
4924 	{
4925 		size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
4926 		(void)!result; // unfortunately we can't do proper error handling here
4927 	}
4928 
4929 #ifndef PUGIXML_NO_STL
xml_writer_stream(std::basic_ostream<char,std::char_traits<char>> & stream)4930 	PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
4931 	{
4932 	}
4933 
xml_writer_stream(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream)4934 	PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
4935 	{
4936 	}
4937 
write(const void * data,size_t size)4938 	PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
4939 	{
4940 		if (narrow_stream)
4941 		{
4942 			assert(!wide_stream);
4943 			narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
4944 		}
4945 		else
4946 		{
4947 			assert(wide_stream);
4948 			assert(size % sizeof(wchar_t) == 0);
4949 
4950 			wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
4951 		}
4952 	}
4953 #endif
4954 
xml_tree_walker()4955 	PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
4956 	{
4957 	}
4958 
~xml_tree_walker()4959 	PUGI__FN xml_tree_walker::~xml_tree_walker()
4960 	{
4961 	}
4962 
depth() const4963 	PUGI__FN int xml_tree_walker::depth() const
4964 	{
4965 		return _depth;
4966 	}
4967 
begin(xml_node &)4968 	PUGI__FN bool xml_tree_walker::begin(xml_node&)
4969 	{
4970 		return true;
4971 	}
4972 
end(xml_node &)4973 	PUGI__FN bool xml_tree_walker::end(xml_node&)
4974 	{
4975 		return true;
4976 	}
4977 
xml_attribute()4978 	PUGI__FN xml_attribute::xml_attribute(): _attr(0)
4979 	{
4980 	}
4981 
xml_attribute(xml_attribute_struct * attr)4982 	PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
4983 	{
4984 	}
4985 
unspecified_bool_xml_attribute(xml_attribute ***)4986 	PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
4987 	{
4988 	}
4989 
operator xml_attribute::unspecified_bool_type() const4990 	PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
4991 	{
4992 		return _attr ? unspecified_bool_xml_attribute : 0;
4993 	}
4994 
operator !() const4995 	PUGI__FN bool xml_attribute::operator!() const
4996 	{
4997 		return !_attr;
4998 	}
4999 
operator ==(const xml_attribute & r) const5000 	PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
5001 	{
5002 		return (_attr == r._attr);
5003 	}
5004 
operator !=(const xml_attribute & r) const5005 	PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
5006 	{
5007 		return (_attr != r._attr);
5008 	}
5009 
operator <(const xml_attribute & r) const5010 	PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
5011 	{
5012 		return (_attr < r._attr);
5013 	}
5014 
operator >(const xml_attribute & r) const5015 	PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
5016 	{
5017 		return (_attr > r._attr);
5018 	}
5019 
operator <=(const xml_attribute & r) const5020 	PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
5021 	{
5022 		return (_attr <= r._attr);
5023 	}
5024 
operator >=(const xml_attribute & r) const5025 	PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
5026 	{
5027 		return (_attr >= r._attr);
5028 	}
5029 
next_attribute() const5030 	PUGI__FN xml_attribute xml_attribute::next_attribute() const
5031 	{
5032 		return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
5033 	}
5034 
previous_attribute() const5035 	PUGI__FN xml_attribute xml_attribute::previous_attribute() const
5036 	{
5037 		return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute();
5038 	}
5039 
as_string(const char_t * def) const5040 	PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
5041 	{
5042 		return (_attr && _attr->value) ? _attr->value + 0 : def;
5043 	}
5044 
as_int(int def) const5045 	PUGI__FN int xml_attribute::as_int(int def) const
5046 	{
5047 		return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def;
5048 	}
5049 
as_uint(unsigned int def) const5050 	PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
5051 	{
5052 		return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def;
5053 	}
5054 
as_double(double def) const5055 	PUGI__FN double xml_attribute::as_double(double def) const
5056 	{
5057 		return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def;
5058 	}
5059 
as_float(float def) const5060 	PUGI__FN float xml_attribute::as_float(float def) const
5061 	{
5062 		return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def;
5063 	}
5064 
as_bool(bool def) const5065 	PUGI__FN bool xml_attribute::as_bool(bool def) const
5066 	{
5067 		return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def;
5068 	}
5069 
5070 #ifdef PUGIXML_HAS_LONG_LONG
as_llong(long long def) const5071 	PUGI__FN long long xml_attribute::as_llong(long long def) const
5072 	{
5073 		return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def;
5074 	}
5075 
as_ullong(unsigned long long def) const5076 	PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
5077 	{
5078 		return (_attr && _attr->value) ? impl::get_value_ullong(_attr->value) : def;
5079 	}
5080 #endif
5081 
empty() const5082 	PUGI__FN bool xml_attribute::empty() const
5083 	{
5084 		return !_attr;
5085 	}
5086 
name() const5087 	PUGI__FN const char_t* xml_attribute::name() const
5088 	{
5089 		return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT("");
5090 	}
5091 
value() const5092 	PUGI__FN const char_t* xml_attribute::value() const
5093 	{
5094 		return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT("");
5095 	}
5096 
hash_value() const5097 	PUGI__FN size_t xml_attribute::hash_value() const
5098 	{
5099 		return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
5100 	}
5101 
internal_object() const5102 	PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
5103 	{
5104 		return _attr;
5105 	}
5106 
operator =(const char_t * rhs)5107 	PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
5108 	{
5109 		set_value(rhs);
5110 		return *this;
5111 	}
5112 
operator =(int rhs)5113 	PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
5114 	{
5115 		set_value(rhs);
5116 		return *this;
5117 	}
5118 
operator =(unsigned int rhs)5119 	PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
5120 	{
5121 		set_value(rhs);
5122 		return *this;
5123 	}
5124 
operator =(double rhs)5125 	PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
5126 	{
5127 		set_value(rhs);
5128 		return *this;
5129 	}
5130 
operator =(float rhs)5131 	PUGI__FN xml_attribute& xml_attribute::operator=(float rhs)
5132 	{
5133 		set_value(rhs);
5134 		return *this;
5135 	}
5136 
operator =(bool rhs)5137 	PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
5138 	{
5139 		set_value(rhs);
5140 		return *this;
5141 	}
5142 
5143 #ifdef PUGIXML_HAS_LONG_LONG
operator =(long long rhs)5144 	PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
5145 	{
5146 		set_value(rhs);
5147 		return *this;
5148 	}
5149 
operator =(unsigned long long rhs)5150 	PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
5151 	{
5152 		set_value(rhs);
5153 		return *this;
5154 	}
5155 #endif
5156 
set_name(const char_t * rhs)5157 	PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
5158 	{
5159 		if (!_attr) return false;
5160 
5161 		return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5162 	}
5163 
set_value(const char_t * rhs)5164 	PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
5165 	{
5166 		if (!_attr) return false;
5167 
5168 		return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5169 	}
5170 
set_value(int rhs)5171 	PUGI__FN bool xml_attribute::set_value(int rhs)
5172 	{
5173 		if (!_attr) return false;
5174 
5175 		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5176 	}
5177 
set_value(unsigned int rhs)5178 	PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
5179 	{
5180 		if (!_attr) return false;
5181 
5182 		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5183 	}
5184 
set_value(double rhs)5185 	PUGI__FN bool xml_attribute::set_value(double rhs)
5186 	{
5187 		if (!_attr) return false;
5188 
5189 		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5190 	}
5191 
set_value(float rhs)5192 	PUGI__FN bool xml_attribute::set_value(float rhs)
5193 	{
5194 		if (!_attr) return false;
5195 
5196 		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5197 	}
5198 
set_value(bool rhs)5199 	PUGI__FN bool xml_attribute::set_value(bool rhs)
5200 	{
5201 		if (!_attr) return false;
5202 
5203 		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5204 	}
5205 
5206 #ifdef PUGIXML_HAS_LONG_LONG
set_value(long long rhs)5207 	PUGI__FN bool xml_attribute::set_value(long long rhs)
5208 	{
5209 		if (!_attr) return false;
5210 
5211 		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5212 	}
5213 
set_value(unsigned long long rhs)5214 	PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
5215 	{
5216 		if (!_attr) return false;
5217 
5218 		return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5219 	}
5220 #endif
5221 
5222 #ifdef __BORLANDC__
	// Compiled for Borland compilers only: logical AND of an attribute handle and a bool.
	// The handle is first converted to bool (true for a non-empty handle) and then combined with rhs.
	PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
	{
		return (bool)lhs && rhs;
	}

	// Compiled for Borland compilers only: logical OR of an attribute handle and a bool.
	// The handle is first converted to bool (true for a non-empty handle) and then combined with rhs.
	PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
	{
		return (bool)lhs || rhs;
	}
5232 #endif
5233 
xml_node()5234 	PUGI__FN xml_node::xml_node(): _root(0)
5235 	{
5236 	}
5237 
xml_node(xml_node_struct * p)5238 	PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
5239 	{
5240 	}
5241 
unspecified_bool_xml_node(xml_node ***)5242 	PUGI__FN static void unspecified_bool_xml_node(xml_node***)
5243 	{
5244 	}
5245 
operator xml_node::unspecified_bool_type() const5246 	PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
5247 	{
5248 		return _root ? unspecified_bool_xml_node : 0;
5249 	}
5250 
operator !() const5251 	PUGI__FN bool xml_node::operator!() const
5252 	{
5253 		return !_root;
5254 	}
5255 
begin() const5256 	PUGI__FN xml_node::iterator xml_node::begin() const
5257 	{
5258 		return iterator(_root ? _root->first_child + 0 : 0, _root);
5259 	}
5260 
end() const5261 	PUGI__FN xml_node::iterator xml_node::end() const
5262 	{
5263 		return iterator(0, _root);
5264 	}
5265 
attributes_begin() const5266 	PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
5267 	{
5268 		return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root);
5269 	}
5270 
attributes_end() const5271 	PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
5272 	{
5273 		return attribute_iterator(0, _root);
5274 	}
5275 
children() const5276 	PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
5277 	{
5278 		return xml_object_range<xml_node_iterator>(begin(), end());
5279 	}
5280 
children(const char_t * name_) const5281 	PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
5282 	{
5283 		return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
5284 	}
5285 
attributes() const5286 	PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
5287 	{
5288 		return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
5289 	}
5290 
operator ==(const xml_node & r) const5291 	PUGI__FN bool xml_node::operator==(const xml_node& r) const
5292 	{
5293 		return (_root == r._root);
5294 	}
5295 
operator !=(const xml_node & r) const5296 	PUGI__FN bool xml_node::operator!=(const xml_node& r) const
5297 	{
5298 		return (_root != r._root);
5299 	}
5300 
operator <(const xml_node & r) const5301 	PUGI__FN bool xml_node::operator<(const xml_node& r) const
5302 	{
5303 		return (_root < r._root);
5304 	}
5305 
operator >(const xml_node & r) const5306 	PUGI__FN bool xml_node::operator>(const xml_node& r) const
5307 	{
5308 		return (_root > r._root);
5309 	}
5310 
operator <=(const xml_node & r) const5311 	PUGI__FN bool xml_node::operator<=(const xml_node& r) const
5312 	{
5313 		return (_root <= r._root);
5314 	}
5315 
operator >=(const xml_node & r) const5316 	PUGI__FN bool xml_node::operator>=(const xml_node& r) const
5317 	{
5318 		return (_root >= r._root);
5319 	}
5320 
empty() const5321 	PUGI__FN bool xml_node::empty() const
5322 	{
5323 		return !_root;
5324 	}
5325 
name() const5326 	PUGI__FN const char_t* xml_node::name() const
5327 	{
5328 		return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT("");
5329 	}
5330 
type() const5331 	PUGI__FN xml_node_type xml_node::type() const
5332 	{
5333 		return _root ? PUGI__NODETYPE(_root) : node_null;
5334 	}
5335 
value() const5336 	PUGI__FN const char_t* xml_node::value() const
5337 	{
5338 		return (_root && _root->value) ? _root->value + 0 : PUGIXML_TEXT("");
5339 	}
5340 
child(const char_t * name_) const5341 	PUGI__FN xml_node xml_node::child(const char_t* name_) const
5342 	{
5343 		if (!_root) return xml_node();
5344 
5345 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5346 			if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5347 
5348 		return xml_node();
5349 	}
5350 
attribute(const char_t * name_) const5351 	PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
5352 	{
5353 		if (!_root) return xml_attribute();
5354 
5355 		for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
5356 			if (i->name && impl::strequal(name_, i->name))
5357 				return xml_attribute(i);
5358 
5359 		return xml_attribute();
5360 	}
5361 
next_sibling(const char_t * name_) const5362 	PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
5363 	{
5364 		if (!_root) return xml_node();
5365 
5366 		for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
5367 			if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5368 
5369 		return xml_node();
5370 	}
5371 
next_sibling() const5372 	PUGI__FN xml_node xml_node::next_sibling() const
5373 	{
5374 		return _root ? xml_node(_root->next_sibling) : xml_node();
5375 	}
5376 
previous_sibling(const char_t * name_) const5377 	PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
5378 	{
5379 		if (!_root) return xml_node();
5380 
5381 		for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
5382 			if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5383 
5384 		return xml_node();
5385 	}
5386 
attribute(const char_t * name_,xml_attribute & hint_) const5387 	PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
5388 	{
5389 		xml_attribute_struct* hint = hint_._attr;
5390 
5391 		// if hint is not an attribute of node, behavior is not defined
5392 		assert(!hint || (_root && impl::is_attribute_of(hint, _root)));
5393 
5394 		if (!_root) return xml_attribute();
5395 
5396 		// optimistically search from hint up until the end
5397 		for (xml_attribute_struct* i = hint; i; i = i->next_attribute)
5398 			if (i->name && impl::strequal(name_, i->name))
5399 			{
5400 				// update hint to maximize efficiency of searching for consecutive attributes
5401 				hint_._attr = i->next_attribute;
5402 
5403 				return xml_attribute(i);
5404 			}
5405 
5406 		// wrap around and search from the first attribute until the hint
5407 		// 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails
5408 		for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute)
5409 			if (j->name && impl::strequal(name_, j->name))
5410 			{
5411 				// update hint to maximize efficiency of searching for consecutive attributes
5412 				hint_._attr = j->next_attribute;
5413 
5414 				return xml_attribute(j);
5415 			}
5416 
5417 		return xml_attribute();
5418 	}
5419 
previous_sibling() const5420 	PUGI__FN xml_node xml_node::previous_sibling() const
5421 	{
5422 		if (!_root) return xml_node();
5423 
5424 		if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c);
5425 		else return xml_node();
5426 	}
5427 
parent() const5428 	PUGI__FN xml_node xml_node::parent() const
5429 	{
5430 		return _root ? xml_node(_root->parent) : xml_node();
5431 	}
5432 
root() const5433 	PUGI__FN xml_node xml_node::root() const
5434 	{
5435 		return _root ? xml_node(&impl::get_document(_root)) : xml_node();
5436 	}
5437 
text() const5438 	PUGI__FN xml_text xml_node::text() const
5439 	{
5440 		return xml_text(_root);
5441 	}
5442 
child_value() const5443 	PUGI__FN const char_t* xml_node::child_value() const
5444 	{
5445 		if (!_root) return PUGIXML_TEXT("");
5446 
5447 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5448 			if (impl::is_text_node(i) && i->value)
5449 				return i->value;
5450 
5451 		return PUGIXML_TEXT("");
5452 	}
5453 
child_value(const char_t * name_) const5454 	PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
5455 	{
5456 		return child(name_).child_value();
5457 	}
5458 
first_attribute() const5459 	PUGI__FN xml_attribute xml_node::first_attribute() const
5460 	{
5461 		return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
5462 	}
5463 
last_attribute() const5464 	PUGI__FN xml_attribute xml_node::last_attribute() const
5465 	{
5466 		return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute();
5467 	}
5468 
first_child() const5469 	PUGI__FN xml_node xml_node::first_child() const
5470 	{
5471 		return _root ? xml_node(_root->first_child) : xml_node();
5472 	}
5473 
last_child() const5474 	PUGI__FN xml_node xml_node::last_child() const
5475 	{
5476 		return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node();
5477 	}
5478 
set_name(const char_t * rhs)5479 	PUGI__FN bool xml_node::set_name(const char_t* rhs)
5480 	{
5481 		xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5482 
5483 		if (type_ != node_element && type_ != node_pi && type_ != node_declaration)
5484 			return false;
5485 
5486 		return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5487 	}
5488 
set_value(const char_t * rhs)5489 	PUGI__FN bool xml_node::set_value(const char_t* rhs)
5490 	{
5491 		xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5492 
5493 		if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
5494 			return false;
5495 
5496 		return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5497 	}
5498 
append_attribute(const char_t * name_)5499 	PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
5500 	{
5501 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5502 
5503 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5504 		if (!alloc.reserve()) return xml_attribute();
5505 
5506 		xml_attribute a(impl::allocate_attribute(alloc));
5507 		if (!a) return xml_attribute();
5508 
5509 		impl::append_attribute(a._attr, _root);
5510 
5511 		a.set_name(name_);
5512 
5513 		return a;
5514 	}
5515 
prepend_attribute(const char_t * name_)5516 	PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
5517 	{
5518 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5519 
5520 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5521 		if (!alloc.reserve()) return xml_attribute();
5522 
5523 		xml_attribute a(impl::allocate_attribute(alloc));
5524 		if (!a) return xml_attribute();
5525 
5526 		impl::prepend_attribute(a._attr, _root);
5527 
5528 		a.set_name(name_);
5529 
5530 		return a;
5531 	}
5532 
insert_attribute_after(const char_t * name_,const xml_attribute & attr)5533 	PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
5534 	{
5535 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5536 		if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5537 
5538 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5539 		if (!alloc.reserve()) return xml_attribute();
5540 
5541 		xml_attribute a(impl::allocate_attribute(alloc));
5542 		if (!a) return xml_attribute();
5543 
5544 		impl::insert_attribute_after(a._attr, attr._attr, _root);
5545 
5546 		a.set_name(name_);
5547 
5548 		return a;
5549 	}
5550 
insert_attribute_before(const char_t * name_,const xml_attribute & attr)5551 	PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
5552 	{
5553 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5554 		if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5555 
5556 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5557 		if (!alloc.reserve()) return xml_attribute();
5558 
5559 		xml_attribute a(impl::allocate_attribute(alloc));
5560 		if (!a) return xml_attribute();
5561 
5562 		impl::insert_attribute_before(a._attr, attr._attr, _root);
5563 
5564 		a.set_name(name_);
5565 
5566 		return a;
5567 	}
5568 
append_copy(const xml_attribute & proto)5569 	PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
5570 	{
5571 		if (!proto) return xml_attribute();
5572 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5573 
5574 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5575 		if (!alloc.reserve()) return xml_attribute();
5576 
5577 		xml_attribute a(impl::allocate_attribute(alloc));
5578 		if (!a) return xml_attribute();
5579 
5580 		impl::append_attribute(a._attr, _root);
5581 		impl::node_copy_attribute(a._attr, proto._attr);
5582 
5583 		return a;
5584 	}
5585 
prepend_copy(const xml_attribute & proto)5586 	PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
5587 	{
5588 		if (!proto) return xml_attribute();
5589 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5590 
5591 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5592 		if (!alloc.reserve()) return xml_attribute();
5593 
5594 		xml_attribute a(impl::allocate_attribute(alloc));
5595 		if (!a) return xml_attribute();
5596 
5597 		impl::prepend_attribute(a._attr, _root);
5598 		impl::node_copy_attribute(a._attr, proto._attr);
5599 
5600 		return a;
5601 	}
5602 
insert_copy_after(const xml_attribute & proto,const xml_attribute & attr)5603 	PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
5604 	{
5605 		if (!proto) return xml_attribute();
5606 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5607 		if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5608 
5609 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5610 		if (!alloc.reserve()) return xml_attribute();
5611 
5612 		xml_attribute a(impl::allocate_attribute(alloc));
5613 		if (!a) return xml_attribute();
5614 
5615 		impl::insert_attribute_after(a._attr, attr._attr, _root);
5616 		impl::node_copy_attribute(a._attr, proto._attr);
5617 
5618 		return a;
5619 	}
5620 
insert_copy_before(const xml_attribute & proto,const xml_attribute & attr)5621 	PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
5622 	{
5623 		if (!proto) return xml_attribute();
5624 		if (!impl::allow_insert_attribute(type())) return xml_attribute();
5625 		if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5626 
5627 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5628 		if (!alloc.reserve()) return xml_attribute();
5629 
5630 		xml_attribute a(impl::allocate_attribute(alloc));
5631 		if (!a) return xml_attribute();
5632 
5633 		impl::insert_attribute_before(a._attr, attr._attr, _root);
5634 		impl::node_copy_attribute(a._attr, proto._attr);
5635 
5636 		return a;
5637 	}
5638 
append_child(xml_node_type type_)5639 	PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
5640 	{
5641 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5642 
5643 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5644 		if (!alloc.reserve()) return xml_node();
5645 
5646 		xml_node n(impl::allocate_node(alloc, type_));
5647 		if (!n) return xml_node();
5648 
5649 		impl::append_node(n._root, _root);
5650 
5651 		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5652 
5653 		return n;
5654 	}
5655 
prepend_child(xml_node_type type_)5656 	PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
5657 	{
5658 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5659 
5660 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5661 		if (!alloc.reserve()) return xml_node();
5662 
5663 		xml_node n(impl::allocate_node(alloc, type_));
5664 		if (!n) return xml_node();
5665 
5666 		impl::prepend_node(n._root, _root);
5667 
5668 		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5669 
5670 		return n;
5671 	}
5672 
insert_child_before(xml_node_type type_,const xml_node & node)5673 	PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
5674 	{
5675 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5676 		if (!node._root || node._root->parent != _root) return xml_node();
5677 
5678 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5679 		if (!alloc.reserve()) return xml_node();
5680 
5681 		xml_node n(impl::allocate_node(alloc, type_));
5682 		if (!n) return xml_node();
5683 
5684 		impl::insert_node_before(n._root, node._root);
5685 
5686 		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5687 
5688 		return n;
5689 	}
5690 
insert_child_after(xml_node_type type_,const xml_node & node)5691 	PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
5692 	{
5693 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5694 		if (!node._root || node._root->parent != _root) return xml_node();
5695 
5696 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5697 		if (!alloc.reserve()) return xml_node();
5698 
5699 		xml_node n(impl::allocate_node(alloc, type_));
5700 		if (!n) return xml_node();
5701 
5702 		impl::insert_node_after(n._root, node._root);
5703 
5704 		if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5705 
5706 		return n;
5707 	}
5708 
append_child(const char_t * name_)5709 	PUGI__FN xml_node xml_node::append_child(const char_t* name_)
5710 	{
5711 		xml_node result = append_child(node_element);
5712 
5713 		result.set_name(name_);
5714 
5715 		return result;
5716 	}
5717 
prepend_child(const char_t * name_)5718 	PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
5719 	{
5720 		xml_node result = prepend_child(node_element);
5721 
5722 		result.set_name(name_);
5723 
5724 		return result;
5725 	}
5726 
insert_child_after(const char_t * name_,const xml_node & node)5727 	PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
5728 	{
5729 		xml_node result = insert_child_after(node_element, node);
5730 
5731 		result.set_name(name_);
5732 
5733 		return result;
5734 	}
5735 
insert_child_before(const char_t * name_,const xml_node & node)5736 	PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
5737 	{
5738 		xml_node result = insert_child_before(node_element, node);
5739 
5740 		result.set_name(name_);
5741 
5742 		return result;
5743 	}
5744 
append_copy(const xml_node & proto)5745 	PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
5746 	{
5747 		xml_node_type type_ = proto.type();
5748 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5749 
5750 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5751 		if (!alloc.reserve()) return xml_node();
5752 
5753 		xml_node n(impl::allocate_node(alloc, type_));
5754 		if (!n) return xml_node();
5755 
5756 		impl::append_node(n._root, _root);
5757 		impl::node_copy_tree(n._root, proto._root);
5758 
5759 		return n;
5760 	}
5761 
prepend_copy(const xml_node & proto)5762 	PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
5763 	{
5764 		xml_node_type type_ = proto.type();
5765 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5766 
5767 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5768 		if (!alloc.reserve()) return xml_node();
5769 
5770 		xml_node n(impl::allocate_node(alloc, type_));
5771 		if (!n) return xml_node();
5772 
5773 		impl::prepend_node(n._root, _root);
5774 		impl::node_copy_tree(n._root, proto._root);
5775 
5776 		return n;
5777 	}
5778 
insert_copy_after(const xml_node & proto,const xml_node & node)5779 	PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
5780 	{
5781 		xml_node_type type_ = proto.type();
5782 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5783 		if (!node._root || node._root->parent != _root) return xml_node();
5784 
5785 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5786 		if (!alloc.reserve()) return xml_node();
5787 
5788 		xml_node n(impl::allocate_node(alloc, type_));
5789 		if (!n) return xml_node();
5790 
5791 		impl::insert_node_after(n._root, node._root);
5792 		impl::node_copy_tree(n._root, proto._root);
5793 
5794 		return n;
5795 	}
5796 
insert_copy_before(const xml_node & proto,const xml_node & node)5797 	PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
5798 	{
5799 		xml_node_type type_ = proto.type();
5800 		if (!impl::allow_insert_child(type(), type_)) return xml_node();
5801 		if (!node._root || node._root->parent != _root) return xml_node();
5802 
5803 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5804 		if (!alloc.reserve()) return xml_node();
5805 
5806 		xml_node n(impl::allocate_node(alloc, type_));
5807 		if (!n) return xml_node();
5808 
5809 		impl::insert_node_before(n._root, node._root);
5810 		impl::node_copy_tree(n._root, proto._root);
5811 
5812 		return n;
5813 	}
5814 
append_move(const xml_node & moved)5815 	PUGI__FN xml_node xml_node::append_move(const xml_node& moved)
5816 	{
5817 		if (!impl::allow_move(*this, moved)) return xml_node();
5818 
5819 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5820 		if (!alloc.reserve()) return xml_node();
5821 
5822 		// disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5823 		impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
5824 
5825 		impl::remove_node(moved._root);
5826 		impl::append_node(moved._root, _root);
5827 
5828 		return moved;
5829 	}
5830 
prepend_move(const xml_node & moved)5831 	PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved)
5832 	{
5833 		if (!impl::allow_move(*this, moved)) return xml_node();
5834 
5835 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5836 		if (!alloc.reserve()) return xml_node();
5837 
5838 		// disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5839 		impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
5840 
5841 		impl::remove_node(moved._root);
5842 		impl::prepend_node(moved._root, _root);
5843 
5844 		return moved;
5845 	}
5846 
insert_move_after(const xml_node & moved,const xml_node & node)5847 	PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
5848 	{
5849 		if (!impl::allow_move(*this, moved)) return xml_node();
5850 		if (!node._root || node._root->parent != _root) return xml_node();
5851 		if (moved._root == node._root) return xml_node();
5852 
5853 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5854 		if (!alloc.reserve()) return xml_node();
5855 
5856 		// disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5857 		impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
5858 
5859 		impl::remove_node(moved._root);
5860 		impl::insert_node_after(moved._root, node._root);
5861 
5862 		return moved;
5863 	}
5864 
insert_move_before(const xml_node & moved,const xml_node & node)5865 	PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
5866 	{
5867 		if (!impl::allow_move(*this, moved)) return xml_node();
5868 		if (!node._root || node._root->parent != _root) return xml_node();
5869 		if (moved._root == node._root) return xml_node();
5870 
5871 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5872 		if (!alloc.reserve()) return xml_node();
5873 
5874 		// disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
5875 		impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
5876 
5877 		impl::remove_node(moved._root);
5878 		impl::insert_node_before(moved._root, node._root);
5879 
5880 		return moved;
5881 	}
5882 
remove_attribute(const char_t * name_)5883 	PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
5884 	{
5885 		return remove_attribute(attribute(name_));
5886 	}
5887 
remove_attribute(const xml_attribute & a)5888 	PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
5889 	{
5890 		if (!_root || !a._attr) return false;
5891 		if (!impl::is_attribute_of(a._attr, _root)) return false;
5892 
5893 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5894 		if (!alloc.reserve()) return false;
5895 
5896 		impl::remove_attribute(a._attr, _root);
5897 		impl::destroy_attribute(a._attr, alloc);
5898 
5899 		return true;
5900 	}
5901 
remove_child(const char_t * name_)5902 	PUGI__FN bool xml_node::remove_child(const char_t* name_)
5903 	{
5904 		return remove_child(child(name_));
5905 	}
5906 
remove_child(const xml_node & n)5907 	PUGI__FN bool xml_node::remove_child(const xml_node& n)
5908 	{
5909 		if (!_root || !n._root || n._root->parent != _root) return false;
5910 
5911 		impl::xml_allocator& alloc = impl::get_allocator(_root);
5912 		if (!alloc.reserve()) return false;
5913 
5914 		impl::remove_node(n._root);
5915 		impl::destroy_node(n._root, alloc);
5916 
5917 		return true;
5918 	}
5919 
append_buffer(const void * contents,size_t size,unsigned int options,xml_encoding encoding)5920 	PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
5921 	{
5922 		// append_buffer is only valid for elements/documents
5923 		if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
5924 
5925 		// get document node
5926 		impl::xml_document_struct* doc = &impl::get_document(_root);
5927 
5928 		// disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
5929 		doc->header |= impl::xml_memory_page_contents_shared_mask;
5930 
5931 		// get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
5932 		impl::xml_memory_page* page = 0;
5933 		impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer), page));
5934 		(void)page;
5935 
5936 		if (!extra) return impl::make_parse_result(status_out_of_memory);
5937 
5938 		// add extra buffer to the list
5939 		extra->buffer = 0;
5940 		extra->next = doc->extra_buffers;
5941 		doc->extra_buffers = extra;
5942 
5943 		// name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
5944 		impl::name_null_sentry sentry(_root);
5945 
5946 		return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer);
5947 	}
5948 
find_child_by_attribute(const char_t * name_,const char_t * attr_name,const char_t * attr_value) const5949 	PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
5950 	{
5951 		if (!_root) return xml_node();
5952 
5953 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5954 			if (i->name && impl::strequal(name_, i->name))
5955 			{
5956 				for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
5957 					if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
5958 						return xml_node(i);
5959 			}
5960 
5961 		return xml_node();
5962 	}
5963 
find_child_by_attribute(const char_t * attr_name,const char_t * attr_value) const5964 	PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
5965 	{
5966 		if (!_root) return xml_node();
5967 
5968 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5969 			for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
5970 				if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT("")))
5971 					return xml_node(i);
5972 
5973 		return xml_node();
5974 	}
5975 
5976 #ifndef PUGIXML_NO_STL
path(char_t delimiter) const5977 	PUGI__FN string_t xml_node::path(char_t delimiter) const
5978 	{
5979 		if (!_root) return string_t();
5980 
5981 		size_t offset = 0;
5982 
5983 		for (xml_node_struct* i = _root; i; i = i->parent)
5984 		{
5985 			offset += (i != _root);
5986 			offset += i->name ? impl::strlength(i->name) : 0;
5987 		}
5988 
5989 		string_t result;
5990 		result.resize(offset);
5991 
5992 		for (xml_node_struct* j = _root; j; j = j->parent)
5993 		{
5994 			if (j != _root)
5995 				result[--offset] = delimiter;
5996 
5997 			if (j->name && *j->name)
5998 			{
5999 				size_t length = impl::strlength(j->name);
6000 
6001 				offset -= length;
6002 				memcpy(&result[offset], j->name, length * sizeof(char_t));
6003 			}
6004 		}
6005 
6006 		assert(offset == 0);
6007 
6008 		return result;
6009 	}
6010 #endif
6011 
first_element_by_path(const char_t * path_,char_t delimiter) const6012 	PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
6013 	{
6014 		xml_node found = *this; // Current search context.
6015 
6016 		if (!_root || !path_ || !path_[0]) return found;
6017 
6018 		if (path_[0] == delimiter)
6019 		{
6020 			// Absolute path; e.g. '/foo/bar'
6021 			found = found.root();
6022 			++path_;
6023 		}
6024 
6025 		const char_t* path_segment = path_;
6026 
6027 		while (*path_segment == delimiter) ++path_segment;
6028 
6029 		const char_t* path_segment_end = path_segment;
6030 
6031 		while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
6032 
6033 		if (path_segment == path_segment_end) return found;
6034 
6035 		const char_t* next_segment = path_segment_end;
6036 
6037 		while (*next_segment == delimiter) ++next_segment;
6038 
6039 		if (*path_segment == '.' && path_segment + 1 == path_segment_end)
6040 			return found.first_element_by_path(next_segment, delimiter);
6041 		else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
6042 			return found.parent().first_element_by_path(next_segment, delimiter);
6043 		else
6044 		{
6045 			for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling)
6046 			{
6047 				if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
6048 				{
6049 					xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
6050 
6051 					if (subsearch) return subsearch;
6052 				}
6053 			}
6054 
6055 			return xml_node();
6056 		}
6057 	}
6058 
traverse(xml_tree_walker & walker)6059 	PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
6060 	{
6061 		walker._depth = -1;
6062 
6063 		xml_node arg_begin = *this;
6064 		if (!walker.begin(arg_begin)) return false;
6065 
6066 		xml_node cur = first_child();
6067 
6068 		if (cur)
6069 		{
6070 			++walker._depth;
6071 
6072 			do
6073 			{
6074 				xml_node arg_for_each = cur;
6075 				if (!walker.for_each(arg_for_each))
6076 					return false;
6077 
6078 				if (cur.first_child())
6079 				{
6080 					++walker._depth;
6081 					cur = cur.first_child();
6082 				}
6083 				else if (cur.next_sibling())
6084 					cur = cur.next_sibling();
6085 				else
6086 				{
6087 					// Borland C++ workaround
6088 					while (!cur.next_sibling() && cur != *this && !cur.parent().empty())
6089 					{
6090 						--walker._depth;
6091 						cur = cur.parent();
6092 					}
6093 
6094 					if (cur != *this)
6095 						cur = cur.next_sibling();
6096 				}
6097 			}
6098 			while (cur && cur != *this);
6099 		}
6100 
6101 		assert(walker._depth == -1);
6102 
6103 		xml_node arg_end = *this;
6104 		return walker.end(arg_end);
6105 	}
6106 
hash_value() const6107 	PUGI__FN size_t xml_node::hash_value() const
6108 	{
6109 		return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
6110 	}
6111 
internal_object() const6112 	PUGI__FN xml_node_struct* xml_node::internal_object() const
6113 	{
6114 		return _root;
6115 	}
6116 
print(xml_writer & writer,const char_t * indent,unsigned int flags,xml_encoding encoding,unsigned int depth) const6117 	PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6118 	{
6119 		if (!_root) return;
6120 
6121 		impl::xml_buffered_writer buffered_writer(writer, encoding);
6122 
6123 		impl::node_output(buffered_writer, _root, indent, flags, depth);
6124 
6125 		buffered_writer.flush();
6126 	}
6127 
6128 #ifndef PUGIXML_NO_STL
print(std::basic_ostream<char,std::char_traits<char>> & stream,const char_t * indent,unsigned int flags,xml_encoding encoding,unsigned int depth) const6129 	PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6130 	{
6131 		xml_writer_stream writer(stream);
6132 
6133 		print(writer, indent, flags, encoding, depth);
6134 	}
6135 
print(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream,const char_t * indent,unsigned int flags,unsigned int depth) const6136 	PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
6137 	{
6138 		xml_writer_stream writer(stream);
6139 
6140 		print(writer, indent, flags, encoding_wchar, depth);
6141 	}
6142 #endif
6143 
offset_debug() const6144 	PUGI__FN ptrdiff_t xml_node::offset_debug() const
6145 	{
6146 		if (!_root) return -1;
6147 
6148 		impl::xml_document_struct& doc = impl::get_document(_root);
6149 
6150 		// we can determine the offset reliably only if there is exactly once parse buffer
6151 		if (!doc.buffer || doc.extra_buffers) return -1;
6152 
6153 		switch (type())
6154 		{
6155 		case node_document:
6156 			return 0;
6157 
6158 		case node_element:
6159 		case node_declaration:
6160 		case node_pi:
6161 			return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1;
6162 
6163 		case node_pcdata:
6164 		case node_cdata:
6165 		case node_comment:
6166 		case node_doctype:
6167 			return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1;
6168 
6169 		default:
6170 			return -1;
6171 		}
6172 	}
6173 
6174 #ifdef __BORLANDC__
operator &&(const xml_node & lhs,bool rhs)6175 	PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
6176 	{
6177 		return (bool)lhs && rhs;
6178 	}
6179 
operator ||(const xml_node & lhs,bool rhs)6180 	PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
6181 	{
6182 		return (bool)lhs || rhs;
6183 	}
6184 #endif
6185 
xml_text(xml_node_struct * root)6186 	PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
6187 	{
6188 	}
6189 
_data() const6190 	PUGI__FN xml_node_struct* xml_text::_data() const
6191 	{
6192 		if (!_root || impl::is_text_node(_root)) return _root;
6193 
6194 		for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
6195 			if (impl::is_text_node(node))
6196 				return node;
6197 
6198 		return 0;
6199 	}
6200 
_data_new()6201 	PUGI__FN xml_node_struct* xml_text::_data_new()
6202 	{
6203 		xml_node_struct* d = _data();
6204 		if (d) return d;
6205 
6206 		return xml_node(_root).append_child(node_pcdata).internal_object();
6207 	}
6208 
xml_text()6209 	PUGI__FN xml_text::xml_text(): _root(0)
6210 	{
6211 	}
6212 
unspecified_bool_xml_text(xml_text ***)6213 	PUGI__FN static void unspecified_bool_xml_text(xml_text***)
6214 	{
6215 	}
6216 
operator xml_text::unspecified_bool_type() const6217 	PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
6218 	{
6219 		return _data() ? unspecified_bool_xml_text : 0;
6220 	}
6221 
operator !() const6222 	PUGI__FN bool xml_text::operator!() const
6223 	{
6224 		return !_data();
6225 	}
6226 
empty() const6227 	PUGI__FN bool xml_text::empty() const
6228 	{
6229 		return _data() == 0;
6230 	}
6231 
get() const6232 	PUGI__FN const char_t* xml_text::get() const
6233 	{
6234 		xml_node_struct* d = _data();
6235 
6236 		return (d && d->value) ? d->value + 0 : PUGIXML_TEXT("");
6237 	}
6238 
as_string(const char_t * def) const6239 	PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
6240 	{
6241 		xml_node_struct* d = _data();
6242 
6243 		return (d && d->value) ? d->value + 0 : def;
6244 	}
6245 
as_int(int def) const6246 	PUGI__FN int xml_text::as_int(int def) const
6247 	{
6248 		xml_node_struct* d = _data();
6249 
6250 		return (d && d->value) ? impl::get_value_int(d->value) : def;
6251 	}
6252 
as_uint(unsigned int def) const6253 	PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
6254 	{
6255 		xml_node_struct* d = _data();
6256 
6257 		return (d && d->value) ? impl::get_value_uint(d->value) : def;
6258 	}
6259 
as_double(double def) const6260 	PUGI__FN double xml_text::as_double(double def) const
6261 	{
6262 		xml_node_struct* d = _data();
6263 
6264 		return (d && d->value) ? impl::get_value_double(d->value) : def;
6265 	}
6266 
as_float(float def) const6267 	PUGI__FN float xml_text::as_float(float def) const
6268 	{
6269 		xml_node_struct* d = _data();
6270 
6271 		return (d && d->value) ? impl::get_value_float(d->value) : def;
6272 	}
6273 
as_bool(bool def) const6274 	PUGI__FN bool xml_text::as_bool(bool def) const
6275 	{
6276 		xml_node_struct* d = _data();
6277 
6278 		return (d && d->value) ? impl::get_value_bool(d->value) : def;
6279 	}
6280 
6281 #ifdef PUGIXML_HAS_LONG_LONG
as_llong(long long def) const6282 	PUGI__FN long long xml_text::as_llong(long long def) const
6283 	{
6284 		xml_node_struct* d = _data();
6285 
6286 		return (d && d->value) ? impl::get_value_llong(d->value) : def;
6287 	}
6288 
as_ullong(unsigned long long def) const6289 	PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
6290 	{
6291 		xml_node_struct* d = _data();
6292 
6293 		return (d && d->value) ? impl::get_value_ullong(d->value) : def;
6294 	}
6295 #endif
6296 
set(const char_t * rhs)6297 	PUGI__FN bool xml_text::set(const char_t* rhs)
6298 	{
6299 		xml_node_struct* dn = _data_new();
6300 
6301 		return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false;
6302 	}
6303 
set(int rhs)6304 	PUGI__FN bool xml_text::set(int rhs)
6305 	{
6306 		xml_node_struct* dn = _data_new();
6307 
6308 		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6309 	}
6310 
set(unsigned int rhs)6311 	PUGI__FN bool xml_text::set(unsigned int rhs)
6312 	{
6313 		xml_node_struct* dn = _data_new();
6314 
6315 		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6316 	}
6317 
set(float rhs)6318 	PUGI__FN bool xml_text::set(float rhs)
6319 	{
6320 		xml_node_struct* dn = _data_new();
6321 
6322 		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6323 	}
6324 
set(double rhs)6325 	PUGI__FN bool xml_text::set(double rhs)
6326 	{
6327 		xml_node_struct* dn = _data_new();
6328 
6329 		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6330 	}
6331 
set(bool rhs)6332 	PUGI__FN bool xml_text::set(bool rhs)
6333 	{
6334 		xml_node_struct* dn = _data_new();
6335 
6336 		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6337 	}
6338 
6339 #ifdef PUGIXML_HAS_LONG_LONG
set(long long rhs)6340 	PUGI__FN bool xml_text::set(long long rhs)
6341 	{
6342 		xml_node_struct* dn = _data_new();
6343 
6344 		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6345 	}
6346 
set(unsigned long long rhs)6347 	PUGI__FN bool xml_text::set(unsigned long long rhs)
6348 	{
6349 		xml_node_struct* dn = _data_new();
6350 
6351 		return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6352 	}
6353 #endif
6354 
operator =(const char_t * rhs)6355 	PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
6356 	{
6357 		set(rhs);
6358 		return *this;
6359 	}
6360 
operator =(int rhs)6361 	PUGI__FN xml_text& xml_text::operator=(int rhs)
6362 	{
6363 		set(rhs);
6364 		return *this;
6365 	}
6366 
operator =(unsigned int rhs)6367 	PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
6368 	{
6369 		set(rhs);
6370 		return *this;
6371 	}
6372 
operator =(double rhs)6373 	PUGI__FN xml_text& xml_text::operator=(double rhs)
6374 	{
6375 		set(rhs);
6376 		return *this;
6377 	}
6378 
operator =(float rhs)6379 	PUGI__FN xml_text& xml_text::operator=(float rhs)
6380 	{
6381 		set(rhs);
6382 		return *this;
6383 	}
6384 
operator =(bool rhs)6385 	PUGI__FN xml_text& xml_text::operator=(bool rhs)
6386 	{
6387 		set(rhs);
6388 		return *this;
6389 	}
6390 
6391 #ifdef PUGIXML_HAS_LONG_LONG
operator =(long long rhs)6392 	PUGI__FN xml_text& xml_text::operator=(long long rhs)
6393 	{
6394 		set(rhs);
6395 		return *this;
6396 	}
6397 
operator =(unsigned long long rhs)6398 	PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
6399 	{
6400 		set(rhs);
6401 		return *this;
6402 	}
6403 #endif
6404 
data() const6405 	PUGI__FN xml_node xml_text::data() const
6406 	{
6407 		return xml_node(_data());
6408 	}
6409 
6410 #ifdef __BORLANDC__
operator &&(const xml_text & lhs,bool rhs)6411 	PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
6412 	{
6413 		return (bool)lhs && rhs;
6414 	}
6415 
operator ||(const xml_text & lhs,bool rhs)6416 	PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
6417 	{
6418 		return (bool)lhs || rhs;
6419 	}
6420 #endif
6421 
xml_node_iterator()6422 	PUGI__FN xml_node_iterator::xml_node_iterator()
6423 	{
6424 	}
6425 
xml_node_iterator(const xml_node & node)6426 	PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
6427 	{
6428 	}
6429 
xml_node_iterator(xml_node_struct * ref,xml_node_struct * parent)6430 	PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
6431 	{
6432 	}
6433 
operator ==(const xml_node_iterator & rhs) const6434 	PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
6435 	{
6436 		return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6437 	}
6438 
operator !=(const xml_node_iterator & rhs) const6439 	PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
6440 	{
6441 		return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6442 	}
6443 
operator *() const6444 	PUGI__FN xml_node& xml_node_iterator::operator*() const
6445 	{
6446 		assert(_wrap._root);
6447 		return _wrap;
6448 	}
6449 
operator ->() const6450 	PUGI__FN xml_node* xml_node_iterator::operator->() const
6451 	{
6452 		assert(_wrap._root);
6453 		return const_cast<xml_node*>(&_wrap); // BCC32 workaround
6454 	}
6455 
operator ++()6456 	PUGI__FN const xml_node_iterator& xml_node_iterator::operator++()
6457 	{
6458 		assert(_wrap._root);
6459 		_wrap._root = _wrap._root->next_sibling;
6460 		return *this;
6461 	}
6462 
operator ++(int)6463 	PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
6464 	{
6465 		xml_node_iterator temp = *this;
6466 		++*this;
6467 		return temp;
6468 	}
6469 
operator --()6470 	PUGI__FN const xml_node_iterator& xml_node_iterator::operator--()
6471 	{
6472 		_wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
6473 		return *this;
6474 	}
6475 
operator --(int)6476 	PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
6477 	{
6478 		xml_node_iterator temp = *this;
6479 		--*this;
6480 		return temp;
6481 	}
6482 
xml_attribute_iterator()6483 	PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
6484 	{
6485 	}
6486 
xml_attribute_iterator(const xml_attribute & attr,const xml_node & parent)6487 	PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
6488 	{
6489 	}
6490 
xml_attribute_iterator(xml_attribute_struct * ref,xml_node_struct * parent)6491 	PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
6492 	{
6493 	}
6494 
operator ==(const xml_attribute_iterator & rhs) const6495 	PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
6496 	{
6497 		return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
6498 	}
6499 
operator !=(const xml_attribute_iterator & rhs) const6500 	PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
6501 	{
6502 		return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
6503 	}
6504 
operator *() const6505 	PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
6506 	{
6507 		assert(_wrap._attr);
6508 		return _wrap;
6509 	}
6510 
operator ->() const6511 	PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
6512 	{
6513 		assert(_wrap._attr);
6514 		return const_cast<xml_attribute*>(&_wrap); // BCC32 workaround
6515 	}
6516 
operator ++()6517 	PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
6518 	{
6519 		assert(_wrap._attr);
6520 		_wrap._attr = _wrap._attr->next_attribute;
6521 		return *this;
6522 	}
6523 
operator ++(int)6524 	PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
6525 	{
6526 		xml_attribute_iterator temp = *this;
6527 		++*this;
6528 		return temp;
6529 	}
6530 
operator --()6531 	PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
6532 	{
6533 		_wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
6534 		return *this;
6535 	}
6536 
operator --(int)6537 	PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
6538 	{
6539 		xml_attribute_iterator temp = *this;
6540 		--*this;
6541 		return temp;
6542 	}
6543 
xml_named_node_iterator()6544 	PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
6545 	{
6546 	}
6547 
xml_named_node_iterator(const xml_node & node,const char_t * name)6548 	PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
6549 	{
6550 	}
6551 
xml_named_node_iterator(xml_node_struct * ref,xml_node_struct * parent,const char_t * name)6552 	PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
6553 	{
6554 	}
6555 
operator ==(const xml_named_node_iterator & rhs) const6556 	PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
6557 	{
6558 		return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6559 	}
6560 
operator !=(const xml_named_node_iterator & rhs) const6561 	PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
6562 	{
6563 		return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6564 	}
6565 
operator *() const6566 	PUGI__FN xml_node& xml_named_node_iterator::operator*() const
6567 	{
6568 		assert(_wrap._root);
6569 		return _wrap;
6570 	}
6571 
operator ->() const6572 	PUGI__FN xml_node* xml_named_node_iterator::operator->() const
6573 	{
6574 		assert(_wrap._root);
6575 		return const_cast<xml_node*>(&_wrap); // BCC32 workaround
6576 	}
6577 
operator ++()6578 	PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
6579 	{
6580 		assert(_wrap._root);
6581 		_wrap = _wrap.next_sibling(_name);
6582 		return *this;
6583 	}
6584 
operator ++(int)6585 	PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
6586 	{
6587 		xml_named_node_iterator temp = *this;
6588 		++*this;
6589 		return temp;
6590 	}
6591 
operator --()6592 	PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--()
6593 	{
6594 		if (_wrap._root)
6595 			_wrap = _wrap.previous_sibling(_name);
6596 		else
6597 		{
6598 			_wrap = _parent.last_child();
6599 
6600 			if (!impl::strequal(_wrap.name(), _name))
6601 				_wrap = _wrap.previous_sibling(_name);
6602 		}
6603 
6604 		return *this;
6605 	}
6606 
operator --(int)6607 	PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
6608 	{
6609 		xml_named_node_iterator temp = *this;
6610 		--*this;
6611 		return temp;
6612 	}
6613 
xml_parse_result()6614 	PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
6615 	{
6616 	}
6617 
operator bool() const6618 	PUGI__FN xml_parse_result::operator bool() const
6619 	{
6620 		return status == status_ok;
6621 	}
6622 
description() const6623 	PUGI__FN const char* xml_parse_result::description() const
6624 	{
6625 		switch (status)
6626 		{
6627 		case status_ok: return "No error";
6628 
6629 		case status_file_not_found: return "File was not found";
6630 		case status_io_error: return "Error reading from file/stream";
6631 		case status_out_of_memory: return "Could not allocate memory";
6632 		case status_internal_error: return "Internal error occurred";
6633 
6634 		case status_unrecognized_tag: return "Could not determine tag type";
6635 
6636 		case status_bad_pi: return "Error parsing document declaration/processing instruction";
6637 		case status_bad_comment: return "Error parsing comment";
6638 		case status_bad_cdata: return "Error parsing CDATA section";
6639 		case status_bad_doctype: return "Error parsing document type declaration";
6640 		case status_bad_pcdata: return "Error parsing PCDATA section";
6641 		case status_bad_start_element: return "Error parsing start element tag";
6642 		case status_bad_attribute: return "Error parsing element attribute";
6643 		case status_bad_end_element: return "Error parsing end element tag";
6644 		case status_end_element_mismatch: return "Start-end tags mismatch";
6645 
6646 		case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
6647 
6648 		case status_no_document_element: return "No document element found";
6649 
6650 		default: return "Unknown error";
6651 		}
6652 	}
6653 
xml_document()6654 	PUGI__FN xml_document::xml_document(): _buffer(0)
6655 	{
6656 		create();
6657 	}
6658 
~xml_document()6659 	PUGI__FN xml_document::~xml_document()
6660 	{
6661 		destroy();
6662 	}
6663 
reset()6664 	PUGI__FN void xml_document::reset()
6665 	{
6666 		destroy();
6667 		create();
6668 	}
6669 
reset(const xml_document & proto)6670 	PUGI__FN void xml_document::reset(const xml_document& proto)
6671 	{
6672 		reset();
6673 
6674 		for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
6675 			append_copy(cur);
6676 	}
6677 
	// Sets up the document root inside the embedded _memory buffer: a memory page header is constructed
	// at an aligned address within _memory, and the xml_document_struct is placement-constructed right
	// after it. Expects _root to be null on entry (asserted).
	PUGI__FN void xml_document::create()
	{
		assert(!_root);

	#ifdef PUGIXML_COMPACT
		// compact mode places a uint32_t page marker between the page header and the document struct
		const size_t page_offset = sizeof(uint32_t);
	#else
		const size_t page_offset = 0;
	#endif

		// initialize sentinel page
		// (compile-time check that page header, document struct, alignment padding and marker fit into _memory)
		PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment - sizeof(void*) + page_offset <= sizeof(_memory));

		// align upwards to page boundary
		void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(_memory) + (impl::xml_memory_page_alignment - 1)) & ~(impl::xml_memory_page_alignment - 1));

		// prepare page structure
		impl::xml_memory_page* page = impl::xml_memory_page::construct(page_memory);
		assert(page);

		// NOTE(review): busy_size is set to the full page size, presumably so the allocator treats
		// this page as having no free space - confirm against the allocator implementation
		page->busy_size = impl::xml_memory_page_size;

		// setup first page marker
	#ifdef PUGIXML_COMPACT
		// round-trip through void* to avoid 'cast increases required alignment of target type' warning
		page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
		*page->compact_page_marker = sizeof(impl::xml_memory_page);
	#endif

		// allocate new root
		// (placement new directly behind the page header, skipping the marker in compact mode)
		_root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
		// the root's prev_sibling_c is made to point back at the root itself
		_root->prev_sibling_c = _root;

		// setup sentinel page
		page->allocator = static_cast<impl::xml_document_struct*>(_root);

		// verify the document allocation
		// (the document struct must end within the _memory buffer)
		assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
	}
6717 
destroy()6718 	PUGI__FN void xml_document::destroy()
6719 	{
6720 		assert(_root);
6721 
6722 		// destroy static storage
6723 		if (_buffer)
6724 		{
6725 			impl::xml_memory::deallocate(_buffer);
6726 			_buffer = 0;
6727 		}
6728 
6729 		// destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
6730 		for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
6731 		{
6732 			if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
6733 		}
6734 
6735 		// destroy dynamic storage, leave sentinel page (it's in static memory)
6736 		impl::xml_memory_page* root_page = PUGI__GETPAGE(_root);
6737 		assert(root_page && !root_page->prev);
6738 		assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
6739 
6740 		for (impl::xml_memory_page* page = root_page->next; page; )
6741 		{
6742 			impl::xml_memory_page* next = page->next;
6743 
6744 			impl::xml_allocator::deallocate_page(page);
6745 
6746 			page = next;
6747 		}
6748 
6749 	#ifdef PUGIXML_COMPACT
6750 		// destroy hash table
6751 		static_cast<impl::xml_document_struct*>(_root)->hash.clear();
6752 	#endif
6753 
6754 		_root = 0;
6755 	}
6756 
6757 #ifndef PUGIXML_NO_STL
load(std::basic_istream<char,std::char_traits<char>> & stream,unsigned int options,xml_encoding encoding)6758 	PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
6759 	{
6760 		reset();
6761 
6762 		return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer);
6763 	}
6764 
load(std::basic_istream<wchar_t,std::char_traits<wchar_t>> & stream,unsigned int options)6765 	PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
6766 	{
6767 		reset();
6768 
6769 		return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
6770 	}
6771 #endif
6772 
load_string(const char_t * contents,unsigned int options)6773 	PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
6774 	{
6775 		// Force native encoding (skip autodetection)
6776 	#ifdef PUGIXML_WCHAR_MODE
6777 		xml_encoding encoding = encoding_wchar;
6778 	#else
6779 		xml_encoding encoding = encoding_utf8;
6780 	#endif
6781 
6782 		return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
6783 	}
6784 
load(const char_t * contents,unsigned int options)6785 	PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
6786 	{
6787 		return load_string(contents, options);
6788 	}
6789 
load_file(const char * path_,unsigned int options,xml_encoding encoding)6790 	PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
6791 	{
6792 		reset();
6793 
6794 		using impl::auto_deleter; // MSVC7 workaround
6795 		auto_deleter<FILE, int(*)(FILE*)> file(fopen(path_, "rb"), fclose);
6796 
6797 		return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
6798 	}
6799 
load_file(const wchar_t * path_,unsigned int options,xml_encoding encoding)6800 	PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
6801 	{
6802 		reset();
6803 
6804 		using impl::auto_deleter; // MSVC7 workaround
6805 		auto_deleter<FILE, int(*)(FILE*)> file(impl::open_file_wide(path_, L"rb"), fclose);
6806 
6807 		return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
6808 	}
6809 
load_buffer(const void * contents,size_t size,unsigned int options,xml_encoding encoding)6810 	PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
6811 	{
6812 		reset();
6813 
6814 		return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
6815 	}
6816 
load_buffer_inplace(void * contents,size_t size,unsigned int options,xml_encoding encoding)6817 	PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
6818 	{
6819 		reset();
6820 
6821 		return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
6822 	}
6823 
load_buffer_inplace_own(void * contents,size_t size,unsigned int options,xml_encoding encoding)6824 	PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
6825 	{
6826 		reset();
6827 
6828 		return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
6829 	}
6830 
save(xml_writer & writer,const char_t * indent,unsigned int flags,xml_encoding encoding) const6831 	PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
6832 	{
6833 		impl::xml_buffered_writer buffered_writer(writer, encoding);
6834 
6835 		if ((flags & format_write_bom) && encoding != encoding_latin1)
6836 		{
6837 			// BOM always represents the codepoint U+FEFF, so just write it in native encoding
6838 		#ifdef PUGIXML_WCHAR_MODE
6839 			unsigned int bom = 0xfeff;
6840 			buffered_writer.write(static_cast<wchar_t>(bom));
6841 		#else
6842 			buffered_writer.write('\xef', '\xbb', '\xbf');
6843 		#endif
6844 		}
6845 
6846 		if (!(flags & format_no_declaration) && !impl::has_declaration(_root))
6847 		{
6848 			buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
6849 			if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
6850 			buffered_writer.write('?', '>');
6851 			if (!(flags & format_raw)) buffered_writer.write('\n');
6852 		}
6853 
6854 		impl::node_output(buffered_writer, _root, indent, flags, 0);
6855 
6856 		buffered_writer.flush();
6857 	}
6858 
6859 #ifndef PUGIXML_NO_STL
save(std::basic_ostream<char,std::char_traits<char>> & stream,const char_t * indent,unsigned int flags,xml_encoding encoding) const6860 	PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
6861 	{
6862 		xml_writer_stream writer(stream);
6863 
6864 		save(writer, indent, flags, encoding);
6865 	}
6866 
save(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream,const char_t * indent,unsigned int flags) const6867 	PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
6868 	{
6869 		xml_writer_stream writer(stream);
6870 
6871 		save(writer, indent, flags, encoding_wchar);
6872 	}
6873 #endif
6874 
save_file(const char * path_,const char_t * indent,unsigned int flags,xml_encoding encoding) const6875 	PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
6876 	{
6877 		using impl::auto_deleter; // MSVC7 workaround
6878 		auto_deleter<FILE, int(*)(FILE*)> file(fopen(path_, (flags & format_save_file_text) ? "w" : "wb"), fclose);
6879 
6880 		return impl::save_file_impl(*this, file.data, indent, flags, encoding);
6881 	}
6882 
save_file(const wchar_t * path_,const char_t * indent,unsigned int flags,xml_encoding encoding) const6883 	PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
6884 	{
6885 		using impl::auto_deleter; // MSVC7 workaround
6886 		auto_deleter<FILE, int(*)(FILE*)> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), fclose);
6887 
6888 		return impl::save_file_impl(*this, file.data, indent, flags, encoding);
6889 	}
6890 
document_element() const6891 	PUGI__FN xml_node xml_document::document_element() const
6892 	{
6893 		assert(_root);
6894 
6895 		for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6896 			if (PUGI__NODETYPE(i) == node_element)
6897 				return xml_node(i);
6898 
6899 		return xml_node();
6900 	}
6901 
6902 #ifndef PUGIXML_NO_STL
as_utf8(const wchar_t * str)6903 	PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
6904 	{
6905 		assert(str);
6906 
6907 		return impl::as_utf8_impl(str, impl::strlength_wide(str));
6908 	}
6909 
as_utf8(const std::basic_string<wchar_t> & str)6910 	PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
6911 	{
6912 		return impl::as_utf8_impl(str.c_str(), str.size());
6913 	}
6914 
as_wide(const char * str)6915 	PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
6916 	{
6917 		assert(str);
6918 
6919 		return impl::as_wide_impl(str, strlen(str));
6920 	}
6921 
as_wide(const std::string & str)6922 	PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
6923 	{
6924 		return impl::as_wide_impl(str.c_str(), str.size());
6925 	}
6926 #endif
6927 
set_memory_management_functions(allocation_function allocate,deallocation_function deallocate)6928 	PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
6929 	{
6930 		impl::xml_memory::allocate = allocate;
6931 		impl::xml_memory::deallocate = deallocate;
6932 	}
6933 
get_memory_allocation_function()6934 	PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
6935 	{
6936 		return impl::xml_memory::allocate;
6937 	}
6938 
get_memory_deallocation_function()6939 	PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
6940 	{
6941 		return impl::xml_memory::deallocate;
6942 	}
6943 }
6944 
6945 #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
namespace std
{
	// Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
	// Each overload below reports one pugixml iterator type as a bidirectional iterator.

	// Category hook for pugi::xml_node_iterator.
	PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
	{
		return std::bidirectional_iterator_tag();
	}

	// Category hook for pugi::xml_attribute_iterator.
	PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
	{
		return std::bidirectional_iterator_tag();
	}

	// Category hook for pugi::xml_named_node_iterator.
	PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
	{
		return std::bidirectional_iterator_tag();
	}
}
6964 #endif
6965 
6966 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
namespace std
{
	// Workarounds for (non-standard) iterator category detection
	// Each overload below reports one pugixml iterator type as a bidirectional iterator.

	// Category hook for pugi::xml_node_iterator.
	PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
	{
		return std::bidirectional_iterator_tag();
	}

	// Category hook for pugi::xml_attribute_iterator.
	PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
	{
		return std::bidirectional_iterator_tag();
	}

	// Category hook for pugi::xml_named_node_iterator.
	PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
	{
		return std::bidirectional_iterator_tag();
	}
}
6985 #endif
6986 
6987 #ifndef PUGIXML_NO_XPATH
6988 // STL replacements
6989 PUGI__NS_BEGIN
6990 	struct equal_to
6991 	{
operator ()equal_to6992 		template <typename T> bool operator()(const T& lhs, const T& rhs) const
6993 		{
6994 			return lhs == rhs;
6995 		}
6996 	};
6997 
6998 	struct not_equal_to
6999 	{
operator ()not_equal_to7000 		template <typename T> bool operator()(const T& lhs, const T& rhs) const
7001 		{
7002 			return lhs != rhs;
7003 		}
7004 	};
7005 
7006 	struct less
7007 	{
operator ()less7008 		template <typename T> bool operator()(const T& lhs, const T& rhs) const
7009 		{
7010 			return lhs < rhs;
7011 		}
7012 	};
7013 
7014 	struct less_equal
7015 	{
operator ()less_equal7016 		template <typename T> bool operator()(const T& lhs, const T& rhs) const
7017 		{
7018 			return lhs <= rhs;
7019 		}
7020 	};
7021 
swap(T & lhs,T & rhs)7022 	template <typename T> void swap(T& lhs, T& rhs)
7023 	{
7024 		T temp = lhs;
7025 		lhs = rhs;
7026 		rhs = temp;
7027 	}
7028 
7029 	template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
7030 	{
7031 		I result = begin;
7032 
7033 		for (I it = begin + 1; it != end; ++it)
7034 			if (pred(*it, *result))
7035 				result = it;
7036 
7037 		return result;
7038 	}
7039 
reverse(I begin,I end)7040 	template <typename I> void reverse(I begin, I end)
7041 	{
7042 		while (end - begin > 1) swap(*begin++, *--end);
7043 	}
7044 
unique(I begin,I end)7045 	template <typename I> I unique(I begin, I end)
7046 	{
7047 		// fast skip head
7048 		while (end - begin > 1 && *begin != *(begin + 1)) begin++;
7049 
7050 		if (begin == end) return begin;
7051 
7052 		// last written element
7053 		I write = begin++;
7054 
7055 		// merge unique elements
7056 		while (begin != end)
7057 		{
7058 			if (*begin != *write)
7059 				*++write = *begin++;
7060 			else
7061 				begin++;
7062 		}
7063 
7064 		// past-the-end (write points to live element)
7065 		return write + 1;
7066 	}
7067 
copy_backwards(I begin,I end,I target)7068 	template <typename I> void copy_backwards(I begin, I end, I target)
7069 	{
7070 		while (begin != end) *--target = *--end;
7071 	}
7072 
insertion_sort(I begin,I end,const Pred & pred,T *)7073 	template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
7074 	{
7075 		assert(begin != end);
7076 
7077 		for (I it = begin + 1; it != end; ++it)
7078 		{
7079 			T val = *it;
7080 
7081 			if (pred(val, *begin))
7082 			{
7083 				// move to front
7084 				copy_backwards(begin, it, it + 1);
7085 				*begin = val;
7086 			}
7087 			else
7088 			{
7089 				I hole = it;
7090 
7091 				// move hole backwards
7092 				while (pred(val, *(hole - 1)))
7093 				{
7094 					*hole = *(hole - 1);
7095 					hole--;
7096 				}
7097 
7098 				// fill hole with element
7099 				*hole = val;
7100 			}
7101 		}
7102 	}
7103 
	// std variant for elements with ==
	// Three-way partition of [begin, end) around the value at 'middle'. Elements are ordered with
	// 'pred' and tested for equality with operator==. On return, *out_eqbeg and *out_eqend receive
	// the bounds of the run of elements equal to the pivot value; per the caller's description this
	// splits the range into three chunks (< = >).
	template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
	{
		I eqbeg = middle, eqend = middle + 1;

		// expand equal range
		while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg;
		while (eqend != end && *eqend == *eqbeg) ++eqend;

		// process outer elements
		// ltend: end of the not-yet-scanned left part; gtbeg: start of the not-yet-scanned right part
		I ltend = eqbeg, gtbeg = eqend;

		for (;;)
		{
			// find the element from the right side that belongs to the left one
			// (elements equal to the pivot are absorbed into the equal range on the way)
			for (; gtbeg != end; ++gtbeg)
				if (!pred(*eqbeg, *gtbeg))
				{
					if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++);
					else break;
				}

			// find the element from the left side that belongs to the right one
			// (elements equal to the pivot are absorbed into the equal range on the way)
			for (; ltend != begin; --ltend)
				if (!pred(*(ltend - 1), *eqbeg))
				{
					if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg);
					else break;
				}

			// scanned all elements
			if (gtbeg == end && ltend == begin)
			{
				*out_eqbeg = eqbeg;
				*out_eqend = eqend;
				return;
			}

			// make room for elements by moving equal area
			if (gtbeg == end)
			{
				// only a misplaced left element remains: shift the equal range one step to the left
				if (--ltend != --eqbeg) swap(*ltend, *eqbeg);
				swap(*eqbeg, *--eqend);
			}
			else if (ltend == begin)
			{
				// only a misplaced right element remains: shift the equal range one step to the right
				if (eqend != gtbeg) swap(*eqbeg, *eqend);
				++eqend;
				swap(*gtbeg++, *eqbeg++);
			}
			// misplaced elements on both sides: exchange them directly
			else swap(*gtbeg++, *--ltend);
		}
	}
7157 
median3(I first,I middle,I last,const Pred & pred)7158 	template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
7159 	{
7160 		if (pred(*middle, *first)) swap(*middle, *first);
7161 		if (pred(*last, *middle)) swap(*last, *middle);
7162 		if (pred(*middle, *first)) swap(*middle, *first);
7163 	}
7164 
median(I first,I middle,I last,const Pred & pred)7165 	template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
7166 	{
7167 		if (last - first <= 40)
7168 		{
7169 			// median of three for small chunks
7170 			median3(first, middle, last, pred);
7171 		}
7172 		else
7173 		{
7174 			// median of nine
7175 			size_t step = (last - first + 1) / 8;
7176 
7177 			median3(first, first + step, first + 2 * step, pred);
7178 			median3(middle - step, middle, middle + step, pred);
7179 			median3(last - 2 * step, last - step, last, pred);
7180 			median3(first + step, middle, last - step, pred);
7181 		}
7182 	}
7183 
sort(I begin,I end,const Pred & pred)7184 	template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
7185 	{
7186 		// sort large chunks
7187 		while (end - begin > 32)
7188 		{
7189 			// find median element
7190 			I middle = begin + (end - begin) / 2;
7191 			median(begin, middle, end - 1, pred);
7192 
7193 			// partition in three chunks (< = >)
7194 			I eqbeg, eqend;
7195 			partition(begin, middle, end, pred, &eqbeg, &eqend);
7196 
7197 			// loop on larger half
7198 			if (eqbeg - begin > end - eqend)
7199 			{
7200 				sort(eqend, end, pred);
7201 				end = eqbeg;
7202 			}
7203 			else
7204 			{
7205 				sort(begin, eqbeg, pred);
7206 				begin = eqend;
7207 			}
7208 		}
7209 
7210 		// insertion sort small chunk
7211 		if (begin != end) insertion_sort(begin, end, pred, &*begin);
7212 	}
7213 PUGI__NS_END
7214 
7215 // Allocator used for AST and evaluation stacks
7216 PUGI__NS_BEGIN
7217 	static const size_t xpath_memory_page_size =
7218 	#ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
7219 		PUGIXML_MEMORY_XPATH_PAGE_SIZE
7220 	#else
7221 		4096
7222 	#endif
7223 		;
7224 
7225 	static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*);
7226 
	// One block in the singly linked chain of memory blocks used by xpath_allocator.
	struct xpath_memory_block
	{
		// next block in the chain (null for the last one)
		xpath_memory_block* next;
		// number of usable bytes in 'data'
		size_t capacity;

		// storage area; the 'alignment' member is never accessed by name and gives the union the alignment of double
		union
		{
			char data[xpath_memory_page_size];
			double alignment;
		};
	};
7238 
	// Bump allocator over a chain of xpath_memory_block objects. Allocations are carved sequentially
	// out of the current head block (_root); when it is exhausted a new block is obtained from
	// xml_memory::allocate and becomes the new head. Individual objects are not freed; memory is
	// returned in bulk through revert() or release().
	class xpath_allocator
	{
		// current head block of the chain
		xpath_memory_block* _root;
		// number of bytes already handed out from the head block
		size_t _root_size;

	public:
	#ifdef PUGIXML_NO_EXCEPTIONS
		// jump target used by allocate() to report allocation failure when exceptions are disabled
		jmp_buf* error_handler;
	#endif

		// Creates an allocator that starts allocating from 'root' at offset 'root_size'.
		xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size)
		{
		#ifdef PUGIXML_NO_EXCEPTIONS
			error_handler = 0;
		#endif
		}

		// Allocates 'size' bytes (rounded up to the block alignment). Returns null when a new block
		// is needed and xml_memory::allocate fails.
		void* allocate_nothrow(size_t size)
		{
			// round size up to block alignment boundary
			size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);

			if (_root_size + size <= _root->capacity)
			{
				// fits into the head block: hand out the next free bytes
				void* buf = &_root->data[0] + _root_size;
				_root_size += size;
				return buf;
			}
			else
			{
				// make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
				size_t block_capacity_base = sizeof(_root->data);
				size_t block_capacity_req = size + block_capacity_base / 4;
				size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;

				// total allocation = block header up to 'data' plus the data capacity
				size_t block_size = block_capacity + offsetof(xpath_memory_block, data);

				xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
				if (!block) return 0;

				// link the new block in front of the chain
				block->next = _root;
				block->capacity = block_capacity;

				_root = block;
				_root_size = size;

				return block->data;
			}
		}

		// Like allocate_nothrow(), but never returns null: on failure it calls longjmp on
		// *error_handler (PUGIXML_NO_EXCEPTIONS) or throws std::bad_alloc.
		void* allocate(size_t size)
		{
			void* result = allocate_nothrow(size);

			if (!result)
			{
			#ifdef PUGIXML_NO_EXCEPTIONS
				assert(error_handler);
				longjmp(*error_handler, 1);
			#else
				throw std::bad_alloc();
			#endif
			}

			return result;
		}

		// Resizes the most recently allocated object from 'old_size' to 'new_size' bytes and returns
		// its (possibly new) address; with a null 'ptr' this is a plain allocation of 'new_size' bytes.
		// 'ptr' must be the last object allocated from the head block (asserted).
		void* reallocate(void* ptr, size_t old_size, size_t new_size)
		{
			// round size up to block alignment boundary
			old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
			new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);

			// we can only reallocate the last object
			assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);

			// adjust root size so that we have not allocated the object at all
			// (remember beforehand whether the object was the only content of the head block)
			bool only_object = (_root_size == old_size);

			if (ptr) _root_size -= old_size;

			// allocate a new version (this will obviously reuse the memory if possible)
			void* result = allocate(new_size);
			assert(result);

			// we have a new block
			if (result != ptr && ptr)
			{
				// copy old data
				assert(new_size >= old_size);
				memcpy(result, ptr, old_size);

				// free the previous page if it had no other objects
				if (only_object)
				{
					assert(_root->data == result);
					assert(_root->next);

					// _root->next is the block that held the old object; 'next' is its successor
					xpath_memory_block* next = _root->next->next;

					if (next)
					{
						// deallocate the whole page, unless it was the first one
						xml_memory::deallocate(_root->next);
						_root->next = next;
					}
				}
			}

			return result;
		}

		// Rolls the allocator back to a previously copied state: every block in front of
		// state._root is deallocated, then the head block and offset are restored from 'state'.
		void revert(const xpath_allocator& state)
		{
			// free all new pages
			xpath_memory_block* cur = _root;

			while (cur != state._root)
			{
				xpath_memory_block* next = cur->next;

				xml_memory::deallocate(cur);

				cur = next;
			}

			// restore state
			_root = state._root;
			_root_size = state._root_size;
		}

		// Deallocates every block that has a successor, i.e. all blocks except the last one in the
		// chain. The members _root and _root_size are not updated.
		void release()
		{
			xpath_memory_block* cur = _root;
			assert(cur);

			while (cur->next)
			{
				xpath_memory_block* next = cur->next;

				xml_memory::deallocate(cur);

				cur = next;
			}
		}
	};
7385 
7386 	struct xpath_allocator_capture
7387 	{
xpath_allocator_capturexpath_allocator_capture7388 		xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
7389 		{
7390 		}
7391 
~xpath_allocator_capturexpath_allocator_capture7392 		~xpath_allocator_capture()
7393 		{
7394 			_target->revert(_state);
7395 		}
7396 
7397 		xpath_allocator* _target;
7398 		xpath_allocator _state;
7399 	};
7400 
	// Pair of allocator pointers; xpath_stack_data points them at its 'result' and 'temp' allocators.
	struct xpath_stack
	{
		xpath_allocator* result;
		xpath_allocator* temp;
	};
7406 
7407 	struct xpath_stack_data
7408 	{
7409 		xpath_memory_block blocks[2];
7410 		xpath_allocator result;
7411 		xpath_allocator temp;
7412 		xpath_stack stack;
7413 
7414 	#ifdef PUGIXML_NO_EXCEPTIONS
7415 		jmp_buf error_handler;
7416 	#endif
7417 
xpath_stack_dataxpath_stack_data7418 		xpath_stack_data(): result(blocks + 0), temp(blocks + 1)
7419 		{
7420 			blocks[0].next = blocks[1].next = 0;
7421 			blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
7422 
7423 			stack.result = &result;
7424 			stack.temp = &temp;
7425 
7426 		#ifdef PUGIXML_NO_EXCEPTIONS
7427 			result.error_handler = temp.error_handler = &error_handler;
7428 		#endif
7429 		}
7430 
~xpath_stack_dataxpath_stack_data7431 		~xpath_stack_data()
7432 		{
7433 			result.release();
7434 			temp.release();
7435 		}
7436 	};
7437 PUGI__NS_END
7438 
7439 // String class
7440 PUGI__NS_BEGIN
7441 	class xpath_string
7442 	{
7443 		const char_t* _buffer;
7444 		bool _uses_heap;
7445 		size_t _length_heap;
7446 
duplicate_string(const char_t * string,size_t length,xpath_allocator * alloc)7447 		static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
7448 		{
7449 			char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
7450 			assert(result);
7451 
7452 			memcpy(result, string, length * sizeof(char_t));
7453 			result[length] = 0;
7454 
7455 			return result;
7456 		}
7457 
xpath_string(const char_t * buffer,bool uses_heap_,size_t length_heap)7458 		xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap)
7459 		{
7460 		}
7461 
7462 	public:
from_const(const char_t * str)7463 		static xpath_string from_const(const char_t* str)
7464 		{
7465 			return xpath_string(str, false, 0);
7466 		}
7467 
from_heap_preallocated(const char_t * begin,const char_t * end)7468 		static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end)
7469 		{
7470 			assert(begin <= end && *end == 0);
7471 
7472 			return xpath_string(begin, true, static_cast<size_t>(end - begin));
7473 		}
7474 
from_heap(const char_t * begin,const char_t * end,xpath_allocator * alloc)7475 		static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc)
7476 		{
7477 			assert(begin <= end);
7478 
7479 			size_t length = static_cast<size_t>(end - begin);
7480 
7481 			return length == 0 ? xpath_string() : xpath_string(duplicate_string(begin, length, alloc), true, length);
7482 		}
7483 
xpath_string()7484 		xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0)
7485 		{
7486 		}
7487 
append(const xpath_string & o,xpath_allocator * alloc)7488 		void append(const xpath_string& o, xpath_allocator* alloc)
7489 		{
7490 			// skip empty sources
7491 			if (!*o._buffer) return;
7492 
7493 			// fast append for constant empty target and constant source
7494 			if (!*_buffer && !_uses_heap && !o._uses_heap)
7495 			{
7496 				_buffer = o._buffer;
7497 			}
7498 			else
7499 			{
7500 				// need to make heap copy
7501 				size_t target_length = length();
7502 				size_t source_length = o.length();
7503 				size_t result_length = target_length + source_length;
7504 
7505 				// allocate new buffer
7506 				char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
7507 				assert(result);
7508 
7509 				// append first string to the new buffer in case there was no reallocation
7510 				if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
7511 
7512 				// append second string to the new buffer
7513 				memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
7514 				result[result_length] = 0;
7515 
7516 				// finalize
7517 				_buffer = result;
7518 				_uses_heap = true;
7519 				_length_heap = result_length;
7520 			}
7521 		}
7522 
c_str() const7523 		const char_t* c_str() const
7524 		{
7525 			return _buffer;
7526 		}
7527 
length() const7528 		size_t length() const
7529 		{
7530 			return _uses_heap ? _length_heap : strlength(_buffer);
7531 		}
7532 
data(xpath_allocator * alloc)7533 		char_t* data(xpath_allocator* alloc)
7534 		{
7535 			// make private heap copy
7536 			if (!_uses_heap)
7537 			{
7538 				size_t length_ = strlength(_buffer);
7539 
7540 				_buffer = duplicate_string(_buffer, length_, alloc);
7541 				_uses_heap = true;
7542 				_length_heap = length_;
7543 			}
7544 
7545 			return const_cast<char_t*>(_buffer);
7546 		}
7547 
empty() const7548 		bool empty() const
7549 		{
7550 			return *_buffer == 0;
7551 		}
7552 
operator ==(const xpath_string & o) const7553 		bool operator==(const xpath_string& o) const
7554 		{
7555 			return strequal(_buffer, o._buffer);
7556 		}
7557 
operator !=(const xpath_string & o) const7558 		bool operator!=(const xpath_string& o) const
7559 		{
7560 			return !strequal(_buffer, o._buffer);
7561 		}
7562 
uses_heap() const7563 		bool uses_heap() const
7564 		{
7565 			return _uses_heap;
7566 		}
7567 	};
7568 PUGI__NS_END
7569 
7570 PUGI__NS_BEGIN
starts_with(const char_t * string,const char_t * pattern)7571 	PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
7572 	{
7573 		while (*pattern && *string == *pattern)
7574 		{
7575 			string++;
7576 			pattern++;
7577 		}
7578 
7579 		return *pattern == 0;
7580 	}
7581 
find_char(const char_t * s,char_t c)7582 	PUGI__FN const char_t* find_char(const char_t* s, char_t c)
7583 	{
7584 	#ifdef PUGIXML_WCHAR_MODE
7585 		return wcschr(s, c);
7586 	#else
7587 		return strchr(s, c);
7588 	#endif
7589 	}
7590 
find_substring(const char_t * s,const char_t * p)7591 	PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
7592 	{
7593 	#ifdef PUGIXML_WCHAR_MODE
7594 		// MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
7595 		return (*p == 0) ? s : wcsstr(s, p);
7596 	#else
7597 		return strstr(s, p);
7598 	#endif
7599 	}
7600 
7601 	// Converts symbol to lower case, if it is an ASCII one
tolower_ascii(char_t ch)7602 	PUGI__FN char_t tolower_ascii(char_t ch)
7603 	{
7604 		return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
7605 	}
7606 
string_value(const xpath_node & na,xpath_allocator * alloc)7607 	PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
7608 	{
7609 		if (na.attribute())
7610 			return xpath_string::from_const(na.attribute().value());
7611 		else
7612 		{
7613 			xml_node n = na.node();
7614 
7615 			switch (n.type())
7616 			{
7617 			case node_pcdata:
7618 			case node_cdata:
7619 			case node_comment:
7620 			case node_pi:
7621 				return xpath_string::from_const(n.value());
7622 
7623 			case node_document:
7624 			case node_element:
7625 			{
7626 				xpath_string result;
7627 
7628 				xml_node cur = n.first_child();
7629 
7630 				while (cur && cur != n)
7631 				{
7632 					if (cur.type() == node_pcdata || cur.type() == node_cdata)
7633 						result.append(xpath_string::from_const(cur.value()), alloc);
7634 
7635 					if (cur.first_child())
7636 						cur = cur.first_child();
7637 					else if (cur.next_sibling())
7638 						cur = cur.next_sibling();
7639 					else
7640 					{
7641 						while (!cur.next_sibling() && cur != n)
7642 							cur = cur.parent();
7643 
7644 						if (cur != n) cur = cur.next_sibling();
7645 					}
7646 				}
7647 
7648 				return result;
7649 			}
7650 
7651 			default:
7652 				return xpath_string();
7653 			}
7654 		}
7655 	}
7656 
node_is_before_sibling(xml_node_struct * ln,xml_node_struct * rn)7657 	PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
7658 	{
7659 		assert(ln->parent == rn->parent);
7660 
7661 		// there is no common ancestor (the shared parent is null), nodes are from different documents
7662 		if (!ln->parent) return ln < rn;
7663 
7664 		// determine sibling order
7665 		xml_node_struct* ls = ln;
7666 		xml_node_struct* rs = rn;
7667 
7668 		while (ls && rs)
7669 		{
7670 			if (ls == rn) return true;
7671 			if (rs == ln) return false;
7672 
7673 			ls = ls->next_sibling;
7674 			rs = rs->next_sibling;
7675 		}
7676 
7677 		// if rn sibling chain ended ln must be before rn
7678 		return !rs;
7679 	}
7680 
node_is_before(xml_node_struct * ln,xml_node_struct * rn)7681 	PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn)
7682 	{
7683 		// find common ancestor at the same depth, if any
7684 		xml_node_struct* lp = ln;
7685 		xml_node_struct* rp = rn;
7686 
7687 		while (lp && rp && lp->parent != rp->parent)
7688 		{
7689 			lp = lp->parent;
7690 			rp = rp->parent;
7691 		}
7692 
7693 		// parents are the same!
7694 		if (lp && rp) return node_is_before_sibling(lp, rp);
7695 
7696 		// nodes are at different depths, need to normalize heights
7697 		bool left_higher = !lp;
7698 
7699 		while (lp)
7700 		{
7701 			lp = lp->parent;
7702 			ln = ln->parent;
7703 		}
7704 
7705 		while (rp)
7706 		{
7707 			rp = rp->parent;
7708 			rn = rn->parent;
7709 		}
7710 
7711 		// one node is the ancestor of the other
7712 		if (ln == rn) return left_higher;
7713 
7714 		// find common ancestor... again
7715 		while (ln->parent != rn->parent)
7716 		{
7717 			ln = ln->parent;
7718 			rn = rn->parent;
7719 		}
7720 
7721 		return node_is_before_sibling(ln, rn);
7722 	}
7723 
node_is_ancestor(xml_node_struct * parent,xml_node_struct * node)7724 	PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
7725 	{
7726 		while (node && node != parent) node = node->parent;
7727 
7728 		return parent && node == parent;
7729 	}
7730 
document_buffer_order(const xpath_node & xnode)7731 	PUGI__FN const void* document_buffer_order(const xpath_node& xnode)
7732 	{
7733 		xml_node_struct* node = xnode.node().internal_object();
7734 
7735 		if (node)
7736 		{
7737 			if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0)
7738 			{
7739 				if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name;
7740 				if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value;
7741 			}
7742 
7743 			return 0;
7744 		}
7745 
7746 		xml_attribute_struct* attr = xnode.attribute().internal_object();
7747 
7748 		if (attr)
7749 		{
7750 			if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0)
7751 			{
7752 				if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name;
7753 				if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
7754 			}
7755 
7756 			return 0;
7757 		}
7758 
7759 		return 0;
7760 	}
7761 
7762 	struct document_order_comparator
7763 	{
operator ()document_order_comparator7764 		bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
7765 		{
7766 			// optimized document order based check
7767 			const void* lo = document_buffer_order(lhs);
7768 			const void* ro = document_buffer_order(rhs);
7769 
7770 			if (lo && ro) return lo < ro;
7771 
7772 			// slow comparison
7773 			xml_node ln = lhs.node(), rn = rhs.node();
7774 
7775 			// compare attributes
7776 			if (lhs.attribute() && rhs.attribute())
7777 			{
7778 				// shared parent
7779 				if (lhs.parent() == rhs.parent())
7780 				{
7781 					// determine sibling order
7782 					for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
7783 						if (a == rhs.attribute())
7784 							return true;
7785 
7786 					return false;
7787 				}
7788 
7789 				// compare attribute parents
7790 				ln = lhs.parent();
7791 				rn = rhs.parent();
7792 			}
7793 			else if (lhs.attribute())
7794 			{
7795 				// attributes go after the parent element
7796 				if (lhs.parent() == rhs.node()) return false;
7797 
7798 				ln = lhs.parent();
7799 			}
7800 			else if (rhs.attribute())
7801 			{
7802 				// attributes go after the parent element
7803 				if (rhs.parent() == lhs.node()) return true;
7804 
7805 				rn = rhs.parent();
7806 			}
7807 
7808 			if (ln == rn) return false;
7809 
7810 			if (!ln || !rn) return ln < rn;
7811 
7812 			return node_is_before(ln.internal_object(), rn.internal_object());
7813 		}
7814 	};
7815 
7816 	struct duplicate_comparator
7817 	{
operator ()duplicate_comparator7818 		bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
7819 		{
7820 			if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;
7821 			else return rhs.attribute() ? false : lhs.node() < rhs.node();
7822 		}
7823 	};
7824 
gen_nan()7825 	PUGI__FN double gen_nan()
7826 	{
7827 	#if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
7828 		union { float f; uint32_t i; } u[sizeof(float) == sizeof(uint32_t) ? 1 : -1];
7829 		u[0].i = 0x7fc00000;
7830 		return u[0].f;
7831 	#else
7832 		// fallback
7833 		const volatile double zero = 0.0;
7834 		return zero / zero;
7835 	#endif
7836 	}
7837 
is_nan(double value)7838 	PUGI__FN bool is_nan(double value)
7839 	{
7840 	#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
7841 		return !!_isnan(value);
7842 	#elif defined(fpclassify) && defined(FP_NAN)
7843 		return fpclassify(value) == FP_NAN;
7844 	#else
7845 		// fallback
7846 		const volatile double v = value;
7847 		return v != v;
7848 	#endif
7849 	}
7850 
convert_number_to_string_special(double value)7851 	PUGI__FN const char_t* convert_number_to_string_special(double value)
7852 	{
7853 	#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
7854 		if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
7855 		if (_isnan(value)) return PUGIXML_TEXT("NaN");
7856 		return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
7857 	#elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
7858 		switch (fpclassify(value))
7859 		{
7860 		case FP_NAN:
7861 			return PUGIXML_TEXT("NaN");
7862 
7863 		case FP_INFINITE:
7864 			return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
7865 
7866 		case FP_ZERO:
7867 			return PUGIXML_TEXT("0");
7868 
7869 		default:
7870 			return 0;
7871 		}
7872 	#else
7873 		// fallback
7874 		const volatile double v = value;
7875 
7876 		if (v == 0) return PUGIXML_TEXT("0");
7877 		if (v != v) return PUGIXML_TEXT("NaN");
7878 		if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
7879 		return 0;
7880 	#endif
7881 	}
7882 
convert_number_to_boolean(double value)7883 	PUGI__FN bool convert_number_to_boolean(double value)
7884 	{
7885 		return (value != 0 && !is_nan(value));
7886 	}
7887 
truncate_zeros(char * begin,char * end)7888 	PUGI__FN void truncate_zeros(char* begin, char* end)
7889 	{
7890 		while (begin != end && end[-1] == '0') end--;
7891 
7892 		*end = 0;
7893 	}
7894 
7895 	// gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
7896 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
convert_number_to_mantissa_exponent(double value,char * buffer,size_t buffer_size,char ** out_mantissa,int * out_exponent)7897 	PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
7898 	{
7899 		// get base values
7900 		int sign, exponent;
7901 		_ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);
7902 
7903 		// truncate redundant zeros
7904 		truncate_zeros(buffer, buffer + strlen(buffer));
7905 
7906 		// fill results
7907 		*out_mantissa = buffer;
7908 		*out_exponent = exponent;
7909 	}
7910 #else
convert_number_to_mantissa_exponent(double value,char * buffer,size_t buffer_size,char ** out_mantissa,int * out_exponent)7911 	PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
7912 	{
7913 		// get a scientific notation value with IEEE DBL_DIG decimals
7914 		sprintf(buffer, "%.*e", DBL_DIG, value);
7915 		assert(strlen(buffer) < buffer_size);
7916 		(void)!buffer_size;
7917 
7918 		// get the exponent (possibly negative)
7919 		char* exponent_string = strchr(buffer, 'e');
7920 		assert(exponent_string);
7921 
7922 		int exponent = atoi(exponent_string + 1);
7923 
7924 		// extract mantissa string: skip sign
7925 		char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
7926 		assert(mantissa[0] != '0' && mantissa[1] == '.');
7927 
7928 		// divide mantissa by 10 to eliminate integer part
7929 		mantissa[1] = mantissa[0];
7930 		mantissa++;
7931 		exponent++;
7932 
7933 		// remove extra mantissa digits and zero-terminate mantissa
7934 		truncate_zeros(mantissa, exponent_string);
7935 
7936 		// fill results
7937 		*out_mantissa = mantissa;
7938 		*out_exponent = exponent;
7939 	}
7940 #endif
7941 
convert_number_to_string(double value,xpath_allocator * alloc)7942 	PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
7943 	{
7944 		// try special number conversion
7945 		const char_t* special = convert_number_to_string_special(value);
7946 		if (special) return xpath_string::from_const(special);
7947 
7948 		// get mantissa + exponent form
7949 		char mantissa_buffer[32];
7950 
7951 		char* mantissa;
7952 		int exponent;
7953 		convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);
7954 
7955 		// allocate a buffer of suitable length for the number
7956 		size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
7957 		char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
7958 		assert(result);
7959 
7960 		// make the number!
7961 		char_t* s = result;
7962 
7963 		// sign
7964 		if (value < 0) *s++ = '-';
7965 
7966 		// integer part
7967 		if (exponent <= 0)
7968 		{
7969 			*s++ = '0';
7970 		}
7971 		else
7972 		{
7973 			while (exponent > 0)
7974 			{
7975 				assert(*mantissa == 0 || static_cast<unsigned int>(static_cast<unsigned int>(*mantissa) - '0') <= 9);
7976 				*s++ = *mantissa ? *mantissa++ : '0';
7977 				exponent--;
7978 			}
7979 		}
7980 
7981 		// fractional part
7982 		if (*mantissa)
7983 		{
7984 			// decimal point
7985 			*s++ = '.';
7986 
7987 			// extra zeroes from negative exponent
7988 			while (exponent < 0)
7989 			{
7990 				*s++ = '0';
7991 				exponent++;
7992 			}
7993 
7994 			// extra mantissa digits
7995 			while (*mantissa)
7996 			{
7997 				assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
7998 				*s++ = *mantissa++;
7999 			}
8000 		}
8001 
8002 		// zero-terminate
8003 		assert(s < result + result_size);
8004 		*s = 0;
8005 
8006 		return xpath_string::from_heap_preallocated(result, s);
8007 	}
8008 
check_string_to_number_format(const char_t * string)8009 	PUGI__FN bool check_string_to_number_format(const char_t* string)
8010 	{
8011 		// parse leading whitespace
8012 		while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8013 
8014 		// parse sign
8015 		if (*string == '-') ++string;
8016 
8017 		if (!*string) return false;
8018 
8019 		// if there is no integer part, there should be a decimal part with at least one digit
8020 		if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
8021 
8022 		// parse integer part
8023 		while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8024 
8025 		// parse decimal part
8026 		if (*string == '.')
8027 		{
8028 			++string;
8029 
8030 			while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8031 		}
8032 
8033 		// parse trailing whitespace
8034 		while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8035 
8036 		return *string == 0;
8037 	}
8038 
convert_string_to_number(const char_t * string)8039 	PUGI__FN double convert_string_to_number(const char_t* string)
8040 	{
8041 		// check string format
8042 		if (!check_string_to_number_format(string)) return gen_nan();
8043 
8044 		// parse string
8045 	#ifdef PUGIXML_WCHAR_MODE
8046 		return wcstod(string, 0);
8047 	#else
8048 		return strtod(string, 0);
8049 	#endif
8050 	}
8051 
convert_string_to_number_scratch(char_t (& buffer)[32],const char_t * begin,const char_t * end,double * out_result)8052 	PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
8053 	{
8054 		size_t length = static_cast<size_t>(end - begin);
8055 		char_t* scratch = buffer;
8056 
8057 		if (length >= sizeof(buffer) / sizeof(buffer[0]))
8058 		{
8059 			// need to make dummy on-heap copy
8060 			scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8061 			if (!scratch) return false;
8062 		}
8063 
8064 		// copy string to zero-terminated buffer and perform conversion
8065 		memcpy(scratch, begin, length * sizeof(char_t));
8066 		scratch[length] = 0;
8067 
8068 		*out_result = convert_string_to_number(scratch);
8069 
8070 		// free dummy buffer
8071 		if (scratch != buffer) xml_memory::deallocate(scratch);
8072 
8073 		return true;
8074 	}
8075 
round_nearest(double value)8076 	PUGI__FN double round_nearest(double value)
8077 	{
8078 		return floor(value + 0.5);
8079 	}
8080 
round_nearest_nzero(double value)8081 	PUGI__FN double round_nearest_nzero(double value)
8082 	{
8083 		// same as round_nearest, but returns -0 for [-0.5, -0]
8084 		// ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
8085 		return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
8086 	}
8087 
qualified_name(const xpath_node & node)8088 	PUGI__FN const char_t* qualified_name(const xpath_node& node)
8089 	{
8090 		return node.attribute() ? node.attribute().name() : node.node().name();
8091 	}
8092 
local_name(const xpath_node & node)8093 	PUGI__FN const char_t* local_name(const xpath_node& node)
8094 	{
8095 		const char_t* name = qualified_name(node);
8096 		const char_t* p = find_char(name, ':');
8097 
8098 		return p ? p + 1 : name;
8099 	}
8100 
8101 	struct namespace_uri_predicate
8102 	{
8103 		const char_t* prefix;
8104 		size_t prefix_length;
8105 
namespace_uri_predicatenamespace_uri_predicate8106 		namespace_uri_predicate(const char_t* name)
8107 		{
8108 			const char_t* pos = find_char(name, ':');
8109 
8110 			prefix = pos ? name : 0;
8111 			prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
8112 		}
8113 
operator ()namespace_uri_predicate8114 		bool operator()(xml_attribute a) const
8115 		{
8116 			const char_t* name = a.name();
8117 
8118 			if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
8119 
8120 			return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
8121 		}
8122 	};
8123 
namespace_uri(xml_node node)8124 	PUGI__FN const char_t* namespace_uri(xml_node node)
8125 	{
8126 		namespace_uri_predicate pred = node.name();
8127 
8128 		xml_node p = node;
8129 
8130 		while (p)
8131 		{
8132 			xml_attribute a = p.find_attribute(pred);
8133 
8134 			if (a) return a.value();
8135 
8136 			p = p.parent();
8137 		}
8138 
8139 		return PUGIXML_TEXT("");
8140 	}
8141 
namespace_uri(xml_attribute attr,xml_node parent)8142 	PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
8143 	{
8144 		namespace_uri_predicate pred = attr.name();
8145 
8146 		// Default namespace does not apply to attributes
8147 		if (!pred.prefix) return PUGIXML_TEXT("");
8148 
8149 		xml_node p = parent;
8150 
8151 		while (p)
8152 		{
8153 			xml_attribute a = p.find_attribute(pred);
8154 
8155 			if (a) return a.value();
8156 
8157 			p = p.parent();
8158 		}
8159 
8160 		return PUGIXML_TEXT("");
8161 	}
8162 
namespace_uri(const xpath_node & node)8163 	PUGI__FN const char_t* namespace_uri(const xpath_node& node)
8164 	{
8165 		return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
8166 	}
8167 
normalize_space(char_t * buffer)8168 	PUGI__FN char_t* normalize_space(char_t* buffer)
8169 	{
8170 		char_t* write = buffer;
8171 
8172 		for (char_t* it = buffer; *it; )
8173 		{
8174 			char_t ch = *it++;
8175 
8176 			if (PUGI__IS_CHARTYPE(ch, ct_space))
8177 			{
8178 				// replace whitespace sequence with single space
8179 				while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
8180 
8181 				// avoid leading spaces
8182 				if (write != buffer) *write++ = ' ';
8183 			}
8184 			else *write++ = ch;
8185 		}
8186 
8187 		// remove trailing space
8188 		if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
8189 
8190 		// zero-terminate
8191 		*write = 0;
8192 
8193 		return write;
8194 	}
8195 
translate(char_t * buffer,const char_t * from,const char_t * to,size_t to_length)8196 	PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
8197 	{
8198 		char_t* write = buffer;
8199 
8200 		while (*buffer)
8201 		{
8202 			PUGI__DMC_VOLATILE char_t ch = *buffer++;
8203 
8204 			const char_t* pos = find_char(from, ch);
8205 
8206 			if (!pos)
8207 				*write++ = ch; // do not process
8208 			else if (static_cast<size_t>(pos - from) < to_length)
8209 				*write++ = to[pos - from]; // replace
8210 		}
8211 
8212 		// zero-terminate
8213 		*write = 0;
8214 
8215 		return write;
8216 	}
8217 
translate_table_generate(xpath_allocator * alloc,const char_t * from,const char_t * to)8218 	PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
8219 	{
8220 		unsigned char table[128] = {0};
8221 
8222 		while (*from)
8223 		{
8224 			unsigned int fc = static_cast<unsigned int>(*from);
8225 			unsigned int tc = static_cast<unsigned int>(*to);
8226 
8227 			if (fc >= 128 || tc >= 128)
8228 				return 0;
8229 
8230 			// code=128 means "skip character"
8231 			if (!table[fc])
8232 				table[fc] = static_cast<unsigned char>(tc ? tc : 128);
8233 
8234 			from++;
8235 			if (tc) to++;
8236 		}
8237 
8238 		for (int i = 0; i < 128; ++i)
8239 			if (!table[i])
8240 				table[i] = static_cast<unsigned char>(i);
8241 
8242 		void* result = alloc->allocate_nothrow(sizeof(table));
8243 
8244 		if (result)
8245 		{
8246 			memcpy(result, table, sizeof(table));
8247 		}
8248 
8249 		return static_cast<unsigned char*>(result);
8250 	}
8251 
translate_table(char_t * buffer,const unsigned char * table)8252 	PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table)
8253 	{
8254 		char_t* write = buffer;
8255 
8256 		while (*buffer)
8257 		{
8258 			char_t ch = *buffer++;
8259 			unsigned int index = static_cast<unsigned int>(ch);
8260 
8261 			if (index < 128)
8262 			{
8263 				unsigned char code = table[index];
8264 
8265 				// code=128 means "skip character" (table size is 128 so 128 can be a special value)
8266 				// this code skips these characters without extra branches
8267 				*write = static_cast<char_t>(code);
8268 				write += 1 - (code >> 7);
8269 			}
8270 			else
8271 			{
8272 				*write++ = ch;
8273 			}
8274 		}
8275 
8276 		// zero-terminate
8277 		*write = 0;
8278 
8279 		return write;
8280 	}
8281 
is_xpath_attribute(const char_t * name)8282 	inline bool is_xpath_attribute(const char_t* name)
8283 	{
8284 		return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':'));
8285 	}
8286 
8287 	struct xpath_variable_boolean: xpath_variable
8288 	{
xpath_variable_booleanxpath_variable_boolean8289 		xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false)
8290 		{
8291 		}
8292 
8293 		bool value;
8294 		char_t name[1];
8295 	};
8296 
8297 	struct xpath_variable_number: xpath_variable
8298 	{
xpath_variable_numberxpath_variable_number8299 		xpath_variable_number(): xpath_variable(xpath_type_number), value(0)
8300 		{
8301 		}
8302 
8303 		double value;
8304 		char_t name[1];
8305 	};
8306 
8307 	struct xpath_variable_string: xpath_variable
8308 	{
xpath_variable_stringxpath_variable_string8309 		xpath_variable_string(): xpath_variable(xpath_type_string), value(0)
8310 		{
8311 		}
8312 
~xpath_variable_stringxpath_variable_string8313 		~xpath_variable_string()
8314 		{
8315 			if (value) xml_memory::deallocate(value);
8316 		}
8317 
8318 		char_t* value;
8319 		char_t name[1];
8320 	};
8321 
8322 	struct xpath_variable_node_set: xpath_variable
8323 	{
xpath_variable_node_setxpath_variable_node_set8324 		xpath_variable_node_set(): xpath_variable(xpath_type_node_set)
8325 		{
8326 		}
8327 
8328 		xpath_node_set value;
8329 		char_t name[1];
8330 	};
8331 
	// Default-constructed, immutable node set with internal linkage.
	// NOTE(review): presumably returned by reference where a node set is requested but none
	// is available - confirm against the users of this object.
	static const xpath_node_set dummy_node_set;
8333 
hash_string(const char_t * str)8334 	PUGI__FN unsigned int hash_string(const char_t* str)
8335 	{
8336 		// Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
8337 		unsigned int result = 0;
8338 
8339 		while (*str)
8340 		{
8341 			result += static_cast<unsigned int>(*str++);
8342 			result += result << 10;
8343 			result ^= result >> 6;
8344 		}
8345 
8346 		result += result << 3;
8347 		result ^= result >> 11;
8348 		result += result << 15;
8349 
8350 		return result;
8351 	}
8352 
new_xpath_variable(const char_t * name)8353 	template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
8354 	{
8355 		size_t length = strlength(name);
8356 		if (length == 0) return 0; // empty variable names are invalid
8357 
8358 		// $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
8359 		void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
8360 		if (!memory) return 0;
8361 
8362 		T* result = new (memory) T();
8363 
8364 		memcpy(result->name, name, (length + 1) * sizeof(char_t));
8365 
8366 		return result;
8367 	}
8368 
new_xpath_variable(xpath_value_type type,const char_t * name)8369 	PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
8370 	{
8371 		switch (type)
8372 		{
8373 		case xpath_type_node_set:
8374 			return new_xpath_variable<xpath_variable_node_set>(name);
8375 
8376 		case xpath_type_number:
8377 			return new_xpath_variable<xpath_variable_number>(name);
8378 
8379 		case xpath_type_string:
8380 			return new_xpath_variable<xpath_variable_string>(name);
8381 
8382 		case xpath_type_boolean:
8383 			return new_xpath_variable<xpath_variable_boolean>(name);
8384 
8385 		default:
8386 			return 0;
8387 		}
8388 	}
8389 
delete_xpath_variable(T * var)8390 	template <typename T> PUGI__FN void delete_xpath_variable(T* var)
8391 	{
8392 		var->~T();
8393 		xml_memory::deallocate(var);
8394 	}
8395 
delete_xpath_variable(xpath_value_type type,xpath_variable * var)8396 	PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
8397 	{
8398 		switch (type)
8399 		{
8400 		case xpath_type_node_set:
8401 			delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
8402 			break;
8403 
8404 		case xpath_type_number:
8405 			delete_xpath_variable(static_cast<xpath_variable_number*>(var));
8406 			break;
8407 
8408 		case xpath_type_string:
8409 			delete_xpath_variable(static_cast<xpath_variable_string*>(var));
8410 			break;
8411 
8412 		case xpath_type_boolean:
8413 			delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
8414 			break;
8415 
8416 		default:
8417 			assert(!"Invalid variable type");
8418 		}
8419 	}
8420 
copy_xpath_variable(xpath_variable * lhs,const xpath_variable * rhs)8421 	PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
8422 	{
8423 		switch (rhs->type())
8424 		{
8425 		case xpath_type_node_set:
8426 			return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
8427 
8428 		case xpath_type_number:
8429 			return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
8430 
8431 		case xpath_type_string:
8432 			return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
8433 
8434 		case xpath_type_boolean:
8435 			return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
8436 
8437 		default:
8438 			assert(!"Invalid variable type");
8439 			return false;
8440 		}
8441 	}
8442 
get_variable_scratch(char_t (& buffer)[32],xpath_variable_set * set,const char_t * begin,const char_t * end,xpath_variable ** out_result)8443 	PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
8444 	{
8445 		size_t length = static_cast<size_t>(end - begin);
8446 		char_t* scratch = buffer;
8447 
8448 		if (length >= sizeof(buffer) / sizeof(buffer[0]))
8449 		{
8450 			// need to make dummy on-heap copy
8451 			scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8452 			if (!scratch) return false;
8453 		}
8454 
8455 		// copy string to zero-terminated buffer and perform lookup
8456 		memcpy(scratch, begin, length * sizeof(char_t));
8457 		scratch[length] = 0;
8458 
8459 		*out_result = set->get(scratch);
8460 
8461 		// free dummy buffer
8462 		if (scratch != buffer) xml_memory::deallocate(scratch);
8463 
8464 		return true;
8465 	}
8466 PUGI__NS_END
8467 
8468 // Internal node set class
8469 PUGI__NS_BEGIN
xpath_get_order(const xpath_node * begin,const xpath_node * end)8470 	PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
8471 	{
8472 		if (end - begin < 2)
8473 			return xpath_node_set::type_sorted;
8474 
8475 		document_order_comparator cmp;
8476 
8477 		bool first = cmp(begin[0], begin[1]);
8478 
8479 		for (const xpath_node* it = begin + 1; it + 1 < end; ++it)
8480 			if (cmp(it[0], it[1]) != first)
8481 				return xpath_node_set::type_unsorted;
8482 
8483 		return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse;
8484 	}
8485 
xpath_sort(xpath_node * begin,xpath_node * end,xpath_node_set::type_t type,bool rev)8486 	PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
8487 	{
8488 		xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
8489 
8490 		if (type == xpath_node_set::type_unsorted)
8491 		{
8492 			xpath_node_set::type_t sorted = xpath_get_order(begin, end);
8493 
8494 			if (sorted == xpath_node_set::type_unsorted)
8495 			{
8496 				sort(begin, end, document_order_comparator());
8497 
8498 				type = xpath_node_set::type_sorted;
8499 			}
8500 			else
8501 				type = sorted;
8502 		}
8503 
8504 		if (type != order) reverse(begin, end);
8505 
8506 		return order;
8507 	}
8508 
xpath_first(const xpath_node * begin,const xpath_node * end,xpath_node_set::type_t type)8509 	PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
8510 	{
8511 		if (begin == end) return xpath_node();
8512 
8513 		switch (type)
8514 		{
8515 		case xpath_node_set::type_sorted:
8516 			return *begin;
8517 
8518 		case xpath_node_set::type_sorted_reverse:
8519 			return *(end - 1);
8520 
8521 		case xpath_node_set::type_unsorted:
8522 			return *min_element(begin, end, document_order_comparator());
8523 
8524 		default:
8525 			assert(!"Invalid node set type");
8526 			return xpath_node();
8527 		}
8528 	}
8529 
8530 	class xpath_node_set_raw
8531 	{
8532 		xpath_node_set::type_t _type;
8533 
8534 		xpath_node* _begin;
8535 		xpath_node* _end;
8536 		xpath_node* _eos;
8537 
8538 	public:
xpath_node_set_raw()8539 		xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
8540 		{
8541 		}
8542 
begin() const8543 		xpath_node* begin() const
8544 		{
8545 			return _begin;
8546 		}
8547 
end() const8548 		xpath_node* end() const
8549 		{
8550 			return _end;
8551 		}
8552 
empty() const8553 		bool empty() const
8554 		{
8555 			return _begin == _end;
8556 		}
8557 
size() const8558 		size_t size() const
8559 		{
8560 			return static_cast<size_t>(_end - _begin);
8561 		}
8562 
first() const8563 		xpath_node first() const
8564 		{
8565 			return xpath_first(_begin, _end, _type);
8566 		}
8567 
8568 		void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
8569 
push_back(const xpath_node & node,xpath_allocator * alloc)8570 		void push_back(const xpath_node& node, xpath_allocator* alloc)
8571 		{
8572 			if (_end != _eos)
8573 				*_end++ = node;
8574 			else
8575 				push_back_grow(node, alloc);
8576 		}
8577 
append(const xpath_node * begin_,const xpath_node * end_,xpath_allocator * alloc)8578 		void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
8579 		{
8580 			if (begin_ == end_) return;
8581 
8582 			size_t size_ = static_cast<size_t>(_end - _begin);
8583 			size_t capacity = static_cast<size_t>(_eos - _begin);
8584 			size_t count = static_cast<size_t>(end_ - begin_);
8585 
8586 			if (size_ + count > capacity)
8587 			{
8588 				// reallocate the old array or allocate a new one
8589 				xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
8590 				assert(data);
8591 
8592 				// finalize
8593 				_begin = data;
8594 				_end = data + size_;
8595 				_eos = data + size_ + count;
8596 			}
8597 
8598 			memcpy(_end, begin_, count * sizeof(xpath_node));
8599 			_end += count;
8600 		}
8601 
sort_do()8602 		void sort_do()
8603 		{
8604 			_type = xpath_sort(_begin, _end, _type, false);
8605 		}
8606 
truncate(xpath_node * pos)8607 		void truncate(xpath_node* pos)
8608 		{
8609 			assert(_begin <= pos && pos <= _end);
8610 
8611 			_end = pos;
8612 		}
8613 
remove_duplicates()8614 		void remove_duplicates()
8615 		{
8616 			if (_type == xpath_node_set::type_unsorted)
8617 				sort(_begin, _end, duplicate_comparator());
8618 
8619 			_end = unique(_begin, _end);
8620 		}
8621 
type() const8622 		xpath_node_set::type_t type() const
8623 		{
8624 			return _type;
8625 		}
8626 
set_type(xpath_node_set::type_t value)8627 		void set_type(xpath_node_set::type_t value)
8628 		{
8629 			_type = value;
8630 		}
8631 	};
8632 
push_back_grow(const xpath_node & node,xpath_allocator * alloc)8633 	PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
8634 	{
8635 		size_t capacity = static_cast<size_t>(_eos - _begin);
8636 
8637 		// get new capacity (1.5x rule)
8638 		size_t new_capacity = capacity + capacity / 2 + 1;
8639 
8640 		// reallocate the old array or allocate a new one
8641 		xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
8642 		assert(data);
8643 
8644 		// finalize
8645 		_begin = data;
8646 		_end = data + capacity;
8647 		_eos = data + new_capacity;
8648 
8649 		// push
8650 		*_end++ = node;
8651 	}
8652 PUGI__NS_END
8653 
8654 PUGI__NS_BEGIN
8655 	struct xpath_context
8656 	{
8657 		xpath_node n;
8658 		size_t position, size;
8659 
xpath_contextxpath_context8660 		xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
8661 		{
8662 		}
8663 	};
8664 
	// Token kinds produced by xpath_lexer::next().
	enum lexeme_t
	{
		lex_none = 0,				// input that does not form a token (e.g. lone '!', lone ':', unterminated string)
		lex_equal,					// =
		lex_not_equal,				// !=
		lex_less,					// <
		lex_greater,				// >
		lex_less_or_equal,			// <=
		lex_greater_or_equal,		// >=
		lex_plus,					// +
		lex_minus,					// -
		lex_multiply,				// *
		lex_union,					// |
		lex_var_ref,				// $name or $prefix:name; contents exclude the '$'
		lex_open_brace,				// (
		lex_close_brace,			// )
		lex_quoted_string,			// '...' or "..."; contents exclude the quotes
		lex_number,					// digits with an optional fractional part, or '.' followed by digits
		lex_slash,					// /
		lex_double_slash,			// //
		lex_open_square_brace,		// [
		lex_close_square_brace,		// ]
		lex_string,					// name, prefix:name or prefix:*
		lex_comma,					// ,
		lex_axis_attribute,			// @
		lex_dot,					// .
		lex_double_dot,				// ..
		lex_double_colon,			// ::
		lex_eof						// terminating null character reached
	};
8695 
8696 	struct xpath_lexer_string
8697 	{
8698 		const char_t* begin;
8699 		const char_t* end;
8700 
xpath_lexer_stringxpath_lexer_string8701 		xpath_lexer_string(): begin(0), end(0)
8702 		{
8703 		}
8704 
operator ==xpath_lexer_string8705 		bool operator==(const char_t* other) const
8706 		{
8707 			size_t length = static_cast<size_t>(end - begin);
8708 
8709 			return strequalrange(other, begin, length);
8710 		}
8711 	};
8712 
	// Tokenizer over a null-terminated XPath query string. The lexer holds exactly one
	// "current" lexeme at a time: its kind (current()), its start position (current_pos())
	// and, for lexemes that carry text, the text range (contents()). next() advances to the
	// following lexeme.
	class xpath_lexer
	{
		const char_t* _cur; // scan position: where the next call to next() resumes
		const char_t* _cur_lexeme_pos; // start of the current lexeme, after skipped whitespace
		xpath_lexer_string _cur_lexeme_contents; // text of the current lexeme; only updated by lexemes that carry text

		lexeme_t _cur_lexeme;

	public:
		// Starts lexing at query and immediately reads the first lexeme.
		explicit xpath_lexer(const char_t* query): _cur(query)
		{
			next();
		}

		// Returns the current scan position (the point right after the consumed input).
		const char_t* state() const
		{
			return _cur;
		}

		// Skips whitespace and reads the next lexeme. When the input does not form a token the
		// lexeme becomes lex_none; in most of those cases the scan position is left on the
		// offending character.
		void next()
		{
			const char_t* cur = _cur;

			while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;

			// save lexeme position for error reporting
			_cur_lexeme_pos = cur;

			switch (*cur)
			{
			case 0:
				// end of input; the scan position stays on the terminator
				_cur_lexeme = lex_eof;
				break;

			case '>':
				if (*(cur+1) == '=')
				{
					cur += 2;
					_cur_lexeme = lex_greater_or_equal;
				}
				else
				{
					cur += 1;
					_cur_lexeme = lex_greater;
				}
				break;

			case '<':
				if (*(cur+1) == '=')
				{
					cur += 2;
					_cur_lexeme = lex_less_or_equal;
				}
				else
				{
					cur += 1;
					_cur_lexeme = lex_less;
				}
				break;

			case '!':
				if (*(cur+1) == '=')
				{
					cur += 2;
					_cur_lexeme = lex_not_equal;
				}
				else
				{
					// '!' is only valid as part of "!="
					_cur_lexeme = lex_none;
				}
				break;

			case '=':
				cur += 1;
				_cur_lexeme = lex_equal;

				break;

			case '+':
				cur += 1;
				_cur_lexeme = lex_plus;

				break;

			case '-':
				cur += 1;
				_cur_lexeme = lex_minus;

				break;

			case '*':
				cur += 1;
				_cur_lexeme = lex_multiply;

				break;

			case '|':
				cur += 1;
				_cur_lexeme = lex_union;

				break;

			case '$':
				cur += 1;

				// variable reference: '$' must be followed by a name start character
				if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
				{
					_cur_lexeme_contents.begin = cur;

					while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;

					if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
					{
						cur++; // :

						while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
					}

					_cur_lexeme_contents.end = cur;

					_cur_lexeme = lex_var_ref;
				}
				else
				{
					_cur_lexeme = lex_none;
				}

				break;

			case '(':
				cur += 1;
				_cur_lexeme = lex_open_brace;

				break;

			case ')':
				cur += 1;
				_cur_lexeme = lex_close_brace;

				break;

			case '[':
				cur += 1;
				_cur_lexeme = lex_open_square_brace;

				break;

			case ']':
				cur += 1;
				_cur_lexeme = lex_close_square_brace;

				break;

			case ',':
				cur += 1;
				_cur_lexeme = lex_comma;

				break;

			case '/':
				if (*(cur+1) == '/')
				{
					cur += 2;
					_cur_lexeme = lex_double_slash;
				}
				else
				{
					cur += 1;
					_cur_lexeme = lex_slash;
				}
				break;

			case '.':
				// '.' starts "..", a number such as ".5", or is a single dot
				if (*(cur+1) == '.')
				{
					cur += 2;
					_cur_lexeme = lex_double_dot;
				}
				else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
				{
					_cur_lexeme_contents.begin = cur; // .

					++cur;

					while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;

					_cur_lexeme_contents.end = cur;

					_cur_lexeme = lex_number;
				}
				else
				{
					cur += 1;
					_cur_lexeme = lex_dot;
				}
				break;

			case '@':
				cur += 1;
				_cur_lexeme = lex_axis_attribute;

				break;

			case '"':
			case '\'':
			{
				// quoted string: runs up to the matching quote character; the contents
				// exclude both quotes
				char_t terminator = *cur;

				++cur;

				_cur_lexeme_contents.begin = cur;
				while (*cur && *cur != terminator) cur++;
				_cur_lexeme_contents.end = cur;

				if (!*cur)
					// input ended before the closing quote
					_cur_lexeme = lex_none;
				else
				{
					cur += 1;
					_cur_lexeme = lex_quoted_string;
				}

				break;
			}

			case ':':
				if (*(cur+1) == ':')
				{
					cur += 2;
					_cur_lexeme = lex_double_colon;
				}
				else
				{
					// a lone ':' is not a token on its own
					_cur_lexeme = lex_none;
				}
				break;

			default:
				if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
				{
					// number: digits, optionally followed by '.' and more digits
					_cur_lexeme_contents.begin = cur;

					while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;

					if (*cur == '.')
					{
						cur++;

						while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
					}

					_cur_lexeme_contents.end = cur;

					_cur_lexeme = lex_number;
				}
				else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
				{
					// name: ncname, ncname:* or ncname:ncname
					_cur_lexeme_contents.begin = cur;

					while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;

					if (cur[0] == ':')
					{
						if (cur[1] == '*') // namespace test ncname:*
						{
							cur += 2; // :*
						}
						else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
						{
							cur++; // :

							while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
						}
					}

					_cur_lexeme_contents.end = cur;

					_cur_lexeme = lex_string;
				}
				else
				{
					_cur_lexeme = lex_none;
				}
			}

			// commit the new scan position
			_cur = cur;
		}

		// Kind of the current lexeme.
		lexeme_t current() const
		{
			return _cur_lexeme;
		}

		// Start of the current lexeme within the query (after skipped whitespace).
		const char_t* current_pos() const
		{
			return _cur_lexeme_pos;
		}

		// Text range of the current lexeme; may only be called for lexeme kinds that carry text.
		const xpath_lexer_string& contents() const
		{
			assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);

			return _cur_lexeme_contents;
		}
	};
9018 
	// Kinds of XPath AST nodes. In the comments, "left", "right" and "third" refer to the
	// operand sub-expressions of the node.
	enum ast_type_t
	{
		ast_unknown,
		ast_op_or,						// left or right
		ast_op_and,						// left and right
		ast_op_equal,					// left = right
		ast_op_not_equal,				// left != right
		ast_op_less,					// left < right
		ast_op_greater,					// left > right
		ast_op_less_or_equal,			// left <= right
		ast_op_greater_or_equal,		// left >= right
		ast_op_add,						// left + right
		ast_op_subtract,				// left - right
		ast_op_multiply,				// left * right
		ast_op_divide,					// left div right
		ast_op_mod,						// left mod right
		ast_op_negate,					// - left (unary minus)
		ast_op_union,					// left | right
		ast_predicate,					// apply predicate to set; next points to next predicate
		ast_filter,						// select * from left where right
		ast_string_constant,			// string constant
		ast_number_constant,			// number constant
		ast_variable,					// variable
		ast_func_last,					// last()
		ast_func_position,				// position()
		ast_func_count,					// count(left)
		ast_func_id,					// id(left)
		ast_func_local_name_0,			// local-name()
		ast_func_local_name_1,			// local-name(left)
		ast_func_namespace_uri_0,		// namespace-uri()
		ast_func_namespace_uri_1,		// namespace-uri(left)
		ast_func_name_0,				// name()
		ast_func_name_1,				// name(left)
		ast_func_string_0,				// string()
		ast_func_string_1,				// string(left)
		ast_func_concat,				// concat(left, right, siblings)
		ast_func_starts_with,			// starts-with(left, right)
		ast_func_contains,				// contains(left, right)
		ast_func_substring_before,		// substring-before(left, right)
		ast_func_substring_after,		// substring-after(left, right)
		ast_func_substring_2,			// substring(left, right)
		ast_func_substring_3,			// substring(left, right, third)
		ast_func_string_length_0,		// string-length()
		ast_func_string_length_1,		// string-length(left)
		ast_func_normalize_space_0,		// normalize-space()
		ast_func_normalize_space_1,		// normalize-space(left)
		ast_func_translate,				// translate(left, right, third)
		ast_func_boolean,				// boolean(left)
		ast_func_not,					// not(left)
		ast_func_true,					// true()
		ast_func_false,					// false()
		ast_func_lang,					// lang(left)
		ast_func_number_0,				// number()
		ast_func_number_1,				// number(left)
		ast_func_sum,					// sum(left)
		ast_func_floor,					// floor(left)
		ast_func_ceiling,				// ceiling(left)
		ast_func_round,					// round(left)
		ast_step,						// process set left with step
		ast_step_root,					// select root node

		// node kinds whose names mark them as optimized forms (ast_opt_*)
		ast_opt_translate_table,		// translate(left, right, third) where right/third are constants
		ast_opt_compare_attribute		// @name = 'string'
	};
9083 
	// Axes of a location step; xpath_ast_node::step_fill switches on these values to decide
	// which nodes relative to the context node are visited.
	enum axis_t
	{
		axis_ancestor,
		axis_ancestor_or_self,
		axis_attribute,
		axis_child,
		axis_descendant,
		axis_descendant_or_self,
		axis_following,
		axis_following_sibling,
		axis_namespace,
		axis_parent,
		axis_preceding,
		axis_preceding_sibling,
		axis_self
	};
9100 
	// Node tests of a location step; xpath_ast_node::step_push switches on these values to
	// decide whether a visited node or attribute is added to the result set.
	enum nodetest_t
	{
		nodetest_none,
		nodetest_name,				// name equals the stored node test string
		nodetest_type_node,			// any node; for attributes, any attribute accepted by is_xpath_attribute
		nodetest_type_comment,		// comment nodes
		nodetest_type_pi,			// processing instruction nodes
		nodetest_type_text,			// pcdata and cdata nodes
		nodetest_pi,				// processing instruction whose name equals the stored string
		nodetest_all,				// any element; for attributes, any attribute accepted by is_xpath_attribute
		nodetest_all_in_namespace	// name starts with the stored node test string
	};
9113 
	// Predicate classification stored in a predicate/filter node. apply_predicate evaluates
	// predicate_constant and predicate_constant_one predicates once as a number instead of
	// once per node; the other values take the per-node path.
	// NOTE(review): the exact meaning of predicate_posinv is not visible in this part of the
	// file - presumably "position invariant"; confirm against the code that assigns it.
	enum predicate_t
	{
		predicate_default,
		predicate_posinv,
		predicate_constant,
		predicate_constant_one
	};
9121 
	// Tells node set evaluation how much of the result the caller needs. eval_once maps this,
	// together with the set's recorded ordering, to a "stop after the first match" flag:
	// nodeset_eval_all never stops early, nodeset_eval_any always may, and nodeset_eval_first
	// may only when the set is recorded as sorted.
	enum nodeset_eval_t
	{
		nodeset_eval_all,
		nodeset_eval_any,
		nodeset_eval_first
	};
9128 
	// Wraps an axis_t value in a distinct type so that it can be passed as a tag argument;
	// step_fill reads the value back through T::axis.
	template <axis_t N> struct axis_to_type
	{
		static const axis_t axis;
	};

	// out-of-class definition of the static member, initialized to the template argument
	template <axis_t N> const axis_t axis_to_type<N>::axis = N;
9135 
9136 	class xpath_ast_node
9137 	{
9138 	private:
9139 		// node type
9140 		char _type;
9141 		char _rettype;
9142 
9143 		// for ast_step
9144 		char _axis;
9145 
9146 		// for ast_step/ast_predicate/ast_filter
9147 		char _test;
9148 
9149 		// tree node structure
9150 		xpath_ast_node* _left;
9151 		xpath_ast_node* _right;
9152 		xpath_ast_node* _next;
9153 
9154 		union
9155 		{
9156 			// value for ast_string_constant
9157 			const char_t* string;
9158 			// value for ast_number_constant
9159 			double number;
9160 			// variable for ast_variable
9161 			xpath_variable* variable;
9162 			// node test for ast_step (node name/namespace/node type/pi target)
9163 			const char_t* nodetest;
9164 			// table for ast_opt_translate_table
9165 			const unsigned char* table;
9166 		} _data;
9167 
9168 		xpath_ast_node(const xpath_ast_node&);
9169 		xpath_ast_node& operator=(const xpath_ast_node&);
9170 
compare_eq(xpath_ast_node * lhs,xpath_ast_node * rhs,const xpath_context & c,const xpath_stack & stack,const Comp & comp)9171 		template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9172 		{
9173 			xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9174 
9175 			if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9176 			{
9177 				if (lt == xpath_type_boolean || rt == xpath_type_boolean)
9178 					return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
9179 				else if (lt == xpath_type_number || rt == xpath_type_number)
9180 					return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9181 				else if (lt == xpath_type_string || rt == xpath_type_string)
9182 				{
9183 					xpath_allocator_capture cr(stack.result);
9184 
9185 					xpath_string ls = lhs->eval_string(c, stack);
9186 					xpath_string rs = rhs->eval_string(c, stack);
9187 
9188 					return comp(ls, rs);
9189 				}
9190 			}
9191 			else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9192 			{
9193 				xpath_allocator_capture cr(stack.result);
9194 
9195 				xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9196 				xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9197 
9198 				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9199 					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9200 					{
9201 						xpath_allocator_capture cri(stack.result);
9202 
9203 						if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
9204 							return true;
9205 					}
9206 
9207 				return false;
9208 			}
9209 			else
9210 			{
9211 				if (lt == xpath_type_node_set)
9212 				{
9213 					swap(lhs, rhs);
9214 					swap(lt, rt);
9215 				}
9216 
9217 				if (lt == xpath_type_boolean)
9218 					return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
9219 				else if (lt == xpath_type_number)
9220 				{
9221 					xpath_allocator_capture cr(stack.result);
9222 
9223 					double l = lhs->eval_number(c, stack);
9224 					xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9225 
9226 					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9227 					{
9228 						xpath_allocator_capture cri(stack.result);
9229 
9230 						if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9231 							return true;
9232 					}
9233 
9234 					return false;
9235 				}
9236 				else if (lt == xpath_type_string)
9237 				{
9238 					xpath_allocator_capture cr(stack.result);
9239 
9240 					xpath_string l = lhs->eval_string(c, stack);
9241 					xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9242 
9243 					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9244 					{
9245 						xpath_allocator_capture cri(stack.result);
9246 
9247 						if (comp(l, string_value(*ri, stack.result)))
9248 							return true;
9249 					}
9250 
9251 					return false;
9252 				}
9253 			}
9254 
9255 			assert(!"Wrong types");
9256 			return false;
9257 		}
9258 
eval_once(xpath_node_set::type_t type,nodeset_eval_t eval)9259 		static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval)
9260 		{
9261 			return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any;
9262 		}
9263 
compare_rel(xpath_ast_node * lhs,xpath_ast_node * rhs,const xpath_context & c,const xpath_stack & stack,const Comp & comp)9264 		template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9265 		{
9266 			xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9267 
9268 			if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9269 				return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9270 			else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9271 			{
9272 				xpath_allocator_capture cr(stack.result);
9273 
9274 				xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9275 				xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9276 
9277 				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9278 				{
9279 					xpath_allocator_capture cri(stack.result);
9280 
9281 					double l = convert_string_to_number(string_value(*li, stack.result).c_str());
9282 
9283 					for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9284 					{
9285 						xpath_allocator_capture crii(stack.result);
9286 
9287 						if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9288 							return true;
9289 					}
9290 				}
9291 
9292 				return false;
9293 			}
9294 			else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
9295 			{
9296 				xpath_allocator_capture cr(stack.result);
9297 
9298 				double l = lhs->eval_number(c, stack);
9299 				xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9300 
9301 				for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9302 				{
9303 					xpath_allocator_capture cri(stack.result);
9304 
9305 					if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9306 						return true;
9307 				}
9308 
9309 				return false;
9310 			}
9311 			else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
9312 			{
9313 				xpath_allocator_capture cr(stack.result);
9314 
9315 				xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9316 				double r = rhs->eval_number(c, stack);
9317 
9318 				for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9319 				{
9320 					xpath_allocator_capture cri(stack.result);
9321 
9322 					if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
9323 						return true;
9324 				}
9325 
9326 				return false;
9327 			}
9328 			else
9329 			{
9330 				assert(!"Wrong types");
9331 				return false;
9332 			}
9333 		}
9334 
apply_predicate_boolean(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack,bool once)9335 		static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9336 		{
9337 			assert(ns.size() >= first);
9338 			assert(expr->rettype() != xpath_type_number);
9339 
9340 			size_t i = 1;
9341 			size_t size = ns.size() - first;
9342 
9343 			xpath_node* last = ns.begin() + first;
9344 
9345 			// remove_if... or well, sort of
9346 			for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9347 			{
9348 				xpath_context c(*it, i, size);
9349 
9350 				if (expr->eval_boolean(c, stack))
9351 				{
9352 					*last++ = *it;
9353 
9354 					if (once) break;
9355 				}
9356 			}
9357 
9358 			ns.truncate(last);
9359 		}
9360 
apply_predicate_number(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack,bool once)9361 		static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9362 		{
9363 			assert(ns.size() >= first);
9364 			assert(expr->rettype() == xpath_type_number);
9365 
9366 			size_t i = 1;
9367 			size_t size = ns.size() - first;
9368 
9369 			xpath_node* last = ns.begin() + first;
9370 
9371 			// remove_if... or well, sort of
9372 			for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9373 			{
9374 				xpath_context c(*it, i, size);
9375 
9376 				if (expr->eval_number(c, stack) == i)
9377 				{
9378 					*last++ = *it;
9379 
9380 					if (once) break;
9381 				}
9382 			}
9383 
9384 			ns.truncate(last);
9385 		}
9386 
apply_predicate_number_const(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack)9387 		static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
9388 		{
9389 			assert(ns.size() >= first);
9390 			assert(expr->rettype() == xpath_type_number);
9391 
9392 			size_t size = ns.size() - first;
9393 
9394 			xpath_node* last = ns.begin() + first;
9395 
9396 			xpath_context c(xpath_node(), 1, size);
9397 
9398 			double er = expr->eval_number(c, stack);
9399 
9400 			if (er >= 1.0 && er <= size)
9401 			{
9402 				size_t eri = static_cast<size_t>(er);
9403 
9404 				if (er == eri)
9405 				{
9406 					xpath_node r = last[eri - 1];
9407 
9408 					*last++ = r;
9409 				}
9410 			}
9411 
9412 			ns.truncate(last);
9413 		}
9414 
apply_predicate(xpath_node_set_raw & ns,size_t first,const xpath_stack & stack,bool once)9415 		void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once)
9416 		{
9417 			if (ns.size() == first) return;
9418 
9419 			assert(_type == ast_filter || _type == ast_predicate);
9420 
9421 			if (_test == predicate_constant || _test == predicate_constant_one)
9422 				apply_predicate_number_const(ns, first, _right, stack);
9423 			else if (_right->rettype() == xpath_type_number)
9424 				apply_predicate_number(ns, first, _right, stack, once);
9425 			else
9426 				apply_predicate_boolean(ns, first, _right, stack, once);
9427 		}
9428 
apply_predicates(xpath_node_set_raw & ns,size_t first,const xpath_stack & stack,nodeset_eval_t eval)9429 		void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval)
9430 		{
9431 			if (ns.size() == first) return;
9432 
9433 			bool last_once = eval_once(ns.type(), eval);
9434 
9435 			for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
9436 				pred->apply_predicate(ns, first, stack, !pred->_next && last_once);
9437 		}
9438 
step_push(xpath_node_set_raw & ns,xml_attribute_struct * a,xml_node_struct * parent,xpath_allocator * alloc)9439 		bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc)
9440 		{
9441 			assert(a);
9442 
9443 			const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
9444 
9445 			switch (_test)
9446 			{
9447 			case nodetest_name:
9448 				if (strequal(name, _data.nodetest) && is_xpath_attribute(name))
9449 				{
9450 					ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9451 					return true;
9452 				}
9453 				break;
9454 
9455 			case nodetest_type_node:
9456 			case nodetest_all:
9457 				if (is_xpath_attribute(name))
9458 				{
9459 					ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9460 					return true;
9461 				}
9462 				break;
9463 
9464 			case nodetest_all_in_namespace:
9465 				if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
9466 				{
9467 					ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9468 					return true;
9469 				}
9470 				break;
9471 
9472 			default:
9473 				;
9474 			}
9475 
9476 			return false;
9477 		}
9478 
step_push(xpath_node_set_raw & ns,xml_node_struct * n,xpath_allocator * alloc)9479 		bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc)
9480 		{
9481 			assert(n);
9482 
9483 			xml_node_type type = PUGI__NODETYPE(n);
9484 
9485 			switch (_test)
9486 			{
9487 			case nodetest_name:
9488 				if (type == node_element && n->name && strequal(n->name, _data.nodetest))
9489 				{
9490 					ns.push_back(xml_node(n), alloc);
9491 					return true;
9492 				}
9493 				break;
9494 
9495 			case nodetest_type_node:
9496 				ns.push_back(xml_node(n), alloc);
9497 				return true;
9498 
9499 			case nodetest_type_comment:
9500 				if (type == node_comment)
9501 				{
9502 					ns.push_back(xml_node(n), alloc);
9503 					return true;
9504 				}
9505 				break;
9506 
9507 			case nodetest_type_text:
9508 				if (type == node_pcdata || type == node_cdata)
9509 				{
9510 					ns.push_back(xml_node(n), alloc);
9511 					return true;
9512 				}
9513 				break;
9514 
9515 			case nodetest_type_pi:
9516 				if (type == node_pi)
9517 				{
9518 					ns.push_back(xml_node(n), alloc);
9519 					return true;
9520 				}
9521 				break;
9522 
9523 			case nodetest_pi:
9524 				if (type == node_pi && n->name && strequal(n->name, _data.nodetest))
9525 				{
9526 					ns.push_back(xml_node(n), alloc);
9527 					return true;
9528 				}
9529 				break;
9530 
9531 			case nodetest_all:
9532 				if (type == node_element)
9533 				{
9534 					ns.push_back(xml_node(n), alloc);
9535 					return true;
9536 				}
9537 				break;
9538 
9539 			case nodetest_all_in_namespace:
9540 				if (type == node_element && n->name && starts_with(n->name, _data.nodetest))
9541 				{
9542 					ns.push_back(xml_node(n), alloc);
9543 					return true;
9544 				}
9545 				break;
9546 
9547 			default:
9548 				assert(!"Unknown axis");
9549 			}
9550 
9551 			return false;
9552 		}
9553 
		// Node overload: visits the nodes (or attributes) reachable from context node n along
		// the axis encoded in the tag type T and offers each of them to step_push, which
		// appends those that pass the node test to ns. With once set, the walk returns right
		// after the first node that step_push accepted. The tag argument itself is unused;
		// only T::axis is read.
		template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T)
		{
			const axis_t axis = T::axis;

			switch (axis)
			{
			case axis_attribute:
			{
				// note: '&' is not short-circuiting, so step_push always runs before once is consulted
				for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
					if (step_push(ns, a, n, alloc) & once)
						return;

				break;
			}

			case axis_child:
			{
				for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
					if (step_push(ns, c, alloc) & once)
						return;

				break;
			}

			case axis_descendant:
			case axis_descendant_or_self:
			{
				if (axis == axis_descendant_or_self)
					if (step_push(ns, n, alloc) & once)
						return;

				xml_node_struct* cur = n->first_child;

				// depth-first walk below n: a node is offered before its children
				while (cur)
				{
					if (step_push(ns, cur, alloc) & once)
						return;

					if (cur->first_child)
						cur = cur->first_child;
					else
					{
						// climb until a next sibling exists; reaching n again ends the walk
						while (!cur->next_sibling)
						{
							cur = cur->parent;

							if (cur == n) return;
						}

						cur = cur->next_sibling;
					}
				}

				break;
			}

			case axis_following_sibling:
			{
				for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
					if (step_push(ns, c, alloc) & once)
						return;

				break;
			}

			case axis_preceding_sibling:
			{
				// NOTE(review): the loop stops at a node without next_sibling, which suggests that
				// prev_sibling_c of the first sibling wraps around to the last one - confirm
				// against xml_node_struct
				for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
					if (step_push(ns, c, alloc) & once)
						return;

				break;
			}

			case axis_following:
			{
				xml_node_struct* cur = n;

				// exit from this node so that we don't include descendants
				while (!cur->next_sibling)
				{
					cur = cur->parent;

					if (!cur) return;
				}

				cur = cur->next_sibling;

				// depth-first walk over everything after that point, up to the end of the tree
				while (cur)
				{
					if (step_push(ns, cur, alloc) & once)
						return;

					if (cur->first_child)
						cur = cur->first_child;
					else
					{
						while (!cur->next_sibling)
						{
							cur = cur->parent;

							if (!cur) return;
						}

						cur = cur->next_sibling;
					}
				}

				break;
			}

			case axis_preceding:
			{
				xml_node_struct* cur = n;

				// exit from this node so that we don't include descendants
				while (!cur->prev_sibling_c->next_sibling)
				{
					cur = cur->parent;

					if (!cur) return;
				}

				cur = cur->prev_sibling_c;

				// walk backwards: descend through first_child->prev_sibling_c before offering a node
				while (cur)
				{
					if (cur->first_child)
						cur = cur->first_child->prev_sibling_c;
					else
					{
						// leaf node, can't be ancestor
						if (step_push(ns, cur, alloc) & once)
							return;

						while (!cur->prev_sibling_c->next_sibling)
						{
							cur = cur->parent;

							if (!cur) return;

							// parents are offered on the way up, except ancestors of the context node
							if (!node_is_ancestor(cur, n))
								if (step_push(ns, cur, alloc) & once)
									return;
						}

						cur = cur->prev_sibling_c;
					}
				}

				break;
			}

			case axis_ancestor:
			case axis_ancestor_or_self:
			{
				if (axis == axis_ancestor_or_self)
					if (step_push(ns, n, alloc) & once)
						return;

				xml_node_struct* cur = n->parent;

				// follow the parent chain until it ends
				while (cur)
				{
					if (step_push(ns, cur, alloc) & once)
						return;

					cur = cur->parent;
				}

				break;
			}

			case axis_self:
			{
				// at most one candidate, so once is irrelevant here
				step_push(ns, n, alloc);

				break;
			}

			case axis_parent:
			{
				if (n->parent)
					step_push(ns, n->parent, alloc);

				break;
			}

			default:
				// remaining axes are not handled by this overload
				assert(!"Unimplemented axis");
			}
		}
9746 
step_fill(xpath_node_set_raw & ns,xml_attribute_struct * a,xml_node_struct * p,xpath_allocator * alloc,bool once,T v)9747 		template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v)
9748 		{
9749 			const axis_t axis = T::axis;
9750 
9751 			switch (axis)
9752 			{
9753 			case axis_ancestor:
9754 			case axis_ancestor_or_self:
9755 			{
9756 				if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
9757 					if (step_push(ns, a, p, alloc) & once)
9758 						return;
9759 
9760 				xml_node_struct* cur = p;
9761 
9762 				while (cur)
9763 				{
9764 					if (step_push(ns, cur, alloc) & once)
9765 						return;
9766 
9767 					cur = cur->parent;
9768 				}
9769 
9770 				break;
9771 			}
9772 
9773 			case axis_descendant_or_self:
9774 			case axis_self:
9775 			{
9776 				if (_test == nodetest_type_node) // reject attributes based on principal node type test
9777 					step_push(ns, a, p, alloc);
9778 
9779 				break;
9780 			}
9781 
9782 			case axis_following:
9783 			{
9784 				xml_node_struct* cur = p;
9785 
9786 				while (cur)
9787 				{
9788 					if (cur->first_child)
9789 						cur = cur->first_child;
9790 					else
9791 					{
9792 						while (!cur->next_sibling)
9793 						{
9794 							cur = cur->parent;
9795 
9796 							if (!cur) return;
9797 						}
9798 
9799 						cur = cur->next_sibling;
9800 					}
9801 
9802 					if (step_push(ns, cur, alloc) & once)
9803 						return;
9804 				}
9805 
9806 				break;
9807 			}
9808 
9809 			case axis_parent:
9810 			{
9811 				step_push(ns, p, alloc);
9812 
9813 				break;
9814 			}
9815 
9816 			case axis_preceding:
9817 			{
9818 				// preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
9819 				step_fill(ns, p, alloc, once, v);
9820 				break;
9821 			}
9822 
9823 			default:
9824 				assert(!"Unimplemented axis");
9825 			}
9826 		}
9827 
step_fill(xpath_node_set_raw & ns,const xpath_node & xn,xpath_allocator * alloc,bool once,T v)9828 		template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v)
9829 		{
9830 			const axis_t axis = T::axis;
9831 			const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
9832 
9833 			if (xn.node())
9834 				step_fill(ns, xn.node().internal_object(), alloc, once, v);
9835 			else if (axis_has_attributes && xn.attribute() && xn.parent())
9836 				step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
9837 		}
9838 
step_do(const xpath_context & c,const xpath_stack & stack,nodeset_eval_t eval,T v)9839 		template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)
9840 		{
9841 			const axis_t axis = T::axis;
9842 			const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
9843 			const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
9844 
9845 			bool once =
9846 				(axis == axis_attribute && _test == nodetest_name) ||
9847 				(!_right && eval_once(axis_type, eval)) ||
9848 				(_right && !_right->_next && _right->_test == predicate_constant_one);
9849 
9850 			xpath_node_set_raw ns;
9851 			ns.set_type(axis_type);
9852 
9853 			if (_left)
9854 			{
9855 				xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
9856 
9857 				// self axis preserves the original order
9858 				if (axis == axis_self) ns.set_type(s.type());
9859 
9860 				for (const xpath_node* it = s.begin(); it != s.end(); ++it)
9861 				{
9862 					size_t size = ns.size();
9863 
9864 					// in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
9865 					if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
9866 
9867 					step_fill(ns, *it, stack.result, once, v);
9868 					if (_right) apply_predicates(ns, size, stack, eval);
9869 				}
9870 			}
9871 			else
9872 			{
9873 				step_fill(ns, c.n, stack.result, once, v);
9874 				if (_right) apply_predicates(ns, 0, stack, eval);
9875 			}
9876 
9877 			// child, attribute and self axes always generate unique set of nodes
9878 			// for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
9879 			if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
9880 				ns.remove_duplicates();
9881 
9882 			return ns;
9883 		}
9884 
9885 	public:
xpath_ast_node(ast_type_t type,xpath_value_type rettype_,const char_t * value)9886 		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
9887 			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
9888 		{
9889 			assert(type == ast_string_constant);
9890 			_data.string = value;
9891 		}
9892 
xpath_ast_node(ast_type_t type,xpath_value_type rettype_,double value)9893 		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
9894 			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
9895 		{
9896 			assert(type == ast_number_constant);
9897 			_data.number = value;
9898 		}
9899 
xpath_ast_node(ast_type_t type,xpath_value_type rettype_,xpath_variable * value)9900 		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
9901 			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
9902 		{
9903 			assert(type == ast_variable);
9904 			_data.variable = value;
9905 		}
9906 
xpath_ast_node(ast_type_t type,xpath_value_type rettype_,xpath_ast_node * left=0,xpath_ast_node * right=0)9907 		xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
9908 			_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
9909 		{
9910 		}
9911 
xpath_ast_node(ast_type_t type,xpath_ast_node * left,axis_t axis,nodetest_t test,const char_t * contents)9912 		xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
9913 			_type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
9914 		{
9915 			assert(type == ast_step);
9916 			_data.nodetest = contents;
9917 		}
9918 
xpath_ast_node(ast_type_t type,xpath_ast_node * left,xpath_ast_node * right,predicate_t test)9919 		xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test):
9920 			_type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0)
9921 		{
9922 			assert(type == ast_filter || type == ast_predicate);
9923 		}
9924 
set_next(xpath_ast_node * value)9925 		void set_next(xpath_ast_node* value)
9926 		{
9927 			_next = value;
9928 		}
9929 
set_right(xpath_ast_node * value)9930 		void set_right(xpath_ast_node* value)
9931 		{
9932 			_right = value;
9933 		}
9934 
eval_boolean(const xpath_context & c,const xpath_stack & stack)9935 		bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
9936 		{
9937 			switch (_type)
9938 			{
9939 			case ast_op_or:
9940 				return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
9941 
9942 			case ast_op_and:
9943 				return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
9944 
9945 			case ast_op_equal:
9946 				return compare_eq(_left, _right, c, stack, equal_to());
9947 
9948 			case ast_op_not_equal:
9949 				return compare_eq(_left, _right, c, stack, not_equal_to());
9950 
9951 			case ast_op_less:
9952 				return compare_rel(_left, _right, c, stack, less());
9953 
9954 			case ast_op_greater:
9955 				return compare_rel(_right, _left, c, stack, less());
9956 
9957 			case ast_op_less_or_equal:
9958 				return compare_rel(_left, _right, c, stack, less_equal());
9959 
9960 			case ast_op_greater_or_equal:
9961 				return compare_rel(_right, _left, c, stack, less_equal());
9962 
9963 			case ast_func_starts_with:
9964 			{
9965 				xpath_allocator_capture cr(stack.result);
9966 
9967 				xpath_string lr = _left->eval_string(c, stack);
9968 				xpath_string rr = _right->eval_string(c, stack);
9969 
9970 				return starts_with(lr.c_str(), rr.c_str());
9971 			}
9972 
9973 			case ast_func_contains:
9974 			{
9975 				xpath_allocator_capture cr(stack.result);
9976 
9977 				xpath_string lr = _left->eval_string(c, stack);
9978 				xpath_string rr = _right->eval_string(c, stack);
9979 
9980 				return find_substring(lr.c_str(), rr.c_str()) != 0;
9981 			}
9982 
9983 			case ast_func_boolean:
9984 				return _left->eval_boolean(c, stack);
9985 
9986 			case ast_func_not:
9987 				return !_left->eval_boolean(c, stack);
9988 
9989 			case ast_func_true:
9990 				return true;
9991 
9992 			case ast_func_false:
9993 				return false;
9994 
9995 			case ast_func_lang:
9996 			{
9997 				if (c.n.attribute()) return false;
9998 
9999 				xpath_allocator_capture cr(stack.result);
10000 
10001 				xpath_string lang = _left->eval_string(c, stack);
10002 
10003 				for (xml_node n = c.n.node(); n; n = n.parent())
10004 				{
10005 					xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
10006 
10007 					if (a)
10008 					{
10009 						const char_t* value = a.value();
10010 
10011 						// strnicmp / strncasecmp is not portable
10012 						for (const char_t* lit = lang.c_str(); *lit; ++lit)
10013 						{
10014 							if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
10015 							++value;
10016 						}
10017 
10018 						return *value == 0 || *value == '-';
10019 					}
10020 				}
10021 
10022 				return false;
10023 			}
10024 
10025 			case ast_opt_compare_attribute:
10026 			{
10027 				const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();
10028 
10029 				xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
10030 
10031 				return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
10032 			}
10033 
10034 			case ast_variable:
10035 			{
10036 				assert(_rettype == _data.variable->type());
10037 
10038 				if (_rettype == xpath_type_boolean)
10039 					return _data.variable->get_boolean();
10040 
10041 				// fallthrough to type conversion
10042 			}
10043 
10044 			default:
10045 			{
10046 				switch (_rettype)
10047 				{
10048 				case xpath_type_number:
10049 					return convert_number_to_boolean(eval_number(c, stack));
10050 
10051 				case xpath_type_string:
10052 				{
10053 					xpath_allocator_capture cr(stack.result);
10054 
10055 					return !eval_string(c, stack).empty();
10056 				}
10057 
10058 				case xpath_type_node_set:
10059 				{
10060 					xpath_allocator_capture cr(stack.result);
10061 
10062 					return !eval_node_set(c, stack, nodeset_eval_any).empty();
10063 				}
10064 
10065 				default:
10066 					assert(!"Wrong expression for return type boolean");
10067 					return false;
10068 				}
10069 			}
10070 			}
10071 		}
10072 
eval_number(const xpath_context & c,const xpath_stack & stack)10073 		double eval_number(const xpath_context& c, const xpath_stack& stack)
10074 		{
10075 			switch (_type)
10076 			{
10077 			case ast_op_add:
10078 				return _left->eval_number(c, stack) + _right->eval_number(c, stack);
10079 
10080 			case ast_op_subtract:
10081 				return _left->eval_number(c, stack) - _right->eval_number(c, stack);
10082 
10083 			case ast_op_multiply:
10084 				return _left->eval_number(c, stack) * _right->eval_number(c, stack);
10085 
10086 			case ast_op_divide:
10087 				return _left->eval_number(c, stack) / _right->eval_number(c, stack);
10088 
10089 			case ast_op_mod:
10090 				return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
10091 
10092 			case ast_op_negate:
10093 				return -_left->eval_number(c, stack);
10094 
10095 			case ast_number_constant:
10096 				return _data.number;
10097 
10098 			case ast_func_last:
10099 				return static_cast<double>(c.size);
10100 
10101 			case ast_func_position:
10102 				return static_cast<double>(c.position);
10103 
10104 			case ast_func_count:
10105 			{
10106 				xpath_allocator_capture cr(stack.result);
10107 
10108 				return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
10109 			}
10110 
10111 			case ast_func_string_length_0:
10112 			{
10113 				xpath_allocator_capture cr(stack.result);
10114 
10115 				return static_cast<double>(string_value(c.n, stack.result).length());
10116 			}
10117 
10118 			case ast_func_string_length_1:
10119 			{
10120 				xpath_allocator_capture cr(stack.result);
10121 
10122 				return static_cast<double>(_left->eval_string(c, stack).length());
10123 			}
10124 
10125 			case ast_func_number_0:
10126 			{
10127 				xpath_allocator_capture cr(stack.result);
10128 
10129 				return convert_string_to_number(string_value(c.n, stack.result).c_str());
10130 			}
10131 
10132 			case ast_func_number_1:
10133 				return _left->eval_number(c, stack);
10134 
10135 			case ast_func_sum:
10136 			{
10137 				xpath_allocator_capture cr(stack.result);
10138 
10139 				double r = 0;
10140 
10141 				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);
10142 
10143 				for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
10144 				{
10145 					xpath_allocator_capture cri(stack.result);
10146 
10147 					r += convert_string_to_number(string_value(*it, stack.result).c_str());
10148 				}
10149 
10150 				return r;
10151 			}
10152 
10153 			case ast_func_floor:
10154 			{
10155 				double r = _left->eval_number(c, stack);
10156 
10157 				return r == r ? floor(r) : r;
10158 			}
10159 
10160 			case ast_func_ceiling:
10161 			{
10162 				double r = _left->eval_number(c, stack);
10163 
10164 				return r == r ? ceil(r) : r;
10165 			}
10166 
10167 			case ast_func_round:
10168 				return round_nearest_nzero(_left->eval_number(c, stack));
10169 
10170 			case ast_variable:
10171 			{
10172 				assert(_rettype == _data.variable->type());
10173 
10174 				if (_rettype == xpath_type_number)
10175 					return _data.variable->get_number();
10176 
10177 				// fallthrough to type conversion
10178 			}
10179 
10180 			default:
10181 			{
10182 				switch (_rettype)
10183 				{
10184 				case xpath_type_boolean:
10185 					return eval_boolean(c, stack) ? 1 : 0;
10186 
10187 				case xpath_type_string:
10188 				{
10189 					xpath_allocator_capture cr(stack.result);
10190 
10191 					return convert_string_to_number(eval_string(c, stack).c_str());
10192 				}
10193 
10194 				case xpath_type_node_set:
10195 				{
10196 					xpath_allocator_capture cr(stack.result);
10197 
10198 					return convert_string_to_number(eval_string(c, stack).c_str());
10199 				}
10200 
10201 				default:
10202 					assert(!"Wrong expression for return type number");
10203 					return 0;
10204 				}
10205 
10206 			}
10207 			}
10208 		}
10209 
		// Evaluates a concat() call: the first argument is _left, the remaining ones are the chain that starts at _right and is linked through _next.
		// Each argument is evaluated with the result/temp allocators swapped, and the concatenated string is built in a buffer taken from stack.result.
		xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
		{
			assert(_type == ast_func_concat);

			// capture on the temp allocator for the duration of this call (the per-argument strings are evaluated against it below)
			xpath_allocator_capture ct(stack.temp);

			// count the string number
			// (starts at 1 to account for _left)
			size_t count = 1;
			for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;

			// gather all strings
			xpath_string static_buffer[4];
			xpath_string* buffer = static_buffer;

			// allocate on-heap for large concats
			// (the array of string objects itself comes from stack.temp)
			if (count > sizeof(static_buffer) / sizeof(static_buffer[0]))
			{
				buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
				assert(buffer);
			}

			// evaluate all strings to temporary stack
			xpath_stack swapped_stack = {stack.temp, stack.result};

			buffer[0] = _left->eval_string(c, swapped_stack);

			size_t pos = 1;
			for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
			assert(pos == count);

			// get total length
			size_t length = 0;
			for (size_t i = 0; i < count; ++i) length += buffer[i].length();

			// create final string
			// (one extra character for the terminating zero)
			char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
			assert(result);

			char_t* ri = result;

			// copy the characters of every piece back to back, without their terminators
			for (size_t j = 0; j < count; ++j)
				for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
					*ri++ = *bi;

			*ri = 0;

			// ri points at the terminator, i.e. one past the last copied character
			return xpath_string::from_heap_preallocated(result, ri);
		}
10258 
		// Evaluates this expression as a string in context c.
		// Node types without a dedicated case are evaluated according to _rettype and then converted to string.
		// Several cases evaluate their arguments with the result/temp allocators swapped (swapped_stack) and
		// build the returned string from stack.result.
		xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
		{
			switch (_type)
			{
			case ast_string_constant:
				return xpath_string::from_const(_data.string);

			// local-name(): applied to the context node
			case ast_func_local_name_0:
			{
				xpath_node na = c.n;

				return xpath_string::from_const(local_name(na));
			}

			// local-name(node-set): applied to ns.first() of the argument, which is evaluated with nodeset_eval_first
			case ast_func_local_name_1:
			{
				xpath_allocator_capture cr(stack.result);

				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
				xpath_node na = ns.first();

				return xpath_string::from_const(local_name(na));
			}

			// name(): applied to the context node
			case ast_func_name_0:
			{
				xpath_node na = c.n;

				return xpath_string::from_const(qualified_name(na));
			}

			// name(node-set): applied to ns.first() of the argument
			case ast_func_name_1:
			{
				xpath_allocator_capture cr(stack.result);

				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
				xpath_node na = ns.first();

				return xpath_string::from_const(qualified_name(na));
			}

			// namespace-uri(): applied to the context node
			case ast_func_namespace_uri_0:
			{
				xpath_node na = c.n;

				return xpath_string::from_const(namespace_uri(na));
			}

			// namespace-uri(node-set): applied to ns.first() of the argument
			case ast_func_namespace_uri_1:
			{
				xpath_allocator_capture cr(stack.result);

				xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
				xpath_node na = ns.first();

				return xpath_string::from_const(namespace_uri(na));
			}

			// string(): string value of the context node
			case ast_func_string_0:
				return string_value(c.n, stack.result);

			case ast_func_string_1:
				return _left->eval_string(c, stack);

			case ast_func_concat:
				return eval_string_concat(c, stack);

			case ast_func_substring_before:
			{
				xpath_allocator_capture cr(stack.temp);

				xpath_stack swapped_stack = {stack.temp, stack.result};

				xpath_string s = _left->eval_string(c, swapped_stack);
				xpath_string p = _right->eval_string(c, swapped_stack);

				const char_t* pos = find_substring(s.c_str(), p.c_str());

				// the part of s before the match is copied using stack.result; no match yields an empty string
				return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
			}

			case ast_func_substring_after:
			{
				xpath_allocator_capture cr(stack.temp);

				xpath_stack swapped_stack = {stack.temp, stack.result};

				xpath_string s = _left->eval_string(c, swapped_stack);
				xpath_string p = _right->eval_string(c, swapped_stack);

				const char_t* pos = find_substring(s.c_str(), p.c_str());
				if (!pos) return xpath_string();

				// the result runs from just past the match to the end of s
				const char_t* rbegin = pos + p.length();
				const char_t* rend = s.c_str() + s.length();

				// heap-backed sources are copied using stack.result; otherwise the tail of s is wrapped in place
				return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
			}

			// substring(string, first)
			case ast_func_substring_2:
			{
				xpath_allocator_capture cr(stack.temp);

				xpath_stack swapped_stack = {stack.temp, stack.result};

				xpath_string s = _left->eval_string(c, swapped_stack);
				size_t s_length = s.length();

				double first = round_nearest(_right->eval_number(c, stack));

				if (is_nan(first)) return xpath_string(); // NaN
				else if (first >= s_length + 1) return xpath_string();

				// positions are 1-based; anything below 1 starts at the first character
				size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
				assert(1 <= pos && pos <= s_length + 1);

				const char_t* rbegin = s.c_str() + (pos - 1);
				const char_t* rend = s.c_str() + s.length();

				// heap-backed sources are copied using stack.result; otherwise the tail of s is wrapped in place
				return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
			}

			// substring(string, first, length); the length argument is _right->_next
			case ast_func_substring_3:
			{
				xpath_allocator_capture cr(stack.temp);

				xpath_stack swapped_stack = {stack.temp, stack.result};

				xpath_string s = _left->eval_string(c, swapped_stack);
				size_t s_length = s.length();

				// last is the (1-based) position one past the final character to keep
				double first = round_nearest(_right->eval_number(c, stack));
				double last = first + round_nearest(_right->_next->eval_number(c, stack));

				if (is_nan(first) || is_nan(last)) return xpath_string();
				else if (first >= s_length + 1) return xpath_string();
				else if (first >= last) return xpath_string();
				else if (last < 1) return xpath_string();

				// clamp both ends to the 1-based range [1, s_length + 1]
				size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
				size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);

				assert(1 <= pos && pos <= end && end <= s_length + 1);
				const char_t* rbegin = s.c_str() + (pos - 1);
				const char_t* rend = s.c_str() + (end - 1);

				// the source can only be wrapped in place when the result extends to its end and it is not heap-backed
				return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
			}

			// normalize-space(): operates on the string value of the context node
			case ast_func_normalize_space_0:
			{
				xpath_string s = string_value(c.n, stack.result);

				// normalize_space works on the buffer obtained from s.data() and returns the new end pointer
				char_t* begin = s.data(stack.result);
				char_t* end = normalize_space(begin);

				return xpath_string::from_heap_preallocated(begin, end);
			}

			case ast_func_normalize_space_1:
			{
				xpath_string s = _left->eval_string(c, stack);

				// normalize_space works on the buffer obtained from s.data() and returns the new end pointer
				char_t* begin = s.data(stack.result);
				char_t* end = normalize_space(begin);

				return xpath_string::from_heap_preallocated(begin, end);
			}

			// translate(string, from, to); 'from' is _right and 'to' is _right->_next
			case ast_func_translate:
			{
				xpath_allocator_capture cr(stack.temp);

				xpath_stack swapped_stack = {stack.temp, stack.result};

				// the subject is evaluated on the regular stack; the two mapping strings use the swapped one
				xpath_string s = _left->eval_string(c, stack);
				xpath_string from = _right->eval_string(c, swapped_stack);
				xpath_string to = _right->_next->eval_string(c, swapped_stack);

				char_t* begin = s.data(stack.result);
				char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());

				return xpath_string::from_heap_preallocated(begin, end);
			}

			// translate() variant that uses the table stored in _data.table
			case ast_opt_translate_table:
			{
				xpath_string s = _left->eval_string(c, stack);

				char_t* begin = s.data(stack.result);
				char_t* end = translate_table(begin, _data.table);

				return xpath_string::from_heap_preallocated(begin, end);
			}

			case ast_variable:
			{
				assert(_rettype == _data.variable->type());

				if (_rettype == xpath_type_string)
					return xpath_string::from_const(_data.variable->get_string());

				// fallthrough to type conversion
			}

			// fallthrough
			default:
			{
				// convert from the expression's own return type
				switch (_rettype)
				{
				case xpath_type_boolean:
					return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));

				case xpath_type_number:
					return convert_number_to_string(eval_number(c, stack), stack.result);

				case xpath_type_node_set:
				{
					xpath_allocator_capture cr(stack.temp);

					xpath_stack swapped_stack = {stack.temp, stack.result};

					// the node set is evaluated on the swapped stack; the string value of its first node uses stack.result
					xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
					return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
				}

				default:
					assert(!"Wrong expression for return type string");
					return xpath_string();
				}
			}
			}
		}
10491 
		// Evaluates this expression as a node set in context c.
		// eval is the caller's evaluation mode (this file uses nodeset_eval_all, nodeset_eval_any and nodeset_eval_first);
		// it is forwarded to the union operands and to step_do, and it feeds eval_once() for filters.
		// Expression types that cannot produce a node set trip the assert in the default branch.
		xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)
		{
			switch (_type)
			{
			case ast_op_union:
			{
				xpath_allocator_capture cr(stack.temp);

				xpath_stack swapped_stack = {stack.temp, stack.result};

				// the left operand is evaluated on the swapped stack, the right one on the regular stack;
				// the left set is then appended to the right one using stack.result
				xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval);
				xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval);

				// we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
				rs.set_type(xpath_node_set::type_unsorted);

				rs.append(ls.begin(), ls.end(), stack.result);
				rs.remove_duplicates();

				return rs;
			}

			case ast_filter:
			{
				// a predicate classified as predicate_constant_one only needs the first node of the input set
				xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all);

				// either expression is a number or it contains position() call; sort by document order
				if (_test != predicate_posinv) set.sort_do();

				bool once = eval_once(set.type(), eval);

				apply_predicate(set, 0, stack, once);

				return set;
			}

			// id() always yields an empty set here
			case ast_func_id:
				return xpath_node_set_raw();

			case ast_step:
			{
				// dispatch the runtime axis value to the step_do instantiation for that axis
				switch (_axis)
				{
				case axis_ancestor:
					return step_do(c, stack, eval, axis_to_type<axis_ancestor>());

				case axis_ancestor_or_self:
					return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());

				case axis_attribute:
					return step_do(c, stack, eval, axis_to_type<axis_attribute>());

				case axis_child:
					return step_do(c, stack, eval, axis_to_type<axis_child>());

				case axis_descendant:
					return step_do(c, stack, eval, axis_to_type<axis_descendant>());

				case axis_descendant_or_self:
					return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());

				case axis_following:
					return step_do(c, stack, eval, axis_to_type<axis_following>());

				case axis_following_sibling:
					return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());

				case axis_namespace:
					// namespaced axis is not supported
					return xpath_node_set_raw();

				case axis_parent:
					return step_do(c, stack, eval, axis_to_type<axis_parent>());

				case axis_preceding:
					return step_do(c, stack, eval, axis_to_type<axis_preceding>());

				case axis_preceding_sibling:
					return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());

				case axis_self:
					return step_do(c, stack, eval, axis_to_type<axis_self>());

				default:
					assert(!"Unknown axis");
					return xpath_node_set_raw();
				}
			}

			case ast_step_root:
			{
				assert(!_right); // root step can't have any predicates

				xpath_node_set_raw ns;

				ns.set_type(xpath_node_set::type_sorted);

				// for an attribute context the root is taken from the attribute's parent node;
				// if the context holds neither a node nor an attribute the set stays empty
				if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
				else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);

				return ns;
			}

			case ast_variable:
			{
				assert(_rettype == _data.variable->type());

				if (_rettype == xpath_type_node_set)
				{
					const xpath_node_set& s = _data.variable->get_node_set();

					// copy the variable's nodes into a fresh raw set that keeps the same ordering type
					xpath_node_set_raw ns;

					ns.set_type(s.type());
					ns.append(s.begin(), s.end(), stack.result);

					return ns;
				}

				// fallthrough to type conversion
			}

			// fallthrough
			default:
				assert(!"Wrong expression for return type node set");
				return xpath_node_set_raw();
			}
		}
10619 
optimize(xpath_allocator * alloc)10620 		void optimize(xpath_allocator* alloc)
10621 		{
10622 			if (_left) _left->optimize(alloc);
10623 			if (_right) _right->optimize(alloc);
10624 			if (_next) _next->optimize(alloc);
10625 
10626 			optimize_self(alloc);
10627 		}
10628 
optimize_self(xpath_allocator * alloc)10629 		void optimize_self(xpath_allocator* alloc)
10630 		{
10631 			// Rewrite [position()=expr] with [expr]
10632 			// Note that this step has to go before classification to recognize [position()=1]
10633 			if ((_type == ast_filter || _type == ast_predicate) &&
10634 				_right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number)
10635 			{
10636 				_right = _right->_right;
10637 			}
10638 
10639 			// Classify filter/predicate ops to perform various optimizations during evaluation
10640 			if (_type == ast_filter || _type == ast_predicate)
10641 			{
10642 				assert(_test == predicate_default);
10643 
10644 				if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
10645 					_test = predicate_constant_one;
10646 				else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
10647 					_test = predicate_constant;
10648 				else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
10649 					_test = predicate_posinv;
10650 			}
10651 
10652 			// Rewrite descendant-or-self::node()/child::foo with descendant::foo
10653 			// The former is a full form of //foo, the latter is much faster since it executes the node test immediately
10654 			// Do a similar kind of rewrite for self/descendant/descendant-or-self axes
10655 			// Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
10656 			if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && _left &&
10657 				_left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
10658 				is_posinv_step())
10659 			{
10660 				if (_axis == axis_child || _axis == axis_descendant)
10661 					_axis = axis_descendant;
10662 				else
10663 					_axis = axis_descendant_or_self;
10664 
10665 				_left = _left->_left;
10666 			}
10667 
10668 			// Use optimized lookup table implementation for translate() with constant arguments
10669 			if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
10670 			{
10671 				unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
10672 
10673 				if (table)
10674 				{
10675 					_type = ast_opt_translate_table;
10676 					_data.table = table;
10677 				}
10678 			}
10679 
10680 			// Use optimized path for @attr = 'value' or @attr = $value
10681 			if (_type == ast_op_equal &&
10682 				_left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
10683 				(_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string)))
10684 			{
10685 				_type = ast_opt_compare_attribute;
10686 			}
10687 		}
10688 
is_posinv_expr() const10689 		bool is_posinv_expr() const
10690 		{
10691 			switch (_type)
10692 			{
10693 			case ast_func_position:
10694 			case ast_func_last:
10695 				return false;
10696 
10697 			case ast_string_constant:
10698 			case ast_number_constant:
10699 			case ast_variable:
10700 				return true;
10701 
10702 			case ast_step:
10703 			case ast_step_root:
10704 				return true;
10705 
10706 			case ast_predicate:
10707 			case ast_filter:
10708 				return true;
10709 
10710 			default:
10711 				if (_left && !_left->is_posinv_expr()) return false;
10712 
10713 				for (xpath_ast_node* n = _right; n; n = n->_next)
10714 					if (!n->is_posinv_expr()) return false;
10715 
10716 				return true;
10717 			}
10718 		}
10719 
is_posinv_step() const10720 		bool is_posinv_step() const
10721 		{
10722 			assert(_type == ast_step);
10723 
10724 			for (xpath_ast_node* n = _right; n; n = n->_next)
10725 			{
10726 				assert(n->_type == ast_predicate);
10727 
10728 				if (n->_test != predicate_posinv)
10729 					return false;
10730 			}
10731 
10732 			return true;
10733 		}
10734 
rettype() const10735 		xpath_value_type rettype() const
10736 		{
10737 			return static_cast<xpath_value_type>(_rettype);
10738 		}
10739 	};
10740 
10741 	struct xpath_parser
10742 	{
		// Allocator that provides memory for AST nodes and copied strings (see alloc_node/alloc_string)
		xpath_allocator* _alloc;
		// Lexer over the query text; the parser always looks at its current lexeme
		xpath_lexer _lexer;

		// Start of the query text, used to turn the lexer position into an error offset
		const char_t* _query;
		// Variable set used to resolve variable references; may be null, in which case any reference is an error
		xpath_variable_set* _variables;

		// Receives the error message and offset when parsing fails
		xpath_parse_result* _result;

		// Small buffer handed to get_variable_scratch/convert_string_to_number_scratch
		char_t _scratch[32];

	#ifdef PUGIXML_NO_EXCEPTIONS
		// Jump target used by throw_error() when exceptions are disabled; armed by the static parse()
		jmp_buf _error_handler;
	#endif
10756 
throw_errorxpath_parser10757 		void throw_error(const char* message)
10758 		{
10759 			_result->error = message;
10760 			_result->offset = _lexer.current_pos() - _query;
10761 
10762 		#ifdef PUGIXML_NO_EXCEPTIONS
10763 			longjmp(_error_handler, 1);
10764 		#else
10765 			throw xpath_exception(*_result);
10766 		#endif
10767 		}
10768 
throw_error_oomxpath_parser10769 		void throw_error_oom()
10770 		{
10771 		#ifdef PUGIXML_NO_EXCEPTIONS
10772 			throw_error("Out of memory");
10773 		#else
10774 			throw std::bad_alloc();
10775 		#endif
10776 		}
10777 
alloc_nodexpath_parser10778 		void* alloc_node()
10779 		{
10780 			void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node));
10781 
10782 			if (!result) throw_error_oom();
10783 
10784 			return result;
10785 		}
10786 
alloc_stringxpath_parser10787 		const char_t* alloc_string(const xpath_lexer_string& value)
10788 		{
10789 			if (value.begin)
10790 			{
10791 				size_t length = static_cast<size_t>(value.end - value.begin);
10792 
10793 				char_t* c = static_cast<char_t*>(_alloc->allocate_nothrow((length + 1) * sizeof(char_t)));
10794 				if (!c) throw_error_oom();
10795 				assert(c); // workaround for clang static analysis
10796 
10797 				memcpy(c, value.begin, length * sizeof(char_t));
10798 				c[length] = 0;
10799 
10800 				return c;
10801 			}
10802 			else return 0;
10803 		}
10804 
parse_function_helperxpath_parser10805 		xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2])
10806 		{
10807 			assert(argc <= 1);
10808 
10809 			if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
10810 
10811 			return new (alloc_node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]);
10812 		}
10813 
parse_functionxpath_parser10814 		xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
10815 		{
10816 			switch (name.begin[0])
10817 			{
10818 			case 'b':
10819 				if (name == PUGIXML_TEXT("boolean") && argc == 1)
10820 					return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
10821 
10822 				break;
10823 
10824 			case 'c':
10825 				if (name == PUGIXML_TEXT("count") && argc == 1)
10826 				{
10827 					if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
10828 					return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
10829 				}
10830 				else if (name == PUGIXML_TEXT("contains") && argc == 2)
10831 					return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
10832 				else if (name == PUGIXML_TEXT("concat") && argc >= 2)
10833 					return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]);
10834 				else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
10835 					return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
10836 
10837 				break;
10838 
10839 			case 'f':
10840 				if (name == PUGIXML_TEXT("false") && argc == 0)
10841 					return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean);
10842 				else if (name == PUGIXML_TEXT("floor") && argc == 1)
10843 					return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
10844 
10845 				break;
10846 
10847 			case 'i':
10848 				if (name == PUGIXML_TEXT("id") && argc == 1)
10849 					return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
10850 
10851 				break;
10852 
10853 			case 'l':
10854 				if (name == PUGIXML_TEXT("last") && argc == 0)
10855 					return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number);
10856 				else if (name == PUGIXML_TEXT("lang") && argc == 1)
10857 					return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
10858 				else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
10859 					return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args);
10860 
10861 				break;
10862 
10863 			case 'n':
10864 				if (name == PUGIXML_TEXT("name") && argc <= 1)
10865 					return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args);
10866 				else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
10867 					return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args);
10868 				else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
10869 					return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
10870 				else if (name == PUGIXML_TEXT("not") && argc == 1)
10871 					return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
10872 				else if (name == PUGIXML_TEXT("number") && argc <= 1)
10873 					return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
10874 
10875 				break;
10876 
10877 			case 'p':
10878 				if (name == PUGIXML_TEXT("position") && argc == 0)
10879 					return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number);
10880 
10881 				break;
10882 
10883 			case 'r':
10884 				if (name == PUGIXML_TEXT("round") && argc == 1)
10885 					return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
10886 
10887 				break;
10888 
10889 			case 's':
10890 				if (name == PUGIXML_TEXT("string") && argc <= 1)
10891 					return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
10892 				else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
10893 					return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
10894 				else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
10895 					return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
10896 				else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
10897 					return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
10898 				else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
10899 					return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
10900 				else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
10901 					return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
10902 				else if (name == PUGIXML_TEXT("sum") && argc == 1)
10903 				{
10904 					if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set");
10905 					return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
10906 				}
10907 
10908 				break;
10909 
10910 			case 't':
10911 				if (name == PUGIXML_TEXT("translate") && argc == 3)
10912 					return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]);
10913 				else if (name == PUGIXML_TEXT("true") && argc == 0)
10914 					return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean);
10915 
10916 				break;
10917 
10918 			default:
10919 				break;
10920 			}
10921 
10922 			throw_error("Unrecognized function or wrong parameter count");
10923 
10924 			return 0;
10925 		}
10926 
parse_axis_namexpath_parser10927 		axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
10928 		{
10929 			specified = true;
10930 
10931 			switch (name.begin[0])
10932 			{
10933 			case 'a':
10934 				if (name == PUGIXML_TEXT("ancestor"))
10935 					return axis_ancestor;
10936 				else if (name == PUGIXML_TEXT("ancestor-or-self"))
10937 					return axis_ancestor_or_self;
10938 				else if (name == PUGIXML_TEXT("attribute"))
10939 					return axis_attribute;
10940 
10941 				break;
10942 
10943 			case 'c':
10944 				if (name == PUGIXML_TEXT("child"))
10945 					return axis_child;
10946 
10947 				break;
10948 
10949 			case 'd':
10950 				if (name == PUGIXML_TEXT("descendant"))
10951 					return axis_descendant;
10952 				else if (name == PUGIXML_TEXT("descendant-or-self"))
10953 					return axis_descendant_or_self;
10954 
10955 				break;
10956 
10957 			case 'f':
10958 				if (name == PUGIXML_TEXT("following"))
10959 					return axis_following;
10960 				else if (name == PUGIXML_TEXT("following-sibling"))
10961 					return axis_following_sibling;
10962 
10963 				break;
10964 
10965 			case 'n':
10966 				if (name == PUGIXML_TEXT("namespace"))
10967 					return axis_namespace;
10968 
10969 				break;
10970 
10971 			case 'p':
10972 				if (name == PUGIXML_TEXT("parent"))
10973 					return axis_parent;
10974 				else if (name == PUGIXML_TEXT("preceding"))
10975 					return axis_preceding;
10976 				else if (name == PUGIXML_TEXT("preceding-sibling"))
10977 					return axis_preceding_sibling;
10978 
10979 				break;
10980 
10981 			case 's':
10982 				if (name == PUGIXML_TEXT("self"))
10983 					return axis_self;
10984 
10985 				break;
10986 
10987 			default:
10988 				break;
10989 			}
10990 
10991 			specified = false;
10992 			return axis_child;
10993 		}
10994 
parse_node_test_typexpath_parser10995 		nodetest_t parse_node_test_type(const xpath_lexer_string& name)
10996 		{
10997 			switch (name.begin[0])
10998 			{
10999 			case 'c':
11000 				if (name == PUGIXML_TEXT("comment"))
11001 					return nodetest_type_comment;
11002 
11003 				break;
11004 
11005 			case 'n':
11006 				if (name == PUGIXML_TEXT("node"))
11007 					return nodetest_type_node;
11008 
11009 				break;
11010 
11011 			case 'p':
11012 				if (name == PUGIXML_TEXT("processing-instruction"))
11013 					return nodetest_type_pi;
11014 
11015 				break;
11016 
11017 			case 't':
11018 				if (name == PUGIXML_TEXT("text"))
11019 					return nodetest_type_text;
11020 
11021 				break;
11022 
11023 			default:
11024 				break;
11025 			}
11026 
11027 			return nodetest_none;
11028 		}
11029 
11030 		// PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
parse_primary_expressionxpath_parser11031 		xpath_ast_node* parse_primary_expression()
11032 		{
11033 			switch (_lexer.current())
11034 			{
11035 			case lex_var_ref:
11036 			{
11037 				xpath_lexer_string name = _lexer.contents();
11038 
11039 				if (!_variables)
11040 					throw_error("Unknown variable: variable set is not provided");
11041 
11042 				xpath_variable* var = 0;
11043 				if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var))
11044 					throw_error_oom();
11045 
11046 				if (!var)
11047 					throw_error("Unknown variable: variable set does not contain the given name");
11048 
11049 				_lexer.next();
11050 
11051 				return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var);
11052 			}
11053 
11054 			case lex_open_brace:
11055 			{
11056 				_lexer.next();
11057 
11058 				xpath_ast_node* n = parse_expression();
11059 
11060 				if (_lexer.current() != lex_close_brace)
11061 					throw_error("Unmatched braces");
11062 
11063 				_lexer.next();
11064 
11065 				return n;
11066 			}
11067 
11068 			case lex_quoted_string:
11069 			{
11070 				const char_t* value = alloc_string(_lexer.contents());
11071 
11072 				xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value);
11073 				_lexer.next();
11074 
11075 				return n;
11076 			}
11077 
11078 			case lex_number:
11079 			{
11080 				double value = 0;
11081 
11082 				if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
11083 					throw_error_oom();
11084 
11085 				xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value);
11086 				_lexer.next();
11087 
11088 				return n;
11089 			}
11090 
11091 			case lex_string:
11092 			{
11093 				xpath_ast_node* args[2] = {0};
11094 				size_t argc = 0;
11095 
11096 				xpath_lexer_string function = _lexer.contents();
11097 				_lexer.next();
11098 
11099 				xpath_ast_node* last_arg = 0;
11100 
11101 				if (_lexer.current() != lex_open_brace)
11102 					throw_error("Unrecognized function call");
11103 				_lexer.next();
11104 
11105 				if (_lexer.current() != lex_close_brace)
11106 					args[argc++] = parse_expression();
11107 
11108 				while (_lexer.current() != lex_close_brace)
11109 				{
11110 					if (_lexer.current() != lex_comma)
11111 						throw_error("No comma between function arguments");
11112 					_lexer.next();
11113 
11114 					xpath_ast_node* n = parse_expression();
11115 
11116 					if (argc < 2) args[argc] = n;
11117 					else last_arg->set_next(n);
11118 
11119 					argc++;
11120 					last_arg = n;
11121 				}
11122 
11123 				_lexer.next();
11124 
11125 				return parse_function(function, argc, args);
11126 			}
11127 
11128 			default:
11129 				throw_error("Unrecognizable primary expression");
11130 
11131 				return 0;
11132 			}
11133 		}
11134 
11135 		// FilterExpr ::= PrimaryExpr | FilterExpr Predicate
11136 		// Predicate ::= '[' PredicateExpr ']'
11137 		// PredicateExpr ::= Expr
parse_filter_expressionxpath_parser11138 		xpath_ast_node* parse_filter_expression()
11139 		{
11140 			xpath_ast_node* n = parse_primary_expression();
11141 
11142 			while (_lexer.current() == lex_open_square_brace)
11143 			{
11144 				_lexer.next();
11145 
11146 				xpath_ast_node* expr = parse_expression();
11147 
11148 				if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set");
11149 
11150 				n = new (alloc_node()) xpath_ast_node(ast_filter, n, expr, predicate_default);
11151 
11152 				if (_lexer.current() != lex_close_square_brace)
11153 					throw_error("Unmatched square brace");
11154 
11155 				_lexer.next();
11156 			}
11157 
11158 			return n;
11159 		}
11160 
11161 		// Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
11162 		// AxisSpecifier ::= AxisName '::' | '@'?
11163 		// NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
11164 		// NameTest ::= '*' | NCName ':' '*' | QName
11165 		// AbbreviatedStep ::= '.' | '..'
		// Parses a single location step applied to `set` and returns the resulting ast_step node.
		// `set` is the expression the step is applied to; it may be null (parse_location_path passes 0
		// for the start of a relative path), otherwise it has to be a node set.
		// Predicates following the node test are attached to the step: the first one via set_right(),
		// the following ones chained via set_next().
		// All syntax errors are reported through throw_error(), which does not return.
		xpath_ast_node* parse_step(xpath_ast_node* set)
		{
			if (set && set->rettype() != xpath_type_node_set)
				throw_error("Step has to be applied to node set");

			bool axis_specified = false;
			axis_t axis = axis_child; // implied child axis

			// abbreviated forms: '@' selects the attribute axis, '.' and '..' are complete steps on their own
			if (_lexer.current() == lex_axis_attribute)
			{
				axis = axis_attribute;
				axis_specified = true;

				_lexer.next();
			}
			else if (_lexer.current() == lex_dot)
			{
				_lexer.next();

				return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0);
			}
			else if (_lexer.current() == lex_double_dot)
			{
				_lexer.next();

				return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0);
			}

			// nt_type stays nodetest_none until the kind of node test is known;
			// nt_name holds the name the test refers to (reset to an empty string for tests without a name)
			nodetest_t nt_type = nodetest_none;
			xpath_lexer_string nt_name;

			if (_lexer.current() == lex_string)
			{
				// node name test
				nt_name = _lexer.contents();
				_lexer.next();

				// was it an axis name?
				if (_lexer.current() == lex_double_colon)
				{
					// parse axis name
					if (axis_specified) throw_error("Two axis specifiers in one step");

					axis = parse_axis_name(nt_name, axis_specified);

					if (!axis_specified) throw_error("Unknown axis");

					// read actual node test
					_lexer.next();

					if (_lexer.current() == lex_multiply)
					{
						nt_type = nodetest_all;
						nt_name = xpath_lexer_string();
						_lexer.next();
					}
					else if (_lexer.current() == lex_string)
					{
						nt_name = _lexer.contents();
						_lexer.next();
					}
					else throw_error("Unrecognized node test");
				}

				// the test kind is still unknown unless the step was axis::*
				if (nt_type == nodetest_none)
				{
					// node type test or processing-instruction
					if (_lexer.current() == lex_open_brace)
					{
						_lexer.next();

						if (_lexer.current() == lex_close_brace)
						{
							_lexer.next();

							nt_type = parse_node_test_type(nt_name);

							if (nt_type == nodetest_none) throw_error("Unrecognized node type");

							nt_name = xpath_lexer_string();
						}
						else if (nt_name == PUGIXML_TEXT("processing-instruction"))
						{
							if (_lexer.current() != lex_quoted_string)
								throw_error("Only literals are allowed as arguments to processing-instruction()");

							// the literal replaces the function-like name as the name to test against
							nt_type = nodetest_pi;
							nt_name = _lexer.contents();
							_lexer.next();

							if (_lexer.current() != lex_close_brace)
								throw_error("Unmatched brace near processing-instruction()");
							_lexer.next();
						}
						else
							throw_error("Unmatched brace near node type test");

					}
					// QName or NCName:*
					else
					{
						if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
						{
							nt_name.end--; // erase *

							nt_type = nodetest_all_in_namespace;
						}
						else nt_type = nodetest_name;
					}
				}
			}
			else if (_lexer.current() == lex_multiply)
			{
				nt_type = nodetest_all;
				_lexer.next();
			}
			else throw_error("Unrecognized node test");

			// the name is copied into allocator-owned storage (alloc_string returns null for an empty nt_name)
			xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name));

			// last predicate parsed so far, used to append the next one
			xpath_ast_node* last = 0;

			while (_lexer.current() == lex_open_square_brace)
			{
				_lexer.next();

				xpath_ast_node* expr = parse_expression();

				xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, 0, expr, predicate_default);

				if (_lexer.current() != lex_close_square_brace)
					throw_error("Unmatched square brace");
				_lexer.next();

				if (last) last->set_next(pred);
				else n->set_right(pred);

				last = pred;
			}

			return n;
		}
11308 
11309 		// RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
parse_relative_location_pathxpath_parser11310 		xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
11311 		{
11312 			xpath_ast_node* n = parse_step(set);
11313 
11314 			while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11315 			{
11316 				lexeme_t l = _lexer.current();
11317 				_lexer.next();
11318 
11319 				if (l == lex_double_slash)
11320 					n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11321 
11322 				n = parse_step(n);
11323 			}
11324 
11325 			return n;
11326 		}
11327 
11328 		// LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
11329 		// AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
parse_location_pathxpath_parser11330 		xpath_ast_node* parse_location_path()
11331 		{
11332 			if (_lexer.current() == lex_slash)
11333 			{
11334 				_lexer.next();
11335 
11336 				xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
11337 
11338 				// relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
11339 				lexeme_t l = _lexer.current();
11340 
11341 				if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
11342 					return parse_relative_location_path(n);
11343 				else
11344 					return n;
11345 			}
11346 			else if (_lexer.current() == lex_double_slash)
11347 			{
11348 				_lexer.next();
11349 
11350 				xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
11351 				n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11352 
11353 				return parse_relative_location_path(n);
11354 			}
11355 
11356 			// else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
11357 			return parse_relative_location_path(0);
11358 		}
11359 
11360 		// PathExpr ::= LocationPath
11361 		//				| FilterExpr
11362 		//				| FilterExpr '/' RelativeLocationPath
11363 		//				| FilterExpr '//' RelativeLocationPath
11364 		// UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
11365 		// UnaryExpr ::= UnionExpr | '-' UnaryExpr
parse_path_or_unary_expressionxpath_parser11366 		xpath_ast_node* parse_path_or_unary_expression()
11367 		{
11368 			// Clarification.
11369 			// PathExpr begins with either LocationPath or FilterExpr.
11370 			// FilterExpr begins with PrimaryExpr
11371 			// PrimaryExpr begins with '$' in case of it being a variable reference,
11372 			// '(' in case of it being an expression, string literal, number constant or
11373 			// function call.
11374 
11375 			if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
11376 				_lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
11377 				_lexer.current() == lex_string)
11378 			{
11379 				if (_lexer.current() == lex_string)
11380 				{
11381 					// This is either a function call, or not - if not, we shall proceed with location path
11382 					const char_t* state = _lexer.state();
11383 
11384 					while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
11385 
11386 					if (*state != '(') return parse_location_path();
11387 
11388 					// This looks like a function call; however this still can be a node-test. Check it.
11389 					if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path();
11390 				}
11391 
11392 				xpath_ast_node* n = parse_filter_expression();
11393 
11394 				if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11395 				{
11396 					lexeme_t l = _lexer.current();
11397 					_lexer.next();
11398 
11399 					if (l == lex_double_slash)
11400 					{
11401 						if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set");
11402 
11403 						n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11404 					}
11405 
11406 					// select from location path
11407 					return parse_relative_location_path(n);
11408 				}
11409 
11410 				return n;
11411 			}
11412 			else if (_lexer.current() == lex_minus)
11413 			{
11414 				_lexer.next();
11415 
11416 				// precedence 7+ - only parses union expressions
11417 				xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7);
11418 
11419 				return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr);
11420 			}
11421 			else
11422 				return parse_location_path();
11423 		}
11424 
11425 		struct binary_op_t
11426 		{
11427 			ast_type_t asttype;
11428 			xpath_value_type rettype;
11429 			int precedence;
11430 
binary_op_txpath_parser::binary_op_t11431 			binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
11432 			{
11433 			}
11434 
binary_op_txpath_parser::binary_op_t11435 			binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
11436 			{
11437 			}
11438 
parsexpath_parser::binary_op_t11439 			static binary_op_t parse(xpath_lexer& lexer)
11440 			{
11441 				switch (lexer.current())
11442 				{
11443 				case lex_string:
11444 					if (lexer.contents() == PUGIXML_TEXT("or"))
11445 						return binary_op_t(ast_op_or, xpath_type_boolean, 1);
11446 					else if (lexer.contents() == PUGIXML_TEXT("and"))
11447 						return binary_op_t(ast_op_and, xpath_type_boolean, 2);
11448 					else if (lexer.contents() == PUGIXML_TEXT("div"))
11449 						return binary_op_t(ast_op_divide, xpath_type_number, 6);
11450 					else if (lexer.contents() == PUGIXML_TEXT("mod"))
11451 						return binary_op_t(ast_op_mod, xpath_type_number, 6);
11452 					else
11453 						return binary_op_t();
11454 
11455 				case lex_equal:
11456 					return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
11457 
11458 				case lex_not_equal:
11459 					return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
11460 
11461 				case lex_less:
11462 					return binary_op_t(ast_op_less, xpath_type_boolean, 4);
11463 
11464 				case lex_greater:
11465 					return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
11466 
11467 				case lex_less_or_equal:
11468 					return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
11469 
11470 				case lex_greater_or_equal:
11471 					return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
11472 
11473 				case lex_plus:
11474 					return binary_op_t(ast_op_add, xpath_type_number, 5);
11475 
11476 				case lex_minus:
11477 					return binary_op_t(ast_op_subtract, xpath_type_number, 5);
11478 
11479 				case lex_multiply:
11480 					return binary_op_t(ast_op_multiply, xpath_type_number, 6);
11481 
11482 				case lex_union:
11483 					return binary_op_t(ast_op_union, xpath_type_node_set, 7);
11484 
11485 				default:
11486 					return binary_op_t();
11487 				}
11488 			}
11489 		};
11490 
parse_expression_recxpath_parser11491 		xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
11492 		{
11493 			binary_op_t op = binary_op_t::parse(_lexer);
11494 
11495 			while (op.asttype != ast_unknown && op.precedence >= limit)
11496 			{
11497 				_lexer.next();
11498 
11499 				xpath_ast_node* rhs = parse_path_or_unary_expression();
11500 
11501 				binary_op_t nextop = binary_op_t::parse(_lexer);
11502 
11503 				while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
11504 				{
11505 					rhs = parse_expression_rec(rhs, nextop.precedence);
11506 
11507 					nextop = binary_op_t::parse(_lexer);
11508 				}
11509 
11510 				if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
11511 					throw_error("Union operator has to be applied to node sets");
11512 
11513 				lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs);
11514 
11515 				op = binary_op_t::parse(_lexer);
11516 			}
11517 
11518 			return lhs;
11519 		}
11520 
11521 		// Expr ::= OrExpr
11522 		// OrExpr ::= AndExpr | OrExpr 'or' AndExpr
11523 		// AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
11524 		// EqualityExpr ::= RelationalExpr
11525 		//					| EqualityExpr '=' RelationalExpr
11526 		//					| EqualityExpr '!=' RelationalExpr
11527 		// RelationalExpr ::= AdditiveExpr
11528 		//					  | RelationalExpr '<' AdditiveExpr
11529 		//					  | RelationalExpr '>' AdditiveExpr
11530 		//					  | RelationalExpr '<=' AdditiveExpr
11531 		//					  | RelationalExpr '>=' AdditiveExpr
11532 		// AdditiveExpr ::= MultiplicativeExpr
11533 		//					| AdditiveExpr '+' MultiplicativeExpr
11534 		//					| AdditiveExpr '-' MultiplicativeExpr
11535 		// MultiplicativeExpr ::= UnaryExpr
11536 		//						  | MultiplicativeExpr '*' UnaryExpr
11537 		//						  | MultiplicativeExpr 'div' UnaryExpr
11538 		//						  | MultiplicativeExpr 'mod' UnaryExpr
parse_expressionxpath_parser11539 		xpath_ast_node* parse_expression()
11540 		{
11541 			return parse_expression_rec(parse_path_or_unary_expression(), 0);
11542 		}
11543 
xpath_parserxpath_parser11544 		xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result)
11545 		{
11546 		}
11547 
parsexpath_parser11548 		xpath_ast_node* parse()
11549 		{
11550 			xpath_ast_node* result = parse_expression();
11551 
11552 			if (_lexer.current() != lex_eof)
11553 			{
11554 				// there are still unparsed tokens left, error
11555 				throw_error("Incorrect query");
11556 			}
11557 
11558 			return result;
11559 		}
11560 
parsexpath_parser11561 		static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
11562 		{
11563 			xpath_parser parser(query, variables, alloc, result);
11564 
11565 		#ifdef PUGIXML_NO_EXCEPTIONS
11566 			int error = setjmp(parser._error_handler);
11567 
11568 			return (error == 0) ? parser.parse() : 0;
11569 		#else
11570 			return parser.parse();
11571 		#endif
11572 		}
11573 	};
11574 
11575 	struct xpath_query_impl
11576 	{
createxpath_query_impl11577 		static xpath_query_impl* create()
11578 		{
11579 			void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
11580 			if (!memory) return 0;
11581 
11582 			return new (memory) xpath_query_impl();
11583 		}
11584 
destroyxpath_query_impl11585 		static void destroy(xpath_query_impl* impl)
11586 		{
11587 			// free all allocated pages
11588 			impl->alloc.release();
11589 
11590 			// free allocator memory (with the first page)
11591 			xml_memory::deallocate(impl);
11592 		}
11593 
xpath_query_implxpath_query_impl11594 		xpath_query_impl(): root(0), alloc(&block)
11595 		{
11596 			block.next = 0;
11597 			block.capacity = sizeof(block.data);
11598 		}
11599 
11600 		xpath_ast_node* root;
11601 		xpath_allocator alloc;
11602 		xpath_memory_block block;
11603 	};
11604 
evaluate_string_impl(xpath_query_impl * impl,const xpath_node & n,xpath_stack_data & sd)11605 	PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd)
11606 	{
11607 		if (!impl) return xpath_string();
11608 
11609 	#ifdef PUGIXML_NO_EXCEPTIONS
11610 		if (setjmp(sd.error_handler)) return xpath_string();
11611 	#endif
11612 
11613 		xpath_context c(n, 1, 1);
11614 
11615 		return impl->root->eval_string(c, sd.stack);
11616 	}
11617 
evaluate_node_set_prepare(xpath_query_impl * impl)11618 	PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
11619 	{
11620 		if (!impl) return 0;
11621 
11622 		if (impl->root->rettype() != xpath_type_node_set)
11623 		{
11624 		#ifdef PUGIXML_NO_EXCEPTIONS
11625 			return 0;
11626 		#else
11627 			xpath_parse_result res;
11628 			res.error = "Expression does not evaluate to node set";
11629 
11630 			throw xpath_exception(res);
11631 		#endif
11632 		}
11633 
11634 		return impl->root;
11635 	}
11636 PUGI__NS_END
11637 
11638 namespace pugi
11639 {
11640 #ifndef PUGIXML_NO_EXCEPTIONS
xpath_exception(const xpath_parse_result & result_)11641 	PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
11642 	{
11643 		assert(_result.error);
11644 	}
11645 
what() const11646 	PUGI__FN const char* xpath_exception::what() const throw()
11647 	{
11648 		return _result.error;
11649 	}
11650 
result() const11651 	PUGI__FN const xpath_parse_result& xpath_exception::result() const
11652 	{
11653 		return _result;
11654 	}
11655 #endif
11656 
xpath_node()11657 	PUGI__FN xpath_node::xpath_node()
11658 	{
11659 	}
11660 
xpath_node(const xml_node & node_)11661 	PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
11662 	{
11663 	}
11664 
xpath_node(const xml_attribute & attribute_,const xml_node & parent_)11665 	PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
11666 	{
11667 	}
11668 
node() const11669 	PUGI__FN xml_node xpath_node::node() const
11670 	{
11671 		return _attribute ? xml_node() : _node;
11672 	}
11673 
attribute() const11674 	PUGI__FN xml_attribute xpath_node::attribute() const
11675 	{
11676 		return _attribute;
11677 	}
11678 
parent() const11679 	PUGI__FN xml_node xpath_node::parent() const
11680 	{
11681 		return _attribute ? _node : _node.parent();
11682 	}
11683 
unspecified_bool_xpath_node(xpath_node ***)11684 	PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
11685 	{
11686 	}
11687 
operator xpath_node::unspecified_bool_type() const11688 	PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
11689 	{
11690 		return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
11691 	}
11692 
operator !() const11693 	PUGI__FN bool xpath_node::operator!() const
11694 	{
11695 		return !(_node || _attribute);
11696 	}
11697 
operator ==(const xpath_node & n) const11698 	PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
11699 	{
11700 		return _node == n._node && _attribute == n._attribute;
11701 	}
11702 
operator !=(const xpath_node & n) const11703 	PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
11704 	{
11705 		return _node != n._node || _attribute != n._attribute;
11706 	}
11707 
11708 #ifdef __BORLANDC__
operator &&(const xpath_node & lhs,bool rhs)11709 	PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
11710 	{
11711 		return (bool)lhs && rhs;
11712 	}
11713 
operator ||(const xpath_node & lhs,bool rhs)11714 	PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
11715 	{
11716 		return (bool)lhs || rhs;
11717 	}
11718 #endif
11719 
_assign(const_iterator begin_,const_iterator end_,type_t type_)11720 	PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_)
11721 	{
11722 		assert(begin_ <= end_);
11723 
11724 		size_t size_ = static_cast<size_t>(end_ - begin_);
11725 
11726 		if (size_ <= 1)
11727 		{
11728 			// deallocate old buffer
11729 			if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
11730 
11731 			// use internal buffer
11732 			if (begin_ != end_) _storage = *begin_;
11733 
11734 			_begin = &_storage;
11735 			_end = &_storage + size_;
11736 			_type = type_;
11737 		}
11738 		else
11739 		{
11740 			// make heap copy
11741 			xpath_node* storage = static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
11742 
11743 			if (!storage)
11744 			{
11745 			#ifdef PUGIXML_NO_EXCEPTIONS
11746 				return;
11747 			#else
11748 				throw std::bad_alloc();
11749 			#endif
11750 			}
11751 
11752 			memcpy(storage, begin_, size_ * sizeof(xpath_node));
11753 
11754 			// deallocate old buffer
11755 			if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
11756 
11757 			// finalize
11758 			_begin = storage;
11759 			_end = storage + size_;
11760 			_type = type_;
11761 		}
11762 	}
11763 
11764 #if __cplusplus >= 201103
_move(xpath_node_set & rhs)11765 	PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs)
11766 	{
11767 		_type = rhs._type;
11768 		_storage = rhs._storage;
11769 		_begin = (rhs._begin == &rhs._storage) ? &_storage : rhs._begin;
11770 		_end = _begin + (rhs._end - rhs._begin);
11771 
11772 		rhs._type = type_unsorted;
11773 		rhs._begin = &rhs._storage;
11774 		rhs._end = rhs._begin;
11775 	}
11776 #endif
11777 
xpath_node_set()11778 	PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage)
11779 	{
11780 	}
11781 
xpath_node_set(const_iterator begin_,const_iterator end_,type_t type_)11782 	PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(&_storage), _end(&_storage)
11783 	{
11784 		_assign(begin_, end_, type_);
11785 	}
11786 
~xpath_node_set()11787 	PUGI__FN xpath_node_set::~xpath_node_set()
11788 	{
11789 		if (_begin != &_storage)
11790 			impl::xml_memory::deallocate(_begin);
11791 	}
11792 
xpath_node_set(const xpath_node_set & ns)11793 	PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(&_storage), _end(&_storage)
11794 	{
11795 		_assign(ns._begin, ns._end, ns._type);
11796 	}
11797 
operator =(const xpath_node_set & ns)11798 	PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
11799 	{
11800 		if (this == &ns) return *this;
11801 
11802 		_assign(ns._begin, ns._end, ns._type);
11803 
11804 		return *this;
11805 	}
11806 
11807 #if __cplusplus >= 201103
xpath_node_set(xpath_node_set && rhs)11808 	PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs): _type(type_unsorted), _begin(&_storage), _end(&_storage)
11809 	{
11810 		_move(rhs);
11811 	}
11812 
operator =(xpath_node_set && rhs)11813 	PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs)
11814 	{
11815 		if (this == &rhs) return *this;
11816 
11817 		if (_begin != &_storage)
11818 			impl::xml_memory::deallocate(_begin);
11819 
11820 		_move(rhs);
11821 
11822 		return *this;
11823 	}
11824 #endif
11825 
type() const11826 	PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
11827 	{
11828 		return _type;
11829 	}
11830 
size() const11831 	PUGI__FN size_t xpath_node_set::size() const
11832 	{
11833 		return _end - _begin;
11834 	}
11835 
empty() const11836 	PUGI__FN bool xpath_node_set::empty() const
11837 	{
11838 		return _begin == _end;
11839 	}
11840 
operator [](size_t index) const11841 	PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
11842 	{
11843 		assert(index < size());
11844 		return _begin[index];
11845 	}
11846 
begin() const11847 	PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
11848 	{
11849 		return _begin;
11850 	}
11851 
end() const11852 	PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
11853 	{
11854 		return _end;
11855 	}
11856 
sort(bool reverse)11857 	PUGI__FN void xpath_node_set::sort(bool reverse)
11858 	{
11859 		_type = impl::xpath_sort(_begin, _end, _type, reverse);
11860 	}
11861 
first() const11862 	PUGI__FN xpath_node xpath_node_set::first() const
11863 	{
11864 		return impl::xpath_first(_begin, _end, _type);
11865 	}
11866 
xpath_parse_result()11867 	PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
11868 	{
11869 	}
11870 
operator bool() const11871 	PUGI__FN xpath_parse_result::operator bool() const
11872 	{
11873 		return error == 0;
11874 	}
11875 
description() const11876 	PUGI__FN const char* xpath_parse_result::description() const
11877 	{
11878 		return error ? error : "No error";
11879 	}
11880 
xpath_variable(xpath_value_type type_)11881 	PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0)
11882 	{
11883 	}
11884 
name() const11885 	PUGI__FN const char_t* xpath_variable::name() const
11886 	{
11887 		switch (_type)
11888 		{
11889 		case xpath_type_node_set:
11890 			return static_cast<const impl::xpath_variable_node_set*>(this)->name;
11891 
11892 		case xpath_type_number:
11893 			return static_cast<const impl::xpath_variable_number*>(this)->name;
11894 
11895 		case xpath_type_string:
11896 			return static_cast<const impl::xpath_variable_string*>(this)->name;
11897 
11898 		case xpath_type_boolean:
11899 			return static_cast<const impl::xpath_variable_boolean*>(this)->name;
11900 
11901 		default:
11902 			assert(!"Invalid variable type");
11903 			return 0;
11904 		}
11905 	}
11906 
type() const11907 	PUGI__FN xpath_value_type xpath_variable::type() const
11908 	{
11909 		return _type;
11910 	}
11911 
get_boolean() const11912 	PUGI__FN bool xpath_variable::get_boolean() const
11913 	{
11914 		return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
11915 	}
11916 
get_number() const11917 	PUGI__FN double xpath_variable::get_number() const
11918 	{
11919 		return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
11920 	}
11921 
get_string() const11922 	PUGI__FN const char_t* xpath_variable::get_string() const
11923 	{
11924 		const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
11925 		return value ? value : PUGIXML_TEXT("");
11926 	}
11927 
get_node_set() const11928 	PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
11929 	{
11930 		return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
11931 	}
11932 
set(bool value)11933 	PUGI__FN bool xpath_variable::set(bool value)
11934 	{
11935 		if (_type != xpath_type_boolean) return false;
11936 
11937 		static_cast<impl::xpath_variable_boolean*>(this)->value = value;
11938 		return true;
11939 	}
11940 
set(double value)11941 	PUGI__FN bool xpath_variable::set(double value)
11942 	{
11943 		if (_type != xpath_type_number) return false;
11944 
11945 		static_cast<impl::xpath_variable_number*>(this)->value = value;
11946 		return true;
11947 	}
11948 
set(const char_t * value)11949 	PUGI__FN bool xpath_variable::set(const char_t* value)
11950 	{
11951 		if (_type != xpath_type_string) return false;
11952 
11953 		impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
11954 
11955 		// duplicate string
11956 		size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
11957 
11958 		char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
11959 		if (!copy) return false;
11960 
11961 		memcpy(copy, value, size);
11962 
11963 		// replace old string
11964 		if (var->value) impl::xml_memory::deallocate(var->value);
11965 		var->value = copy;
11966 
11967 		return true;
11968 	}
11969 
set(const xpath_node_set & value)11970 	PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
11971 	{
11972 		if (_type != xpath_type_node_set) return false;
11973 
11974 		static_cast<impl::xpath_variable_node_set*>(this)->value = value;
11975 		return true;
11976 	}
11977 
xpath_variable_set()11978 	PUGI__FN xpath_variable_set::xpath_variable_set()
11979 	{
11980 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
11981 			_data[i] = 0;
11982 	}
11983 
~xpath_variable_set()11984 	PUGI__FN xpath_variable_set::~xpath_variable_set()
11985 	{
11986 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
11987 			_destroy(_data[i]);
11988 	}
11989 
xpath_variable_set(const xpath_variable_set & rhs)11990 	PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
11991 	{
11992 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
11993 			_data[i] = 0;
11994 
11995 		_assign(rhs);
11996 	}
11997 
operator =(const xpath_variable_set & rhs)11998 	PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
11999 	{
12000 		if (this == &rhs) return *this;
12001 
12002 		_assign(rhs);
12003 
12004 		return *this;
12005 	}
12006 
12007 #if __cplusplus >= 201103
xpath_variable_set(xpath_variable_set && rhs)12008 	PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs)
12009 	{
12010 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12011 		{
12012 			_data[i] = rhs._data[i];
12013 			rhs._data[i] = 0;
12014 		}
12015 	}
12016 
operator =(xpath_variable_set && rhs)12017 	PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs)
12018 	{
12019 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12020 		{
12021 			_destroy(_data[i]);
12022 
12023 			_data[i] = rhs._data[i];
12024 			rhs._data[i] = 0;
12025 		}
12026 
12027 		return *this;
12028 	}
12029 #endif
12030 
_assign(const xpath_variable_set & rhs)12031 	PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
12032 	{
12033 		xpath_variable_set temp;
12034 
12035 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12036 			if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i]))
12037 				return;
12038 
12039 		_swap(temp);
12040 	}
12041 
_swap(xpath_variable_set & rhs)12042 	PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
12043 	{
12044 		for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12045 		{
12046 			xpath_variable* chain = _data[i];
12047 
12048 			_data[i] = rhs._data[i];
12049 			rhs._data[i] = chain;
12050 		}
12051 	}
12052 
_find(const char_t * name) const12053 	PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const
12054 	{
12055 		const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12056 		size_t hash = impl::hash_string(name) % hash_size;
12057 
12058 		// look for existing variable
12059 		for (xpath_variable* var = _data[hash]; var; var = var->_next)
12060 			if (impl::strequal(var->name(), name))
12061 				return var;
12062 
12063 		return 0;
12064 	}
12065 
_clone(xpath_variable * var,xpath_variable ** out_result)12066 	PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result)
12067 	{
12068 		xpath_variable* last = 0;
12069 
12070 		while (var)
12071 		{
12072 			// allocate storage for new variable
12073 			xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name());
12074 			if (!nvar) return false;
12075 
12076 			// link the variable to the result immediately to handle failures gracefully
12077 			if (last)
12078 				last->_next = nvar;
12079 			else
12080 				*out_result = nvar;
12081 
12082 			last = nvar;
12083 
12084 			// copy the value; this can fail due to out-of-memory conditions
12085 			if (!impl::copy_xpath_variable(nvar, var)) return false;
12086 
12087 			var = var->_next;
12088 		}
12089 
12090 		return true;
12091 	}
12092 
_destroy(xpath_variable * var)12093 	PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var)
12094 	{
12095 		while (var)
12096 		{
12097 			xpath_variable* next = var->_next;
12098 
12099 			impl::delete_xpath_variable(var->_type, var);
12100 
12101 			var = next;
12102 		}
12103 	}
12104 
add(const char_t * name,xpath_value_type type)12105 	PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
12106 	{
12107 		const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12108 		size_t hash = impl::hash_string(name) % hash_size;
12109 
12110 		// look for existing variable
12111 		for (xpath_variable* var = _data[hash]; var; var = var->_next)
12112 			if (impl::strequal(var->name(), name))
12113 				return var->type() == type ? var : 0;
12114 
12115 		// add new variable
12116 		xpath_variable* result = impl::new_xpath_variable(type, name);
12117 
12118 		if (result)
12119 		{
12120 			result->_next = _data[hash];
12121 
12122 			_data[hash] = result;
12123 		}
12124 
12125 		return result;
12126 	}
12127 
set(const char_t * name,bool value)12128 	PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
12129 	{
12130 		xpath_variable* var = add(name, xpath_type_boolean);
12131 		return var ? var->set(value) : false;
12132 	}
12133 
set(const char_t * name,double value)12134 	PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
12135 	{
12136 		xpath_variable* var = add(name, xpath_type_number);
12137 		return var ? var->set(value) : false;
12138 	}
12139 
set(const char_t * name,const char_t * value)12140 	PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
12141 	{
12142 		xpath_variable* var = add(name, xpath_type_string);
12143 		return var ? var->set(value) : false;
12144 	}
12145 
set(const char_t * name,const xpath_node_set & value)12146 	PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
12147 	{
12148 		xpath_variable* var = add(name, xpath_type_node_set);
12149 		return var ? var->set(value) : false;
12150 	}
12151 
get(const char_t * name)12152 	PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
12153 	{
12154 		return _find(name);
12155 	}
12156 
get(const char_t * name) const12157 	PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
12158 	{
12159 		return _find(name);
12160 	}
12161 
xpath_query(const char_t * query,xpath_variable_set * variables)12162 	PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
12163 	{
12164 		impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
12165 
12166 		if (!qimpl)
12167 		{
12168 		#ifdef PUGIXML_NO_EXCEPTIONS
12169 			_result.error = "Out of memory";
12170 		#else
12171 			throw std::bad_alloc();
12172 		#endif
12173 		}
12174 		else
12175 		{
12176 			using impl::auto_deleter; // MSVC7 workaround
12177 			auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy);
12178 
12179 			qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
12180 
12181 			if (qimpl->root)
12182 			{
12183 				qimpl->root->optimize(&qimpl->alloc);
12184 
12185 				_impl = impl.release();
12186 				_result.error = 0;
12187 			}
12188 		}
12189 	}
12190 
xpath_query()12191 	PUGI__FN xpath_query::xpath_query(): _impl(0)
12192 	{
12193 	}
12194 
~xpath_query()12195 	PUGI__FN xpath_query::~xpath_query()
12196 	{
12197 		if (_impl)
12198 			impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12199 	}
12200 
12201 #if __cplusplus >= 201103
xpath_query(xpath_query && rhs)12202 	PUGI__FN xpath_query::xpath_query(xpath_query&& rhs)
12203 	{
12204 		_impl = rhs._impl;
12205 		_result = rhs._result;
12206 		rhs._impl = 0;
12207 		rhs._result = xpath_parse_result();
12208 	}
12209 
operator =(xpath_query && rhs)12210 	PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs)
12211 	{
12212 		if (this == &rhs) return *this;
12213 
12214 		if (_impl)
12215 			impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12216 
12217 		_impl = rhs._impl;
12218 		_result = rhs._result;
12219 		rhs._impl = 0;
12220 		rhs._result = xpath_parse_result();
12221 
12222 		return *this;
12223 	}
12224 #endif
12225 
return_type() const12226 	PUGI__FN xpath_value_type xpath_query::return_type() const
12227 	{
12228 		if (!_impl) return xpath_type_none;
12229 
12230 		return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
12231 	}
12232 
evaluate_boolean(const xpath_node & n) const12233 	PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
12234 	{
12235 		if (!_impl) return false;
12236 
12237 		impl::xpath_context c(n, 1, 1);
12238 		impl::xpath_stack_data sd;
12239 
12240 	#ifdef PUGIXML_NO_EXCEPTIONS
12241 		if (setjmp(sd.error_handler)) return false;
12242 	#endif
12243 
12244 		return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
12245 	}
12246 
evaluate_number(const xpath_node & n) const12247 	PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
12248 	{
12249 		if (!_impl) return impl::gen_nan();
12250 
12251 		impl::xpath_context c(n, 1, 1);
12252 		impl::xpath_stack_data sd;
12253 
12254 	#ifdef PUGIXML_NO_EXCEPTIONS
12255 		if (setjmp(sd.error_handler)) return impl::gen_nan();
12256 	#endif
12257 
12258 		return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
12259 	}
12260 
12261 #ifndef PUGIXML_NO_STL
evaluate_string(const xpath_node & n) const12262 	PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
12263 	{
12264 		impl::xpath_stack_data sd;
12265 
12266 		impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
12267 
12268 		return string_t(r.c_str(), r.length());
12269 	}
12270 #endif
12271 
evaluate_string(char_t * buffer,size_t capacity,const xpath_node & n) const12272 	PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
12273 	{
12274 		impl::xpath_stack_data sd;
12275 
12276 		impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd);
12277 
12278 		size_t full_size = r.length() + 1;
12279 
12280 		if (capacity > 0)
12281 		{
12282 			size_t size = (full_size < capacity) ? full_size : capacity;
12283 			assert(size > 0);
12284 
12285 			memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
12286 			buffer[size - 1] = 0;
12287 		}
12288 
12289 		return full_size;
12290 	}
12291 
evaluate_node_set(const xpath_node & n) const12292 	PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
12293 	{
12294 		impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12295 		if (!root) return xpath_node_set();
12296 
12297 		impl::xpath_context c(n, 1, 1);
12298 		impl::xpath_stack_data sd;
12299 
12300 	#ifdef PUGIXML_NO_EXCEPTIONS
12301 		if (setjmp(sd.error_handler)) return xpath_node_set();
12302 	#endif
12303 
12304 		impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
12305 
12306 		return xpath_node_set(r.begin(), r.end(), r.type());
12307 	}
12308 
evaluate_node(const xpath_node & n) const12309 	PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
12310 	{
12311 		impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12312 		if (!root) return xpath_node();
12313 
12314 		impl::xpath_context c(n, 1, 1);
12315 		impl::xpath_stack_data sd;
12316 
12317 	#ifdef PUGIXML_NO_EXCEPTIONS
12318 		if (setjmp(sd.error_handler)) return xpath_node();
12319 	#endif
12320 
12321 		impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
12322 
12323 		return r.first();
12324 	}
12325 
result() const12326 	PUGI__FN const xpath_parse_result& xpath_query::result() const
12327 	{
12328 		return _result;
12329 	}
12330 
unspecified_bool_xpath_query(xpath_query ***)12331 	PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
12332 	{
12333 	}
12334 
operator xpath_query::unspecified_bool_type() const12335 	PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
12336 	{
12337 		return _impl ? unspecified_bool_xpath_query : 0;
12338 	}
12339 
operator !() const12340 	PUGI__FN bool xpath_query::operator!() const
12341 	{
12342 		return !_impl;
12343 	}
12344 
select_node(const char_t * query,xpath_variable_set * variables) const12345 	PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
12346 	{
12347 		xpath_query q(query, variables);
12348 		return select_node(q);
12349 	}
12350 
select_node(const xpath_query & query) const12351 	PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const
12352 	{
12353 		return query.evaluate_node(*this);
12354 	}
12355 
select_nodes(const char_t * query,xpath_variable_set * variables) const12356 	PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
12357 	{
12358 		xpath_query q(query, variables);
12359 		return select_nodes(q);
12360 	}
12361 
select_nodes(const xpath_query & query) const12362 	PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
12363 	{
12364 		return query.evaluate_node_set(*this);
12365 	}
12366 
select_single_node(const char_t * query,xpath_variable_set * variables) const12367 	PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
12368 	{
12369 		xpath_query q(query, variables);
12370 		return select_single_node(q);
12371 	}
12372 
select_single_node(const xpath_query & query) const12373 	PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
12374 	{
12375 		return query.evaluate_node(*this);
12376 	}
12377 }
12378 
12379 #endif
12380 
12381 #ifdef __BORLANDC__
12382 #	pragma option pop
12383 #endif
12384 
12385 // Intel C++ does not properly keep warning state for function templates,
12386 // so popping warning state at the end of translation unit leads to warnings in the middle.
12387 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
12388 #	pragma warning(pop)
12389 #endif
12390 
12391 // Undefine all local macros (makes sure we're not leaking macros in header-only mode)
12392 #undef PUGI__NO_INLINE
12393 #undef PUGI__UNLIKELY
12394 #undef PUGI__STATIC_ASSERT
12395 #undef PUGI__DMC_VOLATILE
12396 #undef PUGI__MSVC_CRT_VERSION
12397 #undef PUGI__NS_BEGIN
12398 #undef PUGI__NS_END
12399 #undef PUGI__FN
12400 #undef PUGI__FN_NO_INLINE
12401 #undef PUGI__GETPAGE_IMPL
12402 #undef PUGI__GETPAGE
12403 #undef PUGI__NODETYPE
12404 #undef PUGI__IS_CHARTYPE_IMPL
12405 #undef PUGI__IS_CHARTYPE
12406 #undef PUGI__IS_CHARTYPEX
12407 #undef PUGI__ENDSWITH
12408 #undef PUGI__SKIPWS
12409 #undef PUGI__OPTSET
12410 #undef PUGI__PUSHNODE
12411 #undef PUGI__POPNODE
12412 #undef PUGI__SCANFOR
12413 #undef PUGI__SCANWHILE
12414 #undef PUGI__SCANWHILE_UNROLL
12415 #undef PUGI__ENDSEG
12416 #undef PUGI__THROW_ERROR
12417 #undef PUGI__CHECK_ERROR
12418 
12419 #endif
12420 
12421 /**
12422  * Copyright (c) 2006-2015 Arseny Kapoulkine
12423  *
12424  * Permission is hereby granted, free of charge, to any person
12425  * obtaining a copy of this software and associated documentation
12426  * files (the "Software"), to deal in the Software without
12427  * restriction, including without limitation the rights to use,
12428  * copy, modify, merge, publish, distribute, sublicense, and/or sell
12429  * copies of the Software, and to permit persons to whom the
12430  * Software is furnished to do so, subject to the following
12431  * conditions:
12432  *
12433  * The above copyright notice and this permission notice shall be
12434  * included in all copies or substantial portions of the Software.
12435  *
12436  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
12437  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
12438  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
12439  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
12440  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
12441  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
12442  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
12443  * OTHER DEALINGS IN THE SOFTWARE.
12444  */
12445