1 /** 2 * pugixml parser - version 1.11 3 * -------------------------------------------------------- 4 * Copyright (C) 2006-2020, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) 5 * Report bugs and download new versions at https://pugixml.org/ 6 * 7 * This library is distributed under the MIT License. See notice at the end 8 * of this file. 9 * 10 * This work is based on the pugxml parser, which is: 11 * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) 12 */ 13 14 #ifndef SOURCE_PUGIXML_CPP 15 #define SOURCE_PUGIXML_CPP 16 17 #include "pugixml.hpp" 18 19 #include <stdlib.h> 20 #include <stdio.h> 21 #include <string.h> 22 #include <assert.h> 23 #include <limits.h> 24 25 #ifdef PUGIXML_WCHAR_MODE 26 # include <wchar.h> 27 #endif 28 29 #ifndef PUGIXML_NO_XPATH 30 # include <math.h> 31 # include <float.h> 32 #endif 33 34 #ifndef PUGIXML_NO_STL 35 # include <istream> 36 # include <ostream> 37 # include <string> 38 #endif 39 40 // For placement new 41 #include <new> 42 43 #ifdef _MSC_VER 44 # pragma warning(push) 45 # pragma warning(disable: 4127) // conditional expression is constant 46 # pragma warning(disable: 4324) // structure was padded due to __declspec(align()) 47 # pragma warning(disable: 4702) // unreachable code 48 # pragma warning(disable: 4996) // this function or variable may be unsafe 49 #endif 50 51 #if defined(_MSC_VER) && defined(__c2__) 52 # pragma clang diagnostic push 53 # pragma clang diagnostic ignored "-Wdeprecated" // this function or variable may be unsafe 54 #endif 55 56 #ifdef __INTEL_COMPILER 57 # pragma warning(disable: 177) // function was declared but never referenced 58 # pragma warning(disable: 279) // controlling expression is constant 59 # pragma warning(disable: 1478 1786) // function was declared "deprecated" 60 # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type 61 #endif 62 63 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY) 64 # pragma warn -8080 // symbol is declared but 
never used; disabling this inside push/pop bracket does not make the warning go away 65 #endif 66 67 #ifdef __BORLANDC__ 68 # pragma option push 69 # pragma warn -8008 // condition is always false 70 # pragma warn -8066 // unreachable code 71 #endif 72 73 #ifdef __SNC__ 74 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug 75 # pragma diag_suppress=178 // function was declared but never referenced 76 # pragma diag_suppress=237 // controlling expression is constant 77 #endif 78 79 #ifdef __TI_COMPILER_VERSION__ 80 # pragma diag_suppress 179 // function was declared but never referenced 81 #endif 82 83 // Inlining controls 84 #if defined(_MSC_VER) && _MSC_VER >= 1300 85 # define PUGI__NO_INLINE __declspec(noinline) 86 #elif defined(__GNUC__) 87 # define PUGI__NO_INLINE __attribute__((noinline)) 88 #else 89 # define PUGI__NO_INLINE 90 #endif 91 92 // Branch weight controls 93 #if defined(__GNUC__) && !defined(__c2__) 94 # define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0) 95 #else 96 # define PUGI__UNLIKELY(cond) (cond) 97 #endif 98 99 // Simple static assertion 100 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 
1 : -1] = {0}; (void)condition_failed[0]; } 101 102 // Digital Mars C++ bug workaround for passing char loaded from memory via stack 103 #ifdef __DMC__ 104 # define PUGI__DMC_VOLATILE volatile 105 #else 106 # define PUGI__DMC_VOLATILE 107 #endif 108 109 // Integer sanitizer workaround; we only apply this for clang since gcc8 has no_sanitize but not unsigned-integer-overflow and produces "attribute directive ignored" warnings 110 #if defined(__clang__) && defined(__has_attribute) 111 # if __has_attribute(no_sanitize) 112 # define PUGI__UNSIGNED_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow"))) 113 # else 114 # define PUGI__UNSIGNED_OVERFLOW 115 # endif 116 #else 117 # define PUGI__UNSIGNED_OVERFLOW 118 #endif 119 120 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all) 121 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST) 122 using std::memcpy; 123 using std::memmove; 124 using std::memset; 125 #endif 126 127 // Some MinGW/GCC versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions from limits.h in some configurations 128 #if defined(PUGIXML_HAS_LONG_LONG) && defined(__GNUC__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX) 129 # define LLONG_MIN (-LLONG_MAX - 1LL) 130 # define LLONG_MAX __LONG_LONG_MAX__ 131 # define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL) 132 #endif 133 134 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features 135 #if defined(_MSC_VER) && !defined(__S3E__) 136 # define PUGI__MSVC_CRT_VERSION _MSC_VER 137 #endif 138 139 // Not all platforms have snprintf; we define a wrapper that uses snprintf if possible. This only works with buffers with a known size. 140 #if __cplusplus >= 201103 141 # define PUGI__SNPRINTF(buf, ...) 
snprintf(buf, sizeof(buf), __VA_ARGS__) 142 #elif defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 143 # define PUGI__SNPRINTF(buf, ...) _snprintf_s(buf, _countof(buf), _TRUNCATE, __VA_ARGS__) 144 #else 145 # define PUGI__SNPRINTF sprintf 146 #endif 147 148 // We put implementation details into an anonymous namespace in source mode, but have to keep it in non-anonymous namespace in header-only mode to prevent binary bloat. 149 #ifdef PUGIXML_HEADER_ONLY 150 # define PUGI__NS_BEGIN namespace pugi { namespace impl { 151 # define PUGI__NS_END } } 152 # define PUGI__FN inline 153 # define PUGI__FN_NO_INLINE inline 154 #else 155 # if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces 156 # define PUGI__NS_BEGIN namespace pugi { namespace impl { 157 # define PUGI__NS_END } } 158 # else 159 # define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace { 160 # define PUGI__NS_END } } } 161 # endif 162 # define PUGI__FN 163 # define PUGI__FN_NO_INLINE PUGI__NO_INLINE 164 #endif 165 166 // uintptr_t 167 #if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561) 168 namespace pugi 169 { 170 # ifndef _UINTPTR_T_DEFINED 171 typedef size_t uintptr_t; 172 # endif 173 174 typedef unsigned __int8 uint8_t; 175 typedef unsigned __int16 uint16_t; 176 typedef unsigned __int32 uint32_t; 177 } 178 #else 179 # include <stdint.h> 180 #endif 181 182 // Memory allocation 183 PUGI__NS_BEGIN default_allocate(size_t size)184 PUGI__FN void* default_allocate(size_t size) 185 { 186 return malloc(size); 187 } 188 default_deallocate(void * ptr)189 PUGI__FN void default_deallocate(void* ptr) 190 { 191 free(ptr); 192 } 193 194 template <typename T> 195 struct xml_memory_management_function_storage 196 { 197 static allocation_function allocate; 198 static deallocation_function deallocate; 199 }; 200 201 // Global allocation functions are stored in class statics so that in header 
mode linker deduplicates them 202 // Without a template<> we'll get multiple definitions of the same static 203 template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate; 204 template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate; 205 206 typedef xml_memory_management_function_storage<int> xml_memory; 207 PUGI__NS_END 208 209 // String utilities 210 PUGI__NS_BEGIN 211 // Get string length strlength(const char_t * s)212 PUGI__FN size_t strlength(const char_t* s) 213 { 214 assert(s); 215 216 #ifdef PUGIXML_WCHAR_MODE 217 return wcslen(s); 218 #else 219 return strlen(s); 220 #endif 221 } 222 223 // Compare two strings strequal(const char_t * src,const char_t * dst)224 PUGI__FN bool strequal(const char_t* src, const char_t* dst) 225 { 226 assert(src && dst); 227 228 #ifdef PUGIXML_WCHAR_MODE 229 return wcscmp(src, dst) == 0; 230 #else 231 return strcmp(src, dst) == 0; 232 #endif 233 } 234 235 // Compare lhs with [rhs_begin, rhs_end) strequalrange(const char_t * lhs,const char_t * rhs,size_t count)236 PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count) 237 { 238 for (size_t i = 0; i < count; ++i) 239 if (lhs[i] != rhs[i]) 240 return false; 241 242 return lhs[count] == 0; 243 } 244 245 // Get length of wide string, even if CRT lacks wide character support strlength_wide(const wchar_t * s)246 PUGI__FN size_t strlength_wide(const wchar_t* s) 247 { 248 assert(s); 249 250 #ifdef PUGIXML_WCHAR_MODE 251 return wcslen(s); 252 #else 253 const wchar_t* end = s; 254 while (*end) end++; 255 return static_cast<size_t>(end - s); 256 #endif 257 } 258 PUGI__NS_END 259 260 // auto_ptr-like object for exception recovery 261 PUGI__NS_BEGIN 262 template <typename T> struct auto_deleter 263 { 264 typedef void (*D)(T*); 265 266 T* data; 267 D deleter; 268 auto_deleterauto_deleter269 auto_deleter(T* data_, D deleter_): data(data_), 
deleter(deleter_) 270 { 271 } 272 ~auto_deleterauto_deleter273 ~auto_deleter() 274 { 275 if (data) deleter(data); 276 } 277 releaseauto_deleter278 T* release() 279 { 280 T* result = data; 281 data = 0; 282 return result; 283 } 284 }; 285 PUGI__NS_END 286 287 #ifdef PUGIXML_COMPACT 288 PUGI__NS_BEGIN 289 class compact_hash_table 290 { 291 public: compact_hash_table()292 compact_hash_table(): _items(0), _capacity(0), _count(0) 293 { 294 } 295 clear()296 void clear() 297 { 298 if (_items) 299 { 300 xml_memory::deallocate(_items); 301 _items = 0; 302 _capacity = 0; 303 _count = 0; 304 } 305 } 306 find(const void * key)307 void* find(const void* key) 308 { 309 if (_capacity == 0) return 0; 310 311 item_t* item = get_item(key); 312 assert(item); 313 assert(item->key == key || (item->key == 0 && item->value == 0)); 314 315 return item->value; 316 } 317 insert(const void * key,void * value)318 void insert(const void* key, void* value) 319 { 320 assert(_capacity != 0 && _count < _capacity - _capacity / 4); 321 322 item_t* item = get_item(key); 323 assert(item); 324 325 if (item->key == 0) 326 { 327 _count++; 328 item->key = key; 329 } 330 331 item->value = value; 332 } 333 reserve(size_t extra=16)334 bool reserve(size_t extra = 16) 335 { 336 if (_count + extra >= _capacity - _capacity / 4) 337 return rehash(_count + extra); 338 339 return true; 340 } 341 342 private: 343 struct item_t 344 { 345 const void* key; 346 void* value; 347 }; 348 349 item_t* _items; 350 size_t _capacity; 351 352 size_t _count; 353 354 bool rehash(size_t count); 355 get_item(const void * key)356 item_t* get_item(const void* key) 357 { 358 assert(key); 359 assert(_capacity > 0); 360 361 size_t hashmod = _capacity - 1; 362 size_t bucket = hash(key) & hashmod; 363 364 for (size_t probe = 0; probe <= hashmod; ++probe) 365 { 366 item_t& probe_item = _items[bucket]; 367 368 if (probe_item.key == key || probe_item.key == 0) 369 return &probe_item; 370 371 // hash collision, quadratic probing 372 bucket = 
(bucket + probe + 1) & hashmod; 373 } 374 375 assert(false && "Hash table is full"); // unreachable 376 return 0; 377 } 378 hash(const void * key)379 static PUGI__UNSIGNED_OVERFLOW unsigned int hash(const void* key) 380 { 381 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key) & 0xffffffff); 382 383 // MurmurHash3 32-bit finalizer 384 h ^= h >> 16; 385 h *= 0x85ebca6bu; 386 h ^= h >> 13; 387 h *= 0xc2b2ae35u; 388 h ^= h >> 16; 389 390 return h; 391 } 392 }; 393 rehash(size_t count)394 PUGI__FN_NO_INLINE bool compact_hash_table::rehash(size_t count) 395 { 396 size_t capacity = 32; 397 while (count >= capacity - capacity / 4) 398 capacity *= 2; 399 400 compact_hash_table rt; 401 rt._capacity = capacity; 402 rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * capacity)); 403 404 if (!rt._items) 405 return false; 406 407 memset(rt._items, 0, sizeof(item_t) * capacity); 408 409 for (size_t i = 0; i < _capacity; ++i) 410 if (_items[i].key) 411 rt.insert(_items[i].key, _items[i].value); 412 413 if (_items) 414 xml_memory::deallocate(_items); 415 416 _capacity = capacity; 417 _items = rt._items; 418 419 assert(_count == rt._count); 420 421 return true; 422 } 423 424 PUGI__NS_END 425 #endif 426 427 PUGI__NS_BEGIN 428 #ifdef PUGIXML_COMPACT 429 static const uintptr_t xml_memory_block_alignment = 4; 430 #else 431 static const uintptr_t xml_memory_block_alignment = sizeof(void*); 432 #endif 433 434 // extra metadata bits 435 static const uintptr_t xml_memory_page_contents_shared_mask = 64; 436 static const uintptr_t xml_memory_page_name_allocated_mask = 32; 437 static const uintptr_t xml_memory_page_value_allocated_mask = 16; 438 static const uintptr_t xml_memory_page_type_mask = 15; 439 440 // combined masks for string uniqueness 441 static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask; 442 static const uintptr_t 
xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;

#ifdef PUGIXML_COMPACT
	#define PUGI__GETHEADER_IMPL(object, page, flags) // unused
	#define PUGI__GETPAGE_IMPL(header) (header).get_page()
#else
	// header layout: byte distance from the page start to the object in the upper bits, flags in the low 8 bits
	#define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags))
	// recovers the page by subtracting the stored distance (header >> 8) from the address of the header field
	// this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
	#define PUGI__GETPAGE_IMPL(header) static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>(reinterpret_cast<const char*>(&header) - (header >> 8))))
#endif

	// page that owns node/attribute n
	#define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
	// node type stored in the type bits of n's header
	#define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask)

	struct xml_allocator;

	// Bookkeeping header placed at the start of every allocation page; the page data follows it in memory
	struct xml_memory_page
	{
		// Initializes a page header in the caller-supplied memory and returns it; every field starts out zero
		static xml_memory_page* construct(void* memory)
		{
			xml_memory_page* result = static_cast<xml_memory_page*>(memory);

			result->allocator = 0;
			result->prev = 0;
			result->next = 0;
			result->busy_size = 0;
			result->freed_size = 0;

		#ifdef PUGIXML_COMPACT
			result->compact_string_base = 0;
			result->compact_shared_parent = 0;
			result->compact_page_marker = 0;
		#endif

			return result;
		}

		// allocator that owns this page
		xml_allocator* allocator;

		// neighbours in the doubly linked page list
		xml_memory_page* prev;
		xml_memory_page* next;

		// bytes handed out from this page and bytes already returned to it
		size_t busy_size;
		size_t freed_size;

	#ifdef PUGIXML_COMPACT
		// per-page state used by the compact pointer/string encodings
		char_t* compact_string_base;
		void* compact_shared_parent;
		uint32_t* compact_page_marker;
	#endif
	};

	// usable data bytes per page: the configured (or default 32768) page size minus the page header
	static const size_t xml_memory_page_size =
	#ifdef PUGIXML_MEMORY_PAGE_SIZE
		(PUGIXML_MEMORY_PAGE_SIZE)
	#else
		32768
	#endif
		- sizeof(xml_memory_page);

	// Header stored immediately before every string allocated from a page
	struct xml_memory_string_header
	{
		uint16_t page_offset; // offset from
page->data 505 uint16_t full_size; // 0 if string occupies whole page 506 }; 507 508 struct xml_allocator 509 { xml_allocatorxml_allocator510 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size) 511 { 512 #ifdef PUGIXML_COMPACT 513 _hash = 0; 514 #endif 515 } 516 allocate_pagexml_allocator517 xml_memory_page* allocate_page(size_t data_size) 518 { 519 size_t size = sizeof(xml_memory_page) + data_size; 520 521 // allocate block with some alignment, leaving memory for worst-case padding 522 void* memory = xml_memory::allocate(size); 523 if (!memory) return 0; 524 525 // prepare page structure 526 xml_memory_page* page = xml_memory_page::construct(memory); 527 assert(page); 528 529 assert(this == _root->allocator); 530 page->allocator = this; 531 532 return page; 533 } 534 deallocate_pagexml_allocator535 static void deallocate_page(xml_memory_page* page) 536 { 537 xml_memory::deallocate(page); 538 } 539 540 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page); 541 allocate_memoryxml_allocator542 void* allocate_memory(size_t size, xml_memory_page*& out_page) 543 { 544 if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size)) 545 return allocate_memory_oob(size, out_page); 546 547 void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size; 548 549 _busy_size += size; 550 551 out_page = _root; 552 553 return buf; 554 } 555 556 #ifdef PUGIXML_COMPACT allocate_objectxml_allocator557 void* allocate_object(size_t size, xml_memory_page*& out_page) 558 { 559 void* result = allocate_memory(size + sizeof(uint32_t), out_page); 560 if (!result) return 0; 561 562 // adjust for marker 563 ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker); 564 565 if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment)) 566 { 567 // insert new marker 568 uint32_t* marker = static_cast<uint32_t*>(result); 569 570 *marker = 
static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page)); 571 out_page->compact_page_marker = marker; 572 573 // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block 574 // this will make sure deallocate_memory correctly tracks the size 575 out_page->freed_size += sizeof(uint32_t); 576 577 return marker + 1; 578 } 579 else 580 { 581 // roll back uint32_t part 582 _busy_size -= sizeof(uint32_t); 583 584 return result; 585 } 586 } 587 #else allocate_objectxml_allocator588 void* allocate_object(size_t size, xml_memory_page*& out_page) 589 { 590 return allocate_memory(size, out_page); 591 } 592 #endif 593 deallocate_memoryxml_allocator594 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) 595 { 596 if (page == _root) page->busy_size = _busy_size; 597 598 assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size); 599 (void)!ptr; 600 601 page->freed_size += size; 602 assert(page->freed_size <= page->busy_size); 603 604 if (page->freed_size == page->busy_size) 605 { 606 if (page->next == 0) 607 { 608 assert(_root == page); 609 610 // top page freed, just reset sizes 611 page->busy_size = 0; 612 page->freed_size = 0; 613 614 #ifdef PUGIXML_COMPACT 615 // reset compact state to maximize efficiency 616 page->compact_string_base = 0; 617 page->compact_shared_parent = 0; 618 page->compact_page_marker = 0; 619 #endif 620 621 _busy_size = 0; 622 } 623 else 624 { 625 assert(_root != page); 626 assert(page->prev); 627 628 // remove from the list 629 page->prev->next = page->next; 630 page->next->prev = page->prev; 631 632 // deallocate 633 deallocate_page(page); 634 } 635 } 636 } 637 allocate_stringxml_allocator638 char_t* allocate_string(size_t length) 639 { 640 static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment; 641 642 
PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset); 643 644 // allocate memory for string and header block 645 size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t); 646 647 // round size up to block alignment boundary 648 size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1); 649 650 xml_memory_page* page; 651 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page)); 652 653 if (!header) return 0; 654 655 // setup header 656 ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page); 657 658 assert(page_offset % xml_memory_block_alignment == 0); 659 assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset); 660 header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment); 661 662 // full_size == 0 for large strings that occupy the whole page 663 assert(full_size % xml_memory_block_alignment == 0); 664 assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0)); 665 header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? 
full_size / xml_memory_block_alignment : 0); 666 667 // round-trip through void* to avoid 'cast increases required alignment of target type' warning 668 // header is guaranteed a pointer-sized alignment, which should be enough for char_t 669 return static_cast<char_t*>(static_cast<void*>(header + 1)); 670 } 671 deallocate_stringxml_allocator672 void deallocate_string(char_t* string) 673 { 674 // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings 675 // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string 676 677 // get header 678 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1; 679 assert(header); 680 681 // deallocate 682 size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment; 683 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset)); 684 685 // if full_size == 0 then this string occupies the whole page 686 size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment; 687 688 deallocate_memory(header, full_size, page); 689 } 690 reservexml_allocator691 bool reserve() 692 { 693 #ifdef PUGIXML_COMPACT 694 return _hash->reserve(); 695 #else 696 return true; 697 #endif 698 } 699 700 xml_memory_page* _root; 701 size_t _busy_size; 702 703 #ifdef PUGIXML_COMPACT 704 compact_hash_table* _hash; 705 #endif 706 }; 707 allocate_memory_oob(size_t size,xml_memory_page * & out_page)708 PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) 709 { 710 const size_t large_allocation_threshold = xml_memory_page_size / 4; 711 712 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? 
xml_memory_page_size : size); 713 out_page = page; 714 715 if (!page) return 0; 716 717 if (size <= large_allocation_threshold) 718 { 719 _root->busy_size = _busy_size; 720 721 // insert page at the end of linked list 722 page->prev = _root; 723 _root->next = page; 724 _root = page; 725 726 _busy_size = size; 727 } 728 else 729 { 730 // insert page before the end of linked list, so that it is deleted as soon as possible 731 // the last page is not deleted even if it's empty (see deallocate_memory) 732 assert(_root->prev); 733 734 page->prev = _root->prev; 735 page->next = _root; 736 737 _root->prev->next = page; 738 _root->prev = page; 739 740 page->busy_size = size; 741 } 742 743 return reinterpret_cast<char*>(page) + sizeof(xml_memory_page); 744 } 745 PUGI__NS_END 746 747 #ifdef PUGIXML_COMPACT 748 PUGI__NS_BEGIN 749 static const uintptr_t compact_alignment_log2 = 2; 750 static const uintptr_t compact_alignment = 1 << compact_alignment_log2; 751 752 class compact_header 753 { 754 public: compact_header(xml_memory_page * page,unsigned int flags)755 compact_header(xml_memory_page* page, unsigned int flags) 756 { 757 PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment); 758 759 ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker)); 760 assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment); 761 762 _page = static_cast<unsigned char>(offset >> compact_alignment_log2); 763 _flags = static_cast<unsigned char>(flags); 764 } 765 operator &=(uintptr_t mod)766 void operator&=(uintptr_t mod) 767 { 768 _flags &= static_cast<unsigned char>(mod); 769 } 770 operator |=(uintptr_t mod)771 void operator|=(uintptr_t mod) 772 { 773 _flags |= static_cast<unsigned char>(mod); 774 } 775 operator &(uintptr_t mod) const776 uintptr_t operator&(uintptr_t mod) const 777 { 778 return _flags & mod; 779 } 780 get_page() const781 xml_memory_page* get_page() const 782 { 783 // 
round-trip through void* to silence 'cast increases required alignment of target type' warnings 784 const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2); 785 const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker)); 786 787 return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page))); 788 } 789 790 private: 791 unsigned char _page; 792 unsigned char _flags; 793 }; 794 compact_get_page(const void * object,int header_offset)795 PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset) 796 { 797 const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset); 798 799 return header->get_page(); 800 } 801 compact_get_value(const void * object)802 template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object) 803 { 804 return static_cast<T*>(compact_get_page(object, header_offset)->allocator->_hash->find(object)); 805 } 806 compact_set_value(const void * object,T * value)807 template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value) 808 { 809 compact_get_page(object, header_offset)->allocator->_hash->insert(object, value); 810 } 811 812 template <typename T, int header_offset, int start = -126> class compact_pointer 813 { 814 public: compact_pointer()815 compact_pointer(): _data(0) 816 { 817 } 818 operator =(const compact_pointer & rhs)819 void operator=(const compact_pointer& rhs) 820 { 821 *this = rhs + 0; 822 } 823 operator =(T * value)824 void operator=(T* value) 825 { 826 if (value) 827 { 828 // value is guaranteed to be compact-aligned; 'this' is not 829 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) 830 // so for negative offsets (e.g. 
-3) we need to adjust the diff by compact_alignment - 1 to 831 // compensate for arithmetic shift rounding for negative values 832 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this); 833 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start; 834 835 if (static_cast<uintptr_t>(offset) <= 253) 836 _data = static_cast<unsigned char>(offset + 1); 837 else 838 { 839 compact_set_value<header_offset>(this, value); 840 841 _data = 255; 842 } 843 } 844 else 845 _data = 0; 846 } 847 operator T*() const848 operator T*() const 849 { 850 if (_data) 851 { 852 if (_data < 255) 853 { 854 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1); 855 856 return reinterpret_cast<T*>(base + (_data - 1 + start) * compact_alignment); 857 } 858 else 859 return compact_get_value<header_offset, T>(this); 860 } 861 else 862 return 0; 863 } 864 operator ->() const865 T* operator->() const 866 { 867 return *this; 868 } 869 870 private: 871 unsigned char _data; 872 }; 873 874 template <typename T, int header_offset> class compact_pointer_parent 875 { 876 public: compact_pointer_parent()877 compact_pointer_parent(): _data(0) 878 { 879 } 880 operator =(const compact_pointer_parent & rhs)881 void operator=(const compact_pointer_parent& rhs) 882 { 883 *this = rhs + 0; 884 } 885 operator =(T * value)886 void operator=(T* value) 887 { 888 if (value) 889 { 890 // value is guaranteed to be compact-aligned; 'this' is not 891 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) 892 // so for negative offsets (e.g. 
-3) we need to adjust the diff by compact_alignment - 1 to 893 // compensate for arithmetic shift behavior for negative values 894 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this); 895 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533; 896 897 if (static_cast<uintptr_t>(offset) <= 65533) 898 { 899 _data = static_cast<unsigned short>(offset + 1); 900 } 901 else 902 { 903 xml_memory_page* page = compact_get_page(this, header_offset); 904 905 if (PUGI__UNLIKELY(page->compact_shared_parent == 0)) 906 page->compact_shared_parent = value; 907 908 if (page->compact_shared_parent == value) 909 { 910 _data = 65534; 911 } 912 else 913 { 914 compact_set_value<header_offset>(this, value); 915 916 _data = 65535; 917 } 918 } 919 } 920 else 921 { 922 _data = 0; 923 } 924 } 925 operator T*() const926 operator T*() const 927 { 928 if (_data) 929 { 930 if (_data < 65534) 931 { 932 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1); 933 934 return reinterpret_cast<T*>(base + (_data - 1 - 65533) * compact_alignment); 935 } 936 else if (_data == 65534) 937 return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent); 938 else 939 return compact_get_value<header_offset, T>(this); 940 } 941 else 942 return 0; 943 } 944 operator ->() const945 T* operator->() const 946 { 947 return *this; 948 } 949 950 private: 951 uint16_t _data; 952 }; 953 954 template <int header_offset, int base_offset> class compact_string 955 { 956 public: compact_string()957 compact_string(): _data(0) 958 { 959 } 960 operator =(const compact_string & rhs)961 void operator=(const compact_string& rhs) 962 { 963 *this = rhs + 0; 964 } 965 operator =(char_t * value)966 void operator=(char_t* value) 967 { 968 if (value) 969 { 970 xml_memory_page* page = compact_get_page(this, header_offset); 971 972 if (PUGI__UNLIKELY(page->compact_string_base == 0)) 973 page->compact_string_base = value; 974 975 
ptrdiff_t offset = value - page->compact_string_base; 976 977 if (static_cast<uintptr_t>(offset) < (65535 << 7)) 978 { 979 // round-trip through void* to silence 'cast increases required alignment of target type' warnings 980 uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset)); 981 982 if (*base == 0) 983 { 984 *base = static_cast<uint16_t>((offset >> 7) + 1); 985 _data = static_cast<unsigned char>((offset & 127) + 1); 986 } 987 else 988 { 989 ptrdiff_t remainder = offset - ((*base - 1) << 7); 990 991 if (static_cast<uintptr_t>(remainder) <= 253) 992 { 993 _data = static_cast<unsigned char>(remainder + 1); 994 } 995 else 996 { 997 compact_set_value<header_offset>(this, value); 998 999 _data = 255; 1000 } 1001 } 1002 } 1003 else 1004 { 1005 compact_set_value<header_offset>(this, value); 1006 1007 _data = 255; 1008 } 1009 } 1010 else 1011 { 1012 _data = 0; 1013 } 1014 } 1015 operator char_t*() const1016 operator char_t*() const 1017 { 1018 if (_data) 1019 { 1020 if (_data < 255) 1021 { 1022 xml_memory_page* page = compact_get_page(this, header_offset); 1023 1024 // round-trip through void* to silence 'cast increases required alignment of target type' warnings 1025 const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset)); 1026 assert(*base); 1027 1028 ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1); 1029 1030 return page->compact_string_base + offset; 1031 } 1032 else 1033 { 1034 return compact_get_value<header_offset, char_t>(this); 1035 } 1036 } 1037 else 1038 return 0; 1039 } 1040 1041 private: 1042 unsigned char _data; 1043 }; 1044 PUGI__NS_END 1045 #endif 1046 1047 #ifdef PUGIXML_COMPACT 1048 namespace pugi 1049 { 1050 struct xml_attribute_struct 1051 { xml_attribute_structpugi::xml_attribute_struct1052 xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0) 1053 { 1054 
PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8); 1055 } 1056 1057 impl::compact_header header; 1058 1059 uint16_t namevalue_base; 1060 1061 impl::compact_string<4, 2> name; 1062 impl::compact_string<5, 3> value; 1063 1064 impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c; 1065 impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute; 1066 }; 1067 1068 struct xml_node_struct 1069 { xml_node_structpugi::xml_node_struct1070 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0) 1071 { 1072 PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12); 1073 } 1074 1075 impl::compact_header header; 1076 1077 uint16_t namevalue_base; 1078 1079 impl::compact_string<4, 2> name; 1080 impl::compact_string<5, 3> value; 1081 1082 impl::compact_pointer_parent<xml_node_struct, 6> parent; 1083 1084 impl::compact_pointer<xml_node_struct, 8, 0> first_child; 1085 1086 impl::compact_pointer<xml_node_struct, 9> prev_sibling_c; 1087 impl::compact_pointer<xml_node_struct, 10, 0> next_sibling; 1088 1089 impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute; 1090 }; 1091 } 1092 #else 1093 namespace pugi 1094 { 1095 struct xml_attribute_struct 1096 { 1097 xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0) 1098 { 1099 header = PUGI__GETHEADER_IMPL(this, page, 0); 1100 } 1101 1102 uintptr_t header; 1103 1104 char_t* name; 1105 char_t* value; 1106 1107 xml_attribute_struct* prev_attribute_c; 1108 xml_attribute_struct* next_attribute; 1109 }; 1110 1111 struct xml_node_struct 1112 { 1113 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) 1114 { 1115 header = PUGI__GETHEADER_IMPL(this, page, type); 1116 } 1117 1118 uintptr_t header; 1119 1120 char_t* name; 1121 char_t* value; 1122 1123 xml_node_struct* parent; 1124 1125 xml_node_struct* 
first_child; 1126 1127 xml_node_struct* prev_sibling_c; 1128 xml_node_struct* next_sibling; 1129 1130 xml_attribute_struct* first_attribute; 1131 }; 1132 } 1133 #endif 1134 1135 PUGI__NS_BEGIN 1136 struct xml_extra_buffer 1137 { 1138 char_t* buffer; 1139 xml_extra_buffer* next; 1140 }; 1141 1142 struct xml_document_struct: public xml_node_struct, public xml_allocator 1143 { xml_document_structxml_document_struct1144 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0) 1145 { 1146 } 1147 1148 const char_t* buffer; 1149 1150 xml_extra_buffer* extra_buffers; 1151 1152 #ifdef PUGIXML_COMPACT 1153 compact_hash_table hash; 1154 #endif 1155 }; 1156 get_allocator(const Object * object)1157 template <typename Object> inline xml_allocator& get_allocator(const Object* object) 1158 { 1159 assert(object); 1160 1161 return *PUGI__GETPAGE(object)->allocator; 1162 } 1163 get_document(const Object * object)1164 template <typename Object> inline xml_document_struct& get_document(const Object* object) 1165 { 1166 assert(object); 1167 1168 return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator); 1169 } 1170 PUGI__NS_END 1171 1172 // Low-level DOM operations 1173 PUGI__NS_BEGIN allocate_attribute(xml_allocator & alloc)1174 inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc) 1175 { 1176 xml_memory_page* page; 1177 void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page); 1178 if (!memory) return 0; 1179 1180 return new (memory) xml_attribute_struct(page); 1181 } 1182 allocate_node(xml_allocator & alloc,xml_node_type type)1183 inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type) 1184 { 1185 xml_memory_page* page; 1186 void* memory = alloc.allocate_object(sizeof(xml_node_struct), page); 1187 if (!memory) return 0; 1188 1189 return new (memory) xml_node_struct(page, type); 1190 } 1191 destroy_attribute(xml_attribute_struct * 
a,xml_allocator & alloc)1192 inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc) 1193 { 1194 if (a->header & impl::xml_memory_page_name_allocated_mask) 1195 alloc.deallocate_string(a->name); 1196 1197 if (a->header & impl::xml_memory_page_value_allocated_mask) 1198 alloc.deallocate_string(a->value); 1199 1200 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a)); 1201 } 1202 destroy_node(xml_node_struct * n,xml_allocator & alloc)1203 inline void destroy_node(xml_node_struct* n, xml_allocator& alloc) 1204 { 1205 if (n->header & impl::xml_memory_page_name_allocated_mask) 1206 alloc.deallocate_string(n->name); 1207 1208 if (n->header & impl::xml_memory_page_value_allocated_mask) 1209 alloc.deallocate_string(n->value); 1210 1211 for (xml_attribute_struct* attr = n->first_attribute; attr; ) 1212 { 1213 xml_attribute_struct* next = attr->next_attribute; 1214 1215 destroy_attribute(attr, alloc); 1216 1217 attr = next; 1218 } 1219 1220 for (xml_node_struct* child = n->first_child; child; ) 1221 { 1222 xml_node_struct* next = child->next_sibling; 1223 1224 destroy_node(child, alloc); 1225 1226 child = next; 1227 } 1228 1229 alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n)); 1230 } 1231 append_node(xml_node_struct * child,xml_node_struct * node)1232 inline void append_node(xml_node_struct* child, xml_node_struct* node) 1233 { 1234 child->parent = node; 1235 1236 xml_node_struct* head = node->first_child; 1237 1238 if (head) 1239 { 1240 xml_node_struct* tail = head->prev_sibling_c; 1241 1242 tail->next_sibling = child; 1243 child->prev_sibling_c = tail; 1244 head->prev_sibling_c = child; 1245 } 1246 else 1247 { 1248 node->first_child = child; 1249 child->prev_sibling_c = child; 1250 } 1251 } 1252 prepend_node(xml_node_struct * child,xml_node_struct * node)1253 inline void prepend_node(xml_node_struct* child, xml_node_struct* node) 1254 { 1255 child->parent = node; 1256 1257 xml_node_struct* head = 
node->first_child; 1258 1259 if (head) 1260 { 1261 child->prev_sibling_c = head->prev_sibling_c; 1262 head->prev_sibling_c = child; 1263 } 1264 else 1265 child->prev_sibling_c = child; 1266 1267 child->next_sibling = head; 1268 node->first_child = child; 1269 } 1270 insert_node_after(xml_node_struct * child,xml_node_struct * node)1271 inline void insert_node_after(xml_node_struct* child, xml_node_struct* node) 1272 { 1273 xml_node_struct* parent = node->parent; 1274 1275 child->parent = parent; 1276 1277 if (node->next_sibling) 1278 node->next_sibling->prev_sibling_c = child; 1279 else 1280 parent->first_child->prev_sibling_c = child; 1281 1282 child->next_sibling = node->next_sibling; 1283 child->prev_sibling_c = node; 1284 1285 node->next_sibling = child; 1286 } 1287 insert_node_before(xml_node_struct * child,xml_node_struct * node)1288 inline void insert_node_before(xml_node_struct* child, xml_node_struct* node) 1289 { 1290 xml_node_struct* parent = node->parent; 1291 1292 child->parent = parent; 1293 1294 if (node->prev_sibling_c->next_sibling) 1295 node->prev_sibling_c->next_sibling = child; 1296 else 1297 parent->first_child = child; 1298 1299 child->prev_sibling_c = node->prev_sibling_c; 1300 child->next_sibling = node; 1301 1302 node->prev_sibling_c = child; 1303 } 1304 remove_node(xml_node_struct * node)1305 inline void remove_node(xml_node_struct* node) 1306 { 1307 xml_node_struct* parent = node->parent; 1308 1309 if (node->next_sibling) 1310 node->next_sibling->prev_sibling_c = node->prev_sibling_c; 1311 else 1312 parent->first_child->prev_sibling_c = node->prev_sibling_c; 1313 1314 if (node->prev_sibling_c->next_sibling) 1315 node->prev_sibling_c->next_sibling = node->next_sibling; 1316 else 1317 parent->first_child = node->next_sibling; 1318 1319 node->parent = 0; 1320 node->prev_sibling_c = 0; 1321 node->next_sibling = 0; 1322 } 1323 append_attribute(xml_attribute_struct * attr,xml_node_struct * node)1324 inline void 
append_attribute(xml_attribute_struct* attr, xml_node_struct* node) 1325 { 1326 xml_attribute_struct* head = node->first_attribute; 1327 1328 if (head) 1329 { 1330 xml_attribute_struct* tail = head->prev_attribute_c; 1331 1332 tail->next_attribute = attr; 1333 attr->prev_attribute_c = tail; 1334 head->prev_attribute_c = attr; 1335 } 1336 else 1337 { 1338 node->first_attribute = attr; 1339 attr->prev_attribute_c = attr; 1340 } 1341 } 1342 prepend_attribute(xml_attribute_struct * attr,xml_node_struct * node)1343 inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node) 1344 { 1345 xml_attribute_struct* head = node->first_attribute; 1346 1347 if (head) 1348 { 1349 attr->prev_attribute_c = head->prev_attribute_c; 1350 head->prev_attribute_c = attr; 1351 } 1352 else 1353 attr->prev_attribute_c = attr; 1354 1355 attr->next_attribute = head; 1356 node->first_attribute = attr; 1357 } 1358 insert_attribute_after(xml_attribute_struct * attr,xml_attribute_struct * place,xml_node_struct * node)1359 inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) 1360 { 1361 if (place->next_attribute) 1362 place->next_attribute->prev_attribute_c = attr; 1363 else 1364 node->first_attribute->prev_attribute_c = attr; 1365 1366 attr->next_attribute = place->next_attribute; 1367 attr->prev_attribute_c = place; 1368 place->next_attribute = attr; 1369 } 1370 insert_attribute_before(xml_attribute_struct * attr,xml_attribute_struct * place,xml_node_struct * node)1371 inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) 1372 { 1373 if (place->prev_attribute_c->next_attribute) 1374 place->prev_attribute_c->next_attribute = attr; 1375 else 1376 node->first_attribute = attr; 1377 1378 attr->prev_attribute_c = place->prev_attribute_c; 1379 attr->next_attribute = place; 1380 place->prev_attribute_c = attr; 1381 } 1382 remove_attribute(xml_attribute_struct * 
attr,xml_node_struct * node)1383 inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node) 1384 { 1385 if (attr->next_attribute) 1386 attr->next_attribute->prev_attribute_c = attr->prev_attribute_c; 1387 else 1388 node->first_attribute->prev_attribute_c = attr->prev_attribute_c; 1389 1390 if (attr->prev_attribute_c->next_attribute) 1391 attr->prev_attribute_c->next_attribute = attr->next_attribute; 1392 else 1393 node->first_attribute = attr->next_attribute; 1394 1395 attr->prev_attribute_c = 0; 1396 attr->next_attribute = 0; 1397 } 1398 append_new_node(xml_node_struct * node,xml_allocator & alloc,xml_node_type type=node_element)1399 PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) 1400 { 1401 if (!alloc.reserve()) return 0; 1402 1403 xml_node_struct* child = allocate_node(alloc, type); 1404 if (!child) return 0; 1405 1406 append_node(child, node); 1407 1408 return child; 1409 } 1410 append_new_attribute(xml_node_struct * node,xml_allocator & alloc)1411 PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc) 1412 { 1413 if (!alloc.reserve()) return 0; 1414 1415 xml_attribute_struct* attr = allocate_attribute(alloc); 1416 if (!attr) return 0; 1417 1418 append_attribute(attr, node); 1419 1420 return attr; 1421 } 1422 PUGI__NS_END 1423 1424 // Helper classes for code generation 1425 PUGI__NS_BEGIN 1426 struct opt_false 1427 { 1428 enum { value = 0 }; 1429 }; 1430 1431 struct opt_true 1432 { 1433 enum { value = 1 }; 1434 }; 1435 PUGI__NS_END 1436 1437 // Unicode utilities 1438 PUGI__NS_BEGIN endian_swap(uint16_t value)1439 inline uint16_t endian_swap(uint16_t value) 1440 { 1441 return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8)); 1442 } 1443 endian_swap(uint32_t value)1444 inline uint32_t endian_swap(uint32_t value) 1445 { 1446 return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 
0xff0000) >> 8) | (value >> 24); 1447 } 1448 1449 struct utf8_counter 1450 { 1451 typedef size_t value_type; 1452 lowutf8_counter1453 static value_type low(value_type result, uint32_t ch) 1454 { 1455 // U+0000..U+007F 1456 if (ch < 0x80) return result + 1; 1457 // U+0080..U+07FF 1458 else if (ch < 0x800) return result + 2; 1459 // U+0800..U+FFFF 1460 else return result + 3; 1461 } 1462 highutf8_counter1463 static value_type high(value_type result, uint32_t) 1464 { 1465 // U+10000..U+10FFFF 1466 return result + 4; 1467 } 1468 }; 1469 1470 struct utf8_writer 1471 { 1472 typedef uint8_t* value_type; 1473 lowutf8_writer1474 static value_type low(value_type result, uint32_t ch) 1475 { 1476 // U+0000..U+007F 1477 if (ch < 0x80) 1478 { 1479 *result = static_cast<uint8_t>(ch); 1480 return result + 1; 1481 } 1482 // U+0080..U+07FF 1483 else if (ch < 0x800) 1484 { 1485 result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6)); 1486 result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F)); 1487 return result + 2; 1488 } 1489 // U+0800..U+FFFF 1490 else 1491 { 1492 result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12)); 1493 result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F)); 1494 result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F)); 1495 return result + 3; 1496 } 1497 } 1498 highutf8_writer1499 static value_type high(value_type result, uint32_t ch) 1500 { 1501 // U+10000..U+10FFFF 1502 result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18)); 1503 result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F)); 1504 result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F)); 1505 result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F)); 1506 return result + 4; 1507 } 1508 anyutf8_writer1509 static value_type any(value_type result, uint32_t ch) 1510 { 1511 return (ch < 0x10000) ? 
low(result, ch) : high(result, ch); 1512 } 1513 }; 1514 1515 struct utf16_counter 1516 { 1517 typedef size_t value_type; 1518 lowutf16_counter1519 static value_type low(value_type result, uint32_t) 1520 { 1521 return result + 1; 1522 } 1523 highutf16_counter1524 static value_type high(value_type result, uint32_t) 1525 { 1526 return result + 2; 1527 } 1528 }; 1529 1530 struct utf16_writer 1531 { 1532 typedef uint16_t* value_type; 1533 lowutf16_writer1534 static value_type low(value_type result, uint32_t ch) 1535 { 1536 *result = static_cast<uint16_t>(ch); 1537 1538 return result + 1; 1539 } 1540 highutf16_writer1541 static value_type high(value_type result, uint32_t ch) 1542 { 1543 uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10; 1544 uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff; 1545 1546 result[0] = static_cast<uint16_t>(0xD800 + msh); 1547 result[1] = static_cast<uint16_t>(0xDC00 + lsh); 1548 1549 return result + 2; 1550 } 1551 anyutf16_writer1552 static value_type any(value_type result, uint32_t ch) 1553 { 1554 return (ch < 0x10000) ? 
low(result, ch) : high(result, ch); 1555 } 1556 }; 1557 1558 struct utf32_counter 1559 { 1560 typedef size_t value_type; 1561 lowutf32_counter1562 static value_type low(value_type result, uint32_t) 1563 { 1564 return result + 1; 1565 } 1566 highutf32_counter1567 static value_type high(value_type result, uint32_t) 1568 { 1569 return result + 1; 1570 } 1571 }; 1572 1573 struct utf32_writer 1574 { 1575 typedef uint32_t* value_type; 1576 lowutf32_writer1577 static value_type low(value_type result, uint32_t ch) 1578 { 1579 *result = ch; 1580 1581 return result + 1; 1582 } 1583 highutf32_writer1584 static value_type high(value_type result, uint32_t ch) 1585 { 1586 *result = ch; 1587 1588 return result + 1; 1589 } 1590 anyutf32_writer1591 static value_type any(value_type result, uint32_t ch) 1592 { 1593 *result = ch; 1594 1595 return result + 1; 1596 } 1597 }; 1598 1599 struct latin1_writer 1600 { 1601 typedef uint8_t* value_type; 1602 lowlatin1_writer1603 static value_type low(value_type result, uint32_t ch) 1604 { 1605 *result = static_cast<uint8_t>(ch > 255 ? '?' 
: ch); 1606 1607 return result + 1; 1608 } 1609 highlatin1_writer1610 static value_type high(value_type result, uint32_t ch) 1611 { 1612 (void)ch; 1613 1614 *result = '?'; 1615 1616 return result + 1; 1617 } 1618 }; 1619 1620 struct utf8_decoder 1621 { 1622 typedef uint8_t type; 1623 processutf8_decoder1624 template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) 1625 { 1626 const uint8_t utf8_byte_mask = 0x3f; 1627 1628 while (size) 1629 { 1630 uint8_t lead = *data; 1631 1632 // 0xxxxxxx -> U+0000..U+007F 1633 if (lead < 0x80) 1634 { 1635 result = Traits::low(result, lead); 1636 data += 1; 1637 size -= 1; 1638 1639 // process aligned single-byte (ascii) blocks 1640 if ((reinterpret_cast<uintptr_t>(data) & 3) == 0) 1641 { 1642 // round-trip through void* to silence 'cast increases required alignment of target type' warnings 1643 while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0) 1644 { 1645 result = Traits::low(result, data[0]); 1646 result = Traits::low(result, data[1]); 1647 result = Traits::low(result, data[2]); 1648 result = Traits::low(result, data[3]); 1649 data += 4; 1650 size -= 4; 1651 } 1652 } 1653 } 1654 // 110xxxxx -> U+0080..U+07FF 1655 else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80) 1656 { 1657 result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask)); 1658 data += 2; 1659 size -= 2; 1660 } 1661 // 1110xxxx -> U+0800-U+FFFF 1662 else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80) 1663 { 1664 result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask)); 1665 data += 3; 1666 size -= 3; 1667 } 1668 // 11110xxx -> U+10000..U+10FFFF 1669 else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && 
(data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80) 1670 { 1671 result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask)); 1672 data += 4; 1673 size -= 4; 1674 } 1675 // 10xxxxxx or 11111xxx -> invalid 1676 else 1677 { 1678 data += 1; 1679 size -= 1; 1680 } 1681 } 1682 1683 return result; 1684 } 1685 }; 1686 1687 template <typename opt_swap> struct utf16_decoder 1688 { 1689 typedef uint16_t type; 1690 processutf16_decoder1691 template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits) 1692 { 1693 while (size) 1694 { 1695 uint16_t lead = opt_swap::value ? endian_swap(*data) : *data; 1696 1697 // U+0000..U+D7FF 1698 if (lead < 0xD800) 1699 { 1700 result = Traits::low(result, lead); 1701 data += 1; 1702 size -= 1; 1703 } 1704 // U+E000..U+FFFF 1705 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000) 1706 { 1707 result = Traits::low(result, lead); 1708 data += 1; 1709 size -= 1; 1710 } 1711 // surrogate pair lead 1712 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2) 1713 { 1714 uint16_t next = opt_swap::value ? 
endian_swap(data[1]) : data[1]; 1715 1716 if (static_cast<unsigned int>(next - 0xDC00) < 0x400) 1717 { 1718 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff)); 1719 data += 2; 1720 size -= 2; 1721 } 1722 else 1723 { 1724 data += 1; 1725 size -= 1; 1726 } 1727 } 1728 else 1729 { 1730 data += 1; 1731 size -= 1; 1732 } 1733 } 1734 1735 return result; 1736 } 1737 }; 1738 1739 template <typename opt_swap> struct utf32_decoder 1740 { 1741 typedef uint32_t type; 1742 processutf32_decoder1743 template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits) 1744 { 1745 while (size) 1746 { 1747 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data; 1748 1749 // U+0000..U+FFFF 1750 if (lead < 0x10000) 1751 { 1752 result = Traits::low(result, lead); 1753 data += 1; 1754 size -= 1; 1755 } 1756 // U+10000..U+10FFFF 1757 else 1758 { 1759 result = Traits::high(result, lead); 1760 data += 1; 1761 size -= 1; 1762 } 1763 } 1764 1765 return result; 1766 } 1767 }; 1768 1769 struct latin1_decoder 1770 { 1771 typedef uint8_t type; 1772 processlatin1_decoder1773 template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) 1774 { 1775 while (size) 1776 { 1777 result = Traits::low(result, *data); 1778 data += 1; 1779 size -= 1; 1780 } 1781 1782 return result; 1783 } 1784 }; 1785 1786 template <size_t size> struct wchar_selector; 1787 1788 template <> struct wchar_selector<2> 1789 { 1790 typedef uint16_t type; 1791 typedef utf16_counter counter; 1792 typedef utf16_writer writer; 1793 typedef utf16_decoder<opt_false> decoder; 1794 }; 1795 1796 template <> struct wchar_selector<4> 1797 { 1798 typedef uint32_t type; 1799 typedef utf32_counter counter; 1800 typedef utf32_writer writer; 1801 typedef utf32_decoder<opt_false> decoder; 1802 }; 1803 1804 typedef 
wchar_selector<sizeof(wchar_t)>::counter wchar_counter; 1805 typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer; 1806 1807 struct wchar_decoder 1808 { 1809 typedef wchar_t type; 1810 processwchar_decoder1811 template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits) 1812 { 1813 typedef wchar_selector<sizeof(wchar_t)>::decoder decoder; 1814 1815 return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits); 1816 } 1817 }; 1818 1819 #ifdef PUGIXML_WCHAR_MODE convert_wchar_endian_swap(wchar_t * result,const wchar_t * data,size_t length)1820 PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length) 1821 { 1822 for (size_t i = 0; i < length; ++i) 1823 result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i]))); 1824 } 1825 #endif 1826 PUGI__NS_END 1827 1828 PUGI__NS_BEGIN 1829 enum chartype_t 1830 { 1831 ct_parse_pcdata = 1, // \0, &, \r, < 1832 ct_parse_attr = 2, // \0, &, \r, ', " 1833 ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab 1834 ct_space = 8, // \r, \n, space, tab 1835 ct_parse_cdata = 16, // \0, ], >, \r 1836 ct_parse_comment = 32, // \0, -, >, \r 1837 ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, . 
1838 ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, : 1839 }; 1840 1841 static const unsigned char chartype_table[256] = 1842 { 1843 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15 1844 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 1845 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47 1846 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63 1847 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79 1848 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95 1849 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111 1850 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127 1851 1852 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+ 1853 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1854 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1855 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1856 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1857 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1858 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1859 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192 1860 }; 1861 1862 enum chartypex_t 1863 { 1864 ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, > 1865 ctx_special_attr = 2, // Any symbol >= 0 and < 32, &, <, ", ' 1866 ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _ 1867 ctx_digit = 8, // 0-9 1868 ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, . 
1869 }; 1870 1871 static const unsigned char chartypex_table[256] = 1872 { 1873 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, // 0-15 1874 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31 1875 0, 0, 2, 0, 0, 0, 3, 2, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47 1876 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 1, 0, // 48-63 1877 1878 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79 1879 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95 1880 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111 1881 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127 1882 1883 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+ 1884 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1885 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1886 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1887 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1888 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1889 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1890 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 1891 }; 1892 1893 #ifdef PUGIXML_WCHAR_MODE 1894 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? 
table[static_cast<unsigned int>(c)] : table[128]) & (ct)) 1895 #else 1896 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct)) 1897 #endif 1898 1899 #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table) 1900 #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table) 1901 is_little_endian()1902 PUGI__FN bool is_little_endian() 1903 { 1904 unsigned int ui = 1; 1905 1906 return *reinterpret_cast<unsigned char*>(&ui) == 1; 1907 } 1908 get_wchar_encoding()1909 PUGI__FN xml_encoding get_wchar_encoding() 1910 { 1911 PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); 1912 1913 if (sizeof(wchar_t) == 2) 1914 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 1915 else 1916 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; 1917 } 1918 parse_declaration_encoding(const uint8_t * data,size_t size,const uint8_t * & out_encoding,size_t & out_length)1919 PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length) 1920 { 1921 #define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; } 1922 #define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; } 1923 1924 // check if we have a non-empty XML declaration 1925 if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space))) 1926 return false; 1927 1928 // scan XML declaration until the encoding field 1929 for (size_t i = 6; i + 1 < size; ++i) 1930 { 1931 // declaration can not contain ? 
in quoted values 1932 if (data[i] == '?') 1933 return false; 1934 1935 if (data[i] == 'e' && data[i + 1] == 'n') 1936 { 1937 size_t offset = i; 1938 1939 // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed 1940 PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o'); 1941 PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g'); 1942 1943 // S? = S? 1944 PUGI__SCANCHARTYPE(ct_space); 1945 PUGI__SCANCHAR('='); 1946 PUGI__SCANCHARTYPE(ct_space); 1947 1948 // the only two valid delimiters are ' and " 1949 uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\''; 1950 1951 PUGI__SCANCHAR(delimiter); 1952 1953 size_t start = offset; 1954 1955 out_encoding = data + offset; 1956 1957 PUGI__SCANCHARTYPE(ct_symbol); 1958 1959 out_length = offset - start; 1960 1961 PUGI__SCANCHAR(delimiter); 1962 1963 return true; 1964 } 1965 } 1966 1967 return false; 1968 1969 #undef PUGI__SCANCHAR 1970 #undef PUGI__SCANCHARTYPE 1971 } 1972 guess_buffer_encoding(const uint8_t * data,size_t size)1973 PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size) 1974 { 1975 // skip encoding autodetection if input buffer is too small 1976 if (size < 4) return encoding_utf8; 1977 1978 uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3]; 1979 1980 // look for BOM in first few bytes 1981 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be; 1982 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le; 1983 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be; 1984 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le; 1985 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8; 1986 1987 // look for <, <? 
or <?xm in various encodings 1988 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be; 1989 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le; 1990 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be; 1991 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le; 1992 1993 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early) 1994 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be; 1995 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le; 1996 1997 // no known BOM detected; parse declaration 1998 const uint8_t* enc = 0; 1999 size_t enc_length = 0; 2000 2001 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length)) 2002 { 2003 // iso-8859-1 (case-insensitive) 2004 if (enc_length == 10 2005 && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o' 2006 && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9' 2007 && enc[8] == '-' && enc[9] == '1') 2008 return encoding_latin1; 2009 2010 // latin1 (case-insensitive) 2011 if (enc_length == 6 2012 && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't' 2013 && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n' 2014 && enc[5] == '1') 2015 return encoding_latin1; 2016 } 2017 2018 return encoding_utf8; 2019 } 2020 get_buffer_encoding(xml_encoding encoding,const void * contents,size_t size)2021 PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size) 2022 { 2023 // replace wchar encoding with utf implementation 2024 if (encoding == encoding_wchar) return get_wchar_encoding(); 2025 2026 // replace utf16 encoding with utf16 with specific endianness 2027 if (encoding == encoding_utf16) return is_little_endian() ? 
encoding_utf16_le : encoding_utf16_be; 2028 2029 // replace utf32 encoding with utf32 with specific endianness 2030 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; 2031 2032 // only do autodetection if no explicit encoding is requested 2033 if (encoding != encoding_auto) return encoding; 2034 2035 // try to guess encoding (based on XML specification, Appendix F.1) 2036 const uint8_t* data = static_cast<const uint8_t*>(contents); 2037 2038 return guess_buffer_encoding(data, size); 2039 } 2040 get_mutable_buffer(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2041 PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) 2042 { 2043 size_t length = size / sizeof(char_t); 2044 2045 if (is_mutable) 2046 { 2047 out_buffer = static_cast<char_t*>(const_cast<void*>(contents)); 2048 out_length = length; 2049 } 2050 else 2051 { 2052 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2053 if (!buffer) return false; 2054 2055 if (contents) 2056 memcpy(buffer, contents, length * sizeof(char_t)); 2057 else 2058 assert(length == 0); 2059 2060 buffer[length] = 0; 2061 2062 out_buffer = buffer; 2063 out_length = length + 1; 2064 } 2065 2066 return true; 2067 } 2068 2069 #ifdef PUGIXML_WCHAR_MODE need_endian_swap_utf(xml_encoding le,xml_encoding re)2070 PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re) 2071 { 2072 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) || 2073 (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be); 2074 } 2075 convert_buffer_endian_swap(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2076 PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const 
void* contents, size_t size, bool is_mutable) 2077 { 2078 const char_t* data = static_cast<const char_t*>(contents); 2079 size_t length = size / sizeof(char_t); 2080 2081 if (is_mutable) 2082 { 2083 char_t* buffer = const_cast<char_t*>(data); 2084 2085 convert_wchar_endian_swap(buffer, data, length); 2086 2087 out_buffer = buffer; 2088 out_length = length; 2089 } 2090 else 2091 { 2092 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2093 if (!buffer) return false; 2094 2095 convert_wchar_endian_swap(buffer, data, length); 2096 buffer[length] = 0; 2097 2098 out_buffer = buffer; 2099 out_length = length + 1; 2100 } 2101 2102 return true; 2103 } 2104 convert_buffer_generic(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,D)2105 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) 2106 { 2107 const typename D::type* data = static_cast<const typename D::type*>(contents); 2108 size_t data_length = size / sizeof(typename D::type); 2109 2110 // first pass: get length in wchar_t units 2111 size_t length = D::process(data, data_length, 0, wchar_counter()); 2112 2113 // allocate buffer of suitable length 2114 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2115 if (!buffer) return false; 2116 2117 // second pass: convert utf16 input to wchar_t 2118 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer); 2119 wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer()); 2120 2121 assert(oend == obegin + length); 2122 *oend = 0; 2123 2124 out_buffer = buffer; 2125 out_length = length + 1; 2126 2127 return true; 2128 } 2129 convert_buffer(char_t * & out_buffer,size_t & out_length,xml_encoding encoding,const void * contents,size_t size,bool is_mutable)2130 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, 
xml_encoding encoding, const void* contents, size_t size, bool is_mutable) 2131 { 2132 // get native encoding 2133 xml_encoding wchar_encoding = get_wchar_encoding(); 2134 2135 // fast path: no conversion required 2136 if (encoding == wchar_encoding) 2137 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); 2138 2139 // only endian-swapping is required 2140 if (need_endian_swap_utf(encoding, wchar_encoding)) 2141 return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable); 2142 2143 // source encoding is utf8 2144 if (encoding == encoding_utf8) 2145 return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder()); 2146 2147 // source encoding is utf16 2148 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) 2149 { 2150 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 2151 2152 return (native_encoding == encoding) ? 2153 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) : 2154 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>()); 2155 } 2156 2157 // source encoding is utf32 2158 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) 2159 { 2160 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; 2161 2162 return (native_encoding == encoding) ? 
2163 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) : 2164 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>()); 2165 } 2166 2167 // source encoding is latin1 2168 if (encoding == encoding_latin1) 2169 return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder()); 2170 2171 assert(false && "Invalid encoding"); // unreachable 2172 return false; 2173 } 2174 #else convert_buffer_generic(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,D)2175 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) 2176 { 2177 const typename D::type* data = static_cast<const typename D::type*>(contents); 2178 size_t data_length = size / sizeof(typename D::type); 2179 2180 // first pass: get length in utf8 units 2181 size_t length = D::process(data, data_length, 0, utf8_counter()); 2182 2183 // allocate buffer of suitable length 2184 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2185 if (!buffer) return false; 2186 2187 // second pass: convert utf16 input to utf8 2188 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer); 2189 uint8_t* oend = D::process(data, data_length, obegin, utf8_writer()); 2190 2191 assert(oend == obegin + length); 2192 *oend = 0; 2193 2194 out_buffer = buffer; 2195 out_length = length + 1; 2196 2197 return true; 2198 } 2199 get_latin1_7bit_prefix_length(const uint8_t * data,size_t size)2200 PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size) 2201 { 2202 for (size_t i = 0; i < size; ++i) 2203 if (data[i] > 127) 2204 return i; 2205 2206 return size; 2207 } 2208 convert_buffer_latin1(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2209 PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t 
size, bool is_mutable) 2210 { 2211 const uint8_t* data = static_cast<const uint8_t*>(contents); 2212 size_t data_length = size; 2213 2214 // get size of prefix that does not need utf8 conversion 2215 size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length); 2216 assert(prefix_length <= data_length); 2217 2218 const uint8_t* postfix = data + prefix_length; 2219 size_t postfix_length = data_length - prefix_length; 2220 2221 // if no conversion is needed, just return the original buffer 2222 if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); 2223 2224 // first pass: get length in utf8 units 2225 size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter()); 2226 2227 // allocate buffer of suitable length 2228 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2229 if (!buffer) return false; 2230 2231 // second pass: convert latin1 input to utf8 2232 memcpy(buffer, data, prefix_length); 2233 2234 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer); 2235 uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer()); 2236 2237 assert(oend == obegin + length); 2238 *oend = 0; 2239 2240 out_buffer = buffer; 2241 out_length = length + 1; 2242 2243 return true; 2244 } 2245 convert_buffer(char_t * & out_buffer,size_t & out_length,xml_encoding encoding,const void * contents,size_t size,bool is_mutable)2246 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) 2247 { 2248 // fast path: no conversion required 2249 if (encoding == encoding_utf8) 2250 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); 2251 2252 // source encoding is utf16 2253 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) 2254 { 2255 xml_encoding native_encoding = is_little_endian() ? 
encoding_utf16_le : encoding_utf16_be; 2256 2257 return (native_encoding == encoding) ? 2258 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) : 2259 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>()); 2260 } 2261 2262 // source encoding is utf32 2263 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) 2264 { 2265 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; 2266 2267 return (native_encoding == encoding) ? 2268 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) : 2269 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>()); 2270 } 2271 2272 // source encoding is latin1 2273 if (encoding == encoding_latin1) 2274 return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable); 2275 2276 assert(false && "Invalid encoding"); // unreachable 2277 return false; 2278 } 2279 #endif 2280 as_utf8_begin(const wchar_t * str,size_t length)2281 PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length) 2282 { 2283 // get length in utf8 characters 2284 return wchar_decoder::process(str, length, 0, utf8_counter()); 2285 } 2286 as_utf8_end(char * buffer,size_t size,const wchar_t * str,size_t length)2287 PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length) 2288 { 2289 // convert to utf8 2290 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer); 2291 uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer()); 2292 2293 assert(begin + size == end); 2294 (void)!end; 2295 (void)!size; 2296 } 2297 2298 #ifndef PUGIXML_NO_STL as_utf8_impl(const wchar_t * str,size_t length)2299 PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length) 2300 { 2301 // first pass: get length in utf8 characters 2302 size_t size = as_utf8_begin(str, length); 2303 2304 // allocate resulting string 2305 std::string 
result; 2306 result.resize(size); 2307 2308 // second pass: convert to utf8 2309 if (size > 0) as_utf8_end(&result[0], size, str, length); 2310 2311 return result; 2312 } 2313 as_wide_impl(const char * str,size_t size)2314 PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size) 2315 { 2316 const uint8_t* data = reinterpret_cast<const uint8_t*>(str); 2317 2318 // first pass: get length in wchar_t units 2319 size_t length = utf8_decoder::process(data, size, 0, wchar_counter()); 2320 2321 // allocate resulting string 2322 std::basic_string<wchar_t> result; 2323 result.resize(length); 2324 2325 // second pass: convert to wchar_t 2326 if (length > 0) 2327 { 2328 wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]); 2329 wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer()); 2330 2331 assert(begin + length == end); 2332 (void)!end; 2333 } 2334 2335 return result; 2336 } 2337 #endif 2338 2339 template <typename Header> strcpy_insitu_allow(size_t length,const Header & header,uintptr_t header_mask,char_t * target)2340 inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target) 2341 { 2342 // never reuse shared memory 2343 if (header & xml_memory_page_contents_shared_mask) return false; 2344 2345 size_t target_length = strlength(target); 2346 2347 // always reuse document buffer memory if possible 2348 if ((header & header_mask) == 0) return target_length >= length; 2349 2350 // reuse heap memory if waste is not too great 2351 const size_t reuse_threshold = 32; 2352 2353 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2); 2354 } 2355 2356 template <typename String, typename Header> strcpy_insitu(String & dest,Header & header,uintptr_t header_mask,const char_t * source,size_t source_length)2357 PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, 
const char_t* source, size_t source_length) 2358 { 2359 if (source_length == 0) 2360 { 2361 // empty string and null pointer are equivalent, so just deallocate old memory 2362 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; 2363 2364 if (header & header_mask) alloc->deallocate_string(dest); 2365 2366 // mark the string as not allocated 2367 dest = 0; 2368 header &= ~header_mask; 2369 2370 return true; 2371 } 2372 else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest)) 2373 { 2374 // we can reuse old buffer, so just copy the new data (including zero terminator) 2375 memcpy(dest, source, source_length * sizeof(char_t)); 2376 dest[source_length] = 0; 2377 2378 return true; 2379 } 2380 else 2381 { 2382 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; 2383 2384 if (!alloc->reserve()) return false; 2385 2386 // allocate new buffer 2387 char_t* buf = alloc->allocate_string(source_length + 1); 2388 if (!buf) return false; 2389 2390 // copy the string (including zero terminator) 2391 memcpy(buf, source, source_length * sizeof(char_t)); 2392 buf[source_length] = 0; 2393 2394 // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures) 2395 if (header & header_mask) alloc->deallocate_string(dest); 2396 2397 // the string is now allocated, so set the flag 2398 dest = buf; 2399 header |= header_mask; 2400 2401 return true; 2402 } 2403 } 2404 2405 struct gap 2406 { 2407 char_t* end; 2408 size_t size; 2409 gapgap2410 gap(): end(0), size(0) 2411 { 2412 } 2413 2414 // Push new gap, move s count bytes further (skipping the gap). 2415 // Collapse previous gap. pushgap2416 void push(char_t*& s, size_t count) 2417 { 2418 if (end) // there was a gap already; collapse it 2419 { 2420 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...) 
2421 assert(s >= end); 2422 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end)); 2423 } 2424 2425 s += count; // end of current gap 2426 2427 // "merge" two gaps 2428 end = s; 2429 size += count; 2430 } 2431 2432 // Collapse all gaps, return past-the-end pointer flushgap2433 char_t* flush(char_t* s) 2434 { 2435 if (end) 2436 { 2437 // Move [old_gap_end, current_pos) to [old_gap_start, ...) 2438 assert(s >= end); 2439 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end)); 2440 2441 return s - size; 2442 } 2443 else return s; 2444 } 2445 }; 2446 strconv_escape(char_t * s,gap & g)2447 PUGI__FN char_t* strconv_escape(char_t* s, gap& g) 2448 { 2449 char_t* stre = s + 1; 2450 2451 switch (*stre) 2452 { 2453 case '#': // &#... 2454 { 2455 unsigned int ucsc = 0; 2456 2457 if (stre[1] == 'x') // &#x... (hex code) 2458 { 2459 stre += 2; 2460 2461 char_t ch = *stre; 2462 2463 if (ch == ';') return stre; 2464 2465 for (;;) 2466 { 2467 if (static_cast<unsigned int>(ch - '0') <= 9) 2468 ucsc = 16 * ucsc + (ch - '0'); 2469 else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5) 2470 ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10); 2471 else if (ch == ';') 2472 break; 2473 else // cancel 2474 return stre; 2475 2476 ch = *++stre; 2477 } 2478 2479 ++stre; 2480 } 2481 else // &#... 
(dec code) 2482 { 2483 char_t ch = *++stre; 2484 2485 if (ch == ';') return stre; 2486 2487 for (;;) 2488 { 2489 if (static_cast<unsigned int>(ch - '0') <= 9) 2490 ucsc = 10 * ucsc + (ch - '0'); 2491 else if (ch == ';') 2492 break; 2493 else // cancel 2494 return stre; 2495 2496 ch = *++stre; 2497 } 2498 2499 ++stre; 2500 } 2501 2502 #ifdef PUGIXML_WCHAR_MODE 2503 s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc)); 2504 #else 2505 s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc)); 2506 #endif 2507 2508 g.push(s, stre - s); 2509 return stre; 2510 } 2511 2512 case 'a': // &a 2513 { 2514 ++stre; 2515 2516 if (*stre == 'm') // &am 2517 { 2518 if (*++stre == 'p' && *++stre == ';') // & 2519 { 2520 *s++ = '&'; 2521 ++stre; 2522 2523 g.push(s, stre - s); 2524 return stre; 2525 } 2526 } 2527 else if (*stre == 'p') // &ap 2528 { 2529 if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // ' 2530 { 2531 *s++ = '\''; 2532 ++stre; 2533 2534 g.push(s, stre - s); 2535 return stre; 2536 } 2537 } 2538 break; 2539 } 2540 2541 case 'g': // &g 2542 { 2543 if (*++stre == 't' && *++stre == ';') // > 2544 { 2545 *s++ = '>'; 2546 ++stre; 2547 2548 g.push(s, stre - s); 2549 return stre; 2550 } 2551 break; 2552 } 2553 2554 case 'l': // &l 2555 { 2556 if (*++stre == 't' && *++stre == ';') // < 2557 { 2558 *s++ = '<'; 2559 ++stre; 2560 2561 g.push(s, stre - s); 2562 return stre; 2563 } 2564 break; 2565 } 2566 2567 case 'q': // &q 2568 { 2569 if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // " 2570 { 2571 *s++ = '"'; 2572 ++stre; 2573 2574 g.push(s, stre - s); 2575 return stre; 2576 } 2577 break; 2578 } 2579 2580 default: 2581 break; 2582 } 2583 2584 return stre; 2585 } 2586 2587 // Parser utilities 2588 #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e))) 2589 #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; } 2590 #define 
PUGI__OPTSET(OPT) ( optmsk & (OPT) ) 2591 #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); } 2592 #define PUGI__POPNODE() { cursor = cursor->parent; } 2593 #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; } 2594 #define PUGI__SCANWHILE(X) { while (X) ++s; } 2595 #define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } } 2596 #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; } 2597 #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0) 2598 #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); } 2599 strconv_comment(char_t * s,char_t endch)2600 PUGI__FN char_t* strconv_comment(char_t* s, char_t endch) 2601 { 2602 gap g; 2603 2604 while (true) 2605 { 2606 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment)); 2607 2608 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair 2609 { 2610 *s++ = '\n'; // replace first one with 0x0a 2611 2612 if (*s == '\n') g.push(s, 1); 2613 } 2614 else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here 2615 { 2616 *g.flush(s) = 0; 2617 2618 return s + (s[2] == '>' ? 
3 : 2); 2619 } 2620 else if (*s == 0) 2621 { 2622 return 0; 2623 } 2624 else ++s; 2625 } 2626 } 2627 strconv_cdata(char_t * s,char_t endch)2628 PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch) 2629 { 2630 gap g; 2631 2632 while (true) 2633 { 2634 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata)); 2635 2636 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair 2637 { 2638 *s++ = '\n'; // replace first one with 0x0a 2639 2640 if (*s == '\n') g.push(s, 1); 2641 } 2642 else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here 2643 { 2644 *g.flush(s) = 0; 2645 2646 return s + 1; 2647 } 2648 else if (*s == 0) 2649 { 2650 return 0; 2651 } 2652 else ++s; 2653 } 2654 } 2655 2656 typedef char_t* (*strconv_pcdata_t)(char_t*); 2657 2658 template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl 2659 { parsestrconv_pcdata_impl2660 static char_t* parse(char_t* s) 2661 { 2662 gap g; 2663 2664 char_t* begin = s; 2665 2666 while (true) 2667 { 2668 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata)); 2669 2670 if (*s == '<') // PCDATA ends here 2671 { 2672 char_t* end = g.flush(s); 2673 2674 if (opt_trim::value) 2675 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) 2676 --end; 2677 2678 *end = 0; 2679 2680 return s + 1; 2681 } 2682 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair 2683 { 2684 *s++ = '\n'; // replace first one with 0x0a 2685 2686 if (*s == '\n') g.push(s, 1); 2687 } 2688 else if (opt_escape::value && *s == '&') 2689 { 2690 s = strconv_escape(s, g); 2691 } 2692 else if (*s == 0) 2693 { 2694 char_t* end = g.flush(s); 2695 2696 if (opt_trim::value) 2697 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) 2698 --end; 2699 2700 *end = 0; 2701 2702 return s; 2703 } 2704 else ++s; 2705 } 2706 } 2707 }; 2708 get_strconv_pcdata(unsigned int optmask)2709 PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask) 2710 { 
2711 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800); 2712 2713 switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (trim eol escapes); this simultaneously checks 3 options from assertion above 2714 { 2715 case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse; 2716 case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse; 2717 case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse; 2718 case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse; 2719 case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse; 2720 case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse; 2721 case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse; 2722 case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse; 2723 default: assert(false); return 0; // unreachable 2724 } 2725 } 2726 2727 typedef char_t* (*strconv_attribute_t)(char_t*, char_t); 2728 2729 template <typename opt_escape> struct strconv_attribute_impl 2730 { parse_wnormstrconv_attribute_impl2731 static char_t* parse_wnorm(char_t* s, char_t end_quote) 2732 { 2733 gap g; 2734 2735 // trim leading whitespaces 2736 if (PUGI__IS_CHARTYPE(*s, ct_space)) 2737 { 2738 char_t* str = s; 2739 2740 do ++str; 2741 while (PUGI__IS_CHARTYPE(*str, ct_space)); 2742 2743 g.push(s, str - s); 2744 } 2745 2746 while (true) 2747 { 2748 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space)); 2749 2750 if (*s == end_quote) 2751 { 2752 char_t* str = g.flush(s); 2753 2754 do *str-- = 0; 2755 while (PUGI__IS_CHARTYPE(*str, ct_space)); 2756 2757 return s + 1; 2758 } 2759 else if (PUGI__IS_CHARTYPE(*s, ct_space)) 2760 { 2761 *s++ = ' '; 2762 2763 if (PUGI__IS_CHARTYPE(*s, ct_space)) 2764 { 2765 char_t* str = s + 1; 2766 while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str; 2767 2768 g.push(s, str - s); 2769 } 2770 } 2771 else if 
(opt_escape::value && *s == '&') 2772 { 2773 s = strconv_escape(s, g); 2774 } 2775 else if (!*s) 2776 { 2777 return 0; 2778 } 2779 else ++s; 2780 } 2781 } 2782 parse_wconvstrconv_attribute_impl2783 static char_t* parse_wconv(char_t* s, char_t end_quote) 2784 { 2785 gap g; 2786 2787 while (true) 2788 { 2789 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws)); 2790 2791 if (*s == end_quote) 2792 { 2793 *g.flush(s) = 0; 2794 2795 return s + 1; 2796 } 2797 else if (PUGI__IS_CHARTYPE(*s, ct_space)) 2798 { 2799 if (*s == '\r') 2800 { 2801 *s++ = ' '; 2802 2803 if (*s == '\n') g.push(s, 1); 2804 } 2805 else *s++ = ' '; 2806 } 2807 else if (opt_escape::value && *s == '&') 2808 { 2809 s = strconv_escape(s, g); 2810 } 2811 else if (!*s) 2812 { 2813 return 0; 2814 } 2815 else ++s; 2816 } 2817 } 2818 parse_eolstrconv_attribute_impl2819 static char_t* parse_eol(char_t* s, char_t end_quote) 2820 { 2821 gap g; 2822 2823 while (true) 2824 { 2825 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); 2826 2827 if (*s == end_quote) 2828 { 2829 *g.flush(s) = 0; 2830 2831 return s + 1; 2832 } 2833 else if (*s == '\r') 2834 { 2835 *s++ = '\n'; 2836 2837 if (*s == '\n') g.push(s, 1); 2838 } 2839 else if (opt_escape::value && *s == '&') 2840 { 2841 s = strconv_escape(s, g); 2842 } 2843 else if (!*s) 2844 { 2845 return 0; 2846 } 2847 else ++s; 2848 } 2849 } 2850 parse_simplestrconv_attribute_impl2851 static char_t* parse_simple(char_t* s, char_t end_quote) 2852 { 2853 gap g; 2854 2855 while (true) 2856 { 2857 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); 2858 2859 if (*s == end_quote) 2860 { 2861 *g.flush(s) = 0; 2862 2863 return s + 1; 2864 } 2865 else if (opt_escape::value && *s == '&') 2866 { 2867 s = strconv_escape(s, g); 2868 } 2869 else if (!*s) 2870 { 2871 return 0; 2872 } 2873 else ++s; 2874 } 2875 } 2876 }; 2877 get_strconv_attribute(unsigned int optmask)2878 PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask) 2879 { 2880 
PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80); 2881 2882 switch ((optmask >> 4) & 15) // get bitmask for flags (wnorm wconv eol escapes); this simultaneously checks 4 options from assertion above 2883 { 2884 case 0: return strconv_attribute_impl<opt_false>::parse_simple; 2885 case 1: return strconv_attribute_impl<opt_true>::parse_simple; 2886 case 2: return strconv_attribute_impl<opt_false>::parse_eol; 2887 case 3: return strconv_attribute_impl<opt_true>::parse_eol; 2888 case 4: return strconv_attribute_impl<opt_false>::parse_wconv; 2889 case 5: return strconv_attribute_impl<opt_true>::parse_wconv; 2890 case 6: return strconv_attribute_impl<opt_false>::parse_wconv; 2891 case 7: return strconv_attribute_impl<opt_true>::parse_wconv; 2892 case 8: return strconv_attribute_impl<opt_false>::parse_wnorm; 2893 case 9: return strconv_attribute_impl<opt_true>::parse_wnorm; 2894 case 10: return strconv_attribute_impl<opt_false>::parse_wnorm; 2895 case 11: return strconv_attribute_impl<opt_true>::parse_wnorm; 2896 case 12: return strconv_attribute_impl<opt_false>::parse_wnorm; 2897 case 13: return strconv_attribute_impl<opt_true>::parse_wnorm; 2898 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm; 2899 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm; 2900 default: assert(false); return 0; // unreachable 2901 } 2902 } 2903 make_parse_result(xml_parse_status status,ptrdiff_t offset=0)2904 inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0) 2905 { 2906 xml_parse_result result; 2907 result.status = status; 2908 result.offset = offset; 2909 2910 return result; 2911 } 2912 2913 struct xml_parser 2914 { 2915 xml_allocator* alloc; 2916 char_t* error_offset; 2917 xml_parse_status error_status; 2918 xml_parserxml_parser2919 xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok) 2920 { 2921 } 2922 2923 // 
DOCTYPE consists of nested sections of the following possible types: 2924 // <!-- ... -->, <? ... ?>, "...", '...' 2925 // <![...]]> 2926 // <!...> 2927 // First group can not contain nested groups 2928 // Second group can contain nested groups of the same type 2929 // Third group can contain all other groups parse_doctype_primitivexml_parser2930 char_t* parse_doctype_primitive(char_t* s) 2931 { 2932 if (*s == '"' || *s == '\'') 2933 { 2934 // quoted string 2935 char_t ch = *s++; 2936 PUGI__SCANFOR(*s == ch); 2937 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); 2938 2939 s++; 2940 } 2941 else if (s[0] == '<' && s[1] == '?') 2942 { 2943 // <? ... ?> 2944 s += 2; 2945 PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype 2946 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); 2947 2948 s += 2; 2949 } 2950 else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-') 2951 { 2952 s += 4; 2953 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype 2954 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); 2955 2956 s += 3; 2957 } 2958 else PUGI__THROW_ERROR(status_bad_doctype, s); 2959 2960 return s; 2961 } 2962 parse_doctype_ignorexml_parser2963 char_t* parse_doctype_ignore(char_t* s) 2964 { 2965 size_t depth = 0; 2966 2967 assert(s[0] == '<' && s[1] == '!' && s[2] == '['); 2968 s += 3; 2969 2970 while (*s) 2971 { 2972 if (s[0] == '<' && s[1] == '!' 
&& s[2] == '[') 2973 { 2974 // nested ignore section 2975 s += 3; 2976 depth++; 2977 } 2978 else if (s[0] == ']' && s[1] == ']' && s[2] == '>') 2979 { 2980 // ignore section end 2981 s += 3; 2982 2983 if (depth == 0) 2984 return s; 2985 2986 depth--; 2987 } 2988 else s++; 2989 } 2990 2991 PUGI__THROW_ERROR(status_bad_doctype, s); 2992 } 2993 parse_doctype_groupxml_parser2994 char_t* parse_doctype_group(char_t* s, char_t endch) 2995 { 2996 size_t depth = 0; 2997 2998 assert((s[0] == '<' || s[0] == 0) && s[1] == '!'); 2999 s += 2; 3000 3001 while (*s) 3002 { 3003 if (s[0] == '<' && s[1] == '!' && s[2] != '-') 3004 { 3005 if (s[2] == '[') 3006 { 3007 // ignore 3008 s = parse_doctype_ignore(s); 3009 if (!s) return s; 3010 } 3011 else 3012 { 3013 // some control group 3014 s += 2; 3015 depth++; 3016 } 3017 } 3018 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') 3019 { 3020 // unknown tag (forbidden), or some primitive group 3021 s = parse_doctype_primitive(s); 3022 if (!s) return s; 3023 } 3024 else if (*s == '>') 3025 { 3026 if (depth == 0) 3027 return s; 3028 3029 depth--; 3030 s++; 3031 } 3032 else s++; 3033 } 3034 3035 if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s); 3036 3037 return s; 3038 } 3039 parse_exclamationxml_parser3040 char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) 3041 { 3042 // parse node contents, starting with exclamation mark 3043 ++s; 3044 3045 if (*s == '-') // '<!-...' 3046 { 3047 ++s; 3048 3049 if (*s == '-') // '<!--...' 3050 { 3051 ++s; 3052 3053 if (PUGI__OPTSET(parse_comments)) 3054 { 3055 PUGI__PUSHNODE(node_comment); // Append a new node on the tree. 3056 cursor->value = s; // Save the offset. 3057 } 3058 3059 if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments)) 3060 { 3061 s = strconv_comment(s, endch); 3062 3063 if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value); 3064 } 3065 else 3066 { 3067 // Scan for terminating '-->'. 
3068 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')); 3069 PUGI__CHECK_ERROR(status_bad_comment, s); 3070 3071 if (PUGI__OPTSET(parse_comments)) 3072 *s = 0; // Zero-terminate this segment at the first terminating '-'. 3073 3074 s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'. 3075 } 3076 } 3077 else PUGI__THROW_ERROR(status_bad_comment, s); 3078 } 3079 else if (*s == '[') 3080 { 3081 // '<![CDATA[...' 3082 if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[') 3083 { 3084 ++s; 3085 3086 if (PUGI__OPTSET(parse_cdata)) 3087 { 3088 PUGI__PUSHNODE(node_cdata); // Append a new node on the tree. 3089 cursor->value = s; // Save the offset. 3090 3091 if (PUGI__OPTSET(parse_eol)) 3092 { 3093 s = strconv_cdata(s, endch); 3094 3095 if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value); 3096 } 3097 else 3098 { 3099 // Scan for terminating ']]>'. 3100 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')); 3101 PUGI__CHECK_ERROR(status_bad_cdata, s); 3102 3103 *s++ = 0; // Zero-terminate this segment. 3104 } 3105 } 3106 else // Flagged for discard, but we still have to scan for the terminator. 3107 { 3108 // Scan for terminating ']]>'. 3109 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')); 3110 PUGI__CHECK_ERROR(status_bad_cdata, s); 3111 3112 ++s; 3113 } 3114 3115 s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'. 
3116 } 3117 else PUGI__THROW_ERROR(status_bad_cdata, s); 3118 } 3119 else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E')) 3120 { 3121 s -= 2; 3122 3123 if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s); 3124 3125 char_t* mark = s + 9; 3126 3127 s = parse_doctype_group(s, endch); 3128 if (!s) return s; 3129 3130 assert((*s == 0 && endch == '>') || *s == '>'); 3131 if (*s) *s++ = 0; 3132 3133 if (PUGI__OPTSET(parse_doctype)) 3134 { 3135 while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark; 3136 3137 PUGI__PUSHNODE(node_doctype); 3138 3139 cursor->value = mark; 3140 } 3141 } 3142 else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s); 3143 else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s); 3144 else PUGI__THROW_ERROR(status_unrecognized_tag, s); 3145 3146 return s; 3147 } 3148 parse_questionxml_parser3149 char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch) 3150 { 3151 // load into registers 3152 xml_node_struct* cursor = ref_cursor; 3153 char_t ch = 0; 3154 3155 // parse node contents, starting with question mark 3156 ++s; 3157 3158 // read PI target 3159 char_t* target = s; 3160 3161 if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s); 3162 3163 PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); 3164 PUGI__CHECK_ERROR(status_bad_pi, s); 3165 3166 // determine node type; stricmp / strcasecmp is not portable 3167 bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s; 3168 3169 if (declaration ? 
PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi)) 3170 { 3171 if (declaration) 3172 { 3173 // disallow non top-level declarations 3174 if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s); 3175 3176 PUGI__PUSHNODE(node_declaration); 3177 } 3178 else 3179 { 3180 PUGI__PUSHNODE(node_pi); 3181 } 3182 3183 cursor->name = target; 3184 3185 PUGI__ENDSEG(); 3186 3187 // parse value/attributes 3188 if (ch == '?') 3189 { 3190 // empty node 3191 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s); 3192 s += (*s == '>'); 3193 3194 PUGI__POPNODE(); 3195 } 3196 else if (PUGI__IS_CHARTYPE(ch, ct_space)) 3197 { 3198 PUGI__SKIPWS(); 3199 3200 // scan for tag end 3201 char_t* value = s; 3202 3203 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>')); 3204 PUGI__CHECK_ERROR(status_bad_pi, s); 3205 3206 if (declaration) 3207 { 3208 // replace ending ? with / so that 'element' terminates properly 3209 *s = '/'; 3210 3211 // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES 3212 s = value; 3213 } 3214 else 3215 { 3216 // store value and step over > 3217 cursor->value = value; 3218 3219 PUGI__POPNODE(); 3220 3221 PUGI__ENDSEG(); 3222 3223 s += (*s == '>'); 3224 } 3225 } 3226 else PUGI__THROW_ERROR(status_bad_pi, s); 3227 } 3228 else 3229 { 3230 // scan for tag end 3231 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>')); 3232 PUGI__CHECK_ERROR(status_bad_pi, s); 3233 3234 s += (s[1] == '>' ? 
// Main tree parser: walks the zero-terminated buffer starting at 's' and appends the nodes it
// recognizes below 'root'.
//   s      - current scan position inside the (mutable) buffer; names and values are stored as
//            pointers into this buffer rather than copied
//   root   - node that receives the parsed children; also used at the end to verify that every
//            opened element was closed
//   optmsk - parse option mask (the mask the PUGI__OPTSET checks below are presumably applied to)
//   endch  - character that originally occupied the last buffer slot; parse() replaces that slot
//            with 0, so each "*s == 0" check below consults endch to decide whether the input
//            really ended in a valid way
// Returns the final scan position.
// NOTE(review): PUGI__THROW_ERROR / PUGI__PUSHNODE / PUGI__POPNODE / PUGI__ENDSEG / PUGI__SKIPWS are
// macros defined outside this chunk; the error macro presumably records status/offset on the parser
// and returns null (parse() reads error_status/error_offset afterwards) - confirm against the macros.
char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
{
	// conversion routines selected once up front from the option mask
	strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
	strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);

	char_t ch = 0;
	xml_node_struct* cursor = root;
	char_t* mark = s;

	while (*s != 0)
	{
		if (*s == '<')
		{
			++s;

		// also entered by goto from the PCDATA branch below once it has stepped over '<'
		LOC_TAG:
			if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
			{
				PUGI__PUSHNODE(node_element); // Append a new node to the tree.

				cursor->name = s;

				PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
				PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.

				if (ch == '>')
				{
					// end of tag
				}
				else if (PUGI__IS_CHARTYPE(ch, ct_space))
				{
				// also entered by goto after parse_question() leaves the cursor on a declaration node
				LOC_ATTRIBUTES:
					while (true)
					{
						PUGI__SKIPWS(); // Eat any whitespace.

						if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
						{
							xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute.
							if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);

							a->name = s; // Save the offset.

							PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
							PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.

							// whitespace between the attribute name and '=' is tolerated
							if (PUGI__IS_CHARTYPE(ch, ct_space))
							{
								PUGI__SKIPWS(); // Eat any whitespace.

								ch = *s;
								++s;
							}

							if (ch == '=') // '<... #=...'
							{
								PUGI__SKIPWS(); // Eat any whitespace.

								if (*s == '"' || *s == '\'') // '<... #="...'
								{
									ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
									++s; // Step over the quote.
									a->value = s; // Save the offset.

									s = strconv_attribute(s, ch);

									// a null result is reported at the start of the value
									if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);

									// After this line the loop continues from the start;
									// Whitespaces, / and > are ok, symbols and EOF are wrong,
									// everything else will be detected
									if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
								}
								else PUGI__THROW_ERROR(status_bad_attribute, s);
							}
							else PUGI__THROW_ERROR(status_bad_attribute, s);
						}
						else if (*s == '/')
						{
							++s;

							if (*s == '>')
							{
								PUGI__POPNODE();
								s++;
								break;
							}
							else if (*s == 0 && endch == '>')
							{
								// the closing '>' was the final input character that parse() replaced with 0
								PUGI__POPNODE();
								break;
							}
							else PUGI__THROW_ERROR(status_bad_start_element, s);
						}
						else if (*s == '>')
						{
							++s;

							break;
						}
						else if (*s == 0 && endch == '>')
						{
							// same as above: the terminating '>' was the removed last character
							break;
						}
						else PUGI__THROW_ERROR(status_bad_start_element, s);
					}

					// !!!
				}
				else if (ch == '/') // '<#.../'
				{
					if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);

					PUGI__POPNODE(); // Pop.

					// step over '>' only if it is physically present (it may have been the removed last character)
					s += (*s == '>');
				}
				else if (ch == 0)
				{
					// we stepped over null terminator, backtrack & handle closing tag
					--s;

					if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
				}
				else PUGI__THROW_ERROR(status_bad_start_element, s);
			}
			else if (*s == '/')
			{
				// closing tag: its name has to match the name of the element the cursor is on
				++s;

				mark = s;

				char_t* name = cursor->name;
				if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark);

				while (PUGI__IS_CHARTYPE(*s, ct_symbol))
				{
					if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
				}

				if (*name)
				{
					// the input stopped exactly one character short of the expected name and that character equals
					// the removed last one: the name itself matches, but the closing '>' is missing
					if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
					else PUGI__THROW_ERROR(status_end_element_mismatch, mark);
				}

				PUGI__POPNODE(); // Pop.

				PUGI__SKIPWS();

				if (*s == 0)
				{
					if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
				}
				else
				{
					if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
					++s;
				}
			}
			else if (*s == '?') // '<?...'
			{
				s = parse_question(s, cursor, optmsk, endch);
				if (!s) return s;

				// parse_question() leaves the cursor on a declaration node when its attributes still have to be parsed
				assert(cursor);
				if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
			}
			else if (*s == '!') // '<!...'
			{
				s = parse_exclamation(s, cursor, optmsk, endch);
				if (!s) return s;
			}
			else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
			else PUGI__THROW_ERROR(status_unrecognized_tag, s);
		}
		else
		{
			mark = s; // Save this offset while searching for a terminator.

			PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.

			if (*s == '<' || !*s)
			{
				// We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
				assert(mark != s);

				// whitespace-only text is dropped unless one of the ws_pcdata options asks for it (and trimming is off)
				if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
				{
					continue;
				}
				else if (PUGI__OPTSET(parse_ws_pcdata_single))
				{
					// "single" mode keeps it only when a closing tag follows directly and the element has no children yet
					if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
				}
			}

			// unless trimming is requested, rewind so the leading whitespace stays part of the text
			if (!PUGI__OPTSET(parse_trim_pcdata))
				s = mark;

			// text at a cursor without a parent (presumably the document level) is only stored in fragment mode
			if (cursor->parent || PUGI__OPTSET(parse_fragment))
			{
				// with parse_embed_pcdata the first text of a still-empty element is stored on the element itself
				if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
				{
					cursor->value = s; // Save the offset.
				}
				else
				{
					PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.

					cursor->value = s; // Save the offset.

					PUGI__POPNODE(); // Pop since this is a standalone.
				}

				s = strconv_pcdata(s);

				if (!*s) break;
			}
			else
			{
				// text is not stored here: just skip ahead to the next tag
				PUGI__SCANFOR(*s == '<'); // '...<'
				if (!*s) break;

				++s;
			}

			// We're after '<'
			goto LOC_TAG;
		}
	}

	// check that last tag is closed
	if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);

	return s;
}
s + 3 : s; 3491 } 3492 #endif 3493 has_element_node_siblingsxml_parser3494 static bool has_element_node_siblings(xml_node_struct* node) 3495 { 3496 while (node) 3497 { 3498 if (PUGI__NODETYPE(node) == node_element) return true; 3499 3500 node = node->next_sibling; 3501 } 3502 3503 return false; 3504 } 3505 parsexml_parser3506 static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk) 3507 { 3508 // early-out for empty documents 3509 if (length == 0) 3510 return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element); 3511 3512 // get last child of the root before parsing 3513 xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0; 3514 3515 // create parser on stack 3516 xml_parser parser(static_cast<xml_allocator*>(xmldoc)); 3517 3518 // save last character and make buffer zero-terminated (speeds up parsing) 3519 char_t endch = buffer[length - 1]; 3520 buffer[length - 1] = 0; 3521 3522 // skip BOM to make sure it does not end up as part of parse output 3523 char_t* buffer_data = parse_skip_bom(buffer); 3524 3525 // perform actual parsing 3526 parser.parse_tree(buffer_data, root, optmsk, endch); 3527 3528 xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0); 3529 assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length); 3530 3531 if (result) 3532 { 3533 // since we removed last character, we have to handle the only possible false positive (stray <) 3534 if (endch == '<') 3535 return make_parse_result(status_unrecognized_tag, length - 1); 3536 3537 // check if there are any element nodes parsed 3538 xml_node_struct* first_root_child_parsed = last_root_child ? 
last_root_child->next_sibling + 0 : root->first_child+ 0; 3539 3540 if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed)) 3541 return make_parse_result(status_no_document_element, length - 1); 3542 } 3543 else 3544 { 3545 // roll back offset if it occurs on a null terminator in the source buffer 3546 if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0) 3547 result.offset--; 3548 } 3549 3550 return result; 3551 } 3552 }; 3553 3554 // Output facilities get_write_native_encoding()3555 PUGI__FN xml_encoding get_write_native_encoding() 3556 { 3557 #ifdef PUGIXML_WCHAR_MODE 3558 return get_wchar_encoding(); 3559 #else 3560 return encoding_utf8; 3561 #endif 3562 } 3563 get_write_encoding(xml_encoding encoding)3564 PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding) 3565 { 3566 // replace wchar encoding with utf implementation 3567 if (encoding == encoding_wchar) return get_wchar_encoding(); 3568 3569 // replace utf16 encoding with utf16 with specific endianness 3570 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 3571 3572 // replace utf32 encoding with utf32 with specific endianness 3573 if (encoding == encoding_utf32) return is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; 3574 3575 // only do autodetection if no explicit encoding is requested 3576 if (encoding != encoding_auto) return encoding; 3577 3578 // assume utf8 encoding 3579 return encoding_utf8; 3580 } 3581 convert_buffer_output_generic(typename T::value_type dest,const char_t * data,size_t length,D,T)3582 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T) 3583 { 3584 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); 3585 3586 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T()); 3587 3588 return static_cast<size_t>(end - dest) * sizeof(*dest); 3589 } 3590 convert_buffer_output_generic(typename T::value_type dest,const char_t * data,size_t length,D,T,bool opt_swap)3591 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap) 3592 { 3593 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); 3594 3595 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T()); 3596 3597 if (opt_swap) 3598 { 3599 for (typename T::value_type i = dest; i != end; ++i) 3600 *i = endian_swap(*i); 3601 } 3602 3603 return static_cast<size_t>(end - dest) * sizeof(*dest); 3604 } 3605 3606 #ifdef PUGIXML_WCHAR_MODE get_valid_length(const char_t * data,size_t length)3607 PUGI__FN size_t get_valid_length(const char_t* data, size_t length) 3608 { 3609 if (length < 1) return 0; 3610 3611 // discard last character if it's the lead of a surrogate pair 3612 return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? 
length - 1 : length; 3613 } 3614 convert_buffer_output(char_t * r_char,uint8_t * r_u8,uint16_t * r_u16,uint32_t * r_u32,const char_t * data,size_t length,xml_encoding encoding)3615 PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) 3616 { 3617 // only endian-swapping is required 3618 if (need_endian_swap_utf(encoding, get_wchar_encoding())) 3619 { 3620 convert_wchar_endian_swap(r_char, data, length); 3621 3622 return length * sizeof(char_t); 3623 } 3624 3625 // convert to utf8 3626 if (encoding == encoding_utf8) 3627 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer()); 3628 3629 // convert to utf16 3630 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) 3631 { 3632 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 3633 3634 return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding); 3635 } 3636 3637 // convert to utf32 3638 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) 3639 { 3640 xml_encoding native_encoding = is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; 3641 3642 return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding); 3643 } 3644 3645 // convert to latin1 3646 if (encoding == encoding_latin1) 3647 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer()); 3648 3649 assert(false && "Invalid encoding"); // unreachable 3650 return 0; 3651 } 3652 #else get_valid_length(const char_t * data,size_t length)3653 PUGI__FN size_t get_valid_length(const char_t* data, size_t length) 3654 { 3655 if (length < 5) return 0; 3656 3657 for (size_t i = 1; i <= 4; ++i) 3658 { 3659 uint8_t ch = static_cast<uint8_t>(data[length - i]); 3660 3661 // either a standalone character or a leading one 3662 if ((ch & 0xc0) != 0x80) return length - i; 3663 } 3664 3665 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk 3666 return length; 3667 } 3668 convert_buffer_output(char_t *,uint8_t * r_u8,uint16_t * r_u16,uint32_t * r_u32,const char_t * data,size_t length,xml_encoding encoding)3669 PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) 3670 { 3671 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) 3672 { 3673 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 3674 3675 return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding); 3676 } 3677 3678 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) 3679 { 3680 xml_encoding native_encoding = is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; 3681 3682 return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding); 3683 } 3684 3685 if (encoding == encoding_latin1) 3686 return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer()); 3687 3688 assert(false && "Invalid encoding"); // unreachable 3689 return 0; 3690 } 3691 #endif 3692 3693 class xml_buffered_writer 3694 { 3695 xml_buffered_writer(const xml_buffered_writer&); 3696 xml_buffered_writer& operator=(const xml_buffered_writer&); 3697 3698 public: xml_buffered_writer(xml_writer & writer_,xml_encoding user_encoding)3699 xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding)) 3700 { 3701 PUGI__STATIC_ASSERT(bufcapacity >= 8); 3702 } 3703 flush()3704 size_t flush() 3705 { 3706 flush(buffer, bufsize); 3707 bufsize = 0; 3708 return 0; 3709 } 3710 flush(const char_t * data,size_t size)3711 void flush(const char_t* data, size_t size) 3712 { 3713 if (size == 0) return; 3714 3715 // fast path, just write data 3716 if (encoding == get_write_native_encoding()) 3717 writer.write(data, size * sizeof(char_t)); 3718 else 3719 { 3720 // convert chunk 3721 size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding); 3722 assert(result <= sizeof(scratch)); 3723 3724 // write data 3725 writer.write(scratch.data_u8, result); 3726 } 3727 } 3728 write_direct(const char_t * data,size_t length)3729 void write_direct(const char_t* data, size_t length) 3730 { 3731 // flush the remaining buffer contents 3732 flush(); 3733 3734 // handle large chunks 3735 if (length > bufcapacity) 3736 { 3737 if (encoding == get_write_native_encoding()) 3738 { 3739 // fast path, can just write data chunk 3740 writer.write(data, length * sizeof(char_t)); 3741 return; 3742 } 3743 3744 // need to convert in suitable 
chunks 3745 while (length > bufcapacity) 3746 { 3747 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer 3748 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary) 3749 size_t chunk_size = get_valid_length(data, bufcapacity); 3750 assert(chunk_size); 3751 3752 // convert chunk and write 3753 flush(data, chunk_size); 3754 3755 // iterate 3756 data += chunk_size; 3757 length -= chunk_size; 3758 } 3759 3760 // small tail is copied below 3761 bufsize = 0; 3762 } 3763 3764 memcpy(buffer + bufsize, data, length * sizeof(char_t)); 3765 bufsize += length; 3766 } 3767 write_buffer(const char_t * data,size_t length)3768 void write_buffer(const char_t* data, size_t length) 3769 { 3770 size_t offset = bufsize; 3771 3772 if (offset + length <= bufcapacity) 3773 { 3774 memcpy(buffer + offset, data, length * sizeof(char_t)); 3775 bufsize = offset + length; 3776 } 3777 else 3778 { 3779 write_direct(data, length); 3780 } 3781 } 3782 write_string(const char_t * data)3783 void write_string(const char_t* data) 3784 { 3785 // write the part of the string that fits in the buffer 3786 size_t offset = bufsize; 3787 3788 while (*data && offset < bufcapacity) 3789 buffer[offset++] = *data++; 3790 3791 // write the rest 3792 if (offset < bufcapacity) 3793 { 3794 bufsize = offset; 3795 } 3796 else 3797 { 3798 // backtrack a bit if we have split the codepoint 3799 size_t length = offset - bufsize; 3800 size_t extra = length - get_valid_length(data - length, length); 3801 3802 bufsize = offset - extra; 3803 3804 write_direct(data - extra, strlength(data) + extra); 3805 } 3806 } 3807 write(char_t d0)3808 void write(char_t d0) 3809 { 3810 size_t offset = bufsize; 3811 if (offset > bufcapacity - 1) offset = flush(); 3812 3813 buffer[offset + 0] = d0; 3814 bufsize = offset + 1; 3815 } 3816 write(char_t d0,char_t d1)3817 void write(char_t d0, char_t d1) 3818 { 3819 size_t offset = bufsize; 3820 if (offset > 
bufcapacity - 2) offset = flush(); 3821 3822 buffer[offset + 0] = d0; 3823 buffer[offset + 1] = d1; 3824 bufsize = offset + 2; 3825 } 3826 write(char_t d0,char_t d1,char_t d2)3827 void write(char_t d0, char_t d1, char_t d2) 3828 { 3829 size_t offset = bufsize; 3830 if (offset > bufcapacity - 3) offset = flush(); 3831 3832 buffer[offset + 0] = d0; 3833 buffer[offset + 1] = d1; 3834 buffer[offset + 2] = d2; 3835 bufsize = offset + 3; 3836 } 3837 write(char_t d0,char_t d1,char_t d2,char_t d3)3838 void write(char_t d0, char_t d1, char_t d2, char_t d3) 3839 { 3840 size_t offset = bufsize; 3841 if (offset > bufcapacity - 4) offset = flush(); 3842 3843 buffer[offset + 0] = d0; 3844 buffer[offset + 1] = d1; 3845 buffer[offset + 2] = d2; 3846 buffer[offset + 3] = d3; 3847 bufsize = offset + 4; 3848 } 3849 write(char_t d0,char_t d1,char_t d2,char_t d3,char_t d4)3850 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) 3851 { 3852 size_t offset = bufsize; 3853 if (offset > bufcapacity - 5) offset = flush(); 3854 3855 buffer[offset + 0] = d0; 3856 buffer[offset + 1] = d1; 3857 buffer[offset + 2] = d2; 3858 buffer[offset + 3] = d3; 3859 buffer[offset + 4] = d4; 3860 bufsize = offset + 5; 3861 } 3862 write(char_t d0,char_t d1,char_t d2,char_t d3,char_t d4,char_t d5)3863 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) 3864 { 3865 size_t offset = bufsize; 3866 if (offset > bufcapacity - 6) offset = flush(); 3867 3868 buffer[offset + 0] = d0; 3869 buffer[offset + 1] = d1; 3870 buffer[offset + 2] = d2; 3871 buffer[offset + 3] = d3; 3872 buffer[offset + 4] = d4; 3873 buffer[offset + 5] = d5; 3874 bufsize = offset + 6; 3875 } 3876 3877 // utf8 maximum expansion: x4 (-> utf32) 3878 // utf16 maximum expansion: x2 (-> utf32) 3879 // utf32 maximum expansion: x1 3880 enum 3881 { 3882 bufcapacitybytes = 3883 #ifdef PUGIXML_MEMORY_OUTPUT_STACK 3884 PUGIXML_MEMORY_OUTPUT_STACK 3885 #else 3886 10240 3887 #endif 3888 , 3889 bufcapacity = 
bufcapacitybytes / (sizeof(char_t) + 4) 3890 }; 3891 3892 char_t buffer[bufcapacity]; 3893 3894 union 3895 { 3896 uint8_t data_u8[4 * bufcapacity]; 3897 uint16_t data_u16[2 * bufcapacity]; 3898 uint32_t data_u32[bufcapacity]; 3899 char_t data_char[bufcapacity]; 3900 } scratch; 3901 3902 xml_writer& writer; 3903 size_t bufsize; 3904 xml_encoding encoding; 3905 }; 3906 text_output_escaped(xml_buffered_writer & writer,const char_t * s,chartypex_t type,unsigned int flags)3907 PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags) 3908 { 3909 while (*s) 3910 { 3911 const char_t* prev = s; 3912 3913 // While *s is a usual symbol 3914 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type)); 3915 3916 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 3917 3918 switch (*s) 3919 { 3920 case 0: break; 3921 case '&': 3922 writer.write('&', 'a', 'm', 'p', ';'); 3923 ++s; 3924 break; 3925 case '<': 3926 writer.write('&', 'l', 't', ';'); 3927 ++s; 3928 break; 3929 case '>': 3930 writer.write('&', 'g', 't', ';'); 3931 ++s; 3932 break; 3933 case '"': 3934 if (flags & format_attribute_single_quote) 3935 writer.write('"'); 3936 else 3937 writer.write('&', 'q', 'u', 'o', 't', ';'); 3938 ++s; 3939 break; 3940 case '\'': 3941 if (flags & format_attribute_single_quote) 3942 writer.write('&', 'a', 'p', 'o', 's', ';'); 3943 else 3944 writer.write('\''); 3945 ++s; 3946 break; 3947 default: // s is not a usual symbol 3948 { 3949 unsigned int ch = static_cast<unsigned int>(*s++); 3950 assert(ch < 32); 3951 3952 if (!(flags & format_skip_control_chars)) 3953 writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';'); 3954 } 3955 } 3956 } 3957 } 3958 text_output(xml_buffered_writer & writer,const char_t * s,chartypex_t type,unsigned int flags)3959 PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags) 3960 { 3961 if (flags & 
format_no_escapes) 3962 writer.write_string(s); 3963 else 3964 text_output_escaped(writer, s, type, flags); 3965 } 3966 text_output_cdata(xml_buffered_writer & writer,const char_t * s)3967 PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s) 3968 { 3969 do 3970 { 3971 writer.write('<', '!', '[', 'C', 'D'); 3972 writer.write('A', 'T', 'A', '['); 3973 3974 const char_t* prev = s; 3975 3976 // look for ]]> sequence - we can't output it as is since it terminates CDATA 3977 while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s; 3978 3979 // skip ]] if we stopped at ]]>, > will go to the next CDATA section 3980 if (*s) s += 2; 3981 3982 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 3983 3984 writer.write(']', ']', '>'); 3985 } 3986 while (*s); 3987 } 3988 text_output_indent(xml_buffered_writer & writer,const char_t * indent,size_t indent_length,unsigned int depth)3989 PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth) 3990 { 3991 switch (indent_length) 3992 { 3993 case 1: 3994 { 3995 for (unsigned int i = 0; i < depth; ++i) 3996 writer.write(indent[0]); 3997 break; 3998 } 3999 4000 case 2: 4001 { 4002 for (unsigned int i = 0; i < depth; ++i) 4003 writer.write(indent[0], indent[1]); 4004 break; 4005 } 4006 4007 case 3: 4008 { 4009 for (unsigned int i = 0; i < depth; ++i) 4010 writer.write(indent[0], indent[1], indent[2]); 4011 break; 4012 } 4013 4014 case 4: 4015 { 4016 for (unsigned int i = 0; i < depth; ++i) 4017 writer.write(indent[0], indent[1], indent[2], indent[3]); 4018 break; 4019 } 4020 4021 default: 4022 { 4023 for (unsigned int i = 0; i < depth; ++i) 4024 writer.write_buffer(indent, indent_length); 4025 } 4026 } 4027 } 4028 node_output_comment(xml_buffered_writer & writer,const char_t * s)4029 PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s) 4030 { 4031 writer.write('<', '!', '-', '-'); 4032 4033 while (*s) 4034 
{ 4035 const char_t* prev = s; 4036 4037 // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body 4038 while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s; 4039 4040 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 4041 4042 if (*s) 4043 { 4044 assert(*s == '-'); 4045 4046 writer.write('-', ' '); 4047 ++s; 4048 } 4049 } 4050 4051 writer.write('-', '-', '>'); 4052 } 4053 node_output_pi_value(xml_buffered_writer & writer,const char_t * s)4054 PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s) 4055 { 4056 while (*s) 4057 { 4058 const char_t* prev = s; 4059 4060 // look for ?> sequence - we can't output it since ?> terminates PI 4061 while (*s && !(s[0] == '?' && s[1] == '>')) ++s; 4062 4063 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 4064 4065 if (*s) 4066 { 4067 assert(s[0] == '?' && s[1] == '>'); 4068 4069 writer.write('?', ' ', '>'); 4070 s += 2; 4071 } 4072 } 4073 } 4074 node_output_attributes(xml_buffered_writer & writer,xml_node_struct * node,const char_t * indent,size_t indent_length,unsigned int flags,unsigned int depth)4075 PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) 4076 { 4077 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 4078 const char_t enquotation_char = (flags & format_attribute_single_quote) ? '\'' : '"'; 4079 4080 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) 4081 { 4082 if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes) 4083 { 4084 writer.write('\n'); 4085 4086 text_output_indent(writer, indent, indent_length, depth + 1); 4087 } 4088 else 4089 { 4090 writer.write(' '); 4091 } 4092 4093 writer.write_string(a->name ? 
a->name + 0 : default_name); 4094 writer.write('=', enquotation_char); 4095 4096 if (a->value) 4097 text_output(writer, a->value, ctx_special_attr, flags); 4098 4099 writer.write(enquotation_char); 4100 } 4101 } 4102 node_output_start(xml_buffered_writer & writer,xml_node_struct * node,const char_t * indent,size_t indent_length,unsigned int flags,unsigned int depth)4103 PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) 4104 { 4105 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 4106 const char_t* name = node->name ? node->name + 0 : default_name; 4107 4108 writer.write('<'); 4109 writer.write_string(name); 4110 4111 if (node->first_attribute) 4112 node_output_attributes(writer, node, indent, indent_length, flags, depth); 4113 4114 // element nodes can have value if parse_embed_pcdata was used 4115 if (!node->value) 4116 { 4117 if (!node->first_child) 4118 { 4119 if (flags & format_no_empty_element_tags) 4120 { 4121 writer.write('>', '<', '/'); 4122 writer.write_string(name); 4123 writer.write('>'); 4124 4125 return false; 4126 } 4127 else 4128 { 4129 if ((flags & format_raw) == 0) 4130 writer.write(' '); 4131 4132 writer.write('/', '>'); 4133 4134 return false; 4135 } 4136 } 4137 else 4138 { 4139 writer.write('>'); 4140 4141 return true; 4142 } 4143 } 4144 else 4145 { 4146 writer.write('>'); 4147 4148 text_output(writer, node->value, ctx_special_pcdata, flags); 4149 4150 if (!node->first_child) 4151 { 4152 writer.write('<', '/'); 4153 writer.write_string(name); 4154 writer.write('>'); 4155 4156 return false; 4157 } 4158 else 4159 { 4160 return true; 4161 } 4162 } 4163 } 4164 node_output_end(xml_buffered_writer & writer,xml_node_struct * node)4165 PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node) 4166 { 4167 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 4168 const char_t* name = node->name ? 
node->name + 0 : default_name; 4169 4170 writer.write('<', '/'); 4171 writer.write_string(name); 4172 writer.write('>'); 4173 } 4174 node_output_simple(xml_buffered_writer & writer,xml_node_struct * node,unsigned int flags)4175 PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags) 4176 { 4177 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 4178 4179 switch (PUGI__NODETYPE(node)) 4180 { 4181 case node_pcdata: 4182 text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags); 4183 break; 4184 4185 case node_cdata: 4186 text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT("")); 4187 break; 4188 4189 case node_comment: 4190 node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT("")); 4191 break; 4192 4193 case node_pi: 4194 writer.write('<', '?'); 4195 writer.write_string(node->name ? node->name + 0 : default_name); 4196 4197 if (node->value) 4198 { 4199 writer.write(' '); 4200 node_output_pi_value(writer, node->value); 4201 } 4202 4203 writer.write('?', '>'); 4204 break; 4205 4206 case node_declaration: 4207 writer.write('<', '?'); 4208 writer.write_string(node->name ? 
node->name + 0 : default_name); 4209 node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0); 4210 writer.write('?', '>'); 4211 break; 4212 4213 case node_doctype: 4214 writer.write('<', '!', 'D', 'O', 'C'); 4215 writer.write('T', 'Y', 'P', 'E'); 4216 4217 if (node->value) 4218 { 4219 writer.write(' '); 4220 writer.write_string(node->value); 4221 } 4222 4223 writer.write('>'); 4224 break; 4225 4226 default: 4227 assert(false && "Invalid node type"); // unreachable 4228 } 4229 } 4230 4231 enum indent_flags_t 4232 { 4233 indent_newline = 1, 4234 indent_indent = 2 4235 }; 4236 node_output(xml_buffered_writer & writer,xml_node_struct * root,const char_t * indent,unsigned int flags,unsigned int depth)4237 PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth) 4238 { 4239 size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0; 4240 unsigned int indent_flags = indent_indent; 4241 4242 xml_node_struct* node = root; 4243 4244 do 4245 { 4246 assert(node); 4247 4248 // begin writing current node 4249 if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata) 4250 { 4251 node_output_simple(writer, node, flags); 4252 4253 indent_flags = 0; 4254 } 4255 else 4256 { 4257 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) 4258 writer.write('\n'); 4259 4260 if ((indent_flags & indent_indent) && indent_length) 4261 text_output_indent(writer, indent, indent_length, depth); 4262 4263 if (PUGI__NODETYPE(node) == node_element) 4264 { 4265 indent_flags = indent_newline | indent_indent; 4266 4267 if (node_output_start(writer, node, indent, indent_length, flags, depth)) 4268 { 4269 // element nodes can have value if parse_embed_pcdata was used 4270 if (node->value) 4271 indent_flags = 0; 4272 4273 node = node->first_child; 4274 depth++; 4275 continue; 4276 } 4277 } 4278 
else if (PUGI__NODETYPE(node) == node_document) 4279 { 4280 indent_flags = indent_indent; 4281 4282 if (node->first_child) 4283 { 4284 node = node->first_child; 4285 continue; 4286 } 4287 } 4288 else 4289 { 4290 node_output_simple(writer, node, flags); 4291 4292 indent_flags = indent_newline | indent_indent; 4293 } 4294 } 4295 4296 // continue to the next node 4297 while (node != root) 4298 { 4299 if (node->next_sibling) 4300 { 4301 node = node->next_sibling; 4302 break; 4303 } 4304 4305 node = node->parent; 4306 4307 // write closing node 4308 if (PUGI__NODETYPE(node) == node_element) 4309 { 4310 depth--; 4311 4312 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) 4313 writer.write('\n'); 4314 4315 if ((indent_flags & indent_indent) && indent_length) 4316 text_output_indent(writer, indent, indent_length, depth); 4317 4318 node_output_end(writer, node); 4319 4320 indent_flags = indent_newline | indent_indent; 4321 } 4322 } 4323 } 4324 while (node != root); 4325 4326 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) 4327 writer.write('\n'); 4328 } 4329 has_declaration(xml_node_struct * node)4330 PUGI__FN bool has_declaration(xml_node_struct* node) 4331 { 4332 for (xml_node_struct* child = node->first_child; child; child = child->next_sibling) 4333 { 4334 xml_node_type type = PUGI__NODETYPE(child); 4335 4336 if (type == node_declaration) return true; 4337 if (type == node_element) return false; 4338 } 4339 4340 return false; 4341 } 4342 is_attribute_of(xml_attribute_struct * attr,xml_node_struct * node)4343 PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node) 4344 { 4345 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) 4346 if (a == attr) 4347 return true; 4348 4349 return false; 4350 } 4351 allow_insert_attribute(xml_node_type parent)4352 PUGI__FN bool allow_insert_attribute(xml_node_type parent) 4353 { 4354 return parent == node_element || parent == node_declaration; 4355 } 
4356 allow_insert_child(xml_node_type parent,xml_node_type child)4357 PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child) 4358 { 4359 if (parent != node_document && parent != node_element) return false; 4360 if (child == node_document || child == node_null) return false; 4361 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false; 4362 4363 return true; 4364 } 4365 allow_move(xml_node parent,xml_node child)4366 PUGI__FN bool allow_move(xml_node parent, xml_node child) 4367 { 4368 // check that child can be a child of parent 4369 if (!allow_insert_child(parent.type(), child.type())) 4370 return false; 4371 4372 // check that node is not moved between documents 4373 if (parent.root() != child.root()) 4374 return false; 4375 4376 // check that new parent is not in the child subtree 4377 xml_node cur = parent; 4378 4379 while (cur) 4380 { 4381 if (cur == child) 4382 return false; 4383 4384 cur = cur.parent(); 4385 } 4386 4387 return true; 4388 } 4389 4390 template <typename String, typename Header> node_copy_string(String & dest,Header & header,uintptr_t header_mask,char_t * source,Header & source_header,xml_allocator * alloc)4391 PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc) 4392 { 4393 assert(!dest && (header & header_mask) == 0); 4394 4395 if (source) 4396 { 4397 if (alloc && (source_header & header_mask) == 0) 4398 { 4399 dest = source; 4400 4401 // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared 4402 header |= xml_memory_page_contents_shared_mask; 4403 source_header |= xml_memory_page_contents_shared_mask; 4404 } 4405 else 4406 strcpy_insitu(dest, header, header_mask, source, strlength(source)); 4407 } 4408 } 4409 node_copy_contents(xml_node_struct * dn,xml_node_struct * sn,xml_allocator * shared_alloc)4410 PUGI__FN void 
node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc) 4411 { 4412 node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc); 4413 node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc); 4414 4415 for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute) 4416 { 4417 xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn)); 4418 4419 if (da) 4420 { 4421 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); 4422 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); 4423 } 4424 } 4425 } 4426 node_copy_tree(xml_node_struct * dn,xml_node_struct * sn)4427 PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn) 4428 { 4429 xml_allocator& alloc = get_allocator(dn); 4430 xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? 
&alloc : 0; 4431 4432 node_copy_contents(dn, sn, shared_alloc); 4433 4434 xml_node_struct* dit = dn; 4435 xml_node_struct* sit = sn->first_child; 4436 4437 while (sit && sit != sn) 4438 { 4439 // loop invariant: dit is inside the subtree rooted at dn 4440 assert(dit); 4441 4442 // when a tree is copied into one of the descendants, we need to skip that subtree to avoid an infinite loop 4443 if (sit != dn) 4444 { 4445 xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit)); 4446 4447 if (copy) 4448 { 4449 node_copy_contents(copy, sit, shared_alloc); 4450 4451 if (sit->first_child) 4452 { 4453 dit = copy; 4454 sit = sit->first_child; 4455 continue; 4456 } 4457 } 4458 } 4459 4460 // continue to the next node 4461 do 4462 { 4463 if (sit->next_sibling) 4464 { 4465 sit = sit->next_sibling; 4466 break; 4467 } 4468 4469 sit = sit->parent; 4470 dit = dit->parent; 4471 4472 // loop invariant: dit is inside the subtree rooted at dn while sit is inside sn 4473 assert(sit == sn || dit); 4474 } 4475 while (sit != sn); 4476 } 4477 4478 assert(!sit || dit == dn->parent); 4479 } 4480 node_copy_attribute(xml_attribute_struct * da,xml_attribute_struct * sa)4481 PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa) 4482 { 4483 xml_allocator& alloc = get_allocator(da); 4484 xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? 
&alloc : 0; 4485 4486 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); 4487 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); 4488 } 4489 is_text_node(xml_node_struct * node)4490 inline bool is_text_node(xml_node_struct* node) 4491 { 4492 xml_node_type type = PUGI__NODETYPE(node); 4493 4494 return type == node_pcdata || type == node_cdata; 4495 } 4496 4497 // get value with conversion functions string_to_integer(const char_t * value,U minv,U maxv)4498 template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW U string_to_integer(const char_t* value, U minv, U maxv) 4499 { 4500 U result = 0; 4501 const char_t* s = value; 4502 4503 while (PUGI__IS_CHARTYPE(*s, ct_space)) 4504 s++; 4505 4506 bool negative = (*s == '-'); 4507 4508 s += (*s == '+' || *s == '-'); 4509 4510 bool overflow = false; 4511 4512 if (s[0] == '0' && (s[1] | ' ') == 'x') 4513 { 4514 s += 2; 4515 4516 // since overflow detection relies on length of the sequence skip leading zeros 4517 while (*s == '0') 4518 s++; 4519 4520 const char_t* start = s; 4521 4522 for (;;) 4523 { 4524 if (static_cast<unsigned>(*s - '0') < 10) 4525 result = result * 16 + (*s - '0'); 4526 else if (static_cast<unsigned>((*s | ' ') - 'a') < 6) 4527 result = result * 16 + ((*s | ' ') - 'a' + 10); 4528 else 4529 break; 4530 4531 s++; 4532 } 4533 4534 size_t digits = static_cast<size_t>(s - start); 4535 4536 overflow = digits > sizeof(U) * 2; 4537 } 4538 else 4539 { 4540 // since overflow detection relies on length of the sequence skip leading zeros 4541 while (*s == '0') 4542 s++; 4543 4544 const char_t* start = s; 4545 4546 for (;;) 4547 { 4548 if (static_cast<unsigned>(*s - '0') < 10) 4549 result = result * 10 + (*s - '0'); 4550 else 4551 break; 4552 4553 s++; 4554 } 4555 4556 size_t digits = static_cast<size_t>(s - start); 4557 4558 PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 
2); 4559 4560 const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5; 4561 const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6'; 4562 const size_t high_bit = sizeof(U) * 8 - 1; 4563 4564 overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit))); 4565 } 4566 4567 if (negative) 4568 { 4569 // Workaround for crayc++ CC-3059: Expected no overflow in routine. 4570 #ifdef _CRAYC 4571 return (overflow || result > ~minv + 1) ? minv : ~result + 1; 4572 #else 4573 return (overflow || result > 0 - minv) ? minv : 0 - result; 4574 #endif 4575 } 4576 else 4577 return (overflow || result > maxv) ? maxv : result; 4578 } 4579 get_value_int(const char_t * value)4580 PUGI__FN int get_value_int(const char_t* value) 4581 { 4582 return string_to_integer<unsigned int>(value, static_cast<unsigned int>(INT_MIN), INT_MAX); 4583 } 4584 get_value_uint(const char_t * value)4585 PUGI__FN unsigned int get_value_uint(const char_t* value) 4586 { 4587 return string_to_integer<unsigned int>(value, 0, UINT_MAX); 4588 } 4589 get_value_double(const char_t * value)4590 PUGI__FN double get_value_double(const char_t* value) 4591 { 4592 #ifdef PUGIXML_WCHAR_MODE 4593 return wcstod(value, 0); 4594 #else 4595 return strtod(value, 0); 4596 #endif 4597 } 4598 get_value_float(const char_t * value)4599 PUGI__FN float get_value_float(const char_t* value) 4600 { 4601 #ifdef PUGIXML_WCHAR_MODE 4602 return static_cast<float>(wcstod(value, 0)); 4603 #else 4604 return static_cast<float>(strtod(value, 0)); 4605 #endif 4606 } 4607 get_value_bool(const char_t * value)4608 PUGI__FN bool get_value_bool(const char_t* value) 4609 { 4610 // only look at first char 4611 char_t first = *value; 4612 4613 // 1*, t* (true), T* (True), y* (yes), Y* (YES) 4614 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y'); 4615 } 4616 4617 #ifdef PUGIXML_HAS_LONG_LONG get_value_llong(const char_t * 
value)4618 PUGI__FN long long get_value_llong(const char_t* value) 4619 { 4620 return string_to_integer<unsigned long long>(value, static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX); 4621 } 4622 get_value_ullong(const char_t * value)4623 PUGI__FN unsigned long long get_value_ullong(const char_t* value) 4624 { 4625 return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX); 4626 } 4627 #endif 4628 integer_to_string(char_t * begin,char_t * end,U value,bool negative)4629 template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative) 4630 { 4631 char_t* result = end - 1; 4632 U rest = negative ? 0 - value : value; 4633 4634 do 4635 { 4636 *result-- = static_cast<char_t>('0' + (rest % 10)); 4637 rest /= 10; 4638 } 4639 while (rest); 4640 4641 assert(result >= begin); 4642 (void)begin; 4643 4644 *result = '-'; 4645 4646 return result + !negative; 4647 } 4648 4649 // set value with conversion functions 4650 template <typename String, typename Header> set_value_ascii(String & dest,Header & header,uintptr_t header_mask,char * buf)4651 PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf) 4652 { 4653 #ifdef PUGIXML_WCHAR_MODE 4654 char_t wbuf[128]; 4655 assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0])); 4656 4657 size_t offset = 0; 4658 for (; buf[offset]; ++offset) wbuf[offset] = buf[offset]; 4659 4660 return strcpy_insitu(dest, header, header_mask, wbuf, offset); 4661 #else 4662 return strcpy_insitu(dest, header, header_mask, buf, strlen(buf)); 4663 #endif 4664 } 4665 4666 template <typename U, typename String, typename Header> set_value_integer(String & dest,Header & header,uintptr_t header_mask,U value,bool negative)4667 PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative) 4668 { 4669 char_t buf[64]; 4670 char_t* end = buf + sizeof(buf) / sizeof(buf[0]); 4671 char_t* begin = 
integer_to_string(buf, end, value, negative); 4672 4673 return strcpy_insitu(dest, header, header_mask, begin, end - begin); 4674 } 4675 4676 template <typename String, typename Header> set_value_convert(String & dest,Header & header,uintptr_t header_mask,float value,int precision)4677 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value, int precision) 4678 { 4679 char buf[128]; 4680 PUGI__SNPRINTF(buf, "%.*g", precision, double(value)); 4681 4682 return set_value_ascii(dest, header, header_mask, buf); 4683 } 4684 4685 template <typename String, typename Header> set_value_convert(String & dest,Header & header,uintptr_t header_mask,double value,int precision)4686 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value, int precision) 4687 { 4688 char buf[128]; 4689 PUGI__SNPRINTF(buf, "%.*g", precision, value); 4690 4691 return set_value_ascii(dest, header, header_mask, buf); 4692 } 4693 4694 template <typename String, typename Header> set_value_bool(String & dest,Header & header,uintptr_t header_mask,bool value)4695 PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value) 4696 { 4697 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 
4 : 5); 4698 } 4699 load_buffer_impl(xml_document_struct * doc,xml_node_struct * root,void * contents,size_t size,unsigned int options,xml_encoding encoding,bool is_mutable,bool own,char_t ** out_buffer)4700 PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer) 4701 { 4702 // check input buffer 4703 if (!contents && size) return make_parse_result(status_io_error); 4704 4705 // get actual encoding 4706 xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size); 4707 4708 // get private buffer 4709 char_t* buffer = 0; 4710 size_t length = 0; 4711 4712 // coverity[var_deref_model] 4713 if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory); 4714 4715 // delete original buffer if we performed a conversion 4716 if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents); 4717 4718 // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself 4719 if (own || buffer != contents) *out_buffer = buffer; 4720 4721 // store buffer for offset_debug 4722 doc->buffer = buffer; 4723 4724 // parse 4725 xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options); 4726 4727 // remember encoding 4728 res.encoding = buffer_encoding; 4729 4730 return res; 4731 } 4732 4733 // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick get_file_size(FILE * file,size_t & out_result)4734 PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result) 4735 { 4736 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE) 4737 // there are 64-bit versions of fseek/ftell, let's use them 4738 typedef __int64 length_type; 4739 4740 _fseeki64(file, 0, 
SEEK_END); 4741 length_type length = _ftelli64(file); 4742 _fseeki64(file, 0, SEEK_SET); 4743 #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)) 4744 // there are 64-bit versions of fseek/ftell, let's use them 4745 typedef off64_t length_type; 4746 4747 fseeko64(file, 0, SEEK_END); 4748 length_type length = ftello64(file); 4749 fseeko64(file, 0, SEEK_SET); 4750 #else 4751 // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway. 4752 typedef long length_type; 4753 4754 fseek(file, 0, SEEK_END); 4755 length_type length = ftell(file); 4756 fseek(file, 0, SEEK_SET); 4757 #endif 4758 4759 // check for I/O errors 4760 if (length < 0) return status_io_error; 4761 4762 // check for overflow 4763 size_t result = static_cast<size_t>(length); 4764 4765 if (static_cast<length_type>(result) != length) return status_out_of_memory; 4766 4767 // finalize 4768 out_result = result; 4769 4770 return status_ok; 4771 } 4772 4773 // This function assumes that buffer has extra sizeof(char_t) writable bytes after size zero_terminate_buffer(void * buffer,size_t size,xml_encoding encoding)4774 PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding) 4775 { 4776 // We only need to zero-terminate if encoding conversion does not do it for us 4777 #ifdef PUGIXML_WCHAR_MODE 4778 xml_encoding wchar_encoding = get_wchar_encoding(); 4779 4780 if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding)) 4781 { 4782 size_t length = size / sizeof(char_t); 4783 4784 static_cast<char_t*>(buffer)[length] = 0; 4785 return (length + 1) * sizeof(char_t); 4786 } 4787 #else 4788 if (encoding == encoding_utf8) 4789 { 4790 static_cast<char*>(buffer)[size] = 0; 4791 return size + 1; 4792 } 4793 #endif 4794 4795 return size; 4796 } 4797 load_file_impl(xml_document_struct * doc,FILE * file,unsigned int 
options,xml_encoding encoding,char_t ** out_buffer)4798 PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer) 4799 { 4800 if (!file) return make_parse_result(status_file_not_found); 4801 4802 // get file size (can result in I/O errors) 4803 size_t size = 0; 4804 xml_parse_status size_status = get_file_size(file, size); 4805 if (size_status != status_ok) return make_parse_result(size_status); 4806 4807 size_t max_suffix_size = sizeof(char_t); 4808 4809 // allocate buffer for the whole file 4810 char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size)); 4811 if (!contents) return make_parse_result(status_out_of_memory); 4812 4813 // read file in memory 4814 size_t read_size = fread(contents, 1, size, file); 4815 4816 if (read_size != size) 4817 { 4818 xml_memory::deallocate(contents); 4819 return make_parse_result(status_io_error); 4820 } 4821 4822 xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size); 4823 4824 return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer); 4825 } 4826 close_file(FILE * file)4827 PUGI__FN void close_file(FILE* file) 4828 { 4829 fclose(file); 4830 } 4831 4832 #ifndef PUGIXML_NO_STL 4833 template <typename T> struct xml_stream_chunk 4834 { createxml_stream_chunk4835 static xml_stream_chunk* create() 4836 { 4837 void* memory = xml_memory::allocate(sizeof(xml_stream_chunk)); 4838 if (!memory) return 0; 4839 4840 return new (memory) xml_stream_chunk(); 4841 } 4842 destroyxml_stream_chunk4843 static void destroy(xml_stream_chunk* chunk) 4844 { 4845 // free chunk chain 4846 while (chunk) 4847 { 4848 xml_stream_chunk* next_ = chunk->next; 4849 4850 xml_memory::deallocate(chunk); 4851 4852 chunk = next_; 4853 } 4854 } 4855 xml_stream_chunkxml_stream_chunk4856 xml_stream_chunk(): next(0), size(0) 4857 { 4858 } 4859 4860 
xml_stream_chunk* next; 4861 size_t size; 4862 4863 T data[xml_memory_page_size / sizeof(T)]; 4864 }; 4865 load_stream_data_noseek(std::basic_istream<T> & stream,void ** out_buffer,size_t * out_size)4866 template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size) 4867 { 4868 auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy); 4869 4870 // read file to a chunk list 4871 size_t total = 0; 4872 xml_stream_chunk<T>* last = 0; 4873 4874 while (!stream.eof()) 4875 { 4876 // allocate new chunk 4877 xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create(); 4878 if (!chunk) return status_out_of_memory; 4879 4880 // append chunk to list 4881 if (last) last = last->next = chunk; 4882 else chunks.data = last = chunk; 4883 4884 // read data to chunk 4885 stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T))); 4886 chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T); 4887 4888 // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors 4889 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error; 4890 4891 // guard against huge files (chunk size is small enough to make this overflow check work) 4892 if (total + chunk->size < total) return status_out_of_memory; 4893 total += chunk->size; 4894 } 4895 4896 size_t max_suffix_size = sizeof(char_t); 4897 4898 // copy chunk list to a contiguous buffer 4899 char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size)); 4900 if (!buffer) return status_out_of_memory; 4901 4902 char* write = buffer; 4903 4904 for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next) 4905 { 4906 assert(write + chunk->size <= buffer + total); 4907 memcpy(write, chunk->data, chunk->size); 4908 write += chunk->size; 4909 } 4910 4911 assert(write == buffer + total); 4912 4913 // return buffer 4914 *out_buffer = 
buffer; 4915 *out_size = total; 4916 4917 return status_ok; 4918 } 4919 load_stream_data_seek(std::basic_istream<T> & stream,void ** out_buffer,size_t * out_size)4920 template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size) 4921 { 4922 // get length of remaining data in stream 4923 typename std::basic_istream<T>::pos_type pos = stream.tellg(); 4924 stream.seekg(0, std::ios::end); 4925 std::streamoff length = stream.tellg() - pos; 4926 stream.seekg(pos); 4927 4928 if (stream.fail() || pos < 0) return status_io_error; 4929 4930 // guard against huge files 4931 size_t read_length = static_cast<size_t>(length); 4932 4933 if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory; 4934 4935 size_t max_suffix_size = sizeof(char_t); 4936 4937 // read stream data into memory (guard against stream exceptions with buffer holder) 4938 auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate); 4939 if (!buffer.data) return status_out_of_memory; 4940 4941 stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length)); 4942 4943 // read may set failbit | eofbit in case gcount() is less than read_length (i.e. 
line ending conversion), so check for other I/O errors 4944 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error; 4945 4946 // return buffer 4947 size_t actual_length = static_cast<size_t>(stream.gcount()); 4948 assert(actual_length <= read_length); 4949 4950 *out_buffer = buffer.release(); 4951 *out_size = actual_length * sizeof(T); 4952 4953 return status_ok; 4954 } 4955 load_stream_impl(xml_document_struct * doc,std::basic_istream<T> & stream,unsigned int options,xml_encoding encoding,char_t ** out_buffer)4956 template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer) 4957 { 4958 void* buffer = 0; 4959 size_t size = 0; 4960 xml_parse_status status = status_ok; 4961 4962 // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits) 4963 if (stream.fail()) return make_parse_result(status_io_error); 4964 4965 // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory) 4966 if (stream.tellg() < 0) 4967 { 4968 stream.clear(); // clear error flags that could be set by a failing tellg 4969 status = load_stream_data_noseek(stream, &buffer, &size); 4970 } 4971 else 4972 status = load_stream_data_seek(stream, &buffer, &size); 4973 4974 if (status != status_ok) return make_parse_result(status); 4975 4976 xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size); 4977 4978 return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer); 4979 } 4980 #endif 4981 4982 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))) open_file_wide(const wchar_t * path,const wchar_t * mode)4983 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* 
mode) 4984 { 4985 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 4986 FILE* file = 0; 4987 return _wfopen_s(&file, path, mode) == 0 ? file : 0; 4988 #else 4989 return _wfopen(path, mode); 4990 #endif 4991 } 4992 #else convert_path_heap(const wchar_t * str)4993 PUGI__FN char* convert_path_heap(const wchar_t* str) 4994 { 4995 assert(str); 4996 4997 // first pass: get length in utf8 characters 4998 size_t length = strlength_wide(str); 4999 size_t size = as_utf8_begin(str, length); 5000 5001 // allocate resulting string 5002 char* result = static_cast<char*>(xml_memory::allocate(size + 1)); 5003 if (!result) return 0; 5004 5005 // second pass: convert to utf8 5006 as_utf8_end(result, size, str, length); 5007 5008 // zero-terminate 5009 result[size] = 0; 5010 5011 return result; 5012 } 5013 open_file_wide(const wchar_t * path,const wchar_t * mode)5014 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) 5015 { 5016 // there is no standard function to open wide paths, so our best bet is to try utf8 path 5017 char* path_utf8 = convert_path_heap(path); 5018 if (!path_utf8) return 0; 5019 5020 // convert mode to ASCII (we mirror _wfopen interface) 5021 char mode_ascii[4] = {0}; 5022 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]); 5023 5024 // try to open the utf8 path 5025 FILE* result = fopen(path_utf8, mode_ascii); 5026 5027 // free dummy buffer 5028 xml_memory::deallocate(path_utf8); 5029 5030 return result; 5031 } 5032 #endif 5033 open_file(const char * path,const char * mode)5034 PUGI__FN FILE* open_file(const char* path, const char* mode) 5035 { 5036 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 5037 FILE* file = 0; 5038 return fopen_s(&file, path, mode) == 0 ? 
file : 0; 5039 #else 5040 return fopen(path, mode); 5041 #endif 5042 } 5043 save_file_impl(const xml_document & doc,FILE * file,const char_t * indent,unsigned int flags,xml_encoding encoding)5044 PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding) 5045 { 5046 if (!file) return false; 5047 5048 xml_writer_file writer(file); 5049 doc.save(writer, indent, flags, encoding); 5050 5051 return ferror(file) == 0; 5052 } 5053 5054 struct name_null_sentry 5055 { 5056 xml_node_struct* node; 5057 char_t* name; 5058 name_null_sentryname_null_sentry5059 name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name) 5060 { 5061 node->name = 0; 5062 } 5063 ~name_null_sentryname_null_sentry5064 ~name_null_sentry() 5065 { 5066 node->name = name; 5067 } 5068 }; 5069 PUGI__NS_END 5070 5071 namespace pugi 5072 { xml_writer_file(void * file_)5073 PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_) 5074 { 5075 } 5076 write(const void * data,size_t size)5077 PUGI__FN void xml_writer_file::write(const void* data, size_t size) 5078 { 5079 size_t result = fwrite(data, 1, size, static_cast<FILE*>(file)); 5080 (void)!result; // unfortunately we can't do proper error handling here 5081 } 5082 5083 #ifndef PUGIXML_NO_STL xml_writer_stream(std::basic_ostream<char,std::char_traits<char>> & stream)5084 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0) 5085 { 5086 } 5087 xml_writer_stream(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream)5088 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream) 5089 { 5090 } 5091 write(const void * data,size_t size)5092 PUGI__FN void xml_writer_stream::write(const void* data, size_t size) 5093 { 5094 if (narrow_stream) 5095 { 5096 assert(!wide_stream); 5097 
narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size)); 5098 } 5099 else 5100 { 5101 assert(wide_stream); 5102 assert(size % sizeof(wchar_t) == 0); 5103 5104 wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t))); 5105 } 5106 } 5107 #endif 5108 xml_tree_walker()5109 PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0) 5110 { 5111 } 5112 ~xml_tree_walker()5113 PUGI__FN xml_tree_walker::~xml_tree_walker() 5114 { 5115 } 5116 depth() const5117 PUGI__FN int xml_tree_walker::depth() const 5118 { 5119 return _depth; 5120 } 5121 begin(xml_node &)5122 PUGI__FN bool xml_tree_walker::begin(xml_node&) 5123 { 5124 return true; 5125 } 5126 end(xml_node &)5127 PUGI__FN bool xml_tree_walker::end(xml_node&) 5128 { 5129 return true; 5130 } 5131 xml_attribute()5132 PUGI__FN xml_attribute::xml_attribute(): _attr(0) 5133 { 5134 } 5135 xml_attribute(xml_attribute_struct * attr)5136 PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr) 5137 { 5138 } 5139 unspecified_bool_xml_attribute(xml_attribute ***)5140 PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***) 5141 { 5142 } 5143 operator xml_attribute::unspecified_bool_type() const5144 PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const 5145 { 5146 return _attr ? 
unspecified_bool_xml_attribute : 0; 5147 } 5148 operator !() const5149 PUGI__FN bool xml_attribute::operator!() const 5150 { 5151 return !_attr; 5152 } 5153 operator ==(const xml_attribute & r) const5154 PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const 5155 { 5156 return (_attr == r._attr); 5157 } 5158 operator !=(const xml_attribute & r) const5159 PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const 5160 { 5161 return (_attr != r._attr); 5162 } 5163 operator <(const xml_attribute & r) const5164 PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const 5165 { 5166 return (_attr < r._attr); 5167 } 5168 operator >(const xml_attribute & r) const5169 PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const 5170 { 5171 return (_attr > r._attr); 5172 } 5173 operator <=(const xml_attribute & r) const5174 PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const 5175 { 5176 return (_attr <= r._attr); 5177 } 5178 operator >=(const xml_attribute & r) const5179 PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const 5180 { 5181 return (_attr >= r._attr); 5182 } 5183 next_attribute() const5184 PUGI__FN xml_attribute xml_attribute::next_attribute() const 5185 { 5186 return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute(); 5187 } 5188 previous_attribute() const5189 PUGI__FN xml_attribute xml_attribute::previous_attribute() const 5190 { 5191 return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute(); 5192 } 5193 as_string(const char_t * def) const5194 PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const 5195 { 5196 return (_attr && _attr->value) ? _attr->value + 0 : def; 5197 } 5198 as_int(int def) const5199 PUGI__FN int xml_attribute::as_int(int def) const 5200 { 5201 return (_attr && _attr->value) ? 
impl::get_value_int(_attr->value) : def; 5202 } 5203 as_uint(unsigned int def) const5204 PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const 5205 { 5206 return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def; 5207 } 5208 as_double(double def) const5209 PUGI__FN double xml_attribute::as_double(double def) const 5210 { 5211 return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def; 5212 } 5213 as_float(float def) const5214 PUGI__FN float xml_attribute::as_float(float def) const 5215 { 5216 return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def; 5217 } 5218 as_bool(bool def) const5219 PUGI__FN bool xml_attribute::as_bool(bool def) const 5220 { 5221 return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def; 5222 } 5223 5224 #ifdef PUGIXML_HAS_LONG_LONG as_llong(long long def) const5225 PUGI__FN long long xml_attribute::as_llong(long long def) const 5226 { 5227 return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def; 5228 } 5229 as_ullong(unsigned long long def) const5230 PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const 5231 { 5232 return (_attr && _attr->value) ? impl::get_value_ullong(_attr->value) : def; 5233 } 5234 #endif 5235 empty() const5236 PUGI__FN bool xml_attribute::empty() const 5237 { 5238 return !_attr; 5239 } 5240 name() const5241 PUGI__FN const char_t* xml_attribute::name() const 5242 { 5243 return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT(""); 5244 } 5245 value() const5246 PUGI__FN const char_t* xml_attribute::value() const 5247 { 5248 return (_attr && _attr->value) ? 
_attr->value + 0 : PUGIXML_TEXT(""); 5249 } 5250 hash_value() const5251 PUGI__FN size_t xml_attribute::hash_value() const 5252 { 5253 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct)); 5254 } 5255 internal_object() const5256 PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const 5257 { 5258 return _attr; 5259 } 5260 operator =(const char_t * rhs)5261 PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs) 5262 { 5263 set_value(rhs); 5264 return *this; 5265 } 5266 operator =(int rhs)5267 PUGI__FN xml_attribute& xml_attribute::operator=(int rhs) 5268 { 5269 set_value(rhs); 5270 return *this; 5271 } 5272 operator =(unsigned int rhs)5273 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs) 5274 { 5275 set_value(rhs); 5276 return *this; 5277 } 5278 operator =(long rhs)5279 PUGI__FN xml_attribute& xml_attribute::operator=(long rhs) 5280 { 5281 set_value(rhs); 5282 return *this; 5283 } 5284 operator =(unsigned long rhs)5285 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs) 5286 { 5287 set_value(rhs); 5288 return *this; 5289 } 5290 operator =(double rhs)5291 PUGI__FN xml_attribute& xml_attribute::operator=(double rhs) 5292 { 5293 set_value(rhs); 5294 return *this; 5295 } 5296 operator =(float rhs)5297 PUGI__FN xml_attribute& xml_attribute::operator=(float rhs) 5298 { 5299 set_value(rhs); 5300 return *this; 5301 } 5302 operator =(bool rhs)5303 PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs) 5304 { 5305 set_value(rhs); 5306 return *this; 5307 } 5308 5309 #ifdef PUGIXML_HAS_LONG_LONG operator =(long long rhs)5310 PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs) 5311 { 5312 set_value(rhs); 5313 return *this; 5314 } 5315 operator =(unsigned long long rhs)5316 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs) 5317 { 5318 set_value(rhs); 5319 return *this; 5320 } 5321 #endif 5322 set_name(const char_t * rhs)5323 PUGI__FN bool 
xml_attribute::set_name(const char_t* rhs) 5324 { 5325 if (!_attr) return false; 5326 5327 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); 5328 } 5329 set_value(const char_t * rhs)5330 PUGI__FN bool xml_attribute::set_value(const char_t* rhs) 5331 { 5332 if (!_attr) return false; 5333 5334 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); 5335 } 5336 set_value(int rhs)5337 PUGI__FN bool xml_attribute::set_value(int rhs) 5338 { 5339 if (!_attr) return false; 5340 5341 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); 5342 } 5343 set_value(unsigned int rhs)5344 PUGI__FN bool xml_attribute::set_value(unsigned int rhs) 5345 { 5346 if (!_attr) return false; 5347 5348 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); 5349 } 5350 set_value(long rhs)5351 PUGI__FN bool xml_attribute::set_value(long rhs) 5352 { 5353 if (!_attr) return false; 5354 5355 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); 5356 } 5357 set_value(unsigned long rhs)5358 PUGI__FN bool xml_attribute::set_value(unsigned long rhs) 5359 { 5360 if (!_attr) return false; 5361 5362 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); 5363 } 5364 set_value(double rhs)5365 PUGI__FN bool xml_attribute::set_value(double rhs) 5366 { 5367 if (!_attr) return false; 5368 5369 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision); 5370 } 5371 set_value(double rhs,int precision)5372 PUGI__FN bool xml_attribute::set_value(double rhs, int precision) 5373 { 5374 if 
(!_attr) return false; 5375 5376 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision); 5377 } 5378 set_value(float rhs)5379 PUGI__FN bool xml_attribute::set_value(float rhs) 5380 { 5381 if (!_attr) return false; 5382 5383 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision); 5384 } 5385 set_value(float rhs,int precision)5386 PUGI__FN bool xml_attribute::set_value(float rhs, int precision) 5387 { 5388 if (!_attr) return false; 5389 5390 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision); 5391 } 5392 set_value(bool rhs)5393 PUGI__FN bool xml_attribute::set_value(bool rhs) 5394 { 5395 if (!_attr) return false; 5396 5397 return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); 5398 } 5399 5400 #ifdef PUGIXML_HAS_LONG_LONG set_value(long long rhs)5401 PUGI__FN bool xml_attribute::set_value(long long rhs) 5402 { 5403 if (!_attr) return false; 5404 5405 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); 5406 } 5407 set_value(unsigned long long rhs)5408 PUGI__FN bool xml_attribute::set_value(unsigned long long rhs) 5409 { 5410 if (!_attr) return false; 5411 5412 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); 5413 } 5414 #endif 5415 5416 #ifdef __BORLANDC__ operator &&(const xml_attribute & lhs,bool rhs)5417 PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs) 5418 { 5419 return (bool)lhs && rhs; 5420 } 5421 operator ||(const xml_attribute & lhs,bool rhs)5422 PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs) 5423 { 5424 return (bool)lhs || rhs; 5425 } 5426 #endif 5427 xml_node()5428 PUGI__FN xml_node::xml_node(): 
_root(0) 5429 { 5430 } 5431 xml_node(xml_node_struct * p)5432 PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p) 5433 { 5434 } 5435 unspecified_bool_xml_node(xml_node ***)5436 PUGI__FN static void unspecified_bool_xml_node(xml_node***) 5437 { 5438 } 5439 operator xml_node::unspecified_bool_type() const5440 PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const 5441 { 5442 return _root ? unspecified_bool_xml_node : 0; 5443 } 5444 operator !() const5445 PUGI__FN bool xml_node::operator!() const 5446 { 5447 return !_root; 5448 } 5449 begin() const5450 PUGI__FN xml_node::iterator xml_node::begin() const 5451 { 5452 return iterator(_root ? _root->first_child + 0 : 0, _root); 5453 } 5454 end() const5455 PUGI__FN xml_node::iterator xml_node::end() const 5456 { 5457 return iterator(0, _root); 5458 } 5459 attributes_begin() const5460 PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const 5461 { 5462 return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root); 5463 } 5464 attributes_end() const5465 PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const 5466 { 5467 return attribute_iterator(0, _root); 5468 } 5469 children() const5470 PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const 5471 { 5472 return xml_object_range<xml_node_iterator>(begin(), end()); 5473 } 5474 children(const char_t * name_) const5475 PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const 5476 { 5477 return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_)); 5478 } 5479 attributes() const5480 PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const 5481 { 5482 return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end()); 5483 } 5484 operator ==(const xml_node & r) const5485 PUGI__FN bool xml_node::operator==(const xml_node& r) const 5486 { 
5487 return (_root == r._root); 5488 } 5489 operator !=(const xml_node & r) const5490 PUGI__FN bool xml_node::operator!=(const xml_node& r) const 5491 { 5492 return (_root != r._root); 5493 } 5494 operator <(const xml_node & r) const5495 PUGI__FN bool xml_node::operator<(const xml_node& r) const 5496 { 5497 return (_root < r._root); 5498 } 5499 operator >(const xml_node & r) const5500 PUGI__FN bool xml_node::operator>(const xml_node& r) const 5501 { 5502 return (_root > r._root); 5503 } 5504 operator <=(const xml_node & r) const5505 PUGI__FN bool xml_node::operator<=(const xml_node& r) const 5506 { 5507 return (_root <= r._root); 5508 } 5509 operator >=(const xml_node & r) const5510 PUGI__FN bool xml_node::operator>=(const xml_node& r) const 5511 { 5512 return (_root >= r._root); 5513 } 5514 empty() const5515 PUGI__FN bool xml_node::empty() const 5516 { 5517 return !_root; 5518 } 5519 name() const5520 PUGI__FN const char_t* xml_node::name() const 5521 { 5522 return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT(""); 5523 } 5524 type() const5525 PUGI__FN xml_node_type xml_node::type() const 5526 { 5527 return _root ? PUGI__NODETYPE(_root) : node_null; 5528 } 5529 value() const5530 PUGI__FN const char_t* xml_node::value() const 5531 { 5532 return (_root && _root->value) ? 
_root->value + 0 : PUGIXML_TEXT(""); 5533 } 5534 child(const char_t * name_) const5535 PUGI__FN xml_node xml_node::child(const char_t* name_) const 5536 { 5537 if (!_root) return xml_node(); 5538 5539 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 5540 if (i->name && impl::strequal(name_, i->name)) return xml_node(i); 5541 5542 return xml_node(); 5543 } 5544 attribute(const char_t * name_) const5545 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const 5546 { 5547 if (!_root) return xml_attribute(); 5548 5549 for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) 5550 if (i->name && impl::strequal(name_, i->name)) 5551 return xml_attribute(i); 5552 5553 return xml_attribute(); 5554 } 5555 next_sibling(const char_t * name_) const5556 PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const 5557 { 5558 if (!_root) return xml_node(); 5559 5560 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) 5561 if (i->name && impl::strequal(name_, i->name)) return xml_node(i); 5562 5563 return xml_node(); 5564 } 5565 next_sibling() const5566 PUGI__FN xml_node xml_node::next_sibling() const 5567 { 5568 return _root ? 
xml_node(_root->next_sibling) : xml_node(); 5569 } 5570 previous_sibling(const char_t * name_) const5571 PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const 5572 { 5573 if (!_root) return xml_node(); 5574 5575 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) 5576 if (i->name && impl::strequal(name_, i->name)) return xml_node(i); 5577 5578 return xml_node(); 5579 } 5580 attribute(const char_t * name_,xml_attribute & hint_) const5581 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const 5582 { 5583 xml_attribute_struct* hint = hint_._attr; 5584 5585 // if hint is not an attribute of node, behavior is not defined 5586 assert(!hint || (_root && impl::is_attribute_of(hint, _root))); 5587 5588 if (!_root) return xml_attribute(); 5589 5590 // optimistically search from hint up until the end 5591 for (xml_attribute_struct* i = hint; i; i = i->next_attribute) 5592 if (i->name && impl::strequal(name_, i->name)) 5593 { 5594 // update hint to maximize efficiency of searching for consecutive attributes 5595 hint_._attr = i->next_attribute; 5596 5597 return xml_attribute(i); 5598 } 5599 5600 // wrap around and search from the first attribute until the hint 5601 // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails 5602 for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute) 5603 if (j->name && impl::strequal(name_, j->name)) 5604 { 5605 // update hint to maximize efficiency of searching for consecutive attributes 5606 hint_._attr = j->next_attribute; 5607 5608 return xml_attribute(j); 5609 } 5610 5611 return xml_attribute(); 5612 } 5613 previous_sibling() const5614 PUGI__FN xml_node xml_node::previous_sibling() const 5615 { 5616 if (!_root) return xml_node(); 5617 5618 if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c); 5619 else return xml_node(); 5620 } 5621 
parent() const5622 PUGI__FN xml_node xml_node::parent() const 5623 { 5624 return _root ? xml_node(_root->parent) : xml_node(); 5625 } 5626 root() const5627 PUGI__FN xml_node xml_node::root() const 5628 { 5629 return _root ? xml_node(&impl::get_document(_root)) : xml_node(); 5630 } 5631 text() const5632 PUGI__FN xml_text xml_node::text() const 5633 { 5634 return xml_text(_root); 5635 } 5636 child_value() const5637 PUGI__FN const char_t* xml_node::child_value() const 5638 { 5639 if (!_root) return PUGIXML_TEXT(""); 5640 5641 // element nodes can have value if parse_embed_pcdata was used 5642 if (PUGI__NODETYPE(_root) == node_element && _root->value) 5643 return _root->value; 5644 5645 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 5646 if (impl::is_text_node(i) && i->value) 5647 return i->value; 5648 5649 return PUGIXML_TEXT(""); 5650 } 5651 child_value(const char_t * name_) const5652 PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const 5653 { 5654 return child(name_).child_value(); 5655 } 5656 first_attribute() const5657 PUGI__FN xml_attribute xml_node::first_attribute() const 5658 { 5659 return _root ? xml_attribute(_root->first_attribute) : xml_attribute(); 5660 } 5661 last_attribute() const5662 PUGI__FN xml_attribute xml_node::last_attribute() const 5663 { 5664 return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute(); 5665 } 5666 first_child() const5667 PUGI__FN xml_node xml_node::first_child() const 5668 { 5669 return _root ? xml_node(_root->first_child) : xml_node(); 5670 } 5671 last_child() const5672 PUGI__FN xml_node xml_node::last_child() const 5673 { 5674 return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node(); 5675 } 5676 set_name(const char_t * rhs)5677 PUGI__FN bool xml_node::set_name(const char_t* rhs) 5678 { 5679 xml_node_type type_ = _root ? 
PUGI__NODETYPE(_root) : node_null; 5680 5681 if (type_ != node_element && type_ != node_pi && type_ != node_declaration) 5682 return false; 5683 5684 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); 5685 } 5686 set_value(const char_t * rhs)5687 PUGI__FN bool xml_node::set_value(const char_t* rhs) 5688 { 5689 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null; 5690 5691 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype) 5692 return false; 5693 5694 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); 5695 } 5696 append_attribute(const char_t * name_)5697 PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_) 5698 { 5699 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5700 5701 impl::xml_allocator& alloc = impl::get_allocator(_root); 5702 if (!alloc.reserve()) return xml_attribute(); 5703 5704 xml_attribute a(impl::allocate_attribute(alloc)); 5705 if (!a) return xml_attribute(); 5706 5707 impl::append_attribute(a._attr, _root); 5708 5709 a.set_name(name_); 5710 5711 return a; 5712 } 5713 prepend_attribute(const char_t * name_)5714 PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_) 5715 { 5716 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5717 5718 impl::xml_allocator& alloc = impl::get_allocator(_root); 5719 if (!alloc.reserve()) return xml_attribute(); 5720 5721 xml_attribute a(impl::allocate_attribute(alloc)); 5722 if (!a) return xml_attribute(); 5723 5724 impl::prepend_attribute(a._attr, _root); 5725 5726 a.set_name(name_); 5727 5728 return a; 5729 } 5730 insert_attribute_after(const char_t * name_,const xml_attribute & attr)5731 PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr) 5732 { 5733 if 
(!impl::allow_insert_attribute(type())) return xml_attribute(); 5734 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 5735 5736 impl::xml_allocator& alloc = impl::get_allocator(_root); 5737 if (!alloc.reserve()) return xml_attribute(); 5738 5739 xml_attribute a(impl::allocate_attribute(alloc)); 5740 if (!a) return xml_attribute(); 5741 5742 impl::insert_attribute_after(a._attr, attr._attr, _root); 5743 5744 a.set_name(name_); 5745 5746 return a; 5747 } 5748 insert_attribute_before(const char_t * name_,const xml_attribute & attr)5749 PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr) 5750 { 5751 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5752 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 5753 5754 impl::xml_allocator& alloc = impl::get_allocator(_root); 5755 if (!alloc.reserve()) return xml_attribute(); 5756 5757 xml_attribute a(impl::allocate_attribute(alloc)); 5758 if (!a) return xml_attribute(); 5759 5760 impl::insert_attribute_before(a._attr, attr._attr, _root); 5761 5762 a.set_name(name_); 5763 5764 return a; 5765 } 5766 append_copy(const xml_attribute & proto)5767 PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto) 5768 { 5769 if (!proto) return xml_attribute(); 5770 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5771 5772 impl::xml_allocator& alloc = impl::get_allocator(_root); 5773 if (!alloc.reserve()) return xml_attribute(); 5774 5775 xml_attribute a(impl::allocate_attribute(alloc)); 5776 if (!a) return xml_attribute(); 5777 5778 impl::append_attribute(a._attr, _root); 5779 impl::node_copy_attribute(a._attr, proto._attr); 5780 5781 return a; 5782 } 5783 prepend_copy(const xml_attribute & proto)5784 PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto) 5785 { 5786 if (!proto) return xml_attribute(); 5787 if (!impl::allow_insert_attribute(type())) return 
xml_attribute(); 5788 5789 impl::xml_allocator& alloc = impl::get_allocator(_root); 5790 if (!alloc.reserve()) return xml_attribute(); 5791 5792 xml_attribute a(impl::allocate_attribute(alloc)); 5793 if (!a) return xml_attribute(); 5794 5795 impl::prepend_attribute(a._attr, _root); 5796 impl::node_copy_attribute(a._attr, proto._attr); 5797 5798 return a; 5799 } 5800 insert_copy_after(const xml_attribute & proto,const xml_attribute & attr)5801 PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr) 5802 { 5803 if (!proto) return xml_attribute(); 5804 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5805 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 5806 5807 impl::xml_allocator& alloc = impl::get_allocator(_root); 5808 if (!alloc.reserve()) return xml_attribute(); 5809 5810 xml_attribute a(impl::allocate_attribute(alloc)); 5811 if (!a) return xml_attribute(); 5812 5813 impl::insert_attribute_after(a._attr, attr._attr, _root); 5814 impl::node_copy_attribute(a._attr, proto._attr); 5815 5816 return a; 5817 } 5818 insert_copy_before(const xml_attribute & proto,const xml_attribute & attr)5819 PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr) 5820 { 5821 if (!proto) return xml_attribute(); 5822 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5823 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 5824 5825 impl::xml_allocator& alloc = impl::get_allocator(_root); 5826 if (!alloc.reserve()) return xml_attribute(); 5827 5828 xml_attribute a(impl::allocate_attribute(alloc)); 5829 if (!a) return xml_attribute(); 5830 5831 impl::insert_attribute_before(a._attr, attr._attr, _root); 5832 impl::node_copy_attribute(a._attr, proto._attr); 5833 5834 return a; 5835 } 5836 append_child(xml_node_type type_)5837 PUGI__FN xml_node xml_node::append_child(xml_node_type type_) 5838 { 5839 
if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5840 5841 impl::xml_allocator& alloc = impl::get_allocator(_root); 5842 if (!alloc.reserve()) return xml_node(); 5843 5844 xml_node n(impl::allocate_node(alloc, type_)); 5845 if (!n) return xml_node(); 5846 5847 impl::append_node(n._root, _root); 5848 5849 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5850 5851 return n; 5852 } 5853 prepend_child(xml_node_type type_)5854 PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_) 5855 { 5856 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5857 5858 impl::xml_allocator& alloc = impl::get_allocator(_root); 5859 if (!alloc.reserve()) return xml_node(); 5860 5861 xml_node n(impl::allocate_node(alloc, type_)); 5862 if (!n) return xml_node(); 5863 5864 impl::prepend_node(n._root, _root); 5865 5866 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5867 5868 return n; 5869 } 5870 insert_child_before(xml_node_type type_,const xml_node & node)5871 PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node) 5872 { 5873 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5874 if (!node._root || node._root->parent != _root) return xml_node(); 5875 5876 impl::xml_allocator& alloc = impl::get_allocator(_root); 5877 if (!alloc.reserve()) return xml_node(); 5878 5879 xml_node n(impl::allocate_node(alloc, type_)); 5880 if (!n) return xml_node(); 5881 5882 impl::insert_node_before(n._root, node._root); 5883 5884 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5885 5886 return n; 5887 } 5888 insert_child_after(xml_node_type type_,const xml_node & node)5889 PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node) 5890 { 5891 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5892 if (!node._root || node._root->parent != _root) return xml_node(); 5893 5894 impl::xml_allocator& alloc = impl::get_allocator(_root); 5895 
if (!alloc.reserve()) return xml_node(); 5896 5897 xml_node n(impl::allocate_node(alloc, type_)); 5898 if (!n) return xml_node(); 5899 5900 impl::insert_node_after(n._root, node._root); 5901 5902 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5903 5904 return n; 5905 } 5906 append_child(const char_t * name_)5907 PUGI__FN xml_node xml_node::append_child(const char_t* name_) 5908 { 5909 xml_node result = append_child(node_element); 5910 5911 result.set_name(name_); 5912 5913 return result; 5914 } 5915 prepend_child(const char_t * name_)5916 PUGI__FN xml_node xml_node::prepend_child(const char_t* name_) 5917 { 5918 xml_node result = prepend_child(node_element); 5919 5920 result.set_name(name_); 5921 5922 return result; 5923 } 5924 insert_child_after(const char_t * name_,const xml_node & node)5925 PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node) 5926 { 5927 xml_node result = insert_child_after(node_element, node); 5928 5929 result.set_name(name_); 5930 5931 return result; 5932 } 5933 insert_child_before(const char_t * name_,const xml_node & node)5934 PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node) 5935 { 5936 xml_node result = insert_child_before(node_element, node); 5937 5938 result.set_name(name_); 5939 5940 return result; 5941 } 5942 append_copy(const xml_node & proto)5943 PUGI__FN xml_node xml_node::append_copy(const xml_node& proto) 5944 { 5945 xml_node_type type_ = proto.type(); 5946 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5947 5948 impl::xml_allocator& alloc = impl::get_allocator(_root); 5949 if (!alloc.reserve()) return xml_node(); 5950 5951 xml_node n(impl::allocate_node(alloc, type_)); 5952 if (!n) return xml_node(); 5953 5954 impl::append_node(n._root, _root); 5955 impl::node_copy_tree(n._root, proto._root); 5956 5957 return n; 5958 } 5959 prepend_copy(const xml_node & proto)5960 PUGI__FN xml_node xml_node::prepend_copy(const 
xml_node& proto) 5961 { 5962 xml_node_type type_ = proto.type(); 5963 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5964 5965 impl::xml_allocator& alloc = impl::get_allocator(_root); 5966 if (!alloc.reserve()) return xml_node(); 5967 5968 xml_node n(impl::allocate_node(alloc, type_)); 5969 if (!n) return xml_node(); 5970 5971 impl::prepend_node(n._root, _root); 5972 impl::node_copy_tree(n._root, proto._root); 5973 5974 return n; 5975 } 5976 insert_copy_after(const xml_node & proto,const xml_node & node)5977 PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node) 5978 { 5979 xml_node_type type_ = proto.type(); 5980 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5981 if (!node._root || node._root->parent != _root) return xml_node(); 5982 5983 impl::xml_allocator& alloc = impl::get_allocator(_root); 5984 if (!alloc.reserve()) return xml_node(); 5985 5986 xml_node n(impl::allocate_node(alloc, type_)); 5987 if (!n) return xml_node(); 5988 5989 impl::insert_node_after(n._root, node._root); 5990 impl::node_copy_tree(n._root, proto._root); 5991 5992 return n; 5993 } 5994 insert_copy_before(const xml_node & proto,const xml_node & node)5995 PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node) 5996 { 5997 xml_node_type type_ = proto.type(); 5998 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5999 if (!node._root || node._root->parent != _root) return xml_node(); 6000 6001 impl::xml_allocator& alloc = impl::get_allocator(_root); 6002 if (!alloc.reserve()) return xml_node(); 6003 6004 xml_node n(impl::allocate_node(alloc, type_)); 6005 if (!n) return xml_node(); 6006 6007 impl::insert_node_before(n._root, node._root); 6008 impl::node_copy_tree(n._root, proto._root); 6009 6010 return n; 6011 } 6012 append_move(const xml_node & moved)6013 PUGI__FN xml_node xml_node::append_move(const xml_node& moved) 6014 { 6015 if (!impl::allow_move(*this, moved)) 
return xml_node(); 6016 6017 impl::xml_allocator& alloc = impl::get_allocator(_root); 6018 if (!alloc.reserve()) return xml_node(); 6019 6020 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 6021 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 6022 6023 impl::remove_node(moved._root); 6024 impl::append_node(moved._root, _root); 6025 6026 return moved; 6027 } 6028 prepend_move(const xml_node & moved)6029 PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved) 6030 { 6031 if (!impl::allow_move(*this, moved)) return xml_node(); 6032 6033 impl::xml_allocator& alloc = impl::get_allocator(_root); 6034 if (!alloc.reserve()) return xml_node(); 6035 6036 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 6037 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 6038 6039 impl::remove_node(moved._root); 6040 impl::prepend_node(moved._root, _root); 6041 6042 return moved; 6043 } 6044 insert_move_after(const xml_node & moved,const xml_node & node)6045 PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node) 6046 { 6047 if (!impl::allow_move(*this, moved)) return xml_node(); 6048 if (!node._root || node._root->parent != _root) return xml_node(); 6049 if (moved._root == node._root) return xml_node(); 6050 6051 impl::xml_allocator& alloc = impl::get_allocator(_root); 6052 if (!alloc.reserve()) return xml_node(); 6053 6054 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 6055 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 6056 6057 impl::remove_node(moved._root); 6058 impl::insert_node_after(moved._root, node._root); 6059 6060 return moved; 6061 } 6062 insert_move_before(const xml_node & moved,const xml_node & 
node)6063 PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node) 6064 { 6065 if (!impl::allow_move(*this, moved)) return xml_node(); 6066 if (!node._root || node._root->parent != _root) return xml_node(); 6067 if (moved._root == node._root) return xml_node(); 6068 6069 impl::xml_allocator& alloc = impl::get_allocator(_root); 6070 if (!alloc.reserve()) return xml_node(); 6071 6072 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 6073 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 6074 6075 impl::remove_node(moved._root); 6076 impl::insert_node_before(moved._root, node._root); 6077 6078 return moved; 6079 } 6080 remove_attribute(const char_t * name_)6081 PUGI__FN bool xml_node::remove_attribute(const char_t* name_) 6082 { 6083 return remove_attribute(attribute(name_)); 6084 } 6085 remove_attribute(const xml_attribute & a)6086 PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a) 6087 { 6088 if (!_root || !a._attr) return false; 6089 if (!impl::is_attribute_of(a._attr, _root)) return false; 6090 6091 impl::xml_allocator& alloc = impl::get_allocator(_root); 6092 if (!alloc.reserve()) return false; 6093 6094 impl::remove_attribute(a._attr, _root); 6095 impl::destroy_attribute(a._attr, alloc); 6096 6097 return true; 6098 } 6099 remove_attributes()6100 PUGI__FN bool xml_node::remove_attributes() 6101 { 6102 if (!_root) return false; 6103 6104 impl::xml_allocator& alloc = impl::get_allocator(_root); 6105 if (!alloc.reserve()) return false; 6106 6107 for (xml_attribute_struct* attr = _root->first_attribute; attr; ) 6108 { 6109 xml_attribute_struct* next = attr->next_attribute; 6110 6111 impl::destroy_attribute(attr, alloc); 6112 6113 attr = next; 6114 } 6115 6116 _root->first_attribute = 0; 6117 6118 return true; 6119 } 6120 remove_child(const char_t * name_)6121 PUGI__FN bool xml_node::remove_child(const char_t* 
name_) 6122 { 6123 return remove_child(child(name_)); 6124 } 6125 remove_child(const xml_node & n)6126 PUGI__FN bool xml_node::remove_child(const xml_node& n) 6127 { 6128 if (!_root || !n._root || n._root->parent != _root) return false; 6129 6130 impl::xml_allocator& alloc = impl::get_allocator(_root); 6131 if (!alloc.reserve()) return false; 6132 6133 impl::remove_node(n._root); 6134 impl::destroy_node(n._root, alloc); 6135 6136 return true; 6137 } 6138 remove_children()6139 PUGI__FN bool xml_node::remove_children() 6140 { 6141 if (!_root) return false; 6142 6143 impl::xml_allocator& alloc = impl::get_allocator(_root); 6144 if (!alloc.reserve()) return false; 6145 6146 for (xml_node_struct* cur = _root->first_child; cur; ) 6147 { 6148 xml_node_struct* next = cur->next_sibling; 6149 6150 impl::destroy_node(cur, alloc); 6151 6152 cur = next; 6153 } 6154 6155 _root->first_child = 0; 6156 6157 return true; 6158 } 6159 append_buffer(const void * contents,size_t size,unsigned int options,xml_encoding encoding)6160 PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) 6161 { 6162 // append_buffer is only valid for elements/documents 6163 if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root); 6164 6165 // get document node 6166 impl::xml_document_struct* doc = &impl::get_document(_root); 6167 6168 // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense 6169 doc->header |= impl::xml_memory_page_contents_shared_mask; 6170 6171 // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later) 6172 impl::xml_memory_page* page = 0; 6173 impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer) + sizeof(void*), page)); 6174 (void)page; 6175 6176 if (!extra) 
return impl::make_parse_result(status_out_of_memory); 6177 6178 #ifdef PUGIXML_COMPACT 6179 // align the memory block to a pointer boundary; this is required for compact mode where memory allocations are only 4b aligned 6180 // note that this requires up to sizeof(void*)-1 additional memory, which the allocation above takes into account 6181 extra = reinterpret_cast<impl::xml_extra_buffer*>((reinterpret_cast<uintptr_t>(extra) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1)); 6182 #endif 6183 6184 // add extra buffer to the list 6185 extra->buffer = 0; 6186 extra->next = doc->extra_buffers; 6187 doc->extra_buffers = extra; 6188 6189 // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level 6190 impl::name_null_sentry sentry(_root); 6191 6192 return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer); 6193 } 6194 find_child_by_attribute(const char_t * name_,const char_t * attr_name,const char_t * attr_value) const6195 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const 6196 { 6197 if (!_root) return xml_node(); 6198 6199 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 6200 if (i->name && impl::strequal(name_, i->name)) 6201 { 6202 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) 6203 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? 
a->value + 0 : PUGIXML_TEXT(""))) 6204 return xml_node(i); 6205 } 6206 6207 return xml_node(); 6208 } 6209 find_child_by_attribute(const char_t * attr_name,const char_t * attr_value) const6210 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const 6211 { 6212 if (!_root) return xml_node(); 6213 6214 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 6215 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) 6216 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT(""))) 6217 return xml_node(i); 6218 6219 return xml_node(); 6220 } 6221 6222 #ifndef PUGIXML_NO_STL path(char_t delimiter) const6223 PUGI__FN string_t xml_node::path(char_t delimiter) const 6224 { 6225 if (!_root) return string_t(); 6226 6227 size_t offset = 0; 6228 6229 for (xml_node_struct* i = _root; i; i = i->parent) 6230 { 6231 offset += (i != _root); 6232 offset += i->name ? impl::strlength(i->name) : 0; 6233 } 6234 6235 string_t result; 6236 result.resize(offset); 6237 6238 for (xml_node_struct* j = _root; j; j = j->parent) 6239 { 6240 if (j != _root) 6241 result[--offset] = delimiter; 6242 6243 if (j->name) 6244 { 6245 size_t length = impl::strlength(j->name); 6246 6247 offset -= length; 6248 memcpy(&result[offset], j->name, length * sizeof(char_t)); 6249 } 6250 } 6251 6252 assert(offset == 0); 6253 6254 return result; 6255 } 6256 #endif 6257 first_element_by_path(const char_t * path_,char_t delimiter) const6258 PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const 6259 { 6260 xml_node context = path_[0] == delimiter ? 
root() : *this; 6261 6262 if (!context._root) return xml_node(); 6263 6264 const char_t* path_segment = path_; 6265 6266 while (*path_segment == delimiter) ++path_segment; 6267 6268 const char_t* path_segment_end = path_segment; 6269 6270 while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end; 6271 6272 if (path_segment == path_segment_end) return context; 6273 6274 const char_t* next_segment = path_segment_end; 6275 6276 while (*next_segment == delimiter) ++next_segment; 6277 6278 if (*path_segment == '.' && path_segment + 1 == path_segment_end) 6279 return context.first_element_by_path(next_segment, delimiter); 6280 else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end) 6281 return context.parent().first_element_by_path(next_segment, delimiter); 6282 else 6283 { 6284 for (xml_node_struct* j = context._root->first_child; j; j = j->next_sibling) 6285 { 6286 if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment))) 6287 { 6288 xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter); 6289 6290 if (subsearch) return subsearch; 6291 } 6292 } 6293 6294 return xml_node(); 6295 } 6296 } 6297 traverse(xml_tree_walker & walker)6298 PUGI__FN bool xml_node::traverse(xml_tree_walker& walker) 6299 { 6300 walker._depth = -1; 6301 6302 xml_node arg_begin(_root); 6303 if (!walker.begin(arg_begin)) return false; 6304 6305 xml_node_struct* cur = _root ? 
_root->first_child + 0 : 0; 6306 6307 if (cur) 6308 { 6309 ++walker._depth; 6310 6311 do 6312 { 6313 xml_node arg_for_each(cur); 6314 if (!walker.for_each(arg_for_each)) 6315 return false; 6316 6317 if (cur->first_child) 6318 { 6319 ++walker._depth; 6320 cur = cur->first_child; 6321 } 6322 else if (cur->next_sibling) 6323 cur = cur->next_sibling; 6324 else 6325 { 6326 while (!cur->next_sibling && cur != _root && cur->parent) 6327 { 6328 --walker._depth; 6329 cur = cur->parent; 6330 } 6331 6332 if (cur != _root) 6333 cur = cur->next_sibling; 6334 } 6335 } 6336 while (cur && cur != _root); 6337 } 6338 6339 assert(walker._depth == -1); 6340 6341 xml_node arg_end(_root); 6342 return walker.end(arg_end); 6343 } 6344 hash_value() const6345 PUGI__FN size_t xml_node::hash_value() const 6346 { 6347 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct)); 6348 } 6349 internal_object() const6350 PUGI__FN xml_node_struct* xml_node::internal_object() const 6351 { 6352 return _root; 6353 } 6354 print(xml_writer & writer,const char_t * indent,unsigned int flags,xml_encoding encoding,unsigned int depth) const6355 PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const 6356 { 6357 if (!_root) return; 6358 6359 impl::xml_buffered_writer buffered_writer(writer, encoding); 6360 6361 impl::node_output(buffered_writer, _root, indent, flags, depth); 6362 6363 buffered_writer.flush(); 6364 } 6365 6366 #ifndef PUGIXML_NO_STL print(std::basic_ostream<char,std::char_traits<char>> & stream,const char_t * indent,unsigned int flags,xml_encoding encoding,unsigned int depth) const6367 PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const 6368 { 6369 xml_writer_stream writer(stream); 6370 6371 print(writer, indent, flags, encoding, depth); 6372 } 6373 
print(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream,const char_t * indent,unsigned int flags,unsigned int depth) const6374 PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const 6375 { 6376 xml_writer_stream writer(stream); 6377 6378 print(writer, indent, flags, encoding_wchar, depth); 6379 } 6380 #endif 6381 offset_debug() const6382 PUGI__FN ptrdiff_t xml_node::offset_debug() const 6383 { 6384 if (!_root) return -1; 6385 6386 impl::xml_document_struct& doc = impl::get_document(_root); 6387 6388 // we can determine the offset reliably only if there is exactly once parse buffer 6389 if (!doc.buffer || doc.extra_buffers) return -1; 6390 6391 switch (type()) 6392 { 6393 case node_document: 6394 return 0; 6395 6396 case node_element: 6397 case node_declaration: 6398 case node_pi: 6399 return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1; 6400 6401 case node_pcdata: 6402 case node_cdata: 6403 case node_comment: 6404 case node_doctype: 6405 return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? 
_root->value - doc.buffer : -1; 6406 6407 default: 6408 assert(false && "Invalid node type"); // unreachable 6409 return -1; 6410 } 6411 } 6412 6413 #ifdef __BORLANDC__ operator &&(const xml_node & lhs,bool rhs)6414 PUGI__FN bool operator&&(const xml_node& lhs, bool rhs) 6415 { 6416 return (bool)lhs && rhs; 6417 } 6418 operator ||(const xml_node & lhs,bool rhs)6419 PUGI__FN bool operator||(const xml_node& lhs, bool rhs) 6420 { 6421 return (bool)lhs || rhs; 6422 } 6423 #endif 6424 xml_text(xml_node_struct * root)6425 PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root) 6426 { 6427 } 6428 _data() const6429 PUGI__FN xml_node_struct* xml_text::_data() const 6430 { 6431 if (!_root || impl::is_text_node(_root)) return _root; 6432 6433 // element nodes can have value if parse_embed_pcdata was used 6434 if (PUGI__NODETYPE(_root) == node_element && _root->value) 6435 return _root; 6436 6437 for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling) 6438 if (impl::is_text_node(node)) 6439 return node; 6440 6441 return 0; 6442 } 6443 _data_new()6444 PUGI__FN xml_node_struct* xml_text::_data_new() 6445 { 6446 xml_node_struct* d = _data(); 6447 if (d) return d; 6448 6449 return xml_node(_root).append_child(node_pcdata).internal_object(); 6450 } 6451 xml_text()6452 PUGI__FN xml_text::xml_text(): _root(0) 6453 { 6454 } 6455 unspecified_bool_xml_text(xml_text ***)6456 PUGI__FN static void unspecified_bool_xml_text(xml_text***) 6457 { 6458 } 6459 operator xml_text::unspecified_bool_type() const6460 PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const 6461 { 6462 return _data() ? 
unspecified_bool_xml_text : 0; 6463 } 6464 operator !() const6465 PUGI__FN bool xml_text::operator!() const 6466 { 6467 return !_data(); 6468 } 6469 empty() const6470 PUGI__FN bool xml_text::empty() const 6471 { 6472 return _data() == 0; 6473 } 6474 get() const6475 PUGI__FN const char_t* xml_text::get() const 6476 { 6477 xml_node_struct* d = _data(); 6478 6479 return (d && d->value) ? d->value + 0 : PUGIXML_TEXT(""); 6480 } 6481 as_string(const char_t * def) const6482 PUGI__FN const char_t* xml_text::as_string(const char_t* def) const 6483 { 6484 xml_node_struct* d = _data(); 6485 6486 return (d && d->value) ? d->value + 0 : def; 6487 } 6488 as_int(int def) const6489 PUGI__FN int xml_text::as_int(int def) const 6490 { 6491 xml_node_struct* d = _data(); 6492 6493 return (d && d->value) ? impl::get_value_int(d->value) : def; 6494 } 6495 as_uint(unsigned int def) const6496 PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const 6497 { 6498 xml_node_struct* d = _data(); 6499 6500 return (d && d->value) ? impl::get_value_uint(d->value) : def; 6501 } 6502 as_double(double def) const6503 PUGI__FN double xml_text::as_double(double def) const 6504 { 6505 xml_node_struct* d = _data(); 6506 6507 return (d && d->value) ? impl::get_value_double(d->value) : def; 6508 } 6509 as_float(float def) const6510 PUGI__FN float xml_text::as_float(float def) const 6511 { 6512 xml_node_struct* d = _data(); 6513 6514 return (d && d->value) ? impl::get_value_float(d->value) : def; 6515 } 6516 as_bool(bool def) const6517 PUGI__FN bool xml_text::as_bool(bool def) const 6518 { 6519 xml_node_struct* d = _data(); 6520 6521 return (d && d->value) ? impl::get_value_bool(d->value) : def; 6522 } 6523 6524 #ifdef PUGIXML_HAS_LONG_LONG as_llong(long long def) const6525 PUGI__FN long long xml_text::as_llong(long long def) const 6526 { 6527 xml_node_struct* d = _data(); 6528 6529 return (d && d->value) ? 
impl::get_value_llong(d->value) : def; 6530 } 6531 as_ullong(unsigned long long def) const6532 PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const 6533 { 6534 xml_node_struct* d = _data(); 6535 6536 return (d && d->value) ? impl::get_value_ullong(d->value) : def; 6537 } 6538 #endif 6539 set(const char_t * rhs)6540 PUGI__FN bool xml_text::set(const char_t* rhs) 6541 { 6542 xml_node_struct* dn = _data_new(); 6543 6544 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false; 6545 } 6546 set(int rhs)6547 PUGI__FN bool xml_text::set(int rhs) 6548 { 6549 xml_node_struct* dn = _data_new(); 6550 6551 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; 6552 } 6553 set(unsigned int rhs)6554 PUGI__FN bool xml_text::set(unsigned int rhs) 6555 { 6556 xml_node_struct* dn = _data_new(); 6557 6558 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; 6559 } 6560 set(long rhs)6561 PUGI__FN bool xml_text::set(long rhs) 6562 { 6563 xml_node_struct* dn = _data_new(); 6564 6565 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; 6566 } 6567 set(unsigned long rhs)6568 PUGI__FN bool xml_text::set(unsigned long rhs) 6569 { 6570 xml_node_struct* dn = _data_new(); 6571 6572 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; 6573 } 6574 set(float rhs)6575 PUGI__FN bool xml_text::set(float rhs) 6576 { 6577 xml_node_struct* dn = _data_new(); 6578 6579 return dn ? 
impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision) : false; 6580 } 6581 set(float rhs,int precision)6582 PUGI__FN bool xml_text::set(float rhs, int precision) 6583 { 6584 xml_node_struct* dn = _data_new(); 6585 6586 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false; 6587 } 6588 set(double rhs)6589 PUGI__FN bool xml_text::set(double rhs) 6590 { 6591 xml_node_struct* dn = _data_new(); 6592 6593 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision) : false; 6594 } 6595 set(double rhs,int precision)6596 PUGI__FN bool xml_text::set(double rhs, int precision) 6597 { 6598 xml_node_struct* dn = _data_new(); 6599 6600 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false; 6601 } 6602 set(bool rhs)6603 PUGI__FN bool xml_text::set(bool rhs) 6604 { 6605 xml_node_struct* dn = _data_new(); 6606 6607 return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; 6608 } 6609 6610 #ifdef PUGIXML_HAS_LONG_LONG set(long long rhs)6611 PUGI__FN bool xml_text::set(long long rhs) 6612 { 6613 xml_node_struct* dn = _data_new(); 6614 6615 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; 6616 } 6617 set(unsigned long long rhs)6618 PUGI__FN bool xml_text::set(unsigned long long rhs) 6619 { 6620 xml_node_struct* dn = _data_new(); 6621 6622 return dn ? 
impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; 6623 } 6624 #endif 6625 operator =(const char_t * rhs)6626 PUGI__FN xml_text& xml_text::operator=(const char_t* rhs) 6627 { 6628 set(rhs); 6629 return *this; 6630 } 6631 operator =(int rhs)6632 PUGI__FN xml_text& xml_text::operator=(int rhs) 6633 { 6634 set(rhs); 6635 return *this; 6636 } 6637 operator =(unsigned int rhs)6638 PUGI__FN xml_text& xml_text::operator=(unsigned int rhs) 6639 { 6640 set(rhs); 6641 return *this; 6642 } 6643 operator =(long rhs)6644 PUGI__FN xml_text& xml_text::operator=(long rhs) 6645 { 6646 set(rhs); 6647 return *this; 6648 } 6649 operator =(unsigned long rhs)6650 PUGI__FN xml_text& xml_text::operator=(unsigned long rhs) 6651 { 6652 set(rhs); 6653 return *this; 6654 } 6655 operator =(double rhs)6656 PUGI__FN xml_text& xml_text::operator=(double rhs) 6657 { 6658 set(rhs); 6659 return *this; 6660 } 6661 operator =(float rhs)6662 PUGI__FN xml_text& xml_text::operator=(float rhs) 6663 { 6664 set(rhs); 6665 return *this; 6666 } 6667 operator =(bool rhs)6668 PUGI__FN xml_text& xml_text::operator=(bool rhs) 6669 { 6670 set(rhs); 6671 return *this; 6672 } 6673 6674 #ifdef PUGIXML_HAS_LONG_LONG operator =(long long rhs)6675 PUGI__FN xml_text& xml_text::operator=(long long rhs) 6676 { 6677 set(rhs); 6678 return *this; 6679 } 6680 operator =(unsigned long long rhs)6681 PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs) 6682 { 6683 set(rhs); 6684 return *this; 6685 } 6686 #endif 6687 data() const6688 PUGI__FN xml_node xml_text::data() const 6689 { 6690 return xml_node(_data()); 6691 } 6692 6693 #ifdef __BORLANDC__ operator &&(const xml_text & lhs,bool rhs)6694 PUGI__FN bool operator&&(const xml_text& lhs, bool rhs) 6695 { 6696 return (bool)lhs && rhs; 6697 } 6698 operator ||(const xml_text & lhs,bool rhs)6699 PUGI__FN bool operator||(const xml_text& lhs, bool rhs) 6700 { 6701 return (bool)lhs || rhs; 6702 } 
6703 #endif 6704 xml_node_iterator()6705 PUGI__FN xml_node_iterator::xml_node_iterator() 6706 { 6707 } 6708 xml_node_iterator(const xml_node & node)6709 PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent()) 6710 { 6711 } 6712 xml_node_iterator(xml_node_struct * ref,xml_node_struct * parent)6713 PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) 6714 { 6715 } 6716 operator ==(const xml_node_iterator & rhs) const6717 PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const 6718 { 6719 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; 6720 } 6721 operator !=(const xml_node_iterator & rhs) const6722 PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const 6723 { 6724 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; 6725 } 6726 operator *() const6727 PUGI__FN xml_node& xml_node_iterator::operator*() const 6728 { 6729 assert(_wrap._root); 6730 return _wrap; 6731 } 6732 operator ->() const6733 PUGI__FN xml_node* xml_node_iterator::operator->() const 6734 { 6735 assert(_wrap._root); 6736 return const_cast<xml_node*>(&_wrap); // BCC5 workaround 6737 } 6738 operator ++()6739 PUGI__FN xml_node_iterator& xml_node_iterator::operator++() 6740 { 6741 assert(_wrap._root); 6742 _wrap._root = _wrap._root->next_sibling; 6743 return *this; 6744 } 6745 operator ++(int)6746 PUGI__FN xml_node_iterator xml_node_iterator::operator++(int) 6747 { 6748 xml_node_iterator temp = *this; 6749 ++*this; 6750 return temp; 6751 } 6752 operator --()6753 PUGI__FN xml_node_iterator& xml_node_iterator::operator--() 6754 { 6755 _wrap = _wrap._root ? 
_wrap.previous_sibling() : _parent.last_child(); 6756 return *this; 6757 } 6758 operator --(int)6759 PUGI__FN xml_node_iterator xml_node_iterator::operator--(int) 6760 { 6761 xml_node_iterator temp = *this; 6762 --*this; 6763 return temp; 6764 } 6765 xml_attribute_iterator()6766 PUGI__FN xml_attribute_iterator::xml_attribute_iterator() 6767 { 6768 } 6769 xml_attribute_iterator(const xml_attribute & attr,const xml_node & parent)6770 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent) 6771 { 6772 } 6773 xml_attribute_iterator(xml_attribute_struct * ref,xml_node_struct * parent)6774 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) 6775 { 6776 } 6777 operator ==(const xml_attribute_iterator & rhs) const6778 PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const 6779 { 6780 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root; 6781 } 6782 operator !=(const xml_attribute_iterator & rhs) const6783 PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const 6784 { 6785 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root; 6786 } 6787 operator *() const6788 PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const 6789 { 6790 assert(_wrap._attr); 6791 return _wrap; 6792 } 6793 operator ->() const6794 PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const 6795 { 6796 assert(_wrap._attr); 6797 return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround 6798 } 6799 operator ++()6800 PUGI__FN xml_attribute_iterator& xml_attribute_iterator::operator++() 6801 { 6802 assert(_wrap._attr); 6803 _wrap._attr = _wrap._attr->next_attribute; 6804 return *this; 6805 } 6806 operator ++(int)6807 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int) 6808 { 6809 
xml_attribute_iterator temp = *this; 6810 ++*this; 6811 return temp; 6812 } 6813 operator --()6814 PUGI__FN xml_attribute_iterator& xml_attribute_iterator::operator--() 6815 { 6816 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute(); 6817 return *this; 6818 } 6819 operator --(int)6820 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int) 6821 { 6822 xml_attribute_iterator temp = *this; 6823 --*this; 6824 return temp; 6825 } 6826 xml_named_node_iterator()6827 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0) 6828 { 6829 } 6830 xml_named_node_iterator(const xml_node & node,const char_t * name)6831 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name) 6832 { 6833 } 6834 xml_named_node_iterator(xml_node_struct * ref,xml_node_struct * parent,const char_t * name)6835 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name) 6836 { 6837 } 6838 operator ==(const xml_named_node_iterator & rhs) const6839 PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const 6840 { 6841 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; 6842 } 6843 operator !=(const xml_named_node_iterator & rhs) const6844 PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const 6845 { 6846 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; 6847 } 6848 operator *() const6849 PUGI__FN xml_node& xml_named_node_iterator::operator*() const 6850 { 6851 assert(_wrap._root); 6852 return _wrap; 6853 } 6854 operator ->() const6855 PUGI__FN xml_node* xml_named_node_iterator::operator->() const 6856 { 6857 assert(_wrap._root); 6858 return const_cast<xml_node*>(&_wrap); // BCC5 workaround 6859 } 6860 operator ++()6861 PUGI__FN 
xml_named_node_iterator& xml_named_node_iterator::operator++() 6862 { 6863 assert(_wrap._root); 6864 _wrap = _wrap.next_sibling(_name); 6865 return *this; 6866 } 6867 operator ++(int)6868 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int) 6869 { 6870 xml_named_node_iterator temp = *this; 6871 ++*this; 6872 return temp; 6873 } 6874 operator --()6875 PUGI__FN xml_named_node_iterator& xml_named_node_iterator::operator--() 6876 { 6877 if (_wrap._root) 6878 _wrap = _wrap.previous_sibling(_name); 6879 else 6880 { 6881 _wrap = _parent.last_child(); 6882 6883 if (!impl::strequal(_wrap.name(), _name)) 6884 _wrap = _wrap.previous_sibling(_name); 6885 } 6886 6887 return *this; 6888 } 6889 operator --(int)6890 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int) 6891 { 6892 xml_named_node_iterator temp = *this; 6893 --*this; 6894 return temp; 6895 } 6896 xml_parse_result()6897 PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto) 6898 { 6899 } 6900 operator bool() const6901 PUGI__FN xml_parse_result::operator bool() const 6902 { 6903 return status == status_ok; 6904 } 6905 description() const6906 PUGI__FN const char* xml_parse_result::description() const 6907 { 6908 switch (status) 6909 { 6910 case status_ok: return "No error"; 6911 6912 case status_file_not_found: return "File was not found"; 6913 case status_io_error: return "Error reading from file/stream"; 6914 case status_out_of_memory: return "Could not allocate memory"; 6915 case status_internal_error: return "Internal error occurred"; 6916 6917 case status_unrecognized_tag: return "Could not determine tag type"; 6918 6919 case status_bad_pi: return "Error parsing document declaration/processing instruction"; 6920 case status_bad_comment: return "Error parsing comment"; 6921 case status_bad_cdata: return "Error parsing CDATA section"; 6922 case status_bad_doctype: return "Error parsing document type declaration"; 6923 
case status_bad_pcdata: return "Error parsing PCDATA section"; 6924 case status_bad_start_element: return "Error parsing start element tag"; 6925 case status_bad_attribute: return "Error parsing element attribute"; 6926 case status_bad_end_element: return "Error parsing end element tag"; 6927 case status_end_element_mismatch: return "Start-end tags mismatch"; 6928 6929 case status_append_invalid_root: return "Unable to append nodes: root is not an element or document"; 6930 6931 case status_no_document_element: return "No document element found"; 6932 6933 default: return "Unknown error"; 6934 } 6935 } 6936 xml_document()6937 PUGI__FN xml_document::xml_document(): _buffer(0) 6938 { 6939 _create(); 6940 } 6941 ~xml_document()6942 PUGI__FN xml_document::~xml_document() 6943 { 6944 _destroy(); 6945 } 6946 6947 #ifdef PUGIXML_HAS_MOVE xml_document(xml_document && rhs)6948 PUGI__FN xml_document::xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT: _buffer(0) 6949 { 6950 _create(); 6951 _move(rhs); 6952 } 6953 operator =(xml_document && rhs)6954 PUGI__FN xml_document& xml_document::operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT 6955 { 6956 if (this == &rhs) return *this; 6957 6958 _destroy(); 6959 _create(); 6960 _move(rhs); 6961 6962 return *this; 6963 } 6964 #endif 6965 reset()6966 PUGI__FN void xml_document::reset() 6967 { 6968 _destroy(); 6969 _create(); 6970 } 6971 reset(const xml_document & proto)6972 PUGI__FN void xml_document::reset(const xml_document& proto) 6973 { 6974 reset(); 6975 6976 impl::node_copy_tree(_root, proto._root); 6977 } 6978 _create()6979 PUGI__FN void xml_document::_create() 6980 { 6981 assert(!_root); 6982 6983 #ifdef PUGIXML_COMPACT 6984 // space for page marker for the first page (uint32_t), rounded up to pointer size; assumes pointers are at least 32-bit 6985 const size_t page_offset = sizeof(void*); 6986 #else 6987 const size_t page_offset = 0; 6988 #endif 6989 6990 // initialize sentinel page 6991 
PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory)); 6992 6993 // prepare page structure 6994 impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory); 6995 assert(page); 6996 6997 page->busy_size = impl::xml_memory_page_size; 6998 6999 // setup first page marker 7000 #ifdef PUGIXML_COMPACT 7001 // round-trip through void* to avoid 'cast increases required alignment of target type' warning 7002 page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page))); 7003 *page->compact_page_marker = sizeof(impl::xml_memory_page); 7004 #endif 7005 7006 // allocate new root 7007 _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page); 7008 _root->prev_sibling_c = _root; 7009 7010 // setup sentinel page 7011 page->allocator = static_cast<impl::xml_document_struct*>(_root); 7012 7013 // setup hash table pointer in allocator 7014 #ifdef PUGIXML_COMPACT 7015 page->allocator->_hash = &static_cast<impl::xml_document_struct*>(_root)->hash; 7016 #endif 7017 7018 // verify the document allocation 7019 assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory)); 7020 } 7021 _destroy()7022 PUGI__FN void xml_document::_destroy() 7023 { 7024 assert(_root); 7025 7026 // destroy static storage 7027 if (_buffer) 7028 { 7029 impl::xml_memory::deallocate(_buffer); 7030 _buffer = 0; 7031 } 7032 7033 // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator) 7034 for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next) 7035 { 7036 if (extra->buffer) impl::xml_memory::deallocate(extra->buffer); 7037 } 7038 7039 // destroy dynamic storage, leave sentinel page (it's in static memory) 7040 impl::xml_memory_page* 
root_page = PUGI__GETPAGE(_root); 7041 assert(root_page && !root_page->prev); 7042 assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory)); 7043 7044 for (impl::xml_memory_page* page = root_page->next; page; ) 7045 { 7046 impl::xml_memory_page* next = page->next; 7047 7048 impl::xml_allocator::deallocate_page(page); 7049 7050 page = next; 7051 } 7052 7053 #ifdef PUGIXML_COMPACT 7054 // destroy hash table 7055 static_cast<impl::xml_document_struct*>(_root)->hash.clear(); 7056 #endif 7057 7058 _root = 0; 7059 } 7060 7061 #ifdef PUGIXML_HAS_MOVE _move(xml_document & rhs)7062 PUGI__FN void xml_document::_move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT 7063 { 7064 impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root); 7065 impl::xml_document_struct* other = static_cast<impl::xml_document_struct*>(rhs._root); 7066 7067 // save first child pointer for later; this needs hash access 7068 xml_node_struct* other_first_child = other->first_child; 7069 7070 #ifdef PUGIXML_COMPACT 7071 // reserve space for the hash table up front; this is the only operation that can fail 7072 // if it does, we have no choice but to throw (if we have exceptions) 7073 if (other_first_child) 7074 { 7075 size_t other_children = 0; 7076 for (xml_node_struct* node = other_first_child; node; node = node->next_sibling) 7077 other_children++; 7078 7079 // in compact mode, each pointer assignment could result in a hash table request 7080 // during move, we have to relocate document first_child and parents of all children 7081 // normally there's just one child and its parent has a pointerless encoding but 7082 // we assume the worst here 7083 if (!other->_hash->reserve(other_children + 1)) 7084 { 7085 #ifdef PUGIXML_NO_EXCEPTIONS 7086 return; 7087 #else 7088 throw std::bad_alloc(); 7089 #endif 7090 } 7091 } 7092 #endif 7093 7094 // move allocation state 7095 // note that other->_root may point to the embedded 
document page, in which case we should keep original (empty) state 7096 if (other->_root != PUGI__GETPAGE(other)) 7097 { 7098 doc->_root = other->_root; 7099 doc->_busy_size = other->_busy_size; 7100 } 7101 7102 // move buffer state 7103 doc->buffer = other->buffer; 7104 doc->extra_buffers = other->extra_buffers; 7105 _buffer = rhs._buffer; 7106 7107 #ifdef PUGIXML_COMPACT 7108 // move compact hash; note that the hash table can have pointers to other but they will be "inactive", similarly to nodes removed with remove_child 7109 doc->hash = other->hash; 7110 doc->_hash = &doc->hash; 7111 7112 // make sure we don't access other hash up until the end when we reinitialize other document 7113 other->_hash = 0; 7114 #endif 7115 7116 // move page structure 7117 impl::xml_memory_page* doc_page = PUGI__GETPAGE(doc); 7118 assert(doc_page && !doc_page->prev && !doc_page->next); 7119 7120 impl::xml_memory_page* other_page = PUGI__GETPAGE(other); 7121 assert(other_page && !other_page->prev); 7122 7123 // relink pages since root page is embedded into xml_document 7124 if (impl::xml_memory_page* page = other_page->next) 7125 { 7126 assert(page->prev == other_page); 7127 7128 page->prev = doc_page; 7129 7130 doc_page->next = page; 7131 other_page->next = 0; 7132 } 7133 7134 // make sure pages point to the correct document state 7135 for (impl::xml_memory_page* page = doc_page->next; page; page = page->next) 7136 { 7137 assert(page->allocator == other); 7138 7139 page->allocator = doc; 7140 7141 #ifdef PUGIXML_COMPACT 7142 // this automatically migrates most children between documents and prevents ->parent assignment from allocating 7143 if (page->compact_shared_parent == other) 7144 page->compact_shared_parent = doc; 7145 #endif 7146 } 7147 7148 // move tree structure 7149 assert(!doc->first_child); 7150 7151 doc->first_child = other_first_child; 7152 7153 for (xml_node_struct* node = other_first_child; node; node = node->next_sibling) 7154 { 7155 #ifdef PUGIXML_COMPACT 7156 // 
most children will have migrated when we reassigned compact_shared_parent 7157 assert(node->parent == other || node->parent == doc); 7158 7159 node->parent = doc; 7160 #else 7161 assert(node->parent == other); 7162 node->parent = doc; 7163 #endif 7164 } 7165 7166 // reset other document 7167 new (other) impl::xml_document_struct(PUGI__GETPAGE(other)); 7168 rhs._buffer = 0; 7169 } 7170 #endif 7171 7172 #ifndef PUGIXML_NO_STL load(std::basic_istream<char,std::char_traits<char>> & stream,unsigned int options,xml_encoding encoding)7173 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding) 7174 { 7175 reset(); 7176 7177 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer); 7178 } 7179 load(std::basic_istream<wchar_t,std::char_traits<wchar_t>> & stream,unsigned int options)7180 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options) 7181 { 7182 reset(); 7183 7184 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer); 7185 } 7186 #endif 7187 load_string(const char_t * contents,unsigned int options)7188 PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options) 7189 { 7190 // Force native encoding (skip autodetection) 7191 #ifdef PUGIXML_WCHAR_MODE 7192 xml_encoding encoding = encoding_wchar; 7193 #else 7194 xml_encoding encoding = encoding_utf8; 7195 #endif 7196 7197 return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding); 7198 } 7199 load(const char_t * contents,unsigned int options)7200 PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options) 7201 { 7202 return load_string(contents, options); 7203 } 7204 load_file(const char * path_,unsigned int options,xml_encoding 
encoding)7205 PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding) 7206 { 7207 reset(); 7208 7209 using impl::auto_deleter; // MSVC7 workaround 7210 auto_deleter<FILE> file(impl::open_file(path_, "rb"), impl::close_file); 7211 7212 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer); 7213 } 7214 load_file(const wchar_t * path_,unsigned int options,xml_encoding encoding)7215 PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding) 7216 { 7217 reset(); 7218 7219 using impl::auto_deleter; // MSVC7 workaround 7220 auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file); 7221 7222 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer); 7223 } 7224 load_buffer(const void * contents,size_t size,unsigned int options,xml_encoding encoding)7225 PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) 7226 { 7227 reset(); 7228 7229 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer); 7230 } 7231 load_buffer_inplace(void * contents,size_t size,unsigned int options,xml_encoding encoding)7232 PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding) 7233 { 7234 reset(); 7235 7236 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer); 7237 } 7238 load_buffer_inplace_own(void * contents,size_t size,unsigned int options,xml_encoding encoding)7239 PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding 
encoding) 7240 { 7241 reset(); 7242 7243 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer); 7244 } 7245 save(xml_writer & writer,const char_t * indent,unsigned int flags,xml_encoding encoding) const7246 PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const 7247 { 7248 impl::xml_buffered_writer buffered_writer(writer, encoding); 7249 7250 if ((flags & format_write_bom) && encoding != encoding_latin1) 7251 { 7252 // BOM always represents the codepoint U+FEFF, so just write it in native encoding 7253 #ifdef PUGIXML_WCHAR_MODE 7254 unsigned int bom = 0xfeff; 7255 buffered_writer.write(static_cast<wchar_t>(bom)); 7256 #else 7257 buffered_writer.write('\xef', '\xbb', '\xbf'); 7258 #endif 7259 } 7260 7261 if (!(flags & format_no_declaration) && !impl::has_declaration(_root)) 7262 { 7263 buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\"")); 7264 if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\"")); 7265 buffered_writer.write('?', '>'); 7266 if (!(flags & format_raw)) buffered_writer.write('\n'); 7267 } 7268 7269 impl::node_output(buffered_writer, _root, indent, flags, 0); 7270 7271 buffered_writer.flush(); 7272 } 7273 7274 #ifndef PUGIXML_NO_STL save(std::basic_ostream<char,std::char_traits<char>> & stream,const char_t * indent,unsigned int flags,xml_encoding encoding) const7275 PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const 7276 { 7277 xml_writer_stream writer(stream); 7278 7279 save(writer, indent, flags, encoding); 7280 } 7281 save(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream,const char_t * indent,unsigned int flags) const7282 PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& 
stream, const char_t* indent, unsigned int flags) const 7283 { 7284 xml_writer_stream writer(stream); 7285 7286 save(writer, indent, flags, encoding_wchar); 7287 } 7288 #endif 7289 save_file(const char * path_,const char_t * indent,unsigned int flags,xml_encoding encoding) const7290 PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const 7291 { 7292 using impl::auto_deleter; // MSVC7 workaround 7293 auto_deleter<FILE> file(impl::open_file(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file); 7294 7295 return impl::save_file_impl(*this, file.data, indent, flags, encoding); 7296 } 7297 save_file(const wchar_t * path_,const char_t * indent,unsigned int flags,xml_encoding encoding) const7298 PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const 7299 { 7300 using impl::auto_deleter; // MSVC7 workaround 7301 auto_deleter<FILE> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? 
L"w" : L"wb"), impl::close_file); 7302 7303 return impl::save_file_impl(*this, file.data, indent, flags, encoding); 7304 } 7305 document_element() const7306 PUGI__FN xml_node xml_document::document_element() const 7307 { 7308 assert(_root); 7309 7310 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 7311 if (PUGI__NODETYPE(i) == node_element) 7312 return xml_node(i); 7313 7314 return xml_node(); 7315 } 7316 7317 #ifndef PUGIXML_NO_STL as_utf8(const wchar_t * str)7318 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str) 7319 { 7320 assert(str); 7321 7322 return impl::as_utf8_impl(str, impl::strlength_wide(str)); 7323 } 7324 as_utf8(const std::basic_string<wchar_t> & str)7325 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str) 7326 { 7327 return impl::as_utf8_impl(str.c_str(), str.size()); 7328 } 7329 as_wide(const char * str)7330 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str) 7331 { 7332 assert(str); 7333 7334 return impl::as_wide_impl(str, strlen(str)); 7335 } 7336 as_wide(const std::string & str)7337 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str) 7338 { 7339 return impl::as_wide_impl(str.c_str(), str.size()); 7340 } 7341 #endif 7342 set_memory_management_functions(allocation_function allocate,deallocation_function deallocate)7343 PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate) 7344 { 7345 impl::xml_memory::allocate = allocate; 7346 impl::xml_memory::deallocate = deallocate; 7347 } 7348 get_memory_allocation_function()7349 PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function() 7350 { 7351 return impl::xml_memory::allocate; 7352 } 7353 get_memory_deallocation_function()7354 PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function() 7355 { 7356 return impl::xml_memory::deallocate; 7357 } 7358 } 7359 7360 #if 
!defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC)) 7361 namespace std 7362 { 7363 // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier) _Iter_cat(const pugi::xml_node_iterator &)7364 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&) 7365 { 7366 return std::bidirectional_iterator_tag(); 7367 } 7368 _Iter_cat(const pugi::xml_attribute_iterator &)7369 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&) 7370 { 7371 return std::bidirectional_iterator_tag(); 7372 } 7373 _Iter_cat(const pugi::xml_named_node_iterator &)7374 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&) 7375 { 7376 return std::bidirectional_iterator_tag(); 7377 } 7378 } 7379 #endif 7380 7381 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC) 7382 namespace std 7383 { 7384 // Workarounds for (non-standard) iterator category detection __iterator_category(const pugi::xml_node_iterator &)7385 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&) 7386 { 7387 return std::bidirectional_iterator_tag(); 7388 } 7389 __iterator_category(const pugi::xml_attribute_iterator &)7390 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&) 7391 { 7392 return std::bidirectional_iterator_tag(); 7393 } 7394 __iterator_category(const pugi::xml_named_node_iterator &)7395 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&) 7396 { 7397 return std::bidirectional_iterator_tag(); 7398 } 7399 } 7400 #endif 7401 7402 #ifndef PUGIXML_NO_XPATH 7403 // STL replacements 7404 PUGI__NS_BEGIN 7405 struct equal_to 7406 { operator ()equal_to7407 template <typename T> bool operator()(const T& lhs, const T& rhs) const 7408 { 7409 return lhs == rhs; 7410 } 7411 }; 7412 7413 struct not_equal_to 7414 { operator ()not_equal_to7415 template <typename 
T> bool operator()(const T& lhs, const T& rhs) const 7416 { 7417 return lhs != rhs; 7418 } 7419 }; 7420 7421 struct less 7422 { operator ()less7423 template <typename T> bool operator()(const T& lhs, const T& rhs) const 7424 { 7425 return lhs < rhs; 7426 } 7427 }; 7428 7429 struct less_equal 7430 { operator ()less_equal7431 template <typename T> bool operator()(const T& lhs, const T& rhs) const 7432 { 7433 return lhs <= rhs; 7434 } 7435 }; 7436 swap(T & lhs,T & rhs)7437 template <typename T> inline void swap(T& lhs, T& rhs) 7438 { 7439 T temp = lhs; 7440 lhs = rhs; 7441 rhs = temp; 7442 } 7443 min_element(I begin,I end,const Pred & pred)7444 template <typename I, typename Pred> PUGI__FN I min_element(I begin, I end, const Pred& pred) 7445 { 7446 I result = begin; 7447 7448 for (I it = begin + 1; it != end; ++it) 7449 if (pred(*it, *result)) 7450 result = it; 7451 7452 return result; 7453 } 7454 reverse(I begin,I end)7455 template <typename I> PUGI__FN void reverse(I begin, I end) 7456 { 7457 while (end - begin > 1) 7458 swap(*begin++, *--end); 7459 } 7460 unique(I begin,I end)7461 template <typename I> PUGI__FN I unique(I begin, I end) 7462 { 7463 // fast skip head 7464 while (end - begin > 1 && *begin != *(begin + 1)) 7465 begin++; 7466 7467 if (begin == end) 7468 return begin; 7469 7470 // last written element 7471 I write = begin++; 7472 7473 // merge unique elements 7474 while (begin != end) 7475 { 7476 if (*begin != *write) 7477 *++write = *begin++; 7478 else 7479 begin++; 7480 } 7481 7482 // past-the-end (write points to live element) 7483 return write + 1; 7484 } 7485 insertion_sort(T * begin,T * end,const Pred & pred)7486 template <typename T, typename Pred> PUGI__FN void insertion_sort(T* begin, T* end, const Pred& pred) 7487 { 7488 if (begin == end) 7489 return; 7490 7491 for (T* it = begin + 1; it != end; ++it) 7492 { 7493 T val = *it; 7494 T* hole = it; 7495 7496 // move hole backwards 7497 while (hole > begin && pred(val, *(hole - 1))) 7498 { 7499 
*hole = *(hole - 1); 7500 hole--; 7501 } 7502 7503 // fill hole with element 7504 *hole = val; 7505 } 7506 } 7507 median3(I first,I middle,I last,const Pred & pred)7508 template <typename I, typename Pred> inline I median3(I first, I middle, I last, const Pred& pred) 7509 { 7510 if (pred(*middle, *first)) 7511 swap(middle, first); 7512 if (pred(*last, *middle)) 7513 swap(last, middle); 7514 if (pred(*middle, *first)) 7515 swap(middle, first); 7516 7517 return middle; 7518 } 7519 partition3(T * begin,T * end,T pivot,const Pred & pred,T ** out_eqbeg,T ** out_eqend)7520 template <typename T, typename Pred> PUGI__FN void partition3(T* begin, T* end, T pivot, const Pred& pred, T** out_eqbeg, T** out_eqend) 7521 { 7522 // invariant: array is split into 4 groups: = < ? > (each variable denotes the boundary between the groups) 7523 T* eq = begin; 7524 T* lt = begin; 7525 T* gt = end; 7526 7527 while (lt < gt) 7528 { 7529 if (pred(*lt, pivot)) 7530 lt++; 7531 else if (*lt == pivot) 7532 swap(*eq++, *lt++); 7533 else 7534 swap(*lt, *--gt); 7535 } 7536 7537 // we now have just 4 groups: = < >; move equal elements to the middle 7538 T* eqbeg = gt; 7539 7540 for (T* it = begin; it != eq; ++it) 7541 swap(*it, *--eqbeg); 7542 7543 *out_eqbeg = eqbeg; 7544 *out_eqend = gt; 7545 } 7546 sort(I begin,I end,const Pred & pred)7547 template <typename I, typename Pred> PUGI__FN void sort(I begin, I end, const Pred& pred) 7548 { 7549 // sort large chunks 7550 while (end - begin > 16) 7551 { 7552 // find median element 7553 I middle = begin + (end - begin) / 2; 7554 I median = median3(begin, middle, end - 1, pred); 7555 7556 // partition in three chunks (< = >) 7557 I eqbeg, eqend; 7558 partition3(begin, end, *median, pred, &eqbeg, &eqend); 7559 7560 // loop on larger half 7561 if (eqbeg - begin > end - eqend) 7562 { 7563 sort(eqend, end, pred); 7564 end = eqbeg; 7565 } 7566 else 7567 { 7568 sort(begin, eqbeg, pred); 7569 begin = eqend; 7570 } 7571 } 7572 7573 // insertion sort small chunk 
7574 insertion_sort(begin, end, pred); 7575 } 7576 hash_insert(const void ** table,size_t size,const void * key)7577 PUGI__FN bool hash_insert(const void** table, size_t size, const void* key) 7578 { 7579 assert(key); 7580 7581 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key)); 7582 7583 // MurmurHash3 32-bit finalizer 7584 h ^= h >> 16; 7585 h *= 0x85ebca6bu; 7586 h ^= h >> 13; 7587 h *= 0xc2b2ae35u; 7588 h ^= h >> 16; 7589 7590 size_t hashmod = size - 1; 7591 size_t bucket = h & hashmod; 7592 7593 for (size_t probe = 0; probe <= hashmod; ++probe) 7594 { 7595 if (table[bucket] == 0) 7596 { 7597 table[bucket] = key; 7598 return true; 7599 } 7600 7601 if (table[bucket] == key) 7602 return false; 7603 7604 // hash collision, quadratic probing 7605 bucket = (bucket + probe + 1) & hashmod; 7606 } 7607 7608 assert(false && "Hash table is full"); // unreachable 7609 return false; 7610 } 7611 PUGI__NS_END 7612 7613 // Allocator used for AST and evaluation stacks 7614 PUGI__NS_BEGIN 7615 static const size_t xpath_memory_page_size = 7616 #ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE 7617 PUGIXML_MEMORY_XPATH_PAGE_SIZE 7618 #else 7619 4096 7620 #endif 7621 ; 7622 7623 static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? 
sizeof(double) : sizeof(void*); 7624 7625 struct xpath_memory_block 7626 { 7627 xpath_memory_block* next; 7628 size_t capacity; 7629 7630 union 7631 { 7632 char data[xpath_memory_page_size]; 7633 double alignment; 7634 }; 7635 }; 7636 7637 struct xpath_allocator 7638 { 7639 xpath_memory_block* _root; 7640 size_t _root_size; 7641 bool* _error; 7642 xpath_allocatorxpath_allocator7643 xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error) 7644 { 7645 } 7646 allocatexpath_allocator7647 void* allocate(size_t size) 7648 { 7649 // round size up to block alignment boundary 7650 size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); 7651 7652 if (_root_size + size <= _root->capacity) 7653 { 7654 void* buf = &_root->data[0] + _root_size; 7655 _root_size += size; 7656 return buf; 7657 } 7658 else 7659 { 7660 // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests 7661 size_t block_capacity_base = sizeof(_root->data); 7662 size_t block_capacity_req = size + block_capacity_base / 4; 7663 size_t block_capacity = (block_capacity_base > block_capacity_req) ? 
block_capacity_base : block_capacity_req; 7664 7665 size_t block_size = block_capacity + offsetof(xpath_memory_block, data); 7666 7667 xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size)); 7668 if (!block) 7669 { 7670 if (_error) *_error = true; 7671 return 0; 7672 } 7673 7674 block->next = _root; 7675 block->capacity = block_capacity; 7676 7677 _root = block; 7678 _root_size = size; 7679 7680 return block->data; 7681 } 7682 } 7683 reallocatexpath_allocator7684 void* reallocate(void* ptr, size_t old_size, size_t new_size) 7685 { 7686 // round size up to block alignment boundary 7687 old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); 7688 new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); 7689 7690 // we can only reallocate the last object 7691 assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size); 7692 7693 // try to reallocate the object inplace 7694 if (ptr && _root_size - old_size + new_size <= _root->capacity) 7695 { 7696 _root_size = _root_size - old_size + new_size; 7697 return ptr; 7698 } 7699 7700 // allocate a new block 7701 void* result = allocate(new_size); 7702 if (!result) return 0; 7703 7704 // we have a new block 7705 if (ptr) 7706 { 7707 // copy old data (we only support growing) 7708 assert(new_size >= old_size); 7709 memcpy(result, ptr, old_size); 7710 7711 // free the previous page if it had no other objects 7712 assert(_root->data == result); 7713 assert(_root->next); 7714 7715 if (_root->next->data == ptr) 7716 { 7717 // deallocate the whole page, unless it was the first one 7718 xpath_memory_block* next = _root->next->next; 7719 7720 if (next) 7721 { 7722 xml_memory::deallocate(_root->next); 7723 _root->next = next; 7724 } 7725 } 7726 } 7727 7728 return result; 7729 } 7730 revertxpath_allocator7731 void revert(const xpath_allocator& state) 7732 { 7733 // free all new pages 7734 
xpath_memory_block* cur = _root; 7735 7736 while (cur != state._root) 7737 { 7738 xpath_memory_block* next = cur->next; 7739 7740 xml_memory::deallocate(cur); 7741 7742 cur = next; 7743 } 7744 7745 // restore state 7746 _root = state._root; 7747 _root_size = state._root_size; 7748 } 7749 releasexpath_allocator7750 void release() 7751 { 7752 xpath_memory_block* cur = _root; 7753 assert(cur); 7754 7755 while (cur->next) 7756 { 7757 xpath_memory_block* next = cur->next; 7758 7759 xml_memory::deallocate(cur); 7760 7761 cur = next; 7762 } 7763 } 7764 }; 7765 7766 struct xpath_allocator_capture 7767 { xpath_allocator_capturexpath_allocator_capture7768 xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc) 7769 { 7770 } 7771 ~xpath_allocator_capturexpath_allocator_capture7772 ~xpath_allocator_capture() 7773 { 7774 _target->revert(_state); 7775 } 7776 7777 xpath_allocator* _target; 7778 xpath_allocator _state; 7779 }; 7780 7781 struct xpath_stack 7782 { 7783 xpath_allocator* result; 7784 xpath_allocator* temp; 7785 }; 7786 7787 struct xpath_stack_data 7788 { 7789 xpath_memory_block blocks[2]; 7790 xpath_allocator result; 7791 xpath_allocator temp; 7792 xpath_stack stack; 7793 bool oom; 7794 xpath_stack_dataxpath_stack_data7795 xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false) 7796 { 7797 blocks[0].next = blocks[1].next = 0; 7798 blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data); 7799 7800 stack.result = &result; 7801 stack.temp = &temp; 7802 } 7803 ~xpath_stack_dataxpath_stack_data7804 ~xpath_stack_data() 7805 { 7806 result.release(); 7807 temp.release(); 7808 } 7809 }; 7810 PUGI__NS_END 7811 7812 // String class 7813 PUGI__NS_BEGIN 7814 class xpath_string 7815 { 7816 const char_t* _buffer; 7817 bool _uses_heap; 7818 size_t _length_heap; 7819 duplicate_string(const char_t * string,size_t length,xpath_allocator * alloc)7820 static char_t* duplicate_string(const char_t* string, size_t length, 
xpath_allocator* alloc) 7821 { 7822 char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t))); 7823 if (!result) return 0; 7824 7825 memcpy(result, string, length * sizeof(char_t)); 7826 result[length] = 0; 7827 7828 return result; 7829 } 7830 xpath_string(const char_t * buffer,bool uses_heap_,size_t length_heap)7831 xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap) 7832 { 7833 } 7834 7835 public: from_const(const char_t * str)7836 static xpath_string from_const(const char_t* str) 7837 { 7838 return xpath_string(str, false, 0); 7839 } 7840 from_heap_preallocated(const char_t * begin,const char_t * end)7841 static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end) 7842 { 7843 assert(begin <= end && *end == 0); 7844 7845 return xpath_string(begin, true, static_cast<size_t>(end - begin)); 7846 } 7847 from_heap(const char_t * begin,const char_t * end,xpath_allocator * alloc)7848 static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc) 7849 { 7850 assert(begin <= end); 7851 7852 if (begin == end) 7853 return xpath_string(); 7854 7855 size_t length = static_cast<size_t>(end - begin); 7856 const char_t* data = duplicate_string(begin, length, alloc); 7857 7858 return data ? 
xpath_string(data, true, length) : xpath_string(); 7859 } 7860 xpath_string()7861 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0) 7862 { 7863 } 7864 append(const xpath_string & o,xpath_allocator * alloc)7865 void append(const xpath_string& o, xpath_allocator* alloc) 7866 { 7867 // skip empty sources 7868 if (!*o._buffer) return; 7869 7870 // fast append for constant empty target and constant source 7871 if (!*_buffer && !_uses_heap && !o._uses_heap) 7872 { 7873 _buffer = o._buffer; 7874 } 7875 else 7876 { 7877 // need to make heap copy 7878 size_t target_length = length(); 7879 size_t source_length = o.length(); 7880 size_t result_length = target_length + source_length; 7881 7882 // allocate new buffer 7883 char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t))); 7884 if (!result) return; 7885 7886 // append first string to the new buffer in case there was no reallocation 7887 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t)); 7888 7889 // append second string to the new buffer 7890 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t)); 7891 result[result_length] = 0; 7892 7893 // finalize 7894 _buffer = result; 7895 _uses_heap = true; 7896 _length_heap = result_length; 7897 } 7898 } 7899 c_str() const7900 const char_t* c_str() const 7901 { 7902 return _buffer; 7903 } 7904 length() const7905 size_t length() const 7906 { 7907 return _uses_heap ? 
_length_heap : strlength(_buffer); 7908 } 7909 data(xpath_allocator * alloc)7910 char_t* data(xpath_allocator* alloc) 7911 { 7912 // make private heap copy 7913 if (!_uses_heap) 7914 { 7915 size_t length_ = strlength(_buffer); 7916 const char_t* data_ = duplicate_string(_buffer, length_, alloc); 7917 7918 if (!data_) return 0; 7919 7920 _buffer = data_; 7921 _uses_heap = true; 7922 _length_heap = length_; 7923 } 7924 7925 return const_cast<char_t*>(_buffer); 7926 } 7927 empty() const7928 bool empty() const 7929 { 7930 return *_buffer == 0; 7931 } 7932 operator ==(const xpath_string & o) const7933 bool operator==(const xpath_string& o) const 7934 { 7935 return strequal(_buffer, o._buffer); 7936 } 7937 operator !=(const xpath_string & o) const7938 bool operator!=(const xpath_string& o) const 7939 { 7940 return !strequal(_buffer, o._buffer); 7941 } 7942 uses_heap() const7943 bool uses_heap() const 7944 { 7945 return _uses_heap; 7946 } 7947 }; 7948 PUGI__NS_END 7949 7950 PUGI__NS_BEGIN starts_with(const char_t * string,const char_t * pattern)7951 PUGI__FN bool starts_with(const char_t* string, const char_t* pattern) 7952 { 7953 while (*pattern && *string == *pattern) 7954 { 7955 string++; 7956 pattern++; 7957 } 7958 7959 return *pattern == 0; 7960 } 7961 find_char(const char_t * s,char_t c)7962 PUGI__FN const char_t* find_char(const char_t* s, char_t c) 7963 { 7964 #ifdef PUGIXML_WCHAR_MODE 7965 return wcschr(s, c); 7966 #else 7967 return strchr(s, c); 7968 #endif 7969 } 7970 find_substring(const char_t * s,const char_t * p)7971 PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p) 7972 { 7973 #ifdef PUGIXML_WCHAR_MODE 7974 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0) 7975 return (*p == 0) ? 
s : wcsstr(s, p); 7976 #else 7977 return strstr(s, p); 7978 #endif 7979 } 7980 7981 // Converts symbol to lower case, if it is an ASCII one tolower_ascii(char_t ch)7982 PUGI__FN char_t tolower_ascii(char_t ch) 7983 { 7984 return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch; 7985 } 7986 string_value(const xpath_node & na,xpath_allocator * alloc)7987 PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc) 7988 { 7989 if (na.attribute()) 7990 return xpath_string::from_const(na.attribute().value()); 7991 else 7992 { 7993 xml_node n = na.node(); 7994 7995 switch (n.type()) 7996 { 7997 case node_pcdata: 7998 case node_cdata: 7999 case node_comment: 8000 case node_pi: 8001 return xpath_string::from_const(n.value()); 8002 8003 case node_document: 8004 case node_element: 8005 { 8006 xpath_string result; 8007 8008 // element nodes can have value if parse_embed_pcdata was used 8009 if (n.value()[0]) 8010 result.append(xpath_string::from_const(n.value()), alloc); 8011 8012 xml_node cur = n.first_child(); 8013 8014 while (cur && cur != n) 8015 { 8016 if (cur.type() == node_pcdata || cur.type() == node_cdata) 8017 result.append(xpath_string::from_const(cur.value()), alloc); 8018 8019 if (cur.first_child()) 8020 cur = cur.first_child(); 8021 else if (cur.next_sibling()) 8022 cur = cur.next_sibling(); 8023 else 8024 { 8025 while (!cur.next_sibling() && cur != n) 8026 cur = cur.parent(); 8027 8028 if (cur != n) cur = cur.next_sibling(); 8029 } 8030 } 8031 8032 return result; 8033 } 8034 8035 default: 8036 return xpath_string(); 8037 } 8038 } 8039 } 8040 node_is_before_sibling(xml_node_struct * ln,xml_node_struct * rn)8041 PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn) 8042 { 8043 assert(ln->parent == rn->parent); 8044 8045 // there is no common ancestor (the shared parent is null), nodes are from different documents 8046 if (!ln->parent) return ln < rn; 8047 8048 // determine sibling order 
8049 xml_node_struct* ls = ln; 8050 xml_node_struct* rs = rn; 8051 8052 while (ls && rs) 8053 { 8054 if (ls == rn) return true; 8055 if (rs == ln) return false; 8056 8057 ls = ls->next_sibling; 8058 rs = rs->next_sibling; 8059 } 8060 8061 // if rn sibling chain ended ln must be before rn 8062 return !rs; 8063 } 8064 node_is_before(xml_node_struct * ln,xml_node_struct * rn)8065 PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn) 8066 { 8067 // find common ancestor at the same depth, if any 8068 xml_node_struct* lp = ln; 8069 xml_node_struct* rp = rn; 8070 8071 while (lp && rp && lp->parent != rp->parent) 8072 { 8073 lp = lp->parent; 8074 rp = rp->parent; 8075 } 8076 8077 // parents are the same! 8078 if (lp && rp) return node_is_before_sibling(lp, rp); 8079 8080 // nodes are at different depths, need to normalize heights 8081 bool left_higher = !lp; 8082 8083 while (lp) 8084 { 8085 lp = lp->parent; 8086 ln = ln->parent; 8087 } 8088 8089 while (rp) 8090 { 8091 rp = rp->parent; 8092 rn = rn->parent; 8093 } 8094 8095 // one node is the ancestor of the other 8096 if (ln == rn) return left_higher; 8097 8098 // find common ancestor... 
again 8099 while (ln->parent != rn->parent) 8100 { 8101 ln = ln->parent; 8102 rn = rn->parent; 8103 } 8104 8105 return node_is_before_sibling(ln, rn); 8106 } 8107 node_is_ancestor(xml_node_struct * parent,xml_node_struct * node)8108 PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node) 8109 { 8110 while (node && node != parent) node = node->parent; 8111 8112 return parent && node == parent; 8113 } 8114 document_buffer_order(const xpath_node & xnode)8115 PUGI__FN const void* document_buffer_order(const xpath_node& xnode) 8116 { 8117 xml_node_struct* node = xnode.node().internal_object(); 8118 8119 if (node) 8120 { 8121 if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0) 8122 { 8123 if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name; 8124 if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value; 8125 } 8126 8127 return 0; 8128 } 8129 8130 xml_attribute_struct* attr = xnode.attribute().internal_object(); 8131 8132 if (attr) 8133 { 8134 if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0) 8135 { 8136 if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name; 8137 if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value; 8138 } 8139 8140 return 0; 8141 } 8142 8143 return 0; 8144 } 8145 8146 struct document_order_comparator 8147 { operator ()document_order_comparator8148 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const 8149 { 8150 // optimized document order based check 8151 const void* lo = document_buffer_order(lhs); 8152 const void* ro = document_buffer_order(rhs); 8153 8154 if (lo && ro) return lo < ro; 8155 8156 // slow comparison 8157 xml_node ln = lhs.node(), rn = rhs.node(); 8158 8159 // compare attributes 8160 if (lhs.attribute() && rhs.attribute()) 8161 { 8162 // shared 
parent 8163 if (lhs.parent() == rhs.parent()) 8164 { 8165 // determine sibling order 8166 for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute()) 8167 if (a == rhs.attribute()) 8168 return true; 8169 8170 return false; 8171 } 8172 8173 // compare attribute parents 8174 ln = lhs.parent(); 8175 rn = rhs.parent(); 8176 } 8177 else if (lhs.attribute()) 8178 { 8179 // attributes go after the parent element 8180 if (lhs.parent() == rhs.node()) return false; 8181 8182 ln = lhs.parent(); 8183 } 8184 else if (rhs.attribute()) 8185 { 8186 // attributes go after the parent element 8187 if (rhs.parent() == lhs.node()) return true; 8188 8189 rn = rhs.parent(); 8190 } 8191 8192 if (ln == rn) return false; 8193 8194 if (!ln || !rn) return ln < rn; 8195 8196 return node_is_before(ln.internal_object(), rn.internal_object()); 8197 } 8198 }; 8199 gen_nan()8200 PUGI__FN double gen_nan() 8201 { 8202 #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24)) 8203 PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t)); 8204 typedef uint32_t UI; // BCC5 workaround 8205 union { float f; UI i; } u; 8206 u.i = 0x7fc00000; 8207 return double(u.f); 8208 #else 8209 // fallback 8210 const volatile double zero = 0.0; 8211 return zero / zero; 8212 #endif 8213 } 8214 is_nan(double value)8215 PUGI__FN bool is_nan(double value) 8216 { 8217 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) 8218 return !!_isnan(value); 8219 #elif defined(fpclassify) && defined(FP_NAN) 8220 return fpclassify(value) == FP_NAN; 8221 #else 8222 // fallback 8223 const volatile double v = value; 8224 return v != v; 8225 #endif 8226 } 8227 convert_number_to_string_special(double value)8228 PUGI__FN const char_t* convert_number_to_string_special(double value) 8229 { 8230 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) 8231 if (_finite(value)) return (value == 0) ? 
PUGIXML_TEXT("0") : 0; 8232 if (_isnan(value)) return PUGIXML_TEXT("NaN"); 8233 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); 8234 #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO) 8235 switch (fpclassify(value)) 8236 { 8237 case FP_NAN: 8238 return PUGIXML_TEXT("NaN"); 8239 8240 case FP_INFINITE: 8241 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); 8242 8243 case FP_ZERO: 8244 return PUGIXML_TEXT("0"); 8245 8246 default: 8247 return 0; 8248 } 8249 #else 8250 // fallback 8251 const volatile double v = value; 8252 8253 if (v == 0) return PUGIXML_TEXT("0"); 8254 if (v != v) return PUGIXML_TEXT("NaN"); 8255 if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); 8256 return 0; 8257 #endif 8258 } 8259 convert_number_to_boolean(double value)8260 PUGI__FN bool convert_number_to_boolean(double value) 8261 { 8262 return (value != 0 && !is_nan(value)); 8263 } 8264 truncate_zeros(char * begin,char * end)8265 PUGI__FN void truncate_zeros(char* begin, char* end) 8266 { 8267 while (begin != end && end[-1] == '0') end--; 8268 8269 *end = 0; 8270 } 8271 8272 // gets mantissa digits in the form of 0.xxxxx with 0. 
implied and the exponent 8273 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE) convert_number_to_mantissa_exponent(double value,char (& buffer)[32],char ** out_mantissa,int * out_exponent)8274 PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent) 8275 { 8276 // get base values 8277 int sign, exponent; 8278 _ecvt_s(buffer, sizeof(buffer), value, DBL_DIG + 1, &exponent, &sign); 8279 8280 // truncate redundant zeros 8281 truncate_zeros(buffer, buffer + strlen(buffer)); 8282 8283 // fill results 8284 *out_mantissa = buffer; 8285 *out_exponent = exponent; 8286 } 8287 #else convert_number_to_mantissa_exponent(double value,char (& buffer)[32],char ** out_mantissa,int * out_exponent)8288 PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent) 8289 { 8290 // get a scientific notation value with IEEE DBL_DIG decimals 8291 PUGI__SNPRINTF(buffer, "%.*e", DBL_DIG, value); 8292 8293 // get the exponent (possibly negative) 8294 char* exponent_string = strchr(buffer, 'e'); 8295 assert(exponent_string); 8296 8297 int exponent = atoi(exponent_string + 1); 8298 8299 // extract mantissa string: skip sign 8300 char* mantissa = buffer[0] == '-' ? 
buffer + 1 : buffer; 8301 assert(mantissa[0] != '0' && mantissa[1] == '.'); 8302 8303 // divide mantissa by 10 to eliminate integer part 8304 mantissa[1] = mantissa[0]; 8305 mantissa++; 8306 exponent++; 8307 8308 // remove extra mantissa digits and zero-terminate mantissa 8309 truncate_zeros(mantissa, exponent_string); 8310 8311 // fill results 8312 *out_mantissa = mantissa; 8313 *out_exponent = exponent; 8314 } 8315 #endif 8316 convert_number_to_string(double value,xpath_allocator * alloc)8317 PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc) 8318 { 8319 // try special number conversion 8320 const char_t* special = convert_number_to_string_special(value); 8321 if (special) return xpath_string::from_const(special); 8322 8323 // get mantissa + exponent form 8324 char mantissa_buffer[32]; 8325 8326 char* mantissa; 8327 int exponent; 8328 convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent); 8329 8330 // allocate a buffer of suitable length for the number 8331 size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4; 8332 char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size)); 8333 if (!result) return xpath_string(); 8334 8335 // make the number! 8336 char_t* s = result; 8337 8338 // sign 8339 if (value < 0) *s++ = '-'; 8340 8341 // integer part 8342 if (exponent <= 0) 8343 { 8344 *s++ = '0'; 8345 } 8346 else 8347 { 8348 while (exponent > 0) 8349 { 8350 assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9); 8351 *s++ = *mantissa ? 
*mantissa++ : '0'; 8352 exponent--; 8353 } 8354 } 8355 8356 // fractional part 8357 if (*mantissa) 8358 { 8359 // decimal point 8360 *s++ = '.'; 8361 8362 // extra zeroes from negative exponent 8363 while (exponent < 0) 8364 { 8365 *s++ = '0'; 8366 exponent++; 8367 } 8368 8369 // extra mantissa digits 8370 while (*mantissa) 8371 { 8372 assert(static_cast<unsigned int>(*mantissa - '0') <= 9); 8373 *s++ = *mantissa++; 8374 } 8375 } 8376 8377 // zero-terminate 8378 assert(s < result + result_size); 8379 *s = 0; 8380 8381 return xpath_string::from_heap_preallocated(result, s); 8382 } 8383 check_string_to_number_format(const char_t * string)8384 PUGI__FN bool check_string_to_number_format(const char_t* string) 8385 { 8386 // parse leading whitespace 8387 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string; 8388 8389 // parse sign 8390 if (*string == '-') ++string; 8391 8392 if (!*string) return false; 8393 8394 // if there is no integer part, there should be a decimal part with at least one digit 8395 if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' 
|| !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false; 8396 8397 // parse integer part 8398 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string; 8399 8400 // parse decimal part 8401 if (*string == '.') 8402 { 8403 ++string; 8404 8405 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string; 8406 } 8407 8408 // parse trailing whitespace 8409 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string; 8410 8411 return *string == 0; 8412 } 8413 convert_string_to_number(const char_t * string)8414 PUGI__FN double convert_string_to_number(const char_t* string) 8415 { 8416 // check string format 8417 if (!check_string_to_number_format(string)) return gen_nan(); 8418 8419 // parse string 8420 #ifdef PUGIXML_WCHAR_MODE 8421 return wcstod(string, 0); 8422 #else 8423 return strtod(string, 0); 8424 #endif 8425 } 8426 convert_string_to_number_scratch(char_t (& buffer)[32],const char_t * begin,const char_t * end,double * out_result)8427 PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result) 8428 { 8429 size_t length = static_cast<size_t>(end - begin); 8430 char_t* scratch = buffer; 8431 8432 if (length >= sizeof(buffer) / sizeof(buffer[0])) 8433 { 8434 // need to make dummy on-heap copy 8435 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 8436 if (!scratch) return false; 8437 } 8438 8439 // copy string to zero-terminated buffer and perform conversion 8440 memcpy(scratch, begin, length * sizeof(char_t)); 8441 scratch[length] = 0; 8442 8443 *out_result = convert_string_to_number(scratch); 8444 8445 // free dummy buffer 8446 if (scratch != buffer) xml_memory::deallocate(scratch); 8447 8448 return true; 8449 } 8450 round_nearest(double value)8451 PUGI__FN double round_nearest(double value) 8452 { 8453 return floor(value + 0.5); 8454 } 8455 round_nearest_nzero(double value)8456 PUGI__FN double round_nearest_nzero(double value) 8457 { 8458 // same as round_nearest, but 
returns -0 for [-0.5, -0] 8459 // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0) 8460 return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5); 8461 } 8462 qualified_name(const xpath_node & node)8463 PUGI__FN const char_t* qualified_name(const xpath_node& node) 8464 { 8465 return node.attribute() ? node.attribute().name() : node.node().name(); 8466 } 8467 local_name(const xpath_node & node)8468 PUGI__FN const char_t* local_name(const xpath_node& node) 8469 { 8470 const char_t* name = qualified_name(node); 8471 const char_t* p = find_char(name, ':'); 8472 8473 return p ? p + 1 : name; 8474 } 8475 8476 struct namespace_uri_predicate 8477 { 8478 const char_t* prefix; 8479 size_t prefix_length; 8480 namespace_uri_predicatenamespace_uri_predicate8481 namespace_uri_predicate(const char_t* name) 8482 { 8483 const char_t* pos = find_char(name, ':'); 8484 8485 prefix = pos ? name : 0; 8486 prefix_length = pos ? static_cast<size_t>(pos - name) : 0; 8487 } 8488 operator ()namespace_uri_predicate8489 bool operator()(xml_attribute a) const 8490 { 8491 const char_t* name = a.name(); 8492 8493 if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false; 8494 8495 return prefix ? 
name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0; 8496 } 8497 }; 8498 namespace_uri(xml_node node)8499 PUGI__FN const char_t* namespace_uri(xml_node node) 8500 { 8501 namespace_uri_predicate pred = node.name(); 8502 8503 xml_node p = node; 8504 8505 while (p) 8506 { 8507 xml_attribute a = p.find_attribute(pred); 8508 8509 if (a) return a.value(); 8510 8511 p = p.parent(); 8512 } 8513 8514 return PUGIXML_TEXT(""); 8515 } 8516 namespace_uri(xml_attribute attr,xml_node parent)8517 PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent) 8518 { 8519 namespace_uri_predicate pred = attr.name(); 8520 8521 // Default namespace does not apply to attributes 8522 if (!pred.prefix) return PUGIXML_TEXT(""); 8523 8524 xml_node p = parent; 8525 8526 while (p) 8527 { 8528 xml_attribute a = p.find_attribute(pred); 8529 8530 if (a) return a.value(); 8531 8532 p = p.parent(); 8533 } 8534 8535 return PUGIXML_TEXT(""); 8536 } 8537 namespace_uri(const xpath_node & node)8538 PUGI__FN const char_t* namespace_uri(const xpath_node& node) 8539 { 8540 return node.attribute() ? 
namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node()); 8541 } 8542 normalize_space(char_t * buffer)8543 PUGI__FN char_t* normalize_space(char_t* buffer) 8544 { 8545 char_t* write = buffer; 8546 8547 for (char_t* it = buffer; *it; ) 8548 { 8549 char_t ch = *it++; 8550 8551 if (PUGI__IS_CHARTYPE(ch, ct_space)) 8552 { 8553 // replace whitespace sequence with single space 8554 while (PUGI__IS_CHARTYPE(*it, ct_space)) it++; 8555 8556 // avoid leading spaces 8557 if (write != buffer) *write++ = ' '; 8558 } 8559 else *write++ = ch; 8560 } 8561 8562 // remove trailing space 8563 if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--; 8564 8565 // zero-terminate 8566 *write = 0; 8567 8568 return write; 8569 } 8570 translate(char_t * buffer,const char_t * from,const char_t * to,size_t to_length)8571 PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length) 8572 { 8573 char_t* write = buffer; 8574 8575 while (*buffer) 8576 { 8577 PUGI__DMC_VOLATILE char_t ch = *buffer++; 8578 8579 const char_t* pos = find_char(from, ch); 8580 8581 if (!pos) 8582 *write++ = ch; // do not process 8583 else if (static_cast<size_t>(pos - from) < to_length) 8584 *write++ = to[pos - from]; // replace 8585 } 8586 8587 // zero-terminate 8588 *write = 0; 8589 8590 return write; 8591 } 8592 translate_table_generate(xpath_allocator * alloc,const char_t * from,const char_t * to)8593 PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to) 8594 { 8595 unsigned char table[128] = {0}; 8596 8597 while (*from) 8598 { 8599 unsigned int fc = static_cast<unsigned int>(*from); 8600 unsigned int tc = static_cast<unsigned int>(*to); 8601 8602 if (fc >= 128 || tc >= 128) 8603 return 0; 8604 8605 // code=128 means "skip character" 8606 if (!table[fc]) 8607 table[fc] = static_cast<unsigned char>(tc ? 
tc : 128); 8608 8609 from++; 8610 if (tc) to++; 8611 } 8612 8613 for (int i = 0; i < 128; ++i) 8614 if (!table[i]) 8615 table[i] = static_cast<unsigned char>(i); 8616 8617 void* result = alloc->allocate(sizeof(table)); 8618 if (!result) return 0; 8619 8620 memcpy(result, table, sizeof(table)); 8621 8622 return static_cast<unsigned char*>(result); 8623 } 8624 translate_table(char_t * buffer,const unsigned char * table)8625 PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table) 8626 { 8627 char_t* write = buffer; 8628 8629 while (*buffer) 8630 { 8631 char_t ch = *buffer++; 8632 unsigned int index = static_cast<unsigned int>(ch); 8633 8634 if (index < 128) 8635 { 8636 unsigned char code = table[index]; 8637 8638 // code=128 means "skip character" (table size is 128 so 128 can be a special value) 8639 // this code skips these characters without extra branches 8640 *write = static_cast<char_t>(code); 8641 write += 1 - (code >> 7); 8642 } 8643 else 8644 { 8645 *write++ = ch; 8646 } 8647 } 8648 8649 // zero-terminate 8650 *write = 0; 8651 8652 return write; 8653 } 8654 is_xpath_attribute(const char_t * name)8655 inline bool is_xpath_attribute(const char_t* name) 8656 { 8657 return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')); 8658 } 8659 8660 struct xpath_variable_boolean: xpath_variable 8661 { xpath_variable_booleanxpath_variable_boolean8662 xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false) 8663 { 8664 } 8665 8666 bool value; 8667 char_t name[1]; 8668 }; 8669 8670 struct xpath_variable_number: xpath_variable 8671 { xpath_variable_numberxpath_variable_number8672 xpath_variable_number(): xpath_variable(xpath_type_number), value(0) 8673 { 8674 } 8675 8676 double value; 8677 char_t name[1]; 8678 }; 8679 8680 struct xpath_variable_string: xpath_variable 8681 { xpath_variable_stringxpath_variable_string8682 xpath_variable_string(): xpath_variable(xpath_type_string), value(0) 8683 { 8684 } 8685 
~xpath_variable_stringxpath_variable_string8686 ~xpath_variable_string() 8687 { 8688 if (value) xml_memory::deallocate(value); 8689 } 8690 8691 char_t* value; 8692 char_t name[1]; 8693 }; 8694 8695 struct xpath_variable_node_set: xpath_variable 8696 { xpath_variable_node_setxpath_variable_node_set8697 xpath_variable_node_set(): xpath_variable(xpath_type_node_set) 8698 { 8699 } 8700 8701 xpath_node_set value; 8702 char_t name[1]; 8703 }; 8704 8705 static const xpath_node_set dummy_node_set; 8706 hash_string(const char_t * str)8707 PUGI__FN PUGI__UNSIGNED_OVERFLOW unsigned int hash_string(const char_t* str) 8708 { 8709 // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time) 8710 unsigned int result = 0; 8711 8712 while (*str) 8713 { 8714 result += static_cast<unsigned int>(*str++); 8715 result += result << 10; 8716 result ^= result >> 6; 8717 } 8718 8719 result += result << 3; 8720 result ^= result >> 11; 8721 result += result << 15; 8722 8723 return result; 8724 } 8725 new_xpath_variable(const char_t * name)8726 template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name) 8727 { 8728 size_t length = strlength(name); 8729 if (length == 0) return 0; // empty variable names are invalid 8730 8731 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters 8732 void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t)); 8733 if (!memory) return 0; 8734 8735 T* result = new (memory) T(); 8736 8737 memcpy(result->name, name, (length + 1) * sizeof(char_t)); 8738 8739 return result; 8740 } 8741 new_xpath_variable(xpath_value_type type,const char_t * name)8742 PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name) 8743 { 8744 switch (type) 8745 { 8746 case xpath_type_node_set: 8747 return new_xpath_variable<xpath_variable_node_set>(name); 8748 8749 case xpath_type_number: 8750 return new_xpath_variable<xpath_variable_number>(name); 
8751 8752 case xpath_type_string: 8753 return new_xpath_variable<xpath_variable_string>(name); 8754 8755 case xpath_type_boolean: 8756 return new_xpath_variable<xpath_variable_boolean>(name); 8757 8758 default: 8759 return 0; 8760 } 8761 } 8762 delete_xpath_variable(T * var)8763 template <typename T> PUGI__FN void delete_xpath_variable(T* var) 8764 { 8765 var->~T(); 8766 xml_memory::deallocate(var); 8767 } 8768 delete_xpath_variable(xpath_value_type type,xpath_variable * var)8769 PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var) 8770 { 8771 switch (type) 8772 { 8773 case xpath_type_node_set: 8774 delete_xpath_variable(static_cast<xpath_variable_node_set*>(var)); 8775 break; 8776 8777 case xpath_type_number: 8778 delete_xpath_variable(static_cast<xpath_variable_number*>(var)); 8779 break; 8780 8781 case xpath_type_string: 8782 delete_xpath_variable(static_cast<xpath_variable_string*>(var)); 8783 break; 8784 8785 case xpath_type_boolean: 8786 delete_xpath_variable(static_cast<xpath_variable_boolean*>(var)); 8787 break; 8788 8789 default: 8790 assert(false && "Invalid variable type"); // unreachable 8791 } 8792 } 8793 copy_xpath_variable(xpath_variable * lhs,const xpath_variable * rhs)8794 PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs) 8795 { 8796 switch (rhs->type()) 8797 { 8798 case xpath_type_node_set: 8799 return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value); 8800 8801 case xpath_type_number: 8802 return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value); 8803 8804 case xpath_type_string: 8805 return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value); 8806 8807 case xpath_type_boolean: 8808 return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value); 8809 8810 default: 8811 assert(false && "Invalid variable type"); // unreachable 8812 return false; 8813 } 8814 } 8815 get_variable_scratch(char_t (& buffer)[32],xpath_variable_set * set,const 
char_t * begin,const char_t * end,xpath_variable ** out_result)8816 PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result) 8817 { 8818 size_t length = static_cast<size_t>(end - begin); 8819 char_t* scratch = buffer; 8820 8821 if (length >= sizeof(buffer) / sizeof(buffer[0])) 8822 { 8823 // need to make dummy on-heap copy 8824 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 8825 if (!scratch) return false; 8826 } 8827 8828 // copy string to zero-terminated buffer and perform lookup 8829 memcpy(scratch, begin, length * sizeof(char_t)); 8830 scratch[length] = 0; 8831 8832 *out_result = set->get(scratch); 8833 8834 // free dummy buffer 8835 if (scratch != buffer) xml_memory::deallocate(scratch); 8836 8837 return true; 8838 } 8839 PUGI__NS_END 8840 8841 // Internal node set class 8842 PUGI__NS_BEGIN xpath_get_order(const xpath_node * begin,const xpath_node * end)8843 PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end) 8844 { 8845 if (end - begin < 2) 8846 return xpath_node_set::type_sorted; 8847 8848 document_order_comparator cmp; 8849 8850 bool first = cmp(begin[0], begin[1]); 8851 8852 for (const xpath_node* it = begin + 1; it + 1 < end; ++it) 8853 if (cmp(it[0], it[1]) != first) 8854 return xpath_node_set::type_unsorted; 8855 8856 return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse; 8857 } 8858 xpath_sort(xpath_node * begin,xpath_node * end,xpath_node_set::type_t type,bool rev)8859 PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev) 8860 { 8861 xpath_node_set::type_t order = rev ? 
xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; 8862 8863 if (type == xpath_node_set::type_unsorted) 8864 { 8865 xpath_node_set::type_t sorted = xpath_get_order(begin, end); 8866 8867 if (sorted == xpath_node_set::type_unsorted) 8868 { 8869 sort(begin, end, document_order_comparator()); 8870 8871 type = xpath_node_set::type_sorted; 8872 } 8873 else 8874 type = sorted; 8875 } 8876 8877 if (type != order) reverse(begin, end); 8878 8879 return order; 8880 } 8881 xpath_first(const xpath_node * begin,const xpath_node * end,xpath_node_set::type_t type)8882 PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type) 8883 { 8884 if (begin == end) return xpath_node(); 8885 8886 switch (type) 8887 { 8888 case xpath_node_set::type_sorted: 8889 return *begin; 8890 8891 case xpath_node_set::type_sorted_reverse: 8892 return *(end - 1); 8893 8894 case xpath_node_set::type_unsorted: 8895 return *min_element(begin, end, document_order_comparator()); 8896 8897 default: 8898 assert(false && "Invalid node set type"); // unreachable 8899 return xpath_node(); 8900 } 8901 } 8902 8903 class xpath_node_set_raw 8904 { 8905 xpath_node_set::type_t _type; 8906 8907 xpath_node* _begin; 8908 xpath_node* _end; 8909 xpath_node* _eos; 8910 8911 public: xpath_node_set_raw()8912 xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0) 8913 { 8914 } 8915 begin() const8916 xpath_node* begin() const 8917 { 8918 return _begin; 8919 } 8920 end() const8921 xpath_node* end() const 8922 { 8923 return _end; 8924 } 8925 empty() const8926 bool empty() const 8927 { 8928 return _begin == _end; 8929 } 8930 size() const8931 size_t size() const 8932 { 8933 return static_cast<size_t>(_end - _begin); 8934 } 8935 first() const8936 xpath_node first() const 8937 { 8938 return xpath_first(_begin, _end, _type); 8939 } 8940 8941 void push_back_grow(const xpath_node& node, xpath_allocator* alloc); 8942 push_back(const 
xpath_node & node,xpath_allocator * alloc)8943 void push_back(const xpath_node& node, xpath_allocator* alloc) 8944 { 8945 if (_end != _eos) 8946 *_end++ = node; 8947 else 8948 push_back_grow(node, alloc); 8949 } 8950 append(const xpath_node * begin_,const xpath_node * end_,xpath_allocator * alloc)8951 void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc) 8952 { 8953 if (begin_ == end_) return; 8954 8955 size_t size_ = static_cast<size_t>(_end - _begin); 8956 size_t capacity = static_cast<size_t>(_eos - _begin); 8957 size_t count = static_cast<size_t>(end_ - begin_); 8958 8959 if (size_ + count > capacity) 8960 { 8961 // reallocate the old array or allocate a new one 8962 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node))); 8963 if (!data) return; 8964 8965 // finalize 8966 _begin = data; 8967 _end = data + size_; 8968 _eos = data + size_ + count; 8969 } 8970 8971 memcpy(_end, begin_, count * sizeof(xpath_node)); 8972 _end += count; 8973 } 8974 sort_do()8975 void sort_do() 8976 { 8977 _type = xpath_sort(_begin, _end, _type, false); 8978 } 8979 truncate(xpath_node * pos)8980 void truncate(xpath_node* pos) 8981 { 8982 assert(_begin <= pos && pos <= _end); 8983 8984 _end = pos; 8985 } 8986 remove_duplicates(xpath_allocator * alloc)8987 void remove_duplicates(xpath_allocator* alloc) 8988 { 8989 if (_type == xpath_node_set::type_unsorted && _end - _begin > 2) 8990 { 8991 xpath_allocator_capture cr(alloc); 8992 8993 size_t size_ = static_cast<size_t>(_end - _begin); 8994 8995 size_t hash_size = 1; 8996 while (hash_size < size_ + size_ / 2) hash_size *= 2; 8997 8998 const void** hash_data = static_cast<const void**>(alloc->allocate(hash_size * sizeof(void**))); 8999 if (!hash_data) return; 9000 9001 memset(hash_data, 0, hash_size * sizeof(const void**)); 9002 9003 xpath_node* write = _begin; 9004 9005 for (xpath_node* it = _begin; it != _end; ++it) 9006 { 
9007 const void* attr = it->attribute().internal_object(); 9008 const void* node = it->node().internal_object(); 9009 const void* key = attr ? attr : node; 9010 9011 if (key && hash_insert(hash_data, hash_size, key)) 9012 { 9013 *write++ = *it; 9014 } 9015 } 9016 9017 _end = write; 9018 } 9019 else 9020 { 9021 _end = unique(_begin, _end); 9022 } 9023 } 9024 type() const9025 xpath_node_set::type_t type() const 9026 { 9027 return _type; 9028 } 9029 set_type(xpath_node_set::type_t value)9030 void set_type(xpath_node_set::type_t value) 9031 { 9032 _type = value; 9033 } 9034 }; 9035 push_back_grow(const xpath_node & node,xpath_allocator * alloc)9036 PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc) 9037 { 9038 size_t capacity = static_cast<size_t>(_eos - _begin); 9039 9040 // get new capacity (1.5x rule) 9041 size_t new_capacity = capacity + capacity / 2 + 1; 9042 9043 // reallocate the old array or allocate a new one 9044 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); 9045 if (!data) return; 9046 9047 // finalize 9048 _begin = data; 9049 _end = data + capacity; 9050 _eos = data + new_capacity; 9051 9052 // push 9053 *_end++ = node; 9054 } 9055 PUGI__NS_END 9056 9057 PUGI__NS_BEGIN 9058 struct xpath_context 9059 { 9060 xpath_node n; 9061 size_t position, size; 9062 xpath_contextxpath_context9063 xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_) 9064 { 9065 } 9066 }; 9067 9068 enum lexeme_t 9069 { 9070 lex_none = 0, 9071 lex_equal, 9072 lex_not_equal, 9073 lex_less, 9074 lex_greater, 9075 lex_less_or_equal, 9076 lex_greater_or_equal, 9077 lex_plus, 9078 lex_minus, 9079 lex_multiply, 9080 lex_union, 9081 lex_var_ref, 9082 lex_open_brace, 9083 lex_close_brace, 9084 lex_quoted_string, 9085 lex_number, 9086 lex_slash, 9087 lex_double_slash, 9088 lex_open_square_brace, 
9089 lex_close_square_brace, 9090 lex_string, 9091 lex_comma, 9092 lex_axis_attribute, 9093 lex_dot, 9094 lex_double_dot, 9095 lex_double_colon, 9096 lex_eof 9097 }; 9098 9099 struct xpath_lexer_string 9100 { 9101 const char_t* begin; 9102 const char_t* end; 9103 xpath_lexer_stringxpath_lexer_string9104 xpath_lexer_string(): begin(0), end(0) 9105 { 9106 } 9107 operator ==xpath_lexer_string9108 bool operator==(const char_t* other) const 9109 { 9110 size_t length = static_cast<size_t>(end - begin); 9111 9112 return strequalrange(other, begin, length); 9113 } 9114 }; 9115 9116 class xpath_lexer 9117 { 9118 const char_t* _cur; 9119 const char_t* _cur_lexeme_pos; 9120 xpath_lexer_string _cur_lexeme_contents; 9121 9122 lexeme_t _cur_lexeme; 9123 9124 public: xpath_lexer(const char_t * query)9125 explicit xpath_lexer(const char_t* query): _cur(query) 9126 { 9127 next(); 9128 } 9129 state() const9130 const char_t* state() const 9131 { 9132 return _cur; 9133 } 9134 next()9135 void next() 9136 { 9137 const char_t* cur = _cur; 9138 9139 while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur; 9140 9141 // save lexeme position for error reporting 9142 _cur_lexeme_pos = cur; 9143 9144 switch (*cur) 9145 { 9146 case 0: 9147 _cur_lexeme = lex_eof; 9148 break; 9149 9150 case '>': 9151 if (*(cur+1) == '=') 9152 { 9153 cur += 2; 9154 _cur_lexeme = lex_greater_or_equal; 9155 } 9156 else 9157 { 9158 cur += 1; 9159 _cur_lexeme = lex_greater; 9160 } 9161 break; 9162 9163 case '<': 9164 if (*(cur+1) == '=') 9165 { 9166 cur += 2; 9167 _cur_lexeme = lex_less_or_equal; 9168 } 9169 else 9170 { 9171 cur += 1; 9172 _cur_lexeme = lex_less; 9173 } 9174 break; 9175 9176 case '!': 9177 if (*(cur+1) == '=') 9178 { 9179 cur += 2; 9180 _cur_lexeme = lex_not_equal; 9181 } 9182 else 9183 { 9184 _cur_lexeme = lex_none; 9185 } 9186 break; 9187 9188 case '=': 9189 cur += 1; 9190 _cur_lexeme = lex_equal; 9191 9192 break; 9193 9194 case '+': 9195 cur += 1; 9196 _cur_lexeme = lex_plus; 9197 9198 break; 9199 9200 
case '-': 9201 cur += 1; 9202 _cur_lexeme = lex_minus; 9203 9204 break; 9205 9206 case '*': 9207 cur += 1; 9208 _cur_lexeme = lex_multiply; 9209 9210 break; 9211 9212 case '|': 9213 cur += 1; 9214 _cur_lexeme = lex_union; 9215 9216 break; 9217 9218 case '$': 9219 cur += 1; 9220 9221 if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) 9222 { 9223 _cur_lexeme_contents.begin = cur; 9224 9225 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; 9226 9227 if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname 9228 { 9229 cur++; // : 9230 9231 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; 9232 } 9233 9234 _cur_lexeme_contents.end = cur; 9235 9236 _cur_lexeme = lex_var_ref; 9237 } 9238 else 9239 { 9240 _cur_lexeme = lex_none; 9241 } 9242 9243 break; 9244 9245 case '(': 9246 cur += 1; 9247 _cur_lexeme = lex_open_brace; 9248 9249 break; 9250 9251 case ')': 9252 cur += 1; 9253 _cur_lexeme = lex_close_brace; 9254 9255 break; 9256 9257 case '[': 9258 cur += 1; 9259 _cur_lexeme = lex_open_square_brace; 9260 9261 break; 9262 9263 case ']': 9264 cur += 1; 9265 _cur_lexeme = lex_close_square_brace; 9266 9267 break; 9268 9269 case ',': 9270 cur += 1; 9271 _cur_lexeme = lex_comma; 9272 9273 break; 9274 9275 case '/': 9276 if (*(cur+1) == '/') 9277 { 9278 cur += 2; 9279 _cur_lexeme = lex_double_slash; 9280 } 9281 else 9282 { 9283 cur += 1; 9284 _cur_lexeme = lex_slash; 9285 } 9286 break; 9287 9288 case '.': 9289 if (*(cur+1) == '.') 9290 { 9291 cur += 2; 9292 _cur_lexeme = lex_double_dot; 9293 } 9294 else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit)) 9295 { 9296 _cur_lexeme_contents.begin = cur; // . 
9297 9298 ++cur; 9299 9300 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; 9301 9302 _cur_lexeme_contents.end = cur; 9303 9304 _cur_lexeme = lex_number; 9305 } 9306 else 9307 { 9308 cur += 1; 9309 _cur_lexeme = lex_dot; 9310 } 9311 break; 9312 9313 case '@': 9314 cur += 1; 9315 _cur_lexeme = lex_axis_attribute; 9316 9317 break; 9318 9319 case '"': 9320 case '\'': 9321 { 9322 char_t terminator = *cur; 9323 9324 ++cur; 9325 9326 _cur_lexeme_contents.begin = cur; 9327 while (*cur && *cur != terminator) cur++; 9328 _cur_lexeme_contents.end = cur; 9329 9330 if (!*cur) 9331 _cur_lexeme = lex_none; 9332 else 9333 { 9334 cur += 1; 9335 _cur_lexeme = lex_quoted_string; 9336 } 9337 9338 break; 9339 } 9340 9341 case ':': 9342 if (*(cur+1) == ':') 9343 { 9344 cur += 2; 9345 _cur_lexeme = lex_double_colon; 9346 } 9347 else 9348 { 9349 _cur_lexeme = lex_none; 9350 } 9351 break; 9352 9353 default: 9354 if (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) 9355 { 9356 _cur_lexeme_contents.begin = cur; 9357 9358 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; 9359 9360 if (*cur == '.') 9361 { 9362 cur++; 9363 9364 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; 9365 } 9366 9367 _cur_lexeme_contents.end = cur; 9368 9369 _cur_lexeme = lex_number; 9370 } 9371 else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) 9372 { 9373 _cur_lexeme_contents.begin = cur; 9374 9375 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; 9376 9377 if (cur[0] == ':') 9378 { 9379 if (cur[1] == '*') // namespace test ncname:* 9380 { 9381 cur += 2; // :* 9382 } 9383 else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname 9384 { 9385 cur++; // : 9386 9387 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; 9388 } 9389 } 9390 9391 _cur_lexeme_contents.end = cur; 9392 9393 _cur_lexeme = lex_string; 9394 } 9395 else 9396 { 9397 _cur_lexeme = lex_none; 9398 } 9399 } 9400 9401 _cur = cur; 9402 } 9403 current() const9404 lexeme_t current() const 9405 { 9406 return _cur_lexeme; 9407 } 9408 current_pos() 
const9409 const char_t* current_pos() const 9410 { 9411 return _cur_lexeme_pos; 9412 } 9413 contents() const9414 const xpath_lexer_string& contents() const 9415 { 9416 assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string); 9417 9418 return _cur_lexeme_contents; 9419 } 9420 }; 9421 9422 enum ast_type_t 9423 { 9424 ast_unknown, 9425 ast_op_or, // left or right 9426 ast_op_and, // left and right 9427 ast_op_equal, // left = right 9428 ast_op_not_equal, // left != right 9429 ast_op_less, // left < right 9430 ast_op_greater, // left > right 9431 ast_op_less_or_equal, // left <= right 9432 ast_op_greater_or_equal, // left >= right 9433 ast_op_add, // left + right 9434 ast_op_subtract, // left - right 9435 ast_op_multiply, // left * right 9436 ast_op_divide, // left / right 9437 ast_op_mod, // left % right 9438 ast_op_negate, // left - right 9439 ast_op_union, // left | right 9440 ast_predicate, // apply predicate to set; next points to next predicate 9441 ast_filter, // select * from left where right 9442 ast_string_constant, // string constant 9443 ast_number_constant, // number constant 9444 ast_variable, // variable 9445 ast_func_last, // last() 9446 ast_func_position, // position() 9447 ast_func_count, // count(left) 9448 ast_func_id, // id(left) 9449 ast_func_local_name_0, // local-name() 9450 ast_func_local_name_1, // local-name(left) 9451 ast_func_namespace_uri_0, // namespace-uri() 9452 ast_func_namespace_uri_1, // namespace-uri(left) 9453 ast_func_name_0, // name() 9454 ast_func_name_1, // name(left) 9455 ast_func_string_0, // string() 9456 ast_func_string_1, // string(left) 9457 ast_func_concat, // concat(left, right, siblings) 9458 ast_func_starts_with, // starts_with(left, right) 9459 ast_func_contains, // contains(left, right) 9460 ast_func_substring_before, // substring-before(left, right) 9461 ast_func_substring_after, // substring-after(left, right) 9462 ast_func_substring_2, // 
substring(left, right) 9463 ast_func_substring_3, // substring(left, right, third) 9464 ast_func_string_length_0, // string-length() 9465 ast_func_string_length_1, // string-length(left) 9466 ast_func_normalize_space_0, // normalize-space() 9467 ast_func_normalize_space_1, // normalize-space(left) 9468 ast_func_translate, // translate(left, right, third) 9469 ast_func_boolean, // boolean(left) 9470 ast_func_not, // not(left) 9471 ast_func_true, // true() 9472 ast_func_false, // false() 9473 ast_func_lang, // lang(left) 9474 ast_func_number_0, // number() 9475 ast_func_number_1, // number(left) 9476 ast_func_sum, // sum(left) 9477 ast_func_floor, // floor(left) 9478 ast_func_ceiling, // ceiling(left) 9479 ast_func_round, // round(left) 9480 ast_step, // process set left with step 9481 ast_step_root, // select root node 9482 9483 ast_opt_translate_table, // translate(left, right, third) where right/third are constants 9484 ast_opt_compare_attribute // @name = 'string' 9485 }; 9486 9487 enum axis_t 9488 { 9489 axis_ancestor, 9490 axis_ancestor_or_self, 9491 axis_attribute, 9492 axis_child, 9493 axis_descendant, 9494 axis_descendant_or_self, 9495 axis_following, 9496 axis_following_sibling, 9497 axis_namespace, 9498 axis_parent, 9499 axis_preceding, 9500 axis_preceding_sibling, 9501 axis_self 9502 }; 9503 9504 enum nodetest_t 9505 { 9506 nodetest_none, 9507 nodetest_name, 9508 nodetest_type_node, 9509 nodetest_type_comment, 9510 nodetest_type_pi, 9511 nodetest_type_text, 9512 nodetest_pi, 9513 nodetest_all, 9514 nodetest_all_in_namespace 9515 }; 9516 9517 enum predicate_t 9518 { 9519 predicate_default, 9520 predicate_posinv, 9521 predicate_constant, 9522 predicate_constant_one 9523 }; 9524 9525 enum nodeset_eval_t 9526 { 9527 nodeset_eval_all, 9528 nodeset_eval_any, 9529 nodeset_eval_first 9530 }; 9531 9532 template <axis_t N> struct axis_to_type 9533 { 9534 static const axis_t axis; 9535 }; 9536 9537 template <axis_t N> const axis_t axis_to_type<N>::axis = N; 9538 
9539 class xpath_ast_node 9540 { 9541 private: 9542 // node type 9543 char _type; 9544 char _rettype; 9545 9546 // for ast_step 9547 char _axis; 9548 9549 // for ast_step/ast_predicate/ast_filter 9550 char _test; 9551 9552 // tree node structure 9553 xpath_ast_node* _left; 9554 xpath_ast_node* _right; 9555 xpath_ast_node* _next; 9556 9557 union 9558 { 9559 // value for ast_string_constant 9560 const char_t* string; 9561 // value for ast_number_constant 9562 double number; 9563 // variable for ast_variable 9564 xpath_variable* variable; 9565 // node test for ast_step (node name/namespace/node type/pi target) 9566 const char_t* nodetest; 9567 // table for ast_opt_translate_table 9568 const unsigned char* table; 9569 } _data; 9570 9571 xpath_ast_node(const xpath_ast_node&); 9572 xpath_ast_node& operator=(const xpath_ast_node&); 9573 compare_eq(xpath_ast_node * lhs,xpath_ast_node * rhs,const xpath_context & c,const xpath_stack & stack,const Comp & comp)9574 template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) 9575 { 9576 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); 9577 9578 if (lt != xpath_type_node_set && rt != xpath_type_node_set) 9579 { 9580 if (lt == xpath_type_boolean || rt == xpath_type_boolean) 9581 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack)); 9582 else if (lt == xpath_type_number || rt == xpath_type_number) 9583 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack)); 9584 else if (lt == xpath_type_string || rt == xpath_type_string) 9585 { 9586 xpath_allocator_capture cr(stack.result); 9587 9588 xpath_string ls = lhs->eval_string(c, stack); 9589 xpath_string rs = rhs->eval_string(c, stack); 9590 9591 return comp(ls, rs); 9592 } 9593 } 9594 else if (lt == xpath_type_node_set && rt == xpath_type_node_set) 9595 { 9596 xpath_allocator_capture cr(stack.result); 9597 9598 xpath_node_set_raw ls = 
lhs->eval_node_set(c, stack, nodeset_eval_all); 9599 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); 9600 9601 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) 9602 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) 9603 { 9604 xpath_allocator_capture cri(stack.result); 9605 9606 if (comp(string_value(*li, stack.result), string_value(*ri, stack.result))) 9607 return true; 9608 } 9609 9610 return false; 9611 } 9612 else 9613 { 9614 if (lt == xpath_type_node_set) 9615 { 9616 swap(lhs, rhs); 9617 swap(lt, rt); 9618 } 9619 9620 if (lt == xpath_type_boolean) 9621 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack)); 9622 else if (lt == xpath_type_number) 9623 { 9624 xpath_allocator_capture cr(stack.result); 9625 9626 double l = lhs->eval_number(c, stack); 9627 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); 9628 9629 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) 9630 { 9631 xpath_allocator_capture cri(stack.result); 9632 9633 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) 9634 return true; 9635 } 9636 9637 return false; 9638 } 9639 else if (lt == xpath_type_string) 9640 { 9641 xpath_allocator_capture cr(stack.result); 9642 9643 xpath_string l = lhs->eval_string(c, stack); 9644 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); 9645 9646 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) 9647 { 9648 xpath_allocator_capture cri(stack.result); 9649 9650 if (comp(l, string_value(*ri, stack.result))) 9651 return true; 9652 } 9653 9654 return false; 9655 } 9656 } 9657 9658 assert(false && "Wrong types"); // unreachable 9659 return false; 9660 } 9661 eval_once(xpath_node_set::type_t type,nodeset_eval_t eval)9662 static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval) 9663 { 9664 return type == xpath_node_set::type_sorted ? 
eval != nodeset_eval_all : eval == nodeset_eval_any; 9665 } 9666 compare_rel(xpath_ast_node * lhs,xpath_ast_node * rhs,const xpath_context & c,const xpath_stack & stack,const Comp & comp)9667 template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) 9668 { 9669 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); 9670 9671 if (lt != xpath_type_node_set && rt != xpath_type_node_set) 9672 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack)); 9673 else if (lt == xpath_type_node_set && rt == xpath_type_node_set) 9674 { 9675 xpath_allocator_capture cr(stack.result); 9676 9677 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); 9678 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); 9679 9680 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) 9681 { 9682 xpath_allocator_capture cri(stack.result); 9683 9684 double l = convert_string_to_number(string_value(*li, stack.result).c_str()); 9685 9686 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) 9687 { 9688 xpath_allocator_capture crii(stack.result); 9689 9690 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) 9691 return true; 9692 } 9693 } 9694 9695 return false; 9696 } 9697 else if (lt != xpath_type_node_set && rt == xpath_type_node_set) 9698 { 9699 xpath_allocator_capture cr(stack.result); 9700 9701 double l = lhs->eval_number(c, stack); 9702 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); 9703 9704 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) 9705 { 9706 xpath_allocator_capture cri(stack.result); 9707 9708 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) 9709 return true; 9710 } 9711 9712 return false; 9713 } 9714 else if (lt == xpath_type_node_set && rt != xpath_type_node_set) 9715 { 9716 xpath_allocator_capture cr(stack.result); 9717 9718 
xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); 9719 double r = rhs->eval_number(c, stack); 9720 9721 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) 9722 { 9723 xpath_allocator_capture cri(stack.result); 9724 9725 if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r)) 9726 return true; 9727 } 9728 9729 return false; 9730 } 9731 else 9732 { 9733 assert(false && "Wrong types"); // unreachable 9734 return false; 9735 } 9736 } 9737 apply_predicate_boolean(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack,bool once)9738 static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) 9739 { 9740 assert(ns.size() >= first); 9741 assert(expr->rettype() != xpath_type_number); 9742 9743 size_t i = 1; 9744 size_t size = ns.size() - first; 9745 9746 xpath_node* last = ns.begin() + first; 9747 9748 // remove_if... or well, sort of 9749 for (xpath_node* it = last; it != ns.end(); ++it, ++i) 9750 { 9751 xpath_context c(*it, i, size); 9752 9753 if (expr->eval_boolean(c, stack)) 9754 { 9755 *last++ = *it; 9756 9757 if (once) break; 9758 } 9759 } 9760 9761 ns.truncate(last); 9762 } 9763 apply_predicate_number(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack,bool once)9764 static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) 9765 { 9766 assert(ns.size() >= first); 9767 assert(expr->rettype() == xpath_type_number); 9768 9769 size_t i = 1; 9770 size_t size = ns.size() - first; 9771 9772 xpath_node* last = ns.begin() + first; 9773 9774 // remove_if... 
or well, sort of 9775 for (xpath_node* it = last; it != ns.end(); ++it, ++i) 9776 { 9777 xpath_context c(*it, i, size); 9778 9779 if (expr->eval_number(c, stack) == static_cast<double>(i)) 9780 { 9781 *last++ = *it; 9782 9783 if (once) break; 9784 } 9785 } 9786 9787 ns.truncate(last); 9788 } 9789 apply_predicate_number_const(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack)9790 static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) 9791 { 9792 assert(ns.size() >= first); 9793 assert(expr->rettype() == xpath_type_number); 9794 9795 size_t size = ns.size() - first; 9796 9797 xpath_node* last = ns.begin() + first; 9798 9799 xpath_context c(xpath_node(), 1, size); 9800 9801 double er = expr->eval_number(c, stack); 9802 9803 if (er >= 1.0 && er <= static_cast<double>(size)) 9804 { 9805 size_t eri = static_cast<size_t>(er); 9806 9807 if (er == static_cast<double>(eri)) 9808 { 9809 xpath_node r = last[eri - 1]; 9810 9811 *last++ = r; 9812 } 9813 } 9814 9815 ns.truncate(last); 9816 } 9817 apply_predicate(xpath_node_set_raw & ns,size_t first,const xpath_stack & stack,bool once)9818 void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once) 9819 { 9820 if (ns.size() == first) return; 9821 9822 assert(_type == ast_filter || _type == ast_predicate); 9823 9824 if (_test == predicate_constant || _test == predicate_constant_one) 9825 apply_predicate_number_const(ns, first, _right, stack); 9826 else if (_right->rettype() == xpath_type_number) 9827 apply_predicate_number(ns, first, _right, stack, once); 9828 else 9829 apply_predicate_boolean(ns, first, _right, stack, once); 9830 } 9831 apply_predicates(xpath_node_set_raw & ns,size_t first,const xpath_stack & stack,nodeset_eval_t eval)9832 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval) 9833 { 9834 if (ns.size() == first) return; 
9835 9836 bool last_once = eval_once(ns.type(), eval); 9837 9838 for (xpath_ast_node* pred = _right; pred; pred = pred->_next) 9839 pred->apply_predicate(ns, first, stack, !pred->_next && last_once); 9840 } 9841 step_push(xpath_node_set_raw & ns,xml_attribute_struct * a,xml_node_struct * parent,xpath_allocator * alloc)9842 bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc) 9843 { 9844 assert(a); 9845 9846 const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT(""); 9847 9848 switch (_test) 9849 { 9850 case nodetest_name: 9851 if (strequal(name, _data.nodetest) && is_xpath_attribute(name)) 9852 { 9853 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); 9854 return true; 9855 } 9856 break; 9857 9858 case nodetest_type_node: 9859 case nodetest_all: 9860 if (is_xpath_attribute(name)) 9861 { 9862 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); 9863 return true; 9864 } 9865 break; 9866 9867 case nodetest_all_in_namespace: 9868 if (starts_with(name, _data.nodetest) && is_xpath_attribute(name)) 9869 { 9870 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); 9871 return true; 9872 } 9873 break; 9874 9875 default: 9876 ; 9877 } 9878 9879 return false; 9880 } 9881 step_push(xpath_node_set_raw & ns,xml_node_struct * n,xpath_allocator * alloc)9882 bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc) 9883 { 9884 assert(n); 9885 9886 xml_node_type type = PUGI__NODETYPE(n); 9887 9888 switch (_test) 9889 { 9890 case nodetest_name: 9891 if (type == node_element && n->name && strequal(n->name, _data.nodetest)) 9892 { 9893 ns.push_back(xml_node(n), alloc); 9894 return true; 9895 } 9896 break; 9897 9898 case nodetest_type_node: 9899 ns.push_back(xml_node(n), alloc); 9900 return true; 9901 9902 case nodetest_type_comment: 9903 if (type == node_comment) 9904 { 9905 ns.push_back(xml_node(n), alloc); 9906 return true; 9907 } 9908 break; 9909 
9910 case nodetest_type_text: 9911 if (type == node_pcdata || type == node_cdata) 9912 { 9913 ns.push_back(xml_node(n), alloc); 9914 return true; 9915 } 9916 break; 9917 9918 case nodetest_type_pi: 9919 if (type == node_pi) 9920 { 9921 ns.push_back(xml_node(n), alloc); 9922 return true; 9923 } 9924 break; 9925 9926 case nodetest_pi: 9927 if (type == node_pi && n->name && strequal(n->name, _data.nodetest)) 9928 { 9929 ns.push_back(xml_node(n), alloc); 9930 return true; 9931 } 9932 break; 9933 9934 case nodetest_all: 9935 if (type == node_element) 9936 { 9937 ns.push_back(xml_node(n), alloc); 9938 return true; 9939 } 9940 break; 9941 9942 case nodetest_all_in_namespace: 9943 if (type == node_element && n->name && starts_with(n->name, _data.nodetest)) 9944 { 9945 ns.push_back(xml_node(n), alloc); 9946 return true; 9947 } 9948 break; 9949 9950 default: 9951 assert(false && "Unknown axis"); // unreachable 9952 } 9953 9954 return false; 9955 } 9956 step_fill(xpath_node_set_raw & ns,xml_node_struct * n,xpath_allocator * alloc,bool once,T)9957 template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T) 9958 { 9959 const axis_t axis = T::axis; 9960 9961 switch (axis) 9962 { 9963 case axis_attribute: 9964 { 9965 for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute) 9966 if (step_push(ns, a, n, alloc) & once) 9967 return; 9968 9969 break; 9970 } 9971 9972 case axis_child: 9973 { 9974 for (xml_node_struct* c = n->first_child; c; c = c->next_sibling) 9975 if (step_push(ns, c, alloc) & once) 9976 return; 9977 9978 break; 9979 } 9980 9981 case axis_descendant: 9982 case axis_descendant_or_self: 9983 { 9984 if (axis == axis_descendant_or_self) 9985 if (step_push(ns, n, alloc) & once) 9986 return; 9987 9988 xml_node_struct* cur = n->first_child; 9989 9990 while (cur) 9991 { 9992 if (step_push(ns, cur, alloc) & once) 9993 return; 9994 9995 if (cur->first_child) 9996 cur = cur->first_child; 9997 else 9998 
{ 9999 while (!cur->next_sibling) 10000 { 10001 cur = cur->parent; 10002 10003 if (cur == n) return; 10004 } 10005 10006 cur = cur->next_sibling; 10007 } 10008 } 10009 10010 break; 10011 } 10012 10013 case axis_following_sibling: 10014 { 10015 for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling) 10016 if (step_push(ns, c, alloc) & once) 10017 return; 10018 10019 break; 10020 } 10021 10022 case axis_preceding_sibling: 10023 { 10024 for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c) 10025 if (step_push(ns, c, alloc) & once) 10026 return; 10027 10028 break; 10029 } 10030 10031 case axis_following: 10032 { 10033 xml_node_struct* cur = n; 10034 10035 // exit from this node so that we don't include descendants 10036 while (!cur->next_sibling) 10037 { 10038 cur = cur->parent; 10039 10040 if (!cur) return; 10041 } 10042 10043 cur = cur->next_sibling; 10044 10045 while (cur) 10046 { 10047 if (step_push(ns, cur, alloc) & once) 10048 return; 10049 10050 if (cur->first_child) 10051 cur = cur->first_child; 10052 else 10053 { 10054 while (!cur->next_sibling) 10055 { 10056 cur = cur->parent; 10057 10058 if (!cur) return; 10059 } 10060 10061 cur = cur->next_sibling; 10062 } 10063 } 10064 10065 break; 10066 } 10067 10068 case axis_preceding: 10069 { 10070 xml_node_struct* cur = n; 10071 10072 // exit from this node so that we don't include descendants 10073 while (!cur->prev_sibling_c->next_sibling) 10074 { 10075 cur = cur->parent; 10076 10077 if (!cur) return; 10078 } 10079 10080 cur = cur->prev_sibling_c; 10081 10082 while (cur) 10083 { 10084 if (cur->first_child) 10085 cur = cur->first_child->prev_sibling_c; 10086 else 10087 { 10088 // leaf node, can't be ancestor 10089 if (step_push(ns, cur, alloc) & once) 10090 return; 10091 10092 while (!cur->prev_sibling_c->next_sibling) 10093 { 10094 cur = cur->parent; 10095 10096 if (!cur) return; 10097 10098 if (!node_is_ancestor(cur, n)) 10099 if (step_push(ns, cur, alloc) & once) 10100 
return; 10101 } 10102 10103 cur = cur->prev_sibling_c; 10104 } 10105 } 10106 10107 break; 10108 } 10109 10110 case axis_ancestor: 10111 case axis_ancestor_or_self: 10112 { 10113 if (axis == axis_ancestor_or_self) 10114 if (step_push(ns, n, alloc) & once) 10115 return; 10116 10117 xml_node_struct* cur = n->parent; 10118 10119 while (cur) 10120 { 10121 if (step_push(ns, cur, alloc) & once) 10122 return; 10123 10124 cur = cur->parent; 10125 } 10126 10127 break; 10128 } 10129 10130 case axis_self: 10131 { 10132 step_push(ns, n, alloc); 10133 10134 break; 10135 } 10136 10137 case axis_parent: 10138 { 10139 if (n->parent) 10140 step_push(ns, n->parent, alloc); 10141 10142 break; 10143 } 10144 10145 default: 10146 assert(false && "Unimplemented axis"); // unreachable 10147 } 10148 } 10149 step_fill(xpath_node_set_raw & ns,xml_attribute_struct * a,xml_node_struct * p,xpath_allocator * alloc,bool once,T v)10150 template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v) 10151 { 10152 const axis_t axis = T::axis; 10153 10154 switch (axis) 10155 { 10156 case axis_ancestor: 10157 case axis_ancestor_or_self: 10158 { 10159 if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test 10160 if (step_push(ns, a, p, alloc) & once) 10161 return; 10162 10163 xml_node_struct* cur = p; 10164 10165 while (cur) 10166 { 10167 if (step_push(ns, cur, alloc) & once) 10168 return; 10169 10170 cur = cur->parent; 10171 } 10172 10173 break; 10174 } 10175 10176 case axis_descendant_or_self: 10177 case axis_self: 10178 { 10179 if (_test == nodetest_type_node) // reject attributes based on principal node type test 10180 step_push(ns, a, p, alloc); 10181 10182 break; 10183 } 10184 10185 case axis_following: 10186 { 10187 xml_node_struct* cur = p; 10188 10189 while (cur) 10190 { 10191 if (cur->first_child) 10192 cur = cur->first_child; 10193 else 10194 { 10195 
while (!cur->next_sibling) 10196 { 10197 cur = cur->parent; 10198 10199 if (!cur) return; 10200 } 10201 10202 cur = cur->next_sibling; 10203 } 10204 10205 if (step_push(ns, cur, alloc) & once) 10206 return; 10207 } 10208 10209 break; 10210 } 10211 10212 case axis_parent: 10213 { 10214 step_push(ns, p, alloc); 10215 10216 break; 10217 } 10218 10219 case axis_preceding: 10220 { 10221 // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding 10222 step_fill(ns, p, alloc, once, v); 10223 break; 10224 } 10225 10226 default: 10227 assert(false && "Unimplemented axis"); // unreachable 10228 } 10229 } 10230 step_fill(xpath_node_set_raw & ns,const xpath_node & xn,xpath_allocator * alloc,bool once,T v)10231 template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v) 10232 { 10233 const axis_t axis = T::axis; 10234 const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self); 10235 10236 if (xn.node()) 10237 step_fill(ns, xn.node().internal_object(), alloc, once, v); 10238 else if (axis_has_attributes && xn.attribute() && xn.parent()) 10239 step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v); 10240 } 10241 step_do(const xpath_context & c,const xpath_stack & stack,nodeset_eval_t eval,T v)10242 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v) 10243 { 10244 const axis_t axis = T::axis; 10245 const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling); 10246 const xpath_node_set::type_t axis_type = axis_reverse ? 
xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; 10247 10248 bool once = 10249 (axis == axis_attribute && _test == nodetest_name) || 10250 (!_right && eval_once(axis_type, eval)) || 10251 // coverity[mixed_enums] 10252 (_right && !_right->_next && _right->_test == predicate_constant_one); 10253 10254 xpath_node_set_raw ns; 10255 ns.set_type(axis_type); 10256 10257 if (_left) 10258 { 10259 xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all); 10260 10261 // self axis preserves the original order 10262 if (axis == axis_self) ns.set_type(s.type()); 10263 10264 for (const xpath_node* it = s.begin(); it != s.end(); ++it) 10265 { 10266 size_t size = ns.size(); 10267 10268 // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes 10269 if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted); 10270 10271 step_fill(ns, *it, stack.result, once, v); 10272 if (_right) apply_predicates(ns, size, stack, eval); 10273 } 10274 } 10275 else 10276 { 10277 step_fill(ns, c.n, stack.result, once, v); 10278 if (_right) apply_predicates(ns, 0, stack, eval); 10279 } 10280 10281 // child, attribute and self axes always generate unique set of nodes 10282 // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice 10283 if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted) 10284 ns.remove_duplicates(stack.temp); 10285 10286 return ns; 10287 } 10288 10289 public: xpath_ast_node(ast_type_t type,xpath_value_type rettype_,const char_t * value)10290 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value): 10291 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) 10292 { 10293 assert(type == ast_string_constant); 10294 _data.string = value; 10295 } 
10296 xpath_ast_node(ast_type_t type,xpath_value_type rettype_,double value)10297 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value): 10298 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) 10299 { 10300 assert(type == ast_number_constant); 10301 _data.number = value; 10302 } 10303 xpath_ast_node(ast_type_t type,xpath_value_type rettype_,xpath_variable * value)10304 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value): 10305 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) 10306 { 10307 assert(type == ast_variable); 10308 _data.variable = value; 10309 } 10310 xpath_ast_node(ast_type_t type,xpath_value_type rettype_,xpath_ast_node * left=0,xpath_ast_node * right=0)10311 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0): 10312 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0) 10313 { 10314 } 10315 xpath_ast_node(ast_type_t type,xpath_ast_node * left,axis_t axis,nodetest_t test,const char_t * contents)10316 xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents): 10317 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0) 10318 { 10319 assert(type == ast_step); 10320 _data.nodetest = contents; 10321 } 10322 xpath_ast_node(ast_type_t type,xpath_ast_node * left,xpath_ast_node * right,predicate_t test)10323 xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test): 10324 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0) 10325 { 10326 assert(type == ast_filter || 
type == ast_predicate); 10327 } 10328 set_next(xpath_ast_node * value)10329 void set_next(xpath_ast_node* value) 10330 { 10331 _next = value; 10332 } 10333 set_right(xpath_ast_node * value)10334 void set_right(xpath_ast_node* value) 10335 { 10336 _right = value; 10337 } 10338 eval_boolean(const xpath_context & c,const xpath_stack & stack)10339 bool eval_boolean(const xpath_context& c, const xpath_stack& stack) 10340 { 10341 switch (_type) 10342 { 10343 case ast_op_or: 10344 return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack); 10345 10346 case ast_op_and: 10347 return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack); 10348 10349 case ast_op_equal: 10350 return compare_eq(_left, _right, c, stack, equal_to()); 10351 10352 case ast_op_not_equal: 10353 return compare_eq(_left, _right, c, stack, not_equal_to()); 10354 10355 case ast_op_less: 10356 return compare_rel(_left, _right, c, stack, less()); 10357 10358 case ast_op_greater: 10359 return compare_rel(_right, _left, c, stack, less()); 10360 10361 case ast_op_less_or_equal: 10362 return compare_rel(_left, _right, c, stack, less_equal()); 10363 10364 case ast_op_greater_or_equal: 10365 return compare_rel(_right, _left, c, stack, less_equal()); 10366 10367 case ast_func_starts_with: 10368 { 10369 xpath_allocator_capture cr(stack.result); 10370 10371 xpath_string lr = _left->eval_string(c, stack); 10372 xpath_string rr = _right->eval_string(c, stack); 10373 10374 return starts_with(lr.c_str(), rr.c_str()); 10375 } 10376 10377 case ast_func_contains: 10378 { 10379 xpath_allocator_capture cr(stack.result); 10380 10381 xpath_string lr = _left->eval_string(c, stack); 10382 xpath_string rr = _right->eval_string(c, stack); 10383 10384 return find_substring(lr.c_str(), rr.c_str()) != 0; 10385 } 10386 10387 case ast_func_boolean: 10388 return _left->eval_boolean(c, stack); 10389 10390 case ast_func_not: 10391 return !_left->eval_boolean(c, stack); 10392 10393 case ast_func_true: 10394 return 
true; 10395 10396 case ast_func_false: 10397 return false; 10398 10399 case ast_func_lang: 10400 { 10401 if (c.n.attribute()) return false; 10402 10403 xpath_allocator_capture cr(stack.result); 10404 10405 xpath_string lang = _left->eval_string(c, stack); 10406 10407 for (xml_node n = c.n.node(); n; n = n.parent()) 10408 { 10409 xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang")); 10410 10411 if (a) 10412 { 10413 const char_t* value = a.value(); 10414 10415 // strnicmp / strncasecmp is not portable 10416 for (const char_t* lit = lang.c_str(); *lit; ++lit) 10417 { 10418 if (tolower_ascii(*lit) != tolower_ascii(*value)) return false; 10419 ++value; 10420 } 10421 10422 return *value == 0 || *value == '-'; 10423 } 10424 } 10425 10426 return false; 10427 } 10428 10429 case ast_opt_compare_attribute: 10430 { 10431 const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string(); 10432 10433 xml_attribute attr = c.n.node().attribute(_left->_data.nodetest); 10434 10435 return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name()); 10436 } 10437 10438 case ast_variable: 10439 { 10440 assert(_rettype == _data.variable->type()); 10441 10442 if (_rettype == xpath_type_boolean) 10443 return _data.variable->get_boolean(); 10444 10445 // variable needs to be converted to the correct type, this is handled by the fallthrough block below 10446 break; 10447 } 10448 10449 default: 10450 ; 10451 } 10452 10453 // none of the ast types that return the value directly matched, we need to perform type conversion 10454 switch (_rettype) 10455 { 10456 case xpath_type_number: 10457 return convert_number_to_boolean(eval_number(c, stack)); 10458 10459 case xpath_type_string: 10460 { 10461 xpath_allocator_capture cr(stack.result); 10462 10463 return !eval_string(c, stack).empty(); 10464 } 10465 10466 case xpath_type_node_set: 10467 { 10468 xpath_allocator_capture cr(stack.result); 10469 10470 return !eval_node_set(c, 
stack, nodeset_eval_any).empty(); 10471 } 10472 10473 default: 10474 assert(false && "Wrong expression for return type boolean"); // unreachable 10475 return false; 10476 } 10477 } 10478 eval_number(const xpath_context & c,const xpath_stack & stack)10479 double eval_number(const xpath_context& c, const xpath_stack& stack) 10480 { 10481 switch (_type) 10482 { 10483 case ast_op_add: 10484 return _left->eval_number(c, stack) + _right->eval_number(c, stack); 10485 10486 case ast_op_subtract: 10487 return _left->eval_number(c, stack) - _right->eval_number(c, stack); 10488 10489 case ast_op_multiply: 10490 return _left->eval_number(c, stack) * _right->eval_number(c, stack); 10491 10492 case ast_op_divide: 10493 return _left->eval_number(c, stack) / _right->eval_number(c, stack); 10494 10495 case ast_op_mod: 10496 return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack)); 10497 10498 case ast_op_negate: 10499 return -_left->eval_number(c, stack); 10500 10501 case ast_number_constant: 10502 return _data.number; 10503 10504 case ast_func_last: 10505 return static_cast<double>(c.size); 10506 10507 case ast_func_position: 10508 return static_cast<double>(c.position); 10509 10510 case ast_func_count: 10511 { 10512 xpath_allocator_capture cr(stack.result); 10513 10514 return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size()); 10515 } 10516 10517 case ast_func_string_length_0: 10518 { 10519 xpath_allocator_capture cr(stack.result); 10520 10521 return static_cast<double>(string_value(c.n, stack.result).length()); 10522 } 10523 10524 case ast_func_string_length_1: 10525 { 10526 xpath_allocator_capture cr(stack.result); 10527 10528 return static_cast<double>(_left->eval_string(c, stack).length()); 10529 } 10530 10531 case ast_func_number_0: 10532 { 10533 xpath_allocator_capture cr(stack.result); 10534 10535 return convert_string_to_number(string_value(c.n, stack.result).c_str()); 10536 } 10537 10538 case ast_func_number_1: 10539 return 
_left->eval_number(c, stack); 10540 10541 case ast_func_sum: 10542 { 10543 xpath_allocator_capture cr(stack.result); 10544 10545 double r = 0; 10546 10547 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all); 10548 10549 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it) 10550 { 10551 xpath_allocator_capture cri(stack.result); 10552 10553 r += convert_string_to_number(string_value(*it, stack.result).c_str()); 10554 } 10555 10556 return r; 10557 } 10558 10559 case ast_func_floor: 10560 { 10561 double r = _left->eval_number(c, stack); 10562 10563 return r == r ? floor(r) : r; 10564 } 10565 10566 case ast_func_ceiling: 10567 { 10568 double r = _left->eval_number(c, stack); 10569 10570 return r == r ? ceil(r) : r; 10571 } 10572 10573 case ast_func_round: 10574 return round_nearest_nzero(_left->eval_number(c, stack)); 10575 10576 case ast_variable: 10577 { 10578 assert(_rettype == _data.variable->type()); 10579 10580 if (_rettype == xpath_type_number) 10581 return _data.variable->get_number(); 10582 10583 // variable needs to be converted to the correct type, this is handled by the fallthrough block below 10584 break; 10585 } 10586 10587 default: 10588 ; 10589 } 10590 10591 // none of the ast types that return the value directly matched, we need to perform type conversion 10592 switch (_rettype) 10593 { 10594 case xpath_type_boolean: 10595 return eval_boolean(c, stack) ? 
1 : 0;

	case xpath_type_string:
	{
		xpath_allocator_capture cr(stack.result);

		return convert_string_to_number(eval_string(c, stack).c_str());
	}

	case xpath_type_node_set:
	{
		xpath_allocator_capture cr(stack.result);

		return convert_string_to_number(eval_string(c, stack).c_str());
	}

	default:
		assert(false && "Wrong expression for return type number"); // unreachable
		return 0;
	}
}

// Evaluates a concat() node: _left and every node chained from _right via _next
// are evaluated as strings with the stacks swapped, their lengths are summed, and
// the pieces are copied back to back into a single buffer taken from stack.result.
// Returns an empty xpath_string when either allocation fails.
xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
{
	assert(_type == ast_func_concat);

	xpath_allocator_capture ct(stack.temp);

	// count the string number
	size_t count = 1;
	for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;

	// allocate a buffer for temporary string objects
	xpath_string* buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
	if (!buffer) return xpath_string();

	// evaluate all strings to temporary stack
	xpath_stack swapped_stack = {stack.temp, stack.result};

	buffer[0] = _left->eval_string(c, swapped_stack);

	size_t pos = 1;
	for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
	assert(pos == count);

	// get total length
	size_t length = 0;
	for (size_t i = 0; i < count; ++i) length += buffer[i].length();

	// create final string
	char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
	if (!result) return xpath_string();

	char_t* ri = result;

	// copy every piece up to (not including) its terminator
	for (size_t j = 0; j < count; ++j)
		for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
			*ri++ = *bi;

	*ri = 0;

	return xpath_string::from_heap_preallocated(result, ri);
}

// Evaluates this node as a string.
// The first switch handles node types that produce a string directly; any other
// node type (and a variable whose type is not string) leaves that switch and is
// converted in the second switch according to _rettype.
xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
{
	switch (_type)
	{
	case ast_string_constant:
		return xpath_string::from_const(_data.string);

	case ast_func_local_name_0:
	{
		xpath_node na = c.n;

		return xpath_string::from_const(local_name(na));
	}

	case ast_func_local_name_1:
	{
		xpath_allocator_capture cr(stack.result);

		xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
		xpath_node na = ns.first();

		return xpath_string::from_const(local_name(na));
	}

	case ast_func_name_0:
	{
		xpath_node na = c.n;

		return xpath_string::from_const(qualified_name(na));
	}

	case ast_func_name_1:
	{
		xpath_allocator_capture cr(stack.result);

		xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
		xpath_node na = ns.first();

		return xpath_string::from_const(qualified_name(na));
	}

	case ast_func_namespace_uri_0:
	{
		xpath_node na = c.n;

		return xpath_string::from_const(namespace_uri(na));
	}

	case ast_func_namespace_uri_1:
	{
		xpath_allocator_capture cr(stack.result);

		xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
		xpath_node na = ns.first();

		return xpath_string::from_const(namespace_uri(na));
	}

	case ast_func_string_0:
		return string_value(c.n, stack.result);

	case ast_func_string_1:
		return _left->eval_string(c, stack);

	case ast_func_concat:
		return eval_string_concat(c, stack);

	case ast_func_substring_before:
	{
		xpath_allocator_capture cr(stack.temp);

		xpath_stack swapped_stack = {stack.temp, stack.result};

		xpath_string s = _left->eval_string(c, swapped_stack);
		xpath_string p = _right->eval_string(c, swapped_stack);

		const char_t* pos = find_substring(s.c_str(), p.c_str());

		// result is the part of s in front of the match, or empty when there is no match
		return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
	}

	case ast_func_substring_after:
	{
		xpath_allocator_capture cr(stack.temp);

		xpath_stack swapped_stack = {stack.temp, stack.result};

		xpath_string s = _left->eval_string(c, swapped_stack);
		xpath_string p = _right->eval_string(c, swapped_stack);

		const char_t* pos = find_substring(s.c_str(), p.c_str());
		if (!pos) return xpath_string();

		// result is everything in s that follows the match
		const char_t* rbegin = pos + p.length();
		const char_t* rend = s.c_str() + s.length();

		return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
	}

	case ast_func_substring_2:
	{
		xpath_allocator_capture cr(stack.temp);

		xpath_stack swapped_stack = {stack.temp, stack.result};

		xpath_string s = _left->eval_string(c, swapped_stack);
		size_t s_length = s.length();

		double first = round_nearest(_right->eval_number(c, stack));

		if (is_nan(first)) return xpath_string(); // NaN
		else if (first >= static_cast<double>(s_length + 1)) return xpath_string();

		// positions are 1-based; anything below 1 starts at the first character
		size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
		assert(1 <= pos && pos <= s_length + 1);

		const char_t* rbegin = s.c_str() + (pos - 1);
		const char_t* rend = s.c_str() + s.length();

		return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
	}

	case ast_func_substring_3:
	{
		xpath_allocator_capture cr(stack.temp);

		xpath_stack swapped_stack = {stack.temp, stack.result};

		xpath_string s = _left->eval_string(c, swapped_stack);
		size_t s_length = s.length();

		// last is the 1-based position one past the final character to keep
		double first = round_nearest(_right->eval_number(c, stack));
		double last = first + round_nearest(_right->_next->eval_number(c, stack));

		if (is_nan(first) || is_nan(last)) return xpath_string();
		else if (first >= static_cast<double>(s_length + 1)) return xpath_string();
		else if (first >= last) return xpath_string();
		else if (last < 1) return xpath_string();

		// clamp both ends into [1, s_length + 1]
		size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
		size_t end = last >= static_cast<double>(s_length + 1) ? s_length + 1 : static_cast<size_t>(last);

		assert(1 <= pos && pos <= end && end <= s_length + 1);
		const char_t* rbegin = s.c_str() + (pos - 1);
		const char_t* rend = s.c_str() + (end - 1);

		// from_const is only used when the range runs to the end of s and s does not use the heap
		return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
	}

	case ast_func_normalize_space_0:
	{
		xpath_string s = string_value(c.n, stack.result);

		char_t* begin = s.data(stack.result);
		if (!begin) return xpath_string();

		char_t* end = normalize_space(begin);

		return xpath_string::from_heap_preallocated(begin, end);
	}

	case ast_func_normalize_space_1:
	{
		xpath_string s = _left->eval_string(c, stack);

		char_t* begin = s.data(stack.result);
		if (!begin) return xpath_string();

		char_t* end = normalize_space(begin);

		return xpath_string::from_heap_preallocated(begin, end);
	}

	case ast_func_translate:
	{
		xpath_allocator_capture cr(stack.temp);

		xpath_stack swapped_stack = {stack.temp, stack.result};

		// the subject string uses the caller's stack; the two mapping strings use the swapped one
		xpath_string s = _left->eval_string(c, stack);
		xpath_string from = _right->eval_string(c, swapped_stack);
		xpath_string to = _right->_next->eval_string(c, swapped_stack);

		char_t* begin = s.data(stack.result);
		if (!begin) return xpath_string();

		char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());

		return xpath_string::from_heap_preallocated(begin, end);
	}

	case ast_opt_translate_table:
	{
		xpath_string s = _left->eval_string(c, stack);

		char_t* begin = s.data(stack.result);
		if (!begin) return xpath_string();

		char_t* end = translate_table(begin, _data.table);

		return xpath_string::from_heap_preallocated(begin, end);
	}

	case ast_variable:
	{
		assert(_rettype == _data.variable->type());

		if (_rettype == xpath_type_string)
			return xpath_string::from_const(_data.variable->get_string());

		// variable needs to be converted to the correct type, this is handled by the fallthrough block below
		break;
	}

	default:
		;
	}

	// none of the ast types that return the value directly matched, we need to perform type conversion
	switch (_rettype)
	{
	case xpath_type_boolean:
		return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));

	case xpath_type_number:
		return convert_number_to_string(eval_number(c, stack), stack.result);

	case xpath_type_node_set:
	{
		xpath_allocator_capture cr(stack.temp);

		xpath_stack swapped_stack = {stack.temp, stack.result};

		// only the first node of the set contributes to the string value
		xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
		return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
	}

	default:
		assert(false && "Wrong expression for return type string"); // unreachable
		return xpath_string();
	}
}

// Evaluates this node as a node set.
// Handles union, filter, id(), steps (dispatched per axis), the root step and
// node-set variables. There is no conversion from other types to a node set, so
// any other node type ends in the assertion at the bottom.
xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)
{
	switch (_type)
	{
	case ast_op_union:
	{
		xpath_allocator_capture cr(stack.temp);

		xpath_stack swapped_stack = {stack.temp, stack.result};

		xpath_node_set_raw ls = _left->eval_node_set(c, stack, eval);
		xpath_node_set_raw rs = _right->eval_node_set(c, swapped_stack, eval);

		// we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
		ls.set_type(xpath_node_set::type_unsorted);

		ls.append(rs.begin(), rs.end(), stack.result);
		ls.remove_duplicates(stack.temp);

		return ls;
	}

	case ast_filter:
	{
		// a constant [1] predicate only needs the first node of the input set
		xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all);

		// either expression is a number or it contains position() call; sort by document order
		if (_test != predicate_posinv) set.sort_do();

		bool once = eval_once(set.type(), eval);

		apply_predicate(set, 0, stack, once);

		return set;
	}

	case ast_func_id:
		// id() always yields an empty set here
		return xpath_node_set_raw();

	case ast_step:
	{
		switch (_axis)
		{
		case axis_ancestor:
			return step_do(c, stack, eval, axis_to_type<axis_ancestor>());

		case axis_ancestor_or_self:
			return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());

		case axis_attribute:
			return step_do(c, stack, eval, axis_to_type<axis_attribute>());

		case axis_child:
			return step_do(c, stack, eval, axis_to_type<axis_child>());

		case axis_descendant:
			return step_do(c, stack, eval, axis_to_type<axis_descendant>());

		case axis_descendant_or_self:
			return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());

		case axis_following:
			return step_do(c, stack, eval, axis_to_type<axis_following>());

		case axis_following_sibling:
			return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());

		case axis_namespace:
			// namespaced axis is not supported
			return xpath_node_set_raw();

		case axis_parent:
			return step_do(c, stack, eval, axis_to_type<axis_parent>());

		case axis_preceding:
			return step_do(c, stack, eval, axis_to_type<axis_preceding>());

		case axis_preceding_sibling:
			return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());

		case axis_self:
			return step_do(c, stack, eval, axis_to_type<axis_self>());

		default:
			assert(false && "Unknown axis"); // unreachable
			return xpath_node_set_raw();
		}
	}

	case ast_step_root:
	{
		assert(!_right); // root step can't have any predicates

		xpath_node_set_raw ns;

		ns.set_type(xpath_node_set::type_sorted);

		// for an attribute context the root is reached through the attribute's parent
		if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
		else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);

		return ns;
	}

	case ast_variable:
	{
		assert(_rettype == _data.variable->type());

		if (_rettype == xpath_type_node_set)
		{
			const xpath_node_set& s = _data.variable->get_node_set();

			xpath_node_set_raw ns;

			ns.set_type(s.type());
			ns.append(s.begin(), s.end(), stack.result);

			return ns;
		}

		// variable needs to be converted to the correct type, this is handled by the fallthrough block below
		break;
	}

	default:
		;
	}

	// none of the ast types that return the value directly matched, but conversions to node set are invalid
	assert(false && "Wrong expression for return type node set"); // unreachable
	return xpath_node_set_raw();
}

// Optimizes the _left, _right and _next subtrees recursively, then this node itself.
void optimize(xpath_allocator* alloc)
{
	if (_left)
		_left->optimize(alloc);

	if (_right)
		_right->optimize(alloc);

	if (_next)
		_next->optimize(alloc);

	// coverity[var_deref_model]
	optimize_self(alloc);
}

// Applies the local rewrites to this node only (children are handled by optimize()).
// alloc is used solely to build the translate() lookup table.
void optimize_self(xpath_allocator* alloc)
{
	// Rewrite [position()=expr] with [expr]
	// Note that this step has to go before classification to recognize [position()=1]
	if ((_type == ast_filter || _type == ast_predicate) &&
		_right && // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate)
		_right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number)
	{
		_right = _right->_right;
	}

	// Classify filter/predicate ops to perform various optimizations during evaluation
	if ((_type == ast_filter || _type == ast_predicate) && _right) // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate)
	{
		assert(_test == predicate_default);

		if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
			_test = predicate_constant_one;
		else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
			_test = predicate_constant;
		else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
			_test = predicate_posinv;
	}

	// Rewrite descendant-or-self::node()/child::foo with descendant::foo
	// The former is a full form of //foo, the latter is much faster since it executes the node test immediately
	// Do a similar kind of rewrite for self/descendant/descendant-or-self axes
	// Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
	if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) &&
		_left && _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
		is_posinv_step())
	{
		if (_axis == axis_child || _axis == axis_descendant)
			_axis = axis_descendant;
		else
			_axis = axis_descendant_or_self;

		// drop the intermediate descendant-or-self::node() step
		_left = _left->_left;
	}

	// Use optimized lookup table implementation for translate() with constant arguments
	if (_type == ast_func_translate &&
		_right && // workaround for clang static analyzer (_right is never null for ast_func_translate)
		_right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
	{
		unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);

		// if no table could be produced the node is left as a regular translate() call
		if (table)
		{
			_type = ast_opt_translate_table;
			_data.table = table;
		}
	}

	// Use optimized path for @attr = 'value' or @attr = $value
	if (_type == ast_op_equal &&
		_left && _right && // workaround for clang static analyzer and Coverity (_left and _right are never null for ast_op_equal)
		// coverity[mixed_enums]
		_left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
		(_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string)))
	{
		_type = ast_opt_compare_attribute;
	}
}

// Classifies this expression for the predicate_posinv optimization:
// position() and last() yield false; constants, variables, steps, predicates and
// filters yield true; any other node yields true only if _left and every node in
// the _right/_next chain do.
bool is_posinv_expr() const
{
	switch (_type)
	{
	case ast_func_position:
	case ast_func_last:
		return false;

	case ast_string_constant:
	case ast_number_constant:
	case ast_variable:
		return true;

	case ast_step:
	case ast_step_root:
		return true;

	case ast_predicate:
	case ast_filter:
		return true;

	default:
		if (_left && !_left->is_posinv_expr()) return false;

		for (xpath_ast_node* n = _right; n; n = n->_next)
			if (!n->is_posinv_expr()) return false;

		return true;
	}
}

// Returns true when every predicate attached to this step (the _right/_next chain)
// has been classified as predicate_posinv; a step without predicates qualifies.
bool is_posinv_step() const
{
	assert(_type == ast_step);

	for (xpath_ast_node* n = _right; n; n = n->_next)
	{
		assert(n->_type == ast_predicate);

		if (n->_test != predicate_posinv)
			return false;
	}

	return true;
}

// Returns the stored _rettype converted to xpath_value_type.
xpath_value_type rettype() const
{
	return static_cast<xpath_value_type>(_rettype);
}
};

// Upper bound for xpath_parser::_depth; PUGIXML_XPATH_DEPTH_LIMIT overrides the default of 1024.
static const size_t xpath_ast_depth_limit =
#ifdef PUGIXML_XPATH_DEPTH_LIMIT
	PUGIXML_XPATH_DEPTH_LIMIT
#else
	1024
#endif
	;

// Parser that turns the lexer's token stream into a tree of xpath_ast_node objects.
// Parse functions return 0 on failure; syntax errors are recorded in *_result and
// allocation failures are flagged through the allocator's _error pointer.
struct xpath_parser
{
xpath_allocator* _alloc;
xpath_lexer _lexer;

const char_t* _query;
xpath_variable_set* _variables;

xpath_parse_result* _result;

// scratch buffer handed to get_variable_scratch / convert_string_to_number_scratch
char_t _scratch[32];

// current nesting counter, checked against xpath_ast_depth_limit
size_t _depth;

// Records message and the current lexer offset within the query in *_result; always returns 0.
xpath_ast_node* error(const char* message)
{
	_result->error = message;
	_result->offset = _lexer.current_pos() - _query;

	return 0;
}

// Flags an out-of-memory condition through the allocator's error pointer; always returns 0.
xpath_ast_node* error_oom()
{
	assert(_alloc->_error);
	*_alloc->_error = true;

	return 0;
}

// Reports that the depth limit was exceeded; always returns 0.
xpath_ast_node* error_rec()
{
	return error("Exceeded maximum allowed query depth");
}

// Allocates raw storage for one xpath_ast_node from _alloc; the result may be null.
void* alloc_node()
{
	return _alloc->allocate(sizeof(xpath_ast_node));
}

// The alloc_node overloads below each forward to the xpath_ast_node constructor
// with the same argument list, constructing the node in place, and return 0 when
// the raw allocation failed.
xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value)
{
	void* memory = alloc_node();
	return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
}

xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value)
{
	void* memory = alloc_node();
	return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
}

xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value)
{
	void* memory = alloc_node();
	return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
}

xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0)
{
	void* memory = alloc_node();
	return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0;
}

xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents)
{
	void* memory = alloc_node();
	return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0;
}

xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test)
{
	void* memory = alloc_node();
	return memory ? new (memory) xpath_ast_node(type, left, right, test) : 0;
}

// Copies the lexer string [value.begin, value.end) into a null-terminated buffer
// taken from _alloc. A lexer string with a null begin yields a static empty
// string; returns 0 when the allocation fails.
const char_t* alloc_string(const xpath_lexer_string& value)
{
	if (!value.begin)
		return PUGIXML_TEXT("");

	size_t length = static_cast<size_t>(value.end - value.begin);

	char_t* c = static_cast<char_t*>(_alloc->allocate((length + 1) * sizeof(char_t)));
	if (!c) return 0;

	memcpy(c, value.begin, length * sizeof(char_t));
	c[length] = 0;

	return c;
}

// Maps a function name plus argument count to an AST node.
// args holds the first two arguments; parse_primary_expression links any further
// arguments after the second one through set_next. Dispatch is on the first
// character of the name; a name/argc combination that matches nothing, or a
// node-set-only function applied to another type, is reported via error().
xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
{
	switch (name.begin[0])
	{
	case 'b':
		if (name == PUGIXML_TEXT("boolean") && argc == 1)
			return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]);

		break;

	case 'c':
		if (name == PUGIXML_TEXT("count") && argc == 1)
		{
			if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
			return alloc_node(ast_func_count, xpath_type_number, args[0]);
		}
		else if (name == PUGIXML_TEXT("contains") && argc == 2)
			return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
		else if (name == PUGIXML_TEXT("concat") && argc >= 2)
			return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]);
		else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
			return alloc_node(ast_func_ceiling, xpath_type_number, args[0]);

		break;

	case 'f':
		if (name == PUGIXML_TEXT("false") && argc == 0)
			return alloc_node(ast_func_false, xpath_type_boolean);
		else if (name == PUGIXML_TEXT("floor") && argc == 1)
			return alloc_node(ast_func_floor, xpath_type_number, args[0]);

		break;

	case 'i':
		if (name == PUGIXML_TEXT("id") && argc == 1)
			return alloc_node(ast_func_id, xpath_type_node_set, args[0]);

		break;

	case 'l':
		if (name == PUGIXML_TEXT("last") && argc == 0)
			return alloc_node(ast_func_last, xpath_type_number);
		else if (name == PUGIXML_TEXT("lang") && argc == 1)
			return alloc_node(ast_func_lang, xpath_type_boolean, args[0]);
		else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
		{
			if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
			return alloc_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]);
		}

		break;

	case 'n':
		if (name == PUGIXML_TEXT("name") && argc <= 1)
		{
			if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
			return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]);
		}
		else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
		{
			if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
			return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]);
		}
		else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
			return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
		else if (name == PUGIXML_TEXT("not") && argc == 1)
			return alloc_node(ast_func_not, xpath_type_boolean, args[0]);
		else if (name == PUGIXML_TEXT("number") && argc <= 1)
			return alloc_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);

		break;

	case 'p':
		if (name == PUGIXML_TEXT("position") && argc == 0)
			return alloc_node(ast_func_position, xpath_type_number);

		break;

	case 'r':
		if (name == PUGIXML_TEXT("round") && argc == 1)
			return alloc_node(ast_func_round, xpath_type_number, args[0]);

		break;

	case 's':
		if (name == PUGIXML_TEXT("string") && argc <= 1)
			return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
		else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
			return alloc_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
		else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
			return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
		else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
			return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
		else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
			return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
		else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
			return alloc_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
		else if (name == PUGIXML_TEXT("sum") && argc == 1)
		{
			if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
			return alloc_node(ast_func_sum, xpath_type_number, args[0]);
		}

		break;

	case 't':
		if (name == PUGIXML_TEXT("translate") && argc == 3)
			return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]);
		else if (name == PUGIXML_TEXT("true") && argc == 0)
			return alloc_node(ast_func_true, xpath_type_boolean);

		break;

	default:
		break;
	}

	return error("Unrecognized function or wrong parameter count");
}

// Translates an axis name into axis_t. specified is set to true when the name is
// a known axis; otherwise it is set to false and axis_child is returned.
axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
{
	specified = true;

	switch (name.begin[0])
	{
	case 'a':
		if (name == PUGIXML_TEXT("ancestor"))
			return axis_ancestor;
		else if (name == PUGIXML_TEXT("ancestor-or-self"))
			return axis_ancestor_or_self;
		else if (name == PUGIXML_TEXT("attribute"))
			return axis_attribute;

		break;

	case 'c':
		if (name == PUGIXML_TEXT("child"))
			return axis_child;

		break;

	case 'd':
		if (name == PUGIXML_TEXT("descendant"))
			return axis_descendant;
		else if (name == PUGIXML_TEXT("descendant-or-self"))
			return axis_descendant_or_self;

		break;

	case 'f':
		if (name == PUGIXML_TEXT("following"))
			return axis_following;
		else if (name == PUGIXML_TEXT("following-sibling"))
			return axis_following_sibling;

		break;

	case 'n':
		if (name == PUGIXML_TEXT("namespace"))
			return axis_namespace;

		break;

	case 'p':
		if (name == PUGIXML_TEXT("parent"))
			return axis_parent;
		else if (name == PUGIXML_TEXT("preceding"))
			return axis_preceding;
		else if (name == PUGIXML_TEXT("preceding-sibling"))
			return axis_preceding_sibling;

		break;

	case 's':
		if (name == PUGIXML_TEXT("self"))
			return axis_self;

		break;

	default:
		break;
	}

	specified = false;
	return axis_child;
}

// Translates a node type name (comment, node, processing-instruction, text) into
// the matching nodetest_t; returns nodetest_none for any other name.
nodetest_t parse_node_test_type(const xpath_lexer_string& name)
{
	switch (name.begin[0])
	{
	case 'c':
		if (name == PUGIXML_TEXT("comment"))
			return nodetest_type_comment;

		break;

	case 'n':
		if (name == PUGIXML_TEXT("node"))
			return nodetest_type_node;

		break;

	case 'p':
		if (name == PUGIXML_TEXT("processing-instruction"))
			return nodetest_type_pi;

		break;

	case 't':
		if (name == PUGIXML_TEXT("text"))
			return nodetest_type_text;

		break;

	default:
		break;
	}

	return nodetest_none;
}

// PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
xpath_ast_node* parse_primary_expression()
{
	switch (_lexer.current())
	{
	case lex_var_ref:
	{
		xpath_lexer_string name = _lexer.contents();

		if (!_variables)
			return error("Unknown variable: variable set is not provided");

		// a false return from the lookup helper is reported as out of memory; a null var means "not found"
		xpath_variable* var = 0;
		if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var))
			return error_oom();

		if (!var)
			return error("Unknown variable: variable set does not contain the given name");

		_lexer.next();

		return alloc_node(ast_variable, var->type(), var);
	}

	case lex_open_brace:
	{
		_lexer.next();

		xpath_ast_node* n = parse_expression();
		if (!n) return 0;

		if (_lexer.current() != lex_close_brace)
			return error("Expected ')' to match an opening '('");

		_lexer.next();

		return n;
	}

	case lex_quoted_string:
	{
		const char_t* value = alloc_string(_lexer.contents());
		if (!value) return 0;

		_lexer.next();

		return alloc_node(ast_string_constant, xpath_type_string, value);
	}

	case lex_number:
	{
		double value = 0;

		if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
			return error_oom();

		_lexer.next();

		return alloc_node(ast_number_constant, xpath_type_number, value);
	}

	case lex_string:
	{
		// function call: name '(' [Expr (',' Expr)*] ')'
		xpath_ast_node* args[2] = {0};
		size_t argc = 0;

		xpath_lexer_string function = _lexer.contents();
		_lexer.next();

		xpath_ast_node* last_arg = 0;

		if (_lexer.current() != lex_open_brace)
			return error("Unrecognized function call");
		_lexer.next();

		size_t old_depth = _depth;

		while (_lexer.current() != lex_close_brace)
		{
			if (argc > 0)
			{
				if (_lexer.current() != lex_comma)
					return error("No comma between function arguments");
				_lexer.next();
			}

			// each argument counts towards the depth limit
			if (++_depth > xpath_ast_depth_limit)
				return error_rec();

			xpath_ast_node* n = parse_expression();
			if (!n) return 0;

			// the first two arguments go into args; later ones are chained after the previous argument
			if (argc < 2) args[argc] = n;
			else last_arg->set_next(n);

			argc++;
			last_arg = n;
		}

		_lexer.next();

		_depth = old_depth;

		return parse_function(function, argc, args);
	}

	default:
		return error("Unrecognizable primary expression");
	}
}

// FilterExpr ::= PrimaryExpr | FilterExpr Predicate
// Predicate ::= '[' PredicateExpr ']'
// PredicateExpr ::= Expr
parse_filter_expressionxpath_parser11603 xpath_ast_node* parse_filter_expression() 11604 { 11605 xpath_ast_node* n = parse_primary_expression(); 11606 if (!n) return 0; 11607 11608 size_t old_depth = _depth; 11609 11610 while (_lexer.current() == lex_open_square_brace) 11611 { 11612 _lexer.next(); 11613 11614 if (++_depth > xpath_ast_depth_limit) 11615 return error_rec(); 11616 11617 if (n->rettype() != xpath_type_node_set) 11618 return error("Predicate has to be applied to node set"); 11619 11620 xpath_ast_node* expr = parse_expression(); 11621 if (!expr) return 0; 11622 11623 n = alloc_node(ast_filter, n, expr, predicate_default); 11624 if (!n) return 0; 11625 11626 if (_lexer.current() != lex_close_square_brace) 11627 return error("Expected ']' to match an opening '['"); 11628 11629 _lexer.next(); 11630 } 11631 11632 _depth = old_depth; 11633 11634 return n; 11635 } 11636 11637 // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep 11638 // AxisSpecifier ::= AxisName '::' | '@'? 11639 // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')' 11640 // NameTest ::= '*' | NCName ':' '*' | QName 11641 // AbbreviatedStep ::= '.' | '..' 
// Parses a single location step applied to set (set may be null for the first
// step of a relative path). Handles the '@' shorthand, the '.' and '..'
// abbreviated steps, an explicit axis, the node test, and any trailing
// predicates, which are chained from the step node's right link. Returns 0 on error.
xpath_ast_node* parse_step(xpath_ast_node* set)
{
	if (set && set->rettype() != xpath_type_node_set)
		return error("Step has to be applied to node set");

	bool axis_specified = false;
	axis_t axis = axis_child; // implied child axis

	if (_lexer.current() == lex_axis_attribute)
	{
		axis = axis_attribute;
		axis_specified = true;

		_lexer.next();
	}
	else if (_lexer.current() == lex_dot)
	{
		_lexer.next();

		if (_lexer.current() == lex_open_square_brace)
			return error("Predicates are not allowed after an abbreviated step");

		return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0);
	}
	else if (_lexer.current() == lex_double_dot)
	{
		_lexer.next();

		if (_lexer.current() == lex_open_square_brace)
			return error("Predicates are not allowed after an abbreviated step");

		return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0);
	}

	nodetest_t nt_type = nodetest_none;
	xpath_lexer_string nt_name;

	if (_lexer.current() == lex_string)
	{
		// node name test
		nt_name = _lexer.contents();
		_lexer.next();

		// was it an axis name?
		if (_lexer.current() == lex_double_colon)
		{
			// parse axis name
			if (axis_specified)
				return error("Two axis specifiers in one step");

			axis = parse_axis_name(nt_name, axis_specified);

			if (!axis_specified)
				return error("Unknown axis");

			// read actual node test
			_lexer.next();

			if (_lexer.current() == lex_multiply)
			{
				nt_type = nodetest_all;
				nt_name = xpath_lexer_string();
				_lexer.next();
			}
			else if (_lexer.current() == lex_string)
			{
				nt_name = _lexer.contents();
				_lexer.next();
			}
			else
			{
				return error("Unrecognized node test");
			}
		}

		if (nt_type == nodetest_none)
		{
			// node type test or processing-instruction
			if (_lexer.current() == lex_open_brace)
			{
				_lexer.next();

				if (_lexer.current() == lex_close_brace)
				{
					_lexer.next();

					nt_type = parse_node_test_type(nt_name);

					if (nt_type == nodetest_none)
						return error("Unrecognized node type");

					// a node type test carries no name
					nt_name = xpath_lexer_string();
				}
				else if (nt_name == PUGIXML_TEXT("processing-instruction"))
				{
					if (_lexer.current() != lex_quoted_string)
						return error("Only literals are allowed as arguments to processing-instruction()");

					// the literal becomes the name to match
					nt_type = nodetest_pi;
					nt_name = _lexer.contents();
					_lexer.next();

					if (_lexer.current() != lex_close_brace)
						return error("Unmatched brace near processing-instruction()");
					_lexer.next();
				}
				else
				{
					return error("Unmatched brace near node type test");
				}
			}
			// QName or NCName:*
			else
			{
				if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
				{
					nt_name.end--; // erase *

					nt_type = nodetest_all_in_namespace;
				}
				else
				{
					nt_type = nodetest_name;
				}
			}
		}
	}
	else if (_lexer.current() == lex_multiply)
	{
		nt_type = nodetest_all;
		_lexer.next();
	}
	else
	{
		return error("Unrecognized node test");
	}

	const char_t* nt_name_copy = alloc_string(nt_name);
	if (!nt_name_copy) return 0;

	xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy);
	if (!n) return 0;

	size_t old_depth = _depth;

	// last predicate parsed so far; 0 until the first one has been attached
	xpath_ast_node* last = 0;

	while (_lexer.current() == lex_open_square_brace)
	{
		_lexer.next();

		if (++_depth > xpath_ast_depth_limit)
			return error_rec();

		xpath_ast_node* expr = parse_expression();
		if (!expr) return 0;

		xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default);
		if (!pred) return 0;

		if (_lexer.current() != lex_close_square_brace)
			return error("Expected ']' to match an opening '['");
		_lexer.next();

		// the first predicate hangs off the step, later ones off the previous predicate
		if (last) last->set_next(pred);
		else n->set_right(pred);

		last = pred;
	}

	_depth = old_depth;

	return n;
}

// RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
{
	xpath_ast_node* n = parse_step(set);
	if (!n) return 0;

	size_t old_depth = _depth;

	while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
	{
		lexeme_t l = _lexer.current();
		_lexer.next();

		// '//' inserts an extra descendant-or-self::node() step, which also counts towards the depth
		if (l == lex_double_slash)
		{
			n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
			if (!n) return 0;

			++_depth;
		}

		if (++_depth > xpath_ast_depth_limit)
			return error_rec();

		n = parse_step(n);
		if (!n) return 0;
	}

	_depth = old_depth;

	return n;
}

// LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
// AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
xpath_ast_node* parse_location_path()
{
	if (_lexer.current() == lex_slash)
	{
		_lexer.next();

		xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
		if (!n) return 0;

		// relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
		lexeme_t l = _lexer.current();

		if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
			return parse_relative_location_path(n);
		else
			return n;
	}
	else if (_lexer.current() == lex_double_slash)
	{
		_lexer.next();

		xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
		if (!n) return 0;

		// leading '//' is the root followed by a descendant-or-self::node() step
		n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
		if (!n) return 0;

		return parse_relative_location_path(n);
	}

	// else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
	return parse_relative_location_path(0);
}

// PathExpr ::= LocationPath
//				| FilterExpr
//				| FilterExpr '/' RelativeLocationPath
//				| FilterExpr '//' RelativeLocationPath
// UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
// UnaryExpr ::= UnionExpr | '-' UnaryExpr
xpath_ast_node* parse_path_or_unary_expression()
{
	// Clarification.
	// PathExpr begins with either LocationPath or FilterExpr.
11896 // FilterExpr begins with PrimaryExpr 11897 // PrimaryExpr begins with '$' in case of it being a variable reference, 11898 // '(' in case of it being an expression, string literal, number constant or 11899 // function call. 11900 if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace || 11901 _lexer.current() == lex_quoted_string || _lexer.current() == lex_number || 11902 _lexer.current() == lex_string) 11903 { 11904 if (_lexer.current() == lex_string) 11905 { 11906 // This is either a function call, or not - if not, we shall proceed with location path 11907 const char_t* state = _lexer.state(); 11908 11909 while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state; 11910 11911 if (*state != '(') 11912 return parse_location_path(); 11913 11914 // This looks like a function call; however this still can be a node-test. Check it. 11915 if (parse_node_test_type(_lexer.contents()) != nodetest_none) 11916 return parse_location_path(); 11917 } 11918 11919 xpath_ast_node* n = parse_filter_expression(); 11920 if (!n) return 0; 11921 11922 if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) 11923 { 11924 lexeme_t l = _lexer.current(); 11925 _lexer.next(); 11926 11927 if (l == lex_double_slash) 11928 { 11929 if (n->rettype() != xpath_type_node_set) 11930 return error("Step has to be applied to node set"); 11931 11932 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); 11933 if (!n) return 0; 11934 } 11935 11936 // select from location path 11937 return parse_relative_location_path(n); 11938 } 11939 11940 return n; 11941 } 11942 else if (_lexer.current() == lex_minus) 11943 { 11944 _lexer.next(); 11945 11946 // precedence 7+ - only parses union expressions 11947 xpath_ast_node* n = parse_expression(7); 11948 if (!n) return 0; 11949 11950 return alloc_node(ast_op_negate, xpath_type_number, n); 11951 } 11952 else 11953 { 11954 return parse_location_path(); 11955 } 11956 } 11957 11958 struct binary_op_t 11959 { 
11960 ast_type_t asttype; 11961 xpath_value_type rettype; 11962 int precedence; 11963 binary_op_txpath_parser::binary_op_t11964 binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0) 11965 { 11966 } 11967 binary_op_txpath_parser::binary_op_t11968 binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_) 11969 { 11970 } 11971 parsexpath_parser::binary_op_t11972 static binary_op_t parse(xpath_lexer& lexer) 11973 { 11974 switch (lexer.current()) 11975 { 11976 case lex_string: 11977 if (lexer.contents() == PUGIXML_TEXT("or")) 11978 return binary_op_t(ast_op_or, xpath_type_boolean, 1); 11979 else if (lexer.contents() == PUGIXML_TEXT("and")) 11980 return binary_op_t(ast_op_and, xpath_type_boolean, 2); 11981 else if (lexer.contents() == PUGIXML_TEXT("div")) 11982 return binary_op_t(ast_op_divide, xpath_type_number, 6); 11983 else if (lexer.contents() == PUGIXML_TEXT("mod")) 11984 return binary_op_t(ast_op_mod, xpath_type_number, 6); 11985 else 11986 return binary_op_t(); 11987 11988 case lex_equal: 11989 return binary_op_t(ast_op_equal, xpath_type_boolean, 3); 11990 11991 case lex_not_equal: 11992 return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3); 11993 11994 case lex_less: 11995 return binary_op_t(ast_op_less, xpath_type_boolean, 4); 11996 11997 case lex_greater: 11998 return binary_op_t(ast_op_greater, xpath_type_boolean, 4); 11999 12000 case lex_less_or_equal: 12001 return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4); 12002 12003 case lex_greater_or_equal: 12004 return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4); 12005 12006 case lex_plus: 12007 return binary_op_t(ast_op_add, xpath_type_number, 5); 12008 12009 case lex_minus: 12010 return binary_op_t(ast_op_subtract, xpath_type_number, 5); 12011 12012 case lex_multiply: 12013 return binary_op_t(ast_op_multiply, xpath_type_number, 6); 12014 12015 case lex_union: 12016 return 
binary_op_t(ast_op_union, xpath_type_node_set, 7); 12017 12018 default: 12019 return binary_op_t(); 12020 } 12021 } 12022 }; 12023 parse_expression_recxpath_parser12024 xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit) 12025 { 12026 binary_op_t op = binary_op_t::parse(_lexer); 12027 12028 while (op.asttype != ast_unknown && op.precedence >= limit) 12029 { 12030 _lexer.next(); 12031 12032 if (++_depth > xpath_ast_depth_limit) 12033 return error_rec(); 12034 12035 xpath_ast_node* rhs = parse_path_or_unary_expression(); 12036 if (!rhs) return 0; 12037 12038 binary_op_t nextop = binary_op_t::parse(_lexer); 12039 12040 while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence) 12041 { 12042 rhs = parse_expression_rec(rhs, nextop.precedence); 12043 if (!rhs) return 0; 12044 12045 nextop = binary_op_t::parse(_lexer); 12046 } 12047 12048 if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set)) 12049 return error("Union operator has to be applied to node sets"); 12050 12051 lhs = alloc_node(op.asttype, op.rettype, lhs, rhs); 12052 if (!lhs) return 0; 12053 12054 op = binary_op_t::parse(_lexer); 12055 } 12056 12057 return lhs; 12058 } 12059 12060 // Expr ::= OrExpr 12061 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr 12062 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr 12063 // EqualityExpr ::= RelationalExpr 12064 // | EqualityExpr '=' RelationalExpr 12065 // | EqualityExpr '!=' RelationalExpr 12066 // RelationalExpr ::= AdditiveExpr 12067 // | RelationalExpr '<' AdditiveExpr 12068 // | RelationalExpr '>' AdditiveExpr 12069 // | RelationalExpr '<=' AdditiveExpr 12070 // | RelationalExpr '>=' AdditiveExpr 12071 // AdditiveExpr ::= MultiplicativeExpr 12072 // | AdditiveExpr '+' MultiplicativeExpr 12073 // | AdditiveExpr '-' MultiplicativeExpr 12074 // MultiplicativeExpr ::= UnaryExpr 12075 // | MultiplicativeExpr '*' UnaryExpr 12076 // | MultiplicativeExpr 'div' UnaryExpr 
12077 // | MultiplicativeExpr 'mod' UnaryExpr parse_expressionxpath_parser12078 xpath_ast_node* parse_expression(int limit = 0) 12079 { 12080 size_t old_depth = _depth; 12081 12082 if (++_depth > xpath_ast_depth_limit) 12083 return error_rec(); 12084 12085 xpath_ast_node* n = parse_path_or_unary_expression(); 12086 if (!n) return 0; 12087 12088 n = parse_expression_rec(n, limit); 12089 12090 _depth = old_depth; 12091 12092 return n; 12093 } 12094 xpath_parserxpath_parser12095 xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result), _depth(0) 12096 { 12097 } 12098 parsexpath_parser12099 xpath_ast_node* parse() 12100 { 12101 xpath_ast_node* n = parse_expression(); 12102 if (!n) return 0; 12103 12104 assert(_depth == 0); 12105 12106 // check if there are unparsed tokens left 12107 if (_lexer.current() != lex_eof) 12108 return error("Incorrect query"); 12109 12110 return n; 12111 } 12112 parsexpath_parser12113 static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result) 12114 { 12115 xpath_parser parser(query, variables, alloc, result); 12116 12117 return parser.parse(); 12118 } 12119 }; 12120 12121 struct xpath_query_impl 12122 { createxpath_query_impl12123 static xpath_query_impl* create() 12124 { 12125 void* memory = xml_memory::allocate(sizeof(xpath_query_impl)); 12126 if (!memory) return 0; 12127 12128 return new (memory) xpath_query_impl(); 12129 } 12130 destroyxpath_query_impl12131 static void destroy(xpath_query_impl* impl) 12132 { 12133 // free all allocated pages 12134 impl->alloc.release(); 12135 12136 // free allocator memory (with the first page) 12137 xml_memory::deallocate(impl); 12138 } 12139 xpath_query_implxpath_query_impl12140 xpath_query_impl(): root(0), alloc(&block, &oom), oom(false) 12141 { 12142 block.next = 0; 12143 block.capacity 
= sizeof(block.data); 12144 } 12145 12146 xpath_ast_node* root; 12147 xpath_allocator alloc; 12148 xpath_memory_block block; 12149 bool oom; 12150 }; 12151 evaluate_node_set_prepare(xpath_query_impl * impl)12152 PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl) 12153 { 12154 if (!impl) return 0; 12155 12156 if (impl->root->rettype() != xpath_type_node_set) 12157 { 12158 #ifdef PUGIXML_NO_EXCEPTIONS 12159 return 0; 12160 #else 12161 xpath_parse_result res; 12162 res.error = "Expression does not evaluate to node set"; 12163 12164 throw xpath_exception(res); 12165 #endif 12166 } 12167 12168 return impl->root; 12169 } 12170 PUGI__NS_END 12171 12172 namespace pugi 12173 { 12174 #ifndef PUGIXML_NO_EXCEPTIONS xpath_exception(const xpath_parse_result & result_)12175 PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_) 12176 { 12177 assert(_result.error); 12178 } 12179 what() const12180 PUGI__FN const char* xpath_exception::what() const throw() 12181 { 12182 return _result.error; 12183 } 12184 result() const12185 PUGI__FN const xpath_parse_result& xpath_exception::result() const 12186 { 12187 return _result; 12188 } 12189 #endif 12190 xpath_node()12191 PUGI__FN xpath_node::xpath_node() 12192 { 12193 } 12194 xpath_node(const xml_node & node_)12195 PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_) 12196 { 12197 } 12198 xpath_node(const xml_attribute & attribute_,const xml_node & parent_)12199 PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_) 12200 { 12201 } 12202 node() const12203 PUGI__FN xml_node xpath_node::node() const 12204 { 12205 return _attribute ? 
xml_node() : _node; 12206 } 12207 attribute() const12208 PUGI__FN xml_attribute xpath_node::attribute() const 12209 { 12210 return _attribute; 12211 } 12212 parent() const12213 PUGI__FN xml_node xpath_node::parent() const 12214 { 12215 return _attribute ? _node : _node.parent(); 12216 } 12217 unspecified_bool_xpath_node(xpath_node ***)12218 PUGI__FN static void unspecified_bool_xpath_node(xpath_node***) 12219 { 12220 } 12221 operator xpath_node::unspecified_bool_type() const12222 PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const 12223 { 12224 return (_node || _attribute) ? unspecified_bool_xpath_node : 0; 12225 } 12226 operator !() const12227 PUGI__FN bool xpath_node::operator!() const 12228 { 12229 return !(_node || _attribute); 12230 } 12231 operator ==(const xpath_node & n) const12232 PUGI__FN bool xpath_node::operator==(const xpath_node& n) const 12233 { 12234 return _node == n._node && _attribute == n._attribute; 12235 } 12236 operator !=(const xpath_node & n) const12237 PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const 12238 { 12239 return _node != n._node || _attribute != n._attribute; 12240 } 12241 12242 #ifdef __BORLANDC__ operator &&(const xpath_node & lhs,bool rhs)12243 PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs) 12244 { 12245 return (bool)lhs && rhs; 12246 } 12247 operator ||(const xpath_node & lhs,bool rhs)12248 PUGI__FN bool operator||(const xpath_node& lhs, bool rhs) 12249 { 12250 return (bool)lhs || rhs; 12251 } 12252 #endif 12253 _assign(const_iterator begin_,const_iterator end_,type_t type_)12254 PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_) 12255 { 12256 assert(begin_ <= end_); 12257 12258 size_t size_ = static_cast<size_t>(end_ - begin_); 12259 12260 // use internal buffer for 0 or 1 elements, heap buffer otherwise 12261 xpath_node* storage = (size_ <= 1) ? 
_storage : static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node))); 12262 12263 if (!storage) 12264 { 12265 #ifdef PUGIXML_NO_EXCEPTIONS 12266 return; 12267 #else 12268 throw std::bad_alloc(); 12269 #endif 12270 } 12271 12272 // deallocate old buffer 12273 if (_begin != _storage) 12274 impl::xml_memory::deallocate(_begin); 12275 12276 // size check is necessary because for begin_ = end_ = nullptr, memcpy is UB 12277 if (size_) 12278 memcpy(storage, begin_, size_ * sizeof(xpath_node)); 12279 12280 _begin = storage; 12281 _end = storage + size_; 12282 _type = type_; 12283 } 12284 12285 #ifdef PUGIXML_HAS_MOVE _move(xpath_node_set & rhs)12286 PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) PUGIXML_NOEXCEPT 12287 { 12288 _type = rhs._type; 12289 _storage[0] = rhs._storage[0]; 12290 _begin = (rhs._begin == rhs._storage) ? _storage : rhs._begin; 12291 _end = _begin + (rhs._end - rhs._begin); 12292 12293 rhs._type = type_unsorted; 12294 rhs._begin = rhs._storage; 12295 rhs._end = rhs._storage; 12296 } 12297 #endif 12298 xpath_node_set()12299 PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(_storage), _end(_storage) 12300 { 12301 } 12302 xpath_node_set(const_iterator begin_,const_iterator end_,type_t type_)12303 PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(_storage), _end(_storage) 12304 { 12305 _assign(begin_, end_, type_); 12306 } 12307 ~xpath_node_set()12308 PUGI__FN xpath_node_set::~xpath_node_set() 12309 { 12310 if (_begin != _storage) 12311 impl::xml_memory::deallocate(_begin); 12312 } 12313 xpath_node_set(const xpath_node_set & ns)12314 PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(_storage), _end(_storage) 12315 { 12316 _assign(ns._begin, ns._end, ns._type); 12317 } 12318 operator =(const xpath_node_set & ns)12319 PUGI__FN xpath_node_set& xpath_node_set::operator=(const 
xpath_node_set& ns) 12320 { 12321 if (this == &ns) return *this; 12322 12323 _assign(ns._begin, ns._end, ns._type); 12324 12325 return *this; 12326 } 12327 12328 #ifdef PUGIXML_HAS_MOVE xpath_node_set(xpath_node_set && rhs)12329 PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT: _type(type_unsorted), _begin(_storage), _end(_storage) 12330 { 12331 _move(rhs); 12332 } 12333 operator =(xpath_node_set && rhs)12334 PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT 12335 { 12336 if (this == &rhs) return *this; 12337 12338 if (_begin != _storage) 12339 impl::xml_memory::deallocate(_begin); 12340 12341 _move(rhs); 12342 12343 return *this; 12344 } 12345 #endif 12346 type() const12347 PUGI__FN xpath_node_set::type_t xpath_node_set::type() const 12348 { 12349 return _type; 12350 } 12351 size() const12352 PUGI__FN size_t xpath_node_set::size() const 12353 { 12354 return _end - _begin; 12355 } 12356 empty() const12357 PUGI__FN bool xpath_node_set::empty() const 12358 { 12359 return _begin == _end; 12360 } 12361 operator [](size_t index) const12362 PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const 12363 { 12364 assert(index < size()); 12365 return _begin[index]; 12366 } 12367 begin() const12368 PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const 12369 { 12370 return _begin; 12371 } 12372 end() const12373 PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const 12374 { 12375 return _end; 12376 } 12377 sort(bool reverse)12378 PUGI__FN void xpath_node_set::sort(bool reverse) 12379 { 12380 _type = impl::xpath_sort(_begin, _end, _type, reverse); 12381 } 12382 first() const12383 PUGI__FN xpath_node xpath_node_set::first() const 12384 { 12385 return impl::xpath_first(_begin, _end, _type); 12386 } 12387 xpath_parse_result()12388 PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0) 12389 { 12390 } 12391 operator bool() const12392 
// True when the result carries no error message.
PUGI__FN xpath_parse_result::operator bool() const
{
	return error == 0;
}

// Error message, or "No error" when there is none.
PUGI__FN const char* xpath_parse_result::description() const
{
	return error ? error : "No error";
}

// Base part of a variable: remembers the value type and starts with no successor in its chain.
PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0)
{
}

// Name of the variable. The name is stored in the concrete impl::xpath_variable_* object,
// so the stored type selects which one to cast to.
PUGI__FN const char_t* xpath_variable::name() const
{
	switch (_type)
	{
	case xpath_type_node_set:
		return static_cast<const impl::xpath_variable_node_set*>(this)->name;

	case xpath_type_number:
		return static_cast<const impl::xpath_variable_number*>(this)->name;

	case xpath_type_string:
		return static_cast<const impl::xpath_variable_string*>(this)->name;

	case xpath_type_boolean:
		return static_cast<const impl::xpath_variable_boolean*>(this)->name;

	default:
		assert(false && "Invalid variable type"); // unreachable
		return 0;
	}
}

// Value type the variable was created with.
PUGI__FN xpath_value_type xpath_variable::type() const
{
	return _type;
}

// Boolean value, or false when the variable is not a boolean.
PUGI__FN bool xpath_variable::get_boolean() const
{
	return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
}

// Numeric value, or the result of impl::gen_nan() when the variable is not a number.
PUGI__FN double xpath_variable::get_number() const
{
	return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
}

// String value; an empty string when the variable is not a string or has no value stored.
PUGI__FN const char_t* xpath_variable::get_string() const
{
	const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
	return value ? value : PUGIXML_TEXT("");
}

// Node set value, or impl::dummy_node_set when the variable is not a node set.
PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
{
	return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
}

// Stores a boolean; returns false (and changes nothing) on type mismatch.
PUGI__FN bool xpath_variable::set(bool value)
{
	if (_type != xpath_type_boolean) return false;

	static_cast<impl::xpath_variable_boolean*>(this)->value = value;
	return true;
}

// Stores a number; returns false (and changes nothing) on type mismatch.
PUGI__FN bool xpath_variable::set(double value)
{
	if (_type != xpath_type_number) return false;

	static_cast<impl::xpath_variable_number*>(this)->value = value;
	return true;
}

// Stores a copy of `value`; returns false on type mismatch or when the copy cannot be allocated,
// in which case the previous string is kept.
PUGI__FN bool xpath_variable::set(const char_t* value)
{
	if (_type != xpath_type_string) return false;

	impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);

	// duplicate string (size includes the terminator)
	size_t size = (impl::strlength(value) + 1) * sizeof(char_t);

	char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
	if (!copy) return false;

	memcpy(copy, value, size);

	// replace old string
	if (var->value) impl::xml_memory::deallocate(var->value);
	var->value = copy;

	return true;
}

// Stores a copy of the node set; returns false (and changes nothing) on type mismatch.
PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
{
	if (_type != xpath_type_node_set) return false;

	static_cast<impl::xpath_variable_node_set*>(this)->value = value;
	return true;
}

// Empty variable set: every hash bucket starts as an empty chain.
PUGI__FN xpath_variable_set::xpath_variable_set()
{
	for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
		_data[i] = 0;
}

// Destroys every variable in every bucket.
PUGI__FN xpath_variable_set::~xpath_variable_set()
{
	for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
		_destroy(_data[i]);
}

// Copy construction: starts empty, then clones `rhs`; if cloning fails the set stays empty.
PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
{
	for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
		_data[i] = 0;

	_assign(rhs);
}

// Copy assignment; assigning a set to itself is a no-op.
PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
{
	if (this == &rhs) return *this;

	_assign(rhs);

	return *this;
}

#ifdef PUGIXML_HAS_MOVE
// Move construction: takes over every bucket chain of `rhs`, leaving `rhs` empty.
PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
{
	for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
	{
		_data[i] = rhs._data[i];
		rhs._data[i] = 0;
	}
}

// Move assignment: destroys the current variables and takes over the chains of `rhs`, leaving `rhs` empty.
PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
{
	// moving a set into itself must be a no-op: without this check the loop below would destroy
	// every chain and then store the just-freed pointers before clearing them, emptying the set
	if (this == &rhs) return *this;

	for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
	{
		_destroy(_data[i]);

		_data[i] = rhs._data[i];
		rhs._data[i] = 0;
	}

	return *this;
}
#endif

// Replaces the contents with a clone of `rhs`. The clone is built in a temporary set and swapped in
// only when every chain was cloned; on failure this set is left unchanged and the partial clone is
// released by the temporary's destructor.
PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
{
	xpath_variable_set temp;

	for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
		if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i]))
			return;

	_swap(temp);
}

// Exchanges the bucket chains of the two sets.
PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
{
	for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
	{
		xpath_variable* chain = _data[i];

		_data[i] = rhs._data[i];
		rhs._data[i] = chain;
	}
}
const12574 PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const 12575 { 12576 const size_t hash_size = sizeof(_data) / sizeof(_data[0]); 12577 size_t hash = impl::hash_string(name) % hash_size; 12578 12579 // look for existing variable 12580 for (xpath_variable* var = _data[hash]; var; var = var->_next) 12581 if (impl::strequal(var->name(), name)) 12582 return var; 12583 12584 return 0; 12585 } 12586 _clone(xpath_variable * var,xpath_variable ** out_result)12587 PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result) 12588 { 12589 xpath_variable* last = 0; 12590 12591 while (var) 12592 { 12593 // allocate storage for new variable 12594 xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name()); 12595 if (!nvar) return false; 12596 12597 // link the variable to the result immediately to handle failures gracefully 12598 if (last) 12599 last->_next = nvar; 12600 else 12601 *out_result = nvar; 12602 12603 last = nvar; 12604 12605 // copy the value; this can fail due to out-of-memory conditions 12606 if (!impl::copy_xpath_variable(nvar, var)) return false; 12607 12608 var = var->_next; 12609 } 12610 12611 return true; 12612 } 12613 _destroy(xpath_variable * var)12614 PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var) 12615 { 12616 while (var) 12617 { 12618 xpath_variable* next = var->_next; 12619 12620 impl::delete_xpath_variable(var->_type, var); 12621 12622 var = next; 12623 } 12624 } 12625 add(const char_t * name,xpath_value_type type)12626 PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type) 12627 { 12628 const size_t hash_size = sizeof(_data) / sizeof(_data[0]); 12629 size_t hash = impl::hash_string(name) % hash_size; 12630 12631 // look for existing variable 12632 for (xpath_variable* var = _data[hash]; var; var = var->_next) 12633 if (impl::strequal(var->name(), name)) 12634 return var->type() == type ? 
var : 0; 12635 12636 // add new variable 12637 xpath_variable* result = impl::new_xpath_variable(type, name); 12638 12639 if (result) 12640 { 12641 result->_next = _data[hash]; 12642 12643 _data[hash] = result; 12644 } 12645 12646 return result; 12647 } 12648 set(const char_t * name,bool value)12649 PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value) 12650 { 12651 xpath_variable* var = add(name, xpath_type_boolean); 12652 return var ? var->set(value) : false; 12653 } 12654 set(const char_t * name,double value)12655 PUGI__FN bool xpath_variable_set::set(const char_t* name, double value) 12656 { 12657 xpath_variable* var = add(name, xpath_type_number); 12658 return var ? var->set(value) : false; 12659 } 12660 set(const char_t * name,const char_t * value)12661 PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value) 12662 { 12663 xpath_variable* var = add(name, xpath_type_string); 12664 return var ? var->set(value) : false; 12665 } 12666 set(const char_t * name,const xpath_node_set & value)12667 PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value) 12668 { 12669 xpath_variable* var = add(name, xpath_type_node_set); 12670 return var ? 
var->set(value) : false; 12671 } 12672 get(const char_t * name)12673 PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name) 12674 { 12675 return _find(name); 12676 } 12677 get(const char_t * name) const12678 PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const 12679 { 12680 return _find(name); 12681 } 12682 xpath_query(const char_t * query,xpath_variable_set * variables)12683 PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0) 12684 { 12685 impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create(); 12686 12687 if (!qimpl) 12688 { 12689 #ifdef PUGIXML_NO_EXCEPTIONS 12690 _result.error = "Out of memory"; 12691 #else 12692 throw std::bad_alloc(); 12693 #endif 12694 } 12695 else 12696 { 12697 using impl::auto_deleter; // MSVC7 workaround 12698 auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy); 12699 12700 qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result); 12701 12702 if (qimpl->root) 12703 { 12704 qimpl->root->optimize(&qimpl->alloc); 12705 12706 _impl = impl.release(); 12707 _result.error = 0; 12708 } 12709 else 12710 { 12711 #ifdef PUGIXML_NO_EXCEPTIONS 12712 if (qimpl->oom) _result.error = "Out of memory"; 12713 #else 12714 if (qimpl->oom) throw std::bad_alloc(); 12715 throw xpath_exception(_result); 12716 #endif 12717 } 12718 } 12719 } 12720 xpath_query()12721 PUGI__FN xpath_query::xpath_query(): _impl(0) 12722 { 12723 } 12724 ~xpath_query()12725 PUGI__FN xpath_query::~xpath_query() 12726 { 12727 if (_impl) 12728 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl)); 12729 } 12730 12731 #ifdef PUGIXML_HAS_MOVE xpath_query(xpath_query && rhs)12732 PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT 12733 { 12734 _impl = rhs._impl; 12735 _result = rhs._result; 12736 rhs._impl = 0; 12737 rhs._result = xpath_parse_result(); 12738 } 12739 operator =(xpath_query && rhs)12740 
PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT 12741 { 12742 if (this == &rhs) return *this; 12743 12744 if (_impl) 12745 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl)); 12746 12747 _impl = rhs._impl; 12748 _result = rhs._result; 12749 rhs._impl = 0; 12750 rhs._result = xpath_parse_result(); 12751 12752 return *this; 12753 } 12754 #endif 12755 return_type() const12756 PUGI__FN xpath_value_type xpath_query::return_type() const 12757 { 12758 if (!_impl) return xpath_type_none; 12759 12760 return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype(); 12761 } 12762 evaluate_boolean(const xpath_node & n) const12763 PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const 12764 { 12765 if (!_impl) return false; 12766 12767 impl::xpath_context c(n, 1, 1); 12768 impl::xpath_stack_data sd; 12769 12770 bool r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack); 12771 12772 if (sd.oom) 12773 { 12774 #ifdef PUGIXML_NO_EXCEPTIONS 12775 return false; 12776 #else 12777 throw std::bad_alloc(); 12778 #endif 12779 } 12780 12781 return r; 12782 } 12783 evaluate_number(const xpath_node & n) const12784 PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const 12785 { 12786 if (!_impl) return impl::gen_nan(); 12787 12788 impl::xpath_context c(n, 1, 1); 12789 impl::xpath_stack_data sd; 12790 12791 double r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack); 12792 12793 if (sd.oom) 12794 { 12795 #ifdef PUGIXML_NO_EXCEPTIONS 12796 return impl::gen_nan(); 12797 #else 12798 throw std::bad_alloc(); 12799 #endif 12800 } 12801 12802 return r; 12803 } 12804 12805 #ifndef PUGIXML_NO_STL evaluate_string(const xpath_node & n) const12806 PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const 12807 { 12808 if (!_impl) return string_t(); 12809 12810 impl::xpath_context c(n, 1, 1); 12811 impl::xpath_stack_data sd; 12812 12813 
impl::xpath_string r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack); 12814 12815 if (sd.oom) 12816 { 12817 #ifdef PUGIXML_NO_EXCEPTIONS 12818 return string_t(); 12819 #else 12820 throw std::bad_alloc(); 12821 #endif 12822 } 12823 12824 return string_t(r.c_str(), r.length()); 12825 } 12826 #endif 12827 evaluate_string(char_t * buffer,size_t capacity,const xpath_node & n) const12828 PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const 12829 { 12830 impl::xpath_context c(n, 1, 1); 12831 impl::xpath_stack_data sd; 12832 12833 impl::xpath_string r = _impl ? static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack) : impl::xpath_string(); 12834 12835 if (sd.oom) 12836 { 12837 #ifdef PUGIXML_NO_EXCEPTIONS 12838 r = impl::xpath_string(); 12839 #else 12840 throw std::bad_alloc(); 12841 #endif 12842 } 12843 12844 size_t full_size = r.length() + 1; 12845 12846 if (capacity > 0) 12847 { 12848 size_t size = (full_size < capacity) ? 
full_size : capacity; 12849 assert(size > 0); 12850 12851 memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t)); 12852 buffer[size - 1] = 0; 12853 } 12854 12855 return full_size; 12856 } 12857 evaluate_node_set(const xpath_node & n) const12858 PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const 12859 { 12860 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl)); 12861 if (!root) return xpath_node_set(); 12862 12863 impl::xpath_context c(n, 1, 1); 12864 impl::xpath_stack_data sd; 12865 12866 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all); 12867 12868 if (sd.oom) 12869 { 12870 #ifdef PUGIXML_NO_EXCEPTIONS 12871 return xpath_node_set(); 12872 #else 12873 throw std::bad_alloc(); 12874 #endif 12875 } 12876 12877 return xpath_node_set(r.begin(), r.end(), r.type()); 12878 } 12879 evaluate_node(const xpath_node & n) const12880 PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const 12881 { 12882 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl)); 12883 if (!root) return xpath_node(); 12884 12885 impl::xpath_context c(n, 1, 1); 12886 impl::xpath_stack_data sd; 12887 12888 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first); 12889 12890 if (sd.oom) 12891 { 12892 #ifdef PUGIXML_NO_EXCEPTIONS 12893 return xpath_node(); 12894 #else 12895 throw std::bad_alloc(); 12896 #endif 12897 } 12898 12899 return r.first(); 12900 } 12901 result() const12902 PUGI__FN const xpath_parse_result& xpath_query::result() const 12903 { 12904 return _result; 12905 } 12906 unspecified_bool_xpath_query(xpath_query ***)12907 PUGI__FN static void unspecified_bool_xpath_query(xpath_query***) 12908 { 12909 } 12910 operator xpath_query::unspecified_bool_type() const12911 PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const 12912 { 12913 return _impl ? 
unspecified_bool_xpath_query : 0; 12914 } 12915 operator !() const12916 PUGI__FN bool xpath_query::operator!() const 12917 { 12918 return !_impl; 12919 } 12920 select_node(const char_t * query,xpath_variable_set * variables) const12921 PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const 12922 { 12923 xpath_query q(query, variables); 12924 return q.evaluate_node(*this); 12925 } 12926 select_node(const xpath_query & query) const12927 PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const 12928 { 12929 return query.evaluate_node(*this); 12930 } 12931 select_nodes(const char_t * query,xpath_variable_set * variables) const12932 PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const 12933 { 12934 xpath_query q(query, variables); 12935 return q.evaluate_node_set(*this); 12936 } 12937 select_nodes(const xpath_query & query) const12938 PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const 12939 { 12940 return query.evaluate_node_set(*this); 12941 } 12942 select_single_node(const char_t * query,xpath_variable_set * variables) const12943 PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const 12944 { 12945 xpath_query q(query, variables); 12946 return q.evaluate_node(*this); 12947 } 12948 select_single_node(const xpath_query & query) const12949 PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const 12950 { 12951 return query.evaluate_node(*this); 12952 } 12953 } 12954 12955 #endif 12956 12957 #ifdef __BORLANDC__ 12958 # pragma option pop 12959 #endif 12960 12961 // Intel C++ does not properly keep warning state for function templates, 12962 // so popping warning state at the end of translation unit leads to warnings in the middle. 
12963 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) 12964 # pragma warning(pop) 12965 #endif 12966 12967 #if defined(_MSC_VER) && defined(__c2__) 12968 # pragma clang diagnostic pop 12969 #endif 12970 12971 // Undefine all local macros (makes sure we're not leaking macros in header-only mode) 12972 #undef PUGI__NO_INLINE 12973 #undef PUGI__UNLIKELY 12974 #undef PUGI__STATIC_ASSERT 12975 #undef PUGI__DMC_VOLATILE 12976 #undef PUGI__UNSIGNED_OVERFLOW 12977 #undef PUGI__MSVC_CRT_VERSION 12978 #undef PUGI__SNPRINTF 12979 #undef PUGI__NS_BEGIN 12980 #undef PUGI__NS_END 12981 #undef PUGI__FN 12982 #undef PUGI__FN_NO_INLINE 12983 #undef PUGI__GETHEADER_IMPL 12984 #undef PUGI__GETPAGE_IMPL 12985 #undef PUGI__GETPAGE 12986 #undef PUGI__NODETYPE 12987 #undef PUGI__IS_CHARTYPE_IMPL 12988 #undef PUGI__IS_CHARTYPE 12989 #undef PUGI__IS_CHARTYPEX 12990 #undef PUGI__ENDSWITH 12991 #undef PUGI__SKIPWS 12992 #undef PUGI__OPTSET 12993 #undef PUGI__PUSHNODE 12994 #undef PUGI__POPNODE 12995 #undef PUGI__SCANFOR 12996 #undef PUGI__SCANWHILE 12997 #undef PUGI__SCANWHILE_UNROLL 12998 #undef PUGI__ENDSEG 12999 #undef PUGI__THROW_ERROR 13000 #undef PUGI__CHECK_ERROR 13001 13002 #endif 13003 13004 /** 13005 * Copyright (c) 2006-2020 Arseny Kapoulkine 13006 * 13007 * Permission is hereby granted, free of charge, to any person 13008 * obtaining a copy of this software and associated documentation 13009 * files (the "Software"), to deal in the Software without 13010 * restriction, including without limitation the rights to use, 13011 * copy, modify, merge, publish, distribute, sublicense, and/or sell 13012 * copies of the Software, and to permit persons to whom the 13013 * Software is furnished to do so, subject to the following 13014 * conditions: 13015 * 13016 * The above copyright notice and this permission notice shall be 13017 * included in all copies or substantial portions of the Software. 
13018 * 13019 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13020 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 13021 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 13022 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 13023 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 13024 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 13025 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 13026 * OTHER DEALINGS IN THE SOFTWARE. 13027 */ 13028