1 /** 2 * pugixml parser - version 1.11 3 * -------------------------------------------------------- 4 * Copyright (C) 2006-2020, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) 5 * Report bugs and download new versions at https://pugixml.org/ 6 * 7 * This library is distributed under the MIT License. See notice at the end 8 * of this file. 9 * 10 * This work is based on the pugxml parser, which is: 11 * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) 12 */ 13 14 #ifndef SOURCE_PUGIXML_CPP 15 #define SOURCE_PUGIXML_CPP 16 17 #include "pugixml.hpp" 18 19 #include <stdlib.h> 20 #include <stdio.h> 21 #include <string.h> 22 #include <assert.h> 23 #include <limits.h> 24 25 #ifdef PUGIXML_WCHAR_MODE 26 # include <wchar.h> 27 #endif 28 29 #ifndef PUGIXML_NO_XPATH 30 # include <math.h> 31 # include <float.h> 32 #endif 33 34 #ifndef PUGIXML_NO_STL 35 # include <istream> 36 # include <ostream> 37 # include <string> 38 #endif 39 40 // For placement new 41 #include <new> 42 43 #ifdef _MSC_VER 44 # pragma warning(push) 45 # pragma warning(disable: 4127) // conditional expression is constant 46 # pragma warning(disable: 4324) // structure was padded due to __declspec(align()) 47 # pragma warning(disable: 4702) // unreachable code 48 # pragma warning(disable: 4996) // this function or variable may be unsafe 49 #endif 50 51 #if defined(_MSC_VER) && defined(__c2__) 52 # pragma clang diagnostic push 53 # pragma clang diagnostic ignored "-Wdeprecated" // this function or variable may be unsafe 54 #endif 55 56 #ifdef __INTEL_COMPILER 57 # pragma warning(disable: 177) // function was declared but never referenced 58 # pragma warning(disable: 279) // controlling expression is constant 59 # pragma warning(disable: 1478 1786) // function was declared "deprecated" 60 # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type 61 #endif 62 63 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY) 64 # pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away 65 #endif 66 67 #ifdef __BORLANDC__ 68 # pragma option push 69 # pragma warn -8008 // condition is always false 70 # pragma warn -8066 // unreachable code 71 #endif 72 73 #ifdef __SNC__ 74 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug 75 # pragma diag_suppress=178 // function was declared but never referenced 76 # pragma diag_suppress=237 // controlling expression is constant 77 #endif 78 79 #ifdef __TI_COMPILER_VERSION__ 80 # pragma diag_suppress 179 // function was declared but never referenced 81 #endif 82 83 // Inlining controls 84 #if defined(_MSC_VER) && _MSC_VER >= 1300 85 # define PUGI__NO_INLINE __declspec(noinline) 86 #elif defined(__GNUC__) 87 # define PUGI__NO_INLINE __attribute__((noinline)) 88 #else 89 # define PUGI__NO_INLINE 90 #endif 91 92 // Branch weight controls 93 #if defined(__GNUC__) && !defined(__c2__) 94 # define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0) 95 #else 96 # define PUGI__UNLIKELY(cond) (cond) 97 #endif 98 99 // Simple static assertion 100 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; } 101 102 // Digital Mars C++ bug workaround for passing char loaded from memory via stack 103 #ifdef __DMC__ 104 # define PUGI__DMC_VOLATILE volatile 105 #else 106 # define PUGI__DMC_VOLATILE 107 #endif 108 109 // Integer sanitizer workaround; we only apply this for clang since gcc8 has no_sanitize but not unsigned-integer-overflow and produces "attribute directive ignored" warnings 110 #if defined(__clang__) && defined(__has_attribute) 111 # if __has_attribute(no_sanitize) 112 # define PUGI__UNSIGNED_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow"))) 113 # else 114 # define PUGI__UNSIGNED_OVERFLOW 115 # endif 116 #else 117 # define PUGI__UNSIGNED_OVERFLOW 118 #endif 119 120 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all) 121 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST) 122 using std::memcpy; 123 using std::memmove; 124 using std::memset; 125 #endif 126 127 // Some MinGW/GCC versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions from limits.h in some configurations 128 #if defined(PUGIXML_HAS_LONG_LONG) && defined(__GNUC__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX) 129 # define LLONG_MIN (-LLONG_MAX - 1LL) 130 # define LLONG_MAX __LONG_LONG_MAX__ 131 # define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL) 132 #endif 133 134 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features 135 #if defined(_MSC_VER) && !defined(__S3E__) 136 # define PUGI__MSVC_CRT_VERSION _MSC_VER 137 #endif 138 139 // Not all platforms have snprintf; we define a wrapper that uses snprintf if possible. This only works with buffers with a known size. 140 #if __cplusplus >= 201103 141 # define PUGI__SNPRINTF(buf, ...) snprintf(buf, sizeof(buf), __VA_ARGS__) 142 #elif defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 143 # define PUGI__SNPRINTF(buf, ...) _snprintf_s(buf, _countof(buf), _TRUNCATE, __VA_ARGS__) 144 #else 145 # define PUGI__SNPRINTF sprintf 146 #endif 147 148 // We put implementation details into an anonymous namespace in source mode, but have to keep it in non-anonymous namespace in header-only mode to prevent binary bloat. 149 #ifdef PUGIXML_HEADER_ONLY 150 # define PUGI__NS_BEGIN namespace pugi { namespace impl { 151 # define PUGI__NS_END } } 152 # define PUGI__FN inline 153 # define PUGI__FN_NO_INLINE inline 154 #else 155 # if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces 156 # define PUGI__NS_BEGIN namespace pugi { namespace impl { 157 # define PUGI__NS_END } } 158 # else 159 # define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace { 160 # define PUGI__NS_END } } } 161 # endif 162 # define PUGI__FN 163 # define PUGI__FN_NO_INLINE PUGI__NO_INLINE 164 #endif 165 166 // uintptr_t 167 #if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561) 168 namespace pugi 169 { 170 # ifndef _UINTPTR_T_DEFINED 171 typedef size_t uintptr_t; 172 # endif 173 174 typedef unsigned __int8 uint8_t; 175 typedef unsigned __int16 uint16_t; 176 typedef unsigned __int32 uint32_t; 177 } 178 #else 179 # include <stdint.h> 180 #endif 181 182 // Memory allocation 183 PUGI__NS_BEGIN default_allocate(size_t size)184 PUGI__FN void* default_allocate(size_t size) 185 { 186 return malloc(size); 187 } 188 default_deallocate(void * ptr)189 PUGI__FN void default_deallocate(void* ptr) 190 { 191 free(ptr); 192 } 193 194 template <typename T> 195 struct xml_memory_management_function_storage 196 { 197 static allocation_function allocate; 198 static deallocation_function deallocate; 199 }; 200 201 // Global allocation functions are stored in class statics so that in header mode linker deduplicates them 202 // Without a template<> we'll get multiple definitions of the same static 203 template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate; 204 template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate; 205 206 typedef xml_memory_management_function_storage<int> xml_memory; 207 PUGI__NS_END 208 209 // String utilities 210 PUGI__NS_BEGIN 211 // Get string length strlength(const char_t * s)212 PUGI__FN size_t strlength(const char_t* s) 213 { 214 assert(s); 215 216 #ifdef PUGIXML_WCHAR_MODE 217 return wcslen(s); 218 #else 219 return strlen(s); 220 #endif 221 } 222 223 // Compare two strings strequal(const char_t * src,const char_t * dst)224 PUGI__FN bool strequal(const char_t* src, const char_t* dst) 225 { 226 assert(src && dst); 227 228 #ifdef PUGIXML_WCHAR_MODE 229 return wcscmp(src, dst) == 0; 230 #else 231 return strcmp(src, dst) == 0; 232 #endif 233 } 234 235 // Compare lhs with [rhs_begin, rhs_end) strequalrange(const char_t * lhs,const char_t * rhs,size_t count)236 PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count) 237 { 238 for (size_t i = 0; i < count; ++i) 239 if (lhs[i] != rhs[i]) 240 return false; 241 242 return lhs[count] == 0; 243 } 244 245 // Get length of wide string, even if CRT lacks wide character support strlength_wide(const wchar_t * s)246 PUGI__FN size_t strlength_wide(const wchar_t* s) 247 { 248 assert(s); 249 250 #ifdef PUGIXML_WCHAR_MODE 251 return wcslen(s); 252 #else 253 const wchar_t* end = s; 254 while (*end) end++; 255 return static_cast<size_t>(end - s); 256 #endif 257 } 258 PUGI__NS_END 259 260 // auto_ptr-like object for exception recovery 261 PUGI__NS_BEGIN 262 template <typename T> struct auto_deleter 263 { 264 typedef void (*D)(T*); 265 266 T* data; 267 D deleter; 268 auto_deleterauto_deleter269 auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_) 270 { 271 } 272 ~auto_deleterauto_deleter273 ~auto_deleter() 274 { 275 if (data) deleter(data); 276 } 277 releaseauto_deleter278 T* release() 279 { 280 T* result = data; 281 data = 0; 282 return result; 283 } 284 }; 285 PUGI__NS_END 286 287 #ifdef PUGIXML_COMPACT 288 PUGI__NS_BEGIN 289 class compact_hash_table 290 { 291 public: compact_hash_table()292 compact_hash_table(): _items(0), _capacity(0), _count(0) 293 { 294 } 295 clear()296 void clear() 297 { 298 if (_items) 299 { 300 xml_memory::deallocate(_items); 301 _items = 0; 302 _capacity = 0; 303 _count = 0; 304 } 305 } 306 find(const void * key)307 void* find(const void* key) 308 { 309 if (_capacity == 0) return 0; 310 311 item_t* item = get_item(key); 312 assert(item); 313 assert(item->key == key || (item->key == 0 && item->value == 0)); 314 315 return item->value; 316 } 317 insert(const void * key,void * value)318 void insert(const void* key, void* value) 319 { 320 assert(_capacity != 0 && _count < _capacity - _capacity / 4); 321 322 item_t* item = get_item(key); 323 assert(item); 324 325 if (item->key == 0) 326 { 327 _count++; 328 item->key = key; 329 } 330 331 item->value = value; 332 } 333 reserve(size_t extra=16)334 bool reserve(size_t extra = 16) 335 { 336 if (_count + extra >= _capacity - _capacity / 4) 337 return rehash(_count + extra); 338 339 return true; 340 } 341 342 private: 343 struct item_t 344 { 345 const void* key; 346 void* value; 347 }; 348 349 item_t* _items; 350 size_t _capacity; 351 352 size_t _count; 353 354 bool rehash(size_t count); 355 get_item(const void * key)356 item_t* get_item(const void* key) 357 { 358 assert(key); 359 assert(_capacity > 0); 360 361 size_t hashmod = _capacity - 1; 362 size_t bucket = hash(key) & hashmod; 363 364 for (size_t probe = 0; probe <= hashmod; ++probe) 365 { 366 item_t& probe_item = _items[bucket]; 367 368 if (probe_item.key == key || probe_item.key == 0) 369 return &probe_item; 370 371 // hash collision, quadratic probing 372 bucket = (bucket + probe + 1) & hashmod; 373 } 374 375 assert(false && "Hash table is full"); // unreachable 376 return 0; 377 } 378 hash(const void * key)379 static PUGI__UNSIGNED_OVERFLOW unsigned int hash(const void* key) 380 { 381 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key) & 0xffffffff); 382 383 // MurmurHash3 32-bit finalizer 384 h ^= h >> 16; 385 h *= 0x85ebca6bu; 386 h ^= h >> 13; 387 h *= 0xc2b2ae35u; 388 h ^= h >> 16; 389 390 return h; 391 } 392 }; 393 rehash(size_t count)394 PUGI__FN_NO_INLINE bool compact_hash_table::rehash(size_t count) 395 { 396 size_t capacity = 32; 397 while (count >= capacity - capacity / 4) 398 capacity *= 2; 399 400 compact_hash_table rt; 401 rt._capacity = capacity; 402 rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * capacity)); 403 404 if (!rt._items) 405 return false; 406 407 memset(rt._items, 0, sizeof(item_t) * capacity); 408 409 for (size_t i = 0; i < _capacity; ++i) 410 if (_items[i].key) 411 rt.insert(_items[i].key, _items[i].value); 412 413 if (_items) 414 xml_memory::deallocate(_items); 415 416 _capacity = capacity; 417 _items = rt._items; 418 419 assert(_count == rt._count); 420 421 return true; 422 } 423 424 PUGI__NS_END 425 #endif 426 427 PUGI__NS_BEGIN 428 #ifdef PUGIXML_COMPACT 429 static const uintptr_t xml_memory_block_alignment = 4; 430 #else 431 static const uintptr_t xml_memory_block_alignment = sizeof(void*); 432 #endif 433 434 // extra metadata bits 435 static const uintptr_t xml_memory_page_contents_shared_mask = 64; 436 static const uintptr_t xml_memory_page_name_allocated_mask = 32; 437 static const uintptr_t xml_memory_page_value_allocated_mask = 16; 438 static const uintptr_t xml_memory_page_type_mask = 15; 439 440 // combined masks for string uniqueness 441 static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask; 442 static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask; 443 444 #ifdef PUGIXML_COMPACT 445 #define PUGI__GETHEADER_IMPL(object, page, flags) // unused 446 #define PUGI__GETPAGE_IMPL(header) (header).get_page() 447 #else 448 #define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags)) 449 // this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings 450 #define PUGI__GETPAGE_IMPL(header) static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>(reinterpret_cast<const char*>(&header) - (header >> 8)))) 451 #endif 452 453 #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header) 454 #define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask) 455 456 struct xml_allocator; 457 458 struct xml_memory_page 459 { constructxml_memory_page460 static xml_memory_page* construct(void* memory) 461 { 462 xml_memory_page* result = static_cast<xml_memory_page*>(memory); 463 464 result->allocator = 0; 465 result->prev = 0; 466 result->next = 0; 467 result->busy_size = 0; 468 result->freed_size = 0; 469 470 #ifdef PUGIXML_COMPACT 471 result->compact_string_base = 0; 472 result->compact_shared_parent = 0; 473 result->compact_page_marker = 0; 474 #endif 475 476 return result; 477 } 478 479 xml_allocator* allocator; 480 481 xml_memory_page* prev; 482 xml_memory_page* next; 483 484 size_t busy_size; 485 size_t freed_size; 486 487 #ifdef PUGIXML_COMPACT 488 char_t* compact_string_base; 489 void* compact_shared_parent; 490 uint32_t* compact_page_marker; 491 #endif 492 }; 493 494 static const size_t xml_memory_page_size = 495 #ifdef PUGIXML_MEMORY_PAGE_SIZE 496 (PUGIXML_MEMORY_PAGE_SIZE) 497 #else 498 32768 499 #endif 500 - sizeof(xml_memory_page); 501 502 struct xml_memory_string_header 503 { 504 uint16_t page_offset; // offset from page->data 505 uint16_t full_size; // 0 if string occupies whole page 506 }; 507 508 struct xml_allocator 509 { xml_allocatorxml_allocator510 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size) 511 { 512 #ifdef PUGIXML_COMPACT 513 _hash = 0; 514 #endif 515 } 516 allocate_pagexml_allocator517 xml_memory_page* allocate_page(size_t data_size) 518 { 519 size_t size = sizeof(xml_memory_page) + data_size; 520 521 // allocate block with some alignment, leaving memory for worst-case padding 522 void* memory = xml_memory::allocate(size); 523 if (!memory) return 0; 524 525 // prepare page structure 526 xml_memory_page* page = xml_memory_page::construct(memory); 527 assert(page); 528 529 page->allocator = _root->allocator; 530 531 return page; 532 } 533 deallocate_pagexml_allocator534 static void deallocate_page(xml_memory_page* page) 535 { 536 xml_memory::deallocate(page); 537 } 538 539 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page); 540 allocate_memoryxml_allocator541 void* allocate_memory(size_t size, xml_memory_page*& out_page) 542 { 543 if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size)) 544 return allocate_memory_oob(size, out_page); 545 546 void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size; 547 548 _busy_size += size; 549 550 out_page = _root; 551 552 return buf; 553 } 554 555 #ifdef PUGIXML_COMPACT allocate_objectxml_allocator556 void* allocate_object(size_t size, xml_memory_page*& out_page) 557 { 558 void* result = allocate_memory(size + sizeof(uint32_t), out_page); 559 if (!result) return 0; 560 561 // adjust for marker 562 ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker); 563 564 if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment)) 565 { 566 // insert new marker 567 uint32_t* marker = static_cast<uint32_t*>(result); 568 569 *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page)); 570 out_page->compact_page_marker = marker; 571 572 // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block 573 // this will make sure deallocate_memory correctly tracks the size 574 out_page->freed_size += sizeof(uint32_t); 575 576 return marker + 1; 577 } 578 else 579 { 580 // roll back uint32_t part 581 _busy_size -= sizeof(uint32_t); 582 583 return result; 584 } 585 } 586 #else allocate_objectxml_allocator587 void* allocate_object(size_t size, xml_memory_page*& out_page) 588 { 589 return allocate_memory(size, out_page); 590 } 591 #endif 592 deallocate_memoryxml_allocator593 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) 594 { 595 if (page == _root) page->busy_size = _busy_size; 596 597 assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size); 598 (void)!ptr; 599 600 page->freed_size += size; 601 assert(page->freed_size <= page->busy_size); 602 603 if (page->freed_size == page->busy_size) 604 { 605 if (page->next == 0) 606 { 607 assert(_root == page); 608 609 // top page freed, just reset sizes 610 page->busy_size = 0; 611 page->freed_size = 0; 612 613 #ifdef PUGIXML_COMPACT 614 // reset compact state to maximize efficiency 615 page->compact_string_base = 0; 616 page->compact_shared_parent = 0; 617 page->compact_page_marker = 0; 618 #endif 619 620 _busy_size = 0; 621 } 622 else 623 { 624 assert(_root != page); 625 assert(page->prev); 626 627 // remove from the list 628 page->prev->next = page->next; 629 page->next->prev = page->prev; 630 631 // deallocate 632 deallocate_page(page); 633 } 634 } 635 } 636 allocate_stringxml_allocator637 char_t* allocate_string(size_t length) 638 { 639 static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment; 640 641 PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset); 642 643 // allocate memory for string and header block 644 size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t); 645 646 // round size up to block alignment boundary 647 size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1); 648 649 xml_memory_page* page; 650 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page)); 651 652 if (!header) return 0; 653 654 // setup header 655 ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page); 656 657 assert(page_offset % xml_memory_block_alignment == 0); 658 assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset); 659 header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment); 660 661 // full_size == 0 for large strings that occupy the whole page 662 assert(full_size % xml_memory_block_alignment == 0); 663 assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0)); 664 header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0); 665 666 // round-trip through void* to avoid 'cast increases required alignment of target type' warning 667 // header is guaranteed a pointer-sized alignment, which should be enough for char_t 668 return static_cast<char_t*>(static_cast<void*>(header + 1)); 669 } 670 deallocate_stringxml_allocator671 void deallocate_string(char_t* string) 672 { 673 // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings 674 // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string 675 676 // get header 677 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1; 678 assert(header); 679 680 // deallocate 681 size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment; 682 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset)); 683 684 // if full_size == 0 then this string occupies the whole page 685 size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment; 686 687 deallocate_memory(header, full_size, page); 688 } 689 reservexml_allocator690 bool reserve() 691 { 692 #ifdef PUGIXML_COMPACT 693 return _hash->reserve(); 694 #else 695 return true; 696 #endif 697 } 698 699 xml_memory_page* _root; 700 size_t _busy_size; 701 702 #ifdef PUGIXML_COMPACT 703 compact_hash_table* _hash; 704 #endif 705 }; 706 allocate_memory_oob(size_t size,xml_memory_page * & out_page)707 PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) 708 { 709 const size_t large_allocation_threshold = xml_memory_page_size / 4; 710 711 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size); 712 out_page = page; 713 714 if (!page) return 0; 715 716 if (size <= large_allocation_threshold) 717 { 718 _root->busy_size = _busy_size; 719 720 // insert page at the end of linked list 721 page->prev = _root; 722 _root->next = page; 723 _root = page; 724 725 _busy_size = size; 726 } 727 else 728 { 729 // insert page before the end of linked list, so that it is deleted as soon as possible 730 // the last page is not deleted even if it's empty (see deallocate_memory) 731 assert(_root->prev); 732 733 page->prev = _root->prev; 734 page->next = _root; 735 736 _root->prev->next = page; 737 _root->prev = page; 738 739 page->busy_size = size; 740 } 741 742 return reinterpret_cast<char*>(page) + sizeof(xml_memory_page); 743 } 744 PUGI__NS_END 745 746 #ifdef PUGIXML_COMPACT 747 PUGI__NS_BEGIN 748 static const uintptr_t compact_alignment_log2 = 2; 749 static const uintptr_t compact_alignment = 1 << compact_alignment_log2; 750 751 class compact_header 752 { 753 public: compact_header(xml_memory_page * page,unsigned int flags)754 compact_header(xml_memory_page* page, unsigned int flags) 755 { 756 PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment); 757 758 ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker)); 759 assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment); 760 761 _page = static_cast<unsigned char>(offset >> compact_alignment_log2); 762 _flags = static_cast<unsigned char>(flags); 763 } 764 operator &=(uintptr_t mod)765 void operator&=(uintptr_t mod) 766 { 767 _flags &= static_cast<unsigned char>(mod); 768 } 769 operator |=(uintptr_t mod)770 void operator|=(uintptr_t mod) 771 { 772 _flags |= static_cast<unsigned char>(mod); 773 } 774 operator &(uintptr_t mod) const775 uintptr_t operator&(uintptr_t mod) const 776 { 777 return _flags & mod; 778 } 779 get_page() const780 xml_memory_page* get_page() const 781 { 782 // round-trip through void* to silence 'cast increases required alignment of target type' warnings 783 const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2); 784 const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker)); 785 786 return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page))); 787 } 788 789 private: 790 unsigned char _page; 791 unsigned char _flags; 792 }; 793 compact_get_page(const void * object,int header_offset)794 PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset) 795 { 796 const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset); 797 798 return header->get_page(); 799 } 800 compact_get_value(const void * object)801 template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object) 802 { 803 return static_cast<T*>(compact_get_page(object, header_offset)->allocator->_hash->find(object)); 804 } 805 compact_set_value(const void * object,T * value)806 template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value) 807 { 808 compact_get_page(object, header_offset)->allocator->_hash->insert(object, value); 809 } 810 811 template <typename T, int header_offset, int start = -126> class compact_pointer 812 { 813 public: compact_pointer()814 compact_pointer(): _data(0) 815 { 816 } 817 operator =(const compact_pointer & rhs)818 void operator=(const compact_pointer& rhs) 819 { 820 *this = rhs + 0; 821 } 822 operator =(T * value)823 void operator=(T* value) 824 { 825 if (value) 826 { 827 // value is guaranteed to be compact-aligned; 'this' is not 828 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) 829 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to 830 // compensate for arithmetic shift rounding for negative values 831 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this); 832 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start; 833 834 if (static_cast<uintptr_t>(offset) <= 253) 835 _data = static_cast<unsigned char>(offset + 1); 836 else 837 { 838 compact_set_value<header_offset>(this, value); 839 840 _data = 255; 841 } 842 } 843 else 844 _data = 0; 845 } 846 operator T*() const847 operator T*() const 848 { 849 if (_data) 850 { 851 if (_data < 255) 852 { 853 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1); 854 855 return reinterpret_cast<T*>(base + (_data - 1 + start) * compact_alignment); 856 } 857 else 858 return compact_get_value<header_offset, T>(this); 859 } 860 else 861 return 0; 862 } 863 operator ->() const864 T* operator->() const 865 { 866 return *this; 867 } 868 869 private: 870 unsigned char _data; 871 }; 872 873 template <typename T, int header_offset> class compact_pointer_parent 874 { 875 public: compact_pointer_parent()876 compact_pointer_parent(): _data(0) 877 { 878 } 879 operator =(const compact_pointer_parent & rhs)880 void operator=(const compact_pointer_parent& rhs) 881 { 882 *this = rhs + 0; 883 } 884 operator =(T * value)885 void operator=(T* value) 886 { 887 if (value) 888 { 889 // value is guaranteed to be compact-aligned; 'this' is not 890 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) 891 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to 892 // compensate for arithmetic shift behavior for negative values 893 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this); 894 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533; 895 896 if (static_cast<uintptr_t>(offset) <= 65533) 897 { 898 _data = static_cast<unsigned short>(offset + 1); 899 } 900 else 901 { 902 xml_memory_page* page = compact_get_page(this, header_offset); 903 904 if (PUGI__UNLIKELY(page->compact_shared_parent == 0)) 905 page->compact_shared_parent = value; 906 907 if (page->compact_shared_parent == value) 908 { 909 _data = 65534; 910 } 911 else 912 { 913 compact_set_value<header_offset>(this, value); 914 915 _data = 65535; 916 } 917 } 918 } 919 else 920 { 921 _data = 0; 922 } 923 } 924 operator T*() const925 operator T*() const 926 { 927 if (_data) 928 { 929 if (_data < 65534) 930 { 931 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1); 932 933 return reinterpret_cast<T*>(base + (_data - 1 - 65533) * compact_alignment); 934 } 935 else if (_data == 65534) 936 return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent); 937 else 938 return compact_get_value<header_offset, T>(this); 939 } 940 else 941 return 0; 942 } 943 operator ->() const944 T* operator->() const 945 { 946 return *this; 947 } 948 949 private: 950 uint16_t _data; 951 }; 952 953 template <int header_offset, int base_offset> class compact_string 954 { 955 public: compact_string()956 compact_string(): _data(0) 957 { 958 } 959 operator =(const compact_string & rhs)960 void operator=(const compact_string& rhs) 961 { 962 *this = rhs + 0; 963 } 964 operator =(char_t * value)965 void operator=(char_t* value) 966 { 967 if (value) 968 { 969 xml_memory_page* page = compact_get_page(this, header_offset); 970 971 if (PUGI__UNLIKELY(page->compact_string_base == 0)) 972 page->compact_string_base = value; 973 974 ptrdiff_t offset = value - page->compact_string_base; 975 976 if (static_cast<uintptr_t>(offset) < (65535 << 7)) 977 { 978 // round-trip through void* to silence 'cast increases required alignment of target type' warnings 979 uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset)); 980 981 if (*base == 0) 982 { 983 *base = static_cast<uint16_t>((offset >> 7) + 1); 984 _data = static_cast<unsigned char>((offset & 127) + 1); 985 } 986 else 987 { 988 ptrdiff_t remainder = offset - ((*base - 1) << 7); 989 990 if (static_cast<uintptr_t>(remainder) <= 253) 991 { 992 _data = static_cast<unsigned char>(remainder + 1); 993 } 994 else 995 { 996 compact_set_value<header_offset>(this, value); 997 998 _data = 255; 999 } 1000 } 1001 } 1002 else 1003 { 1004 compact_set_value<header_offset>(this, value); 1005 1006 _data = 255; 1007 } 1008 } 1009 else 1010 { 1011 _data = 0; 1012 } 1013 } 1014 operator char_t*() const1015 operator char_t*() const 1016 { 1017 if (_data) 1018 { 1019 if (_data < 255) 1020 { 1021 xml_memory_page* page = compact_get_page(this, header_offset); 1022 1023 // round-trip through void* to silence 'cast increases required alignment of target type' warnings 1024 const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset)); 1025 assert(*base); 1026 1027 ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1); 1028 1029 return page->compact_string_base + offset; 1030 } 1031 else 1032 { 1033 return compact_get_value<header_offset, char_t>(this); 1034 } 1035 } 1036 else 1037 return 0; 1038 } 1039 1040 private: 1041 unsigned char _data; 1042 }; 1043 PUGI__NS_END 1044 #endif 1045 1046 #ifdef PUGIXML_COMPACT 1047 namespace pugi 1048 { 1049 struct xml_attribute_struct 1050 { xml_attribute_structpugi::xml_attribute_struct1051 xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0) 1052 { 1053 PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8); 1054 } 1055 1056 impl::compact_header header; 1057 1058 uint16_t namevalue_base; 1059 1060 impl::compact_string<4, 2> name; 1061 impl::compact_string<5, 3> value; 1062 1063 impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c; 1064 impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute; 1065 }; 1066 1067 struct xml_node_struct 1068 { xml_node_structpugi::xml_node_struct1069 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0) 1070 { 1071 PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12); 1072 } 1073 1074 impl::compact_header header; 1075 1076 uint16_t namevalue_base; 1077 1078 impl::compact_string<4, 2> name; 1079 impl::compact_string<5, 3> value; 1080 1081 impl::compact_pointer_parent<xml_node_struct, 6> parent; 1082 1083 impl::compact_pointer<xml_node_struct, 8, 0> first_child; 1084 1085 impl::compact_pointer<xml_node_struct, 9> prev_sibling_c; 1086 impl::compact_pointer<xml_node_struct, 10, 0> next_sibling; 1087 1088 impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute; 1089 }; 1090 } 1091 #else 1092 namespace pugi 1093 { 1094 struct xml_attribute_struct 1095 { 1096 xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0) 1097 { 1098 header = PUGI__GETHEADER_IMPL(this, page, 0); 1099 } 1100 1101 uintptr_t header; 1102 1103 char_t* name; 1104 char_t* value; 1105 1106 xml_attribute_struct* prev_attribute_c; 1107 xml_attribute_struct* next_attribute; 1108 }; 1109 1110 struct xml_node_struct 1111 { 1112 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) 1113 { 1114 header = PUGI__GETHEADER_IMPL(this, page, type); 1115 } 1116 1117 uintptr_t header; 1118 1119 char_t* name; 1120 char_t* value; 1121 1122 xml_node_struct* parent; 1123 1124 xml_node_struct* first_child; 1125 1126 xml_node_struct* prev_sibling_c; 1127 xml_node_struct* next_sibling; 1128 1129 xml_attribute_struct* first_attribute; 1130 }; 1131 } 1132 #endif 1133 1134 PUGI__NS_BEGIN 1135 struct xml_extra_buffer 1136 { 1137 char_t* buffer; 1138 xml_extra_buffer* next; 1139 }; 1140 1141 struct xml_document_struct: public xml_node_struct, public xml_allocator 1142 { xml_document_structxml_document_struct1143 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0) 1144 { 1145 } 1146 1147 const char_t* buffer; 1148 1149 xml_extra_buffer* extra_buffers; 1150 1151 #ifdef PUGIXML_COMPACT 1152 compact_hash_table hash; 1153 #endif 1154 }; 1155 get_allocator(const Object * object)1156 template <typename Object> inline xml_allocator& get_allocator(const Object* object) 1157 { 1158 assert(object); 1159 1160 return *PUGI__GETPAGE(object)->allocator; 1161 } 1162 get_document(const Object * object)1163 template <typename Object> inline xml_document_struct& get_document(const Object* object) 1164 { 1165 assert(object); 1166 1167 return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator); 1168 } 1169 PUGI__NS_END 1170 1171 // Low-level DOM operations 1172 PUGI__NS_BEGIN allocate_attribute(xml_allocator & alloc)1173 inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc) 1174 { 1175 xml_memory_page* page; 1176 void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page); 1177 if (!memory) return 0; 1178 1179 return new (memory) xml_attribute_struct(page); 1180 } 1181 allocate_node(xml_allocator & alloc,xml_node_type type)1182 inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type) 1183 { 1184 xml_memory_page* page; 1185 void* memory = alloc.allocate_object(sizeof(xml_node_struct), page); 1186 if (!memory) return 0; 1187 1188 return new (memory) xml_node_struct(page, type); 1189 } 1190 destroy_attribute(xml_attribute_struct * a,xml_allocator & alloc)1191 inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc) 1192 { 1193 if (a->header & impl::xml_memory_page_name_allocated_mask) 1194 alloc.deallocate_string(a->name); 1195 1196 if (a->header & impl::xml_memory_page_value_allocated_mask) 1197 alloc.deallocate_string(a->value); 1198 1199 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a)); 1200 } 1201 destroy_node(xml_node_struct * n,xml_allocator & alloc)1202 inline void destroy_node(xml_node_struct* n, xml_allocator& alloc) 1203 { 1204 if (n->header & impl::xml_memory_page_name_allocated_mask) 1205 alloc.deallocate_string(n->name); 1206 1207 if (n->header & impl::xml_memory_page_value_allocated_mask) 1208 alloc.deallocate_string(n->value); 1209 1210 for (xml_attribute_struct* attr = n->first_attribute; attr; ) 1211 { 1212 xml_attribute_struct* next = attr->next_attribute; 1213 1214 destroy_attribute(attr, alloc); 1215 1216 attr = next; 1217 } 1218 1219 for (xml_node_struct* child = n->first_child; child; ) 1220 { 1221 xml_node_struct* next = child->next_sibling; 1222 1223 destroy_node(child, alloc); 1224 1225 child = next; 1226 } 1227 1228 alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n)); 1229 } 1230 append_node(xml_node_struct * child,xml_node_struct * node)1231 inline void append_node(xml_node_struct* child, xml_node_struct* node) 1232 { 1233 child->parent = node; 1234 1235 xml_node_struct* head = node->first_child; 1236 1237 if (head) 1238 { 1239 xml_node_struct* tail = head->prev_sibling_c; 1240 1241 tail->next_sibling = child; 1242 child->prev_sibling_c = tail; 1243 head->prev_sibling_c = child; 1244 } 1245 else 1246 { 1247 node->first_child = child; 1248 child->prev_sibling_c = child; 1249 } 1250 } 1251 prepend_node(xml_node_struct * child,xml_node_struct * node)1252 inline void prepend_node(xml_node_struct* child, xml_node_struct* node) 1253 { 1254 child->parent = node; 1255 1256 xml_node_struct* head = node->first_child; 1257 1258 if (head) 1259 { 1260 child->prev_sibling_c = head->prev_sibling_c; 1261 head->prev_sibling_c = child; 1262 } 1263 else 1264 child->prev_sibling_c = child; 1265 1266 child->next_sibling = head; 1267 node->first_child = child; 1268 } 1269 insert_node_after(xml_node_struct * child,xml_node_struct * node)1270 inline void insert_node_after(xml_node_struct* child, xml_node_struct* node) 1271 { 1272 xml_node_struct* parent = node->parent; 1273 1274 child->parent = parent; 1275 1276 if (node->next_sibling) 1277 node->next_sibling->prev_sibling_c = child; 1278 else 1279 parent->first_child->prev_sibling_c = child; 1280 1281 child->next_sibling = node->next_sibling; 1282 child->prev_sibling_c = node; 1283 1284 node->next_sibling = child; 1285 } 1286 insert_node_before(xml_node_struct * child,xml_node_struct * node)1287 inline void insert_node_before(xml_node_struct* child, xml_node_struct* node) 1288 { 1289 xml_node_struct* parent = node->parent; 1290 1291 child->parent = parent; 1292 1293 if (node->prev_sibling_c->next_sibling) 1294 node->prev_sibling_c->next_sibling = child; 1295 else 1296 parent->first_child = child; 1297 1298 child->prev_sibling_c = node->prev_sibling_c; 1299 child->next_sibling = node; 1300 1301 node->prev_sibling_c = child; 1302 } 1303 remove_node(xml_node_struct * node)1304 inline void remove_node(xml_node_struct* node) 1305 { 1306 xml_node_struct* parent = node->parent; 1307 1308 if (node->next_sibling) 1309 node->next_sibling->prev_sibling_c = node->prev_sibling_c; 1310 else 1311 parent->first_child->prev_sibling_c = node->prev_sibling_c; 1312 1313 if (node->prev_sibling_c->next_sibling) 1314 node->prev_sibling_c->next_sibling = node->next_sibling; 1315 else 1316 parent->first_child = node->next_sibling; 1317 1318 node->parent = 0; 1319 node->prev_sibling_c = 0; 1320 node->next_sibling = 0; 1321 } 1322 append_attribute(xml_attribute_struct * attr,xml_node_struct * node)1323 inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node) 1324 { 1325 xml_attribute_struct* head = node->first_attribute; 1326 1327 if (head) 1328 { 1329 xml_attribute_struct* tail = head->prev_attribute_c; 1330 1331 tail->next_attribute = attr; 1332 attr->prev_attribute_c = tail; 1333 head->prev_attribute_c = attr; 1334 } 1335 else 1336 { 1337 node->first_attribute = attr; 1338 attr->prev_attribute_c = attr; 1339 } 1340 } 1341 prepend_attribute(xml_attribute_struct * attr,xml_node_struct * node)1342 inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node) 1343 { 1344 xml_attribute_struct* head = node->first_attribute; 1345 1346 if (head) 1347 { 1348 attr->prev_attribute_c = head->prev_attribute_c; 1349 head->prev_attribute_c = attr; 1350 } 1351 else 1352 attr->prev_attribute_c = attr; 1353 1354 attr->next_attribute = head; 1355 node->first_attribute = attr; 1356 } 1357 insert_attribute_after(xml_attribute_struct * attr,xml_attribute_struct * place,xml_node_struct * node)1358 inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) 1359 { 1360 if (place->next_attribute) 1361 place->next_attribute->prev_attribute_c = attr; 1362 else 1363 node->first_attribute->prev_attribute_c = attr; 1364 1365 attr->next_attribute = place->next_attribute; 1366 attr->prev_attribute_c = place; 1367 place->next_attribute = attr; 1368 } 1369 insert_attribute_before(xml_attribute_struct * attr,xml_attribute_struct * place,xml_node_struct * node)1370 inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) 1371 { 1372 if (place->prev_attribute_c->next_attribute) 1373 place->prev_attribute_c->next_attribute = attr; 1374 else 1375 node->first_attribute = attr; 1376 1377 attr->prev_attribute_c = place->prev_attribute_c; 1378 attr->next_attribute = place; 1379 place->prev_attribute_c = attr; 1380 } 1381 remove_attribute(xml_attribute_struct * attr,xml_node_struct * node)1382 inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node) 1383 { 1384 if (attr->next_attribute) 1385 attr->next_attribute->prev_attribute_c = attr->prev_attribute_c; 1386 else 1387 node->first_attribute->prev_attribute_c = attr->prev_attribute_c; 1388 1389 if (attr->prev_attribute_c->next_attribute) 1390 attr->prev_attribute_c->next_attribute = attr->next_attribute; 1391 else 1392 node->first_attribute = attr->next_attribute; 1393 1394 attr->prev_attribute_c = 0; 1395 attr->next_attribute = 0; 1396 } 1397 append_new_node(xml_node_struct * node,xml_allocator & alloc,xml_node_type type=node_element)1398 PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) 1399 { 1400 if (!alloc.reserve()) return 0; 1401 1402 xml_node_struct* child = allocate_node(alloc, type); 1403 if (!child) return 0; 1404 1405 append_node(child, node); 1406 1407 return child; 1408 } 1409 append_new_attribute(xml_node_struct * node,xml_allocator & alloc)1410 PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc) 1411 { 1412 if (!alloc.reserve()) return 0; 1413 1414 xml_attribute_struct* attr = allocate_attribute(alloc); 1415 if (!attr) return 0; 1416 1417 append_attribute(attr, node); 1418 1419 return attr; 1420 } 1421 PUGI__NS_END 1422 1423 // Helper classes for code generation 1424 PUGI__NS_BEGIN 1425 struct opt_false 1426 { 1427 enum { value = 0 }; 1428 }; 1429 1430 struct opt_true 1431 { 1432 enum { value = 1 }; 1433 }; 1434 PUGI__NS_END 1435 1436 // Unicode utilities 1437 PUGI__NS_BEGIN endian_swap(uint16_t value)1438 inline uint16_t endian_swap(uint16_t value) 1439 { 1440 return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8)); 1441 } 1442 endian_swap(uint32_t value)1443 inline uint32_t endian_swap(uint32_t value) 1444 { 1445 return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24); 1446 } 1447 1448 struct utf8_counter 1449 { 1450 typedef size_t value_type; 1451 lowutf8_counter1452 static value_type low(value_type result, uint32_t ch) 1453 { 1454 // U+0000..U+007F 1455 if (ch < 0x80) return result + 1; 1456 // U+0080..U+07FF 1457 else if (ch < 0x800) return result + 2; 1458 // U+0800..U+FFFF 1459 else return result + 3; 1460 } 1461 highutf8_counter1462 static value_type high(value_type result, uint32_t) 1463 { 1464 // U+10000..U+10FFFF 1465 return result + 4; 1466 } 1467 }; 1468 1469 struct utf8_writer 1470 { 1471 typedef uint8_t* value_type; 1472 lowutf8_writer1473 static value_type low(value_type result, uint32_t ch) 1474 { 1475 // U+0000..U+007F 1476 if (ch < 0x80) 1477 { 1478 *result = static_cast<uint8_t>(ch); 1479 return result + 1; 1480 } 1481 // U+0080..U+07FF 1482 else if (ch < 0x800) 1483 { 1484 result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6)); 1485 result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F)); 1486 return result + 2; 1487 } 1488 // U+0800..U+FFFF 1489 else 1490 { 1491 result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12)); 1492 result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F)); 1493 result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F)); 1494 return result + 3; 1495 } 1496 } 1497 highutf8_writer1498 static value_type high(value_type result, uint32_t ch) 1499 { 1500 // U+10000..U+10FFFF 1501 result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18)); 1502 result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F)); 1503 result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F)); 1504 result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F)); 1505 return result + 4; 1506 } 1507 anyutf8_writer1508 static value_type any(value_type result, uint32_t ch) 1509 { 1510 return (ch < 0x10000) ? low(result, ch) : high(result, ch); 1511 } 1512 }; 1513 1514 struct utf16_counter 1515 { 1516 typedef size_t value_type; 1517 lowutf16_counter1518 static value_type low(value_type result, uint32_t) 1519 { 1520 return result + 1; 1521 } 1522 highutf16_counter1523 static value_type high(value_type result, uint32_t) 1524 { 1525 return result + 2; 1526 } 1527 }; 1528 1529 struct utf16_writer 1530 { 1531 typedef uint16_t* value_type; 1532 lowutf16_writer1533 static value_type low(value_type result, uint32_t ch) 1534 { 1535 *result = static_cast<uint16_t>(ch); 1536 1537 return result + 1; 1538 } 1539 highutf16_writer1540 static value_type high(value_type result, uint32_t ch) 1541 { 1542 uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10; 1543 uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff; 1544 1545 result[0] = static_cast<uint16_t>(0xD800 + msh); 1546 result[1] = static_cast<uint16_t>(0xDC00 + lsh); 1547 1548 return result + 2; 1549 } 1550 anyutf16_writer1551 static value_type any(value_type result, uint32_t ch) 1552 { 1553 return (ch < 0x10000) ? low(result, ch) : high(result, ch); 1554 } 1555 }; 1556 1557 struct utf32_counter 1558 { 1559 typedef size_t value_type; 1560 lowutf32_counter1561 static value_type low(value_type result, uint32_t) 1562 { 1563 return result + 1; 1564 } 1565 highutf32_counter1566 static value_type high(value_type result, uint32_t) 1567 { 1568 return result + 1; 1569 } 1570 }; 1571 1572 struct utf32_writer 1573 { 1574 typedef uint32_t* value_type; 1575 lowutf32_writer1576 static value_type low(value_type result, uint32_t ch) 1577 { 1578 *result = ch; 1579 1580 return result + 1; 1581 } 1582 highutf32_writer1583 static value_type high(value_type result, uint32_t ch) 1584 { 1585 *result = ch; 1586 1587 return result + 1; 1588 } 1589 anyutf32_writer1590 static value_type any(value_type result, uint32_t ch) 1591 { 1592 *result = ch; 1593 1594 return result + 1; 1595 } 1596 }; 1597 1598 struct latin1_writer 1599 { 1600 typedef uint8_t* value_type; 1601 lowlatin1_writer1602 static value_type low(value_type result, uint32_t ch) 1603 { 1604 *result = static_cast<uint8_t>(ch > 255 ? '?' : ch); 1605 1606 return result + 1; 1607 } 1608 highlatin1_writer1609 static value_type high(value_type result, uint32_t ch) 1610 { 1611 (void)ch; 1612 1613 *result = '?'; 1614 1615 return result + 1; 1616 } 1617 }; 1618 1619 struct utf8_decoder 1620 { 1621 typedef uint8_t type; 1622 processutf8_decoder1623 template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) 1624 { 1625 const uint8_t utf8_byte_mask = 0x3f; 1626 1627 while (size) 1628 { 1629 uint8_t lead = *data; 1630 1631 // 0xxxxxxx -> U+0000..U+007F 1632 if (lead < 0x80) 1633 { 1634 result = Traits::low(result, lead); 1635 data += 1; 1636 size -= 1; 1637 1638 // process aligned single-byte (ascii) blocks 1639 if ((reinterpret_cast<uintptr_t>(data) & 3) == 0) 1640 { 1641 // round-trip through void* to silence 'cast increases required alignment of target type' warnings 1642 while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0) 1643 { 1644 result = Traits::low(result, data[0]); 1645 result = Traits::low(result, data[1]); 1646 result = Traits::low(result, data[2]); 1647 result = Traits::low(result, data[3]); 1648 data += 4; 1649 size -= 4; 1650 } 1651 } 1652 } 1653 // 110xxxxx -> U+0080..U+07FF 1654 else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80) 1655 { 1656 result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask)); 1657 data += 2; 1658 size -= 2; 1659 } 1660 // 1110xxxx -> U+0800-U+FFFF 1661 else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80) 1662 { 1663 result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask)); 1664 data += 3; 1665 size -= 3; 1666 } 1667 // 11110xxx -> U+10000..U+10FFFF 1668 else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80) 1669 { 1670 result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask)); 1671 data += 4; 1672 size -= 4; 1673 } 1674 // 10xxxxxx or 11111xxx -> invalid 1675 else 1676 { 1677 data += 1; 1678 size -= 1; 1679 } 1680 } 1681 1682 return result; 1683 } 1684 }; 1685 1686 template <typename opt_swap> struct utf16_decoder 1687 { 1688 typedef uint16_t type; 1689 processutf16_decoder1690 template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits) 1691 { 1692 while (size) 1693 { 1694 uint16_t lead = opt_swap::value ? endian_swap(*data) : *data; 1695 1696 // U+0000..U+D7FF 1697 if (lead < 0xD800) 1698 { 1699 result = Traits::low(result, lead); 1700 data += 1; 1701 size -= 1; 1702 } 1703 // U+E000..U+FFFF 1704 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000) 1705 { 1706 result = Traits::low(result, lead); 1707 data += 1; 1708 size -= 1; 1709 } 1710 // surrogate pair lead 1711 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2) 1712 { 1713 uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1]; 1714 1715 if (static_cast<unsigned int>(next - 0xDC00) < 0x400) 1716 { 1717 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff)); 1718 data += 2; 1719 size -= 2; 1720 } 1721 else 1722 { 1723 data += 1; 1724 size -= 1; 1725 } 1726 } 1727 else 1728 { 1729 data += 1; 1730 size -= 1; 1731 } 1732 } 1733 1734 return result; 1735 } 1736 }; 1737 1738 template <typename opt_swap> struct utf32_decoder 1739 { 1740 typedef uint32_t type; 1741 processutf32_decoder1742 template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits) 1743 { 1744 while (size) 1745 { 1746 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data; 1747 1748 // U+0000..U+FFFF 1749 if (lead < 0x10000) 1750 { 1751 result = Traits::low(result, lead); 1752 data += 1; 1753 size -= 1; 1754 } 1755 // U+10000..U+10FFFF 1756 else 1757 { 1758 result = Traits::high(result, lead); 1759 data += 1; 1760 size -= 1; 1761 } 1762 } 1763 1764 return result; 1765 } 1766 }; 1767 1768 struct latin1_decoder 1769 { 1770 typedef uint8_t type; 1771 processlatin1_decoder1772 template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) 1773 { 1774 while (size) 1775 { 1776 result = Traits::low(result, *data); 1777 data += 1; 1778 size -= 1; 1779 } 1780 1781 return result; 1782 } 1783 }; 1784 1785 template <size_t size> struct wchar_selector; 1786 1787 template <> struct wchar_selector<2> 1788 { 1789 typedef uint16_t type; 1790 typedef utf16_counter counter; 1791 typedef utf16_writer writer; 1792 typedef utf16_decoder<opt_false> decoder; 1793 }; 1794 1795 template <> struct wchar_selector<4> 1796 { 1797 typedef uint32_t type; 1798 typedef utf32_counter counter; 1799 typedef utf32_writer writer; 1800 typedef utf32_decoder<opt_false> decoder; 1801 }; 1802 1803 typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter; 1804 typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer; 1805 1806 struct wchar_decoder 1807 { 1808 typedef wchar_t type; 1809 processwchar_decoder1810 template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits) 1811 { 1812 typedef wchar_selector<sizeof(wchar_t)>::decoder decoder; 1813 1814 return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits); 1815 } 1816 }; 1817 1818 #ifdef PUGIXML_WCHAR_MODE convert_wchar_endian_swap(wchar_t * result,const wchar_t * data,size_t length)1819 PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length) 1820 { 1821 for (size_t i = 0; i < length; ++i) 1822 result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i]))); 1823 } 1824 #endif 1825 PUGI__NS_END 1826 1827 PUGI__NS_BEGIN 1828 enum chartype_t 1829 { 1830 ct_parse_pcdata = 1, // \0, &, \r, < 1831 ct_parse_attr = 2, // \0, &, \r, ', " 1832 ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab 1833 ct_space = 8, // \r, \n, space, tab 1834 ct_parse_cdata = 16, // \0, ], >, \r 1835 ct_parse_comment = 32, // \0, -, >, \r 1836 ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, . 1837 ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, : 1838 }; 1839 1840 static const unsigned char chartype_table[256] = 1841 { 1842 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15 1843 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 1844 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47 1845 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63 1846 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79 1847 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95 1848 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111 1849 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127 1850 1851 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+ 1852 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1853 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1854 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1855 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1856 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1857 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1858 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192 1859 }; 1860 1861 enum chartypex_t 1862 { 1863 ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, > 1864 ctx_special_attr = 2, // Any symbol >= 0 and < 32, &, <, ", ' 1865 ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _ 1866 ctx_digit = 8, // 0-9 1867 ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, . 1868 }; 1869 1870 static const unsigned char chartypex_table[256] = 1871 { 1872 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, // 0-15 1873 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31 1874 0, 0, 2, 0, 0, 0, 3, 2, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47 1875 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 1, 0, // 48-63 1876 1877 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79 1878 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95 1879 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111 1880 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127 1881 1882 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+ 1883 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1884 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1885 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1886 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1887 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1888 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1889 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 1890 }; 1891 1892 #ifdef PUGIXML_WCHAR_MODE 1893 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct)) 1894 #else 1895 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct)) 1896 #endif 1897 1898 #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table) 1899 #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table) 1900 is_little_endian()1901 PUGI__FN bool is_little_endian() 1902 { 1903 unsigned int ui = 1; 1904 1905 return *reinterpret_cast<unsigned char*>(&ui) == 1; 1906 } 1907 get_wchar_encoding()1908 PUGI__FN xml_encoding get_wchar_encoding() 1909 { 1910 PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); 1911 1912 if (sizeof(wchar_t) == 2) 1913 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 1914 else 1915 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; 1916 } 1917 parse_declaration_encoding(const uint8_t * data,size_t size,const uint8_t * & out_encoding,size_t & out_length)1918 PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length) 1919 { 1920 #define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; } 1921 #define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; } 1922 1923 // check if we have a non-empty XML declaration 1924 if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space))) 1925 return false; 1926 1927 // scan XML declaration until the encoding field 1928 for (size_t i = 6; i + 1 < size; ++i) 1929 { 1930 // declaration can not contain ? in quoted values 1931 if (data[i] == '?') 1932 return false; 1933 1934 if (data[i] == 'e' && data[i + 1] == 'n') 1935 { 1936 size_t offset = i; 1937 1938 // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed 1939 PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o'); 1940 PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g'); 1941 1942 // S? = S? 1943 PUGI__SCANCHARTYPE(ct_space); 1944 PUGI__SCANCHAR('='); 1945 PUGI__SCANCHARTYPE(ct_space); 1946 1947 // the only two valid delimiters are ' and " 1948 uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\''; 1949 1950 PUGI__SCANCHAR(delimiter); 1951 1952 size_t start = offset; 1953 1954 out_encoding = data + offset; 1955 1956 PUGI__SCANCHARTYPE(ct_symbol); 1957 1958 out_length = offset - start; 1959 1960 PUGI__SCANCHAR(delimiter); 1961 1962 return true; 1963 } 1964 } 1965 1966 return false; 1967 1968 #undef PUGI__SCANCHAR 1969 #undef PUGI__SCANCHARTYPE 1970 } 1971 guess_buffer_encoding(const uint8_t * data,size_t size)1972 PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size) 1973 { 1974 // skip encoding autodetection if input buffer is too small 1975 if (size < 4) return encoding_utf8; 1976 1977 uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3]; 1978 1979 // look for BOM in first few bytes 1980 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be; 1981 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le; 1982 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be; 1983 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le; 1984 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8; 1985 1986 // look for <, <? or <?xm in various encodings 1987 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be; 1988 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le; 1989 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be; 1990 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le; 1991 1992 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early) 1993 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be; 1994 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le; 1995 1996 // no known BOM detected; parse declaration 1997 const uint8_t* enc = 0; 1998 size_t enc_length = 0; 1999 2000 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length)) 2001 { 2002 // iso-8859-1 (case-insensitive) 2003 if (enc_length == 10 2004 && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o' 2005 && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9' 2006 && enc[8] == '-' && enc[9] == '1') 2007 return encoding_latin1; 2008 2009 // latin1 (case-insensitive) 2010 if (enc_length == 6 2011 && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't' 2012 && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n' 2013 && enc[5] == '1') 2014 return encoding_latin1; 2015 } 2016 2017 return encoding_utf8; 2018 } 2019 get_buffer_encoding(xml_encoding encoding,const void * contents,size_t size)2020 PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size) 2021 { 2022 // replace wchar encoding with utf implementation 2023 if (encoding == encoding_wchar) return get_wchar_encoding(); 2024 2025 // replace utf16 encoding with utf16 with specific endianness 2026 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 2027 2028 // replace utf32 encoding with utf32 with specific endianness 2029 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; 2030 2031 // only do autodetection if no explicit encoding is requested 2032 if (encoding != encoding_auto) return encoding; 2033 2034 // try to guess encoding (based on XML specification, Appendix F.1) 2035 const uint8_t* data = static_cast<const uint8_t*>(contents); 2036 2037 return guess_buffer_encoding(data, size); 2038 } 2039 get_mutable_buffer(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2040 PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) 2041 { 2042 size_t length = size / sizeof(char_t); 2043 2044 if (is_mutable) 2045 { 2046 out_buffer = static_cast<char_t*>(const_cast<void*>(contents)); 2047 out_length = length; 2048 } 2049 else 2050 { 2051 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2052 if (!buffer) return false; 2053 2054 if (contents) 2055 memcpy(buffer, contents, length * sizeof(char_t)); 2056 else 2057 assert(length == 0); 2058 2059 buffer[length] = 0; 2060 2061 out_buffer = buffer; 2062 out_length = length + 1; 2063 } 2064 2065 return true; 2066 } 2067 2068 #ifdef PUGIXML_WCHAR_MODE need_endian_swap_utf(xml_encoding le,xml_encoding re)2069 PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re) 2070 { 2071 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) || 2072 (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be); 2073 } 2074 convert_buffer_endian_swap(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2075 PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) 2076 { 2077 const char_t* data = static_cast<const char_t*>(contents); 2078 size_t length = size / sizeof(char_t); 2079 2080 if (is_mutable) 2081 { 2082 char_t* buffer = const_cast<char_t*>(data); 2083 2084 convert_wchar_endian_swap(buffer, data, length); 2085 2086 out_buffer = buffer; 2087 out_length = length; 2088 } 2089 else 2090 { 2091 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2092 if (!buffer) return false; 2093 2094 convert_wchar_endian_swap(buffer, data, length); 2095 buffer[length] = 0; 2096 2097 out_buffer = buffer; 2098 out_length = length + 1; 2099 } 2100 2101 return true; 2102 } 2103 convert_buffer_generic(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,D)2104 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) 2105 { 2106 const typename D::type* data = static_cast<const typename D::type*>(contents); 2107 size_t data_length = size / sizeof(typename D::type); 2108 2109 // first pass: get length in wchar_t units 2110 size_t length = D::process(data, data_length, 0, wchar_counter()); 2111 2112 // allocate buffer of suitable length 2113 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2114 if (!buffer) return false; 2115 2116 // second pass: convert utf16 input to wchar_t 2117 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer); 2118 wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer()); 2119 2120 assert(oend == obegin + length); 2121 *oend = 0; 2122 2123 out_buffer = buffer; 2124 out_length = length + 1; 2125 2126 return true; 2127 } 2128 convert_buffer(char_t * & out_buffer,size_t & out_length,xml_encoding encoding,const void * contents,size_t size,bool is_mutable)2129 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) 2130 { 2131 // get native encoding 2132 xml_encoding wchar_encoding = get_wchar_encoding(); 2133 2134 // fast path: no conversion required 2135 if (encoding == wchar_encoding) 2136 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); 2137 2138 // only endian-swapping is required 2139 if (need_endian_swap_utf(encoding, wchar_encoding)) 2140 return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable); 2141 2142 // source encoding is utf8 2143 if (encoding == encoding_utf8) 2144 return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder()); 2145 2146 // source encoding is utf16 2147 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) 2148 { 2149 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 2150 2151 return (native_encoding == encoding) ? 2152 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) : 2153 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>()); 2154 } 2155 2156 // source encoding is utf32 2157 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) 2158 { 2159 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; 2160 2161 return (native_encoding == encoding) ? 2162 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) : 2163 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>()); 2164 } 2165 2166 // source encoding is latin1 2167 if (encoding == encoding_latin1) 2168 return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder()); 2169 2170 assert(false && "Invalid encoding"); // unreachable 2171 return false; 2172 } 2173 #else convert_buffer_generic(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,D)2174 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) 2175 { 2176 const typename D::type* data = static_cast<const typename D::type*>(contents); 2177 size_t data_length = size / sizeof(typename D::type); 2178 2179 // first pass: get length in utf8 units 2180 size_t length = D::process(data, data_length, 0, utf8_counter()); 2181 2182 // allocate buffer of suitable length 2183 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2184 if (!buffer) return false; 2185 2186 // second pass: convert utf16 input to utf8 2187 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer); 2188 uint8_t* oend = D::process(data, data_length, obegin, utf8_writer()); 2189 2190 assert(oend == obegin + length); 2191 *oend = 0; 2192 2193 out_buffer = buffer; 2194 out_length = length + 1; 2195 2196 return true; 2197 } 2198 get_latin1_7bit_prefix_length(const uint8_t * data,size_t size)2199 PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size) 2200 { 2201 for (size_t i = 0; i < size; ++i) 2202 if (data[i] > 127) 2203 return i; 2204 2205 return size; 2206 } 2207 convert_buffer_latin1(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2208 PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) 2209 { 2210 const uint8_t* data = static_cast<const uint8_t*>(contents); 2211 size_t data_length = size; 2212 2213 // get size of prefix that does not need utf8 conversion 2214 size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length); 2215 assert(prefix_length <= data_length); 2216 2217 const uint8_t* postfix = data + prefix_length; 2218 size_t postfix_length = data_length - prefix_length; 2219 2220 // if no conversion is needed, just return the original buffer 2221 if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); 2222 2223 // first pass: get length in utf8 units 2224 size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter()); 2225 2226 // allocate buffer of suitable length 2227 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2228 if (!buffer) return false; 2229 2230 // second pass: convert latin1 input to utf8 2231 memcpy(buffer, data, prefix_length); 2232 2233 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer); 2234 uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer()); 2235 2236 assert(oend == obegin + length); 2237 *oend = 0; 2238 2239 out_buffer = buffer; 2240 out_length = length + 1; 2241 2242 return true; 2243 } 2244 convert_buffer(char_t * & out_buffer,size_t & out_length,xml_encoding encoding,const void * contents,size_t size,bool is_mutable)2245 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) 2246 { 2247 // fast path: no conversion required 2248 if (encoding == encoding_utf8) 2249 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); 2250 2251 // source encoding is utf16 2252 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) 2253 { 2254 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 2255 2256 return (native_encoding == encoding) ? 2257 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) : 2258 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>()); 2259 } 2260 2261 // source encoding is utf32 2262 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) 2263 { 2264 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; 2265 2266 return (native_encoding == encoding) ? 2267 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) : 2268 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>()); 2269 } 2270 2271 // source encoding is latin1 2272 if (encoding == encoding_latin1) 2273 return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable); 2274 2275 assert(false && "Invalid encoding"); // unreachable 2276 return false; 2277 } 2278 #endif 2279 as_utf8_begin(const wchar_t * str,size_t length)2280 PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length) 2281 { 2282 // get length in utf8 characters 2283 return wchar_decoder::process(str, length, 0, utf8_counter()); 2284 } 2285 as_utf8_end(char * buffer,size_t size,const wchar_t * str,size_t length)2286 PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length) 2287 { 2288 // convert to utf8 2289 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer); 2290 uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer()); 2291 2292 assert(begin + size == end); 2293 (void)!end; 2294 (void)!size; 2295 } 2296 2297 #ifndef PUGIXML_NO_STL as_utf8_impl(const wchar_t * str,size_t length)2298 PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length) 2299 { 2300 // first pass: get length in utf8 characters 2301 size_t size = as_utf8_begin(str, length); 2302 2303 // allocate resulting string 2304 std::string result; 2305 result.resize(size); 2306 2307 // second pass: convert to utf8 2308 if (size > 0) as_utf8_end(&result[0], size, str, length); 2309 2310 return result; 2311 } 2312 as_wide_impl(const char * str,size_t size)2313 PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size) 2314 { 2315 const uint8_t* data = reinterpret_cast<const uint8_t*>(str); 2316 2317 // first pass: get length in wchar_t units 2318 size_t length = utf8_decoder::process(data, size, 0, wchar_counter()); 2319 2320 // allocate resulting string 2321 std::basic_string<wchar_t> result; 2322 result.resize(length); 2323 2324 // second pass: convert to wchar_t 2325 if (length > 0) 2326 { 2327 wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]); 2328 wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer()); 2329 2330 assert(begin + length == end); 2331 (void)!end; 2332 } 2333 2334 return result; 2335 } 2336 #endif 2337 2338 template <typename Header> strcpy_insitu_allow(size_t length,const Header & header,uintptr_t header_mask,char_t * target)2339 inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target) 2340 { 2341 // never reuse shared memory 2342 if (header & xml_memory_page_contents_shared_mask) return false; 2343 2344 size_t target_length = strlength(target); 2345 2346 // always reuse document buffer memory if possible 2347 if ((header & header_mask) == 0) return target_length >= length; 2348 2349 // reuse heap memory if waste is not too great 2350 const size_t reuse_threshold = 32; 2351 2352 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2); 2353 } 2354 2355 template <typename String, typename Header> strcpy_insitu(String & dest,Header & header,uintptr_t header_mask,const char_t * source,size_t source_length)2356 PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length) 2357 { 2358 if (source_length == 0) 2359 { 2360 // empty string and null pointer are equivalent, so just deallocate old memory 2361 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; 2362 2363 if (header & header_mask) alloc->deallocate_string(dest); 2364 2365 // mark the string as not allocated 2366 dest = 0; 2367 header &= ~header_mask; 2368 2369 return true; 2370 } 2371 else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest)) 2372 { 2373 // we can reuse old buffer, so just copy the new data (including zero terminator) 2374 memcpy(dest, source, source_length * sizeof(char_t)); 2375 dest[source_length] = 0; 2376 2377 return true; 2378 } 2379 else 2380 { 2381 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; 2382 2383 if (!alloc->reserve()) return false; 2384 2385 // allocate new buffer 2386 char_t* buf = alloc->allocate_string(source_length + 1); 2387 if (!buf) return false; 2388 2389 // copy the string (including zero terminator) 2390 memcpy(buf, source, source_length * sizeof(char_t)); 2391 buf[source_length] = 0; 2392 2393 // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures) 2394 if (header & header_mask) alloc->deallocate_string(dest); 2395 2396 // the string is now allocated, so set the flag 2397 dest = buf; 2398 header |= header_mask; 2399 2400 return true; 2401 } 2402 } 2403 2404 struct gap 2405 { 2406 char_t* end; 2407 size_t size; 2408 gapgap2409 gap(): end(0), size(0) 2410 { 2411 } 2412 2413 // Push new gap, move s count bytes further (skipping the gap). 2414 // Collapse previous gap. pushgap2415 void push(char_t*& s, size_t count) 2416 { 2417 if (end) // there was a gap already; collapse it 2418 { 2419 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...) 2420 assert(s >= end); 2421 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end)); 2422 } 2423 2424 s += count; // end of current gap 2425 2426 // "merge" two gaps 2427 end = s; 2428 size += count; 2429 } 2430 2431 // Collapse all gaps, return past-the-end pointer flushgap2432 char_t* flush(char_t* s) 2433 { 2434 if (end) 2435 { 2436 // Move [old_gap_end, current_pos) to [old_gap_start, ...) 2437 assert(s >= end); 2438 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end)); 2439 2440 return s - size; 2441 } 2442 else return s; 2443 } 2444 }; 2445 strconv_escape(char_t * s,gap & g)2446 PUGI__FN char_t* strconv_escape(char_t* s, gap& g) 2447 { 2448 char_t* stre = s + 1; 2449 2450 switch (*stre) 2451 { 2452 case '#': // &#... 2453 { 2454 unsigned int ucsc = 0; 2455 2456 if (stre[1] == 'x') // &#x... (hex code) 2457 { 2458 stre += 2; 2459 2460 char_t ch = *stre; 2461 2462 if (ch == ';') return stre; 2463 2464 for (;;) 2465 { 2466 if (static_cast<unsigned int>(ch - '0') <= 9) 2467 ucsc = 16 * ucsc + (ch - '0'); 2468 else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5) 2469 ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10); 2470 else if (ch == ';') 2471 break; 2472 else // cancel 2473 return stre; 2474 2475 ch = *++stre; 2476 } 2477 2478 ++stre; 2479 } 2480 else // &#... (dec code) 2481 { 2482 char_t ch = *++stre; 2483 2484 if (ch == ';') return stre; 2485 2486 for (;;) 2487 { 2488 if (static_cast<unsigned int>(ch - '0') <= 9) 2489 ucsc = 10 * ucsc + (ch - '0'); 2490 else if (ch == ';') 2491 break; 2492 else // cancel 2493 return stre; 2494 2495 ch = *++stre; 2496 } 2497 2498 ++stre; 2499 } 2500 2501 #ifdef PUGIXML_WCHAR_MODE 2502 s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc)); 2503 #else 2504 s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc)); 2505 #endif 2506 2507 g.push(s, stre - s); 2508 return stre; 2509 } 2510 2511 case 'a': // &a 2512 { 2513 ++stre; 2514 2515 if (*stre == 'm') // &am 2516 { 2517 if (*++stre == 'p' && *++stre == ';') // & 2518 { 2519 *s++ = '&'; 2520 ++stre; 2521 2522 g.push(s, stre - s); 2523 return stre; 2524 } 2525 } 2526 else if (*stre == 'p') // &ap 2527 { 2528 if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // ' 2529 { 2530 *s++ = '\''; 2531 ++stre; 2532 2533 g.push(s, stre - s); 2534 return stre; 2535 } 2536 } 2537 break; 2538 } 2539 2540 case 'g': // &g 2541 { 2542 if (*++stre == 't' && *++stre == ';') // > 2543 { 2544 *s++ = '>'; 2545 ++stre; 2546 2547 g.push(s, stre - s); 2548 return stre; 2549 } 2550 break; 2551 } 2552 2553 case 'l': // &l 2554 { 2555 if (*++stre == 't' && *++stre == ';') // < 2556 { 2557 *s++ = '<'; 2558 ++stre; 2559 2560 g.push(s, stre - s); 2561 return stre; 2562 } 2563 break; 2564 } 2565 2566 case 'q': // &q 2567 { 2568 if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // " 2569 { 2570 *s++ = '"'; 2571 ++stre; 2572 2573 g.push(s, stre - s); 2574 return stre; 2575 } 2576 break; 2577 } 2578 2579 default: 2580 break; 2581 } 2582 2583 return stre; 2584 } 2585 2586 // Parser utilities 2587 #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e))) 2588 #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; } 2589 #define PUGI__OPTSET(OPT) ( optmsk & (OPT) ) 2590 #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); } 2591 #define PUGI__POPNODE() { cursor = cursor->parent; } 2592 #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; } 2593 #define PUGI__SCANWHILE(X) { while (X) ++s; } 2594 #define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } } 2595 #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; } 2596 #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0) 2597 #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); } 2598 strconv_comment(char_t * s,char_t endch)2599 PUGI__FN char_t* strconv_comment(char_t* s, char_t endch) 2600 { 2601 gap g; 2602 2603 while (true) 2604 { 2605 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment)); 2606 2607 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair 2608 { 2609 *s++ = '\n'; // replace first one with 0x0a 2610 2611 if (*s == '\n') g.push(s, 1); 2612 } 2613 else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here 2614 { 2615 *g.flush(s) = 0; 2616 2617 return s + (s[2] == '>' ? 3 : 2); 2618 } 2619 else if (*s == 0) 2620 { 2621 return 0; 2622 } 2623 else ++s; 2624 } 2625 } 2626 strconv_cdata(char_t * s,char_t endch)2627 PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch) 2628 { 2629 gap g; 2630 2631 while (true) 2632 { 2633 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata)); 2634 2635 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair 2636 { 2637 *s++ = '\n'; // replace first one with 0x0a 2638 2639 if (*s == '\n') g.push(s, 1); 2640 } 2641 else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here 2642 { 2643 *g.flush(s) = 0; 2644 2645 return s + 1; 2646 } 2647 else if (*s == 0) 2648 { 2649 return 0; 2650 } 2651 else ++s; 2652 } 2653 } 2654 2655 typedef char_t* (*strconv_pcdata_t)(char_t*); 2656 2657 template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl 2658 { parsestrconv_pcdata_impl2659 static char_t* parse(char_t* s) 2660 { 2661 gap g; 2662 2663 char_t* begin = s; 2664 2665 while (true) 2666 { 2667 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata)); 2668 2669 if (*s == '<') // PCDATA ends here 2670 { 2671 char_t* end = g.flush(s); 2672 2673 if (opt_trim::value) 2674 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) 2675 --end; 2676 2677 *end = 0; 2678 2679 return s + 1; 2680 } 2681 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair 2682 { 2683 *s++ = '\n'; // replace first one with 0x0a 2684 2685 if (*s == '\n') g.push(s, 1); 2686 } 2687 else if (opt_escape::value && *s == '&') 2688 { 2689 s = strconv_escape(s, g); 2690 } 2691 else if (*s == 0) 2692 { 2693 char_t* end = g.flush(s); 2694 2695 if (opt_trim::value) 2696 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) 2697 --end; 2698 2699 *end = 0; 2700 2701 return s; 2702 } 2703 else ++s; 2704 } 2705 } 2706 }; 2707 get_strconv_pcdata(unsigned int optmask)2708 PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask) 2709 { 2710 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800); 2711 2712 switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (trim eol escapes); this simultaneously checks 3 options from assertion above 2713 { 2714 case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse; 2715 case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse; 2716 case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse; 2717 case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse; 2718 case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse; 2719 case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse; 2720 case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse; 2721 case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse; 2722 default: assert(false); return 0; // unreachable 2723 } 2724 } 2725 2726 typedef char_t* (*strconv_attribute_t)(char_t*, char_t); 2727 2728 template <typename opt_escape> struct strconv_attribute_impl 2729 { parse_wnormstrconv_attribute_impl2730 static char_t* parse_wnorm(char_t* s, char_t end_quote) 2731 { 2732 gap g; 2733 2734 // trim leading whitespaces 2735 if (PUGI__IS_CHARTYPE(*s, ct_space)) 2736 { 2737 char_t* str = s; 2738 2739 do ++str; 2740 while (PUGI__IS_CHARTYPE(*str, ct_space)); 2741 2742 g.push(s, str - s); 2743 } 2744 2745 while (true) 2746 { 2747 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space)); 2748 2749 if (*s == end_quote) 2750 { 2751 char_t* str = g.flush(s); 2752 2753 do *str-- = 0; 2754 while (PUGI__IS_CHARTYPE(*str, ct_space)); 2755 2756 return s + 1; 2757 } 2758 else if (PUGI__IS_CHARTYPE(*s, ct_space)) 2759 { 2760 *s++ = ' '; 2761 2762 if (PUGI__IS_CHARTYPE(*s, ct_space)) 2763 { 2764 char_t* str = s + 1; 2765 while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str; 2766 2767 g.push(s, str - s); 2768 } 2769 } 2770 else if (opt_escape::value && *s == '&') 2771 { 2772 s = strconv_escape(s, g); 2773 } 2774 else if (!*s) 2775 { 2776 return 0; 2777 } 2778 else ++s; 2779 } 2780 } 2781 parse_wconvstrconv_attribute_impl2782 static char_t* parse_wconv(char_t* s, char_t end_quote) 2783 { 2784 gap g; 2785 2786 while (true) 2787 { 2788 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws)); 2789 2790 if (*s == end_quote) 2791 { 2792 *g.flush(s) = 0; 2793 2794 return s + 1; 2795 } 2796 else if (PUGI__IS_CHARTYPE(*s, ct_space)) 2797 { 2798 if (*s == '\r') 2799 { 2800 *s++ = ' '; 2801 2802 if (*s == '\n') g.push(s, 1); 2803 } 2804 else *s++ = ' '; 2805 } 2806 else if (opt_escape::value && *s == '&') 2807 { 2808 s = strconv_escape(s, g); 2809 } 2810 else if (!*s) 2811 { 2812 return 0; 2813 } 2814 else ++s; 2815 } 2816 } 2817 parse_eolstrconv_attribute_impl2818 static char_t* parse_eol(char_t* s, char_t end_quote) 2819 { 2820 gap g; 2821 2822 while (true) 2823 { 2824 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); 2825 2826 if (*s == end_quote) 2827 { 2828 *g.flush(s) = 0; 2829 2830 return s + 1; 2831 } 2832 else if (*s == '\r') 2833 { 2834 *s++ = '\n'; 2835 2836 if (*s == '\n') g.push(s, 1); 2837 } 2838 else if (opt_escape::value && *s == '&') 2839 { 2840 s = strconv_escape(s, g); 2841 } 2842 else if (!*s) 2843 { 2844 return 0; 2845 } 2846 else ++s; 2847 } 2848 } 2849 parse_simplestrconv_attribute_impl2850 static char_t* parse_simple(char_t* s, char_t end_quote) 2851 { 2852 gap g; 2853 2854 while (true) 2855 { 2856 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); 2857 2858 if (*s == end_quote) 2859 { 2860 *g.flush(s) = 0; 2861 2862 return s + 1; 2863 } 2864 else if (opt_escape::value && *s == '&') 2865 { 2866 s = strconv_escape(s, g); 2867 } 2868 else if (!*s) 2869 { 2870 return 0; 2871 } 2872 else ++s; 2873 } 2874 } 2875 }; 2876 get_strconv_attribute(unsigned int optmask)2877 PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask) 2878 { 2879 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80); 2880 2881 switch ((optmask >> 4) & 15) // get bitmask for flags (wnorm wconv eol escapes); this simultaneously checks 4 options from assertion above 2882 { 2883 case 0: return strconv_attribute_impl<opt_false>::parse_simple; 2884 case 1: return strconv_attribute_impl<opt_true>::parse_simple; 2885 case 2: return strconv_attribute_impl<opt_false>::parse_eol; 2886 case 3: return strconv_attribute_impl<opt_true>::parse_eol; 2887 case 4: return strconv_attribute_impl<opt_false>::parse_wconv; 2888 case 5: return strconv_attribute_impl<opt_true>::parse_wconv; 2889 case 6: return strconv_attribute_impl<opt_false>::parse_wconv; 2890 case 7: return strconv_attribute_impl<opt_true>::parse_wconv; 2891 case 8: return strconv_attribute_impl<opt_false>::parse_wnorm; 2892 case 9: return strconv_attribute_impl<opt_true>::parse_wnorm; 2893 case 10: return strconv_attribute_impl<opt_false>::parse_wnorm; 2894 case 11: return strconv_attribute_impl<opt_true>::parse_wnorm; 2895 case 12: return strconv_attribute_impl<opt_false>::parse_wnorm; 2896 case 13: return strconv_attribute_impl<opt_true>::parse_wnorm; 2897 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm; 2898 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm; 2899 default: assert(false); return 0; // unreachable 2900 } 2901 } 2902 make_parse_result(xml_parse_status status,ptrdiff_t offset=0)2903 inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0) 2904 { 2905 xml_parse_result result; 2906 result.status = status; 2907 result.offset = offset; 2908 2909 return result; 2910 } 2911 2912 struct xml_parser 2913 { 2914 xml_allocator* alloc; 2915 char_t* error_offset; 2916 xml_parse_status error_status; 2917 xml_parserxml_parser2918 xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok) 2919 { 2920 } 2921 2922 // DOCTYPE consists of nested sections of the following possible types: 2923 // <!-- ... -->, <? ... ?>, "...", '...' 2924 // <![...]]> 2925 // <!...> 2926 // First group can not contain nested groups 2927 // Second group can contain nested groups of the same type 2928 // Third group can contain all other groups parse_doctype_primitivexml_parser2929 char_t* parse_doctype_primitive(char_t* s) 2930 { 2931 if (*s == '"' || *s == '\'') 2932 { 2933 // quoted string 2934 char_t ch = *s++; 2935 PUGI__SCANFOR(*s == ch); 2936 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); 2937 2938 s++; 2939 } 2940 else if (s[0] == '<' && s[1] == '?') 2941 { 2942 // <? ... ?> 2943 s += 2; 2944 PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype 2945 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); 2946 2947 s += 2; 2948 } 2949 else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-') 2950 { 2951 s += 4; 2952 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype 2953 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); 2954 2955 s += 3; 2956 } 2957 else PUGI__THROW_ERROR(status_bad_doctype, s); 2958 2959 return s; 2960 } 2961 parse_doctype_ignorexml_parser2962 char_t* parse_doctype_ignore(char_t* s) 2963 { 2964 size_t depth = 0; 2965 2966 assert(s[0] == '<' && s[1] == '!' && s[2] == '['); 2967 s += 3; 2968 2969 while (*s) 2970 { 2971 if (s[0] == '<' && s[1] == '!' && s[2] == '[') 2972 { 2973 // nested ignore section 2974 s += 3; 2975 depth++; 2976 } 2977 else if (s[0] == ']' && s[1] == ']' && s[2] == '>') 2978 { 2979 // ignore section end 2980 s += 3; 2981 2982 if (depth == 0) 2983 return s; 2984 2985 depth--; 2986 } 2987 else s++; 2988 } 2989 2990 PUGI__THROW_ERROR(status_bad_doctype, s); 2991 } 2992 parse_doctype_groupxml_parser2993 char_t* parse_doctype_group(char_t* s, char_t endch) 2994 { 2995 size_t depth = 0; 2996 2997 assert((s[0] == '<' || s[0] == 0) && s[1] == '!'); 2998 s += 2; 2999 3000 while (*s) 3001 { 3002 if (s[0] == '<' && s[1] == '!' && s[2] != '-') 3003 { 3004 if (s[2] == '[') 3005 { 3006 // ignore 3007 s = parse_doctype_ignore(s); 3008 if (!s) return s; 3009 } 3010 else 3011 { 3012 // some control group 3013 s += 2; 3014 depth++; 3015 } 3016 } 3017 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') 3018 { 3019 // unknown tag (forbidden), or some primitive group 3020 s = parse_doctype_primitive(s); 3021 if (!s) return s; 3022 } 3023 else if (*s == '>') 3024 { 3025 if (depth == 0) 3026 return s; 3027 3028 depth--; 3029 s++; 3030 } 3031 else s++; 3032 } 3033 3034 if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s); 3035 3036 return s; 3037 } 3038 parse_exclamationxml_parser3039 char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) 3040 { 3041 // parse node contents, starting with exclamation mark 3042 ++s; 3043 3044 if (*s == '-') // '<!-...' 3045 { 3046 ++s; 3047 3048 if (*s == '-') // '<!--...' 3049 { 3050 ++s; 3051 3052 if (PUGI__OPTSET(parse_comments)) 3053 { 3054 PUGI__PUSHNODE(node_comment); // Append a new node on the tree. 3055 cursor->value = s; // Save the offset. 3056 } 3057 3058 if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments)) 3059 { 3060 s = strconv_comment(s, endch); 3061 3062 if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value); 3063 } 3064 else 3065 { 3066 // Scan for terminating '-->'. 3067 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')); 3068 PUGI__CHECK_ERROR(status_bad_comment, s); 3069 3070 if (PUGI__OPTSET(parse_comments)) 3071 *s = 0; // Zero-terminate this segment at the first terminating '-'. 3072 3073 s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'. 3074 } 3075 } 3076 else PUGI__THROW_ERROR(status_bad_comment, s); 3077 } 3078 else if (*s == '[') 3079 { 3080 // '<![CDATA[...' 3081 if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[') 3082 { 3083 ++s; 3084 3085 if (PUGI__OPTSET(parse_cdata)) 3086 { 3087 PUGI__PUSHNODE(node_cdata); // Append a new node on the tree. 3088 cursor->value = s; // Save the offset. 3089 3090 if (PUGI__OPTSET(parse_eol)) 3091 { 3092 s = strconv_cdata(s, endch); 3093 3094 if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value); 3095 } 3096 else 3097 { 3098 // Scan for terminating ']]>'. 3099 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')); 3100 PUGI__CHECK_ERROR(status_bad_cdata, s); 3101 3102 *s++ = 0; // Zero-terminate this segment. 3103 } 3104 } 3105 else // Flagged for discard, but we still have to scan for the terminator. 3106 { 3107 // Scan for terminating ']]>'. 3108 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')); 3109 PUGI__CHECK_ERROR(status_bad_cdata, s); 3110 3111 ++s; 3112 } 3113 3114 s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'. 3115 } 3116 else PUGI__THROW_ERROR(status_bad_cdata, s); 3117 } 3118 else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E')) 3119 { 3120 s -= 2; 3121 3122 if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s); 3123 3124 char_t* mark = s + 9; 3125 3126 s = parse_doctype_group(s, endch); 3127 if (!s) return s; 3128 3129 assert((*s == 0 && endch == '>') || *s == '>'); 3130 if (*s) *s++ = 0; 3131 3132 if (PUGI__OPTSET(parse_doctype)) 3133 { 3134 while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark; 3135 3136 PUGI__PUSHNODE(node_doctype); 3137 3138 cursor->value = mark; 3139 } 3140 } 3141 else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s); 3142 else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s); 3143 else PUGI__THROW_ERROR(status_unrecognized_tag, s); 3144 3145 return s; 3146 } 3147 parse_questionxml_parser3148 char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch) 3149 { 3150 // load into registers 3151 xml_node_struct* cursor = ref_cursor; 3152 char_t ch = 0; 3153 3154 // parse node contents, starting with question mark 3155 ++s; 3156 3157 // read PI target 3158 char_t* target = s; 3159 3160 if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s); 3161 3162 PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); 3163 PUGI__CHECK_ERROR(status_bad_pi, s); 3164 3165 // determine node type; stricmp / strcasecmp is not portable 3166 bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s; 3167 3168 if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi)) 3169 { 3170 if (declaration) 3171 { 3172 // disallow non top-level declarations 3173 if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s); 3174 3175 PUGI__PUSHNODE(node_declaration); 3176 } 3177 else 3178 { 3179 PUGI__PUSHNODE(node_pi); 3180 } 3181 3182 cursor->name = target; 3183 3184 PUGI__ENDSEG(); 3185 3186 // parse value/attributes 3187 if (ch == '?') 3188 { 3189 // empty node 3190 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s); 3191 s += (*s == '>'); 3192 3193 PUGI__POPNODE(); 3194 } 3195 else if (PUGI__IS_CHARTYPE(ch, ct_space)) 3196 { 3197 PUGI__SKIPWS(); 3198 3199 // scan for tag end 3200 char_t* value = s; 3201 3202 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>')); 3203 PUGI__CHECK_ERROR(status_bad_pi, s); 3204 3205 if (declaration) 3206 { 3207 // replace ending ? with / so that 'element' terminates properly 3208 *s = '/'; 3209 3210 // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES 3211 s = value; 3212 } 3213 else 3214 { 3215 // store value and step over > 3216 cursor->value = value; 3217 3218 PUGI__POPNODE(); 3219 3220 PUGI__ENDSEG(); 3221 3222 s += (*s == '>'); 3223 } 3224 } 3225 else PUGI__THROW_ERROR(status_bad_pi, s); 3226 } 3227 else 3228 { 3229 // scan for tag end 3230 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>')); 3231 PUGI__CHECK_ERROR(status_bad_pi, s); 3232 3233 s += (s[1] == '>' ? 2 : 1); 3234 } 3235 3236 // store from registers 3237 ref_cursor = cursor; 3238 3239 return s; 3240 } 3241 parse_treexml_parser3242 char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch) 3243 { 3244 strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk); 3245 strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk); 3246 3247 char_t ch = 0; 3248 xml_node_struct* cursor = root; 3249 char_t* mark = s; 3250 3251 while (*s != 0) 3252 { 3253 if (*s == '<') 3254 { 3255 ++s; 3256 3257 LOC_TAG: 3258 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...' 3259 { 3260 PUGI__PUSHNODE(node_element); // Append a new node to the tree. 3261 3262 cursor->name = s; 3263 3264 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. 3265 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over. 3266 3267 if (ch == '>') 3268 { 3269 // end of tag 3270 } 3271 else if (PUGI__IS_CHARTYPE(ch, ct_space)) 3272 { 3273 LOC_ATTRIBUTES: 3274 while (true) 3275 { 3276 PUGI__SKIPWS(); // Eat any whitespace. 3277 3278 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #... 3279 { 3280 xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute. 3281 if (!a) PUGI__THROW_ERROR(status_out_of_memory, s); 3282 3283 a->name = s; // Save the offset. 3284 3285 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. 3286 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over. 3287 3288 if (PUGI__IS_CHARTYPE(ch, ct_space)) 3289 { 3290 PUGI__SKIPWS(); // Eat any whitespace. 3291 3292 ch = *s; 3293 ++s; 3294 } 3295 3296 if (ch == '=') // '<... #=...' 3297 { 3298 PUGI__SKIPWS(); // Eat any whitespace. 3299 3300 if (*s == '"' || *s == '\'') // '<... #="...' 3301 { 3302 ch = *s; // Save quote char to avoid breaking on "''" -or- '""'. 3303 ++s; // Step over the quote. 3304 a->value = s; // Save the offset. 3305 3306 s = strconv_attribute(s, ch); 3307 3308 if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value); 3309 3310 // After this line the loop continues from the start; 3311 // Whitespaces, / and > are ok, symbols and EOF are wrong, 3312 // everything else will be detected 3313 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s); 3314 } 3315 else PUGI__THROW_ERROR(status_bad_attribute, s); 3316 } 3317 else PUGI__THROW_ERROR(status_bad_attribute, s); 3318 } 3319 else if (*s == '/') 3320 { 3321 ++s; 3322 3323 if (*s == '>') 3324 { 3325 PUGI__POPNODE(); 3326 s++; 3327 break; 3328 } 3329 else if (*s == 0 && endch == '>') 3330 { 3331 PUGI__POPNODE(); 3332 break; 3333 } 3334 else PUGI__THROW_ERROR(status_bad_start_element, s); 3335 } 3336 else if (*s == '>') 3337 { 3338 ++s; 3339 3340 break; 3341 } 3342 else if (*s == 0 && endch == '>') 3343 { 3344 break; 3345 } 3346 else PUGI__THROW_ERROR(status_bad_start_element, s); 3347 } 3348 3349 // !!! 3350 } 3351 else if (ch == '/') // '<#.../' 3352 { 3353 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s); 3354 3355 PUGI__POPNODE(); // Pop. 3356 3357 s += (*s == '>'); 3358 } 3359 else if (ch == 0) 3360 { 3361 // we stepped over null terminator, backtrack & handle closing tag 3362 --s; 3363 3364 if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s); 3365 } 3366 else PUGI__THROW_ERROR(status_bad_start_element, s); 3367 } 3368 else if (*s == '/') 3369 { 3370 ++s; 3371 3372 mark = s; 3373 3374 char_t* name = cursor->name; 3375 if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark); 3376 3377 while (PUGI__IS_CHARTYPE(*s, ct_symbol)) 3378 { 3379 if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark); 3380 } 3381 3382 if (*name) 3383 { 3384 if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s); 3385 else PUGI__THROW_ERROR(status_end_element_mismatch, mark); 3386 } 3387 3388 PUGI__POPNODE(); // Pop. 3389 3390 PUGI__SKIPWS(); 3391 3392 if (*s == 0) 3393 { 3394 if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s); 3395 } 3396 else 3397 { 3398 if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s); 3399 ++s; 3400 } 3401 } 3402 else if (*s == '?') // '<?...' 3403 { 3404 s = parse_question(s, cursor, optmsk, endch); 3405 if (!s) return s; 3406 3407 assert(cursor); 3408 if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES; 3409 } 3410 else if (*s == '!') // '<!...' 3411 { 3412 s = parse_exclamation(s, cursor, optmsk, endch); 3413 if (!s) return s; 3414 } 3415 else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s); 3416 else PUGI__THROW_ERROR(status_unrecognized_tag, s); 3417 } 3418 else 3419 { 3420 mark = s; // Save this offset while searching for a terminator. 3421 3422 PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here. 3423 3424 if (*s == '<' || !*s) 3425 { 3426 // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one 3427 assert(mark != s); 3428 3429 if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata)) 3430 { 3431 continue; 3432 } 3433 else if (PUGI__OPTSET(parse_ws_pcdata_single)) 3434 { 3435 if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue; 3436 } 3437 } 3438 3439 if (!PUGI__OPTSET(parse_trim_pcdata)) 3440 s = mark; 3441 3442 if (cursor->parent || PUGI__OPTSET(parse_fragment)) 3443 { 3444 if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value) 3445 { 3446 cursor->value = s; // Save the offset. 3447 } 3448 else 3449 { 3450 PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree. 3451 3452 cursor->value = s; // Save the offset. 3453 3454 PUGI__POPNODE(); // Pop since this is a standalone. 3455 } 3456 3457 s = strconv_pcdata(s); 3458 3459 if (!*s) break; 3460 } 3461 else 3462 { 3463 PUGI__SCANFOR(*s == '<'); // '...<' 3464 if (!*s) break; 3465 3466 ++s; 3467 } 3468 3469 // We're after '<' 3470 goto LOC_TAG; 3471 } 3472 } 3473 3474 // check that last tag is closed 3475 if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s); 3476 3477 return s; 3478 } 3479 3480 #ifdef PUGIXML_WCHAR_MODE parse_skip_bomxml_parser3481 static char_t* parse_skip_bom(char_t* s) 3482 { 3483 unsigned int bom = 0xfeff; 3484 return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s; 3485 } 3486 #else parse_skip_bomxml_parser3487 static char_t* parse_skip_bom(char_t* s) 3488 { 3489 return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s; 3490 } 3491 #endif 3492 has_element_node_siblingsxml_parser3493 static bool has_element_node_siblings(xml_node_struct* node) 3494 { 3495 while (node) 3496 { 3497 if (PUGI__NODETYPE(node) == node_element) return true; 3498 3499 node = node->next_sibling; 3500 } 3501 3502 return false; 3503 } 3504 parsexml_parser3505 static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk) 3506 { 3507 // early-out for empty documents 3508 if (length == 0) 3509 return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element); 3510 3511 // get last child of the root before parsing 3512 xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0; 3513 3514 // create parser on stack 3515 xml_parser parser(static_cast<xml_allocator*>(xmldoc)); 3516 3517 // save last character and make buffer zero-terminated (speeds up parsing) 3518 char_t endch = buffer[length - 1]; 3519 buffer[length - 1] = 0; 3520 3521 // skip BOM to make sure it does not end up as part of parse output 3522 char_t* buffer_data = parse_skip_bom(buffer); 3523 3524 // perform actual parsing 3525 parser.parse_tree(buffer_data, root, optmsk, endch); 3526 3527 xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0); 3528 assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length); 3529 3530 if (result) 3531 { 3532 // since we removed last character, we have to handle the only possible false positive (stray <) 3533 if (endch == '<') 3534 return make_parse_result(status_unrecognized_tag, length - 1); 3535 3536 // check if there are any element nodes parsed 3537 xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0; 3538 3539 if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed)) 3540 return make_parse_result(status_no_document_element, length - 1); 3541 } 3542 else 3543 { 3544 // roll back offset if it occurs on a null terminator in the source buffer 3545 if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0) 3546 result.offset--; 3547 } 3548 3549 return result; 3550 } 3551 }; 3552 3553 // Output facilities get_write_native_encoding()3554 PUGI__FN xml_encoding get_write_native_encoding() 3555 { 3556 #ifdef PUGIXML_WCHAR_MODE 3557 return get_wchar_encoding(); 3558 #else 3559 return encoding_utf8; 3560 #endif 3561 } 3562 get_write_encoding(xml_encoding encoding)3563 PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding) 3564 { 3565 // replace wchar encoding with utf implementation 3566 if (encoding == encoding_wchar) return get_wchar_encoding(); 3567 3568 // replace utf16 encoding with utf16 with specific endianness 3569 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 3570 3571 // replace utf32 encoding with utf32 with specific endianness 3572 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; 3573 3574 // only do autodetection if no explicit encoding is requested 3575 if (encoding != encoding_auto) return encoding; 3576 3577 // assume utf8 encoding 3578 return encoding_utf8; 3579 } 3580 convert_buffer_output_generic(typename T::value_type dest,const char_t * data,size_t length,D,T)3581 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T) 3582 { 3583 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); 3584 3585 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T()); 3586 3587 return static_cast<size_t>(end - dest) * sizeof(*dest); 3588 } 3589 convert_buffer_output_generic(typename T::value_type dest,const char_t * data,size_t length,D,T,bool opt_swap)3590 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap) 3591 { 3592 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); 3593 3594 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T()); 3595 3596 if (opt_swap) 3597 { 3598 for (typename T::value_type i = dest; i != end; ++i) 3599 *i = endian_swap(*i); 3600 } 3601 3602 return static_cast<size_t>(end - dest) * sizeof(*dest); 3603 } 3604 3605 #ifdef PUGIXML_WCHAR_MODE get_valid_length(const char_t * data,size_t length)3606 PUGI__FN size_t get_valid_length(const char_t* data, size_t length) 3607 { 3608 if (length < 1) return 0; 3609 3610 // discard last character if it's the lead of a surrogate pair 3611 return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length; 3612 } 3613 convert_buffer_output(char_t * r_char,uint8_t * r_u8,uint16_t * r_u16,uint32_t * r_u32,const char_t * data,size_t length,xml_encoding encoding)3614 PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) 3615 { 3616 // only endian-swapping is required 3617 if (need_endian_swap_utf(encoding, get_wchar_encoding())) 3618 { 3619 convert_wchar_endian_swap(r_char, data, length); 3620 3621 return length * sizeof(char_t); 3622 } 3623 3624 // convert to utf8 3625 if (encoding == encoding_utf8) 3626 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer()); 3627 3628 // convert to utf16 3629 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) 3630 { 3631 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 3632 3633 return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding); 3634 } 3635 3636 // convert to utf32 3637 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) 3638 { 3639 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; 3640 3641 return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding); 3642 } 3643 3644 // convert to latin1 3645 if (encoding == encoding_latin1) 3646 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer()); 3647 3648 assert(false && "Invalid encoding"); // unreachable 3649 return 0; 3650 } 3651 #else get_valid_length(const char_t * data,size_t length)3652 PUGI__FN size_t get_valid_length(const char_t* data, size_t length) 3653 { 3654 if (length < 5) return 0; 3655 3656 for (size_t i = 1; i <= 4; ++i) 3657 { 3658 uint8_t ch = static_cast<uint8_t>(data[length - i]); 3659 3660 // either a standalone character or a leading one 3661 if ((ch & 0xc0) != 0x80) return length - i; 3662 } 3663 3664 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk 3665 return length; 3666 } 3667 convert_buffer_output(char_t *,uint8_t * r_u8,uint16_t * r_u16,uint32_t * r_u32,const char_t * data,size_t length,xml_encoding encoding)3668 PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) 3669 { 3670 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) 3671 { 3672 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 3673 3674 return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding); 3675 } 3676 3677 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) 3678 { 3679 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; 3680 3681 return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding); 3682 } 3683 3684 if (encoding == encoding_latin1) 3685 return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer()); 3686 3687 assert(false && "Invalid encoding"); // unreachable 3688 return 0; 3689 } 3690 #endif 3691 3692 class xml_buffered_writer 3693 { 3694 xml_buffered_writer(const xml_buffered_writer&); 3695 xml_buffered_writer& operator=(const xml_buffered_writer&); 3696 3697 public: xml_buffered_writer(xml_writer & writer_,xml_encoding user_encoding)3698 xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding)) 3699 { 3700 PUGI__STATIC_ASSERT(bufcapacity >= 8); 3701 } 3702 flush()3703 size_t flush() 3704 { 3705 flush(buffer, bufsize); 3706 bufsize = 0; 3707 return 0; 3708 } 3709 flush(const char_t * data,size_t size)3710 void flush(const char_t* data, size_t size) 3711 { 3712 if (size == 0) return; 3713 3714 // fast path, just write data 3715 if (encoding == get_write_native_encoding()) 3716 writer.write(data, size * sizeof(char_t)); 3717 else 3718 { 3719 // convert chunk 3720 size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding); 3721 assert(result <= sizeof(scratch)); 3722 3723 // write data 3724 writer.write(scratch.data_u8, result); 3725 } 3726 } 3727 write_direct(const char_t * data,size_t length)3728 void write_direct(const char_t* data, size_t length) 3729 { 3730 // flush the remaining buffer contents 3731 flush(); 3732 3733 // handle large chunks 3734 if (length > bufcapacity) 3735 { 3736 if (encoding == get_write_native_encoding()) 3737 { 3738 // fast path, can just write data chunk 3739 writer.write(data, length * sizeof(char_t)); 3740 return; 3741 } 3742 3743 // need to convert in suitable chunks 3744 while (length > bufcapacity) 3745 { 3746 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer 3747 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary) 3748 size_t chunk_size = get_valid_length(data, bufcapacity); 3749 assert(chunk_size); 3750 3751 // convert chunk and write 3752 flush(data, chunk_size); 3753 3754 // iterate 3755 data += chunk_size; 3756 length -= chunk_size; 3757 } 3758 3759 // small tail is copied below 3760 bufsize = 0; 3761 } 3762 3763 memcpy(buffer + bufsize, data, length * sizeof(char_t)); 3764 bufsize += length; 3765 } 3766 write_buffer(const char_t * data,size_t length)3767 void write_buffer(const char_t* data, size_t length) 3768 { 3769 size_t offset = bufsize; 3770 3771 if (offset + length <= bufcapacity) 3772 { 3773 memcpy(buffer + offset, data, length * sizeof(char_t)); 3774 bufsize = offset + length; 3775 } 3776 else 3777 { 3778 write_direct(data, length); 3779 } 3780 } 3781 write_string(const char_t * data)3782 void write_string(const char_t* data) 3783 { 3784 // write the part of the string that fits in the buffer 3785 size_t offset = bufsize; 3786 3787 while (*data && offset < bufcapacity) 3788 buffer[offset++] = *data++; 3789 3790 // write the rest 3791 if (offset < bufcapacity) 3792 { 3793 bufsize = offset; 3794 } 3795 else 3796 { 3797 // backtrack a bit if we have split the codepoint 3798 size_t length = offset - bufsize; 3799 size_t extra = length - get_valid_length(data - length, length); 3800 3801 bufsize = offset - extra; 3802 3803 write_direct(data - extra, strlength(data) + extra); 3804 } 3805 } 3806 write(char_t d0)3807 void write(char_t d0) 3808 { 3809 size_t offset = bufsize; 3810 if (offset > bufcapacity - 1) offset = flush(); 3811 3812 buffer[offset + 0] = d0; 3813 bufsize = offset + 1; 3814 } 3815 write(char_t d0,char_t d1)3816 void write(char_t d0, char_t d1) 3817 { 3818 size_t offset = bufsize; 3819 if (offset > bufcapacity - 2) offset = flush(); 3820 3821 buffer[offset + 0] = d0; 3822 buffer[offset + 1] = d1; 3823 bufsize = offset + 2; 3824 } 3825 write(char_t d0,char_t d1,char_t d2)3826 void write(char_t d0, char_t d1, char_t d2) 3827 { 3828 size_t offset = bufsize; 3829 if (offset > bufcapacity - 3) offset = flush(); 3830 3831 buffer[offset + 0] = d0; 3832 buffer[offset + 1] = d1; 3833 buffer[offset + 2] = d2; 3834 bufsize = offset + 3; 3835 } 3836 write(char_t d0,char_t d1,char_t d2,char_t d3)3837 void write(char_t d0, char_t d1, char_t d2, char_t d3) 3838 { 3839 size_t offset = bufsize; 3840 if (offset > bufcapacity - 4) offset = flush(); 3841 3842 buffer[offset + 0] = d0; 3843 buffer[offset + 1] = d1; 3844 buffer[offset + 2] = d2; 3845 buffer[offset + 3] = d3; 3846 bufsize = offset + 4; 3847 } 3848 write(char_t d0,char_t d1,char_t d2,char_t d3,char_t d4)3849 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) 3850 { 3851 size_t offset = bufsize; 3852 if (offset > bufcapacity - 5) offset = flush(); 3853 3854 buffer[offset + 0] = d0; 3855 buffer[offset + 1] = d1; 3856 buffer[offset + 2] = d2; 3857 buffer[offset + 3] = d3; 3858 buffer[offset + 4] = d4; 3859 bufsize = offset + 5; 3860 } 3861 write(char_t d0,char_t d1,char_t d2,char_t d3,char_t d4,char_t d5)3862 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) 3863 { 3864 size_t offset = bufsize; 3865 if (offset > bufcapacity - 6) offset = flush(); 3866 3867 buffer[offset + 0] = d0; 3868 buffer[offset + 1] = d1; 3869 buffer[offset + 2] = d2; 3870 buffer[offset + 3] = d3; 3871 buffer[offset + 4] = d4; 3872 buffer[offset + 5] = d5; 3873 bufsize = offset + 6; 3874 } 3875 3876 // utf8 maximum expansion: x4 (-> utf32) 3877 // utf16 maximum expansion: x2 (-> utf32) 3878 // utf32 maximum expansion: x1 3879 enum 3880 { 3881 bufcapacitybytes = 3882 #ifdef PUGIXML_MEMORY_OUTPUT_STACK 3883 PUGIXML_MEMORY_OUTPUT_STACK 3884 #else 3885 10240 3886 #endif 3887 , 3888 bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4) 3889 }; 3890 3891 char_t buffer[bufcapacity]; 3892 3893 union 3894 { 3895 uint8_t data_u8[4 * bufcapacity]; 3896 uint16_t data_u16[2 * bufcapacity]; 3897 uint32_t data_u32[bufcapacity]; 3898 char_t data_char[bufcapacity]; 3899 } scratch; 3900 3901 xml_writer& writer; 3902 size_t bufsize; 3903 xml_encoding encoding; 3904 }; 3905 text_output_escaped(xml_buffered_writer & writer,const char_t * s,chartypex_t type,unsigned int flags)3906 PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags) 3907 { 3908 while (*s) 3909 { 3910 const char_t* prev = s; 3911 3912 // While *s is a usual symbol 3913 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type)); 3914 3915 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 3916 3917 switch (*s) 3918 { 3919 case 0: break; 3920 case '&': 3921 writer.write('&', 'a', 'm', 'p', ';'); 3922 ++s; 3923 break; 3924 case '<': 3925 writer.write('&', 'l', 't', ';'); 3926 ++s; 3927 break; 3928 case '>': 3929 writer.write('&', 'g', 't', ';'); 3930 ++s; 3931 break; 3932 case '"': 3933 if (flags & format_attribute_single_quote) 3934 writer.write('"'); 3935 else 3936 writer.write('&', 'q', 'u', 'o', 't', ';'); 3937 ++s; 3938 break; 3939 case '\'': 3940 if (flags & format_attribute_single_quote) 3941 writer.write('&', 'a', 'p', 'o', 's', ';'); 3942 else 3943 writer.write('\''); 3944 ++s; 3945 break; 3946 default: // s is not a usual symbol 3947 { 3948 unsigned int ch = static_cast<unsigned int>(*s++); 3949 assert(ch < 32); 3950 3951 if (!(flags & format_skip_control_chars)) 3952 writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';'); 3953 } 3954 } 3955 } 3956 } 3957 text_output(xml_buffered_writer & writer,const char_t * s,chartypex_t type,unsigned int flags)3958 PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags) 3959 { 3960 if (flags & format_no_escapes) 3961 writer.write_string(s); 3962 else 3963 text_output_escaped(writer, s, type, flags); 3964 } 3965 text_output_cdata(xml_buffered_writer & writer,const char_t * s)3966 PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s) 3967 { 3968 do 3969 { 3970 writer.write('<', '!', '[', 'C', 'D'); 3971 writer.write('A', 'T', 'A', '['); 3972 3973 const char_t* prev = s; 3974 3975 // look for ]]> sequence - we can't output it as is since it terminates CDATA 3976 while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s; 3977 3978 // skip ]] if we stopped at ]]>, > will go to the next CDATA section 3979 if (*s) s += 2; 3980 3981 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 3982 3983 writer.write(']', ']', '>'); 3984 } 3985 while (*s); 3986 } 3987 text_output_indent(xml_buffered_writer & writer,const char_t * indent,size_t indent_length,unsigned int depth)3988 PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth) 3989 { 3990 switch (indent_length) 3991 { 3992 case 1: 3993 { 3994 for (unsigned int i = 0; i < depth; ++i) 3995 writer.write(indent[0]); 3996 break; 3997 } 3998 3999 case 2: 4000 { 4001 for (unsigned int i = 0; i < depth; ++i) 4002 writer.write(indent[0], indent[1]); 4003 break; 4004 } 4005 4006 case 3: 4007 { 4008 for (unsigned int i = 0; i < depth; ++i) 4009 writer.write(indent[0], indent[1], indent[2]); 4010 break; 4011 } 4012 4013 case 4: 4014 { 4015 for (unsigned int i = 0; i < depth; ++i) 4016 writer.write(indent[0], indent[1], indent[2], indent[3]); 4017 break; 4018 } 4019 4020 default: 4021 { 4022 for (unsigned int i = 0; i < depth; ++i) 4023 writer.write_buffer(indent, indent_length); 4024 } 4025 } 4026 } 4027 node_output_comment(xml_buffered_writer & writer,const char_t * s)4028 PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s) 4029 { 4030 writer.write('<', '!', '-', '-'); 4031 4032 while (*s) 4033 { 4034 const char_t* prev = s; 4035 4036 // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body 4037 while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s; 4038 4039 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 4040 4041 if (*s) 4042 { 4043 assert(*s == '-'); 4044 4045 writer.write('-', ' '); 4046 ++s; 4047 } 4048 } 4049 4050 writer.write('-', '-', '>'); 4051 } 4052 node_output_pi_value(xml_buffered_writer & writer,const char_t * s)4053 PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s) 4054 { 4055 while (*s) 4056 { 4057 const char_t* prev = s; 4058 4059 // look for ?> sequence - we can't output it since ?> terminates PI 4060 while (*s && !(s[0] == '?' && s[1] == '>')) ++s; 4061 4062 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 4063 4064 if (*s) 4065 { 4066 assert(s[0] == '?' && s[1] == '>'); 4067 4068 writer.write('?', ' ', '>'); 4069 s += 2; 4070 } 4071 } 4072 } 4073 node_output_attributes(xml_buffered_writer & writer,xml_node_struct * node,const char_t * indent,size_t indent_length,unsigned int flags,unsigned int depth)4074 PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) 4075 { 4076 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 4077 const char_t enquotation_char = (flags & format_attribute_single_quote) ? '\'' : '"'; 4078 4079 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) 4080 { 4081 if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes) 4082 { 4083 writer.write('\n'); 4084 4085 text_output_indent(writer, indent, indent_length, depth + 1); 4086 } 4087 else 4088 { 4089 writer.write(' '); 4090 } 4091 4092 writer.write_string(a->name ? a->name + 0 : default_name); 4093 writer.write('=', enquotation_char); 4094 4095 if (a->value) 4096 text_output(writer, a->value, ctx_special_attr, flags); 4097 4098 writer.write(enquotation_char); 4099 } 4100 } 4101 node_output_start(xml_buffered_writer & writer,xml_node_struct * node,const char_t * indent,size_t indent_length,unsigned int flags,unsigned int depth)4102 PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) 4103 { 4104 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 4105 const char_t* name = node->name ? node->name + 0 : default_name; 4106 4107 writer.write('<'); 4108 writer.write_string(name); 4109 4110 if (node->first_attribute) 4111 node_output_attributes(writer, node, indent, indent_length, flags, depth); 4112 4113 // element nodes can have value if parse_embed_pcdata was used 4114 if (!node->value) 4115 { 4116 if (!node->first_child) 4117 { 4118 if (flags & format_no_empty_element_tags) 4119 { 4120 writer.write('>', '<', '/'); 4121 writer.write_string(name); 4122 writer.write('>'); 4123 4124 return false; 4125 } 4126 else 4127 { 4128 if ((flags & format_raw) == 0) 4129 writer.write(' '); 4130 4131 writer.write('/', '>'); 4132 4133 return false; 4134 } 4135 } 4136 else 4137 { 4138 writer.write('>'); 4139 4140 return true; 4141 } 4142 } 4143 else 4144 { 4145 writer.write('>'); 4146 4147 text_output(writer, node->value, ctx_special_pcdata, flags); 4148 4149 if (!node->first_child) 4150 { 4151 writer.write('<', '/'); 4152 writer.write_string(name); 4153 writer.write('>'); 4154 4155 return false; 4156 } 4157 else 4158 { 4159 return true; 4160 } 4161 } 4162 } 4163 node_output_end(xml_buffered_writer & writer,xml_node_struct * node)4164 PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node) 4165 { 4166 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 4167 const char_t* name = node->name ? node->name + 0 : default_name; 4168 4169 writer.write('<', '/'); 4170 writer.write_string(name); 4171 writer.write('>'); 4172 } 4173 node_output_simple(xml_buffered_writer & writer,xml_node_struct * node,unsigned int flags)4174 PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags) 4175 { 4176 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 4177 4178 switch (PUGI__NODETYPE(node)) 4179 { 4180 case node_pcdata: 4181 text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags); 4182 break; 4183 4184 case node_cdata: 4185 text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT("")); 4186 break; 4187 4188 case node_comment: 4189 node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT("")); 4190 break; 4191 4192 case node_pi: 4193 writer.write('<', '?'); 4194 writer.write_string(node->name ? node->name + 0 : default_name); 4195 4196 if (node->value) 4197 { 4198 writer.write(' '); 4199 node_output_pi_value(writer, node->value); 4200 } 4201 4202 writer.write('?', '>'); 4203 break; 4204 4205 case node_declaration: 4206 writer.write('<', '?'); 4207 writer.write_string(node->name ? node->name + 0 : default_name); 4208 node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0); 4209 writer.write('?', '>'); 4210 break; 4211 4212 case node_doctype: 4213 writer.write('<', '!', 'D', 'O', 'C'); 4214 writer.write('T', 'Y', 'P', 'E'); 4215 4216 if (node->value) 4217 { 4218 writer.write(' '); 4219 writer.write_string(node->value); 4220 } 4221 4222 writer.write('>'); 4223 break; 4224 4225 default: 4226 assert(false && "Invalid node type"); // unreachable 4227 } 4228 } 4229 4230 enum indent_flags_t 4231 { 4232 indent_newline = 1, 4233 indent_indent = 2 4234 }; 4235 node_output(xml_buffered_writer & writer,xml_node_struct * root,const char_t * indent,unsigned int flags,unsigned int depth)4236 PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth) 4237 { 4238 size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0; 4239 unsigned int indent_flags = indent_indent; 4240 4241 xml_node_struct* node = root; 4242 4243 do 4244 { 4245 assert(node); 4246 4247 // begin writing current node 4248 if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata) 4249 { 4250 node_output_simple(writer, node, flags); 4251 4252 indent_flags = 0; 4253 } 4254 else 4255 { 4256 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) 4257 writer.write('\n'); 4258 4259 if ((indent_flags & indent_indent) && indent_length) 4260 text_output_indent(writer, indent, indent_length, depth); 4261 4262 if (PUGI__NODETYPE(node) == node_element) 4263 { 4264 indent_flags = indent_newline | indent_indent; 4265 4266 if (node_output_start(writer, node, indent, indent_length, flags, depth)) 4267 { 4268 // element nodes can have value if parse_embed_pcdata was used 4269 if (node->value) 4270 indent_flags = 0; 4271 4272 node = node->first_child; 4273 depth++; 4274 continue; 4275 } 4276 } 4277 else if (PUGI__NODETYPE(node) == node_document) 4278 { 4279 indent_flags = indent_indent; 4280 4281 if (node->first_child) 4282 { 4283 node = node->first_child; 4284 continue; 4285 } 4286 } 4287 else 4288 { 4289 node_output_simple(writer, node, flags); 4290 4291 indent_flags = indent_newline | indent_indent; 4292 } 4293 } 4294 4295 // continue to the next node 4296 while (node != root) 4297 { 4298 if (node->next_sibling) 4299 { 4300 node = node->next_sibling; 4301 break; 4302 } 4303 4304 node = node->parent; 4305 4306 // write closing node 4307 if (PUGI__NODETYPE(node) == node_element) 4308 { 4309 depth--; 4310 4311 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) 4312 writer.write('\n'); 4313 4314 if ((indent_flags & indent_indent) && indent_length) 4315 text_output_indent(writer, indent, indent_length, depth); 4316 4317 node_output_end(writer, node); 4318 4319 indent_flags = indent_newline | indent_indent; 4320 } 4321 } 4322 } 4323 while (node != root); 4324 4325 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) 4326 writer.write('\n'); 4327 } 4328 has_declaration(xml_node_struct * node)4329 PUGI__FN bool has_declaration(xml_node_struct* node) 4330 { 4331 for (xml_node_struct* child = node->first_child; child; child = child->next_sibling) 4332 { 4333 xml_node_type type = PUGI__NODETYPE(child); 4334 4335 if (type == node_declaration) return true; 4336 if (type == node_element) return false; 4337 } 4338 4339 return false; 4340 } 4341 is_attribute_of(xml_attribute_struct * attr,xml_node_struct * node)4342 PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node) 4343 { 4344 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) 4345 if (a == attr) 4346 return true; 4347 4348 return false; 4349 } 4350 allow_insert_attribute(xml_node_type parent)4351 PUGI__FN bool allow_insert_attribute(xml_node_type parent) 4352 { 4353 return parent == node_element || parent == node_declaration; 4354 } 4355 allow_insert_child(xml_node_type parent,xml_node_type child)4356 PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child) 4357 { 4358 if (parent != node_document && parent != node_element) return false; 4359 if (child == node_document || child == node_null) return false; 4360 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false; 4361 4362 return true; 4363 } 4364 allow_move(xml_node parent,xml_node child)4365 PUGI__FN bool allow_move(xml_node parent, xml_node child) 4366 { 4367 // check that child can be a child of parent 4368 if (!allow_insert_child(parent.type(), child.type())) 4369 return false; 4370 4371 // check that node is not moved between documents 4372 if (parent.root() != child.root()) 4373 return false; 4374 4375 // check that new parent is not in the child subtree 4376 xml_node cur = parent; 4377 4378 while (cur) 4379 { 4380 if (cur == child) 4381 return false; 4382 4383 cur = cur.parent(); 4384 } 4385 4386 return true; 4387 } 4388 4389 template <typename String, typename Header> node_copy_string(String & dest,Header & header,uintptr_t header_mask,char_t * source,Header & source_header,xml_allocator * alloc)4390 PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc) 4391 { 4392 assert(!dest && (header & header_mask) == 0); 4393 4394 if (source) 4395 { 4396 if (alloc && (source_header & header_mask) == 0) 4397 { 4398 dest = source; 4399 4400 // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared 4401 header |= xml_memory_page_contents_shared_mask; 4402 source_header |= xml_memory_page_contents_shared_mask; 4403 } 4404 else 4405 strcpy_insitu(dest, header, header_mask, source, strlength(source)); 4406 } 4407 } 4408 node_copy_contents(xml_node_struct * dn,xml_node_struct * sn,xml_allocator * shared_alloc)4409 PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc) 4410 { 4411 node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc); 4412 node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc); 4413 4414 for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute) 4415 { 4416 xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn)); 4417 4418 if (da) 4419 { 4420 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); 4421 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); 4422 } 4423 } 4424 } 4425 node_copy_tree(xml_node_struct * dn,xml_node_struct * sn)4426 PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn) 4427 { 4428 xml_allocator& alloc = get_allocator(dn); 4429 xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0; 4430 4431 node_copy_contents(dn, sn, shared_alloc); 4432 4433 xml_node_struct* dit = dn; 4434 xml_node_struct* sit = sn->first_child; 4435 4436 while (sit && sit != sn) 4437 { 4438 // loop invariant: dit is inside the subtree rooted at dn 4439 assert(dit); 4440 4441 // when a tree is copied into one of the descendants, we need to skip that subtree to avoid an infinite loop 4442 if (sit != dn) 4443 { 4444 xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit)); 4445 4446 if (copy) 4447 { 4448 node_copy_contents(copy, sit, shared_alloc); 4449 4450 if (sit->first_child) 4451 { 4452 dit = copy; 4453 sit = sit->first_child; 4454 continue; 4455 } 4456 } 4457 } 4458 4459 // continue to the next node 4460 do 4461 { 4462 if (sit->next_sibling) 4463 { 4464 sit = sit->next_sibling; 4465 break; 4466 } 4467 4468 sit = sit->parent; 4469 dit = dit->parent; 4470 4471 // loop invariant: dit is inside the subtree rooted at dn while sit is inside sn 4472 assert(sit == sn || dit); 4473 } 4474 while (sit != sn); 4475 } 4476 4477 assert(!sit || dit == dn->parent); 4478 } 4479 node_copy_attribute(xml_attribute_struct * da,xml_attribute_struct * sa)4480 PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa) 4481 { 4482 xml_allocator& alloc = get_allocator(da); 4483 xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0; 4484 4485 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); 4486 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); 4487 } 4488 is_text_node(xml_node_struct * node)4489 inline bool is_text_node(xml_node_struct* node) 4490 { 4491 xml_node_type type = PUGI__NODETYPE(node); 4492 4493 return type == node_pcdata || type == node_cdata; 4494 } 4495 4496 // get value with conversion functions string_to_integer(const char_t * value,U minv,U maxv)4497 template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW U string_to_integer(const char_t* value, U minv, U maxv) 4498 { 4499 U result = 0; 4500 const char_t* s = value; 4501 4502 while (PUGI__IS_CHARTYPE(*s, ct_space)) 4503 s++; 4504 4505 bool negative = (*s == '-'); 4506 4507 s += (*s == '+' || *s == '-'); 4508 4509 bool overflow = false; 4510 4511 if (s[0] == '0' && (s[1] | ' ') == 'x') 4512 { 4513 s += 2; 4514 4515 // since overflow detection relies on length of the sequence skip leading zeros 4516 while (*s == '0') 4517 s++; 4518 4519 const char_t* start = s; 4520 4521 for (;;) 4522 { 4523 if (static_cast<unsigned>(*s - '0') < 10) 4524 result = result * 16 + (*s - '0'); 4525 else if (static_cast<unsigned>((*s | ' ') - 'a') < 6) 4526 result = result * 16 + ((*s | ' ') - 'a' + 10); 4527 else 4528 break; 4529 4530 s++; 4531 } 4532 4533 size_t digits = static_cast<size_t>(s - start); 4534 4535 overflow = digits > sizeof(U) * 2; 4536 } 4537 else 4538 { 4539 // since overflow detection relies on length of the sequence skip leading zeros 4540 while (*s == '0') 4541 s++; 4542 4543 const char_t* start = s; 4544 4545 for (;;) 4546 { 4547 if (static_cast<unsigned>(*s - '0') < 10) 4548 result = result * 10 + (*s - '0'); 4549 else 4550 break; 4551 4552 s++; 4553 } 4554 4555 size_t digits = static_cast<size_t>(s - start); 4556 4557 PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2); 4558 4559 const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5; 4560 const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6'; 4561 const size_t high_bit = sizeof(U) * 8 - 1; 4562 4563 overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit))); 4564 } 4565 4566 if (negative) 4567 { 4568 // Workaround for crayc++ CC-3059: Expected no overflow in routine. 4569 #ifdef _CRAYC 4570 return (overflow || result > ~minv + 1) ? minv : ~result + 1; 4571 #else 4572 return (overflow || result > 0 - minv) ? minv : 0 - result; 4573 #endif 4574 } 4575 else 4576 return (overflow || result > maxv) ? maxv : result; 4577 } 4578 get_value_int(const char_t * value)4579 PUGI__FN int get_value_int(const char_t* value) 4580 { 4581 return string_to_integer<unsigned int>(value, static_cast<unsigned int>(INT_MIN), INT_MAX); 4582 } 4583 get_value_uint(const char_t * value)4584 PUGI__FN unsigned int get_value_uint(const char_t* value) 4585 { 4586 return string_to_integer<unsigned int>(value, 0, UINT_MAX); 4587 } 4588 get_value_double(const char_t * value)4589 PUGI__FN double get_value_double(const char_t* value) 4590 { 4591 #ifdef PUGIXML_WCHAR_MODE 4592 return wcstod(value, 0); 4593 #else 4594 return strtod(value, 0); 4595 #endif 4596 } 4597 get_value_float(const char_t * value)4598 PUGI__FN float get_value_float(const char_t* value) 4599 { 4600 #ifdef PUGIXML_WCHAR_MODE 4601 return static_cast<float>(wcstod(value, 0)); 4602 #else 4603 return static_cast<float>(strtod(value, 0)); 4604 #endif 4605 } 4606 get_value_bool(const char_t * value)4607 PUGI__FN bool get_value_bool(const char_t* value) 4608 { 4609 // only look at first char 4610 char_t first = *value; 4611 4612 // 1*, t* (true), T* (True), y* (yes), Y* (YES) 4613 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y'); 4614 } 4615 4616 #ifdef PUGIXML_HAS_LONG_LONG get_value_llong(const char_t * value)4617 PUGI__FN long long get_value_llong(const char_t* value) 4618 { 4619 return string_to_integer<unsigned long long>(value, static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX); 4620 } 4621 get_value_ullong(const char_t * value)4622 PUGI__FN unsigned long long get_value_ullong(const char_t* value) 4623 { 4624 return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX); 4625 } 4626 #endif 4627 integer_to_string(char_t * begin,char_t * end,U value,bool negative)4628 template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative) 4629 { 4630 char_t* result = end - 1; 4631 U rest = negative ? 0 - value : value; 4632 4633 do 4634 { 4635 *result-- = static_cast<char_t>('0' + (rest % 10)); 4636 rest /= 10; 4637 } 4638 while (rest); 4639 4640 assert(result >= begin); 4641 (void)begin; 4642 4643 *result = '-'; 4644 4645 return result + !negative; 4646 } 4647 4648 // set value with conversion functions 4649 template <typename String, typename Header> set_value_ascii(String & dest,Header & header,uintptr_t header_mask,char * buf)4650 PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf) 4651 { 4652 #ifdef PUGIXML_WCHAR_MODE 4653 char_t wbuf[128]; 4654 assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0])); 4655 4656 size_t offset = 0; 4657 for (; buf[offset]; ++offset) wbuf[offset] = buf[offset]; 4658 4659 return strcpy_insitu(dest, header, header_mask, wbuf, offset); 4660 #else 4661 return strcpy_insitu(dest, header, header_mask, buf, strlen(buf)); 4662 #endif 4663 } 4664 4665 template <typename U, typename String, typename Header> set_value_integer(String & dest,Header & header,uintptr_t header_mask,U value,bool negative)4666 PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative) 4667 { 4668 char_t buf[64]; 4669 char_t* end = buf + sizeof(buf) / sizeof(buf[0]); 4670 char_t* begin = integer_to_string(buf, end, value, negative); 4671 4672 return strcpy_insitu(dest, header, header_mask, begin, end - begin); 4673 } 4674 4675 template <typename String, typename Header> set_value_convert(String & dest,Header & header,uintptr_t header_mask,float value,int precision)4676 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value, int precision) 4677 { 4678 char buf[128]; 4679 PUGI__SNPRINTF(buf, "%.*g", precision, double(value)); 4680 4681 return set_value_ascii(dest, header, header_mask, buf); 4682 } 4683 4684 template <typename String, typename Header> set_value_convert(String & dest,Header & header,uintptr_t header_mask,double value,int precision)4685 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value, int precision) 4686 { 4687 char buf[128]; 4688 PUGI__SNPRINTF(buf, "%.*g", precision, value); 4689 4690 return set_value_ascii(dest, header, header_mask, buf); 4691 } 4692 4693 template <typename String, typename Header> set_value_bool(String & dest,Header & header,uintptr_t header_mask,bool value)4694 PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value) 4695 { 4696 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5); 4697 } 4698 load_buffer_impl(xml_document_struct * doc,xml_node_struct * root,void * contents,size_t size,unsigned int options,xml_encoding encoding,bool is_mutable,bool own,char_t ** out_buffer)4699 PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer) 4700 { 4701 // check input buffer 4702 if (!contents && size) return make_parse_result(status_io_error); 4703 4704 // get actual encoding 4705 xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size); 4706 4707 // get private buffer 4708 char_t* buffer = 0; 4709 size_t length = 0; 4710 4711 // coverity[var_deref_model] 4712 if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory); 4713 4714 // delete original buffer if we performed a conversion 4715 if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents); 4716 4717 // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself 4718 if (own || buffer != contents) *out_buffer = buffer; 4719 4720 // store buffer for offset_debug 4721 doc->buffer = buffer; 4722 4723 // parse 4724 xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options); 4725 4726 // remember encoding 4727 res.encoding = buffer_encoding; 4728 4729 return res; 4730 } 4731 4732 // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick get_file_size(FILE * file,size_t & out_result)4733 PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result) 4734 { 4735 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE) 4736 // there are 64-bit versions of fseek/ftell, let's use them 4737 typedef __int64 length_type; 4738 4739 _fseeki64(file, 0, SEEK_END); 4740 length_type length = _ftelli64(file); 4741 _fseeki64(file, 0, SEEK_SET); 4742 #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)) 4743 // there are 64-bit versions of fseek/ftell, let's use them 4744 typedef off64_t length_type; 4745 4746 fseeko64(file, 0, SEEK_END); 4747 length_type length = ftello64(file); 4748 fseeko64(file, 0, SEEK_SET); 4749 #else 4750 // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway. 4751 typedef long length_type; 4752 4753 fseek(file, 0, SEEK_END); 4754 length_type length = ftell(file); 4755 fseek(file, 0, SEEK_SET); 4756 #endif 4757 4758 // check for I/O errors 4759 if (length < 0) return status_io_error; 4760 4761 // check for overflow 4762 size_t result = static_cast<size_t>(length); 4763 4764 if (static_cast<length_type>(result) != length) return status_out_of_memory; 4765 4766 // finalize 4767 out_result = result; 4768 4769 return status_ok; 4770 } 4771 4772 // This function assumes that buffer has extra sizeof(char_t) writable bytes after size zero_terminate_buffer(void * buffer,size_t size,xml_encoding encoding)4773 PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding) 4774 { 4775 // We only need to zero-terminate if encoding conversion does not do it for us 4776 #ifdef PUGIXML_WCHAR_MODE 4777 xml_encoding wchar_encoding = get_wchar_encoding(); 4778 4779 if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding)) 4780 { 4781 size_t length = size / sizeof(char_t); 4782 4783 static_cast<char_t*>(buffer)[length] = 0; 4784 return (length + 1) * sizeof(char_t); 4785 } 4786 #else 4787 if (encoding == encoding_utf8) 4788 { 4789 static_cast<char*>(buffer)[size] = 0; 4790 return size + 1; 4791 } 4792 #endif 4793 4794 return size; 4795 } 4796 load_file_impl(xml_document_struct * doc,FILE * file,unsigned int options,xml_encoding encoding,char_t ** out_buffer)4797 PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer) 4798 { 4799 if (!file) return make_parse_result(status_file_not_found); 4800 4801 // get file size (can result in I/O errors) 4802 size_t size = 0; 4803 xml_parse_status size_status = get_file_size(file, size); 4804 if (size_status != status_ok) return make_parse_result(size_status); 4805 4806 size_t max_suffix_size = sizeof(char_t); 4807 4808 // allocate buffer for the whole file 4809 char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size)); 4810 if (!contents) return make_parse_result(status_out_of_memory); 4811 4812 // read file in memory 4813 size_t read_size = fread(contents, 1, size, file); 4814 4815 if (read_size != size) 4816 { 4817 xml_memory::deallocate(contents); 4818 return make_parse_result(status_io_error); 4819 } 4820 4821 xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size); 4822 4823 return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer); 4824 } 4825 close_file(FILE * file)4826 PUGI__FN void close_file(FILE* file) 4827 { 4828 fclose(file); 4829 } 4830 4831 #ifndef PUGIXML_NO_STL 4832 template <typename T> struct xml_stream_chunk 4833 { createxml_stream_chunk4834 static xml_stream_chunk* create() 4835 { 4836 void* memory = xml_memory::allocate(sizeof(xml_stream_chunk)); 4837 if (!memory) return 0; 4838 4839 return new (memory) xml_stream_chunk(); 4840 } 4841 destroyxml_stream_chunk4842 static void destroy(xml_stream_chunk* chunk) 4843 { 4844 // free chunk chain 4845 while (chunk) 4846 { 4847 xml_stream_chunk* next_ = chunk->next; 4848 4849 xml_memory::deallocate(chunk); 4850 4851 chunk = next_; 4852 } 4853 } 4854 xml_stream_chunkxml_stream_chunk4855 xml_stream_chunk(): next(0), size(0) 4856 { 4857 } 4858 4859 xml_stream_chunk* next; 4860 size_t size; 4861 4862 T data[xml_memory_page_size / sizeof(T)]; 4863 }; 4864 load_stream_data_noseek(std::basic_istream<T> & stream,void ** out_buffer,size_t * out_size)4865 template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size) 4866 { 4867 auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy); 4868 4869 // read file to a chunk list 4870 size_t total = 0; 4871 xml_stream_chunk<T>* last = 0; 4872 4873 while (!stream.eof()) 4874 { 4875 // allocate new chunk 4876 xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create(); 4877 if (!chunk) return status_out_of_memory; 4878 4879 // append chunk to list 4880 if (last) last = last->next = chunk; 4881 else chunks.data = last = chunk; 4882 4883 // read data to chunk 4884 stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T))); 4885 chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T); 4886 4887 // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors 4888 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error; 4889 4890 // guard against huge files (chunk size is small enough to make this overflow check work) 4891 if (total + chunk->size < total) return status_out_of_memory; 4892 total += chunk->size; 4893 } 4894 4895 size_t max_suffix_size = sizeof(char_t); 4896 4897 // copy chunk list to a contiguous buffer 4898 char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size)); 4899 if (!buffer) return status_out_of_memory; 4900 4901 char* write = buffer; 4902 4903 for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next) 4904 { 4905 assert(write + chunk->size <= buffer + total); 4906 memcpy(write, chunk->data, chunk->size); 4907 write += chunk->size; 4908 } 4909 4910 assert(write == buffer + total); 4911 4912 // return buffer 4913 *out_buffer = buffer; 4914 *out_size = total; 4915 4916 return status_ok; 4917 } 4918 load_stream_data_seek(std::basic_istream<T> & stream,void ** out_buffer,size_t * out_size)4919 template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size) 4920 { 4921 // get length of remaining data in stream 4922 typename std::basic_istream<T>::pos_type pos = stream.tellg(); 4923 stream.seekg(0, std::ios::end); 4924 std::streamoff length = stream.tellg() - pos; 4925 stream.seekg(pos); 4926 4927 if (stream.fail() || pos < 0) return status_io_error; 4928 4929 // guard against huge files 4930 size_t read_length = static_cast<size_t>(length); 4931 4932 if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory; 4933 4934 size_t max_suffix_size = sizeof(char_t); 4935 4936 // read stream data into memory (guard against stream exceptions with buffer holder) 4937 auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate); 4938 if (!buffer.data) return status_out_of_memory; 4939 4940 stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length)); 4941 4942 // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors 4943 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error; 4944 4945 // return buffer 4946 size_t actual_length = static_cast<size_t>(stream.gcount()); 4947 assert(actual_length <= read_length); 4948 4949 *out_buffer = buffer.release(); 4950 *out_size = actual_length * sizeof(T); 4951 4952 return status_ok; 4953 } 4954 load_stream_impl(xml_document_struct * doc,std::basic_istream<T> & stream,unsigned int options,xml_encoding encoding,char_t ** out_buffer)4955 template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer) 4956 { 4957 void* buffer = 0; 4958 size_t size = 0; 4959 xml_parse_status status = status_ok; 4960 4961 // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits) 4962 if (stream.fail()) return make_parse_result(status_io_error); 4963 4964 // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory) 4965 if (stream.tellg() < 0) 4966 { 4967 stream.clear(); // clear error flags that could be set by a failing tellg 4968 status = load_stream_data_noseek(stream, &buffer, &size); 4969 } 4970 else 4971 status = load_stream_data_seek(stream, &buffer, &size); 4972 4973 if (status != status_ok) return make_parse_result(status); 4974 4975 xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size); 4976 4977 return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer); 4978 } 4979 #endif 4980 4981 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))) open_file_wide(const wchar_t * path,const wchar_t * mode)4982 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) 4983 { 4984 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 4985 FILE* file = 0; 4986 return _wfopen_s(&file, path, mode) == 0 ? file : 0; 4987 #else 4988 return _wfopen(path, mode); 4989 #endif 4990 } 4991 #else convert_path_heap(const wchar_t * str)4992 PUGI__FN char* convert_path_heap(const wchar_t* str) 4993 { 4994 assert(str); 4995 4996 // first pass: get length in utf8 characters 4997 size_t length = strlength_wide(str); 4998 size_t size = as_utf8_begin(str, length); 4999 5000 // allocate resulting string 5001 char* result = static_cast<char*>(xml_memory::allocate(size + 1)); 5002 if (!result) return 0; 5003 5004 // second pass: convert to utf8 5005 as_utf8_end(result, size, str, length); 5006 5007 // zero-terminate 5008 result[size] = 0; 5009 5010 return result; 5011 } 5012 open_file_wide(const wchar_t * path,const wchar_t * mode)5013 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) 5014 { 5015 // there is no standard function to open wide paths, so our best bet is to try utf8 path 5016 char* path_utf8 = convert_path_heap(path); 5017 if (!path_utf8) return 0; 5018 5019 // convert mode to ASCII (we mirror _wfopen interface) 5020 char mode_ascii[4] = {0}; 5021 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]); 5022 5023 // try to open the utf8 path 5024 FILE* result = fopen(path_utf8, mode_ascii); 5025 5026 // free dummy buffer 5027 xml_memory::deallocate(path_utf8); 5028 5029 return result; 5030 } 5031 #endif 5032 open_file(const char * path,const char * mode)5033 PUGI__FN FILE* open_file(const char* path, const char* mode) 5034 { 5035 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 5036 FILE* file = 0; 5037 return fopen_s(&file, path, mode) == 0 ? file : 0; 5038 #else 5039 return fopen(path, mode); 5040 #endif 5041 } 5042 save_file_impl(const xml_document & doc,FILE * file,const char_t * indent,unsigned int flags,xml_encoding encoding)5043 PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding) 5044 { 5045 if (!file) return false; 5046 5047 xml_writer_file writer(file); 5048 doc.save(writer, indent, flags, encoding); 5049 5050 return ferror(file) == 0; 5051 } 5052 5053 struct name_null_sentry 5054 { 5055 xml_node_struct* node; 5056 char_t* name; 5057 name_null_sentryname_null_sentry5058 name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name) 5059 { 5060 node->name = 0; 5061 } 5062 ~name_null_sentryname_null_sentry5063 ~name_null_sentry() 5064 { 5065 node->name = name; 5066 } 5067 }; 5068 PUGI__NS_END 5069 5070 namespace pugi 5071 { xml_writer_file(void * file_)5072 PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_) 5073 { 5074 } 5075 write(const void * data,size_t size)5076 PUGI__FN void xml_writer_file::write(const void* data, size_t size) 5077 { 5078 size_t result = fwrite(data, 1, size, static_cast<FILE*>(file)); 5079 (void)!result; // unfortunately we can't do proper error handling here 5080 } 5081 5082 #ifndef PUGIXML_NO_STL xml_writer_stream(std::basic_ostream<char,std::char_traits<char>> & stream)5083 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0) 5084 { 5085 } 5086 xml_writer_stream(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream)5087 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream) 5088 { 5089 } 5090 write(const void * data,size_t size)5091 PUGI__FN void xml_writer_stream::write(const void* data, size_t size) 5092 { 5093 if (narrow_stream) 5094 { 5095 assert(!wide_stream); 5096 narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size)); 5097 } 5098 else 5099 { 5100 assert(wide_stream); 5101 assert(size % sizeof(wchar_t) == 0); 5102 5103 wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t))); 5104 } 5105 } 5106 #endif 5107 xml_tree_walker()5108 PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0) 5109 { 5110 } 5111 ~xml_tree_walker()5112 PUGI__FN xml_tree_walker::~xml_tree_walker() 5113 { 5114 } 5115 depth() const5116 PUGI__FN int xml_tree_walker::depth() const 5117 { 5118 return _depth; 5119 } 5120 begin(xml_node &)5121 PUGI__FN bool xml_tree_walker::begin(xml_node&) 5122 { 5123 return true; 5124 } 5125 end(xml_node &)5126 PUGI__FN bool xml_tree_walker::end(xml_node&) 5127 { 5128 return true; 5129 } 5130 xml_attribute()5131 PUGI__FN xml_attribute::xml_attribute(): _attr(0) 5132 { 5133 } 5134 xml_attribute(xml_attribute_struct * attr)5135 PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr) 5136 { 5137 } 5138 unspecified_bool_xml_attribute(xml_attribute ***)5139 PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***) 5140 { 5141 } 5142 operator xml_attribute::unspecified_bool_type() const5143 PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const 5144 { 5145 return _attr ? unspecified_bool_xml_attribute : 0; 5146 } 5147 operator !() const5148 PUGI__FN bool xml_attribute::operator!() const 5149 { 5150 return !_attr; 5151 } 5152 operator ==(const xml_attribute & r) const5153 PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const 5154 { 5155 return (_attr == r._attr); 5156 } 5157 operator !=(const xml_attribute & r) const5158 PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const 5159 { 5160 return (_attr != r._attr); 5161 } 5162 operator <(const xml_attribute & r) const5163 PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const 5164 { 5165 return (_attr < r._attr); 5166 } 5167 operator >(const xml_attribute & r) const5168 PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const 5169 { 5170 return (_attr > r._attr); 5171 } 5172 operator <=(const xml_attribute & r) const5173 PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const 5174 { 5175 return (_attr <= r._attr); 5176 } 5177 operator >=(const xml_attribute & r) const5178 PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const 5179 { 5180 return (_attr >= r._attr); 5181 } 5182 next_attribute() const5183 PUGI__FN xml_attribute xml_attribute::next_attribute() const 5184 { 5185 return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute(); 5186 } 5187 previous_attribute() const5188 PUGI__FN xml_attribute xml_attribute::previous_attribute() const 5189 { 5190 return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute(); 5191 } 5192 as_string(const char_t * def) const5193 PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const 5194 { 5195 return (_attr && _attr->value) ? _attr->value + 0 : def; 5196 } 5197 as_int(int def) const5198 PUGI__FN int xml_attribute::as_int(int def) const 5199 { 5200 return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def; 5201 } 5202 as_uint(unsigned int def) const5203 PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const 5204 { 5205 return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def; 5206 } 5207 as_double(double def) const5208 PUGI__FN double xml_attribute::as_double(double def) const 5209 { 5210 return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def; 5211 } 5212 as_float(float def) const5213 PUGI__FN float xml_attribute::as_float(float def) const 5214 { 5215 return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def; 5216 } 5217 as_bool(bool def) const5218 PUGI__FN bool xml_attribute::as_bool(bool def) const 5219 { 5220 return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def; 5221 } 5222 5223 #ifdef PUGIXML_HAS_LONG_LONG as_llong(long long def) const5224 PUGI__FN long long xml_attribute::as_llong(long long def) const 5225 { 5226 return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def; 5227 } 5228 as_ullong(unsigned long long def) const5229 PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const 5230 { 5231 return (_attr && _attr->value) ? impl::get_value_ullong(_attr->value) : def; 5232 } 5233 #endif 5234 empty() const5235 PUGI__FN bool xml_attribute::empty() const 5236 { 5237 return !_attr; 5238 } 5239 name() const5240 PUGI__FN const char_t* xml_attribute::name() const 5241 { 5242 return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT(""); 5243 } 5244 value() const5245 PUGI__FN const char_t* xml_attribute::value() const 5246 { 5247 return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT(""); 5248 } 5249 hash_value() const5250 PUGI__FN size_t xml_attribute::hash_value() const 5251 { 5252 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct)); 5253 } 5254 internal_object() const5255 PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const 5256 { 5257 return _attr; 5258 } 5259 operator =(const char_t * rhs)5260 PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs) 5261 { 5262 set_value(rhs); 5263 return *this; 5264 } 5265 operator =(int rhs)5266 PUGI__FN xml_attribute& xml_attribute::operator=(int rhs) 5267 { 5268 set_value(rhs); 5269 return *this; 5270 } 5271 operator =(unsigned int rhs)5272 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs) 5273 { 5274 set_value(rhs); 5275 return *this; 5276 } 5277 operator =(long rhs)5278 PUGI__FN xml_attribute& xml_attribute::operator=(long rhs) 5279 { 5280 set_value(rhs); 5281 return *this; 5282 } 5283 operator =(unsigned long rhs)5284 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs) 5285 { 5286 set_value(rhs); 5287 return *this; 5288 } 5289 operator =(double rhs)5290 PUGI__FN xml_attribute& xml_attribute::operator=(double rhs) 5291 { 5292 set_value(rhs); 5293 return *this; 5294 } 5295 operator =(float rhs)5296 PUGI__FN xml_attribute& xml_attribute::operator=(float rhs) 5297 { 5298 set_value(rhs); 5299 return *this; 5300 } 5301 operator =(bool rhs)5302 PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs) 5303 { 5304 set_value(rhs); 5305 return *this; 5306 } 5307 5308 #ifdef PUGIXML_HAS_LONG_LONG operator =(long long rhs)5309 PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs) 5310 { 5311 set_value(rhs); 5312 return *this; 5313 } 5314 operator =(unsigned long long rhs)5315 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs) 5316 { 5317 set_value(rhs); 5318 return *this; 5319 } 5320 #endif 5321 set_name(const char_t * rhs)5322 PUGI__FN bool xml_attribute::set_name(const char_t* rhs) 5323 { 5324 if (!_attr) return false; 5325 5326 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); 5327 } 5328 set_value(const char_t * rhs)5329 PUGI__FN bool xml_attribute::set_value(const char_t* rhs) 5330 { 5331 if (!_attr) return false; 5332 5333 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); 5334 } 5335 set_value(int rhs)5336 PUGI__FN bool xml_attribute::set_value(int rhs) 5337 { 5338 if (!_attr) return false; 5339 5340 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); 5341 } 5342 set_value(unsigned int rhs)5343 PUGI__FN bool xml_attribute::set_value(unsigned int rhs) 5344 { 5345 if (!_attr) return false; 5346 5347 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); 5348 } 5349 set_value(long rhs)5350 PUGI__FN bool xml_attribute::set_value(long rhs) 5351 { 5352 if (!_attr) return false; 5353 5354 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); 5355 } 5356 set_value(unsigned long rhs)5357 PUGI__FN bool xml_attribute::set_value(unsigned long rhs) 5358 { 5359 if (!_attr) return false; 5360 5361 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); 5362 } 5363 set_value(double rhs)5364 PUGI__FN bool xml_attribute::set_value(double rhs) 5365 { 5366 if (!_attr) return false; 5367 5368 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision); 5369 } 5370 set_value(double rhs,int precision)5371 PUGI__FN bool xml_attribute::set_value(double rhs, int precision) 5372 { 5373 if (!_attr) return false; 5374 5375 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision); 5376 } 5377 set_value(float rhs)5378 PUGI__FN bool xml_attribute::set_value(float rhs) 5379 { 5380 if (!_attr) return false; 5381 5382 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision); 5383 } 5384 set_value(float rhs,int precision)5385 PUGI__FN bool xml_attribute::set_value(float rhs, int precision) 5386 { 5387 if (!_attr) return false; 5388 5389 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision); 5390 } 5391 set_value(bool rhs)5392 PUGI__FN bool xml_attribute::set_value(bool rhs) 5393 { 5394 if (!_attr) return false; 5395 5396 return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); 5397 } 5398 5399 #ifdef PUGIXML_HAS_LONG_LONG set_value(long long rhs)5400 PUGI__FN bool xml_attribute::set_value(long long rhs) 5401 { 5402 if (!_attr) return false; 5403 5404 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); 5405 } 5406 set_value(unsigned long long rhs)5407 PUGI__FN bool xml_attribute::set_value(unsigned long long rhs) 5408 { 5409 if (!_attr) return false; 5410 5411 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); 5412 } 5413 #endif 5414 5415 #ifdef __BORLANDC__ operator &&(const xml_attribute & lhs,bool rhs)5416 PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs) 5417 { 5418 return (bool)lhs && rhs; 5419 } 5420 operator ||(const xml_attribute & lhs,bool rhs)5421 PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs) 5422 { 5423 return (bool)lhs || rhs; 5424 } 5425 #endif 5426 xml_node()5427 PUGI__FN xml_node::xml_node(): _root(0) 5428 { 5429 } 5430 xml_node(xml_node_struct * p)5431 PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p) 5432 { 5433 } 5434 unspecified_bool_xml_node(xml_node ***)5435 PUGI__FN static void unspecified_bool_xml_node(xml_node***) 5436 { 5437 } 5438 operator xml_node::unspecified_bool_type() const5439 PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const 5440 { 5441 return _root ? unspecified_bool_xml_node : 0; 5442 } 5443 operator !() const5444 PUGI__FN bool xml_node::operator!() const 5445 { 5446 return !_root; 5447 } 5448 begin() const5449 PUGI__FN xml_node::iterator xml_node::begin() const 5450 { 5451 return iterator(_root ? _root->first_child + 0 : 0, _root); 5452 } 5453 end() const5454 PUGI__FN xml_node::iterator xml_node::end() const 5455 { 5456 return iterator(0, _root); 5457 } 5458 attributes_begin() const5459 PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const 5460 { 5461 return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root); 5462 } 5463 attributes_end() const5464 PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const 5465 { 5466 return attribute_iterator(0, _root); 5467 } 5468 children() const5469 PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const 5470 { 5471 return xml_object_range<xml_node_iterator>(begin(), end()); 5472 } 5473 children(const char_t * name_) const5474 PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const 5475 { 5476 return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_)); 5477 } 5478 attributes() const5479 PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const 5480 { 5481 return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end()); 5482 } 5483 operator ==(const xml_node & r) const5484 PUGI__FN bool xml_node::operator==(const xml_node& r) const 5485 { 5486 return (_root == r._root); 5487 } 5488 operator !=(const xml_node & r) const5489 PUGI__FN bool xml_node::operator!=(const xml_node& r) const 5490 { 5491 return (_root != r._root); 5492 } 5493 operator <(const xml_node & r) const5494 PUGI__FN bool xml_node::operator<(const xml_node& r) const 5495 { 5496 return (_root < r._root); 5497 } 5498 operator >(const xml_node & r) const5499 PUGI__FN bool xml_node::operator>(const xml_node& r) const 5500 { 5501 return (_root > r._root); 5502 } 5503 operator <=(const xml_node & r) const5504 PUGI__FN bool xml_node::operator<=(const xml_node& r) const 5505 { 5506 return (_root <= r._root); 5507 } 5508 operator >=(const xml_node & r) const5509 PUGI__FN bool xml_node::operator>=(const xml_node& r) const 5510 { 5511 return (_root >= r._root); 5512 } 5513 empty() const5514 PUGI__FN bool xml_node::empty() const 5515 { 5516 return !_root; 5517 } 5518 name() const5519 PUGI__FN const char_t* xml_node::name() const 5520 { 5521 return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT(""); 5522 } 5523 type() const5524 PUGI__FN xml_node_type xml_node::type() const 5525 { 5526 return _root ? PUGI__NODETYPE(_root) : node_null; 5527 } 5528 value() const5529 PUGI__FN const char_t* xml_node::value() const 5530 { 5531 return (_root && _root->value) ? _root->value + 0 : PUGIXML_TEXT(""); 5532 } 5533 child(const char_t * name_) const5534 PUGI__FN xml_node xml_node::child(const char_t* name_) const 5535 { 5536 if (!_root) return xml_node(); 5537 5538 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 5539 if (i->name && impl::strequal(name_, i->name)) return xml_node(i); 5540 5541 return xml_node(); 5542 } 5543 attribute(const char_t * name_) const5544 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const 5545 { 5546 if (!_root) return xml_attribute(); 5547 5548 for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) 5549 if (i->name && impl::strequal(name_, i->name)) 5550 return xml_attribute(i); 5551 5552 return xml_attribute(); 5553 } 5554 next_sibling(const char_t * name_) const5555 PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const 5556 { 5557 if (!_root) return xml_node(); 5558 5559 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) 5560 if (i->name && impl::strequal(name_, i->name)) return xml_node(i); 5561 5562 return xml_node(); 5563 } 5564 next_sibling() const5565 PUGI__FN xml_node xml_node::next_sibling() const 5566 { 5567 return _root ? xml_node(_root->next_sibling) : xml_node(); 5568 } 5569 previous_sibling(const char_t * name_) const5570 PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const 5571 { 5572 if (!_root) return xml_node(); 5573 5574 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) 5575 if (i->name && impl::strequal(name_, i->name)) return xml_node(i); 5576 5577 return xml_node(); 5578 } 5579 attribute(const char_t * name_,xml_attribute & hint_) const5580 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const 5581 { 5582 xml_attribute_struct* hint = hint_._attr; 5583 5584 // if hint is not an attribute of node, behavior is not defined 5585 assert(!hint || (_root && impl::is_attribute_of(hint, _root))); 5586 5587 if (!_root) return xml_attribute(); 5588 5589 // optimistically search from hint up until the end 5590 for (xml_attribute_struct* i = hint; i; i = i->next_attribute) 5591 if (i->name && impl::strequal(name_, i->name)) 5592 { 5593 // update hint to maximize efficiency of searching for consecutive attributes 5594 hint_._attr = i->next_attribute; 5595 5596 return xml_attribute(i); 5597 } 5598 5599 // wrap around and search from the first attribute until the hint 5600 // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails 5601 for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute) 5602 if (j->name && impl::strequal(name_, j->name)) 5603 { 5604 // update hint to maximize efficiency of searching for consecutive attributes 5605 hint_._attr = j->next_attribute; 5606 5607 return xml_attribute(j); 5608 } 5609 5610 return xml_attribute(); 5611 } 5612 previous_sibling() const5613 PUGI__FN xml_node xml_node::previous_sibling() const 5614 { 5615 if (!_root) return xml_node(); 5616 5617 if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c); 5618 else return xml_node(); 5619 } 5620 parent() const5621 PUGI__FN xml_node xml_node::parent() const 5622 { 5623 return _root ? xml_node(_root->parent) : xml_node(); 5624 } 5625 root() const5626 PUGI__FN xml_node xml_node::root() const 5627 { 5628 return _root ? xml_node(&impl::get_document(_root)) : xml_node(); 5629 } 5630 text() const5631 PUGI__FN xml_text xml_node::text() const 5632 { 5633 return xml_text(_root); 5634 } 5635 child_value() const5636 PUGI__FN const char_t* xml_node::child_value() const 5637 { 5638 if (!_root) return PUGIXML_TEXT(""); 5639 5640 // element nodes can have value if parse_embed_pcdata was used 5641 if (PUGI__NODETYPE(_root) == node_element && _root->value) 5642 return _root->value; 5643 5644 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 5645 if (impl::is_text_node(i) && i->value) 5646 return i->value; 5647 5648 return PUGIXML_TEXT(""); 5649 } 5650 child_value(const char_t * name_) const5651 PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const 5652 { 5653 return child(name_).child_value(); 5654 } 5655 first_attribute() const5656 PUGI__FN xml_attribute xml_node::first_attribute() const 5657 { 5658 return _root ? xml_attribute(_root->first_attribute) : xml_attribute(); 5659 } 5660 last_attribute() const5661 PUGI__FN xml_attribute xml_node::last_attribute() const 5662 { 5663 return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute(); 5664 } 5665 first_child() const5666 PUGI__FN xml_node xml_node::first_child() const 5667 { 5668 return _root ? xml_node(_root->first_child) : xml_node(); 5669 } 5670 last_child() const5671 PUGI__FN xml_node xml_node::last_child() const 5672 { 5673 return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node(); 5674 } 5675 set_name(const char_t * rhs)5676 PUGI__FN bool xml_node::set_name(const char_t* rhs) 5677 { 5678 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null; 5679 5680 if (type_ != node_element && type_ != node_pi && type_ != node_declaration) 5681 return false; 5682 5683 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); 5684 } 5685 set_value(const char_t * rhs)5686 PUGI__FN bool xml_node::set_value(const char_t* rhs) 5687 { 5688 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null; 5689 5690 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype) 5691 return false; 5692 5693 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); 5694 } 5695 append_attribute(const char_t * name_)5696 PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_) 5697 { 5698 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5699 5700 impl::xml_allocator& alloc = impl::get_allocator(_root); 5701 if (!alloc.reserve()) return xml_attribute(); 5702 5703 xml_attribute a(impl::allocate_attribute(alloc)); 5704 if (!a) return xml_attribute(); 5705 5706 impl::append_attribute(a._attr, _root); 5707 5708 a.set_name(name_); 5709 5710 return a; 5711 } 5712 prepend_attribute(const char_t * name_)5713 PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_) 5714 { 5715 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5716 5717 impl::xml_allocator& alloc = impl::get_allocator(_root); 5718 if (!alloc.reserve()) return xml_attribute(); 5719 5720 xml_attribute a(impl::allocate_attribute(alloc)); 5721 if (!a) return xml_attribute(); 5722 5723 impl::prepend_attribute(a._attr, _root); 5724 5725 a.set_name(name_); 5726 5727 return a; 5728 } 5729 insert_attribute_after(const char_t * name_,const xml_attribute & attr)5730 PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr) 5731 { 5732 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5733 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 5734 5735 impl::xml_allocator& alloc = impl::get_allocator(_root); 5736 if (!alloc.reserve()) return xml_attribute(); 5737 5738 xml_attribute a(impl::allocate_attribute(alloc)); 5739 if (!a) return xml_attribute(); 5740 5741 impl::insert_attribute_after(a._attr, attr._attr, _root); 5742 5743 a.set_name(name_); 5744 5745 return a; 5746 } 5747 insert_attribute_before(const char_t * name_,const xml_attribute & attr)5748 PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr) 5749 { 5750 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5751 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 5752 5753 impl::xml_allocator& alloc = impl::get_allocator(_root); 5754 if (!alloc.reserve()) return xml_attribute(); 5755 5756 xml_attribute a(impl::allocate_attribute(alloc)); 5757 if (!a) return xml_attribute(); 5758 5759 impl::insert_attribute_before(a._attr, attr._attr, _root); 5760 5761 a.set_name(name_); 5762 5763 return a; 5764 } 5765 append_copy(const xml_attribute & proto)5766 PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto) 5767 { 5768 if (!proto) return xml_attribute(); 5769 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5770 5771 impl::xml_allocator& alloc = impl::get_allocator(_root); 5772 if (!alloc.reserve()) return xml_attribute(); 5773 5774 xml_attribute a(impl::allocate_attribute(alloc)); 5775 if (!a) return xml_attribute(); 5776 5777 impl::append_attribute(a._attr, _root); 5778 impl::node_copy_attribute(a._attr, proto._attr); 5779 5780 return a; 5781 } 5782 prepend_copy(const xml_attribute & proto)5783 PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto) 5784 { 5785 if (!proto) return xml_attribute(); 5786 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5787 5788 impl::xml_allocator& alloc = impl::get_allocator(_root); 5789 if (!alloc.reserve()) return xml_attribute(); 5790 5791 xml_attribute a(impl::allocate_attribute(alloc)); 5792 if (!a) return xml_attribute(); 5793 5794 impl::prepend_attribute(a._attr, _root); 5795 impl::node_copy_attribute(a._attr, proto._attr); 5796 5797 return a; 5798 } 5799 insert_copy_after(const xml_attribute & proto,const xml_attribute & attr)5800 PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr) 5801 { 5802 if (!proto) return xml_attribute(); 5803 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5804 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 5805 5806 impl::xml_allocator& alloc = impl::get_allocator(_root); 5807 if (!alloc.reserve()) return xml_attribute(); 5808 5809 xml_attribute a(impl::allocate_attribute(alloc)); 5810 if (!a) return xml_attribute(); 5811 5812 impl::insert_attribute_after(a._attr, attr._attr, _root); 5813 impl::node_copy_attribute(a._attr, proto._attr); 5814 5815 return a; 5816 } 5817 insert_copy_before(const xml_attribute & proto,const xml_attribute & attr)5818 PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr) 5819 { 5820 if (!proto) return xml_attribute(); 5821 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5822 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 5823 5824 impl::xml_allocator& alloc = impl::get_allocator(_root); 5825 if (!alloc.reserve()) return xml_attribute(); 5826 5827 xml_attribute a(impl::allocate_attribute(alloc)); 5828 if (!a) return xml_attribute(); 5829 5830 impl::insert_attribute_before(a._attr, attr._attr, _root); 5831 impl::node_copy_attribute(a._attr, proto._attr); 5832 5833 return a; 5834 } 5835 append_child(xml_node_type type_)5836 PUGI__FN xml_node xml_node::append_child(xml_node_type type_) 5837 { 5838 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5839 5840 impl::xml_allocator& alloc = impl::get_allocator(_root); 5841 if (!alloc.reserve()) return xml_node(); 5842 5843 xml_node n(impl::allocate_node(alloc, type_)); 5844 if (!n) return xml_node(); 5845 5846 impl::append_node(n._root, _root); 5847 5848 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5849 5850 return n; 5851 } 5852 prepend_child(xml_node_type type_)5853 PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_) 5854 { 5855 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5856 5857 impl::xml_allocator& alloc = impl::get_allocator(_root); 5858 if (!alloc.reserve()) return xml_node(); 5859 5860 xml_node n(impl::allocate_node(alloc, type_)); 5861 if (!n) return xml_node(); 5862 5863 impl::prepend_node(n._root, _root); 5864 5865 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5866 5867 return n; 5868 } 5869 insert_child_before(xml_node_type type_,const xml_node & node)5870 PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node) 5871 { 5872 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5873 if (!node._root || node._root->parent != _root) return xml_node(); 5874 5875 impl::xml_allocator& alloc = impl::get_allocator(_root); 5876 if (!alloc.reserve()) return xml_node(); 5877 5878 xml_node n(impl::allocate_node(alloc, type_)); 5879 if (!n) return xml_node(); 5880 5881 impl::insert_node_before(n._root, node._root); 5882 5883 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5884 5885 return n; 5886 } 5887 insert_child_after(xml_node_type type_,const xml_node & node)5888 PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node) 5889 { 5890 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5891 if (!node._root || node._root->parent != _root) return xml_node(); 5892 5893 impl::xml_allocator& alloc = impl::get_allocator(_root); 5894 if (!alloc.reserve()) return xml_node(); 5895 5896 xml_node n(impl::allocate_node(alloc, type_)); 5897 if (!n) return xml_node(); 5898 5899 impl::insert_node_after(n._root, node._root); 5900 5901 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5902 5903 return n; 5904 } 5905 append_child(const char_t * name_)5906 PUGI__FN xml_node xml_node::append_child(const char_t* name_) 5907 { 5908 xml_node result = append_child(node_element); 5909 5910 result.set_name(name_); 5911 5912 return result; 5913 } 5914 prepend_child(const char_t * name_)5915 PUGI__FN xml_node xml_node::prepend_child(const char_t* name_) 5916 { 5917 xml_node result = prepend_child(node_element); 5918 5919 result.set_name(name_); 5920 5921 return result; 5922 } 5923 insert_child_after(const char_t * name_,const xml_node & node)5924 PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node) 5925 { 5926 xml_node result = insert_child_after(node_element, node); 5927 5928 result.set_name(name_); 5929 5930 return result; 5931 } 5932 insert_child_before(const char_t * name_,const xml_node & node)5933 PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node) 5934 { 5935 xml_node result = insert_child_before(node_element, node); 5936 5937 result.set_name(name_); 5938 5939 return result; 5940 } 5941 append_copy(const xml_node & proto)5942 PUGI__FN xml_node xml_node::append_copy(const xml_node& proto) 5943 { 5944 xml_node_type type_ = proto.type(); 5945 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5946 5947 impl::xml_allocator& alloc = impl::get_allocator(_root); 5948 if (!alloc.reserve()) return xml_node(); 5949 5950 xml_node n(impl::allocate_node(alloc, type_)); 5951 if (!n) return xml_node(); 5952 5953 impl::append_node(n._root, _root); 5954 impl::node_copy_tree(n._root, proto._root); 5955 5956 return n; 5957 } 5958 prepend_copy(const xml_node & proto)5959 PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto) 5960 { 5961 xml_node_type type_ = proto.type(); 5962 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5963 5964 impl::xml_allocator& alloc = impl::get_allocator(_root); 5965 if (!alloc.reserve()) return xml_node(); 5966 5967 xml_node n(impl::allocate_node(alloc, type_)); 5968 if (!n) return xml_node(); 5969 5970 impl::prepend_node(n._root, _root); 5971 impl::node_copy_tree(n._root, proto._root); 5972 5973 return n; 5974 } 5975 insert_copy_after(const xml_node & proto,const xml_node & node)5976 PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node) 5977 { 5978 xml_node_type type_ = proto.type(); 5979 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5980 if (!node._root || node._root->parent != _root) return xml_node(); 5981 5982 impl::xml_allocator& alloc = impl::get_allocator(_root); 5983 if (!alloc.reserve()) return xml_node(); 5984 5985 xml_node n(impl::allocate_node(alloc, type_)); 5986 if (!n) return xml_node(); 5987 5988 impl::insert_node_after(n._root, node._root); 5989 impl::node_copy_tree(n._root, proto._root); 5990 5991 return n; 5992 } 5993 insert_copy_before(const xml_node & proto,const xml_node & node)5994 PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node) 5995 { 5996 xml_node_type type_ = proto.type(); 5997 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5998 if (!node._root || node._root->parent != _root) return xml_node(); 5999 6000 impl::xml_allocator& alloc = impl::get_allocator(_root); 6001 if (!alloc.reserve()) return xml_node(); 6002 6003 xml_node n(impl::allocate_node(alloc, type_)); 6004 if (!n) return xml_node(); 6005 6006 impl::insert_node_before(n._root, node._root); 6007 impl::node_copy_tree(n._root, proto._root); 6008 6009 return n; 6010 } 6011 append_move(const xml_node & moved)6012 PUGI__FN xml_node xml_node::append_move(const xml_node& moved) 6013 { 6014 if (!impl::allow_move(*this, moved)) return xml_node(); 6015 6016 impl::xml_allocator& alloc = impl::get_allocator(_root); 6017 if (!alloc.reserve()) return xml_node(); 6018 6019 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 6020 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 6021 6022 impl::remove_node(moved._root); 6023 impl::append_node(moved._root, _root); 6024 6025 return moved; 6026 } 6027 prepend_move(const xml_node & moved)6028 PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved) 6029 { 6030 if (!impl::allow_move(*this, moved)) return xml_node(); 6031 6032 impl::xml_allocator& alloc = impl::get_allocator(_root); 6033 if (!alloc.reserve()) return xml_node(); 6034 6035 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 6036 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 6037 6038 impl::remove_node(moved._root); 6039 impl::prepend_node(moved._root, _root); 6040 6041 return moved; 6042 } 6043 insert_move_after(const xml_node & moved,const xml_node & node)6044 PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node) 6045 { 6046 if (!impl::allow_move(*this, moved)) return xml_node(); 6047 if (!node._root || node._root->parent != _root) return xml_node(); 6048 if (moved._root == node._root) return xml_node(); 6049 6050 impl::xml_allocator& alloc = impl::get_allocator(_root); 6051 if (!alloc.reserve()) return xml_node(); 6052 6053 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 6054 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 6055 6056 impl::remove_node(moved._root); 6057 impl::insert_node_after(moved._root, node._root); 6058 6059 return moved; 6060 } 6061 insert_move_before(const xml_node & moved,const xml_node & node)6062 PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node) 6063 { 6064 if (!impl::allow_move(*this, moved)) return xml_node(); 6065 if (!node._root || node._root->parent != _root) return xml_node(); 6066 if (moved._root == node._root) return xml_node(); 6067 6068 impl::xml_allocator& alloc = impl::get_allocator(_root); 6069 if (!alloc.reserve()) return xml_node(); 6070 6071 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 6072 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 6073 6074 impl::remove_node(moved._root); 6075 impl::insert_node_before(moved._root, node._root); 6076 6077 return moved; 6078 } 6079 remove_attribute(const char_t * name_)6080 PUGI__FN bool xml_node::remove_attribute(const char_t* name_) 6081 { 6082 return remove_attribute(attribute(name_)); 6083 } 6084 remove_attribute(const xml_attribute & a)6085 PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a) 6086 { 6087 if (!_root || !a._attr) return false; 6088 if (!impl::is_attribute_of(a._attr, _root)) return false; 6089 6090 impl::xml_allocator& alloc = impl::get_allocator(_root); 6091 if (!alloc.reserve()) return false; 6092 6093 impl::remove_attribute(a._attr, _root); 6094 impl::destroy_attribute(a._attr, alloc); 6095 6096 return true; 6097 } 6098 remove_attributes()6099 PUGI__FN bool xml_node::remove_attributes() 6100 { 6101 if (!_root) return false; 6102 6103 impl::xml_allocator& alloc = impl::get_allocator(_root); 6104 if (!alloc.reserve()) return false; 6105 6106 for (xml_attribute_struct* attr = _root->first_attribute; attr; ) 6107 { 6108 xml_attribute_struct* next = attr->next_attribute; 6109 6110 impl::destroy_attribute(attr, alloc); 6111 6112 attr = next; 6113 } 6114 6115 _root->first_attribute = 0; 6116 6117 return true; 6118 } 6119 remove_child(const char_t * name_)6120 PUGI__FN bool xml_node::remove_child(const char_t* name_) 6121 { 6122 return remove_child(child(name_)); 6123 } 6124 remove_child(const xml_node & n)6125 PUGI__FN bool xml_node::remove_child(const xml_node& n) 6126 { 6127 if (!_root || !n._root || n._root->parent != _root) return false; 6128 6129 impl::xml_allocator& alloc = impl::get_allocator(_root); 6130 if (!alloc.reserve()) return false; 6131 6132 impl::remove_node(n._root); 6133 impl::destroy_node(n._root, alloc); 6134 6135 return true; 6136 } 6137 remove_children()6138 PUGI__FN bool xml_node::remove_children() 6139 { 6140 if (!_root) return false; 6141 6142 impl::xml_allocator& alloc = impl::get_allocator(_root); 6143 if (!alloc.reserve()) return false; 6144 6145 for (xml_node_struct* cur = _root->first_child; cur; ) 6146 { 6147 xml_node_struct* next = cur->next_sibling; 6148 6149 impl::destroy_node(cur, alloc); 6150 6151 cur = next; 6152 } 6153 6154 _root->first_child = 0; 6155 6156 return true; 6157 } 6158 append_buffer(const void * contents,size_t size,unsigned int options,xml_encoding encoding)6159 PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) 6160 { 6161 // append_buffer is only valid for elements/documents 6162 if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root); 6163 6164 // get document node 6165 impl::xml_document_struct* doc = &impl::get_document(_root); 6166 6167 // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense 6168 doc->header |= impl::xml_memory_page_contents_shared_mask; 6169 6170 // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later) 6171 impl::xml_memory_page* page = 0; 6172 impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer) + sizeof(void*), page)); 6173 (void)page; 6174 6175 if (!extra) return impl::make_parse_result(status_out_of_memory); 6176 6177 #ifdef PUGIXML_COMPACT 6178 // align the memory block to a pointer boundary; this is required for compact mode where memory allocations are only 4b aligned 6179 // note that this requires up to sizeof(void*)-1 additional memory, which the allocation above takes into account 6180 extra = reinterpret_cast<impl::xml_extra_buffer*>((reinterpret_cast<uintptr_t>(extra) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1)); 6181 #endif 6182 6183 // add extra buffer to the list 6184 extra->buffer = 0; 6185 extra->next = doc->extra_buffers; 6186 doc->extra_buffers = extra; 6187 6188 // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level 6189 impl::name_null_sentry sentry(_root); 6190 6191 return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer); 6192 } 6193 find_child_by_attribute(const char_t * name_,const char_t * attr_name,const char_t * attr_value) const6194 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const 6195 { 6196 if (!_root) return xml_node(); 6197 6198 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 6199 if (i->name && impl::strequal(name_, i->name)) 6200 { 6201 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) 6202 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT(""))) 6203 return xml_node(i); 6204 } 6205 6206 return xml_node(); 6207 } 6208 find_child_by_attribute(const char_t * attr_name,const char_t * attr_value) const6209 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const 6210 { 6211 if (!_root) return xml_node(); 6212 6213 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 6214 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) 6215 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT(""))) 6216 return xml_node(i); 6217 6218 return xml_node(); 6219 } 6220 6221 #ifndef PUGIXML_NO_STL path(char_t delimiter) const6222 PUGI__FN string_t xml_node::path(char_t delimiter) const 6223 { 6224 if (!_root) return string_t(); 6225 6226 size_t offset = 0; 6227 6228 for (xml_node_struct* i = _root; i; i = i->parent) 6229 { 6230 offset += (i != _root); 6231 offset += i->name ? impl::strlength(i->name) : 0; 6232 } 6233 6234 string_t result; 6235 result.resize(offset); 6236 6237 for (xml_node_struct* j = _root; j; j = j->parent) 6238 { 6239 if (j != _root) 6240 result[--offset] = delimiter; 6241 6242 if (j->name) 6243 { 6244 size_t length = impl::strlength(j->name); 6245 6246 offset -= length; 6247 memcpy(&result[offset], j->name, length * sizeof(char_t)); 6248 } 6249 } 6250 6251 assert(offset == 0); 6252 6253 return result; 6254 } 6255 #endif 6256 first_element_by_path(const char_t * path_,char_t delimiter) const6257 PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const 6258 { 6259 xml_node context = path_[0] == delimiter ? root() : *this; 6260 6261 if (!context._root) return xml_node(); 6262 6263 const char_t* path_segment = path_; 6264 6265 while (*path_segment == delimiter) ++path_segment; 6266 6267 const char_t* path_segment_end = path_segment; 6268 6269 while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end; 6270 6271 if (path_segment == path_segment_end) return context; 6272 6273 const char_t* next_segment = path_segment_end; 6274 6275 while (*next_segment == delimiter) ++next_segment; 6276 6277 if (*path_segment == '.' && path_segment + 1 == path_segment_end) 6278 return context.first_element_by_path(next_segment, delimiter); 6279 else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end) 6280 return context.parent().first_element_by_path(next_segment, delimiter); 6281 else 6282 { 6283 for (xml_node_struct* j = context._root->first_child; j; j = j->next_sibling) 6284 { 6285 if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment))) 6286 { 6287 xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter); 6288 6289 if (subsearch) return subsearch; 6290 } 6291 } 6292 6293 return xml_node(); 6294 } 6295 } 6296 traverse(xml_tree_walker & walker)6297 PUGI__FN bool xml_node::traverse(xml_tree_walker& walker) 6298 { 6299 walker._depth = -1; 6300 6301 xml_node arg_begin(_root); 6302 if (!walker.begin(arg_begin)) return false; 6303 6304 xml_node_struct* cur = _root ? _root->first_child + 0 : 0; 6305 6306 if (cur) 6307 { 6308 ++walker._depth; 6309 6310 do 6311 { 6312 xml_node arg_for_each(cur); 6313 if (!walker.for_each(arg_for_each)) 6314 return false; 6315 6316 if (cur->first_child) 6317 { 6318 ++walker._depth; 6319 cur = cur->first_child; 6320 } 6321 else if (cur->next_sibling) 6322 cur = cur->next_sibling; 6323 else 6324 { 6325 while (!cur->next_sibling && cur != _root && cur->parent) 6326 { 6327 --walker._depth; 6328 cur = cur->parent; 6329 } 6330 6331 if (cur != _root) 6332 cur = cur->next_sibling; 6333 } 6334 } 6335 while (cur && cur != _root); 6336 } 6337 6338 assert(walker._depth == -1); 6339 6340 xml_node arg_end(_root); 6341 return walker.end(arg_end); 6342 } 6343 hash_value() const6344 PUGI__FN size_t xml_node::hash_value() const 6345 { 6346 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct)); 6347 } 6348 internal_object() const6349 PUGI__FN xml_node_struct* xml_node::internal_object() const 6350 { 6351 return _root; 6352 } 6353 print(xml_writer & writer,const char_t * indent,unsigned int flags,xml_encoding encoding,unsigned int depth) const6354 PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const 6355 { 6356 if (!_root) return; 6357 6358 impl::xml_buffered_writer buffered_writer(writer, encoding); 6359 6360 impl::node_output(buffered_writer, _root, indent, flags, depth); 6361 6362 buffered_writer.flush(); 6363 } 6364 6365 #ifndef PUGIXML_NO_STL print(std::basic_ostream<char,std::char_traits<char>> & stream,const char_t * indent,unsigned int flags,xml_encoding encoding,unsigned int depth) const6366 PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const 6367 { 6368 xml_writer_stream writer(stream); 6369 6370 print(writer, indent, flags, encoding, depth); 6371 } 6372 print(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream,const char_t * indent,unsigned int flags,unsigned int depth) const6373 PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const 6374 { 6375 xml_writer_stream writer(stream); 6376 6377 print(writer, indent, flags, encoding_wchar, depth); 6378 } 6379 #endif 6380 offset_debug() const6381 PUGI__FN ptrdiff_t xml_node::offset_debug() const 6382 { 6383 if (!_root) return -1; 6384 6385 impl::xml_document_struct& doc = impl::get_document(_root); 6386 6387 // we can determine the offset reliably only if there is exactly once parse buffer 6388 if (!doc.buffer || doc.extra_buffers) return -1; 6389 6390 switch (type()) 6391 { 6392 case node_document: 6393 return 0; 6394 6395 case node_element: 6396 case node_declaration: 6397 case node_pi: 6398 return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1; 6399 6400 case node_pcdata: 6401 case node_cdata: 6402 case node_comment: 6403 case node_doctype: 6404 return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1; 6405 6406 default: 6407 assert(false && "Invalid node type"); // unreachable 6408 return -1; 6409 } 6410 } 6411 6412 #ifdef __BORLANDC__ operator &&(const xml_node & lhs,bool rhs)6413 PUGI__FN bool operator&&(const xml_node& lhs, bool rhs) 6414 { 6415 return (bool)lhs && rhs; 6416 } 6417 operator ||(const xml_node & lhs,bool rhs)6418 PUGI__FN bool operator||(const xml_node& lhs, bool rhs) 6419 { 6420 return (bool)lhs || rhs; 6421 } 6422 #endif 6423 xml_text(xml_node_struct * root)6424 PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root) 6425 { 6426 } 6427 _data() const6428 PUGI__FN xml_node_struct* xml_text::_data() const 6429 { 6430 if (!_root || impl::is_text_node(_root)) return _root; 6431 6432 // element nodes can have value if parse_embed_pcdata was used 6433 if (PUGI__NODETYPE(_root) == node_element && _root->value) 6434 return _root; 6435 6436 for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling) 6437 if (impl::is_text_node(node)) 6438 return node; 6439 6440 return 0; 6441 } 6442 _data_new()6443 PUGI__FN xml_node_struct* xml_text::_data_new() 6444 { 6445 xml_node_struct* d = _data(); 6446 if (d) return d; 6447 6448 return xml_node(_root).append_child(node_pcdata).internal_object(); 6449 } 6450 xml_text()6451 PUGI__FN xml_text::xml_text(): _root(0) 6452 { 6453 } 6454 unspecified_bool_xml_text(xml_text ***)6455 PUGI__FN static void unspecified_bool_xml_text(xml_text***) 6456 { 6457 } 6458 operator xml_text::unspecified_bool_type() const6459 PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const 6460 { 6461 return _data() ? unspecified_bool_xml_text : 0; 6462 } 6463 operator !() const6464 PUGI__FN bool xml_text::operator!() const 6465 { 6466 return !_data(); 6467 } 6468 empty() const6469 PUGI__FN bool xml_text::empty() const 6470 { 6471 return _data() == 0; 6472 } 6473 get() const6474 PUGI__FN const char_t* xml_text::get() const 6475 { 6476 xml_node_struct* d = _data(); 6477 6478 return (d && d->value) ? d->value + 0 : PUGIXML_TEXT(""); 6479 } 6480 as_string(const char_t * def) const6481 PUGI__FN const char_t* xml_text::as_string(const char_t* def) const 6482 { 6483 xml_node_struct* d = _data(); 6484 6485 return (d && d->value) ? d->value + 0 : def; 6486 } 6487 as_int(int def) const6488 PUGI__FN int xml_text::as_int(int def) const 6489 { 6490 xml_node_struct* d = _data(); 6491 6492 return (d && d->value) ? impl::get_value_int(d->value) : def; 6493 } 6494 as_uint(unsigned int def) const6495 PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const 6496 { 6497 xml_node_struct* d = _data(); 6498 6499 return (d && d->value) ? impl::get_value_uint(d->value) : def; 6500 } 6501 as_double(double def) const6502 PUGI__FN double xml_text::as_double(double def) const 6503 { 6504 xml_node_struct* d = _data(); 6505 6506 return (d && d->value) ? impl::get_value_double(d->value) : def; 6507 } 6508 as_float(float def) const6509 PUGI__FN float xml_text::as_float(float def) const 6510 { 6511 xml_node_struct* d = _data(); 6512 6513 return (d && d->value) ? impl::get_value_float(d->value) : def; 6514 } 6515 as_bool(bool def) const6516 PUGI__FN bool xml_text::as_bool(bool def) const 6517 { 6518 xml_node_struct* d = _data(); 6519 6520 return (d && d->value) ? impl::get_value_bool(d->value) : def; 6521 } 6522 6523 #ifdef PUGIXML_HAS_LONG_LONG as_llong(long long def) const6524 PUGI__FN long long xml_text::as_llong(long long def) const 6525 { 6526 xml_node_struct* d = _data(); 6527 6528 return (d && d->value) ? impl::get_value_llong(d->value) : def; 6529 } 6530 as_ullong(unsigned long long def) const6531 PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const 6532 { 6533 xml_node_struct* d = _data(); 6534 6535 return (d && d->value) ? impl::get_value_ullong(d->value) : def; 6536 } 6537 #endif 6538 set(const char_t * rhs)6539 PUGI__FN bool xml_text::set(const char_t* rhs) 6540 { 6541 xml_node_struct* dn = _data_new(); 6542 6543 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false; 6544 } 6545 set(int rhs)6546 PUGI__FN bool xml_text::set(int rhs) 6547 { 6548 xml_node_struct* dn = _data_new(); 6549 6550 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; 6551 } 6552 set(unsigned int rhs)6553 PUGI__FN bool xml_text::set(unsigned int rhs) 6554 { 6555 xml_node_struct* dn = _data_new(); 6556 6557 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; 6558 } 6559 set(long rhs)6560 PUGI__FN bool xml_text::set(long rhs) 6561 { 6562 xml_node_struct* dn = _data_new(); 6563 6564 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; 6565 } 6566 set(unsigned long rhs)6567 PUGI__FN bool xml_text::set(unsigned long rhs) 6568 { 6569 xml_node_struct* dn = _data_new(); 6570 6571 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; 6572 } 6573 set(float rhs)6574 PUGI__FN bool xml_text::set(float rhs) 6575 { 6576 xml_node_struct* dn = _data_new(); 6577 6578 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision) : false; 6579 } 6580 set(float rhs,int precision)6581 PUGI__FN bool xml_text::set(float rhs, int precision) 6582 { 6583 xml_node_struct* dn = _data_new(); 6584 6585 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false; 6586 } 6587 set(double rhs)6588 PUGI__FN bool xml_text::set(double rhs) 6589 { 6590 xml_node_struct* dn = _data_new(); 6591 6592 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision) : false; 6593 } 6594 set(double rhs,int precision)6595 PUGI__FN bool xml_text::set(double rhs, int precision) 6596 { 6597 xml_node_struct* dn = _data_new(); 6598 6599 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false; 6600 } 6601 set(bool rhs)6602 PUGI__FN bool xml_text::set(bool rhs) 6603 { 6604 xml_node_struct* dn = _data_new(); 6605 6606 return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; 6607 } 6608 6609 #ifdef PUGIXML_HAS_LONG_LONG set(long long rhs)6610 PUGI__FN bool xml_text::set(long long rhs) 6611 { 6612 xml_node_struct* dn = _data_new(); 6613 6614 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; 6615 } 6616 set(unsigned long long rhs)6617 PUGI__FN bool xml_text::set(unsigned long long rhs) 6618 { 6619 xml_node_struct* dn = _data_new(); 6620 6621 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; 6622 } 6623 #endif 6624 operator =(const char_t * rhs)6625 PUGI__FN xml_text& xml_text::operator=(const char_t* rhs) 6626 { 6627 set(rhs); 6628 return *this; 6629 } 6630 operator =(int rhs)6631 PUGI__FN xml_text& xml_text::operator=(int rhs) 6632 { 6633 set(rhs); 6634 return *this; 6635 } 6636 operator =(unsigned int rhs)6637 PUGI__FN xml_text& xml_text::operator=(unsigned int rhs) 6638 { 6639 set(rhs); 6640 return *this; 6641 } 6642 operator =(long rhs)6643 PUGI__FN xml_text& xml_text::operator=(long rhs) 6644 { 6645 set(rhs); 6646 return *this; 6647 } 6648 operator =(unsigned long rhs)6649 PUGI__FN xml_text& xml_text::operator=(unsigned long rhs) 6650 { 6651 set(rhs); 6652 return *this; 6653 } 6654 operator =(double rhs)6655 PUGI__FN xml_text& xml_text::operator=(double rhs) 6656 { 6657 set(rhs); 6658 return *this; 6659 } 6660 operator =(float rhs)6661 PUGI__FN xml_text& xml_text::operator=(float rhs) 6662 { 6663 set(rhs); 6664 return *this; 6665 } 6666 operator =(bool rhs)6667 PUGI__FN xml_text& xml_text::operator=(bool rhs) 6668 { 6669 set(rhs); 6670 return *this; 6671 } 6672 6673 #ifdef PUGIXML_HAS_LONG_LONG operator =(long long rhs)6674 PUGI__FN xml_text& xml_text::operator=(long long rhs) 6675 { 6676 set(rhs); 6677 return *this; 6678 } 6679 operator =(unsigned long long rhs)6680 PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs) 6681 { 6682 set(rhs); 6683 return *this; 6684 } 6685 #endif 6686 data() const6687 PUGI__FN xml_node xml_text::data() const 6688 { 6689 return xml_node(_data()); 6690 } 6691 6692 #ifdef __BORLANDC__ operator &&(const xml_text & lhs,bool rhs)6693 PUGI__FN bool operator&&(const xml_text& lhs, bool rhs) 6694 { 6695 return (bool)lhs && rhs; 6696 } 6697 operator ||(const xml_text & lhs,bool rhs)6698 PUGI__FN bool operator||(const xml_text& lhs, bool rhs) 6699 { 6700 return (bool)lhs || rhs; 6701 } 6702 #endif 6703 xml_node_iterator()6704 PUGI__FN xml_node_iterator::xml_node_iterator() 6705 { 6706 } 6707 xml_node_iterator(const xml_node & node)6708 PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent()) 6709 { 6710 } 6711 xml_node_iterator(xml_node_struct * ref,xml_node_struct * parent)6712 PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) 6713 { 6714 } 6715 operator ==(const xml_node_iterator & rhs) const6716 PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const 6717 { 6718 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; 6719 } 6720 operator !=(const xml_node_iterator & rhs) const6721 PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const 6722 { 6723 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; 6724 } 6725 operator *() const6726 PUGI__FN xml_node& xml_node_iterator::operator*() const 6727 { 6728 assert(_wrap._root); 6729 return _wrap; 6730 } 6731 operator ->() const6732 PUGI__FN xml_node* xml_node_iterator::operator->() const 6733 { 6734 assert(_wrap._root); 6735 return const_cast<xml_node*>(&_wrap); // BCC5 workaround 6736 } 6737 operator ++()6738 PUGI__FN const xml_node_iterator& xml_node_iterator::operator++() 6739 { 6740 assert(_wrap._root); 6741 _wrap._root = _wrap._root->next_sibling; 6742 return *this; 6743 } 6744 operator ++(int)6745 PUGI__FN xml_node_iterator xml_node_iterator::operator++(int) 6746 { 6747 xml_node_iterator temp = *this; 6748 ++*this; 6749 return temp; 6750 } 6751 operator --()6752 PUGI__FN const xml_node_iterator& xml_node_iterator::operator--() 6753 { 6754 _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child(); 6755 return *this; 6756 } 6757 operator --(int)6758 PUGI__FN xml_node_iterator xml_node_iterator::operator--(int) 6759 { 6760 xml_node_iterator temp = *this; 6761 --*this; 6762 return temp; 6763 } 6764 xml_attribute_iterator()6765 PUGI__FN xml_attribute_iterator::xml_attribute_iterator() 6766 { 6767 } 6768 xml_attribute_iterator(const xml_attribute & attr,const xml_node & parent)6769 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent) 6770 { 6771 } 6772 xml_attribute_iterator(xml_attribute_struct * ref,xml_node_struct * parent)6773 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) 6774 { 6775 } 6776 operator ==(const xml_attribute_iterator & rhs) const6777 PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const 6778 { 6779 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root; 6780 } 6781 operator !=(const xml_attribute_iterator & rhs) const6782 PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const 6783 { 6784 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root; 6785 } 6786 operator *() const6787 PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const 6788 { 6789 assert(_wrap._attr); 6790 return _wrap; 6791 } 6792 operator ->() const6793 PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const 6794 { 6795 assert(_wrap._attr); 6796 return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround 6797 } 6798 operator ++()6799 PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++() 6800 { 6801 assert(_wrap._attr); 6802 _wrap._attr = _wrap._attr->next_attribute; 6803 return *this; 6804 } 6805 operator ++(int)6806 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int) 6807 { 6808 xml_attribute_iterator temp = *this; 6809 ++*this; 6810 return temp; 6811 } 6812 operator --()6813 PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--() 6814 { 6815 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute(); 6816 return *this; 6817 } 6818 operator --(int)6819 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int) 6820 { 6821 xml_attribute_iterator temp = *this; 6822 --*this; 6823 return temp; 6824 } 6825 xml_named_node_iterator()6826 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0) 6827 { 6828 } 6829 xml_named_node_iterator(const xml_node & node,const char_t * name)6830 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name) 6831 { 6832 } 6833 xml_named_node_iterator(xml_node_struct * ref,xml_node_struct * parent,const char_t * name)6834 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name) 6835 { 6836 } 6837 operator ==(const xml_named_node_iterator & rhs) const6838 PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const 6839 { 6840 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; 6841 } 6842 operator !=(const xml_named_node_iterator & rhs) const6843 PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const 6844 { 6845 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; 6846 } 6847 operator *() const6848 PUGI__FN xml_node& xml_named_node_iterator::operator*() const 6849 { 6850 assert(_wrap._root); 6851 return _wrap; 6852 } 6853 operator ->() const6854 PUGI__FN xml_node* xml_named_node_iterator::operator->() const 6855 { 6856 assert(_wrap._root); 6857 return const_cast<xml_node*>(&_wrap); // BCC5 workaround 6858 } 6859 operator ++()6860 PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++() 6861 { 6862 assert(_wrap._root); 6863 _wrap = _wrap.next_sibling(_name); 6864 return *this; 6865 } 6866 operator ++(int)6867 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int) 6868 { 6869 xml_named_node_iterator temp = *this; 6870 ++*this; 6871 return temp; 6872 } 6873 operator --()6874 PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--() 6875 { 6876 if (_wrap._root) 6877 _wrap = _wrap.previous_sibling(_name); 6878 else 6879 { 6880 _wrap = _parent.last_child(); 6881 6882 if (!impl::strequal(_wrap.name(), _name)) 6883 _wrap = _wrap.previous_sibling(_name); 6884 } 6885 6886 return *this; 6887 } 6888 operator --(int)6889 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int) 6890 { 6891 xml_named_node_iterator temp = *this; 6892 --*this; 6893 return temp; 6894 } 6895 xml_parse_result()6896 PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto) 6897 { 6898 } 6899 operator bool() const6900 PUGI__FN xml_parse_result::operator bool() const 6901 { 6902 return status == status_ok; 6903 } 6904 description() const6905 PUGI__FN const char* xml_parse_result::description() const 6906 { 6907 switch (status) 6908 { 6909 case status_ok: return "No error"; 6910 6911 case status_file_not_found: return "File was not found"; 6912 case status_io_error: return "Error reading from file/stream"; 6913 case status_out_of_memory: return "Could not allocate memory"; 6914 case status_internal_error: return "Internal error occurred"; 6915 6916 case status_unrecognized_tag: return "Could not determine tag type"; 6917 6918 case status_bad_pi: return "Error parsing document declaration/processing instruction"; 6919 case status_bad_comment: return "Error parsing comment"; 6920 case status_bad_cdata: return "Error parsing CDATA section"; 6921 case status_bad_doctype: return "Error parsing document type declaration"; 6922 case status_bad_pcdata: return "Error parsing PCDATA section"; 6923 case status_bad_start_element: return "Error parsing start element tag"; 6924 case status_bad_attribute: return "Error parsing element attribute"; 6925 case status_bad_end_element: return "Error parsing end element tag"; 6926 case status_end_element_mismatch: return "Start-end tags mismatch"; 6927 6928 case status_append_invalid_root: return "Unable to append nodes: root is not an element or document"; 6929 6930 case status_no_document_element: return "No document element found"; 6931 6932 default: return "Unknown error"; 6933 } 6934 } 6935 xml_document()6936 PUGI__FN xml_document::xml_document(): _buffer(0) 6937 { 6938 _create(); 6939 } 6940 ~xml_document()6941 PUGI__FN xml_document::~xml_document() 6942 { 6943 _destroy(); 6944 } 6945 6946 #ifdef PUGIXML_HAS_MOVE xml_document(xml_document && rhs)6947 PUGI__FN xml_document::xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT: _buffer(0) 6948 { 6949 _create(); 6950 _move(rhs); 6951 } 6952 operator =(xml_document && rhs)6953 PUGI__FN xml_document& xml_document::operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT 6954 { 6955 if (this == &rhs) return *this; 6956 6957 _destroy(); 6958 _create(); 6959 _move(rhs); 6960 6961 return *this; 6962 } 6963 #endif 6964 reset()6965 PUGI__FN void xml_document::reset() 6966 { 6967 _destroy(); 6968 _create(); 6969 } 6970 reset(const xml_document & proto)6971 PUGI__FN void xml_document::reset(const xml_document& proto) 6972 { 6973 reset(); 6974 6975 impl::node_copy_tree(_root, proto._root); 6976 } 6977 _create()6978 PUGI__FN void xml_document::_create() 6979 { 6980 assert(!_root); 6981 6982 #ifdef PUGIXML_COMPACT 6983 // space for page marker for the first page (uint32_t), rounded up to pointer size; assumes pointers are at least 32-bit 6984 const size_t page_offset = sizeof(void*); 6985 #else 6986 const size_t page_offset = 0; 6987 #endif 6988 6989 // initialize sentinel page 6990 PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory)); 6991 6992 // prepare page structure 6993 impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory); 6994 assert(page); 6995 6996 page->busy_size = impl::xml_memory_page_size; 6997 6998 // setup first page marker 6999 #ifdef PUGIXML_COMPACT 7000 // round-trip through void* to avoid 'cast increases required alignment of target type' warning 7001 page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page))); 7002 *page->compact_page_marker = sizeof(impl::xml_memory_page); 7003 #endif 7004 7005 // allocate new root 7006 _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page); 7007 _root->prev_sibling_c = _root; 7008 7009 // setup sentinel page 7010 page->allocator = static_cast<impl::xml_document_struct*>(_root); 7011 7012 // setup hash table pointer in allocator 7013 #ifdef PUGIXML_COMPACT 7014 page->allocator->_hash = &static_cast<impl::xml_document_struct*>(_root)->hash; 7015 #endif 7016 7017 // verify the document allocation 7018 assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory)); 7019 } 7020 _destroy()7021 PUGI__FN void xml_document::_destroy() 7022 { 7023 assert(_root); 7024 7025 // destroy static storage 7026 if (_buffer) 7027 { 7028 impl::xml_memory::deallocate(_buffer); 7029 _buffer = 0; 7030 } 7031 7032 // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator) 7033 for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next) 7034 { 7035 if (extra->buffer) impl::xml_memory::deallocate(extra->buffer); 7036 } 7037 7038 // destroy dynamic storage, leave sentinel page (it's in static memory) 7039 impl::xml_memory_page* root_page = PUGI__GETPAGE(_root); 7040 assert(root_page && !root_page->prev); 7041 assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory)); 7042 7043 for (impl::xml_memory_page* page = root_page->next; page; ) 7044 { 7045 impl::xml_memory_page* next = page->next; 7046 7047 impl::xml_allocator::deallocate_page(page); 7048 7049 page = next; 7050 } 7051 7052 #ifdef PUGIXML_COMPACT 7053 // destroy hash table 7054 static_cast<impl::xml_document_struct*>(_root)->hash.clear(); 7055 #endif 7056 7057 _root = 0; 7058 } 7059 7060 #ifdef PUGIXML_HAS_MOVE _move(xml_document & rhs)7061 PUGI__FN void xml_document::_move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT 7062 { 7063 impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root); 7064 impl::xml_document_struct* other = static_cast<impl::xml_document_struct*>(rhs._root); 7065 7066 // save first child pointer for later; this needs hash access 7067 xml_node_struct* other_first_child = other->first_child; 7068 7069 #ifdef PUGIXML_COMPACT 7070 // reserve space for the hash table up front; this is the only operation that can fail 7071 // if it does, we have no choice but to throw (if we have exceptions) 7072 if (other_first_child) 7073 { 7074 size_t other_children = 0; 7075 for (xml_node_struct* node = other_first_child; node; node = node->next_sibling) 7076 other_children++; 7077 7078 // in compact mode, each pointer assignment could result in a hash table request 7079 // during move, we have to relocate document first_child and parents of all children 7080 // normally there's just one child and its parent has a pointerless encoding but 7081 // we assume the worst here 7082 if (!other->_hash->reserve(other_children + 1)) 7083 { 7084 #ifdef PUGIXML_NO_EXCEPTIONS 7085 return; 7086 #else 7087 throw std::bad_alloc(); 7088 #endif 7089 } 7090 } 7091 #endif 7092 7093 // move allocation state 7094 doc->_root = other->_root; 7095 doc->_busy_size = other->_busy_size; 7096 7097 // move buffer state 7098 doc->buffer = other->buffer; 7099 doc->extra_buffers = other->extra_buffers; 7100 _buffer = rhs._buffer; 7101 7102 #ifdef PUGIXML_COMPACT 7103 // move compact hash; note that the hash table can have pointers to other but they will be "inactive", similarly to nodes removed with remove_child 7104 doc->hash = other->hash; 7105 doc->_hash = &doc->hash; 7106 7107 // make sure we don't access other hash up until the end when we reinitialize other document 7108 other->_hash = 0; 7109 #endif 7110 7111 // move page structure 7112 impl::xml_memory_page* doc_page = PUGI__GETPAGE(doc); 7113 assert(doc_page && !doc_page->prev && !doc_page->next); 7114 7115 impl::xml_memory_page* other_page = PUGI__GETPAGE(other); 7116 assert(other_page && !other_page->prev); 7117 7118 // relink pages since root page is embedded into xml_document 7119 if (impl::xml_memory_page* page = other_page->next) 7120 { 7121 assert(page->prev == other_page); 7122 7123 page->prev = doc_page; 7124 7125 doc_page->next = page; 7126 other_page->next = 0; 7127 } 7128 7129 // make sure pages point to the correct document state 7130 for (impl::xml_memory_page* page = doc_page->next; page; page = page->next) 7131 { 7132 assert(page->allocator == other); 7133 7134 page->allocator = doc; 7135 7136 #ifdef PUGIXML_COMPACT 7137 // this automatically migrates most children between documents and prevents ->parent assignment from allocating 7138 if (page->compact_shared_parent == other) 7139 page->compact_shared_parent = doc; 7140 #endif 7141 } 7142 7143 // move tree structure 7144 assert(!doc->first_child); 7145 7146 doc->first_child = other_first_child; 7147 7148 for (xml_node_struct* node = other_first_child; node; node = node->next_sibling) 7149 { 7150 #ifdef PUGIXML_COMPACT 7151 // most children will have migrated when we reassigned compact_shared_parent 7152 assert(node->parent == other || node->parent == doc); 7153 7154 node->parent = doc; 7155 #else 7156 assert(node->parent == other); 7157 node->parent = doc; 7158 #endif 7159 } 7160 7161 // reset other document 7162 new (other) impl::xml_document_struct(PUGI__GETPAGE(other)); 7163 rhs._buffer = 0; 7164 } 7165 #endif 7166 7167 #ifndef PUGIXML_NO_STL load(std::basic_istream<char,std::char_traits<char>> & stream,unsigned int options,xml_encoding encoding)7168 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding) 7169 { 7170 reset(); 7171 7172 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer); 7173 } 7174 load(std::basic_istream<wchar_t,std::char_traits<wchar_t>> & stream,unsigned int options)7175 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options) 7176 { 7177 reset(); 7178 7179 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer); 7180 } 7181 #endif 7182 load_string(const char_t * contents,unsigned int options)7183 PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options) 7184 { 7185 // Force native encoding (skip autodetection) 7186 #ifdef PUGIXML_WCHAR_MODE 7187 xml_encoding encoding = encoding_wchar; 7188 #else 7189 xml_encoding encoding = encoding_utf8; 7190 #endif 7191 7192 return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding); 7193 } 7194 load(const char_t * contents,unsigned int options)7195 PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options) 7196 { 7197 return load_string(contents, options); 7198 } 7199 load_file(const char * path_,unsigned int options,xml_encoding encoding)7200 PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding) 7201 { 7202 reset(); 7203 7204 using impl::auto_deleter; // MSVC7 workaround 7205 auto_deleter<FILE> file(impl::open_file(path_, "rb"), impl::close_file); 7206 7207 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer); 7208 } 7209 load_file(const wchar_t * path_,unsigned int options,xml_encoding encoding)7210 PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding) 7211 { 7212 reset(); 7213 7214 using impl::auto_deleter; // MSVC7 workaround 7215 auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file); 7216 7217 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer); 7218 } 7219 load_buffer(const void * contents,size_t size,unsigned int options,xml_encoding encoding)7220 PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) 7221 { 7222 reset(); 7223 7224 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer); 7225 } 7226 load_buffer_inplace(void * contents,size_t size,unsigned int options,xml_encoding encoding)7227 PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding) 7228 { 7229 reset(); 7230 7231 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer); 7232 } 7233 load_buffer_inplace_own(void * contents,size_t size,unsigned int options,xml_encoding encoding)7234 PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding) 7235 { 7236 reset(); 7237 7238 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer); 7239 } 7240 save(xml_writer & writer,const char_t * indent,unsigned int flags,xml_encoding encoding) const7241 PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const 7242 { 7243 impl::xml_buffered_writer buffered_writer(writer, encoding); 7244 7245 if ((flags & format_write_bom) && encoding != encoding_latin1) 7246 { 7247 // BOM always represents the codepoint U+FEFF, so just write it in native encoding 7248 #ifdef PUGIXML_WCHAR_MODE 7249 unsigned int bom = 0xfeff; 7250 buffered_writer.write(static_cast<wchar_t>(bom)); 7251 #else 7252 buffered_writer.write('\xef', '\xbb', '\xbf'); 7253 #endif 7254 } 7255 7256 if (!(flags & format_no_declaration) && !impl::has_declaration(_root)) 7257 { 7258 buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\"")); 7259 if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\"")); 7260 buffered_writer.write('?', '>'); 7261 if (!(flags & format_raw)) buffered_writer.write('\n'); 7262 } 7263 7264 impl::node_output(buffered_writer, _root, indent, flags, 0); 7265 7266 buffered_writer.flush(); 7267 } 7268 7269 #ifndef PUGIXML_NO_STL save(std::basic_ostream<char,std::char_traits<char>> & stream,const char_t * indent,unsigned int flags,xml_encoding encoding) const7270 PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const 7271 { 7272 xml_writer_stream writer(stream); 7273 7274 save(writer, indent, flags, encoding); 7275 } 7276 save(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream,const char_t * indent,unsigned int flags) const7277 PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const 7278 { 7279 xml_writer_stream writer(stream); 7280 7281 save(writer, indent, flags, encoding_wchar); 7282 } 7283 #endif 7284 save_file(const char * path_,const char_t * indent,unsigned int flags,xml_encoding encoding) const7285 PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const 7286 { 7287 using impl::auto_deleter; // MSVC7 workaround 7288 auto_deleter<FILE> file(impl::open_file(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file); 7289 7290 return impl::save_file_impl(*this, file.data, indent, flags, encoding); 7291 } 7292 save_file(const wchar_t * path_,const char_t * indent,unsigned int flags,xml_encoding encoding) const7293 PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const 7294 { 7295 using impl::auto_deleter; // MSVC7 workaround 7296 auto_deleter<FILE> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), impl::close_file); 7297 7298 return impl::save_file_impl(*this, file.data, indent, flags, encoding); 7299 } 7300 document_element() const7301 PUGI__FN xml_node xml_document::document_element() const 7302 { 7303 assert(_root); 7304 7305 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 7306 if (PUGI__NODETYPE(i) == node_element) 7307 return xml_node(i); 7308 7309 return xml_node(); 7310 } 7311 7312 #ifndef PUGIXML_NO_STL as_utf8(const wchar_t * str)7313 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str) 7314 { 7315 assert(str); 7316 7317 return impl::as_utf8_impl(str, impl::strlength_wide(str)); 7318 } 7319 as_utf8(const std::basic_string<wchar_t> & str)7320 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str) 7321 { 7322 return impl::as_utf8_impl(str.c_str(), str.size()); 7323 } 7324 as_wide(const char * str)7325 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str) 7326 { 7327 assert(str); 7328 7329 return impl::as_wide_impl(str, strlen(str)); 7330 } 7331 as_wide(const std::string & str)7332 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str) 7333 { 7334 return impl::as_wide_impl(str.c_str(), str.size()); 7335 } 7336 #endif 7337 set_memory_management_functions(allocation_function allocate,deallocation_function deallocate)7338 PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate) 7339 { 7340 impl::xml_memory::allocate = allocate; 7341 impl::xml_memory::deallocate = deallocate; 7342 } 7343 get_memory_allocation_function()7344 PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function() 7345 { 7346 return impl::xml_memory::allocate; 7347 } 7348 get_memory_deallocation_function()7349 PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function() 7350 { 7351 return impl::xml_memory::deallocate; 7352 } 7353 } 7354 7355 #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC)) 7356 namespace std 7357 { 7358 // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier) _Iter_cat(const pugi::xml_node_iterator &)7359 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&) 7360 { 7361 return std::bidirectional_iterator_tag(); 7362 } 7363 _Iter_cat(const pugi::xml_attribute_iterator &)7364 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&) 7365 { 7366 return std::bidirectional_iterator_tag(); 7367 } 7368 _Iter_cat(const pugi::xml_named_node_iterator &)7369 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&) 7370 { 7371 return std::bidirectional_iterator_tag(); 7372 } 7373 } 7374 #endif 7375 7376 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC) 7377 namespace std 7378 { 7379 // Workarounds for (non-standard) iterator category detection __iterator_category(const pugi::xml_node_iterator &)7380 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&) 7381 { 7382 return std::bidirectional_iterator_tag(); 7383 } 7384 __iterator_category(const pugi::xml_attribute_iterator &)7385 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&) 7386 { 7387 return std::bidirectional_iterator_tag(); 7388 } 7389 __iterator_category(const pugi::xml_named_node_iterator &)7390 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&) 7391 { 7392 return std::bidirectional_iterator_tag(); 7393 } 7394 } 7395 #endif 7396 7397 #ifndef PUGIXML_NO_XPATH 7398 // STL replacements 7399 PUGI__NS_BEGIN 7400 struct equal_to 7401 { operator ()equal_to7402 template <typename T> bool operator()(const T& lhs, const T& rhs) const 7403 { 7404 return lhs == rhs; 7405 } 7406 }; 7407 7408 struct not_equal_to 7409 { operator ()not_equal_to7410 template <typename T> bool operator()(const T& lhs, const T& rhs) const 7411 { 7412 return lhs != rhs; 7413 } 7414 }; 7415 7416 struct less 7417 { operator ()less7418 template <typename T> bool operator()(const T& lhs, const T& rhs) const 7419 { 7420 return lhs < rhs; 7421 } 7422 }; 7423 7424 struct less_equal 7425 { operator ()less_equal7426 template <typename T> bool operator()(const T& lhs, const T& rhs) const 7427 { 7428 return lhs <= rhs; 7429 } 7430 }; 7431 swap(T & lhs,T & rhs)7432 template <typename T> inline void swap(T& lhs, T& rhs) 7433 { 7434 T temp = lhs; 7435 lhs = rhs; 7436 rhs = temp; 7437 } 7438 min_element(I begin,I end,const Pred & pred)7439 template <typename I, typename Pred> PUGI__FN I min_element(I begin, I end, const Pred& pred) 7440 { 7441 I result = begin; 7442 7443 for (I it = begin + 1; it != end; ++it) 7444 if (pred(*it, *result)) 7445 result = it; 7446 7447 return result; 7448 } 7449 reverse(I begin,I end)7450 template <typename I> PUGI__FN void reverse(I begin, I end) 7451 { 7452 while (end - begin > 1) 7453 swap(*begin++, *--end); 7454 } 7455 unique(I begin,I end)7456 template <typename I> PUGI__FN I unique(I begin, I end) 7457 { 7458 // fast skip head 7459 while (end - begin > 1 && *begin != *(begin + 1)) 7460 begin++; 7461 7462 if (begin == end) 7463 return begin; 7464 7465 // last written element 7466 I write = begin++; 7467 7468 // merge unique elements 7469 while (begin != end) 7470 { 7471 if (*begin != *write) 7472 *++write = *begin++; 7473 else 7474 begin++; 7475 } 7476 7477 // past-the-end (write points to live element) 7478 return write + 1; 7479 } 7480 insertion_sort(T * begin,T * end,const Pred & pred)7481 template <typename T, typename Pred> PUGI__FN void insertion_sort(T* begin, T* end, const Pred& pred) 7482 { 7483 if (begin == end) 7484 return; 7485 7486 for (T* it = begin + 1; it != end; ++it) 7487 { 7488 T val = *it; 7489 T* hole = it; 7490 7491 // move hole backwards 7492 while (hole > begin && pred(val, *(hole - 1))) 7493 { 7494 *hole = *(hole - 1); 7495 hole--; 7496 } 7497 7498 // fill hole with element 7499 *hole = val; 7500 } 7501 } 7502 median3(I first,I middle,I last,const Pred & pred)7503 template <typename I, typename Pred> inline I median3(I first, I middle, I last, const Pred& pred) 7504 { 7505 if (pred(*middle, *first)) 7506 swap(middle, first); 7507 if (pred(*last, *middle)) 7508 swap(last, middle); 7509 if (pred(*middle, *first)) 7510 swap(middle, first); 7511 7512 return middle; 7513 } 7514 partition3(T * begin,T * end,T pivot,const Pred & pred,T ** out_eqbeg,T ** out_eqend)7515 template <typename T, typename Pred> PUGI__FN void partition3(T* begin, T* end, T pivot, const Pred& pred, T** out_eqbeg, T** out_eqend) 7516 { 7517 // invariant: array is split into 4 groups: = < ? > (each variable denotes the boundary between the groups) 7518 T* eq = begin; 7519 T* lt = begin; 7520 T* gt = end; 7521 7522 while (lt < gt) 7523 { 7524 if (pred(*lt, pivot)) 7525 lt++; 7526 else if (*lt == pivot) 7527 swap(*eq++, *lt++); 7528 else 7529 swap(*lt, *--gt); 7530 } 7531 7532 // we now have just 4 groups: = < >; move equal elements to the middle 7533 T* eqbeg = gt; 7534 7535 for (T* it = begin; it != eq; ++it) 7536 swap(*it, *--eqbeg); 7537 7538 *out_eqbeg = eqbeg; 7539 *out_eqend = gt; 7540 } 7541 sort(I begin,I end,const Pred & pred)7542 template <typename I, typename Pred> PUGI__FN void sort(I begin, I end, const Pred& pred) 7543 { 7544 // sort large chunks 7545 while (end - begin > 16) 7546 { 7547 // find median element 7548 I middle = begin + (end - begin) / 2; 7549 I median = median3(begin, middle, end - 1, pred); 7550 7551 // partition in three chunks (< = >) 7552 I eqbeg, eqend; 7553 partition3(begin, end, *median, pred, &eqbeg, &eqend); 7554 7555 // loop on larger half 7556 if (eqbeg - begin > end - eqend) 7557 { 7558 sort(eqend, end, pred); 7559 end = eqbeg; 7560 } 7561 else 7562 { 7563 sort(begin, eqbeg, pred); 7564 begin = eqend; 7565 } 7566 } 7567 7568 // insertion sort small chunk 7569 insertion_sort(begin, end, pred); 7570 } 7571 hash_insert(const void ** table,size_t size,const void * key)7572 PUGI__FN bool hash_insert(const void** table, size_t size, const void* key) 7573 { 7574 assert(key); 7575 7576 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key)); 7577 7578 // MurmurHash3 32-bit finalizer 7579 h ^= h >> 16; 7580 h *= 0x85ebca6bu; 7581 h ^= h >> 13; 7582 h *= 0xc2b2ae35u; 7583 h ^= h >> 16; 7584 7585 size_t hashmod = size - 1; 7586 size_t bucket = h & hashmod; 7587 7588 for (size_t probe = 0; probe <= hashmod; ++probe) 7589 { 7590 if (table[bucket] == 0) 7591 { 7592 table[bucket] = key; 7593 return true; 7594 } 7595 7596 if (table[bucket] == key) 7597 return false; 7598 7599 // hash collision, quadratic probing 7600 bucket = (bucket + probe + 1) & hashmod; 7601 } 7602 7603 assert(false && "Hash table is full"); // unreachable 7604 return false; 7605 } 7606 PUGI__NS_END 7607 7608 // Allocator used for AST and evaluation stacks 7609 PUGI__NS_BEGIN 7610 static const size_t xpath_memory_page_size = 7611 #ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE 7612 PUGIXML_MEMORY_XPATH_PAGE_SIZE 7613 #else 7614 4096 7615 #endif 7616 ; 7617 7618 static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*); 7619 7620 struct xpath_memory_block 7621 { 7622 xpath_memory_block* next; 7623 size_t capacity; 7624 7625 union 7626 { 7627 char data[xpath_memory_page_size]; 7628 double alignment; 7629 }; 7630 }; 7631 7632 struct xpath_allocator 7633 { 7634 xpath_memory_block* _root; 7635 size_t _root_size; 7636 bool* _error; 7637 xpath_allocatorxpath_allocator7638 xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error) 7639 { 7640 } 7641 allocatexpath_allocator7642 void* allocate(size_t size) 7643 { 7644 // round size up to block alignment boundary 7645 size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); 7646 7647 if (_root_size + size <= _root->capacity) 7648 { 7649 void* buf = &_root->data[0] + _root_size; 7650 _root_size += size; 7651 return buf; 7652 } 7653 else 7654 { 7655 // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests 7656 size_t block_capacity_base = sizeof(_root->data); 7657 size_t block_capacity_req = size + block_capacity_base / 4; 7658 size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req; 7659 7660 size_t block_size = block_capacity + offsetof(xpath_memory_block, data); 7661 7662 xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size)); 7663 if (!block) 7664 { 7665 if (_error) *_error = true; 7666 return 0; 7667 } 7668 7669 block->next = _root; 7670 block->capacity = block_capacity; 7671 7672 _root = block; 7673 _root_size = size; 7674 7675 return block->data; 7676 } 7677 } 7678 reallocatexpath_allocator7679 void* reallocate(void* ptr, size_t old_size, size_t new_size) 7680 { 7681 // round size up to block alignment boundary 7682 old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); 7683 new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); 7684 7685 // we can only reallocate the last object 7686 assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size); 7687 7688 // try to reallocate the object inplace 7689 if (ptr && _root_size - old_size + new_size <= _root->capacity) 7690 { 7691 _root_size = _root_size - old_size + new_size; 7692 return ptr; 7693 } 7694 7695 // allocate a new block 7696 void* result = allocate(new_size); 7697 if (!result) return 0; 7698 7699 // we have a new block 7700 if (ptr) 7701 { 7702 // copy old data (we only support growing) 7703 assert(new_size >= old_size); 7704 memcpy(result, ptr, old_size); 7705 7706 // free the previous page if it had no other objects 7707 assert(_root->data == result); 7708 assert(_root->next); 7709 7710 if (_root->next->data == ptr) 7711 { 7712 // deallocate the whole page, unless it was the first one 7713 xpath_memory_block* next = _root->next->next; 7714 7715 if (next) 7716 { 7717 xml_memory::deallocate(_root->next); 7718 _root->next = next; 7719 } 7720 } 7721 } 7722 7723 return result; 7724 } 7725 revertxpath_allocator7726 void revert(const xpath_allocator& state) 7727 { 7728 // free all new pages 7729 xpath_memory_block* cur = _root; 7730 7731 while (cur != state._root) 7732 { 7733 xpath_memory_block* next = cur->next; 7734 7735 xml_memory::deallocate(cur); 7736 7737 cur = next; 7738 } 7739 7740 // restore state 7741 _root = state._root; 7742 _root_size = state._root_size; 7743 } 7744 releasexpath_allocator7745 void release() 7746 { 7747 xpath_memory_block* cur = _root; 7748 assert(cur); 7749 7750 while (cur->next) 7751 { 7752 xpath_memory_block* next = cur->next; 7753 7754 xml_memory::deallocate(cur); 7755 7756 cur = next; 7757 } 7758 } 7759 }; 7760 7761 struct xpath_allocator_capture 7762 { xpath_allocator_capturexpath_allocator_capture7763 xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc) 7764 { 7765 } 7766 ~xpath_allocator_capturexpath_allocator_capture7767 ~xpath_allocator_capture() 7768 { 7769 _target->revert(_state); 7770 } 7771 7772 xpath_allocator* _target; 7773 xpath_allocator _state; 7774 }; 7775 7776 struct xpath_stack 7777 { 7778 xpath_allocator* result; 7779 xpath_allocator* temp; 7780 }; 7781 7782 struct xpath_stack_data 7783 { 7784 xpath_memory_block blocks[2]; 7785 xpath_allocator result; 7786 xpath_allocator temp; 7787 xpath_stack stack; 7788 bool oom; 7789 xpath_stack_dataxpath_stack_data7790 xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false) 7791 { 7792 blocks[0].next = blocks[1].next = 0; 7793 blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data); 7794 7795 stack.result = &result; 7796 stack.temp = &temp; 7797 } 7798 ~xpath_stack_dataxpath_stack_data7799 ~xpath_stack_data() 7800 { 7801 result.release(); 7802 temp.release(); 7803 } 7804 }; 7805 PUGI__NS_END 7806 7807 // String class 7808 PUGI__NS_BEGIN 7809 class xpath_string 7810 { 7811 const char_t* _buffer; 7812 bool _uses_heap; 7813 size_t _length_heap; 7814 duplicate_string(const char_t * string,size_t length,xpath_allocator * alloc)7815 static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc) 7816 { 7817 char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t))); 7818 if (!result) return 0; 7819 7820 memcpy(result, string, length * sizeof(char_t)); 7821 result[length] = 0; 7822 7823 return result; 7824 } 7825 xpath_string(const char_t * buffer,bool uses_heap_,size_t length_heap)7826 xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap) 7827 { 7828 } 7829 7830 public: from_const(const char_t * str)7831 static xpath_string from_const(const char_t* str) 7832 { 7833 return xpath_string(str, false, 0); 7834 } 7835 from_heap_preallocated(const char_t * begin,const char_t * end)7836 static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end) 7837 { 7838 assert(begin <= end && *end == 0); 7839 7840 return xpath_string(begin, true, static_cast<size_t>(end - begin)); 7841 } 7842 from_heap(const char_t * begin,const char_t * end,xpath_allocator * alloc)7843 static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc) 7844 { 7845 assert(begin <= end); 7846 7847 if (begin == end) 7848 return xpath_string(); 7849 7850 size_t length = static_cast<size_t>(end - begin); 7851 const char_t* data = duplicate_string(begin, length, alloc); 7852 7853 return data ? xpath_string(data, true, length) : xpath_string(); 7854 } 7855 xpath_string()7856 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0) 7857 { 7858 } 7859 append(const xpath_string & o,xpath_allocator * alloc)7860 void append(const xpath_string& o, xpath_allocator* alloc) 7861 { 7862 // skip empty sources 7863 if (!*o._buffer) return; 7864 7865 // fast append for constant empty target and constant source 7866 if (!*_buffer && !_uses_heap && !o._uses_heap) 7867 { 7868 _buffer = o._buffer; 7869 } 7870 else 7871 { 7872 // need to make heap copy 7873 size_t target_length = length(); 7874 size_t source_length = o.length(); 7875 size_t result_length = target_length + source_length; 7876 7877 // allocate new buffer 7878 char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t))); 7879 if (!result) return; 7880 7881 // append first string to the new buffer in case there was no reallocation 7882 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t)); 7883 7884 // append second string to the new buffer 7885 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t)); 7886 result[result_length] = 0; 7887 7888 // finalize 7889 _buffer = result; 7890 _uses_heap = true; 7891 _length_heap = result_length; 7892 } 7893 } 7894 c_str() const7895 const char_t* c_str() const 7896 { 7897 return _buffer; 7898 } 7899 length() const7900 size_t length() const 7901 { 7902 return _uses_heap ? _length_heap : strlength(_buffer); 7903 } 7904 data(xpath_allocator * alloc)7905 char_t* data(xpath_allocator* alloc) 7906 { 7907 // make private heap copy 7908 if (!_uses_heap) 7909 { 7910 size_t length_ = strlength(_buffer); 7911 const char_t* data_ = duplicate_string(_buffer, length_, alloc); 7912 7913 if (!data_) return 0; 7914 7915 _buffer = data_; 7916 _uses_heap = true; 7917 _length_heap = length_; 7918 } 7919 7920 return const_cast<char_t*>(_buffer); 7921 } 7922 empty() const7923 bool empty() const 7924 { 7925 return *_buffer == 0; 7926 } 7927 operator ==(const xpath_string & o) const7928 bool operator==(const xpath_string& o) const 7929 { 7930 return strequal(_buffer, o._buffer); 7931 } 7932 operator !=(const xpath_string & o) const7933 bool operator!=(const xpath_string& o) const 7934 { 7935 return !strequal(_buffer, o._buffer); 7936 } 7937 uses_heap() const7938 bool uses_heap() const 7939 { 7940 return _uses_heap; 7941 } 7942 }; 7943 PUGI__NS_END 7944 7945 PUGI__NS_BEGIN starts_with(const char_t * string,const char_t * pattern)7946 PUGI__FN bool starts_with(const char_t* string, const char_t* pattern) 7947 { 7948 while (*pattern && *string == *pattern) 7949 { 7950 string++; 7951 pattern++; 7952 } 7953 7954 return *pattern == 0; 7955 } 7956 find_char(const char_t * s,char_t c)7957 PUGI__FN const char_t* find_char(const char_t* s, char_t c) 7958 { 7959 #ifdef PUGIXML_WCHAR_MODE 7960 return wcschr(s, c); 7961 #else 7962 return strchr(s, c); 7963 #endif 7964 } 7965 find_substring(const char_t * s,const char_t * p)7966 PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p) 7967 { 7968 #ifdef PUGIXML_WCHAR_MODE 7969 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0) 7970 return (*p == 0) ? s : wcsstr(s, p); 7971 #else 7972 return strstr(s, p); 7973 #endif 7974 } 7975 7976 // Converts symbol to lower case, if it is an ASCII one tolower_ascii(char_t ch)7977 PUGI__FN char_t tolower_ascii(char_t ch) 7978 { 7979 return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch; 7980 } 7981 string_value(const xpath_node & na,xpath_allocator * alloc)7982 PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc) 7983 { 7984 if (na.attribute()) 7985 return xpath_string::from_const(na.attribute().value()); 7986 else 7987 { 7988 xml_node n = na.node(); 7989 7990 switch (n.type()) 7991 { 7992 case node_pcdata: 7993 case node_cdata: 7994 case node_comment: 7995 case node_pi: 7996 return xpath_string::from_const(n.value()); 7997 7998 case node_document: 7999 case node_element: 8000 { 8001 xpath_string result; 8002 8003 // element nodes can have value if parse_embed_pcdata was used 8004 if (n.value()[0]) 8005 result.append(xpath_string::from_const(n.value()), alloc); 8006 8007 xml_node cur = n.first_child(); 8008 8009 while (cur && cur != n) 8010 { 8011 if (cur.type() == node_pcdata || cur.type() == node_cdata) 8012 result.append(xpath_string::from_const(cur.value()), alloc); 8013 8014 if (cur.first_child()) 8015 cur = cur.first_child(); 8016 else if (cur.next_sibling()) 8017 cur = cur.next_sibling(); 8018 else 8019 { 8020 while (!cur.next_sibling() && cur != n) 8021 cur = cur.parent(); 8022 8023 if (cur != n) cur = cur.next_sibling(); 8024 } 8025 } 8026 8027 return result; 8028 } 8029 8030 default: 8031 return xpath_string(); 8032 } 8033 } 8034 } 8035 node_is_before_sibling(xml_node_struct * ln,xml_node_struct * rn)8036 PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn) 8037 { 8038 assert(ln->parent == rn->parent); 8039 8040 // there is no common ancestor (the shared parent is null), nodes are from different documents 8041 if (!ln->parent) return ln < rn; 8042 8043 // determine sibling order 8044 xml_node_struct* ls = ln; 8045 xml_node_struct* rs = rn; 8046 8047 while (ls && rs) 8048 { 8049 if (ls == rn) return true; 8050 if (rs == ln) return false; 8051 8052 ls = ls->next_sibling; 8053 rs = rs->next_sibling; 8054 } 8055 8056 // if rn sibling chain ended ln must be before rn 8057 return !rs; 8058 } 8059 node_is_before(xml_node_struct * ln,xml_node_struct * rn)8060 PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn) 8061 { 8062 // find common ancestor at the same depth, if any 8063 xml_node_struct* lp = ln; 8064 xml_node_struct* rp = rn; 8065 8066 while (lp && rp && lp->parent != rp->parent) 8067 { 8068 lp = lp->parent; 8069 rp = rp->parent; 8070 } 8071 8072 // parents are the same! 8073 if (lp && rp) return node_is_before_sibling(lp, rp); 8074 8075 // nodes are at different depths, need to normalize heights 8076 bool left_higher = !lp; 8077 8078 while (lp) 8079 { 8080 lp = lp->parent; 8081 ln = ln->parent; 8082 } 8083 8084 while (rp) 8085 { 8086 rp = rp->parent; 8087 rn = rn->parent; 8088 } 8089 8090 // one node is the ancestor of the other 8091 if (ln == rn) return left_higher; 8092 8093 // find common ancestor... again 8094 while (ln->parent != rn->parent) 8095 { 8096 ln = ln->parent; 8097 rn = rn->parent; 8098 } 8099 8100 return node_is_before_sibling(ln, rn); 8101 } 8102 node_is_ancestor(xml_node_struct * parent,xml_node_struct * node)8103 PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node) 8104 { 8105 while (node && node != parent) node = node->parent; 8106 8107 return parent && node == parent; 8108 } 8109 document_buffer_order(const xpath_node & xnode)8110 PUGI__FN const void* document_buffer_order(const xpath_node& xnode) 8111 { 8112 xml_node_struct* node = xnode.node().internal_object(); 8113 8114 if (node) 8115 { 8116 if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0) 8117 { 8118 if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name; 8119 if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value; 8120 } 8121 8122 return 0; 8123 } 8124 8125 xml_attribute_struct* attr = xnode.attribute().internal_object(); 8126 8127 if (attr) 8128 { 8129 if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0) 8130 { 8131 if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name; 8132 if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value; 8133 } 8134 8135 return 0; 8136 } 8137 8138 return 0; 8139 } 8140 8141 struct document_order_comparator 8142 { operator ()document_order_comparator8143 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const 8144 { 8145 // optimized document order based check 8146 const void* lo = document_buffer_order(lhs); 8147 const void* ro = document_buffer_order(rhs); 8148 8149 if (lo && ro) return lo < ro; 8150 8151 // slow comparison 8152 xml_node ln = lhs.node(), rn = rhs.node(); 8153 8154 // compare attributes 8155 if (lhs.attribute() && rhs.attribute()) 8156 { 8157 // shared parent 8158 if (lhs.parent() == rhs.parent()) 8159 { 8160 // determine sibling order 8161 for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute()) 8162 if (a == rhs.attribute()) 8163 return true; 8164 8165 return false; 8166 } 8167 8168 // compare attribute parents 8169 ln = lhs.parent(); 8170 rn = rhs.parent(); 8171 } 8172 else if (lhs.attribute()) 8173 { 8174 // attributes go after the parent element 8175 if (lhs.parent() == rhs.node()) return false; 8176 8177 ln = lhs.parent(); 8178 } 8179 else if (rhs.attribute()) 8180 { 8181 // attributes go after the parent element 8182 if (rhs.parent() == lhs.node()) return true; 8183 8184 rn = rhs.parent(); 8185 } 8186 8187 if (ln == rn) return false; 8188 8189 if (!ln || !rn) return ln < rn; 8190 8191 return node_is_before(ln.internal_object(), rn.internal_object()); 8192 } 8193 }; 8194 gen_nan()8195 PUGI__FN double gen_nan() 8196 { 8197 #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24)) 8198 PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t)); 8199 typedef uint32_t UI; // BCC5 workaround 8200 union { float f; UI i; } u; 8201 u.i = 0x7fc00000; 8202 return double(u.f); 8203 #else 8204 // fallback 8205 const volatile double zero = 0.0; 8206 return zero / zero; 8207 #endif 8208 } 8209 is_nan(double value)8210 PUGI__FN bool is_nan(double value) 8211 { 8212 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) 8213 return !!_isnan(value); 8214 #elif defined(fpclassify) && defined(FP_NAN) 8215 return fpclassify(value) == FP_NAN; 8216 #else 8217 // fallback 8218 const volatile double v = value; 8219 return v != v; 8220 #endif 8221 } 8222 convert_number_to_string_special(double value)8223 PUGI__FN const char_t* convert_number_to_string_special(double value) 8224 { 8225 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) 8226 if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0; 8227 if (_isnan(value)) return PUGIXML_TEXT("NaN"); 8228 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); 8229 #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO) 8230 switch (fpclassify(value)) 8231 { 8232 case FP_NAN: 8233 return PUGIXML_TEXT("NaN"); 8234 8235 case FP_INFINITE: 8236 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); 8237 8238 case FP_ZERO: 8239 return PUGIXML_TEXT("0"); 8240 8241 default: 8242 return 0; 8243 } 8244 #else 8245 // fallback 8246 const volatile double v = value; 8247 8248 if (v == 0) return PUGIXML_TEXT("0"); 8249 if (v != v) return PUGIXML_TEXT("NaN"); 8250 if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); 8251 return 0; 8252 #endif 8253 } 8254 convert_number_to_boolean(double value)8255 PUGI__FN bool convert_number_to_boolean(double value) 8256 { 8257 return (value != 0 && !is_nan(value)); 8258 } 8259 truncate_zeros(char * begin,char * end)8260 PUGI__FN void truncate_zeros(char* begin, char* end) 8261 { 8262 while (begin != end && end[-1] == '0') end--; 8263 8264 *end = 0; 8265 } 8266 8267 // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent 8268 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE) convert_number_to_mantissa_exponent(double value,char (& buffer)[32],char ** out_mantissa,int * out_exponent)8269 PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent) 8270 { 8271 // get base values 8272 int sign, exponent; 8273 _ecvt_s(buffer, sizeof(buffer), value, DBL_DIG + 1, &exponent, &sign); 8274 8275 // truncate redundant zeros 8276 truncate_zeros(buffer, buffer + strlen(buffer)); 8277 8278 // fill results 8279 *out_mantissa = buffer; 8280 *out_exponent = exponent; 8281 } 8282 #else convert_number_to_mantissa_exponent(double value,char (& buffer)[32],char ** out_mantissa,int * out_exponent)8283 PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent) 8284 { 8285 // get a scientific notation value with IEEE DBL_DIG decimals 8286 PUGI__SNPRINTF(buffer, "%.*e", DBL_DIG, value); 8287 8288 // get the exponent (possibly negative) 8289 char* exponent_string = strchr(buffer, 'e'); 8290 assert(exponent_string); 8291 8292 int exponent = atoi(exponent_string + 1); 8293 8294 // extract mantissa string: skip sign 8295 char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer; 8296 assert(mantissa[0] != '0' && mantissa[1] == '.'); 8297 8298 // divide mantissa by 10 to eliminate integer part 8299 mantissa[1] = mantissa[0]; 8300 mantissa++; 8301 exponent++; 8302 8303 // remove extra mantissa digits and zero-terminate mantissa 8304 truncate_zeros(mantissa, exponent_string); 8305 8306 // fill results 8307 *out_mantissa = mantissa; 8308 *out_exponent = exponent; 8309 } 8310 #endif 8311 convert_number_to_string(double value,xpath_allocator * alloc)8312 PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc) 8313 { 8314 // try special number conversion 8315 const char_t* special = convert_number_to_string_special(value); 8316 if (special) return xpath_string::from_const(special); 8317 8318 // get mantissa + exponent form 8319 char mantissa_buffer[32]; 8320 8321 char* mantissa; 8322 int exponent; 8323 convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent); 8324 8325 // allocate a buffer of suitable length for the number 8326 size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4; 8327 char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size)); 8328 if (!result) return xpath_string(); 8329 8330 // make the number! 8331 char_t* s = result; 8332 8333 // sign 8334 if (value < 0) *s++ = '-'; 8335 8336 // integer part 8337 if (exponent <= 0) 8338 { 8339 *s++ = '0'; 8340 } 8341 else 8342 { 8343 while (exponent > 0) 8344 { 8345 assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9); 8346 *s++ = *mantissa ? *mantissa++ : '0'; 8347 exponent--; 8348 } 8349 } 8350 8351 // fractional part 8352 if (*mantissa) 8353 { 8354 // decimal point 8355 *s++ = '.'; 8356 8357 // extra zeroes from negative exponent 8358 while (exponent < 0) 8359 { 8360 *s++ = '0'; 8361 exponent++; 8362 } 8363 8364 // extra mantissa digits 8365 while (*mantissa) 8366 { 8367 assert(static_cast<unsigned int>(*mantissa - '0') <= 9); 8368 *s++ = *mantissa++; 8369 } 8370 } 8371 8372 // zero-terminate 8373 assert(s < result + result_size); 8374 *s = 0; 8375 8376 return xpath_string::from_heap_preallocated(result, s); 8377 } 8378 check_string_to_number_format(const char_t * string)8379 PUGI__FN bool check_string_to_number_format(const char_t* string) 8380 { 8381 // parse leading whitespace 8382 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string; 8383 8384 // parse sign 8385 if (*string == '-') ++string; 8386 8387 if (!*string) return false; 8388 8389 // if there is no integer part, there should be a decimal part with at least one digit 8390 if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false; 8391 8392 // parse integer part 8393 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string; 8394 8395 // parse decimal part 8396 if (*string == '.') 8397 { 8398 ++string; 8399 8400 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string; 8401 } 8402 8403 // parse trailing whitespace 8404 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string; 8405 8406 return *string == 0; 8407 } 8408 convert_string_to_number(const char_t * string)8409 PUGI__FN double convert_string_to_number(const char_t* string) 8410 { 8411 // check string format 8412 if (!check_string_to_number_format(string)) return gen_nan(); 8413 8414 // parse string 8415 #ifdef PUGIXML_WCHAR_MODE 8416 return wcstod(string, 0); 8417 #else 8418 return strtod(string, 0); 8419 #endif 8420 } 8421 convert_string_to_number_scratch(char_t (& buffer)[32],const char_t * begin,const char_t * end,double * out_result)8422 PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result) 8423 { 8424 size_t length = static_cast<size_t>(end - begin); 8425 char_t* scratch = buffer; 8426 8427 if (length >= sizeof(buffer) / sizeof(buffer[0])) 8428 { 8429 // need to make dummy on-heap copy 8430 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 8431 if (!scratch) return false; 8432 } 8433 8434 // copy string to zero-terminated buffer and perform conversion 8435 memcpy(scratch, begin, length * sizeof(char_t)); 8436 scratch[length] = 0; 8437 8438 *out_result = convert_string_to_number(scratch); 8439 8440 // free dummy buffer 8441 if (scratch != buffer) xml_memory::deallocate(scratch); 8442 8443 return true; 8444 } 8445 round_nearest(double value)8446 PUGI__FN double round_nearest(double value) 8447 { 8448 return floor(value + 0.5); 8449 } 8450 round_nearest_nzero(double value)8451 PUGI__FN double round_nearest_nzero(double value) 8452 { 8453 // same as round_nearest, but returns -0 for [-0.5, -0] 8454 // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0) 8455 return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5); 8456 } 8457 qualified_name(const xpath_node & node)8458 PUGI__FN const char_t* qualified_name(const xpath_node& node) 8459 { 8460 return node.attribute() ? node.attribute().name() : node.node().name(); 8461 } 8462 local_name(const xpath_node & node)8463 PUGI__FN const char_t* local_name(const xpath_node& node) 8464 { 8465 const char_t* name = qualified_name(node); 8466 const char_t* p = find_char(name, ':'); 8467 8468 return p ? p + 1 : name; 8469 } 8470 8471 struct namespace_uri_predicate 8472 { 8473 const char_t* prefix; 8474 size_t prefix_length; 8475 namespace_uri_predicatenamespace_uri_predicate8476 namespace_uri_predicate(const char_t* name) 8477 { 8478 const char_t* pos = find_char(name, ':'); 8479 8480 prefix = pos ? name : 0; 8481 prefix_length = pos ? static_cast<size_t>(pos - name) : 0; 8482 } 8483 operator ()namespace_uri_predicate8484 bool operator()(xml_attribute a) const 8485 { 8486 const char_t* name = a.name(); 8487 8488 if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false; 8489 8490 return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0; 8491 } 8492 }; 8493 namespace_uri(xml_node node)8494 PUGI__FN const char_t* namespace_uri(xml_node node) 8495 { 8496 namespace_uri_predicate pred = node.name(); 8497 8498 xml_node p = node; 8499 8500 while (p) 8501 { 8502 xml_attribute a = p.find_attribute(pred); 8503 8504 if (a) return a.value(); 8505 8506 p = p.parent(); 8507 } 8508 8509 return PUGIXML_TEXT(""); 8510 } 8511 namespace_uri(xml_attribute attr,xml_node parent)8512 PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent) 8513 { 8514 namespace_uri_predicate pred = attr.name(); 8515 8516 // Default namespace does not apply to attributes 8517 if (!pred.prefix) return PUGIXML_TEXT(""); 8518 8519 xml_node p = parent; 8520 8521 while (p) 8522 { 8523 xml_attribute a = p.find_attribute(pred); 8524 8525 if (a) return a.value(); 8526 8527 p = p.parent(); 8528 } 8529 8530 return PUGIXML_TEXT(""); 8531 } 8532 namespace_uri(const xpath_node & node)8533 PUGI__FN const char_t* namespace_uri(const xpath_node& node) 8534 { 8535 return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node()); 8536 } 8537 normalize_space(char_t * buffer)8538 PUGI__FN char_t* normalize_space(char_t* buffer) 8539 { 8540 char_t* write = buffer; 8541 8542 for (char_t* it = buffer; *it; ) 8543 { 8544 char_t ch = *it++; 8545 8546 if (PUGI__IS_CHARTYPE(ch, ct_space)) 8547 { 8548 // replace whitespace sequence with single space 8549 while (PUGI__IS_CHARTYPE(*it, ct_space)) it++; 8550 8551 // avoid leading spaces 8552 if (write != buffer) *write++ = ' '; 8553 } 8554 else *write++ = ch; 8555 } 8556 8557 // remove trailing space 8558 if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--; 8559 8560 // zero-terminate 8561 *write = 0; 8562 8563 return write; 8564 } 8565 translate(char_t * buffer,const char_t * from,const char_t * to,size_t to_length)8566 PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length) 8567 { 8568 char_t* write = buffer; 8569 8570 while (*buffer) 8571 { 8572 PUGI__DMC_VOLATILE char_t ch = *buffer++; 8573 8574 const char_t* pos = find_char(from, ch); 8575 8576 if (!pos) 8577 *write++ = ch; // do not process 8578 else if (static_cast<size_t>(pos - from) < to_length) 8579 *write++ = to[pos - from]; // replace 8580 } 8581 8582 // zero-terminate 8583 *write = 0; 8584 8585 return write; 8586 } 8587 translate_table_generate(xpath_allocator * alloc,const char_t * from,const char_t * to)8588 PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to) 8589 { 8590 unsigned char table[128] = {0}; 8591 8592 while (*from) 8593 { 8594 unsigned int fc = static_cast<unsigned int>(*from); 8595 unsigned int tc = static_cast<unsigned int>(*to); 8596 8597 if (fc >= 128 || tc >= 128) 8598 return 0; 8599 8600 // code=128 means "skip character" 8601 if (!table[fc]) 8602 table[fc] = static_cast<unsigned char>(tc ? tc : 128); 8603 8604 from++; 8605 if (tc) to++; 8606 } 8607 8608 for (int i = 0; i < 128; ++i) 8609 if (!table[i]) 8610 table[i] = static_cast<unsigned char>(i); 8611 8612 void* result = alloc->allocate(sizeof(table)); 8613 if (!result) return 0; 8614 8615 memcpy(result, table, sizeof(table)); 8616 8617 return static_cast<unsigned char*>(result); 8618 } 8619 translate_table(char_t * buffer,const unsigned char * table)8620 PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table) 8621 { 8622 char_t* write = buffer; 8623 8624 while (*buffer) 8625 { 8626 char_t ch = *buffer++; 8627 unsigned int index = static_cast<unsigned int>(ch); 8628 8629 if (index < 128) 8630 { 8631 unsigned char code = table[index]; 8632 8633 // code=128 means "skip character" (table size is 128 so 128 can be a special value) 8634 // this code skips these characters without extra branches 8635 *write = static_cast<char_t>(code); 8636 write += 1 - (code >> 7); 8637 } 8638 else 8639 { 8640 *write++ = ch; 8641 } 8642 } 8643 8644 // zero-terminate 8645 *write = 0; 8646 8647 return write; 8648 } 8649 is_xpath_attribute(const char_t * name)8650 inline bool is_xpath_attribute(const char_t* name) 8651 { 8652 return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')); 8653 } 8654 8655 struct xpath_variable_boolean: xpath_variable 8656 { xpath_variable_booleanxpath_variable_boolean8657 xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false) 8658 { 8659 } 8660 8661 bool value; 8662 char_t name[1]; 8663 }; 8664 8665 struct xpath_variable_number: xpath_variable 8666 { xpath_variable_numberxpath_variable_number8667 xpath_variable_number(): xpath_variable(xpath_type_number), value(0) 8668 { 8669 } 8670 8671 double value; 8672 char_t name[1]; 8673 }; 8674 8675 struct xpath_variable_string: xpath_variable 8676 { xpath_variable_stringxpath_variable_string8677 xpath_variable_string(): xpath_variable(xpath_type_string), value(0) 8678 { 8679 } 8680 ~xpath_variable_stringxpath_variable_string8681 ~xpath_variable_string() 8682 { 8683 if (value) xml_memory::deallocate(value); 8684 } 8685 8686 char_t* value; 8687 char_t name[1]; 8688 }; 8689 8690 struct xpath_variable_node_set: xpath_variable 8691 { xpath_variable_node_setxpath_variable_node_set8692 xpath_variable_node_set(): xpath_variable(xpath_type_node_set) 8693 { 8694 } 8695 8696 xpath_node_set value; 8697 char_t name[1]; 8698 }; 8699 8700 static const xpath_node_set dummy_node_set; 8701 hash_string(const char_t * str)8702 PUGI__FN PUGI__UNSIGNED_OVERFLOW unsigned int hash_string(const char_t* str) 8703 { 8704 // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time) 8705 unsigned int result = 0; 8706 8707 while (*str) 8708 { 8709 result += static_cast<unsigned int>(*str++); 8710 result += result << 10; 8711 result ^= result >> 6; 8712 } 8713 8714 result += result << 3; 8715 result ^= result >> 11; 8716 result += result << 15; 8717 8718 return result; 8719 } 8720 new_xpath_variable(const char_t * name)8721 template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name) 8722 { 8723 size_t length = strlength(name); 8724 if (length == 0) return 0; // empty variable names are invalid 8725 8726 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters 8727 void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t)); 8728 if (!memory) return 0; 8729 8730 T* result = new (memory) T(); 8731 8732 memcpy(result->name, name, (length + 1) * sizeof(char_t)); 8733 8734 return result; 8735 } 8736 new_xpath_variable(xpath_value_type type,const char_t * name)8737 PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name) 8738 { 8739 switch (type) 8740 { 8741 case xpath_type_node_set: 8742 return new_xpath_variable<xpath_variable_node_set>(name); 8743 8744 case xpath_type_number: 8745 return new_xpath_variable<xpath_variable_number>(name); 8746 8747 case xpath_type_string: 8748 return new_xpath_variable<xpath_variable_string>(name); 8749 8750 case xpath_type_boolean: 8751 return new_xpath_variable<xpath_variable_boolean>(name); 8752 8753 default: 8754 return 0; 8755 } 8756 } 8757 delete_xpath_variable(T * var)8758 template <typename T> PUGI__FN void delete_xpath_variable(T* var) 8759 { 8760 var->~T(); 8761 xml_memory::deallocate(var); 8762 } 8763 delete_xpath_variable(xpath_value_type type,xpath_variable * var)8764 PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var) 8765 { 8766 switch (type) 8767 { 8768 case xpath_type_node_set: 8769 delete_xpath_variable(static_cast<xpath_variable_node_set*>(var)); 8770 break; 8771 8772 case xpath_type_number: 8773 delete_xpath_variable(static_cast<xpath_variable_number*>(var)); 8774 break; 8775 8776 case xpath_type_string: 8777 delete_xpath_variable(static_cast<xpath_variable_string*>(var)); 8778 break; 8779 8780 case xpath_type_boolean: 8781 delete_xpath_variable(static_cast<xpath_variable_boolean*>(var)); 8782 break; 8783 8784 default: 8785 assert(false && "Invalid variable type"); // unreachable 8786 } 8787 } 8788 copy_xpath_variable(xpath_variable * lhs,const xpath_variable * rhs)8789 PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs) 8790 { 8791 switch (rhs->type()) 8792 { 8793 case xpath_type_node_set: 8794 return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value); 8795 8796 case xpath_type_number: 8797 return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value); 8798 8799 case xpath_type_string: 8800 return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value); 8801 8802 case xpath_type_boolean: 8803 return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value); 8804 8805 default: 8806 assert(false && "Invalid variable type"); // unreachable 8807 return false; 8808 } 8809 } 8810 get_variable_scratch(char_t (& buffer)[32],xpath_variable_set * set,const char_t * begin,const char_t * end,xpath_variable ** out_result)8811 PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result) 8812 { 8813 size_t length = static_cast<size_t>(end - begin); 8814 char_t* scratch = buffer; 8815 8816 if (length >= sizeof(buffer) / sizeof(buffer[0])) 8817 { 8818 // need to make dummy on-heap copy 8819 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 8820 if (!scratch) return false; 8821 } 8822 8823 // copy string to zero-terminated buffer and perform lookup 8824 memcpy(scratch, begin, length * sizeof(char_t)); 8825 scratch[length] = 0; 8826 8827 *out_result = set->get(scratch); 8828 8829 // free dummy buffer 8830 if (scratch != buffer) xml_memory::deallocate(scratch); 8831 8832 return true; 8833 } 8834 PUGI__NS_END 8835 8836 // Internal node set class 8837 PUGI__NS_BEGIN xpath_get_order(const xpath_node * begin,const xpath_node * end)8838 PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end) 8839 { 8840 if (end - begin < 2) 8841 return xpath_node_set::type_sorted; 8842 8843 document_order_comparator cmp; 8844 8845 bool first = cmp(begin[0], begin[1]); 8846 8847 for (const xpath_node* it = begin + 1; it + 1 < end; ++it) 8848 if (cmp(it[0], it[1]) != first) 8849 return xpath_node_set::type_unsorted; 8850 8851 return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse; 8852 } 8853 xpath_sort(xpath_node * begin,xpath_node * end,xpath_node_set::type_t type,bool rev)8854 PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev) 8855 { 8856 xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; 8857 8858 if (type == xpath_node_set::type_unsorted) 8859 { 8860 xpath_node_set::type_t sorted = xpath_get_order(begin, end); 8861 8862 if (sorted == xpath_node_set::type_unsorted) 8863 { 8864 sort(begin, end, document_order_comparator()); 8865 8866 type = xpath_node_set::type_sorted; 8867 } 8868 else 8869 type = sorted; 8870 } 8871 8872 if (type != order) reverse(begin, end); 8873 8874 return order; 8875 } 8876 xpath_first(const xpath_node * begin,const xpath_node * end,xpath_node_set::type_t type)8877 PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type) 8878 { 8879 if (begin == end) return xpath_node(); 8880 8881 switch (type) 8882 { 8883 case xpath_node_set::type_sorted: 8884 return *begin; 8885 8886 case xpath_node_set::type_sorted_reverse: 8887 return *(end - 1); 8888 8889 case xpath_node_set::type_unsorted: 8890 return *min_element(begin, end, document_order_comparator()); 8891 8892 default: 8893 assert(false && "Invalid node set type"); // unreachable 8894 return xpath_node(); 8895 } 8896 } 8897 8898 class xpath_node_set_raw 8899 { 8900 xpath_node_set::type_t _type; 8901 8902 xpath_node* _begin; 8903 xpath_node* _end; 8904 xpath_node* _eos; 8905 8906 public: xpath_node_set_raw()8907 xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0) 8908 { 8909 } 8910 begin() const8911 xpath_node* begin() const 8912 { 8913 return _begin; 8914 } 8915 end() const8916 xpath_node* end() const 8917 { 8918 return _end; 8919 } 8920 empty() const8921 bool empty() const 8922 { 8923 return _begin == _end; 8924 } 8925 size() const8926 size_t size() const 8927 { 8928 return static_cast<size_t>(_end - _begin); 8929 } 8930 first() const8931 xpath_node first() const 8932 { 8933 return xpath_first(_begin, _end, _type); 8934 } 8935 8936 void push_back_grow(const xpath_node& node, xpath_allocator* alloc); 8937 push_back(const xpath_node & node,xpath_allocator * alloc)8938 void push_back(const xpath_node& node, xpath_allocator* alloc) 8939 { 8940 if (_end != _eos) 8941 *_end++ = node; 8942 else 8943 push_back_grow(node, alloc); 8944 } 8945 append(const xpath_node * begin_,const xpath_node * end_,xpath_allocator * alloc)8946 void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc) 8947 { 8948 if (begin_ == end_) return; 8949 8950 size_t size_ = static_cast<size_t>(_end - _begin); 8951 size_t capacity = static_cast<size_t>(_eos - _begin); 8952 size_t count = static_cast<size_t>(end_ - begin_); 8953 8954 if (size_ + count > capacity) 8955 { 8956 // reallocate the old array or allocate a new one 8957 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node))); 8958 if (!data) return; 8959 8960 // finalize 8961 _begin = data; 8962 _end = data + size_; 8963 _eos = data + size_ + count; 8964 } 8965 8966 memcpy(_end, begin_, count * sizeof(xpath_node)); 8967 _end += count; 8968 } 8969 sort_do()8970 void sort_do() 8971 { 8972 _type = xpath_sort(_begin, _end, _type, false); 8973 } 8974 truncate(xpath_node * pos)8975 void truncate(xpath_node* pos) 8976 { 8977 assert(_begin <= pos && pos <= _end); 8978 8979 _end = pos; 8980 } 8981 remove_duplicates(xpath_allocator * alloc)8982 void remove_duplicates(xpath_allocator* alloc) 8983 { 8984 if (_type == xpath_node_set::type_unsorted && _end - _begin > 2) 8985 { 8986 xpath_allocator_capture cr(alloc); 8987 8988 size_t size_ = static_cast<size_t>(_end - _begin); 8989 8990 size_t hash_size = 1; 8991 while (hash_size < size_ + size_ / 2) hash_size *= 2; 8992 8993 const void** hash_data = static_cast<const void**>(alloc->allocate(hash_size * sizeof(void**))); 8994 if (!hash_data) return; 8995 8996 memset(hash_data, 0, hash_size * sizeof(const void**)); 8997 8998 xpath_node* write = _begin; 8999 9000 for (xpath_node* it = _begin; it != _end; ++it) 9001 { 9002 const void* attr = it->attribute().internal_object(); 9003 const void* node = it->node().internal_object(); 9004 const void* key = attr ? attr : node; 9005 9006 if (key && hash_insert(hash_data, hash_size, key)) 9007 { 9008 *write++ = *it; 9009 } 9010 } 9011 9012 _end = write; 9013 } 9014 else 9015 { 9016 _end = unique(_begin, _end); 9017 } 9018 } 9019 type() const9020 xpath_node_set::type_t type() const 9021 { 9022 return _type; 9023 } 9024 set_type(xpath_node_set::type_t value)9025 void set_type(xpath_node_set::type_t value) 9026 { 9027 _type = value; 9028 } 9029 }; 9030 push_back_grow(const xpath_node & node,xpath_allocator * alloc)9031 PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc) 9032 { 9033 size_t capacity = static_cast<size_t>(_eos - _begin); 9034 9035 // get new capacity (1.5x rule) 9036 size_t new_capacity = capacity + capacity / 2 + 1; 9037 9038 // reallocate the old array or allocate a new one 9039 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); 9040 if (!data) return; 9041 9042 // finalize 9043 _begin = data; 9044 _end = data + capacity; 9045 _eos = data + new_capacity; 9046 9047 // push 9048 *_end++ = node; 9049 } 9050 PUGI__NS_END 9051 9052 PUGI__NS_BEGIN 9053 struct xpath_context 9054 { 9055 xpath_node n; 9056 size_t position, size; 9057 xpath_contextxpath_context9058 xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_) 9059 { 9060 } 9061 }; 9062 9063 enum lexeme_t 9064 { 9065 lex_none = 0, 9066 lex_equal, 9067 lex_not_equal, 9068 lex_less, 9069 lex_greater, 9070 lex_less_or_equal, 9071 lex_greater_or_equal, 9072 lex_plus, 9073 lex_minus, 9074 lex_multiply, 9075 lex_union, 9076 lex_var_ref, 9077 lex_open_brace, 9078 lex_close_brace, 9079 lex_quoted_string, 9080 lex_number, 9081 lex_slash, 9082 lex_double_slash, 9083 lex_open_square_brace, 9084 lex_close_square_brace, 9085 lex_string, 9086 lex_comma, 9087 lex_axis_attribute, 9088 lex_dot, 9089 lex_double_dot, 9090 lex_double_colon, 9091 lex_eof 9092 }; 9093 9094 struct xpath_lexer_string 9095 { 9096 const char_t* begin; 9097 const char_t* end; 9098 xpath_lexer_stringxpath_lexer_string9099 xpath_lexer_string(): begin(0), end(0) 9100 { 9101 } 9102 operator ==xpath_lexer_string9103 bool operator==(const char_t* other) const 9104 { 9105 size_t length = static_cast<size_t>(end - begin); 9106 9107 return strequalrange(other, begin, length); 9108 } 9109 }; 9110 9111 class xpath_lexer 9112 { 9113 const char_t* _cur; 9114 const char_t* _cur_lexeme_pos; 9115 xpath_lexer_string _cur_lexeme_contents; 9116 9117 lexeme_t _cur_lexeme; 9118 9119 public: xpath_lexer(const char_t * query)9120 explicit xpath_lexer(const char_t* query): _cur(query) 9121 { 9122 next(); 9123 } 9124 state() const9125 const char_t* state() const 9126 { 9127 return _cur; 9128 } 9129 next()9130 void next() 9131 { 9132 const char_t* cur = _cur; 9133 9134 while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur; 9135 9136 // save lexeme position for error reporting 9137 _cur_lexeme_pos = cur; 9138 9139 switch (*cur) 9140 { 9141 case 0: 9142 _cur_lexeme = lex_eof; 9143 break; 9144 9145 case '>': 9146 if (*(cur+1) == '=') 9147 { 9148 cur += 2; 9149 _cur_lexeme = lex_greater_or_equal; 9150 } 9151 else 9152 { 9153 cur += 1; 9154 _cur_lexeme = lex_greater; 9155 } 9156 break; 9157 9158 case '<': 9159 if (*(cur+1) == '=') 9160 { 9161 cur += 2; 9162 _cur_lexeme = lex_less_or_equal; 9163 } 9164 else 9165 { 9166 cur += 1; 9167 _cur_lexeme = lex_less; 9168 } 9169 break; 9170 9171 case '!': 9172 if (*(cur+1) == '=') 9173 { 9174 cur += 2; 9175 _cur_lexeme = lex_not_equal; 9176 } 9177 else 9178 { 9179 _cur_lexeme = lex_none; 9180 } 9181 break; 9182 9183 case '=': 9184 cur += 1; 9185 _cur_lexeme = lex_equal; 9186 9187 break; 9188 9189 case '+': 9190 cur += 1; 9191 _cur_lexeme = lex_plus; 9192 9193 break; 9194 9195 case '-': 9196 cur += 1; 9197 _cur_lexeme = lex_minus; 9198 9199 break; 9200 9201 case '*': 9202 cur += 1; 9203 _cur_lexeme = lex_multiply; 9204 9205 break; 9206 9207 case '|': 9208 cur += 1; 9209 _cur_lexeme = lex_union; 9210 9211 break; 9212 9213 case '$': 9214 cur += 1; 9215 9216 if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) 9217 { 9218 _cur_lexeme_contents.begin = cur; 9219 9220 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; 9221 9222 if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname 9223 { 9224 cur++; // : 9225 9226 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; 9227 } 9228 9229 _cur_lexeme_contents.end = cur; 9230 9231 _cur_lexeme = lex_var_ref; 9232 } 9233 else 9234 { 9235 _cur_lexeme = lex_none; 9236 } 9237 9238 break; 9239 9240 case '(': 9241 cur += 1; 9242 _cur_lexeme = lex_open_brace; 9243 9244 break; 9245 9246 case ')': 9247 cur += 1; 9248 _cur_lexeme = lex_close_brace; 9249 9250 break; 9251 9252 case '[': 9253 cur += 1; 9254 _cur_lexeme = lex_open_square_brace; 9255 9256 break; 9257 9258 case ']': 9259 cur += 1; 9260 _cur_lexeme = lex_close_square_brace; 9261 9262 break; 9263 9264 case ',': 9265 cur += 1; 9266 _cur_lexeme = lex_comma; 9267 9268 break; 9269 9270 case '/': 9271 if (*(cur+1) == '/') 9272 { 9273 cur += 2; 9274 _cur_lexeme = lex_double_slash; 9275 } 9276 else 9277 { 9278 cur += 1; 9279 _cur_lexeme = lex_slash; 9280 } 9281 break; 9282 9283 case '.': 9284 if (*(cur+1) == '.') 9285 { 9286 cur += 2; 9287 _cur_lexeme = lex_double_dot; 9288 } 9289 else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit)) 9290 { 9291 _cur_lexeme_contents.begin = cur; // . 9292 9293 ++cur; 9294 9295 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; 9296 9297 _cur_lexeme_contents.end = cur; 9298 9299 _cur_lexeme = lex_number; 9300 } 9301 else 9302 { 9303 cur += 1; 9304 _cur_lexeme = lex_dot; 9305 } 9306 break; 9307 9308 case '@': 9309 cur += 1; 9310 _cur_lexeme = lex_axis_attribute; 9311 9312 break; 9313 9314 case '"': 9315 case '\'': 9316 { 9317 char_t terminator = *cur; 9318 9319 ++cur; 9320 9321 _cur_lexeme_contents.begin = cur; 9322 while (*cur && *cur != terminator) cur++; 9323 _cur_lexeme_contents.end = cur; 9324 9325 if (!*cur) 9326 _cur_lexeme = lex_none; 9327 else 9328 { 9329 cur += 1; 9330 _cur_lexeme = lex_quoted_string; 9331 } 9332 9333 break; 9334 } 9335 9336 case ':': 9337 if (*(cur+1) == ':') 9338 { 9339 cur += 2; 9340 _cur_lexeme = lex_double_colon; 9341 } 9342 else 9343 { 9344 _cur_lexeme = lex_none; 9345 } 9346 break; 9347 9348 default: 9349 if (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) 9350 { 9351 _cur_lexeme_contents.begin = cur; 9352 9353 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; 9354 9355 if (*cur == '.') 9356 { 9357 cur++; 9358 9359 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; 9360 } 9361 9362 _cur_lexeme_contents.end = cur; 9363 9364 _cur_lexeme = lex_number; 9365 } 9366 else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) 9367 { 9368 _cur_lexeme_contents.begin = cur; 9369 9370 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; 9371 9372 if (cur[0] == ':') 9373 { 9374 if (cur[1] == '*') // namespace test ncname:* 9375 { 9376 cur += 2; // :* 9377 } 9378 else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname 9379 { 9380 cur++; // : 9381 9382 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; 9383 } 9384 } 9385 9386 _cur_lexeme_contents.end = cur; 9387 9388 _cur_lexeme = lex_string; 9389 } 9390 else 9391 { 9392 _cur_lexeme = lex_none; 9393 } 9394 } 9395 9396 _cur = cur; 9397 } 9398 current() const9399 lexeme_t current() const 9400 { 9401 return _cur_lexeme; 9402 } 9403 current_pos() const9404 const char_t* current_pos() const 9405 { 9406 return _cur_lexeme_pos; 9407 } 9408 contents() const9409 const xpath_lexer_string& contents() const 9410 { 9411 assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string); 9412 9413 return _cur_lexeme_contents; 9414 } 9415 }; 9416 9417 enum ast_type_t 9418 { 9419 ast_unknown, 9420 ast_op_or, // left or right 9421 ast_op_and, // left and right 9422 ast_op_equal, // left = right 9423 ast_op_not_equal, // left != right 9424 ast_op_less, // left < right 9425 ast_op_greater, // left > right 9426 ast_op_less_or_equal, // left <= right 9427 ast_op_greater_or_equal, // left >= right 9428 ast_op_add, // left + right 9429 ast_op_subtract, // left - right 9430 ast_op_multiply, // left * right 9431 ast_op_divide, // left / right 9432 ast_op_mod, // left % right 9433 ast_op_negate, // left - right 9434 ast_op_union, // left | right 9435 ast_predicate, // apply predicate to set; next points to next predicate 9436 ast_filter, // select * from left where right 9437 ast_string_constant, // string constant 9438 ast_number_constant, // number constant 9439 ast_variable, // variable 9440 ast_func_last, // last() 9441 ast_func_position, // position() 9442 ast_func_count, // count(left) 9443 ast_func_id, // id(left) 9444 ast_func_local_name_0, // local-name() 9445 ast_func_local_name_1, // local-name(left) 9446 ast_func_namespace_uri_0, // namespace-uri() 9447 ast_func_namespace_uri_1, // namespace-uri(left) 9448 ast_func_name_0, // name() 9449 ast_func_name_1, // name(left) 9450 ast_func_string_0, // string() 9451 ast_func_string_1, // string(left) 9452 ast_func_concat, // concat(left, right, siblings) 9453 ast_func_starts_with, // starts_with(left, right) 9454 ast_func_contains, // contains(left, right) 9455 ast_func_substring_before, // substring-before(left, right) 9456 ast_func_substring_after, // substring-after(left, right) 9457 ast_func_substring_2, // substring(left, right) 9458 ast_func_substring_3, // substring(left, right, third) 9459 ast_func_string_length_0, // string-length() 9460 ast_func_string_length_1, // string-length(left) 9461 ast_func_normalize_space_0, // normalize-space() 9462 ast_func_normalize_space_1, // normalize-space(left) 9463 ast_func_translate, // translate(left, right, third) 9464 ast_func_boolean, // boolean(left) 9465 ast_func_not, // not(left) 9466 ast_func_true, // true() 9467 ast_func_false, // false() 9468 ast_func_lang, // lang(left) 9469 ast_func_number_0, // number() 9470 ast_func_number_1, // number(left) 9471 ast_func_sum, // sum(left) 9472 ast_func_floor, // floor(left) 9473 ast_func_ceiling, // ceiling(left) 9474 ast_func_round, // round(left) 9475 ast_step, // process set left with step 9476 ast_step_root, // select root node 9477 9478 ast_opt_translate_table, // translate(left, right, third) where right/third are constants 9479 ast_opt_compare_attribute // @name = 'string' 9480 }; 9481 9482 enum axis_t 9483 { 9484 axis_ancestor, 9485 axis_ancestor_or_self, 9486 axis_attribute, 9487 axis_child, 9488 axis_descendant, 9489 axis_descendant_or_self, 9490 axis_following, 9491 axis_following_sibling, 9492 axis_namespace, 9493 axis_parent, 9494 axis_preceding, 9495 axis_preceding_sibling, 9496 axis_self 9497 }; 9498 9499 enum nodetest_t 9500 { 9501 nodetest_none, 9502 nodetest_name, 9503 nodetest_type_node, 9504 nodetest_type_comment, 9505 nodetest_type_pi, 9506 nodetest_type_text, 9507 nodetest_pi, 9508 nodetest_all, 9509 nodetest_all_in_namespace 9510 }; 9511 9512 enum predicate_t 9513 { 9514 predicate_default, 9515 predicate_posinv, 9516 predicate_constant, 9517 predicate_constant_one 9518 }; 9519 9520 enum nodeset_eval_t 9521 { 9522 nodeset_eval_all, 9523 nodeset_eval_any, 9524 nodeset_eval_first 9525 }; 9526 9527 template <axis_t N> struct axis_to_type 9528 { 9529 static const axis_t axis; 9530 }; 9531 9532 template <axis_t N> const axis_t axis_to_type<N>::axis = N; 9533 9534 class xpath_ast_node 9535 { 9536 private: 9537 // node type 9538 char _type; 9539 char _rettype; 9540 9541 // for ast_step 9542 char _axis; 9543 9544 // for ast_step/ast_predicate/ast_filter 9545 char _test; 9546 9547 // tree node structure 9548 xpath_ast_node* _left; 9549 xpath_ast_node* _right; 9550 xpath_ast_node* _next; 9551 9552 union 9553 { 9554 // value for ast_string_constant 9555 const char_t* string; 9556 // value for ast_number_constant 9557 double number; 9558 // variable for ast_variable 9559 xpath_variable* variable; 9560 // node test for ast_step (node name/namespace/node type/pi target) 9561 const char_t* nodetest; 9562 // table for ast_opt_translate_table 9563 const unsigned char* table; 9564 } _data; 9565 9566 xpath_ast_node(const xpath_ast_node&); 9567 xpath_ast_node& operator=(const xpath_ast_node&); 9568 compare_eq(xpath_ast_node * lhs,xpath_ast_node * rhs,const xpath_context & c,const xpath_stack & stack,const Comp & comp)9569 template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) 9570 { 9571 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); 9572 9573 if (lt != xpath_type_node_set && rt != xpath_type_node_set) 9574 { 9575 if (lt == xpath_type_boolean || rt == xpath_type_boolean) 9576 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack)); 9577 else if (lt == xpath_type_number || rt == xpath_type_number) 9578 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack)); 9579 else if (lt == xpath_type_string || rt == xpath_type_string) 9580 { 9581 xpath_allocator_capture cr(stack.result); 9582 9583 xpath_string ls = lhs->eval_string(c, stack); 9584 xpath_string rs = rhs->eval_string(c, stack); 9585 9586 return comp(ls, rs); 9587 } 9588 } 9589 else if (lt == xpath_type_node_set && rt == xpath_type_node_set) 9590 { 9591 xpath_allocator_capture cr(stack.result); 9592 9593 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); 9594 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); 9595 9596 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) 9597 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) 9598 { 9599 xpath_allocator_capture cri(stack.result); 9600 9601 if (comp(string_value(*li, stack.result), string_value(*ri, stack.result))) 9602 return true; 9603 } 9604 9605 return false; 9606 } 9607 else 9608 { 9609 if (lt == xpath_type_node_set) 9610 { 9611 swap(lhs, rhs); 9612 swap(lt, rt); 9613 } 9614 9615 if (lt == xpath_type_boolean) 9616 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack)); 9617 else if (lt == xpath_type_number) 9618 { 9619 xpath_allocator_capture cr(stack.result); 9620 9621 double l = lhs->eval_number(c, stack); 9622 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); 9623 9624 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) 9625 { 9626 xpath_allocator_capture cri(stack.result); 9627 9628 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) 9629 return true; 9630 } 9631 9632 return false; 9633 } 9634 else if (lt == xpath_type_string) 9635 { 9636 xpath_allocator_capture cr(stack.result); 9637 9638 xpath_string l = lhs->eval_string(c, stack); 9639 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); 9640 9641 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) 9642 { 9643 xpath_allocator_capture cri(stack.result); 9644 9645 if (comp(l, string_value(*ri, stack.result))) 9646 return true; 9647 } 9648 9649 return false; 9650 } 9651 } 9652 9653 assert(false && "Wrong types"); // unreachable 9654 return false; 9655 } 9656 eval_once(xpath_node_set::type_t type,nodeset_eval_t eval)9657 static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval) 9658 { 9659 return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any; 9660 } 9661 compare_rel(xpath_ast_node * lhs,xpath_ast_node * rhs,const xpath_context & c,const xpath_stack & stack,const Comp & comp)9662 template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) 9663 { 9664 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); 9665 9666 if (lt != xpath_type_node_set && rt != xpath_type_node_set) 9667 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack)); 9668 else if (lt == xpath_type_node_set && rt == xpath_type_node_set) 9669 { 9670 xpath_allocator_capture cr(stack.result); 9671 9672 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); 9673 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); 9674 9675 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) 9676 { 9677 xpath_allocator_capture cri(stack.result); 9678 9679 double l = convert_string_to_number(string_value(*li, stack.result).c_str()); 9680 9681 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) 9682 { 9683 xpath_allocator_capture crii(stack.result); 9684 9685 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) 9686 return true; 9687 } 9688 } 9689 9690 return false; 9691 } 9692 else if (lt != xpath_type_node_set && rt == xpath_type_node_set) 9693 { 9694 xpath_allocator_capture cr(stack.result); 9695 9696 double l = lhs->eval_number(c, stack); 9697 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); 9698 9699 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) 9700 { 9701 xpath_allocator_capture cri(stack.result); 9702 9703 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) 9704 return true; 9705 } 9706 9707 return false; 9708 } 9709 else if (lt == xpath_type_node_set && rt != xpath_type_node_set) 9710 { 9711 xpath_allocator_capture cr(stack.result); 9712 9713 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); 9714 double r = rhs->eval_number(c, stack); 9715 9716 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) 9717 { 9718 xpath_allocator_capture cri(stack.result); 9719 9720 if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r)) 9721 return true; 9722 } 9723 9724 return false; 9725 } 9726 else 9727 { 9728 assert(false && "Wrong types"); // unreachable 9729 return false; 9730 } 9731 } 9732 apply_predicate_boolean(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack,bool once)9733 static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) 9734 { 9735 assert(ns.size() >= first); 9736 assert(expr->rettype() != xpath_type_number); 9737 9738 size_t i = 1; 9739 size_t size = ns.size() - first; 9740 9741 xpath_node* last = ns.begin() + first; 9742 9743 // remove_if... or well, sort of 9744 for (xpath_node* it = last; it != ns.end(); ++it, ++i) 9745 { 9746 xpath_context c(*it, i, size); 9747 9748 if (expr->eval_boolean(c, stack)) 9749 { 9750 *last++ = *it; 9751 9752 if (once) break; 9753 } 9754 } 9755 9756 ns.truncate(last); 9757 } 9758 apply_predicate_number(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack,bool once)9759 static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) 9760 { 9761 assert(ns.size() >= first); 9762 assert(expr->rettype() == xpath_type_number); 9763 9764 size_t i = 1; 9765 size_t size = ns.size() - first; 9766 9767 xpath_node* last = ns.begin() + first; 9768 9769 // remove_if... or well, sort of 9770 for (xpath_node* it = last; it != ns.end(); ++it, ++i) 9771 { 9772 xpath_context c(*it, i, size); 9773 9774 if (expr->eval_number(c, stack) == static_cast<double>(i)) 9775 { 9776 *last++ = *it; 9777 9778 if (once) break; 9779 } 9780 } 9781 9782 ns.truncate(last); 9783 } 9784 apply_predicate_number_const(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack)9785 static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) 9786 { 9787 assert(ns.size() >= first); 9788 assert(expr->rettype() == xpath_type_number); 9789 9790 size_t size = ns.size() - first; 9791 9792 xpath_node* last = ns.begin() + first; 9793 9794 xpath_context c(xpath_node(), 1, size); 9795 9796 double er = expr->eval_number(c, stack); 9797 9798 if (er >= 1.0 && er <= static_cast<double>(size)) 9799 { 9800 size_t eri = static_cast<size_t>(er); 9801 9802 if (er == static_cast<double>(eri)) 9803 { 9804 xpath_node r = last[eri - 1]; 9805 9806 *last++ = r; 9807 } 9808 } 9809 9810 ns.truncate(last); 9811 } 9812 apply_predicate(xpath_node_set_raw & ns,size_t first,const xpath_stack & stack,bool once)9813 void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once) 9814 { 9815 if (ns.size() == first) return; 9816 9817 assert(_type == ast_filter || _type == ast_predicate); 9818 9819 if (_test == predicate_constant || _test == predicate_constant_one) 9820 apply_predicate_number_const(ns, first, _right, stack); 9821 else if (_right->rettype() == xpath_type_number) 9822 apply_predicate_number(ns, first, _right, stack, once); 9823 else 9824 apply_predicate_boolean(ns, first, _right, stack, once); 9825 } 9826 apply_predicates(xpath_node_set_raw & ns,size_t first,const xpath_stack & stack,nodeset_eval_t eval)9827 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval) 9828 { 9829 if (ns.size() == first) return; 9830 9831 bool last_once = eval_once(ns.type(), eval); 9832 9833 for (xpath_ast_node* pred = _right; pred; pred = pred->_next) 9834 pred->apply_predicate(ns, first, stack, !pred->_next && last_once); 9835 } 9836 step_push(xpath_node_set_raw & ns,xml_attribute_struct * a,xml_node_struct * parent,xpath_allocator * alloc)9837 bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc) 9838 { 9839 assert(a); 9840 9841 const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT(""); 9842 9843 switch (_test) 9844 { 9845 case nodetest_name: 9846 if (strequal(name, _data.nodetest) && is_xpath_attribute(name)) 9847 { 9848 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); 9849 return true; 9850 } 9851 break; 9852 9853 case nodetest_type_node: 9854 case nodetest_all: 9855 if (is_xpath_attribute(name)) 9856 { 9857 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); 9858 return true; 9859 } 9860 break; 9861 9862 case nodetest_all_in_namespace: 9863 if (starts_with(name, _data.nodetest) && is_xpath_attribute(name)) 9864 { 9865 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); 9866 return true; 9867 } 9868 break; 9869 9870 default: 9871 ; 9872 } 9873 9874 return false; 9875 } 9876 step_push(xpath_node_set_raw & ns,xml_node_struct * n,xpath_allocator * alloc)9877 bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc) 9878 { 9879 assert(n); 9880 9881 xml_node_type type = PUGI__NODETYPE(n); 9882 9883 switch (_test) 9884 { 9885 case nodetest_name: 9886 if (type == node_element && n->name && strequal(n->name, _data.nodetest)) 9887 { 9888 ns.push_back(xml_node(n), alloc); 9889 return true; 9890 } 9891 break; 9892 9893 case nodetest_type_node: 9894 ns.push_back(xml_node(n), alloc); 9895 return true; 9896 9897 case nodetest_type_comment: 9898 if (type == node_comment) 9899 { 9900 ns.push_back(xml_node(n), alloc); 9901 return true; 9902 } 9903 break; 9904 9905 case nodetest_type_text: 9906 if (type == node_pcdata || type == node_cdata) 9907 { 9908 ns.push_back(xml_node(n), alloc); 9909 return true; 9910 } 9911 break; 9912 9913 case nodetest_type_pi: 9914 if (type == node_pi) 9915 { 9916 ns.push_back(xml_node(n), alloc); 9917 return true; 9918 } 9919 break; 9920 9921 case nodetest_pi: 9922 if (type == node_pi && n->name && strequal(n->name, _data.nodetest)) 9923 { 9924 ns.push_back(xml_node(n), alloc); 9925 return true; 9926 } 9927 break; 9928 9929 case nodetest_all: 9930 if (type == node_element) 9931 { 9932 ns.push_back(xml_node(n), alloc); 9933 return true; 9934 } 9935 break; 9936 9937 case nodetest_all_in_namespace: 9938 if (type == node_element && n->name && starts_with(n->name, _data.nodetest)) 9939 { 9940 ns.push_back(xml_node(n), alloc); 9941 return true; 9942 } 9943 break; 9944 9945 default: 9946 assert(false && "Unknown axis"); // unreachable 9947 } 9948 9949 return false; 9950 } 9951 step_fill(xpath_node_set_raw & ns,xml_node_struct * n,xpath_allocator * alloc,bool once,T)9952 template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T) 9953 { 9954 const axis_t axis = T::axis; 9955 9956 switch (axis) 9957 { 9958 case axis_attribute: 9959 { 9960 for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute) 9961 if (step_push(ns, a, n, alloc) & once) 9962 return; 9963 9964 break; 9965 } 9966 9967 case axis_child: 9968 { 9969 for (xml_node_struct* c = n->first_child; c; c = c->next_sibling) 9970 if (step_push(ns, c, alloc) & once) 9971 return; 9972 9973 break; 9974 } 9975 9976 case axis_descendant: 9977 case axis_descendant_or_self: 9978 { 9979 if (axis == axis_descendant_or_self) 9980 if (step_push(ns, n, alloc) & once) 9981 return; 9982 9983 xml_node_struct* cur = n->first_child; 9984 9985 while (cur) 9986 { 9987 if (step_push(ns, cur, alloc) & once) 9988 return; 9989 9990 if (cur->first_child) 9991 cur = cur->first_child; 9992 else 9993 { 9994 while (!cur->next_sibling) 9995 { 9996 cur = cur->parent; 9997 9998 if (cur == n) return; 9999 } 10000 10001 cur = cur->next_sibling; 10002 } 10003 } 10004 10005 break; 10006 } 10007 10008 case axis_following_sibling: 10009 { 10010 for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling) 10011 if (step_push(ns, c, alloc) & once) 10012 return; 10013 10014 break; 10015 } 10016 10017 case axis_preceding_sibling: 10018 { 10019 for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c) 10020 if (step_push(ns, c, alloc) & once) 10021 return; 10022 10023 break; 10024 } 10025 10026 case axis_following: 10027 { 10028 xml_node_struct* cur = n; 10029 10030 // exit from this node so that we don't include descendants 10031 while (!cur->next_sibling) 10032 { 10033 cur = cur->parent; 10034 10035 if (!cur) return; 10036 } 10037 10038 cur = cur->next_sibling; 10039 10040 while (cur) 10041 { 10042 if (step_push(ns, cur, alloc) & once) 10043 return; 10044 10045 if (cur->first_child) 10046 cur = cur->first_child; 10047 else 10048 { 10049 while (!cur->next_sibling) 10050 { 10051 cur = cur->parent; 10052 10053 if (!cur) return; 10054 } 10055 10056 cur = cur->next_sibling; 10057 } 10058 } 10059 10060 break; 10061 } 10062 10063 case axis_preceding: 10064 { 10065 xml_node_struct* cur = n; 10066 10067 // exit from this node so that we don't include descendants 10068 while (!cur->prev_sibling_c->next_sibling) 10069 { 10070 cur = cur->parent; 10071 10072 if (!cur) return; 10073 } 10074 10075 cur = cur->prev_sibling_c; 10076 10077 while (cur) 10078 { 10079 if (cur->first_child) 10080 cur = cur->first_child->prev_sibling_c; 10081 else 10082 { 10083 // leaf node, can't be ancestor 10084 if (step_push(ns, cur, alloc) & once) 10085 return; 10086 10087 while (!cur->prev_sibling_c->next_sibling) 10088 { 10089 cur = cur->parent; 10090 10091 if (!cur) return; 10092 10093 if (!node_is_ancestor(cur, n)) 10094 if (step_push(ns, cur, alloc) & once) 10095 return; 10096 } 10097 10098 cur = cur->prev_sibling_c; 10099 } 10100 } 10101 10102 break; 10103 } 10104 10105 case axis_ancestor: 10106 case axis_ancestor_or_self: 10107 { 10108 if (axis == axis_ancestor_or_self) 10109 if (step_push(ns, n, alloc) & once) 10110 return; 10111 10112 xml_node_struct* cur = n->parent; 10113 10114 while (cur) 10115 { 10116 if (step_push(ns, cur, alloc) & once) 10117 return; 10118 10119 cur = cur->parent; 10120 } 10121 10122 break; 10123 } 10124 10125 case axis_self: 10126 { 10127 step_push(ns, n, alloc); 10128 10129 break; 10130 } 10131 10132 case axis_parent: 10133 { 10134 if (n->parent) 10135 step_push(ns, n->parent, alloc); 10136 10137 break; 10138 } 10139 10140 default: 10141 assert(false && "Unimplemented axis"); // unreachable 10142 } 10143 } 10144 step_fill(xpath_node_set_raw & ns,xml_attribute_struct * a,xml_node_struct * p,xpath_allocator * alloc,bool once,T v)10145 template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v) 10146 { 10147 const axis_t axis = T::axis; 10148 10149 switch (axis) 10150 { 10151 case axis_ancestor: 10152 case axis_ancestor_or_self: 10153 { 10154 if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test 10155 if (step_push(ns, a, p, alloc) & once) 10156 return; 10157 10158 xml_node_struct* cur = p; 10159 10160 while (cur) 10161 { 10162 if (step_push(ns, cur, alloc) & once) 10163 return; 10164 10165 cur = cur->parent; 10166 } 10167 10168 break; 10169 } 10170 10171 case axis_descendant_or_self: 10172 case axis_self: 10173 { 10174 if (_test == nodetest_type_node) // reject attributes based on principal node type test 10175 step_push(ns, a, p, alloc); 10176 10177 break; 10178 } 10179 10180 case axis_following: 10181 { 10182 xml_node_struct* cur = p; 10183 10184 while (cur) 10185 { 10186 if (cur->first_child) 10187 cur = cur->first_child; 10188 else 10189 { 10190 while (!cur->next_sibling) 10191 { 10192 cur = cur->parent; 10193 10194 if (!cur) return; 10195 } 10196 10197 cur = cur->next_sibling; 10198 } 10199 10200 if (step_push(ns, cur, alloc) & once) 10201 return; 10202 } 10203 10204 break; 10205 } 10206 10207 case axis_parent: 10208 { 10209 step_push(ns, p, alloc); 10210 10211 break; 10212 } 10213 10214 case axis_preceding: 10215 { 10216 // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding 10217 step_fill(ns, p, alloc, once, v); 10218 break; 10219 } 10220 10221 default: 10222 assert(false && "Unimplemented axis"); // unreachable 10223 } 10224 } 10225 step_fill(xpath_node_set_raw & ns,const xpath_node & xn,xpath_allocator * alloc,bool once,T v)10226 template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v) 10227 { 10228 const axis_t axis = T::axis; 10229 const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self); 10230 10231 if (xn.node()) 10232 step_fill(ns, xn.node().internal_object(), alloc, once, v); 10233 else if (axis_has_attributes && xn.attribute() && xn.parent()) 10234 step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v); 10235 } 10236 step_do(const xpath_context & c,const xpath_stack & stack,nodeset_eval_t eval,T v)10237 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v) 10238 { 10239 const axis_t axis = T::axis; 10240 const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling); 10241 const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; 10242 10243 bool once = 10244 (axis == axis_attribute && _test == nodetest_name) || 10245 (!_right && eval_once(axis_type, eval)) || 10246 // coverity[mixed_enums] 10247 (_right && !_right->_next && _right->_test == predicate_constant_one); 10248 10249 xpath_node_set_raw ns; 10250 ns.set_type(axis_type); 10251 10252 if (_left) 10253 { 10254 xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all); 10255 10256 // self axis preserves the original order 10257 if (axis == axis_self) ns.set_type(s.type()); 10258 10259 for (const xpath_node* it = s.begin(); it != s.end(); ++it) 10260 { 10261 size_t size = ns.size(); 10262 10263 // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes 10264 if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted); 10265 10266 step_fill(ns, *it, stack.result, once, v); 10267 if (_right) apply_predicates(ns, size, stack, eval); 10268 } 10269 } 10270 else 10271 { 10272 step_fill(ns, c.n, stack.result, once, v); 10273 if (_right) apply_predicates(ns, 0, stack, eval); 10274 } 10275 10276 // child, attribute and self axes always generate unique set of nodes 10277 // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice 10278 if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted) 10279 ns.remove_duplicates(stack.temp); 10280 10281 return ns; 10282 } 10283 10284 public: xpath_ast_node(ast_type_t type,xpath_value_type rettype_,const char_t * value)10285 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value): 10286 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) 10287 { 10288 assert(type == ast_string_constant); 10289 _data.string = value; 10290 } 10291 xpath_ast_node(ast_type_t type,xpath_value_type rettype_,double value)10292 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value): 10293 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) 10294 { 10295 assert(type == ast_number_constant); 10296 _data.number = value; 10297 } 10298 xpath_ast_node(ast_type_t type,xpath_value_type rettype_,xpath_variable * value)10299 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value): 10300 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) 10301 { 10302 assert(type == ast_variable); 10303 _data.variable = value; 10304 } 10305 xpath_ast_node(ast_type_t type,xpath_value_type rettype_,xpath_ast_node * left=0,xpath_ast_node * right=0)10306 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0): 10307 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0) 10308 { 10309 } 10310 xpath_ast_node(ast_type_t type,xpath_ast_node * left,axis_t axis,nodetest_t test,const char_t * contents)10311 xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents): 10312 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0) 10313 { 10314 assert(type == ast_step); 10315 _data.nodetest = contents; 10316 } 10317 xpath_ast_node(ast_type_t type,xpath_ast_node * left,xpath_ast_node * right,predicate_t test)10318 xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test): 10319 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0) 10320 { 10321 assert(type == ast_filter || type == ast_predicate); 10322 } 10323 set_next(xpath_ast_node * value)10324 void set_next(xpath_ast_node* value) 10325 { 10326 _next = value; 10327 } 10328 set_right(xpath_ast_node * value)10329 void set_right(xpath_ast_node* value) 10330 { 10331 _right = value; 10332 } 10333 eval_boolean(const xpath_context & c,const xpath_stack & stack)10334 bool eval_boolean(const xpath_context& c, const xpath_stack& stack) 10335 { 10336 switch (_type) 10337 { 10338 case ast_op_or: 10339 return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack); 10340 10341 case ast_op_and: 10342 return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack); 10343 10344 case ast_op_equal: 10345 return compare_eq(_left, _right, c, stack, equal_to()); 10346 10347 case ast_op_not_equal: 10348 return compare_eq(_left, _right, c, stack, not_equal_to()); 10349 10350 case ast_op_less: 10351 return compare_rel(_left, _right, c, stack, less()); 10352 10353 case ast_op_greater: 10354 return compare_rel(_right, _left, c, stack, less()); 10355 10356 case ast_op_less_or_equal: 10357 return compare_rel(_left, _right, c, stack, less_equal()); 10358 10359 case ast_op_greater_or_equal: 10360 return compare_rel(_right, _left, c, stack, less_equal()); 10361 10362 case ast_func_starts_with: 10363 { 10364 xpath_allocator_capture cr(stack.result); 10365 10366 xpath_string lr = _left->eval_string(c, stack); 10367 xpath_string rr = _right->eval_string(c, stack); 10368 10369 return starts_with(lr.c_str(), rr.c_str()); 10370 } 10371 10372 case ast_func_contains: 10373 { 10374 xpath_allocator_capture cr(stack.result); 10375 10376 xpath_string lr = _left->eval_string(c, stack); 10377 xpath_string rr = _right->eval_string(c, stack); 10378 10379 return find_substring(lr.c_str(), rr.c_str()) != 0; 10380 } 10381 10382 case ast_func_boolean: 10383 return _left->eval_boolean(c, stack); 10384 10385 case ast_func_not: 10386 return !_left->eval_boolean(c, stack); 10387 10388 case ast_func_true: 10389 return true; 10390 10391 case ast_func_false: 10392 return false; 10393 10394 case ast_func_lang: 10395 { 10396 if (c.n.attribute()) return false; 10397 10398 xpath_allocator_capture cr(stack.result); 10399 10400 xpath_string lang = _left->eval_string(c, stack); 10401 10402 for (xml_node n = c.n.node(); n; n = n.parent()) 10403 { 10404 xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang")); 10405 10406 if (a) 10407 { 10408 const char_t* value = a.value(); 10409 10410 // strnicmp / strncasecmp is not portable 10411 for (const char_t* lit = lang.c_str(); *lit; ++lit) 10412 { 10413 if (tolower_ascii(*lit) != tolower_ascii(*value)) return false; 10414 ++value; 10415 } 10416 10417 return *value == 0 || *value == '-'; 10418 } 10419 } 10420 10421 return false; 10422 } 10423 10424 case ast_opt_compare_attribute: 10425 { 10426 const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string(); 10427 10428 xml_attribute attr = c.n.node().attribute(_left->_data.nodetest); 10429 10430 return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name()); 10431 } 10432 10433 case ast_variable: 10434 { 10435 assert(_rettype == _data.variable->type()); 10436 10437 if (_rettype == xpath_type_boolean) 10438 return _data.variable->get_boolean(); 10439 10440 // variable needs to be converted to the correct type, this is handled by the fallthrough block below 10441 break; 10442 } 10443 10444 default: 10445 ; 10446 } 10447 10448 // none of the ast types that return the value directly matched, we need to perform type conversion 10449 switch (_rettype) 10450 { 10451 case xpath_type_number: 10452 return convert_number_to_boolean(eval_number(c, stack)); 10453 10454 case xpath_type_string: 10455 { 10456 xpath_allocator_capture cr(stack.result); 10457 10458 return !eval_string(c, stack).empty(); 10459 } 10460 10461 case xpath_type_node_set: 10462 { 10463 xpath_allocator_capture cr(stack.result); 10464 10465 return !eval_node_set(c, stack, nodeset_eval_any).empty(); 10466 } 10467 10468 default: 10469 assert(false && "Wrong expression for return type boolean"); // unreachable 10470 return false; 10471 } 10472 } 10473 eval_number(const xpath_context & c,const xpath_stack & stack)10474 double eval_number(const xpath_context& c, const xpath_stack& stack) 10475 { 10476 switch (_type) 10477 { 10478 case ast_op_add: 10479 return _left->eval_number(c, stack) + _right->eval_number(c, stack); 10480 10481 case ast_op_subtract: 10482 return _left->eval_number(c, stack) - _right->eval_number(c, stack); 10483 10484 case ast_op_multiply: 10485 return _left->eval_number(c, stack) * _right->eval_number(c, stack); 10486 10487 case ast_op_divide: 10488 return _left->eval_number(c, stack) / _right->eval_number(c, stack); 10489 10490 case ast_op_mod: 10491 return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack)); 10492 10493 case ast_op_negate: 10494 return -_left->eval_number(c, stack); 10495 10496 case ast_number_constant: 10497 return _data.number; 10498 10499 case ast_func_last: 10500 return static_cast<double>(c.size); 10501 10502 case ast_func_position: 10503 return static_cast<double>(c.position); 10504 10505 case ast_func_count: 10506 { 10507 xpath_allocator_capture cr(stack.result); 10508 10509 return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size()); 10510 } 10511 10512 case ast_func_string_length_0: 10513 { 10514 xpath_allocator_capture cr(stack.result); 10515 10516 return static_cast<double>(string_value(c.n, stack.result).length()); 10517 } 10518 10519 case ast_func_string_length_1: 10520 { 10521 xpath_allocator_capture cr(stack.result); 10522 10523 return static_cast<double>(_left->eval_string(c, stack).length()); 10524 } 10525 10526 case ast_func_number_0: 10527 { 10528 xpath_allocator_capture cr(stack.result); 10529 10530 return convert_string_to_number(string_value(c.n, stack.result).c_str()); 10531 } 10532 10533 case ast_func_number_1: 10534 return _left->eval_number(c, stack); 10535 10536 case ast_func_sum: 10537 { 10538 xpath_allocator_capture cr(stack.result); 10539 10540 double r = 0; 10541 10542 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all); 10543 10544 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it) 10545 { 10546 xpath_allocator_capture cri(stack.result); 10547 10548 r += convert_string_to_number(string_value(*it, stack.result).c_str()); 10549 } 10550 10551 return r; 10552 } 10553 10554 case ast_func_floor: 10555 { 10556 double r = _left->eval_number(c, stack); 10557 10558 return r == r ? floor(r) : r; 10559 } 10560 10561 case ast_func_ceiling: 10562 { 10563 double r = _left->eval_number(c, stack); 10564 10565 return r == r ? ceil(r) : r; 10566 } 10567 10568 case ast_func_round: 10569 return round_nearest_nzero(_left->eval_number(c, stack)); 10570 10571 case ast_variable: 10572 { 10573 assert(_rettype == _data.variable->type()); 10574 10575 if (_rettype == xpath_type_number) 10576 return _data.variable->get_number(); 10577 10578 // variable needs to be converted to the correct type, this is handled by the fallthrough block below 10579 break; 10580 } 10581 10582 default: 10583 ; 10584 } 10585 10586 // none of the ast types that return the value directly matched, we need to perform type conversion 10587 switch (_rettype) 10588 { 10589 case xpath_type_boolean: 10590 return eval_boolean(c, stack) ? 1 : 0; 10591 10592 case xpath_type_string: 10593 { 10594 xpath_allocator_capture cr(stack.result); 10595 10596 return convert_string_to_number(eval_string(c, stack).c_str()); 10597 } 10598 10599 case xpath_type_node_set: 10600 { 10601 xpath_allocator_capture cr(stack.result); 10602 10603 return convert_string_to_number(eval_string(c, stack).c_str()); 10604 } 10605 10606 default: 10607 assert(false && "Wrong expression for return type number"); // unreachable 10608 return 0; 10609 } 10610 } 10611 eval_string_concat(const xpath_context & c,const xpath_stack & stack)10612 xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack) 10613 { 10614 assert(_type == ast_func_concat); 10615 10616 xpath_allocator_capture ct(stack.temp); 10617 10618 // count the string number 10619 size_t count = 1; 10620 for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++; 10621 10622 // allocate a buffer for temporary string objects 10623 xpath_string* buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string))); 10624 if (!buffer) return xpath_string(); 10625 10626 // evaluate all strings to temporary stack 10627 xpath_stack swapped_stack = {stack.temp, stack.result}; 10628 10629 buffer[0] = _left->eval_string(c, swapped_stack); 10630 10631 size_t pos = 1; 10632 for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack); 10633 assert(pos == count); 10634 10635 // get total length 10636 size_t length = 0; 10637 for (size_t i = 0; i < count; ++i) length += buffer[i].length(); 10638 10639 // create final string 10640 char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t))); 10641 if (!result) return xpath_string(); 10642 10643 char_t* ri = result; 10644 10645 for (size_t j = 0; j < count; ++j) 10646 for (const char_t* bi = buffer[j].c_str(); *bi; ++bi) 10647 *ri++ = *bi; 10648 10649 *ri = 0; 10650 10651 return xpath_string::from_heap_preallocated(result, ri); 10652 } 10653 eval_string(const xpath_context & c,const xpath_stack & stack)10654 xpath_string eval_string(const xpath_context& c, const xpath_stack& stack) 10655 { 10656 switch (_type) 10657 { 10658 case ast_string_constant: 10659 return xpath_string::from_const(_data.string); 10660 10661 case ast_func_local_name_0: 10662 { 10663 xpath_node na = c.n; 10664 10665 return xpath_string::from_const(local_name(na)); 10666 } 10667 10668 case ast_func_local_name_1: 10669 { 10670 xpath_allocator_capture cr(stack.result); 10671 10672 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); 10673 xpath_node na = ns.first(); 10674 10675 return xpath_string::from_const(local_name(na)); 10676 } 10677 10678 case ast_func_name_0: 10679 { 10680 xpath_node na = c.n; 10681 10682 return xpath_string::from_const(qualified_name(na)); 10683 } 10684 10685 case ast_func_name_1: 10686 { 10687 xpath_allocator_capture cr(stack.result); 10688 10689 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); 10690 xpath_node na = ns.first(); 10691 10692 return xpath_string::from_const(qualified_name(na)); 10693 } 10694 10695 case ast_func_namespace_uri_0: 10696 { 10697 xpath_node na = c.n; 10698 10699 return xpath_string::from_const(namespace_uri(na)); 10700 } 10701 10702 case ast_func_namespace_uri_1: 10703 { 10704 xpath_allocator_capture cr(stack.result); 10705 10706 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); 10707 xpath_node na = ns.first(); 10708 10709 return xpath_string::from_const(namespace_uri(na)); 10710 } 10711 10712 case ast_func_string_0: 10713 return string_value(c.n, stack.result); 10714 10715 case ast_func_string_1: 10716 return _left->eval_string(c, stack); 10717 10718 case ast_func_concat: 10719 return eval_string_concat(c, stack); 10720 10721 case ast_func_substring_before: 10722 { 10723 xpath_allocator_capture cr(stack.temp); 10724 10725 xpath_stack swapped_stack = {stack.temp, stack.result}; 10726 10727 xpath_string s = _left->eval_string(c, swapped_stack); 10728 xpath_string p = _right->eval_string(c, swapped_stack); 10729 10730 const char_t* pos = find_substring(s.c_str(), p.c_str()); 10731 10732 return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string(); 10733 } 10734 10735 case ast_func_substring_after: 10736 { 10737 xpath_allocator_capture cr(stack.temp); 10738 10739 xpath_stack swapped_stack = {stack.temp, stack.result}; 10740 10741 xpath_string s = _left->eval_string(c, swapped_stack); 10742 xpath_string p = _right->eval_string(c, swapped_stack); 10743 10744 const char_t* pos = find_substring(s.c_str(), p.c_str()); 10745 if (!pos) return xpath_string(); 10746 10747 const char_t* rbegin = pos + p.length(); 10748 const char_t* rend = s.c_str() + s.length(); 10749 10750 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); 10751 } 10752 10753 case ast_func_substring_2: 10754 { 10755 xpath_allocator_capture cr(stack.temp); 10756 10757 xpath_stack swapped_stack = {stack.temp, stack.result}; 10758 10759 xpath_string s = _left->eval_string(c, swapped_stack); 10760 size_t s_length = s.length(); 10761 10762 double first = round_nearest(_right->eval_number(c, stack)); 10763 10764 if (is_nan(first)) return xpath_string(); // NaN 10765 else if (first >= static_cast<double>(s_length + 1)) return xpath_string(); 10766 10767 size_t pos = first < 1 ? 1 : static_cast<size_t>(first); 10768 assert(1 <= pos && pos <= s_length + 1); 10769 10770 const char_t* rbegin = s.c_str() + (pos - 1); 10771 const char_t* rend = s.c_str() + s.length(); 10772 10773 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); 10774 } 10775 10776 case ast_func_substring_3: 10777 { 10778 xpath_allocator_capture cr(stack.temp); 10779 10780 xpath_stack swapped_stack = {stack.temp, stack.result}; 10781 10782 xpath_string s = _left->eval_string(c, swapped_stack); 10783 size_t s_length = s.length(); 10784 10785 double first = round_nearest(_right->eval_number(c, stack)); 10786 double last = first + round_nearest(_right->_next->eval_number(c, stack)); 10787 10788 if (is_nan(first) || is_nan(last)) return xpath_string(); 10789 else if (first >= static_cast<double>(s_length + 1)) return xpath_string(); 10790 else if (first >= last) return xpath_string(); 10791 else if (last < 1) return xpath_string(); 10792 10793 size_t pos = first < 1 ? 1 : static_cast<size_t>(first); 10794 size_t end = last >= static_cast<double>(s_length + 1) ? s_length + 1 : static_cast<size_t>(last); 10795 10796 assert(1 <= pos && pos <= end && end <= s_length + 1); 10797 const char_t* rbegin = s.c_str() + (pos - 1); 10798 const char_t* rend = s.c_str() + (end - 1); 10799 10800 return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result); 10801 } 10802 10803 case ast_func_normalize_space_0: 10804 { 10805 xpath_string s = string_value(c.n, stack.result); 10806 10807 char_t* begin = s.data(stack.result); 10808 if (!begin) return xpath_string(); 10809 10810 char_t* end = normalize_space(begin); 10811 10812 return xpath_string::from_heap_preallocated(begin, end); 10813 } 10814 10815 case ast_func_normalize_space_1: 10816 { 10817 xpath_string s = _left->eval_string(c, stack); 10818 10819 char_t* begin = s.data(stack.result); 10820 if (!begin) return xpath_string(); 10821 10822 char_t* end = normalize_space(begin); 10823 10824 return xpath_string::from_heap_preallocated(begin, end); 10825 } 10826 10827 case ast_func_translate: 10828 { 10829 xpath_allocator_capture cr(stack.temp); 10830 10831 xpath_stack swapped_stack = {stack.temp, stack.result}; 10832 10833 xpath_string s = _left->eval_string(c, stack); 10834 xpath_string from = _right->eval_string(c, swapped_stack); 10835 xpath_string to = _right->_next->eval_string(c, swapped_stack); 10836 10837 char_t* begin = s.data(stack.result); 10838 if (!begin) return xpath_string(); 10839 10840 char_t* end = translate(begin, from.c_str(), to.c_str(), to.length()); 10841 10842 return xpath_string::from_heap_preallocated(begin, end); 10843 } 10844 10845 case ast_opt_translate_table: 10846 { 10847 xpath_string s = _left->eval_string(c, stack); 10848 10849 char_t* begin = s.data(stack.result); 10850 if (!begin) return xpath_string(); 10851 10852 char_t* end = translate_table(begin, _data.table); 10853 10854 return xpath_string::from_heap_preallocated(begin, end); 10855 } 10856 10857 case ast_variable: 10858 { 10859 assert(_rettype == _data.variable->type()); 10860 10861 if (_rettype == xpath_type_string) 10862 return xpath_string::from_const(_data.variable->get_string()); 10863 10864 // variable needs to be converted to the correct type, this is handled by the fallthrough block below 10865 break; 10866 } 10867 10868 default: 10869 ; 10870 } 10871 10872 // none of the ast types that return the value directly matched, we need to perform type conversion 10873 switch (_rettype) 10874 { 10875 case xpath_type_boolean: 10876 return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); 10877 10878 case xpath_type_number: 10879 return convert_number_to_string(eval_number(c, stack), stack.result); 10880 10881 case xpath_type_node_set: 10882 { 10883 xpath_allocator_capture cr(stack.temp); 10884 10885 xpath_stack swapped_stack = {stack.temp, stack.result}; 10886 10887 xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first); 10888 return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result); 10889 } 10890 10891 default: 10892 assert(false && "Wrong expression for return type string"); // unreachable 10893 return xpath_string(); 10894 } 10895 } 10896 eval_node_set(const xpath_context & c,const xpath_stack & stack,nodeset_eval_t eval)10897 xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval) 10898 { 10899 switch (_type) 10900 { 10901 case ast_op_union: 10902 { 10903 xpath_allocator_capture cr(stack.temp); 10904 10905 xpath_stack swapped_stack = {stack.temp, stack.result}; 10906 10907 xpath_node_set_raw ls = _left->eval_node_set(c, stack, eval); 10908 xpath_node_set_raw rs = _right->eval_node_set(c, swapped_stack, eval); 10909 10910 // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother 10911 ls.set_type(xpath_node_set::type_unsorted); 10912 10913 ls.append(rs.begin(), rs.end(), stack.result); 10914 ls.remove_duplicates(stack.temp); 10915 10916 return ls; 10917 } 10918 10919 case ast_filter: 10920 { 10921 xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all); 10922 10923 // either expression is a number or it contains position() call; sort by document order 10924 if (_test != predicate_posinv) set.sort_do(); 10925 10926 bool once = eval_once(set.type(), eval); 10927 10928 apply_predicate(set, 0, stack, once); 10929 10930 return set; 10931 } 10932 10933 case ast_func_id: 10934 return xpath_node_set_raw(); 10935 10936 case ast_step: 10937 { 10938 switch (_axis) 10939 { 10940 case axis_ancestor: 10941 return step_do(c, stack, eval, axis_to_type<axis_ancestor>()); 10942 10943 case axis_ancestor_or_self: 10944 return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>()); 10945 10946 case axis_attribute: 10947 return step_do(c, stack, eval, axis_to_type<axis_attribute>()); 10948 10949 case axis_child: 10950 return step_do(c, stack, eval, axis_to_type<axis_child>()); 10951 10952 case axis_descendant: 10953 return step_do(c, stack, eval, axis_to_type<axis_descendant>()); 10954 10955 case axis_descendant_or_self: 10956 return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>()); 10957 10958 case axis_following: 10959 return step_do(c, stack, eval, axis_to_type<axis_following>()); 10960 10961 case axis_following_sibling: 10962 return step_do(c, stack, eval, axis_to_type<axis_following_sibling>()); 10963 10964 case axis_namespace: 10965 // namespaced axis is not supported 10966 return xpath_node_set_raw(); 10967 10968 case axis_parent: 10969 return step_do(c, stack, eval, axis_to_type<axis_parent>()); 10970 10971 case axis_preceding: 10972 return step_do(c, stack, eval, axis_to_type<axis_preceding>()); 10973 10974 case axis_preceding_sibling: 10975 return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>()); 10976 10977 case axis_self: 10978 return step_do(c, stack, eval, axis_to_type<axis_self>()); 10979 10980 default: 10981 assert(false && "Unknown axis"); // unreachable 10982 return xpath_node_set_raw(); 10983 } 10984 } 10985 10986 case ast_step_root: 10987 { 10988 assert(!_right); // root step can't have any predicates 10989 10990 xpath_node_set_raw ns; 10991 10992 ns.set_type(xpath_node_set::type_sorted); 10993 10994 if (c.n.node()) ns.push_back(c.n.node().root(), stack.result); 10995 else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result); 10996 10997 return ns; 10998 } 10999 11000 case ast_variable: 11001 { 11002 assert(_rettype == _data.variable->type()); 11003 11004 if (_rettype == xpath_type_node_set) 11005 { 11006 const xpath_node_set& s = _data.variable->get_node_set(); 11007 11008 xpath_node_set_raw ns; 11009 11010 ns.set_type(s.type()); 11011 ns.append(s.begin(), s.end(), stack.result); 11012 11013 return ns; 11014 } 11015 11016 // variable needs to be converted to the correct type, this is handled by the fallthrough block below 11017 break; 11018 } 11019 11020 default: 11021 ; 11022 } 11023 11024 // none of the ast types that return the value directly matched, but conversions to node set are invalid 11025 assert(false && "Wrong expression for return type node set"); // unreachable 11026 return xpath_node_set_raw(); 11027 } 11028 optimize(xpath_allocator * alloc)11029 void optimize(xpath_allocator* alloc) 11030 { 11031 if (_left) 11032 _left->optimize(alloc); 11033 11034 if (_right) 11035 _right->optimize(alloc); 11036 11037 if (_next) 11038 _next->optimize(alloc); 11039 11040 // coverity[var_deref_model] 11041 optimize_self(alloc); 11042 } 11043 optimize_self(xpath_allocator * alloc)11044 void optimize_self(xpath_allocator* alloc) 11045 { 11046 // Rewrite [position()=expr] with [expr] 11047 // Note that this step has to go before classification to recognize [position()=1] 11048 if ((_type == ast_filter || _type == ast_predicate) && 11049 _right && // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate) 11050 _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number) 11051 { 11052 _right = _right->_right; 11053 } 11054 11055 // Classify filter/predicate ops to perform various optimizations during evaluation 11056 if ((_type == ast_filter || _type == ast_predicate) && _right) // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate) 11057 { 11058 assert(_test == predicate_default); 11059 11060 if (_right->_type == ast_number_constant && _right->_data.number == 1.0) 11061 _test = predicate_constant_one; 11062 else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last)) 11063 _test = predicate_constant; 11064 else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr()) 11065 _test = predicate_posinv; 11066 } 11067 11068 // Rewrite descendant-or-self::node()/child::foo with descendant::foo 11069 // The former is a full form of //foo, the latter is much faster since it executes the node test immediately 11070 // Do a similar kind of rewrite for self/descendant/descendant-or-self axes 11071 // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1]) 11072 if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && 11073 _left && _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right && 11074 is_posinv_step()) 11075 { 11076 if (_axis == axis_child || _axis == axis_descendant) 11077 _axis = axis_descendant; 11078 else 11079 _axis = axis_descendant_or_self; 11080 11081 _left = _left->_left; 11082 } 11083 11084 // Use optimized lookup table implementation for translate() with constant arguments 11085 if (_type == ast_func_translate && 11086 _right && // workaround for clang static analyzer (_right is never null for ast_func_translate) 11087 _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant) 11088 { 11089 unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string); 11090 11091 if (table) 11092 { 11093 _type = ast_opt_translate_table; 11094 _data.table = table; 11095 } 11096 } 11097 11098 // Use optimized path for @attr = 'value' or @attr = $value 11099 if (_type == ast_op_equal && 11100 _left && _right && // workaround for clang static analyzer and Coverity (_left and _right are never null for ast_op_equal) 11101 // coverity[mixed_enums] 11102 _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right && 11103 (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string))) 11104 { 11105 _type = ast_opt_compare_attribute; 11106 } 11107 } 11108 is_posinv_expr() const11109 bool is_posinv_expr() const 11110 { 11111 switch (_type) 11112 { 11113 case ast_func_position: 11114 case ast_func_last: 11115 return false; 11116 11117 case ast_string_constant: 11118 case ast_number_constant: 11119 case ast_variable: 11120 return true; 11121 11122 case ast_step: 11123 case ast_step_root: 11124 return true; 11125 11126 case ast_predicate: 11127 case ast_filter: 11128 return true; 11129 11130 default: 11131 if (_left && !_left->is_posinv_expr()) return false; 11132 11133 for (xpath_ast_node* n = _right; n; n = n->_next) 11134 if (!n->is_posinv_expr()) return false; 11135 11136 return true; 11137 } 11138 } 11139 is_posinv_step() const11140 bool is_posinv_step() const 11141 { 11142 assert(_type == ast_step); 11143 11144 for (xpath_ast_node* n = _right; n; n = n->_next) 11145 { 11146 assert(n->_type == ast_predicate); 11147 11148 if (n->_test != predicate_posinv) 11149 return false; 11150 } 11151 11152 return true; 11153 } 11154 rettype() const11155 xpath_value_type rettype() const 11156 { 11157 return static_cast<xpath_value_type>(_rettype); 11158 } 11159 }; 11160 11161 static const size_t xpath_ast_depth_limit = 11162 #ifdef PUGIXML_XPATH_DEPTH_LIMIT 11163 PUGIXML_XPATH_DEPTH_LIMIT 11164 #else 11165 1024 11166 #endif 11167 ; 11168 11169 struct xpath_parser 11170 { 11171 xpath_allocator* _alloc; 11172 xpath_lexer _lexer; 11173 11174 const char_t* _query; 11175 xpath_variable_set* _variables; 11176 11177 xpath_parse_result* _result; 11178 11179 char_t _scratch[32]; 11180 11181 size_t _depth; 11182 errorxpath_parser11183 xpath_ast_node* error(const char* message) 11184 { 11185 _result->error = message; 11186 _result->offset = _lexer.current_pos() - _query; 11187 11188 return 0; 11189 } 11190 error_oomxpath_parser11191 xpath_ast_node* error_oom() 11192 { 11193 assert(_alloc->_error); 11194 *_alloc->_error = true; 11195 11196 return 0; 11197 } 11198 error_recxpath_parser11199 xpath_ast_node* error_rec() 11200 { 11201 return error("Exceeded maximum allowed query depth"); 11202 } 11203 alloc_nodexpath_parser11204 void* alloc_node() 11205 { 11206 return _alloc->allocate(sizeof(xpath_ast_node)); 11207 } 11208 alloc_nodexpath_parser11209 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value) 11210 { 11211 void* memory = alloc_node(); 11212 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; 11213 } 11214 alloc_nodexpath_parser11215 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value) 11216 { 11217 void* memory = alloc_node(); 11218 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; 11219 } 11220 alloc_nodexpath_parser11221 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value) 11222 { 11223 void* memory = alloc_node(); 11224 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; 11225 } 11226 alloc_nodexpath_parser11227 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0) 11228 { 11229 void* memory = alloc_node(); 11230 return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0; 11231 } 11232 alloc_nodexpath_parser11233 xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents) 11234 { 11235 void* memory = alloc_node(); 11236 return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0; 11237 } 11238 alloc_nodexpath_parser11239 xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test) 11240 { 11241 void* memory = alloc_node(); 11242 return memory ? new (memory) xpath_ast_node(type, left, right, test) : 0; 11243 } 11244 alloc_stringxpath_parser11245 const char_t* alloc_string(const xpath_lexer_string& value) 11246 { 11247 if (!value.begin) 11248 return PUGIXML_TEXT(""); 11249 11250 size_t length = static_cast<size_t>(value.end - value.begin); 11251 11252 char_t* c = static_cast<char_t*>(_alloc->allocate((length + 1) * sizeof(char_t))); 11253 if (!c) return 0; 11254 11255 memcpy(c, value.begin, length * sizeof(char_t)); 11256 c[length] = 0; 11257 11258 return c; 11259 } 11260 parse_functionxpath_parser11261 xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) 11262 { 11263 switch (name.begin[0]) 11264 { 11265 case 'b': 11266 if (name == PUGIXML_TEXT("boolean") && argc == 1) 11267 return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]); 11268 11269 break; 11270 11271 case 'c': 11272 if (name == PUGIXML_TEXT("count") && argc == 1) 11273 { 11274 if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); 11275 return alloc_node(ast_func_count, xpath_type_number, args[0]); 11276 } 11277 else if (name == PUGIXML_TEXT("contains") && argc == 2) 11278 return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]); 11279 else if (name == PUGIXML_TEXT("concat") && argc >= 2) 11280 return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]); 11281 else if (name == PUGIXML_TEXT("ceiling") && argc == 1) 11282 return alloc_node(ast_func_ceiling, xpath_type_number, args[0]); 11283 11284 break; 11285 11286 case 'f': 11287 if (name == PUGIXML_TEXT("false") && argc == 0) 11288 return alloc_node(ast_func_false, xpath_type_boolean); 11289 else if (name == PUGIXML_TEXT("floor") && argc == 1) 11290 return alloc_node(ast_func_floor, xpath_type_number, args[0]); 11291 11292 break; 11293 11294 case 'i': 11295 if (name == PUGIXML_TEXT("id") && argc == 1) 11296 return alloc_node(ast_func_id, xpath_type_node_set, args[0]); 11297 11298 break; 11299 11300 case 'l': 11301 if (name == PUGIXML_TEXT("last") && argc == 0) 11302 return alloc_node(ast_func_last, xpath_type_number); 11303 else if (name == PUGIXML_TEXT("lang") && argc == 1) 11304 return alloc_node(ast_func_lang, xpath_type_boolean, args[0]); 11305 else if (name == PUGIXML_TEXT("local-name") && argc <= 1) 11306 { 11307 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); 11308 return alloc_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]); 11309 } 11310 11311 break; 11312 11313 case 'n': 11314 if (name == PUGIXML_TEXT("name") && argc <= 1) 11315 { 11316 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); 11317 return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]); 11318 } 11319 else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1) 11320 { 11321 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); 11322 return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]); 11323 } 11324 else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) 11325 return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); 11326 else if (name == PUGIXML_TEXT("not") && argc == 1) 11327 return alloc_node(ast_func_not, xpath_type_boolean, args[0]); 11328 else if (name == PUGIXML_TEXT("number") && argc <= 1) 11329 return alloc_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); 11330 11331 break; 11332 11333 case 'p': 11334 if (name == PUGIXML_TEXT("position") && argc == 0) 11335 return alloc_node(ast_func_position, xpath_type_number); 11336 11337 break; 11338 11339 case 'r': 11340 if (name == PUGIXML_TEXT("round") && argc == 1) 11341 return alloc_node(ast_func_round, xpath_type_number, args[0]); 11342 11343 break; 11344 11345 case 's': 11346 if (name == PUGIXML_TEXT("string") && argc <= 1) 11347 return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); 11348 else if (name == PUGIXML_TEXT("string-length") && argc <= 1) 11349 return alloc_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]); 11350 else if (name == PUGIXML_TEXT("starts-with") && argc == 2) 11351 return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); 11352 else if (name == PUGIXML_TEXT("substring-before") && argc == 2) 11353 return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); 11354 else if (name == PUGIXML_TEXT("substring-after") && argc == 2) 11355 return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); 11356 else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3)) 11357 return alloc_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); 11358 else if (name == PUGIXML_TEXT("sum") && argc == 1) 11359 { 11360 if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); 11361 return alloc_node(ast_func_sum, xpath_type_number, args[0]); 11362 } 11363 11364 break; 11365 11366 case 't': 11367 if (name == PUGIXML_TEXT("translate") && argc == 3) 11368 return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]); 11369 else if (name == PUGIXML_TEXT("true") && argc == 0) 11370 return alloc_node(ast_func_true, xpath_type_boolean); 11371 11372 break; 11373 11374 default: 11375 break; 11376 } 11377 11378 return error("Unrecognized function or wrong parameter count"); 11379 } 11380 parse_axis_namexpath_parser11381 axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) 11382 { 11383 specified = true; 11384 11385 switch (name.begin[0]) 11386 { 11387 case 'a': 11388 if (name == PUGIXML_TEXT("ancestor")) 11389 return axis_ancestor; 11390 else if (name == PUGIXML_TEXT("ancestor-or-self")) 11391 return axis_ancestor_or_self; 11392 else if (name == PUGIXML_TEXT("attribute")) 11393 return axis_attribute; 11394 11395 break; 11396 11397 case 'c': 11398 if (name == PUGIXML_TEXT("child")) 11399 return axis_child; 11400 11401 break; 11402 11403 case 'd': 11404 if (name == PUGIXML_TEXT("descendant")) 11405 return axis_descendant; 11406 else if (name == PUGIXML_TEXT("descendant-or-self")) 11407 return axis_descendant_or_self; 11408 11409 break; 11410 11411 case 'f': 11412 if (name == PUGIXML_TEXT("following")) 11413 return axis_following; 11414 else if (name == PUGIXML_TEXT("following-sibling")) 11415 return axis_following_sibling; 11416 11417 break; 11418 11419 case 'n': 11420 if (name == PUGIXML_TEXT("namespace")) 11421 return axis_namespace; 11422 11423 break; 11424 11425 case 'p': 11426 if (name == PUGIXML_TEXT("parent")) 11427 return axis_parent; 11428 else if (name == PUGIXML_TEXT("preceding")) 11429 return axis_preceding; 11430 else if (name == PUGIXML_TEXT("preceding-sibling")) 11431 return axis_preceding_sibling; 11432 11433 break; 11434 11435 case 's': 11436 if (name == PUGIXML_TEXT("self")) 11437 return axis_self; 11438 11439 break; 11440 11441 default: 11442 break; 11443 } 11444 11445 specified = false; 11446 return axis_child; 11447 } 11448 parse_node_test_typexpath_parser11449 nodetest_t parse_node_test_type(const xpath_lexer_string& name) 11450 { 11451 switch (name.begin[0]) 11452 { 11453 case 'c': 11454 if (name == PUGIXML_TEXT("comment")) 11455 return nodetest_type_comment; 11456 11457 break; 11458 11459 case 'n': 11460 if (name == PUGIXML_TEXT("node")) 11461 return nodetest_type_node; 11462 11463 break; 11464 11465 case 'p': 11466 if (name == PUGIXML_TEXT("processing-instruction")) 11467 return nodetest_type_pi; 11468 11469 break; 11470 11471 case 't': 11472 if (name == PUGIXML_TEXT("text")) 11473 return nodetest_type_text; 11474 11475 break; 11476 11477 default: 11478 break; 11479 } 11480 11481 return nodetest_none; 11482 } 11483 11484 // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall parse_primary_expressionxpath_parser11485 xpath_ast_node* parse_primary_expression() 11486 { 11487 switch (_lexer.current()) 11488 { 11489 case lex_var_ref: 11490 { 11491 xpath_lexer_string name = _lexer.contents(); 11492 11493 if (!_variables) 11494 return error("Unknown variable: variable set is not provided"); 11495 11496 xpath_variable* var = 0; 11497 if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var)) 11498 return error_oom(); 11499 11500 if (!var) 11501 return error("Unknown variable: variable set does not contain the given name"); 11502 11503 _lexer.next(); 11504 11505 return alloc_node(ast_variable, var->type(), var); 11506 } 11507 11508 case lex_open_brace: 11509 { 11510 _lexer.next(); 11511 11512 xpath_ast_node* n = parse_expression(); 11513 if (!n) return 0; 11514 11515 if (_lexer.current() != lex_close_brace) 11516 return error("Expected ')' to match an opening '('"); 11517 11518 _lexer.next(); 11519 11520 return n; 11521 } 11522 11523 case lex_quoted_string: 11524 { 11525 const char_t* value = alloc_string(_lexer.contents()); 11526 if (!value) return 0; 11527 11528 _lexer.next(); 11529 11530 return alloc_node(ast_string_constant, xpath_type_string, value); 11531 } 11532 11533 case lex_number: 11534 { 11535 double value = 0; 11536 11537 if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value)) 11538 return error_oom(); 11539 11540 _lexer.next(); 11541 11542 return alloc_node(ast_number_constant, xpath_type_number, value); 11543 } 11544 11545 case lex_string: 11546 { 11547 xpath_ast_node* args[2] = {0}; 11548 size_t argc = 0; 11549 11550 xpath_lexer_string function = _lexer.contents(); 11551 _lexer.next(); 11552 11553 xpath_ast_node* last_arg = 0; 11554 11555 if (_lexer.current() != lex_open_brace) 11556 return error("Unrecognized function call"); 11557 _lexer.next(); 11558 11559 size_t old_depth = _depth; 11560 11561 while (_lexer.current() != lex_close_brace) 11562 { 11563 if (argc > 0) 11564 { 11565 if (_lexer.current() != lex_comma) 11566 return error("No comma between function arguments"); 11567 _lexer.next(); 11568 } 11569 11570 if (++_depth > xpath_ast_depth_limit) 11571 return error_rec(); 11572 11573 xpath_ast_node* n = parse_expression(); 11574 if (!n) return 0; 11575 11576 if (argc < 2) args[argc] = n; 11577 else last_arg->set_next(n); 11578 11579 argc++; 11580 last_arg = n; 11581 } 11582 11583 _lexer.next(); 11584 11585 _depth = old_depth; 11586 11587 return parse_function(function, argc, args); 11588 } 11589 11590 default: 11591 return error("Unrecognizable primary expression"); 11592 } 11593 } 11594 11595 // FilterExpr ::= PrimaryExpr | FilterExpr Predicate 11596 // Predicate ::= '[' PredicateExpr ']' 11597 // PredicateExpr ::= Expr parse_filter_expressionxpath_parser11598 xpath_ast_node* parse_filter_expression() 11599 { 11600 xpath_ast_node* n = parse_primary_expression(); 11601 if (!n) return 0; 11602 11603 size_t old_depth = _depth; 11604 11605 while (_lexer.current() == lex_open_square_brace) 11606 { 11607 _lexer.next(); 11608 11609 if (++_depth > xpath_ast_depth_limit) 11610 return error_rec(); 11611 11612 if (n->rettype() != xpath_type_node_set) 11613 return error("Predicate has to be applied to node set"); 11614 11615 xpath_ast_node* expr = parse_expression(); 11616 if (!expr) return 0; 11617 11618 n = alloc_node(ast_filter, n, expr, predicate_default); 11619 if (!n) return 0; 11620 11621 if (_lexer.current() != lex_close_square_brace) 11622 return error("Expected ']' to match an opening '['"); 11623 11624 _lexer.next(); 11625 } 11626 11627 _depth = old_depth; 11628 11629 return n; 11630 } 11631 11632 // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep 11633 // AxisSpecifier ::= AxisName '::' | '@'? 11634 // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')' 11635 // NameTest ::= '*' | NCName ':' '*' | QName 11636 // AbbreviatedStep ::= '.' | '..' parse_stepxpath_parser11637 xpath_ast_node* parse_step(xpath_ast_node* set) 11638 { 11639 if (set && set->rettype() != xpath_type_node_set) 11640 return error("Step has to be applied to node set"); 11641 11642 bool axis_specified = false; 11643 axis_t axis = axis_child; // implied child axis 11644 11645 if (_lexer.current() == lex_axis_attribute) 11646 { 11647 axis = axis_attribute; 11648 axis_specified = true; 11649 11650 _lexer.next(); 11651 } 11652 else if (_lexer.current() == lex_dot) 11653 { 11654 _lexer.next(); 11655 11656 if (_lexer.current() == lex_open_square_brace) 11657 return error("Predicates are not allowed after an abbreviated step"); 11658 11659 return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0); 11660 } 11661 else if (_lexer.current() == lex_double_dot) 11662 { 11663 _lexer.next(); 11664 11665 if (_lexer.current() == lex_open_square_brace) 11666 return error("Predicates are not allowed after an abbreviated step"); 11667 11668 return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0); 11669 } 11670 11671 nodetest_t nt_type = nodetest_none; 11672 xpath_lexer_string nt_name; 11673 11674 if (_lexer.current() == lex_string) 11675 { 11676 // node name test 11677 nt_name = _lexer.contents(); 11678 _lexer.next(); 11679 11680 // was it an axis name? 11681 if (_lexer.current() == lex_double_colon) 11682 { 11683 // parse axis name 11684 if (axis_specified) 11685 return error("Two axis specifiers in one step"); 11686 11687 axis = parse_axis_name(nt_name, axis_specified); 11688 11689 if (!axis_specified) 11690 return error("Unknown axis"); 11691 11692 // read actual node test 11693 _lexer.next(); 11694 11695 if (_lexer.current() == lex_multiply) 11696 { 11697 nt_type = nodetest_all; 11698 nt_name = xpath_lexer_string(); 11699 _lexer.next(); 11700 } 11701 else if (_lexer.current() == lex_string) 11702 { 11703 nt_name = _lexer.contents(); 11704 _lexer.next(); 11705 } 11706 else 11707 { 11708 return error("Unrecognized node test"); 11709 } 11710 } 11711 11712 if (nt_type == nodetest_none) 11713 { 11714 // node type test or processing-instruction 11715 if (_lexer.current() == lex_open_brace) 11716 { 11717 _lexer.next(); 11718 11719 if (_lexer.current() == lex_close_brace) 11720 { 11721 _lexer.next(); 11722 11723 nt_type = parse_node_test_type(nt_name); 11724 11725 if (nt_type == nodetest_none) 11726 return error("Unrecognized node type"); 11727 11728 nt_name = xpath_lexer_string(); 11729 } 11730 else if (nt_name == PUGIXML_TEXT("processing-instruction")) 11731 { 11732 if (_lexer.current() != lex_quoted_string) 11733 return error("Only literals are allowed as arguments to processing-instruction()"); 11734 11735 nt_type = nodetest_pi; 11736 nt_name = _lexer.contents(); 11737 _lexer.next(); 11738 11739 if (_lexer.current() != lex_close_brace) 11740 return error("Unmatched brace near processing-instruction()"); 11741 _lexer.next(); 11742 } 11743 else 11744 { 11745 return error("Unmatched brace near node type test"); 11746 } 11747 } 11748 // QName or NCName:* 11749 else 11750 { 11751 if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:* 11752 { 11753 nt_name.end--; // erase * 11754 11755 nt_type = nodetest_all_in_namespace; 11756 } 11757 else 11758 { 11759 nt_type = nodetest_name; 11760 } 11761 } 11762 } 11763 } 11764 else if (_lexer.current() == lex_multiply) 11765 { 11766 nt_type = nodetest_all; 11767 _lexer.next(); 11768 } 11769 else 11770 { 11771 return error("Unrecognized node test"); 11772 } 11773 11774 const char_t* nt_name_copy = alloc_string(nt_name); 11775 if (!nt_name_copy) return 0; 11776 11777 xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy); 11778 if (!n) return 0; 11779 11780 size_t old_depth = _depth; 11781 11782 xpath_ast_node* last = 0; 11783 11784 while (_lexer.current() == lex_open_square_brace) 11785 { 11786 _lexer.next(); 11787 11788 if (++_depth > xpath_ast_depth_limit) 11789 return error_rec(); 11790 11791 xpath_ast_node* expr = parse_expression(); 11792 if (!expr) return 0; 11793 11794 xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default); 11795 if (!pred) return 0; 11796 11797 if (_lexer.current() != lex_close_square_brace) 11798 return error("Expected ']' to match an opening '['"); 11799 _lexer.next(); 11800 11801 if (last) last->set_next(pred); 11802 else n->set_right(pred); 11803 11804 last = pred; 11805 } 11806 11807 _depth = old_depth; 11808 11809 return n; 11810 } 11811 11812 // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step parse_relative_location_pathxpath_parser11813 xpath_ast_node* parse_relative_location_path(xpath_ast_node* set) 11814 { 11815 xpath_ast_node* n = parse_step(set); 11816 if (!n) return 0; 11817 11818 size_t old_depth = _depth; 11819 11820 while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) 11821 { 11822 lexeme_t l = _lexer.current(); 11823 _lexer.next(); 11824 11825 if (++_depth > xpath_ast_depth_limit) 11826 return error_rec(); 11827 11828 if (l == lex_double_slash) 11829 { 11830 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); 11831 if (!n) return 0; 11832 } 11833 11834 n = parse_step(n); 11835 if (!n) return 0; 11836 } 11837 11838 _depth = old_depth; 11839 11840 return n; 11841 } 11842 11843 // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath 11844 // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath parse_location_pathxpath_parser11845 xpath_ast_node* parse_location_path() 11846 { 11847 if (_lexer.current() == lex_slash) 11848 { 11849 _lexer.next(); 11850 11851 xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set); 11852 if (!n) return 0; 11853 11854 // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path 11855 lexeme_t l = _lexer.current(); 11856 11857 if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply) 11858 return parse_relative_location_path(n); 11859 else 11860 return n; 11861 } 11862 else if (_lexer.current() == lex_double_slash) 11863 { 11864 _lexer.next(); 11865 11866 xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set); 11867 if (!n) return 0; 11868 11869 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); 11870 if (!n) return 0; 11871 11872 return parse_relative_location_path(n); 11873 } 11874 11875 // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1 11876 return parse_relative_location_path(0); 11877 } 11878 11879 // PathExpr ::= LocationPath 11880 // | FilterExpr 11881 // | FilterExpr '/' RelativeLocationPath 11882 // | FilterExpr '//' RelativeLocationPath 11883 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr 11884 // UnaryExpr ::= UnionExpr | '-' UnaryExpr parse_path_or_unary_expressionxpath_parser11885 xpath_ast_node* parse_path_or_unary_expression() 11886 { 11887 // Clarification. 11888 // PathExpr begins with either LocationPath or FilterExpr. 11889 // FilterExpr begins with PrimaryExpr 11890 // PrimaryExpr begins with '$' in case of it being a variable reference, 11891 // '(' in case of it being an expression, string literal, number constant or 11892 // function call. 11893 if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace || 11894 _lexer.current() == lex_quoted_string || _lexer.current() == lex_number || 11895 _lexer.current() == lex_string) 11896 { 11897 if (_lexer.current() == lex_string) 11898 { 11899 // This is either a function call, or not - if not, we shall proceed with location path 11900 const char_t* state = _lexer.state(); 11901 11902 while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state; 11903 11904 if (*state != '(') 11905 return parse_location_path(); 11906 11907 // This looks like a function call; however this still can be a node-test. Check it. 11908 if (parse_node_test_type(_lexer.contents()) != nodetest_none) 11909 return parse_location_path(); 11910 } 11911 11912 xpath_ast_node* n = parse_filter_expression(); 11913 if (!n) return 0; 11914 11915 if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) 11916 { 11917 lexeme_t l = _lexer.current(); 11918 _lexer.next(); 11919 11920 if (l == lex_double_slash) 11921 { 11922 if (n->rettype() != xpath_type_node_set) 11923 return error("Step has to be applied to node set"); 11924 11925 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); 11926 if (!n) return 0; 11927 } 11928 11929 // select from location path 11930 return parse_relative_location_path(n); 11931 } 11932 11933 return n; 11934 } 11935 else if (_lexer.current() == lex_minus) 11936 { 11937 _lexer.next(); 11938 11939 // precedence 7+ - only parses union expressions 11940 xpath_ast_node* n = parse_expression(7); 11941 if (!n) return 0; 11942 11943 return alloc_node(ast_op_negate, xpath_type_number, n); 11944 } 11945 else 11946 { 11947 return parse_location_path(); 11948 } 11949 } 11950 11951 struct binary_op_t 11952 { 11953 ast_type_t asttype; 11954 xpath_value_type rettype; 11955 int precedence; 11956 binary_op_txpath_parser::binary_op_t11957 binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0) 11958 { 11959 } 11960 binary_op_txpath_parser::binary_op_t11961 binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_) 11962 { 11963 } 11964 parsexpath_parser::binary_op_t11965 static binary_op_t parse(xpath_lexer& lexer) 11966 { 11967 switch (lexer.current()) 11968 { 11969 case lex_string: 11970 if (lexer.contents() == PUGIXML_TEXT("or")) 11971 return binary_op_t(ast_op_or, xpath_type_boolean, 1); 11972 else if (lexer.contents() == PUGIXML_TEXT("and")) 11973 return binary_op_t(ast_op_and, xpath_type_boolean, 2); 11974 else if (lexer.contents() == PUGIXML_TEXT("div")) 11975 return binary_op_t(ast_op_divide, xpath_type_number, 6); 11976 else if (lexer.contents() == PUGIXML_TEXT("mod")) 11977 return binary_op_t(ast_op_mod, xpath_type_number, 6); 11978 else 11979 return binary_op_t(); 11980 11981 case lex_equal: 11982 return binary_op_t(ast_op_equal, xpath_type_boolean, 3); 11983 11984 case lex_not_equal: 11985 return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3); 11986 11987 case lex_less: 11988 return binary_op_t(ast_op_less, xpath_type_boolean, 4); 11989 11990 case lex_greater: 11991 return binary_op_t(ast_op_greater, xpath_type_boolean, 4); 11992 11993 case lex_less_or_equal: 11994 return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4); 11995 11996 case lex_greater_or_equal: 11997 return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4); 11998 11999 case lex_plus: 12000 return binary_op_t(ast_op_add, xpath_type_number, 5); 12001 12002 case lex_minus: 12003 return binary_op_t(ast_op_subtract, xpath_type_number, 5); 12004 12005 case lex_multiply: 12006 return binary_op_t(ast_op_multiply, xpath_type_number, 6); 12007 12008 case lex_union: 12009 return binary_op_t(ast_op_union, xpath_type_node_set, 7); 12010 12011 default: 12012 return binary_op_t(); 12013 } 12014 } 12015 }; 12016 parse_expression_recxpath_parser12017 xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit) 12018 { 12019 binary_op_t op = binary_op_t::parse(_lexer); 12020 12021 while (op.asttype != ast_unknown && op.precedence >= limit) 12022 { 12023 _lexer.next(); 12024 12025 if (++_depth > xpath_ast_depth_limit) 12026 return error_rec(); 12027 12028 xpath_ast_node* rhs = parse_path_or_unary_expression(); 12029 if (!rhs) return 0; 12030 12031 binary_op_t nextop = binary_op_t::parse(_lexer); 12032 12033 while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence) 12034 { 12035 rhs = parse_expression_rec(rhs, nextop.precedence); 12036 if (!rhs) return 0; 12037 12038 nextop = binary_op_t::parse(_lexer); 12039 } 12040 12041 if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set)) 12042 return error("Union operator has to be applied to node sets"); 12043 12044 lhs = alloc_node(op.asttype, op.rettype, lhs, rhs); 12045 if (!lhs) return 0; 12046 12047 op = binary_op_t::parse(_lexer); 12048 } 12049 12050 return lhs; 12051 } 12052 12053 // Expr ::= OrExpr 12054 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr 12055 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr 12056 // EqualityExpr ::= RelationalExpr 12057 // | EqualityExpr '=' RelationalExpr 12058 // | EqualityExpr '!=' RelationalExpr 12059 // RelationalExpr ::= AdditiveExpr 12060 // | RelationalExpr '<' AdditiveExpr 12061 // | RelationalExpr '>' AdditiveExpr 12062 // | RelationalExpr '<=' AdditiveExpr 12063 // | RelationalExpr '>=' AdditiveExpr 12064 // AdditiveExpr ::= MultiplicativeExpr 12065 // | AdditiveExpr '+' MultiplicativeExpr 12066 // | AdditiveExpr '-' MultiplicativeExpr 12067 // MultiplicativeExpr ::= UnaryExpr 12068 // | MultiplicativeExpr '*' UnaryExpr 12069 // | MultiplicativeExpr 'div' UnaryExpr 12070 // | MultiplicativeExpr 'mod' UnaryExpr parse_expressionxpath_parser12071 xpath_ast_node* parse_expression(int limit = 0) 12072 { 12073 size_t old_depth = _depth; 12074 12075 if (++_depth > xpath_ast_depth_limit) 12076 return error_rec(); 12077 12078 xpath_ast_node* n = parse_path_or_unary_expression(); 12079 if (!n) return 0; 12080 12081 n = parse_expression_rec(n, limit); 12082 12083 _depth = old_depth; 12084 12085 return n; 12086 } 12087 xpath_parserxpath_parser12088 xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result), _depth(0) 12089 { 12090 } 12091 parsexpath_parser12092 xpath_ast_node* parse() 12093 { 12094 xpath_ast_node* n = parse_expression(); 12095 if (!n) return 0; 12096 12097 assert(_depth == 0); 12098 12099 // check if there are unparsed tokens left 12100 if (_lexer.current() != lex_eof) 12101 return error("Incorrect query"); 12102 12103 return n; 12104 } 12105 parsexpath_parser12106 static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result) 12107 { 12108 xpath_parser parser(query, variables, alloc, result); 12109 12110 return parser.parse(); 12111 } 12112 }; 12113 12114 struct xpath_query_impl 12115 { createxpath_query_impl12116 static xpath_query_impl* create() 12117 { 12118 void* memory = xml_memory::allocate(sizeof(xpath_query_impl)); 12119 if (!memory) return 0; 12120 12121 return new (memory) xpath_query_impl(); 12122 } 12123 destroyxpath_query_impl12124 static void destroy(xpath_query_impl* impl) 12125 { 12126 // free all allocated pages 12127 impl->alloc.release(); 12128 12129 // free allocator memory (with the first page) 12130 xml_memory::deallocate(impl); 12131 } 12132 xpath_query_implxpath_query_impl12133 xpath_query_impl(): root(0), alloc(&block, &oom), oom(false) 12134 { 12135 block.next = 0; 12136 block.capacity = sizeof(block.data); 12137 } 12138 12139 xpath_ast_node* root; 12140 xpath_allocator alloc; 12141 xpath_memory_block block; 12142 bool oom; 12143 }; 12144 evaluate_node_set_prepare(xpath_query_impl * impl)12145 PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl) 12146 { 12147 if (!impl) return 0; 12148 12149 if (impl->root->rettype() != xpath_type_node_set) 12150 { 12151 #ifdef PUGIXML_NO_EXCEPTIONS 12152 return 0; 12153 #else 12154 xpath_parse_result res; 12155 res.error = "Expression does not evaluate to node set"; 12156 12157 throw xpath_exception(res); 12158 #endif 12159 } 12160 12161 return impl->root; 12162 } 12163 PUGI__NS_END 12164 12165 namespace pugi 12166 { 12167 #ifndef PUGIXML_NO_EXCEPTIONS xpath_exception(const xpath_parse_result & result_)12168 PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_) 12169 { 12170 assert(_result.error); 12171 } 12172 what() const12173 PUGI__FN const char* xpath_exception::what() const throw() 12174 { 12175 return _result.error; 12176 } 12177 result() const12178 PUGI__FN const xpath_parse_result& xpath_exception::result() const 12179 { 12180 return _result; 12181 } 12182 #endif 12183 xpath_node()12184 PUGI__FN xpath_node::xpath_node() 12185 { 12186 } 12187 xpath_node(const xml_node & node_)12188 PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_) 12189 { 12190 } 12191 xpath_node(const xml_attribute & attribute_,const xml_node & parent_)12192 PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_) 12193 { 12194 } 12195 node() const12196 PUGI__FN xml_node xpath_node::node() const 12197 { 12198 return _attribute ? xml_node() : _node; 12199 } 12200 attribute() const12201 PUGI__FN xml_attribute xpath_node::attribute() const 12202 { 12203 return _attribute; 12204 } 12205 parent() const12206 PUGI__FN xml_node xpath_node::parent() const 12207 { 12208 return _attribute ? _node : _node.parent(); 12209 } 12210 unspecified_bool_xpath_node(xpath_node ***)12211 PUGI__FN static void unspecified_bool_xpath_node(xpath_node***) 12212 { 12213 } 12214 operator xpath_node::unspecified_bool_type() const12215 PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const 12216 { 12217 return (_node || _attribute) ? unspecified_bool_xpath_node : 0; 12218 } 12219 operator !() const12220 PUGI__FN bool xpath_node::operator!() const 12221 { 12222 return !(_node || _attribute); 12223 } 12224 operator ==(const xpath_node & n) const12225 PUGI__FN bool xpath_node::operator==(const xpath_node& n) const 12226 { 12227 return _node == n._node && _attribute == n._attribute; 12228 } 12229 operator !=(const xpath_node & n) const12230 PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const 12231 { 12232 return _node != n._node || _attribute != n._attribute; 12233 } 12234 12235 #ifdef __BORLANDC__ operator &&(const xpath_node & lhs,bool rhs)12236 PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs) 12237 { 12238 return (bool)lhs && rhs; 12239 } 12240 operator ||(const xpath_node & lhs,bool rhs)12241 PUGI__FN bool operator||(const xpath_node& lhs, bool rhs) 12242 { 12243 return (bool)lhs || rhs; 12244 } 12245 #endif 12246 _assign(const_iterator begin_,const_iterator end_,type_t type_)12247 PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_) 12248 { 12249 assert(begin_ <= end_); 12250 12251 size_t size_ = static_cast<size_t>(end_ - begin_); 12252 12253 // use internal buffer for 0 or 1 elements, heap buffer otherwise 12254 xpath_node* storage = (size_ <= 1) ? _storage : static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node))); 12255 12256 if (!storage) 12257 { 12258 #ifdef PUGIXML_NO_EXCEPTIONS 12259 return; 12260 #else 12261 throw std::bad_alloc(); 12262 #endif 12263 } 12264 12265 // deallocate old buffer 12266 if (_begin != _storage) 12267 impl::xml_memory::deallocate(_begin); 12268 12269 // size check is necessary because for begin_ = end_ = nullptr, memcpy is UB 12270 if (size_) 12271 memcpy(storage, begin_, size_ * sizeof(xpath_node)); 12272 12273 _begin = storage; 12274 _end = storage + size_; 12275 _type = type_; 12276 } 12277 12278 #ifdef PUGIXML_HAS_MOVE _move(xpath_node_set & rhs)12279 PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) PUGIXML_NOEXCEPT 12280 { 12281 _type = rhs._type; 12282 _storage[0] = rhs._storage[0]; 12283 _begin = (rhs._begin == rhs._storage) ? _storage : rhs._begin; 12284 _end = _begin + (rhs._end - rhs._begin); 12285 12286 rhs._type = type_unsorted; 12287 rhs._begin = rhs._storage; 12288 rhs._end = rhs._storage; 12289 } 12290 #endif 12291 xpath_node_set()12292 PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(_storage), _end(_storage) 12293 { 12294 } 12295 xpath_node_set(const_iterator begin_,const_iterator end_,type_t type_)12296 PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(_storage), _end(_storage) 12297 { 12298 _assign(begin_, end_, type_); 12299 } 12300 ~xpath_node_set()12301 PUGI__FN xpath_node_set::~xpath_node_set() 12302 { 12303 if (_begin != _storage) 12304 impl::xml_memory::deallocate(_begin); 12305 } 12306 xpath_node_set(const xpath_node_set & ns)12307 PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(_storage), _end(_storage) 12308 { 12309 _assign(ns._begin, ns._end, ns._type); 12310 } 12311 operator =(const xpath_node_set & ns)12312 PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns) 12313 { 12314 if (this == &ns) return *this; 12315 12316 _assign(ns._begin, ns._end, ns._type); 12317 12318 return *this; 12319 } 12320 12321 #ifdef PUGIXML_HAS_MOVE xpath_node_set(xpath_node_set && rhs)12322 PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT: _type(type_unsorted), _begin(_storage), _end(_storage) 12323 { 12324 _move(rhs); 12325 } 12326 operator =(xpath_node_set && rhs)12327 PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT 12328 { 12329 if (this == &rhs) return *this; 12330 12331 if (_begin != _storage) 12332 impl::xml_memory::deallocate(_begin); 12333 12334 _move(rhs); 12335 12336 return *this; 12337 } 12338 #endif 12339 type() const12340 PUGI__FN xpath_node_set::type_t xpath_node_set::type() const 12341 { 12342 return _type; 12343 } 12344 size() const12345 PUGI__FN size_t xpath_node_set::size() const 12346 { 12347 return _end - _begin; 12348 } 12349 empty() const12350 PUGI__FN bool xpath_node_set::empty() const 12351 { 12352 return _begin == _end; 12353 } 12354 operator [](size_t index) const12355 PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const 12356 { 12357 assert(index < size()); 12358 return _begin[index]; 12359 } 12360 begin() const12361 PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const 12362 { 12363 return _begin; 12364 } 12365 end() const12366 PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const 12367 { 12368 return _end; 12369 } 12370 sort(bool reverse)12371 PUGI__FN void xpath_node_set::sort(bool reverse) 12372 { 12373 _type = impl::xpath_sort(_begin, _end, _type, reverse); 12374 } 12375 first() const12376 PUGI__FN xpath_node xpath_node_set::first() const 12377 { 12378 return impl::xpath_first(_begin, _end, _type); 12379 } 12380 xpath_parse_result()12381 PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0) 12382 { 12383 } 12384 operator bool() const12385 PUGI__FN xpath_parse_result::operator bool() const 12386 { 12387 return error == 0; 12388 } 12389 description() const12390 PUGI__FN const char* xpath_parse_result::description() const 12391 { 12392 return error ? error : "No error"; 12393 } 12394 xpath_variable(xpath_value_type type_)12395 PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0) 12396 { 12397 } 12398 name() const12399 PUGI__FN const char_t* xpath_variable::name() const 12400 { 12401 switch (_type) 12402 { 12403 case xpath_type_node_set: 12404 return static_cast<const impl::xpath_variable_node_set*>(this)->name; 12405 12406 case xpath_type_number: 12407 return static_cast<const impl::xpath_variable_number*>(this)->name; 12408 12409 case xpath_type_string: 12410 return static_cast<const impl::xpath_variable_string*>(this)->name; 12411 12412 case xpath_type_boolean: 12413 return static_cast<const impl::xpath_variable_boolean*>(this)->name; 12414 12415 default: 12416 assert(false && "Invalid variable type"); // unreachable 12417 return 0; 12418 } 12419 } 12420 type() const12421 PUGI__FN xpath_value_type xpath_variable::type() const 12422 { 12423 return _type; 12424 } 12425 get_boolean() const12426 PUGI__FN bool xpath_variable::get_boolean() const 12427 { 12428 return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false; 12429 } 12430 get_number() const12431 PUGI__FN double xpath_variable::get_number() const 12432 { 12433 return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan(); 12434 } 12435 get_string() const12436 PUGI__FN const char_t* xpath_variable::get_string() const 12437 { 12438 const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0; 12439 return value ? value : PUGIXML_TEXT(""); 12440 } 12441 get_node_set() const12442 PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const 12443 { 12444 return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set; 12445 } 12446 set(bool value)12447 PUGI__FN bool xpath_variable::set(bool value) 12448 { 12449 if (_type != xpath_type_boolean) return false; 12450 12451 static_cast<impl::xpath_variable_boolean*>(this)->value = value; 12452 return true; 12453 } 12454 set(double value)12455 PUGI__FN bool xpath_variable::set(double value) 12456 { 12457 if (_type != xpath_type_number) return false; 12458 12459 static_cast<impl::xpath_variable_number*>(this)->value = value; 12460 return true; 12461 } 12462 set(const char_t * value)12463 PUGI__FN bool xpath_variable::set(const char_t* value) 12464 { 12465 if (_type != xpath_type_string) return false; 12466 12467 impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this); 12468 12469 // duplicate string 12470 size_t size = (impl::strlength(value) + 1) * sizeof(char_t); 12471 12472 char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size)); 12473 if (!copy) return false; 12474 12475 memcpy(copy, value, size); 12476 12477 // replace old string 12478 if (var->value) impl::xml_memory::deallocate(var->value); 12479 var->value = copy; 12480 12481 return true; 12482 } 12483 set(const xpath_node_set & value)12484 PUGI__FN bool xpath_variable::set(const xpath_node_set& value) 12485 { 12486 if (_type != xpath_type_node_set) return false; 12487 12488 static_cast<impl::xpath_variable_node_set*>(this)->value = value; 12489 return true; 12490 } 12491 xpath_variable_set()12492 PUGI__FN xpath_variable_set::xpath_variable_set() 12493 { 12494 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12495 _data[i] = 0; 12496 } 12497 ~xpath_variable_set()12498 PUGI__FN xpath_variable_set::~xpath_variable_set() 12499 { 12500 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12501 _destroy(_data[i]); 12502 } 12503 xpath_variable_set(const xpath_variable_set & rhs)12504 PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs) 12505 { 12506 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12507 _data[i] = 0; 12508 12509 _assign(rhs); 12510 } 12511 operator =(const xpath_variable_set & rhs)12512 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs) 12513 { 12514 if (this == &rhs) return *this; 12515 12516 _assign(rhs); 12517 12518 return *this; 12519 } 12520 12521 #ifdef PUGIXML_HAS_MOVE xpath_variable_set(xpath_variable_set && rhs)12522 PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT 12523 { 12524 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12525 { 12526 _data[i] = rhs._data[i]; 12527 rhs._data[i] = 0; 12528 } 12529 } 12530 operator =(xpath_variable_set && rhs)12531 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT 12532 { 12533 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12534 { 12535 _destroy(_data[i]); 12536 12537 _data[i] = rhs._data[i]; 12538 rhs._data[i] = 0; 12539 } 12540 12541 return *this; 12542 } 12543 #endif 12544 _assign(const xpath_variable_set & rhs)12545 PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs) 12546 { 12547 xpath_variable_set temp; 12548 12549 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12550 if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i])) 12551 return; 12552 12553 _swap(temp); 12554 } 12555 _swap(xpath_variable_set & rhs)12556 PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs) 12557 { 12558 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12559 { 12560 xpath_variable* chain = _data[i]; 12561 12562 _data[i] = rhs._data[i]; 12563 rhs._data[i] = chain; 12564 } 12565 } 12566 _find(const char_t * name) const12567 PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const 12568 { 12569 const size_t hash_size = sizeof(_data) / sizeof(_data[0]); 12570 size_t hash = impl::hash_string(name) % hash_size; 12571 12572 // look for existing variable 12573 for (xpath_variable* var = _data[hash]; var; var = var->_next) 12574 if (impl::strequal(var->name(), name)) 12575 return var; 12576 12577 return 0; 12578 } 12579 _clone(xpath_variable * var,xpath_variable ** out_result)12580 PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result) 12581 { 12582 xpath_variable* last = 0; 12583 12584 while (var) 12585 { 12586 // allocate storage for new variable 12587 xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name()); 12588 if (!nvar) return false; 12589 12590 // link the variable to the result immediately to handle failures gracefully 12591 if (last) 12592 last->_next = nvar; 12593 else 12594 *out_result = nvar; 12595 12596 last = nvar; 12597 12598 // copy the value; this can fail due to out-of-memory conditions 12599 if (!impl::copy_xpath_variable(nvar, var)) return false; 12600 12601 var = var->_next; 12602 } 12603 12604 return true; 12605 } 12606 _destroy(xpath_variable * var)12607 PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var) 12608 { 12609 while (var) 12610 { 12611 xpath_variable* next = var->_next; 12612 12613 impl::delete_xpath_variable(var->_type, var); 12614 12615 var = next; 12616 } 12617 } 12618 add(const char_t * name,xpath_value_type type)12619 PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type) 12620 { 12621 const size_t hash_size = sizeof(_data) / sizeof(_data[0]); 12622 size_t hash = impl::hash_string(name) % hash_size; 12623 12624 // look for existing variable 12625 for (xpath_variable* var = _data[hash]; var; var = var->_next) 12626 if (impl::strequal(var->name(), name)) 12627 return var->type() == type ? var : 0; 12628 12629 // add new variable 12630 xpath_variable* result = impl::new_xpath_variable(type, name); 12631 12632 if (result) 12633 { 12634 result->_next = _data[hash]; 12635 12636 _data[hash] = result; 12637 } 12638 12639 return result; 12640 } 12641 set(const char_t * name,bool value)12642 PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value) 12643 { 12644 xpath_variable* var = add(name, xpath_type_boolean); 12645 return var ? var->set(value) : false; 12646 } 12647 set(const char_t * name,double value)12648 PUGI__FN bool xpath_variable_set::set(const char_t* name, double value) 12649 { 12650 xpath_variable* var = add(name, xpath_type_number); 12651 return var ? var->set(value) : false; 12652 } 12653 set(const char_t * name,const char_t * value)12654 PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value) 12655 { 12656 xpath_variable* var = add(name, xpath_type_string); 12657 return var ? var->set(value) : false; 12658 } 12659 set(const char_t * name,const xpath_node_set & value)12660 PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value) 12661 { 12662 xpath_variable* var = add(name, xpath_type_node_set); 12663 return var ? var->set(value) : false; 12664 } 12665 get(const char_t * name)12666 PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name) 12667 { 12668 return _find(name); 12669 } 12670 get(const char_t * name) const12671 PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const 12672 { 12673 return _find(name); 12674 } 12675 xpath_query(const char_t * query,xpath_variable_set * variables)12676 PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0) 12677 { 12678 impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create(); 12679 12680 if (!qimpl) 12681 { 12682 #ifdef PUGIXML_NO_EXCEPTIONS 12683 _result.error = "Out of memory"; 12684 #else 12685 throw std::bad_alloc(); 12686 #endif 12687 } 12688 else 12689 { 12690 using impl::auto_deleter; // MSVC7 workaround 12691 auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy); 12692 12693 qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result); 12694 12695 if (qimpl->root) 12696 { 12697 qimpl->root->optimize(&qimpl->alloc); 12698 12699 _impl = impl.release(); 12700 _result.error = 0; 12701 } 12702 else 12703 { 12704 #ifdef PUGIXML_NO_EXCEPTIONS 12705 if (qimpl->oom) _result.error = "Out of memory"; 12706 #else 12707 if (qimpl->oom) throw std::bad_alloc(); 12708 throw xpath_exception(_result); 12709 #endif 12710 } 12711 } 12712 } 12713 xpath_query()12714 PUGI__FN xpath_query::xpath_query(): _impl(0) 12715 { 12716 } 12717 ~xpath_query()12718 PUGI__FN xpath_query::~xpath_query() 12719 { 12720 if (_impl) 12721 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl)); 12722 } 12723 12724 #ifdef PUGIXML_HAS_MOVE xpath_query(xpath_query && rhs)12725 PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT 12726 { 12727 _impl = rhs._impl; 12728 _result = rhs._result; 12729 rhs._impl = 0; 12730 rhs._result = xpath_parse_result(); 12731 } 12732 operator =(xpath_query && rhs)12733 PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT 12734 { 12735 if (this == &rhs) return *this; 12736 12737 if (_impl) 12738 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl)); 12739 12740 _impl = rhs._impl; 12741 _result = rhs._result; 12742 rhs._impl = 0; 12743 rhs._result = xpath_parse_result(); 12744 12745 return *this; 12746 } 12747 #endif 12748 return_type() const12749 PUGI__FN xpath_value_type xpath_query::return_type() const 12750 { 12751 if (!_impl) return xpath_type_none; 12752 12753 return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype(); 12754 } 12755 evaluate_boolean(const xpath_node & n) const12756 PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const 12757 { 12758 if (!_impl) return false; 12759 12760 impl::xpath_context c(n, 1, 1); 12761 impl::xpath_stack_data sd; 12762 12763 bool r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack); 12764 12765 if (sd.oom) 12766 { 12767 #ifdef PUGIXML_NO_EXCEPTIONS 12768 return false; 12769 #else 12770 throw std::bad_alloc(); 12771 #endif 12772 } 12773 12774 return r; 12775 } 12776 evaluate_number(const xpath_node & n) const12777 PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const 12778 { 12779 if (!_impl) return impl::gen_nan(); 12780 12781 impl::xpath_context c(n, 1, 1); 12782 impl::xpath_stack_data sd; 12783 12784 double r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack); 12785 12786 if (sd.oom) 12787 { 12788 #ifdef PUGIXML_NO_EXCEPTIONS 12789 return impl::gen_nan(); 12790 #else 12791 throw std::bad_alloc(); 12792 #endif 12793 } 12794 12795 return r; 12796 } 12797 12798 #ifndef PUGIXML_NO_STL evaluate_string(const xpath_node & n) const12799 PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const 12800 { 12801 if (!_impl) return string_t(); 12802 12803 impl::xpath_context c(n, 1, 1); 12804 impl::xpath_stack_data sd; 12805 12806 impl::xpath_string r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack); 12807 12808 if (sd.oom) 12809 { 12810 #ifdef PUGIXML_NO_EXCEPTIONS 12811 return string_t(); 12812 #else 12813 throw std::bad_alloc(); 12814 #endif 12815 } 12816 12817 return string_t(r.c_str(), r.length()); 12818 } 12819 #endif 12820 evaluate_string(char_t * buffer,size_t capacity,const xpath_node & n) const12821 PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const 12822 { 12823 impl::xpath_context c(n, 1, 1); 12824 impl::xpath_stack_data sd; 12825 12826 impl::xpath_string r = _impl ? static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack) : impl::xpath_string(); 12827 12828 if (sd.oom) 12829 { 12830 #ifdef PUGIXML_NO_EXCEPTIONS 12831 r = impl::xpath_string(); 12832 #else 12833 throw std::bad_alloc(); 12834 #endif 12835 } 12836 12837 size_t full_size = r.length() + 1; 12838 12839 if (capacity > 0) 12840 { 12841 size_t size = (full_size < capacity) ? full_size : capacity; 12842 assert(size > 0); 12843 12844 memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t)); 12845 buffer[size - 1] = 0; 12846 } 12847 12848 return full_size; 12849 } 12850 evaluate_node_set(const xpath_node & n) const12851 PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const 12852 { 12853 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl)); 12854 if (!root) return xpath_node_set(); 12855 12856 impl::xpath_context c(n, 1, 1); 12857 impl::xpath_stack_data sd; 12858 12859 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all); 12860 12861 if (sd.oom) 12862 { 12863 #ifdef PUGIXML_NO_EXCEPTIONS 12864 return xpath_node_set(); 12865 #else 12866 throw std::bad_alloc(); 12867 #endif 12868 } 12869 12870 return xpath_node_set(r.begin(), r.end(), r.type()); 12871 } 12872 evaluate_node(const xpath_node & n) const12873 PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const 12874 { 12875 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl)); 12876 if (!root) return xpath_node(); 12877 12878 impl::xpath_context c(n, 1, 1); 12879 impl::xpath_stack_data sd; 12880 12881 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first); 12882 12883 if (sd.oom) 12884 { 12885 #ifdef PUGIXML_NO_EXCEPTIONS 12886 return xpath_node(); 12887 #else 12888 throw std::bad_alloc(); 12889 #endif 12890 } 12891 12892 return r.first(); 12893 } 12894 result() const12895 PUGI__FN const xpath_parse_result& xpath_query::result() const 12896 { 12897 return _result; 12898 } 12899 unspecified_bool_xpath_query(xpath_query ***)12900 PUGI__FN static void unspecified_bool_xpath_query(xpath_query***) 12901 { 12902 } 12903 operator xpath_query::unspecified_bool_type() const12904 PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const 12905 { 12906 return _impl ? unspecified_bool_xpath_query : 0; 12907 } 12908 operator !() const12909 PUGI__FN bool xpath_query::operator!() const 12910 { 12911 return !_impl; 12912 } 12913 select_node(const char_t * query,xpath_variable_set * variables) const12914 PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const 12915 { 12916 xpath_query q(query, variables); 12917 return q.evaluate_node(*this); 12918 } 12919 select_node(const xpath_query & query) const12920 PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const 12921 { 12922 return query.evaluate_node(*this); 12923 } 12924 select_nodes(const char_t * query,xpath_variable_set * variables) const12925 PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const 12926 { 12927 xpath_query q(query, variables); 12928 return q.evaluate_node_set(*this); 12929 } 12930 select_nodes(const xpath_query & query) const12931 PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const 12932 { 12933 return query.evaluate_node_set(*this); 12934 } 12935 select_single_node(const char_t * query,xpath_variable_set * variables) const12936 PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const 12937 { 12938 xpath_query q(query, variables); 12939 return q.evaluate_node(*this); 12940 } 12941 select_single_node(const xpath_query & query) const12942 PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const 12943 { 12944 return query.evaluate_node(*this); 12945 } 12946 } 12947 12948 #endif 12949 12950 #ifdef __BORLANDC__ 12951 # pragma option pop 12952 #endif 12953 12954 // Intel C++ does not properly keep warning state for function templates, 12955 // so popping warning state at the end of translation unit leads to warnings in the middle. 12956 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) 12957 # pragma warning(pop) 12958 #endif 12959 12960 #if defined(_MSC_VER) && defined(__c2__) 12961 # pragma clang diagnostic pop 12962 #endif 12963 12964 // Undefine all local macros (makes sure we're not leaking macros in header-only mode) 12965 #undef PUGI__NO_INLINE 12966 #undef PUGI__UNLIKELY 12967 #undef PUGI__STATIC_ASSERT 12968 #undef PUGI__DMC_VOLATILE 12969 #undef PUGI__UNSIGNED_OVERFLOW 12970 #undef PUGI__MSVC_CRT_VERSION 12971 #undef PUGI__SNPRINTF 12972 #undef PUGI__NS_BEGIN 12973 #undef PUGI__NS_END 12974 #undef PUGI__FN 12975 #undef PUGI__FN_NO_INLINE 12976 #undef PUGI__GETHEADER_IMPL 12977 #undef PUGI__GETPAGE_IMPL 12978 #undef PUGI__GETPAGE 12979 #undef PUGI__NODETYPE 12980 #undef PUGI__IS_CHARTYPE_IMPL 12981 #undef PUGI__IS_CHARTYPE 12982 #undef PUGI__IS_CHARTYPEX 12983 #undef PUGI__ENDSWITH 12984 #undef PUGI__SKIPWS 12985 #undef PUGI__OPTSET 12986 #undef PUGI__PUSHNODE 12987 #undef PUGI__POPNODE 12988 #undef PUGI__SCANFOR 12989 #undef PUGI__SCANWHILE 12990 #undef PUGI__SCANWHILE_UNROLL 12991 #undef PUGI__ENDSEG 12992 #undef PUGI__THROW_ERROR 12993 #undef PUGI__CHECK_ERROR 12994 12995 #endif 12996 12997 /** 12998 * Copyright (c) 2006-2020 Arseny Kapoulkine 12999 * 13000 * Permission is hereby granted, free of charge, to any person 13001 * obtaining a copy of this software and associated documentation 13002 * files (the "Software"), to deal in the Software without 13003 * restriction, including without limitation the rights to use, 13004 * copy, modify, merge, publish, distribute, sublicense, and/or sell 13005 * copies of the Software, and to permit persons to whom the 13006 * Software is furnished to do so, subject to the following 13007 * conditions: 13008 * 13009 * The above copyright notice and this permission notice shall be 13010 * included in all copies or substantial portions of the Software. 13011 * 13012 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13013 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 13014 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 13015 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 13016 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 13017 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 13018 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 13019 * OTHER DEALINGS IN THE SOFTWARE. 13020 */ 13021