1 /** 2 * pugixml parser - version 1.8 3 * -------------------------------------------------------- 4 * Copyright (C) 2006-2016, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) 5 * Report bugs and download new versions at http://pugixml.org/ 6 * 7 * This library is distributed under the MIT License. See notice at the end 8 * of this file. 9 * 10 * This work is based on the pugxml parser, which is: 11 * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) 12 */ 13 14 #ifndef SOURCE_PUGIXML_CPP 15 #define SOURCE_PUGIXML_CPP 16 17 #include "pugixml.hpp" 18 19 #include <stdlib.h> 20 #include <stdio.h> 21 #include <string.h> 22 #include <assert.h> 23 #include <limits.h> 24 25 #ifdef PUGIXML_WCHAR_MODE 26 # include <wchar.h> 27 #endif 28 29 #ifndef PUGIXML_NO_XPATH 30 # include <math.h> 31 # include <float.h> 32 # ifdef PUGIXML_NO_EXCEPTIONS 33 # include <setjmp.h> 34 # endif 35 #endif 36 37 #ifndef PUGIXML_NO_STL 38 # include <istream> 39 # include <ostream> 40 # include <string> 41 #endif 42 43 // For placement new 44 #include <new> 45 46 #ifdef _MSC_VER 47 # pragma warning(push) 48 # pragma warning(disable: 4127) // conditional expression is constant 49 # pragma warning(disable: 4324) // structure was padded due to __declspec(align()) 50 # pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable 51 # pragma warning(disable: 4702) // unreachable code 52 # pragma warning(disable: 4996) // this function or variable may be unsafe 53 # pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged 54 #endif 55 56 #ifdef __INTEL_COMPILER 57 # pragma warning(disable: 177) // function was declared but never referenced 58 # pragma warning(disable: 279) // controlling expression is constant 59 # pragma warning(disable: 1478 1786) // function was declared "deprecated" 60 # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type 61 #endif 62 63 #if 
defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY) 64 # pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away 65 #endif 66 67 #ifdef __BORLANDC__ 68 # pragma option push 69 # pragma warn -8008 // condition is always false 70 # pragma warn -8066 // unreachable code 71 #endif 72 73 #ifdef __SNC__ 74 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug 75 # pragma diag_suppress=178 // function was declared but never referenced 76 # pragma diag_suppress=237 // controlling expression is constant 77 #endif 78 79 // Inlining controls 80 #if defined(_MSC_VER) && _MSC_VER >= 1300 81 # define PUGI__NO_INLINE __declspec(noinline) 82 #elif defined(__GNUC__) 83 # define PUGI__NO_INLINE __attribute__((noinline)) 84 #else 85 # define PUGI__NO_INLINE 86 #endif 87 88 // Branch weight controls 89 #if defined(__GNUC__) 90 # define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0) 91 #else 92 # define PUGI__UNLIKELY(cond) (cond) 93 #endif 94 95 // Simple static assertion 96 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 
1 : -1] = {0}; (void)condition_failed[0]; } 97 98 // Digital Mars C++ bug workaround for passing char loaded from memory via stack 99 #ifdef __DMC__ 100 # define PUGI__DMC_VOLATILE volatile 101 #else 102 # define PUGI__DMC_VOLATILE 103 #endif 104 105 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all) 106 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST) 107 using std::memcpy; 108 using std::memmove; 109 using std::memset; 110 #endif 111 112 // Some MinGW versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions in strict ANSI mode 113 #if defined(PUGIXML_HAS_LONG_LONG) && defined(__MINGW32__) && defined(__STRICT_ANSI__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX) 114 # define LLONG_MAX 9223372036854775807LL 115 # define LLONG_MIN (-LLONG_MAX-1) 116 # define ULLONG_MAX (2ULL*LLONG_MAX+1) 117 #endif 118 119 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features 120 #if defined(_MSC_VER) && !defined(__S3E__) 121 # define PUGI__MSVC_CRT_VERSION _MSC_VER 122 #endif 123 124 #ifdef PUGIXML_HEADER_ONLY 125 # define PUGI__NS_BEGIN namespace pugi { namespace impl { 126 # define PUGI__NS_END } } 127 # define PUGI__FN inline 128 # define PUGI__FN_NO_INLINE inline 129 #else 130 # if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces 131 # define PUGI__NS_BEGIN namespace pugi { namespace impl { 132 # define PUGI__NS_END } } 133 # else 134 # define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace { 135 # define PUGI__NS_END } } } 136 # endif 137 # define PUGI__FN 138 # define PUGI__FN_NO_INLINE PUGI__NO_INLINE 139 #endif 140 141 // uintptr_t 142 #if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561) 143 namespace pugi 144 { 145 # ifndef _UINTPTR_T_DEFINED 
146 typedef size_t uintptr_t; 147 # endif 148 149 typedef unsigned __int8 uint8_t; 150 typedef unsigned __int16 uint16_t; 151 typedef unsigned __int32 uint32_t; 152 } 153 #else 154 # include <stdint.h> 155 #endif 156 157 // Memory allocation 158 PUGI__NS_BEGIN default_allocate(size_t size)159 PUGI__FN void* default_allocate(size_t size) 160 { 161 return malloc(size); 162 } 163 default_deallocate(void * ptr)164 PUGI__FN void default_deallocate(void* ptr) 165 { 166 free(ptr); 167 } 168 169 template <typename T> 170 struct xml_memory_management_function_storage 171 { 172 static allocation_function allocate; 173 static deallocation_function deallocate; 174 }; 175 176 // Global allocation functions are stored in class statics so that in header mode linker deduplicates them 177 // Without a template<> we'll get multiple definitions of the same static 178 template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate; 179 template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate; 180 181 typedef xml_memory_management_function_storage<int> xml_memory; 182 PUGI__NS_END 183 184 // String utilities 185 PUGI__NS_BEGIN 186 // Get string length strlength(const char_t * s)187 PUGI__FN size_t strlength(const char_t* s) 188 { 189 assert(s); 190 191 #ifdef PUGIXML_WCHAR_MODE 192 return wcslen(s); 193 #else 194 return strlen(s); 195 #endif 196 } 197 198 // Compare two strings strequal(const char_t * src,const char_t * dst)199 PUGI__FN bool strequal(const char_t* src, const char_t* dst) 200 { 201 assert(src && dst); 202 203 #ifdef PUGIXML_WCHAR_MODE 204 return wcscmp(src, dst) == 0; 205 #else 206 return strcmp(src, dst) == 0; 207 #endif 208 } 209 210 // Compare lhs with [rhs_begin, rhs_end) strequalrange(const char_t * lhs,const char_t * rhs,size_t count)211 PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count) 212 { 213 for (size_t i = 0; i < count; 
++i) 214 if (lhs[i] != rhs[i]) 215 return false; 216 217 return lhs[count] == 0; 218 } 219 220 // Get length of wide string, even if CRT lacks wide character support strlength_wide(const wchar_t * s)221 PUGI__FN size_t strlength_wide(const wchar_t* s) 222 { 223 assert(s); 224 225 #ifdef PUGIXML_WCHAR_MODE 226 return wcslen(s); 227 #else 228 const wchar_t* end = s; 229 while (*end) end++; 230 return static_cast<size_t>(end - s); 231 #endif 232 } 233 PUGI__NS_END 234 235 // auto_ptr-like object for exception recovery 236 PUGI__NS_BEGIN 237 template <typename T> struct auto_deleter 238 { 239 typedef void (*D)(T*); 240 241 T* data; 242 D deleter; 243 auto_deleterauto_deleter244 auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_) 245 { 246 } 247 ~auto_deleterauto_deleter248 ~auto_deleter() 249 { 250 if (data) deleter(data); 251 } 252 releaseauto_deleter253 T* release() 254 { 255 T* result = data; 256 data = 0; 257 return result; 258 } 259 }; 260 PUGI__NS_END 261 262 #ifdef PUGIXML_COMPACT 263 PUGI__NS_BEGIN 264 class compact_hash_table 265 { 266 public: compact_hash_table()267 compact_hash_table(): _items(0), _capacity(0), _count(0) 268 { 269 } 270 clear()271 void clear() 272 { 273 if (_items) 274 { 275 xml_memory::deallocate(_items); 276 _items = 0; 277 _capacity = 0; 278 _count = 0; 279 } 280 } 281 find(const void * key)282 void** find(const void* key) 283 { 284 assert(key); 285 286 if (_capacity == 0) return 0; 287 288 size_t hashmod = _capacity - 1; 289 size_t bucket = hash(key) & hashmod; 290 291 for (size_t probe = 0; probe <= hashmod; ++probe) 292 { 293 item_t& probe_item = _items[bucket]; 294 295 if (probe_item.key == key) 296 return &probe_item.value; 297 298 if (probe_item.key == 0) 299 return 0; 300 301 // hash collision, quadratic probing 302 bucket = (bucket + probe + 1) & hashmod; 303 } 304 305 assert(false && "Hash table is full"); 306 return 0; 307 } 308 insert(const void * key)309 void** insert(const void* key) 310 { 311 assert(key); 312 
assert(_capacity != 0 && _count < _capacity - _capacity / 4); 313 314 size_t hashmod = _capacity - 1; 315 size_t bucket = hash(key) & hashmod; 316 317 for (size_t probe = 0; probe <= hashmod; ++probe) 318 { 319 item_t& probe_item = _items[bucket]; 320 321 if (probe_item.key == 0) 322 { 323 probe_item.key = key; 324 _count++; 325 return &probe_item.value; 326 } 327 328 if (probe_item.key == key) 329 return &probe_item.value; 330 331 // hash collision, quadratic probing 332 bucket = (bucket + probe + 1) & hashmod; 333 } 334 335 assert(false && "Hash table is full"); 336 return 0; 337 } 338 reserve()339 bool reserve() 340 { 341 if (_count + 16 >= _capacity - _capacity / 4) 342 return rehash(); 343 344 return true; 345 } 346 347 private: 348 struct item_t 349 { 350 const void* key; 351 void* value; 352 }; 353 354 item_t* _items; 355 size_t _capacity; 356 357 size_t _count; 358 359 bool rehash(); 360 hash(const void * key)361 static unsigned int hash(const void* key) 362 { 363 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key)); 364 365 // MurmurHash3 32-bit finalizer 366 h ^= h >> 16; 367 h *= 0x85ebca6bu; 368 h ^= h >> 13; 369 h *= 0xc2b2ae35u; 370 h ^= h >> 16; 371 372 return h; 373 } 374 }; 375 rehash()376 PUGI__FN_NO_INLINE bool compact_hash_table::rehash() 377 { 378 compact_hash_table rt; 379 rt._capacity = (_capacity == 0) ? 
32 : _capacity * 2; 380 rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * rt._capacity)); 381 382 if (!rt._items) 383 return false; 384 385 memset(rt._items, 0, sizeof(item_t) * rt._capacity); 386 387 for (size_t i = 0; i < _capacity; ++i) 388 if (_items[i].key) 389 *rt.insert(_items[i].key) = _items[i].value; 390 391 if (_items) 392 xml_memory::deallocate(_items); 393 394 _capacity = rt._capacity; 395 _items = rt._items; 396 397 assert(_count == rt._count); 398 399 return true; 400 } 401 402 PUGI__NS_END 403 #endif 404 405 PUGI__NS_BEGIN 406 #ifdef PUGIXML_COMPACT 407 static const uintptr_t xml_memory_block_alignment = 4; 408 #else 409 static const uintptr_t xml_memory_block_alignment = sizeof(void*); 410 #endif 411 412 // extra metadata bits 413 static const uintptr_t xml_memory_page_contents_shared_mask = 64; 414 static const uintptr_t xml_memory_page_name_allocated_mask = 32; 415 static const uintptr_t xml_memory_page_value_allocated_mask = 16; 416 static const uintptr_t xml_memory_page_type_mask = 15; 417 418 // combined masks for string uniqueness 419 static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask; 420 static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask; 421 422 #ifdef PUGIXML_COMPACT 423 #define PUGI__GETHEADER_IMPL(object, page, flags) // unused 424 #define PUGI__GETPAGE_IMPL(header) (header).get_page() 425 #else 426 #define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags)) 427 // this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings 428 #define PUGI__GETPAGE_IMPL(header) static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>(reinterpret_cast<const char*>(&header) - (header >> 8)))) 429 
#endif 430 431 #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header) 432 #define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask) 433 434 struct xml_allocator; 435 436 struct xml_memory_page 437 { constructxml_memory_page438 static xml_memory_page* construct(void* memory) 439 { 440 xml_memory_page* result = static_cast<xml_memory_page*>(memory); 441 442 result->allocator = 0; 443 result->prev = 0; 444 result->next = 0; 445 result->busy_size = 0; 446 result->freed_size = 0; 447 448 #ifdef PUGIXML_COMPACT 449 result->compact_string_base = 0; 450 result->compact_shared_parent = 0; 451 result->compact_page_marker = 0; 452 #endif 453 454 return result; 455 } 456 457 xml_allocator* allocator; 458 459 xml_memory_page* prev; 460 xml_memory_page* next; 461 462 size_t busy_size; 463 size_t freed_size; 464 465 #ifdef PUGIXML_COMPACT 466 char_t* compact_string_base; 467 void* compact_shared_parent; 468 uint32_t* compact_page_marker; 469 #endif 470 }; 471 472 static const size_t xml_memory_page_size = 473 #ifdef PUGIXML_MEMORY_PAGE_SIZE 474 (PUGIXML_MEMORY_PAGE_SIZE) 475 #else 476 32768 477 #endif 478 - sizeof(xml_memory_page); 479 480 struct xml_memory_string_header 481 { 482 uint16_t page_offset; // offset from page->data 483 uint16_t full_size; // 0 if string occupies whole page 484 }; 485 486 struct xml_allocator 487 { xml_allocatorxml_allocator488 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size) 489 { 490 #ifdef PUGIXML_COMPACT 491 _hash = 0; 492 #endif 493 } 494 allocate_pagexml_allocator495 xml_memory_page* allocate_page(size_t data_size) 496 { 497 size_t size = sizeof(xml_memory_page) + data_size; 498 499 // allocate block with some alignment, leaving memory for worst-case padding 500 void* memory = xml_memory::allocate(size); 501 if (!memory) return 0; 502 503 // prepare page structure 504 xml_memory_page* page = xml_memory_page::construct(memory); 505 assert(page); 506 507 page->allocator = 
_root->allocator; 508 509 return page; 510 } 511 deallocate_pagexml_allocator512 static void deallocate_page(xml_memory_page* page) 513 { 514 xml_memory::deallocate(page); 515 } 516 517 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page); 518 allocate_memoryxml_allocator519 void* allocate_memory(size_t size, xml_memory_page*& out_page) 520 { 521 if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size)) 522 return allocate_memory_oob(size, out_page); 523 524 void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size; 525 526 _busy_size += size; 527 528 out_page = _root; 529 530 return buf; 531 } 532 533 #ifdef PUGIXML_COMPACT allocate_objectxml_allocator534 void* allocate_object(size_t size, xml_memory_page*& out_page) 535 { 536 void* result = allocate_memory(size + sizeof(uint32_t), out_page); 537 if (!result) return 0; 538 539 // adjust for marker 540 ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker); 541 542 if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment)) 543 { 544 // insert new marker 545 uint32_t* marker = static_cast<uint32_t*>(result); 546 547 *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page)); 548 out_page->compact_page_marker = marker; 549 550 // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block 551 // this will make sure deallocate_memory correctly tracks the size 552 out_page->freed_size += sizeof(uint32_t); 553 554 return marker + 1; 555 } 556 else 557 { 558 // roll back uint32_t part 559 _busy_size -= sizeof(uint32_t); 560 561 return result; 562 } 563 } 564 #else allocate_objectxml_allocator565 void* allocate_object(size_t size, xml_memory_page*& out_page) 566 { 567 return allocate_memory(size, out_page); 568 } 569 #endif 570 deallocate_memoryxml_allocator571 void deallocate_memory(void* ptr, size_t size, 
xml_memory_page* page) 572 { 573 if (page == _root) page->busy_size = _busy_size; 574 575 assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size); 576 (void)!ptr; 577 578 page->freed_size += size; 579 assert(page->freed_size <= page->busy_size); 580 581 if (page->freed_size == page->busy_size) 582 { 583 if (page->next == 0) 584 { 585 assert(_root == page); 586 587 // top page freed, just reset sizes 588 page->busy_size = 0; 589 page->freed_size = 0; 590 591 #ifdef PUGIXML_COMPACT 592 // reset compact state to maximize efficiency 593 page->compact_string_base = 0; 594 page->compact_shared_parent = 0; 595 page->compact_page_marker = 0; 596 #endif 597 598 _busy_size = 0; 599 } 600 else 601 { 602 assert(_root != page); 603 assert(page->prev); 604 605 // remove from the list 606 page->prev->next = page->next; 607 page->next->prev = page->prev; 608 609 // deallocate 610 deallocate_page(page); 611 } 612 } 613 } 614 allocate_stringxml_allocator615 char_t* allocate_string(size_t length) 616 { 617 static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment; 618 619 PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset); 620 621 // allocate memory for string and header block 622 size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t); 623 624 // round size up to block alignment boundary 625 size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1); 626 627 xml_memory_page* page; 628 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page)); 629 630 if (!header) return 0; 631 632 // setup header 633 ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page); 634 635 assert(page_offset % xml_memory_block_alignment == 0); 636 assert(page_offset >= 0 && static_cast<size_t>(page_offset) < 
max_encoded_offset); 637 header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment); 638 639 // full_size == 0 for large strings that occupy the whole page 640 assert(full_size % xml_memory_block_alignment == 0); 641 assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0)); 642 header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0); 643 644 // round-trip through void* to avoid 'cast increases required alignment of target type' warning 645 // header is guaranteed a pointer-sized alignment, which should be enough for char_t 646 return static_cast<char_t*>(static_cast<void*>(header + 1)); 647 } 648 deallocate_stringxml_allocator649 void deallocate_string(char_t* string) 650 { 651 // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings 652 // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string 653 654 // get header 655 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1; 656 assert(header); 657 658 // deallocate 659 size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment; 660 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset)); 661 662 // if full_size == 0 then this string occupies the whole page 663 size_t full_size = header->full_size == 0 ? 
page->busy_size : header->full_size * xml_memory_block_alignment; 664 665 deallocate_memory(header, full_size, page); 666 } 667 reservexml_allocator668 bool reserve() 669 { 670 #ifdef PUGIXML_COMPACT 671 return _hash->reserve(); 672 #else 673 return true; 674 #endif 675 } 676 677 xml_memory_page* _root; 678 size_t _busy_size; 679 680 #ifdef PUGIXML_COMPACT 681 compact_hash_table* _hash; 682 #endif 683 }; 684 allocate_memory_oob(size_t size,xml_memory_page * & out_page)685 PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) 686 { 687 const size_t large_allocation_threshold = xml_memory_page_size / 4; 688 689 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size); 690 out_page = page; 691 692 if (!page) return 0; 693 694 if (size <= large_allocation_threshold) 695 { 696 _root->busy_size = _busy_size; 697 698 // insert page at the end of linked list 699 page->prev = _root; 700 _root->next = page; 701 _root = page; 702 703 _busy_size = size; 704 } 705 else 706 { 707 // insert page before the end of linked list, so that it is deleted as soon as possible 708 // the last page is not deleted even if it's empty (see deallocate_memory) 709 assert(_root->prev); 710 711 page->prev = _root->prev; 712 page->next = _root; 713 714 _root->prev->next = page; 715 _root->prev = page; 716 717 page->busy_size = size; 718 } 719 720 return reinterpret_cast<char*>(page) + sizeof(xml_memory_page); 721 } 722 PUGI__NS_END 723 724 #ifdef PUGIXML_COMPACT 725 PUGI__NS_BEGIN 726 static const uintptr_t compact_alignment_log2 = 2; 727 static const uintptr_t compact_alignment = 1 << compact_alignment_log2; 728 729 class compact_header 730 { 731 public: compact_header(xml_memory_page * page,unsigned int flags)732 compact_header(xml_memory_page* page, unsigned int flags) 733 { 734 PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment); 735 736 ptrdiff_t offset = 
(reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker)); 737 assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment); 738 739 _page = static_cast<unsigned char>(offset >> compact_alignment_log2); 740 _flags = static_cast<unsigned char>(flags); 741 } 742 operator &=(uintptr_t mod)743 void operator&=(uintptr_t mod) 744 { 745 _flags &= static_cast<unsigned char>(mod); 746 } 747 operator |=(uintptr_t mod)748 void operator|=(uintptr_t mod) 749 { 750 _flags |= static_cast<unsigned char>(mod); 751 } 752 operator &(uintptr_t mod) const753 uintptr_t operator&(uintptr_t mod) const 754 { 755 return _flags & mod; 756 } 757 get_page() const758 xml_memory_page* get_page() const 759 { 760 // round-trip through void* to silence 'cast increases required alignment of target type' warnings 761 const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2); 762 const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker)); 763 764 return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page))); 765 } 766 767 private: 768 unsigned char _page; 769 unsigned char _flags; 770 }; 771 compact_get_page(const void * object,int header_offset)772 PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset) 773 { 774 const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset); 775 776 return header->get_page(); 777 } 778 compact_get_value(const void * object)779 template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object) 780 { 781 return static_cast<T*>(*compact_get_page(object, header_offset)->allocator->_hash->find(object)); 782 } 783 compact_set_value(const void * object,T * value)784 template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* 
object, T* value) 785 { 786 *compact_get_page(object, header_offset)->allocator->_hash->insert(object) = value; 787 } 788 789 template <typename T, int header_offset, int start = -126> class compact_pointer 790 { 791 public: compact_pointer()792 compact_pointer(): _data(0) 793 { 794 } 795 operator =(const compact_pointer & rhs)796 void operator=(const compact_pointer& rhs) 797 { 798 *this = rhs + 0; 799 } 800 operator =(T * value)801 void operator=(T* value) 802 { 803 if (value) 804 { 805 // value is guaranteed to be compact-aligned; 'this' is not 806 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) 807 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to 808 // compensate for arithmetic shift rounding for negative values 809 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this); 810 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start; 811 812 if (static_cast<uintptr_t>(offset) <= 253) 813 _data = static_cast<unsigned char>(offset + 1); 814 else 815 { 816 compact_set_value<header_offset>(this, value); 817 818 _data = 255; 819 } 820 } 821 else 822 _data = 0; 823 } 824 operator T*() const825 operator T*() const 826 { 827 if (_data) 828 { 829 if (_data < 255) 830 { 831 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1); 832 833 return reinterpret_cast<T*>(base + ((_data - 1 + start) << compact_alignment_log2)); 834 } 835 else 836 return compact_get_value<header_offset, T>(this); 837 } 838 else 839 return 0; 840 } 841 operator ->() const842 T* operator->() const 843 { 844 return *this; 845 } 846 847 private: 848 unsigned char _data; 849 }; 850 851 template <typename T, int header_offset> class compact_pointer_parent 852 { 853 public: compact_pointer_parent()854 compact_pointer_parent(): _data(0) 855 { 856 } 857 operator =(const compact_pointer_parent & rhs)858 void operator=(const 
compact_pointer_parent& rhs) 859 { 860 *this = rhs + 0; 861 } 862 operator =(T * value)863 void operator=(T* value) 864 { 865 if (value) 866 { 867 // value is guaranteed to be compact-aligned; 'this' is not 868 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) 869 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to 870 // compensate for arithmetic shift behavior for negative values 871 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this); 872 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533; 873 874 if (static_cast<uintptr_t>(offset) <= 65533) 875 { 876 _data = static_cast<unsigned short>(offset + 1); 877 } 878 else 879 { 880 xml_memory_page* page = compact_get_page(this, header_offset); 881 882 if (PUGI__UNLIKELY(page->compact_shared_parent == 0)) 883 page->compact_shared_parent = value; 884 885 if (page->compact_shared_parent == value) 886 { 887 _data = 65534; 888 } 889 else 890 { 891 compact_set_value<header_offset>(this, value); 892 893 _data = 65535; 894 } 895 } 896 } 897 else 898 { 899 _data = 0; 900 } 901 } 902 operator T*() const903 operator T*() const 904 { 905 if (_data) 906 { 907 if (_data < 65534) 908 { 909 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1); 910 911 return reinterpret_cast<T*>(base + ((_data - 1 - 65533) << compact_alignment_log2)); 912 } 913 else if (_data == 65534) 914 return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent); 915 else 916 return compact_get_value<header_offset, T>(this); 917 } 918 else 919 return 0; 920 } 921 operator ->() const922 T* operator->() const 923 { 924 return *this; 925 } 926 927 private: 928 uint16_t _data; 929 }; 930 931 template <int header_offset, int base_offset> class compact_string 932 { 933 public: compact_string()934 compact_string(): _data(0) 935 { 936 } 937 operator =(const compact_string & 
rhs)938 void operator=(const compact_string& rhs) 939 { 940 *this = rhs + 0; 941 } 942 operator =(char_t * value)943 void operator=(char_t* value) 944 { 945 if (value) 946 { 947 xml_memory_page* page = compact_get_page(this, header_offset); 948 949 if (PUGI__UNLIKELY(page->compact_string_base == 0)) 950 page->compact_string_base = value; 951 952 ptrdiff_t offset = value - page->compact_string_base; 953 954 if (static_cast<uintptr_t>(offset) < (65535 << 7)) 955 { 956 // round-trip through void* to silence 'cast increases required alignment of target type' warnings 957 uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset)); 958 959 if (*base == 0) 960 { 961 *base = static_cast<uint16_t>((offset >> 7) + 1); 962 _data = static_cast<unsigned char>((offset & 127) + 1); 963 } 964 else 965 { 966 ptrdiff_t remainder = offset - ((*base - 1) << 7); 967 968 if (static_cast<uintptr_t>(remainder) <= 253) 969 { 970 _data = static_cast<unsigned char>(remainder + 1); 971 } 972 else 973 { 974 compact_set_value<header_offset>(this, value); 975 976 _data = 255; 977 } 978 } 979 } 980 else 981 { 982 compact_set_value<header_offset>(this, value); 983 984 _data = 255; 985 } 986 } 987 else 988 { 989 _data = 0; 990 } 991 } 992 operator char_t*() const993 operator char_t*() const 994 { 995 if (_data) 996 { 997 if (_data < 255) 998 { 999 xml_memory_page* page = compact_get_page(this, header_offset); 1000 1001 // round-trip through void* to silence 'cast increases required alignment of target type' warnings 1002 const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset)); 1003 assert(*base); 1004 1005 ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1); 1006 1007 return page->compact_string_base + offset; 1008 } 1009 else 1010 { 1011 return compact_get_value<header_offset, char_t>(this); 1012 } 1013 } 1014 else 1015 return 0; 1016 } 1017 1018 private: 1019 unsigned 
char _data; 1020 }; 1021 PUGI__NS_END 1022 #endif 1023 1024 #ifdef PUGIXML_COMPACT 1025 namespace pugi 1026 { 1027 struct xml_attribute_struct 1028 { xml_attribute_structpugi::xml_attribute_struct1029 xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0) 1030 { 1031 PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8); 1032 } 1033 1034 impl::compact_header header; 1035 1036 uint16_t namevalue_base; 1037 1038 impl::compact_string<4, 2> name; 1039 impl::compact_string<5, 3> value; 1040 1041 impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c; 1042 impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute; 1043 }; 1044 1045 struct xml_node_struct 1046 { xml_node_structpugi::xml_node_struct1047 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0) 1048 { 1049 PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12); 1050 } 1051 1052 impl::compact_header header; 1053 1054 uint16_t namevalue_base; 1055 1056 impl::compact_string<4, 2> name; 1057 impl::compact_string<5, 3> value; 1058 1059 impl::compact_pointer_parent<xml_node_struct, 6> parent; 1060 1061 impl::compact_pointer<xml_node_struct, 8, 0> first_child; 1062 1063 impl::compact_pointer<xml_node_struct, 9> prev_sibling_c; 1064 impl::compact_pointer<xml_node_struct, 10, 0> next_sibling; 1065 1066 impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute; 1067 }; 1068 } 1069 #else 1070 namespace pugi 1071 { 1072 struct xml_attribute_struct 1073 { 1074 xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0) 1075 { 1076 header = PUGI__GETHEADER_IMPL(this, page, 0); 1077 } 1078 1079 uintptr_t header; 1080 1081 char_t* name; 1082 char_t* value; 1083 1084 xml_attribute_struct* prev_attribute_c; 1085 xml_attribute_struct* next_attribute; 1086 }; 1087 1088 struct xml_node_struct 1089 { 1090 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), 
value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) 1091 { 1092 header = PUGI__GETHEADER_IMPL(this, page, type); 1093 } 1094 1095 uintptr_t header; 1096 1097 char_t* name; 1098 char_t* value; 1099 1100 xml_node_struct* parent; 1101 1102 xml_node_struct* first_child; 1103 1104 xml_node_struct* prev_sibling_c; 1105 xml_node_struct* next_sibling; 1106 1107 xml_attribute_struct* first_attribute; 1108 }; 1109 } 1110 #endif 1111 1112 PUGI__NS_BEGIN 1113 struct xml_extra_buffer 1114 { 1115 char_t* buffer; 1116 xml_extra_buffer* next; 1117 }; 1118 1119 struct xml_document_struct: public xml_node_struct, public xml_allocator 1120 { xml_document_structxml_document_struct1121 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0) 1122 { 1123 } 1124 1125 const char_t* buffer; 1126 1127 xml_extra_buffer* extra_buffers; 1128 1129 #ifdef PUGIXML_COMPACT 1130 compact_hash_table hash; 1131 #endif 1132 }; 1133 get_allocator(const Object * object)1134 template <typename Object> inline xml_allocator& get_allocator(const Object* object) 1135 { 1136 assert(object); 1137 1138 return *PUGI__GETPAGE(object)->allocator; 1139 } 1140 get_document(const Object * object)1141 template <typename Object> inline xml_document_struct& get_document(const Object* object) 1142 { 1143 assert(object); 1144 1145 return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator); 1146 } 1147 PUGI__NS_END 1148 1149 // Low-level DOM operations 1150 PUGI__NS_BEGIN allocate_attribute(xml_allocator & alloc)1151 inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc) 1152 { 1153 xml_memory_page* page; 1154 void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page); 1155 if (!memory) return 0; 1156 1157 return new (memory) xml_attribute_struct(page); 1158 } 1159 allocate_node(xml_allocator & alloc,xml_node_type type)1160 inline xml_node_struct* 
allocate_node(xml_allocator& alloc, xml_node_type type) 1161 { 1162 xml_memory_page* page; 1163 void* memory = alloc.allocate_object(sizeof(xml_node_struct), page); 1164 if (!memory) return 0; 1165 1166 return new (memory) xml_node_struct(page, type); 1167 } 1168 destroy_attribute(xml_attribute_struct * a,xml_allocator & alloc)1169 inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc) 1170 { 1171 if (a->header & impl::xml_memory_page_name_allocated_mask) 1172 alloc.deallocate_string(a->name); 1173 1174 if (a->header & impl::xml_memory_page_value_allocated_mask) 1175 alloc.deallocate_string(a->value); 1176 1177 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a)); 1178 } 1179 destroy_node(xml_node_struct * n,xml_allocator & alloc)1180 inline void destroy_node(xml_node_struct* n, xml_allocator& alloc) 1181 { 1182 if (n->header & impl::xml_memory_page_name_allocated_mask) 1183 alloc.deallocate_string(n->name); 1184 1185 if (n->header & impl::xml_memory_page_value_allocated_mask) 1186 alloc.deallocate_string(n->value); 1187 1188 for (xml_attribute_struct* attr = n->first_attribute; attr; ) 1189 { 1190 xml_attribute_struct* next = attr->next_attribute; 1191 1192 destroy_attribute(attr, alloc); 1193 1194 attr = next; 1195 } 1196 1197 for (xml_node_struct* child = n->first_child; child; ) 1198 { 1199 xml_node_struct* next = child->next_sibling; 1200 1201 destroy_node(child, alloc); 1202 1203 child = next; 1204 } 1205 1206 alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n)); 1207 } 1208 append_node(xml_node_struct * child,xml_node_struct * node)1209 inline void append_node(xml_node_struct* child, xml_node_struct* node) 1210 { 1211 child->parent = node; 1212 1213 xml_node_struct* head = node->first_child; 1214 1215 if (head) 1216 { 1217 xml_node_struct* tail = head->prev_sibling_c; 1218 1219 tail->next_sibling = child; 1220 child->prev_sibling_c = tail; 1221 head->prev_sibling_c = child; 1222 } 1223 else 1224 { 
1225 node->first_child = child; 1226 child->prev_sibling_c = child; 1227 } 1228 } 1229 prepend_node(xml_node_struct * child,xml_node_struct * node)1230 inline void prepend_node(xml_node_struct* child, xml_node_struct* node) 1231 { 1232 child->parent = node; 1233 1234 xml_node_struct* head = node->first_child; 1235 1236 if (head) 1237 { 1238 child->prev_sibling_c = head->prev_sibling_c; 1239 head->prev_sibling_c = child; 1240 } 1241 else 1242 child->prev_sibling_c = child; 1243 1244 child->next_sibling = head; 1245 node->first_child = child; 1246 } 1247 insert_node_after(xml_node_struct * child,xml_node_struct * node)1248 inline void insert_node_after(xml_node_struct* child, xml_node_struct* node) 1249 { 1250 xml_node_struct* parent = node->parent; 1251 1252 child->parent = parent; 1253 1254 if (node->next_sibling) 1255 node->next_sibling->prev_sibling_c = child; 1256 else 1257 parent->first_child->prev_sibling_c = child; 1258 1259 child->next_sibling = node->next_sibling; 1260 child->prev_sibling_c = node; 1261 1262 node->next_sibling = child; 1263 } 1264 insert_node_before(xml_node_struct * child,xml_node_struct * node)1265 inline void insert_node_before(xml_node_struct* child, xml_node_struct* node) 1266 { 1267 xml_node_struct* parent = node->parent; 1268 1269 child->parent = parent; 1270 1271 if (node->prev_sibling_c->next_sibling) 1272 node->prev_sibling_c->next_sibling = child; 1273 else 1274 parent->first_child = child; 1275 1276 child->prev_sibling_c = node->prev_sibling_c; 1277 child->next_sibling = node; 1278 1279 node->prev_sibling_c = child; 1280 } 1281 remove_node(xml_node_struct * node)1282 inline void remove_node(xml_node_struct* node) 1283 { 1284 xml_node_struct* parent = node->parent; 1285 1286 if (node->next_sibling) 1287 node->next_sibling->prev_sibling_c = node->prev_sibling_c; 1288 else 1289 parent->first_child->prev_sibling_c = node->prev_sibling_c; 1290 1291 if (node->prev_sibling_c->next_sibling) 1292 node->prev_sibling_c->next_sibling = 
node->next_sibling; 1293 else 1294 parent->first_child = node->next_sibling; 1295 1296 node->parent = 0; 1297 node->prev_sibling_c = 0; 1298 node->next_sibling = 0; 1299 } 1300 append_attribute(xml_attribute_struct * attr,xml_node_struct * node)1301 inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node) 1302 { 1303 xml_attribute_struct* head = node->first_attribute; 1304 1305 if (head) 1306 { 1307 xml_attribute_struct* tail = head->prev_attribute_c; 1308 1309 tail->next_attribute = attr; 1310 attr->prev_attribute_c = tail; 1311 head->prev_attribute_c = attr; 1312 } 1313 else 1314 { 1315 node->first_attribute = attr; 1316 attr->prev_attribute_c = attr; 1317 } 1318 } 1319 prepend_attribute(xml_attribute_struct * attr,xml_node_struct * node)1320 inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node) 1321 { 1322 xml_attribute_struct* head = node->first_attribute; 1323 1324 if (head) 1325 { 1326 attr->prev_attribute_c = head->prev_attribute_c; 1327 head->prev_attribute_c = attr; 1328 } 1329 else 1330 attr->prev_attribute_c = attr; 1331 1332 attr->next_attribute = head; 1333 node->first_attribute = attr; 1334 } 1335 insert_attribute_after(xml_attribute_struct * attr,xml_attribute_struct * place,xml_node_struct * node)1336 inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) 1337 { 1338 if (place->next_attribute) 1339 place->next_attribute->prev_attribute_c = attr; 1340 else 1341 node->first_attribute->prev_attribute_c = attr; 1342 1343 attr->next_attribute = place->next_attribute; 1344 attr->prev_attribute_c = place; 1345 place->next_attribute = attr; 1346 } 1347 insert_attribute_before(xml_attribute_struct * attr,xml_attribute_struct * place,xml_node_struct * node)1348 inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) 1349 { 1350 if (place->prev_attribute_c->next_attribute) 1351 
place->prev_attribute_c->next_attribute = attr; 1352 else 1353 node->first_attribute = attr; 1354 1355 attr->prev_attribute_c = place->prev_attribute_c; 1356 attr->next_attribute = place; 1357 place->prev_attribute_c = attr; 1358 } 1359 remove_attribute(xml_attribute_struct * attr,xml_node_struct * node)1360 inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node) 1361 { 1362 if (attr->next_attribute) 1363 attr->next_attribute->prev_attribute_c = attr->prev_attribute_c; 1364 else 1365 node->first_attribute->prev_attribute_c = attr->prev_attribute_c; 1366 1367 if (attr->prev_attribute_c->next_attribute) 1368 attr->prev_attribute_c->next_attribute = attr->next_attribute; 1369 else 1370 node->first_attribute = attr->next_attribute; 1371 1372 attr->prev_attribute_c = 0; 1373 attr->next_attribute = 0; 1374 } 1375 append_new_node(xml_node_struct * node,xml_allocator & alloc,xml_node_type type=node_element)1376 PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) 1377 { 1378 if (!alloc.reserve()) return 0; 1379 1380 xml_node_struct* child = allocate_node(alloc, type); 1381 if (!child) return 0; 1382 1383 append_node(child, node); 1384 1385 return child; 1386 } 1387 append_new_attribute(xml_node_struct * node,xml_allocator & alloc)1388 PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc) 1389 { 1390 if (!alloc.reserve()) return 0; 1391 1392 xml_attribute_struct* attr = allocate_attribute(alloc); 1393 if (!attr) return 0; 1394 1395 append_attribute(attr, node); 1396 1397 return attr; 1398 } 1399 PUGI__NS_END 1400 1401 // Helper classes for code generation 1402 PUGI__NS_BEGIN 1403 struct opt_false 1404 { 1405 enum { value = 0 }; 1406 }; 1407 1408 struct opt_true 1409 { 1410 enum { value = 1 }; 1411 }; 1412 PUGI__NS_END 1413 1414 // Unicode utilities 1415 PUGI__NS_BEGIN endian_swap(uint16_t value)1416 inline uint16_t 
endian_swap(uint16_t value) 1417 { 1418 return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8)); 1419 } 1420 endian_swap(uint32_t value)1421 inline uint32_t endian_swap(uint32_t value) 1422 { 1423 return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24); 1424 } 1425 1426 struct utf8_counter 1427 { 1428 typedef size_t value_type; 1429 lowutf8_counter1430 static value_type low(value_type result, uint32_t ch) 1431 { 1432 // U+0000..U+007F 1433 if (ch < 0x80) return result + 1; 1434 // U+0080..U+07FF 1435 else if (ch < 0x800) return result + 2; 1436 // U+0800..U+FFFF 1437 else return result + 3; 1438 } 1439 highutf8_counter1440 static value_type high(value_type result, uint32_t) 1441 { 1442 // U+10000..U+10FFFF 1443 return result + 4; 1444 } 1445 }; 1446 1447 struct utf8_writer 1448 { 1449 typedef uint8_t* value_type; 1450 lowutf8_writer1451 static value_type low(value_type result, uint32_t ch) 1452 { 1453 // U+0000..U+007F 1454 if (ch < 0x80) 1455 { 1456 *result = static_cast<uint8_t>(ch); 1457 return result + 1; 1458 } 1459 // U+0080..U+07FF 1460 else if (ch < 0x800) 1461 { 1462 result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6)); 1463 result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F)); 1464 return result + 2; 1465 } 1466 // U+0800..U+FFFF 1467 else 1468 { 1469 result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12)); 1470 result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F)); 1471 result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F)); 1472 return result + 3; 1473 } 1474 } 1475 highutf8_writer1476 static value_type high(value_type result, uint32_t ch) 1477 { 1478 // U+10000..U+10FFFF 1479 result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18)); 1480 result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F)); 1481 result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F)); 1482 result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F)); 1483 return result + 4; 1484 } 1485 anyutf8_writer1486 static value_type 
any(value_type result, uint32_t ch) 1487 { 1488 return (ch < 0x10000) ? low(result, ch) : high(result, ch); 1489 } 1490 }; 1491 1492 struct utf16_counter 1493 { 1494 typedef size_t value_type; 1495 lowutf16_counter1496 static value_type low(value_type result, uint32_t) 1497 { 1498 return result + 1; 1499 } 1500 highutf16_counter1501 static value_type high(value_type result, uint32_t) 1502 { 1503 return result + 2; 1504 } 1505 }; 1506 1507 struct utf16_writer 1508 { 1509 typedef uint16_t* value_type; 1510 lowutf16_writer1511 static value_type low(value_type result, uint32_t ch) 1512 { 1513 *result = static_cast<uint16_t>(ch); 1514 1515 return result + 1; 1516 } 1517 highutf16_writer1518 static value_type high(value_type result, uint32_t ch) 1519 { 1520 uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10; 1521 uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff; 1522 1523 result[0] = static_cast<uint16_t>(0xD800 + msh); 1524 result[1] = static_cast<uint16_t>(0xDC00 + lsh); 1525 1526 return result + 2; 1527 } 1528 anyutf16_writer1529 static value_type any(value_type result, uint32_t ch) 1530 { 1531 return (ch < 0x10000) ? 
low(result, ch) : high(result, ch); 1532 } 1533 }; 1534 1535 struct utf32_counter 1536 { 1537 typedef size_t value_type; 1538 lowutf32_counter1539 static value_type low(value_type result, uint32_t) 1540 { 1541 return result + 1; 1542 } 1543 highutf32_counter1544 static value_type high(value_type result, uint32_t) 1545 { 1546 return result + 1; 1547 } 1548 }; 1549 1550 struct utf32_writer 1551 { 1552 typedef uint32_t* value_type; 1553 lowutf32_writer1554 static value_type low(value_type result, uint32_t ch) 1555 { 1556 *result = ch; 1557 1558 return result + 1; 1559 } 1560 highutf32_writer1561 static value_type high(value_type result, uint32_t ch) 1562 { 1563 *result = ch; 1564 1565 return result + 1; 1566 } 1567 anyutf32_writer1568 static value_type any(value_type result, uint32_t ch) 1569 { 1570 *result = ch; 1571 1572 return result + 1; 1573 } 1574 }; 1575 1576 struct latin1_writer 1577 { 1578 typedef uint8_t* value_type; 1579 lowlatin1_writer1580 static value_type low(value_type result, uint32_t ch) 1581 { 1582 *result = static_cast<uint8_t>(ch > 255 ? '?' 
: ch); 1583 1584 return result + 1; 1585 } 1586 highlatin1_writer1587 static value_type high(value_type result, uint32_t ch) 1588 { 1589 (void)ch; 1590 1591 *result = '?'; 1592 1593 return result + 1; 1594 } 1595 }; 1596 1597 struct utf8_decoder 1598 { 1599 typedef uint8_t type; 1600 processutf8_decoder1601 template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) 1602 { 1603 const uint8_t utf8_byte_mask = 0x3f; 1604 1605 while (size) 1606 { 1607 uint8_t lead = *data; 1608 1609 // 0xxxxxxx -> U+0000..U+007F 1610 if (lead < 0x80) 1611 { 1612 result = Traits::low(result, lead); 1613 data += 1; 1614 size -= 1; 1615 1616 // process aligned single-byte (ascii) blocks 1617 if ((reinterpret_cast<uintptr_t>(data) & 3) == 0) 1618 { 1619 // round-trip through void* to silence 'cast increases required alignment of target type' warnings 1620 while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0) 1621 { 1622 result = Traits::low(result, data[0]); 1623 result = Traits::low(result, data[1]); 1624 result = Traits::low(result, data[2]); 1625 result = Traits::low(result, data[3]); 1626 data += 4; 1627 size -= 4; 1628 } 1629 } 1630 } 1631 // 110xxxxx -> U+0080..U+07FF 1632 else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80) 1633 { 1634 result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask)); 1635 data += 2; 1636 size -= 2; 1637 } 1638 // 1110xxxx -> U+0800-U+FFFF 1639 else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80) 1640 { 1641 result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask)); 1642 data += 3; 1643 size -= 3; 1644 } 1645 // 11110xxx -> U+10000..U+10FFFF 1646 else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && 
(data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80) 1647 { 1648 result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask)); 1649 data += 4; 1650 size -= 4; 1651 } 1652 // 10xxxxxx or 11111xxx -> invalid 1653 else 1654 { 1655 data += 1; 1656 size -= 1; 1657 } 1658 } 1659 1660 return result; 1661 } 1662 }; 1663 1664 template <typename opt_swap> struct utf16_decoder 1665 { 1666 typedef uint16_t type; 1667 processutf16_decoder1668 template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits) 1669 { 1670 while (size) 1671 { 1672 uint16_t lead = opt_swap::value ? endian_swap(*data) : *data; 1673 1674 // U+0000..U+D7FF 1675 if (lead < 0xD800) 1676 { 1677 result = Traits::low(result, lead); 1678 data += 1; 1679 size -= 1; 1680 } 1681 // U+E000..U+FFFF 1682 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000) 1683 { 1684 result = Traits::low(result, lead); 1685 data += 1; 1686 size -= 1; 1687 } 1688 // surrogate pair lead 1689 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2) 1690 { 1691 uint16_t next = opt_swap::value ? 
endian_swap(data[1]) : data[1]; 1692 1693 if (static_cast<unsigned int>(next - 0xDC00) < 0x400) 1694 { 1695 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff)); 1696 data += 2; 1697 size -= 2; 1698 } 1699 else 1700 { 1701 data += 1; 1702 size -= 1; 1703 } 1704 } 1705 else 1706 { 1707 data += 1; 1708 size -= 1; 1709 } 1710 } 1711 1712 return result; 1713 } 1714 }; 1715 1716 template <typename opt_swap> struct utf32_decoder 1717 { 1718 typedef uint32_t type; 1719 processutf32_decoder1720 template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits) 1721 { 1722 while (size) 1723 { 1724 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data; 1725 1726 // U+0000..U+FFFF 1727 if (lead < 0x10000) 1728 { 1729 result = Traits::low(result, lead); 1730 data += 1; 1731 size -= 1; 1732 } 1733 // U+10000..U+10FFFF 1734 else 1735 { 1736 result = Traits::high(result, lead); 1737 data += 1; 1738 size -= 1; 1739 } 1740 } 1741 1742 return result; 1743 } 1744 }; 1745 1746 struct latin1_decoder 1747 { 1748 typedef uint8_t type; 1749 processlatin1_decoder1750 template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) 1751 { 1752 while (size) 1753 { 1754 result = Traits::low(result, *data); 1755 data += 1; 1756 size -= 1; 1757 } 1758 1759 return result; 1760 } 1761 }; 1762 1763 template <size_t size> struct wchar_selector; 1764 1765 template <> struct wchar_selector<2> 1766 { 1767 typedef uint16_t type; 1768 typedef utf16_counter counter; 1769 typedef utf16_writer writer; 1770 typedef utf16_decoder<opt_false> decoder; 1771 }; 1772 1773 template <> struct wchar_selector<4> 1774 { 1775 typedef uint32_t type; 1776 typedef utf32_counter counter; 1777 typedef utf32_writer writer; 1778 typedef utf32_decoder<opt_false> decoder; 1779 }; 1780 1781 typedef 
wchar_selector<sizeof(wchar_t)>::counter wchar_counter; 1782 typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer; 1783 1784 struct wchar_decoder 1785 { 1786 typedef wchar_t type; 1787 processwchar_decoder1788 template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits) 1789 { 1790 typedef wchar_selector<sizeof(wchar_t)>::decoder decoder; 1791 1792 return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits); 1793 } 1794 }; 1795 1796 #ifdef PUGIXML_WCHAR_MODE convert_wchar_endian_swap(wchar_t * result,const wchar_t * data,size_t length)1797 PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length) 1798 { 1799 for (size_t i = 0; i < length; ++i) 1800 result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i]))); 1801 } 1802 #endif 1803 PUGI__NS_END 1804 1805 PUGI__NS_BEGIN 1806 enum chartype_t 1807 { 1808 ct_parse_pcdata = 1, // \0, &, \r, < 1809 ct_parse_attr = 2, // \0, &, \r, ', " 1810 ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab 1811 ct_space = 8, // \r, \n, space, tab 1812 ct_parse_cdata = 16, // \0, ], >, \r 1813 ct_parse_comment = 32, // \0, -, >, \r 1814 ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, . 
1815 ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, : 1816 }; 1817 1818 static const unsigned char chartype_table[256] = 1819 { 1820 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15 1821 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 1822 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47 1823 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63 1824 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79 1825 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95 1826 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111 1827 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127 1828 1829 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+ 1830 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1831 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1832 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1833 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1834 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1835 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1836 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192 1837 }; 1838 1839 enum chartypex_t 1840 { 1841 ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, > 1842 ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, " 1843 ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _ 1844 ctx_digit = 8, // 0-9 1845 ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, . 
1846 }; 1847 1848 static const unsigned char chartypex_table[256] = 1849 { 1850 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15 1851 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31 1852 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47 1853 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63 1854 1855 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79 1856 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95 1857 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111 1858 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127 1859 1860 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+ 1861 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1862 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1863 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1864 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1865 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1866 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1867 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 1868 }; 1869 1870 #ifdef PUGIXML_WCHAR_MODE 1871 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? 
table[static_cast<unsigned int>(c)] : table[128]) & (ct)) 1872 #else 1873 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct)) 1874 #endif 1875 1876 #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table) 1877 #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table) 1878 is_little_endian()1879 PUGI__FN bool is_little_endian() 1880 { 1881 unsigned int ui = 1; 1882 1883 return *reinterpret_cast<unsigned char*>(&ui) == 1; 1884 } 1885 get_wchar_encoding()1886 PUGI__FN xml_encoding get_wchar_encoding() 1887 { 1888 PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); 1889 1890 if (sizeof(wchar_t) == 2) 1891 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 1892 else 1893 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; 1894 } 1895 parse_declaration_encoding(const uint8_t * data,size_t size,const uint8_t * & out_encoding,size_t & out_length)1896 PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length) 1897 { 1898 #define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; } 1899 #define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; } 1900 1901 // check if we have a non-empty XML declaration 1902 if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space))) 1903 return false; 1904 1905 // scan XML declaration until the encoding field 1906 for (size_t i = 6; i + 1 < size; ++i) 1907 { 1908 // declaration can not contain ? 
in quoted values 1909 if (data[i] == '?') 1910 return false; 1911 1912 if (data[i] == 'e' && data[i + 1] == 'n') 1913 { 1914 size_t offset = i; 1915 1916 // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed 1917 PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o'); 1918 PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g'); 1919 1920 // S? = S? 1921 PUGI__SCANCHARTYPE(ct_space); 1922 PUGI__SCANCHAR('='); 1923 PUGI__SCANCHARTYPE(ct_space); 1924 1925 // the only two valid delimiters are ' and " 1926 uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\''; 1927 1928 PUGI__SCANCHAR(delimiter); 1929 1930 size_t start = offset; 1931 1932 out_encoding = data + offset; 1933 1934 PUGI__SCANCHARTYPE(ct_symbol); 1935 1936 out_length = offset - start; 1937 1938 PUGI__SCANCHAR(delimiter); 1939 1940 return true; 1941 } 1942 } 1943 1944 return false; 1945 1946 #undef PUGI__SCANCHAR 1947 #undef PUGI__SCANCHARTYPE 1948 } 1949 guess_buffer_encoding(const uint8_t * data,size_t size)1950 PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size) 1951 { 1952 // skip encoding autodetection if input buffer is too small 1953 if (size < 4) return encoding_utf8; 1954 1955 uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3]; 1956 1957 // look for BOM in first few bytes 1958 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be; 1959 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le; 1960 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be; 1961 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le; 1962 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8; 1963 1964 // look for <, <? 
or <?xm in various encodings 1965 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be; 1966 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le; 1967 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be; 1968 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le; 1969 1970 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early) 1971 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be; 1972 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le; 1973 1974 // no known BOM detected; parse declaration 1975 const uint8_t* enc = 0; 1976 size_t enc_length = 0; 1977 1978 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length)) 1979 { 1980 // iso-8859-1 (case-insensitive) 1981 if (enc_length == 10 1982 && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o' 1983 && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9' 1984 && enc[8] == '-' && enc[9] == '1') 1985 return encoding_latin1; 1986 1987 // latin1 (case-insensitive) 1988 if (enc_length == 6 1989 && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't' 1990 && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n' 1991 && enc[5] == '1') 1992 return encoding_latin1; 1993 } 1994 1995 return encoding_utf8; 1996 } 1997 get_buffer_encoding(xml_encoding encoding,const void * contents,size_t size)1998 PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size) 1999 { 2000 // replace wchar encoding with utf implementation 2001 if (encoding == encoding_wchar) return get_wchar_encoding(); 2002 2003 // replace utf16 encoding with utf16 with specific endianness 2004 if (encoding == encoding_utf16) return is_little_endian() ? 
encoding_utf16_le : encoding_utf16_be; 2005 2006 // replace utf32 encoding with utf32 with specific endianness 2007 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; 2008 2009 // only do autodetection if no explicit encoding is requested 2010 if (encoding != encoding_auto) return encoding; 2011 2012 // try to guess encoding (based on XML specification, Appendix F.1) 2013 const uint8_t* data = static_cast<const uint8_t*>(contents); 2014 2015 return guess_buffer_encoding(data, size); 2016 } 2017 get_mutable_buffer(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2018 PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) 2019 { 2020 size_t length = size / sizeof(char_t); 2021 2022 if (is_mutable) 2023 { 2024 out_buffer = static_cast<char_t*>(const_cast<void*>(contents)); 2025 out_length = length; 2026 } 2027 else 2028 { 2029 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2030 if (!buffer) return false; 2031 2032 if (contents) 2033 memcpy(buffer, contents, length * sizeof(char_t)); 2034 else 2035 assert(length == 0); 2036 2037 buffer[length] = 0; 2038 2039 out_buffer = buffer; 2040 out_length = length + 1; 2041 } 2042 2043 return true; 2044 } 2045 2046 #ifdef PUGIXML_WCHAR_MODE need_endian_swap_utf(xml_encoding le,xml_encoding re)2047 PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re) 2048 { 2049 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) || 2050 (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be); 2051 } 2052 convert_buffer_endian_swap(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2053 PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const 
void* contents, size_t size, bool is_mutable) 2054 { 2055 const char_t* data = static_cast<const char_t*>(contents); 2056 size_t length = size / sizeof(char_t); 2057 2058 if (is_mutable) 2059 { 2060 char_t* buffer = const_cast<char_t*>(data); 2061 2062 convert_wchar_endian_swap(buffer, data, length); 2063 2064 out_buffer = buffer; 2065 out_length = length; 2066 } 2067 else 2068 { 2069 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2070 if (!buffer) return false; 2071 2072 convert_wchar_endian_swap(buffer, data, length); 2073 buffer[length] = 0; 2074 2075 out_buffer = buffer; 2076 out_length = length + 1; 2077 } 2078 2079 return true; 2080 } 2081 convert_buffer_generic(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,D)2082 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) 2083 { 2084 const typename D::type* data = static_cast<const typename D::type*>(contents); 2085 size_t data_length = size / sizeof(typename D::type); 2086 2087 // first pass: get length in wchar_t units 2088 size_t length = D::process(data, data_length, 0, wchar_counter()); 2089 2090 // allocate buffer of suitable length 2091 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2092 if (!buffer) return false; 2093 2094 // second pass: convert utf16 input to wchar_t 2095 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer); 2096 wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer()); 2097 2098 assert(oend == obegin + length); 2099 *oend = 0; 2100 2101 out_buffer = buffer; 2102 out_length = length + 1; 2103 2104 return true; 2105 } 2106 convert_buffer(char_t * & out_buffer,size_t & out_length,xml_encoding encoding,const void * contents,size_t size,bool is_mutable)2107 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, 
xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
{
	// get native encoding
	xml_encoding wchar_encoding = get_wchar_encoding();

	// fast path: no conversion required
	if (encoding == wchar_encoding)
		return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);

	// only endian-swapping is required
	if (need_endian_swap_utf(encoding, wchar_encoding))
		return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);

	// source encoding is utf8
	if (encoding == encoding_utf8)
		return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());

	// source encoding is utf16
	if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
	{
		xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;

		// the decoder's template flag selects byte swapping for non-native input
		return (native_encoding == encoding) ?
			convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
			convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
	}

	// source encoding is utf32
	if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
	{
		xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;

		return (native_encoding == encoding) ?
2140 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) : 2141 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>()); 2142 } 2143 2144 // source encoding is latin1 2145 if (encoding == encoding_latin1) 2146 return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder()); 2147 2148 assert(false && "Invalid encoding"); 2149 return false; 2150 } 2151 #else convert_buffer_generic(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,D)2152 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) 2153 { 2154 const typename D::type* data = static_cast<const typename D::type*>(contents); 2155 size_t data_length = size / sizeof(typename D::type); 2156 2157 // first pass: get length in utf8 units 2158 size_t length = D::process(data, data_length, 0, utf8_counter()); 2159 2160 // allocate buffer of suitable length 2161 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2162 if (!buffer) return false; 2163 2164 // second pass: convert utf16 input to utf8 2165 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer); 2166 uint8_t* oend = D::process(data, data_length, obegin, utf8_writer()); 2167 2168 assert(oend == obegin + length); 2169 *oend = 0; 2170 2171 out_buffer = buffer; 2172 out_length = length + 1; 2173 2174 return true; 2175 } 2176 get_latin1_7bit_prefix_length(const uint8_t * data,size_t size)2177 PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size) 2178 { 2179 for (size_t i = 0; i < size; ++i) 2180 if (data[i] > 127) 2181 return i; 2182 2183 return size; 2184 } 2185 convert_buffer_latin1(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2186 PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool 
is_mutable) 2187 { 2188 const uint8_t* data = static_cast<const uint8_t*>(contents); 2189 size_t data_length = size; 2190 2191 // get size of prefix that does not need utf8 conversion 2192 size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length); 2193 assert(prefix_length <= data_length); 2194 2195 const uint8_t* postfix = data + prefix_length; 2196 size_t postfix_length = data_length - prefix_length; 2197 2198 // if no conversion is needed, just return the original buffer 2199 if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); 2200 2201 // first pass: get length in utf8 units 2202 size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter()); 2203 2204 // allocate buffer of suitable length 2205 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2206 if (!buffer) return false; 2207 2208 // second pass: convert latin1 input to utf8 2209 memcpy(buffer, data, prefix_length); 2210 2211 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer); 2212 uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer()); 2213 2214 assert(oend == obegin + length); 2215 *oend = 0; 2216 2217 out_buffer = buffer; 2218 out_length = length + 1; 2219 2220 return true; 2221 } 2222 convert_buffer(char_t * & out_buffer,size_t & out_length,xml_encoding encoding,const void * contents,size_t size,bool is_mutable)2223 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) 2224 { 2225 // fast path: no conversion required 2226 if (encoding == encoding_utf8) 2227 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); 2228 2229 // source encoding is utf16 2230 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) 2231 { 2232 xml_encoding native_encoding = is_little_endian() ? 
encoding_utf16_le : encoding_utf16_be; 2233 2234 return (native_encoding == encoding) ? 2235 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) : 2236 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>()); 2237 } 2238 2239 // source encoding is utf32 2240 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) 2241 { 2242 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; 2243 2244 return (native_encoding == encoding) ? 2245 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) : 2246 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>()); 2247 } 2248 2249 // source encoding is latin1 2250 if (encoding == encoding_latin1) 2251 return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable); 2252 2253 assert(false && "Invalid encoding"); 2254 return false; 2255 } 2256 #endif 2257 as_utf8_begin(const wchar_t * str,size_t length)2258 PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length) 2259 { 2260 // get length in utf8 characters 2261 return wchar_decoder::process(str, length, 0, utf8_counter()); 2262 } 2263 as_utf8_end(char * buffer,size_t size,const wchar_t * str,size_t length)2264 PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length) 2265 { 2266 // convert to utf8 2267 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer); 2268 uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer()); 2269 2270 assert(begin + size == end); 2271 (void)!end; 2272 (void)!size; 2273 } 2274 2275 #ifndef PUGIXML_NO_STL as_utf8_impl(const wchar_t * str,size_t length)2276 PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length) 2277 { 2278 // first pass: get length in utf8 characters 2279 size_t size = as_utf8_begin(str, length); 2280 2281 // allocate resulting string 2282 std::string result; 2283 
result.resize(size); 2284 2285 // second pass: convert to utf8 2286 if (size > 0) as_utf8_end(&result[0], size, str, length); 2287 2288 return result; 2289 } 2290 as_wide_impl(const char * str,size_t size)2291 PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size) 2292 { 2293 const uint8_t* data = reinterpret_cast<const uint8_t*>(str); 2294 2295 // first pass: get length in wchar_t units 2296 size_t length = utf8_decoder::process(data, size, 0, wchar_counter()); 2297 2298 // allocate resulting string 2299 std::basic_string<wchar_t> result; 2300 result.resize(length); 2301 2302 // second pass: convert to wchar_t 2303 if (length > 0) 2304 { 2305 wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]); 2306 wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer()); 2307 2308 assert(begin + length == end); 2309 (void)!end; 2310 } 2311 2312 return result; 2313 } 2314 #endif 2315 2316 template <typename Header> strcpy_insitu_allow(size_t length,const Header & header,uintptr_t header_mask,char_t * target)2317 inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target) 2318 { 2319 // never reuse shared memory 2320 if (header & xml_memory_page_contents_shared_mask) return false; 2321 2322 size_t target_length = strlength(target); 2323 2324 // always reuse document buffer memory if possible 2325 if ((header & header_mask) == 0) return target_length >= length; 2326 2327 // reuse heap memory if waste is not too great 2328 const size_t reuse_threshold = 32; 2329 2330 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2); 2331 } 2332 2333 template <typename String, typename Header> strcpy_insitu(String & dest,Header & header,uintptr_t header_mask,const char_t * source,size_t source_length)2334 PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* 
source, size_t source_length) 2335 { 2336 if (source_length == 0) 2337 { 2338 // empty string and null pointer are equivalent, so just deallocate old memory 2339 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; 2340 2341 if (header & header_mask) alloc->deallocate_string(dest); 2342 2343 // mark the string as not allocated 2344 dest = 0; 2345 header &= ~header_mask; 2346 2347 return true; 2348 } 2349 else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest)) 2350 { 2351 // we can reuse old buffer, so just copy the new data (including zero terminator) 2352 memcpy(dest, source, source_length * sizeof(char_t)); 2353 dest[source_length] = 0; 2354 2355 return true; 2356 } 2357 else 2358 { 2359 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; 2360 2361 if (!alloc->reserve()) return false; 2362 2363 // allocate new buffer 2364 char_t* buf = alloc->allocate_string(source_length + 1); 2365 if (!buf) return false; 2366 2367 // copy the string (including zero terminator) 2368 memcpy(buf, source, source_length * sizeof(char_t)); 2369 buf[source_length] = 0; 2370 2371 // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures) 2372 if (header & header_mask) alloc->deallocate_string(dest); 2373 2374 // the string is now allocated, so set the flag 2375 dest = buf; 2376 header |= header_mask; 2377 2378 return true; 2379 } 2380 } 2381 2382 struct gap 2383 { 2384 char_t* end; 2385 size_t size; 2386 gapgap2387 gap(): end(0), size(0) 2388 { 2389 } 2390 2391 // Push new gap, move s count bytes further (skipping the gap). 2392 // Collapse previous gap. pushgap2393 void push(char_t*& s, size_t count) 2394 { 2395 if (end) // there was a gap already; collapse it 2396 { 2397 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...) 
2398 assert(s >= end); 2399 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end)); 2400 } 2401 2402 s += count; // end of current gap 2403 2404 // "merge" two gaps 2405 end = s; 2406 size += count; 2407 } 2408 2409 // Collapse all gaps, return past-the-end pointer flushgap2410 char_t* flush(char_t* s) 2411 { 2412 if (end) 2413 { 2414 // Move [old_gap_end, current_pos) to [old_gap_start, ...) 2415 assert(s >= end); 2416 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end)); 2417 2418 return s - size; 2419 } 2420 else return s; 2421 } 2422 }; 2423 strconv_escape(char_t * s,gap & g)2424 PUGI__FN char_t* strconv_escape(char_t* s, gap& g) 2425 { 2426 char_t* stre = s + 1; 2427 2428 switch (*stre) 2429 { 2430 case '#': // &#... 2431 { 2432 unsigned int ucsc = 0; 2433 2434 if (stre[1] == 'x') // &#x... (hex code) 2435 { 2436 stre += 2; 2437 2438 char_t ch = *stre; 2439 2440 if (ch == ';') return stre; 2441 2442 for (;;) 2443 { 2444 if (static_cast<unsigned int>(ch - '0') <= 9) 2445 ucsc = 16 * ucsc + (ch - '0'); 2446 else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5) 2447 ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10); 2448 else if (ch == ';') 2449 break; 2450 else // cancel 2451 return stre; 2452 2453 ch = *++stre; 2454 } 2455 2456 ++stre; 2457 } 2458 else // &#... 
(dec code) 2459 { 2460 char_t ch = *++stre; 2461 2462 if (ch == ';') return stre; 2463 2464 for (;;) 2465 { 2466 if (static_cast<unsigned int>(static_cast<unsigned int>(ch) - '0') <= 9) 2467 ucsc = 10 * ucsc + (ch - '0'); 2468 else if (ch == ';') 2469 break; 2470 else // cancel 2471 return stre; 2472 2473 ch = *++stre; 2474 } 2475 2476 ++stre; 2477 } 2478 2479 #ifdef PUGIXML_WCHAR_MODE 2480 s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc)); 2481 #else 2482 s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc)); 2483 #endif 2484 2485 g.push(s, stre - s); 2486 return stre; 2487 } 2488 2489 case 'a': // &a 2490 { 2491 ++stre; 2492 2493 if (*stre == 'm') // &am 2494 { 2495 if (*++stre == 'p' && *++stre == ';') // & 2496 { 2497 *s++ = '&'; 2498 ++stre; 2499 2500 g.push(s, stre - s); 2501 return stre; 2502 } 2503 } 2504 else if (*stre == 'p') // &ap 2505 { 2506 if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // ' 2507 { 2508 *s++ = '\''; 2509 ++stre; 2510 2511 g.push(s, stre - s); 2512 return stre; 2513 } 2514 } 2515 break; 2516 } 2517 2518 case 'g': // &g 2519 { 2520 if (*++stre == 't' && *++stre == ';') // > 2521 { 2522 *s++ = '>'; 2523 ++stre; 2524 2525 g.push(s, stre - s); 2526 return stre; 2527 } 2528 break; 2529 } 2530 2531 case 'l': // &l 2532 { 2533 if (*++stre == 't' && *++stre == ';') // < 2534 { 2535 *s++ = '<'; 2536 ++stre; 2537 2538 g.push(s, stre - s); 2539 return stre; 2540 } 2541 break; 2542 } 2543 2544 case 'q': // &q 2545 { 2546 if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // " 2547 { 2548 *s++ = '"'; 2549 ++stre; 2550 2551 g.push(s, stre - s); 2552 return stre; 2553 } 2554 break; 2555 } 2556 2557 default: 2558 break; 2559 } 2560 2561 return stre; 2562 } 2563 2564 // Parser utilities 2565 #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e))) 2566 #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; } 
2567 #define PUGI__OPTSET(OPT) ( optmsk & (OPT) ) 2568 #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); } 2569 #define PUGI__POPNODE() { cursor = cursor->parent; } 2570 #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; } 2571 #define PUGI__SCANWHILE(X) { while (X) ++s; } 2572 #define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } } 2573 #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; } 2574 #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0) 2575 #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); } 2576 strconv_comment(char_t * s,char_t endch)2577 PUGI__FN char_t* strconv_comment(char_t* s, char_t endch) 2578 { 2579 gap g; 2580 2581 while (true) 2582 { 2583 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment)); 2584 2585 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair 2586 { 2587 *s++ = '\n'; // replace first one with 0x0a 2588 2589 if (*s == '\n') g.push(s, 1); 2590 } 2591 else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here 2592 { 2593 *g.flush(s) = 0; 2594 2595 return s + (s[2] == '>' ? 
3 : 2); 2596 } 2597 else if (*s == 0) 2598 { 2599 return 0; 2600 } 2601 else ++s; 2602 } 2603 } 2604 strconv_cdata(char_t * s,char_t endch)2605 PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch) 2606 { 2607 gap g; 2608 2609 while (true) 2610 { 2611 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata)); 2612 2613 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair 2614 { 2615 *s++ = '\n'; // replace first one with 0x0a 2616 2617 if (*s == '\n') g.push(s, 1); 2618 } 2619 else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here 2620 { 2621 *g.flush(s) = 0; 2622 2623 return s + 1; 2624 } 2625 else if (*s == 0) 2626 { 2627 return 0; 2628 } 2629 else ++s; 2630 } 2631 } 2632 2633 typedef char_t* (*strconv_pcdata_t)(char_t*); 2634 2635 template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl 2636 { parsestrconv_pcdata_impl2637 static char_t* parse(char_t* s) 2638 { 2639 gap g; 2640 2641 char_t* begin = s; 2642 2643 while (true) 2644 { 2645 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata)); 2646 2647 if (*s == '<') // PCDATA ends here 2648 { 2649 char_t* end = g.flush(s); 2650 2651 if (opt_trim::value) 2652 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) 2653 --end; 2654 2655 *end = 0; 2656 2657 return s + 1; 2658 } 2659 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair 2660 { 2661 *s++ = '\n'; // replace first one with 0x0a 2662 2663 if (*s == '\n') g.push(s, 1); 2664 } 2665 else if (opt_escape::value && *s == '&') 2666 { 2667 s = strconv_escape(s, g); 2668 } 2669 else if (*s == 0) 2670 { 2671 char_t* end = g.flush(s); 2672 2673 if (opt_trim::value) 2674 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) 2675 --end; 2676 2677 *end = 0; 2678 2679 return s; 2680 } 2681 else ++s; 2682 } 2683 } 2684 }; 2685 get_strconv_pcdata(unsigned int optmask)2686 PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask) 2687 { 
2688 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800); 2689 2690 switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (eol escapes trim) 2691 { 2692 case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse; 2693 case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse; 2694 case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse; 2695 case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse; 2696 case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse; 2697 case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse; 2698 case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse; 2699 case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse; 2700 default: assert(false); return 0; // should not get here 2701 } 2702 } 2703 2704 typedef char_t* (*strconv_attribute_t)(char_t*, char_t); 2705 2706 template <typename opt_escape> struct strconv_attribute_impl 2707 { parse_wnormstrconv_attribute_impl2708 static char_t* parse_wnorm(char_t* s, char_t end_quote) 2709 { 2710 gap g; 2711 2712 // trim leading whitespaces 2713 if (PUGI__IS_CHARTYPE(*s, ct_space)) 2714 { 2715 char_t* str = s; 2716 2717 do ++str; 2718 while (PUGI__IS_CHARTYPE(*str, ct_space)); 2719 2720 g.push(s, str - s); 2721 } 2722 2723 while (true) 2724 { 2725 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space)); 2726 2727 if (*s == end_quote) 2728 { 2729 char_t* str = g.flush(s); 2730 2731 do *str-- = 0; 2732 while (PUGI__IS_CHARTYPE(*str, ct_space)); 2733 2734 return s + 1; 2735 } 2736 else if (PUGI__IS_CHARTYPE(*s, ct_space)) 2737 { 2738 *s++ = ' '; 2739 2740 if (PUGI__IS_CHARTYPE(*s, ct_space)) 2741 { 2742 char_t* str = s + 1; 2743 while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str; 2744 2745 g.push(s, str - s); 2746 } 2747 } 2748 else if (opt_escape::value && *s == '&') 2749 { 2750 s = 
strconv_escape(s, g); 2751 } 2752 else if (!*s) 2753 { 2754 return 0; 2755 } 2756 else ++s; 2757 } 2758 } 2759 parse_wconvstrconv_attribute_impl2760 static char_t* parse_wconv(char_t* s, char_t end_quote) 2761 { 2762 gap g; 2763 2764 while (true) 2765 { 2766 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws)); 2767 2768 if (*s == end_quote) 2769 { 2770 *g.flush(s) = 0; 2771 2772 return s + 1; 2773 } 2774 else if (PUGI__IS_CHARTYPE(*s, ct_space)) 2775 { 2776 if (*s == '\r') 2777 { 2778 *s++ = ' '; 2779 2780 if (*s == '\n') g.push(s, 1); 2781 } 2782 else *s++ = ' '; 2783 } 2784 else if (opt_escape::value && *s == '&') 2785 { 2786 s = strconv_escape(s, g); 2787 } 2788 else if (!*s) 2789 { 2790 return 0; 2791 } 2792 else ++s; 2793 } 2794 } 2795 parse_eolstrconv_attribute_impl2796 static char_t* parse_eol(char_t* s, char_t end_quote) 2797 { 2798 gap g; 2799 2800 while (true) 2801 { 2802 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); 2803 2804 if (*s == end_quote) 2805 { 2806 *g.flush(s) = 0; 2807 2808 return s + 1; 2809 } 2810 else if (*s == '\r') 2811 { 2812 *s++ = '\n'; 2813 2814 if (*s == '\n') g.push(s, 1); 2815 } 2816 else if (opt_escape::value && *s == '&') 2817 { 2818 s = strconv_escape(s, g); 2819 } 2820 else if (!*s) 2821 { 2822 return 0; 2823 } 2824 else ++s; 2825 } 2826 } 2827 parse_simplestrconv_attribute_impl2828 static char_t* parse_simple(char_t* s, char_t end_quote) 2829 { 2830 gap g; 2831 2832 while (true) 2833 { 2834 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); 2835 2836 if (*s == end_quote) 2837 { 2838 *g.flush(s) = 0; 2839 2840 return s + 1; 2841 } 2842 else if (opt_escape::value && *s == '&') 2843 { 2844 s = strconv_escape(s, g); 2845 } 2846 else if (!*s) 2847 { 2848 return 0; 2849 } 2850 else ++s; 2851 } 2852 } 2853 }; 2854 get_strconv_attribute(unsigned int optmask)2855 PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask) 2856 { 2857 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && 
parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80); 2858 2859 switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes) 2860 { 2861 case 0: return strconv_attribute_impl<opt_false>::parse_simple; 2862 case 1: return strconv_attribute_impl<opt_true>::parse_simple; 2863 case 2: return strconv_attribute_impl<opt_false>::parse_eol; 2864 case 3: return strconv_attribute_impl<opt_true>::parse_eol; 2865 case 4: return strconv_attribute_impl<opt_false>::parse_wconv; 2866 case 5: return strconv_attribute_impl<opt_true>::parse_wconv; 2867 case 6: return strconv_attribute_impl<opt_false>::parse_wconv; 2868 case 7: return strconv_attribute_impl<opt_true>::parse_wconv; 2869 case 8: return strconv_attribute_impl<opt_false>::parse_wnorm; 2870 case 9: return strconv_attribute_impl<opt_true>::parse_wnorm; 2871 case 10: return strconv_attribute_impl<opt_false>::parse_wnorm; 2872 case 11: return strconv_attribute_impl<opt_true>::parse_wnorm; 2873 case 12: return strconv_attribute_impl<opt_false>::parse_wnorm; 2874 case 13: return strconv_attribute_impl<opt_true>::parse_wnorm; 2875 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm; 2876 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm; 2877 default: assert(false); return 0; // should not get here 2878 } 2879 } 2880 make_parse_result(xml_parse_status status,ptrdiff_t offset=0)2881 inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0) 2882 { 2883 xml_parse_result result; 2884 result.status = status; 2885 result.offset = offset; 2886 2887 return result; 2888 } 2889 2890 struct xml_parser 2891 { 2892 xml_allocator* alloc; 2893 char_t* error_offset; 2894 xml_parse_status error_status; 2895 xml_parserxml_parser2896 xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok) 2897 { 2898 } 2899 2900 // DOCTYPE consists of nested sections of the following possible types: 2901 // <!-- ... -->, <? 
... ?>, "...", '...' 2902 // <![...]]> 2903 // <!...> 2904 // First group can not contain nested groups 2905 // Second group can contain nested groups of the same type 2906 // Third group can contain all other groups parse_doctype_primitivexml_parser2907 char_t* parse_doctype_primitive(char_t* s) 2908 { 2909 if (*s == '"' || *s == '\'') 2910 { 2911 // quoted string 2912 char_t ch = *s++; 2913 PUGI__SCANFOR(*s == ch); 2914 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); 2915 2916 s++; 2917 } 2918 else if (s[0] == '<' && s[1] == '?') 2919 { 2920 // <? ... ?> 2921 s += 2; 2922 PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype 2923 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); 2924 2925 s += 2; 2926 } 2927 else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-') 2928 { 2929 s += 4; 2930 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype 2931 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); 2932 2933 s += 3; 2934 } 2935 else PUGI__THROW_ERROR(status_bad_doctype, s); 2936 2937 return s; 2938 } 2939 parse_doctype_ignorexml_parser2940 char_t* parse_doctype_ignore(char_t* s) 2941 { 2942 size_t depth = 0; 2943 2944 assert(s[0] == '<' && s[1] == '!' && s[2] == '['); 2945 s += 3; 2946 2947 while (*s) 2948 { 2949 if (s[0] == '<' && s[1] == '!' 
&& s[2] == '[') 2950 { 2951 // nested ignore section 2952 s += 3; 2953 depth++; 2954 } 2955 else if (s[0] == ']' && s[1] == ']' && s[2] == '>') 2956 { 2957 // ignore section end 2958 s += 3; 2959 2960 if (depth == 0) 2961 return s; 2962 2963 depth--; 2964 } 2965 else s++; 2966 } 2967 2968 PUGI__THROW_ERROR(status_bad_doctype, s); 2969 } 2970 parse_doctype_groupxml_parser2971 char_t* parse_doctype_group(char_t* s, char_t endch) 2972 { 2973 size_t depth = 0; 2974 2975 assert((s[0] == '<' || s[0] == 0) && s[1] == '!'); 2976 s += 2; 2977 2978 while (*s) 2979 { 2980 if (s[0] == '<' && s[1] == '!' && s[2] != '-') 2981 { 2982 if (s[2] == '[') 2983 { 2984 // ignore 2985 s = parse_doctype_ignore(s); 2986 if (!s) return s; 2987 } 2988 else 2989 { 2990 // some control group 2991 s += 2; 2992 depth++; 2993 } 2994 } 2995 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') 2996 { 2997 // unknown tag (forbidden), or some primitive group 2998 s = parse_doctype_primitive(s); 2999 if (!s) return s; 3000 } 3001 else if (*s == '>') 3002 { 3003 if (depth == 0) 3004 return s; 3005 3006 depth--; 3007 s++; 3008 } 3009 else s++; 3010 } 3011 3012 if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s); 3013 3014 return s; 3015 } 3016 parse_exclamationxml_parser3017 char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) 3018 { 3019 // parse node contents, starting with exclamation mark 3020 ++s; 3021 3022 if (*s == '-') // '<!-...' 3023 { 3024 ++s; 3025 3026 if (*s == '-') // '<!--...' 3027 { 3028 ++s; 3029 3030 if (PUGI__OPTSET(parse_comments)) 3031 { 3032 PUGI__PUSHNODE(node_comment); // Append a new node on the tree. 3033 cursor->value = s; // Save the offset. 3034 } 3035 3036 if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments)) 3037 { 3038 s = strconv_comment(s, endch); 3039 3040 if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value); 3041 } 3042 else 3043 { 3044 // Scan for terminating '-->'. 
3045 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')); 3046 PUGI__CHECK_ERROR(status_bad_comment, s); 3047 3048 if (PUGI__OPTSET(parse_comments)) 3049 *s = 0; // Zero-terminate this segment at the first terminating '-'. 3050 3051 s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'. 3052 } 3053 } 3054 else PUGI__THROW_ERROR(status_bad_comment, s); 3055 } 3056 else if (*s == '[') 3057 { 3058 // '<![CDATA[...' 3059 if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[') 3060 { 3061 ++s; 3062 3063 if (PUGI__OPTSET(parse_cdata)) 3064 { 3065 PUGI__PUSHNODE(node_cdata); // Append a new node on the tree. 3066 cursor->value = s; // Save the offset. 3067 3068 if (PUGI__OPTSET(parse_eol)) 3069 { 3070 s = strconv_cdata(s, endch); 3071 3072 if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value); 3073 } 3074 else 3075 { 3076 // Scan for terminating ']]>'. 3077 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')); 3078 PUGI__CHECK_ERROR(status_bad_cdata, s); 3079 3080 *s++ = 0; // Zero-terminate this segment. 3081 } 3082 } 3083 else // Flagged for discard, but we still have to scan for the terminator. 3084 { 3085 // Scan for terminating ']]>'. 3086 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')); 3087 PUGI__CHECK_ERROR(status_bad_cdata, s); 3088 3089 ++s; 3090 } 3091 3092 s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'. 
			}
			else PUGI__THROW_ERROR(status_bad_cdata, s);
		}
		else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
		{
			s -= 2;

			// only a top-level cursor (no parent) may hold a doctype
			if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);

			// "<!DOCTYPE" is 9 characters long; presumably s points at its '<' here, so mark is the
			// first character after the keyword - NOTE(review): confirm against the start of this function
			char_t* mark = s + 9;

			s = parse_doctype_group(s, endch);
			if (!s) return s;

			// zero-terminate the doctype text unless the closing '>' was the character replaced by the terminator
			assert((*s == 0 && endch == '>') || *s == '>');
			if (*s) *s++ = 0;

			if (PUGI__OPTSET(parse_doctype))
			{
				// the stored value starts at the first non-space character after the keyword
				while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;

				PUGI__PUSHNODE(node_doctype);

				cursor->value = mark;
			}
		}
		else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
		else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
		else PUGI__THROW_ERROR(status_unrecognized_tag, s);

		return s;
	}

	// Parses a '<?...?>' construct; s points at the '?' that follows '<'.
	//
	// A target spelled "xml" (any letter case, exactly three characters) is a declaration, anything
	// else is a processing instruction. A node is created only when the matching option
	// (parse_declaration / parse_pi) is set in optmsk; otherwise the construct is scanned and skipped.
	//
	// ref_cursor is the current node; it is read on entry and written back on exit. For a declaration
	// that has content after the target, the cursor is left on the new node_declaration and the
	// returned pointer is the start of that content, so that parse_tree can parse it as attributes.
	//
	// endch is the character that the zero terminator replaced at the end of the buffer.
	// Returns the position to continue parsing from. Error exits go through PUGI__THROW_ERROR /
	// PUGI__CHECK_ERROR, which are defined outside this chunk; parse_tree treats a null return as failure.
	char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
	{
		// load into registers
		xml_node_struct* cursor = ref_cursor;
		char_t ch = 0;

		// parse node contents, starting with question mark
		++s;

		// read PI target
		char_t* target = s;

		if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);

		PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
		PUGI__CHECK_ERROR(status_bad_pi, s);

		// determine node type; stricmp / strcasecmp is not portable
		// (c | ' ') sets bit 0x20, which maps ASCII upper-case letters onto lower-case ones
		bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;

		if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
		{
			if (declaration)
			{
				// disallow non top-level declarations
				if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);

				PUGI__PUSHNODE(node_declaration);
			}
			else
			{
				PUGI__PUSHNODE(node_pi);
			}

			cursor->name = target;

			PUGI__ENDSEG();

			// parse value/attributes
			if (ch == '?')
			{
				// empty node
				if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);

				// the comparison yields 0 or 1: step over '>' only when it is physically present
				s += (*s == '>');

				PUGI__POPNODE();
			}
			else if (PUGI__IS_CHARTYPE(ch, ct_space))
			{
				PUGI__SKIPWS();

				// scan for tag end
				char_t* value = s;

				PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
				PUGI__CHECK_ERROR(status_bad_pi, s);

				if (declaration)
				{
					// replace ending ? with / so that 'element' terminates properly
					*s = '/';

					// we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
					s = value;
				}
				else
				{
					// store value and step over >
					cursor->value = value;

					PUGI__POPNODE();

					PUGI__ENDSEG();

					s += (*s == '>');
				}
			}
			else PUGI__THROW_ERROR(status_bad_pi, s);
		}
		else
		{
			// scan for tag end
			PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
			PUGI__CHECK_ERROR(status_bad_pi, s);

			// step over "?>", or over '?' alone when the '>' is not physically present
			s += (s[1] == '>' ? 2 : 1);
		}

		// store from registers
		ref_cursor = cursor;

		return s;
	}

	// Main parsing loop: consumes the zero-terminated buffer s and builds the tree below root.
	//
	// 'cursor' tracks the innermost open node. Elements, attributes and PCDATA are handled inline;
	// '<?...' and '<!...' constructs are delegated to parse_question / parse_exclamation.
	// Control flow uses two labels: LOC_TAG (entered right after a '<' has been consumed) and
	// LOC_ATTRIBUTES (the attribute loop, also entered for a freshly parsed declaration).
	//
	// optmsk holds the parse option flags tested through PUGI__OPTSET; endch is the character that
	// the zero terminator replaced at the end of the buffer (see parse()).
	// Returns the final scan position; error exits go through PUGI__THROW_ERROR, defined outside this chunk.
	char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
	{
		strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
		strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);

		char_t ch = 0;
		xml_node_struct* cursor = root;
		char_t* mark = s;

		while (*s != 0)
		{
			if (*s == '<')
			{
				++s;

			LOC_TAG:
				if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
				{
					PUGI__PUSHNODE(node_element); // Append a new node to the tree.

					cursor->name = s;

					PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
					PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.

					if (ch == '>')
					{
						// end of tag
					}
					else if (PUGI__IS_CHARTYPE(ch, ct_space))
					{
					LOC_ATTRIBUTES:
						while (true)
						{
							PUGI__SKIPWS(); // Eat any whitespace.

							if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
							{
								xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute.
								if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);

								a->name = s; // Save the offset.

								PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
								PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.

								if (PUGI__IS_CHARTYPE(ch, ct_space))
								{
									PUGI__SKIPWS(); // Eat any whitespace.

									ch = *s;
									++s;
								}

								if (ch == '=') // '<... #=...'
								{
									PUGI__SKIPWS(); // Eat any whitespace.

									if (*s == '"' || *s == '\'') // '<... #="...'
									{
										ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
										++s; // Step over the quote.
										a->value = s; // Save the offset.

										s = strconv_attribute(s, ch);

										if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);

										// After this line the loop continues from the start;
										// Whitespaces, / and > are ok, symbols and EOF are wrong,
										// everything else will be detected
										if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
									}
									else PUGI__THROW_ERROR(status_bad_attribute, s);
								}
								else PUGI__THROW_ERROR(status_bad_attribute, s);
							}
							else if (*s == '/')
							{
								++s;

								if (*s == '>')
								{
									PUGI__POPNODE();
									s++;
									break;
								}
								else if (*s == 0 && endch == '>')
								{
									// the closing '>' was the character replaced by the terminator
									PUGI__POPNODE();
									break;
								}
								else PUGI__THROW_ERROR(status_bad_start_element, s);
							}
							else if (*s == '>')
							{
								++s;

								break;
							}
							else if (*s == 0 && endch == '>')
							{
								break;
							}
							else PUGI__THROW_ERROR(status_bad_start_element, s);
						}

						// !!!
					}
					else if (ch == '/') // '<#.../'
					{
						if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);

						PUGI__POPNODE(); // Pop.

						s += (*s == '>');
					}
					else if (ch == 0)
					{
						// we stepped over null terminator, backtrack & handle closing tag
						--s;

						if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
					}
					else PUGI__THROW_ERROR(status_bad_start_element, s);
				}
				else if (*s == '/')
				{
					++s;

					// remember the start of the closing tag name for error reporting
					mark = s;

					char_t* name = cursor->name;
					if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark);

					// the closing name must match the name of the currently open node character by character
					while (PUGI__IS_CHARTYPE(*s, ct_symbol))
					{
						if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
					}

					if (*name)
					{
						// the closing name is a proper prefix of the open node's name; when exactly one character
						// is missing and it equals endch, the tag is merely cut off by the end of the buffer
						if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
						else PUGI__THROW_ERROR(status_end_element_mismatch, mark);
					}

					PUGI__POPNODE(); // Pop.

					PUGI__SKIPWS();

					if (*s == 0)
					{
						if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
					}
					else
					{
						if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
						++s;
					}
				}
				else if (*s == '?') // '<?...'
				{
					s = parse_question(s, cursor, optmsk, endch);
					if (!s) return s;

					// parse_question leaves the cursor on a declaration node when its attributes still have to be parsed
					assert(cursor);
					if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
				}
				else if (*s == '!') // '<!...'
				{
					s = parse_exclamation(s, cursor, optmsk, endch);
					if (!s) return s;
				}
				else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
				else PUGI__THROW_ERROR(status_unrecognized_tag, s);
			}
			else
			{
				mark = s; // Save this offset while searching for a terminator.

				PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
3401 3402 if (*s == '<' || !*s) 3403 { 3404 // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one 3405 assert(mark != s); 3406 3407 if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata)) 3408 { 3409 continue; 3410 } 3411 else if (PUGI__OPTSET(parse_ws_pcdata_single)) 3412 { 3413 if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue; 3414 } 3415 } 3416 3417 if (!PUGI__OPTSET(parse_trim_pcdata)) 3418 s = mark; 3419 3420 if (cursor->parent || PUGI__OPTSET(parse_fragment)) 3421 { 3422 if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value) 3423 { 3424 cursor->value = s; // Save the offset. 3425 } 3426 else 3427 { 3428 PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree. 3429 3430 cursor->value = s; // Save the offset. 3431 3432 PUGI__POPNODE(); // Pop since this is a standalone. 3433 } 3434 3435 s = strconv_pcdata(s); 3436 3437 if (!*s) break; 3438 } 3439 else 3440 { 3441 PUGI__SCANFOR(*s == '<'); // '...<' 3442 if (!*s) break; 3443 3444 ++s; 3445 } 3446 3447 // We're after '<' 3448 goto LOC_TAG; 3449 } 3450 } 3451 3452 // check that last tag is closed 3453 if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s); 3454 3455 return s; 3456 } 3457 3458 #ifdef PUGIXML_WCHAR_MODE parse_skip_bomxml_parser3459 static char_t* parse_skip_bom(char_t* s) 3460 { 3461 unsigned int bom = 0xfeff; 3462 return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s; 3463 } 3464 #else parse_skip_bomxml_parser3465 static char_t* parse_skip_bom(char_t* s) 3466 { 3467 return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? 
s + 3 : s; 3468 } 3469 #endif 3470 has_element_node_siblingsxml_parser3471 static bool has_element_node_siblings(xml_node_struct* node) 3472 { 3473 while (node) 3474 { 3475 if (PUGI__NODETYPE(node) == node_element) return true; 3476 3477 node = node->next_sibling; 3478 } 3479 3480 return false; 3481 } 3482 parsexml_parser3483 static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk) 3484 { 3485 // early-out for empty documents 3486 if (length == 0) 3487 return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element); 3488 3489 // get last child of the root before parsing 3490 xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0; 3491 3492 // create parser on stack 3493 xml_parser parser(static_cast<xml_allocator*>(xmldoc)); 3494 3495 // save last character and make buffer zero-terminated (speeds up parsing) 3496 char_t endch = buffer[length - 1]; 3497 buffer[length - 1] = 0; 3498 3499 // skip BOM to make sure it does not end up as part of parse output 3500 char_t* buffer_data = parse_skip_bom(buffer); 3501 3502 // perform actual parsing 3503 parser.parse_tree(buffer_data, root, optmsk, endch); 3504 3505 xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0); 3506 assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length); 3507 3508 if (result) 3509 { 3510 // since we removed last character, we have to handle the only possible false positive (stray <) 3511 if (endch == '<') 3512 return make_parse_result(status_unrecognized_tag, length - 1); 3513 3514 // check if there are any element nodes parsed 3515 xml_node_struct* first_root_child_parsed = last_root_child ? 
last_root_child->next_sibling + 0 : root->first_child+ 0; 3516 3517 if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed)) 3518 return make_parse_result(status_no_document_element, length - 1); 3519 } 3520 else 3521 { 3522 // roll back offset if it occurs on a null terminator in the source buffer 3523 if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0) 3524 result.offset--; 3525 } 3526 3527 return result; 3528 } 3529 }; 3530 3531 // Output facilities get_write_native_encoding()3532 PUGI__FN xml_encoding get_write_native_encoding() 3533 { 3534 #ifdef PUGIXML_WCHAR_MODE 3535 return get_wchar_encoding(); 3536 #else 3537 return encoding_utf8; 3538 #endif 3539 } 3540 get_write_encoding(xml_encoding encoding)3541 PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding) 3542 { 3543 // replace wchar encoding with utf implementation 3544 if (encoding == encoding_wchar) return get_wchar_encoding(); 3545 3546 // replace utf16 encoding with utf16 with specific endianness 3547 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 3548 3549 // replace utf32 encoding with utf32 with specific endianness 3550 if (encoding == encoding_utf32) return is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; 3551 3552 // only do autodetection if no explicit encoding is requested 3553 if (encoding != encoding_auto) return encoding; 3554 3555 // assume utf8 encoding 3556 return encoding_utf8; 3557 } 3558 convert_buffer_output_generic(typename T::value_type dest,const char_t * data,size_t length,D,T)3559 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T) 3560 { 3561 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); 3562 3563 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T()); 3564 3565 return static_cast<size_t>(end - dest) * sizeof(*dest); 3566 } 3567 convert_buffer_output_generic(typename T::value_type dest,const char_t * data,size_t length,D,T,bool opt_swap)3568 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap) 3569 { 3570 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); 3571 3572 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T()); 3573 3574 if (opt_swap) 3575 { 3576 for (typename T::value_type i = dest; i != end; ++i) 3577 *i = endian_swap(*i); 3578 } 3579 3580 return static_cast<size_t>(end - dest) * sizeof(*dest); 3581 } 3582 3583 #ifdef PUGIXML_WCHAR_MODE get_valid_length(const char_t * data,size_t length)3584 PUGI__FN size_t get_valid_length(const char_t* data, size_t length) 3585 { 3586 if (length < 1) return 0; 3587 3588 // discard last character if it's the lead of a surrogate pair 3589 return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? 
length - 1 : length; 3590 } 3591 convert_buffer_output(char_t * r_char,uint8_t * r_u8,uint16_t * r_u16,uint32_t * r_u32,const char_t * data,size_t length,xml_encoding encoding)3592 PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) 3593 { 3594 // only endian-swapping is required 3595 if (need_endian_swap_utf(encoding, get_wchar_encoding())) 3596 { 3597 convert_wchar_endian_swap(r_char, data, length); 3598 3599 return length * sizeof(char_t); 3600 } 3601 3602 // convert to utf8 3603 if (encoding == encoding_utf8) 3604 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer()); 3605 3606 // convert to utf16 3607 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) 3608 { 3609 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 3610 3611 return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding); 3612 } 3613 3614 // convert to utf32 3615 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) 3616 { 3617 xml_encoding native_encoding = is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; 3618 3619 return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding); 3620 } 3621 3622 // convert to latin1 3623 if (encoding == encoding_latin1) 3624 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer()); 3625 3626 assert(false && "Invalid encoding"); 3627 return 0; 3628 } 3629 #else get_valid_length(const char_t * data,size_t length)3630 PUGI__FN size_t get_valid_length(const char_t* data, size_t length) 3631 { 3632 if (length < 5) return 0; 3633 3634 for (size_t i = 1; i <= 4; ++i) 3635 { 3636 uint8_t ch = static_cast<uint8_t>(data[length - i]); 3637 3638 // either a standalone character or a leading one 3639 if ((ch & 0xc0) != 0x80) return length - i; 3640 } 3641 3642 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk 3643 return length; 3644 } 3645 convert_buffer_output(char_t *,uint8_t * r_u8,uint16_t * r_u16,uint32_t * r_u32,const char_t * data,size_t length,xml_encoding encoding)3646 PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) 3647 { 3648 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) 3649 { 3650 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 3651 3652 return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding); 3653 } 3654 3655 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) 3656 { 3657 xml_encoding native_encoding = is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; 3658 3659 return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding); 3660 } 3661 3662 if (encoding == encoding_latin1) 3663 return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer()); 3664 3665 assert(false && "Invalid encoding"); 3666 return 0; 3667 } 3668 #endif 3669 3670 class xml_buffered_writer 3671 { 3672 xml_buffered_writer(const xml_buffered_writer&); 3673 xml_buffered_writer& operator=(const xml_buffered_writer&); 3674 3675 public: xml_buffered_writer(xml_writer & writer_,xml_encoding user_encoding)3676 xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding)) 3677 { 3678 PUGI__STATIC_ASSERT(bufcapacity >= 8); 3679 } 3680 flush()3681 size_t flush() 3682 { 3683 flush(buffer, bufsize); 3684 bufsize = 0; 3685 return 0; 3686 } 3687 flush(const char_t * data,size_t size)3688 void flush(const char_t* data, size_t size) 3689 { 3690 if (size == 0) return; 3691 3692 // fast path, just write data 3693 if (encoding == get_write_native_encoding()) 3694 writer.write(data, size * sizeof(char_t)); 3695 else 3696 { 3697 // convert chunk 3698 size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding); 3699 assert(result <= sizeof(scratch)); 3700 3701 // write data 3702 writer.write(scratch.data_u8, result); 3703 } 3704 } 3705 write_direct(const char_t * data,size_t length)3706 void write_direct(const char_t* data, size_t length) 3707 { 3708 // flush the remaining buffer contents 3709 flush(); 3710 3711 // handle large chunks 3712 if (length > bufcapacity) 3713 { 3714 if (encoding == get_write_native_encoding()) 3715 { 3716 // fast path, can just write data chunk 3717 writer.write(data, length * sizeof(char_t)); 3718 return; 3719 } 3720 3721 // need to convert in suitable chunks 3722 while 
(length > bufcapacity) 3723 { 3724 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer 3725 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary) 3726 size_t chunk_size = get_valid_length(data, bufcapacity); 3727 assert(chunk_size); 3728 3729 // convert chunk and write 3730 flush(data, chunk_size); 3731 3732 // iterate 3733 data += chunk_size; 3734 length -= chunk_size; 3735 } 3736 3737 // small tail is copied below 3738 bufsize = 0; 3739 } 3740 3741 memcpy(buffer + bufsize, data, length * sizeof(char_t)); 3742 bufsize += length; 3743 } 3744 write_buffer(const char_t * data,size_t length)3745 void write_buffer(const char_t* data, size_t length) 3746 { 3747 size_t offset = bufsize; 3748 3749 if (offset + length <= bufcapacity) 3750 { 3751 memcpy(buffer + offset, data, length * sizeof(char_t)); 3752 bufsize = offset + length; 3753 } 3754 else 3755 { 3756 write_direct(data, length); 3757 } 3758 } 3759 write_string(const char_t * data)3760 void write_string(const char_t* data) 3761 { 3762 // write the part of the string that fits in the buffer 3763 size_t offset = bufsize; 3764 3765 while (*data && offset < bufcapacity) 3766 buffer[offset++] = *data++; 3767 3768 // write the rest 3769 if (offset < bufcapacity) 3770 { 3771 bufsize = offset; 3772 } 3773 else 3774 { 3775 // backtrack a bit if we have split the codepoint 3776 size_t length = offset - bufsize; 3777 size_t extra = length - get_valid_length(data - length, length); 3778 3779 bufsize = offset - extra; 3780 3781 write_direct(data - extra, strlength(data) + extra); 3782 } 3783 } 3784 write(char_t d0)3785 void write(char_t d0) 3786 { 3787 size_t offset = bufsize; 3788 if (offset > bufcapacity - 1) offset = flush(); 3789 3790 buffer[offset + 0] = d0; 3791 bufsize = offset + 1; 3792 } 3793 write(char_t d0,char_t d1)3794 void write(char_t d0, char_t d1) 3795 { 3796 size_t offset = bufsize; 3797 if (offset > bufcapacity - 2) 
offset = flush(); 3798 3799 buffer[offset + 0] = d0; 3800 buffer[offset + 1] = d1; 3801 bufsize = offset + 2; 3802 } 3803 write(char_t d0,char_t d1,char_t d2)3804 void write(char_t d0, char_t d1, char_t d2) 3805 { 3806 size_t offset = bufsize; 3807 if (offset > bufcapacity - 3) offset = flush(); 3808 3809 buffer[offset + 0] = d0; 3810 buffer[offset + 1] = d1; 3811 buffer[offset + 2] = d2; 3812 bufsize = offset + 3; 3813 } 3814 write(char_t d0,char_t d1,char_t d2,char_t d3)3815 void write(char_t d0, char_t d1, char_t d2, char_t d3) 3816 { 3817 size_t offset = bufsize; 3818 if (offset > bufcapacity - 4) offset = flush(); 3819 3820 buffer[offset + 0] = d0; 3821 buffer[offset + 1] = d1; 3822 buffer[offset + 2] = d2; 3823 buffer[offset + 3] = d3; 3824 bufsize = offset + 4; 3825 } 3826 write(char_t d0,char_t d1,char_t d2,char_t d3,char_t d4)3827 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) 3828 { 3829 size_t offset = bufsize; 3830 if (offset > bufcapacity - 5) offset = flush(); 3831 3832 buffer[offset + 0] = d0; 3833 buffer[offset + 1] = d1; 3834 buffer[offset + 2] = d2; 3835 buffer[offset + 3] = d3; 3836 buffer[offset + 4] = d4; 3837 bufsize = offset + 5; 3838 } 3839 write(char_t d0,char_t d1,char_t d2,char_t d3,char_t d4,char_t d5)3840 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) 3841 { 3842 size_t offset = bufsize; 3843 if (offset > bufcapacity - 6) offset = flush(); 3844 3845 buffer[offset + 0] = d0; 3846 buffer[offset + 1] = d1; 3847 buffer[offset + 2] = d2; 3848 buffer[offset + 3] = d3; 3849 buffer[offset + 4] = d4; 3850 buffer[offset + 5] = d5; 3851 bufsize = offset + 6; 3852 } 3853 3854 // utf8 maximum expansion: x4 (-> utf32) 3855 // utf16 maximum expansion: x2 (-> utf32) 3856 // utf32 maximum expansion: x1 3857 enum 3858 { 3859 bufcapacitybytes = 3860 #ifdef PUGIXML_MEMORY_OUTPUT_STACK 3861 PUGIXML_MEMORY_OUTPUT_STACK 3862 #else 3863 10240 3864 #endif 3865 , 3866 bufcapacity = bufcapacitybytes / 
(sizeof(char_t) + 4) 3867 }; 3868 3869 char_t buffer[bufcapacity]; 3870 3871 union 3872 { 3873 uint8_t data_u8[4 * bufcapacity]; 3874 uint16_t data_u16[2 * bufcapacity]; 3875 uint32_t data_u32[bufcapacity]; 3876 char_t data_char[bufcapacity]; 3877 } scratch; 3878 3879 xml_writer& writer; 3880 size_t bufsize; 3881 xml_encoding encoding; 3882 }; 3883 text_output_escaped(xml_buffered_writer & writer,const char_t * s,chartypex_t type)3884 PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type) 3885 { 3886 while (*s) 3887 { 3888 const char_t* prev = s; 3889 3890 // While *s is a usual symbol 3891 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type)); 3892 3893 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 3894 3895 switch (*s) 3896 { 3897 case 0: break; 3898 case '&': 3899 writer.write('&', 'a', 'm', 'p', ';'); 3900 ++s; 3901 break; 3902 case '<': 3903 writer.write('&', 'l', 't', ';'); 3904 ++s; 3905 break; 3906 case '>': 3907 writer.write('&', 'g', 't', ';'); 3908 ++s; 3909 break; 3910 case '"': 3911 writer.write('&', 'q', 'u', 'o', 't', ';'); 3912 ++s; 3913 break; 3914 default: // s is not a usual symbol 3915 { 3916 unsigned int ch = static_cast<unsigned int>(*s++); 3917 assert(ch < 32); 3918 3919 writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';'); 3920 } 3921 } 3922 } 3923 } 3924 text_output(xml_buffered_writer & writer,const char_t * s,chartypex_t type,unsigned int flags)3925 PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags) 3926 { 3927 if (flags & format_no_escapes) 3928 writer.write_string(s); 3929 else 3930 text_output_escaped(writer, s, type); 3931 } 3932 text_output_cdata(xml_buffered_writer & writer,const char_t * s)3933 PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s) 3934 { 3935 do 3936 { 3937 writer.write('<', '!', '[', 'C', 'D'); 3938 writer.write('A', 'T', 'A', 
'['); 3939 3940 const char_t* prev = s; 3941 3942 // look for ]]> sequence - we can't output it as is since it terminates CDATA 3943 while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s; 3944 3945 // skip ]] if we stopped at ]]>, > will go to the next CDATA section 3946 if (*s) s += 2; 3947 3948 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 3949 3950 writer.write(']', ']', '>'); 3951 } 3952 while (*s); 3953 } 3954 text_output_indent(xml_buffered_writer & writer,const char_t * indent,size_t indent_length,unsigned int depth)3955 PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth) 3956 { 3957 switch (indent_length) 3958 { 3959 case 1: 3960 { 3961 for (unsigned int i = 0; i < depth; ++i) 3962 writer.write(indent[0]); 3963 break; 3964 } 3965 3966 case 2: 3967 { 3968 for (unsigned int i = 0; i < depth; ++i) 3969 writer.write(indent[0], indent[1]); 3970 break; 3971 } 3972 3973 case 3: 3974 { 3975 for (unsigned int i = 0; i < depth; ++i) 3976 writer.write(indent[0], indent[1], indent[2]); 3977 break; 3978 } 3979 3980 case 4: 3981 { 3982 for (unsigned int i = 0; i < depth; ++i) 3983 writer.write(indent[0], indent[1], indent[2], indent[3]); 3984 break; 3985 } 3986 3987 default: 3988 { 3989 for (unsigned int i = 0; i < depth; ++i) 3990 writer.write_buffer(indent, indent_length); 3991 } 3992 } 3993 } 3994 node_output_comment(xml_buffered_writer & writer,const char_t * s)3995 PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s) 3996 { 3997 writer.write('<', '!', '-', '-'); 3998 3999 while (*s) 4000 { 4001 const char_t* prev = s; 4002 4003 // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body 4004 while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s; 4005 4006 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 4007 4008 if (*s) 4009 { 4010 assert(*s == '-'); 4011 4012 writer.write('-', ' '); 4013 ++s; 4014 } 
4015 } 4016 4017 writer.write('-', '-', '>'); 4018 } 4019 node_output_pi_value(xml_buffered_writer & writer,const char_t * s)4020 PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s) 4021 { 4022 while (*s) 4023 { 4024 const char_t* prev = s; 4025 4026 // look for ?> sequence - we can't output it since ?> terminates PI 4027 while (*s && !(s[0] == '?' && s[1] == '>')) ++s; 4028 4029 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 4030 4031 if (*s) 4032 { 4033 assert(s[0] == '?' && s[1] == '>'); 4034 4035 writer.write('?', ' ', '>'); 4036 s += 2; 4037 } 4038 } 4039 } 4040 node_output_attributes(xml_buffered_writer & writer,xml_node_struct * node,const char_t * indent,size_t indent_length,unsigned int flags,unsigned int depth)4041 PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) 4042 { 4043 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 4044 4045 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) 4046 { 4047 if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes) 4048 { 4049 writer.write('\n'); 4050 4051 text_output_indent(writer, indent, indent_length, depth + 1); 4052 } 4053 else 4054 { 4055 writer.write(' '); 4056 } 4057 4058 writer.write_string(a->name ? 
a->name + 0 : default_name); 4059 writer.write('=', '"'); 4060 4061 if (a->value) 4062 text_output(writer, a->value, ctx_special_attr, flags); 4063 4064 writer.write('"'); 4065 } 4066 } 4067 node_output_start(xml_buffered_writer & writer,xml_node_struct * node,const char_t * indent,size_t indent_length,unsigned int flags,unsigned int depth)4068 PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) 4069 { 4070 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 4071 const char_t* name = node->name ? node->name + 0 : default_name; 4072 4073 writer.write('<'); 4074 writer.write_string(name); 4075 4076 if (node->first_attribute) 4077 node_output_attributes(writer, node, indent, indent_length, flags, depth); 4078 4079 // element nodes can have value if parse_embed_pcdata was used 4080 if (!node->value) 4081 { 4082 if (!node->first_child) 4083 { 4084 if (flags & format_no_empty_element_tags) 4085 { 4086 writer.write('>', '<', '/'); 4087 writer.write_string(name); 4088 writer.write('>'); 4089 4090 return false; 4091 } 4092 else 4093 { 4094 if ((flags & format_raw) == 0) 4095 writer.write(' '); 4096 4097 writer.write('/', '>'); 4098 4099 return false; 4100 } 4101 } 4102 else 4103 { 4104 writer.write('>'); 4105 4106 return true; 4107 } 4108 } 4109 else 4110 { 4111 writer.write('>'); 4112 4113 text_output(writer, node->value, ctx_special_pcdata, flags); 4114 4115 if (!node->first_child) 4116 { 4117 writer.write('<', '/'); 4118 writer.write_string(name); 4119 writer.write('>'); 4120 4121 return false; 4122 } 4123 else 4124 { 4125 return true; 4126 } 4127 } 4128 } 4129 node_output_end(xml_buffered_writer & writer,xml_node_struct * node)4130 PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node) 4131 { 4132 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 4133 const char_t* name = node->name ? 
node->name + 0 : default_name; 4134 4135 writer.write('<', '/'); 4136 writer.write_string(name); 4137 writer.write('>'); 4138 } 4139 node_output_simple(xml_buffered_writer & writer,xml_node_struct * node,unsigned int flags)4140 PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags) 4141 { 4142 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 4143 4144 switch (PUGI__NODETYPE(node)) 4145 { 4146 case node_pcdata: 4147 text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags); 4148 break; 4149 4150 case node_cdata: 4151 text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT("")); 4152 break; 4153 4154 case node_comment: 4155 node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT("")); 4156 break; 4157 4158 case node_pi: 4159 writer.write('<', '?'); 4160 writer.write_string(node->name ? node->name + 0 : default_name); 4161 4162 if (node->value) 4163 { 4164 writer.write(' '); 4165 node_output_pi_value(writer, node->value); 4166 } 4167 4168 writer.write('?', '>'); 4169 break; 4170 4171 case node_declaration: 4172 writer.write('<', '?'); 4173 writer.write_string(node->name ? 
node->name + 0 : default_name); 4174 node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0); 4175 writer.write('?', '>'); 4176 break; 4177 4178 case node_doctype: 4179 writer.write('<', '!', 'D', 'O', 'C'); 4180 writer.write('T', 'Y', 'P', 'E'); 4181 4182 if (node->value) 4183 { 4184 writer.write(' '); 4185 writer.write_string(node->value); 4186 } 4187 4188 writer.write('>'); 4189 break; 4190 4191 default: 4192 assert(false && "Invalid node type"); 4193 } 4194 } 4195 4196 enum indent_flags_t 4197 { 4198 indent_newline = 1, 4199 indent_indent = 2 4200 }; 4201 node_output(xml_buffered_writer & writer,xml_node_struct * root,const char_t * indent,unsigned int flags,unsigned int depth)4202 PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth) 4203 { 4204 size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0; 4205 unsigned int indent_flags = indent_indent; 4206 4207 xml_node_struct* node = root; 4208 4209 do 4210 { 4211 assert(node); 4212 4213 // begin writing current node 4214 if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata) 4215 { 4216 node_output_simple(writer, node, flags); 4217 4218 indent_flags = 0; 4219 } 4220 else 4221 { 4222 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) 4223 writer.write('\n'); 4224 4225 if ((indent_flags & indent_indent) && indent_length) 4226 text_output_indent(writer, indent, indent_length, depth); 4227 4228 if (PUGI__NODETYPE(node) == node_element) 4229 { 4230 indent_flags = indent_newline | indent_indent; 4231 4232 if (node_output_start(writer, node, indent, indent_length, flags, depth)) 4233 { 4234 // element nodes can have value if parse_embed_pcdata was used 4235 if (node->value) 4236 indent_flags = 0; 4237 4238 node = node->first_child; 4239 depth++; 4240 continue; 4241 } 4242 } 4243 else if 
(PUGI__NODETYPE(node) == node_document) 4244 { 4245 indent_flags = indent_indent; 4246 4247 if (node->first_child) 4248 { 4249 node = node->first_child; 4250 continue; 4251 } 4252 } 4253 else 4254 { 4255 node_output_simple(writer, node, flags); 4256 4257 indent_flags = indent_newline | indent_indent; 4258 } 4259 } 4260 4261 // continue to the next node 4262 while (node != root) 4263 { 4264 if (node->next_sibling) 4265 { 4266 node = node->next_sibling; 4267 break; 4268 } 4269 4270 node = node->parent; 4271 4272 // write closing node 4273 if (PUGI__NODETYPE(node) == node_element) 4274 { 4275 depth--; 4276 4277 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) 4278 writer.write('\n'); 4279 4280 if ((indent_flags & indent_indent) && indent_length) 4281 text_output_indent(writer, indent, indent_length, depth); 4282 4283 node_output_end(writer, node); 4284 4285 indent_flags = indent_newline | indent_indent; 4286 } 4287 } 4288 } 4289 while (node != root); 4290 4291 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) 4292 writer.write('\n'); 4293 } 4294 has_declaration(xml_node_struct * node)4295 PUGI__FN bool has_declaration(xml_node_struct* node) 4296 { 4297 for (xml_node_struct* child = node->first_child; child; child = child->next_sibling) 4298 { 4299 xml_node_type type = PUGI__NODETYPE(child); 4300 4301 if (type == node_declaration) return true; 4302 if (type == node_element) return false; 4303 } 4304 4305 return false; 4306 } 4307 is_attribute_of(xml_attribute_struct * attr,xml_node_struct * node)4308 PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node) 4309 { 4310 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) 4311 if (a == attr) 4312 return true; 4313 4314 return false; 4315 } 4316 allow_insert_attribute(xml_node_type parent)4317 PUGI__FN bool allow_insert_attribute(xml_node_type parent) 4318 { 4319 return parent == node_element || parent == node_declaration; 4320 } 4321 
allow_insert_child(xml_node_type parent,xml_node_type child)4322 PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child) 4323 { 4324 if (parent != node_document && parent != node_element) return false; 4325 if (child == node_document || child == node_null) return false; 4326 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false; 4327 4328 return true; 4329 } 4330 allow_move(xml_node parent,xml_node child)4331 PUGI__FN bool allow_move(xml_node parent, xml_node child) 4332 { 4333 // check that child can be a child of parent 4334 if (!allow_insert_child(parent.type(), child.type())) 4335 return false; 4336 4337 // check that node is not moved between documents 4338 if (parent.root() != child.root()) 4339 return false; 4340 4341 // check that new parent is not in the child subtree 4342 xml_node cur = parent; 4343 4344 while (cur) 4345 { 4346 if (cur == child) 4347 return false; 4348 4349 cur = cur.parent(); 4350 } 4351 4352 return true; 4353 } 4354 4355 template <typename String, typename Header> node_copy_string(String & dest,Header & header,uintptr_t header_mask,char_t * source,Header & source_header,xml_allocator * alloc)4356 PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc) 4357 { 4358 assert(!dest && (header & header_mask) == 0); 4359 4360 if (source) 4361 { 4362 if (alloc && (source_header & header_mask) == 0) 4363 { 4364 dest = source; 4365 4366 // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared 4367 header |= xml_memory_page_contents_shared_mask; 4368 source_header |= xml_memory_page_contents_shared_mask; 4369 } 4370 else 4371 strcpy_insitu(dest, header, header_mask, source, strlength(source)); 4372 } 4373 } 4374 node_copy_contents(xml_node_struct * dn,xml_node_struct * sn,xml_allocator * shared_alloc)4375 PUGI__FN void 
node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc) 4376 { 4377 node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc); 4378 node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc); 4379 4380 for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute) 4381 { 4382 xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn)); 4383 4384 if (da) 4385 { 4386 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); 4387 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); 4388 } 4389 } 4390 } 4391 node_copy_tree(xml_node_struct * dn,xml_node_struct * sn)4392 PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn) 4393 { 4394 xml_allocator& alloc = get_allocator(dn); 4395 xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? 
&alloc : 0; 4396 4397 node_copy_contents(dn, sn, shared_alloc); 4398 4399 xml_node_struct* dit = dn; 4400 xml_node_struct* sit = sn->first_child; 4401 4402 while (sit && sit != sn) 4403 { 4404 if (sit != dn) 4405 { 4406 xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit)); 4407 4408 if (copy) 4409 { 4410 node_copy_contents(copy, sit, shared_alloc); 4411 4412 if (sit->first_child) 4413 { 4414 dit = copy; 4415 sit = sit->first_child; 4416 continue; 4417 } 4418 } 4419 } 4420 4421 // continue to the next node 4422 do 4423 { 4424 if (sit->next_sibling) 4425 { 4426 sit = sit->next_sibling; 4427 break; 4428 } 4429 4430 sit = sit->parent; 4431 dit = dit->parent; 4432 } 4433 while (sit != sn); 4434 } 4435 } 4436 node_copy_attribute(xml_attribute_struct * da,xml_attribute_struct * sa)4437 PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa) 4438 { 4439 xml_allocator& alloc = get_allocator(da); 4440 xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? 
&alloc : 0; 4441 4442 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); 4443 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); 4444 } 4445 is_text_node(xml_node_struct * node)4446 inline bool is_text_node(xml_node_struct* node) 4447 { 4448 xml_node_type type = PUGI__NODETYPE(node); 4449 4450 return type == node_pcdata || type == node_cdata; 4451 } 4452 4453 // get value with conversion functions string_to_integer(const char_t * value,U minneg,U maxpos)4454 template <typename U> U string_to_integer(const char_t* value, U minneg, U maxpos) 4455 { 4456 U result = 0; 4457 const char_t* s = value; 4458 4459 while (PUGI__IS_CHARTYPE(*s, ct_space)) 4460 s++; 4461 4462 bool negative = (*s == '-'); 4463 4464 s += (*s == '+' || *s == '-'); 4465 4466 bool overflow = false; 4467 4468 if (s[0] == '0' && (s[1] | ' ') == 'x') 4469 { 4470 s += 2; 4471 4472 // since overflow detection relies on length of the sequence skip leading zeros 4473 while (*s == '0') 4474 s++; 4475 4476 const char_t* start = s; 4477 4478 for (;;) 4479 { 4480 if (static_cast<unsigned>(*s - '0') < 10) 4481 result = result * 16 + (*s - '0'); 4482 else if (static_cast<unsigned>((*s | ' ') - 'a') < 6) 4483 result = result * 16 + ((*s | ' ') - 'a' + 10); 4484 else 4485 break; 4486 4487 s++; 4488 } 4489 4490 size_t digits = static_cast<size_t>(s - start); 4491 4492 overflow = digits > sizeof(U) * 2; 4493 } 4494 else 4495 { 4496 // since overflow detection relies on length of the sequence skip leading zeros 4497 while (*s == '0') 4498 s++; 4499 4500 const char_t* start = s; 4501 4502 for (;;) 4503 { 4504 if (static_cast<unsigned>(*s - '0') < 10) 4505 result = result * 10 + (*s - '0'); 4506 else 4507 break; 4508 4509 s++; 4510 } 4511 4512 size_t digits = static_cast<size_t>(s - start); 4513 4514 PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2); 4515 4516 const size_t 
max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5; 4517 const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6'; 4518 const size_t high_bit = sizeof(U) * 8 - 1; 4519 4520 overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit))); 4521 } 4522 4523 if (negative) 4524 return (overflow || result > minneg) ? 0 - minneg : 0 - result; 4525 else 4526 return (overflow || result > maxpos) ? maxpos : result; 4527 } 4528 get_value_int(const char_t * value)4529 PUGI__FN int get_value_int(const char_t* value) 4530 { 4531 return string_to_integer<unsigned int>(value, 0 - static_cast<unsigned int>(INT_MIN), INT_MAX); 4532 } 4533 get_value_uint(const char_t * value)4534 PUGI__FN unsigned int get_value_uint(const char_t* value) 4535 { 4536 return string_to_integer<unsigned int>(value, 0, UINT_MAX); 4537 } 4538 get_value_double(const char_t * value)4539 PUGI__FN double get_value_double(const char_t* value) 4540 { 4541 #ifdef PUGIXML_WCHAR_MODE 4542 return wcstod(value, 0); 4543 #else 4544 return strtod(value, 0); 4545 #endif 4546 } 4547 get_value_float(const char_t * value)4548 PUGI__FN float get_value_float(const char_t* value) 4549 { 4550 #ifdef PUGIXML_WCHAR_MODE 4551 return static_cast<float>(wcstod(value, 0)); 4552 #else 4553 return static_cast<float>(strtod(value, 0)); 4554 #endif 4555 } 4556 get_value_bool(const char_t * value)4557 PUGI__FN bool get_value_bool(const char_t* value) 4558 { 4559 // only look at first char 4560 char_t first = *value; 4561 4562 // 1*, t* (true), T* (True), y* (yes), Y* (YES) 4563 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y'); 4564 } 4565 4566 #ifdef PUGIXML_HAS_LONG_LONG get_value_llong(const char_t * value)4567 PUGI__FN long long get_value_llong(const char_t* value) 4568 { 4569 return string_to_integer<unsigned long long>(value, 0 - static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX); 4570 } 4571 
get_value_ullong(const char_t * value)4572 PUGI__FN unsigned long long get_value_ullong(const char_t* value) 4573 { 4574 return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX); 4575 } 4576 #endif 4577 integer_to_string(char_t * begin,char_t * end,U value,bool negative)4578 template <typename U> PUGI__FN char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative) 4579 { 4580 char_t* result = end - 1; 4581 U rest = negative ? 0 - value : value; 4582 4583 do 4584 { 4585 *result-- = static_cast<char_t>('0' + (rest % 10)); 4586 rest /= 10; 4587 } 4588 while (rest); 4589 4590 assert(result >= begin); 4591 (void)begin; 4592 4593 *result = '-'; 4594 4595 return result + !negative; 4596 } 4597 4598 // set value with conversion functions 4599 template <typename String, typename Header> set_value_ascii(String & dest,Header & header,uintptr_t header_mask,char * buf)4600 PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf) 4601 { 4602 #ifdef PUGIXML_WCHAR_MODE 4603 char_t wbuf[128]; 4604 assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0])); 4605 4606 size_t offset = 0; 4607 for (; buf[offset]; ++offset) wbuf[offset] = buf[offset]; 4608 4609 return strcpy_insitu(dest, header, header_mask, wbuf, offset); 4610 #else 4611 return strcpy_insitu(dest, header, header_mask, buf, strlen(buf)); 4612 #endif 4613 } 4614 4615 template <typename U, typename String, typename Header> set_value_integer(String & dest,Header & header,uintptr_t header_mask,U value,bool negative)4616 PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative) 4617 { 4618 char_t buf[64]; 4619 char_t* end = buf + sizeof(buf) / sizeof(buf[0]); 4620 char_t* begin = integer_to_string(buf, end, value, negative); 4621 4622 return strcpy_insitu(dest, header, header_mask, begin, end - begin); 4623 } 4624 4625 template <typename String, typename Header> set_value_convert(String & dest,Header & 
header,uintptr_t header_mask,float value)4626 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value) 4627 { 4628 char buf[128]; 4629 sprintf(buf, "%.9g", value); 4630 4631 return set_value_ascii(dest, header, header_mask, buf); 4632 } 4633 4634 template <typename String, typename Header> set_value_convert(String & dest,Header & header,uintptr_t header_mask,double value)4635 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value) 4636 { 4637 char buf[128]; 4638 sprintf(buf, "%.17g", value); 4639 4640 return set_value_ascii(dest, header, header_mask, buf); 4641 } 4642 4643 template <typename String, typename Header> set_value_bool(String & dest,Header & header,uintptr_t header_mask,bool value)4644 PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value) 4645 { 4646 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 
4 : 5); 4647 } 4648 load_buffer_impl(xml_document_struct * doc,xml_node_struct * root,void * contents,size_t size,unsigned int options,xml_encoding encoding,bool is_mutable,bool own,char_t ** out_buffer)4649 PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer) 4650 { 4651 // check input buffer 4652 if (!contents && size) return make_parse_result(status_io_error); 4653 4654 // get actual encoding 4655 xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size); 4656 4657 // get private buffer 4658 char_t* buffer = 0; 4659 size_t length = 0; 4660 4661 if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory); 4662 4663 // delete original buffer if we performed a conversion 4664 if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents); 4665 4666 // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself 4667 if (own || buffer != contents) *out_buffer = buffer; 4668 4669 // store buffer for offset_debug 4670 doc->buffer = buffer; 4671 4672 // parse 4673 xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options); 4674 4675 // remember encoding 4676 res.encoding = buffer_encoding; 4677 4678 return res; 4679 } 4680 4681 // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick get_file_size(FILE * file,size_t & out_result)4682 PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result) 4683 { 4684 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE) 4685 // there are 64-bit versions of fseek/ftell, let's use them 4686 typedef __int64 length_type; 4687 4688 _fseeki64(file, 0, SEEK_END); 4689 length_type length 
= _ftelli64(file); 4690 _fseeki64(file, 0, SEEK_SET); 4691 #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)) 4692 // there are 64-bit versions of fseek/ftell, let's use them 4693 typedef off64_t length_type; 4694 4695 fseeko64(file, 0, SEEK_END); 4696 length_type length = ftello64(file); 4697 fseeko64(file, 0, SEEK_SET); 4698 #else 4699 // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway. 4700 typedef long length_type; 4701 4702 fseek(file, 0, SEEK_END); 4703 length_type length = ftell(file); 4704 fseek(file, 0, SEEK_SET); 4705 #endif 4706 4707 // check for I/O errors 4708 if (length < 0) return status_io_error; 4709 4710 // check for overflow 4711 size_t result = static_cast<size_t>(length); 4712 4713 if (static_cast<length_type>(result) != length) return status_out_of_memory; 4714 4715 // finalize 4716 out_result = result; 4717 4718 return status_ok; 4719 } 4720 4721 // This function assumes that buffer has extra sizeof(char_t) writable bytes after size zero_terminate_buffer(void * buffer,size_t size,xml_encoding encoding)4722 PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding) 4723 { 4724 // We only need to zero-terminate if encoding conversion does not do it for us 4725 #ifdef PUGIXML_WCHAR_MODE 4726 xml_encoding wchar_encoding = get_wchar_encoding(); 4727 4728 if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding)) 4729 { 4730 size_t length = size / sizeof(char_t); 4731 4732 static_cast<char_t*>(buffer)[length] = 0; 4733 return (length + 1) * sizeof(char_t); 4734 } 4735 #else 4736 if (encoding == encoding_utf8) 4737 { 4738 static_cast<char*>(buffer)[size] = 0; 4739 return size + 1; 4740 } 4741 #endif 4742 4743 return size; 4744 } 4745 load_file_impl(xml_document_struct * doc,FILE * file,unsigned int options,xml_encoding encoding,char_t ** 
out_buffer)4746 PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer) 4747 { 4748 if (!file) return make_parse_result(status_file_not_found); 4749 4750 // get file size (can result in I/O errors) 4751 size_t size = 0; 4752 xml_parse_status size_status = get_file_size(file, size); 4753 if (size_status != status_ok) return make_parse_result(size_status); 4754 4755 size_t max_suffix_size = sizeof(char_t); 4756 4757 // allocate buffer for the whole file 4758 char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size)); 4759 if (!contents) return make_parse_result(status_out_of_memory); 4760 4761 // read file in memory 4762 size_t read_size = fread(contents, 1, size, file); 4763 4764 if (read_size != size) 4765 { 4766 xml_memory::deallocate(contents); 4767 return make_parse_result(status_io_error); 4768 } 4769 4770 xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size); 4771 4772 return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer); 4773 } 4774 close_file(FILE * file)4775 PUGI__FN void close_file(FILE* file) 4776 { 4777 fclose(file); 4778 } 4779 4780 #ifndef PUGIXML_NO_STL 4781 template <typename T> struct xml_stream_chunk 4782 { createxml_stream_chunk4783 static xml_stream_chunk* create() 4784 { 4785 void* memory = xml_memory::allocate(sizeof(xml_stream_chunk)); 4786 if (!memory) return 0; 4787 4788 return new (memory) xml_stream_chunk(); 4789 } 4790 destroyxml_stream_chunk4791 static void destroy(xml_stream_chunk* chunk) 4792 { 4793 // free chunk chain 4794 while (chunk) 4795 { 4796 xml_stream_chunk* next_ = chunk->next; 4797 4798 xml_memory::deallocate(chunk); 4799 4800 chunk = next_; 4801 } 4802 } 4803 xml_stream_chunkxml_stream_chunk4804 xml_stream_chunk(): next(0), size(0) 4805 { 4806 } 4807 4808 xml_stream_chunk* next; 4809 size_t size; 4810 
4811 T data[xml_memory_page_size / sizeof(T)]; 4812 }; 4813 load_stream_data_noseek(std::basic_istream<T> & stream,void ** out_buffer,size_t * out_size)4814 template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size) 4815 { 4816 auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy); 4817 4818 // read file to a chunk list 4819 size_t total = 0; 4820 xml_stream_chunk<T>* last = 0; 4821 4822 while (!stream.eof()) 4823 { 4824 // allocate new chunk 4825 xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create(); 4826 if (!chunk) return status_out_of_memory; 4827 4828 // append chunk to list 4829 if (last) last = last->next = chunk; 4830 else chunks.data = last = chunk; 4831 4832 // read data to chunk 4833 stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T))); 4834 chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T); 4835 4836 // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors 4837 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error; 4838 4839 // guard against huge files (chunk size is small enough to make this overflow check work) 4840 if (total + chunk->size < total) return status_out_of_memory; 4841 total += chunk->size; 4842 } 4843 4844 size_t max_suffix_size = sizeof(char_t); 4845 4846 // copy chunk list to a contiguous buffer 4847 char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size)); 4848 if (!buffer) return status_out_of_memory; 4849 4850 char* write = buffer; 4851 4852 for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next) 4853 { 4854 assert(write + chunk->size <= buffer + total); 4855 memcpy(write, chunk->data, chunk->size); 4856 write += chunk->size; 4857 } 4858 4859 assert(write == buffer + total); 4860 4861 // return buffer 4862 *out_buffer = buffer; 4863 *out_size = total; 4864 4865 return 
status_ok; 4866 } 4867 load_stream_data_seek(std::basic_istream<T> & stream,void ** out_buffer,size_t * out_size)4868 template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size) 4869 { 4870 // get length of remaining data in stream 4871 typename std::basic_istream<T>::pos_type pos = stream.tellg(); 4872 stream.seekg(0, std::ios::end); 4873 std::streamoff length = stream.tellg() - pos; 4874 stream.seekg(pos); 4875 4876 if (stream.fail() || pos < 0) return status_io_error; 4877 4878 // guard against huge files 4879 size_t read_length = static_cast<size_t>(length); 4880 4881 if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory; 4882 4883 size_t max_suffix_size = sizeof(char_t); 4884 4885 // read stream data into memory (guard against stream exceptions with buffer holder) 4886 auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate); 4887 if (!buffer.data) return status_out_of_memory; 4888 4889 stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length)); 4890 4891 // read may set failbit | eofbit in case gcount() is less than read_length (i.e. 
line ending conversion), so check for other I/O errors 4892 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error; 4893 4894 // return buffer 4895 size_t actual_length = static_cast<size_t>(stream.gcount()); 4896 assert(actual_length <= read_length); 4897 4898 *out_buffer = buffer.release(); 4899 *out_size = actual_length * sizeof(T); 4900 4901 return status_ok; 4902 } 4903 load_stream_impl(xml_document_struct * doc,std::basic_istream<T> & stream,unsigned int options,xml_encoding encoding,char_t ** out_buffer)4904 template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer) 4905 { 4906 void* buffer = 0; 4907 size_t size = 0; 4908 xml_parse_status status = status_ok; 4909 4910 // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits) 4911 if (stream.fail()) return make_parse_result(status_io_error); 4912 4913 // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory) 4914 if (stream.tellg() < 0) 4915 { 4916 stream.clear(); // clear error flags that could be set by a failing tellg 4917 status = load_stream_data_noseek(stream, &buffer, &size); 4918 } 4919 else 4920 status = load_stream_data_seek(stream, &buffer, &size); 4921 4922 if (status != status_ok) return make_parse_result(status); 4923 4924 xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size); 4925 4926 return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer); 4927 } 4928 #endif 4929 4930 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))) open_file_wide(const wchar_t * path,const wchar_t * mode)4931 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* 
mode) 4932 { 4933 return _wfopen(path, mode); 4934 } 4935 #else convert_path_heap(const wchar_t * str)4936 PUGI__FN char* convert_path_heap(const wchar_t* str) 4937 { 4938 assert(str); 4939 4940 // first pass: get length in utf8 characters 4941 size_t length = strlength_wide(str); 4942 size_t size = as_utf8_begin(str, length); 4943 4944 // allocate resulting string 4945 char* result = static_cast<char*>(xml_memory::allocate(size + 1)); 4946 if (!result) return 0; 4947 4948 // second pass: convert to utf8 4949 as_utf8_end(result, size, str, length); 4950 4951 // zero-terminate 4952 result[size] = 0; 4953 4954 return result; 4955 } 4956 open_file_wide(const wchar_t * path,const wchar_t * mode)4957 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) 4958 { 4959 // there is no standard function to open wide paths, so our best bet is to try utf8 path 4960 char* path_utf8 = convert_path_heap(path); 4961 if (!path_utf8) return 0; 4962 4963 // convert mode to ASCII (we mirror _wfopen interface) 4964 char mode_ascii[4] = {0}; 4965 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]); 4966 4967 // try to open the utf8 path 4968 FILE* result = fopen(path_utf8, mode_ascii); 4969 4970 // free dummy buffer 4971 xml_memory::deallocate(path_utf8); 4972 4973 return result; 4974 } 4975 #endif 4976 save_file_impl(const xml_document & doc,FILE * file,const char_t * indent,unsigned int flags,xml_encoding encoding)4977 PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding) 4978 { 4979 if (!file) return false; 4980 4981 xml_writer_file writer(file); 4982 doc.save(writer, indent, flags, encoding); 4983 4984 return ferror(file) == 0; 4985 } 4986 4987 struct name_null_sentry 4988 { 4989 xml_node_struct* node; 4990 char_t* name; 4991 name_null_sentryname_null_sentry4992 name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name) 4993 { 4994 node->name = 0; 4995 
} 4996 ~name_null_sentryname_null_sentry4997 ~name_null_sentry() 4998 { 4999 node->name = name; 5000 } 5001 }; 5002 PUGI__NS_END 5003 5004 namespace pugi 5005 { xml_writer_file(void * file_)5006 PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_) 5007 { 5008 } 5009 write(const void * data,size_t size)5010 PUGI__FN void xml_writer_file::write(const void* data, size_t size) 5011 { 5012 size_t result = fwrite(data, 1, size, static_cast<FILE*>(file)); 5013 (void)!result; // unfortunately we can't do proper error handling here 5014 } 5015 5016 #ifndef PUGIXML_NO_STL xml_writer_stream(std::basic_ostream<char,std::char_traits<char>> & stream)5017 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0) 5018 { 5019 } 5020 xml_writer_stream(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream)5021 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream) 5022 { 5023 } 5024 write(const void * data,size_t size)5025 PUGI__FN void xml_writer_stream::write(const void* data, size_t size) 5026 { 5027 if (narrow_stream) 5028 { 5029 assert(!wide_stream); 5030 narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size)); 5031 } 5032 else 5033 { 5034 assert(wide_stream); 5035 assert(size % sizeof(wchar_t) == 0); 5036 5037 wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t))); 5038 } 5039 } 5040 #endif 5041 xml_tree_walker()5042 PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0) 5043 { 5044 } 5045 ~xml_tree_walker()5046 PUGI__FN xml_tree_walker::~xml_tree_walker() 5047 { 5048 } 5049 depth() const5050 PUGI__FN int xml_tree_walker::depth() const 5051 { 5052 return _depth; 5053 } 5054 begin(xml_node &)5055 PUGI__FN bool xml_tree_walker::begin(xml_node&) 5056 { 5057 return true; 5058 } 5059 
end(xml_node &)5060 PUGI__FN bool xml_tree_walker::end(xml_node&) 5061 { 5062 return true; 5063 } 5064 xml_attribute()5065 PUGI__FN xml_attribute::xml_attribute(): _attr(0) 5066 { 5067 } 5068 xml_attribute(xml_attribute_struct * attr)5069 PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr) 5070 { 5071 } 5072 unspecified_bool_xml_attribute(xml_attribute ***)5073 PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***) 5074 { 5075 } 5076 operator xml_attribute::unspecified_bool_type() const5077 PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const 5078 { 5079 return _attr ? unspecified_bool_xml_attribute : 0; 5080 } 5081 operator !() const5082 PUGI__FN bool xml_attribute::operator!() const 5083 { 5084 return !_attr; 5085 } 5086 operator ==(const xml_attribute & r) const5087 PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const 5088 { 5089 return (_attr == r._attr); 5090 } 5091 operator !=(const xml_attribute & r) const5092 PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const 5093 { 5094 return (_attr != r._attr); 5095 } 5096 operator <(const xml_attribute & r) const5097 PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const 5098 { 5099 return (_attr < r._attr); 5100 } 5101 operator >(const xml_attribute & r) const5102 PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const 5103 { 5104 return (_attr > r._attr); 5105 } 5106 operator <=(const xml_attribute & r) const5107 PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const 5108 { 5109 return (_attr <= r._attr); 5110 } 5111 operator >=(const xml_attribute & r) const5112 PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const 5113 { 5114 return (_attr >= r._attr); 5115 } 5116 next_attribute() const5117 PUGI__FN xml_attribute xml_attribute::next_attribute() const 5118 { 5119 return _attr ? 
xml_attribute(_attr->next_attribute) : xml_attribute(); 5120 } 5121 previous_attribute() const5122 PUGI__FN xml_attribute xml_attribute::previous_attribute() const 5123 { 5124 return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute(); 5125 } 5126 as_string(const char_t * def) const5127 PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const 5128 { 5129 return (_attr && _attr->value) ? _attr->value + 0 : def; 5130 } 5131 as_int(int def) const5132 PUGI__FN int xml_attribute::as_int(int def) const 5133 { 5134 return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def; 5135 } 5136 as_uint(unsigned int def) const5137 PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const 5138 { 5139 return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def; 5140 } 5141 as_double(double def) const5142 PUGI__FN double xml_attribute::as_double(double def) const 5143 { 5144 return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def; 5145 } 5146 as_float(float def) const5147 PUGI__FN float xml_attribute::as_float(float def) const 5148 { 5149 return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def; 5150 } 5151 as_bool(bool def) const5152 PUGI__FN bool xml_attribute::as_bool(bool def) const 5153 { 5154 return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def; 5155 } 5156 5157 #ifdef PUGIXML_HAS_LONG_LONG as_llong(long long def) const5158 PUGI__FN long long xml_attribute::as_llong(long long def) const 5159 { 5160 return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def; 5161 } 5162 as_ullong(unsigned long long def) const5163 PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const 5164 { 5165 return (_attr && _attr->value) ? 
impl::get_value_ullong(_attr->value) : def; 5166 } 5167 #endif 5168 empty() const5169 PUGI__FN bool xml_attribute::empty() const 5170 { 5171 return !_attr; 5172 } 5173 name() const5174 PUGI__FN const char_t* xml_attribute::name() const 5175 { 5176 return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT(""); 5177 } 5178 value() const5179 PUGI__FN const char_t* xml_attribute::value() const 5180 { 5181 return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT(""); 5182 } 5183 hash_value() const5184 PUGI__FN size_t xml_attribute::hash_value() const 5185 { 5186 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct)); 5187 } 5188 internal_object() const5189 PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const 5190 { 5191 return _attr; 5192 } 5193 operator =(const char_t * rhs)5194 PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs) 5195 { 5196 set_value(rhs); 5197 return *this; 5198 } 5199 operator =(int rhs)5200 PUGI__FN xml_attribute& xml_attribute::operator=(int rhs) 5201 { 5202 set_value(rhs); 5203 return *this; 5204 } 5205 operator =(unsigned int rhs)5206 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs) 5207 { 5208 set_value(rhs); 5209 return *this; 5210 } 5211 operator =(long rhs)5212 PUGI__FN xml_attribute& xml_attribute::operator=(long rhs) 5213 { 5214 set_value(rhs); 5215 return *this; 5216 } 5217 operator =(unsigned long rhs)5218 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs) 5219 { 5220 set_value(rhs); 5221 return *this; 5222 } 5223 operator =(double rhs)5224 PUGI__FN xml_attribute& xml_attribute::operator=(double rhs) 5225 { 5226 set_value(rhs); 5227 return *this; 5228 } 5229 operator =(float rhs)5230 PUGI__FN xml_attribute& xml_attribute::operator=(float rhs) 5231 { 5232 set_value(rhs); 5233 return *this; 5234 } 5235 operator =(bool rhs)5236 PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs) 5237 { 5238 set_value(rhs); 5239 
return *this; 5240 } 5241 5242 #ifdef PUGIXML_HAS_LONG_LONG operator =(long long rhs)5243 PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs) 5244 { 5245 set_value(rhs); 5246 return *this; 5247 } 5248 operator =(unsigned long long rhs)5249 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs) 5250 { 5251 set_value(rhs); 5252 return *this; 5253 } 5254 #endif 5255 set_name(const char_t * rhs)5256 PUGI__FN bool xml_attribute::set_name(const char_t* rhs) 5257 { 5258 if (!_attr) return false; 5259 5260 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); 5261 } 5262 set_value(const char_t * rhs)5263 PUGI__FN bool xml_attribute::set_value(const char_t* rhs) 5264 { 5265 if (!_attr) return false; 5266 5267 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); 5268 } 5269 set_value(int rhs)5270 PUGI__FN bool xml_attribute::set_value(int rhs) 5271 { 5272 if (!_attr) return false; 5273 5274 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); 5275 } 5276 set_value(unsigned int rhs)5277 PUGI__FN bool xml_attribute::set_value(unsigned int rhs) 5278 { 5279 if (!_attr) return false; 5280 5281 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); 5282 } 5283 set_value(long rhs)5284 PUGI__FN bool xml_attribute::set_value(long rhs) 5285 { 5286 if (!_attr) return false; 5287 5288 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); 5289 } 5290 set_value(unsigned long rhs)5291 PUGI__FN bool xml_attribute::set_value(unsigned long rhs) 5292 { 5293 if (!_attr) return false; 5294 5295 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, 
impl::xml_memory_page_value_allocated_mask, rhs, false); 5296 } 5297 set_value(double rhs)5298 PUGI__FN bool xml_attribute::set_value(double rhs) 5299 { 5300 if (!_attr) return false; 5301 5302 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); 5303 } 5304 set_value(float rhs)5305 PUGI__FN bool xml_attribute::set_value(float rhs) 5306 { 5307 if (!_attr) return false; 5308 5309 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); 5310 } 5311 set_value(bool rhs)5312 PUGI__FN bool xml_attribute::set_value(bool rhs) 5313 { 5314 if (!_attr) return false; 5315 5316 return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); 5317 } 5318 5319 #ifdef PUGIXML_HAS_LONG_LONG set_value(long long rhs)5320 PUGI__FN bool xml_attribute::set_value(long long rhs) 5321 { 5322 if (!_attr) return false; 5323 5324 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); 5325 } 5326 set_value(unsigned long long rhs)5327 PUGI__FN bool xml_attribute::set_value(unsigned long long rhs) 5328 { 5329 if (!_attr) return false; 5330 5331 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); 5332 } 5333 #endif 5334 5335 #ifdef __BORLANDC__ operator &&(const xml_attribute & lhs,bool rhs)5336 PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs) 5337 { 5338 return (bool)lhs && rhs; 5339 } 5340 operator ||(const xml_attribute & lhs,bool rhs)5341 PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs) 5342 { 5343 return (bool)lhs || rhs; 5344 } 5345 #endif 5346 xml_node()5347 PUGI__FN xml_node::xml_node(): _root(0) 5348 { 5349 } 5350 xml_node(xml_node_struct * p)5351 PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p) 5352 { 5353 } 5354 
unspecified_bool_xml_node(xml_node ***)5355 PUGI__FN static void unspecified_bool_xml_node(xml_node***) 5356 { 5357 } 5358 operator xml_node::unspecified_bool_type() const5359 PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const 5360 { 5361 return _root ? unspecified_bool_xml_node : 0; 5362 } 5363 operator !() const5364 PUGI__FN bool xml_node::operator!() const 5365 { 5366 return !_root; 5367 } 5368 begin() const5369 PUGI__FN xml_node::iterator xml_node::begin() const 5370 { 5371 return iterator(_root ? _root->first_child + 0 : 0, _root); 5372 } 5373 end() const5374 PUGI__FN xml_node::iterator xml_node::end() const 5375 { 5376 return iterator(0, _root); 5377 } 5378 attributes_begin() const5379 PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const 5380 { 5381 return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root); 5382 } 5383 attributes_end() const5384 PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const 5385 { 5386 return attribute_iterator(0, _root); 5387 } 5388 children() const5389 PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const 5390 { 5391 return xml_object_range<xml_node_iterator>(begin(), end()); 5392 } 5393 children(const char_t * name_) const5394 PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const 5395 { 5396 return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_)); 5397 } 5398 attributes() const5399 PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const 5400 { 5401 return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end()); 5402 } 5403 operator ==(const xml_node & r) const5404 PUGI__FN bool xml_node::operator==(const xml_node& r) const 5405 { 5406 return (_root == r._root); 5407 } 5408 operator !=(const xml_node & r) const5409 PUGI__FN bool xml_node::operator!=(const xml_node& r) 
const 5410 { 5411 return (_root != r._root); 5412 } 5413 operator <(const xml_node & r) const5414 PUGI__FN bool xml_node::operator<(const xml_node& r) const 5415 { 5416 return (_root < r._root); 5417 } 5418 operator >(const xml_node & r) const5419 PUGI__FN bool xml_node::operator>(const xml_node& r) const 5420 { 5421 return (_root > r._root); 5422 } 5423 operator <=(const xml_node & r) const5424 PUGI__FN bool xml_node::operator<=(const xml_node& r) const 5425 { 5426 return (_root <= r._root); 5427 } 5428 operator >=(const xml_node & r) const5429 PUGI__FN bool xml_node::operator>=(const xml_node& r) const 5430 { 5431 return (_root >= r._root); 5432 } 5433 empty() const5434 PUGI__FN bool xml_node::empty() const 5435 { 5436 return !_root; 5437 } 5438 name() const5439 PUGI__FN const char_t* xml_node::name() const 5440 { 5441 return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT(""); 5442 } 5443 type() const5444 PUGI__FN xml_node_type xml_node::type() const 5445 { 5446 return _root ? PUGI__NODETYPE(_root) : node_null; 5447 } 5448 value() const5449 PUGI__FN const char_t* xml_node::value() const 5450 { 5451 return (_root && _root->value) ? 
_root->value + 0 : PUGIXML_TEXT(""); 5452 } 5453 child(const char_t * name_) const5454 PUGI__FN xml_node xml_node::child(const char_t* name_) const 5455 { 5456 if (!_root) return xml_node(); 5457 5458 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 5459 if (i->name && impl::strequal(name_, i->name)) return xml_node(i); 5460 5461 return xml_node(); 5462 } 5463 attribute(const char_t * name_) const5464 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const 5465 { 5466 if (!_root) return xml_attribute(); 5467 5468 for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) 5469 if (i->name && impl::strequal(name_, i->name)) 5470 return xml_attribute(i); 5471 5472 return xml_attribute(); 5473 } 5474 next_sibling(const char_t * name_) const5475 PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const 5476 { 5477 if (!_root) return xml_node(); 5478 5479 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) 5480 if (i->name && impl::strequal(name_, i->name)) return xml_node(i); 5481 5482 return xml_node(); 5483 } 5484 next_sibling() const5485 PUGI__FN xml_node xml_node::next_sibling() const 5486 { 5487 return _root ? 
xml_node(_root->next_sibling) : xml_node(); 5488 } 5489 previous_sibling(const char_t * name_) const5490 PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const 5491 { 5492 if (!_root) return xml_node(); 5493 5494 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) 5495 if (i->name && impl::strequal(name_, i->name)) return xml_node(i); 5496 5497 return xml_node(); 5498 } 5499 attribute(const char_t * name_,xml_attribute & hint_) const5500 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const 5501 { 5502 xml_attribute_struct* hint = hint_._attr; 5503 5504 // if hint is not an attribute of node, behavior is not defined 5505 assert(!hint || (_root && impl::is_attribute_of(hint, _root))); 5506 5507 if (!_root) return xml_attribute(); 5508 5509 // optimistically search from hint up until the end 5510 for (xml_attribute_struct* i = hint; i; i = i->next_attribute) 5511 if (i->name && impl::strequal(name_, i->name)) 5512 { 5513 // update hint to maximize efficiency of searching for consecutive attributes 5514 hint_._attr = i->next_attribute; 5515 5516 return xml_attribute(i); 5517 } 5518 5519 // wrap around and search from the first attribute until the hint 5520 // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails 5521 for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute) 5522 if (j->name && impl::strequal(name_, j->name)) 5523 { 5524 // update hint to maximize efficiency of searching for consecutive attributes 5525 hint_._attr = j->next_attribute; 5526 5527 return xml_attribute(j); 5528 } 5529 5530 return xml_attribute(); 5531 } 5532 previous_sibling() const5533 PUGI__FN xml_node xml_node::previous_sibling() const 5534 { 5535 if (!_root) return xml_node(); 5536 5537 if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c); 5538 else return xml_node(); 5539 } 5540 
parent() const5541 PUGI__FN xml_node xml_node::parent() const 5542 { 5543 return _root ? xml_node(_root->parent) : xml_node(); 5544 } 5545 root() const5546 PUGI__FN xml_node xml_node::root() const 5547 { 5548 return _root ? xml_node(&impl::get_document(_root)) : xml_node(); 5549 } 5550 text() const5551 PUGI__FN xml_text xml_node::text() const 5552 { 5553 return xml_text(_root); 5554 } 5555 child_value() const5556 PUGI__FN const char_t* xml_node::child_value() const 5557 { 5558 if (!_root) return PUGIXML_TEXT(""); 5559 5560 // element nodes can have value if parse_embed_pcdata was used 5561 if (PUGI__NODETYPE(_root) == node_element && _root->value) 5562 return _root->value; 5563 5564 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 5565 if (impl::is_text_node(i) && i->value) 5566 return i->value; 5567 5568 return PUGIXML_TEXT(""); 5569 } 5570 child_value(const char_t * name_) const5571 PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const 5572 { 5573 return child(name_).child_value(); 5574 } 5575 first_attribute() const5576 PUGI__FN xml_attribute xml_node::first_attribute() const 5577 { 5578 return _root ? xml_attribute(_root->first_attribute) : xml_attribute(); 5579 } 5580 last_attribute() const5581 PUGI__FN xml_attribute xml_node::last_attribute() const 5582 { 5583 return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute(); 5584 } 5585 first_child() const5586 PUGI__FN xml_node xml_node::first_child() const 5587 { 5588 return _root ? xml_node(_root->first_child) : xml_node(); 5589 } 5590 last_child() const5591 PUGI__FN xml_node xml_node::last_child() const 5592 { 5593 return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node(); 5594 } 5595 set_name(const char_t * rhs)5596 PUGI__FN bool xml_node::set_name(const char_t* rhs) 5597 { 5598 xml_node_type type_ = _root ? 
PUGI__NODETYPE(_root) : node_null; 5599 5600 if (type_ != node_element && type_ != node_pi && type_ != node_declaration) 5601 return false; 5602 5603 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); 5604 } 5605 set_value(const char_t * rhs)5606 PUGI__FN bool xml_node::set_value(const char_t* rhs) 5607 { 5608 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null; 5609 5610 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype) 5611 return false; 5612 5613 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); 5614 } 5615 append_attribute(const char_t * name_)5616 PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_) 5617 { 5618 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5619 5620 impl::xml_allocator& alloc = impl::get_allocator(_root); 5621 if (!alloc.reserve()) return xml_attribute(); 5622 5623 xml_attribute a(impl::allocate_attribute(alloc)); 5624 if (!a) return xml_attribute(); 5625 5626 impl::append_attribute(a._attr, _root); 5627 5628 a.set_name(name_); 5629 5630 return a; 5631 } 5632 prepend_attribute(const char_t * name_)5633 PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_) 5634 { 5635 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5636 5637 impl::xml_allocator& alloc = impl::get_allocator(_root); 5638 if (!alloc.reserve()) return xml_attribute(); 5639 5640 xml_attribute a(impl::allocate_attribute(alloc)); 5641 if (!a) return xml_attribute(); 5642 5643 impl::prepend_attribute(a._attr, _root); 5644 5645 a.set_name(name_); 5646 5647 return a; 5648 } 5649 insert_attribute_after(const char_t * name_,const xml_attribute & attr)5650 PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr) 5651 { 5652 if 
(!impl::allow_insert_attribute(type())) return xml_attribute(); 5653 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 5654 5655 impl::xml_allocator& alloc = impl::get_allocator(_root); 5656 if (!alloc.reserve()) return xml_attribute(); 5657 5658 xml_attribute a(impl::allocate_attribute(alloc)); 5659 if (!a) return xml_attribute(); 5660 5661 impl::insert_attribute_after(a._attr, attr._attr, _root); 5662 5663 a.set_name(name_); 5664 5665 return a; 5666 } 5667 insert_attribute_before(const char_t * name_,const xml_attribute & attr)5668 PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr) 5669 { 5670 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5671 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 5672 5673 impl::xml_allocator& alloc = impl::get_allocator(_root); 5674 if (!alloc.reserve()) return xml_attribute(); 5675 5676 xml_attribute a(impl::allocate_attribute(alloc)); 5677 if (!a) return xml_attribute(); 5678 5679 impl::insert_attribute_before(a._attr, attr._attr, _root); 5680 5681 a.set_name(name_); 5682 5683 return a; 5684 } 5685 append_copy(const xml_attribute & proto)5686 PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto) 5687 { 5688 if (!proto) return xml_attribute(); 5689 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5690 5691 impl::xml_allocator& alloc = impl::get_allocator(_root); 5692 if (!alloc.reserve()) return xml_attribute(); 5693 5694 xml_attribute a(impl::allocate_attribute(alloc)); 5695 if (!a) return xml_attribute(); 5696 5697 impl::append_attribute(a._attr, _root); 5698 impl::node_copy_attribute(a._attr, proto._attr); 5699 5700 return a; 5701 } 5702 prepend_copy(const xml_attribute & proto)5703 PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto) 5704 { 5705 if (!proto) return xml_attribute(); 5706 if (!impl::allow_insert_attribute(type())) return 
xml_attribute(); 5707 5708 impl::xml_allocator& alloc = impl::get_allocator(_root); 5709 if (!alloc.reserve()) return xml_attribute(); 5710 5711 xml_attribute a(impl::allocate_attribute(alloc)); 5712 if (!a) return xml_attribute(); 5713 5714 impl::prepend_attribute(a._attr, _root); 5715 impl::node_copy_attribute(a._attr, proto._attr); 5716 5717 return a; 5718 } 5719 insert_copy_after(const xml_attribute & proto,const xml_attribute & attr)5720 PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr) 5721 { 5722 if (!proto) return xml_attribute(); 5723 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5724 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 5725 5726 impl::xml_allocator& alloc = impl::get_allocator(_root); 5727 if (!alloc.reserve()) return xml_attribute(); 5728 5729 xml_attribute a(impl::allocate_attribute(alloc)); 5730 if (!a) return xml_attribute(); 5731 5732 impl::insert_attribute_after(a._attr, attr._attr, _root); 5733 impl::node_copy_attribute(a._attr, proto._attr); 5734 5735 return a; 5736 } 5737 insert_copy_before(const xml_attribute & proto,const xml_attribute & attr)5738 PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr) 5739 { 5740 if (!proto) return xml_attribute(); 5741 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5742 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 5743 5744 impl::xml_allocator& alloc = impl::get_allocator(_root); 5745 if (!alloc.reserve()) return xml_attribute(); 5746 5747 xml_attribute a(impl::allocate_attribute(alloc)); 5748 if (!a) return xml_attribute(); 5749 5750 impl::insert_attribute_before(a._attr, attr._attr, _root); 5751 impl::node_copy_attribute(a._attr, proto._attr); 5752 5753 return a; 5754 } 5755 append_child(xml_node_type type_)5756 PUGI__FN xml_node xml_node::append_child(xml_node_type type_) 5757 { 5758 
if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5759 5760 impl::xml_allocator& alloc = impl::get_allocator(_root); 5761 if (!alloc.reserve()) return xml_node(); 5762 5763 xml_node n(impl::allocate_node(alloc, type_)); 5764 if (!n) return xml_node(); 5765 5766 impl::append_node(n._root, _root); 5767 5768 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5769 5770 return n; 5771 } 5772 prepend_child(xml_node_type type_)5773 PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_) 5774 { 5775 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5776 5777 impl::xml_allocator& alloc = impl::get_allocator(_root); 5778 if (!alloc.reserve()) return xml_node(); 5779 5780 xml_node n(impl::allocate_node(alloc, type_)); 5781 if (!n) return xml_node(); 5782 5783 impl::prepend_node(n._root, _root); 5784 5785 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5786 5787 return n; 5788 } 5789 insert_child_before(xml_node_type type_,const xml_node & node)5790 PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node) 5791 { 5792 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5793 if (!node._root || node._root->parent != _root) return xml_node(); 5794 5795 impl::xml_allocator& alloc = impl::get_allocator(_root); 5796 if (!alloc.reserve()) return xml_node(); 5797 5798 xml_node n(impl::allocate_node(alloc, type_)); 5799 if (!n) return xml_node(); 5800 5801 impl::insert_node_before(n._root, node._root); 5802 5803 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5804 5805 return n; 5806 } 5807 insert_child_after(xml_node_type type_,const xml_node & node)5808 PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node) 5809 { 5810 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5811 if (!node._root || node._root->parent != _root) return xml_node(); 5812 5813 impl::xml_allocator& alloc = impl::get_allocator(_root); 5814 
if (!alloc.reserve()) return xml_node(); 5815 5816 xml_node n(impl::allocate_node(alloc, type_)); 5817 if (!n) return xml_node(); 5818 5819 impl::insert_node_after(n._root, node._root); 5820 5821 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5822 5823 return n; 5824 } 5825 append_child(const char_t * name_)5826 PUGI__FN xml_node xml_node::append_child(const char_t* name_) 5827 { 5828 xml_node result = append_child(node_element); 5829 5830 result.set_name(name_); 5831 5832 return result; 5833 } 5834 prepend_child(const char_t * name_)5835 PUGI__FN xml_node xml_node::prepend_child(const char_t* name_) 5836 { 5837 xml_node result = prepend_child(node_element); 5838 5839 result.set_name(name_); 5840 5841 return result; 5842 } 5843 insert_child_after(const char_t * name_,const xml_node & node)5844 PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node) 5845 { 5846 xml_node result = insert_child_after(node_element, node); 5847 5848 result.set_name(name_); 5849 5850 return result; 5851 } 5852 insert_child_before(const char_t * name_,const xml_node & node)5853 PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node) 5854 { 5855 xml_node result = insert_child_before(node_element, node); 5856 5857 result.set_name(name_); 5858 5859 return result; 5860 } 5861 append_copy(const xml_node & proto)5862 PUGI__FN xml_node xml_node::append_copy(const xml_node& proto) 5863 { 5864 xml_node_type type_ = proto.type(); 5865 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5866 5867 impl::xml_allocator& alloc = impl::get_allocator(_root); 5868 if (!alloc.reserve()) return xml_node(); 5869 5870 xml_node n(impl::allocate_node(alloc, type_)); 5871 if (!n) return xml_node(); 5872 5873 impl::append_node(n._root, _root); 5874 impl::node_copy_tree(n._root, proto._root); 5875 5876 return n; 5877 } 5878 prepend_copy(const xml_node & proto)5879 PUGI__FN xml_node xml_node::prepend_copy(const 
xml_node& proto) 5880 { 5881 xml_node_type type_ = proto.type(); 5882 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5883 5884 impl::xml_allocator& alloc = impl::get_allocator(_root); 5885 if (!alloc.reserve()) return xml_node(); 5886 5887 xml_node n(impl::allocate_node(alloc, type_)); 5888 if (!n) return xml_node(); 5889 5890 impl::prepend_node(n._root, _root); 5891 impl::node_copy_tree(n._root, proto._root); 5892 5893 return n; 5894 } 5895 insert_copy_after(const xml_node & proto,const xml_node & node)5896 PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node) 5897 { 5898 xml_node_type type_ = proto.type(); 5899 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5900 if (!node._root || node._root->parent != _root) return xml_node(); 5901 5902 impl::xml_allocator& alloc = impl::get_allocator(_root); 5903 if (!alloc.reserve()) return xml_node(); 5904 5905 xml_node n(impl::allocate_node(alloc, type_)); 5906 if (!n) return xml_node(); 5907 5908 impl::insert_node_after(n._root, node._root); 5909 impl::node_copy_tree(n._root, proto._root); 5910 5911 return n; 5912 } 5913 insert_copy_before(const xml_node & proto,const xml_node & node)5914 PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node) 5915 { 5916 xml_node_type type_ = proto.type(); 5917 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5918 if (!node._root || node._root->parent != _root) return xml_node(); 5919 5920 impl::xml_allocator& alloc = impl::get_allocator(_root); 5921 if (!alloc.reserve()) return xml_node(); 5922 5923 xml_node n(impl::allocate_node(alloc, type_)); 5924 if (!n) return xml_node(); 5925 5926 impl::insert_node_before(n._root, node._root); 5927 impl::node_copy_tree(n._root, proto._root); 5928 5929 return n; 5930 } 5931 append_move(const xml_node & moved)5932 PUGI__FN xml_node xml_node::append_move(const xml_node& moved) 5933 { 5934 if (!impl::allow_move(*this, moved)) 
return xml_node(); 5935 5936 impl::xml_allocator& alloc = impl::get_allocator(_root); 5937 if (!alloc.reserve()) return xml_node(); 5938 5939 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 5940 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 5941 5942 impl::remove_node(moved._root); 5943 impl::append_node(moved._root, _root); 5944 5945 return moved; 5946 } 5947 prepend_move(const xml_node & moved)5948 PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved) 5949 { 5950 if (!impl::allow_move(*this, moved)) return xml_node(); 5951 5952 impl::xml_allocator& alloc = impl::get_allocator(_root); 5953 if (!alloc.reserve()) return xml_node(); 5954 5955 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 5956 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 5957 5958 impl::remove_node(moved._root); 5959 impl::prepend_node(moved._root, _root); 5960 5961 return moved; 5962 } 5963 insert_move_after(const xml_node & moved,const xml_node & node)5964 PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node) 5965 { 5966 if (!impl::allow_move(*this, moved)) return xml_node(); 5967 if (!node._root || node._root->parent != _root) return xml_node(); 5968 if (moved._root == node._root) return xml_node(); 5969 5970 impl::xml_allocator& alloc = impl::get_allocator(_root); 5971 if (!alloc.reserve()) return xml_node(); 5972 5973 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 5974 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 5975 5976 impl::remove_node(moved._root); 5977 impl::insert_node_after(moved._root, node._root); 5978 5979 return moved; 5980 } 5981 insert_move_before(const xml_node & moved,const xml_node & 
node)5982 PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node) 5983 { 5984 if (!impl::allow_move(*this, moved)) return xml_node(); 5985 if (!node._root || node._root->parent != _root) return xml_node(); 5986 if (moved._root == node._root) return xml_node(); 5987 5988 impl::xml_allocator& alloc = impl::get_allocator(_root); 5989 if (!alloc.reserve()) return xml_node(); 5990 5991 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 5992 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 5993 5994 impl::remove_node(moved._root); 5995 impl::insert_node_before(moved._root, node._root); 5996 5997 return moved; 5998 } 5999 remove_attribute(const char_t * name_)6000 PUGI__FN bool xml_node::remove_attribute(const char_t* name_) 6001 { 6002 return remove_attribute(attribute(name_)); 6003 } 6004 remove_attribute(const xml_attribute & a)6005 PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a) 6006 { 6007 if (!_root || !a._attr) return false; 6008 if (!impl::is_attribute_of(a._attr, _root)) return false; 6009 6010 impl::xml_allocator& alloc = impl::get_allocator(_root); 6011 if (!alloc.reserve()) return false; 6012 6013 impl::remove_attribute(a._attr, _root); 6014 impl::destroy_attribute(a._attr, alloc); 6015 6016 return true; 6017 } 6018 remove_child(const char_t * name_)6019 PUGI__FN bool xml_node::remove_child(const char_t* name_) 6020 { 6021 return remove_child(child(name_)); 6022 } 6023 remove_child(const xml_node & n)6024 PUGI__FN bool xml_node::remove_child(const xml_node& n) 6025 { 6026 if (!_root || !n._root || n._root->parent != _root) return false; 6027 6028 impl::xml_allocator& alloc = impl::get_allocator(_root); 6029 if (!alloc.reserve()) return false; 6030 6031 impl::remove_node(n._root); 6032 impl::destroy_node(n._root, alloc); 6033 6034 return true; 6035 } 6036 append_buffer(const void * contents,size_t 
size,unsigned int options,xml_encoding encoding)6037 PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) 6038 { 6039 // append_buffer is only valid for elements/documents 6040 if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root); 6041 6042 // get document node 6043 impl::xml_document_struct* doc = &impl::get_document(_root); 6044 6045 // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense 6046 doc->header |= impl::xml_memory_page_contents_shared_mask; 6047 6048 // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later) 6049 impl::xml_memory_page* page = 0; 6050 impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer), page)); 6051 (void)page; 6052 6053 if (!extra) return impl::make_parse_result(status_out_of_memory); 6054 6055 // add extra buffer to the list 6056 extra->buffer = 0; 6057 extra->next = doc->extra_buffers; 6058 doc->extra_buffers = extra; 6059 6060 // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level 6061 impl::name_null_sentry sentry(_root); 6062 6063 return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer); 6064 } 6065 find_child_by_attribute(const char_t * name_,const char_t * attr_name,const char_t * attr_value) const6066 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const 6067 { 6068 if (!_root) return xml_node(); 6069 6070 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 6071 if (i->name && impl::strequal(name_, i->name)) 6072 { 6073 for (xml_attribute_struct* a = i->first_attribute; 
a; a = a->next_attribute) 6074 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT(""))) 6075 return xml_node(i); 6076 } 6077 6078 return xml_node(); 6079 } 6080 find_child_by_attribute(const char_t * attr_name,const char_t * attr_value) const6081 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const 6082 { 6083 if (!_root) return xml_node(); 6084 6085 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 6086 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) 6087 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT(""))) 6088 return xml_node(i); 6089 6090 return xml_node(); 6091 } 6092 6093 #ifndef PUGIXML_NO_STL path(char_t delimiter) const6094 PUGI__FN string_t xml_node::path(char_t delimiter) const 6095 { 6096 if (!_root) return string_t(); 6097 6098 size_t offset = 0; 6099 6100 for (xml_node_struct* i = _root; i; i = i->parent) 6101 { 6102 offset += (i != _root); 6103 offset += i->name ? impl::strlength(i->name) : 0; 6104 } 6105 6106 string_t result; 6107 result.resize(offset); 6108 6109 for (xml_node_struct* j = _root; j; j = j->parent) 6110 { 6111 if (j != _root) 6112 result[--offset] = delimiter; 6113 6114 if (j->name && *j->name) 6115 { 6116 size_t length = impl::strlength(j->name); 6117 6118 offset -= length; 6119 memcpy(&result[offset], j->name, length * sizeof(char_t)); 6120 } 6121 } 6122 6123 assert(offset == 0); 6124 6125 return result; 6126 } 6127 #endif 6128 first_element_by_path(const char_t * path_,char_t delimiter) const6129 PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const 6130 { 6131 xml_node found = *this; // Current search context. 6132 6133 if (!_root || !path_ || !path_[0]) return found; 6134 6135 if (path_[0] == delimiter) 6136 { 6137 // Absolute path; e.g. 
'/foo/bar' 6138 found = found.root(); 6139 ++path_; 6140 } 6141 6142 const char_t* path_segment = path_; 6143 6144 while (*path_segment == delimiter) ++path_segment; 6145 6146 const char_t* path_segment_end = path_segment; 6147 6148 while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end; 6149 6150 if (path_segment == path_segment_end) return found; 6151 6152 const char_t* next_segment = path_segment_end; 6153 6154 while (*next_segment == delimiter) ++next_segment; 6155 6156 if (*path_segment == '.' && path_segment + 1 == path_segment_end) 6157 return found.first_element_by_path(next_segment, delimiter); 6158 else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end) 6159 return found.parent().first_element_by_path(next_segment, delimiter); 6160 else 6161 { 6162 for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling) 6163 { 6164 if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment))) 6165 { 6166 xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter); 6167 6168 if (subsearch) return subsearch; 6169 } 6170 } 6171 6172 return xml_node(); 6173 } 6174 } 6175 traverse(xml_tree_walker & walker)6176 PUGI__FN bool xml_node::traverse(xml_tree_walker& walker) 6177 { 6178 walker._depth = -1; 6179 6180 xml_node arg_begin = *this; 6181 if (!walker.begin(arg_begin)) return false; 6182 6183 xml_node cur = first_child(); 6184 6185 if (cur) 6186 { 6187 ++walker._depth; 6188 6189 do 6190 { 6191 xml_node arg_for_each = cur; 6192 if (!walker.for_each(arg_for_each)) 6193 return false; 6194 6195 if (cur.first_child()) 6196 { 6197 ++walker._depth; 6198 cur = cur.first_child(); 6199 } 6200 else if (cur.next_sibling()) 6201 cur = cur.next_sibling(); 6202 else 6203 { 6204 // Borland C++ workaround 6205 while (!cur.next_sibling() && cur != *this && !cur.parent().empty()) 6206 { 6207 --walker._depth; 6208 cur = cur.parent(); 6209 } 
6210 6211 if (cur != *this) 6212 cur = cur.next_sibling(); 6213 } 6214 } 6215 while (cur && cur != *this); 6216 } 6217 6218 assert(walker._depth == -1); 6219 6220 xml_node arg_end = *this; 6221 return walker.end(arg_end); 6222 } 6223 hash_value() const6224 PUGI__FN size_t xml_node::hash_value() const 6225 { 6226 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct)); 6227 } 6228 internal_object() const6229 PUGI__FN xml_node_struct* xml_node::internal_object() const 6230 { 6231 return _root; 6232 } 6233 print(xml_writer & writer,const char_t * indent,unsigned int flags,xml_encoding encoding,unsigned int depth) const6234 PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const 6235 { 6236 if (!_root) return; 6237 6238 impl::xml_buffered_writer buffered_writer(writer, encoding); 6239 6240 impl::node_output(buffered_writer, _root, indent, flags, depth); 6241 6242 buffered_writer.flush(); 6243 } 6244 6245 #ifndef PUGIXML_NO_STL print(std::basic_ostream<char,std::char_traits<char>> & stream,const char_t * indent,unsigned int flags,xml_encoding encoding,unsigned int depth) const6246 PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const 6247 { 6248 xml_writer_stream writer(stream); 6249 6250 print(writer, indent, flags, encoding, depth); 6251 } 6252 print(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream,const char_t * indent,unsigned int flags,unsigned int depth) const6253 PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const 6254 { 6255 xml_writer_stream writer(stream); 6256 6257 print(writer, indent, flags, encoding_wchar, depth); 6258 } 6259 #endif 6260 offset_debug() const6261 PUGI__FN ptrdiff_t 
xml_node::offset_debug() const 6262 { 6263 if (!_root) return -1; 6264 6265 impl::xml_document_struct& doc = impl::get_document(_root); 6266 6267 // we can determine the offset reliably only if there is exactly once parse buffer 6268 if (!doc.buffer || doc.extra_buffers) return -1; 6269 6270 switch (type()) 6271 { 6272 case node_document: 6273 return 0; 6274 6275 case node_element: 6276 case node_declaration: 6277 case node_pi: 6278 return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1; 6279 6280 case node_pcdata: 6281 case node_cdata: 6282 case node_comment: 6283 case node_doctype: 6284 return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1; 6285 6286 default: 6287 return -1; 6288 } 6289 } 6290 6291 #ifdef __BORLANDC__ operator &&(const xml_node & lhs,bool rhs)6292 PUGI__FN bool operator&&(const xml_node& lhs, bool rhs) 6293 { 6294 return (bool)lhs && rhs; 6295 } 6296 operator ||(const xml_node & lhs,bool rhs)6297 PUGI__FN bool operator||(const xml_node& lhs, bool rhs) 6298 { 6299 return (bool)lhs || rhs; 6300 } 6301 #endif 6302 xml_text(xml_node_struct * root)6303 PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root) 6304 { 6305 } 6306 _data() const6307 PUGI__FN xml_node_struct* xml_text::_data() const 6308 { 6309 if (!_root || impl::is_text_node(_root)) return _root; 6310 6311 // element nodes can have value if parse_embed_pcdata was used 6312 if (PUGI__NODETYPE(_root) == node_element && _root->value) 6313 return _root; 6314 6315 for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling) 6316 if (impl::is_text_node(node)) 6317 return node; 6318 6319 return 0; 6320 } 6321 _data_new()6322 PUGI__FN xml_node_struct* xml_text::_data_new() 6323 { 6324 xml_node_struct* d = _data(); 6325 if (d) return d; 6326 6327 return xml_node(_root).append_child(node_pcdata).internal_object(); 6328 } 
6329 xml_text()6330 PUGI__FN xml_text::xml_text(): _root(0) 6331 { 6332 } 6333 unspecified_bool_xml_text(xml_text ***)6334 PUGI__FN static void unspecified_bool_xml_text(xml_text***) 6335 { 6336 } 6337 operator xml_text::unspecified_bool_type() const6338 PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const 6339 { 6340 return _data() ? unspecified_bool_xml_text : 0; 6341 } 6342 operator !() const6343 PUGI__FN bool xml_text::operator!() const 6344 { 6345 return !_data(); 6346 } 6347 empty() const6348 PUGI__FN bool xml_text::empty() const 6349 { 6350 return _data() == 0; 6351 } 6352 get() const6353 PUGI__FN const char_t* xml_text::get() const 6354 { 6355 xml_node_struct* d = _data(); 6356 6357 return (d && d->value) ? d->value + 0 : PUGIXML_TEXT(""); 6358 } 6359 as_string(const char_t * def) const6360 PUGI__FN const char_t* xml_text::as_string(const char_t* def) const 6361 { 6362 xml_node_struct* d = _data(); 6363 6364 return (d && d->value) ? d->value + 0 : def; 6365 } 6366 as_int(int def) const6367 PUGI__FN int xml_text::as_int(int def) const 6368 { 6369 xml_node_struct* d = _data(); 6370 6371 return (d && d->value) ? impl::get_value_int(d->value) : def; 6372 } 6373 as_uint(unsigned int def) const6374 PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const 6375 { 6376 xml_node_struct* d = _data(); 6377 6378 return (d && d->value) ? impl::get_value_uint(d->value) : def; 6379 } 6380 as_double(double def) const6381 PUGI__FN double xml_text::as_double(double def) const 6382 { 6383 xml_node_struct* d = _data(); 6384 6385 return (d && d->value) ? impl::get_value_double(d->value) : def; 6386 } 6387 as_float(float def) const6388 PUGI__FN float xml_text::as_float(float def) const 6389 { 6390 xml_node_struct* d = _data(); 6391 6392 return (d && d->value) ? 
impl::get_value_float(d->value) : def; 6393 } 6394 as_bool(bool def) const6395 PUGI__FN bool xml_text::as_bool(bool def) const 6396 { 6397 xml_node_struct* d = _data(); 6398 6399 return (d && d->value) ? impl::get_value_bool(d->value) : def; 6400 } 6401 6402 #ifdef PUGIXML_HAS_LONG_LONG as_llong(long long def) const6403 PUGI__FN long long xml_text::as_llong(long long def) const 6404 { 6405 xml_node_struct* d = _data(); 6406 6407 return (d && d->value) ? impl::get_value_llong(d->value) : def; 6408 } 6409 as_ullong(unsigned long long def) const6410 PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const 6411 { 6412 xml_node_struct* d = _data(); 6413 6414 return (d && d->value) ? impl::get_value_ullong(d->value) : def; 6415 } 6416 #endif 6417 set(const char_t * rhs)6418 PUGI__FN bool xml_text::set(const char_t* rhs) 6419 { 6420 xml_node_struct* dn = _data_new(); 6421 6422 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false; 6423 } 6424 set(int rhs)6425 PUGI__FN bool xml_text::set(int rhs) 6426 { 6427 xml_node_struct* dn = _data_new(); 6428 6429 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; 6430 } 6431 set(unsigned int rhs)6432 PUGI__FN bool xml_text::set(unsigned int rhs) 6433 { 6434 xml_node_struct* dn = _data_new(); 6435 6436 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; 6437 } 6438 set(long rhs)6439 PUGI__FN bool xml_text::set(long rhs) 6440 { 6441 xml_node_struct* dn = _data_new(); 6442 6443 return dn ? 
impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; 6444 } 6445 set(unsigned long rhs)6446 PUGI__FN bool xml_text::set(unsigned long rhs) 6447 { 6448 xml_node_struct* dn = _data_new(); 6449 6450 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; 6451 } 6452 set(float rhs)6453 PUGI__FN bool xml_text::set(float rhs) 6454 { 6455 xml_node_struct* dn = _data_new(); 6456 6457 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; 6458 } 6459 set(double rhs)6460 PUGI__FN bool xml_text::set(double rhs) 6461 { 6462 xml_node_struct* dn = _data_new(); 6463 6464 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; 6465 } 6466 set(bool rhs)6467 PUGI__FN bool xml_text::set(bool rhs) 6468 { 6469 xml_node_struct* dn = _data_new(); 6470 6471 return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; 6472 } 6473 6474 #ifdef PUGIXML_HAS_LONG_LONG set(long long rhs)6475 PUGI__FN bool xml_text::set(long long rhs) 6476 { 6477 xml_node_struct* dn = _data_new(); 6478 6479 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; 6480 } 6481 set(unsigned long long rhs)6482 PUGI__FN bool xml_text::set(unsigned long long rhs) 6483 { 6484 xml_node_struct* dn = _data_new(); 6485 6486 return dn ? 
impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; 6487 } 6488 #endif 6489 operator =(const char_t * rhs)6490 PUGI__FN xml_text& xml_text::operator=(const char_t* rhs) 6491 { 6492 set(rhs); 6493 return *this; 6494 } 6495 operator =(int rhs)6496 PUGI__FN xml_text& xml_text::operator=(int rhs) 6497 { 6498 set(rhs); 6499 return *this; 6500 } 6501 operator =(unsigned int rhs)6502 PUGI__FN xml_text& xml_text::operator=(unsigned int rhs) 6503 { 6504 set(rhs); 6505 return *this; 6506 } 6507 operator =(long rhs)6508 PUGI__FN xml_text& xml_text::operator=(long rhs) 6509 { 6510 set(rhs); 6511 return *this; 6512 } 6513 operator =(unsigned long rhs)6514 PUGI__FN xml_text& xml_text::operator=(unsigned long rhs) 6515 { 6516 set(rhs); 6517 return *this; 6518 } 6519 operator =(double rhs)6520 PUGI__FN xml_text& xml_text::operator=(double rhs) 6521 { 6522 set(rhs); 6523 return *this; 6524 } 6525 operator =(float rhs)6526 PUGI__FN xml_text& xml_text::operator=(float rhs) 6527 { 6528 set(rhs); 6529 return *this; 6530 } 6531 operator =(bool rhs)6532 PUGI__FN xml_text& xml_text::operator=(bool rhs) 6533 { 6534 set(rhs); 6535 return *this; 6536 } 6537 6538 #ifdef PUGIXML_HAS_LONG_LONG operator =(long long rhs)6539 PUGI__FN xml_text& xml_text::operator=(long long rhs) 6540 { 6541 set(rhs); 6542 return *this; 6543 } 6544 operator =(unsigned long long rhs)6545 PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs) 6546 { 6547 set(rhs); 6548 return *this; 6549 } 6550 #endif 6551 data() const6552 PUGI__FN xml_node xml_text::data() const 6553 { 6554 return xml_node(_data()); 6555 } 6556 6557 #ifdef __BORLANDC__ operator &&(const xml_text & lhs,bool rhs)6558 PUGI__FN bool operator&&(const xml_text& lhs, bool rhs) 6559 { 6560 return (bool)lhs && rhs; 6561 } 6562 operator ||(const xml_text & lhs,bool rhs)6563 PUGI__FN bool operator||(const xml_text& lhs, bool rhs) 6564 { 6565 return (bool)lhs || rhs; 6566 } 
6567 #endif 6568 xml_node_iterator()6569 PUGI__FN xml_node_iterator::xml_node_iterator() 6570 { 6571 } 6572 xml_node_iterator(const xml_node & node)6573 PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent()) 6574 { 6575 } 6576 xml_node_iterator(xml_node_struct * ref,xml_node_struct * parent)6577 PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) 6578 { 6579 } 6580 operator ==(const xml_node_iterator & rhs) const6581 PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const 6582 { 6583 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; 6584 } 6585 operator !=(const xml_node_iterator & rhs) const6586 PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const 6587 { 6588 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; 6589 } 6590 operator *() const6591 PUGI__FN xml_node& xml_node_iterator::operator*() const 6592 { 6593 assert(_wrap._root); 6594 return _wrap; 6595 } 6596 operator ->() const6597 PUGI__FN xml_node* xml_node_iterator::operator->() const 6598 { 6599 assert(_wrap._root); 6600 return const_cast<xml_node*>(&_wrap); // BCC5 workaround 6601 } 6602 operator ++()6603 PUGI__FN const xml_node_iterator& xml_node_iterator::operator++() 6604 { 6605 assert(_wrap._root); 6606 _wrap._root = _wrap._root->next_sibling; 6607 return *this; 6608 } 6609 operator ++(int)6610 PUGI__FN xml_node_iterator xml_node_iterator::operator++(int) 6611 { 6612 xml_node_iterator temp = *this; 6613 ++*this; 6614 return temp; 6615 } 6616 operator --()6617 PUGI__FN const xml_node_iterator& xml_node_iterator::operator--() 6618 { 6619 _wrap = _wrap._root ? 
_wrap.previous_sibling() : _parent.last_child(); 6620 return *this; 6621 } 6622 operator --(int)6623 PUGI__FN xml_node_iterator xml_node_iterator::operator--(int) 6624 { 6625 xml_node_iterator temp = *this; 6626 --*this; 6627 return temp; 6628 } 6629 xml_attribute_iterator()6630 PUGI__FN xml_attribute_iterator::xml_attribute_iterator() 6631 { 6632 } 6633 xml_attribute_iterator(const xml_attribute & attr,const xml_node & parent)6634 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent) 6635 { 6636 } 6637 xml_attribute_iterator(xml_attribute_struct * ref,xml_node_struct * parent)6638 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) 6639 { 6640 } 6641 operator ==(const xml_attribute_iterator & rhs) const6642 PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const 6643 { 6644 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root; 6645 } 6646 operator !=(const xml_attribute_iterator & rhs) const6647 PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const 6648 { 6649 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root; 6650 } 6651 operator *() const6652 PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const 6653 { 6654 assert(_wrap._attr); 6655 return _wrap; 6656 } 6657 operator ->() const6658 PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const 6659 { 6660 assert(_wrap._attr); 6661 return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround 6662 } 6663 operator ++()6664 PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++() 6665 { 6666 assert(_wrap._attr); 6667 _wrap._attr = _wrap._attr->next_attribute; 6668 return *this; 6669 } 6670 operator ++(int)6671 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int) 6672 { 6673 
xml_attribute_iterator temp = *this; 6674 ++*this; 6675 return temp; 6676 } 6677 operator --()6678 PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--() 6679 { 6680 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute(); 6681 return *this; 6682 } 6683 operator --(int)6684 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int) 6685 { 6686 xml_attribute_iterator temp = *this; 6687 --*this; 6688 return temp; 6689 } 6690 xml_named_node_iterator()6691 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0) 6692 { 6693 } 6694 xml_named_node_iterator(const xml_node & node,const char_t * name)6695 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name) 6696 { 6697 } 6698 xml_named_node_iterator(xml_node_struct * ref,xml_node_struct * parent,const char_t * name)6699 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name) 6700 { 6701 } 6702 operator ==(const xml_named_node_iterator & rhs) const6703 PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const 6704 { 6705 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; 6706 } 6707 operator !=(const xml_named_node_iterator & rhs) const6708 PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const 6709 { 6710 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; 6711 } 6712 operator *() const6713 PUGI__FN xml_node& xml_named_node_iterator::operator*() const 6714 { 6715 assert(_wrap._root); 6716 return _wrap; 6717 } 6718 operator ->() const6719 PUGI__FN xml_node* xml_named_node_iterator::operator->() const 6720 { 6721 assert(_wrap._root); 6722 return const_cast<xml_node*>(&_wrap); // BCC5 workaround 6723 } 6724 operator ++()6725 PUGI__FN 
const xml_named_node_iterator& xml_named_node_iterator::operator++() 6726 { 6727 assert(_wrap._root); 6728 _wrap = _wrap.next_sibling(_name); 6729 return *this; 6730 } 6731 operator ++(int)6732 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int) 6733 { 6734 xml_named_node_iterator temp = *this; 6735 ++*this; 6736 return temp; 6737 } 6738 operator --()6739 PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--() 6740 { 6741 if (_wrap._root) 6742 _wrap = _wrap.previous_sibling(_name); 6743 else 6744 { 6745 _wrap = _parent.last_child(); 6746 6747 if (!impl::strequal(_wrap.name(), _name)) 6748 _wrap = _wrap.previous_sibling(_name); 6749 } 6750 6751 return *this; 6752 } 6753 operator --(int)6754 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int) 6755 { 6756 xml_named_node_iterator temp = *this; 6757 --*this; 6758 return temp; 6759 } 6760 xml_parse_result()6761 PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto) 6762 { 6763 } 6764 operator bool() const6765 PUGI__FN xml_parse_result::operator bool() const 6766 { 6767 return status == status_ok; 6768 } 6769 description() const6770 PUGI__FN const char* xml_parse_result::description() const 6771 { 6772 switch (status) 6773 { 6774 case status_ok: return "No error"; 6775 6776 case status_file_not_found: return "File was not found"; 6777 case status_io_error: return "Error reading from file/stream"; 6778 case status_out_of_memory: return "Could not allocate memory"; 6779 case status_internal_error: return "Internal error occurred"; 6780 6781 case status_unrecognized_tag: return "Could not determine tag type"; 6782 6783 case status_bad_pi: return "Error parsing document declaration/processing instruction"; 6784 case status_bad_comment: return "Error parsing comment"; 6785 case status_bad_cdata: return "Error parsing CDATA section"; 6786 case status_bad_doctype: return "Error parsing document type 
declaration"; 6787 case status_bad_pcdata: return "Error parsing PCDATA section"; 6788 case status_bad_start_element: return "Error parsing start element tag"; 6789 case status_bad_attribute: return "Error parsing element attribute"; 6790 case status_bad_end_element: return "Error parsing end element tag"; 6791 case status_end_element_mismatch: return "Start-end tags mismatch"; 6792 6793 case status_append_invalid_root: return "Unable to append nodes: root is not an element or document"; 6794 6795 case status_no_document_element: return "No document element found"; 6796 6797 default: return "Unknown error"; 6798 } 6799 } 6800 xml_document()6801 PUGI__FN xml_document::xml_document(): _buffer(0) 6802 { 6803 _create(); 6804 } 6805 ~xml_document()6806 PUGI__FN xml_document::~xml_document() 6807 { 6808 _destroy(); 6809 } 6810 reset()6811 PUGI__FN void xml_document::reset() 6812 { 6813 _destroy(); 6814 _create(); 6815 } 6816 reset(const xml_document & proto)6817 PUGI__FN void xml_document::reset(const xml_document& proto) 6818 { 6819 reset(); 6820 6821 for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling()) 6822 append_copy(cur); 6823 } 6824 _create()6825 PUGI__FN void xml_document::_create() 6826 { 6827 assert(!_root); 6828 6829 #ifdef PUGIXML_COMPACT 6830 const size_t page_offset = sizeof(uint32_t); 6831 #else 6832 const size_t page_offset = 0; 6833 #endif 6834 6835 // initialize sentinel page 6836 PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory)); 6837 6838 // prepare page structure 6839 impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory); 6840 assert(page); 6841 6842 page->busy_size = impl::xml_memory_page_size; 6843 6844 // setup first page marker 6845 #ifdef PUGIXML_COMPACT 6846 // round-trip through void* to avoid 'cast increases required alignment of target type' warning 6847 page->compact_page_marker = 
reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page))); 6848 *page->compact_page_marker = sizeof(impl::xml_memory_page); 6849 #endif 6850 6851 // allocate new root 6852 _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page); 6853 _root->prev_sibling_c = _root; 6854 6855 // setup sentinel page 6856 page->allocator = static_cast<impl::xml_document_struct*>(_root); 6857 6858 // setup hash table pointer in allocator 6859 #ifdef PUGIXML_COMPACT 6860 page->allocator->_hash = &static_cast<impl::xml_document_struct*>(_root)->hash; 6861 #endif 6862 6863 // verify the document allocation 6864 assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory)); 6865 } 6866 _destroy()6867 PUGI__FN void xml_document::_destroy() 6868 { 6869 assert(_root); 6870 6871 // destroy static storage 6872 if (_buffer) 6873 { 6874 impl::xml_memory::deallocate(_buffer); 6875 _buffer = 0; 6876 } 6877 6878 // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator) 6879 for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next) 6880 { 6881 if (extra->buffer) impl::xml_memory::deallocate(extra->buffer); 6882 } 6883 6884 // destroy dynamic storage, leave sentinel page (it's in static memory) 6885 impl::xml_memory_page* root_page = PUGI__GETPAGE(_root); 6886 assert(root_page && !root_page->prev); 6887 assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory)); 6888 6889 for (impl::xml_memory_page* page = root_page->next; page; ) 6890 { 6891 impl::xml_memory_page* next = page->next; 6892 6893 impl::xml_allocator::deallocate_page(page); 6894 6895 page = next; 6896 } 6897 6898 #ifdef PUGIXML_COMPACT 6899 // destroy hash table 6900 
static_cast<impl::xml_document_struct*>(_root)->hash.clear(); 6901 #endif 6902 6903 _root = 0; 6904 } 6905 6906 #ifndef PUGIXML_NO_STL load(std::basic_istream<char,std::char_traits<char>> & stream,unsigned int options,xml_encoding encoding)6907 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding) 6908 { 6909 reset(); 6910 6911 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer); 6912 } 6913 load(std::basic_istream<wchar_t,std::char_traits<wchar_t>> & stream,unsigned int options)6914 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options) 6915 { 6916 reset(); 6917 6918 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer); 6919 } 6920 #endif 6921 load_string(const char_t * contents,unsigned int options)6922 PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options) 6923 { 6924 // Force native encoding (skip autodetection) 6925 #ifdef PUGIXML_WCHAR_MODE 6926 xml_encoding encoding = encoding_wchar; 6927 #else 6928 xml_encoding encoding = encoding_utf8; 6929 #endif 6930 6931 return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding); 6932 } 6933 load(const char_t * contents,unsigned int options)6934 PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options) 6935 { 6936 return load_string(contents, options); 6937 } 6938 load_file(const char * path_,unsigned int options,xml_encoding encoding)6939 PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding) 6940 { 6941 reset(); 6942 6943 using impl::auto_deleter; // MSVC7 workaround 6944 auto_deleter<FILE> file(fopen(path_, "rb"), impl::close_file); 6945 6946 return 
impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer); 6947 } 6948 load_file(const wchar_t * path_,unsigned int options,xml_encoding encoding)6949 PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding) 6950 { 6951 reset(); 6952 6953 using impl::auto_deleter; // MSVC7 workaround 6954 auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file); 6955 6956 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer); 6957 } 6958 load_buffer(const void * contents,size_t size,unsigned int options,xml_encoding encoding)6959 PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) 6960 { 6961 reset(); 6962 6963 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer); 6964 } 6965 load_buffer_inplace(void * contents,size_t size,unsigned int options,xml_encoding encoding)6966 PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding) 6967 { 6968 reset(); 6969 6970 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer); 6971 } 6972 load_buffer_inplace_own(void * contents,size_t size,unsigned int options,xml_encoding encoding)6973 PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding) 6974 { 6975 reset(); 6976 6977 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer); 6978 } 6979 save(xml_writer & writer,const char_t * indent,unsigned int flags,xml_encoding encoding) const6980 
PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const 6981 { 6982 impl::xml_buffered_writer buffered_writer(writer, encoding); 6983 6984 if ((flags & format_write_bom) && encoding != encoding_latin1) 6985 { 6986 // BOM always represents the codepoint U+FEFF, so just write it in native encoding 6987 #ifdef PUGIXML_WCHAR_MODE 6988 unsigned int bom = 0xfeff; 6989 buffered_writer.write(static_cast<wchar_t>(bom)); 6990 #else 6991 buffered_writer.write('\xef', '\xbb', '\xbf'); 6992 #endif 6993 } 6994 6995 if (!(flags & format_no_declaration) && !impl::has_declaration(_root)) 6996 { 6997 buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\"")); 6998 if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\"")); 6999 buffered_writer.write('?', '>'); 7000 if (!(flags & format_raw)) buffered_writer.write('\n'); 7001 } 7002 7003 impl::node_output(buffered_writer, _root, indent, flags, 0); 7004 7005 buffered_writer.flush(); 7006 } 7007 7008 #ifndef PUGIXML_NO_STL save(std::basic_ostream<char,std::char_traits<char>> & stream,const char_t * indent,unsigned int flags,xml_encoding encoding) const7009 PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const 7010 { 7011 xml_writer_stream writer(stream); 7012 7013 save(writer, indent, flags, encoding); 7014 } 7015 save(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream,const char_t * indent,unsigned int flags) const7016 PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const 7017 { 7018 xml_writer_stream writer(stream); 7019 7020 save(writer, indent, flags, encoding_wchar); 7021 } 7022 #endif 7023 save_file(const char * path_,const char_t * indent,unsigned int flags,xml_encoding encoding) const7024 PUGI__FN 
bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const 7025 { 7026 using impl::auto_deleter; // MSVC7 workaround 7027 auto_deleter<FILE> file(fopen(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file); 7028 7029 return impl::save_file_impl(*this, file.data, indent, flags, encoding); 7030 } 7031 save_file(const wchar_t * path_,const char_t * indent,unsigned int flags,xml_encoding encoding) const7032 PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const 7033 { 7034 using impl::auto_deleter; // MSVC7 workaround 7035 auto_deleter<FILE> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), impl::close_file); 7036 7037 return impl::save_file_impl(*this, file.data, indent, flags, encoding); 7038 } 7039 document_element() const7040 PUGI__FN xml_node xml_document::document_element() const 7041 { 7042 assert(_root); 7043 7044 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 7045 if (PUGI__NODETYPE(i) == node_element) 7046 return xml_node(i); 7047 7048 return xml_node(); 7049 } 7050 7051 #ifndef PUGIXML_NO_STL as_utf8(const wchar_t * str)7052 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str) 7053 { 7054 assert(str); 7055 7056 return impl::as_utf8_impl(str, impl::strlength_wide(str)); 7057 } 7058 as_utf8(const std::basic_string<wchar_t> & str)7059 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str) 7060 { 7061 return impl::as_utf8_impl(str.c_str(), str.size()); 7062 } 7063 as_wide(const char * str)7064 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str) 7065 { 7066 assert(str); 7067 7068 return impl::as_wide_impl(str, strlen(str)); 7069 } 7070 as_wide(const std::string & str)7071 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str) 7072 { 7073 return 
impl::as_wide_impl(str.c_str(), str.size()); 7074 } 7075 #endif 7076 set_memory_management_functions(allocation_function allocate,deallocation_function deallocate)7077 PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate) 7078 { 7079 impl::xml_memory::allocate = allocate; 7080 impl::xml_memory::deallocate = deallocate; 7081 } 7082 get_memory_allocation_function()7083 PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function() 7084 { 7085 return impl::xml_memory::allocate; 7086 } 7087 get_memory_deallocation_function()7088 PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function() 7089 { 7090 return impl::xml_memory::deallocate; 7091 } 7092 } 7093 7094 #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC)) 7095 namespace std 7096 { 7097 // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier) _Iter_cat(const pugi::xml_node_iterator &)7098 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&) 7099 { 7100 return std::bidirectional_iterator_tag(); 7101 } 7102 _Iter_cat(const pugi::xml_attribute_iterator &)7103 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&) 7104 { 7105 return std::bidirectional_iterator_tag(); 7106 } 7107 _Iter_cat(const pugi::xml_named_node_iterator &)7108 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&) 7109 { 7110 return std::bidirectional_iterator_tag(); 7111 } 7112 } 7113 #endif 7114 7115 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC) 7116 namespace std 7117 { 7118 // Workarounds for (non-standard) iterator category detection __iterator_category(const pugi::xml_node_iterator &)7119 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&) 7120 { 7121 return std::bidirectional_iterator_tag(); 7122 } 7123 
// Comparison functor: lhs == rhs.
struct equal_to
{
	template <typename T> bool operator()(const T& lhs, const T& rhs) const
	{
		return lhs == rhs;
	}
};

// Comparison functor: lhs != rhs.
struct not_equal_to
{
	template <typename T> bool operator()(const T& lhs, const T& rhs) const
	{
		return lhs != rhs;
	}
};

// Comparison functor: lhs < rhs.
struct less
{
	template <typename T> bool operator()(const T& lhs, const T& rhs) const
	{
		return lhs < rhs;
	}
};

// Comparison functor: lhs <= rhs.
struct less_equal
{
	template <typename T> bool operator()(const T& lhs, const T& rhs) const
	{
		return lhs <= rhs;
	}
};

// Exchanges two values through a temporary copy (copy construction plus two assignments).
template <typename T> void swap(T& lhs, T& rhs)
{
	T temp = lhs;
	lhs = rhs;
	rhs = temp;
}

// Returns an iterator to the first element of [begin, end) for which no other
// element compares smaller according to pred(candidate, current_best).
// For an empty range `end` is returned, matching std::min_element; without
// this guard `begin + 1` would step past `end` and the loop would never meet
// its termination condition.
template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
{
	if (begin == end) return end;

	I result = begin;

	for (I it = begin + 1; it != end; ++it)
		if (pred(*it, *result))
			result = it;

	return result;
}

// Reverses [begin, end) in place by swapping elements pairwise from both ends
// until the cursors meet; ranges of zero or one element are left untouched.
template <typename I> void reverse(I begin, I end)
{
	while (end - begin > 1) swap(*begin++, *--end);
}
// Copies [begin, end) so that the copy ends just before `target`, walking from
// the last element to the first; this makes it usable when the destination
// overlaps the source on the right.
template <typename I> void copy_backwards(I begin, I end, I target)
{
	while (end != begin)
	{
		--target;
		--end;
		*target = *end;
	}
}

// Sorts the non-empty range [begin, end) in place with insertion sort.
//   pred - ordering predicate, pred(a, b) is true when a must precede b
//   T*   - unused value; only carries the element type T for the temporary
template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
{
	assert(begin != end);

	for (I cur = begin + 1; cur != end; ++cur)
	{
		T value = *cur;

		if (!pred(value, *begin))
		{
			// *begin does not come after value, so the backwards scan is
			// guaranteed to stop before running off the front of the range
			I slot = cur;

			for (; pred(value, *(slot - 1)); --slot)
				*slot = *(slot - 1);

			// drop the element into the gap
			*slot = value;
		}
		else
		{
			// new minimum: shift the whole sorted prefix right by one and put it first
			copy_backwards(begin, cur, cur + 1);
			*begin = value;
		}
	}
}
// Orders the three referenced elements in place so that, according to pred,
// *first, *middle and *last end up in non-descending order.
template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
{
	// put the smaller of (first, middle) in front
	if (pred(*middle, *first))
	{
		swap(*middle, *first);
	}

	// put the larger of (middle, last) at the back
	if (pred(*last, *middle))
	{
		swap(*last, *middle);
	}

	// middle may have changed above, so compare it with first once more
	if (pred(*middle, *first))
	{
		swap(*middle, *first);
	}
}

// Moves an approximate median of [first, last] into *middle.
// Ranges with last - first <= 40 use a single median-of-three; longer ranges
// use a median of nine built from three local median-of-three passes.
template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
{
	if (last - first <= 40)
	{
		// median of three for small chunks
		median3(first, middle, last, pred);
		return;
	}

	// median of nine
	size_t stride = (last - first + 1) / 8;

	median3(first, first + stride, first + 2 * stride, pred);
	median3(middle - stride, middle, middle + stride, pred);
	median3(last - 2 * stride, last - stride, last, pred);
	median3(first + stride, middle, last - stride, pred);
}
// Bump allocator over a singly linked list of xpath_memory_block pages.
// _root is the page currently being filled and _root_size is the number of
// bytes already handed out from it; older pages are reachable through next.
class xpath_allocator
{
	xpath_memory_block* _root;
	size_t _root_size;

public:
#ifdef PUGIXML_NO_EXCEPTIONS
	// jump target used by allocate() to report failure when exceptions are disabled
	jmp_buf* error_handler;
#endif

	// Starts allocating from `root`, treating the first root_size bytes of it as already used.
	xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size)
	{
	#ifdef PUGIXML_NO_EXCEPTIONS
		error_handler = 0;
	#endif
	}

	// Returns `size` bytes (rounded up to the block alignment) or 0 when a new
	// page is needed and xml_memory::allocate fails. Never throws or jumps.
	void* allocate_nothrow(size_t size)
	{
		// round size up to block alignment boundary
		// (the mask arithmetic assumes xpath_memory_block_alignment is a power of two)
		size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);

		if (_root_size + size <= _root->capacity)
		{
			// fast path: the request fits into the current page
			void* buf = &_root->data[0] + _root_size;
			_root_size += size;
			return buf;
		}
		else
		{
			// make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
			size_t block_capacity_base = sizeof(_root->data);
			size_t block_capacity_req = size + block_capacity_base / 4;
			size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;

			// header fields precede the payload, so add the offset of data
			size_t block_size = block_capacity + offsetof(xpath_memory_block, data);

			xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
			if (!block) return 0;

			// the new page becomes the head of the list
			block->next = _root;
			block->capacity = block_capacity;

			_root = block;
			_root_size = size;

			return block->data;
		}
	}

	// Same as allocate_nothrow, but reports failure instead of returning 0:
	// longjmp through error_handler when PUGIXML_NO_EXCEPTIONS is defined,
	// std::bad_alloc otherwise.
	void* allocate(size_t size)
	{
		void* result = allocate_nothrow(size);

		if (!result)
		{
		#ifdef PUGIXML_NO_EXCEPTIONS
			assert(error_handler);
			longjmp(*error_handler, 1);
		#else
			throw std::bad_alloc();
		#endif
		}

		return result;
	}

	// Resizes the most recently allocated object from old_size to new_size
	// bytes (ptr may be 0, in which case this is a plain allocation).
	// Returns the possibly moved object; failure is reported by allocate().
	void* reallocate(void* ptr, size_t old_size, size_t new_size)
	{
		// round size up to block alignment boundary
		old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
		new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);

		// we can only reallocate the last object
		assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);

		// adjust root size so that we have not allocated the object at all
		// (only_object is captured before the adjustment: true when the page held nothing else)
		bool only_object = (_root_size == old_size);

		if (ptr) _root_size -= old_size;

		// allocate a new version (this will obviously reuse the memory if possible)
		void* result = allocate(new_size);
		assert(result);

		// we have a new block
		if (result != ptr && ptr)
		{
			// copy old data
			assert(new_size >= old_size);
			memcpy(result, ptr, old_size);

			// free the previous page if it had no other objects
			if (only_object)
			{
				// result sits at the start of the freshly pushed page; the old page is _root->next
				assert(_root->data == result);
				assert(_root->next);

				xpath_memory_block* next = _root->next->next;

				if (next)
				{
					// deallocate the whole page, unless it was the first one
					xml_memory::deallocate(_root->next);
					_root->next = next;
				}
			}
		}

		return result;
	}

	// Rolls the allocator back to a previously copied `state`: every page
	// pushed since then is deallocated and the root pointer/size are restored.
	void revert(const xpath_allocator& state)
	{
		// free all new pages
		xpath_memory_block* cur = _root;

		while (cur != state._root)
		{
			xpath_memory_block* next = cur->next;

			xml_memory::deallocate(cur);

			cur = next;
		}

		// restore state
		_root = state._root;
		_root_size = state._root_size;
	}

	// Deallocates every page that has a successor; the loop stops at the page
	// whose next is 0, so that last page in the chain is not deallocated here.
	void release()
	{
		xpath_memory_block* cur = _root;
		assert(cur);

		while (cur->next)
		{
			xpath_memory_block* next = cur->next;

			xml_memory::deallocate(cur);

			cur = next;
		}
	}
};
&temp; 7574 7575 #ifdef PUGIXML_NO_EXCEPTIONS 7576 result.error_handler = temp.error_handler = &error_handler; 7577 #endif 7578 } 7579 ~xpath_stack_dataxpath_stack_data7580 ~xpath_stack_data() 7581 { 7582 result.release(); 7583 temp.release(); 7584 } 7585 }; 7586 PUGI__NS_END 7587 7588 // String class 7589 PUGI__NS_BEGIN 7590 class xpath_string 7591 { 7592 const char_t* _buffer; 7593 bool _uses_heap; 7594 size_t _length_heap; 7595 duplicate_string(const char_t * string,size_t length,xpath_allocator * alloc)7596 static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc) 7597 { 7598 char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t))); 7599 assert(result); 7600 7601 memcpy(result, string, length * sizeof(char_t)); 7602 result[length] = 0; 7603 7604 return result; 7605 } 7606 xpath_string(const char_t * buffer,bool uses_heap_,size_t length_heap)7607 xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap) 7608 { 7609 } 7610 7611 public: from_const(const char_t * str)7612 static xpath_string from_const(const char_t* str) 7613 { 7614 return xpath_string(str, false, 0); 7615 } 7616 from_heap_preallocated(const char_t * begin,const char_t * end)7617 static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end) 7618 { 7619 assert(begin <= end && *end == 0); 7620 7621 return xpath_string(begin, true, static_cast<size_t>(end - begin)); 7622 } 7623 from_heap(const char_t * begin,const char_t * end,xpath_allocator * alloc)7624 static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc) 7625 { 7626 assert(begin <= end); 7627 7628 size_t length = static_cast<size_t>(end - begin); 7629 7630 return length == 0 ? 
xpath_string() : xpath_string(duplicate_string(begin, length, alloc), true, length); 7631 } 7632 xpath_string()7633 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0) 7634 { 7635 } 7636 append(const xpath_string & o,xpath_allocator * alloc)7637 void append(const xpath_string& o, xpath_allocator* alloc) 7638 { 7639 // skip empty sources 7640 if (!*o._buffer) return; 7641 7642 // fast append for constant empty target and constant source 7643 if (!*_buffer && !_uses_heap && !o._uses_heap) 7644 { 7645 _buffer = o._buffer; 7646 } 7647 else 7648 { 7649 // need to make heap copy 7650 size_t target_length = length(); 7651 size_t source_length = o.length(); 7652 size_t result_length = target_length + source_length; 7653 7654 // allocate new buffer 7655 char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t))); 7656 assert(result); 7657 7658 // append first string to the new buffer in case there was no reallocation 7659 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t)); 7660 7661 // append second string to the new buffer 7662 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t)); 7663 result[result_length] = 0; 7664 7665 // finalize 7666 _buffer = result; 7667 _uses_heap = true; 7668 _length_heap = result_length; 7669 } 7670 } 7671 c_str() const7672 const char_t* c_str() const 7673 { 7674 return _buffer; 7675 } 7676 length() const7677 size_t length() const 7678 { 7679 return _uses_heap ? 
_length_heap : strlength(_buffer); 7680 } 7681 data(xpath_allocator * alloc)7682 char_t* data(xpath_allocator* alloc) 7683 { 7684 // make private heap copy 7685 if (!_uses_heap) 7686 { 7687 size_t length_ = strlength(_buffer); 7688 7689 _buffer = duplicate_string(_buffer, length_, alloc); 7690 _uses_heap = true; 7691 _length_heap = length_; 7692 } 7693 7694 return const_cast<char_t*>(_buffer); 7695 } 7696 empty() const7697 bool empty() const 7698 { 7699 return *_buffer == 0; 7700 } 7701 operator ==(const xpath_string & o) const7702 bool operator==(const xpath_string& o) const 7703 { 7704 return strequal(_buffer, o._buffer); 7705 } 7706 operator !=(const xpath_string & o) const7707 bool operator!=(const xpath_string& o) const 7708 { 7709 return !strequal(_buffer, o._buffer); 7710 } 7711 uses_heap() const7712 bool uses_heap() const 7713 { 7714 return _uses_heap; 7715 } 7716 }; 7717 PUGI__NS_END 7718 7719 PUGI__NS_BEGIN starts_with(const char_t * string,const char_t * pattern)7720 PUGI__FN bool starts_with(const char_t* string, const char_t* pattern) 7721 { 7722 while (*pattern && *string == *pattern) 7723 { 7724 string++; 7725 pattern++; 7726 } 7727 7728 return *pattern == 0; 7729 } 7730 find_char(const char_t * s,char_t c)7731 PUGI__FN const char_t* find_char(const char_t* s, char_t c) 7732 { 7733 #ifdef PUGIXML_WCHAR_MODE 7734 return wcschr(s, c); 7735 #else 7736 return strchr(s, c); 7737 #endif 7738 } 7739 find_substring(const char_t * s,const char_t * p)7740 PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p) 7741 { 7742 #ifdef PUGIXML_WCHAR_MODE 7743 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0) 7744 return (*p == 0) ? s : wcsstr(s, p); 7745 #else 7746 return strstr(s, p); 7747 #endif 7748 } 7749 7750 // Converts symbol to lower case, if it is an ASCII one tolower_ascii(char_t ch)7751 PUGI__FN char_t tolower_ascii(char_t ch) 7752 { 7753 return static_cast<unsigned int>(ch - 'A') < 26 ? 
static_cast<char_t>(ch | ' ') : ch; 7754 } 7755 string_value(const xpath_node & na,xpath_allocator * alloc)7756 PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc) 7757 { 7758 if (na.attribute()) 7759 return xpath_string::from_const(na.attribute().value()); 7760 else 7761 { 7762 xml_node n = na.node(); 7763 7764 switch (n.type()) 7765 { 7766 case node_pcdata: 7767 case node_cdata: 7768 case node_comment: 7769 case node_pi: 7770 return xpath_string::from_const(n.value()); 7771 7772 case node_document: 7773 case node_element: 7774 { 7775 xpath_string result; 7776 7777 // element nodes can have value if parse_embed_pcdata was used 7778 if (n.value()[0]) 7779 result.append(xpath_string::from_const(n.value()), alloc); 7780 7781 xml_node cur = n.first_child(); 7782 7783 while (cur && cur != n) 7784 { 7785 if (cur.type() == node_pcdata || cur.type() == node_cdata) 7786 result.append(xpath_string::from_const(cur.value()), alloc); 7787 7788 if (cur.first_child()) 7789 cur = cur.first_child(); 7790 else if (cur.next_sibling()) 7791 cur = cur.next_sibling(); 7792 else 7793 { 7794 while (!cur.next_sibling() && cur != n) 7795 cur = cur.parent(); 7796 7797 if (cur != n) cur = cur.next_sibling(); 7798 } 7799 } 7800 7801 return result; 7802 } 7803 7804 default: 7805 return xpath_string(); 7806 } 7807 } 7808 } 7809 node_is_before_sibling(xml_node_struct * ln,xml_node_struct * rn)7810 PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn) 7811 { 7812 assert(ln->parent == rn->parent); 7813 7814 // there is no common ancestor (the shared parent is null), nodes are from different documents 7815 if (!ln->parent) return ln < rn; 7816 7817 // determine sibling order 7818 xml_node_struct* ls = ln; 7819 xml_node_struct* rs = rn; 7820 7821 while (ls && rs) 7822 { 7823 if (ls == rn) return true; 7824 if (rs == ln) return false; 7825 7826 ls = ls->next_sibling; 7827 rs = rs->next_sibling; 7828 } 7829 7830 // if rn sibling chain ended ln must 
be before rn 7831 return !rs; 7832 } 7833 node_is_before(xml_node_struct * ln,xml_node_struct * rn)7834 PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn) 7835 { 7836 // find common ancestor at the same depth, if any 7837 xml_node_struct* lp = ln; 7838 xml_node_struct* rp = rn; 7839 7840 while (lp && rp && lp->parent != rp->parent) 7841 { 7842 lp = lp->parent; 7843 rp = rp->parent; 7844 } 7845 7846 // parents are the same! 7847 if (lp && rp) return node_is_before_sibling(lp, rp); 7848 7849 // nodes are at different depths, need to normalize heights 7850 bool left_higher = !lp; 7851 7852 while (lp) 7853 { 7854 lp = lp->parent; 7855 ln = ln->parent; 7856 } 7857 7858 while (rp) 7859 { 7860 rp = rp->parent; 7861 rn = rn->parent; 7862 } 7863 7864 // one node is the ancestor of the other 7865 if (ln == rn) return left_higher; 7866 7867 // find common ancestor... again 7868 while (ln->parent != rn->parent) 7869 { 7870 ln = ln->parent; 7871 rn = rn->parent; 7872 } 7873 7874 return node_is_before_sibling(ln, rn); 7875 } 7876 node_is_ancestor(xml_node_struct * parent,xml_node_struct * node)7877 PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node) 7878 { 7879 while (node && node != parent) node = node->parent; 7880 7881 return parent && node == parent; 7882 } 7883 document_buffer_order(const xpath_node & xnode)7884 PUGI__FN const void* document_buffer_order(const xpath_node& xnode) 7885 { 7886 xml_node_struct* node = xnode.node().internal_object(); 7887 7888 if (node) 7889 { 7890 if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0) 7891 { 7892 if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name; 7893 if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value; 7894 } 7895 7896 return 0; 7897 } 7898 7899 xml_attribute_struct* attr = xnode.attribute().internal_object(); 7900 7901 if (attr) 7902 { 
7903 if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0) 7904 { 7905 if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name; 7906 if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value; 7907 } 7908 7909 return 0; 7910 } 7911 7912 return 0; 7913 } 7914 7915 struct document_order_comparator 7916 { operator ()document_order_comparator7917 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const 7918 { 7919 // optimized document order based check 7920 const void* lo = document_buffer_order(lhs); 7921 const void* ro = document_buffer_order(rhs); 7922 7923 if (lo && ro) return lo < ro; 7924 7925 // slow comparison 7926 xml_node ln = lhs.node(), rn = rhs.node(); 7927 7928 // compare attributes 7929 if (lhs.attribute() && rhs.attribute()) 7930 { 7931 // shared parent 7932 if (lhs.parent() == rhs.parent()) 7933 { 7934 // determine sibling order 7935 for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute()) 7936 if (a == rhs.attribute()) 7937 return true; 7938 7939 return false; 7940 } 7941 7942 // compare attribute parents 7943 ln = lhs.parent(); 7944 rn = rhs.parent(); 7945 } 7946 else if (lhs.attribute()) 7947 { 7948 // attributes go after the parent element 7949 if (lhs.parent() == rhs.node()) return false; 7950 7951 ln = lhs.parent(); 7952 } 7953 else if (rhs.attribute()) 7954 { 7955 // attributes go after the parent element 7956 if (rhs.parent() == lhs.node()) return true; 7957 7958 rn = rhs.parent(); 7959 } 7960 7961 if (ln == rn) return false; 7962 7963 if (!ln || !rn) return ln < rn; 7964 7965 return node_is_before(ln.internal_object(), rn.internal_object()); 7966 } 7967 }; 7968 7969 struct duplicate_comparator 7970 { operator ()duplicate_comparator7971 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const 7972 { 7973 if (lhs.attribute()) return rhs.attribute() ? 
lhs.attribute() < rhs.attribute() : true; 7974 else return rhs.attribute() ? false : lhs.node() < rhs.node(); 7975 } 7976 }; 7977 gen_nan()7978 PUGI__FN double gen_nan() 7979 { 7980 #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24)) 7981 PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t)); 7982 typedef uint32_t UI; // BCC5 workaround 7983 union { float f; UI i; } u; 7984 u.i = 0x7fc00000; 7985 return u.f; 7986 #else 7987 // fallback 7988 const volatile double zero = 0.0; 7989 return zero / zero; 7990 #endif 7991 } 7992 is_nan(double value)7993 PUGI__FN bool is_nan(double value) 7994 { 7995 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) 7996 return !!_isnan(value); 7997 #elif defined(fpclassify) && defined(FP_NAN) 7998 return fpclassify(value) == FP_NAN; 7999 #else 8000 // fallback 8001 const volatile double v = value; 8002 return v != v; 8003 #endif 8004 } 8005 convert_number_to_string_special(double value)8006 PUGI__FN const char_t* convert_number_to_string_special(double value) 8007 { 8008 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) 8009 if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0; 8010 if (_isnan(value)) return PUGIXML_TEXT("NaN"); 8011 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); 8012 #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO) 8013 switch (fpclassify(value)) 8014 { 8015 case FP_NAN: 8016 return PUGIXML_TEXT("NaN"); 8017 8018 case FP_INFINITE: 8019 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); 8020 8021 case FP_ZERO: 8022 return PUGIXML_TEXT("0"); 8023 8024 default: 8025 return 0; 8026 } 8027 #else 8028 // fallback 8029 const volatile double v = value; 8030 8031 if (v == 0) return PUGIXML_TEXT("0"); 8032 if (v != v) return PUGIXML_TEXT("NaN"); 8033 if (v * 2 == v) return value > 0 ? 
PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); 8034 return 0; 8035 #endif 8036 } 8037 convert_number_to_boolean(double value)8038 PUGI__FN bool convert_number_to_boolean(double value) 8039 { 8040 return (value != 0 && !is_nan(value)); 8041 } 8042 truncate_zeros(char * begin,char * end)8043 PUGI__FN void truncate_zeros(char* begin, char* end) 8044 { 8045 while (begin != end && end[-1] == '0') end--; 8046 8047 *end = 0; 8048 } 8049 8050 // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent 8051 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE) convert_number_to_mantissa_exponent(double value,char * buffer,size_t buffer_size,char ** out_mantissa,int * out_exponent)8052 PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent) 8053 { 8054 // get base values 8055 int sign, exponent; 8056 _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign); 8057 8058 // truncate redundant zeros 8059 truncate_zeros(buffer, buffer + strlen(buffer)); 8060 8061 // fill results 8062 *out_mantissa = buffer; 8063 *out_exponent = exponent; 8064 } 8065 #else convert_number_to_mantissa_exponent(double value,char * buffer,size_t buffer_size,char ** out_mantissa,int * out_exponent)8066 PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent) 8067 { 8068 // get a scientific notation value with IEEE DBL_DIG decimals 8069 sprintf(buffer, "%.*e", DBL_DIG, value); 8070 assert(strlen(buffer) < buffer_size); 8071 (void)!buffer_size; 8072 8073 // get the exponent (possibly negative) 8074 char* exponent_string = strchr(buffer, 'e'); 8075 assert(exponent_string); 8076 8077 int exponent = atoi(exponent_string + 1); 8078 8079 // extract mantissa string: skip sign 8080 char* mantissa = buffer[0] == '-' ? 
buffer + 1 : buffer; 8081 assert(mantissa[0] != '0' && mantissa[1] == '.'); 8082 8083 // divide mantissa by 10 to eliminate integer part 8084 mantissa[1] = mantissa[0]; 8085 mantissa++; 8086 exponent++; 8087 8088 // remove extra mantissa digits and zero-terminate mantissa 8089 truncate_zeros(mantissa, exponent_string); 8090 8091 // fill results 8092 *out_mantissa = mantissa; 8093 *out_exponent = exponent; 8094 } 8095 #endif 8096 convert_number_to_string(double value,xpath_allocator * alloc)8097 PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc) 8098 { 8099 // try special number conversion 8100 const char_t* special = convert_number_to_string_special(value); 8101 if (special) return xpath_string::from_const(special); 8102 8103 // get mantissa + exponent form 8104 char mantissa_buffer[32]; 8105 8106 char* mantissa; 8107 int exponent; 8108 convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent); 8109 8110 // allocate a buffer of suitable length for the number 8111 size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4; 8112 char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size)); 8113 assert(result); 8114 8115 // make the number! 8116 char_t* s = result; 8117 8118 // sign 8119 if (value < 0) *s++ = '-'; 8120 8121 // integer part 8122 if (exponent <= 0) 8123 { 8124 *s++ = '0'; 8125 } 8126 else 8127 { 8128 while (exponent > 0) 8129 { 8130 assert(*mantissa == 0 || static_cast<unsigned int>(static_cast<unsigned int>(*mantissa) - '0') <= 9); 8131 *s++ = *mantissa ? 
*mantissa++ : '0'; 8132 exponent--; 8133 } 8134 } 8135 8136 // fractional part 8137 if (*mantissa) 8138 { 8139 // decimal point 8140 *s++ = '.'; 8141 8142 // extra zeroes from negative exponent 8143 while (exponent < 0) 8144 { 8145 *s++ = '0'; 8146 exponent++; 8147 } 8148 8149 // extra mantissa digits 8150 while (*mantissa) 8151 { 8152 assert(static_cast<unsigned int>(*mantissa - '0') <= 9); 8153 *s++ = *mantissa++; 8154 } 8155 } 8156 8157 // zero-terminate 8158 assert(s < result + result_size); 8159 *s = 0; 8160 8161 return xpath_string::from_heap_preallocated(result, s); 8162 } 8163 check_string_to_number_format(const char_t * string)8164 PUGI__FN bool check_string_to_number_format(const char_t* string) 8165 { 8166 // parse leading whitespace 8167 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string; 8168 8169 // parse sign 8170 if (*string == '-') ++string; 8171 8172 if (!*string) return false; 8173 8174 // if there is no integer part, there should be a decimal part with at least one digit 8175 if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' 
|| !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false; 8176 8177 // parse integer part 8178 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string; 8179 8180 // parse decimal part 8181 if (*string == '.') 8182 { 8183 ++string; 8184 8185 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string; 8186 } 8187 8188 // parse trailing whitespace 8189 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string; 8190 8191 return *string == 0; 8192 } 8193 convert_string_to_number(const char_t * string)8194 PUGI__FN double convert_string_to_number(const char_t* string) 8195 { 8196 // check string format 8197 if (!check_string_to_number_format(string)) return gen_nan(); 8198 8199 // parse string 8200 #ifdef PUGIXML_WCHAR_MODE 8201 return wcstod(string, 0); 8202 #else 8203 return strtod(string, 0); 8204 #endif 8205 } 8206 convert_string_to_number_scratch(char_t (& buffer)[32],const char_t * begin,const char_t * end,double * out_result)8207 PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result) 8208 { 8209 size_t length = static_cast<size_t>(end - begin); 8210 char_t* scratch = buffer; 8211 8212 if (length >= sizeof(buffer) / sizeof(buffer[0])) 8213 { 8214 // need to make dummy on-heap copy 8215 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 8216 if (!scratch) return false; 8217 } 8218 8219 // copy string to zero-terminated buffer and perform conversion 8220 memcpy(scratch, begin, length * sizeof(char_t)); 8221 scratch[length] = 0; 8222 8223 *out_result = convert_string_to_number(scratch); 8224 8225 // free dummy buffer 8226 if (scratch != buffer) xml_memory::deallocate(scratch); 8227 8228 return true; 8229 } 8230 round_nearest(double value)8231 PUGI__FN double round_nearest(double value) 8232 { 8233 return floor(value + 0.5); 8234 } 8235 round_nearest_nzero(double value)8236 PUGI__FN double round_nearest_nzero(double value) 8237 { 8238 // same as round_nearest, but 
returns -0 for [-0.5, -0] 8239 // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0) 8240 return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5); 8241 } 8242 qualified_name(const xpath_node & node)8243 PUGI__FN const char_t* qualified_name(const xpath_node& node) 8244 { 8245 return node.attribute() ? node.attribute().name() : node.node().name(); 8246 } 8247 local_name(const xpath_node & node)8248 PUGI__FN const char_t* local_name(const xpath_node& node) 8249 { 8250 const char_t* name = qualified_name(node); 8251 const char_t* p = find_char(name, ':'); 8252 8253 return p ? p + 1 : name; 8254 } 8255 8256 struct namespace_uri_predicate 8257 { 8258 const char_t* prefix; 8259 size_t prefix_length; 8260 namespace_uri_predicatenamespace_uri_predicate8261 namespace_uri_predicate(const char_t* name) 8262 { 8263 const char_t* pos = find_char(name, ':'); 8264 8265 prefix = pos ? name : 0; 8266 prefix_length = pos ? static_cast<size_t>(pos - name) : 0; 8267 } 8268 operator ()namespace_uri_predicate8269 bool operator()(xml_attribute a) const 8270 { 8271 const char_t* name = a.name(); 8272 8273 if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false; 8274 8275 return prefix ? 
name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0; 8276 } 8277 }; 8278 namespace_uri(xml_node node)8279 PUGI__FN const char_t* namespace_uri(xml_node node) 8280 { 8281 namespace_uri_predicate pred = node.name(); 8282 8283 xml_node p = node; 8284 8285 while (p) 8286 { 8287 xml_attribute a = p.find_attribute(pred); 8288 8289 if (a) return a.value(); 8290 8291 p = p.parent(); 8292 } 8293 8294 return PUGIXML_TEXT(""); 8295 } 8296 namespace_uri(xml_attribute attr,xml_node parent)8297 PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent) 8298 { 8299 namespace_uri_predicate pred = attr.name(); 8300 8301 // Default namespace does not apply to attributes 8302 if (!pred.prefix) return PUGIXML_TEXT(""); 8303 8304 xml_node p = parent; 8305 8306 while (p) 8307 { 8308 xml_attribute a = p.find_attribute(pred); 8309 8310 if (a) return a.value(); 8311 8312 p = p.parent(); 8313 } 8314 8315 return PUGIXML_TEXT(""); 8316 } 8317 namespace_uri(const xpath_node & node)8318 PUGI__FN const char_t* namespace_uri(const xpath_node& node) 8319 { 8320 return node.attribute() ? 
namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node()); 8321 } 8322 normalize_space(char_t * buffer)8323 PUGI__FN char_t* normalize_space(char_t* buffer) 8324 { 8325 char_t* write = buffer; 8326 8327 for (char_t* it = buffer; *it; ) 8328 { 8329 char_t ch = *it++; 8330 8331 if (PUGI__IS_CHARTYPE(ch, ct_space)) 8332 { 8333 // replace whitespace sequence with single space 8334 while (PUGI__IS_CHARTYPE(*it, ct_space)) it++; 8335 8336 // avoid leading spaces 8337 if (write != buffer) *write++ = ' '; 8338 } 8339 else *write++ = ch; 8340 } 8341 8342 // remove trailing space 8343 if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--; 8344 8345 // zero-terminate 8346 *write = 0; 8347 8348 return write; 8349 } 8350 translate(char_t * buffer,const char_t * from,const char_t * to,size_t to_length)8351 PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length) 8352 { 8353 char_t* write = buffer; 8354 8355 while (*buffer) 8356 { 8357 PUGI__DMC_VOLATILE char_t ch = *buffer++; 8358 8359 const char_t* pos = find_char(from, ch); 8360 8361 if (!pos) 8362 *write++ = ch; // do not process 8363 else if (static_cast<size_t>(pos - from) < to_length) 8364 *write++ = to[pos - from]; // replace 8365 } 8366 8367 // zero-terminate 8368 *write = 0; 8369 8370 return write; 8371 } 8372 translate_table_generate(xpath_allocator * alloc,const char_t * from,const char_t * to)8373 PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to) 8374 { 8375 unsigned char table[128] = {0}; 8376 8377 while (*from) 8378 { 8379 unsigned int fc = static_cast<unsigned int>(*from); 8380 unsigned int tc = static_cast<unsigned int>(*to); 8381 8382 if (fc >= 128 || tc >= 128) 8383 return 0; 8384 8385 // code=128 means "skip character" 8386 if (!table[fc]) 8387 table[fc] = static_cast<unsigned char>(tc ? 
tc : 128); 8388 8389 from++; 8390 if (tc) to++; 8391 } 8392 8393 for (int i = 0; i < 128; ++i) 8394 if (!table[i]) 8395 table[i] = static_cast<unsigned char>(i); 8396 8397 void* result = alloc->allocate_nothrow(sizeof(table)); 8398 8399 if (result) 8400 { 8401 memcpy(result, table, sizeof(table)); 8402 } 8403 8404 return static_cast<unsigned char*>(result); 8405 } 8406 translate_table(char_t * buffer,const unsigned char * table)8407 PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table) 8408 { 8409 char_t* write = buffer; 8410 8411 while (*buffer) 8412 { 8413 char_t ch = *buffer++; 8414 unsigned int index = static_cast<unsigned int>(ch); 8415 8416 if (index < 128) 8417 { 8418 unsigned char code = table[index]; 8419 8420 // code=128 means "skip character" (table size is 128 so 128 can be a special value) 8421 // this code skips these characters without extra branches 8422 *write = static_cast<char_t>(code); 8423 write += 1 - (code >> 7); 8424 } 8425 else 8426 { 8427 *write++ = ch; 8428 } 8429 } 8430 8431 // zero-terminate 8432 *write = 0; 8433 8434 return write; 8435 } 8436 is_xpath_attribute(const char_t * name)8437 inline bool is_xpath_attribute(const char_t* name) 8438 { 8439 return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')); 8440 } 8441 8442 struct xpath_variable_boolean: xpath_variable 8443 { xpath_variable_booleanxpath_variable_boolean8444 xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false) 8445 { 8446 } 8447 8448 bool value; 8449 char_t name[1]; 8450 }; 8451 8452 struct xpath_variable_number: xpath_variable 8453 { xpath_variable_numberxpath_variable_number8454 xpath_variable_number(): xpath_variable(xpath_type_number), value(0) 8455 { 8456 } 8457 8458 double value; 8459 char_t name[1]; 8460 }; 8461 8462 struct xpath_variable_string: xpath_variable 8463 { xpath_variable_stringxpath_variable_string8464 xpath_variable_string(): xpath_variable(xpath_type_string), value(0) 8465 { 
8466 } 8467 ~xpath_variable_stringxpath_variable_string8468 ~xpath_variable_string() 8469 { 8470 if (value) xml_memory::deallocate(value); 8471 } 8472 8473 char_t* value; 8474 char_t name[1]; 8475 }; 8476 8477 struct xpath_variable_node_set: xpath_variable 8478 { xpath_variable_node_setxpath_variable_node_set8479 xpath_variable_node_set(): xpath_variable(xpath_type_node_set) 8480 { 8481 } 8482 8483 xpath_node_set value; 8484 char_t name[1]; 8485 }; 8486 8487 static const xpath_node_set dummy_node_set; 8488 hash_string(const char_t * str)8489 PUGI__FN unsigned int hash_string(const char_t* str) 8490 { 8491 // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time) 8492 unsigned int result = 0; 8493 8494 while (*str) 8495 { 8496 result += static_cast<unsigned int>(*str++); 8497 result += result << 10; 8498 result ^= result >> 6; 8499 } 8500 8501 result += result << 3; 8502 result ^= result >> 11; 8503 result += result << 15; 8504 8505 return result; 8506 } 8507 new_xpath_variable(const char_t * name)8508 template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name) 8509 { 8510 size_t length = strlength(name); 8511 if (length == 0) return 0; // empty variable names are invalid 8512 8513 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters 8514 void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t)); 8515 if (!memory) return 0; 8516 8517 T* result = new (memory) T(); 8518 8519 memcpy(result->name, name, (length + 1) * sizeof(char_t)); 8520 8521 return result; 8522 } 8523 new_xpath_variable(xpath_value_type type,const char_t * name)8524 PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name) 8525 { 8526 switch (type) 8527 { 8528 case xpath_type_node_set: 8529 return new_xpath_variable<xpath_variable_node_set>(name); 8530 8531 case xpath_type_number: 8532 return new_xpath_variable<xpath_variable_number>(name); 8533 8534 
case xpath_type_string: 8535 return new_xpath_variable<xpath_variable_string>(name); 8536 8537 case xpath_type_boolean: 8538 return new_xpath_variable<xpath_variable_boolean>(name); 8539 8540 default: 8541 return 0; 8542 } 8543 } 8544 delete_xpath_variable(T * var)8545 template <typename T> PUGI__FN void delete_xpath_variable(T* var) 8546 { 8547 var->~T(); 8548 xml_memory::deallocate(var); 8549 } 8550 delete_xpath_variable(xpath_value_type type,xpath_variable * var)8551 PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var) 8552 { 8553 switch (type) 8554 { 8555 case xpath_type_node_set: 8556 delete_xpath_variable(static_cast<xpath_variable_node_set*>(var)); 8557 break; 8558 8559 case xpath_type_number: 8560 delete_xpath_variable(static_cast<xpath_variable_number*>(var)); 8561 break; 8562 8563 case xpath_type_string: 8564 delete_xpath_variable(static_cast<xpath_variable_string*>(var)); 8565 break; 8566 8567 case xpath_type_boolean: 8568 delete_xpath_variable(static_cast<xpath_variable_boolean*>(var)); 8569 break; 8570 8571 default: 8572 assert(false && "Invalid variable type"); 8573 } 8574 } 8575 copy_xpath_variable(xpath_variable * lhs,const xpath_variable * rhs)8576 PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs) 8577 { 8578 switch (rhs->type()) 8579 { 8580 case xpath_type_node_set: 8581 return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value); 8582 8583 case xpath_type_number: 8584 return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value); 8585 8586 case xpath_type_string: 8587 return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value); 8588 8589 case xpath_type_boolean: 8590 return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value); 8591 8592 default: 8593 assert(false && "Invalid variable type"); 8594 return false; 8595 } 8596 } 8597 get_variable_scratch(char_t (& buffer)[32],xpath_variable_set * set,const char_t * begin,const char_t * 
end,xpath_variable ** out_result)8598 PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result) 8599 { 8600 size_t length = static_cast<size_t>(end - begin); 8601 char_t* scratch = buffer; 8602 8603 if (length >= sizeof(buffer) / sizeof(buffer[0])) 8604 { 8605 // need to make dummy on-heap copy 8606 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 8607 if (!scratch) return false; 8608 } 8609 8610 // copy string to zero-terminated buffer and perform lookup 8611 memcpy(scratch, begin, length * sizeof(char_t)); 8612 scratch[length] = 0; 8613 8614 *out_result = set->get(scratch); 8615 8616 // free dummy buffer 8617 if (scratch != buffer) xml_memory::deallocate(scratch); 8618 8619 return true; 8620 } 8621 PUGI__NS_END 8622 8623 // Internal node set class 8624 PUGI__NS_BEGIN xpath_get_order(const xpath_node * begin,const xpath_node * end)8625 PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end) 8626 { 8627 if (end - begin < 2) 8628 return xpath_node_set::type_sorted; 8629 8630 document_order_comparator cmp; 8631 8632 bool first = cmp(begin[0], begin[1]); 8633 8634 for (const xpath_node* it = begin + 1; it + 1 < end; ++it) 8635 if (cmp(it[0], it[1]) != first) 8636 return xpath_node_set::type_unsorted; 8637 8638 return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse; 8639 } 8640 xpath_sort(xpath_node * begin,xpath_node * end,xpath_node_set::type_t type,bool rev)8641 PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev) 8642 { 8643 xpath_node_set::type_t order = rev ? 
xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; 8644 8645 if (type == xpath_node_set::type_unsorted) 8646 { 8647 xpath_node_set::type_t sorted = xpath_get_order(begin, end); 8648 8649 if (sorted == xpath_node_set::type_unsorted) 8650 { 8651 sort(begin, end, document_order_comparator()); 8652 8653 type = xpath_node_set::type_sorted; 8654 } 8655 else 8656 type = sorted; 8657 } 8658 8659 if (type != order) reverse(begin, end); 8660 8661 return order; 8662 } 8663 xpath_first(const xpath_node * begin,const xpath_node * end,xpath_node_set::type_t type)8664 PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type) 8665 { 8666 if (begin == end) return xpath_node(); 8667 8668 switch (type) 8669 { 8670 case xpath_node_set::type_sorted: 8671 return *begin; 8672 8673 case xpath_node_set::type_sorted_reverse: 8674 return *(end - 1); 8675 8676 case xpath_node_set::type_unsorted: 8677 return *min_element(begin, end, document_order_comparator()); 8678 8679 default: 8680 assert(false && "Invalid node set type"); 8681 return xpath_node(); 8682 } 8683 } 8684 8685 class xpath_node_set_raw 8686 { 8687 xpath_node_set::type_t _type; 8688 8689 xpath_node* _begin; 8690 xpath_node* _end; 8691 xpath_node* _eos; 8692 8693 public: xpath_node_set_raw()8694 xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0) 8695 { 8696 } 8697 begin() const8698 xpath_node* begin() const 8699 { 8700 return _begin; 8701 } 8702 end() const8703 xpath_node* end() const 8704 { 8705 return _end; 8706 } 8707 empty() const8708 bool empty() const 8709 { 8710 return _begin == _end; 8711 } 8712 size() const8713 size_t size() const 8714 { 8715 return static_cast<size_t>(_end - _begin); 8716 } 8717 first() const8718 xpath_node first() const 8719 { 8720 return xpath_first(_begin, _end, _type); 8721 } 8722 8723 void push_back_grow(const xpath_node& node, xpath_allocator* alloc); 8724 push_back(const xpath_node & 
node,xpath_allocator * alloc)8725 void push_back(const xpath_node& node, xpath_allocator* alloc) 8726 { 8727 if (_end != _eos) 8728 *_end++ = node; 8729 else 8730 push_back_grow(node, alloc); 8731 } 8732 append(const xpath_node * begin_,const xpath_node * end_,xpath_allocator * alloc)8733 void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc) 8734 { 8735 if (begin_ == end_) return; 8736 8737 size_t size_ = static_cast<size_t>(_end - _begin); 8738 size_t capacity = static_cast<size_t>(_eos - _begin); 8739 size_t count = static_cast<size_t>(end_ - begin_); 8740 8741 if (size_ + count > capacity) 8742 { 8743 // reallocate the old array or allocate a new one 8744 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node))); 8745 assert(data); 8746 8747 // finalize 8748 _begin = data; 8749 _end = data + size_; 8750 _eos = data + size_ + count; 8751 } 8752 8753 memcpy(_end, begin_, count * sizeof(xpath_node)); 8754 _end += count; 8755 } 8756 sort_do()8757 void sort_do() 8758 { 8759 _type = xpath_sort(_begin, _end, _type, false); 8760 } 8761 truncate(xpath_node * pos)8762 void truncate(xpath_node* pos) 8763 { 8764 assert(_begin <= pos && pos <= _end); 8765 8766 _end = pos; 8767 } 8768 remove_duplicates()8769 void remove_duplicates() 8770 { 8771 if (_type == xpath_node_set::type_unsorted) 8772 sort(_begin, _end, duplicate_comparator()); 8773 8774 _end = unique(_begin, _end); 8775 } 8776 type() const8777 xpath_node_set::type_t type() const 8778 { 8779 return _type; 8780 } 8781 set_type(xpath_node_set::type_t value)8782 void set_type(xpath_node_set::type_t value) 8783 { 8784 _type = value; 8785 } 8786 }; 8787 push_back_grow(const xpath_node & node,xpath_allocator * alloc)8788 PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc) 8789 { 8790 size_t capacity = static_cast<size_t>(_eos - _begin); 8791 8792 // get new 
capacity (1.5x rule) 8793 size_t new_capacity = capacity + capacity / 2 + 1; 8794 8795 // reallocate the old array or allocate a new one 8796 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); 8797 assert(data); 8798 8799 // finalize 8800 _begin = data; 8801 _end = data + capacity; 8802 _eos = data + new_capacity; 8803 8804 // push 8805 *_end++ = node; 8806 } 8807 PUGI__NS_END 8808 8809 PUGI__NS_BEGIN 8810 struct xpath_context 8811 { 8812 xpath_node n; 8813 size_t position, size; 8814 xpath_contextxpath_context8815 xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_) 8816 { 8817 } 8818 }; 8819 8820 enum lexeme_t 8821 { 8822 lex_none = 0, 8823 lex_equal, 8824 lex_not_equal, 8825 lex_less, 8826 lex_greater, 8827 lex_less_or_equal, 8828 lex_greater_or_equal, 8829 lex_plus, 8830 lex_minus, 8831 lex_multiply, 8832 lex_union, 8833 lex_var_ref, 8834 lex_open_brace, 8835 lex_close_brace, 8836 lex_quoted_string, 8837 lex_number, 8838 lex_slash, 8839 lex_double_slash, 8840 lex_open_square_brace, 8841 lex_close_square_brace, 8842 lex_string, 8843 lex_comma, 8844 lex_axis_attribute, 8845 lex_dot, 8846 lex_double_dot, 8847 lex_double_colon, 8848 lex_eof 8849 }; 8850 8851 struct xpath_lexer_string 8852 { 8853 const char_t* begin; 8854 const char_t* end; 8855 xpath_lexer_stringxpath_lexer_string8856 xpath_lexer_string(): begin(0), end(0) 8857 { 8858 } 8859 operator ==xpath_lexer_string8860 bool operator==(const char_t* other) const 8861 { 8862 size_t length = static_cast<size_t>(end - begin); 8863 8864 return strequalrange(other, begin, length); 8865 } 8866 }; 8867 8868 class xpath_lexer 8869 { 8870 const char_t* _cur; 8871 const char_t* _cur_lexeme_pos; 8872 xpath_lexer_string _cur_lexeme_contents; 8873 8874 lexeme_t _cur_lexeme; 8875 8876 public: xpath_lexer(const char_t * query)8877 explicit xpath_lexer(const char_t* query): _cur(query) 
8878 { 8879 next(); 8880 } 8881 state() const8882 const char_t* state() const 8883 { 8884 return _cur; 8885 } 8886 next()8887 void next() 8888 { 8889 const char_t* cur = _cur; 8890 8891 while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur; 8892 8893 // save lexeme position for error reporting 8894 _cur_lexeme_pos = cur; 8895 8896 switch (*cur) 8897 { 8898 case 0: 8899 _cur_lexeme = lex_eof; 8900 break; 8901 8902 case '>': 8903 if (*(cur+1) == '=') 8904 { 8905 cur += 2; 8906 _cur_lexeme = lex_greater_or_equal; 8907 } 8908 else 8909 { 8910 cur += 1; 8911 _cur_lexeme = lex_greater; 8912 } 8913 break; 8914 8915 case '<': 8916 if (*(cur+1) == '=') 8917 { 8918 cur += 2; 8919 _cur_lexeme = lex_less_or_equal; 8920 } 8921 else 8922 { 8923 cur += 1; 8924 _cur_lexeme = lex_less; 8925 } 8926 break; 8927 8928 case '!': 8929 if (*(cur+1) == '=') 8930 { 8931 cur += 2; 8932 _cur_lexeme = lex_not_equal; 8933 } 8934 else 8935 { 8936 _cur_lexeme = lex_none; 8937 } 8938 break; 8939 8940 case '=': 8941 cur += 1; 8942 _cur_lexeme = lex_equal; 8943 8944 break; 8945 8946 case '+': 8947 cur += 1; 8948 _cur_lexeme = lex_plus; 8949 8950 break; 8951 8952 case '-': 8953 cur += 1; 8954 _cur_lexeme = lex_minus; 8955 8956 break; 8957 8958 case '*': 8959 cur += 1; 8960 _cur_lexeme = lex_multiply; 8961 8962 break; 8963 8964 case '|': 8965 cur += 1; 8966 _cur_lexeme = lex_union; 8967 8968 break; 8969 8970 case '$': 8971 cur += 1; 8972 8973 if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) 8974 { 8975 _cur_lexeme_contents.begin = cur; 8976 8977 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; 8978 8979 if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname 8980 { 8981 cur++; // : 8982 8983 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; 8984 } 8985 8986 _cur_lexeme_contents.end = cur; 8987 8988 _cur_lexeme = lex_var_ref; 8989 } 8990 else 8991 { 8992 _cur_lexeme = lex_none; 8993 } 8994 8995 break; 8996 8997 case '(': 8998 cur += 1; 8999 _cur_lexeme = lex_open_brace; 9000 9001 break; 
9002 9003 case ')': 9004 cur += 1; 9005 _cur_lexeme = lex_close_brace; 9006 9007 break; 9008 9009 case '[': 9010 cur += 1; 9011 _cur_lexeme = lex_open_square_brace; 9012 9013 break; 9014 9015 case ']': 9016 cur += 1; 9017 _cur_lexeme = lex_close_square_brace; 9018 9019 break; 9020 9021 case ',': 9022 cur += 1; 9023 _cur_lexeme = lex_comma; 9024 9025 break; 9026 9027 case '/': 9028 if (*(cur+1) == '/') 9029 { 9030 cur += 2; 9031 _cur_lexeme = lex_double_slash; 9032 } 9033 else 9034 { 9035 cur += 1; 9036 _cur_lexeme = lex_slash; 9037 } 9038 break; 9039 9040 case '.': 9041 if (*(cur+1) == '.') 9042 { 9043 cur += 2; 9044 _cur_lexeme = lex_double_dot; 9045 } 9046 else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit)) 9047 { 9048 _cur_lexeme_contents.begin = cur; // . 9049 9050 ++cur; 9051 9052 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; 9053 9054 _cur_lexeme_contents.end = cur; 9055 9056 _cur_lexeme = lex_number; 9057 } 9058 else 9059 { 9060 cur += 1; 9061 _cur_lexeme = lex_dot; 9062 } 9063 break; 9064 9065 case '@': 9066 cur += 1; 9067 _cur_lexeme = lex_axis_attribute; 9068 9069 break; 9070 9071 case '"': 9072 case '\'': 9073 { 9074 char_t terminator = *cur; 9075 9076 ++cur; 9077 9078 _cur_lexeme_contents.begin = cur; 9079 while (*cur && *cur != terminator) cur++; 9080 _cur_lexeme_contents.end = cur; 9081 9082 if (!*cur) 9083 _cur_lexeme = lex_none; 9084 else 9085 { 9086 cur += 1; 9087 _cur_lexeme = lex_quoted_string; 9088 } 9089 9090 break; 9091 } 9092 9093 case ':': 9094 if (*(cur+1) == ':') 9095 { 9096 cur += 2; 9097 _cur_lexeme = lex_double_colon; 9098 } 9099 else 9100 { 9101 _cur_lexeme = lex_none; 9102 } 9103 break; 9104 9105 default: 9106 if (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) 9107 { 9108 _cur_lexeme_contents.begin = cur; 9109 9110 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; 9111 9112 if (*cur == '.') 9113 { 9114 cur++; 9115 9116 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; 9117 } 9118 9119 _cur_lexeme_contents.end = cur; 9120 9121 _cur_lexeme = 
lex_number; 9122 } 9123 else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) 9124 { 9125 _cur_lexeme_contents.begin = cur; 9126 9127 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; 9128 9129 if (cur[0] == ':') 9130 { 9131 if (cur[1] == '*') // namespace test ncname:* 9132 { 9133 cur += 2; // :* 9134 } 9135 else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname 9136 { 9137 cur++; // : 9138 9139 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; 9140 } 9141 } 9142 9143 _cur_lexeme_contents.end = cur; 9144 9145 _cur_lexeme = lex_string; 9146 } 9147 else 9148 { 9149 _cur_lexeme = lex_none; 9150 } 9151 } 9152 9153 _cur = cur; 9154 } 9155 current() const9156 lexeme_t current() const 9157 { 9158 return _cur_lexeme; 9159 } 9160 current_pos() const9161 const char_t* current_pos() const 9162 { 9163 return _cur_lexeme_pos; 9164 } 9165 contents() const9166 const xpath_lexer_string& contents() const 9167 { 9168 assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string); 9169 9170 return _cur_lexeme_contents; 9171 } 9172 }; 9173 9174 enum ast_type_t 9175 { 9176 ast_unknown, 9177 ast_op_or, // left or right 9178 ast_op_and, // left and right 9179 ast_op_equal, // left = right 9180 ast_op_not_equal, // left != right 9181 ast_op_less, // left < right 9182 ast_op_greater, // left > right 9183 ast_op_less_or_equal, // left <= right 9184 ast_op_greater_or_equal, // left >= right 9185 ast_op_add, // left + right 9186 ast_op_subtract, // left - right 9187 ast_op_multiply, // left * right 9188 ast_op_divide, // left / right 9189 ast_op_mod, // left % right 9190 ast_op_negate, // left - right 9191 ast_op_union, // left | right 9192 ast_predicate, // apply predicate to set; next points to next predicate 9193 ast_filter, // select * from left where right 9194 ast_string_constant, // string constant 9195 ast_number_constant, // number constant 9196 ast_variable, // variable 9197 ast_func_last, // 
// last()
	ast_func_position,				// position()
	ast_func_count,					// count(left)
	ast_func_id,					// id(left)
	ast_func_local_name_0,			// local-name()
	ast_func_local_name_1,			// local-name(left)
	ast_func_namespace_uri_0,		// namespace-uri()
	ast_func_namespace_uri_1,		// namespace-uri(left)
	ast_func_name_0,				// name()
	ast_func_name_1,				// name(left)
	ast_func_string_0,				// string()
	ast_func_string_1,				// string(left)
	ast_func_concat,				// concat(left, right, siblings)
	ast_func_starts_with,			// starts-with(left, right)
	ast_func_contains,				// contains(left, right)
	ast_func_substring_before,		// substring-before(left, right)
	ast_func_substring_after,		// substring-after(left, right)
	ast_func_substring_2,			// substring(left, right)
	ast_func_substring_3,			// substring(left, right, third)
	ast_func_string_length_0,		// string-length()
	ast_func_string_length_1,		// string-length(left)
	ast_func_normalize_space_0,		// normalize-space()
	ast_func_normalize_space_1,		// normalize-space(left)
	ast_func_translate,				// translate(left, right, third)
	ast_func_boolean,				// boolean(left)
	ast_func_not,					// not(left)
	ast_func_true,					// true()
	ast_func_false,					// false()
	ast_func_lang,					// lang(left)
	ast_func_number_0,				// number()
	ast_func_number_1,				// number(left)
	ast_func_sum,					// sum(left)
	ast_func_floor,					// floor(left)
	ast_func_ceiling,				// ceiling(left)
	ast_func_round,					// round(left)
	ast_step,						// process set left with step
	ast_step_root,					// select root node

	// optimized node kinds
	ast_opt_translate_table,		// translate(left, right, third) where right/third are constants
	ast_opt_compare_attribute		// @name = 'string'
};

// XPath axes
enum axis_t
{
	axis_ancestor,
	axis_ancestor_or_self,
	axis_attribute,
	axis_child,
	axis_descendant,
	axis_descendant_or_self,
	axis_following,
	axis_following_sibling,
	axis_namespace,
axis_parent, 9251 axis_preceding, 9252 axis_preceding_sibling, 9253 axis_self 9254 }; 9255 9256 enum nodetest_t 9257 { 9258 nodetest_none, 9259 nodetest_name, 9260 nodetest_type_node, 9261 nodetest_type_comment, 9262 nodetest_type_pi, 9263 nodetest_type_text, 9264 nodetest_pi, 9265 nodetest_all, 9266 nodetest_all_in_namespace 9267 }; 9268 9269 enum predicate_t 9270 { 9271 predicate_default, 9272 predicate_posinv, 9273 predicate_constant, 9274 predicate_constant_one 9275 }; 9276 9277 enum nodeset_eval_t 9278 { 9279 nodeset_eval_all, 9280 nodeset_eval_any, 9281 nodeset_eval_first 9282 }; 9283 9284 template <axis_t N> struct axis_to_type 9285 { 9286 static const axis_t axis; 9287 }; 9288 9289 template <axis_t N> const axis_t axis_to_type<N>::axis = N; 9290 9291 class xpath_ast_node 9292 { 9293 private: 9294 // node type 9295 char _type; 9296 char _rettype; 9297 9298 // for ast_step 9299 char _axis; 9300 9301 // for ast_step/ast_predicate/ast_filter 9302 char _test; 9303 9304 // tree node structure 9305 xpath_ast_node* _left; 9306 xpath_ast_node* _right; 9307 xpath_ast_node* _next; 9308 9309 union 9310 { 9311 // value for ast_string_constant 9312 const char_t* string; 9313 // value for ast_number_constant 9314 double number; 9315 // variable for ast_variable 9316 xpath_variable* variable; 9317 // node test for ast_step (node name/namespace/node type/pi target) 9318 const char_t* nodetest; 9319 // table for ast_opt_translate_table 9320 const unsigned char* table; 9321 } _data; 9322 9323 xpath_ast_node(const xpath_ast_node&); 9324 xpath_ast_node& operator=(const xpath_ast_node&); 9325 compare_eq(xpath_ast_node * lhs,xpath_ast_node * rhs,const xpath_context & c,const xpath_stack & stack,const Comp & comp)9326 template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) 9327 { 9328 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); 9329 9330 if (lt != xpath_type_node_set 
&& rt != xpath_type_node_set) 9331 { 9332 if (lt == xpath_type_boolean || rt == xpath_type_boolean) 9333 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack)); 9334 else if (lt == xpath_type_number || rt == xpath_type_number) 9335 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack)); 9336 else if (lt == xpath_type_string || rt == xpath_type_string) 9337 { 9338 xpath_allocator_capture cr(stack.result); 9339 9340 xpath_string ls = lhs->eval_string(c, stack); 9341 xpath_string rs = rhs->eval_string(c, stack); 9342 9343 return comp(ls, rs); 9344 } 9345 } 9346 else if (lt == xpath_type_node_set && rt == xpath_type_node_set) 9347 { 9348 xpath_allocator_capture cr(stack.result); 9349 9350 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); 9351 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); 9352 9353 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) 9354 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) 9355 { 9356 xpath_allocator_capture cri(stack.result); 9357 9358 if (comp(string_value(*li, stack.result), string_value(*ri, stack.result))) 9359 return true; 9360 } 9361 9362 return false; 9363 } 9364 else 9365 { 9366 if (lt == xpath_type_node_set) 9367 { 9368 swap(lhs, rhs); 9369 swap(lt, rt); 9370 } 9371 9372 if (lt == xpath_type_boolean) 9373 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack)); 9374 else if (lt == xpath_type_number) 9375 { 9376 xpath_allocator_capture cr(stack.result); 9377 9378 double l = lhs->eval_number(c, stack); 9379 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); 9380 9381 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) 9382 { 9383 xpath_allocator_capture cri(stack.result); 9384 9385 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) 9386 return true; 9387 } 9388 9389 return false; 9390 } 9391 else if (lt == xpath_type_string) 9392 { 9393 xpath_allocator_capture 
cr(stack.result); 9394 9395 xpath_string l = lhs->eval_string(c, stack); 9396 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); 9397 9398 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) 9399 { 9400 xpath_allocator_capture cri(stack.result); 9401 9402 if (comp(l, string_value(*ri, stack.result))) 9403 return true; 9404 } 9405 9406 return false; 9407 } 9408 } 9409 9410 assert(false && "Wrong types"); 9411 return false; 9412 } 9413 eval_once(xpath_node_set::type_t type,nodeset_eval_t eval)9414 static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval) 9415 { 9416 return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any; 9417 } 9418 compare_rel(xpath_ast_node * lhs,xpath_ast_node * rhs,const xpath_context & c,const xpath_stack & stack,const Comp & comp)9419 template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) 9420 { 9421 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); 9422 9423 if (lt != xpath_type_node_set && rt != xpath_type_node_set) 9424 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack)); 9425 else if (lt == xpath_type_node_set && rt == xpath_type_node_set) 9426 { 9427 xpath_allocator_capture cr(stack.result); 9428 9429 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); 9430 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); 9431 9432 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) 9433 { 9434 xpath_allocator_capture cri(stack.result); 9435 9436 double l = convert_string_to_number(string_value(*li, stack.result).c_str()); 9437 9438 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) 9439 { 9440 xpath_allocator_capture crii(stack.result); 9441 9442 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) 9443 return true; 9444 } 9445 } 9446 9447 return false; 
9448 } 9449 else if (lt != xpath_type_node_set && rt == xpath_type_node_set) 9450 { 9451 xpath_allocator_capture cr(stack.result); 9452 9453 double l = lhs->eval_number(c, stack); 9454 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); 9455 9456 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) 9457 { 9458 xpath_allocator_capture cri(stack.result); 9459 9460 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) 9461 return true; 9462 } 9463 9464 return false; 9465 } 9466 else if (lt == xpath_type_node_set && rt != xpath_type_node_set) 9467 { 9468 xpath_allocator_capture cr(stack.result); 9469 9470 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); 9471 double r = rhs->eval_number(c, stack); 9472 9473 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) 9474 { 9475 xpath_allocator_capture cri(stack.result); 9476 9477 if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r)) 9478 return true; 9479 } 9480 9481 return false; 9482 } 9483 else 9484 { 9485 assert(false && "Wrong types"); 9486 return false; 9487 } 9488 } 9489 apply_predicate_boolean(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack,bool once)9490 static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) 9491 { 9492 assert(ns.size() >= first); 9493 assert(expr->rettype() != xpath_type_number); 9494 9495 size_t i = 1; 9496 size_t size = ns.size() - first; 9497 9498 xpath_node* last = ns.begin() + first; 9499 9500 // remove_if... 
or well, sort of 9501 for (xpath_node* it = last; it != ns.end(); ++it, ++i) 9502 { 9503 xpath_context c(*it, i, size); 9504 9505 if (expr->eval_boolean(c, stack)) 9506 { 9507 *last++ = *it; 9508 9509 if (once) break; 9510 } 9511 } 9512 9513 ns.truncate(last); 9514 } 9515 apply_predicate_number(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack,bool once)9516 static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) 9517 { 9518 assert(ns.size() >= first); 9519 assert(expr->rettype() == xpath_type_number); 9520 9521 size_t i = 1; 9522 size_t size = ns.size() - first; 9523 9524 xpath_node* last = ns.begin() + first; 9525 9526 // remove_if... or well, sort of 9527 for (xpath_node* it = last; it != ns.end(); ++it, ++i) 9528 { 9529 xpath_context c(*it, i, size); 9530 9531 if (expr->eval_number(c, stack) == i) 9532 { 9533 *last++ = *it; 9534 9535 if (once) break; 9536 } 9537 } 9538 9539 ns.truncate(last); 9540 } 9541 apply_predicate_number_const(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack)9542 static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) 9543 { 9544 assert(ns.size() >= first); 9545 assert(expr->rettype() == xpath_type_number); 9546 9547 size_t size = ns.size() - first; 9548 9549 xpath_node* last = ns.begin() + first; 9550 9551 xpath_context c(xpath_node(), 1, size); 9552 9553 double er = expr->eval_number(c, stack); 9554 9555 if (er >= 1.0 && er <= size) 9556 { 9557 size_t eri = static_cast<size_t>(er); 9558 9559 if (er == eri) 9560 { 9561 xpath_node r = last[eri - 1]; 9562 9563 *last++ = r; 9564 } 9565 } 9566 9567 ns.truncate(last); 9568 } 9569 apply_predicate(xpath_node_set_raw & ns,size_t first,const xpath_stack & stack,bool once)9570 void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once) 9571 { 9572 
if (ns.size() == first) return; 9573 9574 assert(_type == ast_filter || _type == ast_predicate); 9575 9576 if (_test == predicate_constant || _test == predicate_constant_one) 9577 apply_predicate_number_const(ns, first, _right, stack); 9578 else if (_right->rettype() == xpath_type_number) 9579 apply_predicate_number(ns, first, _right, stack, once); 9580 else 9581 apply_predicate_boolean(ns, first, _right, stack, once); 9582 } 9583 apply_predicates(xpath_node_set_raw & ns,size_t first,const xpath_stack & stack,nodeset_eval_t eval)9584 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval) 9585 { 9586 if (ns.size() == first) return; 9587 9588 bool last_once = eval_once(ns.type(), eval); 9589 9590 for (xpath_ast_node* pred = _right; pred; pred = pred->_next) 9591 pred->apply_predicate(ns, first, stack, !pred->_next && last_once); 9592 } 9593 step_push(xpath_node_set_raw & ns,xml_attribute_struct * a,xml_node_struct * parent,xpath_allocator * alloc)9594 bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc) 9595 { 9596 assert(a); 9597 9598 const char_t* name = a->name ? 
a->name + 0 : PUGIXML_TEXT(""); 9599 9600 switch (_test) 9601 { 9602 case nodetest_name: 9603 if (strequal(name, _data.nodetest) && is_xpath_attribute(name)) 9604 { 9605 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); 9606 return true; 9607 } 9608 break; 9609 9610 case nodetest_type_node: 9611 case nodetest_all: 9612 if (is_xpath_attribute(name)) 9613 { 9614 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); 9615 return true; 9616 } 9617 break; 9618 9619 case nodetest_all_in_namespace: 9620 if (starts_with(name, _data.nodetest) && is_xpath_attribute(name)) 9621 { 9622 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); 9623 return true; 9624 } 9625 break; 9626 9627 default: 9628 ; 9629 } 9630 9631 return false; 9632 } 9633 step_push(xpath_node_set_raw & ns,xml_node_struct * n,xpath_allocator * alloc)9634 bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc) 9635 { 9636 assert(n); 9637 9638 xml_node_type type = PUGI__NODETYPE(n); 9639 9640 switch (_test) 9641 { 9642 case nodetest_name: 9643 if (type == node_element && n->name && strequal(n->name, _data.nodetest)) 9644 { 9645 ns.push_back(xml_node(n), alloc); 9646 return true; 9647 } 9648 break; 9649 9650 case nodetest_type_node: 9651 ns.push_back(xml_node(n), alloc); 9652 return true; 9653 9654 case nodetest_type_comment: 9655 if (type == node_comment) 9656 { 9657 ns.push_back(xml_node(n), alloc); 9658 return true; 9659 } 9660 break; 9661 9662 case nodetest_type_text: 9663 if (type == node_pcdata || type == node_cdata) 9664 { 9665 ns.push_back(xml_node(n), alloc); 9666 return true; 9667 } 9668 break; 9669 9670 case nodetest_type_pi: 9671 if (type == node_pi) 9672 { 9673 ns.push_back(xml_node(n), alloc); 9674 return true; 9675 } 9676 break; 9677 9678 case nodetest_pi: 9679 if (type == node_pi && n->name && strequal(n->name, _data.nodetest)) 9680 { 9681 ns.push_back(xml_node(n), alloc); 9682 return true; 9683 } 9684 break; 9685 9686 
// Appends to ns the nodes reachable from node n along the axis encoded in T (read through T::axis),
// passing every candidate through step_push() so that only nodes matching the node test are stored.
// When once is true the function returns right after the first accepted node; the self and parent
// axes ignore once because they produce at most one candidate.
// The last parameter is an unnamed tag whose only role is to select the template instantiation.
template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T)
{
	const axis_t axis = T::axis;

	switch (axis)
	{
	case axis_attribute:
	{
		// bitwise & on two bools: step_push() always runs, the early exit needs both to be true
		for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
			if (step_push(ns, a, n, alloc) & once)
				return;

		break;
	}

	case axis_child:
	{
		for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
			if (step_push(ns, c, alloc) & once)
				return;

		break;
	}

	case axis_descendant:
	case axis_descendant_or_self:
	{
		if (axis == axis_descendant_or_self)
			if (step_push(ns, n, alloc) & once)
				return;

		xml_node_struct* cur = n->first_child;

		// iterative pre-order walk of the subtree below n
		while (cur)
		{
			if (step_push(ns, cur, alloc) & once)
				return;

			if (cur->first_child)
				cur = cur->first_child;
			else
			{
				// climb until a node with a next sibling is found; reaching n means the subtree is done
				while (!cur->next_sibling)
				{
					cur = cur->parent;

					if (cur == n) return;
				}

				cur = cur->next_sibling;
			}
		}

		break;
	}

	case axis_following_sibling:
	{
		for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
			if (step_push(ns, c, alloc) & once)
				return;

		break;
	}

	case axis_preceding_sibling:
	{
		// NOTE(review): the loop ends when the candidate has no next_sibling, which presumably relies on
		// prev_sibling_c of the first child wrapping around to the last child - confirm against xml_node_struct
		for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
			if (step_push(ns, c, alloc) & once)
				return;

		break;
	}

	case axis_following:
	{
		xml_node_struct* cur = n;

		// exit from this node so that we don't include descendants
		while (!cur->next_sibling)
		{
			cur = cur->parent;

			if (!cur) return;
		}

		cur = cur->next_sibling;

		// pre-order walk over everything after n; ends when climbing runs past the root
		while (cur)
		{
			if (step_push(ns, cur, alloc) & once)
				return;

			if (cur->first_child)
				cur = cur->first_child;
			else
			{
				while (!cur->next_sibling)
				{
					cur = cur->parent;

					if (!cur) return;
				}

				cur = cur->next_sibling;
			}
		}

		break;
	}

	case axis_preceding:
	{
		xml_node_struct* cur = n;

		// exit from this node so that we don't include descendants
		while (!cur->prev_sibling_c->next_sibling)
		{
			cur = cur->parent;

			if (!cur) return;
		}

		cur = cur->prev_sibling_c;

		while (cur)
		{
			// descend through first_child->prev_sibling_c (presumably the last child - TODO confirm)
			// so that children are visited before their parent
			if (cur->first_child)
				cur = cur->first_child->prev_sibling_c;
			else
			{
				// leaf node, can't be ancestor
				if (step_push(ns, cur, alloc) & once)
					return;

				while (!cur->prev_sibling_c->next_sibling)
				{
					cur = cur->parent;

					if (!cur) return;

					// a parent reached while climbing is pushed unless it is an ancestor of n
					if (!node_is_ancestor(cur, n))
						if (step_push(ns, cur, alloc) & once)
							return;
				}

				cur = cur->prev_sibling_c;
			}
		}

		break;
	}

	case axis_ancestor:
	case axis_ancestor_or_self:
	{
		if (axis == axis_ancestor_or_self)
			if (step_push(ns, n, alloc) & once)
				return;

		xml_node_struct* cur = n->parent;

		// walk the parent chain up to the root
		while (cur)
		{
			if (step_push(ns, cur, alloc) & once)
				return;

			cur = cur->parent;
		}

		break;
	}

	case axis_self:
	{
		step_push(ns, n, alloc);

		break;
	}

	case axis_parent:
	{
		if (n->parent)
			step_push(ns, n->parent, alloc);

		break;
	}

	default:
		assert(false && "Unimplemented axis");
	}
}
alloc); 9893 9894 break; 9895 } 9896 9897 default: 9898 assert(false && "Unimplemented axis"); 9899 } 9900 } 9901 step_fill(xpath_node_set_raw & ns,xml_attribute_struct * a,xml_node_struct * p,xpath_allocator * alloc,bool once,T v)9902 template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v) 9903 { 9904 const axis_t axis = T::axis; 9905 9906 switch (axis) 9907 { 9908 case axis_ancestor: 9909 case axis_ancestor_or_self: 9910 { 9911 if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test 9912 if (step_push(ns, a, p, alloc) & once) 9913 return; 9914 9915 xml_node_struct* cur = p; 9916 9917 while (cur) 9918 { 9919 if (step_push(ns, cur, alloc) & once) 9920 return; 9921 9922 cur = cur->parent; 9923 } 9924 9925 break; 9926 } 9927 9928 case axis_descendant_or_self: 9929 case axis_self: 9930 { 9931 if (_test == nodetest_type_node) // reject attributes based on principal node type test 9932 step_push(ns, a, p, alloc); 9933 9934 break; 9935 } 9936 9937 case axis_following: 9938 { 9939 xml_node_struct* cur = p; 9940 9941 while (cur) 9942 { 9943 if (cur->first_child) 9944 cur = cur->first_child; 9945 else 9946 { 9947 while (!cur->next_sibling) 9948 { 9949 cur = cur->parent; 9950 9951 if (!cur) return; 9952 } 9953 9954 cur = cur->next_sibling; 9955 } 9956 9957 if (step_push(ns, cur, alloc) & once) 9958 return; 9959 } 9960 9961 break; 9962 } 9963 9964 case axis_parent: 9965 { 9966 step_push(ns, p, alloc); 9967 9968 break; 9969 } 9970 9971 case axis_preceding: 9972 { 9973 // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding 9974 step_fill(ns, p, alloc, once, v); 9975 break; 9976 } 9977 9978 default: 9979 assert(false && "Unimplemented axis"); 9980 } 9981 } 9982 step_fill(xpath_node_set_raw & ns,const xpath_node & xn,xpath_allocator * 
alloc,bool once,T v)9983 template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v) 9984 { 9985 const axis_t axis = T::axis; 9986 const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self); 9987 9988 if (xn.node()) 9989 step_fill(ns, xn.node().internal_object(), alloc, once, v); 9990 else if (axis_has_attributes && xn.attribute() && xn.parent()) 9991 step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v); 9992 } 9993 step_do(const xpath_context & c,const xpath_stack & stack,nodeset_eval_t eval,T v)9994 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v) 9995 { 9996 const axis_t axis = T::axis; 9997 const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling); 9998 const xpath_node_set::type_t axis_type = axis_reverse ? 
xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; 9999 10000 bool once = 10001 (axis == axis_attribute && _test == nodetest_name) || 10002 (!_right && eval_once(axis_type, eval)) || 10003 (_right && !_right->_next && _right->_test == predicate_constant_one); 10004 10005 xpath_node_set_raw ns; 10006 ns.set_type(axis_type); 10007 10008 if (_left) 10009 { 10010 xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all); 10011 10012 // self axis preserves the original order 10013 if (axis == axis_self) ns.set_type(s.type()); 10014 10015 for (const xpath_node* it = s.begin(); it != s.end(); ++it) 10016 { 10017 size_t size = ns.size(); 10018 10019 // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes 10020 if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted); 10021 10022 step_fill(ns, *it, stack.result, once, v); 10023 if (_right) apply_predicates(ns, size, stack, eval); 10024 } 10025 } 10026 else 10027 { 10028 step_fill(ns, c.n, stack.result, once, v); 10029 if (_right) apply_predicates(ns, 0, stack, eval); 10030 } 10031 10032 // child, attribute and self axes always generate unique set of nodes 10033 // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice 10034 if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted) 10035 ns.remove_duplicates(); 10036 10037 return ns; 10038 } 10039 10040 public: xpath_ast_node(ast_type_t type,xpath_value_type rettype_,const char_t * value)10041 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value): 10042 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) 10043 { 10044 assert(type == ast_string_constant); 10045 _data.string = value; 10046 } 10047 xpath_ast_node(ast_type_t 
// Creates a number constant node holding value.
xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
	_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
{
	assert(type == ast_number_constant);
	_data.number = value;
}

// Creates a variable reference node; value is stored as given and read at evaluation time.
xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
	_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
{
	assert(type == ast_variable);
	_data.variable = value;
}

// Creates a generic operator/function node with up to two children; _data is left unset.
xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
	_type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
{
}

// Creates a location step node: left is the expression the step is applied to (may be null),
// axis and test select the candidates, contents is the string used by name-based node tests.
// The return type is always a node set.
xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
	_type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
{
	assert(type == ast_step);
	_data.nodetest = contents;
}

// Creates a filter or predicate node; the predicate classification is stored in _test.
// The return type is always a node set.
xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test):
	_type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0)
{
	assert(type == ast_filter || type == ast_predicate);
}
// Links value as the next node in the sibling chain (_next), which is how predicate lists
// and extra function arguments are traversed elsewhere in this class.
void set_next(xpath_ast_node* value)
{
	_next = value;
}

// Replaces the right child (_right) of this node.
void set_right(xpath_ast_node* value)
{
	_right = value;
}
// Evaluates this node in context c and returns the result as a boolean.
// Node types that produce a boolean directly are handled case by case; every other node type is
// evaluated through its own return type and then converted to boolean in the default branch.
bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
{
	switch (_type)
	{
	case ast_op_or:
		// built-in || and && short-circuit, so the right operand may not be evaluated
		return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);

	case ast_op_and:
		return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);

	case ast_op_equal:
		return compare_eq(_left, _right, c, stack, equal_to());

	case ast_op_not_equal:
		return compare_eq(_left, _right, c, stack, not_equal_to());

	case ast_op_less:
		return compare_rel(_left, _right, c, stack, less());

	case ast_op_greater:
		// a > b is evaluated as b < a
		return compare_rel(_right, _left, c, stack, less());

	case ast_op_less_or_equal:
		return compare_rel(_left, _right, c, stack, less_equal());

	case ast_op_greater_or_equal:
		// a >= b is evaluated as b <= a
		return compare_rel(_right, _left, c, stack, less_equal());

	case ast_func_starts_with:
	{
		xpath_allocator_capture cr(stack.result);

		xpath_string lr = _left->eval_string(c, stack);
		xpath_string rr = _right->eval_string(c, stack);

		return starts_with(lr.c_str(), rr.c_str());
	}

	case ast_func_contains:
	{
		xpath_allocator_capture cr(stack.result);

		xpath_string lr = _left->eval_string(c, stack);
		xpath_string rr = _right->eval_string(c, stack);

		return find_substring(lr.c_str(), rr.c_str()) != 0;
	}

	case ast_func_boolean:
		return _left->eval_boolean(c, stack);

	case ast_func_not:
		return !_left->eval_boolean(c, stack);

	case ast_func_true:
		return true;

	case ast_func_false:
		return false;

	case ast_func_lang:
	{
		// attribute context nodes never match
		if (c.n.attribute()) return false;

		xpath_allocator_capture cr(stack.result);

		xpath_string lang = _left->eval_string(c, stack);

		// the nearest xml:lang attribute on the context node or one of its ancestors decides the result
		for (xml_node n = c.n.node(); n; n = n.parent())
		{
			xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));

			if (a)
			{
				const char_t* value = a.value();

				// strnicmp / strncasecmp is not portable
				for (const char_t* lit = lang.c_str(); *lit; ++lit)
				{
					// a shorter attribute value fails here as well: its terminator never equals a non-zero character
					if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
					++value;
				}

				// the argument must match the whole value or a prefix that is followed by '-'
				return *value == 0 || *value == '-';
			}
		}

		return false;
	}

	case ast_opt_compare_attribute:
	{
		// optimized attribute comparison: the right child is either a string constant or a variable read as a string
		const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();

		xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);

		return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
	}

	case ast_variable:
	{
		assert(_rettype == _data.variable->type());

		if (_rettype == xpath_type_boolean)
			return _data.variable->get_boolean();

		// fallthrough to type conversion
	}

	default:
	{
		// generic path: evaluate using the node's own return type, then convert the result to boolean
		switch (_rettype)
		{
		case xpath_type_number:
			return convert_number_to_boolean(eval_number(c, stack));

		case xpath_type_string:
		{
			xpath_allocator_capture cr(stack.result);

			return !eval_string(c, stack).empty();
		}

		case xpath_type_node_set:
		{
			xpath_allocator_capture cr(stack.result);

			// only emptiness matters, so evaluation may stop at the first node
			return !eval_node_set(c, stack, nodeset_eval_any).empty();
		}

		default:
			assert(false && "Wrong expression for return type boolean");
			return false;
		}
	}
	}
}
// Evaluates this node in context c and returns the result as a number.
// Node types that produce a number directly are handled case by case; every other node type is
// evaluated through its own return type and then converted to number in the default branch.
double eval_number(const xpath_context& c, const xpath_stack& stack)
{
	switch (_type)
	{
	case ast_op_add:
		return _left->eval_number(c, stack) + _right->eval_number(c, stack);

	case ast_op_subtract:
		return _left->eval_number(c, stack) - _right->eval_number(c, stack);

	case ast_op_multiply:
		return _left->eval_number(c, stack) * _right->eval_number(c, stack);

	case ast_op_divide:
		return _left->eval_number(c, stack) / _right->eval_number(c, stack);

	case ast_op_mod:
		return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));

	case ast_op_negate:
		return -_left->eval_number(c, stack);

	case ast_number_constant:
		return _data.number;

	case ast_func_last:
		// last() is the context size
		return static_cast<double>(c.size);

	case ast_func_position:
		return static_cast<double>(c.position);

	case ast_func_count:
	{
		xpath_allocator_capture cr(stack.result);

		return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
	}

	case ast_func_string_length_0:
	{
		// no argument: length of the context node's string value
		xpath_allocator_capture cr(stack.result);

		return static_cast<double>(string_value(c.n, stack.result).length());
	}

	case ast_func_string_length_1:
	{
		xpath_allocator_capture cr(stack.result);

		return static_cast<double>(_left->eval_string(c, stack).length());
	}

	case ast_func_number_0:
	{
		// no argument: numeric conversion of the context node's string value
		xpath_allocator_capture cr(stack.result);

		return convert_string_to_number(string_value(c.n, stack.result).c_str());
	}

	case ast_func_number_1:
		return _left->eval_number(c, stack);

	case ast_func_sum:
	{
		xpath_allocator_capture cr(stack.result);

		double r = 0;

		xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);

		for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
		{
			// release each node's temporary string value before moving on
			xpath_allocator_capture cri(stack.result);

			r += convert_string_to_number(string_value(*it, stack.result).c_str());
		}

		return r;
	}

	case ast_func_floor:
	{
		double r = _left->eval_number(c, stack);

		// r == r is false only for NaN, which is returned unchanged
		return r == r ? floor(r) : r;
	}

	case ast_func_ceiling:
	{
		double r = _left->eval_number(c, stack);

		// r == r is false only for NaN, which is returned unchanged
		return r == r ? ceil(r) : r;
	}

	case ast_func_round:
		return round_nearest_nzero(_left->eval_number(c, stack));

	case ast_variable:
	{
		assert(_rettype == _data.variable->type());

		if (_rettype == xpath_type_number)
			return _data.variable->get_number();

		// fallthrough to type conversion
	}

	default:
	{
		// generic path: evaluate using the node's own return type, then convert the result to number
		switch (_rettype)
		{
		case xpath_type_boolean:
			return eval_boolean(c, stack) ? 1 : 0;

		case xpath_type_string:
		{
			xpath_allocator_capture cr(stack.result);

			return convert_string_to_number(eval_string(c, stack).c_str());
		}

		case xpath_type_node_set:
		{
			// node sets are converted through their string form, exactly like strings
			xpath_allocator_capture cr(stack.result);

			return convert_string_to_number(eval_string(c, stack).c_str());
		}

		default:
			assert(false && "Wrong expression for return type number");
			return 0;
		}

	}
	}
}
1 : 0; 10341 10342 case xpath_type_string: 10343 { 10344 xpath_allocator_capture cr(stack.result); 10345 10346 return convert_string_to_number(eval_string(c, stack).c_str()); 10347 } 10348 10349 case xpath_type_node_set: 10350 { 10351 xpath_allocator_capture cr(stack.result); 10352 10353 return convert_string_to_number(eval_string(c, stack).c_str()); 10354 } 10355 10356 default: 10357 assert(false && "Wrong expression for return type number"); 10358 return 0; 10359 } 10360 10361 } 10362 } 10363 } 10364 eval_string_concat(const xpath_context & c,const xpath_stack & stack)10365 xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack) 10366 { 10367 assert(_type == ast_func_concat); 10368 10369 xpath_allocator_capture ct(stack.temp); 10370 10371 // count the string number 10372 size_t count = 1; 10373 for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++; 10374 10375 // gather all strings 10376 xpath_string static_buffer[4]; 10377 xpath_string* buffer = static_buffer; 10378 10379 // allocate on-heap for large concats 10380 if (count > sizeof(static_buffer) / sizeof(static_buffer[0])) 10381 { 10382 buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string))); 10383 assert(buffer); 10384 } 10385 10386 // evaluate all strings to temporary stack 10387 xpath_stack swapped_stack = {stack.temp, stack.result}; 10388 10389 buffer[0] = _left->eval_string(c, swapped_stack); 10390 10391 size_t pos = 1; 10392 for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack); 10393 assert(pos == count); 10394 10395 // get total length 10396 size_t length = 0; 10397 for (size_t i = 0; i < count; ++i) length += buffer[i].length(); 10398 10399 // create final string 10400 char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t))); 10401 assert(result); 10402 10403 char_t* ri = result; 10404 10405 for (size_t j = 0; j < count; ++j) 10406 for (const char_t* 
bi = buffer[j].c_str(); *bi; ++bi) 10407 *ri++ = *bi; 10408 10409 *ri = 0; 10410 10411 return xpath_string::from_heap_preallocated(result, ri); 10412 } 10413 eval_string(const xpath_context & c,const xpath_stack & stack)10414 xpath_string eval_string(const xpath_context& c, const xpath_stack& stack) 10415 { 10416 switch (_type) 10417 { 10418 case ast_string_constant: 10419 return xpath_string::from_const(_data.string); 10420 10421 case ast_func_local_name_0: 10422 { 10423 xpath_node na = c.n; 10424 10425 return xpath_string::from_const(local_name(na)); 10426 } 10427 10428 case ast_func_local_name_1: 10429 { 10430 xpath_allocator_capture cr(stack.result); 10431 10432 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); 10433 xpath_node na = ns.first(); 10434 10435 return xpath_string::from_const(local_name(na)); 10436 } 10437 10438 case ast_func_name_0: 10439 { 10440 xpath_node na = c.n; 10441 10442 return xpath_string::from_const(qualified_name(na)); 10443 } 10444 10445 case ast_func_name_1: 10446 { 10447 xpath_allocator_capture cr(stack.result); 10448 10449 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); 10450 xpath_node na = ns.first(); 10451 10452 return xpath_string::from_const(qualified_name(na)); 10453 } 10454 10455 case ast_func_namespace_uri_0: 10456 { 10457 xpath_node na = c.n; 10458 10459 return xpath_string::from_const(namespace_uri(na)); 10460 } 10461 10462 case ast_func_namespace_uri_1: 10463 { 10464 xpath_allocator_capture cr(stack.result); 10465 10466 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); 10467 xpath_node na = ns.first(); 10468 10469 return xpath_string::from_const(namespace_uri(na)); 10470 } 10471 10472 case ast_func_string_0: 10473 return string_value(c.n, stack.result); 10474 10475 case ast_func_string_1: 10476 return _left->eval_string(c, stack); 10477 10478 case ast_func_concat: 10479 return eval_string_concat(c, stack); 10480 10481 case 
ast_func_substring_before: 10482 { 10483 xpath_allocator_capture cr(stack.temp); 10484 10485 xpath_stack swapped_stack = {stack.temp, stack.result}; 10486 10487 xpath_string s = _left->eval_string(c, swapped_stack); 10488 xpath_string p = _right->eval_string(c, swapped_stack); 10489 10490 const char_t* pos = find_substring(s.c_str(), p.c_str()); 10491 10492 return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string(); 10493 } 10494 10495 case ast_func_substring_after: 10496 { 10497 xpath_allocator_capture cr(stack.temp); 10498 10499 xpath_stack swapped_stack = {stack.temp, stack.result}; 10500 10501 xpath_string s = _left->eval_string(c, swapped_stack); 10502 xpath_string p = _right->eval_string(c, swapped_stack); 10503 10504 const char_t* pos = find_substring(s.c_str(), p.c_str()); 10505 if (!pos) return xpath_string(); 10506 10507 const char_t* rbegin = pos + p.length(); 10508 const char_t* rend = s.c_str() + s.length(); 10509 10510 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); 10511 } 10512 10513 case ast_func_substring_2: 10514 { 10515 xpath_allocator_capture cr(stack.temp); 10516 10517 xpath_stack swapped_stack = {stack.temp, stack.result}; 10518 10519 xpath_string s = _left->eval_string(c, swapped_stack); 10520 size_t s_length = s.length(); 10521 10522 double first = round_nearest(_right->eval_number(c, stack)); 10523 10524 if (is_nan(first)) return xpath_string(); // NaN 10525 else if (first >= s_length + 1) return xpath_string(); 10526 10527 size_t pos = first < 1 ? 1 : static_cast<size_t>(first); 10528 assert(1 <= pos && pos <= s_length + 1); 10529 10530 const char_t* rbegin = s.c_str() + (pos - 1); 10531 const char_t* rend = s.c_str() + s.length(); 10532 10533 return s.uses_heap() ? 
xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); 10534 } 10535 10536 case ast_func_substring_3: 10537 { 10538 xpath_allocator_capture cr(stack.temp); 10539 10540 xpath_stack swapped_stack = {stack.temp, stack.result}; 10541 10542 xpath_string s = _left->eval_string(c, swapped_stack); 10543 size_t s_length = s.length(); 10544 10545 double first = round_nearest(_right->eval_number(c, stack)); 10546 double last = first + round_nearest(_right->_next->eval_number(c, stack)); 10547 10548 if (is_nan(first) || is_nan(last)) return xpath_string(); 10549 else if (first >= s_length + 1) return xpath_string(); 10550 else if (first >= last) return xpath_string(); 10551 else if (last < 1) return xpath_string(); 10552 10553 size_t pos = first < 1 ? 1 : static_cast<size_t>(first); 10554 size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last); 10555 10556 assert(1 <= pos && pos <= end && end <= s_length + 1); 10557 const char_t* rbegin = s.c_str() + (pos - 1); 10558 const char_t* rend = s.c_str() + (end - 1); 10559 10560 return (end == s_length + 1 && !s.uses_heap()) ? 
xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result); 10561 } 10562 10563 case ast_func_normalize_space_0: 10564 { 10565 xpath_string s = string_value(c.n, stack.result); 10566 10567 char_t* begin = s.data(stack.result); 10568 char_t* end = normalize_space(begin); 10569 10570 return xpath_string::from_heap_preallocated(begin, end); 10571 } 10572 10573 case ast_func_normalize_space_1: 10574 { 10575 xpath_string s = _left->eval_string(c, stack); 10576 10577 char_t* begin = s.data(stack.result); 10578 char_t* end = normalize_space(begin); 10579 10580 return xpath_string::from_heap_preallocated(begin, end); 10581 } 10582 10583 case ast_func_translate: 10584 { 10585 xpath_allocator_capture cr(stack.temp); 10586 10587 xpath_stack swapped_stack = {stack.temp, stack.result}; 10588 10589 xpath_string s = _left->eval_string(c, stack); 10590 xpath_string from = _right->eval_string(c, swapped_stack); 10591 xpath_string to = _right->_next->eval_string(c, swapped_stack); 10592 10593 char_t* begin = s.data(stack.result); 10594 char_t* end = translate(begin, from.c_str(), to.c_str(), to.length()); 10595 10596 return xpath_string::from_heap_preallocated(begin, end); 10597 } 10598 10599 case ast_opt_translate_table: 10600 { 10601 xpath_string s = _left->eval_string(c, stack); 10602 10603 char_t* begin = s.data(stack.result); 10604 char_t* end = translate_table(begin, _data.table); 10605 10606 return xpath_string::from_heap_preallocated(begin, end); 10607 } 10608 10609 case ast_variable: 10610 { 10611 assert(_rettype == _data.variable->type()); 10612 10613 if (_rettype == xpath_type_string) 10614 return xpath_string::from_const(_data.variable->get_string()); 10615 10616 // fallthrough to type conversion 10617 } 10618 10619 default: 10620 { 10621 switch (_rettype) 10622 { 10623 case xpath_type_boolean: 10624 return xpath_string::from_const(eval_boolean(c, stack) ? 
PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); 10625 10626 case xpath_type_number: 10627 return convert_number_to_string(eval_number(c, stack), stack.result); 10628 10629 case xpath_type_node_set: 10630 { 10631 xpath_allocator_capture cr(stack.temp); 10632 10633 xpath_stack swapped_stack = {stack.temp, stack.result}; 10634 10635 xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first); 10636 return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result); 10637 } 10638 10639 default: 10640 assert(false && "Wrong expression for return type string"); 10641 return xpath_string(); 10642 } 10643 } 10644 } 10645 } 10646 eval_node_set(const xpath_context & c,const xpath_stack & stack,nodeset_eval_t eval)10647 xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval) 10648 { 10649 switch (_type) 10650 { 10651 case ast_op_union: 10652 { 10653 xpath_allocator_capture cr(stack.temp); 10654 10655 xpath_stack swapped_stack = {stack.temp, stack.result}; 10656 10657 xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval); 10658 xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval); 10659 10660 // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother 10661 rs.set_type(xpath_node_set::type_unsorted); 10662 10663 rs.append(ls.begin(), ls.end(), stack.result); 10664 rs.remove_duplicates(); 10665 10666 return rs; 10667 } 10668 10669 case ast_filter: 10670 { 10671 xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? 
nodeset_eval_first : nodeset_eval_all); 10672 10673 // either expression is a number or it contains position() call; sort by document order 10674 if (_test != predicate_posinv) set.sort_do(); 10675 10676 bool once = eval_once(set.type(), eval); 10677 10678 apply_predicate(set, 0, stack, once); 10679 10680 return set; 10681 } 10682 10683 case ast_func_id: 10684 return xpath_node_set_raw(); 10685 10686 case ast_step: 10687 { 10688 switch (_axis) 10689 { 10690 case axis_ancestor: 10691 return step_do(c, stack, eval, axis_to_type<axis_ancestor>()); 10692 10693 case axis_ancestor_or_self: 10694 return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>()); 10695 10696 case axis_attribute: 10697 return step_do(c, stack, eval, axis_to_type<axis_attribute>()); 10698 10699 case axis_child: 10700 return step_do(c, stack, eval, axis_to_type<axis_child>()); 10701 10702 case axis_descendant: 10703 return step_do(c, stack, eval, axis_to_type<axis_descendant>()); 10704 10705 case axis_descendant_or_self: 10706 return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>()); 10707 10708 case axis_following: 10709 return step_do(c, stack, eval, axis_to_type<axis_following>()); 10710 10711 case axis_following_sibling: 10712 return step_do(c, stack, eval, axis_to_type<axis_following_sibling>()); 10713 10714 case axis_namespace: 10715 // namespaced axis is not supported 10716 return xpath_node_set_raw(); 10717 10718 case axis_parent: 10719 return step_do(c, stack, eval, axis_to_type<axis_parent>()); 10720 10721 case axis_preceding: 10722 return step_do(c, stack, eval, axis_to_type<axis_preceding>()); 10723 10724 case axis_preceding_sibling: 10725 return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>()); 10726 10727 case axis_self: 10728 return step_do(c, stack, eval, axis_to_type<axis_self>()); 10729 10730 default: 10731 assert(false && "Unknown axis"); 10732 return xpath_node_set_raw(); 10733 } 10734 } 10735 10736 case ast_step_root: 10737 { 10738 
assert(!_right); // root step can't have any predicates 10739 10740 xpath_node_set_raw ns; 10741 10742 ns.set_type(xpath_node_set::type_sorted); 10743 10744 if (c.n.node()) ns.push_back(c.n.node().root(), stack.result); 10745 else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result); 10746 10747 return ns; 10748 } 10749 10750 case ast_variable: 10751 { 10752 assert(_rettype == _data.variable->type()); 10753 10754 if (_rettype == xpath_type_node_set) 10755 { 10756 const xpath_node_set& s = _data.variable->get_node_set(); 10757 10758 xpath_node_set_raw ns; 10759 10760 ns.set_type(s.type()); 10761 ns.append(s.begin(), s.end(), stack.result); 10762 10763 return ns; 10764 } 10765 10766 // fallthrough to type conversion 10767 } 10768 10769 default: 10770 assert(false && "Wrong expression for return type node set"); 10771 return xpath_node_set_raw(); 10772 } 10773 } 10774 optimize(xpath_allocator * alloc)10775 void optimize(xpath_allocator* alloc) 10776 { 10777 if (_left) 10778 _left->optimize(alloc); 10779 10780 if (_right) 10781 _right->optimize(alloc); 10782 10783 if (_next) 10784 _next->optimize(alloc); 10785 10786 optimize_self(alloc); 10787 } 10788 optimize_self(xpath_allocator * alloc)10789 void optimize_self(xpath_allocator* alloc) 10790 { 10791 // Rewrite [position()=expr] with [expr] 10792 // Note that this step has to go before classification to recognize [position()=1] 10793 if ((_type == ast_filter || _type == ast_predicate) && 10794 _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number) 10795 { 10796 _right = _right->_right; 10797 } 10798 10799 // Classify filter/predicate ops to perform various optimizations during evaluation 10800 if (_type == ast_filter || _type == ast_predicate) 10801 { 10802 assert(_test == predicate_default); 10803 10804 if (_right->_type == ast_number_constant && _right->_data.number == 1.0) 10805 _test = predicate_constant_one; 10806 else if 
(_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last)) 10807 _test = predicate_constant; 10808 else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr()) 10809 _test = predicate_posinv; 10810 } 10811 10812 // Rewrite descendant-or-self::node()/child::foo with descendant::foo 10813 // The former is a full form of //foo, the latter is much faster since it executes the node test immediately 10814 // Do a similar kind of rewrite for self/descendant/descendant-or-self axes 10815 // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1]) 10816 if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && _left && 10817 _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right && 10818 is_posinv_step()) 10819 { 10820 if (_axis == axis_child || _axis == axis_descendant) 10821 _axis = axis_descendant; 10822 else 10823 _axis = axis_descendant_or_self; 10824 10825 _left = _left->_left; 10826 } 10827 10828 // Use optimized lookup table implementation for translate() with constant arguments 10829 if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant) 10830 { 10831 unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string); 10832 10833 if (table) 10834 { 10835 _type = ast_opt_translate_table; 10836 _data.table = table; 10837 } 10838 } 10839 10840 // Use optimized path for @attr = 'value' or @attr = $value 10841 if (_type == ast_op_equal && 10842 _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right && 10843 (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == 
xpath_type_string))) 10844 { 10845 _type = ast_opt_compare_attribute; 10846 } 10847 } 10848 is_posinv_expr() const10849 bool is_posinv_expr() const 10850 { 10851 switch (_type) 10852 { 10853 case ast_func_position: 10854 case ast_func_last: 10855 return false; 10856 10857 case ast_string_constant: 10858 case ast_number_constant: 10859 case ast_variable: 10860 return true; 10861 10862 case ast_step: 10863 case ast_step_root: 10864 return true; 10865 10866 case ast_predicate: 10867 case ast_filter: 10868 return true; 10869 10870 default: 10871 if (_left && !_left->is_posinv_expr()) return false; 10872 10873 for (xpath_ast_node* n = _right; n; n = n->_next) 10874 if (!n->is_posinv_expr()) return false; 10875 10876 return true; 10877 } 10878 } 10879 is_posinv_step() const10880 bool is_posinv_step() const 10881 { 10882 assert(_type == ast_step); 10883 10884 for (xpath_ast_node* n = _right; n; n = n->_next) 10885 { 10886 assert(n->_type == ast_predicate); 10887 10888 if (n->_test != predicate_posinv) 10889 return false; 10890 } 10891 10892 return true; 10893 } 10894 rettype() const10895 xpath_value_type rettype() const 10896 { 10897 return static_cast<xpath_value_type>(_rettype); 10898 } 10899 }; 10900 10901 struct xpath_parser 10902 { 10903 xpath_allocator* _alloc; 10904 xpath_lexer _lexer; 10905 10906 const char_t* _query; 10907 xpath_variable_set* _variables; 10908 10909 xpath_parse_result* _result; 10910 10911 char_t _scratch[32]; 10912 10913 #ifdef PUGIXML_NO_EXCEPTIONS 10914 jmp_buf _error_handler; 10915 #endif 10916 throw_errorxpath_parser10917 void throw_error(const char* message) 10918 { 10919 _result->error = message; 10920 _result->offset = _lexer.current_pos() - _query; 10921 10922 #ifdef PUGIXML_NO_EXCEPTIONS 10923 longjmp(_error_handler, 1); 10924 #else 10925 throw xpath_exception(*_result); 10926 #endif 10927 } 10928 throw_error_oomxpath_parser10929 void throw_error_oom() 10930 { 10931 #ifdef PUGIXML_NO_EXCEPTIONS 10932 throw_error("Out of memory"); 10933 
#else 10934 throw std::bad_alloc(); 10935 #endif 10936 } 10937 alloc_nodexpath_parser10938 void* alloc_node() 10939 { 10940 void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node)); 10941 10942 if (!result) throw_error_oom(); 10943 10944 return result; 10945 } 10946 alloc_stringxpath_parser10947 const char_t* alloc_string(const xpath_lexer_string& value) 10948 { 10949 if (value.begin) 10950 { 10951 size_t length = static_cast<size_t>(value.end - value.begin); 10952 10953 char_t* c = static_cast<char_t*>(_alloc->allocate_nothrow((length + 1) * sizeof(char_t))); 10954 if (!c) throw_error_oom(); 10955 assert(c); // workaround for clang static analysis 10956 10957 memcpy(c, value.begin, length * sizeof(char_t)); 10958 c[length] = 0; 10959 10960 return c; 10961 } 10962 else return 0; 10963 } 10964 parse_function_helperxpath_parser10965 xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2]) 10966 { 10967 assert(argc <= 1); 10968 10969 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) 10970 throw_error("Function has to be applied to node set"); 10971 10972 return new (alloc_node()) xpath_ast_node(argc == 0 ? 
type0 : type1, xpath_type_string, args[0]); 10973 } 10974 parse_functionxpath_parser10975 xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) 10976 { 10977 switch (name.begin[0]) 10978 { 10979 case 'b': 10980 if (name == PUGIXML_TEXT("boolean") && argc == 1) 10981 return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]); 10982 10983 break; 10984 10985 case 'c': 10986 if (name == PUGIXML_TEXT("count") && argc == 1) 10987 { 10988 if (args[0]->rettype() != xpath_type_node_set) 10989 throw_error("Function has to be applied to node set"); 10990 10991 return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]); 10992 } 10993 else if (name == PUGIXML_TEXT("contains") && argc == 2) 10994 return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]); 10995 else if (name == PUGIXML_TEXT("concat") && argc >= 2) 10996 return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]); 10997 else if (name == PUGIXML_TEXT("ceiling") && argc == 1) 10998 return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]); 10999 11000 break; 11001 11002 case 'f': 11003 if (name == PUGIXML_TEXT("false") && argc == 0) 11004 return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean); 11005 else if (name == PUGIXML_TEXT("floor") && argc == 1) 11006 return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]); 11007 11008 break; 11009 11010 case 'i': 11011 if (name == PUGIXML_TEXT("id") && argc == 1) 11012 return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]); 11013 11014 break; 11015 11016 case 'l': 11017 if (name == PUGIXML_TEXT("last") && argc == 0) 11018 return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number); 11019 else if (name == PUGIXML_TEXT("lang") && argc == 1) 11020 return new (alloc_node()) 
xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]); 11021 else if (name == PUGIXML_TEXT("local-name") && argc <= 1) 11022 return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args); 11023 11024 break; 11025 11026 case 'n': 11027 if (name == PUGIXML_TEXT("name") && argc <= 1) 11028 return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args); 11029 else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1) 11030 return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args); 11031 else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) 11032 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); 11033 else if (name == PUGIXML_TEXT("not") && argc == 1) 11034 return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]); 11035 else if (name == PUGIXML_TEXT("number") && argc <= 1) 11036 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); 11037 11038 break; 11039 11040 case 'p': 11041 if (name == PUGIXML_TEXT("position") && argc == 0) 11042 return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number); 11043 11044 break; 11045 11046 case 'r': 11047 if (name == PUGIXML_TEXT("round") && argc == 1) 11048 return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]); 11049 11050 break; 11051 11052 case 's': 11053 if (name == PUGIXML_TEXT("string") && argc <= 1) 11054 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); 11055 else if (name == PUGIXML_TEXT("string-length") && argc <= 1) 11056 return new (alloc_node()) xpath_ast_node(argc == 0 ? 
ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]); 11057 else if (name == PUGIXML_TEXT("starts-with") && argc == 2) 11058 return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); 11059 else if (name == PUGIXML_TEXT("substring-before") && argc == 2) 11060 return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); 11061 else if (name == PUGIXML_TEXT("substring-after") && argc == 2) 11062 return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); 11063 else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3)) 11064 return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); 11065 else if (name == PUGIXML_TEXT("sum") && argc == 1) 11066 { 11067 if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); 11068 return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]); 11069 } 11070 11071 break; 11072 11073 case 't': 11074 if (name == PUGIXML_TEXT("translate") && argc == 3) 11075 return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]); 11076 else if (name == PUGIXML_TEXT("true") && argc == 0) 11077 return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean); 11078 11079 break; 11080 11081 default: 11082 break; 11083 } 11084 11085 throw_error("Unrecognized function or wrong parameter count"); 11086 11087 return 0; 11088 } 11089 parse_axis_namexpath_parser11090 axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) 11091 { 11092 specified = true; 11093 11094 switch (name.begin[0]) 11095 { 11096 case 'a': 11097 if (name == PUGIXML_TEXT("ancestor")) 11098 return axis_ancestor; 11099 else if (name == PUGIXML_TEXT("ancestor-or-self")) 11100 return axis_ancestor_or_self; 11101 else if 
(name == PUGIXML_TEXT("attribute")) 11102 return axis_attribute; 11103 11104 break; 11105 11106 case 'c': 11107 if (name == PUGIXML_TEXT("child")) 11108 return axis_child; 11109 11110 break; 11111 11112 case 'd': 11113 if (name == PUGIXML_TEXT("descendant")) 11114 return axis_descendant; 11115 else if (name == PUGIXML_TEXT("descendant-or-self")) 11116 return axis_descendant_or_self; 11117 11118 break; 11119 11120 case 'f': 11121 if (name == PUGIXML_TEXT("following")) 11122 return axis_following; 11123 else if (name == PUGIXML_TEXT("following-sibling")) 11124 return axis_following_sibling; 11125 11126 break; 11127 11128 case 'n': 11129 if (name == PUGIXML_TEXT("namespace")) 11130 return axis_namespace; 11131 11132 break; 11133 11134 case 'p': 11135 if (name == PUGIXML_TEXT("parent")) 11136 return axis_parent; 11137 else if (name == PUGIXML_TEXT("preceding")) 11138 return axis_preceding; 11139 else if (name == PUGIXML_TEXT("preceding-sibling")) 11140 return axis_preceding_sibling; 11141 11142 break; 11143 11144 case 's': 11145 if (name == PUGIXML_TEXT("self")) 11146 return axis_self; 11147 11148 break; 11149 11150 default: 11151 break; 11152 } 11153 11154 specified = false; 11155 return axis_child; 11156 } 11157 parse_node_test_typexpath_parser11158 nodetest_t parse_node_test_type(const xpath_lexer_string& name) 11159 { 11160 switch (name.begin[0]) 11161 { 11162 case 'c': 11163 if (name == PUGIXML_TEXT("comment")) 11164 return nodetest_type_comment; 11165 11166 break; 11167 11168 case 'n': 11169 if (name == PUGIXML_TEXT("node")) 11170 return nodetest_type_node; 11171 11172 break; 11173 11174 case 'p': 11175 if (name == PUGIXML_TEXT("processing-instruction")) 11176 return nodetest_type_pi; 11177 11178 break; 11179 11180 case 't': 11181 if (name == PUGIXML_TEXT("text")) 11182 return nodetest_type_text; 11183 11184 break; 11185 11186 default: 11187 break; 11188 } 11189 11190 return nodetest_none; 11191 } 11192 11193 // PrimaryExpr ::= VariableReference | '(' Expr ')' | 
Literal | Number | FunctionCall parse_primary_expressionxpath_parser11194 xpath_ast_node* parse_primary_expression() 11195 { 11196 switch (_lexer.current()) 11197 { 11198 case lex_var_ref: 11199 { 11200 xpath_lexer_string name = _lexer.contents(); 11201 11202 if (!_variables) 11203 throw_error("Unknown variable: variable set is not provided"); 11204 11205 xpath_variable* var = 0; 11206 if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var)) 11207 throw_error_oom(); 11208 11209 if (!var) 11210 throw_error("Unknown variable: variable set does not contain the given name"); 11211 11212 _lexer.next(); 11213 11214 return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var); 11215 } 11216 11217 case lex_open_brace: 11218 { 11219 _lexer.next(); 11220 11221 xpath_ast_node* n = parse_expression(); 11222 11223 if (_lexer.current() != lex_close_brace) 11224 throw_error("Unmatched braces"); 11225 11226 _lexer.next(); 11227 11228 return n; 11229 } 11230 11231 case lex_quoted_string: 11232 { 11233 const char_t* value = alloc_string(_lexer.contents()); 11234 11235 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value); 11236 _lexer.next(); 11237 11238 return n; 11239 } 11240 11241 case lex_number: 11242 { 11243 double value = 0; 11244 11245 if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value)) 11246 throw_error_oom(); 11247 11248 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value); 11249 _lexer.next(); 11250 11251 return n; 11252 } 11253 11254 case lex_string: 11255 { 11256 xpath_ast_node* args[2] = {0}; 11257 size_t argc = 0; 11258 11259 xpath_lexer_string function = _lexer.contents(); 11260 _lexer.next(); 11261 11262 xpath_ast_node* last_arg = 0; 11263 11264 if (_lexer.current() != lex_open_brace) 11265 throw_error("Unrecognized function call"); 11266 _lexer.next(); 11267 11268 if (_lexer.current() != 
lex_close_brace) 11269 args[argc++] = parse_expression(); 11270 11271 while (_lexer.current() != lex_close_brace) 11272 { 11273 if (_lexer.current() != lex_comma) 11274 throw_error("No comma between function arguments"); 11275 _lexer.next(); 11276 11277 xpath_ast_node* n = parse_expression(); 11278 11279 if (argc < 2) args[argc] = n; 11280 else last_arg->set_next(n); 11281 11282 argc++; 11283 last_arg = n; 11284 } 11285 11286 _lexer.next(); 11287 11288 return parse_function(function, argc, args); 11289 } 11290 11291 default: 11292 throw_error("Unrecognizable primary expression"); 11293 11294 return 0; 11295 } 11296 } 11297 11298 // FilterExpr ::= PrimaryExpr | FilterExpr Predicate 11299 // Predicate ::= '[' PredicateExpr ']' 11300 // PredicateExpr ::= Expr parse_filter_expressionxpath_parser11301 xpath_ast_node* parse_filter_expression() 11302 { 11303 xpath_ast_node* n = parse_primary_expression(); 11304 11305 while (_lexer.current() == lex_open_square_brace) 11306 { 11307 _lexer.next(); 11308 11309 xpath_ast_node* expr = parse_expression(); 11310 11311 if (n->rettype() != xpath_type_node_set) 11312 throw_error("Predicate has to be applied to node set"); 11313 11314 n = new (alloc_node()) xpath_ast_node(ast_filter, n, expr, predicate_default); 11315 11316 if (_lexer.current() != lex_close_square_brace) 11317 throw_error("Unmatched square brace"); 11318 11319 _lexer.next(); 11320 } 11321 11322 return n; 11323 } 11324 11325 // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep 11326 // AxisSpecifier ::= AxisName '::' | '@'? 11327 // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')' 11328 // NameTest ::= '*' | NCName ':' '*' | QName 11329 // AbbreviatedStep ::= '.' | '..' 
parse_stepxpath_parser11330 xpath_ast_node* parse_step(xpath_ast_node* set) 11331 { 11332 if (set && set->rettype() != xpath_type_node_set) 11333 throw_error("Step has to be applied to node set"); 11334 11335 bool axis_specified = false; 11336 axis_t axis = axis_child; // implied child axis 11337 11338 if (_lexer.current() == lex_axis_attribute) 11339 { 11340 axis = axis_attribute; 11341 axis_specified = true; 11342 11343 _lexer.next(); 11344 } 11345 else if (_lexer.current() == lex_dot) 11346 { 11347 _lexer.next(); 11348 11349 return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0); 11350 } 11351 else if (_lexer.current() == lex_double_dot) 11352 { 11353 _lexer.next(); 11354 11355 return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0); 11356 } 11357 11358 nodetest_t nt_type = nodetest_none; 11359 xpath_lexer_string nt_name; 11360 11361 if (_lexer.current() == lex_string) 11362 { 11363 // node name test 11364 nt_name = _lexer.contents(); 11365 _lexer.next(); 11366 11367 // was it an axis name? 
11368 if (_lexer.current() == lex_double_colon) 11369 { 11370 // parse axis name 11371 if (axis_specified) 11372 throw_error("Two axis specifiers in one step"); 11373 11374 axis = parse_axis_name(nt_name, axis_specified); 11375 11376 if (!axis_specified) 11377 throw_error("Unknown axis"); 11378 11379 // read actual node test 11380 _lexer.next(); 11381 11382 if (_lexer.current() == lex_multiply) 11383 { 11384 nt_type = nodetest_all; 11385 nt_name = xpath_lexer_string(); 11386 _lexer.next(); 11387 } 11388 else if (_lexer.current() == lex_string) 11389 { 11390 nt_name = _lexer.contents(); 11391 _lexer.next(); 11392 } 11393 else throw_error("Unrecognized node test"); 11394 } 11395 11396 if (nt_type == nodetest_none) 11397 { 11398 // node type test or processing-instruction 11399 if (_lexer.current() == lex_open_brace) 11400 { 11401 _lexer.next(); 11402 11403 if (_lexer.current() == lex_close_brace) 11404 { 11405 _lexer.next(); 11406 11407 nt_type = parse_node_test_type(nt_name); 11408 11409 if (nt_type == nodetest_none) 11410 throw_error("Unrecognized node type"); 11411 11412 nt_name = xpath_lexer_string(); 11413 } 11414 else if (nt_name == PUGIXML_TEXT("processing-instruction")) 11415 { 11416 if (_lexer.current() != lex_quoted_string) 11417 throw_error("Only literals are allowed as arguments to processing-instruction()"); 11418 11419 nt_type = nodetest_pi; 11420 nt_name = _lexer.contents(); 11421 _lexer.next(); 11422 11423 if (_lexer.current() != lex_close_brace) 11424 throw_error("Unmatched brace near processing-instruction()"); 11425 _lexer.next(); 11426 } 11427 else 11428 { 11429 throw_error("Unmatched brace near node type test"); 11430 } 11431 } 11432 // QName or NCName:* 11433 else 11434 { 11435 if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:* 11436 { 11437 nt_name.end--; // erase * 11438 11439 nt_type = nodetest_all_in_namespace; 11440 } 11441 else 11442 { 11443 nt_type = nodetest_name; 11444 } 11445 } 11446 } 
11447 } 11448 else if (_lexer.current() == lex_multiply) 11449 { 11450 nt_type = nodetest_all; 11451 _lexer.next(); 11452 } 11453 else 11454 { 11455 throw_error("Unrecognized node test"); 11456 } 11457 11458 const char_t* nt_name_copy = alloc_string(nt_name); 11459 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, nt_name_copy); 11460 11461 xpath_ast_node* last = 0; 11462 11463 while (_lexer.current() == lex_open_square_brace) 11464 { 11465 _lexer.next(); 11466 11467 xpath_ast_node* expr = parse_expression(); 11468 11469 xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, 0, expr, predicate_default); 11470 11471 if (_lexer.current() != lex_close_square_brace) 11472 throw_error("Unmatched square brace"); 11473 _lexer.next(); 11474 11475 if (last) last->set_next(pred); 11476 else n->set_right(pred); 11477 11478 last = pred; 11479 } 11480 11481 return n; 11482 } 11483 11484 // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step parse_relative_location_pathxpath_parser11485 xpath_ast_node* parse_relative_location_path(xpath_ast_node* set) 11486 { 11487 xpath_ast_node* n = parse_step(set); 11488 11489 while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) 11490 { 11491 lexeme_t l = _lexer.current(); 11492 _lexer.next(); 11493 11494 if (l == lex_double_slash) 11495 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); 11496 11497 n = parse_step(n); 11498 } 11499 11500 return n; 11501 } 11502 11503 // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath 11504 // AbsoluteLocationPath ::= '/' RelativeLocationPath? 
| '//' RelativeLocationPath parse_location_pathxpath_parser11505 xpath_ast_node* parse_location_path() 11506 { 11507 if (_lexer.current() == lex_slash) 11508 { 11509 _lexer.next(); 11510 11511 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set); 11512 11513 // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path 11514 lexeme_t l = _lexer.current(); 11515 11516 if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply) 11517 return parse_relative_location_path(n); 11518 else 11519 return n; 11520 } 11521 else if (_lexer.current() == lex_double_slash) 11522 { 11523 _lexer.next(); 11524 11525 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set); 11526 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); 11527 11528 return parse_relative_location_path(n); 11529 } 11530 11531 // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1 11532 return parse_relative_location_path(0); 11533 } 11534 11535 // PathExpr ::= LocationPath 11536 // | FilterExpr 11537 // | FilterExpr '/' RelativeLocationPath 11538 // | FilterExpr '//' RelativeLocationPath 11539 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr 11540 // UnaryExpr ::= UnionExpr | '-' UnaryExpr parse_path_or_unary_expressionxpath_parser11541 xpath_ast_node* parse_path_or_unary_expression() 11542 { 11543 // Clarification. 11544 // PathExpr begins with either LocationPath or FilterExpr. 11545 // FilterExpr begins with PrimaryExpr 11546 // PrimaryExpr begins with '$' in case of it being a variable reference, 11547 // '(' in case of it being an expression, string literal, number constant or 11548 // function call. 
11549 11550 if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace || 11551 _lexer.current() == lex_quoted_string || _lexer.current() == lex_number || 11552 _lexer.current() == lex_string) 11553 { 11554 if (_lexer.current() == lex_string) 11555 { 11556 // This is either a function call, or not - if not, we shall proceed with location path 11557 const char_t* state = _lexer.state(); 11558 11559 while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state; 11560 11561 if (*state != '(') return parse_location_path(); 11562 11563 // This looks like a function call; however this still can be a node-test. Check it. 11564 if (parse_node_test_type(_lexer.contents()) != nodetest_none) 11565 return parse_location_path(); 11566 } 11567 11568 xpath_ast_node* n = parse_filter_expression(); 11569 11570 if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) 11571 { 11572 lexeme_t l = _lexer.current(); 11573 _lexer.next(); 11574 11575 if (l == lex_double_slash) 11576 { 11577 if (n->rettype() != xpath_type_node_set) 11578 throw_error("Step has to be applied to node set"); 11579 11580 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); 11581 } 11582 11583 // select from location path 11584 return parse_relative_location_path(n); 11585 } 11586 11587 return n; 11588 } 11589 else if (_lexer.current() == lex_minus) 11590 { 11591 _lexer.next(); 11592 11593 // precedence 7+ - only parses union expressions 11594 xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7); 11595 11596 return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr); 11597 } 11598 else 11599 { 11600 return parse_location_path(); 11601 } 11602 } 11603 11604 struct binary_op_t 11605 { 11606 ast_type_t asttype; 11607 xpath_value_type rettype; 11608 int precedence; 11609 binary_op_txpath_parser::binary_op_t11610 binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0) 11611 { 
11612 } 11613 binary_op_txpath_parser::binary_op_t11614 binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_) 11615 { 11616 } 11617 parsexpath_parser::binary_op_t11618 static binary_op_t parse(xpath_lexer& lexer) 11619 { 11620 switch (lexer.current()) 11621 { 11622 case lex_string: 11623 if (lexer.contents() == PUGIXML_TEXT("or")) 11624 return binary_op_t(ast_op_or, xpath_type_boolean, 1); 11625 else if (lexer.contents() == PUGIXML_TEXT("and")) 11626 return binary_op_t(ast_op_and, xpath_type_boolean, 2); 11627 else if (lexer.contents() == PUGIXML_TEXT("div")) 11628 return binary_op_t(ast_op_divide, xpath_type_number, 6); 11629 else if (lexer.contents() == PUGIXML_TEXT("mod")) 11630 return binary_op_t(ast_op_mod, xpath_type_number, 6); 11631 else 11632 return binary_op_t(); 11633 11634 case lex_equal: 11635 return binary_op_t(ast_op_equal, xpath_type_boolean, 3); 11636 11637 case lex_not_equal: 11638 return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3); 11639 11640 case lex_less: 11641 return binary_op_t(ast_op_less, xpath_type_boolean, 4); 11642 11643 case lex_greater: 11644 return binary_op_t(ast_op_greater, xpath_type_boolean, 4); 11645 11646 case lex_less_or_equal: 11647 return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4); 11648 11649 case lex_greater_or_equal: 11650 return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4); 11651 11652 case lex_plus: 11653 return binary_op_t(ast_op_add, xpath_type_number, 5); 11654 11655 case lex_minus: 11656 return binary_op_t(ast_op_subtract, xpath_type_number, 5); 11657 11658 case lex_multiply: 11659 return binary_op_t(ast_op_multiply, xpath_type_number, 6); 11660 11661 case lex_union: 11662 return binary_op_t(ast_op_union, xpath_type_node_set, 7); 11663 11664 default: 11665 return binary_op_t(); 11666 } 11667 } 11668 }; 11669 parse_expression_recxpath_parser11670 xpath_ast_node* 
parse_expression_rec(xpath_ast_node* lhs, int limit) 11671 { 11672 binary_op_t op = binary_op_t::parse(_lexer); 11673 11674 while (op.asttype != ast_unknown && op.precedence >= limit) 11675 { 11676 _lexer.next(); 11677 11678 xpath_ast_node* rhs = parse_path_or_unary_expression(); 11679 11680 binary_op_t nextop = binary_op_t::parse(_lexer); 11681 11682 while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence) 11683 { 11684 rhs = parse_expression_rec(rhs, nextop.precedence); 11685 11686 nextop = binary_op_t::parse(_lexer); 11687 } 11688 11689 if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set)) 11690 throw_error("Union operator has to be applied to node sets"); 11691 11692 lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs); 11693 11694 op = binary_op_t::parse(_lexer); 11695 } 11696 11697 return lhs; 11698 } 11699 11700 // Expr ::= OrExpr 11701 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr 11702 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr 11703 // EqualityExpr ::= RelationalExpr 11704 // | EqualityExpr '=' RelationalExpr 11705 // | EqualityExpr '!=' RelationalExpr 11706 // RelationalExpr ::= AdditiveExpr 11707 // | RelationalExpr '<' AdditiveExpr 11708 // | RelationalExpr '>' AdditiveExpr 11709 // | RelationalExpr '<=' AdditiveExpr 11710 // | RelationalExpr '>=' AdditiveExpr 11711 // AdditiveExpr ::= MultiplicativeExpr 11712 // | AdditiveExpr '+' MultiplicativeExpr 11713 // | AdditiveExpr '-' MultiplicativeExpr 11714 // MultiplicativeExpr ::= UnaryExpr 11715 // | MultiplicativeExpr '*' UnaryExpr 11716 // | MultiplicativeExpr 'div' UnaryExpr 11717 // | MultiplicativeExpr 'mod' UnaryExpr parse_expressionxpath_parser11718 xpath_ast_node* parse_expression() 11719 { 11720 return parse_expression_rec(parse_path_or_unary_expression(), 0); 11721 } 11722 xpath_parserxpath_parser11723 xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* 
alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result) 11724 { 11725 } 11726 parsexpath_parser11727 xpath_ast_node* parse() 11728 { 11729 xpath_ast_node* result = parse_expression(); 11730 11731 // check if there are unparsed tokens left 11732 if (_lexer.current() != lex_eof) 11733 throw_error("Incorrect query"); 11734 11735 return result; 11736 } 11737 parsexpath_parser11738 static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result) 11739 { 11740 xpath_parser parser(query, variables, alloc, result); 11741 11742 #ifdef PUGIXML_NO_EXCEPTIONS 11743 int error = setjmp(parser._error_handler); 11744 11745 return (error == 0) ? parser.parse() : 0; 11746 #else 11747 return parser.parse(); 11748 #endif 11749 } 11750 }; 11751 11752 struct xpath_query_impl 11753 { createxpath_query_impl11754 static xpath_query_impl* create() 11755 { 11756 void* memory = xml_memory::allocate(sizeof(xpath_query_impl)); 11757 if (!memory) return 0; 11758 11759 return new (memory) xpath_query_impl(); 11760 } 11761 destroyxpath_query_impl11762 static void destroy(xpath_query_impl* impl) 11763 { 11764 // free all allocated pages 11765 impl->alloc.release(); 11766 11767 // free allocator memory (with the first page) 11768 xml_memory::deallocate(impl); 11769 } 11770 xpath_query_implxpath_query_impl11771 xpath_query_impl(): root(0), alloc(&block) 11772 { 11773 block.next = 0; 11774 block.capacity = sizeof(block.data); 11775 } 11776 11777 xpath_ast_node* root; 11778 xpath_allocator alloc; 11779 xpath_memory_block block; 11780 }; 11781 evaluate_string_impl(xpath_query_impl * impl,const xpath_node & n,xpath_stack_data & sd)11782 PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd) 11783 { 11784 if (!impl) return xpath_string(); 11785 11786 #ifdef PUGIXML_NO_EXCEPTIONS 11787 if (setjmp(sd.error_handler)) return 
xpath_string(); 11788 #endif 11789 11790 xpath_context c(n, 1, 1); 11791 11792 return impl->root->eval_string(c, sd.stack); 11793 } 11794 evaluate_node_set_prepare(xpath_query_impl * impl)11795 PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl) 11796 { 11797 if (!impl) return 0; 11798 11799 if (impl->root->rettype() != xpath_type_node_set) 11800 { 11801 #ifdef PUGIXML_NO_EXCEPTIONS 11802 return 0; 11803 #else 11804 xpath_parse_result res; 11805 res.error = "Expression does not evaluate to node set"; 11806 11807 throw xpath_exception(res); 11808 #endif 11809 } 11810 11811 return impl->root; 11812 } 11813 PUGI__NS_END 11814 11815 namespace pugi 11816 { 11817 #ifndef PUGIXML_NO_EXCEPTIONS xpath_exception(const xpath_parse_result & result_)11818 PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_) 11819 { 11820 assert(_result.error); 11821 } 11822 what() const11823 PUGI__FN const char* xpath_exception::what() const throw() 11824 { 11825 return _result.error; 11826 } 11827 result() const11828 PUGI__FN const xpath_parse_result& xpath_exception::result() const 11829 { 11830 return _result; 11831 } 11832 #endif 11833 xpath_node()11834 PUGI__FN xpath_node::xpath_node() 11835 { 11836 } 11837 xpath_node(const xml_node & node_)11838 PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_) 11839 { 11840 } 11841 xpath_node(const xml_attribute & attribute_,const xml_node & parent_)11842 PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_) 11843 { 11844 } 11845 node() const11846 PUGI__FN xml_node xpath_node::node() const 11847 { 11848 return _attribute ? 
xml_node() : _node; 11849 } 11850 attribute() const11851 PUGI__FN xml_attribute xpath_node::attribute() const 11852 { 11853 return _attribute; 11854 } 11855 parent() const11856 PUGI__FN xml_node xpath_node::parent() const 11857 { 11858 return _attribute ? _node : _node.parent(); 11859 } 11860 unspecified_bool_xpath_node(xpath_node ***)11861 PUGI__FN static void unspecified_bool_xpath_node(xpath_node***) 11862 { 11863 } 11864 operator xpath_node::unspecified_bool_type() const11865 PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const 11866 { 11867 return (_node || _attribute) ? unspecified_bool_xpath_node : 0; 11868 } 11869 operator !() const11870 PUGI__FN bool xpath_node::operator!() const 11871 { 11872 return !(_node || _attribute); 11873 } 11874 operator ==(const xpath_node & n) const11875 PUGI__FN bool xpath_node::operator==(const xpath_node& n) const 11876 { 11877 return _node == n._node && _attribute == n._attribute; 11878 } 11879 operator !=(const xpath_node & n) const11880 PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const 11881 { 11882 return _node != n._node || _attribute != n._attribute; 11883 } 11884 11885 #ifdef __BORLANDC__ operator &&(const xpath_node & lhs,bool rhs)11886 PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs) 11887 { 11888 return (bool)lhs && rhs; 11889 } 11890 operator ||(const xpath_node & lhs,bool rhs)11891 PUGI__FN bool operator||(const xpath_node& lhs, bool rhs) 11892 { 11893 return (bool)lhs || rhs; 11894 } 11895 #endif 11896 _assign(const_iterator begin_,const_iterator end_,type_t type_)11897 PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_) 11898 { 11899 assert(begin_ <= end_); 11900 11901 size_t size_ = static_cast<size_t>(end_ - begin_); 11902 11903 if (size_ <= 1) 11904 { 11905 // deallocate old buffer 11906 if (_begin != &_storage) impl::xml_memory::deallocate(_begin); 11907 11908 // use internal buffer 11909 if (begin_ != end_) _storage = 
*begin_; 11910 11911 _begin = &_storage; 11912 _end = &_storage + size_; 11913 _type = type_; 11914 } 11915 else 11916 { 11917 // make heap copy 11918 xpath_node* storage = static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node))); 11919 11920 if (!storage) 11921 { 11922 #ifdef PUGIXML_NO_EXCEPTIONS 11923 return; 11924 #else 11925 throw std::bad_alloc(); 11926 #endif 11927 } 11928 11929 memcpy(storage, begin_, size_ * sizeof(xpath_node)); 11930 11931 // deallocate old buffer 11932 if (_begin != &_storage) impl::xml_memory::deallocate(_begin); 11933 11934 // finalize 11935 _begin = storage; 11936 _end = storage + size_; 11937 _type = type_; 11938 } 11939 } 11940 11941 #ifdef PUGIXML_HAS_MOVE _move(xpath_node_set & rhs)11942 PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) 11943 { 11944 _type = rhs._type; 11945 _storage = rhs._storage; 11946 _begin = (rhs._begin == &rhs._storage) ? &_storage : rhs._begin; 11947 _end = _begin + (rhs._end - rhs._begin); 11948 11949 rhs._type = type_unsorted; 11950 rhs._begin = &rhs._storage; 11951 rhs._end = rhs._begin; 11952 } 11953 #endif 11954 xpath_node_set()11955 PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage) 11956 { 11957 } 11958 xpath_node_set(const_iterator begin_,const_iterator end_,type_t type_)11959 PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(&_storage), _end(&_storage) 11960 { 11961 _assign(begin_, end_, type_); 11962 } 11963 ~xpath_node_set()11964 PUGI__FN xpath_node_set::~xpath_node_set() 11965 { 11966 if (_begin != &_storage) 11967 impl::xml_memory::deallocate(_begin); 11968 } 11969 xpath_node_set(const xpath_node_set & ns)11970 PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(&_storage), _end(&_storage) 11971 { 11972 _assign(ns._begin, ns._end, ns._type); 11973 } 11974 operator =(const xpath_node_set & 
ns)11975 PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns) 11976 { 11977 if (this == &ns) return *this; 11978 11979 _assign(ns._begin, ns._end, ns._type); 11980 11981 return *this; 11982 } 11983 11984 #ifdef PUGIXML_HAS_MOVE xpath_node_set(xpath_node_set && rhs)11985 PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs): _type(type_unsorted), _begin(&_storage), _end(&_storage) 11986 { 11987 _move(rhs); 11988 } 11989 operator =(xpath_node_set && rhs)11990 PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) 11991 { 11992 if (this == &rhs) return *this; 11993 11994 if (_begin != &_storage) 11995 impl::xml_memory::deallocate(_begin); 11996 11997 _move(rhs); 11998 11999 return *this; 12000 } 12001 #endif 12002 type() const12003 PUGI__FN xpath_node_set::type_t xpath_node_set::type() const 12004 { 12005 return _type; 12006 } 12007 size() const12008 PUGI__FN size_t xpath_node_set::size() const 12009 { 12010 return _end - _begin; 12011 } 12012 empty() const12013 PUGI__FN bool xpath_node_set::empty() const 12014 { 12015 return _begin == _end; 12016 } 12017 operator [](size_t index) const12018 PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const 12019 { 12020 assert(index < size()); 12021 return _begin[index]; 12022 } 12023 begin() const12024 PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const 12025 { 12026 return _begin; 12027 } 12028 end() const12029 PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const 12030 { 12031 return _end; 12032 } 12033 sort(bool reverse)12034 PUGI__FN void xpath_node_set::sort(bool reverse) 12035 { 12036 _type = impl::xpath_sort(_begin, _end, _type, reverse); 12037 } 12038 first() const12039 PUGI__FN xpath_node xpath_node_set::first() const 12040 { 12041 return impl::xpath_first(_begin, _end, _type); 12042 } 12043 xpath_parse_result()12044 PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0) 12045 { 12046 } 
12047 operator bool() const12048 PUGI__FN xpath_parse_result::operator bool() const 12049 { 12050 return error == 0; 12051 } 12052 description() const12053 PUGI__FN const char* xpath_parse_result::description() const 12054 { 12055 return error ? error : "No error"; 12056 } 12057 xpath_variable(xpath_value_type type_)12058 PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0) 12059 { 12060 } 12061 name() const12062 PUGI__FN const char_t* xpath_variable::name() const 12063 { 12064 switch (_type) 12065 { 12066 case xpath_type_node_set: 12067 return static_cast<const impl::xpath_variable_node_set*>(this)->name; 12068 12069 case xpath_type_number: 12070 return static_cast<const impl::xpath_variable_number*>(this)->name; 12071 12072 case xpath_type_string: 12073 return static_cast<const impl::xpath_variable_string*>(this)->name; 12074 12075 case xpath_type_boolean: 12076 return static_cast<const impl::xpath_variable_boolean*>(this)->name; 12077 12078 default: 12079 assert(false && "Invalid variable type"); 12080 return 0; 12081 } 12082 } 12083 type() const12084 PUGI__FN xpath_value_type xpath_variable::type() const 12085 { 12086 return _type; 12087 } 12088 get_boolean() const12089 PUGI__FN bool xpath_variable::get_boolean() const 12090 { 12091 return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false; 12092 } 12093 get_number() const12094 PUGI__FN double xpath_variable::get_number() const 12095 { 12096 return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan(); 12097 } 12098 get_string() const12099 PUGI__FN const char_t* xpath_variable::get_string() const 12100 { 12101 const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0; 12102 return value ? 
value : PUGIXML_TEXT(""); 12103 } 12104 get_node_set() const12105 PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const 12106 { 12107 return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set; 12108 } 12109 set(bool value)12110 PUGI__FN bool xpath_variable::set(bool value) 12111 { 12112 if (_type != xpath_type_boolean) return false; 12113 12114 static_cast<impl::xpath_variable_boolean*>(this)->value = value; 12115 return true; 12116 } 12117 set(double value)12118 PUGI__FN bool xpath_variable::set(double value) 12119 { 12120 if (_type != xpath_type_number) return false; 12121 12122 static_cast<impl::xpath_variable_number*>(this)->value = value; 12123 return true; 12124 } 12125 set(const char_t * value)12126 PUGI__FN bool xpath_variable::set(const char_t* value) 12127 { 12128 if (_type != xpath_type_string) return false; 12129 12130 impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this); 12131 12132 // duplicate string 12133 size_t size = (impl::strlength(value) + 1) * sizeof(char_t); 12134 12135 char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size)); 12136 if (!copy) return false; 12137 12138 memcpy(copy, value, size); 12139 12140 // replace old string 12141 if (var->value) impl::xml_memory::deallocate(var->value); 12142 var->value = copy; 12143 12144 return true; 12145 } 12146 set(const xpath_node_set & value)12147 PUGI__FN bool xpath_variable::set(const xpath_node_set& value) 12148 { 12149 if (_type != xpath_type_node_set) return false; 12150 12151 static_cast<impl::xpath_variable_node_set*>(this)->value = value; 12152 return true; 12153 } 12154 xpath_variable_set()12155 PUGI__FN xpath_variable_set::xpath_variable_set() 12156 { 12157 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12158 _data[i] = 0; 12159 } 12160 ~xpath_variable_set()12161 PUGI__FN xpath_variable_set::~xpath_variable_set() 12162 { 12163 for (size_t i = 0; i < 
sizeof(_data) / sizeof(_data[0]); ++i) 12164 _destroy(_data[i]); 12165 } 12166 xpath_variable_set(const xpath_variable_set & rhs)12167 PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs) 12168 { 12169 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12170 _data[i] = 0; 12171 12172 _assign(rhs); 12173 } 12174 operator =(const xpath_variable_set & rhs)12175 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs) 12176 { 12177 if (this == &rhs) return *this; 12178 12179 _assign(rhs); 12180 12181 return *this; 12182 } 12183 12184 #ifdef PUGIXML_HAS_MOVE xpath_variable_set(xpath_variable_set && rhs)12185 PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) 12186 { 12187 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12188 { 12189 _data[i] = rhs._data[i]; 12190 rhs._data[i] = 0; 12191 } 12192 } 12193 operator =(xpath_variable_set && rhs)12194 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) 12195 { 12196 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12197 { 12198 _destroy(_data[i]); 12199 12200 _data[i] = rhs._data[i]; 12201 rhs._data[i] = 0; 12202 } 12203 12204 return *this; 12205 } 12206 #endif 12207 _assign(const xpath_variable_set & rhs)12208 PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs) 12209 { 12210 xpath_variable_set temp; 12211 12212 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12213 if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i])) 12214 return; 12215 12216 _swap(temp); 12217 } 12218 _swap(xpath_variable_set & rhs)12219 PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs) 12220 { 12221 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12222 { 12223 xpath_variable* chain = _data[i]; 12224 12225 _data[i] = rhs._data[i]; 12226 rhs._data[i] = chain; 12227 } 12228 } 12229 _find(const char_t * name) const12230 PUGI__FN 
xpath_variable* xpath_variable_set::_find(const char_t* name) const 12231 { 12232 const size_t hash_size = sizeof(_data) / sizeof(_data[0]); 12233 size_t hash = impl::hash_string(name) % hash_size; 12234 12235 // look for existing variable 12236 for (xpath_variable* var = _data[hash]; var; var = var->_next) 12237 if (impl::strequal(var->name(), name)) 12238 return var; 12239 12240 return 0; 12241 } 12242 _clone(xpath_variable * var,xpath_variable ** out_result)12243 PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result) 12244 { 12245 xpath_variable* last = 0; 12246 12247 while (var) 12248 { 12249 // allocate storage for new variable 12250 xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name()); 12251 if (!nvar) return false; 12252 12253 // link the variable to the result immediately to handle failures gracefully 12254 if (last) 12255 last->_next = nvar; 12256 else 12257 *out_result = nvar; 12258 12259 last = nvar; 12260 12261 // copy the value; this can fail due to out-of-memory conditions 12262 if (!impl::copy_xpath_variable(nvar, var)) return false; 12263 12264 var = var->_next; 12265 } 12266 12267 return true; 12268 } 12269 _destroy(xpath_variable * var)12270 PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var) 12271 { 12272 while (var) 12273 { 12274 xpath_variable* next = var->_next; 12275 12276 impl::delete_xpath_variable(var->_type, var); 12277 12278 var = next; 12279 } 12280 } 12281 add(const char_t * name,xpath_value_type type)12282 PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type) 12283 { 12284 const size_t hash_size = sizeof(_data) / sizeof(_data[0]); 12285 size_t hash = impl::hash_string(name) % hash_size; 12286 12287 // look for existing variable 12288 for (xpath_variable* var = _data[hash]; var; var = var->_next) 12289 if (impl::strequal(var->name(), name)) 12290 return var->type() == type ? 
var : 0; 12291 12292 // add new variable 12293 xpath_variable* result = impl::new_xpath_variable(type, name); 12294 12295 if (result) 12296 { 12297 result->_next = _data[hash]; 12298 12299 _data[hash] = result; 12300 } 12301 12302 return result; 12303 } 12304 set(const char_t * name,bool value)12305 PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value) 12306 { 12307 xpath_variable* var = add(name, xpath_type_boolean); 12308 return var ? var->set(value) : false; 12309 } 12310 set(const char_t * name,double value)12311 PUGI__FN bool xpath_variable_set::set(const char_t* name, double value) 12312 { 12313 xpath_variable* var = add(name, xpath_type_number); 12314 return var ? var->set(value) : false; 12315 } 12316 set(const char_t * name,const char_t * value)12317 PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value) 12318 { 12319 xpath_variable* var = add(name, xpath_type_string); 12320 return var ? var->set(value) : false; 12321 } 12322 set(const char_t * name,const xpath_node_set & value)12323 PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value) 12324 { 12325 xpath_variable* var = add(name, xpath_type_node_set); 12326 return var ? 
var->set(value) : false; 12327 } 12328 get(const char_t * name)12329 PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name) 12330 { 12331 return _find(name); 12332 } 12333 get(const char_t * name) const12334 PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const 12335 { 12336 return _find(name); 12337 } 12338 xpath_query(const char_t * query,xpath_variable_set * variables)12339 PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0) 12340 { 12341 impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create(); 12342 12343 if (!qimpl) 12344 { 12345 #ifdef PUGIXML_NO_EXCEPTIONS 12346 _result.error = "Out of memory"; 12347 #else 12348 throw std::bad_alloc(); 12349 #endif 12350 } 12351 else 12352 { 12353 using impl::auto_deleter; // MSVC7 workaround 12354 auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy); 12355 12356 qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result); 12357 12358 if (qimpl->root) 12359 { 12360 qimpl->root->optimize(&qimpl->alloc); 12361 12362 _impl = impl.release(); 12363 _result.error = 0; 12364 } 12365 } 12366 } 12367 xpath_query()12368 PUGI__FN xpath_query::xpath_query(): _impl(0) 12369 { 12370 } 12371 ~xpath_query()12372 PUGI__FN xpath_query::~xpath_query() 12373 { 12374 if (_impl) 12375 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl)); 12376 } 12377 12378 #ifdef PUGIXML_HAS_MOVE xpath_query(xpath_query && rhs)12379 PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) 12380 { 12381 _impl = rhs._impl; 12382 _result = rhs._result; 12383 rhs._impl = 0; 12384 rhs._result = xpath_parse_result(); 12385 } 12386 operator =(xpath_query && rhs)12387 PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) 12388 { 12389 if (this == &rhs) return *this; 12390 12391 if (_impl) 12392 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl)); 12393 12394 _impl = 
rhs._impl; 12395 _result = rhs._result; 12396 rhs._impl = 0; 12397 rhs._result = xpath_parse_result(); 12398 12399 return *this; 12400 } 12401 #endif 12402 return_type() const12403 PUGI__FN xpath_value_type xpath_query::return_type() const 12404 { 12405 if (!_impl) return xpath_type_none; 12406 12407 return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype(); 12408 } 12409 evaluate_boolean(const xpath_node & n) const12410 PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const 12411 { 12412 if (!_impl) return false; 12413 12414 impl::xpath_context c(n, 1, 1); 12415 impl::xpath_stack_data sd; 12416 12417 #ifdef PUGIXML_NO_EXCEPTIONS 12418 if (setjmp(sd.error_handler)) return false; 12419 #endif 12420 12421 return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack); 12422 } 12423 evaluate_number(const xpath_node & n) const12424 PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const 12425 { 12426 if (!_impl) return impl::gen_nan(); 12427 12428 impl::xpath_context c(n, 1, 1); 12429 impl::xpath_stack_data sd; 12430 12431 #ifdef PUGIXML_NO_EXCEPTIONS 12432 if (setjmp(sd.error_handler)) return impl::gen_nan(); 12433 #endif 12434 12435 return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack); 12436 } 12437 12438 #ifndef PUGIXML_NO_STL evaluate_string(const xpath_node & n) const12439 PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const 12440 { 12441 impl::xpath_stack_data sd; 12442 12443 impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd); 12444 12445 return string_t(r.c_str(), r.length()); 12446 } 12447 #endif 12448 evaluate_string(char_t * buffer,size_t capacity,const xpath_node & n) const12449 PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const 12450 { 12451 impl::xpath_stack_data sd; 12452 12453 impl::xpath_string r = 
impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd); 12454 12455 size_t full_size = r.length() + 1; 12456 12457 if (capacity > 0) 12458 { 12459 size_t size = (full_size < capacity) ? full_size : capacity; 12460 assert(size > 0); 12461 12462 memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t)); 12463 buffer[size - 1] = 0; 12464 } 12465 12466 return full_size; 12467 } 12468 evaluate_node_set(const xpath_node & n) const12469 PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const 12470 { 12471 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl)); 12472 if (!root) return xpath_node_set(); 12473 12474 impl::xpath_context c(n, 1, 1); 12475 impl::xpath_stack_data sd; 12476 12477 #ifdef PUGIXML_NO_EXCEPTIONS 12478 if (setjmp(sd.error_handler)) return xpath_node_set(); 12479 #endif 12480 12481 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all); 12482 12483 return xpath_node_set(r.begin(), r.end(), r.type()); 12484 } 12485 evaluate_node(const xpath_node & n) const12486 PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const 12487 { 12488 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl)); 12489 if (!root) return xpath_node(); 12490 12491 impl::xpath_context c(n, 1, 1); 12492 impl::xpath_stack_data sd; 12493 12494 #ifdef PUGIXML_NO_EXCEPTIONS 12495 if (setjmp(sd.error_handler)) return xpath_node(); 12496 #endif 12497 12498 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first); 12499 12500 return r.first(); 12501 } 12502 result() const12503 PUGI__FN const xpath_parse_result& xpath_query::result() const 12504 { 12505 return _result; 12506 } 12507 unspecified_bool_xpath_query(xpath_query ***)12508 PUGI__FN static void unspecified_bool_xpath_query(xpath_query***) 12509 { 12510 } 12511 operator xpath_query::unspecified_bool_type() 
const12512 PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const 12513 { 12514 return _impl ? unspecified_bool_xpath_query : 0; 12515 } 12516 operator !() const12517 PUGI__FN bool xpath_query::operator!() const 12518 { 12519 return !_impl; 12520 } 12521 select_node(const char_t * query,xpath_variable_set * variables) const12522 PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const 12523 { 12524 xpath_query q(query, variables); 12525 return select_node(q); 12526 } 12527 select_node(const xpath_query & query) const12528 PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const 12529 { 12530 return query.evaluate_node(*this); 12531 } 12532 select_nodes(const char_t * query,xpath_variable_set * variables) const12533 PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const 12534 { 12535 xpath_query q(query, variables); 12536 return select_nodes(q); 12537 } 12538 select_nodes(const xpath_query & query) const12539 PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const 12540 { 12541 return query.evaluate_node_set(*this); 12542 } 12543 select_single_node(const char_t * query,xpath_variable_set * variables) const12544 PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const 12545 { 12546 xpath_query q(query, variables); 12547 return select_single_node(q); 12548 } 12549 select_single_node(const xpath_query & query) const12550 PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const 12551 { 12552 return query.evaluate_node(*this); 12553 } 12554 } 12555 12556 #endif 12557 12558 #ifdef __BORLANDC__ 12559 # pragma option pop 12560 #endif 12561 12562 // Intel C++ does not properly keep warning state for function templates, 12563 // so popping warning state at the end of translation unit leads to warnings in the middle. 
12564 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) 12565 # pragma warning(pop) 12566 #endif 12567 12568 // Undefine all local macros (makes sure we're not leaking macros in header-only mode) 12569 #undef PUGI__NO_INLINE 12570 #undef PUGI__UNLIKELY 12571 #undef PUGI__STATIC_ASSERT 12572 #undef PUGI__DMC_VOLATILE 12573 #undef PUGI__MSVC_CRT_VERSION 12574 #undef PUGI__NS_BEGIN 12575 #undef PUGI__NS_END 12576 #undef PUGI__FN 12577 #undef PUGI__FN_NO_INLINE 12578 #undef PUGI__GETHEADER_IMPL 12579 #undef PUGI__GETPAGE_IMPL 12580 #undef PUGI__GETPAGE 12581 #undef PUGI__NODETYPE 12582 #undef PUGI__IS_CHARTYPE_IMPL 12583 #undef PUGI__IS_CHARTYPE 12584 #undef PUGI__IS_CHARTYPEX 12585 #undef PUGI__ENDSWITH 12586 #undef PUGI__SKIPWS 12587 #undef PUGI__OPTSET 12588 #undef PUGI__PUSHNODE 12589 #undef PUGI__POPNODE 12590 #undef PUGI__SCANFOR 12591 #undef PUGI__SCANWHILE 12592 #undef PUGI__SCANWHILE_UNROLL 12593 #undef PUGI__ENDSEG 12594 #undef PUGI__THROW_ERROR 12595 #undef PUGI__CHECK_ERROR 12596 12597 #endif 12598 12599 /** 12600 * Copyright (c) 2006-2016 Arseny Kapoulkine 12601 * 12602 * Permission is hereby granted, free of charge, to any person 12603 * obtaining a copy of this software and associated documentation 12604 * files (the "Software"), to deal in the Software without 12605 * restriction, including without limitation the rights to use, 12606 * copy, modify, merge, publish, distribute, sublicense, and/or sell 12607 * copies of the Software, and to permit persons to whom the 12608 * Software is furnished to do so, subject to the following 12609 * conditions: 12610 * 12611 * The above copyright notice and this permission notice shall be 12612 * included in all copies or substantial portions of the Software. 12613 * 12614 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 12615 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 12616 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 12617 * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 12618 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 12619 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 12620 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 12621 * OTHER DEALINGS IN THE SOFTWARE. 12622 */ 12623