1 /** 2 * pugixml parser - version 1.7 3 * -------------------------------------------------------- 4 * Copyright (C) 2006-2015, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) 5 * Report bugs and download new versions at http://pugixml.org/ 6 * 7 * This library is distributed under the MIT License. See notice at the end 8 * of this file. 9 * 10 * This work is based on the pugxml parser, which is: 11 * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) 12 */ 13 14 #ifndef SOURCE_PUGIXML_CPP 15 #define SOURCE_PUGIXML_CPP 16 17 #include "pugixml.hpp" 18 19 #include <stdlib.h> 20 #include <stdio.h> 21 #include <string.h> 22 #include <assert.h> 23 #include <limits.h> 24 25 #ifdef PUGIXML_WCHAR_MODE 26 # include <wchar.h> 27 #endif 28 29 #ifndef PUGIXML_NO_XPATH 30 # include <math.h> 31 # include <float.h> 32 # ifdef PUGIXML_NO_EXCEPTIONS 33 # include <setjmp.h> 34 # endif 35 #endif 36 37 #ifndef PUGIXML_NO_STL 38 # include <istream> 39 # include <ostream> 40 # include <string> 41 #endif 42 43 // For placement new 44 #include <new> 45 46 #ifdef _MSC_VER 47 # pragma warning(push) 48 # pragma warning(disable: 4127) // conditional expression is constant 49 # pragma warning(disable: 4324) // structure was padded due to __declspec(align()) 50 # pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable 51 # pragma warning(disable: 4702) // unreachable code 52 # pragma warning(disable: 4996) // this function or variable may be unsafe 53 # pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged 54 #endif 55 56 #ifdef __INTEL_COMPILER 57 # pragma warning(disable: 177) // function was declared but never referenced 58 # pragma warning(disable: 279) // controlling expression is constant 59 # pragma warning(disable: 1478 1786) // function was declared "deprecated" 60 # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type 61 #endif 62 63 #if 
defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY) 64 # pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away 65 #endif 66 67 #ifdef __BORLANDC__ 68 # pragma option push 69 # pragma warn -8008 // condition is always false 70 # pragma warn -8066 // unreachable code 71 #endif 72 73 #ifdef __SNC__ 74 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug 75 # pragma diag_suppress=178 // function was declared but never referenced 76 # pragma diag_suppress=237 // controlling expression is constant 77 #endif 78 79 // Inlining controls 80 #if defined(_MSC_VER) && _MSC_VER >= 1300 81 # define PUGI__NO_INLINE __declspec(noinline) 82 #elif defined(__GNUC__) 83 # define PUGI__NO_INLINE __attribute__((noinline)) 84 #else 85 # define PUGI__NO_INLINE 86 #endif 87 88 // Branch weight controls 89 #if defined(__GNUC__) 90 # define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0) 91 #else 92 # define PUGI__UNLIKELY(cond) (cond) 93 #endif 94 95 // Simple static assertion 96 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 
1 : -1] = {0}; (void)condition_failed[0]; } 97 98 // Digital Mars C++ bug workaround for passing char loaded from memory via stack 99 #ifdef __DMC__ 100 # define PUGI__DMC_VOLATILE volatile 101 #else 102 # define PUGI__DMC_VOLATILE 103 #endif 104 105 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all) 106 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST) 107 using std::memcpy; 108 using std::memmove; 109 using std::memset; 110 #endif 111 112 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features 113 #if defined(_MSC_VER) && !defined(__S3E__) 114 # define PUGI__MSVC_CRT_VERSION _MSC_VER 115 #endif 116 117 #ifdef PUGIXML_HEADER_ONLY 118 # define PUGI__NS_BEGIN namespace pugi { namespace impl { 119 # define PUGI__NS_END } } 120 # define PUGI__FN inline 121 # define PUGI__FN_NO_INLINE inline 122 #else 123 # if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces 124 # define PUGI__NS_BEGIN namespace pugi { namespace impl { 125 # define PUGI__NS_END } } 126 # else 127 # define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace { 128 # define PUGI__NS_END } } } 129 # endif 130 # define PUGI__FN 131 # define PUGI__FN_NO_INLINE PUGI__NO_INLINE 132 #endif 133 134 // uintptr_t 135 #if !defined(_MSC_VER) || _MSC_VER >= 1600 136 # include <stdint.h> 137 #else 138 namespace pugi 139 { 140 # ifndef _UINTPTR_T_DEFINED 141 typedef size_t uintptr_t; 142 # endif 143 144 typedef unsigned __int8 uint8_t; 145 typedef unsigned __int16 uint16_t; 146 typedef unsigned __int32 uint32_t; 147 } 148 #endif 149 150 // Memory allocation 151 PUGI__NS_BEGIN default_allocate(size_t size)152 PUGI__FN void* default_allocate(size_t size) 153 { 154 return malloc(size); 155 } 156 default_deallocate(void * ptr)157 PUGI__FN void default_deallocate(void* ptr) 158 { 159 
free(ptr); 160 } 161 162 template <typename T> 163 struct xml_memory_management_function_storage 164 { 165 static allocation_function allocate; 166 static deallocation_function deallocate; 167 }; 168 169 // Global allocation functions are stored in class statics so that in header mode linker deduplicates them 170 // Without a template<> we'll get multiple definitions of the same static 171 template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate; 172 template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate; 173 174 typedef xml_memory_management_function_storage<int> xml_memory; 175 PUGI__NS_END 176 177 // String utilities 178 PUGI__NS_BEGIN 179 // Get string length strlength(const char_t * s)180 PUGI__FN size_t strlength(const char_t* s) 181 { 182 assert(s); 183 184 #ifdef PUGIXML_WCHAR_MODE 185 return wcslen(s); 186 #else 187 return strlen(s); 188 #endif 189 } 190 191 // Compare two strings strequal(const char_t * src,const char_t * dst)192 PUGI__FN bool strequal(const char_t* src, const char_t* dst) 193 { 194 assert(src && dst); 195 196 #ifdef PUGIXML_WCHAR_MODE 197 return wcscmp(src, dst) == 0; 198 #else 199 return strcmp(src, dst) == 0; 200 #endif 201 } 202 203 // Compare lhs with [rhs_begin, rhs_end) strequalrange(const char_t * lhs,const char_t * rhs,size_t count)204 PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count) 205 { 206 for (size_t i = 0; i < count; ++i) 207 if (lhs[i] != rhs[i]) 208 return false; 209 210 return lhs[count] == 0; 211 } 212 213 // Get length of wide string, even if CRT lacks wide character support strlength_wide(const wchar_t * s)214 PUGI__FN size_t strlength_wide(const wchar_t* s) 215 { 216 assert(s); 217 218 #ifdef PUGIXML_WCHAR_MODE 219 return wcslen(s); 220 #else 221 const wchar_t* end = s; 222 while (*end) end++; 223 return static_cast<size_t>(end - s); 224 #endif 225 } 226 PUGI__NS_END 
227 228 // auto_ptr-like object for exception recovery 229 PUGI__NS_BEGIN 230 template <typename T, typename D = void(*)(T*)> struct auto_deleter 231 { 232 T* data; 233 D deleter; 234 auto_deleterauto_deleter235 auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_) 236 { 237 } 238 ~auto_deleterauto_deleter239 ~auto_deleter() 240 { 241 if (data) deleter(data); 242 } 243 releaseauto_deleter244 T* release() 245 { 246 T* result = data; 247 data = 0; 248 return result; 249 } 250 }; 251 PUGI__NS_END 252 253 #ifdef PUGIXML_COMPACT 254 PUGI__NS_BEGIN 255 class compact_hash_table 256 { 257 public: compact_hash_table()258 compact_hash_table(): _items(0), _capacity(0), _count(0) 259 { 260 } 261 clear()262 void clear() 263 { 264 if (_items) 265 { 266 xml_memory::deallocate(_items); 267 _items = 0; 268 _capacity = 0; 269 _count = 0; 270 } 271 } 272 find(const void * key)273 void** find(const void* key) 274 { 275 assert(key); 276 277 if (_capacity == 0) return 0; 278 279 size_t hashmod = _capacity - 1; 280 size_t bucket = hash(key) & hashmod; 281 282 for (size_t probe = 0; probe <= hashmod; ++probe) 283 { 284 item_t& probe_item = _items[bucket]; 285 286 if (probe_item.key == key) 287 return &probe_item.value; 288 289 if (probe_item.key == 0) 290 return 0; 291 292 // hash collision, quadratic probing 293 bucket = (bucket + probe + 1) & hashmod; 294 } 295 296 assert(!"Hash table is full"); 297 return 0; 298 } 299 insert(const void * key)300 void** insert(const void* key) 301 { 302 assert(key); 303 assert(_capacity != 0 && _count < _capacity - _capacity / 4); 304 305 size_t hashmod = _capacity - 1; 306 size_t bucket = hash(key) & hashmod; 307 308 for (size_t probe = 0; probe <= hashmod; ++probe) 309 { 310 item_t& probe_item = _items[bucket]; 311 312 if (probe_item.key == 0) 313 { 314 probe_item.key = key; 315 _count++; 316 return &probe_item.value; 317 } 318 319 if (probe_item.key == key) 320 return &probe_item.value; 321 322 // hash collision, quadratic probing 323 
bucket = (bucket + probe + 1) & hashmod; 324 } 325 326 assert(!"Hash table is full"); 327 return 0; 328 } 329 reserve()330 bool reserve() 331 { 332 if (_count + 16 >= _capacity - _capacity / 4) 333 return rehash(); 334 335 return true; 336 } 337 338 private: 339 struct item_t 340 { 341 const void* key; 342 void* value; 343 }; 344 345 item_t* _items; 346 size_t _capacity; 347 348 size_t _count; 349 350 bool rehash(); 351 hash(const void * key)352 static unsigned int hash(const void* key) 353 { 354 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key)); 355 356 // MurmurHash3 32-bit finalizer 357 h ^= h >> 16; 358 h *= 0x85ebca6bu; 359 h ^= h >> 13; 360 h *= 0xc2b2ae35u; 361 h ^= h >> 16; 362 363 return h; 364 } 365 }; 366 rehash()367 PUGI__FN_NO_INLINE bool compact_hash_table::rehash() 368 { 369 compact_hash_table rt; 370 rt._capacity = (_capacity == 0) ? 32 : _capacity * 2; 371 rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * rt._capacity)); 372 373 if (!rt._items) 374 return false; 375 376 memset(rt._items, 0, sizeof(item_t) * rt._capacity); 377 378 for (size_t i = 0; i < _capacity; ++i) 379 if (_items[i].key) 380 *rt.insert(_items[i].key) = _items[i].value; 381 382 if (_items) 383 xml_memory::deallocate(_items); 384 385 _capacity = rt._capacity; 386 _items = rt._items; 387 388 assert(_count == rt._count); 389 390 return true; 391 } 392 393 PUGI__NS_END 394 #endif 395 396 PUGI__NS_BEGIN 397 static const size_t xml_memory_page_size = 398 #ifdef PUGIXML_MEMORY_PAGE_SIZE 399 PUGIXML_MEMORY_PAGE_SIZE 400 #else 401 32768 402 #endif 403 ; 404 405 #ifdef PUGIXML_COMPACT 406 static const uintptr_t xml_memory_block_alignment = 4; 407 408 static const uintptr_t xml_memory_page_alignment = sizeof(void*); 409 #else 410 static const uintptr_t xml_memory_block_alignment = sizeof(void*); 411 412 static const uintptr_t xml_memory_page_alignment = 64; 413 static const uintptr_t xml_memory_page_pointer_mask = ~(xml_memory_page_alignment - 
1); 414 #endif 415 416 // extra metadata bits 417 static const uintptr_t xml_memory_page_contents_shared_mask = 32; 418 static const uintptr_t xml_memory_page_name_allocated_mask = 16; 419 static const uintptr_t xml_memory_page_value_allocated_mask = 8; 420 static const uintptr_t xml_memory_page_type_mask = 7; 421 422 // combined masks for string uniqueness 423 static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask; 424 static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask; 425 426 #ifdef PUGIXML_COMPACT 427 #define PUGI__GETPAGE_IMPL(header) (header).get_page() 428 #else 429 #define PUGI__GETPAGE_IMPL(header) reinterpret_cast<impl::xml_memory_page*>((header) & impl::xml_memory_page_pointer_mask) 430 #endif 431 432 #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header) 433 #define PUGI__NODETYPE(n) static_cast<xml_node_type>(((n)->header & impl::xml_memory_page_type_mask) + 1) 434 435 struct xml_allocator; 436 437 struct xml_memory_page 438 { constructxml_memory_page439 static xml_memory_page* construct(void* memory) 440 { 441 xml_memory_page* result = static_cast<xml_memory_page*>(memory); 442 443 result->allocator = 0; 444 result->prev = 0; 445 result->next = 0; 446 result->busy_size = 0; 447 result->freed_size = 0; 448 449 #ifdef PUGIXML_COMPACT 450 result->compact_string_base = 0; 451 result->compact_shared_parent = 0; 452 result->compact_page_marker = 0; 453 #endif 454 455 return result; 456 } 457 458 xml_allocator* allocator; 459 460 xml_memory_page* prev; 461 xml_memory_page* next; 462 463 size_t busy_size; 464 size_t freed_size; 465 466 #ifdef PUGIXML_COMPACT 467 char_t* compact_string_base; 468 void* compact_shared_parent; 469 uint32_t* compact_page_marker; 470 #endif 471 }; 472 473 struct xml_memory_string_header 474 { 475 uint16_t page_offset; // offset from page->data 
476 uint16_t full_size; // 0 if string occupies whole page 477 }; 478 479 struct xml_allocator 480 { xml_allocatorxml_allocator481 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size) 482 { 483 #ifdef PUGIXML_COMPACT 484 _hash = 0; 485 #endif 486 } 487 allocate_pagexml_allocator488 xml_memory_page* allocate_page(size_t data_size) 489 { 490 size_t size = sizeof(xml_memory_page) + data_size; 491 492 // allocate block with some alignment, leaving memory for worst-case padding 493 void* memory = xml_memory::allocate(size + xml_memory_page_alignment); 494 if (!memory) return 0; 495 496 // align to next page boundary (note: this guarantees at least 1 usable byte before the page) 497 char* page_memory = reinterpret_cast<char*>((reinterpret_cast<uintptr_t>(memory) + xml_memory_page_alignment) & ~(xml_memory_page_alignment - 1)); 498 499 // prepare page structure 500 xml_memory_page* page = xml_memory_page::construct(page_memory); 501 assert(page); 502 503 page->allocator = _root->allocator; 504 505 // record the offset for freeing the memory block 506 assert(page_memory > memory && page_memory - static_cast<char*>(memory) <= 127); 507 page_memory[-1] = static_cast<char>(page_memory - static_cast<char*>(memory)); 508 509 return page; 510 } 511 deallocate_pagexml_allocator512 static void deallocate_page(xml_memory_page* page) 513 { 514 char* page_memory = reinterpret_cast<char*>(page); 515 516 xml_memory::deallocate(page_memory - page_memory[-1]); 517 } 518 519 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page); 520 allocate_memoryxml_allocator521 void* allocate_memory(size_t size, xml_memory_page*& out_page) 522 { 523 if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size)) 524 return allocate_memory_oob(size, out_page); 525 526 void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size; 527 528 _busy_size += size; 529 530 out_page = _root; 531 532 return buf; 533 } 534 535 #ifdef PUGIXML_COMPACT 
allocate_objectxml_allocator536 void* allocate_object(size_t size, xml_memory_page*& out_page) 537 { 538 void* result = allocate_memory(size + sizeof(uint32_t), out_page); 539 if (!result) return 0; 540 541 // adjust for marker 542 ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker); 543 544 if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment)) 545 { 546 // insert new marker 547 uint32_t* marker = static_cast<uint32_t*>(result); 548 549 *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page)); 550 out_page->compact_page_marker = marker; 551 552 // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block 553 // this will make sure deallocate_memory correctly tracks the size 554 out_page->freed_size += sizeof(uint32_t); 555 556 return marker + 1; 557 } 558 else 559 { 560 // roll back uint32_t part 561 _busy_size -= sizeof(uint32_t); 562 563 return result; 564 } 565 } 566 #else allocate_objectxml_allocator567 void* allocate_object(size_t size, xml_memory_page*& out_page) 568 { 569 return allocate_memory(size, out_page); 570 } 571 #endif 572 deallocate_memoryxml_allocator573 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) 574 { 575 if (page == _root) page->busy_size = _busy_size; 576 577 assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size); 578 (void)!ptr; 579 580 page->freed_size += size; 581 assert(page->freed_size <= page->busy_size); 582 583 if (page->freed_size == page->busy_size) 584 { 585 if (page->next == 0) 586 { 587 assert(_root == page); 588 589 // top page freed, just reset sizes 590 page->busy_size = 0; 591 page->freed_size = 0; 592 593 #ifdef PUGIXML_COMPACT 594 // reset compact state to maximize efficiency 595 page->compact_string_base = 0; 596 
page->compact_shared_parent = 0; 597 page->compact_page_marker = 0; 598 #endif 599 600 _busy_size = 0; 601 } 602 else 603 { 604 assert(_root != page); 605 assert(page->prev); 606 607 // remove from the list 608 page->prev->next = page->next; 609 page->next->prev = page->prev; 610 611 // deallocate 612 deallocate_page(page); 613 } 614 } 615 } 616 allocate_stringxml_allocator617 char_t* allocate_string(size_t length) 618 { 619 static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment; 620 621 PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset); 622 623 // allocate memory for string and header block 624 size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t); 625 626 // round size up to block alignment boundary 627 size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1); 628 629 xml_memory_page* page; 630 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page)); 631 632 if (!header) return 0; 633 634 // setup header 635 ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page); 636 637 assert(page_offset % xml_memory_block_alignment == 0); 638 assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset); 639 header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment); 640 641 // full_size == 0 for large strings that occupy the whole page 642 assert(full_size % xml_memory_block_alignment == 0); 643 assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0)); 644 header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? 
full_size / xml_memory_block_alignment : 0); 645 646 // round-trip through void* to avoid 'cast increases required alignment of target type' warning 647 // header is guaranteed a pointer-sized alignment, which should be enough for char_t 648 return static_cast<char_t*>(static_cast<void*>(header + 1)); 649 } 650 deallocate_stringxml_allocator651 void deallocate_string(char_t* string) 652 { 653 // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings 654 // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string 655 656 // get header 657 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1; 658 assert(header); 659 660 // deallocate 661 size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment; 662 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset)); 663 664 // if full_size == 0 then this string occupies the whole page 665 size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment; 666 667 deallocate_memory(header, full_size, page); 668 } 669 reservexml_allocator670 bool reserve() 671 { 672 #ifdef PUGIXML_COMPACT 673 return _hash->reserve(); 674 #else 675 return true; 676 #endif 677 } 678 679 xml_memory_page* _root; 680 size_t _busy_size; 681 682 #ifdef PUGIXML_COMPACT 683 compact_hash_table* _hash; 684 #endif 685 }; 686 allocate_memory_oob(size_t size,xml_memory_page * & out_page)687 PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) 688 { 689 const size_t large_allocation_threshold = xml_memory_page_size / 4; 690 691 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? 
xml_memory_page_size : size); 692 out_page = page; 693 694 if (!page) return 0; 695 696 if (size <= large_allocation_threshold) 697 { 698 _root->busy_size = _busy_size; 699 700 // insert page at the end of linked list 701 page->prev = _root; 702 _root->next = page; 703 _root = page; 704 705 _busy_size = size; 706 } 707 else 708 { 709 // insert page before the end of linked list, so that it is deleted as soon as possible 710 // the last page is not deleted even if it's empty (see deallocate_memory) 711 assert(_root->prev); 712 713 page->prev = _root->prev; 714 page->next = _root; 715 716 _root->prev->next = page; 717 _root->prev = page; 718 719 page->busy_size = size; 720 } 721 722 return reinterpret_cast<char*>(page) + sizeof(xml_memory_page); 723 } 724 PUGI__NS_END 725 726 #ifdef PUGIXML_COMPACT 727 PUGI__NS_BEGIN 728 static const uintptr_t compact_alignment_log2 = 2; 729 static const uintptr_t compact_alignment = 1 << compact_alignment_log2; 730 731 class compact_header 732 { 733 public: compact_header(xml_memory_page * page,unsigned int flags)734 compact_header(xml_memory_page* page, unsigned int flags) 735 { 736 PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment); 737 738 ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker)); 739 assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment); 740 741 _page = static_cast<unsigned char>(offset >> compact_alignment_log2); 742 _flags = static_cast<unsigned char>(flags); 743 } 744 operator &=(uintptr_t mod)745 void operator&=(uintptr_t mod) 746 { 747 _flags &= static_cast<unsigned char>(mod); 748 } 749 operator |=(uintptr_t mod)750 void operator|=(uintptr_t mod) 751 { 752 _flags |= static_cast<unsigned char>(mod); 753 } 754 operator &(uintptr_t mod) const755 uintptr_t operator&(uintptr_t mod) const 756 { 757 return _flags & mod; 758 } 759 get_page() const760 xml_memory_page* get_page() const 761 { 762 // 
round-trip through void* to silence 'cast increases required alignment of target type' warnings 763 const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2); 764 const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker)); 765 766 return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page))); 767 } 768 769 private: 770 unsigned char _page; 771 unsigned char _flags; 772 }; 773 compact_get_page(const void * object,int header_offset)774 PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset) 775 { 776 const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset); 777 778 return header->get_page(); 779 } 780 compact_get_value(const void * object)781 template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object) 782 { 783 return static_cast<T*>(*compact_get_page(object, header_offset)->allocator->_hash->find(object)); 784 } 785 compact_set_value(const void * object,T * value)786 template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value) 787 { 788 *compact_get_page(object, header_offset)->allocator->_hash->insert(object) = value; 789 } 790 791 template <typename T, int header_offset, int start = -126> class compact_pointer 792 { 793 public: compact_pointer()794 compact_pointer(): _data(0) 795 { 796 } 797 operator =(const compact_pointer & rhs)798 void operator=(const compact_pointer& rhs) 799 { 800 *this = rhs + 0; 801 } 802 operator =(T * value)803 void operator=(T* value) 804 { 805 if (value) 806 { 807 // value is guaranteed to be compact-aligned; 'this' is not 808 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) 809 // so for negative offsets (e.g. 
-3) we need to adjust the diff by compact_alignment - 1 to 810 // compensate for arithmetic shift rounding for negative values 811 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this); 812 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start; 813 814 if (static_cast<uintptr_t>(offset) <= 253) 815 _data = static_cast<unsigned char>(offset + 1); 816 else 817 { 818 compact_set_value<header_offset>(this, value); 819 820 _data = 255; 821 } 822 } 823 else 824 _data = 0; 825 } 826 operator T*() const827 operator T*() const 828 { 829 if (_data) 830 { 831 if (_data < 255) 832 { 833 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1); 834 835 return reinterpret_cast<T*>(base + ((_data - 1 + start) << compact_alignment_log2)); 836 } 837 else 838 return compact_get_value<header_offset, T>(this); 839 } 840 else 841 return 0; 842 } 843 operator ->() const844 T* operator->() const 845 { 846 return *this; 847 } 848 849 private: 850 unsigned char _data; 851 }; 852 853 template <typename T, int header_offset> class compact_pointer_parent 854 { 855 public: compact_pointer_parent()856 compact_pointer_parent(): _data(0) 857 { 858 } 859 operator =(const compact_pointer_parent & rhs)860 void operator=(const compact_pointer_parent& rhs) 861 { 862 *this = rhs + 0; 863 } 864 operator =(T * value)865 void operator=(T* value) 866 { 867 if (value) 868 { 869 // value is guaranteed to be compact-aligned; 'this' is not 870 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) 871 // so for negative offsets (e.g. 
-3) we need to adjust the diff by compact_alignment - 1 to 872 // compensate for arithmetic shift behavior for negative values 873 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this); 874 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533; 875 876 if (static_cast<uintptr_t>(offset) <= 65533) 877 { 878 _data = static_cast<unsigned short>(offset + 1); 879 } 880 else 881 { 882 xml_memory_page* page = compact_get_page(this, header_offset); 883 884 if (PUGI__UNLIKELY(page->compact_shared_parent == 0)) 885 page->compact_shared_parent = value; 886 887 if (page->compact_shared_parent == value) 888 { 889 _data = 65534; 890 } 891 else 892 { 893 compact_set_value<header_offset>(this, value); 894 895 _data = 65535; 896 } 897 } 898 } 899 else 900 { 901 _data = 0; 902 } 903 } 904 operator T*() const905 operator T*() const 906 { 907 if (_data) 908 { 909 if (_data < 65534) 910 { 911 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1); 912 913 return reinterpret_cast<T*>(base + ((_data - 1 - 65533) << compact_alignment_log2)); 914 } 915 else if (_data == 65534) 916 return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent); 917 else 918 return compact_get_value<header_offset, T>(this); 919 } 920 else 921 return 0; 922 } 923 operator ->() const924 T* operator->() const 925 { 926 return *this; 927 } 928 929 private: 930 uint16_t _data; 931 }; 932 933 template <int header_offset, int base_offset> class compact_string 934 { 935 public: compact_string()936 compact_string(): _data(0) 937 { 938 } 939 operator =(const compact_string & rhs)940 void operator=(const compact_string& rhs) 941 { 942 *this = rhs + 0; 943 } 944 operator =(char_t * value)945 void operator=(char_t* value) 946 { 947 if (value) 948 { 949 xml_memory_page* page = compact_get_page(this, header_offset); 950 951 if (PUGI__UNLIKELY(page->compact_string_base == 0)) 952 page->compact_string_base = value; 953 
954 ptrdiff_t offset = value - page->compact_string_base; 955 956 if (static_cast<uintptr_t>(offset) < (65535 << 7)) 957 { 958 // round-trip through void* to silence 'cast increases required alignment of target type' warnings 959 uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset)); 960 961 if (*base == 0) 962 { 963 *base = static_cast<uint16_t>((offset >> 7) + 1); 964 _data = static_cast<unsigned char>((offset & 127) + 1); 965 } 966 else 967 { 968 ptrdiff_t remainder = offset - ((*base - 1) << 7); 969 970 if (static_cast<uintptr_t>(remainder) <= 253) 971 { 972 _data = static_cast<unsigned char>(remainder + 1); 973 } 974 else 975 { 976 compact_set_value<header_offset>(this, value); 977 978 _data = 255; 979 } 980 } 981 } 982 else 983 { 984 compact_set_value<header_offset>(this, value); 985 986 _data = 255; 987 } 988 } 989 else 990 { 991 _data = 0; 992 } 993 } 994 operator char_t*() const995 operator char_t*() const 996 { 997 if (_data) 998 { 999 if (_data < 255) 1000 { 1001 xml_memory_page* page = compact_get_page(this, header_offset); 1002 1003 // round-trip through void* to silence 'cast increases required alignment of target type' warnings 1004 const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset)); 1005 assert(*base); 1006 1007 ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1); 1008 1009 return page->compact_string_base + offset; 1010 } 1011 else 1012 { 1013 return compact_get_value<header_offset, char_t>(this); 1014 } 1015 } 1016 else 1017 return 0; 1018 } 1019 1020 private: 1021 unsigned char _data; 1022 }; 1023 PUGI__NS_END 1024 #endif 1025 1026 #ifdef PUGIXML_COMPACT 1027 namespace pugi 1028 { 1029 struct xml_attribute_struct 1030 { xml_attribute_structpugi::xml_attribute_struct1031 xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0) 1032 { 1033 
PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8); 1034 } 1035 1036 impl::compact_header header; 1037 1038 uint16_t namevalue_base; 1039 1040 impl::compact_string<4, 2> name; 1041 impl::compact_string<5, 3> value; 1042 1043 impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c; 1044 impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute; 1045 }; 1046 1047 struct xml_node_struct 1048 { xml_node_structpugi::xml_node_struct1049 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type - 1), namevalue_base(0) 1050 { 1051 PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12); 1052 } 1053 1054 impl::compact_header header; 1055 1056 uint16_t namevalue_base; 1057 1058 impl::compact_string<4, 2> name; 1059 impl::compact_string<5, 3> value; 1060 1061 impl::compact_pointer_parent<xml_node_struct, 6> parent; 1062 1063 impl::compact_pointer<xml_node_struct, 8, 0> first_child; 1064 1065 impl::compact_pointer<xml_node_struct, 9> prev_sibling_c; 1066 impl::compact_pointer<xml_node_struct, 10, 0> next_sibling; 1067 1068 impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute; 1069 }; 1070 } 1071 #else 1072 namespace pugi 1073 { 1074 struct xml_attribute_struct 1075 { 1076 xml_attribute_struct(impl::xml_memory_page* page): header(reinterpret_cast<uintptr_t>(page)), name(0), value(0), prev_attribute_c(0), next_attribute(0) 1077 { 1078 } 1079 1080 uintptr_t header; 1081 1082 char_t* name; 1083 char_t* value; 1084 1085 xml_attribute_struct* prev_attribute_c; 1086 xml_attribute_struct* next_attribute; 1087 }; 1088 1089 struct xml_node_struct 1090 { 1091 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(reinterpret_cast<uintptr_t>(page) | (type - 1)), name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) 1092 { 1093 } 1094 1095 uintptr_t header; 1096 1097 char_t* name; 1098 char_t* value; 1099 1100 xml_node_struct* parent; 1101 1102 xml_node_struct* 
first_child; 1103 1104 xml_node_struct* prev_sibling_c; 1105 xml_node_struct* next_sibling; 1106 1107 xml_attribute_struct* first_attribute; 1108 }; 1109 } 1110 #endif 1111 1112 PUGI__NS_BEGIN 1113 struct xml_extra_buffer 1114 { 1115 char_t* buffer; 1116 xml_extra_buffer* next; 1117 }; 1118 1119 struct xml_document_struct: public xml_node_struct, public xml_allocator 1120 { xml_document_structxml_document_struct1121 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0) 1122 { 1123 #ifdef PUGIXML_COMPACT 1124 _hash = &hash; 1125 #endif 1126 } 1127 1128 const char_t* buffer; 1129 1130 xml_extra_buffer* extra_buffers; 1131 1132 #ifdef PUGIXML_COMPACT 1133 compact_hash_table hash; 1134 #endif 1135 }; 1136 get_allocator(const Object * object)1137 template <typename Object> inline xml_allocator& get_allocator(const Object* object) 1138 { 1139 assert(object); 1140 1141 return *PUGI__GETPAGE(object)->allocator; 1142 } 1143 get_document(const Object * object)1144 template <typename Object> inline xml_document_struct& get_document(const Object* object) 1145 { 1146 assert(object); 1147 1148 return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator); 1149 } 1150 PUGI__NS_END 1151 1152 // Low-level DOM operations 1153 PUGI__NS_BEGIN allocate_attribute(xml_allocator & alloc)1154 inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc) 1155 { 1156 xml_memory_page* page; 1157 void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page); 1158 if (!memory) return 0; 1159 1160 return new (memory) xml_attribute_struct(page); 1161 } 1162 allocate_node(xml_allocator & alloc,xml_node_type type)1163 inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type) 1164 { 1165 xml_memory_page* page; 1166 void* memory = alloc.allocate_object(sizeof(xml_node_struct), page); 1167 if (!memory) return 0; 1168 1169 return new (memory) xml_node_struct(page, type); 
1170 } 1171 destroy_attribute(xml_attribute_struct * a,xml_allocator & alloc)1172 inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc) 1173 { 1174 if (a->header & impl::xml_memory_page_name_allocated_mask) 1175 alloc.deallocate_string(a->name); 1176 1177 if (a->header & impl::xml_memory_page_value_allocated_mask) 1178 alloc.deallocate_string(a->value); 1179 1180 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a)); 1181 } 1182 destroy_node(xml_node_struct * n,xml_allocator & alloc)1183 inline void destroy_node(xml_node_struct* n, xml_allocator& alloc) 1184 { 1185 if (n->header & impl::xml_memory_page_name_allocated_mask) 1186 alloc.deallocate_string(n->name); 1187 1188 if (n->header & impl::xml_memory_page_value_allocated_mask) 1189 alloc.deallocate_string(n->value); 1190 1191 for (xml_attribute_struct* attr = n->first_attribute; attr; ) 1192 { 1193 xml_attribute_struct* next = attr->next_attribute; 1194 1195 destroy_attribute(attr, alloc); 1196 1197 attr = next; 1198 } 1199 1200 for (xml_node_struct* child = n->first_child; child; ) 1201 { 1202 xml_node_struct* next = child->next_sibling; 1203 1204 destroy_node(child, alloc); 1205 1206 child = next; 1207 } 1208 1209 alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n)); 1210 } 1211 append_node(xml_node_struct * child,xml_node_struct * node)1212 inline void append_node(xml_node_struct* child, xml_node_struct* node) 1213 { 1214 child->parent = node; 1215 1216 xml_node_struct* head = node->first_child; 1217 1218 if (head) 1219 { 1220 xml_node_struct* tail = head->prev_sibling_c; 1221 1222 tail->next_sibling = child; 1223 child->prev_sibling_c = tail; 1224 head->prev_sibling_c = child; 1225 } 1226 else 1227 { 1228 node->first_child = child; 1229 child->prev_sibling_c = child; 1230 } 1231 } 1232 prepend_node(xml_node_struct * child,xml_node_struct * node)1233 inline void prepend_node(xml_node_struct* child, xml_node_struct* node) 1234 { 1235 child->parent = 
node; 1236 1237 xml_node_struct* head = node->first_child; 1238 1239 if (head) 1240 { 1241 child->prev_sibling_c = head->prev_sibling_c; 1242 head->prev_sibling_c = child; 1243 } 1244 else 1245 child->prev_sibling_c = child; 1246 1247 child->next_sibling = head; 1248 node->first_child = child; 1249 } 1250 insert_node_after(xml_node_struct * child,xml_node_struct * node)1251 inline void insert_node_after(xml_node_struct* child, xml_node_struct* node) 1252 { 1253 xml_node_struct* parent = node->parent; 1254 1255 child->parent = parent; 1256 1257 if (node->next_sibling) 1258 node->next_sibling->prev_sibling_c = child; 1259 else 1260 parent->first_child->prev_sibling_c = child; 1261 1262 child->next_sibling = node->next_sibling; 1263 child->prev_sibling_c = node; 1264 1265 node->next_sibling = child; 1266 } 1267 insert_node_before(xml_node_struct * child,xml_node_struct * node)1268 inline void insert_node_before(xml_node_struct* child, xml_node_struct* node) 1269 { 1270 xml_node_struct* parent = node->parent; 1271 1272 child->parent = parent; 1273 1274 if (node->prev_sibling_c->next_sibling) 1275 node->prev_sibling_c->next_sibling = child; 1276 else 1277 parent->first_child = child; 1278 1279 child->prev_sibling_c = node->prev_sibling_c; 1280 child->next_sibling = node; 1281 1282 node->prev_sibling_c = child; 1283 } 1284 remove_node(xml_node_struct * node)1285 inline void remove_node(xml_node_struct* node) 1286 { 1287 xml_node_struct* parent = node->parent; 1288 1289 if (node->next_sibling) 1290 node->next_sibling->prev_sibling_c = node->prev_sibling_c; 1291 else 1292 parent->first_child->prev_sibling_c = node->prev_sibling_c; 1293 1294 if (node->prev_sibling_c->next_sibling) 1295 node->prev_sibling_c->next_sibling = node->next_sibling; 1296 else 1297 parent->first_child = node->next_sibling; 1298 1299 node->parent = 0; 1300 node->prev_sibling_c = 0; 1301 node->next_sibling = 0; 1302 } 1303 append_attribute(xml_attribute_struct * attr,xml_node_struct * node)1304 inline 
void append_attribute(xml_attribute_struct* attr, xml_node_struct* node) 1305 { 1306 xml_attribute_struct* head = node->first_attribute; 1307 1308 if (head) 1309 { 1310 xml_attribute_struct* tail = head->prev_attribute_c; 1311 1312 tail->next_attribute = attr; 1313 attr->prev_attribute_c = tail; 1314 head->prev_attribute_c = attr; 1315 } 1316 else 1317 { 1318 node->first_attribute = attr; 1319 attr->prev_attribute_c = attr; 1320 } 1321 } 1322 prepend_attribute(xml_attribute_struct * attr,xml_node_struct * node)1323 inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node) 1324 { 1325 xml_attribute_struct* head = node->first_attribute; 1326 1327 if (head) 1328 { 1329 attr->prev_attribute_c = head->prev_attribute_c; 1330 head->prev_attribute_c = attr; 1331 } 1332 else 1333 attr->prev_attribute_c = attr; 1334 1335 attr->next_attribute = head; 1336 node->first_attribute = attr; 1337 } 1338 insert_attribute_after(xml_attribute_struct * attr,xml_attribute_struct * place,xml_node_struct * node)1339 inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) 1340 { 1341 if (place->next_attribute) 1342 place->next_attribute->prev_attribute_c = attr; 1343 else 1344 node->first_attribute->prev_attribute_c = attr; 1345 1346 attr->next_attribute = place->next_attribute; 1347 attr->prev_attribute_c = place; 1348 place->next_attribute = attr; 1349 } 1350 insert_attribute_before(xml_attribute_struct * attr,xml_attribute_struct * place,xml_node_struct * node)1351 inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) 1352 { 1353 if (place->prev_attribute_c->next_attribute) 1354 place->prev_attribute_c->next_attribute = attr; 1355 else 1356 node->first_attribute = attr; 1357 1358 attr->prev_attribute_c = place->prev_attribute_c; 1359 attr->next_attribute = place; 1360 place->prev_attribute_c = attr; 1361 } 1362 
remove_attribute(xml_attribute_struct * attr,xml_node_struct * node)1363 inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node) 1364 { 1365 if (attr->next_attribute) 1366 attr->next_attribute->prev_attribute_c = attr->prev_attribute_c; 1367 else 1368 node->first_attribute->prev_attribute_c = attr->prev_attribute_c; 1369 1370 if (attr->prev_attribute_c->next_attribute) 1371 attr->prev_attribute_c->next_attribute = attr->next_attribute; 1372 else 1373 node->first_attribute = attr->next_attribute; 1374 1375 attr->prev_attribute_c = 0; 1376 attr->next_attribute = 0; 1377 } 1378 append_new_node(xml_node_struct * node,xml_allocator & alloc,xml_node_type type=node_element)1379 PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) 1380 { 1381 if (!alloc.reserve()) return 0; 1382 1383 xml_node_struct* child = allocate_node(alloc, type); 1384 if (!child) return 0; 1385 1386 append_node(child, node); 1387 1388 return child; 1389 } 1390 append_new_attribute(xml_node_struct * node,xml_allocator & alloc)1391 PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc) 1392 { 1393 if (!alloc.reserve()) return 0; 1394 1395 xml_attribute_struct* attr = allocate_attribute(alloc); 1396 if (!attr) return 0; 1397 1398 append_attribute(attr, node); 1399 1400 return attr; 1401 } 1402 PUGI__NS_END 1403 1404 // Helper classes for code generation 1405 PUGI__NS_BEGIN 1406 struct opt_false 1407 { 1408 enum { value = 0 }; 1409 }; 1410 1411 struct opt_true 1412 { 1413 enum { value = 1 }; 1414 }; 1415 PUGI__NS_END 1416 1417 // Unicode utilities 1418 PUGI__NS_BEGIN endian_swap(uint16_t value)1419 inline uint16_t endian_swap(uint16_t value) 1420 { 1421 return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8)); 1422 } 1423 endian_swap(uint32_t value)1424 inline uint32_t endian_swap(uint32_t value) 1425 { 1426 return ((value & 0xff) << 24) | 
((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24); 1427 } 1428 1429 struct utf8_counter 1430 { 1431 typedef size_t value_type; 1432 lowutf8_counter1433 static value_type low(value_type result, uint32_t ch) 1434 { 1435 // U+0000..U+007F 1436 if (ch < 0x80) return result + 1; 1437 // U+0080..U+07FF 1438 else if (ch < 0x800) return result + 2; 1439 // U+0800..U+FFFF 1440 else return result + 3; 1441 } 1442 highutf8_counter1443 static value_type high(value_type result, uint32_t) 1444 { 1445 // U+10000..U+10FFFF 1446 return result + 4; 1447 } 1448 }; 1449 1450 struct utf8_writer 1451 { 1452 typedef uint8_t* value_type; 1453 lowutf8_writer1454 static value_type low(value_type result, uint32_t ch) 1455 { 1456 // U+0000..U+007F 1457 if (ch < 0x80) 1458 { 1459 *result = static_cast<uint8_t>(ch); 1460 return result + 1; 1461 } 1462 // U+0080..U+07FF 1463 else if (ch < 0x800) 1464 { 1465 result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6)); 1466 result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F)); 1467 return result + 2; 1468 } 1469 // U+0800..U+FFFF 1470 else 1471 { 1472 result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12)); 1473 result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F)); 1474 result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F)); 1475 return result + 3; 1476 } 1477 } 1478 highutf8_writer1479 static value_type high(value_type result, uint32_t ch) 1480 { 1481 // U+10000..U+10FFFF 1482 result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18)); 1483 result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F)); 1484 result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F)); 1485 result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F)); 1486 return result + 4; 1487 } 1488 anyutf8_writer1489 static value_type any(value_type result, uint32_t ch) 1490 { 1491 return (ch < 0x10000) ? 
low(result, ch) : high(result, ch); 1492 } 1493 }; 1494 1495 struct utf16_counter 1496 { 1497 typedef size_t value_type; 1498 lowutf16_counter1499 static value_type low(value_type result, uint32_t) 1500 { 1501 return result + 1; 1502 } 1503 highutf16_counter1504 static value_type high(value_type result, uint32_t) 1505 { 1506 return result + 2; 1507 } 1508 }; 1509 1510 struct utf16_writer 1511 { 1512 typedef uint16_t* value_type; 1513 lowutf16_writer1514 static value_type low(value_type result, uint32_t ch) 1515 { 1516 *result = static_cast<uint16_t>(ch); 1517 1518 return result + 1; 1519 } 1520 highutf16_writer1521 static value_type high(value_type result, uint32_t ch) 1522 { 1523 uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10; 1524 uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff; 1525 1526 result[0] = static_cast<uint16_t>(0xD800 + msh); 1527 result[1] = static_cast<uint16_t>(0xDC00 + lsh); 1528 1529 return result + 2; 1530 } 1531 anyutf16_writer1532 static value_type any(value_type result, uint32_t ch) 1533 { 1534 return (ch < 0x10000) ? 
low(result, ch) : high(result, ch); 1535 } 1536 }; 1537 1538 struct utf32_counter 1539 { 1540 typedef size_t value_type; 1541 lowutf32_counter1542 static value_type low(value_type result, uint32_t) 1543 { 1544 return result + 1; 1545 } 1546 highutf32_counter1547 static value_type high(value_type result, uint32_t) 1548 { 1549 return result + 1; 1550 } 1551 }; 1552 1553 struct utf32_writer 1554 { 1555 typedef uint32_t* value_type; 1556 lowutf32_writer1557 static value_type low(value_type result, uint32_t ch) 1558 { 1559 *result = ch; 1560 1561 return result + 1; 1562 } 1563 highutf32_writer1564 static value_type high(value_type result, uint32_t ch) 1565 { 1566 *result = ch; 1567 1568 return result + 1; 1569 } 1570 anyutf32_writer1571 static value_type any(value_type result, uint32_t ch) 1572 { 1573 *result = ch; 1574 1575 return result + 1; 1576 } 1577 }; 1578 1579 struct latin1_writer 1580 { 1581 typedef uint8_t* value_type; 1582 lowlatin1_writer1583 static value_type low(value_type result, uint32_t ch) 1584 { 1585 *result = static_cast<uint8_t>(ch > 255 ? '?' 
: ch); 1586 1587 return result + 1; 1588 } 1589 highlatin1_writer1590 static value_type high(value_type result, uint32_t ch) 1591 { 1592 (void)ch; 1593 1594 *result = '?'; 1595 1596 return result + 1; 1597 } 1598 }; 1599 1600 struct utf8_decoder 1601 { 1602 typedef uint8_t type; 1603 processutf8_decoder1604 template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) 1605 { 1606 const uint8_t utf8_byte_mask = 0x3f; 1607 1608 while (size) 1609 { 1610 uint8_t lead = *data; 1611 1612 // 0xxxxxxx -> U+0000..U+007F 1613 if (lead < 0x80) 1614 { 1615 result = Traits::low(result, lead); 1616 data += 1; 1617 size -= 1; 1618 1619 // process aligned single-byte (ascii) blocks 1620 if ((reinterpret_cast<uintptr_t>(data) & 3) == 0) 1621 { 1622 // round-trip through void* to silence 'cast increases required alignment of target type' warnings 1623 while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0) 1624 { 1625 result = Traits::low(result, data[0]); 1626 result = Traits::low(result, data[1]); 1627 result = Traits::low(result, data[2]); 1628 result = Traits::low(result, data[3]); 1629 data += 4; 1630 size -= 4; 1631 } 1632 } 1633 } 1634 // 110xxxxx -> U+0080..U+07FF 1635 else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80) 1636 { 1637 result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask)); 1638 data += 2; 1639 size -= 2; 1640 } 1641 // 1110xxxx -> U+0800-U+FFFF 1642 else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80) 1643 { 1644 result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask)); 1645 data += 3; 1646 size -= 3; 1647 } 1648 // 11110xxx -> U+10000..U+10FFFF 1649 else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && 
(data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80) 1650 { 1651 result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask)); 1652 data += 4; 1653 size -= 4; 1654 } 1655 // 10xxxxxx or 11111xxx -> invalid 1656 else 1657 { 1658 data += 1; 1659 size -= 1; 1660 } 1661 } 1662 1663 return result; 1664 } 1665 }; 1666 1667 template <typename opt_swap> struct utf16_decoder 1668 { 1669 typedef uint16_t type; 1670 processutf16_decoder1671 template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits) 1672 { 1673 while (size) 1674 { 1675 uint16_t lead = opt_swap::value ? endian_swap(*data) : *data; 1676 1677 // U+0000..U+D7FF 1678 if (lead < 0xD800) 1679 { 1680 result = Traits::low(result, lead); 1681 data += 1; 1682 size -= 1; 1683 } 1684 // U+E000..U+FFFF 1685 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000) 1686 { 1687 result = Traits::low(result, lead); 1688 data += 1; 1689 size -= 1; 1690 } 1691 // surrogate pair lead 1692 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2) 1693 { 1694 uint16_t next = opt_swap::value ? 
endian_swap(data[1]) : data[1]; 1695 1696 if (static_cast<unsigned int>(next - 0xDC00) < 0x400) 1697 { 1698 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff)); 1699 data += 2; 1700 size -= 2; 1701 } 1702 else 1703 { 1704 data += 1; 1705 size -= 1; 1706 } 1707 } 1708 else 1709 { 1710 data += 1; 1711 size -= 1; 1712 } 1713 } 1714 1715 return result; 1716 } 1717 }; 1718 1719 template <typename opt_swap> struct utf32_decoder 1720 { 1721 typedef uint32_t type; 1722 processutf32_decoder1723 template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits) 1724 { 1725 while (size) 1726 { 1727 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data; 1728 1729 // U+0000..U+FFFF 1730 if (lead < 0x10000) 1731 { 1732 result = Traits::low(result, lead); 1733 data += 1; 1734 size -= 1; 1735 } 1736 // U+10000..U+10FFFF 1737 else 1738 { 1739 result = Traits::high(result, lead); 1740 data += 1; 1741 size -= 1; 1742 } 1743 } 1744 1745 return result; 1746 } 1747 }; 1748 1749 struct latin1_decoder 1750 { 1751 typedef uint8_t type; 1752 processlatin1_decoder1753 template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) 1754 { 1755 while (size) 1756 { 1757 result = Traits::low(result, *data); 1758 data += 1; 1759 size -= 1; 1760 } 1761 1762 return result; 1763 } 1764 }; 1765 1766 template <size_t size> struct wchar_selector; 1767 1768 template <> struct wchar_selector<2> 1769 { 1770 typedef uint16_t type; 1771 typedef utf16_counter counter; 1772 typedef utf16_writer writer; 1773 typedef utf16_decoder<opt_false> decoder; 1774 }; 1775 1776 template <> struct wchar_selector<4> 1777 { 1778 typedef uint32_t type; 1779 typedef utf32_counter counter; 1780 typedef utf32_writer writer; 1781 typedef utf32_decoder<opt_false> decoder; 1782 }; 1783 1784 typedef 
wchar_selector<sizeof(wchar_t)>::counter wchar_counter; 1785 typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer; 1786 1787 struct wchar_decoder 1788 { 1789 typedef wchar_t type; 1790 processwchar_decoder1791 template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits) 1792 { 1793 typedef wchar_selector<sizeof(wchar_t)>::decoder decoder; 1794 1795 return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits); 1796 } 1797 }; 1798 1799 #ifdef PUGIXML_WCHAR_MODE convert_wchar_endian_swap(wchar_t * result,const wchar_t * data,size_t length)1800 PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length) 1801 { 1802 for (size_t i = 0; i < length; ++i) 1803 result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i]))); 1804 } 1805 #endif 1806 PUGI__NS_END 1807 1808 PUGI__NS_BEGIN 1809 enum chartype_t 1810 { 1811 ct_parse_pcdata = 1, // \0, &, \r, < 1812 ct_parse_attr = 2, // \0, &, \r, ', " 1813 ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab 1814 ct_space = 8, // \r, \n, space, tab 1815 ct_parse_cdata = 16, // \0, ], >, \r 1816 ct_parse_comment = 32, // \0, -, >, \r 1817 ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, . 
1818 ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, : 1819 }; 1820 1821 static const unsigned char chartype_table[256] = 1822 { 1823 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15 1824 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 1825 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47 1826 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63 1827 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79 1828 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95 1829 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111 1830 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127 1831 1832 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+ 1833 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1834 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1835 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1836 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1837 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1838 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1839 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192 1840 }; 1841 1842 enum chartypex_t 1843 { 1844 ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, > 1845 ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, " 1846 ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _ 1847 ctx_digit = 8, // 0-9 1848 ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, . 
1849 }; 1850 1851 static const unsigned char chartypex_table[256] = 1852 { 1853 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15 1854 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31 1855 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47 1856 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63 1857 1858 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79 1859 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95 1860 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111 1861 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127 1862 1863 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+ 1864 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1865 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1866 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1867 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1868 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1869 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1870 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 1871 }; 1872 1873 #ifdef PUGIXML_WCHAR_MODE 1874 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? 
table[static_cast<unsigned int>(c)] : table[128]) & (ct)) 1875 #else 1876 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct)) 1877 #endif 1878 1879 #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table) 1880 #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table) 1881 is_little_endian()1882 PUGI__FN bool is_little_endian() 1883 { 1884 unsigned int ui = 1; 1885 1886 return *reinterpret_cast<unsigned char*>(&ui) == 1; 1887 } 1888 get_wchar_encoding()1889 PUGI__FN xml_encoding get_wchar_encoding() 1890 { 1891 PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); 1892 1893 if (sizeof(wchar_t) == 2) 1894 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 1895 else 1896 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; 1897 } 1898 guess_buffer_encoding(uint8_t d0,uint8_t d1,uint8_t d2,uint8_t d3)1899 PUGI__FN xml_encoding guess_buffer_encoding(uint8_t d0, uint8_t d1, uint8_t d2, uint8_t d3) 1900 { 1901 // look for BOM in first few bytes 1902 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be; 1903 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le; 1904 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be; 1905 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le; 1906 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8; 1907 1908 // look for <, <? 
or <?xm in various encodings 1909 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be; 1910 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le; 1911 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be; 1912 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le; 1913 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d) return encoding_utf8; 1914 1915 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early) 1916 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be; 1917 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le; 1918 1919 // no known BOM detected, assume utf8 1920 return encoding_utf8; 1921 } 1922 get_buffer_encoding(xml_encoding encoding,const void * contents,size_t size)1923 PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size) 1924 { 1925 // replace wchar encoding with utf implementation 1926 if (encoding == encoding_wchar) return get_wchar_encoding(); 1927 1928 // replace utf16 encoding with utf16 with specific endianness 1929 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 1930 1931 // replace utf32 encoding with utf32 with specific endianness 1932 if (encoding == encoding_utf32) return is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; 1933 1934 // only do autodetection if no explicit encoding is requested 1935 if (encoding != encoding_auto) return encoding; 1936 1937 // skip encoding autodetection if input buffer is too small 1938 if (size < 4) return encoding_utf8; 1939 1940 // try to guess encoding (based on XML specification, Appendix F.1) 1941 const uint8_t* data = static_cast<const uint8_t*>(contents); 1942 1943 PUGI__DMC_VOLATILE uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3]; 1944 1945 return guess_buffer_encoding(d0, d1, d2, d3); 1946 } 1947 get_mutable_buffer(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)1948 PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) 1949 { 1950 size_t length = size / sizeof(char_t); 1951 1952 if (is_mutable) 1953 { 1954 out_buffer = static_cast<char_t*>(const_cast<void*>(contents)); 1955 out_length = length; 1956 } 1957 else 1958 { 1959 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 1960 if (!buffer) return false; 1961 1962 if (contents) 1963 memcpy(buffer, contents, length * sizeof(char_t)); 1964 else 1965 assert(length == 0); 1966 1967 buffer[length] = 0; 1968 1969 out_buffer = buffer; 1970 out_length = length + 1; 1971 } 1972 1973 return true; 1974 } 1975 1976 #ifdef PUGIXML_WCHAR_MODE need_endian_swap_utf(xml_encoding le,xml_encoding re)1977 PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re) 1978 { 1979 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) || 1980 (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be); 1981 } 1982 convert_buffer_endian_swap(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)1983 PUGI__FN bool convert_buffer_endian_swap(char_t*& 
out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) 1984 { 1985 const char_t* data = static_cast<const char_t*>(contents); 1986 size_t length = size / sizeof(char_t); 1987 1988 if (is_mutable) 1989 { 1990 char_t* buffer = const_cast<char_t*>(data); 1991 1992 convert_wchar_endian_swap(buffer, data, length); 1993 1994 out_buffer = buffer; 1995 out_length = length; 1996 } 1997 else 1998 { 1999 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2000 if (!buffer) return false; 2001 2002 convert_wchar_endian_swap(buffer, data, length); 2003 buffer[length] = 0; 2004 2005 out_buffer = buffer; 2006 out_length = length + 1; 2007 } 2008 2009 return true; 2010 } 2011 convert_buffer_generic(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,D)2012 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) 2013 { 2014 const typename D::type* data = static_cast<const typename D::type*>(contents); 2015 size_t data_length = size / sizeof(typename D::type); 2016 2017 // first pass: get length in wchar_t units 2018 size_t length = D::process(data, data_length, 0, wchar_counter()); 2019 2020 // allocate buffer of suitable length 2021 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2022 if (!buffer) return false; 2023 2024 // second pass: convert utf16 input to wchar_t 2025 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer); 2026 wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer()); 2027 2028 assert(oend == obegin + length); 2029 *oend = 0; 2030 2031 out_buffer = buffer; 2032 out_length = length + 1; 2033 2034 return true; 2035 } 2036 convert_buffer(char_t * & out_buffer,size_t & out_length,xml_encoding encoding,const void * contents,size_t size,bool is_mutable)2037 PUGI__FN bool convert_buffer(char_t*& 
out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
	{
		// get native encoding
		xml_encoding wchar_encoding = get_wchar_encoding();

		// fast path: no conversion required
		if (encoding == wchar_encoding)
			return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);

		// only endian-swapping is required
		if (need_endian_swap_utf(encoding, wchar_encoding))
			return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);

		// source encoding is utf8
		if (encoding == encoding_utf8)
			return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());

		// source encoding is utf16
		if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
		{
			xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;

			// decode with byte swapping only when the source byte order differs from the host's
			return (native_encoding == encoding) ?
				convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
				convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
		}

		// source encoding is utf32
		if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
		{
			xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;

			// decode with byte swapping only when the source byte order differs from the host's
			return (native_encoding == encoding) ?
2070 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) : 2071 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>()); 2072 } 2073 2074 // source encoding is latin1 2075 if (encoding == encoding_latin1) 2076 return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder()); 2077 2078 assert(!"Invalid encoding"); 2079 return false; 2080 } 2081 #else convert_buffer_generic(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,D)2082 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) 2083 { 2084 const typename D::type* data = static_cast<const typename D::type*>(contents); 2085 size_t data_length = size / sizeof(typename D::type); 2086 2087 // first pass: get length in utf8 units 2088 size_t length = D::process(data, data_length, 0, utf8_counter()); 2089 2090 // allocate buffer of suitable length 2091 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2092 if (!buffer) return false; 2093 2094 // second pass: convert utf16 input to utf8 2095 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer); 2096 uint8_t* oend = D::process(data, data_length, obegin, utf8_writer()); 2097 2098 assert(oend == obegin + length); 2099 *oend = 0; 2100 2101 out_buffer = buffer; 2102 out_length = length + 1; 2103 2104 return true; 2105 } 2106 get_latin1_7bit_prefix_length(const uint8_t * data,size_t size)2107 PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size) 2108 { 2109 for (size_t i = 0; i < size; ++i) 2110 if (data[i] > 127) 2111 return i; 2112 2113 return size; 2114 } 2115 convert_buffer_latin1(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2116 PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) 
2117 { 2118 const uint8_t* data = static_cast<const uint8_t*>(contents); 2119 size_t data_length = size; 2120 2121 // get size of prefix that does not need utf8 conversion 2122 size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length); 2123 assert(prefix_length <= data_length); 2124 2125 const uint8_t* postfix = data + prefix_length; 2126 size_t postfix_length = data_length - prefix_length; 2127 2128 // if no conversion is needed, just return the original buffer 2129 if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); 2130 2131 // first pass: get length in utf8 units 2132 size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter()); 2133 2134 // allocate buffer of suitable length 2135 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2136 if (!buffer) return false; 2137 2138 // second pass: convert latin1 input to utf8 2139 memcpy(buffer, data, prefix_length); 2140 2141 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer); 2142 uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer()); 2143 2144 assert(oend == obegin + length); 2145 *oend = 0; 2146 2147 out_buffer = buffer; 2148 out_length = length + 1; 2149 2150 return true; 2151 } 2152 convert_buffer(char_t * & out_buffer,size_t & out_length,xml_encoding encoding,const void * contents,size_t size,bool is_mutable)2153 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) 2154 { 2155 // fast path: no conversion required 2156 if (encoding == encoding_utf8) 2157 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); 2158 2159 // source encoding is utf16 2160 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) 2161 { 2162 xml_encoding native_encoding = is_little_endian() ? 
encoding_utf16_le : encoding_utf16_be; 2163 2164 return (native_encoding == encoding) ? 2165 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) : 2166 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>()); 2167 } 2168 2169 // source encoding is utf32 2170 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) 2171 { 2172 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; 2173 2174 return (native_encoding == encoding) ? 2175 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) : 2176 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>()); 2177 } 2178 2179 // source encoding is latin1 2180 if (encoding == encoding_latin1) 2181 return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable); 2182 2183 assert(!"Invalid encoding"); 2184 return false; 2185 } 2186 #endif 2187 as_utf8_begin(const wchar_t * str,size_t length)2188 PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length) 2189 { 2190 // get length in utf8 characters 2191 return wchar_decoder::process(str, length, 0, utf8_counter()); 2192 } 2193 as_utf8_end(char * buffer,size_t size,const wchar_t * str,size_t length)2194 PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length) 2195 { 2196 // convert to utf8 2197 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer); 2198 uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer()); 2199 2200 assert(begin + size == end); 2201 (void)!end; 2202 (void)!size; 2203 } 2204 2205 #ifndef PUGIXML_NO_STL as_utf8_impl(const wchar_t * str,size_t length)2206 PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length) 2207 { 2208 // first pass: get length in utf8 characters 2209 size_t size = as_utf8_begin(str, length); 2210 2211 // allocate resulting string 2212 std::string result; 2213 
result.resize(size); 2214 2215 // second pass: convert to utf8 2216 if (size > 0) as_utf8_end(&result[0], size, str, length); 2217 2218 return result; 2219 } 2220 as_wide_impl(const char * str,size_t size)2221 PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size) 2222 { 2223 const uint8_t* data = reinterpret_cast<const uint8_t*>(str); 2224 2225 // first pass: get length in wchar_t units 2226 size_t length = utf8_decoder::process(data, size, 0, wchar_counter()); 2227 2228 // allocate resulting string 2229 std::basic_string<wchar_t> result; 2230 result.resize(length); 2231 2232 // second pass: convert to wchar_t 2233 if (length > 0) 2234 { 2235 wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]); 2236 wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer()); 2237 2238 assert(begin + length == end); 2239 (void)!end; 2240 } 2241 2242 return result; 2243 } 2244 #endif 2245 2246 template <typename Header> strcpy_insitu_allow(size_t length,const Header & header,uintptr_t header_mask,char_t * target)2247 inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target) 2248 { 2249 // never reuse shared memory 2250 if (header & xml_memory_page_contents_shared_mask) return false; 2251 2252 size_t target_length = strlength(target); 2253 2254 // always reuse document buffer memory if possible 2255 if ((header & header_mask) == 0) return target_length >= length; 2256 2257 // reuse heap memory if waste is not too great 2258 const size_t reuse_threshold = 32; 2259 2260 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2); 2261 } 2262 2263 template <typename String, typename Header> strcpy_insitu(String & dest,Header & header,uintptr_t header_mask,const char_t * source,size_t source_length)2264 PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* 
source, size_t source_length) 2265 { 2266 if (source_length == 0) 2267 { 2268 // empty string and null pointer are equivalent, so just deallocate old memory 2269 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; 2270 2271 if (header & header_mask) alloc->deallocate_string(dest); 2272 2273 // mark the string as not allocated 2274 dest = 0; 2275 header &= ~header_mask; 2276 2277 return true; 2278 } 2279 else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest)) 2280 { 2281 // we can reuse old buffer, so just copy the new data (including zero terminator) 2282 memcpy(dest, source, source_length * sizeof(char_t)); 2283 dest[source_length] = 0; 2284 2285 return true; 2286 } 2287 else 2288 { 2289 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; 2290 2291 if (!alloc->reserve()) return false; 2292 2293 // allocate new buffer 2294 char_t* buf = alloc->allocate_string(source_length + 1); 2295 if (!buf) return false; 2296 2297 // copy the string (including zero terminator) 2298 memcpy(buf, source, source_length * sizeof(char_t)); 2299 buf[source_length] = 0; 2300 2301 // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures) 2302 if (header & header_mask) alloc->deallocate_string(dest); 2303 2304 // the string is now allocated, so set the flag 2305 dest = buf; 2306 header |= header_mask; 2307 2308 return true; 2309 } 2310 } 2311 2312 struct gap 2313 { 2314 char_t* end; 2315 size_t size; 2316 gapgap2317 gap(): end(0), size(0) 2318 { 2319 } 2320 2321 // Push new gap, move s count bytes further (skipping the gap). 2322 // Collapse previous gap. pushgap2323 void push(char_t*& s, size_t count) 2324 { 2325 if (end) // there was a gap already; collapse it 2326 { 2327 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...) 
2328 assert(s >= end); 2329 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end)); 2330 } 2331 2332 s += count; // end of current gap 2333 2334 // "merge" two gaps 2335 end = s; 2336 size += count; 2337 } 2338 2339 // Collapse all gaps, return past-the-end pointer flushgap2340 char_t* flush(char_t* s) 2341 { 2342 if (end) 2343 { 2344 // Move [old_gap_end, current_pos) to [old_gap_start, ...) 2345 assert(s >= end); 2346 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end)); 2347 2348 return s - size; 2349 } 2350 else return s; 2351 } 2352 }; 2353 strconv_escape(char_t * s,gap & g)2354 PUGI__FN char_t* strconv_escape(char_t* s, gap& g) 2355 { 2356 char_t* stre = s + 1; 2357 2358 switch (*stre) 2359 { 2360 case '#': // &#... 2361 { 2362 unsigned int ucsc = 0; 2363 2364 if (stre[1] == 'x') // &#x... (hex code) 2365 { 2366 stre += 2; 2367 2368 char_t ch = *stre; 2369 2370 if (ch == ';') return stre; 2371 2372 for (;;) 2373 { 2374 if (static_cast<unsigned int>(ch - '0') <= 9) 2375 ucsc = 16 * ucsc + (ch - '0'); 2376 else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5) 2377 ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10); 2378 else if (ch == ';') 2379 break; 2380 else // cancel 2381 return stre; 2382 2383 ch = *++stre; 2384 } 2385 2386 ++stre; 2387 } 2388 else // &#... 
(dec code) 2389 { 2390 char_t ch = *++stre; 2391 2392 if (ch == ';') return stre; 2393 2394 for (;;) 2395 { 2396 if (static_cast<unsigned int>(static_cast<unsigned int>(ch) - '0') <= 9) 2397 ucsc = 10 * ucsc + (ch - '0'); 2398 else if (ch == ';') 2399 break; 2400 else // cancel 2401 return stre; 2402 2403 ch = *++stre; 2404 } 2405 2406 ++stre; 2407 } 2408 2409 #ifdef PUGIXML_WCHAR_MODE 2410 s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc)); 2411 #else 2412 s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc)); 2413 #endif 2414 2415 g.push(s, stre - s); 2416 return stre; 2417 } 2418 2419 case 'a': // &a 2420 { 2421 ++stre; 2422 2423 if (*stre == 'm') // &am 2424 { 2425 if (*++stre == 'p' && *++stre == ';') // & 2426 { 2427 *s++ = '&'; 2428 ++stre; 2429 2430 g.push(s, stre - s); 2431 return stre; 2432 } 2433 } 2434 else if (*stre == 'p') // &ap 2435 { 2436 if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // ' 2437 { 2438 *s++ = '\''; 2439 ++stre; 2440 2441 g.push(s, stre - s); 2442 return stre; 2443 } 2444 } 2445 break; 2446 } 2447 2448 case 'g': // &g 2449 { 2450 if (*++stre == 't' && *++stre == ';') // > 2451 { 2452 *s++ = '>'; 2453 ++stre; 2454 2455 g.push(s, stre - s); 2456 return stre; 2457 } 2458 break; 2459 } 2460 2461 case 'l': // &l 2462 { 2463 if (*++stre == 't' && *++stre == ';') // < 2464 { 2465 *s++ = '<'; 2466 ++stre; 2467 2468 g.push(s, stre - s); 2469 return stre; 2470 } 2471 break; 2472 } 2473 2474 case 'q': // &q 2475 { 2476 if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // " 2477 { 2478 *s++ = '"'; 2479 ++stre; 2480 2481 g.push(s, stre - s); 2482 return stre; 2483 } 2484 break; 2485 } 2486 2487 default: 2488 break; 2489 } 2490 2491 return stre; 2492 } 2493 2494 // Parser utilities 2495 #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e))) 2496 #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; } 
2497 #define PUGI__OPTSET(OPT) ( optmsk & (OPT) ) 2498 #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); } 2499 #define PUGI__POPNODE() { cursor = cursor->parent; } 2500 #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; } 2501 #define PUGI__SCANWHILE(X) { while (X) ++s; } 2502 #define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } } 2503 #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; } 2504 #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0) 2505 #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); } 2506 strconv_comment(char_t * s,char_t endch)2507 PUGI__FN char_t* strconv_comment(char_t* s, char_t endch) 2508 { 2509 gap g; 2510 2511 while (true) 2512 { 2513 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment)); 2514 2515 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair 2516 { 2517 *s++ = '\n'; // replace first one with 0x0a 2518 2519 if (*s == '\n') g.push(s, 1); 2520 } 2521 else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here 2522 { 2523 *g.flush(s) = 0; 2524 2525 return s + (s[2] == '>' ? 
3 : 2); 2526 } 2527 else if (*s == 0) 2528 { 2529 return 0; 2530 } 2531 else ++s; 2532 } 2533 } 2534 strconv_cdata(char_t * s,char_t endch)2535 PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch) 2536 { 2537 gap g; 2538 2539 while (true) 2540 { 2541 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata)); 2542 2543 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair 2544 { 2545 *s++ = '\n'; // replace first one with 0x0a 2546 2547 if (*s == '\n') g.push(s, 1); 2548 } 2549 else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here 2550 { 2551 *g.flush(s) = 0; 2552 2553 return s + 1; 2554 } 2555 else if (*s == 0) 2556 { 2557 return 0; 2558 } 2559 else ++s; 2560 } 2561 } 2562 2563 typedef char_t* (*strconv_pcdata_t)(char_t*); 2564 2565 template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl 2566 { parsestrconv_pcdata_impl2567 static char_t* parse(char_t* s) 2568 { 2569 gap g; 2570 2571 char_t* begin = s; 2572 2573 while (true) 2574 { 2575 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata)); 2576 2577 if (*s == '<') // PCDATA ends here 2578 { 2579 char_t* end = g.flush(s); 2580 2581 if (opt_trim::value) 2582 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) 2583 --end; 2584 2585 *end = 0; 2586 2587 return s + 1; 2588 } 2589 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair 2590 { 2591 *s++ = '\n'; // replace first one with 0x0a 2592 2593 if (*s == '\n') g.push(s, 1); 2594 } 2595 else if (opt_escape::value && *s == '&') 2596 { 2597 s = strconv_escape(s, g); 2598 } 2599 else if (*s == 0) 2600 { 2601 char_t* end = g.flush(s); 2602 2603 if (opt_trim::value) 2604 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) 2605 --end; 2606 2607 *end = 0; 2608 2609 return s; 2610 } 2611 else ++s; 2612 } 2613 } 2614 }; 2615 get_strconv_pcdata(unsigned int optmask)2616 PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask) 2617 { 
2618 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800); 2619 2620 switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (eol escapes trim) 2621 { 2622 case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse; 2623 case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse; 2624 case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse; 2625 case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse; 2626 case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse; 2627 case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse; 2628 case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse; 2629 case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse; 2630 default: assert(false); return 0; // should not get here 2631 } 2632 } 2633 2634 typedef char_t* (*strconv_attribute_t)(char_t*, char_t); 2635 2636 template <typename opt_escape> struct strconv_attribute_impl 2637 { parse_wnormstrconv_attribute_impl2638 static char_t* parse_wnorm(char_t* s, char_t end_quote) 2639 { 2640 gap g; 2641 2642 // trim leading whitespaces 2643 if (PUGI__IS_CHARTYPE(*s, ct_space)) 2644 { 2645 char_t* str = s; 2646 2647 do ++str; 2648 while (PUGI__IS_CHARTYPE(*str, ct_space)); 2649 2650 g.push(s, str - s); 2651 } 2652 2653 while (true) 2654 { 2655 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space)); 2656 2657 if (*s == end_quote) 2658 { 2659 char_t* str = g.flush(s); 2660 2661 do *str-- = 0; 2662 while (PUGI__IS_CHARTYPE(*str, ct_space)); 2663 2664 return s + 1; 2665 } 2666 else if (PUGI__IS_CHARTYPE(*s, ct_space)) 2667 { 2668 *s++ = ' '; 2669 2670 if (PUGI__IS_CHARTYPE(*s, ct_space)) 2671 { 2672 char_t* str = s + 1; 2673 while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str; 2674 2675 g.push(s, str - s); 2676 } 2677 } 2678 else if (opt_escape::value && *s == '&') 2679 { 2680 s = 
strconv_escape(s, g); 2681 } 2682 else if (!*s) 2683 { 2684 return 0; 2685 } 2686 else ++s; 2687 } 2688 } 2689 parse_wconvstrconv_attribute_impl2690 static char_t* parse_wconv(char_t* s, char_t end_quote) 2691 { 2692 gap g; 2693 2694 while (true) 2695 { 2696 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws)); 2697 2698 if (*s == end_quote) 2699 { 2700 *g.flush(s) = 0; 2701 2702 return s + 1; 2703 } 2704 else if (PUGI__IS_CHARTYPE(*s, ct_space)) 2705 { 2706 if (*s == '\r') 2707 { 2708 *s++ = ' '; 2709 2710 if (*s == '\n') g.push(s, 1); 2711 } 2712 else *s++ = ' '; 2713 } 2714 else if (opt_escape::value && *s == '&') 2715 { 2716 s = strconv_escape(s, g); 2717 } 2718 else if (!*s) 2719 { 2720 return 0; 2721 } 2722 else ++s; 2723 } 2724 } 2725 parse_eolstrconv_attribute_impl2726 static char_t* parse_eol(char_t* s, char_t end_quote) 2727 { 2728 gap g; 2729 2730 while (true) 2731 { 2732 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); 2733 2734 if (*s == end_quote) 2735 { 2736 *g.flush(s) = 0; 2737 2738 return s + 1; 2739 } 2740 else if (*s == '\r') 2741 { 2742 *s++ = '\n'; 2743 2744 if (*s == '\n') g.push(s, 1); 2745 } 2746 else if (opt_escape::value && *s == '&') 2747 { 2748 s = strconv_escape(s, g); 2749 } 2750 else if (!*s) 2751 { 2752 return 0; 2753 } 2754 else ++s; 2755 } 2756 } 2757 parse_simplestrconv_attribute_impl2758 static char_t* parse_simple(char_t* s, char_t end_quote) 2759 { 2760 gap g; 2761 2762 while (true) 2763 { 2764 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); 2765 2766 if (*s == end_quote) 2767 { 2768 *g.flush(s) = 0; 2769 2770 return s + 1; 2771 } 2772 else if (opt_escape::value && *s == '&') 2773 { 2774 s = strconv_escape(s, g); 2775 } 2776 else if (!*s) 2777 { 2778 return 0; 2779 } 2780 else ++s; 2781 } 2782 } 2783 }; 2784 get_strconv_attribute(unsigned int optmask)2785 PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask) 2786 { 2787 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && 
parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80); 2788 2789 switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes) 2790 { 2791 case 0: return strconv_attribute_impl<opt_false>::parse_simple; 2792 case 1: return strconv_attribute_impl<opt_true>::parse_simple; 2793 case 2: return strconv_attribute_impl<opt_false>::parse_eol; 2794 case 3: return strconv_attribute_impl<opt_true>::parse_eol; 2795 case 4: return strconv_attribute_impl<opt_false>::parse_wconv; 2796 case 5: return strconv_attribute_impl<opt_true>::parse_wconv; 2797 case 6: return strconv_attribute_impl<opt_false>::parse_wconv; 2798 case 7: return strconv_attribute_impl<opt_true>::parse_wconv; 2799 case 8: return strconv_attribute_impl<opt_false>::parse_wnorm; 2800 case 9: return strconv_attribute_impl<opt_true>::parse_wnorm; 2801 case 10: return strconv_attribute_impl<opt_false>::parse_wnorm; 2802 case 11: return strconv_attribute_impl<opt_true>::parse_wnorm; 2803 case 12: return strconv_attribute_impl<opt_false>::parse_wnorm; 2804 case 13: return strconv_attribute_impl<opt_true>::parse_wnorm; 2805 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm; 2806 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm; 2807 default: assert(false); return 0; // should not get here 2808 } 2809 } 2810 make_parse_result(xml_parse_status status,ptrdiff_t offset=0)2811 inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0) 2812 { 2813 xml_parse_result result; 2814 result.status = status; 2815 result.offset = offset; 2816 2817 return result; 2818 } 2819 2820 struct xml_parser 2821 { 2822 xml_allocator alloc; 2823 xml_allocator* alloc_state; 2824 char_t* error_offset; 2825 xml_parse_status error_status; 2826 xml_parserxml_parser2827 xml_parser(xml_allocator* alloc_): alloc(*alloc_), alloc_state(alloc_), error_offset(0), error_status(status_ok) 2828 { 2829 } 2830 ~xml_parserxml_parser2831 ~xml_parser() 2832 { 
2833 *alloc_state = alloc; 2834 } 2835 2836 // DOCTYPE consists of nested sections of the following possible types: 2837 // <!-- ... -->, <? ... ?>, "...", '...' 2838 // <![...]]> 2839 // <!...> 2840 // First group can not contain nested groups 2841 // Second group can contain nested groups of the same type 2842 // Third group can contain all other groups parse_doctype_primitivexml_parser2843 char_t* parse_doctype_primitive(char_t* s) 2844 { 2845 if (*s == '"' || *s == '\'') 2846 { 2847 // quoted string 2848 char_t ch = *s++; 2849 PUGI__SCANFOR(*s == ch); 2850 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); 2851 2852 s++; 2853 } 2854 else if (s[0] == '<' && s[1] == '?') 2855 { 2856 // <? ... ?> 2857 s += 2; 2858 PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype 2859 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); 2860 2861 s += 2; 2862 } 2863 else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-') 2864 { 2865 s += 4; 2866 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype 2867 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); 2868 2869 s += 3; 2870 } 2871 else PUGI__THROW_ERROR(status_bad_doctype, s); 2872 2873 return s; 2874 } 2875 parse_doctype_ignorexml_parser2876 char_t* parse_doctype_ignore(char_t* s) 2877 { 2878 size_t depth = 0; 2879 2880 assert(s[0] == '<' && s[1] == '!' && s[2] == '['); 2881 s += 3; 2882 2883 while (*s) 2884 { 2885 if (s[0] == '<' && s[1] == '!' 
&& s[2] == '[') 2886 { 2887 // nested ignore section 2888 s += 3; 2889 depth++; 2890 } 2891 else if (s[0] == ']' && s[1] == ']' && s[2] == '>') 2892 { 2893 // ignore section end 2894 s += 3; 2895 2896 if (depth == 0) 2897 return s; 2898 2899 depth--; 2900 } 2901 else s++; 2902 } 2903 2904 PUGI__THROW_ERROR(status_bad_doctype, s); 2905 } 2906 parse_doctype_groupxml_parser2907 char_t* parse_doctype_group(char_t* s, char_t endch) 2908 { 2909 size_t depth = 0; 2910 2911 assert((s[0] == '<' || s[0] == 0) && s[1] == '!'); 2912 s += 2; 2913 2914 while (*s) 2915 { 2916 if (s[0] == '<' && s[1] == '!' && s[2] != '-') 2917 { 2918 if (s[2] == '[') 2919 { 2920 // ignore 2921 s = parse_doctype_ignore(s); 2922 if (!s) return s; 2923 } 2924 else 2925 { 2926 // some control group 2927 s += 2; 2928 depth++; 2929 } 2930 } 2931 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') 2932 { 2933 // unknown tag (forbidden), or some primitive group 2934 s = parse_doctype_primitive(s); 2935 if (!s) return s; 2936 } 2937 else if (*s == '>') 2938 { 2939 if (depth == 0) 2940 return s; 2941 2942 depth--; 2943 s++; 2944 } 2945 else s++; 2946 } 2947 2948 if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s); 2949 2950 return s; 2951 } 2952 parse_exclamationxml_parser2953 char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) 2954 { 2955 // parse node contents, starting with exclamation mark 2956 ++s; 2957 2958 if (*s == '-') // '<!-...' 2959 { 2960 ++s; 2961 2962 if (*s == '-') // '<!--...' 2963 { 2964 ++s; 2965 2966 if (PUGI__OPTSET(parse_comments)) 2967 { 2968 PUGI__PUSHNODE(node_comment); // Append a new node on the tree. 2969 cursor->value = s; // Save the offset. 2970 } 2971 2972 if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments)) 2973 { 2974 s = strconv_comment(s, endch); 2975 2976 if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value); 2977 } 2978 else 2979 { 2980 // Scan for terminating '-->'. 
2981 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')); 2982 PUGI__CHECK_ERROR(status_bad_comment, s); 2983 2984 if (PUGI__OPTSET(parse_comments)) 2985 *s = 0; // Zero-terminate this segment at the first terminating '-'. 2986 2987 s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'. 2988 } 2989 } 2990 else PUGI__THROW_ERROR(status_bad_comment, s); 2991 } 2992 else if (*s == '[') 2993 { 2994 // '<![CDATA[...' 2995 if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[') 2996 { 2997 ++s; 2998 2999 if (PUGI__OPTSET(parse_cdata)) 3000 { 3001 PUGI__PUSHNODE(node_cdata); // Append a new node on the tree. 3002 cursor->value = s; // Save the offset. 3003 3004 if (PUGI__OPTSET(parse_eol)) 3005 { 3006 s = strconv_cdata(s, endch); 3007 3008 if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value); 3009 } 3010 else 3011 { 3012 // Scan for terminating ']]>'. 3013 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')); 3014 PUGI__CHECK_ERROR(status_bad_cdata, s); 3015 3016 *s++ = 0; // Zero-terminate this segment. 3017 } 3018 } 3019 else // Flagged for discard, but we still have to scan for the terminator. 3020 { 3021 // Scan for terminating ']]>'. 3022 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')); 3023 PUGI__CHECK_ERROR(status_bad_cdata, s); 3024 3025 ++s; 3026 } 3027 3028 s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'. 
3029 } 3030 else PUGI__THROW_ERROR(status_bad_cdata, s); 3031 } 3032 else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E')) 3033 { 3034 s -= 2; 3035 3036 if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s); 3037 3038 char_t* mark = s + 9; 3039 3040 s = parse_doctype_group(s, endch); 3041 if (!s) return s; 3042 3043 assert((*s == 0 && endch == '>') || *s == '>'); 3044 if (*s) *s++ = 0; 3045 3046 if (PUGI__OPTSET(parse_doctype)) 3047 { 3048 while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark; 3049 3050 PUGI__PUSHNODE(node_doctype); 3051 3052 cursor->value = mark; 3053 } 3054 } 3055 else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s); 3056 else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s); 3057 else PUGI__THROW_ERROR(status_unrecognized_tag, s); 3058 3059 return s; 3060 } 3061 parse_questionxml_parser3062 char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch) 3063 { 3064 // load into registers 3065 xml_node_struct* cursor = ref_cursor; 3066 char_t ch = 0; 3067 3068 // parse node contents, starting with question mark 3069 ++s; 3070 3071 // read PI target 3072 char_t* target = s; 3073 3074 if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s); 3075 3076 PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); 3077 PUGI__CHECK_ERROR(status_bad_pi, s); 3078 3079 // determine node type; stricmp / strcasecmp is not portable 3080 bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s; 3081 3082 if (declaration ? 
PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi)) 3083 { 3084 if (declaration) 3085 { 3086 // disallow non top-level declarations 3087 if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s); 3088 3089 PUGI__PUSHNODE(node_declaration); 3090 } 3091 else 3092 { 3093 PUGI__PUSHNODE(node_pi); 3094 } 3095 3096 cursor->name = target; 3097 3098 PUGI__ENDSEG(); 3099 3100 // parse value/attributes 3101 if (ch == '?') 3102 { 3103 // empty node 3104 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s); 3105 s += (*s == '>'); 3106 3107 PUGI__POPNODE(); 3108 } 3109 else if (PUGI__IS_CHARTYPE(ch, ct_space)) 3110 { 3111 PUGI__SKIPWS(); 3112 3113 // scan for tag end 3114 char_t* value = s; 3115 3116 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>')); 3117 PUGI__CHECK_ERROR(status_bad_pi, s); 3118 3119 if (declaration) 3120 { 3121 // replace ending ? with / so that 'element' terminates properly 3122 *s = '/'; 3123 3124 // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES 3125 s = value; 3126 } 3127 else 3128 { 3129 // store value and step over > 3130 cursor->value = value; 3131 3132 PUGI__POPNODE(); 3133 3134 PUGI__ENDSEG(); 3135 3136 s += (*s == '>'); 3137 } 3138 } 3139 else PUGI__THROW_ERROR(status_bad_pi, s); 3140 } 3141 else 3142 { 3143 // scan for tag end 3144 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>')); 3145 PUGI__CHECK_ERROR(status_bad_pi, s); 3146 3147 s += (s[1] == '>' ? 
2 : 1); 3148 } 3149 3150 // store from registers 3151 ref_cursor = cursor; 3152 3153 return s; 3154 } 3155 parse_treexml_parser3156 char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch) 3157 { 3158 strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk); 3159 strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk); 3160 3161 char_t ch = 0; 3162 xml_node_struct* cursor = root; 3163 char_t* mark = s; 3164 3165 while (*s != 0) 3166 { 3167 if (*s == '<') 3168 { 3169 ++s; 3170 3171 LOC_TAG: 3172 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...' 3173 { 3174 PUGI__PUSHNODE(node_element); // Append a new node to the tree. 3175 3176 cursor->name = s; 3177 3178 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. 3179 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over. 3180 3181 if (ch == '>') 3182 { 3183 // end of tag 3184 } 3185 else if (PUGI__IS_CHARTYPE(ch, ct_space)) 3186 { 3187 LOC_ATTRIBUTES: 3188 while (true) 3189 { 3190 PUGI__SKIPWS(); // Eat any whitespace. 3191 3192 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #... 3193 { 3194 xml_attribute_struct* a = append_new_attribute(cursor, alloc); // Make space for this attribute. 3195 if (!a) PUGI__THROW_ERROR(status_out_of_memory, s); 3196 3197 a->name = s; // Save the offset. 3198 3199 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. 3200 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over. 3201 3202 if (PUGI__IS_CHARTYPE(ch, ct_space)) 3203 { 3204 PUGI__SKIPWS(); // Eat any whitespace. 3205 3206 ch = *s; 3207 ++s; 3208 } 3209 3210 if (ch == '=') // '<... #=...' 3211 { 3212 PUGI__SKIPWS(); // Eat any whitespace. 3213 3214 if (*s == '"' || *s == '\'') // '<... #="...' 3215 { 3216 ch = *s; // Save quote char to avoid breaking on "''" -or- '""'. 3217 ++s; // Step over the quote. 3218 a->value = s; // Save the offset. 
3219 3220 s = strconv_attribute(s, ch); 3221 3222 if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value); 3223 3224 // After this line the loop continues from the start; 3225 // Whitespaces, / and > are ok, symbols and EOF are wrong, 3226 // everything else will be detected 3227 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s); 3228 } 3229 else PUGI__THROW_ERROR(status_bad_attribute, s); 3230 } 3231 else PUGI__THROW_ERROR(status_bad_attribute, s); 3232 } 3233 else if (*s == '/') 3234 { 3235 ++s; 3236 3237 if (*s == '>') 3238 { 3239 PUGI__POPNODE(); 3240 s++; 3241 break; 3242 } 3243 else if (*s == 0 && endch == '>') 3244 { 3245 PUGI__POPNODE(); 3246 break; 3247 } 3248 else PUGI__THROW_ERROR(status_bad_start_element, s); 3249 } 3250 else if (*s == '>') 3251 { 3252 ++s; 3253 3254 break; 3255 } 3256 else if (*s == 0 && endch == '>') 3257 { 3258 break; 3259 } 3260 else PUGI__THROW_ERROR(status_bad_start_element, s); 3261 } 3262 3263 // !!! 3264 } 3265 else if (ch == '/') // '<#.../' 3266 { 3267 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s); 3268 3269 PUGI__POPNODE(); // Pop. 3270 3271 s += (*s == '>'); 3272 } 3273 else if (ch == 0) 3274 { 3275 // we stepped over null terminator, backtrack & handle closing tag 3276 --s; 3277 3278 if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s); 3279 } 3280 else PUGI__THROW_ERROR(status_bad_start_element, s); 3281 } 3282 else if (*s == '/') 3283 { 3284 ++s; 3285 3286 char_t* name = cursor->name; 3287 if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s); 3288 3289 while (PUGI__IS_CHARTYPE(*s, ct_symbol)) 3290 { 3291 if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s); 3292 } 3293 3294 if (*name) 3295 { 3296 if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s); 3297 else PUGI__THROW_ERROR(status_end_element_mismatch, s); 3298 } 3299 3300 PUGI__POPNODE(); // Pop. 
3301 3302 PUGI__SKIPWS(); 3303 3304 if (*s == 0) 3305 { 3306 if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s); 3307 } 3308 else 3309 { 3310 if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s); 3311 ++s; 3312 } 3313 } 3314 else if (*s == '?') // '<?...' 3315 { 3316 s = parse_question(s, cursor, optmsk, endch); 3317 if (!s) return s; 3318 3319 assert(cursor); 3320 if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES; 3321 } 3322 else if (*s == '!') // '<!...' 3323 { 3324 s = parse_exclamation(s, cursor, optmsk, endch); 3325 if (!s) return s; 3326 } 3327 else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s); 3328 else PUGI__THROW_ERROR(status_unrecognized_tag, s); 3329 } 3330 else 3331 { 3332 mark = s; // Save this offset while searching for a terminator. 3333 3334 PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here. 3335 3336 if (*s == '<' || !*s) 3337 { 3338 // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one 3339 assert(mark != s); 3340 3341 if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata)) 3342 { 3343 continue; 3344 } 3345 else if (PUGI__OPTSET(parse_ws_pcdata_single)) 3346 { 3347 if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue; 3348 } 3349 } 3350 3351 if (!PUGI__OPTSET(parse_trim_pcdata)) 3352 s = mark; 3353 3354 if (cursor->parent || PUGI__OPTSET(parse_fragment)) 3355 { 3356 PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree. 3357 cursor->value = s; // Save the offset. 3358 3359 s = strconv_pcdata(s); 3360 3361 PUGI__POPNODE(); // Pop since this is a standalone. 
3362 3363 if (!*s) break; 3364 } 3365 else 3366 { 3367 PUGI__SCANFOR(*s == '<'); // '...<' 3368 if (!*s) break; 3369 3370 ++s; 3371 } 3372 3373 // We're after '<' 3374 goto LOC_TAG; 3375 } 3376 } 3377 3378 // check that last tag is closed 3379 if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s); 3380 3381 return s; 3382 } 3383 3384 #ifdef PUGIXML_WCHAR_MODE parse_skip_bomxml_parser3385 static char_t* parse_skip_bom(char_t* s) 3386 { 3387 unsigned int bom = 0xfeff; 3388 return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s; 3389 } 3390 #else parse_skip_bomxml_parser3391 static char_t* parse_skip_bom(char_t* s) 3392 { 3393 return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s; 3394 } 3395 #endif 3396 has_element_node_siblingsxml_parser3397 static bool has_element_node_siblings(xml_node_struct* node) 3398 { 3399 while (node) 3400 { 3401 if (PUGI__NODETYPE(node) == node_element) return true; 3402 3403 node = node->next_sibling; 3404 } 3405 3406 return false; 3407 } 3408 parsexml_parser3409 static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk) 3410 { 3411 // early-out for empty documents 3412 if (length == 0) 3413 return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element); 3414 3415 // get last child of the root before parsing 3416 xml_node_struct* last_root_child = root->first_child ? 
root->first_child->prev_sibling_c + 0 : 0; 3417 3418 // create parser on stack 3419 xml_parser parser(static_cast<xml_allocator*>(xmldoc)); 3420 3421 // save last character and make buffer zero-terminated (speeds up parsing) 3422 char_t endch = buffer[length - 1]; 3423 buffer[length - 1] = 0; 3424 3425 // skip BOM to make sure it does not end up as part of parse output 3426 char_t* buffer_data = parse_skip_bom(buffer); 3427 3428 // perform actual parsing 3429 parser.parse_tree(buffer_data, root, optmsk, endch); 3430 3431 xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0); 3432 assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length); 3433 3434 if (result) 3435 { 3436 // since we removed last character, we have to handle the only possible false positive (stray <) 3437 if (endch == '<') 3438 return make_parse_result(status_unrecognized_tag, length - 1); 3439 3440 // check if there are any element nodes parsed 3441 xml_node_struct* first_root_child_parsed = last_root_child ? 
last_root_child->next_sibling + 0 : root->first_child+ 0; 3442 3443 if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed)) 3444 return make_parse_result(status_no_document_element, length - 1); 3445 } 3446 else 3447 { 3448 // roll back offset if it occurs on a null terminator in the source buffer 3449 if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0) 3450 result.offset--; 3451 } 3452 3453 return result; 3454 } 3455 }; 3456 3457 // Output facilities get_write_native_encoding()3458 PUGI__FN xml_encoding get_write_native_encoding() 3459 { 3460 #ifdef PUGIXML_WCHAR_MODE 3461 return get_wchar_encoding(); 3462 #else 3463 return encoding_utf8; 3464 #endif 3465 } 3466 get_write_encoding(xml_encoding encoding)3467 PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding) 3468 { 3469 // replace wchar encoding with utf implementation 3470 if (encoding == encoding_wchar) return get_wchar_encoding(); 3471 3472 // replace utf16 encoding with utf16 with specific endianness 3473 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 3474 3475 // replace utf32 encoding with utf32 with specific endianness 3476 if (encoding == encoding_utf32) return is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; 3477 3478 // only do autodetection if no explicit encoding is requested 3479 if (encoding != encoding_auto) return encoding; 3480 3481 // assume utf8 encoding 3482 return encoding_utf8; 3483 } 3484 convert_buffer_output_generic(typename T::value_type dest,const char_t * data,size_t length,D,T)3485 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T) 3486 { 3487 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); 3488 3489 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T()); 3490 3491 return static_cast<size_t>(end - dest) * sizeof(*dest); 3492 } 3493 convert_buffer_output_generic(typename T::value_type dest,const char_t * data,size_t length,D,T,bool opt_swap)3494 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap) 3495 { 3496 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); 3497 3498 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T()); 3499 3500 if (opt_swap) 3501 { 3502 for (typename T::value_type i = dest; i != end; ++i) 3503 *i = endian_swap(*i); 3504 } 3505 3506 return static_cast<size_t>(end - dest) * sizeof(*dest); 3507 } 3508 3509 #ifdef PUGIXML_WCHAR_MODE get_valid_length(const char_t * data,size_t length)3510 PUGI__FN size_t get_valid_length(const char_t* data, size_t length) 3511 { 3512 if (length < 1) return 0; 3513 3514 // discard last character if it's the lead of a surrogate pair 3515 return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? 
length - 1 : length; 3516 } 3517 convert_buffer_output(char_t * r_char,uint8_t * r_u8,uint16_t * r_u16,uint32_t * r_u32,const char_t * data,size_t length,xml_encoding encoding)3518 PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) 3519 { 3520 // only endian-swapping is required 3521 if (need_endian_swap_utf(encoding, get_wchar_encoding())) 3522 { 3523 convert_wchar_endian_swap(r_char, data, length); 3524 3525 return length * sizeof(char_t); 3526 } 3527 3528 // convert to utf8 3529 if (encoding == encoding_utf8) 3530 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer()); 3531 3532 // convert to utf16 3533 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) 3534 { 3535 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 3536 3537 return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding); 3538 } 3539 3540 // convert to utf32 3541 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) 3542 { 3543 xml_encoding native_encoding = is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; 3544 3545 return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding); 3546 } 3547 3548 // convert to latin1 3549 if (encoding == encoding_latin1) 3550 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer()); 3551 3552 assert(!"Invalid encoding"); 3553 return 0; 3554 } 3555 #else get_valid_length(const char_t * data,size_t length)3556 PUGI__FN size_t get_valid_length(const char_t* data, size_t length) 3557 { 3558 if (length < 5) return 0; 3559 3560 for (size_t i = 1; i <= 4; ++i) 3561 { 3562 uint8_t ch = static_cast<uint8_t>(data[length - i]); 3563 3564 // either a standalone character or a leading one 3565 if ((ch & 0xc0) != 0x80) return length - i; 3566 } 3567 3568 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk 3569 return length; 3570 } 3571 convert_buffer_output(char_t *,uint8_t * r_u8,uint16_t * r_u16,uint32_t * r_u32,const char_t * data,size_t length,xml_encoding encoding)3572 PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) 3573 { 3574 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) 3575 { 3576 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 3577 3578 return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding); 3579 } 3580 3581 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) 3582 { 3583 xml_encoding native_encoding = is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; 3584 3585 return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding); 3586 } 3587 3588 if (encoding == encoding_latin1) 3589 return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer()); 3590 3591 assert(!"Invalid encoding"); 3592 return 0; 3593 } 3594 #endif 3595 3596 class xml_buffered_writer 3597 { 3598 xml_buffered_writer(const xml_buffered_writer&); 3599 xml_buffered_writer& operator=(const xml_buffered_writer&); 3600 3601 public: xml_buffered_writer(xml_writer & writer_,xml_encoding user_encoding)3602 xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding)) 3603 { 3604 PUGI__STATIC_ASSERT(bufcapacity >= 8); 3605 } 3606 flush()3607 size_t flush() 3608 { 3609 flush(buffer, bufsize); 3610 bufsize = 0; 3611 return 0; 3612 } 3613 flush(const char_t * data,size_t size)3614 void flush(const char_t* data, size_t size) 3615 { 3616 if (size == 0) return; 3617 3618 // fast path, just write data 3619 if (encoding == get_write_native_encoding()) 3620 writer.write(data, size * sizeof(char_t)); 3621 else 3622 { 3623 // convert chunk 3624 size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding); 3625 assert(result <= sizeof(scratch)); 3626 3627 // write data 3628 writer.write(scratch.data_u8, result); 3629 } 3630 } 3631 write_direct(const char_t * data,size_t length)3632 void write_direct(const char_t* data, size_t length) 3633 { 3634 // flush the remaining buffer contents 3635 flush(); 3636 3637 // handle large chunks 3638 if (length > bufcapacity) 3639 { 3640 if (encoding == get_write_native_encoding()) 3641 { 3642 // fast path, can just write data chunk 3643 writer.write(data, length * sizeof(char_t)); 3644 return; 3645 } 3646 3647 // need to convert in suitable chunks 3648 while (length 
> bufcapacity) 3649 { 3650 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer 3651 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary) 3652 size_t chunk_size = get_valid_length(data, bufcapacity); 3653 assert(chunk_size); 3654 3655 // convert chunk and write 3656 flush(data, chunk_size); 3657 3658 // iterate 3659 data += chunk_size; 3660 length -= chunk_size; 3661 } 3662 3663 // small tail is copied below 3664 bufsize = 0; 3665 } 3666 3667 memcpy(buffer + bufsize, data, length * sizeof(char_t)); 3668 bufsize += length; 3669 } 3670 write_buffer(const char_t * data,size_t length)3671 void write_buffer(const char_t* data, size_t length) 3672 { 3673 size_t offset = bufsize; 3674 3675 if (offset + length <= bufcapacity) 3676 { 3677 memcpy(buffer + offset, data, length * sizeof(char_t)); 3678 bufsize = offset + length; 3679 } 3680 else 3681 { 3682 write_direct(data, length); 3683 } 3684 } 3685 write_string(const char_t * data)3686 void write_string(const char_t* data) 3687 { 3688 // write the part of the string that fits in the buffer 3689 size_t offset = bufsize; 3690 3691 while (*data && offset < bufcapacity) 3692 buffer[offset++] = *data++; 3693 3694 // write the rest 3695 if (offset < bufcapacity) 3696 { 3697 bufsize = offset; 3698 } 3699 else 3700 { 3701 // backtrack a bit if we have split the codepoint 3702 size_t length = offset - bufsize; 3703 size_t extra = length - get_valid_length(data - length, length); 3704 3705 bufsize = offset - extra; 3706 3707 write_direct(data - extra, strlength(data) + extra); 3708 } 3709 } 3710 write(char_t d0)3711 void write(char_t d0) 3712 { 3713 size_t offset = bufsize; 3714 if (offset > bufcapacity - 1) offset = flush(); 3715 3716 buffer[offset + 0] = d0; 3717 bufsize = offset + 1; 3718 } 3719 write(char_t d0,char_t d1)3720 void write(char_t d0, char_t d1) 3721 { 3722 size_t offset = bufsize; 3723 if (offset > bufcapacity - 2) offset = 
flush(); 3724 3725 buffer[offset + 0] = d0; 3726 buffer[offset + 1] = d1; 3727 bufsize = offset + 2; 3728 } 3729 write(char_t d0,char_t d1,char_t d2)3730 void write(char_t d0, char_t d1, char_t d2) 3731 { 3732 size_t offset = bufsize; 3733 if (offset > bufcapacity - 3) offset = flush(); 3734 3735 buffer[offset + 0] = d0; 3736 buffer[offset + 1] = d1; 3737 buffer[offset + 2] = d2; 3738 bufsize = offset + 3; 3739 } 3740 write(char_t d0,char_t d1,char_t d2,char_t d3)3741 void write(char_t d0, char_t d1, char_t d2, char_t d3) 3742 { 3743 size_t offset = bufsize; 3744 if (offset > bufcapacity - 4) offset = flush(); 3745 3746 buffer[offset + 0] = d0; 3747 buffer[offset + 1] = d1; 3748 buffer[offset + 2] = d2; 3749 buffer[offset + 3] = d3; 3750 bufsize = offset + 4; 3751 } 3752 write(char_t d0,char_t d1,char_t d2,char_t d3,char_t d4)3753 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) 3754 { 3755 size_t offset = bufsize; 3756 if (offset > bufcapacity - 5) offset = flush(); 3757 3758 buffer[offset + 0] = d0; 3759 buffer[offset + 1] = d1; 3760 buffer[offset + 2] = d2; 3761 buffer[offset + 3] = d3; 3762 buffer[offset + 4] = d4; 3763 bufsize = offset + 5; 3764 } 3765 write(char_t d0,char_t d1,char_t d2,char_t d3,char_t d4,char_t d5)3766 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) 3767 { 3768 size_t offset = bufsize; 3769 if (offset > bufcapacity - 6) offset = flush(); 3770 3771 buffer[offset + 0] = d0; 3772 buffer[offset + 1] = d1; 3773 buffer[offset + 2] = d2; 3774 buffer[offset + 3] = d3; 3775 buffer[offset + 4] = d4; 3776 buffer[offset + 5] = d5; 3777 bufsize = offset + 6; 3778 } 3779 3780 // utf8 maximum expansion: x4 (-> utf32) 3781 // utf16 maximum expansion: x2 (-> utf32) 3782 // utf32 maximum expansion: x1 3783 enum 3784 { 3785 bufcapacitybytes = 3786 #ifdef PUGIXML_MEMORY_OUTPUT_STACK 3787 PUGIXML_MEMORY_OUTPUT_STACK 3788 #else 3789 10240 3790 #endif 3791 , 3792 bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4) 
3793 }; 3794 3795 char_t buffer[bufcapacity]; 3796 3797 union 3798 { 3799 uint8_t data_u8[4 * bufcapacity]; 3800 uint16_t data_u16[2 * bufcapacity]; 3801 uint32_t data_u32[bufcapacity]; 3802 char_t data_char[bufcapacity]; 3803 } scratch; 3804 3805 xml_writer& writer; 3806 size_t bufsize; 3807 xml_encoding encoding; 3808 }; 3809 text_output_escaped(xml_buffered_writer & writer,const char_t * s,chartypex_t type)3810 PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type) 3811 { 3812 while (*s) 3813 { 3814 const char_t* prev = s; 3815 3816 // While *s is a usual symbol 3817 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type)); 3818 3819 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 3820 3821 switch (*s) 3822 { 3823 case 0: break; 3824 case '&': 3825 writer.write('&', 'a', 'm', 'p', ';'); 3826 ++s; 3827 break; 3828 case '<': 3829 writer.write('&', 'l', 't', ';'); 3830 ++s; 3831 break; 3832 case '>': 3833 writer.write('&', 'g', 't', ';'); 3834 ++s; 3835 break; 3836 case '"': 3837 writer.write('&', 'q', 'u', 'o', 't', ';'); 3838 ++s; 3839 break; 3840 default: // s is not a usual symbol 3841 { 3842 unsigned int ch = static_cast<unsigned int>(*s++); 3843 assert(ch < 32); 3844 3845 writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';'); 3846 } 3847 } 3848 } 3849 } 3850 text_output(xml_buffered_writer & writer,const char_t * s,chartypex_t type,unsigned int flags)3851 PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags) 3852 { 3853 if (flags & format_no_escapes) 3854 writer.write_string(s); 3855 else 3856 text_output_escaped(writer, s, type); 3857 } 3858 text_output_cdata(xml_buffered_writer & writer,const char_t * s)3859 PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s) 3860 { 3861 do 3862 { 3863 writer.write('<', '!', '[', 'C', 'D'); 3864 writer.write('A', 'T', 'A', '['); 3865 3866 const 
char_t* prev = s; 3867 3868 // look for ]]> sequence - we can't output it as is since it terminates CDATA 3869 while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s; 3870 3871 // skip ]] if we stopped at ]]>, > will go to the next CDATA section 3872 if (*s) s += 2; 3873 3874 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 3875 3876 writer.write(']', ']', '>'); 3877 } 3878 while (*s); 3879 } 3880 text_output_indent(xml_buffered_writer & writer,const char_t * indent,size_t indent_length,unsigned int depth)3881 PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth) 3882 { 3883 switch (indent_length) 3884 { 3885 case 1: 3886 { 3887 for (unsigned int i = 0; i < depth; ++i) 3888 writer.write(indent[0]); 3889 break; 3890 } 3891 3892 case 2: 3893 { 3894 for (unsigned int i = 0; i < depth; ++i) 3895 writer.write(indent[0], indent[1]); 3896 break; 3897 } 3898 3899 case 3: 3900 { 3901 for (unsigned int i = 0; i < depth; ++i) 3902 writer.write(indent[0], indent[1], indent[2]); 3903 break; 3904 } 3905 3906 case 4: 3907 { 3908 for (unsigned int i = 0; i < depth; ++i) 3909 writer.write(indent[0], indent[1], indent[2], indent[3]); 3910 break; 3911 } 3912 3913 default: 3914 { 3915 for (unsigned int i = 0; i < depth; ++i) 3916 writer.write_buffer(indent, indent_length); 3917 } 3918 } 3919 } 3920 node_output_comment(xml_buffered_writer & writer,const char_t * s)3921 PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s) 3922 { 3923 writer.write('<', '!', '-', '-'); 3924 3925 while (*s) 3926 { 3927 const char_t* prev = s; 3928 3929 // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body 3930 while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s; 3931 3932 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 3933 3934 if (*s) 3935 { 3936 assert(*s == '-'); 3937 3938 writer.write('-', ' '); 3939 ++s; 3940 } 3941 } 3942 3943 
writer.write('-', '-', '>'); 3944 } 3945 node_output_pi_value(xml_buffered_writer & writer,const char_t * s)3946 PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s) 3947 { 3948 while (*s) 3949 { 3950 const char_t* prev = s; 3951 3952 // look for ?> sequence - we can't output it since ?> terminates PI 3953 while (*s && !(s[0] == '?' && s[1] == '>')) ++s; 3954 3955 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 3956 3957 if (*s) 3958 { 3959 assert(s[0] == '?' && s[1] == '>'); 3960 3961 writer.write('?', ' ', '>'); 3962 s += 2; 3963 } 3964 } 3965 } 3966 node_output_attributes(xml_buffered_writer & writer,xml_node_struct * node,const char_t * indent,size_t indent_length,unsigned int flags,unsigned int depth)3967 PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) 3968 { 3969 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 3970 3971 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) 3972 { 3973 if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes) 3974 { 3975 writer.write('\n'); 3976 3977 text_output_indent(writer, indent, indent_length, depth + 1); 3978 } 3979 else 3980 { 3981 writer.write(' '); 3982 } 3983 3984 writer.write_string(a->name ? 
a->name + 0 : default_name); 3985 writer.write('=', '"'); 3986 3987 if (a->value) 3988 text_output(writer, a->value, ctx_special_attr, flags); 3989 3990 writer.write('"'); 3991 } 3992 } 3993 node_output_start(xml_buffered_writer & writer,xml_node_struct * node,const char_t * indent,size_t indent_length,unsigned int flags,unsigned int depth)3994 PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) 3995 { 3996 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 3997 const char_t* name = node->name ? node->name + 0 : default_name; 3998 3999 writer.write('<'); 4000 writer.write_string(name); 4001 4002 if (node->first_attribute) 4003 node_output_attributes(writer, node, indent, indent_length, flags, depth); 4004 4005 if (!node->first_child) 4006 { 4007 writer.write(' ', '/', '>'); 4008 4009 return false; 4010 } 4011 else 4012 { 4013 writer.write('>'); 4014 4015 return true; 4016 } 4017 } 4018 node_output_end(xml_buffered_writer & writer,xml_node_struct * node)4019 PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node) 4020 { 4021 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 4022 const char_t* name = node->name ? node->name + 0 : default_name; 4023 4024 writer.write('<', '/'); 4025 writer.write_string(name); 4026 writer.write('>'); 4027 } 4028 node_output_simple(xml_buffered_writer & writer,xml_node_struct * node,unsigned int flags)4029 PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags) 4030 { 4031 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 4032 4033 switch (PUGI__NODETYPE(node)) 4034 { 4035 case node_pcdata: 4036 text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags); 4037 break; 4038 4039 case node_cdata: 4040 text_output_cdata(writer, node->value ? 
node->value + 0 : PUGIXML_TEXT("")); 4041 break; 4042 4043 case node_comment: 4044 node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT("")); 4045 break; 4046 4047 case node_pi: 4048 writer.write('<', '?'); 4049 writer.write_string(node->name ? node->name + 0 : default_name); 4050 4051 if (node->value) 4052 { 4053 writer.write(' '); 4054 node_output_pi_value(writer, node->value); 4055 } 4056 4057 writer.write('?', '>'); 4058 break; 4059 4060 case node_declaration: 4061 writer.write('<', '?'); 4062 writer.write_string(node->name ? node->name + 0 : default_name); 4063 node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0); 4064 writer.write('?', '>'); 4065 break; 4066 4067 case node_doctype: 4068 writer.write('<', '!', 'D', 'O', 'C'); 4069 writer.write('T', 'Y', 'P', 'E'); 4070 4071 if (node->value) 4072 { 4073 writer.write(' '); 4074 writer.write_string(node->value); 4075 } 4076 4077 writer.write('>'); 4078 break; 4079 4080 default: 4081 assert(!"Invalid node type"); 4082 } 4083 } 4084 4085 enum indent_flags_t 4086 { 4087 indent_newline = 1, 4088 indent_indent = 2 4089 }; 4090 node_output(xml_buffered_writer & writer,xml_node_struct * root,const char_t * indent,unsigned int flags,unsigned int depth)4091 PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth) 4092 { 4093 size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? 
strlength(indent) : 0; 4094 unsigned int indent_flags = indent_indent; 4095 4096 xml_node_struct* node = root; 4097 4098 do 4099 { 4100 assert(node); 4101 4102 // begin writing current node 4103 if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata) 4104 { 4105 node_output_simple(writer, node, flags); 4106 4107 indent_flags = 0; 4108 } 4109 else 4110 { 4111 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) 4112 writer.write('\n'); 4113 4114 if ((indent_flags & indent_indent) && indent_length) 4115 text_output_indent(writer, indent, indent_length, depth); 4116 4117 if (PUGI__NODETYPE(node) == node_element) 4118 { 4119 indent_flags = indent_newline | indent_indent; 4120 4121 if (node_output_start(writer, node, indent, indent_length, flags, depth)) 4122 { 4123 node = node->first_child; 4124 depth++; 4125 continue; 4126 } 4127 } 4128 else if (PUGI__NODETYPE(node) == node_document) 4129 { 4130 indent_flags = indent_indent; 4131 4132 if (node->first_child) 4133 { 4134 node = node->first_child; 4135 continue; 4136 } 4137 } 4138 else 4139 { 4140 node_output_simple(writer, node, flags); 4141 4142 indent_flags = indent_newline | indent_indent; 4143 } 4144 } 4145 4146 // continue to the next node 4147 while (node != root) 4148 { 4149 if (node->next_sibling) 4150 { 4151 node = node->next_sibling; 4152 break; 4153 } 4154 4155 node = node->parent; 4156 4157 // write closing node 4158 if (PUGI__NODETYPE(node) == node_element) 4159 { 4160 depth--; 4161 4162 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) 4163 writer.write('\n'); 4164 4165 if ((indent_flags & indent_indent) && indent_length) 4166 text_output_indent(writer, indent, indent_length, depth); 4167 4168 node_output_end(writer, node); 4169 4170 indent_flags = indent_newline | indent_indent; 4171 } 4172 } 4173 } 4174 while (node != root); 4175 4176 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) 4177 writer.write('\n'); 4178 } 4179 
has_declaration(xml_node_struct * node)4180 PUGI__FN bool has_declaration(xml_node_struct* node) 4181 { 4182 for (xml_node_struct* child = node->first_child; child; child = child->next_sibling) 4183 { 4184 xml_node_type type = PUGI__NODETYPE(child); 4185 4186 if (type == node_declaration) return true; 4187 if (type == node_element) return false; 4188 } 4189 4190 return false; 4191 } 4192 is_attribute_of(xml_attribute_struct * attr,xml_node_struct * node)4193 PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node) 4194 { 4195 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) 4196 if (a == attr) 4197 return true; 4198 4199 return false; 4200 } 4201 allow_insert_attribute(xml_node_type parent)4202 PUGI__FN bool allow_insert_attribute(xml_node_type parent) 4203 { 4204 return parent == node_element || parent == node_declaration; 4205 } 4206 allow_insert_child(xml_node_type parent,xml_node_type child)4207 PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child) 4208 { 4209 if (parent != node_document && parent != node_element) return false; 4210 if (child == node_document || child == node_null) return false; 4211 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false; 4212 4213 return true; 4214 } 4215 allow_move(xml_node parent,xml_node child)4216 PUGI__FN bool allow_move(xml_node parent, xml_node child) 4217 { 4218 // check that child can be a child of parent 4219 if (!allow_insert_child(parent.type(), child.type())) 4220 return false; 4221 4222 // check that node is not moved between documents 4223 if (parent.root() != child.root()) 4224 return false; 4225 4226 // check that new parent is not in the child subtree 4227 xml_node cur = parent; 4228 4229 while (cur) 4230 { 4231 if (cur == child) 4232 return false; 4233 4234 cur = cur.parent(); 4235 } 4236 4237 return true; 4238 } 4239 4240 template <typename String, typename Header> node_copy_string(String 
& dest,Header & header,uintptr_t header_mask,char_t * source,Header & source_header,xml_allocator * alloc)4241 PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc) 4242 { 4243 assert(!dest && (header & header_mask) == 0); 4244 4245 if (source) 4246 { 4247 if (alloc && (source_header & header_mask) == 0) 4248 { 4249 dest = source; 4250 4251 // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared 4252 header |= xml_memory_page_contents_shared_mask; 4253 source_header |= xml_memory_page_contents_shared_mask; 4254 } 4255 else 4256 strcpy_insitu(dest, header, header_mask, source, strlength(source)); 4257 } 4258 } 4259 node_copy_contents(xml_node_struct * dn,xml_node_struct * sn,xml_allocator * shared_alloc)4260 PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc) 4261 { 4262 node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc); 4263 node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc); 4264 4265 for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute) 4266 { 4267 xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn)); 4268 4269 if (da) 4270 { 4271 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); 4272 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); 4273 } 4274 } 4275 } 4276 node_copy_tree(xml_node_struct * dn,xml_node_struct * sn)4277 PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn) 4278 { 4279 xml_allocator& alloc = get_allocator(dn); 4280 xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? 
&alloc : 0; 4281 4282 node_copy_contents(dn, sn, shared_alloc); 4283 4284 xml_node_struct* dit = dn; 4285 xml_node_struct* sit = sn->first_child; 4286 4287 while (sit && sit != sn) 4288 { 4289 if (sit != dn) 4290 { 4291 xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit)); 4292 4293 if (copy) 4294 { 4295 node_copy_contents(copy, sit, shared_alloc); 4296 4297 if (sit->first_child) 4298 { 4299 dit = copy; 4300 sit = sit->first_child; 4301 continue; 4302 } 4303 } 4304 } 4305 4306 // continue to the next node 4307 do 4308 { 4309 if (sit->next_sibling) 4310 { 4311 sit = sit->next_sibling; 4312 break; 4313 } 4314 4315 sit = sit->parent; 4316 dit = dit->parent; 4317 } 4318 while (sit != sn); 4319 } 4320 } 4321 node_copy_attribute(xml_attribute_struct * da,xml_attribute_struct * sa)4322 PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa) 4323 { 4324 xml_allocator& alloc = get_allocator(da); 4325 xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? 
&alloc : 0; 4326 4327 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); 4328 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); 4329 } 4330 is_text_node(xml_node_struct * node)4331 inline bool is_text_node(xml_node_struct* node) 4332 { 4333 xml_node_type type = PUGI__NODETYPE(node); 4334 4335 return type == node_pcdata || type == node_cdata; 4336 } 4337 4338 // get value with conversion functions string_to_integer(const char_t * value,U minneg,U maxpos)4339 template <typename U> U string_to_integer(const char_t* value, U minneg, U maxpos) 4340 { 4341 U result = 0; 4342 const char_t* s = value; 4343 4344 while (PUGI__IS_CHARTYPE(*s, ct_space)) 4345 s++; 4346 4347 bool negative = (*s == '-'); 4348 4349 s += (*s == '+' || *s == '-'); 4350 4351 bool overflow = false; 4352 4353 if (s[0] == '0' && (s[1] | ' ') == 'x') 4354 { 4355 s += 2; 4356 4357 // since overflow detection relies on length of the sequence skip leading zeros 4358 while (*s == '0') 4359 s++; 4360 4361 const char_t* start = s; 4362 4363 for (;;) 4364 { 4365 if (static_cast<unsigned>(*s - '0') < 10) 4366 result = result * 16 + (*s - '0'); 4367 else if (static_cast<unsigned>((*s | ' ') - 'a') < 6) 4368 result = result * 16 + ((*s | ' ') - 'a' + 10); 4369 else 4370 break; 4371 4372 s++; 4373 } 4374 4375 size_t digits = static_cast<size_t>(s - start); 4376 4377 overflow = digits > sizeof(U) * 2; 4378 } 4379 else 4380 { 4381 // since overflow detection relies on length of the sequence skip leading zeros 4382 while (*s == '0') 4383 s++; 4384 4385 const char_t* start = s; 4386 4387 for (;;) 4388 { 4389 if (static_cast<unsigned>(*s - '0') < 10) 4390 result = result * 10 + (*s - '0'); 4391 else 4392 break; 4393 4394 s++; 4395 } 4396 4397 size_t digits = static_cast<size_t>(s - start); 4398 4399 PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2); 4400 4401 const size_t 
max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5; 4402 const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6'; 4403 const size_t high_bit = sizeof(U) * 8 - 1; 4404 4405 overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit))); 4406 } 4407 4408 if (negative) 4409 return (overflow || result > minneg) ? 0 - minneg : 0 - result; 4410 else 4411 return (overflow || result > maxpos) ? maxpos : result; 4412 } 4413 get_value_int(const char_t * value)4414 PUGI__FN int get_value_int(const char_t* value) 4415 { 4416 return string_to_integer<unsigned int>(value, 0 - static_cast<unsigned int>(INT_MIN), INT_MAX); 4417 } 4418 get_value_uint(const char_t * value)4419 PUGI__FN unsigned int get_value_uint(const char_t* value) 4420 { 4421 return string_to_integer<unsigned int>(value, 0, UINT_MAX); 4422 } 4423 get_value_double(const char_t * value)4424 PUGI__FN double get_value_double(const char_t* value) 4425 { 4426 #ifdef PUGIXML_WCHAR_MODE 4427 return wcstod(value, 0); 4428 #else 4429 return strtod(value, 0); 4430 #endif 4431 } 4432 get_value_float(const char_t * value)4433 PUGI__FN float get_value_float(const char_t* value) 4434 { 4435 #ifdef PUGIXML_WCHAR_MODE 4436 return static_cast<float>(wcstod(value, 0)); 4437 #else 4438 return static_cast<float>(strtod(value, 0)); 4439 #endif 4440 } 4441 get_value_bool(const char_t * value)4442 PUGI__FN bool get_value_bool(const char_t* value) 4443 { 4444 // only look at first char 4445 char_t first = *value; 4446 4447 // 1*, t* (true), T* (True), y* (yes), Y* (YES) 4448 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y'); 4449 } 4450 4451 #ifdef PUGIXML_HAS_LONG_LONG get_value_llong(const char_t * value)4452 PUGI__FN long long get_value_llong(const char_t* value) 4453 { 4454 return string_to_integer<unsigned long long>(value, 0 - static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX); 4455 } 4456 
get_value_ullong(const char_t * value)4457 PUGI__FN unsigned long long get_value_ullong(const char_t* value) 4458 { 4459 return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX); 4460 } 4461 #endif 4462 4463 template <typename U> integer_to_string(char_t * begin,char_t * end,U value,bool negative)4464 PUGI__FN char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative) 4465 { 4466 char_t* result = end - 1; 4467 U rest = negative ? 0 - value : value; 4468 4469 do 4470 { 4471 *result-- = static_cast<char_t>('0' + (rest % 10)); 4472 rest /= 10; 4473 } 4474 while (rest); 4475 4476 assert(result >= begin); 4477 (void)begin; 4478 4479 *result = '-'; 4480 4481 return result + !negative; 4482 } 4483 4484 // set value with conversion functions 4485 template <typename String, typename Header> set_value_ascii(String & dest,Header & header,uintptr_t header_mask,char * buf)4486 PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf) 4487 { 4488 #ifdef PUGIXML_WCHAR_MODE 4489 char_t wbuf[128]; 4490 assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0])); 4491 4492 size_t offset = 0; 4493 for (; buf[offset]; ++offset) wbuf[offset] = buf[offset]; 4494 4495 return strcpy_insitu(dest, header, header_mask, wbuf, offset); 4496 #else 4497 return strcpy_insitu(dest, header, header_mask, buf, strlen(buf)); 4498 #endif 4499 } 4500 4501 template <typename String, typename Header> set_value_convert(String & dest,Header & header,uintptr_t header_mask,int value)4502 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, int value) 4503 { 4504 char_t buf[64]; 4505 char_t* end = buf + sizeof(buf) / sizeof(buf[0]); 4506 char_t* begin = integer_to_string<unsigned int>(buf, end, value, value < 0); 4507 4508 return strcpy_insitu(dest, header, header_mask, begin, end - begin); 4509 } 4510 4511 template <typename String, typename Header> set_value_convert(String & dest,Header & header,uintptr_t 
header_mask,unsigned int value)4512 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned int value) 4513 { 4514 char_t buf[64]; 4515 char_t* end = buf + sizeof(buf) / sizeof(buf[0]); 4516 char_t* begin = integer_to_string<unsigned int>(buf, end, value, false); 4517 4518 return strcpy_insitu(dest, header, header_mask, begin, end - begin); 4519 } 4520 4521 template <typename String, typename Header> set_value_convert(String & dest,Header & header,uintptr_t header_mask,float value)4522 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value) 4523 { 4524 char buf[128]; 4525 sprintf(buf, "%.9g", value); 4526 4527 return set_value_ascii(dest, header, header_mask, buf); 4528 } 4529 4530 template <typename String, typename Header> set_value_convert(String & dest,Header & header,uintptr_t header_mask,double value)4531 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value) 4532 { 4533 char buf[128]; 4534 sprintf(buf, "%.17g", value); 4535 4536 return set_value_ascii(dest, header, header_mask, buf); 4537 } 4538 4539 template <typename String, typename Header> set_value_convert(String & dest,Header & header,uintptr_t header_mask,bool value)4540 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, bool value) 4541 { 4542 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 
4 : 5); 4543 } 4544 4545 #ifdef PUGIXML_HAS_LONG_LONG 4546 template <typename String, typename Header> set_value_convert(String & dest,Header & header,uintptr_t header_mask,long long value)4547 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, long long value) 4548 { 4549 char_t buf[64]; 4550 char_t* end = buf + sizeof(buf) / sizeof(buf[0]); 4551 char_t* begin = integer_to_string<unsigned long long>(buf, end, value, value < 0); 4552 4553 return strcpy_insitu(dest, header, header_mask, begin, end - begin); 4554 } 4555 4556 template <typename String, typename Header> set_value_convert(String & dest,Header & header,uintptr_t header_mask,unsigned long long value)4557 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, unsigned long long value) 4558 { 4559 char_t buf[64]; 4560 char_t* end = buf + sizeof(buf) / sizeof(buf[0]); 4561 char_t* begin = integer_to_string<unsigned long long>(buf, end, value, false); 4562 4563 return strcpy_insitu(dest, header, header_mask, begin, end - begin); 4564 } 4565 #endif 4566 load_buffer_impl(xml_document_struct * doc,xml_node_struct * root,void * contents,size_t size,unsigned int options,xml_encoding encoding,bool is_mutable,bool own,char_t ** out_buffer)4567 PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer) 4568 { 4569 // check input buffer 4570 if (!contents && size) return make_parse_result(status_io_error); 4571 4572 // get actual encoding 4573 xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size); 4574 4575 // get private buffer 4576 char_t* buffer = 0; 4577 size_t length = 0; 4578 4579 if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory); 4580 4581 // delete original buffer if we 
performed a conversion 4582 if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents); 4583 4584 // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself 4585 if (own || buffer != contents) *out_buffer = buffer; 4586 4587 // store buffer for offset_debug 4588 doc->buffer = buffer; 4589 4590 // parse 4591 xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options); 4592 4593 // remember encoding 4594 res.encoding = buffer_encoding; 4595 4596 return res; 4597 } 4598 4599 // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick get_file_size(FILE * file,size_t & out_result)4600 PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result) 4601 { 4602 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE) 4603 // there are 64-bit versions of fseek/ftell, let's use them 4604 typedef __int64 length_type; 4605 4606 _fseeki64(file, 0, SEEK_END); 4607 length_type length = _ftelli64(file); 4608 _fseeki64(file, 0, SEEK_SET); 4609 #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)) 4610 // there are 64-bit versions of fseek/ftell, let's use them 4611 typedef off64_t length_type; 4612 4613 fseeko64(file, 0, SEEK_END); 4614 length_type length = ftello64(file); 4615 fseeko64(file, 0, SEEK_SET); 4616 #else 4617 // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway. 
4618 typedef long length_type; 4619 4620 fseek(file, 0, SEEK_END); 4621 length_type length = ftell(file); 4622 fseek(file, 0, SEEK_SET); 4623 #endif 4624 4625 // check for I/O errors 4626 if (length < 0) return status_io_error; 4627 4628 // check for overflow 4629 size_t result = static_cast<size_t>(length); 4630 4631 if (static_cast<length_type>(result) != length) return status_out_of_memory; 4632 4633 // finalize 4634 out_result = result; 4635 4636 return status_ok; 4637 } 4638 4639 // This function assumes that buffer has extra sizeof(char_t) writable bytes after size zero_terminate_buffer(void * buffer,size_t size,xml_encoding encoding)4640 PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding) 4641 { 4642 // We only need to zero-terminate if encoding conversion does not do it for us 4643 #ifdef PUGIXML_WCHAR_MODE 4644 xml_encoding wchar_encoding = get_wchar_encoding(); 4645 4646 if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding)) 4647 { 4648 size_t length = size / sizeof(char_t); 4649 4650 static_cast<char_t*>(buffer)[length] = 0; 4651 return (length + 1) * sizeof(char_t); 4652 } 4653 #else 4654 if (encoding == encoding_utf8) 4655 { 4656 static_cast<char*>(buffer)[size] = 0; 4657 return size + 1; 4658 } 4659 #endif 4660 4661 return size; 4662 } 4663 load_file_impl(xml_document_struct * doc,FILE * file,unsigned int options,xml_encoding encoding,char_t ** out_buffer)4664 PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer) 4665 { 4666 if (!file) return make_parse_result(status_file_not_found); 4667 4668 // get file size (can result in I/O errors) 4669 size_t size = 0; 4670 xml_parse_status size_status = get_file_size(file, size); 4671 if (size_status != status_ok) return make_parse_result(size_status); 4672 4673 size_t max_suffix_size = sizeof(char_t); 4674 4675 // allocate buffer for the whole file 4676 char* 
contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size)); 4677 if (!contents) return make_parse_result(status_out_of_memory); 4678 4679 // read file in memory 4680 size_t read_size = fread(contents, 1, size, file); 4681 4682 if (read_size != size) 4683 { 4684 xml_memory::deallocate(contents); 4685 return make_parse_result(status_io_error); 4686 } 4687 4688 xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size); 4689 4690 return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer); 4691 } 4692 4693 #ifndef PUGIXML_NO_STL 4694 template <typename T> struct xml_stream_chunk 4695 { createxml_stream_chunk4696 static xml_stream_chunk* create() 4697 { 4698 void* memory = xml_memory::allocate(sizeof(xml_stream_chunk)); 4699 if (!memory) return 0; 4700 4701 return new (memory) xml_stream_chunk(); 4702 } 4703 destroyxml_stream_chunk4704 static void destroy(xml_stream_chunk* chunk) 4705 { 4706 // free chunk chain 4707 while (chunk) 4708 { 4709 xml_stream_chunk* next_ = chunk->next; 4710 4711 xml_memory::deallocate(chunk); 4712 4713 chunk = next_; 4714 } 4715 } 4716 xml_stream_chunkxml_stream_chunk4717 xml_stream_chunk(): next(0), size(0) 4718 { 4719 } 4720 4721 xml_stream_chunk* next; 4722 size_t size; 4723 4724 T data[xml_memory_page_size / sizeof(T)]; 4725 }; 4726 load_stream_data_noseek(std::basic_istream<T> & stream,void ** out_buffer,size_t * out_size)4727 template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size) 4728 { 4729 auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy); 4730 4731 // read file to a chunk list 4732 size_t total = 0; 4733 xml_stream_chunk<T>* last = 0; 4734 4735 while (!stream.eof()) 4736 { 4737 // allocate new chunk 4738 xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create(); 4739 if (!chunk) return status_out_of_memory; 
4740 4741 // append chunk to list 4742 if (last) last = last->next = chunk; 4743 else chunks.data = last = chunk; 4744 4745 // read data to chunk 4746 stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T))); 4747 chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T); 4748 4749 // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors 4750 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error; 4751 4752 // guard against huge files (chunk size is small enough to make this overflow check work) 4753 if (total + chunk->size < total) return status_out_of_memory; 4754 total += chunk->size; 4755 } 4756 4757 size_t max_suffix_size = sizeof(char_t); 4758 4759 // copy chunk list to a contiguous buffer 4760 char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size)); 4761 if (!buffer) return status_out_of_memory; 4762 4763 char* write = buffer; 4764 4765 for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next) 4766 { 4767 assert(write + chunk->size <= buffer + total); 4768 memcpy(write, chunk->data, chunk->size); 4769 write += chunk->size; 4770 } 4771 4772 assert(write == buffer + total); 4773 4774 // return buffer 4775 *out_buffer = buffer; 4776 *out_size = total; 4777 4778 return status_ok; 4779 } 4780 load_stream_data_seek(std::basic_istream<T> & stream,void ** out_buffer,size_t * out_size)4781 template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size) 4782 { 4783 // get length of remaining data in stream 4784 typename std::basic_istream<T>::pos_type pos = stream.tellg(); 4785 stream.seekg(0, std::ios::end); 4786 std::streamoff length = stream.tellg() - pos; 4787 stream.seekg(pos); 4788 4789 if (stream.fail() || pos < 0) return status_io_error; 4790 4791 // guard against huge files 4792 size_t read_length = static_cast<size_t>(length); 4793 4794 if 
(static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory; 4795 4796 size_t max_suffix_size = sizeof(char_t); 4797 4798 // read stream data into memory (guard against stream exceptions with buffer holder) 4799 auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate); 4800 if (!buffer.data) return status_out_of_memory; 4801 4802 stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length)); 4803 4804 // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors 4805 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error; 4806 4807 // return buffer 4808 size_t actual_length = static_cast<size_t>(stream.gcount()); 4809 assert(actual_length <= read_length); 4810 4811 *out_buffer = buffer.release(); 4812 *out_size = actual_length * sizeof(T); 4813 4814 return status_ok; 4815 } 4816 load_stream_impl(xml_document_struct * doc,std::basic_istream<T> & stream,unsigned int options,xml_encoding encoding,char_t ** out_buffer)4817 template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer) 4818 { 4819 void* buffer = 0; 4820 size_t size = 0; 4821 xml_parse_status status = status_ok; 4822 4823 // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits) 4824 if (stream.fail()) return make_parse_result(status_io_error); 4825 4826 // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory) 4827 if (stream.tellg() < 0) 4828 { 4829 stream.clear(); // clear error flags that could be set by a failing tellg 4830 status = load_stream_data_noseek(stream, &buffer, &size); 4831 } 4832 else 4833 status = load_stream_data_seek(stream, &buffer, &size); 4834 4835 if 
(status != status_ok) return make_parse_result(status); 4836 4837 xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size); 4838 4839 return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer); 4840 } 4841 #endif 4842 4843 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))) open_file_wide(const wchar_t * path,const wchar_t * mode)4844 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) 4845 { 4846 return _wfopen(path, mode); 4847 } 4848 #else convert_path_heap(const wchar_t * str)4849 PUGI__FN char* convert_path_heap(const wchar_t* str) 4850 { 4851 assert(str); 4852 4853 // first pass: get length in utf8 characters 4854 size_t length = strlength_wide(str); 4855 size_t size = as_utf8_begin(str, length); 4856 4857 // allocate resulting string 4858 char* result = static_cast<char*>(xml_memory::allocate(size + 1)); 4859 if (!result) return 0; 4860 4861 // second pass: convert to utf8 4862 as_utf8_end(result, size, str, length); 4863 4864 // zero-terminate 4865 result[size] = 0; 4866 4867 return result; 4868 } 4869 open_file_wide(const wchar_t * path,const wchar_t * mode)4870 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) 4871 { 4872 // there is no standard function to open wide paths, so our best bet is to try utf8 path 4873 char* path_utf8 = convert_path_heap(path); 4874 if (!path_utf8) return 0; 4875 4876 // convert mode to ASCII (we mirror _wfopen interface) 4877 char mode_ascii[4] = {0}; 4878 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]); 4879 4880 // try to open the utf8 path 4881 FILE* result = fopen(path_utf8, mode_ascii); 4882 4883 // free dummy buffer 4884 xml_memory::deallocate(path_utf8); 4885 4886 return result; 4887 } 4888 #endif 4889 save_file_impl(const xml_document & doc,FILE * 
file,const char_t * indent,unsigned int flags,xml_encoding encoding)4890 PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding) 4891 { 4892 if (!file) return false; 4893 4894 xml_writer_file writer(file); 4895 doc.save(writer, indent, flags, encoding); 4896 4897 return ferror(file) == 0; 4898 } 4899 4900 struct name_null_sentry 4901 { 4902 xml_node_struct* node; 4903 char_t* name; 4904 name_null_sentryname_null_sentry4905 name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name) 4906 { 4907 node->name = 0; 4908 } 4909 ~name_null_sentryname_null_sentry4910 ~name_null_sentry() 4911 { 4912 node->name = name; 4913 } 4914 }; 4915 PUGI__NS_END 4916 4917 namespace pugi 4918 { xml_writer_file(void * file_)4919 PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_) 4920 { 4921 } 4922 write(const void * data,size_t size)4923 PUGI__FN void xml_writer_file::write(const void* data, size_t size) 4924 { 4925 size_t result = fwrite(data, 1, size, static_cast<FILE*>(file)); 4926 (void)!result; // unfortunately we can't do proper error handling here 4927 } 4928 4929 #ifndef PUGIXML_NO_STL xml_writer_stream(std::basic_ostream<char,std::char_traits<char>> & stream)4930 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0) 4931 { 4932 } 4933 xml_writer_stream(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream)4934 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream) 4935 { 4936 } 4937 write(const void * data,size_t size)4938 PUGI__FN void xml_writer_stream::write(const void* data, size_t size) 4939 { 4940 if (narrow_stream) 4941 { 4942 assert(!wide_stream); 4943 narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size)); 4944 } 4945 else 4946 { 4947 
assert(wide_stream); 4948 assert(size % sizeof(wchar_t) == 0); 4949 4950 wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t))); 4951 } 4952 } 4953 #endif 4954 xml_tree_walker()4955 PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0) 4956 { 4957 } 4958 ~xml_tree_walker()4959 PUGI__FN xml_tree_walker::~xml_tree_walker() 4960 { 4961 } 4962 depth() const4963 PUGI__FN int xml_tree_walker::depth() const 4964 { 4965 return _depth; 4966 } 4967 begin(xml_node &)4968 PUGI__FN bool xml_tree_walker::begin(xml_node&) 4969 { 4970 return true; 4971 } 4972 end(xml_node &)4973 PUGI__FN bool xml_tree_walker::end(xml_node&) 4974 { 4975 return true; 4976 } 4977 xml_attribute()4978 PUGI__FN xml_attribute::xml_attribute(): _attr(0) 4979 { 4980 } 4981 xml_attribute(xml_attribute_struct * attr)4982 PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr) 4983 { 4984 } 4985 unspecified_bool_xml_attribute(xml_attribute ***)4986 PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***) 4987 { 4988 } 4989 operator xml_attribute::unspecified_bool_type() const4990 PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const 4991 { 4992 return _attr ? 
unspecified_bool_xml_attribute : 0; 4993 } 4994 operator !() const4995 PUGI__FN bool xml_attribute::operator!() const 4996 { 4997 return !_attr; 4998 } 4999 operator ==(const xml_attribute & r) const5000 PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const 5001 { 5002 return (_attr == r._attr); 5003 } 5004 operator !=(const xml_attribute & r) const5005 PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const 5006 { 5007 return (_attr != r._attr); 5008 } 5009 operator <(const xml_attribute & r) const5010 PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const 5011 { 5012 return (_attr < r._attr); 5013 } 5014 operator >(const xml_attribute & r) const5015 PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const 5016 { 5017 return (_attr > r._attr); 5018 } 5019 operator <=(const xml_attribute & r) const5020 PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const 5021 { 5022 return (_attr <= r._attr); 5023 } 5024 operator >=(const xml_attribute & r) const5025 PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const 5026 { 5027 return (_attr >= r._attr); 5028 } 5029 next_attribute() const5030 PUGI__FN xml_attribute xml_attribute::next_attribute() const 5031 { 5032 return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute(); 5033 } 5034 previous_attribute() const5035 PUGI__FN xml_attribute xml_attribute::previous_attribute() const 5036 { 5037 return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute(); 5038 } 5039 as_string(const char_t * def) const5040 PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const 5041 { 5042 return (_attr && _attr->value) ? _attr->value + 0 : def; 5043 } 5044 as_int(int def) const5045 PUGI__FN int xml_attribute::as_int(int def) const 5046 { 5047 return (_attr && _attr->value) ? 
impl::get_value_int(_attr->value) : def; 5048 } 5049 as_uint(unsigned int def) const5050 PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const 5051 { 5052 return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def; 5053 } 5054 as_double(double def) const5055 PUGI__FN double xml_attribute::as_double(double def) const 5056 { 5057 return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def; 5058 } 5059 as_float(float def) const5060 PUGI__FN float xml_attribute::as_float(float def) const 5061 { 5062 return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def; 5063 } 5064 as_bool(bool def) const5065 PUGI__FN bool xml_attribute::as_bool(bool def) const 5066 { 5067 return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def; 5068 } 5069 5070 #ifdef PUGIXML_HAS_LONG_LONG as_llong(long long def) const5071 PUGI__FN long long xml_attribute::as_llong(long long def) const 5072 { 5073 return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def; 5074 } 5075 as_ullong(unsigned long long def) const5076 PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const 5077 { 5078 return (_attr && _attr->value) ? impl::get_value_ullong(_attr->value) : def; 5079 } 5080 #endif 5081 empty() const5082 PUGI__FN bool xml_attribute::empty() const 5083 { 5084 return !_attr; 5085 } 5086 name() const5087 PUGI__FN const char_t* xml_attribute::name() const 5088 { 5089 return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT(""); 5090 } 5091 value() const5092 PUGI__FN const char_t* xml_attribute::value() const 5093 { 5094 return (_attr && _attr->value) ? 
_attr->value + 0 : PUGIXML_TEXT(""); 5095 } 5096 hash_value() const5097 PUGI__FN size_t xml_attribute::hash_value() const 5098 { 5099 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct)); 5100 } 5101 internal_object() const5102 PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const 5103 { 5104 return _attr; 5105 } 5106 operator =(const char_t * rhs)5107 PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs) 5108 { 5109 set_value(rhs); 5110 return *this; 5111 } 5112 operator =(int rhs)5113 PUGI__FN xml_attribute& xml_attribute::operator=(int rhs) 5114 { 5115 set_value(rhs); 5116 return *this; 5117 } 5118 operator =(unsigned int rhs)5119 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs) 5120 { 5121 set_value(rhs); 5122 return *this; 5123 } 5124 operator =(double rhs)5125 PUGI__FN xml_attribute& xml_attribute::operator=(double rhs) 5126 { 5127 set_value(rhs); 5128 return *this; 5129 } 5130 operator =(float rhs)5131 PUGI__FN xml_attribute& xml_attribute::operator=(float rhs) 5132 { 5133 set_value(rhs); 5134 return *this; 5135 } 5136 operator =(bool rhs)5137 PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs) 5138 { 5139 set_value(rhs); 5140 return *this; 5141 } 5142 5143 #ifdef PUGIXML_HAS_LONG_LONG operator =(long long rhs)5144 PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs) 5145 { 5146 set_value(rhs); 5147 return *this; 5148 } 5149 operator =(unsigned long long rhs)5150 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs) 5151 { 5152 set_value(rhs); 5153 return *this; 5154 } 5155 #endif 5156 set_name(const char_t * rhs)5157 PUGI__FN bool xml_attribute::set_name(const char_t* rhs) 5158 { 5159 if (!_attr) return false; 5160 5161 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); 5162 } 5163 set_value(const char_t * rhs)5164 PUGI__FN bool xml_attribute::set_value(const 
char_t* rhs) 5165 { 5166 if (!_attr) return false; 5167 5168 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); 5169 } 5170 set_value(int rhs)5171 PUGI__FN bool xml_attribute::set_value(int rhs) 5172 { 5173 if (!_attr) return false; 5174 5175 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); 5176 } 5177 set_value(unsigned int rhs)5178 PUGI__FN bool xml_attribute::set_value(unsigned int rhs) 5179 { 5180 if (!_attr) return false; 5181 5182 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); 5183 } 5184 set_value(double rhs)5185 PUGI__FN bool xml_attribute::set_value(double rhs) 5186 { 5187 if (!_attr) return false; 5188 5189 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); 5190 } 5191 set_value(float rhs)5192 PUGI__FN bool xml_attribute::set_value(float rhs) 5193 { 5194 if (!_attr) return false; 5195 5196 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); 5197 } 5198 set_value(bool rhs)5199 PUGI__FN bool xml_attribute::set_value(bool rhs) 5200 { 5201 if (!_attr) return false; 5202 5203 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); 5204 } 5205 5206 #ifdef PUGIXML_HAS_LONG_LONG set_value(long long rhs)5207 PUGI__FN bool xml_attribute::set_value(long long rhs) 5208 { 5209 if (!_attr) return false; 5210 5211 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); 5212 } 5213 set_value(unsigned long long rhs)5214 PUGI__FN bool xml_attribute::set_value(unsigned long long rhs) 5215 { 5216 if (!_attr) return false; 5217 5218 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); 5219 } 5220 
#endif 5221 5222 #ifdef __BORLANDC__ operator &&(const xml_attribute & lhs,bool rhs)5223 PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs) 5224 { 5225 return (bool)lhs && rhs; 5226 } 5227 operator ||(const xml_attribute & lhs,bool rhs)5228 PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs) 5229 { 5230 return (bool)lhs || rhs; 5231 } 5232 #endif 5233 xml_node()5234 PUGI__FN xml_node::xml_node(): _root(0) 5235 { 5236 } 5237 xml_node(xml_node_struct * p)5238 PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p) 5239 { 5240 } 5241 unspecified_bool_xml_node(xml_node ***)5242 PUGI__FN static void unspecified_bool_xml_node(xml_node***) 5243 { 5244 } 5245 operator xml_node::unspecified_bool_type() const5246 PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const 5247 { 5248 return _root ? unspecified_bool_xml_node : 0; 5249 } 5250 operator !() const5251 PUGI__FN bool xml_node::operator!() const 5252 { 5253 return !_root; 5254 } 5255 begin() const5256 PUGI__FN xml_node::iterator xml_node::begin() const 5257 { 5258 return iterator(_root ? _root->first_child + 0 : 0, _root); 5259 } 5260 end() const5261 PUGI__FN xml_node::iterator xml_node::end() const 5262 { 5263 return iterator(0, _root); 5264 } 5265 attributes_begin() const5266 PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const 5267 { 5268 return attribute_iterator(_root ? 
_root->first_attribute + 0 : 0, _root); 5269 } 5270 attributes_end() const5271 PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const 5272 { 5273 return attribute_iterator(0, _root); 5274 } 5275 children() const5276 PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const 5277 { 5278 return xml_object_range<xml_node_iterator>(begin(), end()); 5279 } 5280 children(const char_t * name_) const5281 PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const 5282 { 5283 return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_)); 5284 } 5285 attributes() const5286 PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const 5287 { 5288 return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end()); 5289 } 5290 operator ==(const xml_node & r) const5291 PUGI__FN bool xml_node::operator==(const xml_node& r) const 5292 { 5293 return (_root == r._root); 5294 } 5295 operator !=(const xml_node & r) const5296 PUGI__FN bool xml_node::operator!=(const xml_node& r) const 5297 { 5298 return (_root != r._root); 5299 } 5300 operator <(const xml_node & r) const5301 PUGI__FN bool xml_node::operator<(const xml_node& r) const 5302 { 5303 return (_root < r._root); 5304 } 5305 operator >(const xml_node & r) const5306 PUGI__FN bool xml_node::operator>(const xml_node& r) const 5307 { 5308 return (_root > r._root); 5309 } 5310 operator <=(const xml_node & r) const5311 PUGI__FN bool xml_node::operator<=(const xml_node& r) const 5312 { 5313 return (_root <= r._root); 5314 } 5315 operator >=(const xml_node & r) const5316 PUGI__FN bool xml_node::operator>=(const xml_node& r) const 5317 { 5318 return (_root >= r._root); 5319 } 5320 empty() const5321 PUGI__FN bool xml_node::empty() const 5322 { 5323 return !_root; 5324 } 5325 name() const5326 PUGI__FN const char_t* xml_node::name() const 5327 { 5328 
return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT(""); 5329 } 5330 type() const5331 PUGI__FN xml_node_type xml_node::type() const 5332 { 5333 return _root ? PUGI__NODETYPE(_root) : node_null; 5334 } 5335 value() const5336 PUGI__FN const char_t* xml_node::value() const 5337 { 5338 return (_root && _root->value) ? _root->value + 0 : PUGIXML_TEXT(""); 5339 } 5340 child(const char_t * name_) const5341 PUGI__FN xml_node xml_node::child(const char_t* name_) const 5342 { 5343 if (!_root) return xml_node(); 5344 5345 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 5346 if (i->name && impl::strequal(name_, i->name)) return xml_node(i); 5347 5348 return xml_node(); 5349 } 5350 attribute(const char_t * name_) const5351 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const 5352 { 5353 if (!_root) return xml_attribute(); 5354 5355 for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) 5356 if (i->name && impl::strequal(name_, i->name)) 5357 return xml_attribute(i); 5358 5359 return xml_attribute(); 5360 } 5361 next_sibling(const char_t * name_) const5362 PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const 5363 { 5364 if (!_root) return xml_node(); 5365 5366 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) 5367 if (i->name && impl::strequal(name_, i->name)) return xml_node(i); 5368 5369 return xml_node(); 5370 } 5371 next_sibling() const5372 PUGI__FN xml_node xml_node::next_sibling() const 5373 { 5374 return _root ? 
xml_node(_root->next_sibling) : xml_node(); 5375 } 5376 previous_sibling(const char_t * name_) const5377 PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const 5378 { 5379 if (!_root) return xml_node(); 5380 5381 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) 5382 if (i->name && impl::strequal(name_, i->name)) return xml_node(i); 5383 5384 return xml_node(); 5385 } 5386 attribute(const char_t * name_,xml_attribute & hint_) const5387 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const 5388 { 5389 xml_attribute_struct* hint = hint_._attr; 5390 5391 // if hint is not an attribute of node, behavior is not defined 5392 assert(!hint || (_root && impl::is_attribute_of(hint, _root))); 5393 5394 if (!_root) return xml_attribute(); 5395 5396 // optimistically search from hint up until the end 5397 for (xml_attribute_struct* i = hint; i; i = i->next_attribute) 5398 if (i->name && impl::strequal(name_, i->name)) 5399 { 5400 // update hint to maximize efficiency of searching for consecutive attributes 5401 hint_._attr = i->next_attribute; 5402 5403 return xml_attribute(i); 5404 } 5405 5406 // wrap around and search from the first attribute until the hint 5407 // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails 5408 for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute) 5409 if (j->name && impl::strequal(name_, j->name)) 5410 { 5411 // update hint to maximize efficiency of searching for consecutive attributes 5412 hint_._attr = j->next_attribute; 5413 5414 return xml_attribute(j); 5415 } 5416 5417 return xml_attribute(); 5418 } 5419 previous_sibling() const5420 PUGI__FN xml_node xml_node::previous_sibling() const 5421 { 5422 if (!_root) return xml_node(); 5423 5424 if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c); 5425 else return xml_node(); 5426 } 5427 
parent() const5428 PUGI__FN xml_node xml_node::parent() const 5429 { 5430 return _root ? xml_node(_root->parent) : xml_node(); 5431 } 5432 root() const5433 PUGI__FN xml_node xml_node::root() const 5434 { 5435 return _root ? xml_node(&impl::get_document(_root)) : xml_node(); 5436 } 5437 text() const5438 PUGI__FN xml_text xml_node::text() const 5439 { 5440 return xml_text(_root); 5441 } 5442 child_value() const5443 PUGI__FN const char_t* xml_node::child_value() const 5444 { 5445 if (!_root) return PUGIXML_TEXT(""); 5446 5447 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 5448 if (impl::is_text_node(i) && i->value) 5449 return i->value; 5450 5451 return PUGIXML_TEXT(""); 5452 } 5453 child_value(const char_t * name_) const5454 PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const 5455 { 5456 return child(name_).child_value(); 5457 } 5458 first_attribute() const5459 PUGI__FN xml_attribute xml_node::first_attribute() const 5460 { 5461 return _root ? xml_attribute(_root->first_attribute) : xml_attribute(); 5462 } 5463 last_attribute() const5464 PUGI__FN xml_attribute xml_node::last_attribute() const 5465 { 5466 return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute(); 5467 } 5468 first_child() const5469 PUGI__FN xml_node xml_node::first_child() const 5470 { 5471 return _root ? xml_node(_root->first_child) : xml_node(); 5472 } 5473 last_child() const5474 PUGI__FN xml_node xml_node::last_child() const 5475 { 5476 return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node(); 5477 } 5478 set_name(const char_t * rhs)5479 PUGI__FN bool xml_node::set_name(const char_t* rhs) 5480 { 5481 xml_node_type type_ = _root ? 
PUGI__NODETYPE(_root) : node_null; 5482 5483 if (type_ != node_element && type_ != node_pi && type_ != node_declaration) 5484 return false; 5485 5486 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); 5487 } 5488 set_value(const char_t * rhs)5489 PUGI__FN bool xml_node::set_value(const char_t* rhs) 5490 { 5491 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null; 5492 5493 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype) 5494 return false; 5495 5496 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); 5497 } 5498 append_attribute(const char_t * name_)5499 PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_) 5500 { 5501 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5502 5503 impl::xml_allocator& alloc = impl::get_allocator(_root); 5504 if (!alloc.reserve()) return xml_attribute(); 5505 5506 xml_attribute a(impl::allocate_attribute(alloc)); 5507 if (!a) return xml_attribute(); 5508 5509 impl::append_attribute(a._attr, _root); 5510 5511 a.set_name(name_); 5512 5513 return a; 5514 } 5515 prepend_attribute(const char_t * name_)5516 PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_) 5517 { 5518 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5519 5520 impl::xml_allocator& alloc = impl::get_allocator(_root); 5521 if (!alloc.reserve()) return xml_attribute(); 5522 5523 xml_attribute a(impl::allocate_attribute(alloc)); 5524 if (!a) return xml_attribute(); 5525 5526 impl::prepend_attribute(a._attr, _root); 5527 5528 a.set_name(name_); 5529 5530 return a; 5531 } 5532 insert_attribute_after(const char_t * name_,const xml_attribute & attr)5533 PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr) 5534 { 5535 if 
(!impl::allow_insert_attribute(type())) return xml_attribute(); 5536 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 5537 5538 impl::xml_allocator& alloc = impl::get_allocator(_root); 5539 if (!alloc.reserve()) return xml_attribute(); 5540 5541 xml_attribute a(impl::allocate_attribute(alloc)); 5542 if (!a) return xml_attribute(); 5543 5544 impl::insert_attribute_after(a._attr, attr._attr, _root); 5545 5546 a.set_name(name_); 5547 5548 return a; 5549 } 5550 insert_attribute_before(const char_t * name_,const xml_attribute & attr)5551 PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr) 5552 { 5553 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5554 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 5555 5556 impl::xml_allocator& alloc = impl::get_allocator(_root); 5557 if (!alloc.reserve()) return xml_attribute(); 5558 5559 xml_attribute a(impl::allocate_attribute(alloc)); 5560 if (!a) return xml_attribute(); 5561 5562 impl::insert_attribute_before(a._attr, attr._attr, _root); 5563 5564 a.set_name(name_); 5565 5566 return a; 5567 } 5568 append_copy(const xml_attribute & proto)5569 PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto) 5570 { 5571 if (!proto) return xml_attribute(); 5572 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5573 5574 impl::xml_allocator& alloc = impl::get_allocator(_root); 5575 if (!alloc.reserve()) return xml_attribute(); 5576 5577 xml_attribute a(impl::allocate_attribute(alloc)); 5578 if (!a) return xml_attribute(); 5579 5580 impl::append_attribute(a._attr, _root); 5581 impl::node_copy_attribute(a._attr, proto._attr); 5582 5583 return a; 5584 } 5585 prepend_copy(const xml_attribute & proto)5586 PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto) 5587 { 5588 if (!proto) return xml_attribute(); 5589 if (!impl::allow_insert_attribute(type())) return 
xml_attribute(); 5590 5591 impl::xml_allocator& alloc = impl::get_allocator(_root); 5592 if (!alloc.reserve()) return xml_attribute(); 5593 5594 xml_attribute a(impl::allocate_attribute(alloc)); 5595 if (!a) return xml_attribute(); 5596 5597 impl::prepend_attribute(a._attr, _root); 5598 impl::node_copy_attribute(a._attr, proto._attr); 5599 5600 return a; 5601 } 5602 insert_copy_after(const xml_attribute & proto,const xml_attribute & attr)5603 PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr) 5604 { 5605 if (!proto) return xml_attribute(); 5606 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5607 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 5608 5609 impl::xml_allocator& alloc = impl::get_allocator(_root); 5610 if (!alloc.reserve()) return xml_attribute(); 5611 5612 xml_attribute a(impl::allocate_attribute(alloc)); 5613 if (!a) return xml_attribute(); 5614 5615 impl::insert_attribute_after(a._attr, attr._attr, _root); 5616 impl::node_copy_attribute(a._attr, proto._attr); 5617 5618 return a; 5619 } 5620 insert_copy_before(const xml_attribute & proto,const xml_attribute & attr)5621 PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr) 5622 { 5623 if (!proto) return xml_attribute(); 5624 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5625 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 5626 5627 impl::xml_allocator& alloc = impl::get_allocator(_root); 5628 if (!alloc.reserve()) return xml_attribute(); 5629 5630 xml_attribute a(impl::allocate_attribute(alloc)); 5631 if (!a) return xml_attribute(); 5632 5633 impl::insert_attribute_before(a._attr, attr._attr, _root); 5634 impl::node_copy_attribute(a._attr, proto._attr); 5635 5636 return a; 5637 } 5638 append_child(xml_node_type type_)5639 PUGI__FN xml_node xml_node::append_child(xml_node_type type_) 5640 { 5641 
if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5642 5643 impl::xml_allocator& alloc = impl::get_allocator(_root); 5644 if (!alloc.reserve()) return xml_node(); 5645 5646 xml_node n(impl::allocate_node(alloc, type_)); 5647 if (!n) return xml_node(); 5648 5649 impl::append_node(n._root, _root); 5650 5651 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5652 5653 return n; 5654 } 5655 prepend_child(xml_node_type type_)5656 PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_) 5657 { 5658 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5659 5660 impl::xml_allocator& alloc = impl::get_allocator(_root); 5661 if (!alloc.reserve()) return xml_node(); 5662 5663 xml_node n(impl::allocate_node(alloc, type_)); 5664 if (!n) return xml_node(); 5665 5666 impl::prepend_node(n._root, _root); 5667 5668 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5669 5670 return n; 5671 } 5672 insert_child_before(xml_node_type type_,const xml_node & node)5673 PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node) 5674 { 5675 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5676 if (!node._root || node._root->parent != _root) return xml_node(); 5677 5678 impl::xml_allocator& alloc = impl::get_allocator(_root); 5679 if (!alloc.reserve()) return xml_node(); 5680 5681 xml_node n(impl::allocate_node(alloc, type_)); 5682 if (!n) return xml_node(); 5683 5684 impl::insert_node_before(n._root, node._root); 5685 5686 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5687 5688 return n; 5689 } 5690 insert_child_after(xml_node_type type_,const xml_node & node)5691 PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node) 5692 { 5693 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5694 if (!node._root || node._root->parent != _root) return xml_node(); 5695 5696 impl::xml_allocator& alloc = impl::get_allocator(_root); 5697 
if (!alloc.reserve()) return xml_node(); 5698 5699 xml_node n(impl::allocate_node(alloc, type_)); 5700 if (!n) return xml_node(); 5701 5702 impl::insert_node_after(n._root, node._root); 5703 5704 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5705 5706 return n; 5707 } 5708 append_child(const char_t * name_)5709 PUGI__FN xml_node xml_node::append_child(const char_t* name_) 5710 { 5711 xml_node result = append_child(node_element); 5712 5713 result.set_name(name_); 5714 5715 return result; 5716 } 5717 prepend_child(const char_t * name_)5718 PUGI__FN xml_node xml_node::prepend_child(const char_t* name_) 5719 { 5720 xml_node result = prepend_child(node_element); 5721 5722 result.set_name(name_); 5723 5724 return result; 5725 } 5726 insert_child_after(const char_t * name_,const xml_node & node)5727 PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node) 5728 { 5729 xml_node result = insert_child_after(node_element, node); 5730 5731 result.set_name(name_); 5732 5733 return result; 5734 } 5735 insert_child_before(const char_t * name_,const xml_node & node)5736 PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node) 5737 { 5738 xml_node result = insert_child_before(node_element, node); 5739 5740 result.set_name(name_); 5741 5742 return result; 5743 } 5744 append_copy(const xml_node & proto)5745 PUGI__FN xml_node xml_node::append_copy(const xml_node& proto) 5746 { 5747 xml_node_type type_ = proto.type(); 5748 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5749 5750 impl::xml_allocator& alloc = impl::get_allocator(_root); 5751 if (!alloc.reserve()) return xml_node(); 5752 5753 xml_node n(impl::allocate_node(alloc, type_)); 5754 if (!n) return xml_node(); 5755 5756 impl::append_node(n._root, _root); 5757 impl::node_copy_tree(n._root, proto._root); 5758 5759 return n; 5760 } 5761 prepend_copy(const xml_node & proto)5762 PUGI__FN xml_node xml_node::prepend_copy(const 
xml_node& proto) 5763 { 5764 xml_node_type type_ = proto.type(); 5765 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5766 5767 impl::xml_allocator& alloc = impl::get_allocator(_root); 5768 if (!alloc.reserve()) return xml_node(); 5769 5770 xml_node n(impl::allocate_node(alloc, type_)); 5771 if (!n) return xml_node(); 5772 5773 impl::prepend_node(n._root, _root); 5774 impl::node_copy_tree(n._root, proto._root); 5775 5776 return n; 5777 } 5778 insert_copy_after(const xml_node & proto,const xml_node & node)5779 PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node) 5780 { 5781 xml_node_type type_ = proto.type(); 5782 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5783 if (!node._root || node._root->parent != _root) return xml_node(); 5784 5785 impl::xml_allocator& alloc = impl::get_allocator(_root); 5786 if (!alloc.reserve()) return xml_node(); 5787 5788 xml_node n(impl::allocate_node(alloc, type_)); 5789 if (!n) return xml_node(); 5790 5791 impl::insert_node_after(n._root, node._root); 5792 impl::node_copy_tree(n._root, proto._root); 5793 5794 return n; 5795 } 5796 insert_copy_before(const xml_node & proto,const xml_node & node)5797 PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node) 5798 { 5799 xml_node_type type_ = proto.type(); 5800 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5801 if (!node._root || node._root->parent != _root) return xml_node(); 5802 5803 impl::xml_allocator& alloc = impl::get_allocator(_root); 5804 if (!alloc.reserve()) return xml_node(); 5805 5806 xml_node n(impl::allocate_node(alloc, type_)); 5807 if (!n) return xml_node(); 5808 5809 impl::insert_node_before(n._root, node._root); 5810 impl::node_copy_tree(n._root, proto._root); 5811 5812 return n; 5813 } 5814 append_move(const xml_node & moved)5815 PUGI__FN xml_node xml_node::append_move(const xml_node& moved) 5816 { 5817 if (!impl::allow_move(*this, moved)) 
return xml_node(); 5818 5819 impl::xml_allocator& alloc = impl::get_allocator(_root); 5820 if (!alloc.reserve()) return xml_node(); 5821 5822 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 5823 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 5824 5825 impl::remove_node(moved._root); 5826 impl::append_node(moved._root, _root); 5827 5828 return moved; 5829 } 5830 prepend_move(const xml_node & moved)5831 PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved) 5832 { 5833 if (!impl::allow_move(*this, moved)) return xml_node(); 5834 5835 impl::xml_allocator& alloc = impl::get_allocator(_root); 5836 if (!alloc.reserve()) return xml_node(); 5837 5838 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 5839 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 5840 5841 impl::remove_node(moved._root); 5842 impl::prepend_node(moved._root, _root); 5843 5844 return moved; 5845 } 5846 insert_move_after(const xml_node & moved,const xml_node & node)5847 PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node) 5848 { 5849 if (!impl::allow_move(*this, moved)) return xml_node(); 5850 if (!node._root || node._root->parent != _root) return xml_node(); 5851 if (moved._root == node._root) return xml_node(); 5852 5853 impl::xml_allocator& alloc = impl::get_allocator(_root); 5854 if (!alloc.reserve()) return xml_node(); 5855 5856 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 5857 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 5858 5859 impl::remove_node(moved._root); 5860 impl::insert_node_after(moved._root, node._root); 5861 5862 return moved; 5863 } 5864 insert_move_before(const xml_node & moved,const xml_node & 
node)5865 PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node) 5866 { 5867 if (!impl::allow_move(*this, moved)) return xml_node(); 5868 if (!node._root || node._root->parent != _root) return xml_node(); 5869 if (moved._root == node._root) return xml_node(); 5870 5871 impl::xml_allocator& alloc = impl::get_allocator(_root); 5872 if (!alloc.reserve()) return xml_node(); 5873 5874 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 5875 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 5876 5877 impl::remove_node(moved._root); 5878 impl::insert_node_before(moved._root, node._root); 5879 5880 return moved; 5881 } 5882 remove_attribute(const char_t * name_)5883 PUGI__FN bool xml_node::remove_attribute(const char_t* name_) 5884 { 5885 return remove_attribute(attribute(name_)); 5886 } 5887 remove_attribute(const xml_attribute & a)5888 PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a) 5889 { 5890 if (!_root || !a._attr) return false; 5891 if (!impl::is_attribute_of(a._attr, _root)) return false; 5892 5893 impl::xml_allocator& alloc = impl::get_allocator(_root); 5894 if (!alloc.reserve()) return false; 5895 5896 impl::remove_attribute(a._attr, _root); 5897 impl::destroy_attribute(a._attr, alloc); 5898 5899 return true; 5900 } 5901 remove_child(const char_t * name_)5902 PUGI__FN bool xml_node::remove_child(const char_t* name_) 5903 { 5904 return remove_child(child(name_)); 5905 } 5906 remove_child(const xml_node & n)5907 PUGI__FN bool xml_node::remove_child(const xml_node& n) 5908 { 5909 if (!_root || !n._root || n._root->parent != _root) return false; 5910 5911 impl::xml_allocator& alloc = impl::get_allocator(_root); 5912 if (!alloc.reserve()) return false; 5913 5914 impl::remove_node(n._root); 5915 impl::destroy_node(n._root, alloc); 5916 5917 return true; 5918 } 5919 append_buffer(const void * contents,size_t 
size,unsigned int options,xml_encoding encoding)5920 PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) 5921 { 5922 // append_buffer is only valid for elements/documents 5923 if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root); 5924 5925 // get document node 5926 impl::xml_document_struct* doc = &impl::get_document(_root); 5927 5928 // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense 5929 doc->header |= impl::xml_memory_page_contents_shared_mask; 5930 5931 // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later) 5932 impl::xml_memory_page* page = 0; 5933 impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer), page)); 5934 (void)page; 5935 5936 if (!extra) return impl::make_parse_result(status_out_of_memory); 5937 5938 // add extra buffer to the list 5939 extra->buffer = 0; 5940 extra->next = doc->extra_buffers; 5941 doc->extra_buffers = extra; 5942 5943 // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level 5944 impl::name_null_sentry sentry(_root); 5945 5946 return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer); 5947 } 5948 find_child_by_attribute(const char_t * name_,const char_t * attr_name,const char_t * attr_value) const5949 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const 5950 { 5951 if (!_root) return xml_node(); 5952 5953 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 5954 if (i->name && impl::strequal(name_, i->name)) 5955 { 5956 for (xml_attribute_struct* a = i->first_attribute; 
a; a = a->next_attribute) 5957 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT(""))) 5958 return xml_node(i); 5959 } 5960 5961 return xml_node(); 5962 } 5963 find_child_by_attribute(const char_t * attr_name,const char_t * attr_value) const5964 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const 5965 { 5966 if (!_root) return xml_node(); 5967 5968 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 5969 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) 5970 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT(""))) 5971 return xml_node(i); 5972 5973 return xml_node(); 5974 } 5975 5976 #ifndef PUGIXML_NO_STL path(char_t delimiter) const5977 PUGI__FN string_t xml_node::path(char_t delimiter) const 5978 { 5979 if (!_root) return string_t(); 5980 5981 size_t offset = 0; 5982 5983 for (xml_node_struct* i = _root; i; i = i->parent) 5984 { 5985 offset += (i != _root); 5986 offset += i->name ? impl::strlength(i->name) : 0; 5987 } 5988 5989 string_t result; 5990 result.resize(offset); 5991 5992 for (xml_node_struct* j = _root; j; j = j->parent) 5993 { 5994 if (j != _root) 5995 result[--offset] = delimiter; 5996 5997 if (j->name && *j->name) 5998 { 5999 size_t length = impl::strlength(j->name); 6000 6001 offset -= length; 6002 memcpy(&result[offset], j->name, length * sizeof(char_t)); 6003 } 6004 } 6005 6006 assert(offset == 0); 6007 6008 return result; 6009 } 6010 #endif 6011 first_element_by_path(const char_t * path_,char_t delimiter) const6012 PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const 6013 { 6014 xml_node found = *this; // Current search context. 6015 6016 if (!_root || !path_ || !path_[0]) return found; 6017 6018 if (path_[0] == delimiter) 6019 { 6020 // Absolute path; e.g. 
'/foo/bar' 6021 found = found.root(); 6022 ++path_; 6023 } 6024 6025 const char_t* path_segment = path_; 6026 6027 while (*path_segment == delimiter) ++path_segment; 6028 6029 const char_t* path_segment_end = path_segment; 6030 6031 while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end; 6032 6033 if (path_segment == path_segment_end) return found; 6034 6035 const char_t* next_segment = path_segment_end; 6036 6037 while (*next_segment == delimiter) ++next_segment; 6038 6039 if (*path_segment == '.' && path_segment + 1 == path_segment_end) 6040 return found.first_element_by_path(next_segment, delimiter); 6041 else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end) 6042 return found.parent().first_element_by_path(next_segment, delimiter); 6043 else 6044 { 6045 for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling) 6046 { 6047 if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment))) 6048 { 6049 xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter); 6050 6051 if (subsearch) return subsearch; 6052 } 6053 } 6054 6055 return xml_node(); 6056 } 6057 } 6058 traverse(xml_tree_walker & walker)6059 PUGI__FN bool xml_node::traverse(xml_tree_walker& walker) 6060 { 6061 walker._depth = -1; 6062 6063 xml_node arg_begin = *this; 6064 if (!walker.begin(arg_begin)) return false; 6065 6066 xml_node cur = first_child(); 6067 6068 if (cur) 6069 { 6070 ++walker._depth; 6071 6072 do 6073 { 6074 xml_node arg_for_each = cur; 6075 if (!walker.for_each(arg_for_each)) 6076 return false; 6077 6078 if (cur.first_child()) 6079 { 6080 ++walker._depth; 6081 cur = cur.first_child(); 6082 } 6083 else if (cur.next_sibling()) 6084 cur = cur.next_sibling(); 6085 else 6086 { 6087 // Borland C++ workaround 6088 while (!cur.next_sibling() && cur != *this && !cur.parent().empty()) 6089 { 6090 --walker._depth; 6091 cur = cur.parent(); 6092 } 
6093 6094 if (cur != *this) 6095 cur = cur.next_sibling(); 6096 } 6097 } 6098 while (cur && cur != *this); 6099 } 6100 6101 assert(walker._depth == -1); 6102 6103 xml_node arg_end = *this; 6104 return walker.end(arg_end); 6105 } 6106 hash_value() const6107 PUGI__FN size_t xml_node::hash_value() const 6108 { 6109 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct)); 6110 } 6111 internal_object() const6112 PUGI__FN xml_node_struct* xml_node::internal_object() const 6113 { 6114 return _root; 6115 } 6116 print(xml_writer & writer,const char_t * indent,unsigned int flags,xml_encoding encoding,unsigned int depth) const6117 PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const 6118 { 6119 if (!_root) return; 6120 6121 impl::xml_buffered_writer buffered_writer(writer, encoding); 6122 6123 impl::node_output(buffered_writer, _root, indent, flags, depth); 6124 6125 buffered_writer.flush(); 6126 } 6127 6128 #ifndef PUGIXML_NO_STL print(std::basic_ostream<char,std::char_traits<char>> & stream,const char_t * indent,unsigned int flags,xml_encoding encoding,unsigned int depth) const6129 PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const 6130 { 6131 xml_writer_stream writer(stream); 6132 6133 print(writer, indent, flags, encoding, depth); 6134 } 6135 print(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream,const char_t * indent,unsigned int flags,unsigned int depth) const6136 PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const 6137 { 6138 xml_writer_stream writer(stream); 6139 6140 print(writer, indent, flags, encoding_wchar, depth); 6141 } 6142 #endif 6143 offset_debug() const6144 PUGI__FN ptrdiff_t 
xml_node::offset_debug() const 6145 { 6146 if (!_root) return -1; 6147 6148 impl::xml_document_struct& doc = impl::get_document(_root); 6149 6150 // we can determine the offset reliably only if there is exactly once parse buffer 6151 if (!doc.buffer || doc.extra_buffers) return -1; 6152 6153 switch (type()) 6154 { 6155 case node_document: 6156 return 0; 6157 6158 case node_element: 6159 case node_declaration: 6160 case node_pi: 6161 return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1; 6162 6163 case node_pcdata: 6164 case node_cdata: 6165 case node_comment: 6166 case node_doctype: 6167 return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1; 6168 6169 default: 6170 return -1; 6171 } 6172 } 6173 6174 #ifdef __BORLANDC__ operator &&(const xml_node & lhs,bool rhs)6175 PUGI__FN bool operator&&(const xml_node& lhs, bool rhs) 6176 { 6177 return (bool)lhs && rhs; 6178 } 6179 operator ||(const xml_node & lhs,bool rhs)6180 PUGI__FN bool operator||(const xml_node& lhs, bool rhs) 6181 { 6182 return (bool)lhs || rhs; 6183 } 6184 #endif 6185 xml_text(xml_node_struct * root)6186 PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root) 6187 { 6188 } 6189 _data() const6190 PUGI__FN xml_node_struct* xml_text::_data() const 6191 { 6192 if (!_root || impl::is_text_node(_root)) return _root; 6193 6194 for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling) 6195 if (impl::is_text_node(node)) 6196 return node; 6197 6198 return 0; 6199 } 6200 _data_new()6201 PUGI__FN xml_node_struct* xml_text::_data_new() 6202 { 6203 xml_node_struct* d = _data(); 6204 if (d) return d; 6205 6206 return xml_node(_root).append_child(node_pcdata).internal_object(); 6207 } 6208 xml_text()6209 PUGI__FN xml_text::xml_text(): _root(0) 6210 { 6211 } 6212 unspecified_bool_xml_text(xml_text ***)6213 PUGI__FN static void 
unspecified_bool_xml_text(xml_text***) 6214 { 6215 } 6216 operator xml_text::unspecified_bool_type() const6217 PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const 6218 { 6219 return _data() ? unspecified_bool_xml_text : 0; 6220 } 6221 operator !() const6222 PUGI__FN bool xml_text::operator!() const 6223 { 6224 return !_data(); 6225 } 6226 empty() const6227 PUGI__FN bool xml_text::empty() const 6228 { 6229 return _data() == 0; 6230 } 6231 get() const6232 PUGI__FN const char_t* xml_text::get() const 6233 { 6234 xml_node_struct* d = _data(); 6235 6236 return (d && d->value) ? d->value + 0 : PUGIXML_TEXT(""); 6237 } 6238 as_string(const char_t * def) const6239 PUGI__FN const char_t* xml_text::as_string(const char_t* def) const 6240 { 6241 xml_node_struct* d = _data(); 6242 6243 return (d && d->value) ? d->value + 0 : def; 6244 } 6245 as_int(int def) const6246 PUGI__FN int xml_text::as_int(int def) const 6247 { 6248 xml_node_struct* d = _data(); 6249 6250 return (d && d->value) ? impl::get_value_int(d->value) : def; 6251 } 6252 as_uint(unsigned int def) const6253 PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const 6254 { 6255 xml_node_struct* d = _data(); 6256 6257 return (d && d->value) ? impl::get_value_uint(d->value) : def; 6258 } 6259 as_double(double def) const6260 PUGI__FN double xml_text::as_double(double def) const 6261 { 6262 xml_node_struct* d = _data(); 6263 6264 return (d && d->value) ? impl::get_value_double(d->value) : def; 6265 } 6266 as_float(float def) const6267 PUGI__FN float xml_text::as_float(float def) const 6268 { 6269 xml_node_struct* d = _data(); 6270 6271 return (d && d->value) ? impl::get_value_float(d->value) : def; 6272 } 6273 as_bool(bool def) const6274 PUGI__FN bool xml_text::as_bool(bool def) const 6275 { 6276 xml_node_struct* d = _data(); 6277 6278 return (d && d->value) ? 
impl::get_value_bool(d->value) : def; 6279 } 6280 6281 #ifdef PUGIXML_HAS_LONG_LONG as_llong(long long def) const6282 PUGI__FN long long xml_text::as_llong(long long def) const 6283 { 6284 xml_node_struct* d = _data(); 6285 6286 return (d && d->value) ? impl::get_value_llong(d->value) : def; 6287 } 6288 as_ullong(unsigned long long def) const6289 PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const 6290 { 6291 xml_node_struct* d = _data(); 6292 6293 return (d && d->value) ? impl::get_value_ullong(d->value) : def; 6294 } 6295 #endif 6296 set(const char_t * rhs)6297 PUGI__FN bool xml_text::set(const char_t* rhs) 6298 { 6299 xml_node_struct* dn = _data_new(); 6300 6301 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false; 6302 } 6303 set(int rhs)6304 PUGI__FN bool xml_text::set(int rhs) 6305 { 6306 xml_node_struct* dn = _data_new(); 6307 6308 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; 6309 } 6310 set(unsigned int rhs)6311 PUGI__FN bool xml_text::set(unsigned int rhs) 6312 { 6313 xml_node_struct* dn = _data_new(); 6314 6315 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; 6316 } 6317 set(float rhs)6318 PUGI__FN bool xml_text::set(float rhs) 6319 { 6320 xml_node_struct* dn = _data_new(); 6321 6322 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; 6323 } 6324 set(double rhs)6325 PUGI__FN bool xml_text::set(double rhs) 6326 { 6327 xml_node_struct* dn = _data_new(); 6328 6329 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; 6330 } 6331 set(bool rhs)6332 PUGI__FN bool xml_text::set(bool rhs) 6333 { 6334 xml_node_struct* dn = _data_new(); 6335 6336 return dn ? 
impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; 6337 } 6338 6339 #ifdef PUGIXML_HAS_LONG_LONG set(long long rhs)6340 PUGI__FN bool xml_text::set(long long rhs) 6341 { 6342 xml_node_struct* dn = _data_new(); 6343 6344 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; 6345 } 6346 set(unsigned long long rhs)6347 PUGI__FN bool xml_text::set(unsigned long long rhs) 6348 { 6349 xml_node_struct* dn = _data_new(); 6350 6351 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; 6352 } 6353 #endif 6354 operator =(const char_t * rhs)6355 PUGI__FN xml_text& xml_text::operator=(const char_t* rhs) 6356 { 6357 set(rhs); 6358 return *this; 6359 } 6360 operator =(int rhs)6361 PUGI__FN xml_text& xml_text::operator=(int rhs) 6362 { 6363 set(rhs); 6364 return *this; 6365 } 6366 operator =(unsigned int rhs)6367 PUGI__FN xml_text& xml_text::operator=(unsigned int rhs) 6368 { 6369 set(rhs); 6370 return *this; 6371 } 6372 operator =(double rhs)6373 PUGI__FN xml_text& xml_text::operator=(double rhs) 6374 { 6375 set(rhs); 6376 return *this; 6377 } 6378 operator =(float rhs)6379 PUGI__FN xml_text& xml_text::operator=(float rhs) 6380 { 6381 set(rhs); 6382 return *this; 6383 } 6384 operator =(bool rhs)6385 PUGI__FN xml_text& xml_text::operator=(bool rhs) 6386 { 6387 set(rhs); 6388 return *this; 6389 } 6390 6391 #ifdef PUGIXML_HAS_LONG_LONG operator =(long long rhs)6392 PUGI__FN xml_text& xml_text::operator=(long long rhs) 6393 { 6394 set(rhs); 6395 return *this; 6396 } 6397 operator =(unsigned long long rhs)6398 PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs) 6399 { 6400 set(rhs); 6401 return *this; 6402 } 6403 #endif 6404 data() const6405 PUGI__FN xml_node xml_text::data() const 6406 { 6407 return xml_node(_data()); 6408 } 6409 6410 #ifdef __BORLANDC__ operator &&(const xml_text & lhs,bool 
rhs)6411 PUGI__FN bool operator&&(const xml_text& lhs, bool rhs) 6412 { 6413 return (bool)lhs && rhs; 6414 } 6415 operator ||(const xml_text & lhs,bool rhs)6416 PUGI__FN bool operator||(const xml_text& lhs, bool rhs) 6417 { 6418 return (bool)lhs || rhs; 6419 } 6420 #endif 6421 xml_node_iterator()6422 PUGI__FN xml_node_iterator::xml_node_iterator() 6423 { 6424 } 6425 xml_node_iterator(const xml_node & node)6426 PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent()) 6427 { 6428 } 6429 xml_node_iterator(xml_node_struct * ref,xml_node_struct * parent)6430 PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) 6431 { 6432 } 6433 operator ==(const xml_node_iterator & rhs) const6434 PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const 6435 { 6436 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; 6437 } 6438 operator !=(const xml_node_iterator & rhs) const6439 PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const 6440 { 6441 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; 6442 } 6443 operator *() const6444 PUGI__FN xml_node& xml_node_iterator::operator*() const 6445 { 6446 assert(_wrap._root); 6447 return _wrap; 6448 } 6449 operator ->() const6450 PUGI__FN xml_node* xml_node_iterator::operator->() const 6451 { 6452 assert(_wrap._root); 6453 return const_cast<xml_node*>(&_wrap); // BCC32 workaround 6454 } 6455 operator ++()6456 PUGI__FN const xml_node_iterator& xml_node_iterator::operator++() 6457 { 6458 assert(_wrap._root); 6459 _wrap._root = _wrap._root->next_sibling; 6460 return *this; 6461 } 6462 operator ++(int)6463 PUGI__FN xml_node_iterator xml_node_iterator::operator++(int) 6464 { 6465 xml_node_iterator temp = *this; 6466 ++*this; 6467 return temp; 6468 } 6469 operator --()6470 PUGI__FN const xml_node_iterator& xml_node_iterator::operator--() 
6471 { 6472 _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child(); 6473 return *this; 6474 } 6475 operator --(int)6476 PUGI__FN xml_node_iterator xml_node_iterator::operator--(int) 6477 { 6478 xml_node_iterator temp = *this; 6479 --*this; 6480 return temp; 6481 } 6482 xml_attribute_iterator()6483 PUGI__FN xml_attribute_iterator::xml_attribute_iterator() 6484 { 6485 } 6486 xml_attribute_iterator(const xml_attribute & attr,const xml_node & parent)6487 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent) 6488 { 6489 } 6490 xml_attribute_iterator(xml_attribute_struct * ref,xml_node_struct * parent)6491 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) 6492 { 6493 } 6494 operator ==(const xml_attribute_iterator & rhs) const6495 PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const 6496 { 6497 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root; 6498 } 6499 operator !=(const xml_attribute_iterator & rhs) const6500 PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const 6501 { 6502 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root; 6503 } 6504 operator *() const6505 PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const 6506 { 6507 assert(_wrap._attr); 6508 return _wrap; 6509 } 6510 operator ->() const6511 PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const 6512 { 6513 assert(_wrap._attr); 6514 return const_cast<xml_attribute*>(&_wrap); // BCC32 workaround 6515 } 6516 operator ++()6517 PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++() 6518 { 6519 assert(_wrap._attr); 6520 _wrap._attr = _wrap._attr->next_attribute; 6521 return *this; 6522 } 6523 operator ++(int)6524 PUGI__FN xml_attribute_iterator 
xml_attribute_iterator::operator++(int) 6525 { 6526 xml_attribute_iterator temp = *this; 6527 ++*this; 6528 return temp; 6529 } 6530 operator --()6531 PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--() 6532 { 6533 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute(); 6534 return *this; 6535 } 6536 operator --(int)6537 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int) 6538 { 6539 xml_attribute_iterator temp = *this; 6540 --*this; 6541 return temp; 6542 } 6543 xml_named_node_iterator()6544 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0) 6545 { 6546 } 6547 xml_named_node_iterator(const xml_node & node,const char_t * name)6548 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name) 6549 { 6550 } 6551 xml_named_node_iterator(xml_node_struct * ref,xml_node_struct * parent,const char_t * name)6552 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name) 6553 { 6554 } 6555 operator ==(const xml_named_node_iterator & rhs) const6556 PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const 6557 { 6558 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; 6559 } 6560 operator !=(const xml_named_node_iterator & rhs) const6561 PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const 6562 { 6563 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; 6564 } 6565 operator *() const6566 PUGI__FN xml_node& xml_named_node_iterator::operator*() const 6567 { 6568 assert(_wrap._root); 6569 return _wrap; 6570 } 6571 operator ->() const6572 PUGI__FN xml_node* xml_named_node_iterator::operator->() const 6573 { 6574 assert(_wrap._root); 6575 return const_cast<xml_node*>(&_wrap); // BCC32 
workaround 6576 } 6577 operator ++()6578 PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++() 6579 { 6580 assert(_wrap._root); 6581 _wrap = _wrap.next_sibling(_name); 6582 return *this; 6583 } 6584 operator ++(int)6585 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int) 6586 { 6587 xml_named_node_iterator temp = *this; 6588 ++*this; 6589 return temp; 6590 } 6591 operator --()6592 PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--() 6593 { 6594 if (_wrap._root) 6595 _wrap = _wrap.previous_sibling(_name); 6596 else 6597 { 6598 _wrap = _parent.last_child(); 6599 6600 if (!impl::strequal(_wrap.name(), _name)) 6601 _wrap = _wrap.previous_sibling(_name); 6602 } 6603 6604 return *this; 6605 } 6606 operator --(int)6607 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int) 6608 { 6609 xml_named_node_iterator temp = *this; 6610 --*this; 6611 return temp; 6612 } 6613 xml_parse_result()6614 PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto) 6615 { 6616 } 6617 operator bool() const6618 PUGI__FN xml_parse_result::operator bool() const 6619 { 6620 return status == status_ok; 6621 } 6622 description() const6623 PUGI__FN const char* xml_parse_result::description() const 6624 { 6625 switch (status) 6626 { 6627 case status_ok: return "No error"; 6628 6629 case status_file_not_found: return "File was not found"; 6630 case status_io_error: return "Error reading from file/stream"; 6631 case status_out_of_memory: return "Could not allocate memory"; 6632 case status_internal_error: return "Internal error occurred"; 6633 6634 case status_unrecognized_tag: return "Could not determine tag type"; 6635 6636 case status_bad_pi: return "Error parsing document declaration/processing instruction"; 6637 case status_bad_comment: return "Error parsing comment"; 6638 case status_bad_cdata: return "Error parsing CDATA section"; 6639 case 
status_bad_doctype: return "Error parsing document type declaration"; 6640 case status_bad_pcdata: return "Error parsing PCDATA section"; 6641 case status_bad_start_element: return "Error parsing start element tag"; 6642 case status_bad_attribute: return "Error parsing element attribute"; 6643 case status_bad_end_element: return "Error parsing end element tag"; 6644 case status_end_element_mismatch: return "Start-end tags mismatch"; 6645 6646 case status_append_invalid_root: return "Unable to append nodes: root is not an element or document"; 6647 6648 case status_no_document_element: return "No document element found"; 6649 6650 default: return "Unknown error"; 6651 } 6652 } 6653 xml_document()6654 PUGI__FN xml_document::xml_document(): _buffer(0) 6655 { 6656 create(); 6657 } 6658 ~xml_document()6659 PUGI__FN xml_document::~xml_document() 6660 { 6661 destroy(); 6662 } 6663 reset()6664 PUGI__FN void xml_document::reset() 6665 { 6666 destroy(); 6667 create(); 6668 } 6669 reset(const xml_document & proto)6670 PUGI__FN void xml_document::reset(const xml_document& proto) 6671 { 6672 reset(); 6673 6674 for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling()) 6675 append_copy(cur); 6676 } 6677 create()6678 PUGI__FN void xml_document::create() 6679 { 6680 assert(!_root); 6681 6682 #ifdef PUGIXML_COMPACT 6683 const size_t page_offset = sizeof(uint32_t); 6684 #else 6685 const size_t page_offset = 0; 6686 #endif 6687 6688 // initialize sentinel page 6689 PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + impl::xml_memory_page_alignment - sizeof(void*) + page_offset <= sizeof(_memory)); 6690 6691 // align upwards to page boundary 6692 void* page_memory = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(_memory) + (impl::xml_memory_page_alignment - 1)) & ~(impl::xml_memory_page_alignment - 1)); 6693 6694 // prepare page structure 6695 impl::xml_memory_page* page = impl::xml_memory_page::construct(page_memory); 6696 
assert(page); 6697 6698 page->busy_size = impl::xml_memory_page_size; 6699 6700 // setup first page marker 6701 #ifdef PUGIXML_COMPACT 6702 // round-trip through void* to avoid 'cast increases required alignment of target type' warning 6703 page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page))); 6704 *page->compact_page_marker = sizeof(impl::xml_memory_page); 6705 #endif 6706 6707 // allocate new root 6708 _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page); 6709 _root->prev_sibling_c = _root; 6710 6711 // setup sentinel page 6712 page->allocator = static_cast<impl::xml_document_struct*>(_root); 6713 6714 // verify the document allocation 6715 assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory)); 6716 } 6717 destroy()6718 PUGI__FN void xml_document::destroy() 6719 { 6720 assert(_root); 6721 6722 // destroy static storage 6723 if (_buffer) 6724 { 6725 impl::xml_memory::deallocate(_buffer); 6726 _buffer = 0; 6727 } 6728 6729 // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator) 6730 for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next) 6731 { 6732 if (extra->buffer) impl::xml_memory::deallocate(extra->buffer); 6733 } 6734 6735 // destroy dynamic storage, leave sentinel page (it's in static memory) 6736 impl::xml_memory_page* root_page = PUGI__GETPAGE(_root); 6737 assert(root_page && !root_page->prev); 6738 assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory)); 6739 6740 for (impl::xml_memory_page* page = root_page->next; page; ) 6741 { 6742 impl::xml_memory_page* next = page->next; 6743 6744 impl::xml_allocator::deallocate_page(page); 6745 6746 page = next; 6747 } 6748 
6749 #ifdef PUGIXML_COMPACT 6750 // destroy hash table 6751 static_cast<impl::xml_document_struct*>(_root)->hash.clear(); 6752 #endif 6753 6754 _root = 0; 6755 } 6756 6757 #ifndef PUGIXML_NO_STL load(std::basic_istream<char,std::char_traits<char>> & stream,unsigned int options,xml_encoding encoding)6758 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding) 6759 { 6760 reset(); 6761 6762 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer); 6763 } 6764 load(std::basic_istream<wchar_t,std::char_traits<wchar_t>> & stream,unsigned int options)6765 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options) 6766 { 6767 reset(); 6768 6769 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer); 6770 } 6771 #endif 6772 load_string(const char_t * contents,unsigned int options)6773 PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options) 6774 { 6775 // Force native encoding (skip autodetection) 6776 #ifdef PUGIXML_WCHAR_MODE 6777 xml_encoding encoding = encoding_wchar; 6778 #else 6779 xml_encoding encoding = encoding_utf8; 6780 #endif 6781 6782 return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding); 6783 } 6784 load(const char_t * contents,unsigned int options)6785 PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options) 6786 { 6787 return load_string(contents, options); 6788 } 6789 load_file(const char * path_,unsigned int options,xml_encoding encoding)6790 PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding) 6791 { 6792 reset(); 6793 6794 using impl::auto_deleter; // MSVC7 workaround 6795 auto_deleter<FILE, 
int(*)(FILE*)> file(fopen(path_, "rb"), fclose); 6796 6797 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer); 6798 } 6799 load_file(const wchar_t * path_,unsigned int options,xml_encoding encoding)6800 PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding) 6801 { 6802 reset(); 6803 6804 using impl::auto_deleter; // MSVC7 workaround 6805 auto_deleter<FILE, int(*)(FILE*)> file(impl::open_file_wide(path_, L"rb"), fclose); 6806 6807 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer); 6808 } 6809 load_buffer(const void * contents,size_t size,unsigned int options,xml_encoding encoding)6810 PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) 6811 { 6812 reset(); 6813 6814 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer); 6815 } 6816 load_buffer_inplace(void * contents,size_t size,unsigned int options,xml_encoding encoding)6817 PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding) 6818 { 6819 reset(); 6820 6821 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer); 6822 } 6823 load_buffer_inplace_own(void * contents,size_t size,unsigned int options,xml_encoding encoding)6824 PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding) 6825 { 6826 reset(); 6827 6828 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer); 6829 } 6830 save(xml_writer & writer,const 
char_t * indent,unsigned int flags,xml_encoding encoding) const6831 PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const 6832 { 6833 impl::xml_buffered_writer buffered_writer(writer, encoding); 6834 6835 if ((flags & format_write_bom) && encoding != encoding_latin1) 6836 { 6837 // BOM always represents the codepoint U+FEFF, so just write it in native encoding 6838 #ifdef PUGIXML_WCHAR_MODE 6839 unsigned int bom = 0xfeff; 6840 buffered_writer.write(static_cast<wchar_t>(bom)); 6841 #else 6842 buffered_writer.write('\xef', '\xbb', '\xbf'); 6843 #endif 6844 } 6845 6846 if (!(flags & format_no_declaration) && !impl::has_declaration(_root)) 6847 { 6848 buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\"")); 6849 if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\"")); 6850 buffered_writer.write('?', '>'); 6851 if (!(flags & format_raw)) buffered_writer.write('\n'); 6852 } 6853 6854 impl::node_output(buffered_writer, _root, indent, flags, 0); 6855 6856 buffered_writer.flush(); 6857 } 6858 6859 #ifndef PUGIXML_NO_STL save(std::basic_ostream<char,std::char_traits<char>> & stream,const char_t * indent,unsigned int flags,xml_encoding encoding) const6860 PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const 6861 { 6862 xml_writer_stream writer(stream); 6863 6864 save(writer, indent, flags, encoding); 6865 } 6866 save(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream,const char_t * indent,unsigned int flags) const6867 PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const 6868 { 6869 xml_writer_stream writer(stream); 6870 6871 save(writer, indent, flags, encoding_wchar); 6872 } 6873 #endif 6874 save_file(const char * path_,const char_t * 
indent,unsigned int flags,xml_encoding encoding) const6875 PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const 6876 { 6877 using impl::auto_deleter; // MSVC7 workaround 6878 auto_deleter<FILE, int(*)(FILE*)> file(fopen(path_, (flags & format_save_file_text) ? "w" : "wb"), fclose); 6879 6880 return impl::save_file_impl(*this, file.data, indent, flags, encoding); 6881 } 6882 save_file(const wchar_t * path_,const char_t * indent,unsigned int flags,xml_encoding encoding) const6883 PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const 6884 { 6885 using impl::auto_deleter; // MSVC7 workaround 6886 auto_deleter<FILE, int(*)(FILE*)> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), fclose); 6887 6888 return impl::save_file_impl(*this, file.data, indent, flags, encoding); 6889 } 6890 document_element() const6891 PUGI__FN xml_node xml_document::document_element() const 6892 { 6893 assert(_root); 6894 6895 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 6896 if (PUGI__NODETYPE(i) == node_element) 6897 return xml_node(i); 6898 6899 return xml_node(); 6900 } 6901 6902 #ifndef PUGIXML_NO_STL as_utf8(const wchar_t * str)6903 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str) 6904 { 6905 assert(str); 6906 6907 return impl::as_utf8_impl(str, impl::strlength_wide(str)); 6908 } 6909 as_utf8(const std::basic_string<wchar_t> & str)6910 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str) 6911 { 6912 return impl::as_utf8_impl(str.c_str(), str.size()); 6913 } 6914 as_wide(const char * str)6915 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str) 6916 { 6917 assert(str); 6918 6919 return impl::as_wide_impl(str, strlen(str)); 6920 } 6921 as_wide(const std::string & str)6922 PUGI__FN std::basic_string<wchar_t> 
PUGIXML_FUNCTION as_wide(const std::string& str) 6923 { 6924 return impl::as_wide_impl(str.c_str(), str.size()); 6925 } 6926 #endif 6927 set_memory_management_functions(allocation_function allocate,deallocation_function deallocate)6928 PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate) 6929 { 6930 impl::xml_memory::allocate = allocate; 6931 impl::xml_memory::deallocate = deallocate; 6932 } 6933 get_memory_allocation_function()6934 PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function() 6935 { 6936 return impl::xml_memory::allocate; 6937 } 6938 get_memory_deallocation_function()6939 PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function() 6940 { 6941 return impl::xml_memory::deallocate; 6942 } 6943 } 6944 6945 #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC)) 6946 namespace std 6947 { 6948 // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier) _Iter_cat(const pugi::xml_node_iterator &)6949 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&) 6950 { 6951 return std::bidirectional_iterator_tag(); 6952 } 6953 _Iter_cat(const pugi::xml_attribute_iterator &)6954 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&) 6955 { 6956 return std::bidirectional_iterator_tag(); 6957 } 6958 _Iter_cat(const pugi::xml_named_node_iterator &)6959 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&) 6960 { 6961 return std::bidirectional_iterator_tag(); 6962 } 6963 } 6964 #endif 6965 6966 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC) 6967 namespace std 6968 { 6969 // Workarounds for (non-standard) iterator category detection __iterator_category(const pugi::xml_node_iterator &)6970 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&) 6971 { 6972 return 
// STL replacements: comparison function objects. Each one forwards to the
// corresponding operator of T.
struct equal_to
{
	template <typename T> bool operator()(const T& lhs, const T& rhs) const
	{
		return lhs == rhs;
	}
};

struct not_equal_to
{
	template <typename T> bool operator()(const T& lhs, const T& rhs) const
	{
		return lhs != rhs;
	}
};

struct less
{
	template <typename T> bool operator()(const T& lhs, const T& rhs) const
	{
		return lhs < rhs;
	}
};

struct less_equal
{
	template <typename T> bool operator()(const T& lhs, const T& rhs) const
	{
		return lhs <= rhs;
	}
};

// Exchanges two values through a temporary copy (T must be copyable and assignable).
template <typename T> void swap(T& lhs, T& rhs)
{
	T temp = lhs;
	lhs = rhs;
	rhs = temp;
}

// Returns an iterator to the first element that no later element precedes
// according to pred. Returns end when the range is empty.
template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
{
	// an empty range has no minimum; without this guard begin + 1 would start past end
	if (begin == end) return end;

	I result = begin;

	for (I it = begin + 1; it != end; ++it)
		if (pred(*it, *result))
			result = it;

	return result;
}

// Reverses [begin, end) in place by swapping elements from both ends.
template <typename I> void reverse(I begin, I end)
{
	while (end - begin > 1) swap(*begin++, *--end);
}

// Collapses runs of adjacent equal elements (compared with !=) and returns
// the new past-the-end iterator; elements after it are left as they were.
template <typename I> I unique(I begin, I end)
{
	// fast skip head: nothing needs to move until the first adjacent duplicate
	while (end - begin > 1 && *begin != *(begin + 1)) begin++;

	if (begin == end) return begin;

	// last written element
	I write = begin++;

	// merge unique elements
	while (begin != end)
	{
		if (*begin != *write)
			*++write = *begin++;
		else
			begin++;
	}

	// past-the-end (write points to live element)
	return write + 1;
}

// Copies [begin, end) so that the copy ends at target, walking from the back;
// this order makes it usable when the destination overlaps the source tail.
template <typename I> void copy_backwards(I begin, I end, I target)
{
	while (begin != end) *--target = *--end;
}

// Insertion sort of the non-empty range [begin, end). The trailing T* argument
// is unused at run time; it only lets the element type T be deduced.
template <typename I, typename Pred, typename T> void insertion_sort(I begin, I end, const Pred& pred, T*)
{
	assert(begin != end);

	for (I it = begin + 1; it != end; ++it)
	{
		T val = *it;

		if (pred(val, *begin))
		{
			// move to front
			copy_backwards(begin, it, it + 1);
			*begin = val;
		}
		else
		{
			I hole = it;

			// move hole backwards; *begin does not compare after val, so the scan stops in range
			while (pred(val, *(hole - 1)))
			{
				*hole = *(hole - 1);
				hole--;
			}

			// fill hole with element
			*hole = val;
		}
	}
}

// std variant for elements with ==
// Three-way partition of [begin, end) around the value at middle. On return
// [*out_eqbeg, *out_eqend) holds the elements equal to the pivot, elements
// before it satisfy pred(x, pivot) and elements after it satisfy pred(pivot, x).
template <typename I, typename Pred> void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend)
{
	I eqbeg = middle, eqend = middle + 1;

	// expand equal range
	while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg;
	while (eqend != end && *eqend == *eqbeg) ++eqend;

	// process outer elements
	I ltend = eqbeg, gtbeg = eqend;

	for (;;)
	{
		// find the element from the right side that belongs to the left one
		for (; gtbeg != end; ++gtbeg)
			if (!pred(*eqbeg, *gtbeg))
			{
				if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++);
				else break;
			}

		// find the element from the left side that belongs to the right one
		for (; ltend != begin; --ltend)
			if (!pred(*(ltend - 1), *eqbeg))
			{
				if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg);
				else break;
			}

		// scanned all elements
		if (gtbeg == end && ltend == begin)
		{
			*out_eqbeg = eqbeg;
			*out_eqend = eqend;
			return;
		}

		// make room for elements by moving equal area
		if (gtbeg == end)
		{
			if (--ltend != --eqbeg) swap(*ltend, *eqbeg);
			swap(*eqbeg, *--eqend);
		}
		else if (ltend == begin)
		{
			if (eqend != gtbeg) swap(*eqbeg, *eqend);
			++eqend;
			swap(*gtbeg++, *eqbeg++);
		}
		else swap(*gtbeg++, *--ltend);
	}
}

// Orders the three referenced elements so that *first, *middle, *last are
// non-descending under pred; the median ends up at middle.
template <typename I, typename Pred> void median3(I first, I middle, I last, const Pred& pred)
{
	if (pred(*middle, *first)) swap(*middle, *first);
	if (pred(*last, *middle)) swap(*last, *middle);
	if (pred(*middle, *first)) swap(*middle, *first);
}

// Moves a pivot candidate to middle: median of three for spans of up to 40
// elements, median of three medians otherwise. last is the last element, not past-the-end.
template <typename I, typename Pred> void median(I first, I middle, I last, const Pred& pred)
{
	if (last - first <= 40)
	{
		// median of three for small chunks
		median3(first, middle, last, pred);
	}
	else
	{
		// median of nine
		size_t step = (last - first + 1) / 8;

		median3(first, first + step, first + 2 * step, pred);
		median3(middle - step, middle, middle + step, pred);
		median3(last - 2 * step, last - step, last, pred);
		median3(first + step, middle, last - step, pred);
	}
}

// Sorts [begin, end) under pred: quicksort with three-way partitioning for
// ranges longer than 32 elements, insertion sort for the remainder.
template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
{
	// sort large chunks
	while (end - begin > 32)
	{
		// find median element
		I middle = begin + (end - begin) / 2;
		median(begin, middle, end - 1, pred);

		// partition in three chunks (< = >)
		I eqbeg, eqend;
		partition(begin, middle, end, pred, &eqbeg, &eqend);

		// loop on larger half (recursing into the smaller one bounds the recursion depth)
		if (eqbeg - begin > end - eqend)
		{
			sort(eqend, end, pred);
			end = eqbeg;
		}
		else
		{
			sort(begin, eqbeg, pred);
			begin = eqend;
		}
	}

	// insertion sort small chunk
	if (begin != end) insertion_sort(begin, end, pred, &*begin);
}
chunks (< = >) 7194 I eqbeg, eqend; 7195 partition(begin, middle, end, pred, &eqbeg, &eqend); 7196 7197 // loop on larger half 7198 if (eqbeg - begin > end - eqend) 7199 { 7200 sort(eqend, end, pred); 7201 end = eqbeg; 7202 } 7203 else 7204 { 7205 sort(begin, eqbeg, pred); 7206 begin = eqend; 7207 } 7208 } 7209 7210 // insertion sort small chunk 7211 if (begin != end) insertion_sort(begin, end, pred, &*begin); 7212 } 7213 PUGI__NS_END 7214 7215 // Allocator used for AST and evaluation stacks 7216 PUGI__NS_BEGIN 7217 static const size_t xpath_memory_page_size = 7218 #ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE 7219 PUGIXML_MEMORY_XPATH_PAGE_SIZE 7220 #else 7221 4096 7222 #endif 7223 ; 7224 7225 static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*); 7226 7227 struct xpath_memory_block 7228 { 7229 xpath_memory_block* next; 7230 size_t capacity; 7231 7232 union 7233 { 7234 char data[xpath_memory_page_size]; 7235 double alignment; 7236 }; 7237 }; 7238 7239 class xpath_allocator 7240 { 7241 xpath_memory_block* _root; 7242 size_t _root_size; 7243 7244 public: 7245 #ifdef PUGIXML_NO_EXCEPTIONS 7246 jmp_buf* error_handler; 7247 #endif 7248 xpath_allocator(xpath_memory_block * root,size_t root_size=0)7249 xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size) 7250 { 7251 #ifdef PUGIXML_NO_EXCEPTIONS 7252 error_handler = 0; 7253 #endif 7254 } 7255 allocate_nothrow(size_t size)7256 void* allocate_nothrow(size_t size) 7257 { 7258 // round size up to block alignment boundary 7259 size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); 7260 7261 if (_root_size + size <= _root->capacity) 7262 { 7263 void* buf = &_root->data[0] + _root_size; 7264 _root_size += size; 7265 return buf; 7266 } 7267 else 7268 { 7269 // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests 7270 size_t 
block_capacity_base = sizeof(_root->data); 7271 size_t block_capacity_req = size + block_capacity_base / 4; 7272 size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req; 7273 7274 size_t block_size = block_capacity + offsetof(xpath_memory_block, data); 7275 7276 xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size)); 7277 if (!block) return 0; 7278 7279 block->next = _root; 7280 block->capacity = block_capacity; 7281 7282 _root = block; 7283 _root_size = size; 7284 7285 return block->data; 7286 } 7287 } 7288 allocate(size_t size)7289 void* allocate(size_t size) 7290 { 7291 void* result = allocate_nothrow(size); 7292 7293 if (!result) 7294 { 7295 #ifdef PUGIXML_NO_EXCEPTIONS 7296 assert(error_handler); 7297 longjmp(*error_handler, 1); 7298 #else 7299 throw std::bad_alloc(); 7300 #endif 7301 } 7302 7303 return result; 7304 } 7305 reallocate(void * ptr,size_t old_size,size_t new_size)7306 void* reallocate(void* ptr, size_t old_size, size_t new_size) 7307 { 7308 // round size up to block alignment boundary 7309 old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); 7310 new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); 7311 7312 // we can only reallocate the last object 7313 assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size); 7314 7315 // adjust root size so that we have not allocated the object at all 7316 bool only_object = (_root_size == old_size); 7317 7318 if (ptr) _root_size -= old_size; 7319 7320 // allocate a new version (this will obviously reuse the memory if possible) 7321 void* result = allocate(new_size); 7322 assert(result); 7323 7324 // we have a new block 7325 if (result != ptr && ptr) 7326 { 7327 // copy old data 7328 assert(new_size >= old_size); 7329 memcpy(result, ptr, old_size); 7330 7331 // free the previous page if it had no other 
objects 7332 if (only_object) 7333 { 7334 assert(_root->data == result); 7335 assert(_root->next); 7336 7337 xpath_memory_block* next = _root->next->next; 7338 7339 if (next) 7340 { 7341 // deallocate the whole page, unless it was the first one 7342 xml_memory::deallocate(_root->next); 7343 _root->next = next; 7344 } 7345 } 7346 } 7347 7348 return result; 7349 } 7350 revert(const xpath_allocator & state)7351 void revert(const xpath_allocator& state) 7352 { 7353 // free all new pages 7354 xpath_memory_block* cur = _root; 7355 7356 while (cur != state._root) 7357 { 7358 xpath_memory_block* next = cur->next; 7359 7360 xml_memory::deallocate(cur); 7361 7362 cur = next; 7363 } 7364 7365 // restore state 7366 _root = state._root; 7367 _root_size = state._root_size; 7368 } 7369 release()7370 void release() 7371 { 7372 xpath_memory_block* cur = _root; 7373 assert(cur); 7374 7375 while (cur->next) 7376 { 7377 xpath_memory_block* next = cur->next; 7378 7379 xml_memory::deallocate(cur); 7380 7381 cur = next; 7382 } 7383 } 7384 }; 7385 7386 struct xpath_allocator_capture 7387 { xpath_allocator_capturexpath_allocator_capture7388 xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc) 7389 { 7390 } 7391 ~xpath_allocator_capturexpath_allocator_capture7392 ~xpath_allocator_capture() 7393 { 7394 _target->revert(_state); 7395 } 7396 7397 xpath_allocator* _target; 7398 xpath_allocator _state; 7399 }; 7400 7401 struct xpath_stack 7402 { 7403 xpath_allocator* result; 7404 xpath_allocator* temp; 7405 }; 7406 7407 struct xpath_stack_data 7408 { 7409 xpath_memory_block blocks[2]; 7410 xpath_allocator result; 7411 xpath_allocator temp; 7412 xpath_stack stack; 7413 7414 #ifdef PUGIXML_NO_EXCEPTIONS 7415 jmp_buf error_handler; 7416 #endif 7417 xpath_stack_dataxpath_stack_data7418 xpath_stack_data(): result(blocks + 0), temp(blocks + 1) 7419 { 7420 blocks[0].next = blocks[1].next = 0; 7421 blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data); 7422 7423 
stack.result = &result; 7424 stack.temp = &temp; 7425 7426 #ifdef PUGIXML_NO_EXCEPTIONS 7427 result.error_handler = temp.error_handler = &error_handler; 7428 #endif 7429 } 7430 ~xpath_stack_dataxpath_stack_data7431 ~xpath_stack_data() 7432 { 7433 result.release(); 7434 temp.release(); 7435 } 7436 }; 7437 PUGI__NS_END 7438 7439 // String class 7440 PUGI__NS_BEGIN 7441 class xpath_string 7442 { 7443 const char_t* _buffer; 7444 bool _uses_heap; 7445 size_t _length_heap; 7446 duplicate_string(const char_t * string,size_t length,xpath_allocator * alloc)7447 static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc) 7448 { 7449 char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t))); 7450 assert(result); 7451 7452 memcpy(result, string, length * sizeof(char_t)); 7453 result[length] = 0; 7454 7455 return result; 7456 } 7457 xpath_string(const char_t * buffer,bool uses_heap_,size_t length_heap)7458 xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap) 7459 { 7460 } 7461 7462 public: from_const(const char_t * str)7463 static xpath_string from_const(const char_t* str) 7464 { 7465 return xpath_string(str, false, 0); 7466 } 7467 from_heap_preallocated(const char_t * begin,const char_t * end)7468 static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end) 7469 { 7470 assert(begin <= end && *end == 0); 7471 7472 return xpath_string(begin, true, static_cast<size_t>(end - begin)); 7473 } 7474 from_heap(const char_t * begin,const char_t * end,xpath_allocator * alloc)7475 static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc) 7476 { 7477 assert(begin <= end); 7478 7479 size_t length = static_cast<size_t>(end - begin); 7480 7481 return length == 0 ? 
xpath_string() : xpath_string(duplicate_string(begin, length, alloc), true, length); 7482 } 7483 xpath_string()7484 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0) 7485 { 7486 } 7487 append(const xpath_string & o,xpath_allocator * alloc)7488 void append(const xpath_string& o, xpath_allocator* alloc) 7489 { 7490 // skip empty sources 7491 if (!*o._buffer) return; 7492 7493 // fast append for constant empty target and constant source 7494 if (!*_buffer && !_uses_heap && !o._uses_heap) 7495 { 7496 _buffer = o._buffer; 7497 } 7498 else 7499 { 7500 // need to make heap copy 7501 size_t target_length = length(); 7502 size_t source_length = o.length(); 7503 size_t result_length = target_length + source_length; 7504 7505 // allocate new buffer 7506 char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t))); 7507 assert(result); 7508 7509 // append first string to the new buffer in case there was no reallocation 7510 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t)); 7511 7512 // append second string to the new buffer 7513 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t)); 7514 result[result_length] = 0; 7515 7516 // finalize 7517 _buffer = result; 7518 _uses_heap = true; 7519 _length_heap = result_length; 7520 } 7521 } 7522 c_str() const7523 const char_t* c_str() const 7524 { 7525 return _buffer; 7526 } 7527 length() const7528 size_t length() const 7529 { 7530 return _uses_heap ? 
_length_heap : strlength(_buffer); 7531 } 7532 data(xpath_allocator * alloc)7533 char_t* data(xpath_allocator* alloc) 7534 { 7535 // make private heap copy 7536 if (!_uses_heap) 7537 { 7538 size_t length_ = strlength(_buffer); 7539 7540 _buffer = duplicate_string(_buffer, length_, alloc); 7541 _uses_heap = true; 7542 _length_heap = length_; 7543 } 7544 7545 return const_cast<char_t*>(_buffer); 7546 } 7547 empty() const7548 bool empty() const 7549 { 7550 return *_buffer == 0; 7551 } 7552 operator ==(const xpath_string & o) const7553 bool operator==(const xpath_string& o) const 7554 { 7555 return strequal(_buffer, o._buffer); 7556 } 7557 operator !=(const xpath_string & o) const7558 bool operator!=(const xpath_string& o) const 7559 { 7560 return !strequal(_buffer, o._buffer); 7561 } 7562 uses_heap() const7563 bool uses_heap() const 7564 { 7565 return _uses_heap; 7566 } 7567 }; 7568 PUGI__NS_END 7569 7570 PUGI__NS_BEGIN starts_with(const char_t * string,const char_t * pattern)7571 PUGI__FN bool starts_with(const char_t* string, const char_t* pattern) 7572 { 7573 while (*pattern && *string == *pattern) 7574 { 7575 string++; 7576 pattern++; 7577 } 7578 7579 return *pattern == 0; 7580 } 7581 find_char(const char_t * s,char_t c)7582 PUGI__FN const char_t* find_char(const char_t* s, char_t c) 7583 { 7584 #ifdef PUGIXML_WCHAR_MODE 7585 return wcschr(s, c); 7586 #else 7587 return strchr(s, c); 7588 #endif 7589 } 7590 find_substring(const char_t * s,const char_t * p)7591 PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p) 7592 { 7593 #ifdef PUGIXML_WCHAR_MODE 7594 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0) 7595 return (*p == 0) ? s : wcsstr(s, p); 7596 #else 7597 return strstr(s, p); 7598 #endif 7599 } 7600 7601 // Converts symbol to lower case, if it is an ASCII one tolower_ascii(char_t ch)7602 PUGI__FN char_t tolower_ascii(char_t ch) 7603 { 7604 return static_cast<unsigned int>(ch - 'A') < 26 ? 
// Computes the string value of an XPath node. Attribute, text, CDATA, comment
// and PI nodes return their own value without copying. Documents and elements
// return the concatenation of all descendant PCDATA/CDATA values in traversal
// order, built in memory from alloc. Other node types give the empty string.
PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
{
	if (na.attribute())
		return xpath_string::from_const(na.attribute().value());
	else
	{
		xml_node n = na.node();

		switch (n.type())
		{
		case node_pcdata:
		case node_cdata:
		case node_comment:
		case node_pi:
			return xpath_string::from_const(n.value());

		case node_document:
		case node_element:
		{
			xpath_string result;

			xml_node cur = n.first_child();

			// iterative depth-first walk of the subtree below n
			while (cur && cur != n)
			{
				if (cur.type() == node_pcdata || cur.type() == node_cdata)
					result.append(xpath_string::from_const(cur.value()), alloc);

				if (cur.first_child())
					cur = cur.first_child();
				else if (cur.next_sibling())
					cur = cur.next_sibling();
				else
				{
					// climb until an ancestor has a next sibling, or we are back at n
					while (!cur.next_sibling() && cur != n)
						cur = cur.parent();

					if (cur != n) cur = cur.next_sibling();
				}
			}

			return result;
		}

		default:
			return xpath_string();
		}
	}
}

// Orders two nodes that share the same parent: returns true when ln comes
// before rn in the sibling chain.
PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
{
	assert(ln->parent == rn->parent);

	// there is no common ancestor (the shared parent is null), nodes are from different documents
	if (!ln->parent) return ln < rn;

	// determine sibling order
	xml_node_struct* ls = ln;
	xml_node_struct* rs = rn;

	// walk forward from both nodes in lockstep; whichever walk reaches the other node started first
	while (ls && rs)
	{
		if (ls == rn) return true;
		if (rs == ln) return false;

		ls = ls->next_sibling;
		rs = rs->next_sibling;
	}

	// if rn sibling chain ended ln must be before rn
	return !rs;
}

// Returns true when ln precedes rn in document order. The two nodes may sit
// at different depths; an ancestor is ordered before its descendants.
PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn)
{
	// find common ancestor at the same depth, if any
	xml_node_struct* lp = ln;
	xml_node_struct* rp = rn;

	while (lp && rp && lp->parent != rp->parent)
	{
		lp = lp->parent;
		rp = rp->parent;
	}

	// parents are the same!
	if (lp && rp) return node_is_before_sibling(lp, rp);

	// nodes are at different depths, need to normalize heights
	bool left_higher = !lp;

	// whichever probe is still non-null belongs to the deeper node; lift that
	// node by the remaining depth difference
	while (lp)
	{
		lp = lp->parent;
		ln = ln->parent;
	}

	while (rp)
	{
		rp = rp->parent;
		rn = rn->parent;
	}

	// one node is the ancestor of the other
	if (ln == rn) return left_higher;

	// find common ancestor... again
	while (ln->parent != rn->parent)
	{
		ln = ln->parent;
		rn = rn->parent;
	}

	return node_is_before_sibling(ln, rn);
}
xml_node_struct* rn) 7682 { 7683 // find common ancestor at the same depth, if any 7684 xml_node_struct* lp = ln; 7685 xml_node_struct* rp = rn; 7686 7687 while (lp && rp && lp->parent != rp->parent) 7688 { 7689 lp = lp->parent; 7690 rp = rp->parent; 7691 } 7692 7693 // parents are the same! 7694 if (lp && rp) return node_is_before_sibling(lp, rp); 7695 7696 // nodes are at different depths, need to normalize heights 7697 bool left_higher = !lp; 7698 7699 while (lp) 7700 { 7701 lp = lp->parent; 7702 ln = ln->parent; 7703 } 7704 7705 while (rp) 7706 { 7707 rp = rp->parent; 7708 rn = rn->parent; 7709 } 7710 7711 // one node is the ancestor of the other 7712 if (ln == rn) return left_higher; 7713 7714 // find common ancestor... again 7715 while (ln->parent != rn->parent) 7716 { 7717 ln = ln->parent; 7718 rn = rn->parent; 7719 } 7720 7721 return node_is_before_sibling(ln, rn); 7722 } 7723 node_is_ancestor(xml_node_struct * parent,xml_node_struct * node)7724 PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node) 7725 { 7726 while (node && node != parent) node = node->parent; 7727 7728 return parent && node == parent; 7729 } 7730 document_buffer_order(const xpath_node & xnode)7731 PUGI__FN const void* document_buffer_order(const xpath_node& xnode) 7732 { 7733 xml_node_struct* node = xnode.node().internal_object(); 7734 7735 if (node) 7736 { 7737 if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0) 7738 { 7739 if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name; 7740 if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value; 7741 } 7742 7743 return 0; 7744 } 7745 7746 xml_attribute_struct* attr = xnode.attribute().internal_object(); 7747 7748 if (attr) 7749 { 7750 if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0) 7751 { 7752 if ((attr->header & 
impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name; 7753 if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value; 7754 } 7755 7756 return 0; 7757 } 7758 7759 return 0; 7760 } 7761 7762 struct document_order_comparator 7763 { operator ()document_order_comparator7764 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const 7765 { 7766 // optimized document order based check 7767 const void* lo = document_buffer_order(lhs); 7768 const void* ro = document_buffer_order(rhs); 7769 7770 if (lo && ro) return lo < ro; 7771 7772 // slow comparison 7773 xml_node ln = lhs.node(), rn = rhs.node(); 7774 7775 // compare attributes 7776 if (lhs.attribute() && rhs.attribute()) 7777 { 7778 // shared parent 7779 if (lhs.parent() == rhs.parent()) 7780 { 7781 // determine sibling order 7782 for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute()) 7783 if (a == rhs.attribute()) 7784 return true; 7785 7786 return false; 7787 } 7788 7789 // compare attribute parents 7790 ln = lhs.parent(); 7791 rn = rhs.parent(); 7792 } 7793 else if (lhs.attribute()) 7794 { 7795 // attributes go after the parent element 7796 if (lhs.parent() == rhs.node()) return false; 7797 7798 ln = lhs.parent(); 7799 } 7800 else if (rhs.attribute()) 7801 { 7802 // attributes go after the parent element 7803 if (rhs.parent() == lhs.node()) return true; 7804 7805 rn = rhs.parent(); 7806 } 7807 7808 if (ln == rn) return false; 7809 7810 if (!ln || !rn) return ln < rn; 7811 7812 return node_is_before(ln.internal_object(), rn.internal_object()); 7813 } 7814 }; 7815 7816 struct duplicate_comparator 7817 { operator ()duplicate_comparator7818 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const 7819 { 7820 if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true; 7821 else return rhs.attribute() ? 
// Produces a NaN value of type double.
PUGI__FN double gen_nan()
{
#if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
	// float has the IEEE single layout here: write the NaN bit pattern 0x7fc00000 directly.
	// The array bound turns into -1 (a compile error) if float and uint32_t differ in size.
	union { float f; uint32_t i; } u[sizeof(float) == sizeof(uint32_t) ? 1 : -1];
	u[0].i = 0x7fc00000;
	return u[0].f;
#else
	// fallback
	// volatile stops the division from being evaluated at compile time
	const volatile double zero = 0.0;
	return zero / zero;
#endif
}

// Returns true when value is NaN, using the CRT or fpclassify where available.
PUGI__FN bool is_nan(double value)
{
#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
	return !!_isnan(value);
#elif defined(fpclassify) && defined(FP_NAN)
	return fpclassify(value) == FP_NAN;
#else
	// fallback
	// NaN is the only value that compares unequal to itself
	const volatile double v = value;
	return v != v;
#endif
}

// Returns the literal for values that have a fixed textual form ("0", "NaN",
// "Infinity", "-Infinity"), or 0 when value is an ordinary non-zero finite
// number that needs a real conversion.
PUGI__FN const char_t* convert_number_to_string_special(double value)
{
#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
	if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
	if (_isnan(value)) return PUGIXML_TEXT("NaN");
	return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
#elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
	switch (fpclassify(value))
	{
	case FP_NAN:
		return PUGIXML_TEXT("NaN");

	case FP_INFINITE:
		return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");

	case FP_ZERO:
		return PUGIXML_TEXT("0");

	default:
		return 0;
	}
#else
	// fallback
	const volatile double v = value;

	if (v == 0) return PUGIXML_TEXT("0");
	if (v != v) return PUGIXML_TEXT("NaN");
	// zero was handled above, so only an infinity is unchanged by doubling
	if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
	return 0;
#endif
}
// A number converts to true unless it is zero or NaN.
PUGI__FN bool convert_number_to_boolean(double value)
{
	return (value != 0 && !is_nan(value));
}

// Strips trailing '0' characters from [begin, end) by writing a terminator
// after the last character that is kept (at begin if nothing is kept).
PUGI__FN void truncate_zeros(char* begin, char* end)
{
	while (begin != end && end[-1] == '0') end--;

	*end = 0;
}

// gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
// Both variants write into buffer and return a pointer into it through
// out_mantissa; the mantissa has trailing zeros removed and carries no sign.
#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
{
	// get base values
	int sign, exponent;
	_ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);

	// truncate redundant zeros
	truncate_zeros(buffer, buffer + strlen(buffer));

	// fill results
	*out_mantissa = buffer;
	*out_exponent = exponent;
}
#else
PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent)
{
	// get a scientific notation value with IEEE DBL_DIG decimals
	// NOTE(review): sprintf is unbounded; buffer_size is only checked by the assert below, after the write
	sprintf(buffer, "%.*e", DBL_DIG, value);
	assert(strlen(buffer) < buffer_size);
	(void)!buffer_size;

	// get the exponent (possibly negative)
	char* exponent_string = strchr(buffer, 'e');
	assert(exponent_string);

	int exponent = atoi(exponent_string + 1);

	// extract mantissa string: skip sign
	char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
	assert(mantissa[0] != '0' && mantissa[1] == '.');

	// divide mantissa by 10 to eliminate integer part
	// (the leading digit overwrites the '.', so "d.ddd" becomes "dddd" starting one character later)
	mantissa[1] = mantissa[0];
	mantissa++;
	exponent++;

	// remove extra mantissa digits and zero-terminate mantissa
	truncate_zeros(mantissa, exponent_string);

	// fill results
	*out_mantissa = mantissa;
	*out_exponent = exponent;
}
#endif
buffer + 1 : buffer; 7926 assert(mantissa[0] != '0' && mantissa[1] == '.'); 7927 7928 // divide mantissa by 10 to eliminate integer part 7929 mantissa[1] = mantissa[0]; 7930 mantissa++; 7931 exponent++; 7932 7933 // remove extra mantissa digits and zero-terminate mantissa 7934 truncate_zeros(mantissa, exponent_string); 7935 7936 // fill results 7937 *out_mantissa = mantissa; 7938 *out_exponent = exponent; 7939 } 7940 #endif 7941 convert_number_to_string(double value,xpath_allocator * alloc)7942 PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc) 7943 { 7944 // try special number conversion 7945 const char_t* special = convert_number_to_string_special(value); 7946 if (special) return xpath_string::from_const(special); 7947 7948 // get mantissa + exponent form 7949 char mantissa_buffer[32]; 7950 7951 char* mantissa; 7952 int exponent; 7953 convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent); 7954 7955 // allocate a buffer of suitable length for the number 7956 size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4; 7957 char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size)); 7958 assert(result); 7959 7960 // make the number! 7961 char_t* s = result; 7962 7963 // sign 7964 if (value < 0) *s++ = '-'; 7965 7966 // integer part 7967 if (exponent <= 0) 7968 { 7969 *s++ = '0'; 7970 } 7971 else 7972 { 7973 while (exponent > 0) 7974 { 7975 assert(*mantissa == 0 || static_cast<unsigned int>(static_cast<unsigned int>(*mantissa) - '0') <= 9); 7976 *s++ = *mantissa ? 
*mantissa++ : '0'; 7977 exponent--; 7978 } 7979 } 7980 7981 // fractional part 7982 if (*mantissa) 7983 { 7984 // decimal point 7985 *s++ = '.'; 7986 7987 // extra zeroes from negative exponent 7988 while (exponent < 0) 7989 { 7990 *s++ = '0'; 7991 exponent++; 7992 } 7993 7994 // extra mantissa digits 7995 while (*mantissa) 7996 { 7997 assert(static_cast<unsigned int>(*mantissa - '0') <= 9); 7998 *s++ = *mantissa++; 7999 } 8000 } 8001 8002 // zero-terminate 8003 assert(s < result + result_size); 8004 *s = 0; 8005 8006 return xpath_string::from_heap_preallocated(result, s); 8007 } 8008 check_string_to_number_format(const char_t * string)8009 PUGI__FN bool check_string_to_number_format(const char_t* string) 8010 { 8011 // parse leading whitespace 8012 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string; 8013 8014 // parse sign 8015 if (*string == '-') ++string; 8016 8017 if (!*string) return false; 8018 8019 // if there is no integer part, there should be a decimal part with at least one digit 8020 if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' 
|| !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false; 8021 8022 // parse integer part 8023 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string; 8024 8025 // parse decimal part 8026 if (*string == '.') 8027 { 8028 ++string; 8029 8030 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string; 8031 } 8032 8033 // parse trailing whitespace 8034 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string; 8035 8036 return *string == 0; 8037 } 8038 convert_string_to_number(const char_t * string)8039 PUGI__FN double convert_string_to_number(const char_t* string) 8040 { 8041 // check string format 8042 if (!check_string_to_number_format(string)) return gen_nan(); 8043 8044 // parse string 8045 #ifdef PUGIXML_WCHAR_MODE 8046 return wcstod(string, 0); 8047 #else 8048 return strtod(string, 0); 8049 #endif 8050 } 8051 convert_string_to_number_scratch(char_t (& buffer)[32],const char_t * begin,const char_t * end,double * out_result)8052 PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result) 8053 { 8054 size_t length = static_cast<size_t>(end - begin); 8055 char_t* scratch = buffer; 8056 8057 if (length >= sizeof(buffer) / sizeof(buffer[0])) 8058 { 8059 // need to make dummy on-heap copy 8060 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 8061 if (!scratch) return false; 8062 } 8063 8064 // copy string to zero-terminated buffer and perform conversion 8065 memcpy(scratch, begin, length * sizeof(char_t)); 8066 scratch[length] = 0; 8067 8068 *out_result = convert_string_to_number(scratch); 8069 8070 // free dummy buffer 8071 if (scratch != buffer) xml_memory::deallocate(scratch); 8072 8073 return true; 8074 } 8075 round_nearest(double value)8076 PUGI__FN double round_nearest(double value) 8077 { 8078 return floor(value + 0.5); 8079 } 8080 round_nearest_nzero(double value)8081 PUGI__FN double round_nearest_nzero(double value) 8082 { 8083 // same as round_nearest, but 
returns -0 for [-0.5, -0] 8084 // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0) 8085 return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5); 8086 } 8087 qualified_name(const xpath_node & node)8088 PUGI__FN const char_t* qualified_name(const xpath_node& node) 8089 { 8090 return node.attribute() ? node.attribute().name() : node.node().name(); 8091 } 8092 local_name(const xpath_node & node)8093 PUGI__FN const char_t* local_name(const xpath_node& node) 8094 { 8095 const char_t* name = qualified_name(node); 8096 const char_t* p = find_char(name, ':'); 8097 8098 return p ? p + 1 : name; 8099 } 8100 8101 struct namespace_uri_predicate 8102 { 8103 const char_t* prefix; 8104 size_t prefix_length; 8105 namespace_uri_predicatenamespace_uri_predicate8106 namespace_uri_predicate(const char_t* name) 8107 { 8108 const char_t* pos = find_char(name, ':'); 8109 8110 prefix = pos ? name : 0; 8111 prefix_length = pos ? static_cast<size_t>(pos - name) : 0; 8112 } 8113 operator ()namespace_uri_predicate8114 bool operator()(xml_attribute a) const 8115 { 8116 const char_t* name = a.name(); 8117 8118 if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false; 8119 8120 return prefix ? 
name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0; 8121 } 8122 }; 8123 namespace_uri(xml_node node)8124 PUGI__FN const char_t* namespace_uri(xml_node node) 8125 { 8126 namespace_uri_predicate pred = node.name(); 8127 8128 xml_node p = node; 8129 8130 while (p) 8131 { 8132 xml_attribute a = p.find_attribute(pred); 8133 8134 if (a) return a.value(); 8135 8136 p = p.parent(); 8137 } 8138 8139 return PUGIXML_TEXT(""); 8140 } 8141 namespace_uri(xml_attribute attr,xml_node parent)8142 PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent) 8143 { 8144 namespace_uri_predicate pred = attr.name(); 8145 8146 // Default namespace does not apply to attributes 8147 if (!pred.prefix) return PUGIXML_TEXT(""); 8148 8149 xml_node p = parent; 8150 8151 while (p) 8152 { 8153 xml_attribute a = p.find_attribute(pred); 8154 8155 if (a) return a.value(); 8156 8157 p = p.parent(); 8158 } 8159 8160 return PUGIXML_TEXT(""); 8161 } 8162 namespace_uri(const xpath_node & node)8163 PUGI__FN const char_t* namespace_uri(const xpath_node& node) 8164 { 8165 return node.attribute() ? 
namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node()); 8166 } 8167 normalize_space(char_t * buffer)8168 PUGI__FN char_t* normalize_space(char_t* buffer) 8169 { 8170 char_t* write = buffer; 8171 8172 for (char_t* it = buffer; *it; ) 8173 { 8174 char_t ch = *it++; 8175 8176 if (PUGI__IS_CHARTYPE(ch, ct_space)) 8177 { 8178 // replace whitespace sequence with single space 8179 while (PUGI__IS_CHARTYPE(*it, ct_space)) it++; 8180 8181 // avoid leading spaces 8182 if (write != buffer) *write++ = ' '; 8183 } 8184 else *write++ = ch; 8185 } 8186 8187 // remove trailing space 8188 if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--; 8189 8190 // zero-terminate 8191 *write = 0; 8192 8193 return write; 8194 } 8195 translate(char_t * buffer,const char_t * from,const char_t * to,size_t to_length)8196 PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length) 8197 { 8198 char_t* write = buffer; 8199 8200 while (*buffer) 8201 { 8202 PUGI__DMC_VOLATILE char_t ch = *buffer++; 8203 8204 const char_t* pos = find_char(from, ch); 8205 8206 if (!pos) 8207 *write++ = ch; // do not process 8208 else if (static_cast<size_t>(pos - from) < to_length) 8209 *write++ = to[pos - from]; // replace 8210 } 8211 8212 // zero-terminate 8213 *write = 0; 8214 8215 return write; 8216 } 8217 translate_table_generate(xpath_allocator * alloc,const char_t * from,const char_t * to)8218 PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to) 8219 { 8220 unsigned char table[128] = {0}; 8221 8222 while (*from) 8223 { 8224 unsigned int fc = static_cast<unsigned int>(*from); 8225 unsigned int tc = static_cast<unsigned int>(*to); 8226 8227 if (fc >= 128 || tc >= 128) 8228 return 0; 8229 8230 // code=128 means "skip character" 8231 if (!table[fc]) 8232 table[fc] = static_cast<unsigned char>(tc ? 
tc : 128); 8233 8234 from++; 8235 if (tc) to++; 8236 } 8237 8238 for (int i = 0; i < 128; ++i) 8239 if (!table[i]) 8240 table[i] = static_cast<unsigned char>(i); 8241 8242 void* result = alloc->allocate_nothrow(sizeof(table)); 8243 8244 if (result) 8245 { 8246 memcpy(result, table, sizeof(table)); 8247 } 8248 8249 return static_cast<unsigned char*>(result); 8250 } 8251 translate_table(char_t * buffer,const unsigned char * table)8252 PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table) 8253 { 8254 char_t* write = buffer; 8255 8256 while (*buffer) 8257 { 8258 char_t ch = *buffer++; 8259 unsigned int index = static_cast<unsigned int>(ch); 8260 8261 if (index < 128) 8262 { 8263 unsigned char code = table[index]; 8264 8265 // code=128 means "skip character" (table size is 128 so 128 can be a special value) 8266 // this code skips these characters without extra branches 8267 *write = static_cast<char_t>(code); 8268 write += 1 - (code >> 7); 8269 } 8270 else 8271 { 8272 *write++ = ch; 8273 } 8274 } 8275 8276 // zero-terminate 8277 *write = 0; 8278 8279 return write; 8280 } 8281 is_xpath_attribute(const char_t * name)8282 inline bool is_xpath_attribute(const char_t* name) 8283 { 8284 return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')); 8285 } 8286 8287 struct xpath_variable_boolean: xpath_variable 8288 { xpath_variable_booleanxpath_variable_boolean8289 xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false) 8290 { 8291 } 8292 8293 bool value; 8294 char_t name[1]; 8295 }; 8296 8297 struct xpath_variable_number: xpath_variable 8298 { xpath_variable_numberxpath_variable_number8299 xpath_variable_number(): xpath_variable(xpath_type_number), value(0) 8300 { 8301 } 8302 8303 double value; 8304 char_t name[1]; 8305 }; 8306 8307 struct xpath_variable_string: xpath_variable 8308 { xpath_variable_stringxpath_variable_string8309 xpath_variable_string(): xpath_variable(xpath_type_string), value(0) 8310 { 
8311 } 8312 ~xpath_variable_stringxpath_variable_string8313 ~xpath_variable_string() 8314 { 8315 if (value) xml_memory::deallocate(value); 8316 } 8317 8318 char_t* value; 8319 char_t name[1]; 8320 }; 8321 8322 struct xpath_variable_node_set: xpath_variable 8323 { xpath_variable_node_setxpath_variable_node_set8324 xpath_variable_node_set(): xpath_variable(xpath_type_node_set) 8325 { 8326 } 8327 8328 xpath_node_set value; 8329 char_t name[1]; 8330 }; 8331 8332 static const xpath_node_set dummy_node_set; 8333 hash_string(const char_t * str)8334 PUGI__FN unsigned int hash_string(const char_t* str) 8335 { 8336 // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time) 8337 unsigned int result = 0; 8338 8339 while (*str) 8340 { 8341 result += static_cast<unsigned int>(*str++); 8342 result += result << 10; 8343 result ^= result >> 6; 8344 } 8345 8346 result += result << 3; 8347 result ^= result >> 11; 8348 result += result << 15; 8349 8350 return result; 8351 } 8352 new_xpath_variable(const char_t * name)8353 template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name) 8354 { 8355 size_t length = strlength(name); 8356 if (length == 0) return 0; // empty variable names are invalid 8357 8358 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters 8359 void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t)); 8360 if (!memory) return 0; 8361 8362 T* result = new (memory) T(); 8363 8364 memcpy(result->name, name, (length + 1) * sizeof(char_t)); 8365 8366 return result; 8367 } 8368 new_xpath_variable(xpath_value_type type,const char_t * name)8369 PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name) 8370 { 8371 switch (type) 8372 { 8373 case xpath_type_node_set: 8374 return new_xpath_variable<xpath_variable_node_set>(name); 8375 8376 case xpath_type_number: 8377 return new_xpath_variable<xpath_variable_number>(name); 8378 8379 
case xpath_type_string: 8380 return new_xpath_variable<xpath_variable_string>(name); 8381 8382 case xpath_type_boolean: 8383 return new_xpath_variable<xpath_variable_boolean>(name); 8384 8385 default: 8386 return 0; 8387 } 8388 } 8389 delete_xpath_variable(T * var)8390 template <typename T> PUGI__FN void delete_xpath_variable(T* var) 8391 { 8392 var->~T(); 8393 xml_memory::deallocate(var); 8394 } 8395 delete_xpath_variable(xpath_value_type type,xpath_variable * var)8396 PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var) 8397 { 8398 switch (type) 8399 { 8400 case xpath_type_node_set: 8401 delete_xpath_variable(static_cast<xpath_variable_node_set*>(var)); 8402 break; 8403 8404 case xpath_type_number: 8405 delete_xpath_variable(static_cast<xpath_variable_number*>(var)); 8406 break; 8407 8408 case xpath_type_string: 8409 delete_xpath_variable(static_cast<xpath_variable_string*>(var)); 8410 break; 8411 8412 case xpath_type_boolean: 8413 delete_xpath_variable(static_cast<xpath_variable_boolean*>(var)); 8414 break; 8415 8416 default: 8417 assert(!"Invalid variable type"); 8418 } 8419 } 8420 copy_xpath_variable(xpath_variable * lhs,const xpath_variable * rhs)8421 PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs) 8422 { 8423 switch (rhs->type()) 8424 { 8425 case xpath_type_node_set: 8426 return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value); 8427 8428 case xpath_type_number: 8429 return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value); 8430 8431 case xpath_type_string: 8432 return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value); 8433 8434 case xpath_type_boolean: 8435 return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value); 8436 8437 default: 8438 assert(!"Invalid variable type"); 8439 return false; 8440 } 8441 } 8442 get_variable_scratch(char_t (& buffer)[32],xpath_variable_set * set,const char_t * begin,const char_t * end,xpath_variable ** 
out_result)8443 PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result) 8444 { 8445 size_t length = static_cast<size_t>(end - begin); 8446 char_t* scratch = buffer; 8447 8448 if (length >= sizeof(buffer) / sizeof(buffer[0])) 8449 { 8450 // need to make dummy on-heap copy 8451 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 8452 if (!scratch) return false; 8453 } 8454 8455 // copy string to zero-terminated buffer and perform lookup 8456 memcpy(scratch, begin, length * sizeof(char_t)); 8457 scratch[length] = 0; 8458 8459 *out_result = set->get(scratch); 8460 8461 // free dummy buffer 8462 if (scratch != buffer) xml_memory::deallocate(scratch); 8463 8464 return true; 8465 } 8466 PUGI__NS_END 8467 8468 // Internal node set class 8469 PUGI__NS_BEGIN xpath_get_order(const xpath_node * begin,const xpath_node * end)8470 PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end) 8471 { 8472 if (end - begin < 2) 8473 return xpath_node_set::type_sorted; 8474 8475 document_order_comparator cmp; 8476 8477 bool first = cmp(begin[0], begin[1]); 8478 8479 for (const xpath_node* it = begin + 1; it + 1 < end; ++it) 8480 if (cmp(it[0], it[1]) != first) 8481 return xpath_node_set::type_unsorted; 8482 8483 return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse; 8484 } 8485 xpath_sort(xpath_node * begin,xpath_node * end,xpath_node_set::type_t type,bool rev)8486 PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev) 8487 { 8488 xpath_node_set::type_t order = rev ? 
xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; 8489 8490 if (type == xpath_node_set::type_unsorted) 8491 { 8492 xpath_node_set::type_t sorted = xpath_get_order(begin, end); 8493 8494 if (sorted == xpath_node_set::type_unsorted) 8495 { 8496 sort(begin, end, document_order_comparator()); 8497 8498 type = xpath_node_set::type_sorted; 8499 } 8500 else 8501 type = sorted; 8502 } 8503 8504 if (type != order) reverse(begin, end); 8505 8506 return order; 8507 } 8508 xpath_first(const xpath_node * begin,const xpath_node * end,xpath_node_set::type_t type)8509 PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type) 8510 { 8511 if (begin == end) return xpath_node(); 8512 8513 switch (type) 8514 { 8515 case xpath_node_set::type_sorted: 8516 return *begin; 8517 8518 case xpath_node_set::type_sorted_reverse: 8519 return *(end - 1); 8520 8521 case xpath_node_set::type_unsorted: 8522 return *min_element(begin, end, document_order_comparator()); 8523 8524 default: 8525 assert(!"Invalid node set type"); 8526 return xpath_node(); 8527 } 8528 } 8529 8530 class xpath_node_set_raw 8531 { 8532 xpath_node_set::type_t _type; 8533 8534 xpath_node* _begin; 8535 xpath_node* _end; 8536 xpath_node* _eos; 8537 8538 public: xpath_node_set_raw()8539 xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0) 8540 { 8541 } 8542 begin() const8543 xpath_node* begin() const 8544 { 8545 return _begin; 8546 } 8547 end() const8548 xpath_node* end() const 8549 { 8550 return _end; 8551 } 8552 empty() const8553 bool empty() const 8554 { 8555 return _begin == _end; 8556 } 8557 size() const8558 size_t size() const 8559 { 8560 return static_cast<size_t>(_end - _begin); 8561 } 8562 first() const8563 xpath_node first() const 8564 { 8565 return xpath_first(_begin, _end, _type); 8566 } 8567 8568 void push_back_grow(const xpath_node& node, xpath_allocator* alloc); 8569 push_back(const xpath_node & 
node,xpath_allocator * alloc)8570 void push_back(const xpath_node& node, xpath_allocator* alloc) 8571 { 8572 if (_end != _eos) 8573 *_end++ = node; 8574 else 8575 push_back_grow(node, alloc); 8576 } 8577 append(const xpath_node * begin_,const xpath_node * end_,xpath_allocator * alloc)8578 void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc) 8579 { 8580 if (begin_ == end_) return; 8581 8582 size_t size_ = static_cast<size_t>(_end - _begin); 8583 size_t capacity = static_cast<size_t>(_eos - _begin); 8584 size_t count = static_cast<size_t>(end_ - begin_); 8585 8586 if (size_ + count > capacity) 8587 { 8588 // reallocate the old array or allocate a new one 8589 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node))); 8590 assert(data); 8591 8592 // finalize 8593 _begin = data; 8594 _end = data + size_; 8595 _eos = data + size_ + count; 8596 } 8597 8598 memcpy(_end, begin_, count * sizeof(xpath_node)); 8599 _end += count; 8600 } 8601 sort_do()8602 void sort_do() 8603 { 8604 _type = xpath_sort(_begin, _end, _type, false); 8605 } 8606 truncate(xpath_node * pos)8607 void truncate(xpath_node* pos) 8608 { 8609 assert(_begin <= pos && pos <= _end); 8610 8611 _end = pos; 8612 } 8613 remove_duplicates()8614 void remove_duplicates() 8615 { 8616 if (_type == xpath_node_set::type_unsorted) 8617 sort(_begin, _end, duplicate_comparator()); 8618 8619 _end = unique(_begin, _end); 8620 } 8621 type() const8622 xpath_node_set::type_t type() const 8623 { 8624 return _type; 8625 } 8626 set_type(xpath_node_set::type_t value)8627 void set_type(xpath_node_set::type_t value) 8628 { 8629 _type = value; 8630 } 8631 }; 8632 push_back_grow(const xpath_node & node,xpath_allocator * alloc)8633 PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc) 8634 { 8635 size_t capacity = static_cast<size_t>(_eos - _begin); 8636 8637 // get new 
capacity (1.5x rule) 8638 size_t new_capacity = capacity + capacity / 2 + 1; 8639 8640 // reallocate the old array or allocate a new one 8641 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); 8642 assert(data); 8643 8644 // finalize 8645 _begin = data; 8646 _end = data + capacity; 8647 _eos = data + new_capacity; 8648 8649 // push 8650 *_end++ = node; 8651 } 8652 PUGI__NS_END 8653 8654 PUGI__NS_BEGIN 8655 struct xpath_context 8656 { 8657 xpath_node n; 8658 size_t position, size; 8659 xpath_contextxpath_context8660 xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_) 8661 { 8662 } 8663 }; 8664 8665 enum lexeme_t 8666 { 8667 lex_none = 0, 8668 lex_equal, 8669 lex_not_equal, 8670 lex_less, 8671 lex_greater, 8672 lex_less_or_equal, 8673 lex_greater_or_equal, 8674 lex_plus, 8675 lex_minus, 8676 lex_multiply, 8677 lex_union, 8678 lex_var_ref, 8679 lex_open_brace, 8680 lex_close_brace, 8681 lex_quoted_string, 8682 lex_number, 8683 lex_slash, 8684 lex_double_slash, 8685 lex_open_square_brace, 8686 lex_close_square_brace, 8687 lex_string, 8688 lex_comma, 8689 lex_axis_attribute, 8690 lex_dot, 8691 lex_double_dot, 8692 lex_double_colon, 8693 lex_eof 8694 }; 8695 8696 struct xpath_lexer_string 8697 { 8698 const char_t* begin; 8699 const char_t* end; 8700 xpath_lexer_stringxpath_lexer_string8701 xpath_lexer_string(): begin(0), end(0) 8702 { 8703 } 8704 operator ==xpath_lexer_string8705 bool operator==(const char_t* other) const 8706 { 8707 size_t length = static_cast<size_t>(end - begin); 8708 8709 return strequalrange(other, begin, length); 8710 } 8711 }; 8712 8713 class xpath_lexer 8714 { 8715 const char_t* _cur; 8716 const char_t* _cur_lexeme_pos; 8717 xpath_lexer_string _cur_lexeme_contents; 8718 8719 lexeme_t _cur_lexeme; 8720 8721 public: xpath_lexer(const char_t * query)8722 explicit xpath_lexer(const char_t* query): _cur(query) 
{
		// the constructor immediately reads the first lexeme
		next();
	}

	// Returns the read cursor, i.e. the position right after the current lexeme
	// (or at the current lexeme when it could not be recognized, see next())
	const char_t* state() const
	{
		return _cur;
	}

	// Scans the next lexeme starting at _cur: skips leading whitespace, records the
	// lexeme start in _cur_lexeme_pos, sets _cur_lexeme and, for variable references,
	// numbers, names and quoted strings, the [begin, end) contents range.
	// Input that matches no lexeme yields lex_none; in several of those branches the
	// cursor is left on the offending character rather than advanced past it.
	void next()
	{
		const char_t* cur = _cur;

		while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;

		// save lexeme position for error reporting
		_cur_lexeme_pos = cur;

		switch (*cur)
		{
		case 0:
			// end of query: the cursor stays on the terminator
			_cur_lexeme = lex_eof;
			break;

		case '>':
			if (*(cur+1) == '=')
			{
				cur += 2;
				_cur_lexeme = lex_greater_or_equal;
			}
			else
			{
				cur += 1;
				_cur_lexeme = lex_greater;
			}
			break;

		case '<':
			if (*(cur+1) == '=')
			{
				cur += 2;
				_cur_lexeme = lex_less_or_equal;
			}
			else
			{
				cur += 1;
				_cur_lexeme = lex_less;
			}
			break;

		case '!':
			// '!' is only recognized as the first half of '!='
			if (*(cur+1) == '=')
			{
				cur += 2;
				_cur_lexeme = lex_not_equal;
			}
			else
			{
				_cur_lexeme = lex_none;
			}
			break;

		case '=':
			cur += 1;
			_cur_lexeme = lex_equal;

			break;

		case '+':
			cur += 1;
			_cur_lexeme = lex_plus;

			break;

		case '-':
			cur += 1;
			_cur_lexeme = lex_minus;

			break;

		case '*':
			cur += 1;
			_cur_lexeme = lex_multiply;

			break;

		case '|':
			cur += 1;
			_cur_lexeme = lex_union;

			break;

		case '$':
			// variable reference: '$' followed by a name; contents exclude the '$'
			cur += 1;

			if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
			{
				_cur_lexeme_contents.begin = cur;

				while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;

				if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
				{
					cur++; // :

					while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
				}

				_cur_lexeme_contents.end = cur;

				_cur_lexeme = lex_var_ref;
			}
			else
			{
				_cur_lexeme = lex_none;
			}

			break;

		case '(':
			cur += 1;
			_cur_lexeme = lex_open_brace;

			break;

		case ')':
			cur += 1;
			_cur_lexeme = lex_close_brace;

			break;

		case '[':
			cur += 1;
			_cur_lexeme = lex_open_square_brace;

			break;

		case ']':
			cur += 1;
			_cur_lexeme = lex_close_square_brace;

			break;

		case ',':
			cur += 1;
			_cur_lexeme = lex_comma;

			break;

		case '/':
			if (*(cur+1) == '/')
			{
				cur += 2;
				_cur_lexeme = lex_double_slash;
			}
			else
			{
				cur += 1;
				_cur_lexeme = lex_slash;
			}
			break;

		case '.':
			// '..', a number with a leading dot such as '.5', or a lone '.'
			if (*(cur+1) == '.')
			{
				cur += 2;
				_cur_lexeme = lex_double_dot;
			}
			else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
			{
				_cur_lexeme_contents.begin = cur; // .

				++cur;

				while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;

				_cur_lexeme_contents.end = cur;

				_cur_lexeme = lex_number;
			}
			else
			{
				cur += 1;
				_cur_lexeme = lex_dot;
			}
			break;

		case '@':
			cur += 1;
			_cur_lexeme = lex_axis_attribute;

			break;

		case '"':
		case '\'':
		{
			// quoted string: contents are the characters between the quotes;
			// the string ends at the next occurrence of the opening quote character
			char_t terminator = *cur;

			++cur;

			_cur_lexeme_contents.begin = cur;
			while (*cur && *cur != terminator) cur++;
			_cur_lexeme_contents.end = cur;

			// reaching the end of the query first means the string is unterminated
			if (!*cur)
				_cur_lexeme = lex_none;
			else
			{
				cur += 1;
				_cur_lexeme = lex_quoted_string;
			}

			break;
		}

		case ':':
			// ':' is only recognized as the first half of '::'
			if (*(cur+1) == ':')
			{
				cur += 2;
				_cur_lexeme = lex_double_colon;
			}
			else
			{
				_cur_lexeme = lex_none;
			}
			break;

		default:
			if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
			{
				// number: digits with an optional '.' and further digits
				_cur_lexeme_contents.begin = cur;

				while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;

				if (*cur == '.')
				{
					cur++;

					while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
				}

				_cur_lexeme_contents.end = cur;

				_cur_lexeme = lex_number;
			}
			else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
			{
				// name, optionally followed by ':*' or ':' and a second name
				_cur_lexeme_contents.begin = cur;

				while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;

				if (cur[0] == ':')
				{
					if (cur[1] == '*') // namespace test ncname:*
					{
						cur += 2; // :*
					}
					else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
					{
						cur++; // :

						while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
					}
				}

				_cur_lexeme_contents.end = cur;

				_cur_lexeme = lex_string;
			}
			else
			{
				_cur_lexeme = lex_none;
			}
		}

		// commit the new cursor position
		_cur = cur;
	}

	// Kind of the current lexeme
	lexeme_t current() const
	{
		return _cur_lexeme;
	}

	// Start of the current lexeme (after leading whitespace), saved for error reporting
	const char_t* current_pos() const
	{
		return _cur_lexeme_pos;
	}

	// Contents range of the current lexeme; only meaningful for the lexeme kinds that
	// set it in next(), which the assertion enforces
	const xpath_lexer_string& contents() const
	{
		assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);

		return _cur_lexeme_contents;
	}
};

// Node kinds of the XPath expression tree
enum ast_type_t
{
	ast_unknown,
	ast_op_or,						// left or right
	ast_op_and,						// left and right
	ast_op_equal,					// left = right
	ast_op_not_equal,				// left != right
	ast_op_less,					// left < right
	ast_op_greater,					// left > right
	ast_op_less_or_equal,			// left <= right
	ast_op_greater_or_equal,		// left >= right
	ast_op_add,						// left + right
	ast_op_subtract,				// left - right
	ast_op_multiply,				// left * right
	ast_op_divide,					// left / right
	ast_op_mod,						// left mod right
	ast_op_negate,					// - left
	ast_op_union,					// left | right
	ast_predicate,					// apply predicate to set; next points to next predicate
	ast_filter,						// select * from left where right
	ast_string_constant,			// string constant
	ast_number_constant,			// number constant
	ast_variable,					// variable
	ast_func_last,					// last()
	ast_func_position,				// position()
	ast_func_count,					// count(left)
	ast_func_id,					// id(left)
	ast_func_local_name_0,			// local-name()
	ast_func_local_name_1,			// local-name(left)
	ast_func_namespace_uri_0,		// namespace-uri()
	ast_func_namespace_uri_1,		// namespace-uri(left)
	ast_func_name_0,				// name()
	ast_func_name_1,				// name(left)
	ast_func_string_0,				// string()
	ast_func_string_1,				// string(left)
	ast_func_concat,				// concat(left, right, siblings)
	ast_func_starts_with,			// starts-with(left, right)
	ast_func_contains,				// contains(left, right)
	ast_func_substring_before,		// substring-before(left, right)
	ast_func_substring_after,		// substring-after(left, right)
	ast_func_substring_2,			// substring(left, right)
	ast_func_substring_3,			// substring(left, right, third)
	ast_func_string_length_0,		// string-length()
	ast_func_string_length_1,		// string-length(left)
	ast_func_normalize_space_0,		// normalize-space()
	ast_func_normalize_space_1,		// normalize-space(left)
	ast_func_translate,				// translate(left, right, third)
	ast_func_boolean,				// boolean(left)
	ast_func_not,					// not(left)
	ast_func_true,					// true()
	ast_func_false,					// false()
	ast_func_lang,					// lang(left)
	ast_func_number_0,				// number()
	ast_func_number_1,				// number(left)
	ast_func_sum,					// sum(left)
	ast_func_floor,					// floor(left)
	ast_func_ceiling,				// ceiling(left)
	ast_func_round,					// round(left)
	ast_step,						// process set left with step
	ast_step_root,					// select root node

	ast_opt_translate_table,		// translate(left, right, third) where right/third are constants
	ast_opt_compare_attribute		// @name = 'string'
};

// Axes of a location step
enum axis_t
{
	axis_ancestor,
	axis_ancestor_or_self,
	axis_attribute,
	axis_child,
	axis_descendant,
	axis_descendant_or_self,
	axis_following,
	axis_following_sibling,
	axis_namespace,
	axis_parent,
	axis_preceding,
	axis_preceding_sibling,
	axis_self
};

// Node tests of a location step
enum nodetest_t
{
	nodetest_none,
	nodetest_name,
	nodetest_type_node,
	nodetest_type_comment,
	nodetest_type_pi,
	nodetest_type_text,
	nodetest_pi,
	nodetest_all,
	nodetest_all_in_namespace
};

// Predicate classification stored in the node's _test field for
// ast_predicate/ast_filter nodes
enum predicate_t
{
	predicate_default,
	predicate_posinv,
	predicate_constant,
	predicate_constant_one
};

// How much of a node set the caller needs from an evaluation
enum nodeset_eval_t
{
	nodeset_eval_all,
	nodeset_eval_any,
	nodeset_eval_first
};

// Wraps an axis value in a type so it can be passed as a tag argument
template <axis_t N> struct axis_to_type
{
	static const axis_t axis;
};

template <axis_t N> const axis_t axis_to_type<N>::axis = N;

class xpath_ast_node
{
private:
	// node type
	char _type;
	char _rettype;

	// for ast_step
	char _axis;

	// for ast_step/ast_predicate/ast_filter
	char _test;

	// tree node structure
	xpath_ast_node* _left;
	xpath_ast_node* _right;
	xpath_ast_node* _next;

	union
	{
		// value for ast_string_constant
		const char_t* string;
		// value for ast_number_constant
		double number;
		// variable for ast_variable
		xpath_variable* variable;
		// node test for ast_step (node name/namespace/node type/pi target)
		const char_t* nodetest;
		// table for ast_opt_translate_table
		const unsigned char* table;
	} _data;

	// declared but not defined here: nodes are not meant to be copied
	xpath_ast_node(const xpath_ast_node&);
	xpath_ast_node& operator=(const xpath_ast_node&);

	// Equality-style comparison of two expressions with 'comp'; the operands are
	// converted according to their static return types (see the branches below)
	template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
	{
		xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();

		if (lt != xpath_type_node_set
&& rt != xpath_type_node_set)
		{
			// neither operand is a node set: compare as boolean if either side is
			// boolean, else as number if either side is a number, else as string
			if (lt == xpath_type_boolean || rt == xpath_type_boolean)
				return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
			else if (lt == xpath_type_number || rt == xpath_type_number)
				return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
			else if (lt == xpath_type_string || rt == xpath_type_string)
			{
				xpath_allocator_capture cr(stack.result);

				xpath_string ls = lhs->eval_string(c, stack);
				xpath_string rs = rhs->eval_string(c, stack);

				return comp(ls, rs);
			}
		}
		else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
		{
			// both operands are node sets: true if any pair of nodes has
			// string values that satisfy comp
			xpath_allocator_capture cr(stack.result);

			xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
			xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);

			for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
				for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
				{
					xpath_allocator_capture cri(stack.result);

					if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
						return true;
				}

			return false;
		}
		else
		{
			// exactly one operand is a node set: normalize so that it is rhs
			// (the operands passed to comp are swapped along with it)
			if (lt == xpath_type_node_set)
			{
				swap(lhs, rhs);
				swap(lt, rt);
			}

			if (lt == xpath_type_boolean)
				return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
			else if (lt == xpath_type_number)
			{
				// true if any node's string value, converted to a number, satisfies comp
				xpath_allocator_capture cr(stack.result);

				double l = lhs->eval_number(c, stack);
				xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);

				for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
				{
					xpath_allocator_capture cri(stack.result);

					if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
						return true;
				}

				return false;
			}
			else if (lt == xpath_type_string)
			{
				// true if any node's string value satisfies comp
				xpath_allocator_capture cr(stack.result);

				xpath_string l = lhs->eval_string(c, stack);
				xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);

				for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
				{
					xpath_allocator_capture cri(stack.result);

					if (comp(l, string_value(*ri, stack.result)))
						return true;
				}

				return false;
			}
		}

		// reached only when no branch above matched the operand types
		assert(!"Wrong types");
		return false;
	}

	// Decides whether evaluation may stop after the first match: for a type_sorted set
	// whenever the caller does not need all nodes, otherwise only when any node will do
	static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval)
	{
		return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any;
	}

	// Relational comparison of two expressions with 'comp'. Operands are always compared
	// as numbers; a node set operand is satisfied if any of its nodes' string values,
	// converted to a number, satisfies comp. Operand order is preserved in every branch.
	template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
	{
		xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();

		if (lt != xpath_type_node_set && rt != xpath_type_node_set)
			return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
		else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
		{
			xpath_allocator_capture cr(stack.result);

			xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
			xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);

			for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
			{
				xpath_allocator_capture cri(stack.result);

				// the left value is converted once per outer iteration
				double l = convert_string_to_number(string_value(*li, stack.result).c_str());

				for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
				{
					xpath_allocator_capture crii(stack.result);

					if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
						return true;
				}
			}

			return false;
		}
		else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
		{
			xpath_allocator_capture cr(stack.result);

			double l = lhs->eval_number(c, stack);
			xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);

			for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
			{
				xpath_allocator_capture cri(stack.result);

				if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
					return true;
			}

			return false;
		}
		else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
		{
			xpath_allocator_capture cr(stack.result);

			xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
			double r = rhs->eval_number(c, stack);

			for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
			{
				xpath_allocator_capture cri(stack.result);

				if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
					return true;
			}

			return false;
		}
		else
		{
			assert(!"Wrong types");
			return false;
		}
	}

	// Filters ns[first, end) in place, keeping the nodes for which 'expr' evaluates to
	// true. Each node is evaluated with its 1-based position within the filtered range
	// and the original range size as context. With 'once' set, filtering stops after
	// the first kept node and everything after it is dropped.
	static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
	{
		assert(ns.size() >= first);
		assert(expr->rettype() != xpath_type_number);

		size_t i = 1;
		size_t size = ns.size() - first;

		// write cursor for kept nodes
		xpath_node* last = ns.begin() + first;

		// remove_if... or well, sort of
		for (xpath_node* it = last; it != ns.end(); ++it, ++i)
		{
			xpath_context c(*it, i, size);

			if (expr->eval_boolean(c, stack))
			{
				*last++ = *it;

				if (once) break;
			}
		}

		ns.truncate(last);
	}

	// Same as apply_predicate_boolean, but for a numeric predicate: a node is kept when
	// the expression value equals the node's 1-based position in the filtered range
	static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
	{
		assert(ns.size() >= first);
		assert(expr->rettype() == xpath_type_number);

		size_t i = 1;
		size_t size = ns.size() - first;

		// write cursor for kept nodes
		xpath_node* last = ns.begin() + first;

		// remove_if... or well, sort of
		for (xpath_node* it = last; it != ns.end(); ++it, ++i)
		{
			xpath_context c(*it, i, size);

			if (expr->eval_number(c, stack) == i)
			{
				*last++ = *it;

				if (once) break;
			}
		}

		ns.truncate(last);
	}

	// Numeric predicate evaluated a single time, with an empty context node and
	// position 1. If the value is a whole number within [1, size], only the node at
	// that position is kept; otherwise the filtered range becomes empty.
	static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
	{
		assert(ns.size() >= first);
		assert(expr->rettype() == xpath_type_number);

		size_t size = ns.size() - first;

		xpath_node* last = ns.begin() + first;

		xpath_context c(xpath_node(), 1, size);

		double er = expr->eval_number(c, stack);

		if (er >= 1.0 && er <= size)
		{
			size_t eri = static_cast<size_t>(er);

			// values with a fractional part select nothing
			if (er == eri)
			{
				xpath_node r = last[eri - 1];

				*last++ = r;
			}
		}

		ns.truncate(last);
	}

	// Applies this predicate/filter node (expression in _right) to ns[first, end),
	// choosing the strategy from _test and the expression's return type
	void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once)
	{
		if (ns.size() == first) return;

		assert(_type == ast_filter || _type == ast_predicate);

		if (_test == predicate_constant || _test == predicate_constant_one)
			apply_predicate_number_const(ns, first, _right, stack);
		else if (_right->rettype() == xpath_type_number)
			apply_predicate_number(ns, first, _right, stack, once);
		else
			apply_predicate_boolean(ns, first, _right, stack, once);
	}

	// Applies the chain of predicates starting at _right (linked through _next) to
	// ns[first, end); only the last predicate in the chain may stop early
	void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval)
	{
		if (ns.size() == first) return;

		bool last_once = eval_once(ns.type(), eval);

		for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
			pred->apply_predicate(ns, first, stack, !pred->_next && last_once);
	}

	// Appends attribute 'a' (owned by 'parent') to ns if it passes this step's node
	// test; returns whether it was appended. Names rejected by is_xpath_attribute()
	// never match.
	bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc)
	{
		assert(a);

		// a missing name is treated as the empty string
		const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");

		switch (_test)
		{
		case nodetest_name:
			if (strequal(name, _data.nodetest) && is_xpath_attribute(name))
			{
				ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
				return true;
			}
			break;

		case nodetest_type_node:
		case nodetest_all:
			if (is_xpath_attribute(name))
			{
				ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
				return true;
			}
			break;

		case nodetest_all_in_namespace:
			// _data.nodetest is matched as a prefix of the attribute name
			if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
			{
				ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
				return true;
			}
			break;

		default:
			;
		}

		return false;
	}

	// Appends node 'n' to ns if it passes this step's node test; returns whether it
	// was appended
	bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc)
	{
		assert(n);

		xml_node_type type = PUGI__NODETYPE(n);

		switch (_test)
		{
		case nodetest_name:
			if (type == node_element && n->name && strequal(n->name, _data.nodetest))
			{
				ns.push_back(xml_node(n), alloc);
				return true;
			}
			break;

		case nodetest_type_node:
			// matches every node regardless of its type
			ns.push_back(xml_node(n), alloc);
			return true;

		case nodetest_type_comment:
			if (type == node_comment)
			{
				ns.push_back(xml_node(n), alloc);
				return true;
			}
			break;

		case nodetest_type_text:
			if (type == node_pcdata || type == node_cdata)
			{
				ns.push_back(xml_node(n), alloc);
				return true;
			}
			break;

		case nodetest_type_pi:
			if (type == node_pi)
			{
				ns.push_back(xml_node(n), alloc);
				return true;
			}
			break;

		case nodetest_pi:
			// processing instruction with a specific target name
			if (type == node_pi && n->name && strequal(n->name, _data.nodetest))
			{
				ns.push_back(xml_node(n), alloc);
				return true;
			}
			break;

case nodetest_all: 9532 if (type == node_element) 9533 { 9534 ns.push_back(xml_node(n), alloc); 9535 return true; 9536 } 9537 break; 9538 9539 case nodetest_all_in_namespace: 9540 if (type == node_element && n->name && starts_with(n->name, _data.nodetest)) 9541 { 9542 ns.push_back(xml_node(n), alloc); 9543 return true; 9544 } 9545 break; 9546 9547 default: 9548 assert(!"Unknown axis"); 9549 } 9550 9551 return false; 9552 } 9553 step_fill(xpath_node_set_raw & ns,xml_node_struct * n,xpath_allocator * alloc,bool once,T)9554 template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T) 9555 { 9556 const axis_t axis = T::axis; 9557 9558 switch (axis) 9559 { 9560 case axis_attribute: 9561 { 9562 for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute) 9563 if (step_push(ns, a, n, alloc) & once) 9564 return; 9565 9566 break; 9567 } 9568 9569 case axis_child: 9570 { 9571 for (xml_node_struct* c = n->first_child; c; c = c->next_sibling) 9572 if (step_push(ns, c, alloc) & once) 9573 return; 9574 9575 break; 9576 } 9577 9578 case axis_descendant: 9579 case axis_descendant_or_self: 9580 { 9581 if (axis == axis_descendant_or_self) 9582 if (step_push(ns, n, alloc) & once) 9583 return; 9584 9585 xml_node_struct* cur = n->first_child; 9586 9587 while (cur) 9588 { 9589 if (step_push(ns, cur, alloc) & once) 9590 return; 9591 9592 if (cur->first_child) 9593 cur = cur->first_child; 9594 else 9595 { 9596 while (!cur->next_sibling) 9597 { 9598 cur = cur->parent; 9599 9600 if (cur == n) return; 9601 } 9602 9603 cur = cur->next_sibling; 9604 } 9605 } 9606 9607 break; 9608 } 9609 9610 case axis_following_sibling: 9611 { 9612 for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling) 9613 if (step_push(ns, c, alloc) & once) 9614 return; 9615 9616 break; 9617 } 9618 9619 case axis_preceding_sibling: 9620 { 9621 for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c) 9622 if 
(step_push(ns, c, alloc) & once) 9623 return; 9624 9625 break; 9626 } 9627 9628 case axis_following: 9629 { 9630 xml_node_struct* cur = n; 9631 9632 // exit from this node so that we don't include descendants 9633 while (!cur->next_sibling) 9634 { 9635 cur = cur->parent; 9636 9637 if (!cur) return; 9638 } 9639 9640 cur = cur->next_sibling; 9641 9642 while (cur) 9643 { 9644 if (step_push(ns, cur, alloc) & once) 9645 return; 9646 9647 if (cur->first_child) 9648 cur = cur->first_child; 9649 else 9650 { 9651 while (!cur->next_sibling) 9652 { 9653 cur = cur->parent; 9654 9655 if (!cur) return; 9656 } 9657 9658 cur = cur->next_sibling; 9659 } 9660 } 9661 9662 break; 9663 } 9664 9665 case axis_preceding: 9666 { 9667 xml_node_struct* cur = n; 9668 9669 // exit from this node so that we don't include descendants 9670 while (!cur->prev_sibling_c->next_sibling) 9671 { 9672 cur = cur->parent; 9673 9674 if (!cur) return; 9675 } 9676 9677 cur = cur->prev_sibling_c; 9678 9679 while (cur) 9680 { 9681 if (cur->first_child) 9682 cur = cur->first_child->prev_sibling_c; 9683 else 9684 { 9685 // leaf node, can't be ancestor 9686 if (step_push(ns, cur, alloc) & once) 9687 return; 9688 9689 while (!cur->prev_sibling_c->next_sibling) 9690 { 9691 cur = cur->parent; 9692 9693 if (!cur) return; 9694 9695 if (!node_is_ancestor(cur, n)) 9696 if (step_push(ns, cur, alloc) & once) 9697 return; 9698 } 9699 9700 cur = cur->prev_sibling_c; 9701 } 9702 } 9703 9704 break; 9705 } 9706 9707 case axis_ancestor: 9708 case axis_ancestor_or_self: 9709 { 9710 if (axis == axis_ancestor_or_self) 9711 if (step_push(ns, n, alloc) & once) 9712 return; 9713 9714 xml_node_struct* cur = n->parent; 9715 9716 while (cur) 9717 { 9718 if (step_push(ns, cur, alloc) & once) 9719 return; 9720 9721 cur = cur->parent; 9722 } 9723 9724 break; 9725 } 9726 9727 case axis_self: 9728 { 9729 step_push(ns, n, alloc); 9730 9731 break; 9732 } 9733 9734 case axis_parent: 9735 { 9736 if (n->parent) 9737 step_push(ns, n->parent, 
alloc); 9738 9739 break; 9740 } 9741 9742 default: 9743 assert(!"Unimplemented axis"); 9744 } 9745 } 9746 step_fill(xpath_node_set_raw & ns,xml_attribute_struct * a,xml_node_struct * p,xpath_allocator * alloc,bool once,T v)9747 template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v) 9748 { 9749 const axis_t axis = T::axis; 9750 9751 switch (axis) 9752 { 9753 case axis_ancestor: 9754 case axis_ancestor_or_self: 9755 { 9756 if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test 9757 if (step_push(ns, a, p, alloc) & once) 9758 return; 9759 9760 xml_node_struct* cur = p; 9761 9762 while (cur) 9763 { 9764 if (step_push(ns, cur, alloc) & once) 9765 return; 9766 9767 cur = cur->parent; 9768 } 9769 9770 break; 9771 } 9772 9773 case axis_descendant_or_self: 9774 case axis_self: 9775 { 9776 if (_test == nodetest_type_node) // reject attributes based on principal node type test 9777 step_push(ns, a, p, alloc); 9778 9779 break; 9780 } 9781 9782 case axis_following: 9783 { 9784 xml_node_struct* cur = p; 9785 9786 while (cur) 9787 { 9788 if (cur->first_child) 9789 cur = cur->first_child; 9790 else 9791 { 9792 while (!cur->next_sibling) 9793 { 9794 cur = cur->parent; 9795 9796 if (!cur) return; 9797 } 9798 9799 cur = cur->next_sibling; 9800 } 9801 9802 if (step_push(ns, cur, alloc) & once) 9803 return; 9804 } 9805 9806 break; 9807 } 9808 9809 case axis_parent: 9810 { 9811 step_push(ns, p, alloc); 9812 9813 break; 9814 } 9815 9816 case axis_preceding: 9817 { 9818 // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding 9819 step_fill(ns, p, alloc, once, v); 9820 break; 9821 } 9822 9823 default: 9824 assert(!"Unimplemented axis"); 9825 } 9826 } 9827 step_fill(xpath_node_set_raw & ns,const xpath_node & xn,xpath_allocator * alloc,bool once,T 
v)9828 template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v) 9829 { 9830 const axis_t axis = T::axis; 9831 const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self); 9832 9833 if (xn.node()) 9834 step_fill(ns, xn.node().internal_object(), alloc, once, v); 9835 else if (axis_has_attributes && xn.attribute() && xn.parent()) 9836 step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v); 9837 } 9838 step_do(const xpath_context & c,const xpath_stack & stack,nodeset_eval_t eval,T v)9839 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v) 9840 { 9841 const axis_t axis = T::axis; 9842 const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling); 9843 const xpath_node_set::type_t axis_type = axis_reverse ? 
xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; 9844 9845 bool once = 9846 (axis == axis_attribute && _test == nodetest_name) || 9847 (!_right && eval_once(axis_type, eval)) || 9848 (_right && !_right->_next && _right->_test == predicate_constant_one); 9849 9850 xpath_node_set_raw ns; 9851 ns.set_type(axis_type); 9852 9853 if (_left) 9854 { 9855 xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all); 9856 9857 // self axis preserves the original order 9858 if (axis == axis_self) ns.set_type(s.type()); 9859 9860 for (const xpath_node* it = s.begin(); it != s.end(); ++it) 9861 { 9862 size_t size = ns.size(); 9863 9864 // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes 9865 if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted); 9866 9867 step_fill(ns, *it, stack.result, once, v); 9868 if (_right) apply_predicates(ns, size, stack, eval); 9869 } 9870 } 9871 else 9872 { 9873 step_fill(ns, c.n, stack.result, once, v); 9874 if (_right) apply_predicates(ns, 0, stack, eval); 9875 } 9876 9877 // child, attribute and self axes always generate unique set of nodes 9878 // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice 9879 if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted) 9880 ns.remove_duplicates(); 9881 9882 return ns; 9883 } 9884 9885 public: xpath_ast_node(ast_type_t type,xpath_value_type rettype_,const char_t * value)9886 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value): 9887 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) 9888 { 9889 assert(type == ast_string_constant); 9890 _data.string = value; 9891 } 9892 xpath_ast_node(ast_type_t type,xpath_value_type rettype_,double value)9893 
xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value): 9894 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) 9895 { 9896 assert(type == ast_number_constant); 9897 _data.number = value; 9898 } 9899 xpath_ast_node(ast_type_t type,xpath_value_type rettype_,xpath_variable * value)9900 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value): 9901 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) 9902 { 9903 assert(type == ast_variable); 9904 _data.variable = value; 9905 } 9906 xpath_ast_node(ast_type_t type,xpath_value_type rettype_,xpath_ast_node * left=0,xpath_ast_node * right=0)9907 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0): 9908 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0) 9909 { 9910 } 9911 xpath_ast_node(ast_type_t type,xpath_ast_node * left,axis_t axis,nodetest_t test,const char_t * contents)9912 xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents): 9913 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0) 9914 { 9915 assert(type == ast_step); 9916 _data.nodetest = contents; 9917 } 9918 xpath_ast_node(ast_type_t type,xpath_ast_node * left,xpath_ast_node * right,predicate_t test)9919 xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test): 9920 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0) 9921 { 9922 assert(type == ast_filter || type == ast_predicate); 9923 } 9924 set_next(xpath_ast_node * value)9925 void set_next(xpath_ast_node* value) 
9926 { 9927 _next = value; 9928 } 9929 set_right(xpath_ast_node * value)9930 void set_right(xpath_ast_node* value) 9931 { 9932 _right = value; 9933 } 9934 eval_boolean(const xpath_context & c,const xpath_stack & stack)9935 bool eval_boolean(const xpath_context& c, const xpath_stack& stack) 9936 { 9937 switch (_type) 9938 { 9939 case ast_op_or: 9940 return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack); 9941 9942 case ast_op_and: 9943 return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack); 9944 9945 case ast_op_equal: 9946 return compare_eq(_left, _right, c, stack, equal_to()); 9947 9948 case ast_op_not_equal: 9949 return compare_eq(_left, _right, c, stack, not_equal_to()); 9950 9951 case ast_op_less: 9952 return compare_rel(_left, _right, c, stack, less()); 9953 9954 case ast_op_greater: 9955 return compare_rel(_right, _left, c, stack, less()); 9956 9957 case ast_op_less_or_equal: 9958 return compare_rel(_left, _right, c, stack, less_equal()); 9959 9960 case ast_op_greater_or_equal: 9961 return compare_rel(_right, _left, c, stack, less_equal()); 9962 9963 case ast_func_starts_with: 9964 { 9965 xpath_allocator_capture cr(stack.result); 9966 9967 xpath_string lr = _left->eval_string(c, stack); 9968 xpath_string rr = _right->eval_string(c, stack); 9969 9970 return starts_with(lr.c_str(), rr.c_str()); 9971 } 9972 9973 case ast_func_contains: 9974 { 9975 xpath_allocator_capture cr(stack.result); 9976 9977 xpath_string lr = _left->eval_string(c, stack); 9978 xpath_string rr = _right->eval_string(c, stack); 9979 9980 return find_substring(lr.c_str(), rr.c_str()) != 0; 9981 } 9982 9983 case ast_func_boolean: 9984 return _left->eval_boolean(c, stack); 9985 9986 case ast_func_not: 9987 return !_left->eval_boolean(c, stack); 9988 9989 case ast_func_true: 9990 return true; 9991 9992 case ast_func_false: 9993 return false; 9994 9995 case ast_func_lang: 9996 { 9997 if (c.n.attribute()) return false; 9998 9999 xpath_allocator_capture 
cr(stack.result); 10000 10001 xpath_string lang = _left->eval_string(c, stack); 10002 10003 for (xml_node n = c.n.node(); n; n = n.parent()) 10004 { 10005 xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang")); 10006 10007 if (a) 10008 { 10009 const char_t* value = a.value(); 10010 10011 // strnicmp / strncasecmp is not portable 10012 for (const char_t* lit = lang.c_str(); *lit; ++lit) 10013 { 10014 if (tolower_ascii(*lit) != tolower_ascii(*value)) return false; 10015 ++value; 10016 } 10017 10018 return *value == 0 || *value == '-'; 10019 } 10020 } 10021 10022 return false; 10023 } 10024 10025 case ast_opt_compare_attribute: 10026 { 10027 const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string(); 10028 10029 xml_attribute attr = c.n.node().attribute(_left->_data.nodetest); 10030 10031 return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name()); 10032 } 10033 10034 case ast_variable: 10035 { 10036 assert(_rettype == _data.variable->type()); 10037 10038 if (_rettype == xpath_type_boolean) 10039 return _data.variable->get_boolean(); 10040 10041 // fallthrough to type conversion 10042 } 10043 10044 default: 10045 { 10046 switch (_rettype) 10047 { 10048 case xpath_type_number: 10049 return convert_number_to_boolean(eval_number(c, stack)); 10050 10051 case xpath_type_string: 10052 { 10053 xpath_allocator_capture cr(stack.result); 10054 10055 return !eval_string(c, stack).empty(); 10056 } 10057 10058 case xpath_type_node_set: 10059 { 10060 xpath_allocator_capture cr(stack.result); 10061 10062 return !eval_node_set(c, stack, nodeset_eval_any).empty(); 10063 } 10064 10065 default: 10066 assert(!"Wrong expression for return type boolean"); 10067 return false; 10068 } 10069 } 10070 } 10071 } 10072 eval_number(const xpath_context & c,const xpath_stack & stack)10073 double eval_number(const xpath_context& c, const xpath_stack& stack) 10074 { 10075 switch (_type) 10076 { 10077 case ast_op_add: 
10078 return _left->eval_number(c, stack) + _right->eval_number(c, stack); 10079 10080 case ast_op_subtract: 10081 return _left->eval_number(c, stack) - _right->eval_number(c, stack); 10082 10083 case ast_op_multiply: 10084 return _left->eval_number(c, stack) * _right->eval_number(c, stack); 10085 10086 case ast_op_divide: 10087 return _left->eval_number(c, stack) / _right->eval_number(c, stack); 10088 10089 case ast_op_mod: 10090 return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack)); 10091 10092 case ast_op_negate: 10093 return -_left->eval_number(c, stack); 10094 10095 case ast_number_constant: 10096 return _data.number; 10097 10098 case ast_func_last: 10099 return static_cast<double>(c.size); 10100 10101 case ast_func_position: 10102 return static_cast<double>(c.position); 10103 10104 case ast_func_count: 10105 { 10106 xpath_allocator_capture cr(stack.result); 10107 10108 return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size()); 10109 } 10110 10111 case ast_func_string_length_0: 10112 { 10113 xpath_allocator_capture cr(stack.result); 10114 10115 return static_cast<double>(string_value(c.n, stack.result).length()); 10116 } 10117 10118 case ast_func_string_length_1: 10119 { 10120 xpath_allocator_capture cr(stack.result); 10121 10122 return static_cast<double>(_left->eval_string(c, stack).length()); 10123 } 10124 10125 case ast_func_number_0: 10126 { 10127 xpath_allocator_capture cr(stack.result); 10128 10129 return convert_string_to_number(string_value(c.n, stack.result).c_str()); 10130 } 10131 10132 case ast_func_number_1: 10133 return _left->eval_number(c, stack); 10134 10135 case ast_func_sum: 10136 { 10137 xpath_allocator_capture cr(stack.result); 10138 10139 double r = 0; 10140 10141 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all); 10142 10143 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it) 10144 { 10145 xpath_allocator_capture cri(stack.result); 10146 10147 r += 
convert_string_to_number(string_value(*it, stack.result).c_str()); 10148 } 10149 10150 return r; 10151 } 10152 10153 case ast_func_floor: 10154 { 10155 double r = _left->eval_number(c, stack); 10156 10157 return r == r ? floor(r) : r; 10158 } 10159 10160 case ast_func_ceiling: 10161 { 10162 double r = _left->eval_number(c, stack); 10163 10164 return r == r ? ceil(r) : r; 10165 } 10166 10167 case ast_func_round: 10168 return round_nearest_nzero(_left->eval_number(c, stack)); 10169 10170 case ast_variable: 10171 { 10172 assert(_rettype == _data.variable->type()); 10173 10174 if (_rettype == xpath_type_number) 10175 return _data.variable->get_number(); 10176 10177 // fallthrough to type conversion 10178 } 10179 10180 default: 10181 { 10182 switch (_rettype) 10183 { 10184 case xpath_type_boolean: 10185 return eval_boolean(c, stack) ? 1 : 0; 10186 10187 case xpath_type_string: 10188 { 10189 xpath_allocator_capture cr(stack.result); 10190 10191 return convert_string_to_number(eval_string(c, stack).c_str()); 10192 } 10193 10194 case xpath_type_node_set: 10195 { 10196 xpath_allocator_capture cr(stack.result); 10197 10198 return convert_string_to_number(eval_string(c, stack).c_str()); 10199 } 10200 10201 default: 10202 assert(!"Wrong expression for return type number"); 10203 return 0; 10204 } 10205 10206 } 10207 } 10208 } 10209 eval_string_concat(const xpath_context & c,const xpath_stack & stack)10210 xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack) 10211 { 10212 assert(_type == ast_func_concat); 10213 10214 xpath_allocator_capture ct(stack.temp); 10215 10216 // count the string number 10217 size_t count = 1; 10218 for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++; 10219 10220 // gather all strings 10221 xpath_string static_buffer[4]; 10222 xpath_string* buffer = static_buffer; 10223 10224 // allocate on-heap for large concats 10225 if (count > sizeof(static_buffer) / sizeof(static_buffer[0])) 10226 { 10227 buffer = 
static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string))); 10228 assert(buffer); 10229 } 10230 10231 // evaluate all strings to temporary stack 10232 xpath_stack swapped_stack = {stack.temp, stack.result}; 10233 10234 buffer[0] = _left->eval_string(c, swapped_stack); 10235 10236 size_t pos = 1; 10237 for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack); 10238 assert(pos == count); 10239 10240 // get total length 10241 size_t length = 0; 10242 for (size_t i = 0; i < count; ++i) length += buffer[i].length(); 10243 10244 // create final string 10245 char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t))); 10246 assert(result); 10247 10248 char_t* ri = result; 10249 10250 for (size_t j = 0; j < count; ++j) 10251 for (const char_t* bi = buffer[j].c_str(); *bi; ++bi) 10252 *ri++ = *bi; 10253 10254 *ri = 0; 10255 10256 return xpath_string::from_heap_preallocated(result, ri); 10257 } 10258 eval_string(const xpath_context & c,const xpath_stack & stack)10259 xpath_string eval_string(const xpath_context& c, const xpath_stack& stack) 10260 { 10261 switch (_type) 10262 { 10263 case ast_string_constant: 10264 return xpath_string::from_const(_data.string); 10265 10266 case ast_func_local_name_0: 10267 { 10268 xpath_node na = c.n; 10269 10270 return xpath_string::from_const(local_name(na)); 10271 } 10272 10273 case ast_func_local_name_1: 10274 { 10275 xpath_allocator_capture cr(stack.result); 10276 10277 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); 10278 xpath_node na = ns.first(); 10279 10280 return xpath_string::from_const(local_name(na)); 10281 } 10282 10283 case ast_func_name_0: 10284 { 10285 xpath_node na = c.n; 10286 10287 return xpath_string::from_const(qualified_name(na)); 10288 } 10289 10290 case ast_func_name_1: 10291 { 10292 xpath_allocator_capture cr(stack.result); 10293 10294 xpath_node_set_raw ns = _left->eval_node_set(c, stack, 
nodeset_eval_first); 10295 xpath_node na = ns.first(); 10296 10297 return xpath_string::from_const(qualified_name(na)); 10298 } 10299 10300 case ast_func_namespace_uri_0: 10301 { 10302 xpath_node na = c.n; 10303 10304 return xpath_string::from_const(namespace_uri(na)); 10305 } 10306 10307 case ast_func_namespace_uri_1: 10308 { 10309 xpath_allocator_capture cr(stack.result); 10310 10311 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); 10312 xpath_node na = ns.first(); 10313 10314 return xpath_string::from_const(namespace_uri(na)); 10315 } 10316 10317 case ast_func_string_0: 10318 return string_value(c.n, stack.result); 10319 10320 case ast_func_string_1: 10321 return _left->eval_string(c, stack); 10322 10323 case ast_func_concat: 10324 return eval_string_concat(c, stack); 10325 10326 case ast_func_substring_before: 10327 { 10328 xpath_allocator_capture cr(stack.temp); 10329 10330 xpath_stack swapped_stack = {stack.temp, stack.result}; 10331 10332 xpath_string s = _left->eval_string(c, swapped_stack); 10333 xpath_string p = _right->eval_string(c, swapped_stack); 10334 10335 const char_t* pos = find_substring(s.c_str(), p.c_str()); 10336 10337 return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string(); 10338 } 10339 10340 case ast_func_substring_after: 10341 { 10342 xpath_allocator_capture cr(stack.temp); 10343 10344 xpath_stack swapped_stack = {stack.temp, stack.result}; 10345 10346 xpath_string s = _left->eval_string(c, swapped_stack); 10347 xpath_string p = _right->eval_string(c, swapped_stack); 10348 10349 const char_t* pos = find_substring(s.c_str(), p.c_str()); 10350 if (!pos) return xpath_string(); 10351 10352 const char_t* rbegin = pos + p.length(); 10353 const char_t* rend = s.c_str() + s.length(); 10354 10355 return s.uses_heap() ? 
xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); 10356 } 10357 10358 case ast_func_substring_2: 10359 { 10360 xpath_allocator_capture cr(stack.temp); 10361 10362 xpath_stack swapped_stack = {stack.temp, stack.result}; 10363 10364 xpath_string s = _left->eval_string(c, swapped_stack); 10365 size_t s_length = s.length(); 10366 10367 double first = round_nearest(_right->eval_number(c, stack)); 10368 10369 if (is_nan(first)) return xpath_string(); // NaN 10370 else if (first >= s_length + 1) return xpath_string(); 10371 10372 size_t pos = first < 1 ? 1 : static_cast<size_t>(first); 10373 assert(1 <= pos && pos <= s_length + 1); 10374 10375 const char_t* rbegin = s.c_str() + (pos - 1); 10376 const char_t* rend = s.c_str() + s.length(); 10377 10378 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); 10379 } 10380 10381 case ast_func_substring_3: 10382 { 10383 xpath_allocator_capture cr(stack.temp); 10384 10385 xpath_stack swapped_stack = {stack.temp, stack.result}; 10386 10387 xpath_string s = _left->eval_string(c, swapped_stack); 10388 size_t s_length = s.length(); 10389 10390 double first = round_nearest(_right->eval_number(c, stack)); 10391 double last = first + round_nearest(_right->_next->eval_number(c, stack)); 10392 10393 if (is_nan(first) || is_nan(last)) return xpath_string(); 10394 else if (first >= s_length + 1) return xpath_string(); 10395 else if (first >= last) return xpath_string(); 10396 else if (last < 1) return xpath_string(); 10397 10398 size_t pos = first < 1 ? 1 : static_cast<size_t>(first); 10399 size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last); 10400 10401 assert(1 <= pos && pos <= end && end <= s_length + 1); 10402 const char_t* rbegin = s.c_str() + (pos - 1); 10403 const char_t* rend = s.c_str() + (end - 1); 10404 10405 return (end == s_length + 1 && !s.uses_heap()) ? 
xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result); 10406 } 10407 10408 case ast_func_normalize_space_0: 10409 { 10410 xpath_string s = string_value(c.n, stack.result); 10411 10412 char_t* begin = s.data(stack.result); 10413 char_t* end = normalize_space(begin); 10414 10415 return xpath_string::from_heap_preallocated(begin, end); 10416 } 10417 10418 case ast_func_normalize_space_1: 10419 { 10420 xpath_string s = _left->eval_string(c, stack); 10421 10422 char_t* begin = s.data(stack.result); 10423 char_t* end = normalize_space(begin); 10424 10425 return xpath_string::from_heap_preallocated(begin, end); 10426 } 10427 10428 case ast_func_translate: 10429 { 10430 xpath_allocator_capture cr(stack.temp); 10431 10432 xpath_stack swapped_stack = {stack.temp, stack.result}; 10433 10434 xpath_string s = _left->eval_string(c, stack); 10435 xpath_string from = _right->eval_string(c, swapped_stack); 10436 xpath_string to = _right->_next->eval_string(c, swapped_stack); 10437 10438 char_t* begin = s.data(stack.result); 10439 char_t* end = translate(begin, from.c_str(), to.c_str(), to.length()); 10440 10441 return xpath_string::from_heap_preallocated(begin, end); 10442 } 10443 10444 case ast_opt_translate_table: 10445 { 10446 xpath_string s = _left->eval_string(c, stack); 10447 10448 char_t* begin = s.data(stack.result); 10449 char_t* end = translate_table(begin, _data.table); 10450 10451 return xpath_string::from_heap_preallocated(begin, end); 10452 } 10453 10454 case ast_variable: 10455 { 10456 assert(_rettype == _data.variable->type()); 10457 10458 if (_rettype == xpath_type_string) 10459 return xpath_string::from_const(_data.variable->get_string()); 10460 10461 // fallthrough to type conversion 10462 } 10463 10464 default: 10465 { 10466 switch (_rettype) 10467 { 10468 case xpath_type_boolean: 10469 return xpath_string::from_const(eval_boolean(c, stack) ? 
PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); 10470 10471 case xpath_type_number: 10472 return convert_number_to_string(eval_number(c, stack), stack.result); 10473 10474 case xpath_type_node_set: 10475 { 10476 xpath_allocator_capture cr(stack.temp); 10477 10478 xpath_stack swapped_stack = {stack.temp, stack.result}; 10479 10480 xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first); 10481 return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result); 10482 } 10483 10484 default: 10485 assert(!"Wrong expression for return type string"); 10486 return xpath_string(); 10487 } 10488 } 10489 } 10490 } 10491 eval_node_set(const xpath_context & c,const xpath_stack & stack,nodeset_eval_t eval)10492 xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval) 10493 { 10494 switch (_type) 10495 { 10496 case ast_op_union: 10497 { 10498 xpath_allocator_capture cr(stack.temp); 10499 10500 xpath_stack swapped_stack = {stack.temp, stack.result}; 10501 10502 xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval); 10503 xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval); 10504 10505 // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother 10506 rs.set_type(xpath_node_set::type_unsorted); 10507 10508 rs.append(ls.begin(), ls.end(), stack.result); 10509 rs.remove_duplicates(); 10510 10511 return rs; 10512 } 10513 10514 case ast_filter: 10515 { 10516 xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? 
nodeset_eval_first : nodeset_eval_all); 10517 10518 // either expression is a number or it contains position() call; sort by document order 10519 if (_test != predicate_posinv) set.sort_do(); 10520 10521 bool once = eval_once(set.type(), eval); 10522 10523 apply_predicate(set, 0, stack, once); 10524 10525 return set; 10526 } 10527 10528 case ast_func_id: 10529 return xpath_node_set_raw(); 10530 10531 case ast_step: 10532 { 10533 switch (_axis) 10534 { 10535 case axis_ancestor: 10536 return step_do(c, stack, eval, axis_to_type<axis_ancestor>()); 10537 10538 case axis_ancestor_or_self: 10539 return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>()); 10540 10541 case axis_attribute: 10542 return step_do(c, stack, eval, axis_to_type<axis_attribute>()); 10543 10544 case axis_child: 10545 return step_do(c, stack, eval, axis_to_type<axis_child>()); 10546 10547 case axis_descendant: 10548 return step_do(c, stack, eval, axis_to_type<axis_descendant>()); 10549 10550 case axis_descendant_or_self: 10551 return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>()); 10552 10553 case axis_following: 10554 return step_do(c, stack, eval, axis_to_type<axis_following>()); 10555 10556 case axis_following_sibling: 10557 return step_do(c, stack, eval, axis_to_type<axis_following_sibling>()); 10558 10559 case axis_namespace: 10560 // namespaced axis is not supported 10561 return xpath_node_set_raw(); 10562 10563 case axis_parent: 10564 return step_do(c, stack, eval, axis_to_type<axis_parent>()); 10565 10566 case axis_preceding: 10567 return step_do(c, stack, eval, axis_to_type<axis_preceding>()); 10568 10569 case axis_preceding_sibling: 10570 return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>()); 10571 10572 case axis_self: 10573 return step_do(c, stack, eval, axis_to_type<axis_self>()); 10574 10575 default: 10576 assert(!"Unknown axis"); 10577 return xpath_node_set_raw(); 10578 } 10579 } 10580 10581 case ast_step_root: 10582 { 10583 
assert(!_right); // root step can't have any predicates 10584 10585 xpath_node_set_raw ns; 10586 10587 ns.set_type(xpath_node_set::type_sorted); 10588 10589 if (c.n.node()) ns.push_back(c.n.node().root(), stack.result); 10590 else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result); 10591 10592 return ns; 10593 } 10594 10595 case ast_variable: 10596 { 10597 assert(_rettype == _data.variable->type()); 10598 10599 if (_rettype == xpath_type_node_set) 10600 { 10601 const xpath_node_set& s = _data.variable->get_node_set(); 10602 10603 xpath_node_set_raw ns; 10604 10605 ns.set_type(s.type()); 10606 ns.append(s.begin(), s.end(), stack.result); 10607 10608 return ns; 10609 } 10610 10611 // fallthrough to type conversion 10612 } 10613 10614 default: 10615 assert(!"Wrong expression for return type node set"); 10616 return xpath_node_set_raw(); 10617 } 10618 } 10619 optimize(xpath_allocator * alloc)10620 void optimize(xpath_allocator* alloc) 10621 { 10622 if (_left) _left->optimize(alloc); 10623 if (_right) _right->optimize(alloc); 10624 if (_next) _next->optimize(alloc); 10625 10626 optimize_self(alloc); 10627 } 10628 optimize_self(xpath_allocator * alloc)10629 void optimize_self(xpath_allocator* alloc) 10630 { 10631 // Rewrite [position()=expr] with [expr] 10632 // Note that this step has to go before classification to recognize [position()=1] 10633 if ((_type == ast_filter || _type == ast_predicate) && 10634 _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number) 10635 { 10636 _right = _right->_right; 10637 } 10638 10639 // Classify filter/predicate ops to perform various optimizations during evaluation 10640 if (_type == ast_filter || _type == ast_predicate) 10641 { 10642 assert(_test == predicate_default); 10643 10644 if (_right->_type == ast_number_constant && _right->_data.number == 1.0) 10645 _test = predicate_constant_one; 10646 else if (_right->_rettype == xpath_type_number && 
(_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last)) 10647 _test = predicate_constant; 10648 else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr()) 10649 _test = predicate_posinv; 10650 } 10651 10652 // Rewrite descendant-or-self::node()/child::foo with descendant::foo 10653 // The former is a full form of //foo, the latter is much faster since it executes the node test immediately 10654 // Do a similar kind of rewrite for self/descendant/descendant-or-self axes 10655 // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1]) 10656 if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && _left && 10657 _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right && 10658 is_posinv_step()) 10659 { 10660 if (_axis == axis_child || _axis == axis_descendant) 10661 _axis = axis_descendant; 10662 else 10663 _axis = axis_descendant_or_self; 10664 10665 _left = _left->_left; 10666 } 10667 10668 // Use optimized lookup table implementation for translate() with constant arguments 10669 if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant) 10670 { 10671 unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string); 10672 10673 if (table) 10674 { 10675 _type = ast_opt_translate_table; 10676 _data.table = table; 10677 } 10678 } 10679 10680 // Use optimized path for @attr = 'value' or @attr = $value 10681 if (_type == ast_op_equal && 10682 _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right && 10683 (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string))) 10684 { 10685 _type = 
ast_opt_compare_attribute; 10686 } 10687 } 10688 is_posinv_expr() const10689 bool is_posinv_expr() const 10690 { 10691 switch (_type) 10692 { 10693 case ast_func_position: 10694 case ast_func_last: 10695 return false; 10696 10697 case ast_string_constant: 10698 case ast_number_constant: 10699 case ast_variable: 10700 return true; 10701 10702 case ast_step: 10703 case ast_step_root: 10704 return true; 10705 10706 case ast_predicate: 10707 case ast_filter: 10708 return true; 10709 10710 default: 10711 if (_left && !_left->is_posinv_expr()) return false; 10712 10713 for (xpath_ast_node* n = _right; n; n = n->_next) 10714 if (!n->is_posinv_expr()) return false; 10715 10716 return true; 10717 } 10718 } 10719 is_posinv_step() const10720 bool is_posinv_step() const 10721 { 10722 assert(_type == ast_step); 10723 10724 for (xpath_ast_node* n = _right; n; n = n->_next) 10725 { 10726 assert(n->_type == ast_predicate); 10727 10728 if (n->_test != predicate_posinv) 10729 return false; 10730 } 10731 10732 return true; 10733 } 10734 rettype() const10735 xpath_value_type rettype() const 10736 { 10737 return static_cast<xpath_value_type>(_rettype); 10738 } 10739 }; 10740 10741 struct xpath_parser 10742 { 10743 xpath_allocator* _alloc; 10744 xpath_lexer _lexer; 10745 10746 const char_t* _query; 10747 xpath_variable_set* _variables; 10748 10749 xpath_parse_result* _result; 10750 10751 char_t _scratch[32]; 10752 10753 #ifdef PUGIXML_NO_EXCEPTIONS 10754 jmp_buf _error_handler; 10755 #endif 10756 throw_errorxpath_parser10757 void throw_error(const char* message) 10758 { 10759 _result->error = message; 10760 _result->offset = _lexer.current_pos() - _query; 10761 10762 #ifdef PUGIXML_NO_EXCEPTIONS 10763 longjmp(_error_handler, 1); 10764 #else 10765 throw xpath_exception(*_result); 10766 #endif 10767 } 10768 throw_error_oomxpath_parser10769 void throw_error_oom() 10770 { 10771 #ifdef PUGIXML_NO_EXCEPTIONS 10772 throw_error("Out of memory"); 10773 #else 10774 throw std::bad_alloc(); 10775 
#endif 10776 } 10777 alloc_nodexpath_parser10778 void* alloc_node() 10779 { 10780 void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node)); 10781 10782 if (!result) throw_error_oom(); 10783 10784 return result; 10785 } 10786 alloc_stringxpath_parser10787 const char_t* alloc_string(const xpath_lexer_string& value) 10788 { 10789 if (value.begin) 10790 { 10791 size_t length = static_cast<size_t>(value.end - value.begin); 10792 10793 char_t* c = static_cast<char_t*>(_alloc->allocate_nothrow((length + 1) * sizeof(char_t))); 10794 if (!c) throw_error_oom(); 10795 assert(c); // workaround for clang static analysis 10796 10797 memcpy(c, value.begin, length * sizeof(char_t)); 10798 c[length] = 0; 10799 10800 return c; 10801 } 10802 else return 0; 10803 } 10804 parse_function_helperxpath_parser10805 xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2]) 10806 { 10807 assert(argc <= 1); 10808 10809 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); 10810 10811 return new (alloc_node()) xpath_ast_node(argc == 0 ? 
type0 : type1, xpath_type_string, args[0]); 10812 } 10813 parse_functionxpath_parser10814 xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) 10815 { 10816 switch (name.begin[0]) 10817 { 10818 case 'b': 10819 if (name == PUGIXML_TEXT("boolean") && argc == 1) 10820 return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]); 10821 10822 break; 10823 10824 case 'c': 10825 if (name == PUGIXML_TEXT("count") && argc == 1) 10826 { 10827 if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); 10828 return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]); 10829 } 10830 else if (name == PUGIXML_TEXT("contains") && argc == 2) 10831 return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]); 10832 else if (name == PUGIXML_TEXT("concat") && argc >= 2) 10833 return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]); 10834 else if (name == PUGIXML_TEXT("ceiling") && argc == 1) 10835 return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]); 10836 10837 break; 10838 10839 case 'f': 10840 if (name == PUGIXML_TEXT("false") && argc == 0) 10841 return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean); 10842 else if (name == PUGIXML_TEXT("floor") && argc == 1) 10843 return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]); 10844 10845 break; 10846 10847 case 'i': 10848 if (name == PUGIXML_TEXT("id") && argc == 1) 10849 return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]); 10850 10851 break; 10852 10853 case 'l': 10854 if (name == PUGIXML_TEXT("last") && argc == 0) 10855 return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number); 10856 else if (name == PUGIXML_TEXT("lang") && argc == 1) 10857 return new (alloc_node()) 
xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]); 10858 else if (name == PUGIXML_TEXT("local-name") && argc <= 1) 10859 return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args); 10860 10861 break; 10862 10863 case 'n': 10864 if (name == PUGIXML_TEXT("name") && argc <= 1) 10865 return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args); 10866 else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1) 10867 return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args); 10868 else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) 10869 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); 10870 else if (name == PUGIXML_TEXT("not") && argc == 1) 10871 return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]); 10872 else if (name == PUGIXML_TEXT("number") && argc <= 1) 10873 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); 10874 10875 break; 10876 10877 case 'p': 10878 if (name == PUGIXML_TEXT("position") && argc == 0) 10879 return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number); 10880 10881 break; 10882 10883 case 'r': 10884 if (name == PUGIXML_TEXT("round") && argc == 1) 10885 return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]); 10886 10887 break; 10888 10889 case 's': 10890 if (name == PUGIXML_TEXT("string") && argc <= 1) 10891 return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); 10892 else if (name == PUGIXML_TEXT("string-length") && argc <= 1) 10893 return new (alloc_node()) xpath_ast_node(argc == 0 ? 
ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]); 10894 else if (name == PUGIXML_TEXT("starts-with") && argc == 2) 10895 return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); 10896 else if (name == PUGIXML_TEXT("substring-before") && argc == 2) 10897 return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); 10898 else if (name == PUGIXML_TEXT("substring-after") && argc == 2) 10899 return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); 10900 else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3)) 10901 return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); 10902 else if (name == PUGIXML_TEXT("sum") && argc == 1) 10903 { 10904 if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); 10905 return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]); 10906 } 10907 10908 break; 10909 10910 case 't': 10911 if (name == PUGIXML_TEXT("translate") && argc == 3) 10912 return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]); 10913 else if (name == PUGIXML_TEXT("true") && argc == 0) 10914 return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean); 10915 10916 break; 10917 10918 default: 10919 break; 10920 } 10921 10922 throw_error("Unrecognized function or wrong parameter count"); 10923 10924 return 0; 10925 } 10926 parse_axis_namexpath_parser10927 axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) 10928 { 10929 specified = true; 10930 10931 switch (name.begin[0]) 10932 { 10933 case 'a': 10934 if (name == PUGIXML_TEXT("ancestor")) 10935 return axis_ancestor; 10936 else if (name == PUGIXML_TEXT("ancestor-or-self")) 10937 return axis_ancestor_or_self; 10938 else if 
(name == PUGIXML_TEXT("attribute")) 10939 return axis_attribute; 10940 10941 break; 10942 10943 case 'c': 10944 if (name == PUGIXML_TEXT("child")) 10945 return axis_child; 10946 10947 break; 10948 10949 case 'd': 10950 if (name == PUGIXML_TEXT("descendant")) 10951 return axis_descendant; 10952 else if (name == PUGIXML_TEXT("descendant-or-self")) 10953 return axis_descendant_or_self; 10954 10955 break; 10956 10957 case 'f': 10958 if (name == PUGIXML_TEXT("following")) 10959 return axis_following; 10960 else if (name == PUGIXML_TEXT("following-sibling")) 10961 return axis_following_sibling; 10962 10963 break; 10964 10965 case 'n': 10966 if (name == PUGIXML_TEXT("namespace")) 10967 return axis_namespace; 10968 10969 break; 10970 10971 case 'p': 10972 if (name == PUGIXML_TEXT("parent")) 10973 return axis_parent; 10974 else if (name == PUGIXML_TEXT("preceding")) 10975 return axis_preceding; 10976 else if (name == PUGIXML_TEXT("preceding-sibling")) 10977 return axis_preceding_sibling; 10978 10979 break; 10980 10981 case 's': 10982 if (name == PUGIXML_TEXT("self")) 10983 return axis_self; 10984 10985 break; 10986 10987 default: 10988 break; 10989 } 10990 10991 specified = false; 10992 return axis_child; 10993 } 10994 parse_node_test_typexpath_parser10995 nodetest_t parse_node_test_type(const xpath_lexer_string& name) 10996 { 10997 switch (name.begin[0]) 10998 { 10999 case 'c': 11000 if (name == PUGIXML_TEXT("comment")) 11001 return nodetest_type_comment; 11002 11003 break; 11004 11005 case 'n': 11006 if (name == PUGIXML_TEXT("node")) 11007 return nodetest_type_node; 11008 11009 break; 11010 11011 case 'p': 11012 if (name == PUGIXML_TEXT("processing-instruction")) 11013 return nodetest_type_pi; 11014 11015 break; 11016 11017 case 't': 11018 if (name == PUGIXML_TEXT("text")) 11019 return nodetest_type_text; 11020 11021 break; 11022 11023 default: 11024 break; 11025 } 11026 11027 return nodetest_none; 11028 } 11029 11030 // PrimaryExpr ::= VariableReference | '(' Expr ')' | 
Literal | Number | FunctionCall parse_primary_expressionxpath_parser11031 xpath_ast_node* parse_primary_expression() 11032 { 11033 switch (_lexer.current()) 11034 { 11035 case lex_var_ref: 11036 { 11037 xpath_lexer_string name = _lexer.contents(); 11038 11039 if (!_variables) 11040 throw_error("Unknown variable: variable set is not provided"); 11041 11042 xpath_variable* var = 0; 11043 if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var)) 11044 throw_error_oom(); 11045 11046 if (!var) 11047 throw_error("Unknown variable: variable set does not contain the given name"); 11048 11049 _lexer.next(); 11050 11051 return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var); 11052 } 11053 11054 case lex_open_brace: 11055 { 11056 _lexer.next(); 11057 11058 xpath_ast_node* n = parse_expression(); 11059 11060 if (_lexer.current() != lex_close_brace) 11061 throw_error("Unmatched braces"); 11062 11063 _lexer.next(); 11064 11065 return n; 11066 } 11067 11068 case lex_quoted_string: 11069 { 11070 const char_t* value = alloc_string(_lexer.contents()); 11071 11072 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value); 11073 _lexer.next(); 11074 11075 return n; 11076 } 11077 11078 case lex_number: 11079 { 11080 double value = 0; 11081 11082 if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value)) 11083 throw_error_oom(); 11084 11085 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value); 11086 _lexer.next(); 11087 11088 return n; 11089 } 11090 11091 case lex_string: 11092 { 11093 xpath_ast_node* args[2] = {0}; 11094 size_t argc = 0; 11095 11096 xpath_lexer_string function = _lexer.contents(); 11097 _lexer.next(); 11098 11099 xpath_ast_node* last_arg = 0; 11100 11101 if (_lexer.current() != lex_open_brace) 11102 throw_error("Unrecognized function call"); 11103 _lexer.next(); 11104 11105 if (_lexer.current() != 
lex_close_brace) 11106 args[argc++] = parse_expression(); 11107 11108 while (_lexer.current() != lex_close_brace) 11109 { 11110 if (_lexer.current() != lex_comma) 11111 throw_error("No comma between function arguments"); 11112 _lexer.next(); 11113 11114 xpath_ast_node* n = parse_expression(); 11115 11116 if (argc < 2) args[argc] = n; 11117 else last_arg->set_next(n); 11118 11119 argc++; 11120 last_arg = n; 11121 } 11122 11123 _lexer.next(); 11124 11125 return parse_function(function, argc, args); 11126 } 11127 11128 default: 11129 throw_error("Unrecognizable primary expression"); 11130 11131 return 0; 11132 } 11133 } 11134 11135 // FilterExpr ::= PrimaryExpr | FilterExpr Predicate 11136 // Predicate ::= '[' PredicateExpr ']' 11137 // PredicateExpr ::= Expr parse_filter_expressionxpath_parser11138 xpath_ast_node* parse_filter_expression() 11139 { 11140 xpath_ast_node* n = parse_primary_expression(); 11141 11142 while (_lexer.current() == lex_open_square_brace) 11143 { 11144 _lexer.next(); 11145 11146 xpath_ast_node* expr = parse_expression(); 11147 11148 if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set"); 11149 11150 n = new (alloc_node()) xpath_ast_node(ast_filter, n, expr, predicate_default); 11151 11152 if (_lexer.current() != lex_close_square_brace) 11153 throw_error("Unmatched square brace"); 11154 11155 _lexer.next(); 11156 } 11157 11158 return n; 11159 } 11160 11161 // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep 11162 // AxisSpecifier ::= AxisName '::' | '@'? 11163 // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')' 11164 // NameTest ::= '*' | NCName ':' '*' | QName 11165 // AbbreviatedStep ::= '.' | '..' 
parse_stepxpath_parser11166 xpath_ast_node* parse_step(xpath_ast_node* set) 11167 { 11168 if (set && set->rettype() != xpath_type_node_set) 11169 throw_error("Step has to be applied to node set"); 11170 11171 bool axis_specified = false; 11172 axis_t axis = axis_child; // implied child axis 11173 11174 if (_lexer.current() == lex_axis_attribute) 11175 { 11176 axis = axis_attribute; 11177 axis_specified = true; 11178 11179 _lexer.next(); 11180 } 11181 else if (_lexer.current() == lex_dot) 11182 { 11183 _lexer.next(); 11184 11185 return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0); 11186 } 11187 else if (_lexer.current() == lex_double_dot) 11188 { 11189 _lexer.next(); 11190 11191 return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0); 11192 } 11193 11194 nodetest_t nt_type = nodetest_none; 11195 xpath_lexer_string nt_name; 11196 11197 if (_lexer.current() == lex_string) 11198 { 11199 // node name test 11200 nt_name = _lexer.contents(); 11201 _lexer.next(); 11202 11203 // was it an axis name? 
11204 if (_lexer.current() == lex_double_colon) 11205 { 11206 // parse axis name 11207 if (axis_specified) throw_error("Two axis specifiers in one step"); 11208 11209 axis = parse_axis_name(nt_name, axis_specified); 11210 11211 if (!axis_specified) throw_error("Unknown axis"); 11212 11213 // read actual node test 11214 _lexer.next(); 11215 11216 if (_lexer.current() == lex_multiply) 11217 { 11218 nt_type = nodetest_all; 11219 nt_name = xpath_lexer_string(); 11220 _lexer.next(); 11221 } 11222 else if (_lexer.current() == lex_string) 11223 { 11224 nt_name = _lexer.contents(); 11225 _lexer.next(); 11226 } 11227 else throw_error("Unrecognized node test"); 11228 } 11229 11230 if (nt_type == nodetest_none) 11231 { 11232 // node type test or processing-instruction 11233 if (_lexer.current() == lex_open_brace) 11234 { 11235 _lexer.next(); 11236 11237 if (_lexer.current() == lex_close_brace) 11238 { 11239 _lexer.next(); 11240 11241 nt_type = parse_node_test_type(nt_name); 11242 11243 if (nt_type == nodetest_none) throw_error("Unrecognized node type"); 11244 11245 nt_name = xpath_lexer_string(); 11246 } 11247 else if (nt_name == PUGIXML_TEXT("processing-instruction")) 11248 { 11249 if (_lexer.current() != lex_quoted_string) 11250 throw_error("Only literals are allowed as arguments to processing-instruction()"); 11251 11252 nt_type = nodetest_pi; 11253 nt_name = _lexer.contents(); 11254 _lexer.next(); 11255 11256 if (_lexer.current() != lex_close_brace) 11257 throw_error("Unmatched brace near processing-instruction()"); 11258 _lexer.next(); 11259 } 11260 else 11261 throw_error("Unmatched brace near node type test"); 11262 11263 } 11264 // QName or NCName:* 11265 else 11266 { 11267 if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:* 11268 { 11269 nt_name.end--; // erase * 11270 11271 nt_type = nodetest_all_in_namespace; 11272 } 11273 else nt_type = nodetest_name; 11274 } 11275 } 11276 } 11277 else if (_lexer.current() == 
lex_multiply) 11278 { 11279 nt_type = nodetest_all; 11280 _lexer.next(); 11281 } 11282 else throw_error("Unrecognized node test"); 11283 11284 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name)); 11285 11286 xpath_ast_node* last = 0; 11287 11288 while (_lexer.current() == lex_open_square_brace) 11289 { 11290 _lexer.next(); 11291 11292 xpath_ast_node* expr = parse_expression(); 11293 11294 xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, 0, expr, predicate_default); 11295 11296 if (_lexer.current() != lex_close_square_brace) 11297 throw_error("Unmatched square brace"); 11298 _lexer.next(); 11299 11300 if (last) last->set_next(pred); 11301 else n->set_right(pred); 11302 11303 last = pred; 11304 } 11305 11306 return n; 11307 } 11308 11309 // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step parse_relative_location_pathxpath_parser11310 xpath_ast_node* parse_relative_location_path(xpath_ast_node* set) 11311 { 11312 xpath_ast_node* n = parse_step(set); 11313 11314 while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) 11315 { 11316 lexeme_t l = _lexer.current(); 11317 _lexer.next(); 11318 11319 if (l == lex_double_slash) 11320 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); 11321 11322 n = parse_step(n); 11323 } 11324 11325 return n; 11326 } 11327 11328 // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath 11329 // AbsoluteLocationPath ::= '/' RelativeLocationPath? 
| '//' RelativeLocationPath parse_location_pathxpath_parser11330 xpath_ast_node* parse_location_path() 11331 { 11332 if (_lexer.current() == lex_slash) 11333 { 11334 _lexer.next(); 11335 11336 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set); 11337 11338 // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path 11339 lexeme_t l = _lexer.current(); 11340 11341 if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply) 11342 return parse_relative_location_path(n); 11343 else 11344 return n; 11345 } 11346 else if (_lexer.current() == lex_double_slash) 11347 { 11348 _lexer.next(); 11349 11350 xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set); 11351 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); 11352 11353 return parse_relative_location_path(n); 11354 } 11355 11356 // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1 11357 return parse_relative_location_path(0); 11358 } 11359 11360 // PathExpr ::= LocationPath 11361 // | FilterExpr 11362 // | FilterExpr '/' RelativeLocationPath 11363 // | FilterExpr '//' RelativeLocationPath 11364 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr 11365 // UnaryExpr ::= UnionExpr | '-' UnaryExpr parse_path_or_unary_expressionxpath_parser11366 xpath_ast_node* parse_path_or_unary_expression() 11367 { 11368 // Clarification. 11369 // PathExpr begins with either LocationPath or FilterExpr. 11370 // FilterExpr begins with PrimaryExpr 11371 // PrimaryExpr begins with '$' in case of it being a variable reference, 11372 // '(' in case of it being an expression, string literal, number constant or 11373 // function call. 
11374 11375 if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace || 11376 _lexer.current() == lex_quoted_string || _lexer.current() == lex_number || 11377 _lexer.current() == lex_string) 11378 { 11379 if (_lexer.current() == lex_string) 11380 { 11381 // This is either a function call, or not - if not, we shall proceed with location path 11382 const char_t* state = _lexer.state(); 11383 11384 while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state; 11385 11386 if (*state != '(') return parse_location_path(); 11387 11388 // This looks like a function call; however this still can be a node-test. Check it. 11389 if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path(); 11390 } 11391 11392 xpath_ast_node* n = parse_filter_expression(); 11393 11394 if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) 11395 { 11396 lexeme_t l = _lexer.current(); 11397 _lexer.next(); 11398 11399 if (l == lex_double_slash) 11400 { 11401 if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set"); 11402 11403 n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); 11404 } 11405 11406 // select from location path 11407 return parse_relative_location_path(n); 11408 } 11409 11410 return n; 11411 } 11412 else if (_lexer.current() == lex_minus) 11413 { 11414 _lexer.next(); 11415 11416 // precedence 7+ - only parses union expressions 11417 xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7); 11418 11419 return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr); 11420 } 11421 else 11422 return parse_location_path(); 11423 } 11424 11425 struct binary_op_t 11426 { 11427 ast_type_t asttype; 11428 xpath_value_type rettype; 11429 int precedence; 11430 binary_op_txpath_parser::binary_op_t11431 binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0) 11432 { 11433 } 11434 
binary_op_txpath_parser::binary_op_t11435 binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_) 11436 { 11437 } 11438 parsexpath_parser::binary_op_t11439 static binary_op_t parse(xpath_lexer& lexer) 11440 { 11441 switch (lexer.current()) 11442 { 11443 case lex_string: 11444 if (lexer.contents() == PUGIXML_TEXT("or")) 11445 return binary_op_t(ast_op_or, xpath_type_boolean, 1); 11446 else if (lexer.contents() == PUGIXML_TEXT("and")) 11447 return binary_op_t(ast_op_and, xpath_type_boolean, 2); 11448 else if (lexer.contents() == PUGIXML_TEXT("div")) 11449 return binary_op_t(ast_op_divide, xpath_type_number, 6); 11450 else if (lexer.contents() == PUGIXML_TEXT("mod")) 11451 return binary_op_t(ast_op_mod, xpath_type_number, 6); 11452 else 11453 return binary_op_t(); 11454 11455 case lex_equal: 11456 return binary_op_t(ast_op_equal, xpath_type_boolean, 3); 11457 11458 case lex_not_equal: 11459 return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3); 11460 11461 case lex_less: 11462 return binary_op_t(ast_op_less, xpath_type_boolean, 4); 11463 11464 case lex_greater: 11465 return binary_op_t(ast_op_greater, xpath_type_boolean, 4); 11466 11467 case lex_less_or_equal: 11468 return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4); 11469 11470 case lex_greater_or_equal: 11471 return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4); 11472 11473 case lex_plus: 11474 return binary_op_t(ast_op_add, xpath_type_number, 5); 11475 11476 case lex_minus: 11477 return binary_op_t(ast_op_subtract, xpath_type_number, 5); 11478 11479 case lex_multiply: 11480 return binary_op_t(ast_op_multiply, xpath_type_number, 6); 11481 11482 case lex_union: 11483 return binary_op_t(ast_op_union, xpath_type_node_set, 7); 11484 11485 default: 11486 return binary_op_t(); 11487 } 11488 } 11489 }; 11490 parse_expression_recxpath_parser11491 xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int 
limit) 11492 { 11493 binary_op_t op = binary_op_t::parse(_lexer); 11494 11495 while (op.asttype != ast_unknown && op.precedence >= limit) 11496 { 11497 _lexer.next(); 11498 11499 xpath_ast_node* rhs = parse_path_or_unary_expression(); 11500 11501 binary_op_t nextop = binary_op_t::parse(_lexer); 11502 11503 while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence) 11504 { 11505 rhs = parse_expression_rec(rhs, nextop.precedence); 11506 11507 nextop = binary_op_t::parse(_lexer); 11508 } 11509 11510 if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set)) 11511 throw_error("Union operator has to be applied to node sets"); 11512 11513 lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs); 11514 11515 op = binary_op_t::parse(_lexer); 11516 } 11517 11518 return lhs; 11519 } 11520 11521 // Expr ::= OrExpr 11522 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr 11523 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr 11524 // EqualityExpr ::= RelationalExpr 11525 // | EqualityExpr '=' RelationalExpr 11526 // | EqualityExpr '!=' RelationalExpr 11527 // RelationalExpr ::= AdditiveExpr 11528 // | RelationalExpr '<' AdditiveExpr 11529 // | RelationalExpr '>' AdditiveExpr 11530 // | RelationalExpr '<=' AdditiveExpr 11531 // | RelationalExpr '>=' AdditiveExpr 11532 // AdditiveExpr ::= MultiplicativeExpr 11533 // | AdditiveExpr '+' MultiplicativeExpr 11534 // | AdditiveExpr '-' MultiplicativeExpr 11535 // MultiplicativeExpr ::= UnaryExpr 11536 // | MultiplicativeExpr '*' UnaryExpr 11537 // | MultiplicativeExpr 'div' UnaryExpr 11538 // | MultiplicativeExpr 'mod' UnaryExpr parse_expressionxpath_parser11539 xpath_ast_node* parse_expression() 11540 { 11541 return parse_expression_rec(parse_path_or_unary_expression(), 0); 11542 } 11543 xpath_parserxpath_parser11544 xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): 
_alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result) 11545 { 11546 } 11547 parsexpath_parser11548 xpath_ast_node* parse() 11549 { 11550 xpath_ast_node* result = parse_expression(); 11551 11552 if (_lexer.current() != lex_eof) 11553 { 11554 // there are still unparsed tokens left, error 11555 throw_error("Incorrect query"); 11556 } 11557 11558 return result; 11559 } 11560 parsexpath_parser11561 static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result) 11562 { 11563 xpath_parser parser(query, variables, alloc, result); 11564 11565 #ifdef PUGIXML_NO_EXCEPTIONS 11566 int error = setjmp(parser._error_handler); 11567 11568 return (error == 0) ? parser.parse() : 0; 11569 #else 11570 return parser.parse(); 11571 #endif 11572 } 11573 }; 11574 11575 struct xpath_query_impl 11576 { createxpath_query_impl11577 static xpath_query_impl* create() 11578 { 11579 void* memory = xml_memory::allocate(sizeof(xpath_query_impl)); 11580 if (!memory) return 0; 11581 11582 return new (memory) xpath_query_impl(); 11583 } 11584 destroyxpath_query_impl11585 static void destroy(xpath_query_impl* impl) 11586 { 11587 // free all allocated pages 11588 impl->alloc.release(); 11589 11590 // free allocator memory (with the first page) 11591 xml_memory::deallocate(impl); 11592 } 11593 xpath_query_implxpath_query_impl11594 xpath_query_impl(): root(0), alloc(&block) 11595 { 11596 block.next = 0; 11597 block.capacity = sizeof(block.data); 11598 } 11599 11600 xpath_ast_node* root; 11601 xpath_allocator alloc; 11602 xpath_memory_block block; 11603 }; 11604 evaluate_string_impl(xpath_query_impl * impl,const xpath_node & n,xpath_stack_data & sd)11605 PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd) 11606 { 11607 if (!impl) return xpath_string(); 11608 11609 #ifdef PUGIXML_NO_EXCEPTIONS 11610 if (setjmp(sd.error_handler)) return xpath_string(); 
11611 #endif 11612 11613 xpath_context c(n, 1, 1); 11614 11615 return impl->root->eval_string(c, sd.stack); 11616 } 11617 evaluate_node_set_prepare(xpath_query_impl * impl)11618 PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl) 11619 { 11620 if (!impl) return 0; 11621 11622 if (impl->root->rettype() != xpath_type_node_set) 11623 { 11624 #ifdef PUGIXML_NO_EXCEPTIONS 11625 return 0; 11626 #else 11627 xpath_parse_result res; 11628 res.error = "Expression does not evaluate to node set"; 11629 11630 throw xpath_exception(res); 11631 #endif 11632 } 11633 11634 return impl->root; 11635 } 11636 PUGI__NS_END 11637 11638 namespace pugi 11639 { 11640 #ifndef PUGIXML_NO_EXCEPTIONS xpath_exception(const xpath_parse_result & result_)11641 PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_) 11642 { 11643 assert(_result.error); 11644 } 11645 what() const11646 PUGI__FN const char* xpath_exception::what() const throw() 11647 { 11648 return _result.error; 11649 } 11650 result() const11651 PUGI__FN const xpath_parse_result& xpath_exception::result() const 11652 { 11653 return _result; 11654 } 11655 #endif 11656 xpath_node()11657 PUGI__FN xpath_node::xpath_node() 11658 { 11659 } 11660 xpath_node(const xml_node & node_)11661 PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_) 11662 { 11663 } 11664 xpath_node(const xml_attribute & attribute_,const xml_node & parent_)11665 PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_) 11666 { 11667 } 11668 node() const11669 PUGI__FN xml_node xpath_node::node() const 11670 { 11671 return _attribute ? xml_node() : _node; 11672 } 11673 attribute() const11674 PUGI__FN xml_attribute xpath_node::attribute() const 11675 { 11676 return _attribute; 11677 } 11678 parent() const11679 PUGI__FN xml_node xpath_node::parent() const 11680 { 11681 return _attribute ? 
_node : _node.parent(); 11682 } 11683 unspecified_bool_xpath_node(xpath_node ***)11684 PUGI__FN static void unspecified_bool_xpath_node(xpath_node***) 11685 { 11686 } 11687 operator xpath_node::unspecified_bool_type() const11688 PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const 11689 { 11690 return (_node || _attribute) ? unspecified_bool_xpath_node : 0; 11691 } 11692 operator !() const11693 PUGI__FN bool xpath_node::operator!() const 11694 { 11695 return !(_node || _attribute); 11696 } 11697 operator ==(const xpath_node & n) const11698 PUGI__FN bool xpath_node::operator==(const xpath_node& n) const 11699 { 11700 return _node == n._node && _attribute == n._attribute; 11701 } 11702 operator !=(const xpath_node & n) const11703 PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const 11704 { 11705 return _node != n._node || _attribute != n._attribute; 11706 } 11707 11708 #ifdef __BORLANDC__ operator &&(const xpath_node & lhs,bool rhs)11709 PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs) 11710 { 11711 return (bool)lhs && rhs; 11712 } 11713 operator ||(const xpath_node & lhs,bool rhs)11714 PUGI__FN bool operator||(const xpath_node& lhs, bool rhs) 11715 { 11716 return (bool)lhs || rhs; 11717 } 11718 #endif 11719 _assign(const_iterator begin_,const_iterator end_,type_t type_)11720 PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_) 11721 { 11722 assert(begin_ <= end_); 11723 11724 size_t size_ = static_cast<size_t>(end_ - begin_); 11725 11726 if (size_ <= 1) 11727 { 11728 // deallocate old buffer 11729 if (_begin != &_storage) impl::xml_memory::deallocate(_begin); 11730 11731 // use internal buffer 11732 if (begin_ != end_) _storage = *begin_; 11733 11734 _begin = &_storage; 11735 _end = &_storage + size_; 11736 _type = type_; 11737 } 11738 else 11739 { 11740 // make heap copy 11741 xpath_node* storage = static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node))); 11742 
11743 if (!storage) 11744 { 11745 #ifdef PUGIXML_NO_EXCEPTIONS 11746 return; 11747 #else 11748 throw std::bad_alloc(); 11749 #endif 11750 } 11751 11752 memcpy(storage, begin_, size_ * sizeof(xpath_node)); 11753 11754 // deallocate old buffer 11755 if (_begin != &_storage) impl::xml_memory::deallocate(_begin); 11756 11757 // finalize 11758 _begin = storage; 11759 _end = storage + size_; 11760 _type = type_; 11761 } 11762 } 11763 11764 #if __cplusplus >= 201103 _move(xpath_node_set & rhs)11765 PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) 11766 { 11767 _type = rhs._type; 11768 _storage = rhs._storage; 11769 _begin = (rhs._begin == &rhs._storage) ? &_storage : rhs._begin; 11770 _end = _begin + (rhs._end - rhs._begin); 11771 11772 rhs._type = type_unsorted; 11773 rhs._begin = &rhs._storage; 11774 rhs._end = rhs._begin; 11775 } 11776 #endif 11777 xpath_node_set()11778 PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage) 11779 { 11780 } 11781 xpath_node_set(const_iterator begin_,const_iterator end_,type_t type_)11782 PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(&_storage), _end(&_storage) 11783 { 11784 _assign(begin_, end_, type_); 11785 } 11786 ~xpath_node_set()11787 PUGI__FN xpath_node_set::~xpath_node_set() 11788 { 11789 if (_begin != &_storage) 11790 impl::xml_memory::deallocate(_begin); 11791 } 11792 xpath_node_set(const xpath_node_set & ns)11793 PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(&_storage), _end(&_storage) 11794 { 11795 _assign(ns._begin, ns._end, ns._type); 11796 } 11797 operator =(const xpath_node_set & ns)11798 PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns) 11799 { 11800 if (this == &ns) return *this; 11801 11802 _assign(ns._begin, ns._end, ns._type); 11803 11804 return *this; 11805 } 11806 11807 #if __cplusplus >= 201103 
xpath_node_set(xpath_node_set && rhs)11808 PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs): _type(type_unsorted), _begin(&_storage), _end(&_storage) 11809 { 11810 _move(rhs); 11811 } 11812 operator =(xpath_node_set && rhs)11813 PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) 11814 { 11815 if (this == &rhs) return *this; 11816 11817 if (_begin != &_storage) 11818 impl::xml_memory::deallocate(_begin); 11819 11820 _move(rhs); 11821 11822 return *this; 11823 } 11824 #endif 11825 type() const11826 PUGI__FN xpath_node_set::type_t xpath_node_set::type() const 11827 { 11828 return _type; 11829 } 11830 size() const11831 PUGI__FN size_t xpath_node_set::size() const 11832 { 11833 return _end - _begin; 11834 } 11835 empty() const11836 PUGI__FN bool xpath_node_set::empty() const 11837 { 11838 return _begin == _end; 11839 } 11840 operator [](size_t index) const11841 PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const 11842 { 11843 assert(index < size()); 11844 return _begin[index]; 11845 } 11846 begin() const11847 PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const 11848 { 11849 return _begin; 11850 } 11851 end() const11852 PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const 11853 { 11854 return _end; 11855 } 11856 sort(bool reverse)11857 PUGI__FN void xpath_node_set::sort(bool reverse) 11858 { 11859 _type = impl::xpath_sort(_begin, _end, _type, reverse); 11860 } 11861 first() const11862 PUGI__FN xpath_node xpath_node_set::first() const 11863 { 11864 return impl::xpath_first(_begin, _end, _type); 11865 } 11866 xpath_parse_result()11867 PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0) 11868 { 11869 } 11870 operator bool() const11871 PUGI__FN xpath_parse_result::operator bool() const 11872 { 11873 return error == 0; 11874 } 11875 description() const11876 PUGI__FN const char* xpath_parse_result::description() const 11877 { 11878 return error ? 
error : "No error"; 11879 } 11880 xpath_variable(xpath_value_type type_)11881 PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0) 11882 { 11883 } 11884 name() const11885 PUGI__FN const char_t* xpath_variable::name() const 11886 { 11887 switch (_type) 11888 { 11889 case xpath_type_node_set: 11890 return static_cast<const impl::xpath_variable_node_set*>(this)->name; 11891 11892 case xpath_type_number: 11893 return static_cast<const impl::xpath_variable_number*>(this)->name; 11894 11895 case xpath_type_string: 11896 return static_cast<const impl::xpath_variable_string*>(this)->name; 11897 11898 case xpath_type_boolean: 11899 return static_cast<const impl::xpath_variable_boolean*>(this)->name; 11900 11901 default: 11902 assert(!"Invalid variable type"); 11903 return 0; 11904 } 11905 } 11906 type() const11907 PUGI__FN xpath_value_type xpath_variable::type() const 11908 { 11909 return _type; 11910 } 11911 get_boolean() const11912 PUGI__FN bool xpath_variable::get_boolean() const 11913 { 11914 return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false; 11915 } 11916 get_number() const11917 PUGI__FN double xpath_variable::get_number() const 11918 { 11919 return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan(); 11920 } 11921 get_string() const11922 PUGI__FN const char_t* xpath_variable::get_string() const 11923 { 11924 const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0; 11925 return value ? value : PUGIXML_TEXT(""); 11926 } 11927 get_node_set() const11928 PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const 11929 { 11930 return (_type == xpath_type_node_set) ? 
static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set; 11931 } 11932 set(bool value)11933 PUGI__FN bool xpath_variable::set(bool value) 11934 { 11935 if (_type != xpath_type_boolean) return false; 11936 11937 static_cast<impl::xpath_variable_boolean*>(this)->value = value; 11938 return true; 11939 } 11940 set(double value)11941 PUGI__FN bool xpath_variable::set(double value) 11942 { 11943 if (_type != xpath_type_number) return false; 11944 11945 static_cast<impl::xpath_variable_number*>(this)->value = value; 11946 return true; 11947 } 11948 set(const char_t * value)11949 PUGI__FN bool xpath_variable::set(const char_t* value) 11950 { 11951 if (_type != xpath_type_string) return false; 11952 11953 impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this); 11954 11955 // duplicate string 11956 size_t size = (impl::strlength(value) + 1) * sizeof(char_t); 11957 11958 char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size)); 11959 if (!copy) return false; 11960 11961 memcpy(copy, value, size); 11962 11963 // replace old string 11964 if (var->value) impl::xml_memory::deallocate(var->value); 11965 var->value = copy; 11966 11967 return true; 11968 } 11969 set(const xpath_node_set & value)11970 PUGI__FN bool xpath_variable::set(const xpath_node_set& value) 11971 { 11972 if (_type != xpath_type_node_set) return false; 11973 11974 static_cast<impl::xpath_variable_node_set*>(this)->value = value; 11975 return true; 11976 } 11977 xpath_variable_set()11978 PUGI__FN xpath_variable_set::xpath_variable_set() 11979 { 11980 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 11981 _data[i] = 0; 11982 } 11983 ~xpath_variable_set()11984 PUGI__FN xpath_variable_set::~xpath_variable_set() 11985 { 11986 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 11987 _destroy(_data[i]); 11988 } 11989 xpath_variable_set(const xpath_variable_set & rhs)11990 PUGI__FN xpath_variable_set::xpath_variable_set(const 
xpath_variable_set& rhs) 11991 { 11992 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 11993 _data[i] = 0; 11994 11995 _assign(rhs); 11996 } 11997 operator =(const xpath_variable_set & rhs)11998 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs) 11999 { 12000 if (this == &rhs) return *this; 12001 12002 _assign(rhs); 12003 12004 return *this; 12005 } 12006 12007 #if __cplusplus >= 201103 xpath_variable_set(xpath_variable_set && rhs)12008 PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) 12009 { 12010 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12011 { 12012 _data[i] = rhs._data[i]; 12013 rhs._data[i] = 0; 12014 } 12015 } 12016 operator =(xpath_variable_set && rhs)12017 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) 12018 { 12019 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12020 { 12021 _destroy(_data[i]); 12022 12023 _data[i] = rhs._data[i]; 12024 rhs._data[i] = 0; 12025 } 12026 12027 return *this; 12028 } 12029 #endif 12030 _assign(const xpath_variable_set & rhs)12031 PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs) 12032 { 12033 xpath_variable_set temp; 12034 12035 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12036 if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i])) 12037 return; 12038 12039 _swap(temp); 12040 } 12041 _swap(xpath_variable_set & rhs)12042 PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs) 12043 { 12044 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12045 { 12046 xpath_variable* chain = _data[i]; 12047 12048 _data[i] = rhs._data[i]; 12049 rhs._data[i] = chain; 12050 } 12051 } 12052 _find(const char_t * name) const12053 PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const 12054 { 12055 const size_t hash_size = sizeof(_data) / sizeof(_data[0]); 12056 size_t hash = impl::hash_string(name) % hash_size; 
12057 12058 // look for existing variable 12059 for (xpath_variable* var = _data[hash]; var; var = var->_next) 12060 if (impl::strequal(var->name(), name)) 12061 return var; 12062 12063 return 0; 12064 } 12065 _clone(xpath_variable * var,xpath_variable ** out_result)12066 PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result) 12067 { 12068 xpath_variable* last = 0; 12069 12070 while (var) 12071 { 12072 // allocate storage for new variable 12073 xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name()); 12074 if (!nvar) return false; 12075 12076 // link the variable to the result immediately to handle failures gracefully 12077 if (last) 12078 last->_next = nvar; 12079 else 12080 *out_result = nvar; 12081 12082 last = nvar; 12083 12084 // copy the value; this can fail due to out-of-memory conditions 12085 if (!impl::copy_xpath_variable(nvar, var)) return false; 12086 12087 var = var->_next; 12088 } 12089 12090 return true; 12091 } 12092 _destroy(xpath_variable * var)12093 PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var) 12094 { 12095 while (var) 12096 { 12097 xpath_variable* next = var->_next; 12098 12099 impl::delete_xpath_variable(var->_type, var); 12100 12101 var = next; 12102 } 12103 } 12104 add(const char_t * name,xpath_value_type type)12105 PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type) 12106 { 12107 const size_t hash_size = sizeof(_data) / sizeof(_data[0]); 12108 size_t hash = impl::hash_string(name) % hash_size; 12109 12110 // look for existing variable 12111 for (xpath_variable* var = _data[hash]; var; var = var->_next) 12112 if (impl::strequal(var->name(), name)) 12113 return var->type() == type ? 
var : 0; 12114 12115 // add new variable 12116 xpath_variable* result = impl::new_xpath_variable(type, name); 12117 12118 if (result) 12119 { 12120 result->_next = _data[hash]; 12121 12122 _data[hash] = result; 12123 } 12124 12125 return result; 12126 } 12127 set(const char_t * name,bool value)12128 PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value) 12129 { 12130 xpath_variable* var = add(name, xpath_type_boolean); 12131 return var ? var->set(value) : false; 12132 } 12133 set(const char_t * name,double value)12134 PUGI__FN bool xpath_variable_set::set(const char_t* name, double value) 12135 { 12136 xpath_variable* var = add(name, xpath_type_number); 12137 return var ? var->set(value) : false; 12138 } 12139 set(const char_t * name,const char_t * value)12140 PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value) 12141 { 12142 xpath_variable* var = add(name, xpath_type_string); 12143 return var ? var->set(value) : false; 12144 } 12145 set(const char_t * name,const xpath_node_set & value)12146 PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value) 12147 { 12148 xpath_variable* var = add(name, xpath_type_node_set); 12149 return var ? 
var->set(value) : false; 12150 } 12151 get(const char_t * name)12152 PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name) 12153 { 12154 return _find(name); 12155 } 12156 get(const char_t * name) const12157 PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const 12158 { 12159 return _find(name); 12160 } 12161 xpath_query(const char_t * query,xpath_variable_set * variables)12162 PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0) 12163 { 12164 impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create(); 12165 12166 if (!qimpl) 12167 { 12168 #ifdef PUGIXML_NO_EXCEPTIONS 12169 _result.error = "Out of memory"; 12170 #else 12171 throw std::bad_alloc(); 12172 #endif 12173 } 12174 else 12175 { 12176 using impl::auto_deleter; // MSVC7 workaround 12177 auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy); 12178 12179 qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result); 12180 12181 if (qimpl->root) 12182 { 12183 qimpl->root->optimize(&qimpl->alloc); 12184 12185 _impl = impl.release(); 12186 _result.error = 0; 12187 } 12188 } 12189 } 12190 xpath_query()12191 PUGI__FN xpath_query::xpath_query(): _impl(0) 12192 { 12193 } 12194 ~xpath_query()12195 PUGI__FN xpath_query::~xpath_query() 12196 { 12197 if (_impl) 12198 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl)); 12199 } 12200 12201 #if __cplusplus >= 201103 xpath_query(xpath_query && rhs)12202 PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) 12203 { 12204 _impl = rhs._impl; 12205 _result = rhs._result; 12206 rhs._impl = 0; 12207 rhs._result = xpath_parse_result(); 12208 } 12209 operator =(xpath_query && rhs)12210 PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) 12211 { 12212 if (this == &rhs) return *this; 12213 12214 if (_impl) 12215 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl)); 12216 12217 _impl = 
rhs._impl; 12218 _result = rhs._result; 12219 rhs._impl = 0; 12220 rhs._result = xpath_parse_result(); 12221 12222 return *this; 12223 } 12224 #endif 12225 return_type() const12226 PUGI__FN xpath_value_type xpath_query::return_type() const 12227 { 12228 if (!_impl) return xpath_type_none; 12229 12230 return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype(); 12231 } 12232 evaluate_boolean(const xpath_node & n) const12233 PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const 12234 { 12235 if (!_impl) return false; 12236 12237 impl::xpath_context c(n, 1, 1); 12238 impl::xpath_stack_data sd; 12239 12240 #ifdef PUGIXML_NO_EXCEPTIONS 12241 if (setjmp(sd.error_handler)) return false; 12242 #endif 12243 12244 return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack); 12245 } 12246 evaluate_number(const xpath_node & n) const12247 PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const 12248 { 12249 if (!_impl) return impl::gen_nan(); 12250 12251 impl::xpath_context c(n, 1, 1); 12252 impl::xpath_stack_data sd; 12253 12254 #ifdef PUGIXML_NO_EXCEPTIONS 12255 if (setjmp(sd.error_handler)) return impl::gen_nan(); 12256 #endif 12257 12258 return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack); 12259 } 12260 12261 #ifndef PUGIXML_NO_STL evaluate_string(const xpath_node & n) const12262 PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const 12263 { 12264 impl::xpath_stack_data sd; 12265 12266 impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd); 12267 12268 return string_t(r.c_str(), r.length()); 12269 } 12270 #endif 12271 evaluate_string(char_t * buffer,size_t capacity,const xpath_node & n) const12272 PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const 12273 { 12274 impl::xpath_stack_data sd; 12275 12276 impl::xpath_string r = 
impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd); 12277 12278 size_t full_size = r.length() + 1; 12279 12280 if (capacity > 0) 12281 { 12282 size_t size = (full_size < capacity) ? full_size : capacity; 12283 assert(size > 0); 12284 12285 memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t)); 12286 buffer[size - 1] = 0; 12287 } 12288 12289 return full_size; 12290 } 12291 evaluate_node_set(const xpath_node & n) const12292 PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const 12293 { 12294 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl)); 12295 if (!root) return xpath_node_set(); 12296 12297 impl::xpath_context c(n, 1, 1); 12298 impl::xpath_stack_data sd; 12299 12300 #ifdef PUGIXML_NO_EXCEPTIONS 12301 if (setjmp(sd.error_handler)) return xpath_node_set(); 12302 #endif 12303 12304 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all); 12305 12306 return xpath_node_set(r.begin(), r.end(), r.type()); 12307 } 12308 evaluate_node(const xpath_node & n) const12309 PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const 12310 { 12311 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl)); 12312 if (!root) return xpath_node(); 12313 12314 impl::xpath_context c(n, 1, 1); 12315 impl::xpath_stack_data sd; 12316 12317 #ifdef PUGIXML_NO_EXCEPTIONS 12318 if (setjmp(sd.error_handler)) return xpath_node(); 12319 #endif 12320 12321 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first); 12322 12323 return r.first(); 12324 } 12325 result() const12326 PUGI__FN const xpath_parse_result& xpath_query::result() const 12327 { 12328 return _result; 12329 } 12330 unspecified_bool_xpath_query(xpath_query ***)12331 PUGI__FN static void unspecified_bool_xpath_query(xpath_query***) 12332 { 12333 } 12334 operator xpath_query::unspecified_bool_type() 
const12335 PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const 12336 { 12337 return _impl ? unspecified_bool_xpath_query : 0; 12338 } 12339 operator !() const12340 PUGI__FN bool xpath_query::operator!() const 12341 { 12342 return !_impl; 12343 } 12344 select_node(const char_t * query,xpath_variable_set * variables) const12345 PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const 12346 { 12347 xpath_query q(query, variables); 12348 return select_node(q); 12349 } 12350 select_node(const xpath_query & query) const12351 PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const 12352 { 12353 return query.evaluate_node(*this); 12354 } 12355 select_nodes(const char_t * query,xpath_variable_set * variables) const12356 PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const 12357 { 12358 xpath_query q(query, variables); 12359 return select_nodes(q); 12360 } 12361 select_nodes(const xpath_query & query) const12362 PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const 12363 { 12364 return query.evaluate_node_set(*this); 12365 } 12366 select_single_node(const char_t * query,xpath_variable_set * variables) const12367 PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const 12368 { 12369 xpath_query q(query, variables); 12370 return select_single_node(q); 12371 } 12372 select_single_node(const xpath_query & query) const12373 PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const 12374 { 12375 return query.evaluate_node(*this); 12376 } 12377 } 12378 12379 #endif 12380 12381 #ifdef __BORLANDC__ 12382 # pragma option pop 12383 #endif 12384 12385 // Intel C++ does not properly keep warning state for function templates, 12386 // so popping warning state at the end of translation unit leads to warnings in the middle. 
12387 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) 12388 # pragma warning(pop) 12389 #endif 12390 12391 // Undefine all local macros (makes sure we're not leaking macros in header-only mode) 12392 #undef PUGI__NO_INLINE 12393 #undef PUGI__UNLIKELY 12394 #undef PUGI__STATIC_ASSERT 12395 #undef PUGI__DMC_VOLATILE 12396 #undef PUGI__MSVC_CRT_VERSION 12397 #undef PUGI__NS_BEGIN 12398 #undef PUGI__NS_END 12399 #undef PUGI__FN 12400 #undef PUGI__FN_NO_INLINE 12401 #undef PUGI__GETPAGE_IMPL 12402 #undef PUGI__GETPAGE 12403 #undef PUGI__NODETYPE 12404 #undef PUGI__IS_CHARTYPE_IMPL 12405 #undef PUGI__IS_CHARTYPE 12406 #undef PUGI__IS_CHARTYPEX 12407 #undef PUGI__ENDSWITH 12408 #undef PUGI__SKIPWS 12409 #undef PUGI__OPTSET 12410 #undef PUGI__PUSHNODE 12411 #undef PUGI__POPNODE 12412 #undef PUGI__SCANFOR 12413 #undef PUGI__SCANWHILE 12414 #undef PUGI__SCANWHILE_UNROLL 12415 #undef PUGI__ENDSEG 12416 #undef PUGI__THROW_ERROR 12417 #undef PUGI__CHECK_ERROR 12418 12419 #endif 12420 12421 /** 12422 * Copyright (c) 2006-2015 Arseny Kapoulkine 12423 * 12424 * Permission is hereby granted, free of charge, to any person 12425 * obtaining a copy of this software and associated documentation 12426 * files (the "Software"), to deal in the Software without 12427 * restriction, including without limitation the rights to use, 12428 * copy, modify, merge, publish, distribute, sublicense, and/or sell 12429 * copies of the Software, and to permit persons to whom the 12430 * Software is furnished to do so, subject to the following 12431 * conditions: 12432 * 12433 * The above copyright notice and this permission notice shall be 12434 * included in all copies or substantial portions of the Software. 12435 * 12436 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 12437 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 12438 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 12439 * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 12440 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 12441 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 12442 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 12443 * OTHER DEALINGS IN THE SOFTWARE. 12444 */ 12445