1 /** 2 * pugixml parser - version 1.9 3 * -------------------------------------------------------- 4 * Copyright (C) 2006-2018, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) 5 * Report bugs and download new versions at http://pugixml.org/ 6 * 7 * This library is distributed under the MIT License. See notice at the end 8 * of this file. 9 * 10 * This work is based on the pugxml parser, which is: 11 * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) 12 */ 13 14 #ifndef SOURCE_PUGIXML_CPP 15 #define SOURCE_PUGIXML_CPP 16 17 #include "pugixml.hpp" 18 19 #include <stdlib.h> 20 #include <stdio.h> 21 #include <string.h> 22 #include <assert.h> 23 #include <limits.h> 24 25 #ifdef PUGIXML_WCHAR_MODE 26 # include <wchar.h> 27 #endif 28 29 #ifndef PUGIXML_NO_XPATH 30 # include <math.h> 31 # include <float.h> 32 #endif 33 34 #ifndef PUGIXML_NO_STL 35 # include <istream> 36 # include <ostream> 37 # include <string> 38 #endif 39 40 // For placement new 41 #include <new> 42 43 #ifdef _MSC_VER 44 # pragma warning(push) 45 # pragma warning(disable: 4127) // conditional expression is constant 46 # pragma warning(disable: 4324) // structure was padded due to __declspec(align()) 47 # pragma warning(disable: 4702) // unreachable code 48 # pragma warning(disable: 4996) // this function or variable may be unsafe 49 #endif 50 51 #if defined(_MSC_VER) && defined(__c2__) 52 # pragma clang diagnostic push 53 # pragma clang diagnostic ignored "-Wdeprecated" // this function or variable may be unsafe 54 #endif 55 56 #ifdef __INTEL_COMPILER 57 # pragma warning(disable: 177) // function was declared but never referenced 58 # pragma warning(disable: 279) // controlling expression is constant 59 # pragma warning(disable: 1478 1786) // function was declared "deprecated" 60 # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type 61 #endif 62 63 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY) 64 # pragma warn -8080 // symbol is declared but never 
used; disabling this inside push/pop bracket does not make the warning go away 65 #endif 66 67 #ifdef __BORLANDC__ 68 # pragma option push 69 # pragma warn -8008 // condition is always false 70 # pragma warn -8066 // unreachable code 71 #endif 72 73 #ifdef __SNC__ 74 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug 75 # pragma diag_suppress=178 // function was declared but never referenced 76 # pragma diag_suppress=237 // controlling expression is constant 77 #endif 78 79 #ifdef __TI_COMPILER_VERSION__ 80 # pragma diag_suppress 179 // function was declared but never referenced 81 #endif 82 83 // Inlining controls 84 #if defined(_MSC_VER) && _MSC_VER >= 1300 85 # define PUGI__NO_INLINE __declspec(noinline) 86 #elif defined(__GNUC__) 87 # define PUGI__NO_INLINE __attribute__((noinline)) 88 #else 89 # define PUGI__NO_INLINE 90 #endif 91 92 // Branch weight controls 93 #if defined(__GNUC__) && !defined(__c2__) 94 # define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0) 95 #else 96 # define PUGI__UNLIKELY(cond) (cond) 97 #endif 98 99 // Simple static assertion 100 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 
1 : -1] = {0}; (void)condition_failed[0]; } 101 102 // Digital Mars C++ bug workaround for passing char loaded from memory via stack 103 #ifdef __DMC__ 104 # define PUGI__DMC_VOLATILE volatile 105 #else 106 # define PUGI__DMC_VOLATILE 107 #endif 108 109 // Integer sanitizer workaround; we only apply this for clang since gcc8 has no_sanitize but not unsigned-integer-overflow and produces "attribute directive ignored" warnings 110 #if defined(__clang__) && defined(__has_attribute) 111 # if __has_attribute(no_sanitize) 112 # define PUGI__UNSIGNED_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow"))) 113 # else 114 # define PUGI__UNSIGNED_OVERFLOW 115 # endif 116 #else 117 # define PUGI__UNSIGNED_OVERFLOW 118 #endif 119 120 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all) 121 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST) 122 using std::memcpy; 123 using std::memmove; 124 using std::memset; 125 #endif 126 127 // Some MinGW/GCC versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions from limits.h in some configurations 128 #if defined(PUGIXML_HAS_LONG_LONG) && defined(__GNUC__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX) 129 # define LLONG_MIN (-LLONG_MAX - 1LL) 130 # define LLONG_MAX __LONG_LONG_MAX__ 131 # define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL) 132 #endif 133 134 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features 135 #if defined(_MSC_VER) && !defined(__S3E__) 136 # define PUGI__MSVC_CRT_VERSION _MSC_VER 137 #endif 138 139 // Not all platforms have snprintf; we define a wrapper that uses snprintf if possible. This only works with buffers with a known size. 140 #if __cplusplus >= 201103 141 # define PUGI__SNPRINTF(buf, ...) 
snprintf(buf, sizeof(buf), __VA_ARGS__) 142 #elif defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 143 # define PUGI__SNPRINTF(buf, ...) _snprintf_s(buf, _countof(buf), _TRUNCATE, __VA_ARGS__) 144 #else 145 # define PUGI__SNPRINTF sprintf 146 #endif 147 148 // We put implementation details into an anonymous namespace in source mode, but have to keep it in non-anonymous namespace in header-only mode to prevent binary bloat. 149 #ifdef PUGIXML_HEADER_ONLY 150 # define PUGI__NS_BEGIN namespace pugi { namespace impl { 151 # define PUGI__NS_END } } 152 # define PUGI__FN inline 153 # define PUGI__FN_NO_INLINE inline 154 #else 155 # if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces 156 # define PUGI__NS_BEGIN namespace pugi { namespace impl { 157 # define PUGI__NS_END } } 158 # else 159 # define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace { 160 # define PUGI__NS_END } } } 161 # endif 162 # define PUGI__FN 163 # define PUGI__FN_NO_INLINE PUGI__NO_INLINE 164 #endif 165 166 // uintptr_t 167 #if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561) 168 namespace pugi 169 { 170 # ifndef _UINTPTR_T_DEFINED 171 typedef size_t uintptr_t; 172 # endif 173 174 typedef unsigned __int8 uint8_t; 175 typedef unsigned __int16 uint16_t; 176 typedef unsigned __int32 uint32_t; 177 } 178 #else 179 # include <stdint.h> 180 #endif 181 182 // Memory allocation 183 PUGI__NS_BEGIN default_allocate(size_t size)184 PUGI__FN void* default_allocate(size_t size) 185 { 186 return malloc(size); 187 } 188 default_deallocate(void * ptr)189 PUGI__FN void default_deallocate(void* ptr) 190 { 191 free(ptr); 192 } 193 194 template <typename T> 195 struct xml_memory_management_function_storage 196 { 197 static allocation_function allocate; 198 static deallocation_function deallocate; 199 }; 200 201 // Global allocation functions are stored in class statics so that in header 
mode linker deduplicates them 202 // Without a template<> we'll get multiple definitions of the same static 203 template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate; 204 template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate; 205 206 typedef xml_memory_management_function_storage<int> xml_memory; 207 PUGI__NS_END 208 209 // String utilities 210 PUGI__NS_BEGIN 211 // Get string length strlength(const char_t * s)212 PUGI__FN size_t strlength(const char_t* s) 213 { 214 assert(s); 215 216 #ifdef PUGIXML_WCHAR_MODE 217 return wcslen(s); 218 #else 219 return strlen(s); 220 #endif 221 } 222 223 // Compare two strings strequal(const char_t * src,const char_t * dst)224 PUGI__FN bool strequal(const char_t* src, const char_t* dst) 225 { 226 assert(src && dst); 227 228 #ifdef PUGIXML_WCHAR_MODE 229 return wcscmp(src, dst) == 0; 230 #else 231 return strcmp(src, dst) == 0; 232 #endif 233 } 234 235 // Compare lhs with [rhs_begin, rhs_end) strequalrange(const char_t * lhs,const char_t * rhs,size_t count)236 PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count) 237 { 238 for (size_t i = 0; i < count; ++i) 239 if (lhs[i] != rhs[i]) 240 return false; 241 242 return lhs[count] == 0; 243 } 244 245 // Get length of wide string, even if CRT lacks wide character support strlength_wide(const wchar_t * s)246 PUGI__FN size_t strlength_wide(const wchar_t* s) 247 { 248 assert(s); 249 250 #ifdef PUGIXML_WCHAR_MODE 251 return wcslen(s); 252 #else 253 const wchar_t* end = s; 254 while (*end) end++; 255 return static_cast<size_t>(end - s); 256 #endif 257 } 258 PUGI__NS_END 259 260 // auto_ptr-like object for exception recovery 261 PUGI__NS_BEGIN 262 template <typename T> struct auto_deleter 263 { 264 typedef void (*D)(T*); 265 266 T* data; 267 D deleter; 268 auto_deleterauto_deleter269 auto_deleter(T* data_, D deleter_): data(data_), 
deleter(deleter_) 270 { 271 } 272 ~auto_deleterauto_deleter273 ~auto_deleter() 274 { 275 if (data) deleter(data); 276 } 277 releaseauto_deleter278 T* release() 279 { 280 T* result = data; 281 data = 0; 282 return result; 283 } 284 }; 285 PUGI__NS_END 286 287 #ifdef PUGIXML_COMPACT 288 PUGI__NS_BEGIN 289 class compact_hash_table 290 { 291 public: compact_hash_table()292 compact_hash_table(): _items(0), _capacity(0), _count(0) 293 { 294 } 295 clear()296 void clear() 297 { 298 if (_items) 299 { 300 xml_memory::deallocate(_items); 301 _items = 0; 302 _capacity = 0; 303 _count = 0; 304 } 305 } 306 find(const void * key)307 void* find(const void* key) 308 { 309 if (_capacity == 0) return 0; 310 311 item_t* item = get_item(key); 312 assert(item); 313 assert(item->key == key || (item->key == 0 && item->value == 0)); 314 315 return item->value; 316 } 317 insert(const void * key,void * value)318 void insert(const void* key, void* value) 319 { 320 assert(_capacity != 0 && _count < _capacity - _capacity / 4); 321 322 item_t* item = get_item(key); 323 assert(item); 324 325 if (item->key == 0) 326 { 327 _count++; 328 item->key = key; 329 } 330 331 item->value = value; 332 } 333 reserve(size_t extra=16)334 bool reserve(size_t extra = 16) 335 { 336 if (_count + extra >= _capacity - _capacity / 4) 337 return rehash(_count + extra); 338 339 return true; 340 } 341 342 private: 343 struct item_t 344 { 345 const void* key; 346 void* value; 347 }; 348 349 item_t* _items; 350 size_t _capacity; 351 352 size_t _count; 353 354 bool rehash(size_t count); 355 get_item(const void * key)356 item_t* get_item(const void* key) 357 { 358 assert(key); 359 assert(_capacity > 0); 360 361 size_t hashmod = _capacity - 1; 362 size_t bucket = hash(key) & hashmod; 363 364 for (size_t probe = 0; probe <= hashmod; ++probe) 365 { 366 item_t& probe_item = _items[bucket]; 367 368 if (probe_item.key == key || probe_item.key == 0) 369 return &probe_item; 370 371 // hash collision, quadratic probing 372 bucket = 
(bucket + probe + 1) & hashmod; 373 } 374 375 assert(false && "Hash table is full"); // unreachable 376 return 0; 377 } 378 hash(const void * key)379 static PUGI__UNSIGNED_OVERFLOW unsigned int hash(const void* key) 380 { 381 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key)); 382 383 // MurmurHash3 32-bit finalizer 384 h ^= h >> 16; 385 h *= 0x85ebca6bu; 386 h ^= h >> 13; 387 h *= 0xc2b2ae35u; 388 h ^= h >> 16; 389 390 return h; 391 } 392 }; 393 rehash(size_t count)394 PUGI__FN_NO_INLINE bool compact_hash_table::rehash(size_t count) 395 { 396 size_t capacity = 32; 397 while (count >= capacity - capacity / 4) 398 capacity *= 2; 399 400 compact_hash_table rt; 401 rt._capacity = capacity; 402 rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * capacity)); 403 404 if (!rt._items) 405 return false; 406 407 memset(rt._items, 0, sizeof(item_t) * capacity); 408 409 for (size_t i = 0; i < _capacity; ++i) 410 if (_items[i].key) 411 rt.insert(_items[i].key, _items[i].value); 412 413 if (_items) 414 xml_memory::deallocate(_items); 415 416 _capacity = capacity; 417 _items = rt._items; 418 419 assert(_count == rt._count); 420 421 return true; 422 } 423 424 PUGI__NS_END 425 #endif 426 427 PUGI__NS_BEGIN 428 #ifdef PUGIXML_COMPACT 429 static const uintptr_t xml_memory_block_alignment = 4; 430 #else 431 static const uintptr_t xml_memory_block_alignment = sizeof(void*); 432 #endif 433 434 // extra metadata bits 435 static const uintptr_t xml_memory_page_contents_shared_mask = 64; 436 static const uintptr_t xml_memory_page_name_allocated_mask = 32; 437 static const uintptr_t xml_memory_page_value_allocated_mask = 16; 438 static const uintptr_t xml_memory_page_type_mask = 15; 439 440 // combined masks for string uniqueness 441 static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask; 442 static const uintptr_t 
xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask; 443 444 #ifdef PUGIXML_COMPACT 445 #define PUGI__GETHEADER_IMPL(object, page, flags) // unused 446 #define PUGI__GETPAGE_IMPL(header) (header).get_page() 447 #else 448 #define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags)) 449 // this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings 450 #define PUGI__GETPAGE_IMPL(header) static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>(reinterpret_cast<const char*>(&header) - (header >> 8)))) 451 #endif 452 453 #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header) 454 #define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask) 455 456 struct xml_allocator; 457 458 struct xml_memory_page 459 { constructxml_memory_page460 static xml_memory_page* construct(void* memory) 461 { 462 xml_memory_page* result = static_cast<xml_memory_page*>(memory); 463 464 result->allocator = 0; 465 result->prev = 0; 466 result->next = 0; 467 result->busy_size = 0; 468 result->freed_size = 0; 469 470 #ifdef PUGIXML_COMPACT 471 result->compact_string_base = 0; 472 result->compact_shared_parent = 0; 473 result->compact_page_marker = 0; 474 #endif 475 476 return result; 477 } 478 479 xml_allocator* allocator; 480 481 xml_memory_page* prev; 482 xml_memory_page* next; 483 484 size_t busy_size; 485 size_t freed_size; 486 487 #ifdef PUGIXML_COMPACT 488 char_t* compact_string_base; 489 void* compact_shared_parent; 490 uint32_t* compact_page_marker; 491 #endif 492 }; 493 494 static const size_t xml_memory_page_size = 495 #ifdef PUGIXML_MEMORY_PAGE_SIZE 496 (PUGIXML_MEMORY_PAGE_SIZE) 497 #else 498 32768 499 #endif 500 - sizeof(xml_memory_page); 501 502 struct xml_memory_string_header 503 { 504 uint16_t page_offset; // offset from 
page->data 505 uint16_t full_size; // 0 if string occupies whole page 506 }; 507 508 struct xml_allocator 509 { xml_allocatorxml_allocator510 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size) 511 { 512 #ifdef PUGIXML_COMPACT 513 _hash = 0; 514 #endif 515 } 516 allocate_pagexml_allocator517 xml_memory_page* allocate_page(size_t data_size) 518 { 519 size_t size = sizeof(xml_memory_page) + data_size; 520 521 // allocate block with some alignment, leaving memory for worst-case padding 522 void* memory = xml_memory::allocate(size); 523 if (!memory) return 0; 524 525 // prepare page structure 526 xml_memory_page* page = xml_memory_page::construct(memory); 527 assert(page); 528 529 page->allocator = _root->allocator; 530 531 return page; 532 } 533 deallocate_pagexml_allocator534 static void deallocate_page(xml_memory_page* page) 535 { 536 xml_memory::deallocate(page); 537 } 538 539 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page); 540 allocate_memoryxml_allocator541 void* allocate_memory(size_t size, xml_memory_page*& out_page) 542 { 543 if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size)) 544 return allocate_memory_oob(size, out_page); 545 546 void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size; 547 548 _busy_size += size; 549 550 out_page = _root; 551 552 return buf; 553 } 554 555 #ifdef PUGIXML_COMPACT allocate_objectxml_allocator556 void* allocate_object(size_t size, xml_memory_page*& out_page) 557 { 558 void* result = allocate_memory(size + sizeof(uint32_t), out_page); 559 if (!result) return 0; 560 561 // adjust for marker 562 ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker); 563 564 if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment)) 565 { 566 // insert new marker 567 uint32_t* marker = static_cast<uint32_t*>(result); 568 569 *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - 
reinterpret_cast<char*>(out_page)); 570 out_page->compact_page_marker = marker; 571 572 // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block 573 // this will make sure deallocate_memory correctly tracks the size 574 out_page->freed_size += sizeof(uint32_t); 575 576 return marker + 1; 577 } 578 else 579 { 580 // roll back uint32_t part 581 _busy_size -= sizeof(uint32_t); 582 583 return result; 584 } 585 } 586 #else allocate_objectxml_allocator587 void* allocate_object(size_t size, xml_memory_page*& out_page) 588 { 589 return allocate_memory(size, out_page); 590 } 591 #endif 592 deallocate_memoryxml_allocator593 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) 594 { 595 if (page == _root) page->busy_size = _busy_size; 596 597 assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size); 598 (void)!ptr; 599 600 page->freed_size += size; 601 assert(page->freed_size <= page->busy_size); 602 603 if (page->freed_size == page->busy_size) 604 { 605 if (page->next == 0) 606 { 607 assert(_root == page); 608 609 // top page freed, just reset sizes 610 page->busy_size = 0; 611 page->freed_size = 0; 612 613 #ifdef PUGIXML_COMPACT 614 // reset compact state to maximize efficiency 615 page->compact_string_base = 0; 616 page->compact_shared_parent = 0; 617 page->compact_page_marker = 0; 618 #endif 619 620 _busy_size = 0; 621 } 622 else 623 { 624 assert(_root != page); 625 assert(page->prev); 626 627 // remove from the list 628 page->prev->next = page->next; 629 page->next->prev = page->prev; 630 631 // deallocate 632 deallocate_page(page); 633 } 634 } 635 } 636 allocate_stringxml_allocator637 char_t* allocate_string(size_t length) 638 { 639 static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment; 640 641 PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset); 642 643 // 
allocate memory for string and header block 644 size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t); 645 646 // round size up to block alignment boundary 647 size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1); 648 649 xml_memory_page* page; 650 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page)); 651 652 if (!header) return 0; 653 654 // setup header 655 ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page); 656 657 assert(page_offset % xml_memory_block_alignment == 0); 658 assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset); 659 header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment); 660 661 // full_size == 0 for large strings that occupy the whole page 662 assert(full_size % xml_memory_block_alignment == 0); 663 assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0)); 664 header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? 
full_size / xml_memory_block_alignment : 0); 665 666 // round-trip through void* to avoid 'cast increases required alignment of target type' warning 667 // header is guaranteed a pointer-sized alignment, which should be enough for char_t 668 return static_cast<char_t*>(static_cast<void*>(header + 1)); 669 } 670 deallocate_stringxml_allocator671 void deallocate_string(char_t* string) 672 { 673 // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings 674 // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string 675 676 // get header 677 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1; 678 assert(header); 679 680 // deallocate 681 size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment; 682 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset)); 683 684 // if full_size == 0 then this string occupies the whole page 685 size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment; 686 687 deallocate_memory(header, full_size, page); 688 } 689 reservexml_allocator690 bool reserve() 691 { 692 #ifdef PUGIXML_COMPACT 693 return _hash->reserve(); 694 #else 695 return true; 696 #endif 697 } 698 699 xml_memory_page* _root; 700 size_t _busy_size; 701 702 #ifdef PUGIXML_COMPACT 703 compact_hash_table* _hash; 704 #endif 705 }; 706 allocate_memory_oob(size_t size,xml_memory_page * & out_page)707 PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) 708 { 709 const size_t large_allocation_threshold = xml_memory_page_size / 4; 710 711 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? 
xml_memory_page_size : size); 712 out_page = page; 713 714 if (!page) return 0; 715 716 if (size <= large_allocation_threshold) 717 { 718 _root->busy_size = _busy_size; 719 720 // insert page at the end of linked list 721 page->prev = _root; 722 _root->next = page; 723 _root = page; 724 725 _busy_size = size; 726 } 727 else 728 { 729 // insert page before the end of linked list, so that it is deleted as soon as possible 730 // the last page is not deleted even if it's empty (see deallocate_memory) 731 assert(_root->prev); 732 733 page->prev = _root->prev; 734 page->next = _root; 735 736 _root->prev->next = page; 737 _root->prev = page; 738 739 page->busy_size = size; 740 } 741 742 return reinterpret_cast<char*>(page) + sizeof(xml_memory_page); 743 } 744 PUGI__NS_END 745 746 #ifdef PUGIXML_COMPACT 747 PUGI__NS_BEGIN 748 static const uintptr_t compact_alignment_log2 = 2; 749 static const uintptr_t compact_alignment = 1 << compact_alignment_log2; 750 751 class compact_header 752 { 753 public: compact_header(xml_memory_page * page,unsigned int flags)754 compact_header(xml_memory_page* page, unsigned int flags) 755 { 756 PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment); 757 758 ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker)); 759 assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment); 760 761 _page = static_cast<unsigned char>(offset >> compact_alignment_log2); 762 _flags = static_cast<unsigned char>(flags); 763 } 764 operator &=(uintptr_t mod)765 void operator&=(uintptr_t mod) 766 { 767 _flags &= static_cast<unsigned char>(mod); 768 } 769 operator |=(uintptr_t mod)770 void operator|=(uintptr_t mod) 771 { 772 _flags |= static_cast<unsigned char>(mod); 773 } 774 operator &(uintptr_t mod) const775 uintptr_t operator&(uintptr_t mod) const 776 { 777 return _flags & mod; 778 } 779 get_page() const780 xml_memory_page* get_page() const 781 { 782 // 
round-trip through void* to silence 'cast increases required alignment of target type' warnings 783 const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2); 784 const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker)); 785 786 return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page))); 787 } 788 789 private: 790 unsigned char _page; 791 unsigned char _flags; 792 }; 793 compact_get_page(const void * object,int header_offset)794 PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset) 795 { 796 const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset); 797 798 return header->get_page(); 799 } 800 compact_get_value(const void * object)801 template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object) 802 { 803 return static_cast<T*>(compact_get_page(object, header_offset)->allocator->_hash->find(object)); 804 } 805 compact_set_value(const void * object,T * value)806 template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value) 807 { 808 compact_get_page(object, header_offset)->allocator->_hash->insert(object, value); 809 } 810 811 template <typename T, int header_offset, int start = -126> class compact_pointer 812 { 813 public: compact_pointer()814 compact_pointer(): _data(0) 815 { 816 } 817 operator =(const compact_pointer & rhs)818 void operator=(const compact_pointer& rhs) 819 { 820 *this = rhs + 0; 821 } 822 operator =(T * value)823 void operator=(T* value) 824 { 825 if (value) 826 { 827 // value is guaranteed to be compact-aligned; 'this' is not 828 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) 829 // so for negative offsets (e.g. 
-3) we need to adjust the diff by compact_alignment - 1 to 830 // compensate for arithmetic shift rounding for negative values 831 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this); 832 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start; 833 834 if (static_cast<uintptr_t>(offset) <= 253) 835 _data = static_cast<unsigned char>(offset + 1); 836 else 837 { 838 compact_set_value<header_offset>(this, value); 839 840 _data = 255; 841 } 842 } 843 else 844 _data = 0; 845 } 846 operator T*() const847 operator T*() const 848 { 849 if (_data) 850 { 851 if (_data < 255) 852 { 853 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1); 854 855 return reinterpret_cast<T*>(base + (_data - 1 + start) * compact_alignment); 856 } 857 else 858 return compact_get_value<header_offset, T>(this); 859 } 860 else 861 return 0; 862 } 863 operator ->() const864 T* operator->() const 865 { 866 return *this; 867 } 868 869 private: 870 unsigned char _data; 871 }; 872 873 template <typename T, int header_offset> class compact_pointer_parent 874 { 875 public: compact_pointer_parent()876 compact_pointer_parent(): _data(0) 877 { 878 } 879 operator =(const compact_pointer_parent & rhs)880 void operator=(const compact_pointer_parent& rhs) 881 { 882 *this = rhs + 0; 883 } 884 operator =(T * value)885 void operator=(T* value) 886 { 887 if (value) 888 { 889 // value is guaranteed to be compact-aligned; 'this' is not 890 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) 891 // so for negative offsets (e.g. 
-3) we need to adjust the diff by compact_alignment - 1 to 892 // compensate for arithmetic shift behavior for negative values 893 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this); 894 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533; 895 896 if (static_cast<uintptr_t>(offset) <= 65533) 897 { 898 _data = static_cast<unsigned short>(offset + 1); 899 } 900 else 901 { 902 xml_memory_page* page = compact_get_page(this, header_offset); 903 904 if (PUGI__UNLIKELY(page->compact_shared_parent == 0)) 905 page->compact_shared_parent = value; 906 907 if (page->compact_shared_parent == value) 908 { 909 _data = 65534; 910 } 911 else 912 { 913 compact_set_value<header_offset>(this, value); 914 915 _data = 65535; 916 } 917 } 918 } 919 else 920 { 921 _data = 0; 922 } 923 } 924 operator T*() const925 operator T*() const 926 { 927 if (_data) 928 { 929 if (_data < 65534) 930 { 931 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1); 932 933 return reinterpret_cast<T*>(base + (_data - 1 - 65533) * compact_alignment); 934 } 935 else if (_data == 65534) 936 return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent); 937 else 938 return compact_get_value<header_offset, T>(this); 939 } 940 else 941 return 0; 942 } 943 operator ->() const944 T* operator->() const 945 { 946 return *this; 947 } 948 949 private: 950 uint16_t _data; 951 }; 952 953 template <int header_offset, int base_offset> class compact_string 954 { 955 public: compact_string()956 compact_string(): _data(0) 957 { 958 } 959 operator =(const compact_string & rhs)960 void operator=(const compact_string& rhs) 961 { 962 *this = rhs + 0; 963 } 964 operator =(char_t * value)965 void operator=(char_t* value) 966 { 967 if (value) 968 { 969 xml_memory_page* page = compact_get_page(this, header_offset); 970 971 if (PUGI__UNLIKELY(page->compact_string_base == 0)) 972 page->compact_string_base = value; 973 974 
ptrdiff_t offset = value - page->compact_string_base; 975 976 if (static_cast<uintptr_t>(offset) < (65535 << 7)) 977 { 978 // round-trip through void* to silence 'cast increases required alignment of target type' warnings 979 uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset)); 980 981 if (*base == 0) 982 { 983 *base = static_cast<uint16_t>((offset >> 7) + 1); 984 _data = static_cast<unsigned char>((offset & 127) + 1); 985 } 986 else 987 { 988 ptrdiff_t remainder = offset - ((*base - 1) << 7); 989 990 if (static_cast<uintptr_t>(remainder) <= 253) 991 { 992 _data = static_cast<unsigned char>(remainder + 1); 993 } 994 else 995 { 996 compact_set_value<header_offset>(this, value); 997 998 _data = 255; 999 } 1000 } 1001 } 1002 else 1003 { 1004 compact_set_value<header_offset>(this, value); 1005 1006 _data = 255; 1007 } 1008 } 1009 else 1010 { 1011 _data = 0; 1012 } 1013 } 1014 operator char_t*() const1015 operator char_t*() const 1016 { 1017 if (_data) 1018 { 1019 if (_data < 255) 1020 { 1021 xml_memory_page* page = compact_get_page(this, header_offset); 1022 1023 // round-trip through void* to silence 'cast increases required alignment of target type' warnings 1024 const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset)); 1025 assert(*base); 1026 1027 ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1); 1028 1029 return page->compact_string_base + offset; 1030 } 1031 else 1032 { 1033 return compact_get_value<header_offset, char_t>(this); 1034 } 1035 } 1036 else 1037 return 0; 1038 } 1039 1040 private: 1041 unsigned char _data; 1042 }; 1043 PUGI__NS_END 1044 #endif 1045 1046 #ifdef PUGIXML_COMPACT 1047 namespace pugi 1048 { 1049 struct xml_attribute_struct 1050 { xml_attribute_structpugi::xml_attribute_struct1051 xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0) 1052 { 1053 
PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8); 1054 } 1055 1056 impl::compact_header header; 1057 1058 uint16_t namevalue_base; 1059 1060 impl::compact_string<4, 2> name; 1061 impl::compact_string<5, 3> value; 1062 1063 impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c; 1064 impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute; 1065 }; 1066 1067 struct xml_node_struct 1068 { xml_node_structpugi::xml_node_struct1069 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0) 1070 { 1071 PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12); 1072 } 1073 1074 impl::compact_header header; 1075 1076 uint16_t namevalue_base; 1077 1078 impl::compact_string<4, 2> name; 1079 impl::compact_string<5, 3> value; 1080 1081 impl::compact_pointer_parent<xml_node_struct, 6> parent; 1082 1083 impl::compact_pointer<xml_node_struct, 8, 0> first_child; 1084 1085 impl::compact_pointer<xml_node_struct, 9> prev_sibling_c; 1086 impl::compact_pointer<xml_node_struct, 10, 0> next_sibling; 1087 1088 impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute; 1089 }; 1090 } 1091 #else 1092 namespace pugi 1093 { 1094 struct xml_attribute_struct 1095 { 1096 xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0) 1097 { 1098 header = PUGI__GETHEADER_IMPL(this, page, 0); 1099 } 1100 1101 uintptr_t header; 1102 1103 char_t* name; 1104 char_t* value; 1105 1106 xml_attribute_struct* prev_attribute_c; 1107 xml_attribute_struct* next_attribute; 1108 }; 1109 1110 struct xml_node_struct 1111 { 1112 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) 1113 { 1114 header = PUGI__GETHEADER_IMPL(this, page, type); 1115 } 1116 1117 uintptr_t header; 1118 1119 char_t* name; 1120 char_t* value; 1121 1122 xml_node_struct* parent; 1123 1124 xml_node_struct* 
first_child; 1125 1126 xml_node_struct* prev_sibling_c; 1127 xml_node_struct* next_sibling; 1128 1129 xml_attribute_struct* first_attribute; 1130 }; 1131 } 1132 #endif 1133 1134 PUGI__NS_BEGIN 1135 struct xml_extra_buffer 1136 { 1137 char_t* buffer; 1138 xml_extra_buffer* next; 1139 }; 1140 1141 struct xml_document_struct: public xml_node_struct, public xml_allocator 1142 { xml_document_structxml_document_struct1143 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0) 1144 { 1145 } 1146 1147 const char_t* buffer; 1148 1149 xml_extra_buffer* extra_buffers; 1150 1151 #ifdef PUGIXML_COMPACT 1152 compact_hash_table hash; 1153 #endif 1154 }; 1155 get_allocator(const Object * object)1156 template <typename Object> inline xml_allocator& get_allocator(const Object* object) 1157 { 1158 assert(object); 1159 1160 return *PUGI__GETPAGE(object)->allocator; 1161 } 1162 get_document(const Object * object)1163 template <typename Object> inline xml_document_struct& get_document(const Object* object) 1164 { 1165 assert(object); 1166 1167 return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator); 1168 } 1169 PUGI__NS_END 1170 1171 // Low-level DOM operations 1172 PUGI__NS_BEGIN allocate_attribute(xml_allocator & alloc)1173 inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc) 1174 { 1175 xml_memory_page* page; 1176 void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page); 1177 if (!memory) return 0; 1178 1179 return new (memory) xml_attribute_struct(page); 1180 } 1181 allocate_node(xml_allocator & alloc,xml_node_type type)1182 inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type) 1183 { 1184 xml_memory_page* page; 1185 void* memory = alloc.allocate_object(sizeof(xml_node_struct), page); 1186 if (!memory) return 0; 1187 1188 return new (memory) xml_node_struct(page, type); 1189 } 1190 destroy_attribute(xml_attribute_struct * 
a,xml_allocator & alloc)1191 inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc) 1192 { 1193 if (a->header & impl::xml_memory_page_name_allocated_mask) 1194 alloc.deallocate_string(a->name); 1195 1196 if (a->header & impl::xml_memory_page_value_allocated_mask) 1197 alloc.deallocate_string(a->value); 1198 1199 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a)); 1200 } 1201 destroy_node(xml_node_struct * n,xml_allocator & alloc)1202 inline void destroy_node(xml_node_struct* n, xml_allocator& alloc) 1203 { 1204 if (n->header & impl::xml_memory_page_name_allocated_mask) 1205 alloc.deallocate_string(n->name); 1206 1207 if (n->header & impl::xml_memory_page_value_allocated_mask) 1208 alloc.deallocate_string(n->value); 1209 1210 for (xml_attribute_struct* attr = n->first_attribute; attr; ) 1211 { 1212 xml_attribute_struct* next = attr->next_attribute; 1213 1214 destroy_attribute(attr, alloc); 1215 1216 attr = next; 1217 } 1218 1219 for (xml_node_struct* child = n->first_child; child; ) 1220 { 1221 xml_node_struct* next = child->next_sibling; 1222 1223 destroy_node(child, alloc); 1224 1225 child = next; 1226 } 1227 1228 alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n)); 1229 } 1230 append_node(xml_node_struct * child,xml_node_struct * node)1231 inline void append_node(xml_node_struct* child, xml_node_struct* node) 1232 { 1233 child->parent = node; 1234 1235 xml_node_struct* head = node->first_child; 1236 1237 if (head) 1238 { 1239 xml_node_struct* tail = head->prev_sibling_c; 1240 1241 tail->next_sibling = child; 1242 child->prev_sibling_c = tail; 1243 head->prev_sibling_c = child; 1244 } 1245 else 1246 { 1247 node->first_child = child; 1248 child->prev_sibling_c = child; 1249 } 1250 } 1251 prepend_node(xml_node_struct * child,xml_node_struct * node)1252 inline void prepend_node(xml_node_struct* child, xml_node_struct* node) 1253 { 1254 child->parent = node; 1255 1256 xml_node_struct* head = 
node->first_child; 1257 1258 if (head) 1259 { 1260 child->prev_sibling_c = head->prev_sibling_c; 1261 head->prev_sibling_c = child; 1262 } 1263 else 1264 child->prev_sibling_c = child; 1265 1266 child->next_sibling = head; 1267 node->first_child = child; 1268 } 1269 insert_node_after(xml_node_struct * child,xml_node_struct * node)1270 inline void insert_node_after(xml_node_struct* child, xml_node_struct* node) 1271 { 1272 xml_node_struct* parent = node->parent; 1273 1274 child->parent = parent; 1275 1276 if (node->next_sibling) 1277 node->next_sibling->prev_sibling_c = child; 1278 else 1279 parent->first_child->prev_sibling_c = child; 1280 1281 child->next_sibling = node->next_sibling; 1282 child->prev_sibling_c = node; 1283 1284 node->next_sibling = child; 1285 } 1286 insert_node_before(xml_node_struct * child,xml_node_struct * node)1287 inline void insert_node_before(xml_node_struct* child, xml_node_struct* node) 1288 { 1289 xml_node_struct* parent = node->parent; 1290 1291 child->parent = parent; 1292 1293 if (node->prev_sibling_c->next_sibling) 1294 node->prev_sibling_c->next_sibling = child; 1295 else 1296 parent->first_child = child; 1297 1298 child->prev_sibling_c = node->prev_sibling_c; 1299 child->next_sibling = node; 1300 1301 node->prev_sibling_c = child; 1302 } 1303 remove_node(xml_node_struct * node)1304 inline void remove_node(xml_node_struct* node) 1305 { 1306 xml_node_struct* parent = node->parent; 1307 1308 if (node->next_sibling) 1309 node->next_sibling->prev_sibling_c = node->prev_sibling_c; 1310 else 1311 parent->first_child->prev_sibling_c = node->prev_sibling_c; 1312 1313 if (node->prev_sibling_c->next_sibling) 1314 node->prev_sibling_c->next_sibling = node->next_sibling; 1315 else 1316 parent->first_child = node->next_sibling; 1317 1318 node->parent = 0; 1319 node->prev_sibling_c = 0; 1320 node->next_sibling = 0; 1321 } 1322 append_attribute(xml_attribute_struct * attr,xml_node_struct * node)1323 inline void 
append_attribute(xml_attribute_struct* attr, xml_node_struct* node) 1324 { 1325 xml_attribute_struct* head = node->first_attribute; 1326 1327 if (head) 1328 { 1329 xml_attribute_struct* tail = head->prev_attribute_c; 1330 1331 tail->next_attribute = attr; 1332 attr->prev_attribute_c = tail; 1333 head->prev_attribute_c = attr; 1334 } 1335 else 1336 { 1337 node->first_attribute = attr; 1338 attr->prev_attribute_c = attr; 1339 } 1340 } 1341 prepend_attribute(xml_attribute_struct * attr,xml_node_struct * node)1342 inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node) 1343 { 1344 xml_attribute_struct* head = node->first_attribute; 1345 1346 if (head) 1347 { 1348 attr->prev_attribute_c = head->prev_attribute_c; 1349 head->prev_attribute_c = attr; 1350 } 1351 else 1352 attr->prev_attribute_c = attr; 1353 1354 attr->next_attribute = head; 1355 node->first_attribute = attr; 1356 } 1357 insert_attribute_after(xml_attribute_struct * attr,xml_attribute_struct * place,xml_node_struct * node)1358 inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) 1359 { 1360 if (place->next_attribute) 1361 place->next_attribute->prev_attribute_c = attr; 1362 else 1363 node->first_attribute->prev_attribute_c = attr; 1364 1365 attr->next_attribute = place->next_attribute; 1366 attr->prev_attribute_c = place; 1367 place->next_attribute = attr; 1368 } 1369 insert_attribute_before(xml_attribute_struct * attr,xml_attribute_struct * place,xml_node_struct * node)1370 inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) 1371 { 1372 if (place->prev_attribute_c->next_attribute) 1373 place->prev_attribute_c->next_attribute = attr; 1374 else 1375 node->first_attribute = attr; 1376 1377 attr->prev_attribute_c = place->prev_attribute_c; 1378 attr->next_attribute = place; 1379 place->prev_attribute_c = attr; 1380 } 1381 remove_attribute(xml_attribute_struct * 
attr,xml_node_struct * node)1382 inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node) 1383 { 1384 if (attr->next_attribute) 1385 attr->next_attribute->prev_attribute_c = attr->prev_attribute_c; 1386 else 1387 node->first_attribute->prev_attribute_c = attr->prev_attribute_c; 1388 1389 if (attr->prev_attribute_c->next_attribute) 1390 attr->prev_attribute_c->next_attribute = attr->next_attribute; 1391 else 1392 node->first_attribute = attr->next_attribute; 1393 1394 attr->prev_attribute_c = 0; 1395 attr->next_attribute = 0; 1396 } 1397 append_new_node(xml_node_struct * node,xml_allocator & alloc,xml_node_type type=node_element)1398 PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) 1399 { 1400 if (!alloc.reserve()) return 0; 1401 1402 xml_node_struct* child = allocate_node(alloc, type); 1403 if (!child) return 0; 1404 1405 append_node(child, node); 1406 1407 return child; 1408 } 1409 append_new_attribute(xml_node_struct * node,xml_allocator & alloc)1410 PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc) 1411 { 1412 if (!alloc.reserve()) return 0; 1413 1414 xml_attribute_struct* attr = allocate_attribute(alloc); 1415 if (!attr) return 0; 1416 1417 append_attribute(attr, node); 1418 1419 return attr; 1420 } 1421 PUGI__NS_END 1422 1423 // Helper classes for code generation 1424 PUGI__NS_BEGIN 1425 struct opt_false 1426 { 1427 enum { value = 0 }; 1428 }; 1429 1430 struct opt_true 1431 { 1432 enum { value = 1 }; 1433 }; 1434 PUGI__NS_END 1435 1436 // Unicode utilities 1437 PUGI__NS_BEGIN endian_swap(uint16_t value)1438 inline uint16_t endian_swap(uint16_t value) 1439 { 1440 return static_cast<uint16_t>(((value & 0xff) << 8) | (value >> 8)); 1441 } 1442 endian_swap(uint32_t value)1443 inline uint32_t endian_swap(uint32_t value) 1444 { 1445 return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 
0xff0000) >> 8) | (value >> 24); 1446 } 1447 1448 struct utf8_counter 1449 { 1450 typedef size_t value_type; 1451 lowutf8_counter1452 static value_type low(value_type result, uint32_t ch) 1453 { 1454 // U+0000..U+007F 1455 if (ch < 0x80) return result + 1; 1456 // U+0080..U+07FF 1457 else if (ch < 0x800) return result + 2; 1458 // U+0800..U+FFFF 1459 else return result + 3; 1460 } 1461 highutf8_counter1462 static value_type high(value_type result, uint32_t) 1463 { 1464 // U+10000..U+10FFFF 1465 return result + 4; 1466 } 1467 }; 1468 1469 struct utf8_writer 1470 { 1471 typedef uint8_t* value_type; 1472 lowutf8_writer1473 static value_type low(value_type result, uint32_t ch) 1474 { 1475 // U+0000..U+007F 1476 if (ch < 0x80) 1477 { 1478 *result = static_cast<uint8_t>(ch); 1479 return result + 1; 1480 } 1481 // U+0080..U+07FF 1482 else if (ch < 0x800) 1483 { 1484 result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6)); 1485 result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F)); 1486 return result + 2; 1487 } 1488 // U+0800..U+FFFF 1489 else 1490 { 1491 result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12)); 1492 result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F)); 1493 result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F)); 1494 return result + 3; 1495 } 1496 } 1497 highutf8_writer1498 static value_type high(value_type result, uint32_t ch) 1499 { 1500 // U+10000..U+10FFFF 1501 result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18)); 1502 result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F)); 1503 result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F)); 1504 result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F)); 1505 return result + 4; 1506 } 1507 anyutf8_writer1508 static value_type any(value_type result, uint32_t ch) 1509 { 1510 return (ch < 0x10000) ? 
low(result, ch) : high(result, ch); 1511 } 1512 }; 1513 1514 struct utf16_counter 1515 { 1516 typedef size_t value_type; 1517 lowutf16_counter1518 static value_type low(value_type result, uint32_t) 1519 { 1520 return result + 1; 1521 } 1522 highutf16_counter1523 static value_type high(value_type result, uint32_t) 1524 { 1525 return result + 2; 1526 } 1527 }; 1528 1529 struct utf16_writer 1530 { 1531 typedef uint16_t* value_type; 1532 lowutf16_writer1533 static value_type low(value_type result, uint32_t ch) 1534 { 1535 *result = static_cast<uint16_t>(ch); 1536 1537 return result + 1; 1538 } 1539 highutf16_writer1540 static value_type high(value_type result, uint32_t ch) 1541 { 1542 uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10; 1543 uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff; 1544 1545 result[0] = static_cast<uint16_t>(0xD800 + msh); 1546 result[1] = static_cast<uint16_t>(0xDC00 + lsh); 1547 1548 return result + 2; 1549 } 1550 anyutf16_writer1551 static value_type any(value_type result, uint32_t ch) 1552 { 1553 return (ch < 0x10000) ? 
low(result, ch) : high(result, ch); 1554 } 1555 }; 1556 1557 struct utf32_counter 1558 { 1559 typedef size_t value_type; 1560 lowutf32_counter1561 static value_type low(value_type result, uint32_t) 1562 { 1563 return result + 1; 1564 } 1565 highutf32_counter1566 static value_type high(value_type result, uint32_t) 1567 { 1568 return result + 1; 1569 } 1570 }; 1571 1572 struct utf32_writer 1573 { 1574 typedef uint32_t* value_type; 1575 lowutf32_writer1576 static value_type low(value_type result, uint32_t ch) 1577 { 1578 *result = ch; 1579 1580 return result + 1; 1581 } 1582 highutf32_writer1583 static value_type high(value_type result, uint32_t ch) 1584 { 1585 *result = ch; 1586 1587 return result + 1; 1588 } 1589 anyutf32_writer1590 static value_type any(value_type result, uint32_t ch) 1591 { 1592 *result = ch; 1593 1594 return result + 1; 1595 } 1596 }; 1597 1598 struct latin1_writer 1599 { 1600 typedef uint8_t* value_type; 1601 lowlatin1_writer1602 static value_type low(value_type result, uint32_t ch) 1603 { 1604 *result = static_cast<uint8_t>(ch > 255 ? '?' 
: ch); 1605 1606 return result + 1; 1607 } 1608 highlatin1_writer1609 static value_type high(value_type result, uint32_t ch) 1610 { 1611 (void)ch; 1612 1613 *result = '?'; 1614 1615 return result + 1; 1616 } 1617 }; 1618 1619 struct utf8_decoder 1620 { 1621 typedef uint8_t type; 1622 processutf8_decoder1623 template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) 1624 { 1625 const uint8_t utf8_byte_mask = 0x3f; 1626 1627 while (size) 1628 { 1629 uint8_t lead = *data; 1630 1631 // 0xxxxxxx -> U+0000..U+007F 1632 if (lead < 0x80) 1633 { 1634 result = Traits::low(result, lead); 1635 data += 1; 1636 size -= 1; 1637 1638 // process aligned single-byte (ascii) blocks 1639 if ((reinterpret_cast<uintptr_t>(data) & 3) == 0) 1640 { 1641 // round-trip through void* to silence 'cast increases required alignment of target type' warnings 1642 while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0) 1643 { 1644 result = Traits::low(result, data[0]); 1645 result = Traits::low(result, data[1]); 1646 result = Traits::low(result, data[2]); 1647 result = Traits::low(result, data[3]); 1648 data += 4; 1649 size -= 4; 1650 } 1651 } 1652 } 1653 // 110xxxxx -> U+0080..U+07FF 1654 else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80) 1655 { 1656 result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask)); 1657 data += 2; 1658 size -= 2; 1659 } 1660 // 1110xxxx -> U+0800-U+FFFF 1661 else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80) 1662 { 1663 result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask)); 1664 data += 3; 1665 size -= 3; 1666 } 1667 // 11110xxx -> U+10000..U+10FFFF 1668 else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && 
(data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80) 1669 { 1670 result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask)); 1671 data += 4; 1672 size -= 4; 1673 } 1674 // 10xxxxxx or 11111xxx -> invalid 1675 else 1676 { 1677 data += 1; 1678 size -= 1; 1679 } 1680 } 1681 1682 return result; 1683 } 1684 }; 1685 1686 template <typename opt_swap> struct utf16_decoder 1687 { 1688 typedef uint16_t type; 1689 processutf16_decoder1690 template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits) 1691 { 1692 while (size) 1693 { 1694 uint16_t lead = opt_swap::value ? endian_swap(*data) : *data; 1695 1696 // U+0000..U+D7FF 1697 if (lead < 0xD800) 1698 { 1699 result = Traits::low(result, lead); 1700 data += 1; 1701 size -= 1; 1702 } 1703 // U+E000..U+FFFF 1704 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000) 1705 { 1706 result = Traits::low(result, lead); 1707 data += 1; 1708 size -= 1; 1709 } 1710 // surrogate pair lead 1711 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2) 1712 { 1713 uint16_t next = opt_swap::value ? 
endian_swap(data[1]) : data[1]; 1714 1715 if (static_cast<unsigned int>(next - 0xDC00) < 0x400) 1716 { 1717 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff)); 1718 data += 2; 1719 size -= 2; 1720 } 1721 else 1722 { 1723 data += 1; 1724 size -= 1; 1725 } 1726 } 1727 else 1728 { 1729 data += 1; 1730 size -= 1; 1731 } 1732 } 1733 1734 return result; 1735 } 1736 }; 1737 1738 template <typename opt_swap> struct utf32_decoder 1739 { 1740 typedef uint32_t type; 1741 processutf32_decoder1742 template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits) 1743 { 1744 while (size) 1745 { 1746 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data; 1747 1748 // U+0000..U+FFFF 1749 if (lead < 0x10000) 1750 { 1751 result = Traits::low(result, lead); 1752 data += 1; 1753 size -= 1; 1754 } 1755 // U+10000..U+10FFFF 1756 else 1757 { 1758 result = Traits::high(result, lead); 1759 data += 1; 1760 size -= 1; 1761 } 1762 } 1763 1764 return result; 1765 } 1766 }; 1767 1768 struct latin1_decoder 1769 { 1770 typedef uint8_t type; 1771 processlatin1_decoder1772 template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) 1773 { 1774 while (size) 1775 { 1776 result = Traits::low(result, *data); 1777 data += 1; 1778 size -= 1; 1779 } 1780 1781 return result; 1782 } 1783 }; 1784 1785 template <size_t size> struct wchar_selector; 1786 1787 template <> struct wchar_selector<2> 1788 { 1789 typedef uint16_t type; 1790 typedef utf16_counter counter; 1791 typedef utf16_writer writer; 1792 typedef utf16_decoder<opt_false> decoder; 1793 }; 1794 1795 template <> struct wchar_selector<4> 1796 { 1797 typedef uint32_t type; 1798 typedef utf32_counter counter; 1799 typedef utf32_writer writer; 1800 typedef utf32_decoder<opt_false> decoder; 1801 }; 1802 1803 typedef 
wchar_selector<sizeof(wchar_t)>::counter wchar_counter; 1804 typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer; 1805 1806 struct wchar_decoder 1807 { 1808 typedef wchar_t type; 1809 processwchar_decoder1810 template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits) 1811 { 1812 typedef wchar_selector<sizeof(wchar_t)>::decoder decoder; 1813 1814 return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits); 1815 } 1816 }; 1817 1818 #ifdef PUGIXML_WCHAR_MODE convert_wchar_endian_swap(wchar_t * result,const wchar_t * data,size_t length)1819 PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length) 1820 { 1821 for (size_t i = 0; i < length; ++i) 1822 result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i]))); 1823 } 1824 #endif 1825 PUGI__NS_END 1826 1827 PUGI__NS_BEGIN 1828 enum chartype_t 1829 { 1830 ct_parse_pcdata = 1, // \0, &, \r, < 1831 ct_parse_attr = 2, // \0, &, \r, ', " 1832 ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab 1833 ct_space = 8, // \r, \n, space, tab 1834 ct_parse_cdata = 16, // \0, ], >, \r 1835 ct_parse_comment = 32, // \0, -, >, \r 1836 ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, . 
1837 ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, : 1838 }; 1839 1840 static const unsigned char chartype_table[256] = 1841 { 1842 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15 1843 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 1844 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47 1845 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63 1846 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79 1847 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95 1848 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111 1849 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127 1850 1851 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+ 1852 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1853 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1854 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1855 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1856 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1857 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 1858 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192 1859 }; 1860 1861 enum chartypex_t 1862 { 1863 ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, > 1864 ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, " 1865 ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _ 1866 ctx_digit = 8, // 0-9 1867 ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, . 
1868 }; 1869 1870 static const unsigned char chartypex_table[256] = 1871 { 1872 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15 1873 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31 1874 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47 1875 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63 1876 1877 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79 1878 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95 1879 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111 1880 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127 1881 1882 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+ 1883 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1884 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1885 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1886 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1887 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1888 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 1889 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 1890 }; 1891 1892 #ifdef PUGIXML_WCHAR_MODE 1893 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? 
table[static_cast<unsigned int>(c)] : table[128]) & (ct)) 1894 #else 1895 #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct)) 1896 #endif 1897 1898 #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table) 1899 #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table) 1900 is_little_endian()1901 PUGI__FN bool is_little_endian() 1902 { 1903 unsigned int ui = 1; 1904 1905 return *reinterpret_cast<unsigned char*>(&ui) == 1; 1906 } 1907 get_wchar_encoding()1908 PUGI__FN xml_encoding get_wchar_encoding() 1909 { 1910 PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); 1911 1912 if (sizeof(wchar_t) == 2) 1913 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 1914 else 1915 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; 1916 } 1917 parse_declaration_encoding(const uint8_t * data,size_t size,const uint8_t * & out_encoding,size_t & out_length)1918 PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length) 1919 { 1920 #define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; } 1921 #define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; } 1922 1923 // check if we have a non-empty XML declaration 1924 if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space))) 1925 return false; 1926 1927 // scan XML declaration until the encoding field 1928 for (size_t i = 6; i + 1 < size; ++i) 1929 { 1930 // declaration can not contain ? 
in quoted values 1931 if (data[i] == '?') 1932 return false; 1933 1934 if (data[i] == 'e' && data[i + 1] == 'n') 1935 { 1936 size_t offset = i; 1937 1938 // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed 1939 PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o'); 1940 PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g'); 1941 1942 // S? = S? 1943 PUGI__SCANCHARTYPE(ct_space); 1944 PUGI__SCANCHAR('='); 1945 PUGI__SCANCHARTYPE(ct_space); 1946 1947 // the only two valid delimiters are ' and " 1948 uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\''; 1949 1950 PUGI__SCANCHAR(delimiter); 1951 1952 size_t start = offset; 1953 1954 out_encoding = data + offset; 1955 1956 PUGI__SCANCHARTYPE(ct_symbol); 1957 1958 out_length = offset - start; 1959 1960 PUGI__SCANCHAR(delimiter); 1961 1962 return true; 1963 } 1964 } 1965 1966 return false; 1967 1968 #undef PUGI__SCANCHAR 1969 #undef PUGI__SCANCHARTYPE 1970 } 1971 guess_buffer_encoding(const uint8_t * data,size_t size)1972 PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size) 1973 { 1974 // skip encoding autodetection if input buffer is too small 1975 if (size < 4) return encoding_utf8; 1976 1977 uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3]; 1978 1979 // look for BOM in first few bytes 1980 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be; 1981 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le; 1982 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be; 1983 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le; 1984 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8; 1985 1986 // look for <, <? 
or <?xm in various encodings 1987 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be; 1988 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le; 1989 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be; 1990 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le; 1991 1992 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early) 1993 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be; 1994 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le; 1995 1996 // no known BOM detected; parse declaration 1997 const uint8_t* enc = 0; 1998 size_t enc_length = 0; 1999 2000 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length)) 2001 { 2002 // iso-8859-1 (case-insensitive) 2003 if (enc_length == 10 2004 && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o' 2005 && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9' 2006 && enc[8] == '-' && enc[9] == '1') 2007 return encoding_latin1; 2008 2009 // latin1 (case-insensitive) 2010 if (enc_length == 6 2011 && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't' 2012 && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n' 2013 && enc[5] == '1') 2014 return encoding_latin1; 2015 } 2016 2017 return encoding_utf8; 2018 } 2019 get_buffer_encoding(xml_encoding encoding,const void * contents,size_t size)2020 PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size) 2021 { 2022 // replace wchar encoding with utf implementation 2023 if (encoding == encoding_wchar) return get_wchar_encoding(); 2024 2025 // replace utf16 encoding with utf16 with specific endianness 2026 if (encoding == encoding_utf16) return is_little_endian() ? 
encoding_utf16_le : encoding_utf16_be; 2027 2028 // replace utf32 encoding with utf32 with specific endianness 2029 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; 2030 2031 // only do autodetection if no explicit encoding is requested 2032 if (encoding != encoding_auto) return encoding; 2033 2034 // try to guess encoding (based on XML specification, Appendix F.1) 2035 const uint8_t* data = static_cast<const uint8_t*>(contents); 2036 2037 return guess_buffer_encoding(data, size); 2038 } 2039 get_mutable_buffer(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2040 PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) 2041 { 2042 size_t length = size / sizeof(char_t); 2043 2044 if (is_mutable) 2045 { 2046 out_buffer = static_cast<char_t*>(const_cast<void*>(contents)); 2047 out_length = length; 2048 } 2049 else 2050 { 2051 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2052 if (!buffer) return false; 2053 2054 if (contents) 2055 memcpy(buffer, contents, length * sizeof(char_t)); 2056 else 2057 assert(length == 0); 2058 2059 buffer[length] = 0; 2060 2061 out_buffer = buffer; 2062 out_length = length + 1; 2063 } 2064 2065 return true; 2066 } 2067 2068 #ifdef PUGIXML_WCHAR_MODE need_endian_swap_utf(xml_encoding le,xml_encoding re)2069 PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re) 2070 { 2071 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) || 2072 (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be); 2073 } 2074 convert_buffer_endian_swap(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2075 PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const 
void* contents, size_t size, bool is_mutable) 2076 { 2077 const char_t* data = static_cast<const char_t*>(contents); 2078 size_t length = size / sizeof(char_t); 2079 2080 if (is_mutable) 2081 { 2082 char_t* buffer = const_cast<char_t*>(data); 2083 2084 convert_wchar_endian_swap(buffer, data, length); 2085 2086 out_buffer = buffer; 2087 out_length = length; 2088 } 2089 else 2090 { 2091 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2092 if (!buffer) return false; 2093 2094 convert_wchar_endian_swap(buffer, data, length); 2095 buffer[length] = 0; 2096 2097 out_buffer = buffer; 2098 out_length = length + 1; 2099 } 2100 2101 return true; 2102 } 2103 convert_buffer_generic(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,D)2104 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) 2105 { 2106 const typename D::type* data = static_cast<const typename D::type*>(contents); 2107 size_t data_length = size / sizeof(typename D::type); 2108 2109 // first pass: get length in wchar_t units 2110 size_t length = D::process(data, data_length, 0, wchar_counter()); 2111 2112 // allocate buffer of suitable length 2113 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2114 if (!buffer) return false; 2115 2116 // second pass: convert utf16 input to wchar_t 2117 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer); 2118 wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer()); 2119 2120 assert(oend == obegin + length); 2121 *oend = 0; 2122 2123 out_buffer = buffer; 2124 out_length = length + 1; 2125 2126 return true; 2127 } 2128 convert_buffer(char_t * & out_buffer,size_t & out_length,xml_encoding encoding,const void * contents,size_t size,bool is_mutable)2129 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, 
xml_encoding encoding, const void* contents, size_t size, bool is_mutable) 2130 { 2131 // get native encoding 2132 xml_encoding wchar_encoding = get_wchar_encoding(); 2133 2134 // fast path: no conversion required 2135 if (encoding == wchar_encoding) 2136 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); 2137 2138 // only endian-swapping is required 2139 if (need_endian_swap_utf(encoding, wchar_encoding)) 2140 return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable); 2141 2142 // source encoding is utf8 2143 if (encoding == encoding_utf8) 2144 return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder()); 2145 2146 // source encoding is utf16 2147 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) 2148 { 2149 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 2150 2151 return (native_encoding == encoding) ? 2152 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) : 2153 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>()); 2154 } 2155 2156 // source encoding is utf32 2157 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) 2158 { 2159 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; 2160 2161 return (native_encoding == encoding) ? 
2162 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) : 2163 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>()); 2164 } 2165 2166 // source encoding is latin1 2167 if (encoding == encoding_latin1) 2168 return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder()); 2169 2170 assert(false && "Invalid encoding"); // unreachable 2171 return false; 2172 } 2173 #else convert_buffer_generic(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,D)2174 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) 2175 { 2176 const typename D::type* data = static_cast<const typename D::type*>(contents); 2177 size_t data_length = size / sizeof(typename D::type); 2178 2179 // first pass: get length in utf8 units 2180 size_t length = D::process(data, data_length, 0, utf8_counter()); 2181 2182 // allocate buffer of suitable length 2183 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2184 if (!buffer) return false; 2185 2186 // second pass: convert utf16 input to utf8 2187 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer); 2188 uint8_t* oend = D::process(data, data_length, obegin, utf8_writer()); 2189 2190 assert(oend == obegin + length); 2191 *oend = 0; 2192 2193 out_buffer = buffer; 2194 out_length = length + 1; 2195 2196 return true; 2197 } 2198 get_latin1_7bit_prefix_length(const uint8_t * data,size_t size)2199 PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size) 2200 { 2201 for (size_t i = 0; i < size; ++i) 2202 if (data[i] > 127) 2203 return i; 2204 2205 return size; 2206 } 2207 convert_buffer_latin1(char_t * & out_buffer,size_t & out_length,const void * contents,size_t size,bool is_mutable)2208 PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t 
size, bool is_mutable) 2209 { 2210 const uint8_t* data = static_cast<const uint8_t*>(contents); 2211 size_t data_length = size; 2212 2213 // get size of prefix that does not need utf8 conversion 2214 size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length); 2215 assert(prefix_length <= data_length); 2216 2217 const uint8_t* postfix = data + prefix_length; 2218 size_t postfix_length = data_length - prefix_length; 2219 2220 // if no conversion is needed, just return the original buffer 2221 if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); 2222 2223 // first pass: get length in utf8 units 2224 size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter()); 2225 2226 // allocate buffer of suitable length 2227 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 2228 if (!buffer) return false; 2229 2230 // second pass: convert latin1 input to utf8 2231 memcpy(buffer, data, prefix_length); 2232 2233 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer); 2234 uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer()); 2235 2236 assert(oend == obegin + length); 2237 *oend = 0; 2238 2239 out_buffer = buffer; 2240 out_length = length + 1; 2241 2242 return true; 2243 } 2244 convert_buffer(char_t * & out_buffer,size_t & out_length,xml_encoding encoding,const void * contents,size_t size,bool is_mutable)2245 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) 2246 { 2247 // fast path: no conversion required 2248 if (encoding == encoding_utf8) 2249 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); 2250 2251 // source encoding is utf16 2252 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) 2253 { 2254 xml_encoding native_encoding = is_little_endian() ? 
encoding_utf16_le : encoding_utf16_be; 2255 2256 return (native_encoding == encoding) ? 2257 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) : 2258 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>()); 2259 } 2260 2261 // source encoding is utf32 2262 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) 2263 { 2264 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; 2265 2266 return (native_encoding == encoding) ? 2267 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) : 2268 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>()); 2269 } 2270 2271 // source encoding is latin1 2272 if (encoding == encoding_latin1) 2273 return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable); 2274 2275 assert(false && "Invalid encoding"); // unreachable 2276 return false; 2277 } 2278 #endif 2279 as_utf8_begin(const wchar_t * str,size_t length)2280 PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length) 2281 { 2282 // get length in utf8 characters 2283 return wchar_decoder::process(str, length, 0, utf8_counter()); 2284 } 2285 as_utf8_end(char * buffer,size_t size,const wchar_t * str,size_t length)2286 PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length) 2287 { 2288 // convert to utf8 2289 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer); 2290 uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer()); 2291 2292 assert(begin + size == end); 2293 (void)!end; 2294 (void)!size; 2295 } 2296 2297 #ifndef PUGIXML_NO_STL as_utf8_impl(const wchar_t * str,size_t length)2298 PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length) 2299 { 2300 // first pass: get length in utf8 characters 2301 size_t size = as_utf8_begin(str, length); 2302 2303 // allocate resulting string 2304 std::string 
result; 2305 result.resize(size); 2306 2307 // second pass: convert to utf8 2308 if (size > 0) as_utf8_end(&result[0], size, str, length); 2309 2310 return result; 2311 } 2312 as_wide_impl(const char * str,size_t size)2313 PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size) 2314 { 2315 const uint8_t* data = reinterpret_cast<const uint8_t*>(str); 2316 2317 // first pass: get length in wchar_t units 2318 size_t length = utf8_decoder::process(data, size, 0, wchar_counter()); 2319 2320 // allocate resulting string 2321 std::basic_string<wchar_t> result; 2322 result.resize(length); 2323 2324 // second pass: convert to wchar_t 2325 if (length > 0) 2326 { 2327 wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]); 2328 wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer()); 2329 2330 assert(begin + length == end); 2331 (void)!end; 2332 } 2333 2334 return result; 2335 } 2336 #endif 2337 2338 template <typename Header> strcpy_insitu_allow(size_t length,const Header & header,uintptr_t header_mask,char_t * target)2339 inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target) 2340 { 2341 // never reuse shared memory 2342 if (header & xml_memory_page_contents_shared_mask) return false; 2343 2344 size_t target_length = strlength(target); 2345 2346 // always reuse document buffer memory if possible 2347 if ((header & header_mask) == 0) return target_length >= length; 2348 2349 // reuse heap memory if waste is not too great 2350 const size_t reuse_threshold = 32; 2351 2352 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2); 2353 } 2354 2355 template <typename String, typename Header> strcpy_insitu(String & dest,Header & header,uintptr_t header_mask,const char_t * source,size_t source_length)2356 PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, 
const char_t* source, size_t source_length) 2357 { 2358 if (source_length == 0) 2359 { 2360 // empty string and null pointer are equivalent, so just deallocate old memory 2361 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; 2362 2363 if (header & header_mask) alloc->deallocate_string(dest); 2364 2365 // mark the string as not allocated 2366 dest = 0; 2367 header &= ~header_mask; 2368 2369 return true; 2370 } 2371 else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest)) 2372 { 2373 // we can reuse old buffer, so just copy the new data (including zero terminator) 2374 memcpy(dest, source, source_length * sizeof(char_t)); 2375 dest[source_length] = 0; 2376 2377 return true; 2378 } 2379 else 2380 { 2381 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; 2382 2383 if (!alloc->reserve()) return false; 2384 2385 // allocate new buffer 2386 char_t* buf = alloc->allocate_string(source_length + 1); 2387 if (!buf) return false; 2388 2389 // copy the string (including zero terminator) 2390 memcpy(buf, source, source_length * sizeof(char_t)); 2391 buf[source_length] = 0; 2392 2393 // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures) 2394 if (header & header_mask) alloc->deallocate_string(dest); 2395 2396 // the string is now allocated, so set the flag 2397 dest = buf; 2398 header |= header_mask; 2399 2400 return true; 2401 } 2402 } 2403 2404 struct gap 2405 { 2406 char_t* end; 2407 size_t size; 2408 gapgap2409 gap(): end(0), size(0) 2410 { 2411 } 2412 2413 // Push new gap, move s count bytes further (skipping the gap). 2414 // Collapse previous gap. pushgap2415 void push(char_t*& s, size_t count) 2416 { 2417 if (end) // there was a gap already; collapse it 2418 { 2419 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...) 
2420 assert(s >= end); 2421 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end)); 2422 } 2423 2424 s += count; // end of current gap 2425 2426 // "merge" two gaps 2427 end = s; 2428 size += count; 2429 } 2430 2431 // Collapse all gaps, return past-the-end pointer flushgap2432 char_t* flush(char_t* s) 2433 { 2434 if (end) 2435 { 2436 // Move [old_gap_end, current_pos) to [old_gap_start, ...) 2437 assert(s >= end); 2438 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end)); 2439 2440 return s - size; 2441 } 2442 else return s; 2443 } 2444 }; 2445 strconv_escape(char_t * s,gap & g)2446 PUGI__FN char_t* strconv_escape(char_t* s, gap& g) 2447 { 2448 char_t* stre = s + 1; 2449 2450 switch (*stre) 2451 { 2452 case '#': // &#... 2453 { 2454 unsigned int ucsc = 0; 2455 2456 if (stre[1] == 'x') // &#x... (hex code) 2457 { 2458 stre += 2; 2459 2460 char_t ch = *stre; 2461 2462 if (ch == ';') return stre; 2463 2464 for (;;) 2465 { 2466 if (static_cast<unsigned int>(ch - '0') <= 9) 2467 ucsc = 16 * ucsc + (ch - '0'); 2468 else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5) 2469 ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10); 2470 else if (ch == ';') 2471 break; 2472 else // cancel 2473 return stre; 2474 2475 ch = *++stre; 2476 } 2477 2478 ++stre; 2479 } 2480 else // &#... 
(dec code) 2481 { 2482 char_t ch = *++stre; 2483 2484 if (ch == ';') return stre; 2485 2486 for (;;) 2487 { 2488 if (static_cast<unsigned int>(ch - '0') <= 9) 2489 ucsc = 10 * ucsc + (ch - '0'); 2490 else if (ch == ';') 2491 break; 2492 else // cancel 2493 return stre; 2494 2495 ch = *++stre; 2496 } 2497 2498 ++stre; 2499 } 2500 2501 #ifdef PUGIXML_WCHAR_MODE 2502 s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc)); 2503 #else 2504 s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc)); 2505 #endif 2506 2507 g.push(s, stre - s); 2508 return stre; 2509 } 2510 2511 case 'a': // &a 2512 { 2513 ++stre; 2514 2515 if (*stre == 'm') // &am 2516 { 2517 if (*++stre == 'p' && *++stre == ';') // & 2518 { 2519 *s++ = '&'; 2520 ++stre; 2521 2522 g.push(s, stre - s); 2523 return stre; 2524 } 2525 } 2526 else if (*stre == 'p') // &ap 2527 { 2528 if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // ' 2529 { 2530 *s++ = '\''; 2531 ++stre; 2532 2533 g.push(s, stre - s); 2534 return stre; 2535 } 2536 } 2537 break; 2538 } 2539 2540 case 'g': // &g 2541 { 2542 if (*++stre == 't' && *++stre == ';') // > 2543 { 2544 *s++ = '>'; 2545 ++stre; 2546 2547 g.push(s, stre - s); 2548 return stre; 2549 } 2550 break; 2551 } 2552 2553 case 'l': // &l 2554 { 2555 if (*++stre == 't' && *++stre == ';') // < 2556 { 2557 *s++ = '<'; 2558 ++stre; 2559 2560 g.push(s, stre - s); 2561 return stre; 2562 } 2563 break; 2564 } 2565 2566 case 'q': // &q 2567 { 2568 if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // " 2569 { 2570 *s++ = '"'; 2571 ++stre; 2572 2573 g.push(s, stre - s); 2574 return stre; 2575 } 2576 break; 2577 } 2578 2579 default: 2580 break; 2581 } 2582 2583 return stre; 2584 } 2585 2586 // Parser utilities 2587 #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e))) 2588 #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; } 2589 #define 
PUGI__OPTSET(OPT) ( optmsk & (OPT) ) 2590 #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); } 2591 #define PUGI__POPNODE() { cursor = cursor->parent; } 2592 #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; } 2593 #define PUGI__SCANWHILE(X) { while (X) ++s; } 2594 #define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } } 2595 #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; } 2596 #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0) 2597 #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); } 2598 strconv_comment(char_t * s,char_t endch)2599 PUGI__FN char_t* strconv_comment(char_t* s, char_t endch) 2600 { 2601 gap g; 2602 2603 while (true) 2604 { 2605 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment)); 2606 2607 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair 2608 { 2609 *s++ = '\n'; // replace first one with 0x0a 2610 2611 if (*s == '\n') g.push(s, 1); 2612 } 2613 else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here 2614 { 2615 *g.flush(s) = 0; 2616 2617 return s + (s[2] == '>' ? 
3 : 2); 2618 } 2619 else if (*s == 0) 2620 { 2621 return 0; 2622 } 2623 else ++s; 2624 } 2625 } 2626 strconv_cdata(char_t * s,char_t endch)2627 PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch) 2628 { 2629 gap g; 2630 2631 while (true) 2632 { 2633 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata)); 2634 2635 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair 2636 { 2637 *s++ = '\n'; // replace first one with 0x0a 2638 2639 if (*s == '\n') g.push(s, 1); 2640 } 2641 else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here 2642 { 2643 *g.flush(s) = 0; 2644 2645 return s + 1; 2646 } 2647 else if (*s == 0) 2648 { 2649 return 0; 2650 } 2651 else ++s; 2652 } 2653 } 2654 2655 typedef char_t* (*strconv_pcdata_t)(char_t*); 2656 2657 template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl 2658 { parsestrconv_pcdata_impl2659 static char_t* parse(char_t* s) 2660 { 2661 gap g; 2662 2663 char_t* begin = s; 2664 2665 while (true) 2666 { 2667 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata)); 2668 2669 if (*s == '<') // PCDATA ends here 2670 { 2671 char_t* end = g.flush(s); 2672 2673 if (opt_trim::value) 2674 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) 2675 --end; 2676 2677 *end = 0; 2678 2679 return s + 1; 2680 } 2681 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair 2682 { 2683 *s++ = '\n'; // replace first one with 0x0a 2684 2685 if (*s == '\n') g.push(s, 1); 2686 } 2687 else if (opt_escape::value && *s == '&') 2688 { 2689 s = strconv_escape(s, g); 2690 } 2691 else if (*s == 0) 2692 { 2693 char_t* end = g.flush(s); 2694 2695 if (opt_trim::value) 2696 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) 2697 --end; 2698 2699 *end = 0; 2700 2701 return s; 2702 } 2703 else ++s; 2704 } 2705 } 2706 }; 2707 get_strconv_pcdata(unsigned int optmask)2708 PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask) 2709 { 
2710 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800); 2711 2712 switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (eol escapes trim) 2713 { 2714 case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse; 2715 case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse; 2716 case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse; 2717 case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse; 2718 case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse; 2719 case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse; 2720 case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse; 2721 case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse; 2722 default: assert(false); return 0; // unreachable 2723 } 2724 } 2725 2726 typedef char_t* (*strconv_attribute_t)(char_t*, char_t); 2727 2728 template <typename opt_escape> struct strconv_attribute_impl 2729 { parse_wnormstrconv_attribute_impl2730 static char_t* parse_wnorm(char_t* s, char_t end_quote) 2731 { 2732 gap g; 2733 2734 // trim leading whitespaces 2735 if (PUGI__IS_CHARTYPE(*s, ct_space)) 2736 { 2737 char_t* str = s; 2738 2739 do ++str; 2740 while (PUGI__IS_CHARTYPE(*str, ct_space)); 2741 2742 g.push(s, str - s); 2743 } 2744 2745 while (true) 2746 { 2747 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space)); 2748 2749 if (*s == end_quote) 2750 { 2751 char_t* str = g.flush(s); 2752 2753 do *str-- = 0; 2754 while (PUGI__IS_CHARTYPE(*str, ct_space)); 2755 2756 return s + 1; 2757 } 2758 else if (PUGI__IS_CHARTYPE(*s, ct_space)) 2759 { 2760 *s++ = ' '; 2761 2762 if (PUGI__IS_CHARTYPE(*s, ct_space)) 2763 { 2764 char_t* str = s + 1; 2765 while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str; 2766 2767 g.push(s, str - s); 2768 } 2769 } 2770 else if (opt_escape::value && *s == '&') 2771 { 2772 s = 
strconv_escape(s, g); 2773 } 2774 else if (!*s) 2775 { 2776 return 0; 2777 } 2778 else ++s; 2779 } 2780 } 2781 parse_wconvstrconv_attribute_impl2782 static char_t* parse_wconv(char_t* s, char_t end_quote) 2783 { 2784 gap g; 2785 2786 while (true) 2787 { 2788 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws)); 2789 2790 if (*s == end_quote) 2791 { 2792 *g.flush(s) = 0; 2793 2794 return s + 1; 2795 } 2796 else if (PUGI__IS_CHARTYPE(*s, ct_space)) 2797 { 2798 if (*s == '\r') 2799 { 2800 *s++ = ' '; 2801 2802 if (*s == '\n') g.push(s, 1); 2803 } 2804 else *s++ = ' '; 2805 } 2806 else if (opt_escape::value && *s == '&') 2807 { 2808 s = strconv_escape(s, g); 2809 } 2810 else if (!*s) 2811 { 2812 return 0; 2813 } 2814 else ++s; 2815 } 2816 } 2817 parse_eolstrconv_attribute_impl2818 static char_t* parse_eol(char_t* s, char_t end_quote) 2819 { 2820 gap g; 2821 2822 while (true) 2823 { 2824 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); 2825 2826 if (*s == end_quote) 2827 { 2828 *g.flush(s) = 0; 2829 2830 return s + 1; 2831 } 2832 else if (*s == '\r') 2833 { 2834 *s++ = '\n'; 2835 2836 if (*s == '\n') g.push(s, 1); 2837 } 2838 else if (opt_escape::value && *s == '&') 2839 { 2840 s = strconv_escape(s, g); 2841 } 2842 else if (!*s) 2843 { 2844 return 0; 2845 } 2846 else ++s; 2847 } 2848 } 2849 parse_simplestrconv_attribute_impl2850 static char_t* parse_simple(char_t* s, char_t end_quote) 2851 { 2852 gap g; 2853 2854 while (true) 2855 { 2856 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); 2857 2858 if (*s == end_quote) 2859 { 2860 *g.flush(s) = 0; 2861 2862 return s + 1; 2863 } 2864 else if (opt_escape::value && *s == '&') 2865 { 2866 s = strconv_escape(s, g); 2867 } 2868 else if (!*s) 2869 { 2870 return 0; 2871 } 2872 else ++s; 2873 } 2874 } 2875 }; 2876 get_strconv_attribute(unsigned int optmask)2877 PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask) 2878 { 2879 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && 
parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80); 2880 2881 switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes) 2882 { 2883 case 0: return strconv_attribute_impl<opt_false>::parse_simple; 2884 case 1: return strconv_attribute_impl<opt_true>::parse_simple; 2885 case 2: return strconv_attribute_impl<opt_false>::parse_eol; 2886 case 3: return strconv_attribute_impl<opt_true>::parse_eol; 2887 case 4: return strconv_attribute_impl<opt_false>::parse_wconv; 2888 case 5: return strconv_attribute_impl<opt_true>::parse_wconv; 2889 case 6: return strconv_attribute_impl<opt_false>::parse_wconv; 2890 case 7: return strconv_attribute_impl<opt_true>::parse_wconv; 2891 case 8: return strconv_attribute_impl<opt_false>::parse_wnorm; 2892 case 9: return strconv_attribute_impl<opt_true>::parse_wnorm; 2893 case 10: return strconv_attribute_impl<opt_false>::parse_wnorm; 2894 case 11: return strconv_attribute_impl<opt_true>::parse_wnorm; 2895 case 12: return strconv_attribute_impl<opt_false>::parse_wnorm; 2896 case 13: return strconv_attribute_impl<opt_true>::parse_wnorm; 2897 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm; 2898 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm; 2899 default: assert(false); return 0; // unreachable 2900 } 2901 } 2902 make_parse_result(xml_parse_status status,ptrdiff_t offset=0)2903 inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0) 2904 { 2905 xml_parse_result result; 2906 result.status = status; 2907 result.offset = offset; 2908 2909 return result; 2910 } 2911 2912 struct xml_parser 2913 { 2914 xml_allocator* alloc; 2915 char_t* error_offset; 2916 xml_parse_status error_status; 2917 xml_parserxml_parser2918 xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok) 2919 { 2920 } 2921 2922 // DOCTYPE consists of nested sections of the following possible types: 2923 // <!-- ... -->, <? ... 
?>, "...", '...' 2924 // <![...]]> 2925 // <!...> 2926 // First group can not contain nested groups 2927 // Second group can contain nested groups of the same type 2928 // Third group can contain all other groups parse_doctype_primitivexml_parser2929 char_t* parse_doctype_primitive(char_t* s) 2930 { 2931 if (*s == '"' || *s == '\'') 2932 { 2933 // quoted string 2934 char_t ch = *s++; 2935 PUGI__SCANFOR(*s == ch); 2936 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); 2937 2938 s++; 2939 } 2940 else if (s[0] == '<' && s[1] == '?') 2941 { 2942 // <? ... ?> 2943 s += 2; 2944 PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype 2945 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); 2946 2947 s += 2; 2948 } 2949 else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-') 2950 { 2951 s += 4; 2952 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype 2953 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); 2954 2955 s += 3; 2956 } 2957 else PUGI__THROW_ERROR(status_bad_doctype, s); 2958 2959 return s; 2960 } 2961 parse_doctype_ignorexml_parser2962 char_t* parse_doctype_ignore(char_t* s) 2963 { 2964 size_t depth = 0; 2965 2966 assert(s[0] == '<' && s[1] == '!' && s[2] == '['); 2967 s += 3; 2968 2969 while (*s) 2970 { 2971 if (s[0] == '<' && s[1] == '!' && s[2] == '[') 2972 { 2973 // nested ignore section 2974 s += 3; 2975 depth++; 2976 } 2977 else if (s[0] == ']' && s[1] == ']' && s[2] == '>') 2978 { 2979 // ignore section end 2980 s += 3; 2981 2982 if (depth == 0) 2983 return s; 2984 2985 depth--; 2986 } 2987 else s++; 2988 } 2989 2990 PUGI__THROW_ERROR(status_bad_doctype, s); 2991 } 2992 parse_doctype_groupxml_parser2993 char_t* parse_doctype_group(char_t* s, char_t endch) 2994 { 2995 size_t depth = 0; 2996 2997 assert((s[0] == '<' || s[0] == 0) && s[1] == '!'); 2998 s += 2; 2999 3000 while (*s) 3001 { 3002 if (s[0] == '<' && s[1] == '!' 
&& s[2] != '-') 3003 { 3004 if (s[2] == '[') 3005 { 3006 // ignore 3007 s = parse_doctype_ignore(s); 3008 if (!s) return s; 3009 } 3010 else 3011 { 3012 // some control group 3013 s += 2; 3014 depth++; 3015 } 3016 } 3017 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') 3018 { 3019 // unknown tag (forbidden), or some primitive group 3020 s = parse_doctype_primitive(s); 3021 if (!s) return s; 3022 } 3023 else if (*s == '>') 3024 { 3025 if (depth == 0) 3026 return s; 3027 3028 depth--; 3029 s++; 3030 } 3031 else s++; 3032 } 3033 3034 if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s); 3035 3036 return s; 3037 } 3038 parse_exclamationxml_parser3039 char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) 3040 { 3041 // parse node contents, starting with exclamation mark 3042 ++s; 3043 3044 if (*s == '-') // '<!-...' 3045 { 3046 ++s; 3047 3048 if (*s == '-') // '<!--...' 3049 { 3050 ++s; 3051 3052 if (PUGI__OPTSET(parse_comments)) 3053 { 3054 PUGI__PUSHNODE(node_comment); // Append a new node on the tree. 3055 cursor->value = s; // Save the offset. 3056 } 3057 3058 if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments)) 3059 { 3060 s = strconv_comment(s, endch); 3061 3062 if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value); 3063 } 3064 else 3065 { 3066 // Scan for terminating '-->'. 3067 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')); 3068 PUGI__CHECK_ERROR(status_bad_comment, s); 3069 3070 if (PUGI__OPTSET(parse_comments)) 3071 *s = 0; // Zero-terminate this segment at the first terminating '-'. 3072 3073 s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'. 3074 } 3075 } 3076 else PUGI__THROW_ERROR(status_bad_comment, s); 3077 } 3078 else if (*s == '[') 3079 { 3080 // '<![CDATA[...' 
3081 if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[') 3082 { 3083 ++s; 3084 3085 if (PUGI__OPTSET(parse_cdata)) 3086 { 3087 PUGI__PUSHNODE(node_cdata); // Append a new node on the tree. 3088 cursor->value = s; // Save the offset. 3089 3090 if (PUGI__OPTSET(parse_eol)) 3091 { 3092 s = strconv_cdata(s, endch); 3093 3094 if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value); 3095 } 3096 else 3097 { 3098 // Scan for terminating ']]>'. 3099 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')); 3100 PUGI__CHECK_ERROR(status_bad_cdata, s); 3101 3102 *s++ = 0; // Zero-terminate this segment. 3103 } 3104 } 3105 else // Flagged for discard, but we still have to scan for the terminator. 3106 { 3107 // Scan for terminating ']]>'. 3108 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')); 3109 PUGI__CHECK_ERROR(status_bad_cdata, s); 3110 3111 ++s; 3112 } 3113 3114 s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'. 3115 } 3116 else PUGI__THROW_ERROR(status_bad_cdata, s); 3117 } 3118 else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E')) 3119 { 3120 s -= 2; 3121 3122 if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s); 3123 3124 char_t* mark = s + 9; 3125 3126 s = parse_doctype_group(s, endch); 3127 if (!s) return s; 3128 3129 assert((*s == 0 && endch == '>') || *s == '>'); 3130 if (*s) *s++ = 0; 3131 3132 if (PUGI__OPTSET(parse_doctype)) 3133 { 3134 while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark; 3135 3136 PUGI__PUSHNODE(node_doctype); 3137 3138 cursor->value = mark; 3139 } 3140 } 3141 else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s); 3142 else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s); 3143 else PUGI__THROW_ERROR(status_unrecognized_tag, s); 3144 3145 return s; 3146 } 3147 parse_questionxml_parser3148 char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, 
unsigned int optmsk, char_t endch) 3149 { 3150 // load into registers 3151 xml_node_struct* cursor = ref_cursor; 3152 char_t ch = 0; 3153 3154 // parse node contents, starting with question mark 3155 ++s; 3156 3157 // read PI target 3158 char_t* target = s; 3159 3160 if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s); 3161 3162 PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); 3163 PUGI__CHECK_ERROR(status_bad_pi, s); 3164 3165 // determine node type; stricmp / strcasecmp is not portable 3166 bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s; 3167 3168 if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi)) 3169 { 3170 if (declaration) 3171 { 3172 // disallow non top-level declarations 3173 if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s); 3174 3175 PUGI__PUSHNODE(node_declaration); 3176 } 3177 else 3178 { 3179 PUGI__PUSHNODE(node_pi); 3180 } 3181 3182 cursor->name = target; 3183 3184 PUGI__ENDSEG(); 3185 3186 // parse value/attributes 3187 if (ch == '?') 3188 { 3189 // empty node 3190 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s); 3191 s += (*s == '>'); 3192 3193 PUGI__POPNODE(); 3194 } 3195 else if (PUGI__IS_CHARTYPE(ch, ct_space)) 3196 { 3197 PUGI__SKIPWS(); 3198 3199 // scan for tag end 3200 char_t* value = s; 3201 3202 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>')); 3203 PUGI__CHECK_ERROR(status_bad_pi, s); 3204 3205 if (declaration) 3206 { 3207 // replace ending ? 
with / so that 'element' terminates properly 3208 *s = '/'; 3209 3210 // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES 3211 s = value; 3212 } 3213 else 3214 { 3215 // store value and step over > 3216 cursor->value = value; 3217 3218 PUGI__POPNODE(); 3219 3220 PUGI__ENDSEG(); 3221 3222 s += (*s == '>'); 3223 } 3224 } 3225 else PUGI__THROW_ERROR(status_bad_pi, s); 3226 } 3227 else 3228 { 3229 // scan for tag end 3230 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>')); 3231 PUGI__CHECK_ERROR(status_bad_pi, s); 3232 3233 s += (s[1] == '>' ? 2 : 1); 3234 } 3235 3236 // store from registers 3237 ref_cursor = cursor; 3238 3239 return s; 3240 } 3241 parse_treexml_parser3242 char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch) 3243 { 3244 strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk); 3245 strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk); 3246 3247 char_t ch = 0; 3248 xml_node_struct* cursor = root; 3249 char_t* mark = s; 3250 3251 while (*s != 0) 3252 { 3253 if (*s == '<') 3254 { 3255 ++s; 3256 3257 LOC_TAG: 3258 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...' 3259 { 3260 PUGI__PUSHNODE(node_element); // Append a new node to the tree. 3261 3262 cursor->name = s; 3263 3264 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. 3265 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over. 3266 3267 if (ch == '>') 3268 { 3269 // end of tag 3270 } 3271 else if (PUGI__IS_CHARTYPE(ch, ct_space)) 3272 { 3273 LOC_ATTRIBUTES: 3274 while (true) 3275 { 3276 PUGI__SKIPWS(); // Eat any whitespace. 3277 3278 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #... 3279 { 3280 xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute. 3281 if (!a) PUGI__THROW_ERROR(status_out_of_memory, s); 3282 3283 a->name = s; // Save the offset. 
3284 3285 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. 3286 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over. 3287 3288 if (PUGI__IS_CHARTYPE(ch, ct_space)) 3289 { 3290 PUGI__SKIPWS(); // Eat any whitespace. 3291 3292 ch = *s; 3293 ++s; 3294 } 3295 3296 if (ch == '=') // '<... #=...' 3297 { 3298 PUGI__SKIPWS(); // Eat any whitespace. 3299 3300 if (*s == '"' || *s == '\'') // '<... #="...' 3301 { 3302 ch = *s; // Save quote char to avoid breaking on "''" -or- '""'. 3303 ++s; // Step over the quote. 3304 a->value = s; // Save the offset. 3305 3306 s = strconv_attribute(s, ch); 3307 3308 if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value); 3309 3310 // After this line the loop continues from the start; 3311 // Whitespaces, / and > are ok, symbols and EOF are wrong, 3312 // everything else will be detected 3313 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s); 3314 } 3315 else PUGI__THROW_ERROR(status_bad_attribute, s); 3316 } 3317 else PUGI__THROW_ERROR(status_bad_attribute, s); 3318 } 3319 else if (*s == '/') 3320 { 3321 ++s; 3322 3323 if (*s == '>') 3324 { 3325 PUGI__POPNODE(); 3326 s++; 3327 break; 3328 } 3329 else if (*s == 0 && endch == '>') 3330 { 3331 PUGI__POPNODE(); 3332 break; 3333 } 3334 else PUGI__THROW_ERROR(status_bad_start_element, s); 3335 } 3336 else if (*s == '>') 3337 { 3338 ++s; 3339 3340 break; 3341 } 3342 else if (*s == 0 && endch == '>') 3343 { 3344 break; 3345 } 3346 else PUGI__THROW_ERROR(status_bad_start_element, s); 3347 } 3348 3349 // !!! 3350 } 3351 else if (ch == '/') // '<#.../' 3352 { 3353 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s); 3354 3355 PUGI__POPNODE(); // Pop. 
3356 3357 s += (*s == '>'); 3358 } 3359 else if (ch == 0) 3360 { 3361 // we stepped over null terminator, backtrack & handle closing tag 3362 --s; 3363 3364 if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s); 3365 } 3366 else PUGI__THROW_ERROR(status_bad_start_element, s); 3367 } 3368 else if (*s == '/') 3369 { 3370 ++s; 3371 3372 mark = s; 3373 3374 char_t* name = cursor->name; 3375 if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark); 3376 3377 while (PUGI__IS_CHARTYPE(*s, ct_symbol)) 3378 { 3379 if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark); 3380 } 3381 3382 if (*name) 3383 { 3384 if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s); 3385 else PUGI__THROW_ERROR(status_end_element_mismatch, mark); 3386 } 3387 3388 PUGI__POPNODE(); // Pop. 3389 3390 PUGI__SKIPWS(); 3391 3392 if (*s == 0) 3393 { 3394 if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s); 3395 } 3396 else 3397 { 3398 if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s); 3399 ++s; 3400 } 3401 } 3402 else if (*s == '?') // '<?...' 3403 { 3404 s = parse_question(s, cursor, optmsk, endch); 3405 if (!s) return s; 3406 3407 assert(cursor); 3408 if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES; 3409 } 3410 else if (*s == '!') // '<!...' 3411 { 3412 s = parse_exclamation(s, cursor, optmsk, endch); 3413 if (!s) return s; 3414 } 3415 else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s); 3416 else PUGI__THROW_ERROR(status_unrecognized_tag, s); 3417 } 3418 else 3419 { 3420 mark = s; // Save this offset while searching for a terminator. 3421 3422 PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here. 
3423 3424 if (*s == '<' || !*s) 3425 { 3426 // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one 3427 assert(mark != s); 3428 3429 if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata)) 3430 { 3431 continue; 3432 } 3433 else if (PUGI__OPTSET(parse_ws_pcdata_single)) 3434 { 3435 if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue; 3436 } 3437 } 3438 3439 if (!PUGI__OPTSET(parse_trim_pcdata)) 3440 s = mark; 3441 3442 if (cursor->parent || PUGI__OPTSET(parse_fragment)) 3443 { 3444 if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value) 3445 { 3446 cursor->value = s; // Save the offset. 3447 } 3448 else 3449 { 3450 PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree. 3451 3452 cursor->value = s; // Save the offset. 3453 3454 PUGI__POPNODE(); // Pop since this is a standalone. 3455 } 3456 3457 s = strconv_pcdata(s); 3458 3459 if (!*s) break; 3460 } 3461 else 3462 { 3463 PUGI__SCANFOR(*s == '<'); // '...<' 3464 if (!*s) break; 3465 3466 ++s; 3467 } 3468 3469 // We're after '<' 3470 goto LOC_TAG; 3471 } 3472 } 3473 3474 // check that last tag is closed 3475 if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s); 3476 3477 return s; 3478 } 3479 3480 #ifdef PUGIXML_WCHAR_MODE parse_skip_bomxml_parser3481 static char_t* parse_skip_bom(char_t* s) 3482 { 3483 unsigned int bom = 0xfeff; 3484 return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s; 3485 } 3486 #else parse_skip_bomxml_parser3487 static char_t* parse_skip_bom(char_t* s) 3488 { 3489 return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? 
s + 3 : s; 3490 } 3491 #endif 3492 has_element_node_siblingsxml_parser3493 static bool has_element_node_siblings(xml_node_struct* node) 3494 { 3495 while (node) 3496 { 3497 if (PUGI__NODETYPE(node) == node_element) return true; 3498 3499 node = node->next_sibling; 3500 } 3501 3502 return false; 3503 } 3504 parsexml_parser3505 static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk) 3506 { 3507 // early-out for empty documents 3508 if (length == 0) 3509 return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element); 3510 3511 // get last child of the root before parsing 3512 xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0; 3513 3514 // create parser on stack 3515 xml_parser parser(static_cast<xml_allocator*>(xmldoc)); 3516 3517 // save last character and make buffer zero-terminated (speeds up parsing) 3518 char_t endch = buffer[length - 1]; 3519 buffer[length - 1] = 0; 3520 3521 // skip BOM to make sure it does not end up as part of parse output 3522 char_t* buffer_data = parse_skip_bom(buffer); 3523 3524 // perform actual parsing 3525 parser.parse_tree(buffer_data, root, optmsk, endch); 3526 3527 xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0); 3528 assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length); 3529 3530 if (result) 3531 { 3532 // since we removed last character, we have to handle the only possible false positive (stray <) 3533 if (endch == '<') 3534 return make_parse_result(status_unrecognized_tag, length - 1); 3535 3536 // check if there are any element nodes parsed 3537 xml_node_struct* first_root_child_parsed = last_root_child ? 
last_root_child->next_sibling + 0 : root->first_child+ 0; 3538 3539 if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed)) 3540 return make_parse_result(status_no_document_element, length - 1); 3541 } 3542 else 3543 { 3544 // roll back offset if it occurs on a null terminator in the source buffer 3545 if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0) 3546 result.offset--; 3547 } 3548 3549 return result; 3550 } 3551 }; 3552 3553 // Output facilities get_write_native_encoding()3554 PUGI__FN xml_encoding get_write_native_encoding() 3555 { 3556 #ifdef PUGIXML_WCHAR_MODE 3557 return get_wchar_encoding(); 3558 #else 3559 return encoding_utf8; 3560 #endif 3561 } 3562 get_write_encoding(xml_encoding encoding)3563 PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding) 3564 { 3565 // replace wchar encoding with utf implementation 3566 if (encoding == encoding_wchar) return get_wchar_encoding(); 3567 3568 // replace utf16 encoding with utf16 with specific endianness 3569 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 3570 3571 // replace utf32 encoding with utf32 with specific endianness 3572 if (encoding == encoding_utf32) return is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; 3573 3574 // only do autodetection if no explicit encoding is requested 3575 if (encoding != encoding_auto) return encoding; 3576 3577 // assume utf8 encoding 3578 return encoding_utf8; 3579 } 3580 convert_buffer_output_generic(typename T::value_type dest,const char_t * data,size_t length,D,T)3581 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T) 3582 { 3583 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); 3584 3585 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T()); 3586 3587 return static_cast<size_t>(end - dest) * sizeof(*dest); 3588 } 3589 convert_buffer_output_generic(typename T::value_type dest,const char_t * data,size_t length,D,T,bool opt_swap)3590 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap) 3591 { 3592 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); 3593 3594 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T()); 3595 3596 if (opt_swap) 3597 { 3598 for (typename T::value_type i = dest; i != end; ++i) 3599 *i = endian_swap(*i); 3600 } 3601 3602 return static_cast<size_t>(end - dest) * sizeof(*dest); 3603 } 3604 3605 #ifdef PUGIXML_WCHAR_MODE get_valid_length(const char_t * data,size_t length)3606 PUGI__FN size_t get_valid_length(const char_t* data, size_t length) 3607 { 3608 if (length < 1) return 0; 3609 3610 // discard last character if it's the lead of a surrogate pair 3611 return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? 
length - 1 : length; 3612 } 3613 convert_buffer_output(char_t * r_char,uint8_t * r_u8,uint16_t * r_u16,uint32_t * r_u32,const char_t * data,size_t length,xml_encoding encoding)3614 PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) 3615 { 3616 // only endian-swapping is required 3617 if (need_endian_swap_utf(encoding, get_wchar_encoding())) 3618 { 3619 convert_wchar_endian_swap(r_char, data, length); 3620 3621 return length * sizeof(char_t); 3622 } 3623 3624 // convert to utf8 3625 if (encoding == encoding_utf8) 3626 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer()); 3627 3628 // convert to utf16 3629 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) 3630 { 3631 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 3632 3633 return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding); 3634 } 3635 3636 // convert to utf32 3637 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) 3638 { 3639 xml_encoding native_encoding = is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; 3640 3641 return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding); 3642 } 3643 3644 // convert to latin1 3645 if (encoding == encoding_latin1) 3646 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer()); 3647 3648 assert(false && "Invalid encoding"); // unreachable 3649 return 0; 3650 } 3651 #else get_valid_length(const char_t * data,size_t length)3652 PUGI__FN size_t get_valid_length(const char_t* data, size_t length) 3653 { 3654 if (length < 5) return 0; 3655 3656 for (size_t i = 1; i <= 4; ++i) 3657 { 3658 uint8_t ch = static_cast<uint8_t>(data[length - i]); 3659 3660 // either a standalone character or a leading one 3661 if ((ch & 0xc0) != 0x80) return length - i; 3662 } 3663 3664 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk 3665 return length; 3666 } 3667 convert_buffer_output(char_t *,uint8_t * r_u8,uint16_t * r_u16,uint32_t * r_u32,const char_t * data,size_t length,xml_encoding encoding)3668 PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) 3669 { 3670 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) 3671 { 3672 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; 3673 3674 return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding); 3675 } 3676 3677 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) 3678 { 3679 xml_encoding native_encoding = is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; 3680 3681 return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding); 3682 } 3683 3684 if (encoding == encoding_latin1) 3685 return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer()); 3686 3687 assert(false && "Invalid encoding"); // unreachable 3688 return 0; 3689 } 3690 #endif 3691 3692 class xml_buffered_writer 3693 { 3694 xml_buffered_writer(const xml_buffered_writer&); 3695 xml_buffered_writer& operator=(const xml_buffered_writer&); 3696 3697 public: xml_buffered_writer(xml_writer & writer_,xml_encoding user_encoding)3698 xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding)) 3699 { 3700 PUGI__STATIC_ASSERT(bufcapacity >= 8); 3701 } 3702 flush()3703 size_t flush() 3704 { 3705 flush(buffer, bufsize); 3706 bufsize = 0; 3707 return 0; 3708 } 3709 flush(const char_t * data,size_t size)3710 void flush(const char_t* data, size_t size) 3711 { 3712 if (size == 0) return; 3713 3714 // fast path, just write data 3715 if (encoding == get_write_native_encoding()) 3716 writer.write(data, size * sizeof(char_t)); 3717 else 3718 { 3719 // convert chunk 3720 size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding); 3721 assert(result <= sizeof(scratch)); 3722 3723 // write data 3724 writer.write(scratch.data_u8, result); 3725 } 3726 } 3727 write_direct(const char_t * data,size_t length)3728 void write_direct(const char_t* data, size_t length) 3729 { 3730 // flush the remaining buffer contents 3731 flush(); 3732 3733 // handle large chunks 3734 if (length > bufcapacity) 3735 { 3736 if (encoding == get_write_native_encoding()) 3737 { 3738 // fast path, can just write data chunk 3739 writer.write(data, length * sizeof(char_t)); 3740 return; 3741 } 3742 3743 // need to convert in suitable 
chunks 3744 while (length > bufcapacity) 3745 { 3746 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer 3747 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary) 3748 size_t chunk_size = get_valid_length(data, bufcapacity); 3749 assert(chunk_size); 3750 3751 // convert chunk and write 3752 flush(data, chunk_size); 3753 3754 // iterate 3755 data += chunk_size; 3756 length -= chunk_size; 3757 } 3758 3759 // small tail is copied below 3760 bufsize = 0; 3761 } 3762 3763 memcpy(buffer + bufsize, data, length * sizeof(char_t)); 3764 bufsize += length; 3765 } 3766 write_buffer(const char_t * data,size_t length)3767 void write_buffer(const char_t* data, size_t length) 3768 { 3769 size_t offset = bufsize; 3770 3771 if (offset + length <= bufcapacity) 3772 { 3773 memcpy(buffer + offset, data, length * sizeof(char_t)); 3774 bufsize = offset + length; 3775 } 3776 else 3777 { 3778 write_direct(data, length); 3779 } 3780 } 3781 write_string(const char_t * data)3782 void write_string(const char_t* data) 3783 { 3784 // write the part of the string that fits in the buffer 3785 size_t offset = bufsize; 3786 3787 while (*data && offset < bufcapacity) 3788 buffer[offset++] = *data++; 3789 3790 // write the rest 3791 if (offset < bufcapacity) 3792 { 3793 bufsize = offset; 3794 } 3795 else 3796 { 3797 // backtrack a bit if we have split the codepoint 3798 size_t length = offset - bufsize; 3799 size_t extra = length - get_valid_length(data - length, length); 3800 3801 bufsize = offset - extra; 3802 3803 write_direct(data - extra, strlength(data) + extra); 3804 } 3805 } 3806 write(char_t d0)3807 void write(char_t d0) 3808 { 3809 size_t offset = bufsize; 3810 if (offset > bufcapacity - 1) offset = flush(); 3811 3812 buffer[offset + 0] = d0; 3813 bufsize = offset + 1; 3814 } 3815 write(char_t d0,char_t d1)3816 void write(char_t d0, char_t d1) 3817 { 3818 size_t offset = bufsize; 3819 if (offset > 
bufcapacity - 2) offset = flush(); 3820 3821 buffer[offset + 0] = d0; 3822 buffer[offset + 1] = d1; 3823 bufsize = offset + 2; 3824 } 3825 write(char_t d0,char_t d1,char_t d2)3826 void write(char_t d0, char_t d1, char_t d2) 3827 { 3828 size_t offset = bufsize; 3829 if (offset > bufcapacity - 3) offset = flush(); 3830 3831 buffer[offset + 0] = d0; 3832 buffer[offset + 1] = d1; 3833 buffer[offset + 2] = d2; 3834 bufsize = offset + 3; 3835 } 3836 write(char_t d0,char_t d1,char_t d2,char_t d3)3837 void write(char_t d0, char_t d1, char_t d2, char_t d3) 3838 { 3839 size_t offset = bufsize; 3840 if (offset > bufcapacity - 4) offset = flush(); 3841 3842 buffer[offset + 0] = d0; 3843 buffer[offset + 1] = d1; 3844 buffer[offset + 2] = d2; 3845 buffer[offset + 3] = d3; 3846 bufsize = offset + 4; 3847 } 3848 write(char_t d0,char_t d1,char_t d2,char_t d3,char_t d4)3849 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) 3850 { 3851 size_t offset = bufsize; 3852 if (offset > bufcapacity - 5) offset = flush(); 3853 3854 buffer[offset + 0] = d0; 3855 buffer[offset + 1] = d1; 3856 buffer[offset + 2] = d2; 3857 buffer[offset + 3] = d3; 3858 buffer[offset + 4] = d4; 3859 bufsize = offset + 5; 3860 } 3861 write(char_t d0,char_t d1,char_t d2,char_t d3,char_t d4,char_t d5)3862 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) 3863 { 3864 size_t offset = bufsize; 3865 if (offset > bufcapacity - 6) offset = flush(); 3866 3867 buffer[offset + 0] = d0; 3868 buffer[offset + 1] = d1; 3869 buffer[offset + 2] = d2; 3870 buffer[offset + 3] = d3; 3871 buffer[offset + 4] = d4; 3872 buffer[offset + 5] = d5; 3873 bufsize = offset + 6; 3874 } 3875 3876 // utf8 maximum expansion: x4 (-> utf32) 3877 // utf16 maximum expansion: x2 (-> utf32) 3878 // utf32 maximum expansion: x1 3879 enum 3880 { 3881 bufcapacitybytes = 3882 #ifdef PUGIXML_MEMORY_OUTPUT_STACK 3883 PUGIXML_MEMORY_OUTPUT_STACK 3884 #else 3885 10240 3886 #endif 3887 , 3888 bufcapacity = 
bufcapacitybytes / (sizeof(char_t) + 4) 3889 }; 3890 3891 char_t buffer[bufcapacity]; 3892 3893 union 3894 { 3895 uint8_t data_u8[4 * bufcapacity]; 3896 uint16_t data_u16[2 * bufcapacity]; 3897 uint32_t data_u32[bufcapacity]; 3898 char_t data_char[bufcapacity]; 3899 } scratch; 3900 3901 xml_writer& writer; 3902 size_t bufsize; 3903 xml_encoding encoding; 3904 }; 3905 text_output_escaped(xml_buffered_writer & writer,const char_t * s,chartypex_t type)3906 PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type) 3907 { 3908 while (*s) 3909 { 3910 const char_t* prev = s; 3911 3912 // While *s is a usual symbol 3913 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type)); 3914 3915 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 3916 3917 switch (*s) 3918 { 3919 case 0: break; 3920 case '&': 3921 writer.write('&', 'a', 'm', 'p', ';'); 3922 ++s; 3923 break; 3924 case '<': 3925 writer.write('&', 'l', 't', ';'); 3926 ++s; 3927 break; 3928 case '>': 3929 writer.write('&', 'g', 't', ';'); 3930 ++s; 3931 break; 3932 case '"': 3933 writer.write('&', 'q', 'u', 'o', 't', ';'); 3934 ++s; 3935 break; 3936 default: // s is not a usual symbol 3937 { 3938 unsigned int ch = static_cast<unsigned int>(*s++); 3939 assert(ch < 32); 3940 3941 writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';'); 3942 } 3943 } 3944 } 3945 } 3946 text_output(xml_buffered_writer & writer,const char_t * s,chartypex_t type,unsigned int flags)3947 PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags) 3948 { 3949 if (flags & format_no_escapes) 3950 writer.write_string(s); 3951 else 3952 text_output_escaped(writer, s, type); 3953 } 3954 text_output_cdata(xml_buffered_writer & writer,const char_t * s)3955 PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s) 3956 { 3957 do 3958 { 3959 writer.write('<', '!', '[', 'C', 'D'); 3960 
writer.write('A', 'T', 'A', '['); 3961 3962 const char_t* prev = s; 3963 3964 // look for ]]> sequence - we can't output it as is since it terminates CDATA 3965 while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s; 3966 3967 // skip ]] if we stopped at ]]>, > will go to the next CDATA section 3968 if (*s) s += 2; 3969 3970 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 3971 3972 writer.write(']', ']', '>'); 3973 } 3974 while (*s); 3975 } 3976 text_output_indent(xml_buffered_writer & writer,const char_t * indent,size_t indent_length,unsigned int depth)3977 PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth) 3978 { 3979 switch (indent_length) 3980 { 3981 case 1: 3982 { 3983 for (unsigned int i = 0; i < depth; ++i) 3984 writer.write(indent[0]); 3985 break; 3986 } 3987 3988 case 2: 3989 { 3990 for (unsigned int i = 0; i < depth; ++i) 3991 writer.write(indent[0], indent[1]); 3992 break; 3993 } 3994 3995 case 3: 3996 { 3997 for (unsigned int i = 0; i < depth; ++i) 3998 writer.write(indent[0], indent[1], indent[2]); 3999 break; 4000 } 4001 4002 case 4: 4003 { 4004 for (unsigned int i = 0; i < depth; ++i) 4005 writer.write(indent[0], indent[1], indent[2], indent[3]); 4006 break; 4007 } 4008 4009 default: 4010 { 4011 for (unsigned int i = 0; i < depth; ++i) 4012 writer.write_buffer(indent, indent_length); 4013 } 4014 } 4015 } 4016 node_output_comment(xml_buffered_writer & writer,const char_t * s)4017 PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s) 4018 { 4019 writer.write('<', '!', '-', '-'); 4020 4021 while (*s) 4022 { 4023 const char_t* prev = s; 4024 4025 // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body 4026 while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s; 4027 4028 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 4029 4030 if (*s) 4031 { 4032 assert(*s == '-'); 4033 4034 
writer.write('-', ' '); 4035 ++s; 4036 } 4037 } 4038 4039 writer.write('-', '-', '>'); 4040 } 4041 node_output_pi_value(xml_buffered_writer & writer,const char_t * s)4042 PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s) 4043 { 4044 while (*s) 4045 { 4046 const char_t* prev = s; 4047 4048 // look for ?> sequence - we can't output it since ?> terminates PI 4049 while (*s && !(s[0] == '?' && s[1] == '>')) ++s; 4050 4051 writer.write_buffer(prev, static_cast<size_t>(s - prev)); 4052 4053 if (*s) 4054 { 4055 assert(s[0] == '?' && s[1] == '>'); 4056 4057 writer.write('?', ' ', '>'); 4058 s += 2; 4059 } 4060 } 4061 } 4062 node_output_attributes(xml_buffered_writer & writer,xml_node_struct * node,const char_t * indent,size_t indent_length,unsigned int flags,unsigned int depth)4063 PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) 4064 { 4065 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 4066 4067 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) 4068 { 4069 if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes) 4070 { 4071 writer.write('\n'); 4072 4073 text_output_indent(writer, indent, indent_length, depth + 1); 4074 } 4075 else 4076 { 4077 writer.write(' '); 4078 } 4079 4080 writer.write_string(a->name ? 
a->name + 0 : default_name); 4081 writer.write('=', '"'); 4082 4083 if (a->value) 4084 text_output(writer, a->value, ctx_special_attr, flags); 4085 4086 writer.write('"'); 4087 } 4088 } 4089 node_output_start(xml_buffered_writer & writer,xml_node_struct * node,const char_t * indent,size_t indent_length,unsigned int flags,unsigned int depth)4090 PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) 4091 { 4092 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 4093 const char_t* name = node->name ? node->name + 0 : default_name; 4094 4095 writer.write('<'); 4096 writer.write_string(name); 4097 4098 if (node->first_attribute) 4099 node_output_attributes(writer, node, indent, indent_length, flags, depth); 4100 4101 // element nodes can have value if parse_embed_pcdata was used 4102 if (!node->value) 4103 { 4104 if (!node->first_child) 4105 { 4106 if (flags & format_no_empty_element_tags) 4107 { 4108 writer.write('>', '<', '/'); 4109 writer.write_string(name); 4110 writer.write('>'); 4111 4112 return false; 4113 } 4114 else 4115 { 4116 if ((flags & format_raw) == 0) 4117 writer.write(' '); 4118 4119 writer.write('/', '>'); 4120 4121 return false; 4122 } 4123 } 4124 else 4125 { 4126 writer.write('>'); 4127 4128 return true; 4129 } 4130 } 4131 else 4132 { 4133 writer.write('>'); 4134 4135 text_output(writer, node->value, ctx_special_pcdata, flags); 4136 4137 if (!node->first_child) 4138 { 4139 writer.write('<', '/'); 4140 writer.write_string(name); 4141 writer.write('>'); 4142 4143 return false; 4144 } 4145 else 4146 { 4147 return true; 4148 } 4149 } 4150 } 4151 node_output_end(xml_buffered_writer & writer,xml_node_struct * node)4152 PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node) 4153 { 4154 const char_t* default_name = PUGIXML_TEXT(":anonymous"); 4155 const char_t* name = node->name ? 
node->name + 0 : default_name;

	writer.write('<', '/');
	writer.write_string(name);
	writer.write('>');
}

// Writes one node that is neither an element nor a document: PCDATA, CDATA, comment,
// processing instruction, declaration or DOCTYPE. Any other node type trips the assertion.
//   writer - buffered writer that receives the markup
//   node   - node to serialize; its type is read with PUGI__NODETYPE
//   flags  - formatting flags, forwarded to text_output / node_output_attributes
PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
{
	// name written for processing instructions / declarations whose name pointer is null
	const char_t* default_name = PUGIXML_TEXT(":anonymous");

	switch (PUGI__NODETYPE(node))
	{
	case node_pcdata:
		// a null value is written as an empty string
		text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
		break;

	case node_cdata:
		text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
		break;

	case node_comment:
		node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
		break;

	case node_pi:
		// <?name value?> - the value (and the separating space) is only written when present
		writer.write('<', '?');
		writer.write_string(node->name ? node->name + 0 : default_name);

		if (node->value)
		{
			writer.write(' ');
			node_output_pi_value(writer, node->value);
		}

		writer.write('?', '>');
		break;

	case node_declaration:
		// <?name attributes?> - attributes are written with format_raw added to the flags
		writer.write('<', '?');
		writer.write_string(node->name ? node->name + 0 : default_name);
		node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
		writer.write('?', '>');
		break;

	case node_doctype:
		// <!DOCTYPE value> - the value is written verbatim via write_string
		writer.write('<', '!', 'D', 'O', 'C');
		writer.write('T', 'Y', 'P', 'E');

		if (node->value)
		{
			writer.write(' ');
			writer.write_string(node->value);
		}

		writer.write('>');
		break;

	default:
		assert(false && "Invalid node type"); // unreachable
	}
}

// Bit flags used by node_output to remember what whitespace must precede the next piece of markup.
enum indent_flags_t
{
	indent_newline = 1, // write '\n' first (suppressed when format_raw is set)
	indent_indent = 2   // write the indentation string for the current depth
};

// Serializes the subtree rooted at root without recursion: the tree is walked through
// first_child / next_sibling / parent links until the walk returns to root.
//   writer - buffered writer that receives the markup
//   root   - subtree root; the walk never leaves this subtree
//   indent - indentation string, repeated via text_output_indent
//   flags  - formatting flags (format_indent, format_indent_attributes, format_raw are tested here)
//   depth  - indentation depth of root; adjusted while descending into / leaving elements
PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
{
	// indentation is only measured (and therefore only written) when an indent flag is set and format_raw is not
	size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
	unsigned int indent_flags = indent_indent;

	xml_node_struct* node = root;

	do
	{
		assert(node);

		// begin writing current node
		if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
		{
			node_output_simple(writer, node, flags);

			// nothing (newline or indent) is inserted after text content
			indent_flags = 0;
		}
		else
		{
			if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
				writer.write('\n');

			if ((indent_flags & indent_indent) && indent_length)
				text_output_indent(writer, indent, indent_length, depth);

			if (PUGI__NODETYPE(node) == node_element)
			{
				indent_flags = indent_newline | indent_indent;

				// a true result makes the walk descend into the element's children
				if (node_output_start(writer, node, indent, indent_length, flags, depth))
				{
					// element nodes can have value if parse_embed_pcdata was used
					if (node->value)
						indent_flags = 0;

					node = node->first_child;
					depth++;
					continue;
				}
			}
			else if (PUGI__NODETYPE(node) == node_document)
			{
				// the document itself produces no markup; depth is not increased for its children
				indent_flags = indent_indent;

				if (node->first_child)
				{
					node = node->first_child;
					continue;
				}
			}
			else
			{
				node_output_simple(writer, node, flags);

				indent_flags = indent_newline | indent_indent;
			}
		}

		// continue to the next node
		while (node != root)
		{
			if (node->next_sibling)
			{
				node = node->next_sibling;
				break;
			}

			// no more siblings: climb to the parent, closing it if it is an element
			node = node->parent;

			// write closing node
			if (PUGI__NODETYPE(node) == node_element)
			{
				depth--;

				if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
					writer.write('\n');

				if ((indent_flags & indent_indent) && indent_length)
					text_output_indent(writer, indent, indent_length, depth);

				node_output_end(writer, node);

				indent_flags = indent_newline | indent_indent;
			}
		}
	}
	while (node != root);

	// final newline after the last piece of markup, unless the output ended with text or format_raw is set
	if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
		writer.write('\n');
}

// Returns true when a declaration node occurs among node's children before the first element child;
// the scan stops (returning false) at the first element.
PUGI__FN bool has_declaration(xml_node_struct* node)
{
	for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
	{
		xml_node_type type = PUGI__NODETYPE(child);

		if (type == node_declaration) return true;
		if (type == node_element) return false;
	}

	return false;
}

// Returns true when attr is found (by pointer identity) in node's attribute list.
PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
{
	for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
		if (a == attr)
			return true;

	return false;
}

// Attributes may only be attached to element and declaration nodes.
PUGI__FN bool allow_insert_attribute(xml_node_type parent)
{
	return parent == node_element || parent == node_declaration;
}
4343 allow_insert_child(xml_node_type parent,xml_node_type child)4344 PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child) 4345 { 4346 if (parent != node_document && parent != node_element) return false; 4347 if (child == node_document || child == node_null) return false; 4348 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false; 4349 4350 return true; 4351 } 4352 allow_move(xml_node parent,xml_node child)4353 PUGI__FN bool allow_move(xml_node parent, xml_node child) 4354 { 4355 // check that child can be a child of parent 4356 if (!allow_insert_child(parent.type(), child.type())) 4357 return false; 4358 4359 // check that node is not moved between documents 4360 if (parent.root() != child.root()) 4361 return false; 4362 4363 // check that new parent is not in the child subtree 4364 xml_node cur = parent; 4365 4366 while (cur) 4367 { 4368 if (cur == child) 4369 return false; 4370 4371 cur = cur.parent(); 4372 } 4373 4374 return true; 4375 } 4376 4377 template <typename String, typename Header> node_copy_string(String & dest,Header & header,uintptr_t header_mask,char_t * source,Header & source_header,xml_allocator * alloc)4378 PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc) 4379 { 4380 assert(!dest && (header & header_mask) == 0); 4381 4382 if (source) 4383 { 4384 if (alloc && (source_header & header_mask) == 0) 4385 { 4386 dest = source; 4387 4388 // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared 4389 header |= xml_memory_page_contents_shared_mask; 4390 source_header |= xml_memory_page_contents_shared_mask; 4391 } 4392 else 4393 strcpy_insitu(dest, header, header_mask, source, strlength(source)); 4394 } 4395 } 4396 node_copy_contents(xml_node_struct * dn,xml_node_struct * sn,xml_allocator * shared_alloc)4397 PUGI__FN void 
node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc) 4398 { 4399 node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc); 4400 node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc); 4401 4402 for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute) 4403 { 4404 xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn)); 4405 4406 if (da) 4407 { 4408 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); 4409 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); 4410 } 4411 } 4412 } 4413 node_copy_tree(xml_node_struct * dn,xml_node_struct * sn)4414 PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn) 4415 { 4416 xml_allocator& alloc = get_allocator(dn); 4417 xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? 
&alloc : 0; 4418 4419 node_copy_contents(dn, sn, shared_alloc); 4420 4421 xml_node_struct* dit = dn; 4422 xml_node_struct* sit = sn->first_child; 4423 4424 while (sit && sit != sn) 4425 { 4426 // when a tree is copied into one of the descendants, we need to skip that subtree to avoid an infinite loop 4427 if (sit != dn) 4428 { 4429 xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit)); 4430 4431 if (copy) 4432 { 4433 node_copy_contents(copy, sit, shared_alloc); 4434 4435 if (sit->first_child) 4436 { 4437 dit = copy; 4438 sit = sit->first_child; 4439 continue; 4440 } 4441 } 4442 } 4443 4444 // continue to the next node 4445 do 4446 { 4447 if (sit->next_sibling) 4448 { 4449 sit = sit->next_sibling; 4450 break; 4451 } 4452 4453 sit = sit->parent; 4454 dit = dit->parent; 4455 } 4456 while (sit != sn); 4457 } 4458 } 4459 node_copy_attribute(xml_attribute_struct * da,xml_attribute_struct * sa)4460 PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa) 4461 { 4462 xml_allocator& alloc = get_allocator(da); 4463 xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? 
&alloc : 0; 4464 4465 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); 4466 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); 4467 } 4468 is_text_node(xml_node_struct * node)4469 inline bool is_text_node(xml_node_struct* node) 4470 { 4471 xml_node_type type = PUGI__NODETYPE(node); 4472 4473 return type == node_pcdata || type == node_cdata; 4474 } 4475 4476 // get value with conversion functions string_to_integer(const char_t * value,U minv,U maxv)4477 template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW U string_to_integer(const char_t* value, U minv, U maxv) 4478 { 4479 U result = 0; 4480 const char_t* s = value; 4481 4482 while (PUGI__IS_CHARTYPE(*s, ct_space)) 4483 s++; 4484 4485 bool negative = (*s == '-'); 4486 4487 s += (*s == '+' || *s == '-'); 4488 4489 bool overflow = false; 4490 4491 if (s[0] == '0' && (s[1] | ' ') == 'x') 4492 { 4493 s += 2; 4494 4495 // since overflow detection relies on length of the sequence skip leading zeros 4496 while (*s == '0') 4497 s++; 4498 4499 const char_t* start = s; 4500 4501 for (;;) 4502 { 4503 if (static_cast<unsigned>(*s - '0') < 10) 4504 result = result * 16 + (*s - '0'); 4505 else if (static_cast<unsigned>((*s | ' ') - 'a') < 6) 4506 result = result * 16 + ((*s | ' ') - 'a' + 10); 4507 else 4508 break; 4509 4510 s++; 4511 } 4512 4513 size_t digits = static_cast<size_t>(s - start); 4514 4515 overflow = digits > sizeof(U) * 2; 4516 } 4517 else 4518 { 4519 // since overflow detection relies on length of the sequence skip leading zeros 4520 while (*s == '0') 4521 s++; 4522 4523 const char_t* start = s; 4524 4525 for (;;) 4526 { 4527 if (static_cast<unsigned>(*s - '0') < 10) 4528 result = result * 10 + (*s - '0'); 4529 else 4530 break; 4531 4532 s++; 4533 } 4534 4535 size_t digits = static_cast<size_t>(s - start); 4536 4537 PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 
2); 4538 4539 const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5; 4540 const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6'; 4541 const size_t high_bit = sizeof(U) * 8 - 1; 4542 4543 overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit))); 4544 } 4545 4546 if (negative) 4547 { 4548 // Workaround for crayc++ CC-3059: Expected no overflow in routine. 4549 #ifdef _CRAYC 4550 return (overflow || result > ~minv + 1) ? minv : ~result + 1; 4551 #else 4552 return (overflow || result > 0 - minv) ? minv : 0 - result; 4553 #endif 4554 } 4555 else 4556 return (overflow || result > maxv) ? maxv : result; 4557 } 4558 get_value_int(const char_t * value)4559 PUGI__FN int get_value_int(const char_t* value) 4560 { 4561 return string_to_integer<unsigned int>(value, static_cast<unsigned int>(INT_MIN), INT_MAX); 4562 } 4563 get_value_uint(const char_t * value)4564 PUGI__FN unsigned int get_value_uint(const char_t* value) 4565 { 4566 return string_to_integer<unsigned int>(value, 0, UINT_MAX); 4567 } 4568 get_value_double(const char_t * value)4569 PUGI__FN double get_value_double(const char_t* value) 4570 { 4571 #ifdef PUGIXML_WCHAR_MODE 4572 return wcstod(value, 0); 4573 #else 4574 return strtod(value, 0); 4575 #endif 4576 } 4577 get_value_float(const char_t * value)4578 PUGI__FN float get_value_float(const char_t* value) 4579 { 4580 #ifdef PUGIXML_WCHAR_MODE 4581 return static_cast<float>(wcstod(value, 0)); 4582 #else 4583 return static_cast<float>(strtod(value, 0)); 4584 #endif 4585 } 4586 get_value_bool(const char_t * value)4587 PUGI__FN bool get_value_bool(const char_t* value) 4588 { 4589 // only look at first char 4590 char_t first = *value; 4591 4592 // 1*, t* (true), T* (True), y* (yes), Y* (YES) 4593 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y'); 4594 } 4595 4596 #ifdef PUGIXML_HAS_LONG_LONG get_value_llong(const char_t * 
value)4597 PUGI__FN long long get_value_llong(const char_t* value) 4598 { 4599 return string_to_integer<unsigned long long>(value, static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX); 4600 } 4601 get_value_ullong(const char_t * value)4602 PUGI__FN unsigned long long get_value_ullong(const char_t* value) 4603 { 4604 return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX); 4605 } 4606 #endif 4607 integer_to_string(char_t * begin,char_t * end,U value,bool negative)4608 template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative) 4609 { 4610 char_t* result = end - 1; 4611 U rest = negative ? 0 - value : value; 4612 4613 do 4614 { 4615 *result-- = static_cast<char_t>('0' + (rest % 10)); 4616 rest /= 10; 4617 } 4618 while (rest); 4619 4620 assert(result >= begin); 4621 (void)begin; 4622 4623 *result = '-'; 4624 4625 return result + !negative; 4626 } 4627 4628 // set value with conversion functions 4629 template <typename String, typename Header> set_value_ascii(String & dest,Header & header,uintptr_t header_mask,char * buf)4630 PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf) 4631 { 4632 #ifdef PUGIXML_WCHAR_MODE 4633 char_t wbuf[128]; 4634 assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0])); 4635 4636 size_t offset = 0; 4637 for (; buf[offset]; ++offset) wbuf[offset] = buf[offset]; 4638 4639 return strcpy_insitu(dest, header, header_mask, wbuf, offset); 4640 #else 4641 return strcpy_insitu(dest, header, header_mask, buf, strlen(buf)); 4642 #endif 4643 } 4644 4645 template <typename U, typename String, typename Header> set_value_integer(String & dest,Header & header,uintptr_t header_mask,U value,bool negative)4646 PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative) 4647 { 4648 char_t buf[64]; 4649 char_t* end = buf + sizeof(buf) / sizeof(buf[0]); 4650 char_t* begin = 
integer_to_string(buf, end, value, negative); 4651 4652 return strcpy_insitu(dest, header, header_mask, begin, end - begin); 4653 } 4654 4655 template <typename String, typename Header> set_value_convert(String & dest,Header & header,uintptr_t header_mask,float value)4656 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value) 4657 { 4658 char buf[128]; 4659 PUGI__SNPRINTF(buf, "%.9g", value); 4660 4661 return set_value_ascii(dest, header, header_mask, buf); 4662 } 4663 4664 template <typename String, typename Header> set_value_convert(String & dest,Header & header,uintptr_t header_mask,double value)4665 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value) 4666 { 4667 char buf[128]; 4668 PUGI__SNPRINTF(buf, "%.17g", value); 4669 4670 return set_value_ascii(dest, header, header_mask, buf); 4671 } 4672 4673 template <typename String, typename Header> set_value_bool(String & dest,Header & header,uintptr_t header_mask,bool value)4674 PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value) 4675 { 4676 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 
4 : 5); 4677 } 4678 load_buffer_impl(xml_document_struct * doc,xml_node_struct * root,void * contents,size_t size,unsigned int options,xml_encoding encoding,bool is_mutable,bool own,char_t ** out_buffer)4679 PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer) 4680 { 4681 // check input buffer 4682 if (!contents && size) return make_parse_result(status_io_error); 4683 4684 // get actual encoding 4685 xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size); 4686 4687 // get private buffer 4688 char_t* buffer = 0; 4689 size_t length = 0; 4690 4691 if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory); 4692 4693 // delete original buffer if we performed a conversion 4694 if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents); 4695 4696 // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself 4697 if (own || buffer != contents) *out_buffer = buffer; 4698 4699 // store buffer for offset_debug 4700 doc->buffer = buffer; 4701 4702 // parse 4703 xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options); 4704 4705 // remember encoding 4706 res.encoding = buffer_encoding; 4707 4708 return res; 4709 } 4710 4711 // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick get_file_size(FILE * file,size_t & out_result)4712 PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result) 4713 { 4714 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE) 4715 // there are 64-bit versions of fseek/ftell, let's use them 4716 typedef __int64 length_type; 4717 4718 _fseeki64(file, 0, SEEK_END); 4719 length_type length 
= _ftelli64(file); 4720 _fseeki64(file, 0, SEEK_SET); 4721 #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)) 4722 // there are 64-bit versions of fseek/ftell, let's use them 4723 typedef off64_t length_type; 4724 4725 fseeko64(file, 0, SEEK_END); 4726 length_type length = ftello64(file); 4727 fseeko64(file, 0, SEEK_SET); 4728 #else 4729 // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway. 4730 typedef long length_type; 4731 4732 fseek(file, 0, SEEK_END); 4733 length_type length = ftell(file); 4734 fseek(file, 0, SEEK_SET); 4735 #endif 4736 4737 // check for I/O errors 4738 if (length < 0) return status_io_error; 4739 4740 // check for overflow 4741 size_t result = static_cast<size_t>(length); 4742 4743 if (static_cast<length_type>(result) != length) return status_out_of_memory; 4744 4745 // finalize 4746 out_result = result; 4747 4748 return status_ok; 4749 } 4750 4751 // This function assumes that buffer has extra sizeof(char_t) writable bytes after size zero_terminate_buffer(void * buffer,size_t size,xml_encoding encoding)4752 PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding) 4753 { 4754 // We only need to zero-terminate if encoding conversion does not do it for us 4755 #ifdef PUGIXML_WCHAR_MODE 4756 xml_encoding wchar_encoding = get_wchar_encoding(); 4757 4758 if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding)) 4759 { 4760 size_t length = size / sizeof(char_t); 4761 4762 static_cast<char_t*>(buffer)[length] = 0; 4763 return (length + 1) * sizeof(char_t); 4764 } 4765 #else 4766 if (encoding == encoding_utf8) 4767 { 4768 static_cast<char*>(buffer)[size] = 0; 4769 return size + 1; 4770 } 4771 #endif 4772 4773 return size; 4774 } 4775 load_file_impl(xml_document_struct * doc,FILE * file,unsigned int options,xml_encoding encoding,char_t ** 
out_buffer)4776 PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer) 4777 { 4778 if (!file) return make_parse_result(status_file_not_found); 4779 4780 // get file size (can result in I/O errors) 4781 size_t size = 0; 4782 xml_parse_status size_status = get_file_size(file, size); 4783 if (size_status != status_ok) return make_parse_result(size_status); 4784 4785 size_t max_suffix_size = sizeof(char_t); 4786 4787 // allocate buffer for the whole file 4788 char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size)); 4789 if (!contents) return make_parse_result(status_out_of_memory); 4790 4791 // read file in memory 4792 size_t read_size = fread(contents, 1, size, file); 4793 4794 if (read_size != size) 4795 { 4796 xml_memory::deallocate(contents); 4797 return make_parse_result(status_io_error); 4798 } 4799 4800 xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size); 4801 4802 return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer); 4803 } 4804 close_file(FILE * file)4805 PUGI__FN void close_file(FILE* file) 4806 { 4807 fclose(file); 4808 } 4809 4810 #ifndef PUGIXML_NO_STL 4811 template <typename T> struct xml_stream_chunk 4812 { createxml_stream_chunk4813 static xml_stream_chunk* create() 4814 { 4815 void* memory = xml_memory::allocate(sizeof(xml_stream_chunk)); 4816 if (!memory) return 0; 4817 4818 return new (memory) xml_stream_chunk(); 4819 } 4820 destroyxml_stream_chunk4821 static void destroy(xml_stream_chunk* chunk) 4822 { 4823 // free chunk chain 4824 while (chunk) 4825 { 4826 xml_stream_chunk* next_ = chunk->next; 4827 4828 xml_memory::deallocate(chunk); 4829 4830 chunk = next_; 4831 } 4832 } 4833 xml_stream_chunkxml_stream_chunk4834 xml_stream_chunk(): next(0), size(0) 4835 { 4836 } 4837 4838 xml_stream_chunk* next; 4839 size_t size; 4840 
4841 T data[xml_memory_page_size / sizeof(T)]; 4842 }; 4843 load_stream_data_noseek(std::basic_istream<T> & stream,void ** out_buffer,size_t * out_size)4844 template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size) 4845 { 4846 auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy); 4847 4848 // read file to a chunk list 4849 size_t total = 0; 4850 xml_stream_chunk<T>* last = 0; 4851 4852 while (!stream.eof()) 4853 { 4854 // allocate new chunk 4855 xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create(); 4856 if (!chunk) return status_out_of_memory; 4857 4858 // append chunk to list 4859 if (last) last = last->next = chunk; 4860 else chunks.data = last = chunk; 4861 4862 // read data to chunk 4863 stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T))); 4864 chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T); 4865 4866 // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors 4867 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error; 4868 4869 // guard against huge files (chunk size is small enough to make this overflow check work) 4870 if (total + chunk->size < total) return status_out_of_memory; 4871 total += chunk->size; 4872 } 4873 4874 size_t max_suffix_size = sizeof(char_t); 4875 4876 // copy chunk list to a contiguous buffer 4877 char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size)); 4878 if (!buffer) return status_out_of_memory; 4879 4880 char* write = buffer; 4881 4882 for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next) 4883 { 4884 assert(write + chunk->size <= buffer + total); 4885 memcpy(write, chunk->data, chunk->size); 4886 write += chunk->size; 4887 } 4888 4889 assert(write == buffer + total); 4890 4891 // return buffer 4892 *out_buffer = buffer; 4893 *out_size = total; 4894 4895 return 
status_ok; 4896 } 4897 load_stream_data_seek(std::basic_istream<T> & stream,void ** out_buffer,size_t * out_size)4898 template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size) 4899 { 4900 // get length of remaining data in stream 4901 typename std::basic_istream<T>::pos_type pos = stream.tellg(); 4902 stream.seekg(0, std::ios::end); 4903 std::streamoff length = stream.tellg() - pos; 4904 stream.seekg(pos); 4905 4906 if (stream.fail() || pos < 0) return status_io_error; 4907 4908 // guard against huge files 4909 size_t read_length = static_cast<size_t>(length); 4910 4911 if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory; 4912 4913 size_t max_suffix_size = sizeof(char_t); 4914 4915 // read stream data into memory (guard against stream exceptions with buffer holder) 4916 auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate); 4917 if (!buffer.data) return status_out_of_memory; 4918 4919 stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length)); 4920 4921 // read may set failbit | eofbit in case gcount() is less than read_length (i.e. 
line ending conversion), so check for other I/O errors 4922 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error; 4923 4924 // return buffer 4925 size_t actual_length = static_cast<size_t>(stream.gcount()); 4926 assert(actual_length <= read_length); 4927 4928 *out_buffer = buffer.release(); 4929 *out_size = actual_length * sizeof(T); 4930 4931 return status_ok; 4932 } 4933 load_stream_impl(xml_document_struct * doc,std::basic_istream<T> & stream,unsigned int options,xml_encoding encoding,char_t ** out_buffer)4934 template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer) 4935 { 4936 void* buffer = 0; 4937 size_t size = 0; 4938 xml_parse_status status = status_ok; 4939 4940 // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits) 4941 if (stream.fail()) return make_parse_result(status_io_error); 4942 4943 // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory) 4944 if (stream.tellg() < 0) 4945 { 4946 stream.clear(); // clear error flags that could be set by a failing tellg 4947 status = load_stream_data_noseek(stream, &buffer, &size); 4948 } 4949 else 4950 status = load_stream_data_seek(stream, &buffer, &size); 4951 4952 if (status != status_ok) return make_parse_result(status); 4953 4954 xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size); 4955 4956 return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer); 4957 } 4958 #endif 4959 4960 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))) open_file_wide(const wchar_t * path,const wchar_t * mode)4961 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* 
mode) 4962 { 4963 return _wfopen(path, mode); 4964 } 4965 #else convert_path_heap(const wchar_t * str)4966 PUGI__FN char* convert_path_heap(const wchar_t* str) 4967 { 4968 assert(str); 4969 4970 // first pass: get length in utf8 characters 4971 size_t length = strlength_wide(str); 4972 size_t size = as_utf8_begin(str, length); 4973 4974 // allocate resulting string 4975 char* result = static_cast<char*>(xml_memory::allocate(size + 1)); 4976 if (!result) return 0; 4977 4978 // second pass: convert to utf8 4979 as_utf8_end(result, size, str, length); 4980 4981 // zero-terminate 4982 result[size] = 0; 4983 4984 return result; 4985 } 4986 open_file_wide(const wchar_t * path,const wchar_t * mode)4987 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) 4988 { 4989 // there is no standard function to open wide paths, so our best bet is to try utf8 path 4990 char* path_utf8 = convert_path_heap(path); 4991 if (!path_utf8) return 0; 4992 4993 // convert mode to ASCII (we mirror _wfopen interface) 4994 char mode_ascii[4] = {0}; 4995 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]); 4996 4997 // try to open the utf8 path 4998 FILE* result = fopen(path_utf8, mode_ascii); 4999 5000 // free dummy buffer 5001 xml_memory::deallocate(path_utf8); 5002 5003 return result; 5004 } 5005 #endif 5006 save_file_impl(const xml_document & doc,FILE * file,const char_t * indent,unsigned int flags,xml_encoding encoding)5007 PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding) 5008 { 5009 if (!file) return false; 5010 5011 xml_writer_file writer(file); 5012 doc.save(writer, indent, flags, encoding); 5013 5014 return ferror(file) == 0; 5015 } 5016 5017 struct name_null_sentry 5018 { 5019 xml_node_struct* node; 5020 char_t* name; 5021 name_null_sentryname_null_sentry5022 name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name) 5023 { 5024 node->name = 0; 5025 
} 5026 ~name_null_sentryname_null_sentry5027 ~name_null_sentry() 5028 { 5029 node->name = name; 5030 } 5031 }; 5032 PUGI__NS_END 5033 5034 namespace pugi 5035 { xml_writer_file(void * file_)5036 PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_) 5037 { 5038 } 5039 write(const void * data,size_t size)5040 PUGI__FN void xml_writer_file::write(const void* data, size_t size) 5041 { 5042 size_t result = fwrite(data, 1, size, static_cast<FILE*>(file)); 5043 (void)!result; // unfortunately we can't do proper error handling here 5044 } 5045 5046 #ifndef PUGIXML_NO_STL xml_writer_stream(std::basic_ostream<char,std::char_traits<char>> & stream)5047 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0) 5048 { 5049 } 5050 xml_writer_stream(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream)5051 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream) 5052 { 5053 } 5054 write(const void * data,size_t size)5055 PUGI__FN void xml_writer_stream::write(const void* data, size_t size) 5056 { 5057 if (narrow_stream) 5058 { 5059 assert(!wide_stream); 5060 narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size)); 5061 } 5062 else 5063 { 5064 assert(wide_stream); 5065 assert(size % sizeof(wchar_t) == 0); 5066 5067 wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t))); 5068 } 5069 } 5070 #endif 5071 xml_tree_walker()5072 PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0) 5073 { 5074 } 5075 ~xml_tree_walker()5076 PUGI__FN xml_tree_walker::~xml_tree_walker() 5077 { 5078 } 5079 depth() const5080 PUGI__FN int xml_tree_walker::depth() const 5081 { 5082 return _depth; 5083 } 5084 begin(xml_node &)5085 PUGI__FN bool xml_tree_walker::begin(xml_node&) 5086 { 5087 return true; 5088 } 5089 
end(xml_node &)5090 PUGI__FN bool xml_tree_walker::end(xml_node&) 5091 { 5092 return true; 5093 } 5094 xml_attribute()5095 PUGI__FN xml_attribute::xml_attribute(): _attr(0) 5096 { 5097 } 5098 xml_attribute(xml_attribute_struct * attr)5099 PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr) 5100 { 5101 } 5102 unspecified_bool_xml_attribute(xml_attribute ***)5103 PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***) 5104 { 5105 } 5106 operator xml_attribute::unspecified_bool_type() const5107 PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const 5108 { 5109 return _attr ? unspecified_bool_xml_attribute : 0; 5110 } 5111 operator !() const5112 PUGI__FN bool xml_attribute::operator!() const 5113 { 5114 return !_attr; 5115 } 5116 operator ==(const xml_attribute & r) const5117 PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const 5118 { 5119 return (_attr == r._attr); 5120 } 5121 operator !=(const xml_attribute & r) const5122 PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const 5123 { 5124 return (_attr != r._attr); 5125 } 5126 operator <(const xml_attribute & r) const5127 PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const 5128 { 5129 return (_attr < r._attr); 5130 } 5131 operator >(const xml_attribute & r) const5132 PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const 5133 { 5134 return (_attr > r._attr); 5135 } 5136 operator <=(const xml_attribute & r) const5137 PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const 5138 { 5139 return (_attr <= r._attr); 5140 } 5141 operator >=(const xml_attribute & r) const5142 PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const 5143 { 5144 return (_attr >= r._attr); 5145 } 5146 next_attribute() const5147 PUGI__FN xml_attribute xml_attribute::next_attribute() const 5148 { 5149 return _attr ? 
xml_attribute(_attr->next_attribute) : xml_attribute(); 5150 } 5151 previous_attribute() const5152 PUGI__FN xml_attribute xml_attribute::previous_attribute() const 5153 { 5154 return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute(); 5155 } 5156 as_string(const char_t * def) const5157 PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const 5158 { 5159 return (_attr && _attr->value) ? _attr->value + 0 : def; 5160 } 5161 as_int(int def) const5162 PUGI__FN int xml_attribute::as_int(int def) const 5163 { 5164 return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def; 5165 } 5166 as_uint(unsigned int def) const5167 PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const 5168 { 5169 return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def; 5170 } 5171 as_double(double def) const5172 PUGI__FN double xml_attribute::as_double(double def) const 5173 { 5174 return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def; 5175 } 5176 as_float(float def) const5177 PUGI__FN float xml_attribute::as_float(float def) const 5178 { 5179 return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def; 5180 } 5181 as_bool(bool def) const5182 PUGI__FN bool xml_attribute::as_bool(bool def) const 5183 { 5184 return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def; 5185 } 5186 5187 #ifdef PUGIXML_HAS_LONG_LONG as_llong(long long def) const5188 PUGI__FN long long xml_attribute::as_llong(long long def) const 5189 { 5190 return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def; 5191 } 5192 as_ullong(unsigned long long def) const5193 PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const 5194 { 5195 return (_attr && _attr->value) ? 
impl::get_value_ullong(_attr->value) : def; 5196 } 5197 #endif 5198 empty() const5199 PUGI__FN bool xml_attribute::empty() const 5200 { 5201 return !_attr; 5202 } 5203 name() const5204 PUGI__FN const char_t* xml_attribute::name() const 5205 { 5206 return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT(""); 5207 } 5208 value() const5209 PUGI__FN const char_t* xml_attribute::value() const 5210 { 5211 return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT(""); 5212 } 5213 hash_value() const5214 PUGI__FN size_t xml_attribute::hash_value() const 5215 { 5216 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct)); 5217 } 5218 internal_object() const5219 PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const 5220 { 5221 return _attr; 5222 } 5223 operator =(const char_t * rhs)5224 PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs) 5225 { 5226 set_value(rhs); 5227 return *this; 5228 } 5229 operator =(int rhs)5230 PUGI__FN xml_attribute& xml_attribute::operator=(int rhs) 5231 { 5232 set_value(rhs); 5233 return *this; 5234 } 5235 operator =(unsigned int rhs)5236 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs) 5237 { 5238 set_value(rhs); 5239 return *this; 5240 } 5241 operator =(long rhs)5242 PUGI__FN xml_attribute& xml_attribute::operator=(long rhs) 5243 { 5244 set_value(rhs); 5245 return *this; 5246 } 5247 operator =(unsigned long rhs)5248 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs) 5249 { 5250 set_value(rhs); 5251 return *this; 5252 } 5253 operator =(double rhs)5254 PUGI__FN xml_attribute& xml_attribute::operator=(double rhs) 5255 { 5256 set_value(rhs); 5257 return *this; 5258 } 5259 operator =(float rhs)5260 PUGI__FN xml_attribute& xml_attribute::operator=(float rhs) 5261 { 5262 set_value(rhs); 5263 return *this; 5264 } 5265 operator =(bool rhs)5266 PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs) 5267 { 5268 set_value(rhs); 5269 
return *this; 5270 } 5271 5272 #ifdef PUGIXML_HAS_LONG_LONG operator =(long long rhs)5273 PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs) 5274 { 5275 set_value(rhs); 5276 return *this; 5277 } 5278 operator =(unsigned long long rhs)5279 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs) 5280 { 5281 set_value(rhs); 5282 return *this; 5283 } 5284 #endif 5285 set_name(const char_t * rhs)5286 PUGI__FN bool xml_attribute::set_name(const char_t* rhs) 5287 { 5288 if (!_attr) return false; 5289 5290 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); 5291 } 5292 set_value(const char_t * rhs)5293 PUGI__FN bool xml_attribute::set_value(const char_t* rhs) 5294 { 5295 if (!_attr) return false; 5296 5297 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); 5298 } 5299 set_value(int rhs)5300 PUGI__FN bool xml_attribute::set_value(int rhs) 5301 { 5302 if (!_attr) return false; 5303 5304 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); 5305 } 5306 set_value(unsigned int rhs)5307 PUGI__FN bool xml_attribute::set_value(unsigned int rhs) 5308 { 5309 if (!_attr) return false; 5310 5311 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); 5312 } 5313 set_value(long rhs)5314 PUGI__FN bool xml_attribute::set_value(long rhs) 5315 { 5316 if (!_attr) return false; 5317 5318 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); 5319 } 5320 set_value(unsigned long rhs)5321 PUGI__FN bool xml_attribute::set_value(unsigned long rhs) 5322 { 5323 if (!_attr) return false; 5324 5325 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, 
impl::xml_memory_page_value_allocated_mask, rhs, false); 5326 } 5327 set_value(double rhs)5328 PUGI__FN bool xml_attribute::set_value(double rhs) 5329 { 5330 if (!_attr) return false; 5331 5332 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); 5333 } 5334 set_value(float rhs)5335 PUGI__FN bool xml_attribute::set_value(float rhs) 5336 { 5337 if (!_attr) return false; 5338 5339 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); 5340 } 5341 set_value(bool rhs)5342 PUGI__FN bool xml_attribute::set_value(bool rhs) 5343 { 5344 if (!_attr) return false; 5345 5346 return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); 5347 } 5348 5349 #ifdef PUGIXML_HAS_LONG_LONG set_value(long long rhs)5350 PUGI__FN bool xml_attribute::set_value(long long rhs) 5351 { 5352 if (!_attr) return false; 5353 5354 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); 5355 } 5356 set_value(unsigned long long rhs)5357 PUGI__FN bool xml_attribute::set_value(unsigned long long rhs) 5358 { 5359 if (!_attr) return false; 5360 5361 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); 5362 } 5363 #endif 5364 5365 #ifdef __BORLANDC__ operator &&(const xml_attribute & lhs,bool rhs)5366 PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs) 5367 { 5368 return (bool)lhs && rhs; 5369 } 5370 operator ||(const xml_attribute & lhs,bool rhs)5371 PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs) 5372 { 5373 return (bool)lhs || rhs; 5374 } 5375 #endif 5376 xml_node()5377 PUGI__FN xml_node::xml_node(): _root(0) 5378 { 5379 } 5380 xml_node(xml_node_struct * p)5381 PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p) 5382 { 5383 } 5384 
unspecified_bool_xml_node(xml_node ***)5385 PUGI__FN static void unspecified_bool_xml_node(xml_node***) 5386 { 5387 } 5388 operator xml_node::unspecified_bool_type() const5389 PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const 5390 { 5391 return _root ? unspecified_bool_xml_node : 0; 5392 } 5393 operator !() const5394 PUGI__FN bool xml_node::operator!() const 5395 { 5396 return !_root; 5397 } 5398 begin() const5399 PUGI__FN xml_node::iterator xml_node::begin() const 5400 { 5401 return iterator(_root ? _root->first_child + 0 : 0, _root); 5402 } 5403 end() const5404 PUGI__FN xml_node::iterator xml_node::end() const 5405 { 5406 return iterator(0, _root); 5407 } 5408 attributes_begin() const5409 PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const 5410 { 5411 return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root); 5412 } 5413 attributes_end() const5414 PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const 5415 { 5416 return attribute_iterator(0, _root); 5417 } 5418 children() const5419 PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const 5420 { 5421 return xml_object_range<xml_node_iterator>(begin(), end()); 5422 } 5423 children(const char_t * name_) const5424 PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const 5425 { 5426 return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_)); 5427 } 5428 attributes() const5429 PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const 5430 { 5431 return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end()); 5432 } 5433 operator ==(const xml_node & r) const5434 PUGI__FN bool xml_node::operator==(const xml_node& r) const 5435 { 5436 return (_root == r._root); 5437 } 5438 operator !=(const xml_node & r) const5439 PUGI__FN bool xml_node::operator!=(const xml_node& r) 
const 5440 { 5441 return (_root != r._root); 5442 } 5443 operator <(const xml_node & r) const5444 PUGI__FN bool xml_node::operator<(const xml_node& r) const 5445 { 5446 return (_root < r._root); 5447 } 5448 operator >(const xml_node & r) const5449 PUGI__FN bool xml_node::operator>(const xml_node& r) const 5450 { 5451 return (_root > r._root); 5452 } 5453 operator <=(const xml_node & r) const5454 PUGI__FN bool xml_node::operator<=(const xml_node& r) const 5455 { 5456 return (_root <= r._root); 5457 } 5458 operator >=(const xml_node & r) const5459 PUGI__FN bool xml_node::operator>=(const xml_node& r) const 5460 { 5461 return (_root >= r._root); 5462 } 5463 empty() const5464 PUGI__FN bool xml_node::empty() const 5465 { 5466 return !_root; 5467 } 5468 name() const5469 PUGI__FN const char_t* xml_node::name() const 5470 { 5471 return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT(""); 5472 } 5473 type() const5474 PUGI__FN xml_node_type xml_node::type() const 5475 { 5476 return _root ? PUGI__NODETYPE(_root) : node_null; 5477 } 5478 value() const5479 PUGI__FN const char_t* xml_node::value() const 5480 { 5481 return (_root && _root->value) ? 
_root->value + 0 : PUGIXML_TEXT(""); 5482 } 5483 child(const char_t * name_) const5484 PUGI__FN xml_node xml_node::child(const char_t* name_) const 5485 { 5486 if (!_root) return xml_node(); 5487 5488 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 5489 if (i->name && impl::strequal(name_, i->name)) return xml_node(i); 5490 5491 return xml_node(); 5492 } 5493 attribute(const char_t * name_) const5494 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const 5495 { 5496 if (!_root) return xml_attribute(); 5497 5498 for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) 5499 if (i->name && impl::strequal(name_, i->name)) 5500 return xml_attribute(i); 5501 5502 return xml_attribute(); 5503 } 5504 next_sibling(const char_t * name_) const5505 PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const 5506 { 5507 if (!_root) return xml_node(); 5508 5509 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) 5510 if (i->name && impl::strequal(name_, i->name)) return xml_node(i); 5511 5512 return xml_node(); 5513 } 5514 next_sibling() const5515 PUGI__FN xml_node xml_node::next_sibling() const 5516 { 5517 return _root ? 
xml_node(_root->next_sibling) : xml_node(); 5518 } 5519 previous_sibling(const char_t * name_) const5520 PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const 5521 { 5522 if (!_root) return xml_node(); 5523 5524 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) 5525 if (i->name && impl::strequal(name_, i->name)) return xml_node(i); 5526 5527 return xml_node(); 5528 } 5529 attribute(const char_t * name_,xml_attribute & hint_) const5530 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const 5531 { 5532 xml_attribute_struct* hint = hint_._attr; 5533 5534 // if hint is not an attribute of node, behavior is not defined 5535 assert(!hint || (_root && impl::is_attribute_of(hint, _root))); 5536 5537 if (!_root) return xml_attribute(); 5538 5539 // optimistically search from hint up until the end 5540 for (xml_attribute_struct* i = hint; i; i = i->next_attribute) 5541 if (i->name && impl::strequal(name_, i->name)) 5542 { 5543 // update hint to maximize efficiency of searching for consecutive attributes 5544 hint_._attr = i->next_attribute; 5545 5546 return xml_attribute(i); 5547 } 5548 5549 // wrap around and search from the first attribute until the hint 5550 // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails 5551 for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute) 5552 if (j->name && impl::strequal(name_, j->name)) 5553 { 5554 // update hint to maximize efficiency of searching for consecutive attributes 5555 hint_._attr = j->next_attribute; 5556 5557 return xml_attribute(j); 5558 } 5559 5560 return xml_attribute(); 5561 } 5562 previous_sibling() const5563 PUGI__FN xml_node xml_node::previous_sibling() const 5564 { 5565 if (!_root) return xml_node(); 5566 5567 if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c); 5568 else return xml_node(); 5569 } 5570 
parent() const5571 PUGI__FN xml_node xml_node::parent() const 5572 { 5573 return _root ? xml_node(_root->parent) : xml_node(); 5574 } 5575 root() const5576 PUGI__FN xml_node xml_node::root() const 5577 { 5578 return _root ? xml_node(&impl::get_document(_root)) : xml_node(); 5579 } 5580 text() const5581 PUGI__FN xml_text xml_node::text() const 5582 { 5583 return xml_text(_root); 5584 } 5585 child_value() const5586 PUGI__FN const char_t* xml_node::child_value() const 5587 { 5588 if (!_root) return PUGIXML_TEXT(""); 5589 5590 // element nodes can have value if parse_embed_pcdata was used 5591 if (PUGI__NODETYPE(_root) == node_element && _root->value) 5592 return _root->value; 5593 5594 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 5595 if (impl::is_text_node(i) && i->value) 5596 return i->value; 5597 5598 return PUGIXML_TEXT(""); 5599 } 5600 child_value(const char_t * name_) const5601 PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const 5602 { 5603 return child(name_).child_value(); 5604 } 5605 first_attribute() const5606 PUGI__FN xml_attribute xml_node::first_attribute() const 5607 { 5608 return _root ? xml_attribute(_root->first_attribute) : xml_attribute(); 5609 } 5610 last_attribute() const5611 PUGI__FN xml_attribute xml_node::last_attribute() const 5612 { 5613 return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute(); 5614 } 5615 first_child() const5616 PUGI__FN xml_node xml_node::first_child() const 5617 { 5618 return _root ? xml_node(_root->first_child) : xml_node(); 5619 } 5620 last_child() const5621 PUGI__FN xml_node xml_node::last_child() const 5622 { 5623 return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node(); 5624 } 5625 set_name(const char_t * rhs)5626 PUGI__FN bool xml_node::set_name(const char_t* rhs) 5627 { 5628 xml_node_type type_ = _root ? 
PUGI__NODETYPE(_root) : node_null; 5629 5630 if (type_ != node_element && type_ != node_pi && type_ != node_declaration) 5631 return false; 5632 5633 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); 5634 } 5635 set_value(const char_t * rhs)5636 PUGI__FN bool xml_node::set_value(const char_t* rhs) 5637 { 5638 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null; 5639 5640 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype) 5641 return false; 5642 5643 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); 5644 } 5645 append_attribute(const char_t * name_)5646 PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_) 5647 { 5648 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5649 5650 impl::xml_allocator& alloc = impl::get_allocator(_root); 5651 if (!alloc.reserve()) return xml_attribute(); 5652 5653 xml_attribute a(impl::allocate_attribute(alloc)); 5654 if (!a) return xml_attribute(); 5655 5656 impl::append_attribute(a._attr, _root); 5657 5658 a.set_name(name_); 5659 5660 return a; 5661 } 5662 prepend_attribute(const char_t * name_)5663 PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_) 5664 { 5665 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5666 5667 impl::xml_allocator& alloc = impl::get_allocator(_root); 5668 if (!alloc.reserve()) return xml_attribute(); 5669 5670 xml_attribute a(impl::allocate_attribute(alloc)); 5671 if (!a) return xml_attribute(); 5672 5673 impl::prepend_attribute(a._attr, _root); 5674 5675 a.set_name(name_); 5676 5677 return a; 5678 } 5679 insert_attribute_after(const char_t * name_,const xml_attribute & attr)5680 PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr) 5681 { 5682 if 
(!impl::allow_insert_attribute(type())) return xml_attribute(); 5683 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 5684 5685 impl::xml_allocator& alloc = impl::get_allocator(_root); 5686 if (!alloc.reserve()) return xml_attribute(); 5687 5688 xml_attribute a(impl::allocate_attribute(alloc)); 5689 if (!a) return xml_attribute(); 5690 5691 impl::insert_attribute_after(a._attr, attr._attr, _root); 5692 5693 a.set_name(name_); 5694 5695 return a; 5696 } 5697 insert_attribute_before(const char_t * name_,const xml_attribute & attr)5698 PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr) 5699 { 5700 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5701 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 5702 5703 impl::xml_allocator& alloc = impl::get_allocator(_root); 5704 if (!alloc.reserve()) return xml_attribute(); 5705 5706 xml_attribute a(impl::allocate_attribute(alloc)); 5707 if (!a) return xml_attribute(); 5708 5709 impl::insert_attribute_before(a._attr, attr._attr, _root); 5710 5711 a.set_name(name_); 5712 5713 return a; 5714 } 5715 append_copy(const xml_attribute & proto)5716 PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto) 5717 { 5718 if (!proto) return xml_attribute(); 5719 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5720 5721 impl::xml_allocator& alloc = impl::get_allocator(_root); 5722 if (!alloc.reserve()) return xml_attribute(); 5723 5724 xml_attribute a(impl::allocate_attribute(alloc)); 5725 if (!a) return xml_attribute(); 5726 5727 impl::append_attribute(a._attr, _root); 5728 impl::node_copy_attribute(a._attr, proto._attr); 5729 5730 return a; 5731 } 5732 prepend_copy(const xml_attribute & proto)5733 PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto) 5734 { 5735 if (!proto) return xml_attribute(); 5736 if (!impl::allow_insert_attribute(type())) return 
xml_attribute(); 5737 5738 impl::xml_allocator& alloc = impl::get_allocator(_root); 5739 if (!alloc.reserve()) return xml_attribute(); 5740 5741 xml_attribute a(impl::allocate_attribute(alloc)); 5742 if (!a) return xml_attribute(); 5743 5744 impl::prepend_attribute(a._attr, _root); 5745 impl::node_copy_attribute(a._attr, proto._attr); 5746 5747 return a; 5748 } 5749 insert_copy_after(const xml_attribute & proto,const xml_attribute & attr)5750 PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr) 5751 { 5752 if (!proto) return xml_attribute(); 5753 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5754 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 5755 5756 impl::xml_allocator& alloc = impl::get_allocator(_root); 5757 if (!alloc.reserve()) return xml_attribute(); 5758 5759 xml_attribute a(impl::allocate_attribute(alloc)); 5760 if (!a) return xml_attribute(); 5761 5762 impl::insert_attribute_after(a._attr, attr._attr, _root); 5763 impl::node_copy_attribute(a._attr, proto._attr); 5764 5765 return a; 5766 } 5767 insert_copy_before(const xml_attribute & proto,const xml_attribute & attr)5768 PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr) 5769 { 5770 if (!proto) return xml_attribute(); 5771 if (!impl::allow_insert_attribute(type())) return xml_attribute(); 5772 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); 5773 5774 impl::xml_allocator& alloc = impl::get_allocator(_root); 5775 if (!alloc.reserve()) return xml_attribute(); 5776 5777 xml_attribute a(impl::allocate_attribute(alloc)); 5778 if (!a) return xml_attribute(); 5779 5780 impl::insert_attribute_before(a._attr, attr._attr, _root); 5781 impl::node_copy_attribute(a._attr, proto._attr); 5782 5783 return a; 5784 } 5785 append_child(xml_node_type type_)5786 PUGI__FN xml_node xml_node::append_child(xml_node_type type_) 5787 { 5788 
if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5789 5790 impl::xml_allocator& alloc = impl::get_allocator(_root); 5791 if (!alloc.reserve()) return xml_node(); 5792 5793 xml_node n(impl::allocate_node(alloc, type_)); 5794 if (!n) return xml_node(); 5795 5796 impl::append_node(n._root, _root); 5797 5798 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5799 5800 return n; 5801 } 5802 prepend_child(xml_node_type type_)5803 PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_) 5804 { 5805 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5806 5807 impl::xml_allocator& alloc = impl::get_allocator(_root); 5808 if (!alloc.reserve()) return xml_node(); 5809 5810 xml_node n(impl::allocate_node(alloc, type_)); 5811 if (!n) return xml_node(); 5812 5813 impl::prepend_node(n._root, _root); 5814 5815 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5816 5817 return n; 5818 } 5819 insert_child_before(xml_node_type type_,const xml_node & node)5820 PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node) 5821 { 5822 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5823 if (!node._root || node._root->parent != _root) return xml_node(); 5824 5825 impl::xml_allocator& alloc = impl::get_allocator(_root); 5826 if (!alloc.reserve()) return xml_node(); 5827 5828 xml_node n(impl::allocate_node(alloc, type_)); 5829 if (!n) return xml_node(); 5830 5831 impl::insert_node_before(n._root, node._root); 5832 5833 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5834 5835 return n; 5836 } 5837 insert_child_after(xml_node_type type_,const xml_node & node)5838 PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node) 5839 { 5840 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5841 if (!node._root || node._root->parent != _root) return xml_node(); 5842 5843 impl::xml_allocator& alloc = impl::get_allocator(_root); 5844 
if (!alloc.reserve()) return xml_node(); 5845 5846 xml_node n(impl::allocate_node(alloc, type_)); 5847 if (!n) return xml_node(); 5848 5849 impl::insert_node_after(n._root, node._root); 5850 5851 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); 5852 5853 return n; 5854 } 5855 append_child(const char_t * name_)5856 PUGI__FN xml_node xml_node::append_child(const char_t* name_) 5857 { 5858 xml_node result = append_child(node_element); 5859 5860 result.set_name(name_); 5861 5862 return result; 5863 } 5864 prepend_child(const char_t * name_)5865 PUGI__FN xml_node xml_node::prepend_child(const char_t* name_) 5866 { 5867 xml_node result = prepend_child(node_element); 5868 5869 result.set_name(name_); 5870 5871 return result; 5872 } 5873 insert_child_after(const char_t * name_,const xml_node & node)5874 PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node) 5875 { 5876 xml_node result = insert_child_after(node_element, node); 5877 5878 result.set_name(name_); 5879 5880 return result; 5881 } 5882 insert_child_before(const char_t * name_,const xml_node & node)5883 PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node) 5884 { 5885 xml_node result = insert_child_before(node_element, node); 5886 5887 result.set_name(name_); 5888 5889 return result; 5890 } 5891 append_copy(const xml_node & proto)5892 PUGI__FN xml_node xml_node::append_copy(const xml_node& proto) 5893 { 5894 xml_node_type type_ = proto.type(); 5895 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5896 5897 impl::xml_allocator& alloc = impl::get_allocator(_root); 5898 if (!alloc.reserve()) return xml_node(); 5899 5900 xml_node n(impl::allocate_node(alloc, type_)); 5901 if (!n) return xml_node(); 5902 5903 impl::append_node(n._root, _root); 5904 impl::node_copy_tree(n._root, proto._root); 5905 5906 return n; 5907 } 5908 prepend_copy(const xml_node & proto)5909 PUGI__FN xml_node xml_node::prepend_copy(const 
xml_node& proto) 5910 { 5911 xml_node_type type_ = proto.type(); 5912 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5913 5914 impl::xml_allocator& alloc = impl::get_allocator(_root); 5915 if (!alloc.reserve()) return xml_node(); 5916 5917 xml_node n(impl::allocate_node(alloc, type_)); 5918 if (!n) return xml_node(); 5919 5920 impl::prepend_node(n._root, _root); 5921 impl::node_copy_tree(n._root, proto._root); 5922 5923 return n; 5924 } 5925 insert_copy_after(const xml_node & proto,const xml_node & node)5926 PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node) 5927 { 5928 xml_node_type type_ = proto.type(); 5929 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5930 if (!node._root || node._root->parent != _root) return xml_node(); 5931 5932 impl::xml_allocator& alloc = impl::get_allocator(_root); 5933 if (!alloc.reserve()) return xml_node(); 5934 5935 xml_node n(impl::allocate_node(alloc, type_)); 5936 if (!n) return xml_node(); 5937 5938 impl::insert_node_after(n._root, node._root); 5939 impl::node_copy_tree(n._root, proto._root); 5940 5941 return n; 5942 } 5943 insert_copy_before(const xml_node & proto,const xml_node & node)5944 PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node) 5945 { 5946 xml_node_type type_ = proto.type(); 5947 if (!impl::allow_insert_child(type(), type_)) return xml_node(); 5948 if (!node._root || node._root->parent != _root) return xml_node(); 5949 5950 impl::xml_allocator& alloc = impl::get_allocator(_root); 5951 if (!alloc.reserve()) return xml_node(); 5952 5953 xml_node n(impl::allocate_node(alloc, type_)); 5954 if (!n) return xml_node(); 5955 5956 impl::insert_node_before(n._root, node._root); 5957 impl::node_copy_tree(n._root, proto._root); 5958 5959 return n; 5960 } 5961 append_move(const xml_node & moved)5962 PUGI__FN xml_node xml_node::append_move(const xml_node& moved) 5963 { 5964 if (!impl::allow_move(*this, moved)) 
return xml_node(); 5965 5966 impl::xml_allocator& alloc = impl::get_allocator(_root); 5967 if (!alloc.reserve()) return xml_node(); 5968 5969 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 5970 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 5971 5972 impl::remove_node(moved._root); 5973 impl::append_node(moved._root, _root); 5974 5975 return moved; 5976 } 5977 prepend_move(const xml_node & moved)5978 PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved) 5979 { 5980 if (!impl::allow_move(*this, moved)) return xml_node(); 5981 5982 impl::xml_allocator& alloc = impl::get_allocator(_root); 5983 if (!alloc.reserve()) return xml_node(); 5984 5985 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 5986 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 5987 5988 impl::remove_node(moved._root); 5989 impl::prepend_node(moved._root, _root); 5990 5991 return moved; 5992 } 5993 insert_move_after(const xml_node & moved,const xml_node & node)5994 PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node) 5995 { 5996 if (!impl::allow_move(*this, moved)) return xml_node(); 5997 if (!node._root || node._root->parent != _root) return xml_node(); 5998 if (moved._root == node._root) return xml_node(); 5999 6000 impl::xml_allocator& alloc = impl::get_allocator(_root); 6001 if (!alloc.reserve()) return xml_node(); 6002 6003 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 6004 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 6005 6006 impl::remove_node(moved._root); 6007 impl::insert_node_after(moved._root, node._root); 6008 6009 return moved; 6010 } 6011 insert_move_before(const xml_node & moved,const xml_node & 
node)6012 PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node) 6013 { 6014 if (!impl::allow_move(*this, moved)) return xml_node(); 6015 if (!node._root || node._root->parent != _root) return xml_node(); 6016 if (moved._root == node._root) return xml_node(); 6017 6018 impl::xml_allocator& alloc = impl::get_allocator(_root); 6019 if (!alloc.reserve()) return xml_node(); 6020 6021 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers 6022 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; 6023 6024 impl::remove_node(moved._root); 6025 impl::insert_node_before(moved._root, node._root); 6026 6027 return moved; 6028 } 6029 remove_attribute(const char_t * name_)6030 PUGI__FN bool xml_node::remove_attribute(const char_t* name_) 6031 { 6032 return remove_attribute(attribute(name_)); 6033 } 6034 remove_attribute(const xml_attribute & a)6035 PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a) 6036 { 6037 if (!_root || !a._attr) return false; 6038 if (!impl::is_attribute_of(a._attr, _root)) return false; 6039 6040 impl::xml_allocator& alloc = impl::get_allocator(_root); 6041 if (!alloc.reserve()) return false; 6042 6043 impl::remove_attribute(a._attr, _root); 6044 impl::destroy_attribute(a._attr, alloc); 6045 6046 return true; 6047 } 6048 remove_child(const char_t * name_)6049 PUGI__FN bool xml_node::remove_child(const char_t* name_) 6050 { 6051 return remove_child(child(name_)); 6052 } 6053 remove_child(const xml_node & n)6054 PUGI__FN bool xml_node::remove_child(const xml_node& n) 6055 { 6056 if (!_root || !n._root || n._root->parent != _root) return false; 6057 6058 impl::xml_allocator& alloc = impl::get_allocator(_root); 6059 if (!alloc.reserve()) return false; 6060 6061 impl::remove_node(n._root); 6062 impl::destroy_node(n._root, alloc); 6063 6064 return true; 6065 } 6066 append_buffer(const void * contents,size_t 
size,unsigned int options,xml_encoding encoding)6067 PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) 6068 { 6069 // append_buffer is only valid for elements/documents 6070 if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root); 6071 6072 // get document node 6073 impl::xml_document_struct* doc = &impl::get_document(_root); 6074 6075 // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense 6076 doc->header |= impl::xml_memory_page_contents_shared_mask; 6077 6078 // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later) 6079 impl::xml_memory_page* page = 0; 6080 impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer) + sizeof(void*), page)); 6081 (void)page; 6082 6083 if (!extra) return impl::make_parse_result(status_out_of_memory); 6084 6085 #ifdef PUGIXML_COMPACT 6086 // align the memory block to a pointer boundary; this is required for compact mode where memory allocations are only 4b aligned 6087 // note that this requires up to sizeof(void*)-1 additional memory, which the allocation above takes into account 6088 extra = reinterpret_cast<impl::xml_extra_buffer*>((reinterpret_cast<uintptr_t>(extra) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1)); 6089 #endif 6090 6091 // add extra buffer to the list 6092 extra->buffer = 0; 6093 extra->next = doc->extra_buffers; 6094 doc->extra_buffers = extra; 6095 6096 // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level 6097 impl::name_null_sentry sentry(_root); 6098 6099 return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer); 6100 } 6101 
find_child_by_attribute(const char_t * name_,const char_t * attr_name,const char_t * attr_value) const6102 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const 6103 { 6104 if (!_root) return xml_node(); 6105 6106 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 6107 if (i->name && impl::strequal(name_, i->name)) 6108 { 6109 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) 6110 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT(""))) 6111 return xml_node(i); 6112 } 6113 6114 return xml_node(); 6115 } 6116 find_child_by_attribute(const char_t * attr_name,const char_t * attr_value) const6117 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const 6118 { 6119 if (!_root) return xml_node(); 6120 6121 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 6122 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) 6123 if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT(""))) 6124 return xml_node(i); 6125 6126 return xml_node(); 6127 } 6128 6129 #ifndef PUGIXML_NO_STL path(char_t delimiter) const6130 PUGI__FN string_t xml_node::path(char_t delimiter) const 6131 { 6132 if (!_root) return string_t(); 6133 6134 size_t offset = 0; 6135 6136 for (xml_node_struct* i = _root; i; i = i->parent) 6137 { 6138 offset += (i != _root); 6139 offset += i->name ? 
impl::strlength(i->name) : 0; 6140 } 6141 6142 string_t result; 6143 result.resize(offset); 6144 6145 for (xml_node_struct* j = _root; j; j = j->parent) 6146 { 6147 if (j != _root) 6148 result[--offset] = delimiter; 6149 6150 if (j->name) 6151 { 6152 size_t length = impl::strlength(j->name); 6153 6154 offset -= length; 6155 memcpy(&result[offset], j->name, length * sizeof(char_t)); 6156 } 6157 } 6158 6159 assert(offset == 0); 6160 6161 return result; 6162 } 6163 #endif 6164 first_element_by_path(const char_t * path_,char_t delimiter) const6165 PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const 6166 { 6167 xml_node found = *this; // Current search context. 6168 6169 if (!_root || !path_[0]) return found; 6170 6171 if (path_[0] == delimiter) 6172 { 6173 // Absolute path; e.g. '/foo/bar' 6174 found = found.root(); 6175 ++path_; 6176 } 6177 6178 const char_t* path_segment = path_; 6179 6180 while (*path_segment == delimiter) ++path_segment; 6181 6182 const char_t* path_segment_end = path_segment; 6183 6184 while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end; 6185 6186 if (path_segment == path_segment_end) return found; 6187 6188 const char_t* next_segment = path_segment_end; 6189 6190 while (*next_segment == delimiter) ++next_segment; 6191 6192 if (*path_segment == '.' && path_segment + 1 == path_segment_end) 6193 return found.first_element_by_path(next_segment, delimiter); 6194 else if (*path_segment == '.' && *(path_segment+1) == '.' 
&& path_segment + 2 == path_segment_end) 6195 return found.parent().first_element_by_path(next_segment, delimiter); 6196 else 6197 { 6198 for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling) 6199 { 6200 if (j->name && impl::strequalrange(j->name, path_segment, static_cast<size_t>(path_segment_end - path_segment))) 6201 { 6202 xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter); 6203 6204 if (subsearch) return subsearch; 6205 } 6206 } 6207 6208 return xml_node(); 6209 } 6210 } 6211 traverse(xml_tree_walker & walker)6212 PUGI__FN bool xml_node::traverse(xml_tree_walker& walker) 6213 { 6214 walker._depth = -1; 6215 6216 xml_node arg_begin(_root); 6217 if (!walker.begin(arg_begin)) return false; 6218 6219 xml_node_struct* cur = _root ? _root->first_child + 0 : 0; 6220 6221 if (cur) 6222 { 6223 ++walker._depth; 6224 6225 do 6226 { 6227 xml_node arg_for_each(cur); 6228 if (!walker.for_each(arg_for_each)) 6229 return false; 6230 6231 if (cur->first_child) 6232 { 6233 ++walker._depth; 6234 cur = cur->first_child; 6235 } 6236 else if (cur->next_sibling) 6237 cur = cur->next_sibling; 6238 else 6239 { 6240 while (!cur->next_sibling && cur != _root && cur->parent) 6241 { 6242 --walker._depth; 6243 cur = cur->parent; 6244 } 6245 6246 if (cur != _root) 6247 cur = cur->next_sibling; 6248 } 6249 } 6250 while (cur && cur != _root); 6251 } 6252 6253 assert(walker._depth == -1); 6254 6255 xml_node arg_end(_root); 6256 return walker.end(arg_end); 6257 } 6258 hash_value() const6259 PUGI__FN size_t xml_node::hash_value() const 6260 { 6261 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct)); 6262 } 6263 internal_object() const6264 PUGI__FN xml_node_struct* xml_node::internal_object() const 6265 { 6266 return _root; 6267 } 6268 print(xml_writer & writer,const char_t * indent,unsigned int flags,xml_encoding encoding,unsigned int depth) const6269 PUGI__FN void xml_node::print(xml_writer& writer, const 
char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const 6270 { 6271 if (!_root) return; 6272 6273 impl::xml_buffered_writer buffered_writer(writer, encoding); 6274 6275 impl::node_output(buffered_writer, _root, indent, flags, depth); 6276 6277 buffered_writer.flush(); 6278 } 6279 6280 #ifndef PUGIXML_NO_STL print(std::basic_ostream<char,std::char_traits<char>> & stream,const char_t * indent,unsigned int flags,xml_encoding encoding,unsigned int depth) const6281 PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const 6282 { 6283 xml_writer_stream writer(stream); 6284 6285 print(writer, indent, flags, encoding, depth); 6286 } 6287 print(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream,const char_t * indent,unsigned int flags,unsigned int depth) const6288 PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const 6289 { 6290 xml_writer_stream writer(stream); 6291 6292 print(writer, indent, flags, encoding_wchar, depth); 6293 } 6294 #endif 6295 offset_debug() const6296 PUGI__FN ptrdiff_t xml_node::offset_debug() const 6297 { 6298 if (!_root) return -1; 6299 6300 impl::xml_document_struct& doc = impl::get_document(_root); 6301 6302 // we can determine the offset reliably only if there is exactly once parse buffer 6303 if (!doc.buffer || doc.extra_buffers) return -1; 6304 6305 switch (type()) 6306 { 6307 case node_document: 6308 return 0; 6309 6310 case node_element: 6311 case node_declaration: 6312 case node_pi: 6313 return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? 
_root->name - doc.buffer : -1; 6314 6315 case node_pcdata: 6316 case node_cdata: 6317 case node_comment: 6318 case node_doctype: 6319 return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1; 6320 6321 default: 6322 assert(false && "Invalid node type"); // unreachable 6323 return -1; 6324 } 6325 } 6326 6327 #ifdef __BORLANDC__ operator &&(const xml_node & lhs,bool rhs)6328 PUGI__FN bool operator&&(const xml_node& lhs, bool rhs) 6329 { 6330 return (bool)lhs && rhs; 6331 } 6332 operator ||(const xml_node & lhs,bool rhs)6333 PUGI__FN bool operator||(const xml_node& lhs, bool rhs) 6334 { 6335 return (bool)lhs || rhs; 6336 } 6337 #endif 6338 xml_text(xml_node_struct * root)6339 PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root) 6340 { 6341 } 6342 _data() const6343 PUGI__FN xml_node_struct* xml_text::_data() const 6344 { 6345 if (!_root || impl::is_text_node(_root)) return _root; 6346 6347 // element nodes can have value if parse_embed_pcdata was used 6348 if (PUGI__NODETYPE(_root) == node_element && _root->value) 6349 return _root; 6350 6351 for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling) 6352 if (impl::is_text_node(node)) 6353 return node; 6354 6355 return 0; 6356 } 6357 _data_new()6358 PUGI__FN xml_node_struct* xml_text::_data_new() 6359 { 6360 xml_node_struct* d = _data(); 6361 if (d) return d; 6362 6363 return xml_node(_root).append_child(node_pcdata).internal_object(); 6364 } 6365 xml_text()6366 PUGI__FN xml_text::xml_text(): _root(0) 6367 { 6368 } 6369 unspecified_bool_xml_text(xml_text ***)6370 PUGI__FN static void unspecified_bool_xml_text(xml_text***) 6371 { 6372 } 6373 operator xml_text::unspecified_bool_type() const6374 PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const 6375 { 6376 return _data() ? 
unspecified_bool_xml_text : 0; 6377 } 6378 operator !() const6379 PUGI__FN bool xml_text::operator!() const 6380 { 6381 return !_data(); 6382 } 6383 empty() const6384 PUGI__FN bool xml_text::empty() const 6385 { 6386 return _data() == 0; 6387 } 6388 get() const6389 PUGI__FN const char_t* xml_text::get() const 6390 { 6391 xml_node_struct* d = _data(); 6392 6393 return (d && d->value) ? d->value + 0 : PUGIXML_TEXT(""); 6394 } 6395 as_string(const char_t * def) const6396 PUGI__FN const char_t* xml_text::as_string(const char_t* def) const 6397 { 6398 xml_node_struct* d = _data(); 6399 6400 return (d && d->value) ? d->value + 0 : def; 6401 } 6402 as_int(int def) const6403 PUGI__FN int xml_text::as_int(int def) const 6404 { 6405 xml_node_struct* d = _data(); 6406 6407 return (d && d->value) ? impl::get_value_int(d->value) : def; 6408 } 6409 as_uint(unsigned int def) const6410 PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const 6411 { 6412 xml_node_struct* d = _data(); 6413 6414 return (d && d->value) ? impl::get_value_uint(d->value) : def; 6415 } 6416 as_double(double def) const6417 PUGI__FN double xml_text::as_double(double def) const 6418 { 6419 xml_node_struct* d = _data(); 6420 6421 return (d && d->value) ? impl::get_value_double(d->value) : def; 6422 } 6423 as_float(float def) const6424 PUGI__FN float xml_text::as_float(float def) const 6425 { 6426 xml_node_struct* d = _data(); 6427 6428 return (d && d->value) ? impl::get_value_float(d->value) : def; 6429 } 6430 as_bool(bool def) const6431 PUGI__FN bool xml_text::as_bool(bool def) const 6432 { 6433 xml_node_struct* d = _data(); 6434 6435 return (d && d->value) ? impl::get_value_bool(d->value) : def; 6436 } 6437 6438 #ifdef PUGIXML_HAS_LONG_LONG as_llong(long long def) const6439 PUGI__FN long long xml_text::as_llong(long long def) const 6440 { 6441 xml_node_struct* d = _data(); 6442 6443 return (d && d->value) ? 
impl::get_value_llong(d->value) : def; 6444 } 6445 as_ullong(unsigned long long def) const6446 PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const 6447 { 6448 xml_node_struct* d = _data(); 6449 6450 return (d && d->value) ? impl::get_value_ullong(d->value) : def; 6451 } 6452 #endif 6453 set(const char_t * rhs)6454 PUGI__FN bool xml_text::set(const char_t* rhs) 6455 { 6456 xml_node_struct* dn = _data_new(); 6457 6458 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false; 6459 } 6460 set(int rhs)6461 PUGI__FN bool xml_text::set(int rhs) 6462 { 6463 xml_node_struct* dn = _data_new(); 6464 6465 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; 6466 } 6467 set(unsigned int rhs)6468 PUGI__FN bool xml_text::set(unsigned int rhs) 6469 { 6470 xml_node_struct* dn = _data_new(); 6471 6472 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; 6473 } 6474 set(long rhs)6475 PUGI__FN bool xml_text::set(long rhs) 6476 { 6477 xml_node_struct* dn = _data_new(); 6478 6479 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; 6480 } 6481 set(unsigned long rhs)6482 PUGI__FN bool xml_text::set(unsigned long rhs) 6483 { 6484 xml_node_struct* dn = _data_new(); 6485 6486 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; 6487 } 6488 set(float rhs)6489 PUGI__FN bool xml_text::set(float rhs) 6490 { 6491 xml_node_struct* dn = _data_new(); 6492 6493 return dn ? 
impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; 6494 } 6495 set(double rhs)6496 PUGI__FN bool xml_text::set(double rhs) 6497 { 6498 xml_node_struct* dn = _data_new(); 6499 6500 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; 6501 } 6502 set(bool rhs)6503 PUGI__FN bool xml_text::set(bool rhs) 6504 { 6505 xml_node_struct* dn = _data_new(); 6506 6507 return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; 6508 } 6509 6510 #ifdef PUGIXML_HAS_LONG_LONG set(long long rhs)6511 PUGI__FN bool xml_text::set(long long rhs) 6512 { 6513 xml_node_struct* dn = _data_new(); 6514 6515 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; 6516 } 6517 set(unsigned long long rhs)6518 PUGI__FN bool xml_text::set(unsigned long long rhs) 6519 { 6520 xml_node_struct* dn = _data_new(); 6521 6522 return dn ? 
impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; 6523 } 6524 #endif 6525 operator =(const char_t * rhs)6526 PUGI__FN xml_text& xml_text::operator=(const char_t* rhs) 6527 { 6528 set(rhs); 6529 return *this; 6530 } 6531 operator =(int rhs)6532 PUGI__FN xml_text& xml_text::operator=(int rhs) 6533 { 6534 set(rhs); 6535 return *this; 6536 } 6537 operator =(unsigned int rhs)6538 PUGI__FN xml_text& xml_text::operator=(unsigned int rhs) 6539 { 6540 set(rhs); 6541 return *this; 6542 } 6543 operator =(long rhs)6544 PUGI__FN xml_text& xml_text::operator=(long rhs) 6545 { 6546 set(rhs); 6547 return *this; 6548 } 6549 operator =(unsigned long rhs)6550 PUGI__FN xml_text& xml_text::operator=(unsigned long rhs) 6551 { 6552 set(rhs); 6553 return *this; 6554 } 6555 operator =(double rhs)6556 PUGI__FN xml_text& xml_text::operator=(double rhs) 6557 { 6558 set(rhs); 6559 return *this; 6560 } 6561 operator =(float rhs)6562 PUGI__FN xml_text& xml_text::operator=(float rhs) 6563 { 6564 set(rhs); 6565 return *this; 6566 } 6567 operator =(bool rhs)6568 PUGI__FN xml_text& xml_text::operator=(bool rhs) 6569 { 6570 set(rhs); 6571 return *this; 6572 } 6573 6574 #ifdef PUGIXML_HAS_LONG_LONG operator =(long long rhs)6575 PUGI__FN xml_text& xml_text::operator=(long long rhs) 6576 { 6577 set(rhs); 6578 return *this; 6579 } 6580 operator =(unsigned long long rhs)6581 PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs) 6582 { 6583 set(rhs); 6584 return *this; 6585 } 6586 #endif 6587 data() const6588 PUGI__FN xml_node xml_text::data() const 6589 { 6590 return xml_node(_data()); 6591 } 6592 6593 #ifdef __BORLANDC__ operator &&(const xml_text & lhs,bool rhs)6594 PUGI__FN bool operator&&(const xml_text& lhs, bool rhs) 6595 { 6596 return (bool)lhs && rhs; 6597 } 6598 operator ||(const xml_text & lhs,bool rhs)6599 PUGI__FN bool operator||(const xml_text& lhs, bool rhs) 6600 { 6601 return (bool)lhs || rhs; 6602 } 
6603 #endif 6604 xml_node_iterator()6605 PUGI__FN xml_node_iterator::xml_node_iterator() 6606 { 6607 } 6608 xml_node_iterator(const xml_node & node)6609 PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent()) 6610 { 6611 } 6612 xml_node_iterator(xml_node_struct * ref,xml_node_struct * parent)6613 PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) 6614 { 6615 } 6616 operator ==(const xml_node_iterator & rhs) const6617 PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const 6618 { 6619 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; 6620 } 6621 operator !=(const xml_node_iterator & rhs) const6622 PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const 6623 { 6624 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; 6625 } 6626 operator *() const6627 PUGI__FN xml_node& xml_node_iterator::operator*() const 6628 { 6629 assert(_wrap._root); 6630 return _wrap; 6631 } 6632 operator ->() const6633 PUGI__FN xml_node* xml_node_iterator::operator->() const 6634 { 6635 assert(_wrap._root); 6636 return const_cast<xml_node*>(&_wrap); // BCC5 workaround 6637 } 6638 operator ++()6639 PUGI__FN const xml_node_iterator& xml_node_iterator::operator++() 6640 { 6641 assert(_wrap._root); 6642 _wrap._root = _wrap._root->next_sibling; 6643 return *this; 6644 } 6645 operator ++(int)6646 PUGI__FN xml_node_iterator xml_node_iterator::operator++(int) 6647 { 6648 xml_node_iterator temp = *this; 6649 ++*this; 6650 return temp; 6651 } 6652 operator --()6653 PUGI__FN const xml_node_iterator& xml_node_iterator::operator--() 6654 { 6655 _wrap = _wrap._root ? 
_wrap.previous_sibling() : _parent.last_child(); 6656 return *this; 6657 } 6658 operator --(int)6659 PUGI__FN xml_node_iterator xml_node_iterator::operator--(int) 6660 { 6661 xml_node_iterator temp = *this; 6662 --*this; 6663 return temp; 6664 } 6665 xml_attribute_iterator()6666 PUGI__FN xml_attribute_iterator::xml_attribute_iterator() 6667 { 6668 } 6669 xml_attribute_iterator(const xml_attribute & attr,const xml_node & parent)6670 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent) 6671 { 6672 } 6673 xml_attribute_iterator(xml_attribute_struct * ref,xml_node_struct * parent)6674 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) 6675 { 6676 } 6677 operator ==(const xml_attribute_iterator & rhs) const6678 PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const 6679 { 6680 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root; 6681 } 6682 operator !=(const xml_attribute_iterator & rhs) const6683 PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const 6684 { 6685 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root; 6686 } 6687 operator *() const6688 PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const 6689 { 6690 assert(_wrap._attr); 6691 return _wrap; 6692 } 6693 operator ->() const6694 PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const 6695 { 6696 assert(_wrap._attr); 6697 return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround 6698 } 6699 operator ++()6700 PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++() 6701 { 6702 assert(_wrap._attr); 6703 _wrap._attr = _wrap._attr->next_attribute; 6704 return *this; 6705 } 6706 operator ++(int)6707 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int) 6708 { 6709 
xml_attribute_iterator temp = *this; 6710 ++*this; 6711 return temp; 6712 } 6713 operator --()6714 PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--() 6715 { 6716 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute(); 6717 return *this; 6718 } 6719 operator --(int)6720 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int) 6721 { 6722 xml_attribute_iterator temp = *this; 6723 --*this; 6724 return temp; 6725 } 6726 xml_named_node_iterator()6727 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0) 6728 { 6729 } 6730 xml_named_node_iterator(const xml_node & node,const char_t * name)6731 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name) 6732 { 6733 } 6734 xml_named_node_iterator(xml_node_struct * ref,xml_node_struct * parent,const char_t * name)6735 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name) 6736 { 6737 } 6738 operator ==(const xml_named_node_iterator & rhs) const6739 PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const 6740 { 6741 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; 6742 } 6743 operator !=(const xml_named_node_iterator & rhs) const6744 PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const 6745 { 6746 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; 6747 } 6748 operator *() const6749 PUGI__FN xml_node& xml_named_node_iterator::operator*() const 6750 { 6751 assert(_wrap._root); 6752 return _wrap; 6753 } 6754 operator ->() const6755 PUGI__FN xml_node* xml_named_node_iterator::operator->() const 6756 { 6757 assert(_wrap._root); 6758 return const_cast<xml_node*>(&_wrap); // BCC5 workaround 6759 } 6760 operator ++()6761 PUGI__FN 
const xml_named_node_iterator& xml_named_node_iterator::operator++() 6762 { 6763 assert(_wrap._root); 6764 _wrap = _wrap.next_sibling(_name); 6765 return *this; 6766 } 6767 operator ++(int)6768 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int) 6769 { 6770 xml_named_node_iterator temp = *this; 6771 ++*this; 6772 return temp; 6773 } 6774 operator --()6775 PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--() 6776 { 6777 if (_wrap._root) 6778 _wrap = _wrap.previous_sibling(_name); 6779 else 6780 { 6781 _wrap = _parent.last_child(); 6782 6783 if (!impl::strequal(_wrap.name(), _name)) 6784 _wrap = _wrap.previous_sibling(_name); 6785 } 6786 6787 return *this; 6788 } 6789 operator --(int)6790 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int) 6791 { 6792 xml_named_node_iterator temp = *this; 6793 --*this; 6794 return temp; 6795 } 6796 xml_parse_result()6797 PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto) 6798 { 6799 } 6800 operator bool() const6801 PUGI__FN xml_parse_result::operator bool() const 6802 { 6803 return status == status_ok; 6804 } 6805 description() const6806 PUGI__FN const char* xml_parse_result::description() const 6807 { 6808 switch (status) 6809 { 6810 case status_ok: return "No error"; 6811 6812 case status_file_not_found: return "File was not found"; 6813 case status_io_error: return "Error reading from file/stream"; 6814 case status_out_of_memory: return "Could not allocate memory"; 6815 case status_internal_error: return "Internal error occurred"; 6816 6817 case status_unrecognized_tag: return "Could not determine tag type"; 6818 6819 case status_bad_pi: return "Error parsing document declaration/processing instruction"; 6820 case status_bad_comment: return "Error parsing comment"; 6821 case status_bad_cdata: return "Error parsing CDATA section"; 6822 case status_bad_doctype: return "Error parsing document type 
declaration"; 6823 case status_bad_pcdata: return "Error parsing PCDATA section"; 6824 case status_bad_start_element: return "Error parsing start element tag"; 6825 case status_bad_attribute: return "Error parsing element attribute"; 6826 case status_bad_end_element: return "Error parsing end element tag"; 6827 case status_end_element_mismatch: return "Start-end tags mismatch"; 6828 6829 case status_append_invalid_root: return "Unable to append nodes: root is not an element or document"; 6830 6831 case status_no_document_element: return "No document element found"; 6832 6833 default: return "Unknown error"; 6834 } 6835 } 6836 xml_document()6837 PUGI__FN xml_document::xml_document(): _buffer(0) 6838 { 6839 _create(); 6840 } 6841 ~xml_document()6842 PUGI__FN xml_document::~xml_document() 6843 { 6844 _destroy(); 6845 } 6846 6847 #ifdef PUGIXML_HAS_MOVE xml_document(xml_document && rhs)6848 PUGI__FN xml_document::xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT: _buffer(0) 6849 { 6850 _create(); 6851 _move(rhs); 6852 } 6853 operator =(xml_document && rhs)6854 PUGI__FN xml_document& xml_document::operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT 6855 { 6856 if (this == &rhs) return *this; 6857 6858 _destroy(); 6859 _create(); 6860 _move(rhs); 6861 6862 return *this; 6863 } 6864 #endif 6865 reset()6866 PUGI__FN void xml_document::reset() 6867 { 6868 _destroy(); 6869 _create(); 6870 } 6871 reset(const xml_document & proto)6872 PUGI__FN void xml_document::reset(const xml_document& proto) 6873 { 6874 reset(); 6875 6876 for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling()) 6877 append_copy(cur); 6878 } 6879 _create()6880 PUGI__FN void xml_document::_create() 6881 { 6882 assert(!_root); 6883 6884 #ifdef PUGIXML_COMPACT 6885 // space for page marker for the first page (uint32_t), rounded up to pointer size; assumes pointers are at least 32-bit 6886 const size_t page_offset = sizeof(void*); 6887 #else 6888 const size_t page_offset = 
0; 6889 #endif 6890 6891 // initialize sentinel page 6892 PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory)); 6893 6894 // prepare page structure 6895 impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory); 6896 assert(page); 6897 6898 page->busy_size = impl::xml_memory_page_size; 6899 6900 // setup first page marker 6901 #ifdef PUGIXML_COMPACT 6902 // round-trip through void* to avoid 'cast increases required alignment of target type' warning 6903 page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page))); 6904 *page->compact_page_marker = sizeof(impl::xml_memory_page); 6905 #endif 6906 6907 // allocate new root 6908 _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page); 6909 _root->prev_sibling_c = _root; 6910 6911 // setup sentinel page 6912 page->allocator = static_cast<impl::xml_document_struct*>(_root); 6913 6914 // setup hash table pointer in allocator 6915 #ifdef PUGIXML_COMPACT 6916 page->allocator->_hash = &static_cast<impl::xml_document_struct*>(_root)->hash; 6917 #endif 6918 6919 // verify the document allocation 6920 assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory)); 6921 } 6922 _destroy()6923 PUGI__FN void xml_document::_destroy() 6924 { 6925 assert(_root); 6926 6927 // destroy static storage 6928 if (_buffer) 6929 { 6930 impl::xml_memory::deallocate(_buffer); 6931 _buffer = 0; 6932 } 6933 6934 // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator) 6935 for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next) 6936 { 6937 if (extra->buffer) impl::xml_memory::deallocate(extra->buffer); 6938 } 6939 6940 // destroy dynamic storage, leave 
sentinel page (it's in static memory) 6941 impl::xml_memory_page* root_page = PUGI__GETPAGE(_root); 6942 assert(root_page && !root_page->prev); 6943 assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory)); 6944 6945 for (impl::xml_memory_page* page = root_page->next; page; ) 6946 { 6947 impl::xml_memory_page* next = page->next; 6948 6949 impl::xml_allocator::deallocate_page(page); 6950 6951 page = next; 6952 } 6953 6954 #ifdef PUGIXML_COMPACT 6955 // destroy hash table 6956 static_cast<impl::xml_document_struct*>(_root)->hash.clear(); 6957 #endif 6958 6959 _root = 0; 6960 } 6961 6962 #ifdef PUGIXML_HAS_MOVE _move(xml_document & rhs)6963 PUGI__FN void xml_document::_move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT 6964 { 6965 impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root); 6966 impl::xml_document_struct* other = static_cast<impl::xml_document_struct*>(rhs._root); 6967 6968 // save first child pointer for later; this needs hash access 6969 xml_node_struct* other_first_child = other->first_child; 6970 6971 #ifdef PUGIXML_COMPACT 6972 // reserve space for the hash table up front; this is the only operation that can fail 6973 // if it does, we have no choice but to throw (if we have exceptions) 6974 if (other_first_child) 6975 { 6976 size_t other_children = 0; 6977 for (xml_node_struct* node = other_first_child; node; node = node->next_sibling) 6978 other_children++; 6979 6980 // in compact mode, each pointer assignment could result in a hash table request 6981 // during move, we have to relocate document first_child and parents of all children 6982 // normally there's just one child and its parent has a pointerless encoding but 6983 // we assume the worst here 6984 if (!other->_hash->reserve(other_children + 1)) 6985 { 6986 #ifdef PUGIXML_NO_EXCEPTIONS 6987 return; 6988 #else 6989 throw std::bad_alloc(); 6990 #endif 6991 } 6992 } 6993 #endif 6994 6995 // move 
allocation state 6996 doc->_root = other->_root; 6997 doc->_busy_size = other->_busy_size; 6998 6999 // move buffer state 7000 doc->buffer = other->buffer; 7001 doc->extra_buffers = other->extra_buffers; 7002 _buffer = rhs._buffer; 7003 7004 #ifdef PUGIXML_COMPACT 7005 // move compact hash; note that the hash table can have pointers to other but they will be "inactive", similarly to nodes removed with remove_child 7006 doc->hash = other->hash; 7007 doc->_hash = &doc->hash; 7008 7009 // make sure we don't access other hash up until the end when we reinitialize other document 7010 other->_hash = 0; 7011 #endif 7012 7013 // move page structure 7014 impl::xml_memory_page* doc_page = PUGI__GETPAGE(doc); 7015 assert(doc_page && !doc_page->prev && !doc_page->next); 7016 7017 impl::xml_memory_page* other_page = PUGI__GETPAGE(other); 7018 assert(other_page && !other_page->prev); 7019 7020 // relink pages since root page is embedded into xml_document 7021 if (impl::xml_memory_page* page = other_page->next) 7022 { 7023 assert(page->prev == other_page); 7024 7025 page->prev = doc_page; 7026 7027 doc_page->next = page; 7028 other_page->next = 0; 7029 } 7030 7031 // make sure pages point to the correct document state 7032 for (impl::xml_memory_page* page = doc_page->next; page; page = page->next) 7033 { 7034 assert(page->allocator == other); 7035 7036 page->allocator = doc; 7037 7038 #ifdef PUGIXML_COMPACT 7039 // this automatically migrates most children between documents and prevents ->parent assignment from allocating 7040 if (page->compact_shared_parent == other) 7041 page->compact_shared_parent = doc; 7042 #endif 7043 } 7044 7045 // move tree structure 7046 assert(!doc->first_child); 7047 7048 doc->first_child = other_first_child; 7049 7050 for (xml_node_struct* node = other_first_child; node; node = node->next_sibling) 7051 { 7052 #ifdef PUGIXML_COMPACT 7053 // most children will have migrated when we reassigned compact_shared_parent 7054 assert(node->parent == other || 
node->parent == doc); 7055 7056 node->parent = doc; 7057 #else 7058 assert(node->parent == other); 7059 node->parent = doc; 7060 #endif 7061 } 7062 7063 // reset other document 7064 new (other) impl::xml_document_struct(PUGI__GETPAGE(other)); 7065 rhs._buffer = 0; 7066 } 7067 #endif 7068 7069 #ifndef PUGIXML_NO_STL load(std::basic_istream<char,std::char_traits<char>> & stream,unsigned int options,xml_encoding encoding)7070 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding) 7071 { 7072 reset(); 7073 7074 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer); 7075 } 7076 load(std::basic_istream<wchar_t,std::char_traits<wchar_t>> & stream,unsigned int options)7077 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options) 7078 { 7079 reset(); 7080 7081 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer); 7082 } 7083 #endif 7084 load_string(const char_t * contents,unsigned int options)7085 PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options) 7086 { 7087 // Force native encoding (skip autodetection) 7088 #ifdef PUGIXML_WCHAR_MODE 7089 xml_encoding encoding = encoding_wchar; 7090 #else 7091 xml_encoding encoding = encoding_utf8; 7092 #endif 7093 7094 return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding); 7095 } 7096 load(const char_t * contents,unsigned int options)7097 PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options) 7098 { 7099 return load_string(contents, options); 7100 } 7101 load_file(const char * path_,unsigned int options,xml_encoding encoding)7102 PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, 
xml_encoding encoding) 7103 { 7104 reset(); 7105 7106 using impl::auto_deleter; // MSVC7 workaround 7107 auto_deleter<FILE> file(fopen(path_, "rb"), impl::close_file); 7108 7109 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer); 7110 } 7111 load_file(const wchar_t * path_,unsigned int options,xml_encoding encoding)7112 PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding) 7113 { 7114 reset(); 7115 7116 using impl::auto_deleter; // MSVC7 workaround 7117 auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file); 7118 7119 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer); 7120 } 7121 load_buffer(const void * contents,size_t size,unsigned int options,xml_encoding encoding)7122 PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) 7123 { 7124 reset(); 7125 7126 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer); 7127 } 7128 load_buffer_inplace(void * contents,size_t size,unsigned int options,xml_encoding encoding)7129 PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding) 7130 { 7131 reset(); 7132 7133 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer); 7134 } 7135 load_buffer_inplace_own(void * contents,size_t size,unsigned int options,xml_encoding encoding)7136 PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding) 7137 { 7138 reset(); 7139 7140 return 
impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer); 7141 } 7142 save(xml_writer & writer,const char_t * indent,unsigned int flags,xml_encoding encoding) const7143 PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const 7144 { 7145 impl::xml_buffered_writer buffered_writer(writer, encoding); 7146 7147 if ((flags & format_write_bom) && encoding != encoding_latin1) 7148 { 7149 // BOM always represents the codepoint U+FEFF, so just write it in native encoding 7150 #ifdef PUGIXML_WCHAR_MODE 7151 unsigned int bom = 0xfeff; 7152 buffered_writer.write(static_cast<wchar_t>(bom)); 7153 #else 7154 buffered_writer.write('\xef', '\xbb', '\xbf'); 7155 #endif 7156 } 7157 7158 if (!(flags & format_no_declaration) && !impl::has_declaration(_root)) 7159 { 7160 buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\"")); 7161 if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\"")); 7162 buffered_writer.write('?', '>'); 7163 if (!(flags & format_raw)) buffered_writer.write('\n'); 7164 } 7165 7166 impl::node_output(buffered_writer, _root, indent, flags, 0); 7167 7168 buffered_writer.flush(); 7169 } 7170 7171 #ifndef PUGIXML_NO_STL save(std::basic_ostream<char,std::char_traits<char>> & stream,const char_t * indent,unsigned int flags,xml_encoding encoding) const7172 PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const 7173 { 7174 xml_writer_stream writer(stream); 7175 7176 save(writer, indent, flags, encoding); 7177 } 7178 save(std::basic_ostream<wchar_t,std::char_traits<wchar_t>> & stream,const char_t * indent,unsigned int flags) const7179 PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int 
flags) const 7180 { 7181 xml_writer_stream writer(stream); 7182 7183 save(writer, indent, flags, encoding_wchar); 7184 } 7185 #endif 7186 save_file(const char * path_,const char_t * indent,unsigned int flags,xml_encoding encoding) const7187 PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const 7188 { 7189 using impl::auto_deleter; // MSVC7 workaround 7190 auto_deleter<FILE> file(fopen(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file); 7191 7192 return impl::save_file_impl(*this, file.data, indent, flags, encoding); 7193 } 7194 save_file(const wchar_t * path_,const char_t * indent,unsigned int flags,xml_encoding encoding) const7195 PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const 7196 { 7197 using impl::auto_deleter; // MSVC7 workaround 7198 auto_deleter<FILE> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? 
L"w" : L"wb"), impl::close_file); 7199 7200 return impl::save_file_impl(*this, file.data, indent, flags, encoding); 7201 } 7202 document_element() const7203 PUGI__FN xml_node xml_document::document_element() const 7204 { 7205 assert(_root); 7206 7207 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) 7208 if (PUGI__NODETYPE(i) == node_element) 7209 return xml_node(i); 7210 7211 return xml_node(); 7212 } 7213 7214 #ifndef PUGIXML_NO_STL as_utf8(const wchar_t * str)7215 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str) 7216 { 7217 assert(str); 7218 7219 return impl::as_utf8_impl(str, impl::strlength_wide(str)); 7220 } 7221 as_utf8(const std::basic_string<wchar_t> & str)7222 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str) 7223 { 7224 return impl::as_utf8_impl(str.c_str(), str.size()); 7225 } 7226 as_wide(const char * str)7227 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str) 7228 { 7229 assert(str); 7230 7231 return impl::as_wide_impl(str, strlen(str)); 7232 } 7233 as_wide(const std::string & str)7234 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str) 7235 { 7236 return impl::as_wide_impl(str.c_str(), str.size()); 7237 } 7238 #endif 7239 set_memory_management_functions(allocation_function allocate,deallocation_function deallocate)7240 PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate) 7241 { 7242 impl::xml_memory::allocate = allocate; 7243 impl::xml_memory::deallocate = deallocate; 7244 } 7245 get_memory_allocation_function()7246 PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function() 7247 { 7248 return impl::xml_memory::allocate; 7249 } 7250 get_memory_deallocation_function()7251 PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function() 7252 { 7253 return impl::xml_memory::deallocate; 7254 } 7255 } 7256 7257 #if 
// Comparison functors: minimal stand-ins for the <functional> equivalents.
struct equal_to
{
	template <typename T> bool operator()(const T& lhs, const T& rhs) const
	{
		return lhs == rhs;
	}
};

struct not_equal_to
{
	template <typename T> bool operator()(const T& lhs, const T& rhs) const
	{
		return lhs != rhs;
	}
};

struct less
{
	template <typename T> bool operator()(const T& lhs, const T& rhs) const
	{
		return lhs < rhs;
	}
};

struct less_equal
{
	template <typename T> bool operator()(const T& lhs, const T& rhs) const
	{
		return lhs <= rhs;
	}
};

// Exchanges two values through a temporary copy.
template <typename T> void swap(T& lhs, T& rhs)
{
	T temp = lhs;
	lhs = rhs;
	rhs = temp;
}

// Returns an iterator to the first element for which no other element compares
// smaller under pred. For an empty range returns end (same contract as
// std::min_element) instead of stepping past the end of the range.
template <typename I, typename Pred> I min_element(I begin, I end, const Pred& pred)
{
	// an empty range has no minimum; begin + 1 below would already be out of bounds
	if (begin == end) return end;

	I result = begin;

	for (I it = begin + 1; it != end; ++it)
		if (pred(*it, *result))
			result = it;

	return result;
}

// Reverses [begin, end) in place.
template <typename I> void reverse(I begin, I end)
{
	while (end - begin > 1) swap(*begin++, *--end);
}

// Collapses runs of adjacent equal elements in place; returns the new past-the-end iterator.
template <typename I> I unique(I begin, I end)
{
	// fast skip head
	while (end - begin > 1 && *begin != *(begin + 1)) begin++;

	if (begin == end) return begin;

	// last written element
	I write = begin++;

	// merge unique elements
	while (begin != end)
	{
		if (*begin != *write)
			*++write = *begin++;
		else
			begin++;
	}

	// past-the-end (write points to live element)
	return write + 1;
}

// Sorts [begin, end) by insertion; used by sort for small ranges.
template <typename T, typename Pred> void insertion_sort(T* begin, T* end, const Pred& pred)
{
	if (begin == end)
		return;

	for (T* it = begin + 1; it != end; ++it)
	{
		T val = *it;
		T* hole = it;

		// move hole backwards
		while (hole > begin && pred(val, *(hole - 1)))
		{
			*hole = *(hole - 1);
			hole--;
		}

		// fill hole with element
		*hole = val;
	}
}

// Returns the iterator (one of the three passed in) that refers to the median value.
// Only the local iterator copies are swapped; the elements themselves are not moved.
template <typename I, typename Pred> I median3(I first, I middle, I last, const Pred& pred)
{
	if (pred(*middle, *first)) swap(middle, first);
	if (pred(*last, *middle)) swap(last, middle);
	if (pred(*middle, *first)) swap(middle, first);

	return middle;
}

// Three-way partition of [begin, end) around pivot.
// On return [begin, *out_eqbeg) holds elements ordered before the pivot,
// [*out_eqbeg, *out_eqend) the elements equal to it, and [*out_eqend, end) the rest.
template <typename T, typename Pred> void partition3(T* begin, T* end, T pivot, const Pred& pred, T** out_eqbeg, T** out_eqend)
{
	// invariant: array is split into 4 groups: = < ? > (each variable denotes the boundary between the groups)
	T* eq = begin;
	T* lt = begin;
	T* gt = end;

	while (lt < gt)
	{
		if (pred(*lt, pivot))
			lt++;
		else if (*lt == pivot)
			swap(*eq++, *lt++);
		else
			swap(*lt, *--gt);
	}

	// we now have just 4 groups: = < >; move equal elements to the middle
	T* eqbeg = gt;

	for (T* it = begin; it != eq; ++it)
		swap(*it, *--eqbeg);

	*out_eqbeg = eqbeg;
	*out_eqend = gt;
}

// Sorts [begin, end) with pred: median-of-three quicksort with three-way
// partitioning, finishing ranges of 16 elements or fewer with insertion sort.
template <typename I, typename Pred> void sort(I begin, I end, const Pred& pred)
{
	// sort large chunks
	while (end - begin > 16)
	{
		// find median element
		I middle = begin + (end - begin) / 2;
		I median = median3(begin, middle, end - 1, pred);

		// partition in three chunks (< = >)
		I eqbeg, eqend;
		partition3(begin, end, *median, pred, &eqbeg, &eqend);

		// recurse into the smaller side and keep looping on the larger one
		if (eqbeg - begin > end - eqend)
		{
			sort(eqend, end, pred);
			end = eqbeg;
		}
		else
		{
			sort(begin, eqbeg, pred);
			begin = eqend;
		}
	}

	// insertion sort small chunk
	insertion_sort(begin, end, pred);
}
#ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE 7473 PUGIXML_MEMORY_XPATH_PAGE_SIZE 7474 #else 7475 4096 7476 #endif 7477 ; 7478 7479 static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*); 7480 7481 struct xpath_memory_block 7482 { 7483 xpath_memory_block* next; 7484 size_t capacity; 7485 7486 union 7487 { 7488 char data[xpath_memory_page_size]; 7489 double alignment; 7490 }; 7491 }; 7492 7493 struct xpath_allocator 7494 { 7495 xpath_memory_block* _root; 7496 size_t _root_size; 7497 bool* _error; 7498 xpath_allocatorxpath_allocator7499 xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error) 7500 { 7501 } 7502 allocatexpath_allocator7503 void* allocate(size_t size) 7504 { 7505 // round size up to block alignment boundary 7506 size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); 7507 7508 if (_root_size + size <= _root->capacity) 7509 { 7510 void* buf = &_root->data[0] + _root_size; 7511 _root_size += size; 7512 return buf; 7513 } 7514 else 7515 { 7516 // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests 7517 size_t block_capacity_base = sizeof(_root->data); 7518 size_t block_capacity_req = size + block_capacity_base / 4; 7519 size_t block_capacity = (block_capacity_base > block_capacity_req) ? 
block_capacity_base : block_capacity_req; 7520 7521 size_t block_size = block_capacity + offsetof(xpath_memory_block, data); 7522 7523 xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size)); 7524 if (!block) 7525 { 7526 if (_error) *_error = true; 7527 return 0; 7528 } 7529 7530 block->next = _root; 7531 block->capacity = block_capacity; 7532 7533 _root = block; 7534 _root_size = size; 7535 7536 return block->data; 7537 } 7538 } 7539 reallocatexpath_allocator7540 void* reallocate(void* ptr, size_t old_size, size_t new_size) 7541 { 7542 // round size up to block alignment boundary 7543 old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); 7544 new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); 7545 7546 // we can only reallocate the last object 7547 assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size); 7548 7549 // try to reallocate the object inplace 7550 if (ptr && _root_size - old_size + new_size <= _root->capacity) 7551 { 7552 _root_size = _root_size - old_size + new_size; 7553 return ptr; 7554 } 7555 7556 // allocate a new block 7557 void* result = allocate(new_size); 7558 if (!result) return 0; 7559 7560 // we have a new block 7561 if (ptr) 7562 { 7563 // copy old data (we only support growing) 7564 assert(new_size >= old_size); 7565 memcpy(result, ptr, old_size); 7566 7567 // free the previous page if it had no other objects 7568 assert(_root->data == result); 7569 assert(_root->next); 7570 7571 if (_root->next->data == ptr) 7572 { 7573 // deallocate the whole page, unless it was the first one 7574 xpath_memory_block* next = _root->next->next; 7575 7576 if (next) 7577 { 7578 xml_memory::deallocate(_root->next); 7579 _root->next = next; 7580 } 7581 } 7582 } 7583 7584 return result; 7585 } 7586 revertxpath_allocator7587 void revert(const xpath_allocator& state) 7588 { 7589 // free all new pages 7590 
xpath_memory_block* cur = _root; 7591 7592 while (cur != state._root) 7593 { 7594 xpath_memory_block* next = cur->next; 7595 7596 xml_memory::deallocate(cur); 7597 7598 cur = next; 7599 } 7600 7601 // restore state 7602 _root = state._root; 7603 _root_size = state._root_size; 7604 } 7605 releasexpath_allocator7606 void release() 7607 { 7608 xpath_memory_block* cur = _root; 7609 assert(cur); 7610 7611 while (cur->next) 7612 { 7613 xpath_memory_block* next = cur->next; 7614 7615 xml_memory::deallocate(cur); 7616 7617 cur = next; 7618 } 7619 } 7620 }; 7621 7622 struct xpath_allocator_capture 7623 { xpath_allocator_capturexpath_allocator_capture7624 xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc) 7625 { 7626 } 7627 ~xpath_allocator_capturexpath_allocator_capture7628 ~xpath_allocator_capture() 7629 { 7630 _target->revert(_state); 7631 } 7632 7633 xpath_allocator* _target; 7634 xpath_allocator _state; 7635 }; 7636 7637 struct xpath_stack 7638 { 7639 xpath_allocator* result; 7640 xpath_allocator* temp; 7641 }; 7642 7643 struct xpath_stack_data 7644 { 7645 xpath_memory_block blocks[2]; 7646 xpath_allocator result; 7647 xpath_allocator temp; 7648 xpath_stack stack; 7649 bool oom; 7650 xpath_stack_dataxpath_stack_data7651 xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false) 7652 { 7653 blocks[0].next = blocks[1].next = 0; 7654 blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data); 7655 7656 stack.result = &result; 7657 stack.temp = &temp; 7658 } 7659 ~xpath_stack_dataxpath_stack_data7660 ~xpath_stack_data() 7661 { 7662 result.release(); 7663 temp.release(); 7664 } 7665 }; 7666 PUGI__NS_END 7667 7668 // String class 7669 PUGI__NS_BEGIN 7670 class xpath_string 7671 { 7672 const char_t* _buffer; 7673 bool _uses_heap; 7674 size_t _length_heap; 7675 duplicate_string(const char_t * string,size_t length,xpath_allocator * alloc)7676 static char_t* duplicate_string(const char_t* string, size_t length, 
xpath_allocator* alloc) 7677 { 7678 char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t))); 7679 if (!result) return 0; 7680 7681 memcpy(result, string, length * sizeof(char_t)); 7682 result[length] = 0; 7683 7684 return result; 7685 } 7686 xpath_string(const char_t * buffer,bool uses_heap_,size_t length_heap)7687 xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap) 7688 { 7689 } 7690 7691 public: from_const(const char_t * str)7692 static xpath_string from_const(const char_t* str) 7693 { 7694 return xpath_string(str, false, 0); 7695 } 7696 from_heap_preallocated(const char_t * begin,const char_t * end)7697 static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end) 7698 { 7699 assert(begin <= end && *end == 0); 7700 7701 return xpath_string(begin, true, static_cast<size_t>(end - begin)); 7702 } 7703 from_heap(const char_t * begin,const char_t * end,xpath_allocator * alloc)7704 static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc) 7705 { 7706 assert(begin <= end); 7707 7708 if (begin == end) 7709 return xpath_string(); 7710 7711 size_t length = static_cast<size_t>(end - begin); 7712 const char_t* data = duplicate_string(begin, length, alloc); 7713 7714 return data ? 
xpath_string(data, true, length) : xpath_string(); 7715 } 7716 xpath_string()7717 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0) 7718 { 7719 } 7720 append(const xpath_string & o,xpath_allocator * alloc)7721 void append(const xpath_string& o, xpath_allocator* alloc) 7722 { 7723 // skip empty sources 7724 if (!*o._buffer) return; 7725 7726 // fast append for constant empty target and constant source 7727 if (!*_buffer && !_uses_heap && !o._uses_heap) 7728 { 7729 _buffer = o._buffer; 7730 } 7731 else 7732 { 7733 // need to make heap copy 7734 size_t target_length = length(); 7735 size_t source_length = o.length(); 7736 size_t result_length = target_length + source_length; 7737 7738 // allocate new buffer 7739 char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t))); 7740 if (!result) return; 7741 7742 // append first string to the new buffer in case there was no reallocation 7743 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t)); 7744 7745 // append second string to the new buffer 7746 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t)); 7747 result[result_length] = 0; 7748 7749 // finalize 7750 _buffer = result; 7751 _uses_heap = true; 7752 _length_heap = result_length; 7753 } 7754 } 7755 c_str() const7756 const char_t* c_str() const 7757 { 7758 return _buffer; 7759 } 7760 length() const7761 size_t length() const 7762 { 7763 return _uses_heap ? 
_length_heap : strlength(_buffer); 7764 } 7765 data(xpath_allocator * alloc)7766 char_t* data(xpath_allocator* alloc) 7767 { 7768 // make private heap copy 7769 if (!_uses_heap) 7770 { 7771 size_t length_ = strlength(_buffer); 7772 const char_t* data_ = duplicate_string(_buffer, length_, alloc); 7773 7774 if (!data_) return 0; 7775 7776 _buffer = data_; 7777 _uses_heap = true; 7778 _length_heap = length_; 7779 } 7780 7781 return const_cast<char_t*>(_buffer); 7782 } 7783 empty() const7784 bool empty() const 7785 { 7786 return *_buffer == 0; 7787 } 7788 operator ==(const xpath_string & o) const7789 bool operator==(const xpath_string& o) const 7790 { 7791 return strequal(_buffer, o._buffer); 7792 } 7793 operator !=(const xpath_string & o) const7794 bool operator!=(const xpath_string& o) const 7795 { 7796 return !strequal(_buffer, o._buffer); 7797 } 7798 uses_heap() const7799 bool uses_heap() const 7800 { 7801 return _uses_heap; 7802 } 7803 }; 7804 PUGI__NS_END 7805 7806 PUGI__NS_BEGIN starts_with(const char_t * string,const char_t * pattern)7807 PUGI__FN bool starts_with(const char_t* string, const char_t* pattern) 7808 { 7809 while (*pattern && *string == *pattern) 7810 { 7811 string++; 7812 pattern++; 7813 } 7814 7815 return *pattern == 0; 7816 } 7817 find_char(const char_t * s,char_t c)7818 PUGI__FN const char_t* find_char(const char_t* s, char_t c) 7819 { 7820 #ifdef PUGIXML_WCHAR_MODE 7821 return wcschr(s, c); 7822 #else 7823 return strchr(s, c); 7824 #endif 7825 } 7826 find_substring(const char_t * s,const char_t * p)7827 PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p) 7828 { 7829 #ifdef PUGIXML_WCHAR_MODE 7830 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0) 7831 return (*p == 0) ? 
s : wcsstr(s, p); 7832 #else 7833 return strstr(s, p); 7834 #endif 7835 } 7836 7837 // Converts symbol to lower case, if it is an ASCII one tolower_ascii(char_t ch)7838 PUGI__FN char_t tolower_ascii(char_t ch) 7839 { 7840 return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch; 7841 } 7842 string_value(const xpath_node & na,xpath_allocator * alloc)7843 PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc) 7844 { 7845 if (na.attribute()) 7846 return xpath_string::from_const(na.attribute().value()); 7847 else 7848 { 7849 xml_node n = na.node(); 7850 7851 switch (n.type()) 7852 { 7853 case node_pcdata: 7854 case node_cdata: 7855 case node_comment: 7856 case node_pi: 7857 return xpath_string::from_const(n.value()); 7858 7859 case node_document: 7860 case node_element: 7861 { 7862 xpath_string result; 7863 7864 // element nodes can have value if parse_embed_pcdata was used 7865 if (n.value()[0]) 7866 result.append(xpath_string::from_const(n.value()), alloc); 7867 7868 xml_node cur = n.first_child(); 7869 7870 while (cur && cur != n) 7871 { 7872 if (cur.type() == node_pcdata || cur.type() == node_cdata) 7873 result.append(xpath_string::from_const(cur.value()), alloc); 7874 7875 if (cur.first_child()) 7876 cur = cur.first_child(); 7877 else if (cur.next_sibling()) 7878 cur = cur.next_sibling(); 7879 else 7880 { 7881 while (!cur.next_sibling() && cur != n) 7882 cur = cur.parent(); 7883 7884 if (cur != n) cur = cur.next_sibling(); 7885 } 7886 } 7887 7888 return result; 7889 } 7890 7891 default: 7892 return xpath_string(); 7893 } 7894 } 7895 } 7896 node_is_before_sibling(xml_node_struct * ln,xml_node_struct * rn)7897 PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn) 7898 { 7899 assert(ln->parent == rn->parent); 7900 7901 // there is no common ancestor (the shared parent is null), nodes are from different documents 7902 if (!ln->parent) return ln < rn; 7903 7904 // determine sibling order 
7905 xml_node_struct* ls = ln; 7906 xml_node_struct* rs = rn; 7907 7908 while (ls && rs) 7909 { 7910 if (ls == rn) return true; 7911 if (rs == ln) return false; 7912 7913 ls = ls->next_sibling; 7914 rs = rs->next_sibling; 7915 } 7916 7917 // if rn sibling chain ended ln must be before rn 7918 return !rs; 7919 } 7920 node_is_before(xml_node_struct * ln,xml_node_struct * rn)7921 PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn) 7922 { 7923 // find common ancestor at the same depth, if any 7924 xml_node_struct* lp = ln; 7925 xml_node_struct* rp = rn; 7926 7927 while (lp && rp && lp->parent != rp->parent) 7928 { 7929 lp = lp->parent; 7930 rp = rp->parent; 7931 } 7932 7933 // parents are the same! 7934 if (lp && rp) return node_is_before_sibling(lp, rp); 7935 7936 // nodes are at different depths, need to normalize heights 7937 bool left_higher = !lp; 7938 7939 while (lp) 7940 { 7941 lp = lp->parent; 7942 ln = ln->parent; 7943 } 7944 7945 while (rp) 7946 { 7947 rp = rp->parent; 7948 rn = rn->parent; 7949 } 7950 7951 // one node is the ancestor of the other 7952 if (ln == rn) return left_higher; 7953 7954 // find common ancestor... 
again 7955 while (ln->parent != rn->parent) 7956 { 7957 ln = ln->parent; 7958 rn = rn->parent; 7959 } 7960 7961 return node_is_before_sibling(ln, rn); 7962 } 7963 node_is_ancestor(xml_node_struct * parent,xml_node_struct * node)7964 PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node) 7965 { 7966 while (node && node != parent) node = node->parent; 7967 7968 return parent && node == parent; 7969 } 7970 document_buffer_order(const xpath_node & xnode)7971 PUGI__FN const void* document_buffer_order(const xpath_node& xnode) 7972 { 7973 xml_node_struct* node = xnode.node().internal_object(); 7974 7975 if (node) 7976 { 7977 if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0) 7978 { 7979 if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name; 7980 if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value; 7981 } 7982 7983 return 0; 7984 } 7985 7986 xml_attribute_struct* attr = xnode.attribute().internal_object(); 7987 7988 if (attr) 7989 { 7990 if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0) 7991 { 7992 if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name; 7993 if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value; 7994 } 7995 7996 return 0; 7997 } 7998 7999 return 0; 8000 } 8001 8002 struct document_order_comparator 8003 { operator ()document_order_comparator8004 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const 8005 { 8006 // optimized document order based check 8007 const void* lo = document_buffer_order(lhs); 8008 const void* ro = document_buffer_order(rhs); 8009 8010 if (lo && ro) return lo < ro; 8011 8012 // slow comparison 8013 xml_node ln = lhs.node(), rn = rhs.node(); 8014 8015 // compare attributes 8016 if (lhs.attribute() && rhs.attribute()) 8017 { 8018 // shared 
parent 8019 if (lhs.parent() == rhs.parent()) 8020 { 8021 // determine sibling order 8022 for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute()) 8023 if (a == rhs.attribute()) 8024 return true; 8025 8026 return false; 8027 } 8028 8029 // compare attribute parents 8030 ln = lhs.parent(); 8031 rn = rhs.parent(); 8032 } 8033 else if (lhs.attribute()) 8034 { 8035 // attributes go after the parent element 8036 if (lhs.parent() == rhs.node()) return false; 8037 8038 ln = lhs.parent(); 8039 } 8040 else if (rhs.attribute()) 8041 { 8042 // attributes go after the parent element 8043 if (rhs.parent() == lhs.node()) return true; 8044 8045 rn = rhs.parent(); 8046 } 8047 8048 if (ln == rn) return false; 8049 8050 if (!ln || !rn) return ln < rn; 8051 8052 return node_is_before(ln.internal_object(), rn.internal_object()); 8053 } 8054 }; 8055 8056 struct duplicate_comparator 8057 { operator ()duplicate_comparator8058 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const 8059 { 8060 if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true; 8061 else return rhs.attribute() ? 
false : lhs.node() < rhs.node(); 8062 } 8063 }; 8064 gen_nan()8065 PUGI__FN double gen_nan() 8066 { 8067 #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24)) 8068 PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t)); 8069 typedef uint32_t UI; // BCC5 workaround 8070 union { float f; UI i; } u; 8071 u.i = 0x7fc00000; 8072 return u.f; 8073 #else 8074 // fallback 8075 const volatile double zero = 0.0; 8076 return zero / zero; 8077 #endif 8078 } 8079 is_nan(double value)8080 PUGI__FN bool is_nan(double value) 8081 { 8082 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) 8083 return !!_isnan(value); 8084 #elif defined(fpclassify) && defined(FP_NAN) 8085 return fpclassify(value) == FP_NAN; 8086 #else 8087 // fallback 8088 const volatile double v = value; 8089 return v != v; 8090 #endif 8091 } 8092 convert_number_to_string_special(double value)8093 PUGI__FN const char_t* convert_number_to_string_special(double value) 8094 { 8095 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) 8096 if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0; 8097 if (_isnan(value)) return PUGIXML_TEXT("NaN"); 8098 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); 8099 #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO) 8100 switch (fpclassify(value)) 8101 { 8102 case FP_NAN: 8103 return PUGIXML_TEXT("NaN"); 8104 8105 case FP_INFINITE: 8106 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); 8107 8108 case FP_ZERO: 8109 return PUGIXML_TEXT("0"); 8110 8111 default: 8112 return 0; 8113 } 8114 #else 8115 // fallback 8116 const volatile double v = value; 8117 8118 if (v == 0) return PUGIXML_TEXT("0"); 8119 if (v != v) return PUGIXML_TEXT("NaN"); 8120 if (v * 2 == v) return value > 0 ? 
PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); 8121 return 0; 8122 #endif 8123 } 8124 convert_number_to_boolean(double value)8125 PUGI__FN bool convert_number_to_boolean(double value) 8126 { 8127 return (value != 0 && !is_nan(value)); 8128 } 8129 truncate_zeros(char * begin,char * end)8130 PUGI__FN void truncate_zeros(char* begin, char* end) 8131 { 8132 while (begin != end && end[-1] == '0') end--; 8133 8134 *end = 0; 8135 } 8136 8137 // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent 8138 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE) convert_number_to_mantissa_exponent(double value,char (& buffer)[32],char ** out_mantissa,int * out_exponent)8139 PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent) 8140 { 8141 // get base values 8142 int sign, exponent; 8143 _ecvt_s(buffer, sizeof(buffer), value, DBL_DIG + 1, &exponent, &sign); 8144 8145 // truncate redundant zeros 8146 truncate_zeros(buffer, buffer + strlen(buffer)); 8147 8148 // fill results 8149 *out_mantissa = buffer; 8150 *out_exponent = exponent; 8151 } 8152 #else convert_number_to_mantissa_exponent(double value,char (& buffer)[32],char ** out_mantissa,int * out_exponent)8153 PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent) 8154 { 8155 // get a scientific notation value with IEEE DBL_DIG decimals 8156 PUGI__SNPRINTF(buffer, "%.*e", DBL_DIG, value); 8157 8158 // get the exponent (possibly negative) 8159 char* exponent_string = strchr(buffer, 'e'); 8160 assert(exponent_string); 8161 8162 int exponent = atoi(exponent_string + 1); 8163 8164 // extract mantissa string: skip sign 8165 char* mantissa = buffer[0] == '-' ? 
buffer + 1 : buffer; 8166 assert(mantissa[0] != '0' && mantissa[1] == '.'); 8167 8168 // divide mantissa by 10 to eliminate integer part 8169 mantissa[1] = mantissa[0]; 8170 mantissa++; 8171 exponent++; 8172 8173 // remove extra mantissa digits and zero-terminate mantissa 8174 truncate_zeros(mantissa, exponent_string); 8175 8176 // fill results 8177 *out_mantissa = mantissa; 8178 *out_exponent = exponent; 8179 } 8180 #endif 8181 convert_number_to_string(double value,xpath_allocator * alloc)8182 PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc) 8183 { 8184 // try special number conversion 8185 const char_t* special = convert_number_to_string_special(value); 8186 if (special) return xpath_string::from_const(special); 8187 8188 // get mantissa + exponent form 8189 char mantissa_buffer[32]; 8190 8191 char* mantissa; 8192 int exponent; 8193 convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent); 8194 8195 // allocate a buffer of suitable length for the number 8196 size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4; 8197 char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size)); 8198 if (!result) return xpath_string(); 8199 8200 // make the number! 8201 char_t* s = result; 8202 8203 // sign 8204 if (value < 0) *s++ = '-'; 8205 8206 // integer part 8207 if (exponent <= 0) 8208 { 8209 *s++ = '0'; 8210 } 8211 else 8212 { 8213 while (exponent > 0) 8214 { 8215 assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9); 8216 *s++ = *mantissa ? 
*mantissa++ : '0'; 8217 exponent--; 8218 } 8219 } 8220 8221 // fractional part 8222 if (*mantissa) 8223 { 8224 // decimal point 8225 *s++ = '.'; 8226 8227 // extra zeroes from negative exponent 8228 while (exponent < 0) 8229 { 8230 *s++ = '0'; 8231 exponent++; 8232 } 8233 8234 // extra mantissa digits 8235 while (*mantissa) 8236 { 8237 assert(static_cast<unsigned int>(*mantissa - '0') <= 9); 8238 *s++ = *mantissa++; 8239 } 8240 } 8241 8242 // zero-terminate 8243 assert(s < result + result_size); 8244 *s = 0; 8245 8246 return xpath_string::from_heap_preallocated(result, s); 8247 } 8248 check_string_to_number_format(const char_t * string)8249 PUGI__FN bool check_string_to_number_format(const char_t* string) 8250 { 8251 // parse leading whitespace 8252 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string; 8253 8254 // parse sign 8255 if (*string == '-') ++string; 8256 8257 if (!*string) return false; 8258 8259 // if there is no integer part, there should be a decimal part with at least one digit 8260 if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' 
|| !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false; 8261 8262 // parse integer part 8263 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string; 8264 8265 // parse decimal part 8266 if (*string == '.') 8267 { 8268 ++string; 8269 8270 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string; 8271 } 8272 8273 // parse trailing whitespace 8274 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string; 8275 8276 return *string == 0; 8277 } 8278 convert_string_to_number(const char_t * string)8279 PUGI__FN double convert_string_to_number(const char_t* string) 8280 { 8281 // check string format 8282 if (!check_string_to_number_format(string)) return gen_nan(); 8283 8284 // parse string 8285 #ifdef PUGIXML_WCHAR_MODE 8286 return wcstod(string, 0); 8287 #else 8288 return strtod(string, 0); 8289 #endif 8290 } 8291 convert_string_to_number_scratch(char_t (& buffer)[32],const char_t * begin,const char_t * end,double * out_result)8292 PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result) 8293 { 8294 size_t length = static_cast<size_t>(end - begin); 8295 char_t* scratch = buffer; 8296 8297 if (length >= sizeof(buffer) / sizeof(buffer[0])) 8298 { 8299 // need to make dummy on-heap copy 8300 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 8301 if (!scratch) return false; 8302 } 8303 8304 // copy string to zero-terminated buffer and perform conversion 8305 memcpy(scratch, begin, length * sizeof(char_t)); 8306 scratch[length] = 0; 8307 8308 *out_result = convert_string_to_number(scratch); 8309 8310 // free dummy buffer 8311 if (scratch != buffer) xml_memory::deallocate(scratch); 8312 8313 return true; 8314 } 8315 round_nearest(double value)8316 PUGI__FN double round_nearest(double value) 8317 { 8318 return floor(value + 0.5); 8319 } 8320 round_nearest_nzero(double value)8321 PUGI__FN double round_nearest_nzero(double value) 8322 { 8323 // same as round_nearest, but 
returns -0 for [-0.5, -0] 8324 // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0) 8325 return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5); 8326 } 8327 qualified_name(const xpath_node & node)8328 PUGI__FN const char_t* qualified_name(const xpath_node& node) 8329 { 8330 return node.attribute() ? node.attribute().name() : node.node().name(); 8331 } 8332 local_name(const xpath_node & node)8333 PUGI__FN const char_t* local_name(const xpath_node& node) 8334 { 8335 const char_t* name = qualified_name(node); 8336 const char_t* p = find_char(name, ':'); 8337 8338 return p ? p + 1 : name; 8339 } 8340 8341 struct namespace_uri_predicate 8342 { 8343 const char_t* prefix; 8344 size_t prefix_length; 8345 namespace_uri_predicatenamespace_uri_predicate8346 namespace_uri_predicate(const char_t* name) 8347 { 8348 const char_t* pos = find_char(name, ':'); 8349 8350 prefix = pos ? name : 0; 8351 prefix_length = pos ? static_cast<size_t>(pos - name) : 0; 8352 } 8353 operator ()namespace_uri_predicate8354 bool operator()(xml_attribute a) const 8355 { 8356 const char_t* name = a.name(); 8357 8358 if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false; 8359 8360 return prefix ? 
name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0; 8361 } 8362 }; 8363 namespace_uri(xml_node node)8364 PUGI__FN const char_t* namespace_uri(xml_node node) 8365 { 8366 namespace_uri_predicate pred = node.name(); 8367 8368 xml_node p = node; 8369 8370 while (p) 8371 { 8372 xml_attribute a = p.find_attribute(pred); 8373 8374 if (a) return a.value(); 8375 8376 p = p.parent(); 8377 } 8378 8379 return PUGIXML_TEXT(""); 8380 } 8381 namespace_uri(xml_attribute attr,xml_node parent)8382 PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent) 8383 { 8384 namespace_uri_predicate pred = attr.name(); 8385 8386 // Default namespace does not apply to attributes 8387 if (!pred.prefix) return PUGIXML_TEXT(""); 8388 8389 xml_node p = parent; 8390 8391 while (p) 8392 { 8393 xml_attribute a = p.find_attribute(pred); 8394 8395 if (a) return a.value(); 8396 8397 p = p.parent(); 8398 } 8399 8400 return PUGIXML_TEXT(""); 8401 } 8402 namespace_uri(const xpath_node & node)8403 PUGI__FN const char_t* namespace_uri(const xpath_node& node) 8404 { 8405 return node.attribute() ? 
namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node()); 8406 } 8407 normalize_space(char_t * buffer)8408 PUGI__FN char_t* normalize_space(char_t* buffer) 8409 { 8410 char_t* write = buffer; 8411 8412 for (char_t* it = buffer; *it; ) 8413 { 8414 char_t ch = *it++; 8415 8416 if (PUGI__IS_CHARTYPE(ch, ct_space)) 8417 { 8418 // replace whitespace sequence with single space 8419 while (PUGI__IS_CHARTYPE(*it, ct_space)) it++; 8420 8421 // avoid leading spaces 8422 if (write != buffer) *write++ = ' '; 8423 } 8424 else *write++ = ch; 8425 } 8426 8427 // remove trailing space 8428 if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--; 8429 8430 // zero-terminate 8431 *write = 0; 8432 8433 return write; 8434 } 8435 translate(char_t * buffer,const char_t * from,const char_t * to,size_t to_length)8436 PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length) 8437 { 8438 char_t* write = buffer; 8439 8440 while (*buffer) 8441 { 8442 PUGI__DMC_VOLATILE char_t ch = *buffer++; 8443 8444 const char_t* pos = find_char(from, ch); 8445 8446 if (!pos) 8447 *write++ = ch; // do not process 8448 else if (static_cast<size_t>(pos - from) < to_length) 8449 *write++ = to[pos - from]; // replace 8450 } 8451 8452 // zero-terminate 8453 *write = 0; 8454 8455 return write; 8456 } 8457 translate_table_generate(xpath_allocator * alloc,const char_t * from,const char_t * to)8458 PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to) 8459 { 8460 unsigned char table[128] = {0}; 8461 8462 while (*from) 8463 { 8464 unsigned int fc = static_cast<unsigned int>(*from); 8465 unsigned int tc = static_cast<unsigned int>(*to); 8466 8467 if (fc >= 128 || tc >= 128) 8468 return 0; 8469 8470 // code=128 means "skip character" 8471 if (!table[fc]) 8472 table[fc] = static_cast<unsigned char>(tc ? 
tc : 128); 8473 8474 from++; 8475 if (tc) to++; 8476 } 8477 8478 for (int i = 0; i < 128; ++i) 8479 if (!table[i]) 8480 table[i] = static_cast<unsigned char>(i); 8481 8482 void* result = alloc->allocate(sizeof(table)); 8483 if (!result) return 0; 8484 8485 memcpy(result, table, sizeof(table)); 8486 8487 return static_cast<unsigned char*>(result); 8488 } 8489 translate_table(char_t * buffer,const unsigned char * table)8490 PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table) 8491 { 8492 char_t* write = buffer; 8493 8494 while (*buffer) 8495 { 8496 char_t ch = *buffer++; 8497 unsigned int index = static_cast<unsigned int>(ch); 8498 8499 if (index < 128) 8500 { 8501 unsigned char code = table[index]; 8502 8503 // code=128 means "skip character" (table size is 128 so 128 can be a special value) 8504 // this code skips these characters without extra branches 8505 *write = static_cast<char_t>(code); 8506 write += 1 - (code >> 7); 8507 } 8508 else 8509 { 8510 *write++ = ch; 8511 } 8512 } 8513 8514 // zero-terminate 8515 *write = 0; 8516 8517 return write; 8518 } 8519 is_xpath_attribute(const char_t * name)8520 inline bool is_xpath_attribute(const char_t* name) 8521 { 8522 return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')); 8523 } 8524 8525 struct xpath_variable_boolean: xpath_variable 8526 { xpath_variable_booleanxpath_variable_boolean8527 xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false) 8528 { 8529 } 8530 8531 bool value; 8532 char_t name[1]; 8533 }; 8534 8535 struct xpath_variable_number: xpath_variable 8536 { xpath_variable_numberxpath_variable_number8537 xpath_variable_number(): xpath_variable(xpath_type_number), value(0) 8538 { 8539 } 8540 8541 double value; 8542 char_t name[1]; 8543 }; 8544 8545 struct xpath_variable_string: xpath_variable 8546 { xpath_variable_stringxpath_variable_string8547 xpath_variable_string(): xpath_variable(xpath_type_string), value(0) 8548 { 8549 } 8550 
~xpath_variable_stringxpath_variable_string8551 ~xpath_variable_string() 8552 { 8553 if (value) xml_memory::deallocate(value); 8554 } 8555 8556 char_t* value; 8557 char_t name[1]; 8558 }; 8559 8560 struct xpath_variable_node_set: xpath_variable 8561 { xpath_variable_node_setxpath_variable_node_set8562 xpath_variable_node_set(): xpath_variable(xpath_type_node_set) 8563 { 8564 } 8565 8566 xpath_node_set value; 8567 char_t name[1]; 8568 }; 8569 8570 static const xpath_node_set dummy_node_set; 8571 hash_string(const char_t * str)8572 PUGI__FN PUGI__UNSIGNED_OVERFLOW unsigned int hash_string(const char_t* str) 8573 { 8574 // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time) 8575 unsigned int result = 0; 8576 8577 while (*str) 8578 { 8579 result += static_cast<unsigned int>(*str++); 8580 result += result << 10; 8581 result ^= result >> 6; 8582 } 8583 8584 result += result << 3; 8585 result ^= result >> 11; 8586 result += result << 15; 8587 8588 return result; 8589 } 8590 new_xpath_variable(const char_t * name)8591 template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name) 8592 { 8593 size_t length = strlength(name); 8594 if (length == 0) return 0; // empty variable names are invalid 8595 8596 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters 8597 void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t)); 8598 if (!memory) return 0; 8599 8600 T* result = new (memory) T(); 8601 8602 memcpy(result->name, name, (length + 1) * sizeof(char_t)); 8603 8604 return result; 8605 } 8606 new_xpath_variable(xpath_value_type type,const char_t * name)8607 PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name) 8608 { 8609 switch (type) 8610 { 8611 case xpath_type_node_set: 8612 return new_xpath_variable<xpath_variable_node_set>(name); 8613 8614 case xpath_type_number: 8615 return new_xpath_variable<xpath_variable_number>(name); 
8616 8617 case xpath_type_string: 8618 return new_xpath_variable<xpath_variable_string>(name); 8619 8620 case xpath_type_boolean: 8621 return new_xpath_variable<xpath_variable_boolean>(name); 8622 8623 default: 8624 return 0; 8625 } 8626 } 8627 delete_xpath_variable(T * var)8628 template <typename T> PUGI__FN void delete_xpath_variable(T* var) 8629 { 8630 var->~T(); 8631 xml_memory::deallocate(var); 8632 } 8633 delete_xpath_variable(xpath_value_type type,xpath_variable * var)8634 PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var) 8635 { 8636 switch (type) 8637 { 8638 case xpath_type_node_set: 8639 delete_xpath_variable(static_cast<xpath_variable_node_set*>(var)); 8640 break; 8641 8642 case xpath_type_number: 8643 delete_xpath_variable(static_cast<xpath_variable_number*>(var)); 8644 break; 8645 8646 case xpath_type_string: 8647 delete_xpath_variable(static_cast<xpath_variable_string*>(var)); 8648 break; 8649 8650 case xpath_type_boolean: 8651 delete_xpath_variable(static_cast<xpath_variable_boolean*>(var)); 8652 break; 8653 8654 default: 8655 assert(false && "Invalid variable type"); // unreachable 8656 } 8657 } 8658 copy_xpath_variable(xpath_variable * lhs,const xpath_variable * rhs)8659 PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs) 8660 { 8661 switch (rhs->type()) 8662 { 8663 case xpath_type_node_set: 8664 return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value); 8665 8666 case xpath_type_number: 8667 return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value); 8668 8669 case xpath_type_string: 8670 return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value); 8671 8672 case xpath_type_boolean: 8673 return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value); 8674 8675 default: 8676 assert(false && "Invalid variable type"); // unreachable 8677 return false; 8678 } 8679 } 8680 get_variable_scratch(char_t (& buffer)[32],xpath_variable_set * set,const 
char_t * begin,const char_t * end,xpath_variable ** out_result)8681 PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result) 8682 { 8683 size_t length = static_cast<size_t>(end - begin); 8684 char_t* scratch = buffer; 8685 8686 if (length >= sizeof(buffer) / sizeof(buffer[0])) 8687 { 8688 // need to make dummy on-heap copy 8689 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); 8690 if (!scratch) return false; 8691 } 8692 8693 // copy string to zero-terminated buffer and perform lookup 8694 memcpy(scratch, begin, length * sizeof(char_t)); 8695 scratch[length] = 0; 8696 8697 *out_result = set->get(scratch); 8698 8699 // free dummy buffer 8700 if (scratch != buffer) xml_memory::deallocate(scratch); 8701 8702 return true; 8703 } 8704 PUGI__NS_END 8705 8706 // Internal node set class 8707 PUGI__NS_BEGIN xpath_get_order(const xpath_node * begin,const xpath_node * end)8708 PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end) 8709 { 8710 if (end - begin < 2) 8711 return xpath_node_set::type_sorted; 8712 8713 document_order_comparator cmp; 8714 8715 bool first = cmp(begin[0], begin[1]); 8716 8717 for (const xpath_node* it = begin + 1; it + 1 < end; ++it) 8718 if (cmp(it[0], it[1]) != first) 8719 return xpath_node_set::type_unsorted; 8720 8721 return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse; 8722 } 8723 xpath_sort(xpath_node * begin,xpath_node * end,xpath_node_set::type_t type,bool rev)8724 PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev) 8725 { 8726 xpath_node_set::type_t order = rev ? 
xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; 8727 8728 if (type == xpath_node_set::type_unsorted) 8729 { 8730 xpath_node_set::type_t sorted = xpath_get_order(begin, end); 8731 8732 if (sorted == xpath_node_set::type_unsorted) 8733 { 8734 sort(begin, end, document_order_comparator()); 8735 8736 type = xpath_node_set::type_sorted; 8737 } 8738 else 8739 type = sorted; 8740 } 8741 8742 if (type != order) reverse(begin, end); 8743 8744 return order; 8745 } 8746 xpath_first(const xpath_node * begin,const xpath_node * end,xpath_node_set::type_t type)8747 PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type) 8748 { 8749 if (begin == end) return xpath_node(); 8750 8751 switch (type) 8752 { 8753 case xpath_node_set::type_sorted: 8754 return *begin; 8755 8756 case xpath_node_set::type_sorted_reverse: 8757 return *(end - 1); 8758 8759 case xpath_node_set::type_unsorted: 8760 return *min_element(begin, end, document_order_comparator()); 8761 8762 default: 8763 assert(false && "Invalid node set type"); // unreachable 8764 return xpath_node(); 8765 } 8766 } 8767 8768 class xpath_node_set_raw 8769 { 8770 xpath_node_set::type_t _type; 8771 8772 xpath_node* _begin; 8773 xpath_node* _end; 8774 xpath_node* _eos; 8775 8776 public: xpath_node_set_raw()8777 xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0) 8778 { 8779 } 8780 begin() const8781 xpath_node* begin() const 8782 { 8783 return _begin; 8784 } 8785 end() const8786 xpath_node* end() const 8787 { 8788 return _end; 8789 } 8790 empty() const8791 bool empty() const 8792 { 8793 return _begin == _end; 8794 } 8795 size() const8796 size_t size() const 8797 { 8798 return static_cast<size_t>(_end - _begin); 8799 } 8800 first() const8801 xpath_node first() const 8802 { 8803 return xpath_first(_begin, _end, _type); 8804 } 8805 8806 void push_back_grow(const xpath_node& node, xpath_allocator* alloc); 8807 push_back(const 
xpath_node & node,xpath_allocator * alloc)8808 void push_back(const xpath_node& node, xpath_allocator* alloc) 8809 { 8810 if (_end != _eos) 8811 *_end++ = node; 8812 else 8813 push_back_grow(node, alloc); 8814 } 8815 append(const xpath_node * begin_,const xpath_node * end_,xpath_allocator * alloc)8816 void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc) 8817 { 8818 if (begin_ == end_) return; 8819 8820 size_t size_ = static_cast<size_t>(_end - _begin); 8821 size_t capacity = static_cast<size_t>(_eos - _begin); 8822 size_t count = static_cast<size_t>(end_ - begin_); 8823 8824 if (size_ + count > capacity) 8825 { 8826 // reallocate the old array or allocate a new one 8827 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node))); 8828 if (!data) return; 8829 8830 // finalize 8831 _begin = data; 8832 _end = data + size_; 8833 _eos = data + size_ + count; 8834 } 8835 8836 memcpy(_end, begin_, count * sizeof(xpath_node)); 8837 _end += count; 8838 } 8839 sort_do()8840 void sort_do() 8841 { 8842 _type = xpath_sort(_begin, _end, _type, false); 8843 } 8844 truncate(xpath_node * pos)8845 void truncate(xpath_node* pos) 8846 { 8847 assert(_begin <= pos && pos <= _end); 8848 8849 _end = pos; 8850 } 8851 remove_duplicates()8852 void remove_duplicates() 8853 { 8854 if (_type == xpath_node_set::type_unsorted) 8855 sort(_begin, _end, duplicate_comparator()); 8856 8857 _end = unique(_begin, _end); 8858 } 8859 type() const8860 xpath_node_set::type_t type() const 8861 { 8862 return _type; 8863 } 8864 set_type(xpath_node_set::type_t value)8865 void set_type(xpath_node_set::type_t value) 8866 { 8867 _type = value; 8868 } 8869 }; 8870 push_back_grow(const xpath_node & node,xpath_allocator * alloc)8871 PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc) 8872 { 8873 size_t capacity = static_cast<size_t>(_eos - _begin); 
8874 8875 // get new capacity (1.5x rule) 8876 size_t new_capacity = capacity + capacity / 2 + 1; 8877 8878 // reallocate the old array or allocate a new one 8879 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); 8880 if (!data) return; 8881 8882 // finalize 8883 _begin = data; 8884 _end = data + capacity; 8885 _eos = data + new_capacity; 8886 8887 // push 8888 *_end++ = node; 8889 } 8890 PUGI__NS_END 8891 8892 PUGI__NS_BEGIN 8893 struct xpath_context 8894 { 8895 xpath_node n; 8896 size_t position, size; 8897 xpath_contextxpath_context8898 xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_) 8899 { 8900 } 8901 }; 8902 8903 enum lexeme_t 8904 { 8905 lex_none = 0, 8906 lex_equal, 8907 lex_not_equal, 8908 lex_less, 8909 lex_greater, 8910 lex_less_or_equal, 8911 lex_greater_or_equal, 8912 lex_plus, 8913 lex_minus, 8914 lex_multiply, 8915 lex_union, 8916 lex_var_ref, 8917 lex_open_brace, 8918 lex_close_brace, 8919 lex_quoted_string, 8920 lex_number, 8921 lex_slash, 8922 lex_double_slash, 8923 lex_open_square_brace, 8924 lex_close_square_brace, 8925 lex_string, 8926 lex_comma, 8927 lex_axis_attribute, 8928 lex_dot, 8929 lex_double_dot, 8930 lex_double_colon, 8931 lex_eof 8932 }; 8933 8934 struct xpath_lexer_string 8935 { 8936 const char_t* begin; 8937 const char_t* end; 8938 xpath_lexer_stringxpath_lexer_string8939 xpath_lexer_string(): begin(0), end(0) 8940 { 8941 } 8942 operator ==xpath_lexer_string8943 bool operator==(const char_t* other) const 8944 { 8945 size_t length = static_cast<size_t>(end - begin); 8946 8947 return strequalrange(other, begin, length); 8948 } 8949 }; 8950 8951 class xpath_lexer 8952 { 8953 const char_t* _cur; 8954 const char_t* _cur_lexeme_pos; 8955 xpath_lexer_string _cur_lexeme_contents; 8956 8957 lexeme_t _cur_lexeme; 8958 8959 public: xpath_lexer(const char_t * query)8960 explicit xpath_lexer(const 
char_t* query): _cur(query) 8961 { 8962 next(); 8963 } 8964 state() const8965 const char_t* state() const 8966 { 8967 return _cur; 8968 } 8969 next()8970 void next() 8971 { 8972 const char_t* cur = _cur; 8973 8974 while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur; 8975 8976 // save lexeme position for error reporting 8977 _cur_lexeme_pos = cur; 8978 8979 switch (*cur) 8980 { 8981 case 0: 8982 _cur_lexeme = lex_eof; 8983 break; 8984 8985 case '>': 8986 if (*(cur+1) == '=') 8987 { 8988 cur += 2; 8989 _cur_lexeme = lex_greater_or_equal; 8990 } 8991 else 8992 { 8993 cur += 1; 8994 _cur_lexeme = lex_greater; 8995 } 8996 break; 8997 8998 case '<': 8999 if (*(cur+1) == '=') 9000 { 9001 cur += 2; 9002 _cur_lexeme = lex_less_or_equal; 9003 } 9004 else 9005 { 9006 cur += 1; 9007 _cur_lexeme = lex_less; 9008 } 9009 break; 9010 9011 case '!': 9012 if (*(cur+1) == '=') 9013 { 9014 cur += 2; 9015 _cur_lexeme = lex_not_equal; 9016 } 9017 else 9018 { 9019 _cur_lexeme = lex_none; 9020 } 9021 break; 9022 9023 case '=': 9024 cur += 1; 9025 _cur_lexeme = lex_equal; 9026 9027 break; 9028 9029 case '+': 9030 cur += 1; 9031 _cur_lexeme = lex_plus; 9032 9033 break; 9034 9035 case '-': 9036 cur += 1; 9037 _cur_lexeme = lex_minus; 9038 9039 break; 9040 9041 case '*': 9042 cur += 1; 9043 _cur_lexeme = lex_multiply; 9044 9045 break; 9046 9047 case '|': 9048 cur += 1; 9049 _cur_lexeme = lex_union; 9050 9051 break; 9052 9053 case '$': 9054 cur += 1; 9055 9056 if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) 9057 { 9058 _cur_lexeme_contents.begin = cur; 9059 9060 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; 9061 9062 if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname 9063 { 9064 cur++; // : 9065 9066 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; 9067 } 9068 9069 _cur_lexeme_contents.end = cur; 9070 9071 _cur_lexeme = lex_var_ref; 9072 } 9073 else 9074 { 9075 _cur_lexeme = lex_none; 9076 } 9077 9078 break; 9079 9080 case '(': 9081 cur += 1; 9082 _cur_lexeme = 
lex_open_brace; 9083 9084 break; 9085 9086 case ')': 9087 cur += 1; 9088 _cur_lexeme = lex_close_brace; 9089 9090 break; 9091 9092 case '[': 9093 cur += 1; 9094 _cur_lexeme = lex_open_square_brace; 9095 9096 break; 9097 9098 case ']': 9099 cur += 1; 9100 _cur_lexeme = lex_close_square_brace; 9101 9102 break; 9103 9104 case ',': 9105 cur += 1; 9106 _cur_lexeme = lex_comma; 9107 9108 break; 9109 9110 case '/': 9111 if (*(cur+1) == '/') 9112 { 9113 cur += 2; 9114 _cur_lexeme = lex_double_slash; 9115 } 9116 else 9117 { 9118 cur += 1; 9119 _cur_lexeme = lex_slash; 9120 } 9121 break; 9122 9123 case '.': 9124 if (*(cur+1) == '.') 9125 { 9126 cur += 2; 9127 _cur_lexeme = lex_double_dot; 9128 } 9129 else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit)) 9130 { 9131 _cur_lexeme_contents.begin = cur; // . 9132 9133 ++cur; 9134 9135 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; 9136 9137 _cur_lexeme_contents.end = cur; 9138 9139 _cur_lexeme = lex_number; 9140 } 9141 else 9142 { 9143 cur += 1; 9144 _cur_lexeme = lex_dot; 9145 } 9146 break; 9147 9148 case '@': 9149 cur += 1; 9150 _cur_lexeme = lex_axis_attribute; 9151 9152 break; 9153 9154 case '"': 9155 case '\'': 9156 { 9157 char_t terminator = *cur; 9158 9159 ++cur; 9160 9161 _cur_lexeme_contents.begin = cur; 9162 while (*cur && *cur != terminator) cur++; 9163 _cur_lexeme_contents.end = cur; 9164 9165 if (!*cur) 9166 _cur_lexeme = lex_none; 9167 else 9168 { 9169 cur += 1; 9170 _cur_lexeme = lex_quoted_string; 9171 } 9172 9173 break; 9174 } 9175 9176 case ':': 9177 if (*(cur+1) == ':') 9178 { 9179 cur += 2; 9180 _cur_lexeme = lex_double_colon; 9181 } 9182 else 9183 { 9184 _cur_lexeme = lex_none; 9185 } 9186 break; 9187 9188 default: 9189 if (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) 9190 { 9191 _cur_lexeme_contents.begin = cur; 9192 9193 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; 9194 9195 if (*cur == '.') 9196 { 9197 cur++; 9198 9199 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; 9200 } 9201 9202 
// Kinds of XPath AST nodes.
// In the comments below, left/right/third refer to the operand sub-expressions of a node;
// the _0/_1/_2/_3 suffixes distinguish function variants by argument count.
enum ast_type_t
{
	ast_unknown,
	ast_op_or, // left or right
	ast_op_and, // left and right
	ast_op_equal, // left = right
	ast_op_not_equal, // left != right
	ast_op_less, // left < right
	ast_op_greater, // left > right
	ast_op_less_or_equal, // left <= right
	ast_op_greater_or_equal, // left >= right
	ast_op_add, // left + right
	ast_op_subtract, // left - right
	ast_op_multiply, // left * right
	ast_op_divide, // left div right
	ast_op_mod, // left mod right
	ast_op_negate, // - left (unary minus)
	ast_op_union, // left | right
	ast_predicate, // apply predicate to set; next points to next predicate
	ast_filter, // select * from left where right
	ast_string_constant, // string constant
	ast_number_constant, // number constant
	ast_variable, // variable
	ast_func_last, // last()
	ast_func_position, // position()
	ast_func_count, // count(left)
	ast_func_id, // id(left)
	ast_func_local_name_0, // local-name()
	ast_func_local_name_1, // local-name(left)
	ast_func_namespace_uri_0, // namespace-uri()
	ast_func_namespace_uri_1, // namespace-uri(left)
	ast_func_name_0, // name()
	ast_func_name_1, // name(left)
	ast_func_string_0, // string()
	ast_func_string_1, // string(left)
	ast_func_concat, // concat(left, right, siblings)
	ast_func_starts_with, // starts-with(left, right)
	ast_func_contains, // contains(left, right)
	ast_func_substring_before, // substring-before(left, right)
	ast_func_substring_after, // substring-after(left, right)
	ast_func_substring_2, // substring(left, right)
	ast_func_substring_3, // substring(left, right, third)
	ast_func_string_length_0, // string-length()
	ast_func_string_length_1, // string-length(left)
	ast_func_normalize_space_0, // normalize-space()
	ast_func_normalize_space_1, // normalize-space(left)
	ast_func_translate, // translate(left, right, third)
	ast_func_boolean, // boolean(left)
	ast_func_not, // not(left)
	ast_func_true, // true()
	ast_func_false, // false()
	ast_func_lang, // lang(left)
	ast_func_number_0, // number()
	ast_func_number_1, // number(left)
	ast_func_sum, // sum(left)
	ast_func_floor, // floor(left)
	ast_func_ceiling, // ceiling(left)
	ast_func_round, // round(left)
	ast_step, // process set left with step
	ast_step_root, // select root node

	// optimized forms of the nodes above
	ast_opt_translate_table, // translate(left, right, third) where right/third are constants
	ast_opt_compare_attribute // @name = 'string'
};
// Evaluates an equality-style comparison between two sub-expressions.
// comp is invoked with a pair of booleans, numbers or strings depending on the operand types:
// - neither operand is a node set: compare as booleans if either is boolean, else as numbers
//   if either is a number, else as strings
// - both operands are node sets: true if comp holds for the string values of any pair of nodes
// - exactly one operand is a node set: the other operand decides the comparison type, and the
//   result is true if comp holds for any node in the set (boolean case: for the set converted
//   to a boolean)
// NOTE(review): the xpath_allocator_capture objects presumably release temporaries allocated
// from stack.result when they go out of scope - confirm against xpath_allocator_capture
template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
{
	xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();

	if (lt != xpath_type_node_set && rt != xpath_type_node_set)
	{
		// two scalar operands: pick the comparison type by priority boolean > number > string
		if (lt == xpath_type_boolean || rt == xpath_type_boolean)
			return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
		else if (lt == xpath_type_number || rt == xpath_type_number)
			return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
		else if (lt == xpath_type_string || rt == xpath_type_string)
		{
			xpath_allocator_capture cr(stack.result);

			xpath_string ls = lhs->eval_string(c, stack);
			xpath_string rs = rhs->eval_string(c, stack);

			return comp(ls, rs);
		}
	}
	else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
	{
		// two node sets: compare string values of every (left, right) node pair
		xpath_allocator_capture cr(stack.result);

		xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
		xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);

		for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
			for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
			{
				// per-pair capture scope for the two temporary string values
				xpath_allocator_capture cri(stack.result);

				if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
					return true;
			}

		return false;
	}
	else
	{
		// exactly one operand is a node set: make sure it is on the right-hand side;
		// note that after the swap comp receives the scalar operand first
		if (lt == xpath_type_node_set)
		{
			swap(lhs, rhs);
			swap(lt, rt);
		}

		if (lt == xpath_type_boolean)
			return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
		else if (lt == xpath_type_number)
		{
			xpath_allocator_capture cr(stack.result);

			double l = lhs->eval_number(c, stack);
			xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);

			for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
			{
				xpath_allocator_capture cri(stack.result);

				// each node is compared through the numeric value of its string value
				if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
					return true;
			}

			return false;
		}
		else if (lt == xpath_type_string)
		{
			xpath_allocator_capture cr(stack.result);

			xpath_string l = lhs->eval_string(c, stack);
			xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);

			for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
			{
				xpath_allocator_capture cri(stack.result);

				if (comp(l, string_value(*ri, stack.result)))
					return true;
			}

			return false;
		}
	}

	// reached only if an operand reports a type not handled above
	assert(false && "Wrong types"); // unreachable
	return false;
}
stack.result).c_str()))) 9526 return true; 9527 } 9528 } 9529 9530 return false; 9531 } 9532 else if (lt != xpath_type_node_set && rt == xpath_type_node_set) 9533 { 9534 xpath_allocator_capture cr(stack.result); 9535 9536 double l = lhs->eval_number(c, stack); 9537 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); 9538 9539 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) 9540 { 9541 xpath_allocator_capture cri(stack.result); 9542 9543 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) 9544 return true; 9545 } 9546 9547 return false; 9548 } 9549 else if (lt == xpath_type_node_set && rt != xpath_type_node_set) 9550 { 9551 xpath_allocator_capture cr(stack.result); 9552 9553 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); 9554 double r = rhs->eval_number(c, stack); 9555 9556 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) 9557 { 9558 xpath_allocator_capture cri(stack.result); 9559 9560 if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r)) 9561 return true; 9562 } 9563 9564 return false; 9565 } 9566 else 9567 { 9568 assert(false && "Wrong types"); // unreachable 9569 return false; 9570 } 9571 } 9572 apply_predicate_boolean(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack,bool once)9573 static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) 9574 { 9575 assert(ns.size() >= first); 9576 assert(expr->rettype() != xpath_type_number); 9577 9578 size_t i = 1; 9579 size_t size = ns.size() - first; 9580 9581 xpath_node* last = ns.begin() + first; 9582 9583 // remove_if... 
or well, sort of 9584 for (xpath_node* it = last; it != ns.end(); ++it, ++i) 9585 { 9586 xpath_context c(*it, i, size); 9587 9588 if (expr->eval_boolean(c, stack)) 9589 { 9590 *last++ = *it; 9591 9592 if (once) break; 9593 } 9594 } 9595 9596 ns.truncate(last); 9597 } 9598 apply_predicate_number(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack,bool once)9599 static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) 9600 { 9601 assert(ns.size() >= first); 9602 assert(expr->rettype() == xpath_type_number); 9603 9604 size_t i = 1; 9605 size_t size = ns.size() - first; 9606 9607 xpath_node* last = ns.begin() + first; 9608 9609 // remove_if... or well, sort of 9610 for (xpath_node* it = last; it != ns.end(); ++it, ++i) 9611 { 9612 xpath_context c(*it, i, size); 9613 9614 if (expr->eval_number(c, stack) == i) 9615 { 9616 *last++ = *it; 9617 9618 if (once) break; 9619 } 9620 } 9621 9622 ns.truncate(last); 9623 } 9624 apply_predicate_number_const(xpath_node_set_raw & ns,size_t first,xpath_ast_node * expr,const xpath_stack & stack)9625 static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) 9626 { 9627 assert(ns.size() >= first); 9628 assert(expr->rettype() == xpath_type_number); 9629 9630 size_t size = ns.size() - first; 9631 9632 xpath_node* last = ns.begin() + first; 9633 9634 xpath_context c(xpath_node(), 1, size); 9635 9636 double er = expr->eval_number(c, stack); 9637 9638 if (er >= 1.0 && er <= size) 9639 { 9640 size_t eri = static_cast<size_t>(er); 9641 9642 if (er == eri) 9643 { 9644 xpath_node r = last[eri - 1]; 9645 9646 *last++ = r; 9647 } 9648 } 9649 9650 ns.truncate(last); 9651 } 9652 apply_predicate(xpath_node_set_raw & ns,size_t first,const xpath_stack & stack,bool once)9653 void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once) 9654 { 9655 
if (ns.size() == first) return; 9656 9657 assert(_type == ast_filter || _type == ast_predicate); 9658 9659 if (_test == predicate_constant || _test == predicate_constant_one) 9660 apply_predicate_number_const(ns, first, _right, stack); 9661 else if (_right->rettype() == xpath_type_number) 9662 apply_predicate_number(ns, first, _right, stack, once); 9663 else 9664 apply_predicate_boolean(ns, first, _right, stack, once); 9665 } 9666 apply_predicates(xpath_node_set_raw & ns,size_t first,const xpath_stack & stack,nodeset_eval_t eval)9667 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval) 9668 { 9669 if (ns.size() == first) return; 9670 9671 bool last_once = eval_once(ns.type(), eval); 9672 9673 for (xpath_ast_node* pred = _right; pred; pred = pred->_next) 9674 pred->apply_predicate(ns, first, stack, !pred->_next && last_once); 9675 } 9676 step_push(xpath_node_set_raw & ns,xml_attribute_struct * a,xml_node_struct * parent,xpath_allocator * alloc)9677 bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc) 9678 { 9679 assert(a); 9680 9681 const char_t* name = a->name ? 
a->name + 0 : PUGIXML_TEXT(""); 9682 9683 switch (_test) 9684 { 9685 case nodetest_name: 9686 if (strequal(name, _data.nodetest) && is_xpath_attribute(name)) 9687 { 9688 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); 9689 return true; 9690 } 9691 break; 9692 9693 case nodetest_type_node: 9694 case nodetest_all: 9695 if (is_xpath_attribute(name)) 9696 { 9697 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); 9698 return true; 9699 } 9700 break; 9701 9702 case nodetest_all_in_namespace: 9703 if (starts_with(name, _data.nodetest) && is_xpath_attribute(name)) 9704 { 9705 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); 9706 return true; 9707 } 9708 break; 9709 9710 default: 9711 ; 9712 } 9713 9714 return false; 9715 } 9716 step_push(xpath_node_set_raw & ns,xml_node_struct * n,xpath_allocator * alloc)9717 bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc) 9718 { 9719 assert(n); 9720 9721 xml_node_type type = PUGI__NODETYPE(n); 9722 9723 switch (_test) 9724 { 9725 case nodetest_name: 9726 if (type == node_element && n->name && strequal(n->name, _data.nodetest)) 9727 { 9728 ns.push_back(xml_node(n), alloc); 9729 return true; 9730 } 9731 break; 9732 9733 case nodetest_type_node: 9734 ns.push_back(xml_node(n), alloc); 9735 return true; 9736 9737 case nodetest_type_comment: 9738 if (type == node_comment) 9739 { 9740 ns.push_back(xml_node(n), alloc); 9741 return true; 9742 } 9743 break; 9744 9745 case nodetest_type_text: 9746 if (type == node_pcdata || type == node_cdata) 9747 { 9748 ns.push_back(xml_node(n), alloc); 9749 return true; 9750 } 9751 break; 9752 9753 case nodetest_type_pi: 9754 if (type == node_pi) 9755 { 9756 ns.push_back(xml_node(n), alloc); 9757 return true; 9758 } 9759 break; 9760 9761 case nodetest_pi: 9762 if (type == node_pi && n->name && strequal(n->name, _data.nodetest)) 9763 { 9764 ns.push_back(xml_node(n), alloc); 9765 return true; 9766 } 9767 break; 9768 9769 
case nodetest_all: 9770 if (type == node_element) 9771 { 9772 ns.push_back(xml_node(n), alloc); 9773 return true; 9774 } 9775 break; 9776 9777 case nodetest_all_in_namespace: 9778 if (type == node_element && n->name && starts_with(n->name, _data.nodetest)) 9779 { 9780 ns.push_back(xml_node(n), alloc); 9781 return true; 9782 } 9783 break; 9784 9785 default: 9786 assert(false && "Unknown axis"); // unreachable 9787 } 9788 9789 return false; 9790 } 9791 step_fill(xpath_node_set_raw & ns,xml_node_struct * n,xpath_allocator * alloc,bool once,T)9792 template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T) 9793 { 9794 const axis_t axis = T::axis; 9795 9796 switch (axis) 9797 { 9798 case axis_attribute: 9799 { 9800 for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute) 9801 if (step_push(ns, a, n, alloc) & once) 9802 return; 9803 9804 break; 9805 } 9806 9807 case axis_child: 9808 { 9809 for (xml_node_struct* c = n->first_child; c; c = c->next_sibling) 9810 if (step_push(ns, c, alloc) & once) 9811 return; 9812 9813 break; 9814 } 9815 9816 case axis_descendant: 9817 case axis_descendant_or_self: 9818 { 9819 if (axis == axis_descendant_or_self) 9820 if (step_push(ns, n, alloc) & once) 9821 return; 9822 9823 xml_node_struct* cur = n->first_child; 9824 9825 while (cur) 9826 { 9827 if (step_push(ns, cur, alloc) & once) 9828 return; 9829 9830 if (cur->first_child) 9831 cur = cur->first_child; 9832 else 9833 { 9834 while (!cur->next_sibling) 9835 { 9836 cur = cur->parent; 9837 9838 if (cur == n) return; 9839 } 9840 9841 cur = cur->next_sibling; 9842 } 9843 } 9844 9845 break; 9846 } 9847 9848 case axis_following_sibling: 9849 { 9850 for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling) 9851 if (step_push(ns, c, alloc) & once) 9852 return; 9853 9854 break; 9855 } 9856 9857 case axis_preceding_sibling: 9858 { 9859 for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = 
c->prev_sibling_c) 9860 if (step_push(ns, c, alloc) & once) 9861 return; 9862 9863 break; 9864 } 9865 9866 case axis_following: 9867 { 9868 xml_node_struct* cur = n; 9869 9870 // exit from this node so that we don't include descendants 9871 while (!cur->next_sibling) 9872 { 9873 cur = cur->parent; 9874 9875 if (!cur) return; 9876 } 9877 9878 cur = cur->next_sibling; 9879 9880 while (cur) 9881 { 9882 if (step_push(ns, cur, alloc) & once) 9883 return; 9884 9885 if (cur->first_child) 9886 cur = cur->first_child; 9887 else 9888 { 9889 while (!cur->next_sibling) 9890 { 9891 cur = cur->parent; 9892 9893 if (!cur) return; 9894 } 9895 9896 cur = cur->next_sibling; 9897 } 9898 } 9899 9900 break; 9901 } 9902 9903 case axis_preceding: 9904 { 9905 xml_node_struct* cur = n; 9906 9907 // exit from this node so that we don't include descendants 9908 while (!cur->prev_sibling_c->next_sibling) 9909 { 9910 cur = cur->parent; 9911 9912 if (!cur) return; 9913 } 9914 9915 cur = cur->prev_sibling_c; 9916 9917 while (cur) 9918 { 9919 if (cur->first_child) 9920 cur = cur->first_child->prev_sibling_c; 9921 else 9922 { 9923 // leaf node, can't be ancestor 9924 if (step_push(ns, cur, alloc) & once) 9925 return; 9926 9927 while (!cur->prev_sibling_c->next_sibling) 9928 { 9929 cur = cur->parent; 9930 9931 if (!cur) return; 9932 9933 if (!node_is_ancestor(cur, n)) 9934 if (step_push(ns, cur, alloc) & once) 9935 return; 9936 } 9937 9938 cur = cur->prev_sibling_c; 9939 } 9940 } 9941 9942 break; 9943 } 9944 9945 case axis_ancestor: 9946 case axis_ancestor_or_self: 9947 { 9948 if (axis == axis_ancestor_or_self) 9949 if (step_push(ns, n, alloc) & once) 9950 return; 9951 9952 xml_node_struct* cur = n->parent; 9953 9954 while (cur) 9955 { 9956 if (step_push(ns, cur, alloc) & once) 9957 return; 9958 9959 cur = cur->parent; 9960 } 9961 9962 break; 9963 } 9964 9965 case axis_self: 9966 { 9967 step_push(ns, n, alloc); 9968 9969 break; 9970 } 9971 9972 case axis_parent: 9973 { 9974 if (n->parent) 9975 
step_push(ns, n->parent, alloc); 9976 9977 break; 9978 } 9979 9980 default: 9981 assert(false && "Unimplemented axis"); // unreachable 9982 } 9983 } 9984 step_fill(xpath_node_set_raw & ns,xml_attribute_struct * a,xml_node_struct * p,xpath_allocator * alloc,bool once,T v)9985 template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v) 9986 { 9987 const axis_t axis = T::axis; 9988 9989 switch (axis) 9990 { 9991 case axis_ancestor: 9992 case axis_ancestor_or_self: 9993 { 9994 if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test 9995 if (step_push(ns, a, p, alloc) & once) 9996 return; 9997 9998 xml_node_struct* cur = p; 9999 10000 while (cur) 10001 { 10002 if (step_push(ns, cur, alloc) & once) 10003 return; 10004 10005 cur = cur->parent; 10006 } 10007 10008 break; 10009 } 10010 10011 case axis_descendant_or_self: 10012 case axis_self: 10013 { 10014 if (_test == nodetest_type_node) // reject attributes based on principal node type test 10015 step_push(ns, a, p, alloc); 10016 10017 break; 10018 } 10019 10020 case axis_following: 10021 { 10022 xml_node_struct* cur = p; 10023 10024 while (cur) 10025 { 10026 if (cur->first_child) 10027 cur = cur->first_child; 10028 else 10029 { 10030 while (!cur->next_sibling) 10031 { 10032 cur = cur->parent; 10033 10034 if (!cur) return; 10035 } 10036 10037 cur = cur->next_sibling; 10038 } 10039 10040 if (step_push(ns, cur, alloc) & once) 10041 return; 10042 } 10043 10044 break; 10045 } 10046 10047 case axis_parent: 10048 { 10049 step_push(ns, p, alloc); 10050 10051 break; 10052 } 10053 10054 case axis_preceding: 10055 { 10056 // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding 10057 step_fill(ns, p, alloc, once, v); 10058 break; 10059 } 10060 10061 default: 10062 assert(false && "Unimplemented 
axis"); // unreachable 10063 } 10064 } 10065 step_fill(xpath_node_set_raw & ns,const xpath_node & xn,xpath_allocator * alloc,bool once,T v)10066 template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v) 10067 { 10068 const axis_t axis = T::axis; 10069 const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self); 10070 10071 if (xn.node()) 10072 step_fill(ns, xn.node().internal_object(), alloc, once, v); 10073 else if (axis_has_attributes && xn.attribute() && xn.parent()) 10074 step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v); 10075 } 10076 step_do(const xpath_context & c,const xpath_stack & stack,nodeset_eval_t eval,T v)10077 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v) 10078 { 10079 const axis_t axis = T::axis; 10080 const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling); 10081 const xpath_node_set::type_t axis_type = axis_reverse ? 
xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; 10082 10083 bool once = 10084 (axis == axis_attribute && _test == nodetest_name) || 10085 (!_right && eval_once(axis_type, eval)) || 10086 (_right && !_right->_next && _right->_test == predicate_constant_one); 10087 10088 xpath_node_set_raw ns; 10089 ns.set_type(axis_type); 10090 10091 if (_left) 10092 { 10093 xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all); 10094 10095 // self axis preserves the original order 10096 if (axis == axis_self) ns.set_type(s.type()); 10097 10098 for (const xpath_node* it = s.begin(); it != s.end(); ++it) 10099 { 10100 size_t size = ns.size(); 10101 10102 // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes 10103 if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted); 10104 10105 step_fill(ns, *it, stack.result, once, v); 10106 if (_right) apply_predicates(ns, size, stack, eval); 10107 } 10108 } 10109 else 10110 { 10111 step_fill(ns, c.n, stack.result, once, v); 10112 if (_right) apply_predicates(ns, 0, stack, eval); 10113 } 10114 10115 // child, attribute and self axes always generate unique set of nodes 10116 // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice 10117 if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted) 10118 ns.remove_duplicates(); 10119 10120 return ns; 10121 } 10122 10123 public: xpath_ast_node(ast_type_t type,xpath_value_type rettype_,const char_t * value)10124 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value): 10125 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) 10126 { 10127 assert(type == ast_string_constant); 10128 _data.string = value; 10129 } 10130 xpath_ast_node(ast_type_t 
type,xpath_value_type rettype_,double value)10131 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value): 10132 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) 10133 { 10134 assert(type == ast_number_constant); 10135 _data.number = value; 10136 } 10137 xpath_ast_node(ast_type_t type,xpath_value_type rettype_,xpath_variable * value)10138 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value): 10139 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) 10140 { 10141 assert(type == ast_variable); 10142 _data.variable = value; 10143 } 10144 xpath_ast_node(ast_type_t type,xpath_value_type rettype_,xpath_ast_node * left=0,xpath_ast_node * right=0)10145 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0): 10146 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0) 10147 { 10148 } 10149 xpath_ast_node(ast_type_t type,xpath_ast_node * left,axis_t axis,nodetest_t test,const char_t * contents)10150 xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents): 10151 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0) 10152 { 10153 assert(type == ast_step); 10154 _data.nodetest = contents; 10155 } 10156 xpath_ast_node(ast_type_t type,xpath_ast_node * left,xpath_ast_node * right,predicate_t test)10157 xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test): 10158 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0) 10159 { 10160 assert(type == ast_filter || type == ast_predicate); 10161 } 
10162 set_next(xpath_ast_node * value)10163 void set_next(xpath_ast_node* value) 10164 { 10165 _next = value; 10166 } 10167 set_right(xpath_ast_node * value)10168 void set_right(xpath_ast_node* value) 10169 { 10170 _right = value; 10171 } 10172 eval_boolean(const xpath_context & c,const xpath_stack & stack)10173 bool eval_boolean(const xpath_context& c, const xpath_stack& stack) 10174 { 10175 switch (_type) 10176 { 10177 case ast_op_or: 10178 return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack); 10179 10180 case ast_op_and: 10181 return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack); 10182 10183 case ast_op_equal: 10184 return compare_eq(_left, _right, c, stack, equal_to()); 10185 10186 case ast_op_not_equal: 10187 return compare_eq(_left, _right, c, stack, not_equal_to()); 10188 10189 case ast_op_less: 10190 return compare_rel(_left, _right, c, stack, less()); 10191 10192 case ast_op_greater: 10193 return compare_rel(_right, _left, c, stack, less()); 10194 10195 case ast_op_less_or_equal: 10196 return compare_rel(_left, _right, c, stack, less_equal()); 10197 10198 case ast_op_greater_or_equal: 10199 return compare_rel(_right, _left, c, stack, less_equal()); 10200 10201 case ast_func_starts_with: 10202 { 10203 xpath_allocator_capture cr(stack.result); 10204 10205 xpath_string lr = _left->eval_string(c, stack); 10206 xpath_string rr = _right->eval_string(c, stack); 10207 10208 return starts_with(lr.c_str(), rr.c_str()); 10209 } 10210 10211 case ast_func_contains: 10212 { 10213 xpath_allocator_capture cr(stack.result); 10214 10215 xpath_string lr = _left->eval_string(c, stack); 10216 xpath_string rr = _right->eval_string(c, stack); 10217 10218 return find_substring(lr.c_str(), rr.c_str()) != 0; 10219 } 10220 10221 case ast_func_boolean: 10222 return _left->eval_boolean(c, stack); 10223 10224 case ast_func_not: 10225 return !_left->eval_boolean(c, stack); 10226 10227 case ast_func_true: 10228 return true; 10229 10230 case 
ast_func_false: 10231 return false; 10232 10233 case ast_func_lang: 10234 { 10235 if (c.n.attribute()) return false; 10236 10237 xpath_allocator_capture cr(stack.result); 10238 10239 xpath_string lang = _left->eval_string(c, stack); 10240 10241 for (xml_node n = c.n.node(); n; n = n.parent()) 10242 { 10243 xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang")); 10244 10245 if (a) 10246 { 10247 const char_t* value = a.value(); 10248 10249 // strnicmp / strncasecmp is not portable 10250 for (const char_t* lit = lang.c_str(); *lit; ++lit) 10251 { 10252 if (tolower_ascii(*lit) != tolower_ascii(*value)) return false; 10253 ++value; 10254 } 10255 10256 return *value == 0 || *value == '-'; 10257 } 10258 } 10259 10260 return false; 10261 } 10262 10263 case ast_opt_compare_attribute: 10264 { 10265 const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string(); 10266 10267 xml_attribute attr = c.n.node().attribute(_left->_data.nodetest); 10268 10269 return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name()); 10270 } 10271 10272 case ast_variable: 10273 { 10274 assert(_rettype == _data.variable->type()); 10275 10276 if (_rettype == xpath_type_boolean) 10277 return _data.variable->get_boolean(); 10278 } 10279 10280 // fallthrough 10281 default: 10282 { 10283 switch (_rettype) 10284 { 10285 case xpath_type_number: 10286 return convert_number_to_boolean(eval_number(c, stack)); 10287 10288 case xpath_type_string: 10289 { 10290 xpath_allocator_capture cr(stack.result); 10291 10292 return !eval_string(c, stack).empty(); 10293 } 10294 10295 case xpath_type_node_set: 10296 { 10297 xpath_allocator_capture cr(stack.result); 10298 10299 return !eval_node_set(c, stack, nodeset_eval_any).empty(); 10300 } 10301 10302 default: 10303 assert(false && "Wrong expression for return type boolean"); // unreachable 10304 return false; 10305 } 10306 } 10307 } 10308 } 10309 eval_number(const xpath_context & c,const 
xpath_stack & stack)10310 double eval_number(const xpath_context& c, const xpath_stack& stack) 10311 { 10312 switch (_type) 10313 { 10314 case ast_op_add: 10315 return _left->eval_number(c, stack) + _right->eval_number(c, stack); 10316 10317 case ast_op_subtract: 10318 return _left->eval_number(c, stack) - _right->eval_number(c, stack); 10319 10320 case ast_op_multiply: 10321 return _left->eval_number(c, stack) * _right->eval_number(c, stack); 10322 10323 case ast_op_divide: 10324 return _left->eval_number(c, stack) / _right->eval_number(c, stack); 10325 10326 case ast_op_mod: 10327 return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack)); 10328 10329 case ast_op_negate: 10330 return -_left->eval_number(c, stack); 10331 10332 case ast_number_constant: 10333 return _data.number; 10334 10335 case ast_func_last: 10336 return static_cast<double>(c.size); 10337 10338 case ast_func_position: 10339 return static_cast<double>(c.position); 10340 10341 case ast_func_count: 10342 { 10343 xpath_allocator_capture cr(stack.result); 10344 10345 return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size()); 10346 } 10347 10348 case ast_func_string_length_0: 10349 { 10350 xpath_allocator_capture cr(stack.result); 10351 10352 return static_cast<double>(string_value(c.n, stack.result).length()); 10353 } 10354 10355 case ast_func_string_length_1: 10356 { 10357 xpath_allocator_capture cr(stack.result); 10358 10359 return static_cast<double>(_left->eval_string(c, stack).length()); 10360 } 10361 10362 case ast_func_number_0: 10363 { 10364 xpath_allocator_capture cr(stack.result); 10365 10366 return convert_string_to_number(string_value(c.n, stack.result).c_str()); 10367 } 10368 10369 case ast_func_number_1: 10370 return _left->eval_number(c, stack); 10371 10372 case ast_func_sum: 10373 { 10374 xpath_allocator_capture cr(stack.result); 10375 10376 double r = 0; 10377 10378 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all); 10379 
10380 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it) 10381 { 10382 xpath_allocator_capture cri(stack.result); 10383 10384 r += convert_string_to_number(string_value(*it, stack.result).c_str()); 10385 } 10386 10387 return r; 10388 } 10389 10390 case ast_func_floor: 10391 { 10392 double r = _left->eval_number(c, stack); 10393 10394 return r == r ? floor(r) : r; 10395 } 10396 10397 case ast_func_ceiling: 10398 { 10399 double r = _left->eval_number(c, stack); 10400 10401 return r == r ? ceil(r) : r; 10402 } 10403 10404 case ast_func_round: 10405 return round_nearest_nzero(_left->eval_number(c, stack)); 10406 10407 case ast_variable: 10408 { 10409 assert(_rettype == _data.variable->type()); 10410 10411 if (_rettype == xpath_type_number) 10412 return _data.variable->get_number(); 10413 } 10414 10415 // fallthrough 10416 default: 10417 { 10418 switch (_rettype) 10419 { 10420 case xpath_type_boolean: 10421 return eval_boolean(c, stack) ? 1 : 0; 10422 10423 case xpath_type_string: 10424 { 10425 xpath_allocator_capture cr(stack.result); 10426 10427 return convert_string_to_number(eval_string(c, stack).c_str()); 10428 } 10429 10430 case xpath_type_node_set: 10431 { 10432 xpath_allocator_capture cr(stack.result); 10433 10434 return convert_string_to_number(eval_string(c, stack).c_str()); 10435 } 10436 10437 default: 10438 assert(false && "Wrong expression for return type number"); // unreachable 10439 return 0; 10440 } 10441 10442 } 10443 } 10444 } 10445 eval_string_concat(const xpath_context & c,const xpath_stack & stack)10446 xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack) 10447 { 10448 assert(_type == ast_func_concat); 10449 10450 xpath_allocator_capture ct(stack.temp); 10451 10452 // count the string number 10453 size_t count = 1; 10454 for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++; 10455 10456 // allocate a buffer for temporary string objects 10457 xpath_string* buffer = 
static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string))); 10458 if (!buffer) return xpath_string(); 10459 10460 // evaluate all strings to temporary stack 10461 xpath_stack swapped_stack = {stack.temp, stack.result}; 10462 10463 buffer[0] = _left->eval_string(c, swapped_stack); 10464 10465 size_t pos = 1; 10466 for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack); 10467 assert(pos == count); 10468 10469 // get total length 10470 size_t length = 0; 10471 for (size_t i = 0; i < count; ++i) length += buffer[i].length(); 10472 10473 // create final string 10474 char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t))); 10475 if (!result) return xpath_string(); 10476 10477 char_t* ri = result; 10478 10479 for (size_t j = 0; j < count; ++j) 10480 for (const char_t* bi = buffer[j].c_str(); *bi; ++bi) 10481 *ri++ = *bi; 10482 10483 *ri = 0; 10484 10485 return xpath_string::from_heap_preallocated(result, ri); 10486 } 10487 eval_string(const xpath_context & c,const xpath_stack & stack)10488 xpath_string eval_string(const xpath_context& c, const xpath_stack& stack) 10489 { 10490 switch (_type) 10491 { 10492 case ast_string_constant: 10493 return xpath_string::from_const(_data.string); 10494 10495 case ast_func_local_name_0: 10496 { 10497 xpath_node na = c.n; 10498 10499 return xpath_string::from_const(local_name(na)); 10500 } 10501 10502 case ast_func_local_name_1: 10503 { 10504 xpath_allocator_capture cr(stack.result); 10505 10506 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); 10507 xpath_node na = ns.first(); 10508 10509 return xpath_string::from_const(local_name(na)); 10510 } 10511 10512 case ast_func_name_0: 10513 { 10514 xpath_node na = c.n; 10515 10516 return xpath_string::from_const(qualified_name(na)); 10517 } 10518 10519 case ast_func_name_1: 10520 { 10521 xpath_allocator_capture cr(stack.result); 10522 10523 xpath_node_set_raw ns = 
_left->eval_node_set(c, stack, nodeset_eval_first); 10524 xpath_node na = ns.first(); 10525 10526 return xpath_string::from_const(qualified_name(na)); 10527 } 10528 10529 case ast_func_namespace_uri_0: 10530 { 10531 xpath_node na = c.n; 10532 10533 return xpath_string::from_const(namespace_uri(na)); 10534 } 10535 10536 case ast_func_namespace_uri_1: 10537 { 10538 xpath_allocator_capture cr(stack.result); 10539 10540 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); 10541 xpath_node na = ns.first(); 10542 10543 return xpath_string::from_const(namespace_uri(na)); 10544 } 10545 10546 case ast_func_string_0: 10547 return string_value(c.n, stack.result); 10548 10549 case ast_func_string_1: 10550 return _left->eval_string(c, stack); 10551 10552 case ast_func_concat: 10553 return eval_string_concat(c, stack); 10554 10555 case ast_func_substring_before: 10556 { 10557 xpath_allocator_capture cr(stack.temp); 10558 10559 xpath_stack swapped_stack = {stack.temp, stack.result}; 10560 10561 xpath_string s = _left->eval_string(c, swapped_stack); 10562 xpath_string p = _right->eval_string(c, swapped_stack); 10563 10564 const char_t* pos = find_substring(s.c_str(), p.c_str()); 10565 10566 return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string(); 10567 } 10568 10569 case ast_func_substring_after: 10570 { 10571 xpath_allocator_capture cr(stack.temp); 10572 10573 xpath_stack swapped_stack = {stack.temp, stack.result}; 10574 10575 xpath_string s = _left->eval_string(c, swapped_stack); 10576 xpath_string p = _right->eval_string(c, swapped_stack); 10577 10578 const char_t* pos = find_substring(s.c_str(), p.c_str()); 10579 if (!pos) return xpath_string(); 10580 10581 const char_t* rbegin = pos + p.length(); 10582 const char_t* rend = s.c_str() + s.length(); 10583 10584 return s.uses_heap() ? 
xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); 10585 } 10586 10587 case ast_func_substring_2: 10588 { 10589 xpath_allocator_capture cr(stack.temp); 10590 10591 xpath_stack swapped_stack = {stack.temp, stack.result}; 10592 10593 xpath_string s = _left->eval_string(c, swapped_stack); 10594 size_t s_length = s.length(); 10595 10596 double first = round_nearest(_right->eval_number(c, stack)); 10597 10598 if (is_nan(first)) return xpath_string(); // NaN 10599 else if (first >= s_length + 1) return xpath_string(); 10600 10601 size_t pos = first < 1 ? 1 : static_cast<size_t>(first); 10602 assert(1 <= pos && pos <= s_length + 1); 10603 10604 const char_t* rbegin = s.c_str() + (pos - 1); 10605 const char_t* rend = s.c_str() + s.length(); 10606 10607 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); 10608 } 10609 10610 case ast_func_substring_3: 10611 { 10612 xpath_allocator_capture cr(stack.temp); 10613 10614 xpath_stack swapped_stack = {stack.temp, stack.result}; 10615 10616 xpath_string s = _left->eval_string(c, swapped_stack); 10617 size_t s_length = s.length(); 10618 10619 double first = round_nearest(_right->eval_number(c, stack)); 10620 double last = first + round_nearest(_right->_next->eval_number(c, stack)); 10621 10622 if (is_nan(first) || is_nan(last)) return xpath_string(); 10623 else if (first >= s_length + 1) return xpath_string(); 10624 else if (first >= last) return xpath_string(); 10625 else if (last < 1) return xpath_string(); 10626 10627 size_t pos = first < 1 ? 1 : static_cast<size_t>(first); 10628 size_t end = last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last); 10629 10630 assert(1 <= pos && pos <= end && end <= s_length + 1); 10631 const char_t* rbegin = s.c_str() + (pos - 1); 10632 const char_t* rend = s.c_str() + (end - 1); 10633 10634 return (end == s_length + 1 && !s.uses_heap()) ? 
xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result); 10635 } 10636 10637 case ast_func_normalize_space_0: 10638 { 10639 xpath_string s = string_value(c.n, stack.result); 10640 10641 char_t* begin = s.data(stack.result); 10642 if (!begin) return xpath_string(); 10643 10644 char_t* end = normalize_space(begin); 10645 10646 return xpath_string::from_heap_preallocated(begin, end); 10647 } 10648 10649 case ast_func_normalize_space_1: 10650 { 10651 xpath_string s = _left->eval_string(c, stack); 10652 10653 char_t* begin = s.data(stack.result); 10654 if (!begin) return xpath_string(); 10655 10656 char_t* end = normalize_space(begin); 10657 10658 return xpath_string::from_heap_preallocated(begin, end); 10659 } 10660 10661 case ast_func_translate: 10662 { 10663 xpath_allocator_capture cr(stack.temp); 10664 10665 xpath_stack swapped_stack = {stack.temp, stack.result}; 10666 10667 xpath_string s = _left->eval_string(c, stack); 10668 xpath_string from = _right->eval_string(c, swapped_stack); 10669 xpath_string to = _right->_next->eval_string(c, swapped_stack); 10670 10671 char_t* begin = s.data(stack.result); 10672 if (!begin) return xpath_string(); 10673 10674 char_t* end = translate(begin, from.c_str(), to.c_str(), to.length()); 10675 10676 return xpath_string::from_heap_preallocated(begin, end); 10677 } 10678 10679 case ast_opt_translate_table: 10680 { 10681 xpath_string s = _left->eval_string(c, stack); 10682 10683 char_t* begin = s.data(stack.result); 10684 if (!begin) return xpath_string(); 10685 10686 char_t* end = translate_table(begin, _data.table); 10687 10688 return xpath_string::from_heap_preallocated(begin, end); 10689 } 10690 10691 case ast_variable: 10692 { 10693 assert(_rettype == _data.variable->type()); 10694 10695 if (_rettype == xpath_type_string) 10696 return xpath_string::from_const(_data.variable->get_string()); 10697 } 10698 10699 // fallthrough 10700 default: 10701 { 10702 switch (_rettype) 10703 { 10704 case 
xpath_type_boolean: 10705 return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); 10706 10707 case xpath_type_number: 10708 return convert_number_to_string(eval_number(c, stack), stack.result); 10709 10710 case xpath_type_node_set: 10711 { 10712 xpath_allocator_capture cr(stack.temp); 10713 10714 xpath_stack swapped_stack = {stack.temp, stack.result}; 10715 10716 xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first); 10717 return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result); 10718 } 10719 10720 default: 10721 assert(false && "Wrong expression for return type string"); // unreachable 10722 return xpath_string(); 10723 } 10724 } 10725 } 10726 } 10727 eval_node_set(const xpath_context & c,const xpath_stack & stack,nodeset_eval_t eval)10728 xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval) 10729 { 10730 switch (_type) 10731 { 10732 case ast_op_union: 10733 { 10734 xpath_allocator_capture cr(stack.temp); 10735 10736 xpath_stack swapped_stack = {stack.temp, stack.result}; 10737 10738 xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval); 10739 xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval); 10740 10741 // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother 10742 rs.set_type(xpath_node_set::type_unsorted); 10743 10744 rs.append(ls.begin(), ls.end(), stack.result); 10745 rs.remove_duplicates(); 10746 10747 return rs; 10748 } 10749 10750 case ast_filter: 10751 { 10752 xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? 
nodeset_eval_first : nodeset_eval_all); 10753 10754 // either expression is a number or it contains position() call; sort by document order 10755 if (_test != predicate_posinv) set.sort_do(); 10756 10757 bool once = eval_once(set.type(), eval); 10758 10759 apply_predicate(set, 0, stack, once); 10760 10761 return set; 10762 } 10763 10764 case ast_func_id: 10765 return xpath_node_set_raw(); 10766 10767 case ast_step: 10768 { 10769 switch (_axis) 10770 { 10771 case axis_ancestor: 10772 return step_do(c, stack, eval, axis_to_type<axis_ancestor>()); 10773 10774 case axis_ancestor_or_self: 10775 return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>()); 10776 10777 case axis_attribute: 10778 return step_do(c, stack, eval, axis_to_type<axis_attribute>()); 10779 10780 case axis_child: 10781 return step_do(c, stack, eval, axis_to_type<axis_child>()); 10782 10783 case axis_descendant: 10784 return step_do(c, stack, eval, axis_to_type<axis_descendant>()); 10785 10786 case axis_descendant_or_self: 10787 return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>()); 10788 10789 case axis_following: 10790 return step_do(c, stack, eval, axis_to_type<axis_following>()); 10791 10792 case axis_following_sibling: 10793 return step_do(c, stack, eval, axis_to_type<axis_following_sibling>()); 10794 10795 case axis_namespace: 10796 // namespaced axis is not supported 10797 return xpath_node_set_raw(); 10798 10799 case axis_parent: 10800 return step_do(c, stack, eval, axis_to_type<axis_parent>()); 10801 10802 case axis_preceding: 10803 return step_do(c, stack, eval, axis_to_type<axis_preceding>()); 10804 10805 case axis_preceding_sibling: 10806 return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>()); 10807 10808 case axis_self: 10809 return step_do(c, stack, eval, axis_to_type<axis_self>()); 10810 10811 default: 10812 assert(false && "Unknown axis"); // unreachable 10813 return xpath_node_set_raw(); 10814 } 10815 } 10816 10817 case ast_step_root: 10818 
{ 10819 assert(!_right); // root step can't have any predicates 10820 10821 xpath_node_set_raw ns; 10822 10823 ns.set_type(xpath_node_set::type_sorted); 10824 10825 if (c.n.node()) ns.push_back(c.n.node().root(), stack.result); 10826 else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result); 10827 10828 return ns; 10829 } 10830 10831 case ast_variable: 10832 { 10833 assert(_rettype == _data.variable->type()); 10834 10835 if (_rettype == xpath_type_node_set) 10836 { 10837 const xpath_node_set& s = _data.variable->get_node_set(); 10838 10839 xpath_node_set_raw ns; 10840 10841 ns.set_type(s.type()); 10842 ns.append(s.begin(), s.end(), stack.result); 10843 10844 return ns; 10845 } 10846 } 10847 10848 // fallthrough 10849 default: 10850 assert(false && "Wrong expression for return type node set"); // unreachable 10851 return xpath_node_set_raw(); 10852 } 10853 } 10854 optimize(xpath_allocator * alloc)10855 void optimize(xpath_allocator* alloc) 10856 { 10857 if (_left) 10858 _left->optimize(alloc); 10859 10860 if (_right) 10861 _right->optimize(alloc); 10862 10863 if (_next) 10864 _next->optimize(alloc); 10865 10866 optimize_self(alloc); 10867 } 10868 optimize_self(xpath_allocator * alloc)10869 void optimize_self(xpath_allocator* alloc) 10870 { 10871 // Rewrite [position()=expr] with [expr] 10872 // Note that this step has to go before classification to recognize [position()=1] 10873 if ((_type == ast_filter || _type == ast_predicate) && 10874 _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number) 10875 { 10876 _right = _right->_right; 10877 } 10878 10879 // Classify filter/predicate ops to perform various optimizations during evaluation 10880 if (_type == ast_filter || _type == ast_predicate) 10881 { 10882 assert(_test == predicate_default); 10883 10884 if (_right->_type == ast_number_constant && _right->_data.number == 1.0) 10885 _test = predicate_constant_one; 10886 else if 
(_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last)) 10887 _test = predicate_constant; 10888 else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr()) 10889 _test = predicate_posinv; 10890 } 10891 10892 // Rewrite descendant-or-self::node()/child::foo with descendant::foo 10893 // The former is a full form of //foo, the latter is much faster since it executes the node test immediately 10894 // Do a similar kind of rewrite for self/descendant/descendant-or-self axes 10895 // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1]) 10896 if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && _left && 10897 _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right && 10898 is_posinv_step()) 10899 { 10900 if (_axis == axis_child || _axis == axis_descendant) 10901 _axis = axis_descendant; 10902 else 10903 _axis = axis_descendant_or_self; 10904 10905 _left = _left->_left; 10906 } 10907 10908 // Use optimized lookup table implementation for translate() with constant arguments 10909 if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant) 10910 { 10911 unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string); 10912 10913 if (table) 10914 { 10915 _type = ast_opt_translate_table; 10916 _data.table = table; 10917 } 10918 } 10919 10920 // Use optimized path for @attr = 'value' or @attr = $value 10921 if (_type == ast_op_equal && 10922 _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right && 10923 (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == 
xpath_type_string))) 10924 { 10925 _type = ast_opt_compare_attribute; 10926 } 10927 } 10928 is_posinv_expr() const10929 bool is_posinv_expr() const 10930 { 10931 switch (_type) 10932 { 10933 case ast_func_position: 10934 case ast_func_last: 10935 return false; 10936 10937 case ast_string_constant: 10938 case ast_number_constant: 10939 case ast_variable: 10940 return true; 10941 10942 case ast_step: 10943 case ast_step_root: 10944 return true; 10945 10946 case ast_predicate: 10947 case ast_filter: 10948 return true; 10949 10950 default: 10951 if (_left && !_left->is_posinv_expr()) return false; 10952 10953 for (xpath_ast_node* n = _right; n; n = n->_next) 10954 if (!n->is_posinv_expr()) return false; 10955 10956 return true; 10957 } 10958 } 10959 is_posinv_step() const10960 bool is_posinv_step() const 10961 { 10962 assert(_type == ast_step); 10963 10964 for (xpath_ast_node* n = _right; n; n = n->_next) 10965 { 10966 assert(n->_type == ast_predicate); 10967 10968 if (n->_test != predicate_posinv) 10969 return false; 10970 } 10971 10972 return true; 10973 } 10974 rettype() const10975 xpath_value_type rettype() const 10976 { 10977 return static_cast<xpath_value_type>(_rettype); 10978 } 10979 }; 10980 10981 struct xpath_parser 10982 { 10983 xpath_allocator* _alloc; 10984 xpath_lexer _lexer; 10985 10986 const char_t* _query; 10987 xpath_variable_set* _variables; 10988 10989 xpath_parse_result* _result; 10990 10991 char_t _scratch[32]; 10992 errorxpath_parser10993 xpath_ast_node* error(const char* message) 10994 { 10995 _result->error = message; 10996 _result->offset = _lexer.current_pos() - _query; 10997 10998 return 0; 10999 } 11000 error_oomxpath_parser11001 xpath_ast_node* error_oom() 11002 { 11003 assert(_alloc->_error); 11004 *_alloc->_error = true; 11005 11006 return 0; 11007 } 11008 alloc_nodexpath_parser11009 void* alloc_node() 11010 { 11011 return _alloc->allocate(sizeof(xpath_ast_node)); 11012 } 11013 alloc_nodexpath_parser11014 xpath_ast_node* 
alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value) 11015 { 11016 void* memory = alloc_node(); 11017 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; 11018 } 11019 alloc_nodexpath_parser11020 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value) 11021 { 11022 void* memory = alloc_node(); 11023 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; 11024 } 11025 alloc_nodexpath_parser11026 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value) 11027 { 11028 void* memory = alloc_node(); 11029 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; 11030 } 11031 alloc_nodexpath_parser11032 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0) 11033 { 11034 void* memory = alloc_node(); 11035 return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0; 11036 } 11037 alloc_nodexpath_parser11038 xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents) 11039 { 11040 void* memory = alloc_node(); 11041 return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0; 11042 } 11043 alloc_nodexpath_parser11044 xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test) 11045 { 11046 void* memory = alloc_node(); 11047 return memory ? 
new (memory) xpath_ast_node(type, left, right, test) : 0; 11048 } 11049 alloc_stringxpath_parser11050 const char_t* alloc_string(const xpath_lexer_string& value) 11051 { 11052 if (!value.begin) 11053 return PUGIXML_TEXT(""); 11054 11055 size_t length = static_cast<size_t>(value.end - value.begin); 11056 11057 char_t* c = static_cast<char_t*>(_alloc->allocate((length + 1) * sizeof(char_t))); 11058 if (!c) return 0; 11059 11060 memcpy(c, value.begin, length * sizeof(char_t)); 11061 c[length] = 0; 11062 11063 return c; 11064 } 11065 parse_functionxpath_parser11066 xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) 11067 { 11068 switch (name.begin[0]) 11069 { 11070 case 'b': 11071 if (name == PUGIXML_TEXT("boolean") && argc == 1) 11072 return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]); 11073 11074 break; 11075 11076 case 'c': 11077 if (name == PUGIXML_TEXT("count") && argc == 1) 11078 { 11079 if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); 11080 return alloc_node(ast_func_count, xpath_type_number, args[0]); 11081 } 11082 else if (name == PUGIXML_TEXT("contains") && argc == 2) 11083 return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]); 11084 else if (name == PUGIXML_TEXT("concat") && argc >= 2) 11085 return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]); 11086 else if (name == PUGIXML_TEXT("ceiling") && argc == 1) 11087 return alloc_node(ast_func_ceiling, xpath_type_number, args[0]); 11088 11089 break; 11090 11091 case 'f': 11092 if (name == PUGIXML_TEXT("false") && argc == 0) 11093 return alloc_node(ast_func_false, xpath_type_boolean); 11094 else if (name == PUGIXML_TEXT("floor") && argc == 1) 11095 return alloc_node(ast_func_floor, xpath_type_number, args[0]); 11096 11097 break; 11098 11099 case 'i': 11100 if (name == PUGIXML_TEXT("id") && argc == 1) 11101 return alloc_node(ast_func_id, xpath_type_node_set, 
args[0]); 11102 11103 break; 11104 11105 case 'l': 11106 if (name == PUGIXML_TEXT("last") && argc == 0) 11107 return alloc_node(ast_func_last, xpath_type_number); 11108 else if (name == PUGIXML_TEXT("lang") && argc == 1) 11109 return alloc_node(ast_func_lang, xpath_type_boolean, args[0]); 11110 else if (name == PUGIXML_TEXT("local-name") && argc <= 1) 11111 { 11112 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); 11113 return alloc_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]); 11114 } 11115 11116 break; 11117 11118 case 'n': 11119 if (name == PUGIXML_TEXT("name") && argc <= 1) 11120 { 11121 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); 11122 return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]); 11123 } 11124 else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1) 11125 { 11126 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); 11127 return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]); 11128 } 11129 else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) 11130 return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); 11131 else if (name == PUGIXML_TEXT("not") && argc == 1) 11132 return alloc_node(ast_func_not, xpath_type_boolean, args[0]); 11133 else if (name == PUGIXML_TEXT("number") && argc <= 1) 11134 return alloc_node(argc == 0 ? 
ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); 11135 11136 break; 11137 11138 case 'p': 11139 if (name == PUGIXML_TEXT("position") && argc == 0) 11140 return alloc_node(ast_func_position, xpath_type_number); 11141 11142 break; 11143 11144 case 'r': 11145 if (name == PUGIXML_TEXT("round") && argc == 1) 11146 return alloc_node(ast_func_round, xpath_type_number, args[0]); 11147 11148 break; 11149 11150 case 's': 11151 if (name == PUGIXML_TEXT("string") && argc <= 1) 11152 return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); 11153 else if (name == PUGIXML_TEXT("string-length") && argc <= 1) 11154 return alloc_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]); 11155 else if (name == PUGIXML_TEXT("starts-with") && argc == 2) 11156 return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); 11157 else if (name == PUGIXML_TEXT("substring-before") && argc == 2) 11158 return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); 11159 else if (name == PUGIXML_TEXT("substring-after") && argc == 2) 11160 return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); 11161 else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3)) 11162 return alloc_node(argc == 2 ? 
ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); 11163 else if (name == PUGIXML_TEXT("sum") && argc == 1) 11164 { 11165 if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); 11166 return alloc_node(ast_func_sum, xpath_type_number, args[0]); 11167 } 11168 11169 break; 11170 11171 case 't': 11172 if (name == PUGIXML_TEXT("translate") && argc == 3) 11173 return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]); 11174 else if (name == PUGIXML_TEXT("true") && argc == 0) 11175 return alloc_node(ast_func_true, xpath_type_boolean); 11176 11177 break; 11178 11179 default: 11180 break; 11181 } 11182 11183 return error("Unrecognized function or wrong parameter count"); 11184 } 11185 parse_axis_namexpath_parser11186 axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) 11187 { 11188 specified = true; 11189 11190 switch (name.begin[0]) 11191 { 11192 case 'a': 11193 if (name == PUGIXML_TEXT("ancestor")) 11194 return axis_ancestor; 11195 else if (name == PUGIXML_TEXT("ancestor-or-self")) 11196 return axis_ancestor_or_self; 11197 else if (name == PUGIXML_TEXT("attribute")) 11198 return axis_attribute; 11199 11200 break; 11201 11202 case 'c': 11203 if (name == PUGIXML_TEXT("child")) 11204 return axis_child; 11205 11206 break; 11207 11208 case 'd': 11209 if (name == PUGIXML_TEXT("descendant")) 11210 return axis_descendant; 11211 else if (name == PUGIXML_TEXT("descendant-or-self")) 11212 return axis_descendant_or_self; 11213 11214 break; 11215 11216 case 'f': 11217 if (name == PUGIXML_TEXT("following")) 11218 return axis_following; 11219 else if (name == PUGIXML_TEXT("following-sibling")) 11220 return axis_following_sibling; 11221 11222 break; 11223 11224 case 'n': 11225 if (name == PUGIXML_TEXT("namespace")) 11226 return axis_namespace; 11227 11228 break; 11229 11230 case 'p': 11231 if (name == PUGIXML_TEXT("parent")) 11232 return axis_parent; 11233 else if (name == 
PUGIXML_TEXT("preceding")) 11234 return axis_preceding; 11235 else if (name == PUGIXML_TEXT("preceding-sibling")) 11236 return axis_preceding_sibling; 11237 11238 break; 11239 11240 case 's': 11241 if (name == PUGIXML_TEXT("self")) 11242 return axis_self; 11243 11244 break; 11245 11246 default: 11247 break; 11248 } 11249 11250 specified = false; 11251 return axis_child; 11252 } 11253 parse_node_test_typexpath_parser11254 nodetest_t parse_node_test_type(const xpath_lexer_string& name) 11255 { 11256 switch (name.begin[0]) 11257 { 11258 case 'c': 11259 if (name == PUGIXML_TEXT("comment")) 11260 return nodetest_type_comment; 11261 11262 break; 11263 11264 case 'n': 11265 if (name == PUGIXML_TEXT("node")) 11266 return nodetest_type_node; 11267 11268 break; 11269 11270 case 'p': 11271 if (name == PUGIXML_TEXT("processing-instruction")) 11272 return nodetest_type_pi; 11273 11274 break; 11275 11276 case 't': 11277 if (name == PUGIXML_TEXT("text")) 11278 return nodetest_type_text; 11279 11280 break; 11281 11282 default: 11283 break; 11284 } 11285 11286 return nodetest_none; 11287 } 11288 11289 // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall parse_primary_expressionxpath_parser11290 xpath_ast_node* parse_primary_expression() 11291 { 11292 switch (_lexer.current()) 11293 { 11294 case lex_var_ref: 11295 { 11296 xpath_lexer_string name = _lexer.contents(); 11297 11298 if (!_variables) 11299 return error("Unknown variable: variable set is not provided"); 11300 11301 xpath_variable* var = 0; 11302 if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var)) 11303 return error_oom(); 11304 11305 if (!var) 11306 return error("Unknown variable: variable set does not contain the given name"); 11307 11308 _lexer.next(); 11309 11310 return alloc_node(ast_variable, var->type(), var); 11311 } 11312 11313 case lex_open_brace: 11314 { 11315 _lexer.next(); 11316 11317 xpath_ast_node* n = parse_expression(); 11318 if (!n) return 0; 11319 
11320 if (_lexer.current() != lex_close_brace) 11321 return error("Expected ')' to match an opening '('"); 11322 11323 _lexer.next(); 11324 11325 return n; 11326 } 11327 11328 case lex_quoted_string: 11329 { 11330 const char_t* value = alloc_string(_lexer.contents()); 11331 if (!value) return 0; 11332 11333 _lexer.next(); 11334 11335 return alloc_node(ast_string_constant, xpath_type_string, value); 11336 } 11337 11338 case lex_number: 11339 { 11340 double value = 0; 11341 11342 if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value)) 11343 return error_oom(); 11344 11345 _lexer.next(); 11346 11347 return alloc_node(ast_number_constant, xpath_type_number, value); 11348 } 11349 11350 case lex_string: 11351 { 11352 xpath_ast_node* args[2] = {0}; 11353 size_t argc = 0; 11354 11355 xpath_lexer_string function = _lexer.contents(); 11356 _lexer.next(); 11357 11358 xpath_ast_node* last_arg = 0; 11359 11360 if (_lexer.current() != lex_open_brace) 11361 return error("Unrecognized function call"); 11362 _lexer.next(); 11363 11364 while (_lexer.current() != lex_close_brace) 11365 { 11366 if (argc > 0) 11367 { 11368 if (_lexer.current() != lex_comma) 11369 return error("No comma between function arguments"); 11370 _lexer.next(); 11371 } 11372 11373 xpath_ast_node* n = parse_expression(); 11374 if (!n) return 0; 11375 11376 if (argc < 2) args[argc] = n; 11377 else last_arg->set_next(n); 11378 11379 argc++; 11380 last_arg = n; 11381 } 11382 11383 _lexer.next(); 11384 11385 return parse_function(function, argc, args); 11386 } 11387 11388 default: 11389 return error("Unrecognizable primary expression"); 11390 } 11391 } 11392 11393 // FilterExpr ::= PrimaryExpr | FilterExpr Predicate 11394 // Predicate ::= '[' PredicateExpr ']' 11395 // PredicateExpr ::= Expr parse_filter_expressionxpath_parser11396 xpath_ast_node* parse_filter_expression() 11397 { 11398 xpath_ast_node* n = parse_primary_expression(); 11399 if (!n) return 0; 11400 
// Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
// AxisSpecifier ::= AxisName '::' | '@'?
// NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
// NameTest ::= '*' | NCName ':' '*' | QName
// AbbreviatedStep ::= '.' | '..'
//
// Parses a single location step applied to `set` (null when the step has no left
// operand) and returns the resulting ast_step node with its predicates attached.
// Returns 0 when an error was reported through error(), or when a nested
// parse_expression(), alloc_string() or alloc_node() call returned 0.
xpath_ast_node* parse_step(xpath_ast_node* set)
{
	// a step can only continue an expression that yields a node set
	if (set && set->rettype() != xpath_type_node_set)
		return error("Step has to be applied to node set");

	bool axis_specified = false;
	axis_t axis = axis_child; // implied child axis

	if (_lexer.current() == lex_axis_attribute)
	{
		// attribute-axis lexeme: counts as an explicit axis, so a following 'name::' is rejected below
		axis = axis_attribute;
		axis_specified = true;

		_lexer.next();
	}
	else if (_lexer.current() == lex_dot)
	{
		// abbreviated step '.': self axis with a node() test, no predicates allowed
		_lexer.next();

		if (_lexer.current() == lex_open_square_brace)
			return error("Predicates are not allowed after an abbreviated step");

		return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0);
	}
	else if (_lexer.current() == lex_double_dot)
	{
		// abbreviated step '..': parent axis with a node() test, no predicates allowed
		_lexer.next();

		if (_lexer.current() == lex_open_square_brace)
			return error("Predicates are not allowed after an abbreviated step");

		return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0);
	}

	// nt_type stays nodetest_none until the kind of node test is known;
	// nt_name holds the lexed name (or literal) the test refers to, if any
	nodetest_t nt_type = nodetest_none;
	xpath_lexer_string nt_name;

	if (_lexer.current() == lex_string)
	{
		// node name test
		nt_name = _lexer.contents();
		_lexer.next();

		// was it an axis name?
		if (_lexer.current() == lex_double_colon)
		{
			// parse axis name
			if (axis_specified)
				return error("Two axis specifiers in one step");

			// parse_axis_name reports through axis_specified whether the name was recognized
			axis = parse_axis_name(nt_name, axis_specified);

			if (!axis_specified)
				return error("Unknown axis");

			// read actual node test
			_lexer.next();

			if (_lexer.current() == lex_multiply)
			{
				nt_type = nodetest_all;
				nt_name = xpath_lexer_string();
				_lexer.next();
			}
			else if (_lexer.current() == lex_string)
			{
				// name after the axis; classified by the nodetest_none branch below
				nt_name = _lexer.contents();
				_lexer.next();
			}
			else
			{
				return error("Unrecognized node test");
			}
		}

		if (nt_type == nodetest_none)
		{
			// node type test or processing-instruction
			if (_lexer.current() == lex_open_brace)
			{
				_lexer.next();

				if (_lexer.current() == lex_close_brace)
				{
					// 'name()' form: the name must be one of the node types known to parse_node_test_type
					_lexer.next();

					nt_type = parse_node_test_type(nt_name);

					if (nt_type == nodetest_none)
						return error("Unrecognized node type");

					nt_name = xpath_lexer_string();
				}
				else if (nt_name == PUGIXML_TEXT("processing-instruction"))
				{
					// processing-instruction('literal'): the literal replaces nt_name as the test contents
					if (_lexer.current() != lex_quoted_string)
						return error("Only literals are allowed as arguments to processing-instruction()");

					nt_type = nodetest_pi;
					nt_name = _lexer.contents();
					_lexer.next();

					if (_lexer.current() != lex_close_brace)
						return error("Unmatched brace near processing-instruction()");
					_lexer.next();
				}
				else
				{
					return error("Unmatched brace near node type test");
				}
			}
			// QName or NCName:*
			else
			{
				if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
				{
					nt_name.end--; // erase *

					nt_type = nodetest_all_in_namespace;
				}
				else
				{
					nt_type = nodetest_name;
				}
			}
		}
	}
	else if (_lexer.current() == lex_multiply)
	{
		// bare '*' on the implied or attribute axis; nt_name stays default-constructed
		nt_type = nodetest_all;
		_lexer.next();
	}
	else
	{
		return error("Unrecognized node test");
	}

	// nt_name points into lexer-provided text, so the node gets its own copy
	const char_t* nt_name_copy = alloc_string(nt_name);
	if (!nt_name_copy) return 0;

	xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy);
	if (!n) return 0;

	// last predicate appended so far; predicates form a chain that starts at n's right operand
	xpath_ast_node* last = 0;

	while (_lexer.current() == lex_open_square_brace)
	{
		_lexer.next();

		xpath_ast_node* expr = parse_expression();
		if (!expr) return 0;

		xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default);
		if (!pred) return 0;

		if (_lexer.current() != lex_close_square_brace)
			return error("Expected ']' to match an opening '['");
		_lexer.next();

		// first predicate hangs off the step itself, later ones off the previous predicate
		if (last) last->set_next(pred);
		else n->set_right(pred);

		last = pred;
	}

	return n;
}
AbsoluteLocationPath 11621 // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath parse_location_pathxpath_parser11622 xpath_ast_node* parse_location_path() 11623 { 11624 if (_lexer.current() == lex_slash) 11625 { 11626 _lexer.next(); 11627 11628 xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set); 11629 if (!n) return 0; 11630 11631 // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path 11632 lexeme_t l = _lexer.current(); 11633 11634 if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply) 11635 return parse_relative_location_path(n); 11636 else 11637 return n; 11638 } 11639 else if (_lexer.current() == lex_double_slash) 11640 { 11641 _lexer.next(); 11642 11643 xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set); 11644 if (!n) return 0; 11645 11646 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); 11647 if (!n) return 0; 11648 11649 return parse_relative_location_path(n); 11650 } 11651 11652 // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1 11653 return parse_relative_location_path(0); 11654 } 11655 11656 // PathExpr ::= LocationPath 11657 // | FilterExpr 11658 // | FilterExpr '/' RelativeLocationPath 11659 // | FilterExpr '//' RelativeLocationPath 11660 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr 11661 // UnaryExpr ::= UnionExpr | '-' UnaryExpr parse_path_or_unary_expressionxpath_parser11662 xpath_ast_node* parse_path_or_unary_expression() 11663 { 11664 // Clarification. 11665 // PathExpr begins with either LocationPath or FilterExpr. 
11666 // FilterExpr begins with PrimaryExpr 11667 // PrimaryExpr begins with '$' in case of it being a variable reference, 11668 // '(' in case of it being an expression, string literal, number constant or 11669 // function call. 11670 if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace || 11671 _lexer.current() == lex_quoted_string || _lexer.current() == lex_number || 11672 _lexer.current() == lex_string) 11673 { 11674 if (_lexer.current() == lex_string) 11675 { 11676 // This is either a function call, or not - if not, we shall proceed with location path 11677 const char_t* state = _lexer.state(); 11678 11679 while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state; 11680 11681 if (*state != '(') 11682 return parse_location_path(); 11683 11684 // This looks like a function call; however this still can be a node-test. Check it. 11685 if (parse_node_test_type(_lexer.contents()) != nodetest_none) 11686 return parse_location_path(); 11687 } 11688 11689 xpath_ast_node* n = parse_filter_expression(); 11690 if (!n) return 0; 11691 11692 if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) 11693 { 11694 lexeme_t l = _lexer.current(); 11695 _lexer.next(); 11696 11697 if (l == lex_double_slash) 11698 { 11699 if (n->rettype() != xpath_type_node_set) 11700 return error("Step has to be applied to node set"); 11701 11702 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); 11703 if (!n) return 0; 11704 } 11705 11706 // select from location path 11707 return parse_relative_location_path(n); 11708 } 11709 11710 return n; 11711 } 11712 else if (_lexer.current() == lex_minus) 11713 { 11714 _lexer.next(); 11715 11716 // precedence 7+ - only parses union expressions 11717 xpath_ast_node* n = parse_expression(7); 11718 if (!n) return 0; 11719 11720 return alloc_node(ast_op_negate, xpath_type_number, n); 11721 } 11722 else 11723 { 11724 return parse_location_path(); 11725 } 11726 } 11727 11728 struct binary_op_t 11729 { 
11730 ast_type_t asttype; 11731 xpath_value_type rettype; 11732 int precedence; 11733 binary_op_txpath_parser::binary_op_t11734 binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0) 11735 { 11736 } 11737 binary_op_txpath_parser::binary_op_t11738 binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_) 11739 { 11740 } 11741 parsexpath_parser::binary_op_t11742 static binary_op_t parse(xpath_lexer& lexer) 11743 { 11744 switch (lexer.current()) 11745 { 11746 case lex_string: 11747 if (lexer.contents() == PUGIXML_TEXT("or")) 11748 return binary_op_t(ast_op_or, xpath_type_boolean, 1); 11749 else if (lexer.contents() == PUGIXML_TEXT("and")) 11750 return binary_op_t(ast_op_and, xpath_type_boolean, 2); 11751 else if (lexer.contents() == PUGIXML_TEXT("div")) 11752 return binary_op_t(ast_op_divide, xpath_type_number, 6); 11753 else if (lexer.contents() == PUGIXML_TEXT("mod")) 11754 return binary_op_t(ast_op_mod, xpath_type_number, 6); 11755 else 11756 return binary_op_t(); 11757 11758 case lex_equal: 11759 return binary_op_t(ast_op_equal, xpath_type_boolean, 3); 11760 11761 case lex_not_equal: 11762 return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3); 11763 11764 case lex_less: 11765 return binary_op_t(ast_op_less, xpath_type_boolean, 4); 11766 11767 case lex_greater: 11768 return binary_op_t(ast_op_greater, xpath_type_boolean, 4); 11769 11770 case lex_less_or_equal: 11771 return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4); 11772 11773 case lex_greater_or_equal: 11774 return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4); 11775 11776 case lex_plus: 11777 return binary_op_t(ast_op_add, xpath_type_number, 5); 11778 11779 case lex_minus: 11780 return binary_op_t(ast_op_subtract, xpath_type_number, 5); 11781 11782 case lex_multiply: 11783 return binary_op_t(ast_op_multiply, xpath_type_number, 6); 11784 11785 case lex_union: 11786 return 
binary_op_t(ast_op_union, xpath_type_node_set, 7); 11787 11788 default: 11789 return binary_op_t(); 11790 } 11791 } 11792 }; 11793 parse_expression_recxpath_parser11794 xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit) 11795 { 11796 binary_op_t op = binary_op_t::parse(_lexer); 11797 11798 while (op.asttype != ast_unknown && op.precedence >= limit) 11799 { 11800 _lexer.next(); 11801 11802 xpath_ast_node* rhs = parse_path_or_unary_expression(); 11803 if (!rhs) return 0; 11804 11805 binary_op_t nextop = binary_op_t::parse(_lexer); 11806 11807 while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence) 11808 { 11809 rhs = parse_expression_rec(rhs, nextop.precedence); 11810 if (!rhs) return 0; 11811 11812 nextop = binary_op_t::parse(_lexer); 11813 } 11814 11815 if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set)) 11816 return error("Union operator has to be applied to node sets"); 11817 11818 lhs = alloc_node(op.asttype, op.rettype, lhs, rhs); 11819 if (!lhs) return 0; 11820 11821 op = binary_op_t::parse(_lexer); 11822 } 11823 11824 return lhs; 11825 } 11826 11827 // Expr ::= OrExpr 11828 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr 11829 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr 11830 // EqualityExpr ::= RelationalExpr 11831 // | EqualityExpr '=' RelationalExpr 11832 // | EqualityExpr '!=' RelationalExpr 11833 // RelationalExpr ::= AdditiveExpr 11834 // | RelationalExpr '<' AdditiveExpr 11835 // | RelationalExpr '>' AdditiveExpr 11836 // | RelationalExpr '<=' AdditiveExpr 11837 // | RelationalExpr '>=' AdditiveExpr 11838 // AdditiveExpr ::= MultiplicativeExpr 11839 // | AdditiveExpr '+' MultiplicativeExpr 11840 // | AdditiveExpr '-' MultiplicativeExpr 11841 // MultiplicativeExpr ::= UnaryExpr 11842 // | MultiplicativeExpr '*' UnaryExpr 11843 // | MultiplicativeExpr 'div' UnaryExpr 11844 // | MultiplicativeExpr 'mod' UnaryExpr parse_expressionxpath_parser11845 
xpath_ast_node* parse_expression(int limit = 0) 11846 { 11847 xpath_ast_node* n = parse_path_or_unary_expression(); 11848 if (!n) return 0; 11849 11850 return parse_expression_rec(n, limit); 11851 } 11852 xpath_parserxpath_parser11853 xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result) 11854 { 11855 } 11856 parsexpath_parser11857 xpath_ast_node* parse() 11858 { 11859 xpath_ast_node* n = parse_expression(); 11860 if (!n) return 0; 11861 11862 // check if there are unparsed tokens left 11863 if (_lexer.current() != lex_eof) 11864 return error("Incorrect query"); 11865 11866 return n; 11867 } 11868 parsexpath_parser11869 static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result) 11870 { 11871 xpath_parser parser(query, variables, alloc, result); 11872 11873 return parser.parse(); 11874 } 11875 }; 11876 11877 struct xpath_query_impl 11878 { createxpath_query_impl11879 static xpath_query_impl* create() 11880 { 11881 void* memory = xml_memory::allocate(sizeof(xpath_query_impl)); 11882 if (!memory) return 0; 11883 11884 return new (memory) xpath_query_impl(); 11885 } 11886 destroyxpath_query_impl11887 static void destroy(xpath_query_impl* impl) 11888 { 11889 // free all allocated pages 11890 impl->alloc.release(); 11891 11892 // free allocator memory (with the first page) 11893 xml_memory::deallocate(impl); 11894 } 11895 xpath_query_implxpath_query_impl11896 xpath_query_impl(): root(0), alloc(&block, &oom), oom(false) 11897 { 11898 block.next = 0; 11899 block.capacity = sizeof(block.data); 11900 } 11901 11902 xpath_ast_node* root; 11903 xpath_allocator alloc; 11904 xpath_memory_block block; 11905 bool oom; 11906 }; 11907 evaluate_node_set_prepare(xpath_query_impl * impl)11908 PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* 
impl) 11909 { 11910 if (!impl) return 0; 11911 11912 if (impl->root->rettype() != xpath_type_node_set) 11913 { 11914 #ifdef PUGIXML_NO_EXCEPTIONS 11915 return 0; 11916 #else 11917 xpath_parse_result res; 11918 res.error = "Expression does not evaluate to node set"; 11919 11920 throw xpath_exception(res); 11921 #endif 11922 } 11923 11924 return impl->root; 11925 } 11926 PUGI__NS_END 11927 11928 namespace pugi 11929 { 11930 #ifndef PUGIXML_NO_EXCEPTIONS xpath_exception(const xpath_parse_result & result_)11931 PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_) 11932 { 11933 assert(_result.error); 11934 } 11935 what() const11936 PUGI__FN const char* xpath_exception::what() const throw() 11937 { 11938 return _result.error; 11939 } 11940 result() const11941 PUGI__FN const xpath_parse_result& xpath_exception::result() const 11942 { 11943 return _result; 11944 } 11945 #endif 11946 xpath_node()11947 PUGI__FN xpath_node::xpath_node() 11948 { 11949 } 11950 xpath_node(const xml_node & node_)11951 PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_) 11952 { 11953 } 11954 xpath_node(const xml_attribute & attribute_,const xml_node & parent_)11955 PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_) 11956 { 11957 } 11958 node() const11959 PUGI__FN xml_node xpath_node::node() const 11960 { 11961 return _attribute ? xml_node() : _node; 11962 } 11963 attribute() const11964 PUGI__FN xml_attribute xpath_node::attribute() const 11965 { 11966 return _attribute; 11967 } 11968 parent() const11969 PUGI__FN xml_node xpath_node::parent() const 11970 { 11971 return _attribute ? 
_node : _node.parent(); 11972 } 11973 unspecified_bool_xpath_node(xpath_node ***)11974 PUGI__FN static void unspecified_bool_xpath_node(xpath_node***) 11975 { 11976 } 11977 operator xpath_node::unspecified_bool_type() const11978 PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const 11979 { 11980 return (_node || _attribute) ? unspecified_bool_xpath_node : 0; 11981 } 11982 operator !() const11983 PUGI__FN bool xpath_node::operator!() const 11984 { 11985 return !(_node || _attribute); 11986 } 11987 operator ==(const xpath_node & n) const11988 PUGI__FN bool xpath_node::operator==(const xpath_node& n) const 11989 { 11990 return _node == n._node && _attribute == n._attribute; 11991 } 11992 operator !=(const xpath_node & n) const11993 PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const 11994 { 11995 return _node != n._node || _attribute != n._attribute; 11996 } 11997 11998 #ifdef __BORLANDC__ operator &&(const xpath_node & lhs,bool rhs)11999 PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs) 12000 { 12001 return (bool)lhs && rhs; 12002 } 12003 operator ||(const xpath_node & lhs,bool rhs)12004 PUGI__FN bool operator||(const xpath_node& lhs, bool rhs) 12005 { 12006 return (bool)lhs || rhs; 12007 } 12008 #endif 12009 _assign(const_iterator begin_,const_iterator end_,type_t type_)12010 PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_) 12011 { 12012 assert(begin_ <= end_); 12013 12014 size_t size_ = static_cast<size_t>(end_ - begin_); 12015 12016 if (size_ <= 1) 12017 { 12018 // deallocate old buffer 12019 if (_begin != &_storage) impl::xml_memory::deallocate(_begin); 12020 12021 // use internal buffer 12022 if (begin_ != end_) _storage = *begin_; 12023 12024 _begin = &_storage; 12025 _end = &_storage + size_; 12026 _type = type_; 12027 } 12028 else 12029 { 12030 // make heap copy 12031 xpath_node* storage = static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node))); 12032 
12033 if (!storage) 12034 { 12035 #ifdef PUGIXML_NO_EXCEPTIONS 12036 return; 12037 #else 12038 throw std::bad_alloc(); 12039 #endif 12040 } 12041 12042 memcpy(storage, begin_, size_ * sizeof(xpath_node)); 12043 12044 // deallocate old buffer 12045 if (_begin != &_storage) impl::xml_memory::deallocate(_begin); 12046 12047 // finalize 12048 _begin = storage; 12049 _end = storage + size_; 12050 _type = type_; 12051 } 12052 } 12053 12054 #ifdef PUGIXML_HAS_MOVE _move(xpath_node_set & rhs)12055 PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) PUGIXML_NOEXCEPT 12056 { 12057 _type = rhs._type; 12058 _storage = rhs._storage; 12059 _begin = (rhs._begin == &rhs._storage) ? &_storage : rhs._begin; 12060 _end = _begin + (rhs._end - rhs._begin); 12061 12062 rhs._type = type_unsorted; 12063 rhs._begin = &rhs._storage; 12064 rhs._end = rhs._begin; 12065 } 12066 #endif 12067 xpath_node_set()12068 PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage) 12069 { 12070 } 12071 xpath_node_set(const_iterator begin_,const_iterator end_,type_t type_)12072 PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(&_storage), _end(&_storage) 12073 { 12074 _assign(begin_, end_, type_); 12075 } 12076 ~xpath_node_set()12077 PUGI__FN xpath_node_set::~xpath_node_set() 12078 { 12079 if (_begin != &_storage) 12080 impl::xml_memory::deallocate(_begin); 12081 } 12082 xpath_node_set(const xpath_node_set & ns)12083 PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(&_storage), _end(&_storage) 12084 { 12085 _assign(ns._begin, ns._end, ns._type); 12086 } 12087 operator =(const xpath_node_set & ns)12088 PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns) 12089 { 12090 if (this == &ns) return *this; 12091 12092 _assign(ns._begin, ns._end, ns._type); 12093 12094 return *this; 12095 } 12096 12097 #ifdef 
PUGIXML_HAS_MOVE xpath_node_set(xpath_node_set && rhs)12098 PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT: _type(type_unsorted), _begin(&_storage), _end(&_storage) 12099 { 12100 _move(rhs); 12101 } 12102 operator =(xpath_node_set && rhs)12103 PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT 12104 { 12105 if (this == &rhs) return *this; 12106 12107 if (_begin != &_storage) 12108 impl::xml_memory::deallocate(_begin); 12109 12110 _move(rhs); 12111 12112 return *this; 12113 } 12114 #endif 12115 type() const12116 PUGI__FN xpath_node_set::type_t xpath_node_set::type() const 12117 { 12118 return _type; 12119 } 12120 size() const12121 PUGI__FN size_t xpath_node_set::size() const 12122 { 12123 return _end - _begin; 12124 } 12125 empty() const12126 PUGI__FN bool xpath_node_set::empty() const 12127 { 12128 return _begin == _end; 12129 } 12130 operator [](size_t index) const12131 PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const 12132 { 12133 assert(index < size()); 12134 return _begin[index]; 12135 } 12136 begin() const12137 PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const 12138 { 12139 return _begin; 12140 } 12141 end() const12142 PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const 12143 { 12144 return _end; 12145 } 12146 sort(bool reverse)12147 PUGI__FN void xpath_node_set::sort(bool reverse) 12148 { 12149 _type = impl::xpath_sort(_begin, _end, _type, reverse); 12150 } 12151 first() const12152 PUGI__FN xpath_node xpath_node_set::first() const 12153 { 12154 return impl::xpath_first(_begin, _end, _type); 12155 } 12156 xpath_parse_result()12157 PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0) 12158 { 12159 } 12160 operator bool() const12161 PUGI__FN xpath_parse_result::operator bool() const 12162 { 12163 return error == 0; 12164 } 12165 description() const12166 PUGI__FN const char* 
xpath_parse_result::description() const 12167 { 12168 return error ? error : "No error"; 12169 } 12170 xpath_variable(xpath_value_type type_)12171 PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0) 12172 { 12173 } 12174 name() const12175 PUGI__FN const char_t* xpath_variable::name() const 12176 { 12177 switch (_type) 12178 { 12179 case xpath_type_node_set: 12180 return static_cast<const impl::xpath_variable_node_set*>(this)->name; 12181 12182 case xpath_type_number: 12183 return static_cast<const impl::xpath_variable_number*>(this)->name; 12184 12185 case xpath_type_string: 12186 return static_cast<const impl::xpath_variable_string*>(this)->name; 12187 12188 case xpath_type_boolean: 12189 return static_cast<const impl::xpath_variable_boolean*>(this)->name; 12190 12191 default: 12192 assert(false && "Invalid variable type"); // unreachable 12193 return 0; 12194 } 12195 } 12196 type() const12197 PUGI__FN xpath_value_type xpath_variable::type() const 12198 { 12199 return _type; 12200 } 12201 get_boolean() const12202 PUGI__FN bool xpath_variable::get_boolean() const 12203 { 12204 return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false; 12205 } 12206 get_number() const12207 PUGI__FN double xpath_variable::get_number() const 12208 { 12209 return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan(); 12210 } 12211 get_string() const12212 PUGI__FN const char_t* xpath_variable::get_string() const 12213 { 12214 const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0; 12215 return value ? value : PUGIXML_TEXT(""); 12216 } 12217 get_node_set() const12218 PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const 12219 { 12220 return (_type == xpath_type_node_set) ? 
static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set; 12221 } 12222 set(bool value)12223 PUGI__FN bool xpath_variable::set(bool value) 12224 { 12225 if (_type != xpath_type_boolean) return false; 12226 12227 static_cast<impl::xpath_variable_boolean*>(this)->value = value; 12228 return true; 12229 } 12230 set(double value)12231 PUGI__FN bool xpath_variable::set(double value) 12232 { 12233 if (_type != xpath_type_number) return false; 12234 12235 static_cast<impl::xpath_variable_number*>(this)->value = value; 12236 return true; 12237 } 12238 set(const char_t * value)12239 PUGI__FN bool xpath_variable::set(const char_t* value) 12240 { 12241 if (_type != xpath_type_string) return false; 12242 12243 impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this); 12244 12245 // duplicate string 12246 size_t size = (impl::strlength(value) + 1) * sizeof(char_t); 12247 12248 char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size)); 12249 if (!copy) return false; 12250 12251 memcpy(copy, value, size); 12252 12253 // replace old string 12254 if (var->value) impl::xml_memory::deallocate(var->value); 12255 var->value = copy; 12256 12257 return true; 12258 } 12259 set(const xpath_node_set & value)12260 PUGI__FN bool xpath_variable::set(const xpath_node_set& value) 12261 { 12262 if (_type != xpath_type_node_set) return false; 12263 12264 static_cast<impl::xpath_variable_node_set*>(this)->value = value; 12265 return true; 12266 } 12267 xpath_variable_set()12268 PUGI__FN xpath_variable_set::xpath_variable_set() 12269 { 12270 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12271 _data[i] = 0; 12272 } 12273 ~xpath_variable_set()12274 PUGI__FN xpath_variable_set::~xpath_variable_set() 12275 { 12276 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12277 _destroy(_data[i]); 12278 } 12279 xpath_variable_set(const xpath_variable_set & rhs)12280 PUGI__FN xpath_variable_set::xpath_variable_set(const 
xpath_variable_set& rhs) 12281 { 12282 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12283 _data[i] = 0; 12284 12285 _assign(rhs); 12286 } 12287 operator =(const xpath_variable_set & rhs)12288 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs) 12289 { 12290 if (this == &rhs) return *this; 12291 12292 _assign(rhs); 12293 12294 return *this; 12295 } 12296 12297 #ifdef PUGIXML_HAS_MOVE xpath_variable_set(xpath_variable_set && rhs)12298 PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT 12299 { 12300 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12301 { 12302 _data[i] = rhs._data[i]; 12303 rhs._data[i] = 0; 12304 } 12305 } 12306 operator =(xpath_variable_set && rhs)12307 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT 12308 { 12309 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12310 { 12311 _destroy(_data[i]); 12312 12313 _data[i] = rhs._data[i]; 12314 rhs._data[i] = 0; 12315 } 12316 12317 return *this; 12318 } 12319 #endif 12320 _assign(const xpath_variable_set & rhs)12321 PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs) 12322 { 12323 xpath_variable_set temp; 12324 12325 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12326 if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i])) 12327 return; 12328 12329 _swap(temp); 12330 } 12331 _swap(xpath_variable_set & rhs)12332 PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs) 12333 { 12334 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) 12335 { 12336 xpath_variable* chain = _data[i]; 12337 12338 _data[i] = rhs._data[i]; 12339 rhs._data[i] = chain; 12340 } 12341 } 12342 _find(const char_t * name) const12343 PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const 12344 { 12345 const size_t hash_size = sizeof(_data) / sizeof(_data[0]); 12346 size_t hash = 
impl::hash_string(name) % hash_size; 12347 12348 // look for existing variable 12349 for (xpath_variable* var = _data[hash]; var; var = var->_next) 12350 if (impl::strequal(var->name(), name)) 12351 return var; 12352 12353 return 0; 12354 } 12355 _clone(xpath_variable * var,xpath_variable ** out_result)12356 PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result) 12357 { 12358 xpath_variable* last = 0; 12359 12360 while (var) 12361 { 12362 // allocate storage for new variable 12363 xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name()); 12364 if (!nvar) return false; 12365 12366 // link the variable to the result immediately to handle failures gracefully 12367 if (last) 12368 last->_next = nvar; 12369 else 12370 *out_result = nvar; 12371 12372 last = nvar; 12373 12374 // copy the value; this can fail due to out-of-memory conditions 12375 if (!impl::copy_xpath_variable(nvar, var)) return false; 12376 12377 var = var->_next; 12378 } 12379 12380 return true; 12381 } 12382 _destroy(xpath_variable * var)12383 PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var) 12384 { 12385 while (var) 12386 { 12387 xpath_variable* next = var->_next; 12388 12389 impl::delete_xpath_variable(var->_type, var); 12390 12391 var = next; 12392 } 12393 } 12394 add(const char_t * name,xpath_value_type type)12395 PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type) 12396 { 12397 const size_t hash_size = sizeof(_data) / sizeof(_data[0]); 12398 size_t hash = impl::hash_string(name) % hash_size; 12399 12400 // look for existing variable 12401 for (xpath_variable* var = _data[hash]; var; var = var->_next) 12402 if (impl::strequal(var->name(), name)) 12403 return var->type() == type ? 
var : 0; 12404 12405 // add new variable 12406 xpath_variable* result = impl::new_xpath_variable(type, name); 12407 12408 if (result) 12409 { 12410 result->_next = _data[hash]; 12411 12412 _data[hash] = result; 12413 } 12414 12415 return result; 12416 } 12417 set(const char_t * name,bool value)12418 PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value) 12419 { 12420 xpath_variable* var = add(name, xpath_type_boolean); 12421 return var ? var->set(value) : false; 12422 } 12423 set(const char_t * name,double value)12424 PUGI__FN bool xpath_variable_set::set(const char_t* name, double value) 12425 { 12426 xpath_variable* var = add(name, xpath_type_number); 12427 return var ? var->set(value) : false; 12428 } 12429 set(const char_t * name,const char_t * value)12430 PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value) 12431 { 12432 xpath_variable* var = add(name, xpath_type_string); 12433 return var ? var->set(value) : false; 12434 } 12435 set(const char_t * name,const xpath_node_set & value)12436 PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value) 12437 { 12438 xpath_variable* var = add(name, xpath_type_node_set); 12439 return var ? 
var->set(value) : false; 12440 } 12441 get(const char_t * name)12442 PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name) 12443 { 12444 return _find(name); 12445 } 12446 get(const char_t * name) const12447 PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const 12448 { 12449 return _find(name); 12450 } 12451 xpath_query(const char_t * query,xpath_variable_set * variables)12452 PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0) 12453 { 12454 impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create(); 12455 12456 if (!qimpl) 12457 { 12458 #ifdef PUGIXML_NO_EXCEPTIONS 12459 _result.error = "Out of memory"; 12460 #else 12461 throw std::bad_alloc(); 12462 #endif 12463 } 12464 else 12465 { 12466 using impl::auto_deleter; // MSVC7 workaround 12467 auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy); 12468 12469 qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result); 12470 12471 if (qimpl->root) 12472 { 12473 qimpl->root->optimize(&qimpl->alloc); 12474 12475 _impl = impl.release(); 12476 _result.error = 0; 12477 } 12478 else 12479 { 12480 #ifdef PUGIXML_NO_EXCEPTIONS 12481 if (qimpl->oom) _result.error = "Out of memory"; 12482 #else 12483 if (qimpl->oom) throw std::bad_alloc(); 12484 throw xpath_exception(_result); 12485 #endif 12486 } 12487 } 12488 } 12489 xpath_query()12490 PUGI__FN xpath_query::xpath_query(): _impl(0) 12491 { 12492 } 12493 ~xpath_query()12494 PUGI__FN xpath_query::~xpath_query() 12495 { 12496 if (_impl) 12497 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl)); 12498 } 12499 12500 #ifdef PUGIXML_HAS_MOVE xpath_query(xpath_query && rhs)12501 PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT 12502 { 12503 _impl = rhs._impl; 12504 _result = rhs._result; 12505 rhs._impl = 0; 12506 rhs._result = xpath_parse_result(); 12507 } 12508 operator =(xpath_query && rhs)12509 
PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT 12510 { 12511 if (this == &rhs) return *this; 12512 12513 if (_impl) 12514 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl)); 12515 12516 _impl = rhs._impl; 12517 _result = rhs._result; 12518 rhs._impl = 0; 12519 rhs._result = xpath_parse_result(); 12520 12521 return *this; 12522 } 12523 #endif 12524 return_type() const12525 PUGI__FN xpath_value_type xpath_query::return_type() const 12526 { 12527 if (!_impl) return xpath_type_none; 12528 12529 return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype(); 12530 } 12531 evaluate_boolean(const xpath_node & n) const12532 PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const 12533 { 12534 if (!_impl) return false; 12535 12536 impl::xpath_context c(n, 1, 1); 12537 impl::xpath_stack_data sd; 12538 12539 bool r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack); 12540 12541 if (sd.oom) 12542 { 12543 #ifdef PUGIXML_NO_EXCEPTIONS 12544 return false; 12545 #else 12546 throw std::bad_alloc(); 12547 #endif 12548 } 12549 12550 return r; 12551 } 12552 evaluate_number(const xpath_node & n) const12553 PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const 12554 { 12555 if (!_impl) return impl::gen_nan(); 12556 12557 impl::xpath_context c(n, 1, 1); 12558 impl::xpath_stack_data sd; 12559 12560 double r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack); 12561 12562 if (sd.oom) 12563 { 12564 #ifdef PUGIXML_NO_EXCEPTIONS 12565 return impl::gen_nan(); 12566 #else 12567 throw std::bad_alloc(); 12568 #endif 12569 } 12570 12571 return r; 12572 } 12573 12574 #ifndef PUGIXML_NO_STL evaluate_string(const xpath_node & n) const12575 PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const 12576 { 12577 if (!_impl) return string_t(); 12578 12579 impl::xpath_context c(n, 1, 1); 12580 impl::xpath_stack_data sd; 12581 12582 
impl::xpath_string r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack); 12583 12584 if (sd.oom) 12585 { 12586 #ifdef PUGIXML_NO_EXCEPTIONS 12587 return string_t(); 12588 #else 12589 throw std::bad_alloc(); 12590 #endif 12591 } 12592 12593 return string_t(r.c_str(), r.length()); 12594 } 12595 #endif 12596 evaluate_string(char_t * buffer,size_t capacity,const xpath_node & n) const12597 PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const 12598 { 12599 impl::xpath_context c(n, 1, 1); 12600 impl::xpath_stack_data sd; 12601 12602 impl::xpath_string r = _impl ? static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack) : impl::xpath_string(); 12603 12604 if (sd.oom) 12605 { 12606 #ifdef PUGIXML_NO_EXCEPTIONS 12607 r = impl::xpath_string(); 12608 #else 12609 throw std::bad_alloc(); 12610 #endif 12611 } 12612 12613 size_t full_size = r.length() + 1; 12614 12615 if (capacity > 0) 12616 { 12617 size_t size = (full_size < capacity) ? 
full_size : capacity; 12618 assert(size > 0); 12619 12620 memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t)); 12621 buffer[size - 1] = 0; 12622 } 12623 12624 return full_size; 12625 } 12626 evaluate_node_set(const xpath_node & n) const12627 PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const 12628 { 12629 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl)); 12630 if (!root) return xpath_node_set(); 12631 12632 impl::xpath_context c(n, 1, 1); 12633 impl::xpath_stack_data sd; 12634 12635 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all); 12636 12637 if (sd.oom) 12638 { 12639 #ifdef PUGIXML_NO_EXCEPTIONS 12640 return xpath_node_set(); 12641 #else 12642 throw std::bad_alloc(); 12643 #endif 12644 } 12645 12646 return xpath_node_set(r.begin(), r.end(), r.type()); 12647 } 12648 evaluate_node(const xpath_node & n) const12649 PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const 12650 { 12651 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl)); 12652 if (!root) return xpath_node(); 12653 12654 impl::xpath_context c(n, 1, 1); 12655 impl::xpath_stack_data sd; 12656 12657 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first); 12658 12659 if (sd.oom) 12660 { 12661 #ifdef PUGIXML_NO_EXCEPTIONS 12662 return xpath_node(); 12663 #else 12664 throw std::bad_alloc(); 12665 #endif 12666 } 12667 12668 return r.first(); 12669 } 12670 result() const12671 PUGI__FN const xpath_parse_result& xpath_query::result() const 12672 { 12673 return _result; 12674 } 12675 unspecified_bool_xpath_query(xpath_query ***)12676 PUGI__FN static void unspecified_bool_xpath_query(xpath_query***) 12677 { 12678 } 12679 operator xpath_query::unspecified_bool_type() const12680 PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const 12681 { 12682 return _impl ? 
unspecified_bool_xpath_query : 0; 12683 } 12684 operator !() const12685 PUGI__FN bool xpath_query::operator!() const 12686 { 12687 return !_impl; 12688 } 12689 select_node(const char_t * query,xpath_variable_set * variables) const12690 PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const 12691 { 12692 xpath_query q(query, variables); 12693 return q.evaluate_node(*this); 12694 } 12695 select_node(const xpath_query & query) const12696 PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const 12697 { 12698 return query.evaluate_node(*this); 12699 } 12700 select_nodes(const char_t * query,xpath_variable_set * variables) const12701 PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const 12702 { 12703 xpath_query q(query, variables); 12704 return q.evaluate_node_set(*this); 12705 } 12706 select_nodes(const xpath_query & query) const12707 PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const 12708 { 12709 return query.evaluate_node_set(*this); 12710 } 12711 select_single_node(const char_t * query,xpath_variable_set * variables) const12712 PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const 12713 { 12714 xpath_query q(query, variables); 12715 return q.evaluate_node(*this); 12716 } 12717 select_single_node(const xpath_query & query) const12718 PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const 12719 { 12720 return query.evaluate_node(*this); 12721 } 12722 } 12723 12724 #endif 12725 12726 #ifdef __BORLANDC__ 12727 # pragma option pop 12728 #endif 12729 12730 // Intel C++ does not properly keep warning state for function templates, 12731 // so popping warning state at the end of translation unit leads to warnings in the middle. 
12732 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) 12733 # pragma warning(pop) 12734 #endif 12735 12736 #if defined(_MSC_VER) && defined(__c2__) 12737 # pragma clang diagnostic pop 12738 #endif 12739 12740 // Undefine all local macros (makes sure we're not leaking macros in header-only mode) 12741 #undef PUGI__NO_INLINE 12742 #undef PUGI__UNLIKELY 12743 #undef PUGI__STATIC_ASSERT 12744 #undef PUGI__DMC_VOLATILE 12745 #undef PUGI__UNSIGNED_OVERFLOW 12746 #undef PUGI__MSVC_CRT_VERSION 12747 #undef PUGI__SNPRINTF 12748 #undef PUGI__NS_BEGIN 12749 #undef PUGI__NS_END 12750 #undef PUGI__FN 12751 #undef PUGI__FN_NO_INLINE 12752 #undef PUGI__GETHEADER_IMPL 12753 #undef PUGI__GETPAGE_IMPL 12754 #undef PUGI__GETPAGE 12755 #undef PUGI__NODETYPE 12756 #undef PUGI__IS_CHARTYPE_IMPL 12757 #undef PUGI__IS_CHARTYPE 12758 #undef PUGI__IS_CHARTYPEX 12759 #undef PUGI__ENDSWITH 12760 #undef PUGI__SKIPWS 12761 #undef PUGI__OPTSET 12762 #undef PUGI__PUSHNODE 12763 #undef PUGI__POPNODE 12764 #undef PUGI__SCANFOR 12765 #undef PUGI__SCANWHILE 12766 #undef PUGI__SCANWHILE_UNROLL 12767 #undef PUGI__ENDSEG 12768 #undef PUGI__THROW_ERROR 12769 #undef PUGI__CHECK_ERROR 12770 12771 #endif 12772 12773 /** 12774 * Copyright (c) 2006-2018 Arseny Kapoulkine 12775 * 12776 * Permission is hereby granted, free of charge, to any person 12777 * obtaining a copy of this software and associated documentation 12778 * files (the "Software"), to deal in the Software without 12779 * restriction, including without limitation the rights to use, 12780 * copy, modify, merge, publish, distribute, sublicense, and/or sell 12781 * copies of the Software, and to permit persons to whom the 12782 * Software is furnished to do so, subject to the following 12783 * conditions: 12784 * 12785 * The above copyright notice and this permission notice shall be 12786 * included in all copies or substantial portions of the Software. 
12787 * 12788 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 12789 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 12790 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 12791 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 12792 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 12793 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 12794 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 12795 * OTHER DEALINGS IN THE SOFTWARE. 12796 */ 12797