1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 2 /************************************************************************* 3 * 4 * Copyright (c) 2018 Kohei Yoshida 5 * 6 * Permission is hereby granted, free of charge, to any person 7 * obtaining a copy of this software and associated documentation 8 * files (the "Software"), to deal in the Software without 9 * restriction, including without limitation the rights to use, 10 * copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following 13 * conditions: 14 * 15 * The above copyright notice and this permission notice shall be 16 * included in all copies or substantial portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 20 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 22 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 23 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 25 * OTHER DEALINGS IN THE SOFTWARE. 26 * 27 ************************************************************************/ 28 29 #ifndef INCLUDED_MDDS_RTREE_HPP 30 #define INCLUDED_MDDS_RTREE_HPP 31 32 #include <deque> 33 #include <vector> 34 #include <cstdlib> 35 #include <string> 36 #include <unordered_set> 37 #include <unordered_map> 38 #include <functional> 39 40 namespace mdds { 41 42 namespace detail { namespace rtree { 43 44 struct default_rtree_trait 45 { 46 /** 47 * Number of dimensions in bounding rectangles. 48 */ 49 constexpr static size_t dimensions = 2; 50 51 /** 52 * Minimum number of child nodes that must be present in each directory 53 * node. Exception is the root node, which is allowed to have less than 54 * the minimum number of nodes, but only when it's a leaf directory node. 55 */ 56 constexpr static size_t min_node_size = 40; 57 58 /** 59 * Maximum number of child nodes that each directory node is allowed to 60 * have. There are no exceptions to this rule. 61 */ 62 constexpr static size_t max_node_size = 100; 63 64 /** 65 * Maximum depth a tree is allowed to have. 66 */ 67 constexpr static size_t max_tree_depth = 100; 68 69 /** 70 * A flag to determine whether or not to perform forced reinsertion when a 71 * directory node overflows, before attempting to split the node. 72 */ 73 constexpr static bool enable_forced_reinsertion = true; 74 75 /** 76 * Number of nodes to get re-inserted during forced reinsertion. This 77 * should be roughly 30% of max_node_size + 1, and should not be greater 78 * than max_node_size - min_node_size + 1. 79 */ 80 constexpr static size_t reinsertion_size = 30; 81 }; 82 83 struct integrity_check_properties 84 { 85 /** 86 * When true, the integrity check will throw an exception on the first 87 * validation failure. When false, it will run through the entire tree 88 * and report all encountered validation failures then throw an exception 89 * if there is at least one failure. 90 */ 91 bool throw_on_first_error = true; 92 93 /** 94 * When true, a node containing children less than the minimum node size 95 * will be treated as an error. 96 */ 97 bool error_on_min_node_size = true; 98 }; 99 100 enum class node_type { unspecified, directory_leaf, directory_nonleaf, value }; 101 102 enum class export_tree_type 103 { 104 /** 105 * Textural representation of a tree structure. Indent levels represent 106 * depths, and each line represents a single node. 107 */ 108 formatted_node_properties, 109 110 /** 111 * The extents of all directory and value nodes are exported as Wavefront 112 * .obj format. Only 2 dimensional trees are supported for now. 113 * 114 * For a 2-dimensional tree, each depth is represented by a 2D plane 115 * filled with rectangles representing the extents of either value or 116 * directory nodes at that depth level. The depth planes are then stacked 117 * vertically. 118 */ 119 extent_as_obj, 120 121 /** 122 * The extents of all directory and value nodes are exported as a scalable 123 * vector graphics (SVG) format. Only 2 dimensional trees are supported. 124 */ 125 extent_as_svg 126 }; 127 128 enum class search_type 129 { 130 /** 131 * Pick up all objects whose extents overlap with the specified search 132 * extent. 133 */ 134 overlap, 135 136 /** 137 * Pick up all objects whose extents exactly match the specified search 138 * extent. 139 */ 140 match, 141 }; 142 143 template<typename _NodePtrT> 144 struct ptr_to_string; 145 146 }} 147 148 template<typename _Key, typename _Value, typename _Trait = detail::rtree::default_rtree_trait> 149 class rtree 150 { 151 using trait_type = _Trait; 152 153 constexpr static size_t max_dist_size = trait_type::max_node_size - trait_type::min_node_size * 2 + 2; 154 155 public: 156 using key_type = _Key; 157 using value_type = _Value; 158 159 struct point_type 160 { 161 key_type d[trait_type::dimensions]; 162 163 point_type(); 164 point_type(std::initializer_list<key_type> vs); 165 166 std::string to_string() const; 167 168 bool operator== (const point_type& other) const; 169 bool operator!= (const point_type& other) const; 170 }; 171 172 struct extent_type 173 { 174 point_type start; 175 point_type end; 176 177 extent_type(); 178 extent_type(const point_type& _start, const point_type& _end); 179 180 std::string to_string() const; 181 182 bool is_point() const; 183 184 bool operator== (const extent_type& other) const; 185 bool operator!= (const extent_type& other) const; 186 187 /** 188 * Determine whether or not the specified point lies within this 189 * extent. 190 * 191 * @param pt point to query with. 192 * 193 * @return true if the point lies within this extent, or false 194 * otherwise. 195 */ 196 bool contains(const point_type& pt) const; 197 198 /** 199 * Determine whether or not the specified extent lies <i>entirely</i> 200 * within this extent. 201 * 202 * @param bb extent to query with. 203 * 204 * @return true if the specified extent lies entirely within this 205 * extent, or otherwise false. 206 */ 207 bool contains(const extent_type& bb) const; 208 209 /** 210 * Determine whether or not the specified extent overlaps with this 211 * extent either partially or fully. 212 * 213 * @param bb extent to query with. 214 * 215 * @return true if the specified extent overlaps with this extent, or 216 * otherwise false. 217 */ 218 bool intersects(const extent_type& bb) const; 219 220 /** 221 * Determine whether or not another bounding box is within this 222 * bounding box and shares a part of its boundaries. 223 */ 224 bool contains_at_boundary(const extent_type& other) const; 225 }; 226 227 using node_type = detail::rtree::node_type; 228 using export_tree_type = detail::rtree::export_tree_type; 229 using search_type = detail::rtree::search_type; 230 using integrity_check_properties = detail::rtree::integrity_check_properties; 231 232 struct node_properties 233 { 234 node_type type; 235 extent_type extent; 236 }; 237 238 private: 239 240 struct node; 241 struct directory_node; 242 243 /** 244 * This class is intentionally only movable and non-copyable, to prevent 245 * accidental copying of its object. To "copy" this class, you must use 246 * its clone() method explicitly. 247 */ 248 struct node_store 249 { 250 node_type type; 251 extent_type extent; 252 node_store* parent; 253 node* node_ptr; 254 size_t count; 255 256 bool valid_pointer; 257 258 node_store(const node_store&) = delete; 259 node_store& operator= (const node_store&) = delete; 260 261 node_store(); 262 node_store(node_store&& r); 263 node_store(node_type _type, const extent_type& _extent, node* _node_ptr); 264 ~node_store(); 265 266 node_store clone() const; 267 268 static node_store create_leaf_directory_node(); 269 static node_store create_nonleaf_directory_node(); 270 static node_store create_value_node(const extent_type& extent, value_type&& v); 271 static node_store create_value_node(const extent_type& extent, const value_type& v); 272 273 node_store& operator= (node_store&& other); 274 275 /** 276 * Re-calculate the extent based on its current children. 277 * 278 * @return true if the extent has changed, false otherwise. 279 */ 280 bool pack(); 281 282 /** 283 * Re-calculate the extent of the parent nodes recursively all the way 284 * up to the root node. 285 */ 286 void pack_upward(); 287 288 bool is_directory() const; 289 bool is_root() const; 290 bool exceeds_capacity() const; 291 292 void swap(node_store& other); 293 294 /** 295 * Have all its child nodes update their parent pointers if the memory 296 * location of this node has been invalidated. Run the tree 297 * recursively until no more invalid pointers have been found. 298 */ 299 void reset_parent_pointers_of_children(); 300 301 /** 302 * Update its parent pointer and all its child nodes' parent pointers 303 * if the memory location of this node has been invalidated. Run the 304 * tree recursively until no more invalid pointers have been found. 305 */ 306 void reset_parent_pointers(); 307 308 directory_node* get_directory_node(); 309 const directory_node* get_directory_node() const; 310 311 bool erase_child(const node_store* p); 312 }; 313 314 using dir_store_type = std::deque<node_store>; 315 316 struct dir_store_segment 317 { 318 typename dir_store_type::iterator begin; 319 typename dir_store_type::iterator end; 320 size_t size; 321 dir_store_segmentmdds::rtree::dir_store_segment322 dir_store_segment() : size(0) {} 323 dir_store_segmentmdds::rtree::dir_store_segment324 dir_store_segment( 325 typename dir_store_type::iterator _begin, 326 typename dir_store_type::iterator _end, 327 size_t _size) : 328 begin(std::move(_begin)), 329 end(std::move(_end)), 330 size(_size) {} 331 }; 332 333 struct distribution 334 { 335 dir_store_segment g1; 336 dir_store_segment g2; 337 distributionmdds::rtree::distribution338 distribution(size_t dist, dir_store_type& nodes) 339 { 340 g1.begin = nodes.begin(); 341 g1.end = g1.begin; 342 g1.size = trait_type::min_node_size - 1 + dist; 343 std::advance(g1.end, g1.size); 344 345 g2.begin = g1.end; 346 g2.end = nodes.end(); 347 g2.size = nodes.size() - g1.size; 348 } 349 }; 350 351 struct node 352 { 353 node(); 354 node(const node&) = delete; 355 ~node(); 356 }; 357 358 struct value_node : public node 359 { 360 value_type value; 361 362 value_node() = delete; 363 value_node(value_type&& _value); 364 value_node(const value_type& _value); 365 ~value_node(); 366 }; 367 368 /** 369 * Directory node can either contain all value nodes, or all directory 370 * nodes as its child nodes. 371 */ 372 struct directory_node : public node 373 { 374 dir_store_type children; 375 376 directory_node(const directory_node&) = delete; 377 directory_node& operator=(const directory_node&) = delete; 378 379 directory_node(); 380 ~directory_node(); 381 382 bool erase(const node_store* p); 383 384 node_store* get_child_with_minimal_overlap(const extent_type& bb); 385 node_store* get_child_with_minimal_area_enlargement(const extent_type& bb); 386 387 extent_type calc_extent() const; 388 key_type calc_overlap_cost(const extent_type& bb) const; 389 bool has_leaf_directory() const; 390 }; 391 392 struct orphan_node_entry 393 { 394 node_store ns; 395 size_t depth; 396 }; 397 398 using orphan_node_entries_type = std::deque<orphan_node_entry>; 399 400 struct insertion_point 401 { 402 node_store* ns; 403 size_t depth; 404 }; 405 406 public: 407 408 template<typename _NS> 409 class search_results_base 410 { 411 friend class rtree; 412 protected: 413 using node_store_type = _NS; 414 415 struct entry 416 { 417 node_store_type* ns; 418 size_t depth; 419 420 entry(node_store_type* _ns, size_t _depth); 421 }; 422 423 using store_type = std::vector<entry>; 424 store_type m_store; 425 426 void add_node_store(node_store_type* ns, size_t depth); 427 }; 428 429 class const_iterator; 430 class iterator; 431 class search_results; 432 433 class const_search_results : public search_results_base<const node_store> 434 { 435 using base_type = search_results_base<const node_store>; 436 using base_type::m_store; 437 public: 438 const_iterator cbegin() const; 439 const_iterator cend() const; 440 const_iterator begin() const; 441 const_iterator end() const; 442 }; 443 444 class search_results : public search_results_base<node_store> 445 { 446 using base_type = search_results_base<node_store>; 447 using base_type::m_store; 448 public: 449 iterator begin(); 450 iterator end(); 451 }; 452 453 template<typename _SelfIter, typename _StoreIter, typename _ValueT> 454 class iterator_base 455 { 456 public: 457 using store_iterator_type = _StoreIter; 458 using self_iterator_type = _SelfIter; 459 460 protected: 461 store_iterator_type m_pos; 462 463 iterator_base(store_iterator_type pos); 464 465 public: 466 // iterator traits 467 using value_type = _ValueT; 468 using pointer = value_type*; 469 using reference = value_type&; 470 using difference_type = std::ptrdiff_t; 471 using iterator_category = std::bidirectional_iterator_tag; 472 473 bool operator== (const self_iterator_type& other) const; 474 bool operator!= (const self_iterator_type& other) const; 475 476 self_iterator_type& operator++(); 477 self_iterator_type operator++(int); 478 479 self_iterator_type& operator--(); 480 self_iterator_type operator--(int); 481 482 const extent_type& extent() const; 483 size_t depth() const; 484 }; 485 486 class const_iterator : public iterator_base< 487 const_iterator, 488 typename const_search_results::store_type::const_iterator, 489 const rtree::value_type> 490 { 491 using base_type = iterator_base< 492 const_iterator, 493 typename const_search_results::store_type::const_iterator, 494 const rtree::value_type>; 495 496 using store_type = typename const_search_results::store_type; 497 using typename base_type::store_iterator_type; 498 using base_type::m_pos; 499 500 friend class rtree; 501 502 const_iterator(store_iterator_type pos); 503 public: 504 using typename base_type::value_type; 505 506 value_type& operator*() const; 507 value_type* operator->() const; 508 }; 509 510 class iterator : public iterator_base< 511 iterator, 512 typename search_results::store_type::iterator, 513 rtree::value_type> 514 { 515 using base_type = iterator_base< 516 iterator, 517 typename search_results::store_type::iterator, 518 rtree::value_type>; 519 520 using store_type = typename const_search_results::store_type; 521 using typename base_type::store_iterator_type; 522 using base_type::m_pos; 523 524 friend class rtree; 525 526 iterator(store_iterator_type pos); 527 public: 528 using typename base_type::value_type; 529 530 value_type& operator*(); 531 value_type* operator->(); 532 }; 533 534 /** 535 * Loader optimized for loading a large number of value objects. A 536 * resultant tree will have a higher chance of being well balanced than if 537 * the value objects were inserted individually into the tree. 538 */ 539 class bulk_loader 540 { 541 dir_store_type m_store; 542 543 public: 544 bulk_loader(); 545 546 void insert(const extent_type& extent, value_type&& value); 547 void insert(const extent_type& extent, const value_type& value); 548 549 void insert(const point_type& position, value_type&& value); 550 void insert(const point_type& position, const value_type& value); 551 552 rtree pack(); 553 554 private: 555 void pack_level(dir_store_type& store, size_t depth); 556 557 void insert_impl(const extent_type& extent, value_type&& value); 558 void insert_impl(const extent_type& extent, const value_type& value); 559 }; 560 561 rtree(); 562 rtree(rtree&& other); 563 rtree(const rtree& other); 564 565 private: 566 rtree(node_store&& root); // only used internally by bulk_loader. 567 568 public: 569 ~rtree(); 570 571 rtree& operator= (const rtree& other); 572 573 rtree& operator= (rtree&& other); 574 575 /** 576 * Insert a new value associated with a bounding box. The new value 577 * object will be moved into the container. 578 * 579 * @param extent bounding box associated with the value. 580 * @param value value being inserted. 581 */ 582 void insert(const extent_type& extent, value_type&& value); 583 584 /** 585 * Insert a new value associated with a bounding box. A copy of the new 586 * value object will be placed into the container. 587 * 588 * @param extent bounding box associated with the value. 589 * @param value value being inserted. 590 */ 591 void insert(const extent_type& extent, const value_type& value); 592 593 /** 594 * Insert a new value associated with a point. The new value object will 595 * be moved into the container. 596 * 597 * @param position point associated with the value. 598 * @param value value being inserted. 599 */ 600 void insert(const point_type& position, value_type&& value); 601 602 /** 603 * Insert a new value associated with a point. A copy of the new value 604 * object will be placed into the container. 605 * 606 * @param position point associated with the value. 607 * @param value value being inserted. 608 */ 609 void insert(const point_type& position, const value_type& value); 610 611 /** 612 * Search the tree and collect all value objects whose extents either 613 * contain the specified point, or exactly match the specified point. 614 * 615 * @param pt reference point to use for the search. 616 * @param st search type that determines the satisfying condition of the 617 * search with respect to the reference point. 618 * 619 * @return collection of all value objects that satisfy the specified 620 * search condition. This collection is immutable. 621 */ 622 const_search_results search(const point_type& pt, search_type st) const; 623 624 /** 625 * Search the tree and collect all value objects whose extents either 626 * contain the specified point, or exactly match the specified point. 627 * 628 * @param pt reference point to use for the search. 629 * @param st search type that determines the satisfying condition of the 630 * search with respect to the reference point. 631 * 632 * @return collection of all value objects that satisfy the specified 633 * search condition. This collection is mutable. 634 */ 635 search_results search(const point_type& pt, search_type st); 636 637 /** 638 * Search the tree and collect all value objects whose extents either 639 * overlaps with the specified extent, or exactly match the specified 640 * extent. 641 * 642 * @param extent reference extent to use for the search. 643 * @param st search type that determines the satisfying condition of the 644 * search with respect to the reference extent. 645 * 646 * @return collection of all value objects that satisfy the specified 647 * search condition. This collection is immutable. 648 */ 649 const_search_results search(const extent_type& extent, search_type st) const; 650 651 /** 652 * Search the tree and collect all value objects whose extents either 653 * overlaps with the specified extent, or exactly match the specified 654 * extent. 655 * 656 * @param extent reference extent to use for the search. 657 * @param st search type that determines the satisfying condition of the 658 * search with respect to the reference extent. 659 * 660 * @return collection of all value objects that satisfy the specified 661 * search condition. This collection is mutable. 662 */ 663 search_results search(const extent_type& extent, search_type st); 664 665 /** 666 * Erase the value object referenced by the iterator passed to this 667 * method. 668 * 669 * <p>The iterator object will become invalid if the call results in an 670 * erasure of a value.</p> 671 * 672 * @param pos iterator that refernces the value object to erase. 673 */ 674 void erase(const const_iterator& pos); 675 676 /** 677 * Erase the value object referenced by the iterator passed to this 678 * method. 679 * 680 * <p>The iterator object will become invalid if the call results in an 681 * erasure of a value.</p> 682 * 683 * @param pos iterator that refernces the value object to erase. 684 */ 685 void erase(const iterator& pos); 686 687 /** 688 * Get the minimum bounding extent of the root node of the tree. The 689 * extent returned from this method is the minimum extent that contains 690 * the extents of all objects stored in the tree. 691 * 692 * @return immutable reference to the extent of the root node of the tree. 693 */ 694 const extent_type& extent() const; 695 696 /** 697 * Check whether or not the tree stores any objects. 698 * 699 * @return true if the tree is empty, otherwise false. 700 */ 701 bool empty() const; 702 703 /** 704 * Return the number of value nodes currently stored in the tree. 705 * 706 * @return number of value nodes currently in the tree. 707 */ 708 size_t size() const; 709 710 /** 711 * Swap the content of the tree with another instance. 712 * 713 * @param other another instance to swap the content with. 714 */ 715 void swap(rtree& other); 716 717 /** 718 * Empty the entire container. 719 */ 720 void clear(); 721 722 /** 723 * Walk down the entire tree depth first. 724 * 725 * @func function or function object that gets called at each node in the 726 * tree. 727 */ 728 template<typename _Func> 729 void walk(_Func func) const; 730 731 /** 732 * Check the integrity of the entire tree structure. 733 * 734 * @param props specify how the check is to be performed. 735 * 736 * @exception integrity_error if the integrity check fails. 737 */ 738 void check_integrity(const integrity_check_properties& props) const; 739 740 /** 741 * Export the structure of a tree in textural format. 742 * 743 * @param mode specify the format in which to represent the structure of a 744 * tree. 745 * 746 * @return string representation of the tree structure. 747 */ 748 std::string export_tree(export_tree_type mode) const; 749 750 private: 751 752 void insert_impl(const point_type& start, const point_type& end, value_type&& value); 753 void insert_impl(const point_type& start, const point_type& end, const value_type& value); 754 755 void erase_impl(const node_store* ns, size_t depth); 756 757 /** 758 * Build and return a callable function object that you can call in order 759 * to convert node's memory location to a normalized unique string still 760 * representative of that node. 761 */ 762 detail::rtree::ptr_to_string<const node_store*> build_ptr_to_string_map() const; 763 764 std::string export_tree_formatted() const; 765 std::string export_tree_extent_as_obj() const; 766 std::string export_tree_extent_as_svg() const; 767 768 void insert(node_store&& ns, std::unordered_set<size_t>* reinserted_depths); 769 void insert_dir(node_store&& ns, size_t max_depth); 770 771 /** 772 * Split an overfilled node. The node to split is expected to have 773 * exactly M+1 child nodes where M is the maximum number of child nodes a 774 * single node is allowed to have. 775 * 776 * @param ns node to split. 777 */ 778 void split_node(node_store* ns); 779 780 void perform_forced_reinsertion(node_store* ns, std::unordered_set<size_t>& reinserted_depth); 781 782 /** 783 * Determine the best dimension to split by, and sort the child nodes by 784 * that dimension. 785 * 786 * @param children child nodes to sort. 787 */ 788 void sort_dir_store_by_split_dimension(dir_store_type& children); 789 790 void sort_dir_store_by_dimension(size_t dim, dir_store_type& children); 791 792 size_t pick_optimal_distribution(dir_store_type& children) const; 793 794 insertion_point find_leaf_directory_node_for_insertion(const extent_type& bb); 795 node_store* find_nonleaf_directory_node_for_insertion(const extent_type& bb, size_t max_depth); 796 797 template<typename _Func> 798 void descend_with_func(_Func func) const; 799 800 using search_condition_type = std::function<bool(const node_store&)>; 801 802 template<typename _ResT> 803 void search_descend( 804 size_t depth, const search_condition_type& dir_cond, const search_condition_type& value_cond, 805 typename _ResT::node_store_type& ns, _ResT& results) const; 806 807 void shrink_tree_upward(node_store* ns, const extent_type& bb_affected); 808 809 private: 810 node_store m_root; 811 }; 812 813 } // namespace mdds 814 815 #include "rtree_def.inl" 816 817 #endif 818 819 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ 820