1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
3  *
4  * Copyright (c) 2018 Kohei Yoshida
5  *
6  * Permission is hereby granted, free of charge, to any person
7  * obtaining a copy of this software and associated documentation
8  * files (the "Software"), to deal in the Software without
9  * restriction, including without limitation the rights to use,
10  * copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following
13  * conditions:
14  *
15  * The above copyright notice and this permission notice shall be
16  * included in all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25  * OTHER DEALINGS IN THE SOFTWARE.
26  *
27  ************************************************************************/
28 
29 #ifndef INCLUDED_MDDS_RTREE_HPP
30 #define INCLUDED_MDDS_RTREE_HPP
31 
32 #include <deque>
33 #include <vector>
34 #include <cstdlib>
35 #include <string>
36 #include <unordered_set>
37 #include <unordered_map>
38 #include <functional>
39 
40 namespace mdds {
41 
42 namespace detail { namespace rtree {
43 
44 struct default_rtree_trait
45 {
46     /**
47      * Number of dimensions in bounding rectangles.
48      */
49     constexpr static size_t dimensions = 2;
50 
51     /**
52      * Minimum number of child nodes that must be present in each directory
53      * node.  Exception is the root node, which is allowed to have less than
54      * the minimum number of nodes, but only when it's a leaf directory node.
55      */
56     constexpr static size_t min_node_size = 40;
57 
58     /**
59      * Maximum number of child nodes that each directory node is allowed to
60      * have.  There are no exceptions to this rule.
61      */
62     constexpr static size_t max_node_size = 100;
63 
64     /**
65      * Maximum depth a tree is allowed to have.
66      */
67     constexpr static size_t max_tree_depth = 100;
68 
69     /**
70      * A flag to determine whether or not to perform forced reinsertion when a
71      * directory node overflows, before attempting to split the node.
72      */
73     constexpr static bool enable_forced_reinsertion = true;
74 
75     /**
76      * Number of nodes to get re-inserted during forced reinsertion.  This
77      * should be roughly 30% of max_node_size + 1, and should not be greater
78      * than max_node_size - min_node_size + 1.
79      */
80     constexpr static size_t reinsertion_size = 30;
81 };
82 
83 struct integrity_check_properties
84 {
85     /**
86      * When true, the integrity check will throw an exception on the first
87      * validation failure.  When false, it will run through the entire tree
88      * and report all encountered validation failures then throw an exception
89      * if there is at least one failure.
90      */
91     bool throw_on_first_error = true;
92 
93     /**
94      * When true, a node containing children less than the minimum node size
95      * will be treated as an error.
96      */
97     bool error_on_min_node_size = true;
98 };
99 
100 enum class node_type { unspecified, directory_leaf, directory_nonleaf, value };
101 
102 enum class export_tree_type
103 {
104     /**
105      * Textural representation of a tree structure.  Indent levels represent
106      * depths, and each line represents a single node.
107      */
108     formatted_node_properties,
109 
110     /**
111      * The extents of all directory and value nodes are exported as Wavefront
112      * .obj format.  Only 2 dimensional trees are supported for now.
113      *
114      * For a 2-dimensional tree, each depth is represented by a 2D plane
115      * filled with rectangles representing the extents of either value or
116      * directory nodes at that depth level.  The depth planes are then stacked
117      * vertically.
118      */
119     extent_as_obj,
120 
121     /**
122      * The extents of all directory and value nodes are exported as a scalable
123      * vector graphics (SVG) format.  Only 2 dimensional trees are supported.
124      */
125     extent_as_svg
126 };
127 
128 enum class search_type
129 {
130     /**
131      * Pick up all objects whose extents overlap with the specified search
132      * extent.
133      */
134     overlap,
135 
136     /**
137      * Pick up all objects whose extents exactly match the specified search
138      * extent.
139      */
140     match,
141 };
142 
143 template<typename _NodePtrT>
144 struct ptr_to_string;
145 
146 }}
147 
148 template<typename _Key, typename _Value, typename _Trait = detail::rtree::default_rtree_trait>
149 class rtree
150 {
151     using trait_type = _Trait;
152 
153     constexpr static size_t max_dist_size = trait_type::max_node_size - trait_type::min_node_size * 2 + 2;
154 
155 public:
156     using key_type = _Key;
157     using value_type = _Value;
158 
159     struct point_type
160     {
161         key_type d[trait_type::dimensions];
162 
163         point_type();
164         point_type(std::initializer_list<key_type> vs);
165 
166         std::string to_string() const;
167 
168         bool operator== (const point_type& other) const;
169         bool operator!= (const point_type& other) const;
170     };
171 
172     struct extent_type
173     {
174         point_type start;
175         point_type end;
176 
177         extent_type();
178         extent_type(const point_type& _start, const point_type& _end);
179 
180         std::string to_string() const;
181 
182         bool is_point() const;
183 
184         bool operator== (const extent_type& other) const;
185         bool operator!= (const extent_type& other) const;
186 
187         /**
188          * Determine whether or not the specified point lies within this
189          * extent.
190          *
191          * @param pt point to query with.
192          *
193          * @return true if the point lies within this extent, or false
194          *         otherwise.
195          */
196         bool contains(const point_type& pt) const;
197 
198         /**
199          * Determine whether or not the specified extent lies <i>entirely</i>
200          * within this extent.
201          *
202          * @param bb extent to query with.
203          *
204          * @return true if the specified extent lies entirely within this
205          *         extent, or otherwise false.
206          */
207         bool contains(const extent_type& bb) const;
208 
209         /**
210          * Determine whether or not the specified extent overlaps with this
211          * extent either partially or fully.
212          *
213          * @param bb extent to query with.
214          *
215          * @return true if the specified extent overlaps with this extent, or
216          *         otherwise false.
217          */
218         bool intersects(const extent_type& bb) const;
219 
220         /**
221          * Determine whether or not another bounding box is within this
222          * bounding box and shares a part of its boundaries.
223          */
224         bool contains_at_boundary(const extent_type& other) const;
225     };
226 
227     using node_type = detail::rtree::node_type;
228     using export_tree_type = detail::rtree::export_tree_type;
229     using search_type = detail::rtree::search_type;
230     using integrity_check_properties = detail::rtree::integrity_check_properties;
231 
232     struct node_properties
233     {
234         node_type type;
235         extent_type extent;
236     };
237 
238 private:
239 
240     struct node;
241     struct directory_node;
242 
243     /**
244      * This class is intentionally only movable and non-copyable, to prevent
245      * accidental copying of its object. To "copy" this class, you must use
246      * its clone() method explicitly.
247      */
248     struct node_store
249     {
250         node_type type;
251         extent_type extent;
252         node_store* parent;
253         node* node_ptr;
254         size_t count;
255 
256         bool valid_pointer;
257 
258         node_store(const node_store&) = delete;
259         node_store& operator= (const node_store&) = delete;
260 
261         node_store();
262         node_store(node_store&& r);
263         node_store(node_type _type, const extent_type& _extent, node* _node_ptr);
264         ~node_store();
265 
266         node_store clone() const;
267 
268         static node_store create_leaf_directory_node();
269         static node_store create_nonleaf_directory_node();
270         static node_store create_value_node(const extent_type& extent, value_type&& v);
271         static node_store create_value_node(const extent_type& extent, const value_type& v);
272 
273         node_store& operator= (node_store&& other);
274 
275         /**
276          * Re-calculate the extent based on its current children.
277          *
278          * @return true if the extent has changed, false otherwise.
279          */
280         bool pack();
281 
282         /**
283          * Re-calculate the extent of the parent nodes recursively all the way
284          * up to the root node.
285          */
286         void pack_upward();
287 
288         bool is_directory() const;
289         bool is_root() const;
290         bool exceeds_capacity() const;
291 
292         void swap(node_store& other);
293 
294         /**
295          * Have all its child nodes update their parent pointers if the memory
296          * location of this node has been invalidated.  Run the tree
297          * recursively until no more invalid pointers have been found.
298          */
299         void reset_parent_pointers_of_children();
300 
301         /**
302          * Update its parent pointer and all its child nodes' parent pointers
303          * if the memory location of this node has been invalidated. Run the
304          * tree recursively until no more invalid pointers have been found.
305          */
306         void reset_parent_pointers();
307 
308         directory_node* get_directory_node();
309         const directory_node* get_directory_node() const;
310 
311         bool erase_child(const node_store* p);
312     };
313 
314     using dir_store_type = std::deque<node_store>;
315 
316     struct dir_store_segment
317     {
318         typename dir_store_type::iterator begin;
319         typename dir_store_type::iterator end;
320         size_t size;
321 
dir_store_segmentmdds::rtree::dir_store_segment322         dir_store_segment() : size(0) {}
323 
dir_store_segmentmdds::rtree::dir_store_segment324         dir_store_segment(
325             typename dir_store_type::iterator _begin,
326             typename dir_store_type::iterator _end,
327             size_t _size) :
328             begin(std::move(_begin)),
329             end(std::move(_end)),
330             size(_size) {}
331     };
332 
333     struct distribution
334     {
335         dir_store_segment g1;
336         dir_store_segment g2;
337 
distributionmdds::rtree::distribution338         distribution(size_t dist, dir_store_type& nodes)
339         {
340             g1.begin = nodes.begin();
341             g1.end = g1.begin;
342             g1.size = trait_type::min_node_size - 1 + dist;
343             std::advance(g1.end, g1.size);
344 
345             g2.begin = g1.end;
346             g2.end = nodes.end();
347             g2.size = nodes.size() - g1.size;
348         }
349     };
350 
351     struct node
352     {
353         node();
354         node(const node&) = delete;
355         ~node();
356     };
357 
358     struct value_node : public node
359     {
360         value_type value;
361 
362         value_node() = delete;
363         value_node(value_type&& _value);
364         value_node(const value_type& _value);
365         ~value_node();
366     };
367 
368     /**
369      * Directory node can either contain all value nodes, or all directory
370      * nodes as its child nodes.
371      */
372     struct directory_node : public node
373     {
374         dir_store_type children;
375 
376         directory_node(const directory_node&) = delete;
377         directory_node& operator=(const directory_node&) = delete;
378 
379         directory_node();
380         ~directory_node();
381 
382         bool erase(const node_store* p);
383 
384         node_store* get_child_with_minimal_overlap(const extent_type& bb);
385         node_store* get_child_with_minimal_area_enlargement(const extent_type& bb);
386 
387         extent_type calc_extent() const;
388         key_type calc_overlap_cost(const extent_type& bb) const;
389         bool has_leaf_directory() const;
390     };
391 
392     struct orphan_node_entry
393     {
394         node_store ns;
395         size_t depth;
396     };
397 
398     using orphan_node_entries_type = std::deque<orphan_node_entry>;
399 
400     struct insertion_point
401     {
402         node_store* ns;
403         size_t depth;
404     };
405 
406 public:
407 
408     template<typename _NS>
409     class search_results_base
410     {
411         friend class rtree;
412     protected:
413         using node_store_type = _NS;
414 
415         struct entry
416         {
417             node_store_type* ns;
418             size_t depth;
419 
420             entry(node_store_type* _ns, size_t _depth);
421         };
422 
423         using store_type = std::vector<entry>;
424         store_type m_store;
425 
426         void add_node_store(node_store_type* ns, size_t depth);
427     };
428 
429     class const_iterator;
430     class iterator;
431     class search_results;
432 
433     class const_search_results : public search_results_base<const node_store>
434     {
435         using base_type = search_results_base<const node_store>;
436         using base_type::m_store;
437     public:
438         const_iterator cbegin() const;
439         const_iterator cend() const;
440         const_iterator begin() const;
441         const_iterator end() const;
442     };
443 
444     class search_results : public search_results_base<node_store>
445     {
446         using base_type = search_results_base<node_store>;
447         using base_type::m_store;
448     public:
449         iterator begin();
450         iterator end();
451     };
452 
453     template<typename _SelfIter, typename _StoreIter, typename _ValueT>
454     class iterator_base
455     {
456     public:
457         using store_iterator_type = _StoreIter;
458         using self_iterator_type = _SelfIter;
459 
460     protected:
461         store_iterator_type m_pos;
462 
463         iterator_base(store_iterator_type pos);
464 
465     public:
466         // iterator traits
467         using value_type = _ValueT;
468         using pointer = value_type*;
469         using reference = value_type&;
470         using difference_type = std::ptrdiff_t;
471         using iterator_category = std::bidirectional_iterator_tag;
472 
473         bool operator== (const self_iterator_type& other) const;
474         bool operator!= (const self_iterator_type& other) const;
475 
476         self_iterator_type& operator++();
477         self_iterator_type operator++(int);
478 
479         self_iterator_type& operator--();
480         self_iterator_type operator--(int);
481 
482         const extent_type& extent() const;
483         size_t depth() const;
484     };
485 
486     class const_iterator : public iterator_base<
487         const_iterator,
488         typename const_search_results::store_type::const_iterator,
489         const rtree::value_type>
490     {
491         using base_type = iterator_base<
492             const_iterator,
493             typename const_search_results::store_type::const_iterator,
494             const rtree::value_type>;
495 
496         using store_type = typename const_search_results::store_type;
497         using typename base_type::store_iterator_type;
498         using base_type::m_pos;
499 
500         friend class rtree;
501 
502         const_iterator(store_iterator_type pos);
503     public:
504         using typename base_type::value_type;
505 
506         value_type& operator*() const;
507         value_type* operator->() const;
508     };
509 
510     class iterator : public iterator_base<
511         iterator,
512         typename search_results::store_type::iterator,
513         rtree::value_type>
514     {
515         using base_type = iterator_base<
516             iterator,
517             typename search_results::store_type::iterator,
518             rtree::value_type>;
519 
520         using store_type = typename const_search_results::store_type;
521         using typename base_type::store_iterator_type;
522         using base_type::m_pos;
523 
524         friend class rtree;
525 
526         iterator(store_iterator_type pos);
527     public:
528         using typename base_type::value_type;
529 
530         value_type& operator*();
531         value_type* operator->();
532     };
533 
534     /**
535      * Loader optimized for loading a large number of value objects.  A
536      * resultant tree will have a higher chance of being well balanced than if
537      * the value objects were inserted individually into the tree.
538      */
539     class bulk_loader
540     {
541         dir_store_type m_store;
542 
543     public:
544         bulk_loader();
545 
546         void insert(const extent_type& extent, value_type&& value);
547         void insert(const extent_type& extent, const value_type& value);
548 
549         void insert(const point_type& position, value_type&& value);
550         void insert(const point_type& position, const value_type& value);
551 
552         rtree pack();
553 
554     private:
555         void pack_level(dir_store_type& store, size_t depth);
556 
557         void insert_impl(const extent_type& extent, value_type&& value);
558         void insert_impl(const extent_type& extent, const value_type& value);
559     };
560 
561     rtree();
562     rtree(rtree&& other);
563     rtree(const rtree& other);
564 
565 private:
566     rtree(node_store&& root); // only used internally by bulk_loader.
567 
568 public:
569     ~rtree();
570 
571     rtree& operator= (const rtree& other);
572 
573     rtree& operator= (rtree&& other);
574 
575     /**
576      * Insert a new value associated with a bounding box.  The new value
577      * object will be moved into the container.
578      *
579      * @param extent bounding box associated with the value.
580      * @param value value being inserted.
581      */
582     void insert(const extent_type& extent, value_type&& value);
583 
584     /**
585      * Insert a new value associated with a bounding box.  A copy of the new
586      * value object will be placed into the container.
587      *
588      * @param extent bounding box associated with the value.
589      * @param value value being inserted.
590      */
591     void insert(const extent_type& extent, const value_type& value);
592 
593     /**
594      * Insert a new value associated with a point.  The new value object will
595      * be moved into the container.
596      *
597      * @param position point associated with the value.
598      * @param value value being inserted.
599      */
600     void insert(const point_type& position, value_type&& value);
601 
602     /**
603      * Insert a new value associated with a point.  A copy of the new value
604      * object will be placed into the container.
605      *
606      * @param position point associated with the value.
607      * @param value value being inserted.
608      */
609     void insert(const point_type& position, const value_type& value);
610 
611     /**
612      * Search the tree and collect all value objects whose extents either
613      * contain the specified point, or exactly match the specified point.
614      *
615      * @param pt reference point to use for the search.
616      * @param st search type that determines the satisfying condition of the
617      *           search with respect to the reference point.
618      *
619      * @return collection of all value objects that satisfy the specified
620      *         search condition.  This collection is immutable.
621      */
622     const_search_results search(const point_type& pt, search_type st) const;
623 
624     /**
625      * Search the tree and collect all value objects whose extents either
626      * contain the specified point, or exactly match the specified point.
627      *
628      * @param pt reference point to use for the search.
629      * @param st search type that determines the satisfying condition of the
630      *           search with respect to the reference point.
631      *
632      * @return collection of all value objects that satisfy the specified
633      *         search condition.  This collection is mutable.
634      */
635     search_results search(const point_type& pt, search_type st);
636 
637     /**
638      * Search the tree and collect all value objects whose extents either
639      * overlaps with the specified extent, or exactly match the specified
640      * extent.
641      *
642      * @param extent reference extent to use for the search.
643      * @param st search type that determines the satisfying condition of the
644      *           search with respect to the reference extent.
645      *
646      * @return collection of all value objects that satisfy the specified
647      *         search condition.  This collection is immutable.
648      */
649     const_search_results search(const extent_type& extent, search_type st) const;
650 
651     /**
652      * Search the tree and collect all value objects whose extents either
653      * overlaps with the specified extent, or exactly match the specified
654      * extent.
655      *
656      * @param extent reference extent to use for the search.
657      * @param st search type that determines the satisfying condition of the
658      *           search with respect to the reference extent.
659      *
660      * @return collection of all value objects that satisfy the specified
661      *         search condition.  This collection is mutable.
662      */
663     search_results search(const extent_type& extent, search_type st);
664 
665     /**
666      * Erase the value object referenced by the iterator passed to this
667      * method.
668      *
669      * <p>The iterator object will become invalid if the call results in an
670      * erasure of a value.</p>
671      *
672      * @param pos iterator that refernces the value object to erase.
673      */
674     void erase(const const_iterator& pos);
675 
676     /**
677      * Erase the value object referenced by the iterator passed to this
678      * method.
679      *
680      * <p>The iterator object will become invalid if the call results in an
681      * erasure of a value.</p>
682      *
683      * @param pos iterator that refernces the value object to erase.
684      */
685     void erase(const iterator& pos);
686 
687     /**
688      * Get the minimum bounding extent of the root node of the tree. The
689      * extent returned from this method is the minimum extent that contains
690      * the extents of all objects stored in the tree.
691      *
692      * @return immutable reference to the extent of the root node of the tree.
693      */
694     const extent_type& extent() const;
695 
696     /**
697      * Check whether or not the tree stores any objects.
698      *
699      * @return true if the tree is empty, otherwise false.
700      */
701     bool empty() const;
702 
703     /**
704      * Return the number of value nodes currently stored in the tree.
705      *
706      * @return number of value nodes currently in the tree.
707      */
708     size_t size() const;
709 
710     /**
711      * Swap the content of the tree with another instance.
712      *
713      * @param other another instance to swap the content with.
714      */
715     void swap(rtree& other);
716 
717     /**
718      * Empty the entire container.
719      */
720     void clear();
721 
722     /**
723      * Walk down the entire tree depth first.
724      *
725      * @func function or function object that gets called at each node in the
726      *       tree.
727      */
728     template<typename _Func>
729     void walk(_Func func) const;
730 
731     /**
732      * Check the integrity of the entire tree structure.
733      *
734      * @param props specify how the check is to be performed.
735      *
736      * @exception integrity_error if the integrity check fails.
737      */
738     void check_integrity(const integrity_check_properties& props) const;
739 
740     /**
741      * Export the structure of a tree in textural format.
742      *
743      * @param mode specify the format in which to represent the structure of a
744      *             tree.
745      *
746      * @return string representation of the tree structure.
747      */
748     std::string export_tree(export_tree_type mode) const;
749 
750 private:
751 
752     void insert_impl(const point_type& start, const point_type& end, value_type&& value);
753     void insert_impl(const point_type& start, const point_type& end, const value_type& value);
754 
755     void erase_impl(const node_store* ns, size_t depth);
756 
757     /**
758      * Build and return a callable function object that you can call in order
759      * to convert node's memory location to a normalized unique string still
760      * representative of that node.
761      */
762     detail::rtree::ptr_to_string<const node_store*> build_ptr_to_string_map() const;
763 
764     std::string export_tree_formatted() const;
765     std::string export_tree_extent_as_obj() const;
766     std::string export_tree_extent_as_svg() const;
767 
768     void insert(node_store&& ns, std::unordered_set<size_t>* reinserted_depths);
769     void insert_dir(node_store&& ns, size_t max_depth);
770 
771     /**
772      * Split an overfilled node.  The node to split is expected to have
773      * exactly M+1 child nodes where M is the maximum number of child nodes a
774      * single node is allowed to have.
775      *
776      * @param ns node to split.
777      */
778     void split_node(node_store* ns);
779 
780     void perform_forced_reinsertion(node_store* ns, std::unordered_set<size_t>& reinserted_depth);
781 
782     /**
783      * Determine the best dimension to split by, and sort the child nodes by
784      * that dimension.
785      *
786      * @param children child nodes to sort.
787      */
788     void sort_dir_store_by_split_dimension(dir_store_type& children);
789 
790     void sort_dir_store_by_dimension(size_t dim, dir_store_type& children);
791 
792     size_t pick_optimal_distribution(dir_store_type& children) const;
793 
794     insertion_point find_leaf_directory_node_for_insertion(const extent_type& bb);
795     node_store* find_nonleaf_directory_node_for_insertion(const extent_type& bb, size_t max_depth);
796 
797     template<typename _Func>
798     void descend_with_func(_Func func) const;
799 
800     using search_condition_type = std::function<bool(const node_store&)>;
801 
802     template<typename _ResT>
803     void search_descend(
804         size_t depth, const search_condition_type& dir_cond, const search_condition_type& value_cond,
805         typename _ResT::node_store_type& ns, _ResT& results) const;
806 
807     void shrink_tree_upward(node_store* ns, const extent_type& bb_affected);
808 
809 private:
810     node_store m_root;
811 };
812 
813 } // namespace mdds
814 
815 #include "rtree_def.inl"
816 
817 #endif
818 
819 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
820