1 // Boost.Geometry Index
2 //
3 // R-tree initial packing
4 //
5 // Copyright (c) 2011-2017 Adam Wulkiewicz, Lodz, Poland.
6 // Copyright (c) 2020 Caian Benedicto, Campinas, Brazil.
7 //
8 // This file was modified by Oracle on 2019.
9 // Modifications copyright (c) 2019 Oracle and/or its affiliates.
10 // Contributed and/or modified by Adam Wulkiewicz, on behalf of Oracle
11 //
12 // Use, modification and distribution is subject to the Boost Software License,
13 // Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
14 // http://www.boost.org/LICENSE_1_0.txt)
15 
16 #ifndef BOOST_GEOMETRY_INDEX_DETAIL_RTREE_PACK_CREATE_HPP
17 #define BOOST_GEOMETRY_INDEX_DETAIL_RTREE_PACK_CREATE_HPP
18 
19 #include <boost/core/ignore_unused.hpp>
20 
21 #include <boost/geometry/algorithms/expand.hpp>
22 #include <boost/geometry/index/detail/algorithms/bounds.hpp>
23 #include <boost/geometry/index/detail/algorithms/nth_element.hpp>
24 #include <boost/geometry/index/detail/rtree/node/subtree_destroyer.hpp>
25 
26 #include <boost/geometry/algorithms/detail/expand_by_epsilon.hpp>
27 
28 namespace boost { namespace geometry { namespace index { namespace detail { namespace rtree {
29 
30 namespace pack_utils {
31 
32 template <std::size_t Dimension>
33 struct biggest_edge
34 {
35     BOOST_STATIC_ASSERT(0 < Dimension);
36     template <typename Box>
applyboost::geometry::index::detail::rtree::pack_utils::biggest_edge37     static inline void apply(Box const& box, typename coordinate_type<Box>::type & length, std::size_t & dim_index)
38     {
39         biggest_edge<Dimension-1>::apply(box, length, dim_index);
40         typename coordinate_type<Box>::type curr
41             = geometry::get<max_corner, Dimension-1>(box) - geometry::get<min_corner, Dimension-1>(box);
42         if ( length < curr )
43         {
44             dim_index = Dimension - 1;
45             length = curr;
46         }
47     }
48 };
49 
50 template <>
51 struct biggest_edge<1>
52 {
53     template <typename Box>
applyboost::geometry::index::detail::rtree::pack_utils::biggest_edge54     static inline void apply(Box const& box, typename coordinate_type<Box>::type & length, std::size_t & dim_index)
55     {
56         dim_index = 0;
57         length = geometry::get<max_corner, 0>(box) - geometry::get<min_corner, 0>(box);
58     }
59 };
60 
61 template <std::size_t I>
62 struct point_entries_comparer
63 {
64     template <typename PointEntry>
operator ()boost::geometry::index::detail::rtree::pack_utils::point_entries_comparer65     bool operator()(PointEntry const& e1, PointEntry const& e2) const
66     {
67         return geometry::get<I>(e1.first) < geometry::get<I>(e2.first);
68     }
69 };
70 
71 template <std::size_t I, std::size_t Dimension>
72 struct nth_element_and_half_boxes
73 {
74     template <typename EIt, typename Box>
applyboost::geometry::index::detail::rtree::pack_utils::nth_element_and_half_boxes75     static inline void apply(EIt first, EIt median, EIt last, Box const& box, Box & left, Box & right, std::size_t dim_index)
76     {
77         if ( I == dim_index )
78         {
79             index::detail::nth_element(first, median, last, point_entries_comparer<I>());
80 
81             geometry::convert(box, left);
82             geometry::convert(box, right);
83             typename coordinate_type<Box>::type edge_len
84                 = geometry::get<max_corner, I>(box) - geometry::get<min_corner, I>(box);
85             typename coordinate_type<Box>::type median
86                 = geometry::get<min_corner, I>(box) + edge_len / 2;
87             geometry::set<max_corner, I>(left, median);
88             geometry::set<min_corner, I>(right, median);
89         }
90         else
91             nth_element_and_half_boxes<I+1, Dimension>::apply(first, median, last, box, left, right, dim_index);
92     }
93 };
94 
95 template <std::size_t Dimension>
96 struct nth_element_and_half_boxes<Dimension, Dimension>
97 {
98     template <typename EIt, typename Box>
applyboost::geometry::index::detail::rtree::pack_utils::nth_element_and_half_boxes99     static inline void apply(EIt , EIt , EIt , Box const& , Box & , Box & , std::size_t ) {}
100 };
101 
102 } // namespace pack_utils
103 
104 // STR leafs number are calculated as rcount/max
105 // and the number of splitting planes for each dimension as (count/max)^(1/dimension)
106 // <-> for dimension==2 -> sqrt(count/max)
107 //
108 // The main flaw of this algorithm is that the resulting tree will have bad structure for:
109 // 1. non-uniformly distributed elements
110 //      Statistic check could be performed, e.g. based on variance of lengths of elements edges for each dimension
111 // 2. elements distributed mainly along one axis
112 //      Calculate bounding box of all elements and then number of dividing planes for a dimension
113 //      from the length of BB edge for this dimension (more or less assuming that elements are uniformly-distributed squares)
114 //
115 // Another thing is that the last node may have less elements than Max or even Min.
116 // The number of splitting planes must be chosen more carefully than count/max
117 //
118 // This algorithm is something between STR and TGS
119 // it is more similar to the top-down recursive kd-tree creation algorithm
120 // using object median split and split axis of greatest BB edge
121 // BB is only used as a hint (assuming objects are distributed uniformly)
122 //
123 // Implemented algorithm guarantees that the number of elements in nodes will be between Min and Max
124 // and that nodes are packed as tightly as possible
125 // e.g. for 177 values Max = 5 and Min = 2 it will construct the following tree:
126 // ROOT                 177
127 // L1          125               52
128 // L2  25  25  25  25  25   25  17    10
129 // L3  5x5 5x5 5x5 5x5 5x5  5x5 3x5+2 2x5
130 
131 template <typename MembersHolder>
132 class pack
133 {
134     typedef typename MembersHolder::node node;
135     typedef typename MembersHolder::internal_node internal_node;
136     typedef typename MembersHolder::leaf leaf;
137 
138     typedef typename MembersHolder::node_pointer node_pointer;
139     typedef typename MembersHolder::size_type size_type;
140     typedef typename MembersHolder::parameters_type parameters_type;
141     typedef typename MembersHolder::translator_type translator_type;
142     typedef typename MembersHolder::allocators_type allocators_type;
143 
144     typedef typename MembersHolder::box_type box_type;
145     typedef typename geometry::point_type<box_type>::type point_type;
146     typedef typename geometry::coordinate_type<point_type>::type coordinate_type;
147     typedef typename detail::default_content_result<box_type>::type content_type;
148     typedef typename detail::strategy_type<parameters_type>::type strategy_type;
149     static const std::size_t dimension = geometry::dimension<point_type>::value;
150 
151     typedef typename rtree::container_from_elements_type<
152         typename rtree::elements_type<leaf>::type,
153         size_type
154     >::type values_counts_container;
155 
156     typedef typename rtree::elements_type<internal_node>::type internal_elements;
157     typedef typename internal_elements::value_type internal_element;
158 
159     typedef rtree::subtree_destroyer<MembersHolder> subtree_destroyer;
160 
161 public:
162     // Arbitrary iterators
163     template <typename InIt> inline static
apply(InIt first,InIt last,size_type & values_count,size_type & leafs_level,parameters_type const & parameters,translator_type const & translator,allocators_type & allocators)164     node_pointer apply(InIt first, InIt last,
165                        size_type & values_count,
166                        size_type & leafs_level,
167                        parameters_type const& parameters,
168                        translator_type const& translator,
169                        allocators_type & allocators)
170     {
171         return apply(first, last, values_count, leafs_level, parameters, translator,
172                      allocators, boost::container::new_allocator<void>());
173     }
174 
175     template <typename InIt, typename TmpAlloc> inline static
apply(InIt first,InIt last,size_type & values_count,size_type & leafs_level,parameters_type const & parameters,translator_type const & translator,allocators_type & allocators,TmpAlloc const & temp_allocator)176     node_pointer apply(InIt first, InIt last,
177                        size_type & values_count,
178                        size_type & leafs_level,
179                        parameters_type const& parameters,
180                        translator_type const& translator,
181                        allocators_type & allocators,
182                        TmpAlloc const& temp_allocator)
183     {
184         typedef typename std::iterator_traits<InIt>::difference_type diff_type;
185 
186         diff_type diff = std::distance(first, last);
187         if ( diff <= 0 )
188             return node_pointer(0);
189 
190         typedef std::pair<point_type, InIt> entry_type;
191         typedef typename boost::container::allocator_traits<TmpAlloc>::
192             template rebind_alloc<entry_type> temp_entry_allocator_type;
193 
194         temp_entry_allocator_type temp_entry_allocator(temp_allocator);
195         boost::container::vector<entry_type, temp_entry_allocator_type> entries(temp_entry_allocator);
196 
197         values_count = static_cast<size_type>(diff);
198         entries.reserve(values_count);
199 
200         expandable_box<box_type, strategy_type> hint_box(detail::get_strategy(parameters));
201         for ( ; first != last ; ++first )
202         {
203             // NOTE: support for iterators not returning true references adapted
204             // to Geometry concept and default translator returning true reference
205             // An alternative would be to dereference the iterator and translate
206             // in one expression each time the indexable was needed.
207             typename std::iterator_traits<InIt>::reference in_ref = *first;
208             typename translator_type::result_type indexable = translator(in_ref);
209 
210             // NOTE: added for consistency with insert()
211             // CONSIDER: alternative - ignore invalid indexable or throw an exception
212             BOOST_GEOMETRY_INDEX_ASSERT(detail::is_valid(indexable), "Indexable is invalid");
213 
214             hint_box.expand(indexable);
215 
216             point_type pt;
217             geometry::centroid(indexable, pt);
218             entries.push_back(std::make_pair(pt, first));
219         }
220 
221         subtree_elements_counts subtree_counts = calculate_subtree_elements_counts(values_count, parameters, leafs_level);
222         internal_element el = per_level(entries.begin(), entries.end(), hint_box.get(), values_count, subtree_counts,
223                                         parameters, translator, allocators);
224 
225         return el.second;
226     }
227 
228 private:
229     template <typename BoxType, typename Strategy>
230     class expandable_box
231     {
232     public:
expandable_box(Strategy const & strategy)233         explicit expandable_box(Strategy const& strategy)
234             : m_strategy(strategy), m_initialized(false)
235         {}
236 
237         template <typename Indexable>
expandable_box(Indexable const & indexable,Strategy const & strategy)238         explicit expandable_box(Indexable const& indexable, Strategy const& strategy)
239             : m_strategy(strategy), m_initialized(true)
240         {
241             detail::bounds(indexable, m_box, m_strategy);
242         }
243 
244         template <typename Indexable>
expand(Indexable const & indexable)245         void expand(Indexable const& indexable)
246         {
247             if ( !m_initialized )
248             {
249                 // it's guaranteed that the Box will be initialized
250                 // only for Points, Boxes and Segments but that's ok
251                 // since only those Geometries can be stored
252                 detail::bounds(indexable, m_box, m_strategy);
253                 m_initialized = true;
254             }
255             else
256             {
257                 detail::expand(m_box, indexable, m_strategy);
258             }
259         }
260 
expand_by_epsilon()261         void expand_by_epsilon()
262         {
263             geometry::detail::expand_by_epsilon(m_box);
264         }
265 
get() const266         BoxType const& get() const
267         {
268             BOOST_GEOMETRY_INDEX_ASSERT(m_initialized, "uninitialized envelope accessed");
269             return m_box;
270         }
271 
272     private:
273         BoxType m_box;
274         Strategy m_strategy;
275         bool m_initialized;
276     };
277 
278     struct subtree_elements_counts
279     {
subtree_elements_countsboost::geometry::index::detail::rtree::pack::subtree_elements_counts280         subtree_elements_counts(size_type ma, size_type mi) : maxc(ma), minc(mi) {}
281         size_type maxc;
282         size_type minc;
283     };
284 
285     template <typename EIt> inline static
per_level(EIt first,EIt last,box_type const & hint_box,size_type values_count,subtree_elements_counts const & subtree_counts,parameters_type const & parameters,translator_type const & translator,allocators_type & allocators)286     internal_element per_level(EIt first, EIt last,
287                                box_type const& hint_box,
288                                size_type values_count,
289                                subtree_elements_counts const& subtree_counts,
290                                parameters_type const& parameters,
291                                translator_type const& translator,
292                                allocators_type & allocators)
293     {
294         BOOST_GEOMETRY_INDEX_ASSERT(0 < std::distance(first, last) && static_cast<size_type>(std::distance(first, last)) == values_count,
295                                     "unexpected parameters");
296 
297         if ( subtree_counts.maxc <= 1 )
298         {
299             // ROOT or LEAF
300             BOOST_GEOMETRY_INDEX_ASSERT(values_count <= parameters.get_max_elements(),
301                                         "too big number of elements");
302             // if !root check m_parameters.get_min_elements() <= count
303 
304             // create new leaf node
305             node_pointer n = rtree::create_node<allocators_type, leaf>::apply(allocators);                       // MAY THROW (A)
306             subtree_destroyer auto_remover(n, allocators);
307             leaf & l = rtree::get<leaf>(*n);
308 
309             // reserve space for values
310             rtree::elements(l).reserve(values_count);                                                       // MAY THROW (A)
311 
312             // calculate values box and copy values
313             //   initialize the box explicitly to avoid GCC-4.4 uninitialized variable warnings with O2
314             expandable_box<box_type, strategy_type> elements_box(translator(*(first->second)),
315                                                                  detail::get_strategy(parameters));
316             rtree::elements(l).push_back(*(first->second));                                                 // MAY THROW (A?,C)
317             for ( ++first ; first != last ; ++first )
318             {
319                 // NOTE: push_back() must be called at the end in order to support move_iterator.
320                 //       The iterator is dereferenced 2x (no temporary reference) to support
321                 //       non-true reference types and move_iterator without boost::forward<>.
322                 elements_box.expand(translator(*(first->second)));
323                 rtree::elements(l).push_back(*(first->second));                                             // MAY THROW (A?,C)
324             }
325 
326 #ifdef BOOST_GEOMETRY_INDEX_EXPERIMENTAL_ENLARGE_BY_EPSILON
327             // Enlarge bounds of a leaf node.
328             // It's because Points and Segments are compared WRT machine epsilon
329             // This ensures that leafs bounds correspond to the stored elements
330             // NOTE: this is done only if the Indexable is a different kind of Geometry
331             //   than the bounds (only Box for now). Spatial predicates are checked
332             //   the same way for Geometry of the same kind.
333             if ( BOOST_GEOMETRY_CONDITION((
334                     ! index::detail::is_bounding_geometry
335                         <
336                             typename indexable_type<translator_type>::type
337                         >::value )) )
338             {
339                 elements_box.expand_by_epsilon();
340             }
341 #endif
342 
343             auto_remover.release();
344             return internal_element(elements_box.get(), n);
345         }
346 
347         // calculate next max and min subtree counts
348         subtree_elements_counts next_subtree_counts = subtree_counts;
349         next_subtree_counts.maxc /= parameters.get_max_elements();
350         next_subtree_counts.minc /= parameters.get_max_elements();
351 
352         // create new internal node
353         node_pointer n = rtree::create_node<allocators_type, internal_node>::apply(allocators);                  // MAY THROW (A)
354         subtree_destroyer auto_remover(n, allocators);
355         internal_node & in = rtree::get<internal_node>(*n);
356 
357         // reserve space for values
358         size_type nodes_count = calculate_nodes_count(values_count, subtree_counts);
359         rtree::elements(in).reserve(nodes_count);                                                           // MAY THROW (A)
360         // calculate values box and copy values
361         expandable_box<box_type, strategy_type> elements_box(detail::get_strategy(parameters));
362 
363         per_level_packets(first, last, hint_box, values_count, subtree_counts, next_subtree_counts,
364                           rtree::elements(in), elements_box,
365                           parameters, translator, allocators);
366 
367         auto_remover.release();
368         return internal_element(elements_box.get(), n);
369     }
370 
371     template <typename EIt, typename ExpandableBox> inline static
per_level_packets(EIt first,EIt last,box_type const & hint_box,size_type values_count,subtree_elements_counts const & subtree_counts,subtree_elements_counts const & next_subtree_counts,internal_elements & elements,ExpandableBox & elements_box,parameters_type const & parameters,translator_type const & translator,allocators_type & allocators)372     void per_level_packets(EIt first, EIt last,
373                            box_type const& hint_box,
374                            size_type values_count,
375                            subtree_elements_counts const& subtree_counts,
376                            subtree_elements_counts const& next_subtree_counts,
377                            internal_elements & elements,
378                            ExpandableBox & elements_box,
379                            parameters_type const& parameters,
380                            translator_type const& translator,
381                            allocators_type & allocators)
382     {
383         BOOST_GEOMETRY_INDEX_ASSERT(0 < std::distance(first, last) && static_cast<size_type>(std::distance(first, last)) == values_count,
384                                     "unexpected parameters");
385 
386         BOOST_GEOMETRY_INDEX_ASSERT(subtree_counts.minc <= values_count,
387                                     "too small number of elements");
388 
389         // only one packet
390         if ( values_count <= subtree_counts.maxc )
391         {
392             // the end, move to the next level
393             internal_element el = per_level(first, last, hint_box, values_count, next_subtree_counts,
394                                             parameters, translator, allocators);
395 
396             // in case if push_back() do throw here
397             // and even if this is not probable (previously reserved memory, nonthrowing pairs copy)
398             // this case is also tested by exceptions test.
399             subtree_destroyer auto_remover(el.second, allocators);
400             // this container should have memory allocated, reserve() called outside
401             elements.push_back(el);                                                 // MAY THROW (A?,C) - however in normal conditions shouldn't
402             auto_remover.release();
403 
404             elements_box.expand(el.first);
405             return;
406         }
407 
408         size_type median_count = calculate_median_count(values_count, subtree_counts);
409         EIt median = first + median_count;
410 
411         coordinate_type greatest_length;
412         std::size_t greatest_dim_index = 0;
413         pack_utils::biggest_edge<dimension>::apply(hint_box, greatest_length, greatest_dim_index);
414         box_type left, right;
415         pack_utils::nth_element_and_half_boxes<0, dimension>
416             ::apply(first, median, last, hint_box, left, right, greatest_dim_index);
417 
418         per_level_packets(first, median, left,
419                           median_count, subtree_counts, next_subtree_counts,
420                           elements, elements_box,
421                           parameters, translator, allocators);
422         per_level_packets(median, last, right,
423                           values_count - median_count, subtree_counts, next_subtree_counts,
424                           elements, elements_box,
425                           parameters, translator, allocators);
426     }
427 
428     inline static
calculate_subtree_elements_counts(size_type elements_count,parameters_type const & parameters,size_type & leafs_level)429     subtree_elements_counts calculate_subtree_elements_counts(size_type elements_count, parameters_type const& parameters, size_type & leafs_level)
430     {
431         boost::ignore_unused(parameters);
432 
433         subtree_elements_counts res(1, 1);
434         leafs_level = 0;
435 
436         size_type smax = parameters.get_max_elements();
437         for ( ; smax < elements_count ; smax *= parameters.get_max_elements(), ++leafs_level )
438             res.maxc = smax;
439 
440         res.minc = parameters.get_min_elements() * (res.maxc / parameters.get_max_elements());
441 
442         return res;
443     }
444 
445     inline static
calculate_nodes_count(size_type count,subtree_elements_counts const & subtree_counts)446     size_type calculate_nodes_count(size_type count,
447                                     subtree_elements_counts const& subtree_counts)
448     {
449         size_type n = count / subtree_counts.maxc;
450         size_type r = count % subtree_counts.maxc;
451 
452         if ( 0 < r && r < subtree_counts.minc )
453         {
454             size_type count_minus_min = count - subtree_counts.minc;
455             n = count_minus_min / subtree_counts.maxc;
456             r = count_minus_min % subtree_counts.maxc;
457             ++n;
458         }
459 
460         if ( 0 < r )
461             ++n;
462 
463         return n;
464     }
465 
466     inline static
calculate_median_count(size_type count,subtree_elements_counts const & subtree_counts)467     size_type calculate_median_count(size_type count,
468                                      subtree_elements_counts const& subtree_counts)
469     {
470         // e.g. for max = 5, min = 2, count = 52, subtree_max = 25, subtree_min = 10
471 
472         size_type n = count / subtree_counts.maxc; // e.g. 52 / 25 = 2
473         size_type r = count % subtree_counts.maxc; // e.g. 52 % 25 = 2
474         size_type median_count = (n / 2) * subtree_counts.maxc; // e.g. 2 / 2 * 25 = 25
475 
476         if ( 0 != r ) // e.g. 0 != 2
477         {
478             if ( subtree_counts.minc <= r ) // e.g. 10 <= 2 == false
479             {
480                 //BOOST_GEOMETRY_INDEX_ASSERT(0 < n, "unexpected value");
481                 median_count = ((n+1)/2) * subtree_counts.maxc; // if calculated ((2+1)/2) * 25 which would be ok, but not in all cases
482             }
483             else // r < subtree_counts.second  // e.g. 2 < 10 == true
484             {
485                 size_type count_minus_min = count - subtree_counts.minc; // e.g. 52 - 10 = 42
486                 n = count_minus_min / subtree_counts.maxc; // e.g. 42 / 25 = 1
487                 r = count_minus_min % subtree_counts.maxc; // e.g. 42 % 25 = 17
488                 if ( r == 0 )                               // e.g. false
489                 {
490                     // n can't be equal to 0 because then there wouldn't be any element in the other node
491                     //BOOST_GEOMETRY_INDEX_ASSERT(0 < n, "unexpected value");
492                     median_count = ((n+1)/2) * subtree_counts.maxc;     // if calculated ((1+1)/2) * 25 which would be ok, but not in all cases
493                 }
494                 else
495                 {
496                     if ( n == 0 )                                        // e.g. false
497                         median_count = r;                                // if calculated -> 17 which is wrong!
498                     else
499                         median_count = ((n+2)/2) * subtree_counts.maxc; // e.g. ((1+2)/2) * 25 = 25
500                 }
501             }
502         }
503 
504         return median_count;
505     }
506 };
507 
508 }}}}} // namespace boost::geometry::index::detail::rtree
509 
510 #endif // BOOST_GEOMETRY_INDEX_DETAIL_RTREE_PACK_CREATE_HPP
511