1 // Boost.Geometry Index
2 //
3 // R-tree initial packing
4 //
5 // Copyright (c) 2011-2017 Adam Wulkiewicz, Lodz, Poland.
6 //
7 // This file was modified by Oracle on 2019.
8 // Modifications copyright (c) 2019 Oracle and/or its affiliates.
9 // Contributed and/or modified by Adam Wulkiewicz, on behalf of Oracle
10 //
11 // Use, modification and distribution is subject to the Boost Software License,
12 // Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
13 // http://www.boost.org/LICENSE_1_0.txt)
14 
15 #ifndef BOOST_GEOMETRY_INDEX_DETAIL_RTREE_PACK_CREATE_HPP
16 #define BOOST_GEOMETRY_INDEX_DETAIL_RTREE_PACK_CREATE_HPP
17 
18 #include <boost/core/ignore_unused.hpp>
19 
20 #include <boost/geometry/algorithms/expand.hpp>
21 #include <boost/geometry/index/detail/algorithms/bounds.hpp>
22 #include <boost/geometry/index/detail/algorithms/nth_element.hpp>
23 
24 #include <boost/geometry/algorithms/detail/expand_by_epsilon.hpp>
25 
26 namespace boost { namespace geometry { namespace index { namespace detail { namespace rtree {
27 
28 namespace pack_utils {
29 
30 template <std::size_t Dimension>
31 struct biggest_edge
32 {
33     BOOST_STATIC_ASSERT(0 < Dimension);
34     template <typename Box>
applyboost::geometry::index::detail::rtree::pack_utils::biggest_edge35     static inline void apply(Box const& box, typename coordinate_type<Box>::type & length, std::size_t & dim_index)
36     {
37         biggest_edge<Dimension-1>::apply(box, length, dim_index);
38         typename coordinate_type<Box>::type curr
39             = geometry::get<max_corner, Dimension-1>(box) - geometry::get<min_corner, Dimension-1>(box);
40         if ( length < curr )
41         {
42             dim_index = Dimension - 1;
43             length = curr;
44         }
45     }
46 };
47 
48 template <>
49 struct biggest_edge<1>
50 {
51     template <typename Box>
applyboost::geometry::index::detail::rtree::pack_utils::biggest_edge52     static inline void apply(Box const& box, typename coordinate_type<Box>::type & length, std::size_t & dim_index)
53     {
54         dim_index = 0;
55         length = geometry::get<max_corner, 0>(box) - geometry::get<min_corner, 0>(box);
56     }
57 };
58 
59 template <std::size_t I>
60 struct point_entries_comparer
61 {
62     template <typename PointEntry>
operator ()boost::geometry::index::detail::rtree::pack_utils::point_entries_comparer63     bool operator()(PointEntry const& e1, PointEntry const& e2) const
64     {
65         return geometry::get<I>(e1.first) < geometry::get<I>(e2.first);
66     }
67 };
68 
69 template <std::size_t I, std::size_t Dimension>
70 struct nth_element_and_half_boxes
71 {
72     template <typename EIt, typename Box>
applyboost::geometry::index::detail::rtree::pack_utils::nth_element_and_half_boxes73     static inline void apply(EIt first, EIt median, EIt last, Box const& box, Box & left, Box & right, std::size_t dim_index)
74     {
75         if ( I == dim_index )
76         {
77             index::detail::nth_element(first, median, last, point_entries_comparer<I>());
78 
79             geometry::convert(box, left);
80             geometry::convert(box, right);
81             typename coordinate_type<Box>::type edge_len
82                 = geometry::get<max_corner, I>(box) - geometry::get<min_corner, I>(box);
83             typename coordinate_type<Box>::type median
84                 = geometry::get<min_corner, I>(box) + edge_len / 2;
85             geometry::set<max_corner, I>(left, median);
86             geometry::set<min_corner, I>(right, median);
87         }
88         else
89             nth_element_and_half_boxes<I+1, Dimension>::apply(first, median, last, box, left, right, dim_index);
90     }
91 };
92 
93 template <std::size_t Dimension>
94 struct nth_element_and_half_boxes<Dimension, Dimension>
95 {
96     template <typename EIt, typename Box>
applyboost::geometry::index::detail::rtree::pack_utils::nth_element_and_half_boxes97     static inline void apply(EIt , EIt , EIt , Box const& , Box & , Box & , std::size_t ) {}
98 };
99 
100 } // namespace pack_utils
101 
102 // STR leafs number are calculated as rcount/max
103 // and the number of splitting planes for each dimension as (count/max)^(1/dimension)
104 // <-> for dimension==2 -> sqrt(count/max)
105 //
106 // The main flaw of this algorithm is that the resulting tree will have bad structure for:
107 // 1. non-uniformly distributed elements
108 //      Statistic check could be performed, e.g. based on variance of lengths of elements edges for each dimension
109 // 2. elements distributed mainly along one axis
110 //      Calculate bounding box of all elements and then number of dividing planes for a dimension
111 //      from the length of BB edge for this dimension (more or less assuming that elements are uniformly-distributed squares)
112 //
113 // Another thing is that the last node may have less elements than Max or even Min.
114 // The number of splitting planes must be chosen more carefully than count/max
115 //
116 // This algorithm is something between STR and TGS
117 // it is more similar to the top-down recursive kd-tree creation algorithm
118 // using object median split and split axis of greatest BB edge
119 // BB is only used as a hint (assuming objects are distributed uniformly)
120 //
121 // Implemented algorithm guarantees that the number of elements in nodes will be between Min and Max
122 // and that nodes are packed as tightly as possible
123 // e.g. for 177 values Max = 5 and Min = 2 it will construct the following tree:
124 // ROOT                 177
125 // L1          125               52
126 // L2  25  25  25  25  25   25  17    10
127 // L3  5x5 5x5 5x5 5x5 5x5  5x5 3x5+2 2x5
128 
129 template <typename Value, typename Options, typename Translator, typename Box, typename Allocators>
130 class pack
131 {
132     typedef typename rtree::node<Value, typename Options::parameters_type, Box, Allocators, typename Options::node_tag>::type node;
133     typedef typename rtree::internal_node<Value, typename Options::parameters_type, Box, Allocators, typename Options::node_tag>::type internal_node;
134     typedef typename rtree::leaf<Value, typename Options::parameters_type, Box, Allocators, typename Options::node_tag>::type leaf;
135 
136     typedef typename Allocators::node_pointer node_pointer;
137     typedef rtree::subtree_destroyer<Value, Options, Translator, Box, Allocators> subtree_destroyer;
138     typedef typename Allocators::size_type size_type;
139 
140     typedef typename geometry::point_type<Box>::type point_type;
141     typedef typename geometry::coordinate_type<point_type>::type coordinate_type;
142     typedef typename detail::default_content_result<Box>::type content_type;
143     typedef typename Options::parameters_type parameters_type;
144     typedef typename detail::strategy_type<parameters_type>::type strategy_type;
145     static const std::size_t dimension = geometry::dimension<point_type>::value;
146 
147     typedef typename rtree::container_from_elements_type<
148         typename rtree::elements_type<leaf>::type,
149         std::size_t
150     >::type values_counts_container;
151 
152     typedef typename rtree::elements_type<internal_node>::type internal_elements;
153     typedef typename internal_elements::value_type internal_element;
154 
155 public:
156     // Arbitrary iterators
157     template <typename InIt> inline static
apply(InIt first,InIt last,size_type & values_count,size_type & leafs_level,parameters_type const & parameters,Translator const & translator,Allocators & allocators)158     node_pointer apply(InIt first, InIt last, size_type & values_count, size_type & leafs_level,
159                        parameters_type const& parameters, Translator const& translator, Allocators & allocators)
160     {
161         typedef typename std::iterator_traits<InIt>::difference_type diff_type;
162 
163         diff_type diff = std::distance(first, last);
164         if ( diff <= 0 )
165             return node_pointer(0);
166 
167         typedef std::pair<point_type, InIt> entry_type;
168         std::vector<entry_type> entries;
169 
170         values_count = static_cast<size_type>(diff);
171         entries.reserve(values_count);
172 
173         expandable_box<Box, strategy_type> hint_box(detail::get_strategy(parameters));
174         for ( ; first != last ; ++first )
175         {
176             // NOTE: support for iterators not returning true references adapted
177             // to Geometry concept and default translator returning true reference
178             // An alternative would be to dereference the iterator and translate
179             // in one expression each time the indexable was needed.
180             typename std::iterator_traits<InIt>::reference in_ref = *first;
181             typename Translator::result_type indexable = translator(in_ref);
182 
183             // NOTE: added for consistency with insert()
184             // CONSIDER: alternative - ignore invalid indexable or throw an exception
185             BOOST_GEOMETRY_INDEX_ASSERT(detail::is_valid(indexable), "Indexable is invalid");
186 
187             hint_box.expand(indexable);
188 
189             point_type pt;
190             geometry::centroid(indexable, pt);
191             entries.push_back(std::make_pair(pt, first));
192         }
193 
194         subtree_elements_counts subtree_counts = calculate_subtree_elements_counts(values_count, parameters, leafs_level);
195         internal_element el = per_level(entries.begin(), entries.end(), hint_box.get(), values_count, subtree_counts,
196                                         parameters, translator, allocators);
197 
198         return el.second;
199     }
200 
201 private:
202     template <typename BoxType, typename Strategy>
203     class expandable_box
204     {
205     public:
expandable_box(Strategy const & strategy)206         explicit expandable_box(Strategy const& strategy)
207             : m_strategy(strategy), m_initialized(false)
208         {}
209 
210         template <typename Indexable>
expandable_box(Indexable const & indexable,Strategy const & strategy)211         explicit expandable_box(Indexable const& indexable, Strategy const& strategy)
212             : m_strategy(strategy), m_initialized(true)
213         {
214             detail::bounds(indexable, m_box, m_strategy);
215         }
216 
217         template <typename Indexable>
expand(Indexable const & indexable)218         void expand(Indexable const& indexable)
219         {
220             if ( !m_initialized )
221             {
222                 // it's guaranteed that the Box will be initialized
223                 // only for Points, Boxes and Segments but that's ok
224                 // since only those Geometries can be stored
225                 detail::bounds(indexable, m_box, m_strategy);
226                 m_initialized = true;
227             }
228             else
229             {
230                 detail::expand(m_box, indexable, m_strategy);
231             }
232         }
233 
expand_by_epsilon()234         void expand_by_epsilon()
235         {
236             geometry::detail::expand_by_epsilon(m_box);
237         }
238 
get() const239         BoxType const& get() const
240         {
241             BOOST_GEOMETRY_INDEX_ASSERT(m_initialized, "uninitialized envelope accessed");
242             return m_box;
243         }
244 
245     private:
246         BoxType m_box;
247         Strategy m_strategy;
248         bool m_initialized;
249     };
250 
251     struct subtree_elements_counts
252     {
subtree_elements_countsboost::geometry::index::detail::rtree::pack::subtree_elements_counts253         subtree_elements_counts(std::size_t ma, std::size_t mi) : maxc(ma), minc(mi) {}
254         std::size_t maxc;
255         std::size_t minc;
256     };
257 
258     template <typename EIt> inline static
per_level(EIt first,EIt last,Box const & hint_box,std::size_t values_count,subtree_elements_counts const & subtree_counts,parameters_type const & parameters,Translator const & translator,Allocators & allocators)259     internal_element per_level(EIt first, EIt last, Box const& hint_box, std::size_t values_count, subtree_elements_counts const& subtree_counts,
260                                parameters_type const& parameters, Translator const& translator, Allocators & allocators)
261     {
262         BOOST_GEOMETRY_INDEX_ASSERT(0 < std::distance(first, last) && static_cast<std::size_t>(std::distance(first, last)) == values_count,
263                                     "unexpected parameters");
264 
265         if ( subtree_counts.maxc <= 1 )
266         {
267             // ROOT or LEAF
268             BOOST_GEOMETRY_INDEX_ASSERT(values_count <= parameters.get_max_elements(),
269                                         "too big number of elements");
270             // if !root check m_parameters.get_min_elements() <= count
271 
272             // create new leaf node
273             node_pointer n = rtree::create_node<Allocators, leaf>::apply(allocators);                       // MAY THROW (A)
274             subtree_destroyer auto_remover(n, allocators);
275             leaf & l = rtree::get<leaf>(*n);
276 
277             // reserve space for values
278             rtree::elements(l).reserve(values_count);                                                       // MAY THROW (A)
279 
280             // calculate values box and copy values
281             //   initialize the box explicitly to avoid GCC-4.4 uninitialized variable warnings with O2
282             expandable_box<Box, strategy_type> elements_box(translator(*(first->second)),
283                                                             detail::get_strategy(parameters));
284             rtree::elements(l).push_back(*(first->second));                                                 // MAY THROW (A?,C)
285             for ( ++first ; first != last ; ++first )
286             {
287                 // NOTE: push_back() must be called at the end in order to support move_iterator.
288                 //       The iterator is dereferenced 2x (no temporary reference) to support
289                 //       non-true reference types and move_iterator without boost::forward<>.
290                 elements_box.expand(translator(*(first->second)));
291                 rtree::elements(l).push_back(*(first->second));                                             // MAY THROW (A?,C)
292             }
293 
294 #ifdef BOOST_GEOMETRY_INDEX_EXPERIMENTAL_ENLARGE_BY_EPSILON
295             // Enlarge bounds of a leaf node.
296             // It's because Points and Segments are compared WRT machine epsilon
297             // This ensures that leafs bounds correspond to the stored elements
298             // NOTE: this is done only if the Indexable is a different kind of Geometry
299             //   than the bounds (only Box for now). Spatial predicates are checked
300             //   the same way for Geometry of the same kind.
301             if ( BOOST_GEOMETRY_CONDITION((
302                     ! index::detail::is_bounding_geometry
303                         <
304                             typename indexable_type<Translator>::type
305                         >::value )) )
306             {
307                 elements_box.expand_by_epsilon();
308             }
309 #endif
310 
311             auto_remover.release();
312             return internal_element(elements_box.get(), n);
313         }
314 
315         // calculate next max and min subtree counts
316         subtree_elements_counts next_subtree_counts = subtree_counts;
317         next_subtree_counts.maxc /= parameters.get_max_elements();
318         next_subtree_counts.minc /= parameters.get_max_elements();
319 
320         // create new internal node
321         node_pointer n = rtree::create_node<Allocators, internal_node>::apply(allocators);                  // MAY THROW (A)
322         subtree_destroyer auto_remover(n, allocators);
323         internal_node & in = rtree::get<internal_node>(*n);
324 
325         // reserve space for values
326         std::size_t nodes_count = calculate_nodes_count(values_count, subtree_counts);
327         rtree::elements(in).reserve(nodes_count);                                                           // MAY THROW (A)
328         // calculate values box and copy values
329         expandable_box<Box, strategy_type> elements_box(detail::get_strategy(parameters));
330 
331         per_level_packets(first, last, hint_box, values_count, subtree_counts, next_subtree_counts,
332                           rtree::elements(in), elements_box,
333                           parameters, translator, allocators);
334 
335         auto_remover.release();
336         return internal_element(elements_box.get(), n);
337     }
338 
339     template <typename EIt, typename ExpandableBox> inline static
per_level_packets(EIt first,EIt last,Box const & hint_box,std::size_t values_count,subtree_elements_counts const & subtree_counts,subtree_elements_counts const & next_subtree_counts,internal_elements & elements,ExpandableBox & elements_box,parameters_type const & parameters,Translator const & translator,Allocators & allocators)340     void per_level_packets(EIt first, EIt last, Box const& hint_box,
341                            std::size_t values_count,
342                            subtree_elements_counts const& subtree_counts,
343                            subtree_elements_counts const& next_subtree_counts,
344                            internal_elements & elements, ExpandableBox & elements_box,
345                            parameters_type const& parameters, Translator const& translator, Allocators & allocators)
346     {
347         BOOST_GEOMETRY_INDEX_ASSERT(0 < std::distance(first, last) && static_cast<std::size_t>(std::distance(first, last)) == values_count,
348                                     "unexpected parameters");
349 
350         BOOST_GEOMETRY_INDEX_ASSERT(subtree_counts.minc <= values_count,
351                                     "too small number of elements");
352 
353         // only one packet
354         if ( values_count <= subtree_counts.maxc )
355         {
356             // the end, move to the next level
357             internal_element el = per_level(first, last, hint_box, values_count, next_subtree_counts,
358                                             parameters, translator, allocators);
359 
360             // in case if push_back() do throw here
361             // and even if this is not probable (previously reserved memory, nonthrowing pairs copy)
362             // this case is also tested by exceptions test.
363             subtree_destroyer auto_remover(el.second, allocators);
364             // this container should have memory allocated, reserve() called outside
365             elements.push_back(el);                                                 // MAY THROW (A?,C) - however in normal conditions shouldn't
366             auto_remover.release();
367 
368             elements_box.expand(el.first);
369             return;
370         }
371 
372         std::size_t median_count = calculate_median_count(values_count, subtree_counts);
373         EIt median = first + median_count;
374 
375         coordinate_type greatest_length;
376         std::size_t greatest_dim_index = 0;
377         pack_utils::biggest_edge<dimension>::apply(hint_box, greatest_length, greatest_dim_index);
378         Box left, right;
379         pack_utils::nth_element_and_half_boxes<0, dimension>
380             ::apply(first, median, last, hint_box, left, right, greatest_dim_index);
381 
382         per_level_packets(first, median, left,
383                           median_count, subtree_counts, next_subtree_counts,
384                           elements, elements_box,
385                           parameters, translator, allocators);
386         per_level_packets(median, last, right,
387                           values_count - median_count, subtree_counts, next_subtree_counts,
388                           elements, elements_box,
389                           parameters, translator, allocators);
390     }
391 
392     inline static
calculate_subtree_elements_counts(std::size_t elements_count,parameters_type const & parameters,size_type & leafs_level)393     subtree_elements_counts calculate_subtree_elements_counts(std::size_t elements_count, parameters_type const& parameters, size_type & leafs_level)
394     {
395         boost::ignore_unused(parameters);
396 
397         subtree_elements_counts res(1, 1);
398         leafs_level = 0;
399 
400         std::size_t smax = parameters.get_max_elements();
401         for ( ; smax < elements_count ; smax *= parameters.get_max_elements(), ++leafs_level )
402             res.maxc = smax;
403 
404         res.minc = parameters.get_min_elements() * (res.maxc / parameters.get_max_elements());
405 
406         return res;
407     }
408 
409     inline static
calculate_nodes_count(std::size_t count,subtree_elements_counts const & subtree_counts)410     std::size_t calculate_nodes_count(std::size_t count,
411                                       subtree_elements_counts const& subtree_counts)
412     {
413         std::size_t n = count / subtree_counts.maxc;
414         std::size_t r = count % subtree_counts.maxc;
415 
416         if ( 0 < r && r < subtree_counts.minc )
417         {
418             std::size_t count_minus_min = count - subtree_counts.minc;
419             n = count_minus_min / subtree_counts.maxc;
420             r = count_minus_min % subtree_counts.maxc;
421             ++n;
422         }
423 
424         if ( 0 < r )
425             ++n;
426 
427         return n;
428     }
429 
430     inline static
calculate_median_count(std::size_t count,subtree_elements_counts const & subtree_counts)431     std::size_t calculate_median_count(std::size_t count,
432                                        subtree_elements_counts const& subtree_counts)
433     {
434         // e.g. for max = 5, min = 2, count = 52, subtree_max = 25, subtree_min = 10
435 
436         std::size_t n = count / subtree_counts.maxc; // e.g. 52 / 25 = 2
437         std::size_t r = count % subtree_counts.maxc; // e.g. 52 % 25 = 2
438         std::size_t median_count = (n / 2) * subtree_counts.maxc; // e.g. 2 / 2 * 25 = 25
439 
440         if ( 0 != r ) // e.g. 0 != 2
441         {
442             if ( subtree_counts.minc <= r ) // e.g. 10 <= 2 == false
443             {
444                 //BOOST_GEOMETRY_INDEX_ASSERT(0 < n, "unexpected value");
445                 median_count = ((n+1)/2) * subtree_counts.maxc; // if calculated ((2+1)/2) * 25 which would be ok, but not in all cases
446             }
447             else // r < subtree_counts.second  // e.g. 2 < 10 == true
448             {
449                 std::size_t count_minus_min = count - subtree_counts.minc; // e.g. 52 - 10 = 42
450                 n = count_minus_min / subtree_counts.maxc; // e.g. 42 / 25 = 1
451                 r = count_minus_min % subtree_counts.maxc; // e.g. 42 % 25 = 17
452                 if ( r == 0 )                               // e.g. false
453                 {
454                     // n can't be equal to 0 because then there wouldn't be any element in the other node
455                     //BOOST_GEOMETRY_INDEX_ASSERT(0 < n, "unexpected value");
456                     median_count = ((n+1)/2) * subtree_counts.maxc;     // if calculated ((1+1)/2) * 25 which would be ok, but not in all cases
457                 }
458                 else
459                 {
460                     if ( n == 0 )                                        // e.g. false
461                         median_count = r;                                // if calculated -> 17 which is wrong!
462                     else
463                         median_count = ((n+2)/2) * subtree_counts.maxc; // e.g. ((1+2)/2) * 25 = 25
464                 }
465             }
466         }
467 
468         return median_count;
469     }
470 };
471 
472 }}}}} // namespace boost::geometry::index::detail::rtree
473 
474 #endif // BOOST_GEOMETRY_INDEX_DETAIL_RTREE_PACK_CREATE_HPP
475