1 // Boost.Geometry Index
2 //
3 // R-tree initial packing
4 //
5 // Copyright (c) 2011-2017 Adam Wulkiewicz, Lodz, Poland.
6 //
7 // Use, modification and distribution is subject to the Boost Software License,
8 // Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
9 // http://www.boost.org/LICENSE_1_0.txt)
10 
11 #ifndef BOOST_GEOMETRY_INDEX_DETAIL_RTREE_PACK_CREATE_HPP
12 #define BOOST_GEOMETRY_INDEX_DETAIL_RTREE_PACK_CREATE_HPP
13 
14 #include <boost/core/ignore_unused.hpp>
15 
16 #include <boost/geometry/algorithms/expand.hpp>
17 #include <boost/geometry/index/detail/algorithms/bounds.hpp>
18 #include <boost/geometry/index/detail/algorithms/nth_element.hpp>
19 
20 #include <boost/geometry/algorithms/detail/expand_by_epsilon.hpp>
21 
22 namespace boost { namespace geometry { namespace index { namespace detail { namespace rtree {
23 
24 namespace pack_utils {
25 
26 template <std::size_t Dimension>
27 struct biggest_edge
28 {
29     BOOST_STATIC_ASSERT(0 < Dimension);
30     template <typename Box>
applyboost::geometry::index::detail::rtree::pack_utils::biggest_edge31     static inline void apply(Box const& box, typename coordinate_type<Box>::type & length, std::size_t & dim_index)
32     {
33         biggest_edge<Dimension-1>::apply(box, length, dim_index);
34         typename coordinate_type<Box>::type curr
35             = geometry::get<max_corner, Dimension-1>(box) - geometry::get<min_corner, Dimension-1>(box);
36         if ( length < curr )
37         {
38             dim_index = Dimension - 1;
39             length = curr;
40         }
41     }
42 };
43 
44 template <>
45 struct biggest_edge<1>
46 {
47     template <typename Box>
applyboost::geometry::index::detail::rtree::pack_utils::biggest_edge48     static inline void apply(Box const& box, typename coordinate_type<Box>::type & length, std::size_t & dim_index)
49     {
50         dim_index = 0;
51         length = geometry::get<max_corner, 0>(box) - geometry::get<min_corner, 0>(box);
52     }
53 };
54 
55 template <std::size_t I>
56 struct point_entries_comparer
57 {
58     template <typename PointEntry>
operator ()boost::geometry::index::detail::rtree::pack_utils::point_entries_comparer59     bool operator()(PointEntry const& e1, PointEntry const& e2) const
60     {
61         return geometry::get<I>(e1.first) < geometry::get<I>(e2.first);
62     }
63 };
64 
65 template <std::size_t I, std::size_t Dimension>
66 struct nth_element_and_half_boxes
67 {
68     template <typename EIt, typename Box>
applyboost::geometry::index::detail::rtree::pack_utils::nth_element_and_half_boxes69     static inline void apply(EIt first, EIt median, EIt last, Box const& box, Box & left, Box & right, std::size_t dim_index)
70     {
71         if ( I == dim_index )
72         {
73             index::detail::nth_element(first, median, last, point_entries_comparer<I>());
74 
75             geometry::convert(box, left);
76             geometry::convert(box, right);
77             typename coordinate_type<Box>::type edge_len
78                 = geometry::get<max_corner, I>(box) - geometry::get<min_corner, I>(box);
79             typename coordinate_type<Box>::type median
80                 = geometry::get<min_corner, I>(box) + edge_len / 2;
81             geometry::set<max_corner, I>(left, median);
82             geometry::set<min_corner, I>(right, median);
83         }
84         else
85             nth_element_and_half_boxes<I+1, Dimension>::apply(first, median, last, box, left, right, dim_index);
86     }
87 };
88 
89 template <std::size_t Dimension>
90 struct nth_element_and_half_boxes<Dimension, Dimension>
91 {
92     template <typename EIt, typename Box>
applyboost::geometry::index::detail::rtree::pack_utils::nth_element_and_half_boxes93     static inline void apply(EIt , EIt , EIt , Box const& , Box & , Box & , std::size_t ) {}
94 };
95 
96 } // namespace pack_utils
97 
98 // STR leafs number are calculated as rcount/max
99 // and the number of splitting planes for each dimension as (count/max)^(1/dimension)
100 // <-> for dimension==2 -> sqrt(count/max)
101 //
102 // The main flaw of this algorithm is that the resulting tree will have bad structure for:
103 // 1. non-uniformly distributed elements
104 //      Statistic check could be performed, e.g. based on variance of lengths of elements edges for each dimension
105 // 2. elements distributed mainly along one axis
106 //      Calculate bounding box of all elements and then number of dividing planes for a dimension
107 //      from the length of BB edge for this dimension (more or less assuming that elements are uniformly-distributed squares)
108 //
109 // Another thing is that the last node may have less elements than Max or even Min.
110 // The number of splitting planes must be chosen more carefully than count/max
111 //
112 // This algorithm is something between STR and TGS
113 // it is more similar to the top-down recursive kd-tree creation algorithm
114 // using object median split and split axis of greatest BB edge
115 // BB is only used as a hint (assuming objects are distributed uniformly)
116 //
117 // Implemented algorithm guarantees that the number of elements in nodes will be between Min and Max
118 // and that nodes are packed as tightly as possible
119 // e.g. for 177 values Max = 5 and Min = 2 it will construct the following tree:
120 // ROOT                 177
121 // L1          125               52
122 // L2  25  25  25  25  25   25  17    10
123 // L3  5x5 5x5 5x5 5x5 5x5  5x5 3x5+2 2x5
124 
125 template <typename Value, typename Options, typename Translator, typename Box, typename Allocators>
126 class pack
127 {
128     typedef typename rtree::node<Value, typename Options::parameters_type, Box, Allocators, typename Options::node_tag>::type node;
129     typedef typename rtree::internal_node<Value, typename Options::parameters_type, Box, Allocators, typename Options::node_tag>::type internal_node;
130     typedef typename rtree::leaf<Value, typename Options::parameters_type, Box, Allocators, typename Options::node_tag>::type leaf;
131 
132     typedef typename Allocators::node_pointer node_pointer;
133     typedef rtree::subtree_destroyer<Value, Options, Translator, Box, Allocators> subtree_destroyer;
134     typedef typename Allocators::size_type size_type;
135 
136     typedef typename geometry::point_type<Box>::type point_type;
137     typedef typename geometry::coordinate_type<point_type>::type coordinate_type;
138     typedef typename detail::default_content_result<Box>::type content_type;
139     typedef typename Options::parameters_type parameters_type;
140     static const std::size_t dimension = geometry::dimension<point_type>::value;
141 
142     typedef typename rtree::container_from_elements_type<
143         typename rtree::elements_type<leaf>::type,
144         std::size_t
145     >::type values_counts_container;
146 
147     typedef typename rtree::elements_type<internal_node>::type internal_elements;
148     typedef typename internal_elements::value_type internal_element;
149 
150 public:
151     // Arbitrary iterators
152     template <typename InIt> inline static
apply(InIt first,InIt last,size_type & values_count,size_type & leafs_level,parameters_type const & parameters,Translator const & translator,Allocators & allocators)153     node_pointer apply(InIt first, InIt last, size_type & values_count, size_type & leafs_level,
154                        parameters_type const& parameters, Translator const& translator, Allocators & allocators)
155     {
156         typedef typename std::iterator_traits<InIt>::difference_type diff_type;
157 
158         diff_type diff = std::distance(first, last);
159         if ( diff <= 0 )
160             return node_pointer(0);
161 
162         typedef std::pair<point_type, InIt> entry_type;
163         std::vector<entry_type> entries;
164 
165         values_count = static_cast<size_type>(diff);
166         entries.reserve(values_count);
167 
168         expandable_box<Box> hint_box;
169         for ( ; first != last ; ++first )
170         {
171             // NOTE: support for iterators not returning true references adapted
172             // to Geometry concept and default translator returning true reference
173             // An alternative would be to dereference the iterator and translate
174             // in one expression each time the indexable was needed.
175             typename std::iterator_traits<InIt>::reference in_ref = *first;
176             typename Translator::result_type indexable = translator(in_ref);
177 
178             // NOTE: added for consistency with insert()
179             // CONSIDER: alternative - ignore invalid indexable or throw an exception
180             BOOST_GEOMETRY_INDEX_ASSERT(detail::is_valid(indexable), "Indexable is invalid");
181 
182             hint_box.expand(indexable);
183 
184             point_type pt;
185             geometry::centroid(indexable, pt);
186             entries.push_back(std::make_pair(pt, first));
187         }
188 
189         subtree_elements_counts subtree_counts = calculate_subtree_elements_counts(values_count, parameters, leafs_level);
190         internal_element el = per_level(entries.begin(), entries.end(), hint_box.get(), values_count, subtree_counts,
191                                         parameters, translator, allocators);
192 
193         return el.second;
194     }
195 
196 private:
197     template <typename BoxType>
198     class expandable_box
199     {
200     public:
expandable_box()201         expandable_box()
202             : m_initialized(false)
203         {}
204 
205         template <typename Indexable>
expandable_box(Indexable const & indexable)206         explicit expandable_box(Indexable const& indexable)
207             : m_initialized(true)
208         {
209             detail::bounds(indexable, m_box);
210         }
211 
212         template <typename Indexable>
expand(Indexable const & indexable)213         void expand(Indexable const& indexable)
214         {
215             if ( !m_initialized )
216             {
217                 // it's guaranteed that the Box will be initialized
218                 // only for Points, Boxes and Segments but that's ok
219                 // since only those Geometries can be stored
220                 detail::bounds(indexable, m_box);
221                 m_initialized = true;
222             }
223             else
224             {
225                 geometry::expand(m_box, indexable);
226             }
227         }
228 
expand_by_epsilon()229         void expand_by_epsilon()
230         {
231             geometry::detail::expand_by_epsilon(m_box);
232         }
233 
get() const234         BoxType const& get() const
235         {
236             BOOST_GEOMETRY_INDEX_ASSERT(m_initialized, "uninitialized envelope accessed");
237             return m_box;
238         }
239 
240     private:
241         bool m_initialized;
242         BoxType m_box;
243     };
244 
245     struct subtree_elements_counts
246     {
subtree_elements_countsboost::geometry::index::detail::rtree::pack::subtree_elements_counts247         subtree_elements_counts(std::size_t ma, std::size_t mi) : maxc(ma), minc(mi) {}
248         std::size_t maxc;
249         std::size_t minc;
250     };
251 
252     template <typename EIt> inline static
per_level(EIt first,EIt last,Box const & hint_box,std::size_t values_count,subtree_elements_counts const & subtree_counts,parameters_type const & parameters,Translator const & translator,Allocators & allocators)253     internal_element per_level(EIt first, EIt last, Box const& hint_box, std::size_t values_count, subtree_elements_counts const& subtree_counts,
254                                parameters_type const& parameters, Translator const& translator, Allocators & allocators)
255     {
256         BOOST_GEOMETRY_INDEX_ASSERT(0 < std::distance(first, last) && static_cast<std::size_t>(std::distance(first, last)) == values_count,
257                                     "unexpected parameters");
258 
259         if ( subtree_counts.maxc <= 1 )
260         {
261             // ROOT or LEAF
262             BOOST_GEOMETRY_INDEX_ASSERT(values_count <= parameters.get_max_elements(),
263                                         "too big number of elements");
264             // if !root check m_parameters.get_min_elements() <= count
265 
266             // create new leaf node
267             node_pointer n = rtree::create_node<Allocators, leaf>::apply(allocators);                       // MAY THROW (A)
268             subtree_destroyer auto_remover(n, allocators);
269             leaf & l = rtree::get<leaf>(*n);
270 
271             // reserve space for values
272             rtree::elements(l).reserve(values_count);                                                       // MAY THROW (A)
273 
274             // calculate values box and copy values
275             //   initialize the box explicitly to avoid GCC-4.4 uninitialized variable warnings with O2
276             expandable_box<Box> elements_box(translator(*(first->second)));
277             rtree::elements(l).push_back(*(first->second));                                                 // MAY THROW (A?,C)
278             for ( ++first ; first != last ; ++first )
279             {
280                 // NOTE: push_back() must be called at the end in order to support move_iterator.
281                 //       The iterator is dereferenced 2x (no temporary reference) to support
282                 //       non-true reference types and move_iterator without boost::forward<>.
283                 elements_box.expand(translator(*(first->second)));
284                 rtree::elements(l).push_back(*(first->second));                                             // MAY THROW (A?,C)
285             }
286 
287 #ifdef BOOST_GEOMETRY_INDEX_EXPERIMENTAL_ENLARGE_BY_EPSILON
288             // Enlarge bounds of a leaf node.
289             // It's because Points and Segments are compared WRT machine epsilon
290             // This ensures that leafs bounds correspond to the stored elements
291             // NOTE: this is done only if the Indexable is a different kind of Geometry
292             //   than the bounds (only Box for now). Spatial predicates are checked
293             //   the same way for Geometry of the same kind.
294             if ( BOOST_GEOMETRY_CONDITION((
295                     ! index::detail::is_bounding_geometry
296                         <
297                             typename indexable_type<Translator>::type
298                         >::value )) )
299             {
300                 elements_box.expand_by_epsilon();
301             }
302 #endif
303 
304             auto_remover.release();
305             return internal_element(elements_box.get(), n);
306         }
307 
308         // calculate next max and min subtree counts
309         subtree_elements_counts next_subtree_counts = subtree_counts;
310         next_subtree_counts.maxc /= parameters.get_max_elements();
311         next_subtree_counts.minc /= parameters.get_max_elements();
312 
313         // create new internal node
314         node_pointer n = rtree::create_node<Allocators, internal_node>::apply(allocators);                  // MAY THROW (A)
315         subtree_destroyer auto_remover(n, allocators);
316         internal_node & in = rtree::get<internal_node>(*n);
317 
318         // reserve space for values
319         std::size_t nodes_count = calculate_nodes_count(values_count, subtree_counts);
320         rtree::elements(in).reserve(nodes_count);                                                           // MAY THROW (A)
321         // calculate values box and copy values
322         expandable_box<Box> elements_box;
323 
324         per_level_packets(first, last, hint_box, values_count, subtree_counts, next_subtree_counts,
325                           rtree::elements(in), elements_box,
326                           parameters, translator, allocators);
327 
328         auto_remover.release();
329         return internal_element(elements_box.get(), n);
330     }
331 
332     template <typename EIt, typename ExpandableBox> inline static
per_level_packets(EIt first,EIt last,Box const & hint_box,std::size_t values_count,subtree_elements_counts const & subtree_counts,subtree_elements_counts const & next_subtree_counts,internal_elements & elements,ExpandableBox & elements_box,parameters_type const & parameters,Translator const & translator,Allocators & allocators)333     void per_level_packets(EIt first, EIt last, Box const& hint_box,
334                            std::size_t values_count,
335                            subtree_elements_counts const& subtree_counts,
336                            subtree_elements_counts const& next_subtree_counts,
337                            internal_elements & elements, ExpandableBox & elements_box,
338                            parameters_type const& parameters, Translator const& translator, Allocators & allocators)
339     {
340         BOOST_GEOMETRY_INDEX_ASSERT(0 < std::distance(first, last) && static_cast<std::size_t>(std::distance(first, last)) == values_count,
341                                     "unexpected parameters");
342 
343         BOOST_GEOMETRY_INDEX_ASSERT(subtree_counts.minc <= values_count,
344                                     "too small number of elements");
345 
346         // only one packet
347         if ( values_count <= subtree_counts.maxc )
348         {
349             // the end, move to the next level
350             internal_element el = per_level(first, last, hint_box, values_count, next_subtree_counts,
351                                             parameters, translator, allocators);
352 
353             // in case if push_back() do throw here
354             // and even if this is not probable (previously reserved memory, nonthrowing pairs copy)
355             // this case is also tested by exceptions test.
356             subtree_destroyer auto_remover(el.second, allocators);
357             // this container should have memory allocated, reserve() called outside
358             elements.push_back(el);                                                 // MAY THROW (A?,C) - however in normal conditions shouldn't
359             auto_remover.release();
360 
361             elements_box.expand(el.first);
362             return;
363         }
364 
365         std::size_t median_count = calculate_median_count(values_count, subtree_counts);
366         EIt median = first + median_count;
367 
368         coordinate_type greatest_length;
369         std::size_t greatest_dim_index = 0;
370         pack_utils::biggest_edge<dimension>::apply(hint_box, greatest_length, greatest_dim_index);
371         Box left, right;
372         pack_utils::nth_element_and_half_boxes<0, dimension>
373             ::apply(first, median, last, hint_box, left, right, greatest_dim_index);
374 
375         per_level_packets(first, median, left,
376                           median_count, subtree_counts, next_subtree_counts,
377                           elements, elements_box,
378                           parameters, translator, allocators);
379         per_level_packets(median, last, right,
380                           values_count - median_count, subtree_counts, next_subtree_counts,
381                           elements, elements_box,
382                           parameters, translator, allocators);
383     }
384 
385     inline static
calculate_subtree_elements_counts(std::size_t elements_count,parameters_type const & parameters,size_type & leafs_level)386     subtree_elements_counts calculate_subtree_elements_counts(std::size_t elements_count, parameters_type const& parameters, size_type & leafs_level)
387     {
388         boost::ignore_unused(parameters);
389 
390         subtree_elements_counts res(1, 1);
391         leafs_level = 0;
392 
393         std::size_t smax = parameters.get_max_elements();
394         for ( ; smax < elements_count ; smax *= parameters.get_max_elements(), ++leafs_level )
395             res.maxc = smax;
396 
397         res.minc = parameters.get_min_elements() * (res.maxc / parameters.get_max_elements());
398 
399         return res;
400     }
401 
402     inline static
calculate_nodes_count(std::size_t count,subtree_elements_counts const & subtree_counts)403     std::size_t calculate_nodes_count(std::size_t count,
404                                       subtree_elements_counts const& subtree_counts)
405     {
406         std::size_t n = count / subtree_counts.maxc;
407         std::size_t r = count % subtree_counts.maxc;
408 
409         if ( 0 < r && r < subtree_counts.minc )
410         {
411             std::size_t count_minus_min = count - subtree_counts.minc;
412             n = count_minus_min / subtree_counts.maxc;
413             r = count_minus_min % subtree_counts.maxc;
414             ++n;
415         }
416 
417         if ( 0 < r )
418             ++n;
419 
420         return n;
421     }
422 
423     inline static
calculate_median_count(std::size_t count,subtree_elements_counts const & subtree_counts)424     std::size_t calculate_median_count(std::size_t count,
425                                        subtree_elements_counts const& subtree_counts)
426     {
427         // e.g. for max = 5, min = 2, count = 52, subtree_max = 25, subtree_min = 10
428 
429         std::size_t n = count / subtree_counts.maxc; // e.g. 52 / 25 = 2
430         std::size_t r = count % subtree_counts.maxc; // e.g. 52 % 25 = 2
431         std::size_t median_count = (n / 2) * subtree_counts.maxc; // e.g. 2 / 2 * 25 = 25
432 
433         if ( 0 != r ) // e.g. 0 != 2
434         {
435             if ( subtree_counts.minc <= r ) // e.g. 10 <= 2 == false
436             {
437                 //BOOST_GEOMETRY_INDEX_ASSERT(0 < n, "unexpected value");
438                 median_count = ((n+1)/2) * subtree_counts.maxc; // if calculated ((2+1)/2) * 25 which would be ok, but not in all cases
439             }
440             else // r < subtree_counts.second  // e.g. 2 < 10 == true
441             {
442                 std::size_t count_minus_min = count - subtree_counts.minc; // e.g. 52 - 10 = 42
443                 n = count_minus_min / subtree_counts.maxc; // e.g. 42 / 25 = 1
444                 r = count_minus_min % subtree_counts.maxc; // e.g. 42 % 25 = 17
445                 if ( r == 0 )                               // e.g. false
446                 {
447                     // n can't be equal to 0 because then there wouldn't be any element in the other node
448                     //BOOST_GEOMETRY_INDEX_ASSERT(0 < n, "unexpected value");
449                     median_count = ((n+1)/2) * subtree_counts.maxc;     // if calculated ((1+1)/2) * 25 which would be ok, but not in all cases
450                 }
451                 else
452                 {
453                     if ( n == 0 )                                        // e.g. false
454                         median_count = r;                                // if calculated -> 17 which is wrong!
455                     else
456                         median_count = ((n+2)/2) * subtree_counts.maxc; // e.g. ((1+2)/2) * 25 = 25
457                 }
458             }
459         }
460 
461         return median_count;
462     }
463 };
464 
465 }}}}} // namespace boost::geometry::index::detail::rtree
466 
467 #endif // BOOST_GEOMETRY_INDEX_DETAIL_RTREE_PACK_CREATE_HPP
468