1 // Boost.Geometry Index
2 //
3 // R-tree initial packing
4 //
5 // Copyright (c) 2011-2015 Adam Wulkiewicz, Lodz, Poland.
6 //
7 // Use, modification and distribution is subject to the Boost Software License,
8 // Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
9 // http://www.boost.org/LICENSE_1_0.txt)
10 
11 #ifndef BOOST_GEOMETRY_INDEX_DETAIL_RTREE_PACK_CREATE_HPP
12 #define BOOST_GEOMETRY_INDEX_DETAIL_RTREE_PACK_CREATE_HPP
13 
14 #include <boost/geometry/algorithms/expand.hpp>
15 #include <boost/geometry/index/detail/algorithms/bounds.hpp>
16 
17 #include <boost/geometry/algorithms/detail/expand_by_epsilon.hpp>
18 
19 namespace boost { namespace geometry { namespace index { namespace detail { namespace rtree {
20 
21 namespace pack_utils {
22 
23 template <std::size_t Dimension>
24 struct biggest_edge
25 {
26     BOOST_STATIC_ASSERT(0 < Dimension);
27     template <typename Box>
applyboost::geometry::index::detail::rtree::pack_utils::biggest_edge28     static inline void apply(Box const& box, typename coordinate_type<Box>::type & length, std::size_t & dim_index)
29     {
30         biggest_edge<Dimension-1>::apply(box, length, dim_index);
31         typename coordinate_type<Box>::type curr
32             = geometry::get<max_corner, Dimension-1>(box) - geometry::get<min_corner, Dimension-1>(box);
33         if ( length < curr )
34         {
35             dim_index = Dimension - 1;
36             length = curr;
37         }
38     }
39 };
40 
41 template <>
42 struct biggest_edge<1>
43 {
44     template <typename Box>
applyboost::geometry::index::detail::rtree::pack_utils::biggest_edge45     static inline void apply(Box const& box, typename coordinate_type<Box>::type & length, std::size_t & dim_index)
46     {
47         dim_index = 0;
48         length = geometry::get<max_corner, 0>(box) - geometry::get<min_corner, 0>(box);
49     }
50 };
51 
52 template <std::size_t I>
53 struct point_entries_comparer
54 {
55     template <typename PointEntry>
operator ()boost::geometry::index::detail::rtree::pack_utils::point_entries_comparer56     bool operator()(PointEntry const& e1, PointEntry const& e2) const
57     {
58         return geometry::get<I>(e1.first) < geometry::get<I>(e2.first);
59     }
60 };
61 
62 template <std::size_t I, std::size_t Dimension>
63 struct nth_element_and_half_boxes
64 {
65     template <typename EIt, typename Box>
applyboost::geometry::index::detail::rtree::pack_utils::nth_element_and_half_boxes66     static inline void apply(EIt first, EIt median, EIt last, Box const& box, Box & left, Box & right, std::size_t dim_index)
67     {
68         if ( I == dim_index )
69         {
70             std::nth_element(first, median, last, point_entries_comparer<I>());
71 
72             geometry::convert(box, left);
73             geometry::convert(box, right);
74             typename coordinate_type<Box>::type edge_len
75                 = geometry::get<max_corner, I>(box) - geometry::get<min_corner, I>(box);
76             typename coordinate_type<Box>::type median
77                 = geometry::get<min_corner, I>(box) + edge_len / 2;
78             geometry::set<max_corner, I>(left, median);
79             geometry::set<min_corner, I>(right, median);
80         }
81         else
82             nth_element_and_half_boxes<I+1, Dimension>::apply(first, median, last, box, left, right, dim_index);
83     }
84 };
85 
86 template <std::size_t Dimension>
87 struct nth_element_and_half_boxes<Dimension, Dimension>
88 {
89     template <typename EIt, typename Box>
applyboost::geometry::index::detail::rtree::pack_utils::nth_element_and_half_boxes90     static inline void apply(EIt , EIt , EIt , Box const& , Box & , Box & , std::size_t ) {}
91 };
92 
93 } // namespace pack_utils
94 
95 // STR leafs number are calculated as rcount/max
96 // and the number of splitting planes for each dimension as (count/max)^(1/dimension)
97 // <-> for dimension==2 -> sqrt(count/max)
98 //
99 // The main flaw of this algorithm is that the resulting tree will have bad structure for:
100 // 1. non-uniformly distributed elements
101 //      Statistic check could be performed, e.g. based on variance of lengths of elements edges for each dimension
102 // 2. elements distributed mainly along one axis
103 //      Calculate bounding box of all elements and then number of dividing planes for a dimension
104 //      from the length of BB edge for this dimension (more or less assuming that elements are uniformly-distributed squares)
105 //
106 // Another thing is that the last node may have less elements than Max or even Min.
107 // The number of splitting planes must be chosen more carefully than count/max
108 //
109 // This algorithm is something between STR and TGS
110 // it is more similar to the top-down recursive kd-tree creation algorithm
111 // using object median split and split axis of greatest BB edge
112 // BB is only used as a hint (assuming objects are distributed uniformly)
113 //
114 // Implemented algorithm guarantees that the number of elements in nodes will be between Min and Max
115 // and that nodes are packed as tightly as possible
116 // e.g. for 177 values Max = 5 and Min = 2 it will construct the following tree:
117 // ROOT                 177
118 // L1          125               52
119 // L2  25  25  25  25  25   25  17    10
120 // L3  5x5 5x5 5x5 5x5 5x5  5x5 3x5+2 2x5
121 
122 template <typename Value, typename Options, typename Translator, typename Box, typename Allocators>
123 class pack
124 {
125     typedef typename rtree::node<Value, typename Options::parameters_type, Box, Allocators, typename Options::node_tag>::type node;
126     typedef typename rtree::internal_node<Value, typename Options::parameters_type, Box, Allocators, typename Options::node_tag>::type internal_node;
127     typedef typename rtree::leaf<Value, typename Options::parameters_type, Box, Allocators, typename Options::node_tag>::type leaf;
128 
129     typedef typename Allocators::node_pointer node_pointer;
130     typedef rtree::subtree_destroyer<Value, Options, Translator, Box, Allocators> subtree_destroyer;
131     typedef typename Allocators::size_type size_type;
132 
133     typedef typename geometry::point_type<Box>::type point_type;
134     typedef typename geometry::coordinate_type<point_type>::type coordinate_type;
135     typedef typename detail::default_content_result<Box>::type content_type;
136     typedef typename Options::parameters_type parameters_type;
137     static const std::size_t dimension = geometry::dimension<point_type>::value;
138 
139     typedef typename rtree::container_from_elements_type<
140         typename rtree::elements_type<leaf>::type,
141         std::size_t
142     >::type values_counts_container;
143 
144     typedef typename rtree::elements_type<internal_node>::type internal_elements;
145     typedef typename internal_elements::value_type internal_element;
146 
147 public:
148     // Arbitrary iterators
149     template <typename InIt> inline static
apply(InIt first,InIt last,size_type & values_count,size_type & leafs_level,parameters_type const & parameters,Translator const & translator,Allocators & allocators)150     node_pointer apply(InIt first, InIt last, size_type & values_count, size_type & leafs_level,
151                        parameters_type const& parameters, Translator const& translator, Allocators & allocators)
152     {
153         typedef typename std::iterator_traits<InIt>::difference_type diff_type;
154 
155         diff_type diff = std::distance(first, last);
156         if ( diff <= 0 )
157             return node_pointer(0);
158 
159         typedef std::pair<point_type, InIt> entry_type;
160         std::vector<entry_type> entries;
161 
162         values_count = static_cast<size_type>(diff);
163         entries.reserve(values_count);
164 
165         expandable_box<Box> hint_box;
166         for ( ; first != last ; ++first )
167         {
168             // NOTE: support for iterators not returning true references adapted
169             // to Geometry concept and default translator returning true reference
170             // An alternative would be to dereference the iterator and translate
171             // in one expression each time the indexable was needed.
172             typename std::iterator_traits<InIt>::reference in_ref = *first;
173             typename Translator::result_type indexable = translator(in_ref);
174 
175             // NOTE: added for consistency with insert()
176             // CONSIDER: alternative - ignore invalid indexable or throw an exception
177             BOOST_GEOMETRY_INDEX_ASSERT(detail::is_valid(indexable), "Indexable is invalid");
178 
179             hint_box.expand(indexable);
180 
181             point_type pt;
182             geometry::centroid(indexable, pt);
183             entries.push_back(std::make_pair(pt, first));
184         }
185 
186         subtree_elements_counts subtree_counts = calculate_subtree_elements_counts(values_count, parameters, leafs_level);
187         internal_element el = per_level(entries.begin(), entries.end(), hint_box.get(), values_count, subtree_counts,
188                                         parameters, translator, allocators);
189 
190         return el.second;
191     }
192 
193 private:
194     template <typename BoxType>
195     class expandable_box
196     {
197     public:
expandable_box()198         expandable_box()
199             : m_initialized(false)
200         {}
201 
202         template <typename Indexable>
expand(Indexable const & indexable)203         void expand(Indexable const& indexable)
204         {
205             if ( !m_initialized )
206             {
207                 // it's guaranteed that the Box will be initialized
208                 // only for Points, Boxes and Segments but that's ok
209                 // since only those Geometries can be stored
210                 detail::bounds(indexable, m_box);
211                 m_initialized = true;
212             }
213             else
214             {
215                 geometry::expand(m_box, indexable);
216             }
217         }
218 
expand_by_epsilon()219         void expand_by_epsilon()
220         {
221             geometry::detail::expand_by_epsilon(m_box);
222         }
223 
get() const224         BoxType const& get() const
225         {
226             BOOST_GEOMETRY_INDEX_ASSERT(m_initialized, "uninitialized envelope accessed");
227             return m_box;
228         }
229 
230     private:
231         bool m_initialized;
232         BoxType m_box;
233     };
234 
235     struct subtree_elements_counts
236     {
subtree_elements_countsboost::geometry::index::detail::rtree::pack::subtree_elements_counts237         subtree_elements_counts(std::size_t ma, std::size_t mi) : maxc(ma), minc(mi) {}
238         std::size_t maxc;
239         std::size_t minc;
240     };
241 
242     template <typename EIt> inline static
per_level(EIt first,EIt last,Box const & hint_box,std::size_t values_count,subtree_elements_counts const & subtree_counts,parameters_type const & parameters,Translator const & translator,Allocators & allocators)243     internal_element per_level(EIt first, EIt last, Box const& hint_box, std::size_t values_count, subtree_elements_counts const& subtree_counts,
244                                parameters_type const& parameters, Translator const& translator, Allocators & allocators)
245     {
246         BOOST_GEOMETRY_INDEX_ASSERT(0 < std::distance(first, last) && static_cast<std::size_t>(std::distance(first, last)) == values_count,
247                                     "unexpected parameters");
248 
249         if ( subtree_counts.maxc <= 1 )
250         {
251             // ROOT or LEAF
252             BOOST_GEOMETRY_INDEX_ASSERT(values_count <= parameters.get_max_elements(),
253                                         "too big number of elements");
254             // if !root check m_parameters.get_min_elements() <= count
255 
256             // create new leaf node
257             node_pointer n = rtree::create_node<Allocators, leaf>::apply(allocators);                       // MAY THROW (A)
258             subtree_destroyer auto_remover(n, allocators);
259             leaf & l = rtree::get<leaf>(*n);
260 
261             // reserve space for values
262             rtree::elements(l).reserve(values_count);                                                       // MAY THROW (A)
263             // calculate values box and copy values
264             expandable_box<Box> elements_box;
265             for ( ; first != last ; ++first )
266             {
267                 // NOTE: push_back() must be called at the end in order to support move_iterator.
268                 //       The iterator is dereferenced 2x (no temporary reference) to support
269                 //       non-true reference types and move_iterator without boost::forward<>.
270                 elements_box.expand(translator(*(first->second)));
271                 rtree::elements(l).push_back(*(first->second));                                             // MAY THROW (A?,C)
272             }
273 
274 #ifdef BOOST_GEOMETRY_INDEX_EXPERIMENTAL_ENLARGE_BY_EPSILON
275             // Enlarge bounds of a leaf node.
276             // It's because Points and Segments are compared WRT machine epsilon
277             // This ensures that leafs bounds correspond to the stored elements
278             // NOTE: this is done only if the Indexable is a different kind of Geometry
279             //   than the bounds (only Box for now). Spatial predicates are checked
280             //   the same way for Geometry of the same kind.
281             if ( BOOST_GEOMETRY_CONDITION((
282                     ! index::detail::is_bounding_geometry
283                         <
284                             typename indexable_type<Translator>::type
285                         >::value )) )
286             {
287                 elements_box.expand_by_epsilon();
288             }
289 #endif
290 
291             auto_remover.release();
292             return internal_element(elements_box.get(), n);
293         }
294 
295         // calculate next max and min subtree counts
296         subtree_elements_counts next_subtree_counts = subtree_counts;
297         next_subtree_counts.maxc /= parameters.get_max_elements();
298         next_subtree_counts.minc /= parameters.get_max_elements();
299 
300         // create new internal node
301         node_pointer n = rtree::create_node<Allocators, internal_node>::apply(allocators);                  // MAY THROW (A)
302         subtree_destroyer auto_remover(n, allocators);
303         internal_node & in = rtree::get<internal_node>(*n);
304 
305         // reserve space for values
306         std::size_t nodes_count = calculate_nodes_count(values_count, subtree_counts);
307         rtree::elements(in).reserve(nodes_count);                                                           // MAY THROW (A)
308         // calculate values box and copy values
309         expandable_box<Box> elements_box;
310 
311         per_level_packets(first, last, hint_box, values_count, subtree_counts, next_subtree_counts,
312                           rtree::elements(in), elements_box,
313                           parameters, translator, allocators);
314 
315         auto_remover.release();
316         return internal_element(elements_box.get(), n);
317     }
318 
319     template <typename EIt, typename ExpandableBox> inline static
per_level_packets(EIt first,EIt last,Box const & hint_box,std::size_t values_count,subtree_elements_counts const & subtree_counts,subtree_elements_counts const & next_subtree_counts,internal_elements & elements,ExpandableBox & elements_box,parameters_type const & parameters,Translator const & translator,Allocators & allocators)320     void per_level_packets(EIt first, EIt last, Box const& hint_box,
321                            std::size_t values_count,
322                            subtree_elements_counts const& subtree_counts,
323                            subtree_elements_counts const& next_subtree_counts,
324                            internal_elements & elements, ExpandableBox & elements_box,
325                            parameters_type const& parameters, Translator const& translator, Allocators & allocators)
326     {
327         BOOST_GEOMETRY_INDEX_ASSERT(0 < std::distance(first, last) && static_cast<std::size_t>(std::distance(first, last)) == values_count,
328                                     "unexpected parameters");
329 
330         BOOST_GEOMETRY_INDEX_ASSERT(subtree_counts.minc <= values_count,
331                                     "too small number of elements");
332 
333         // only one packet
334         if ( values_count <= subtree_counts.maxc )
335         {
336             // the end, move to the next level
337             internal_element el = per_level(first, last, hint_box, values_count, next_subtree_counts,
338                                             parameters, translator, allocators);
339 
340             // in case if push_back() do throw here
341             // and even if this is not probable (previously reserved memory, nonthrowing pairs copy)
342             // this case is also tested by exceptions test.
343             subtree_destroyer auto_remover(el.second, allocators);
344             // this container should have memory allocated, reserve() called outside
345             elements.push_back(el);                                                 // MAY THROW (A?,C) - however in normal conditions shouldn't
346             auto_remover.release();
347 
348             elements_box.expand(el.first);
349             return;
350         }
351 
352         std::size_t median_count = calculate_median_count(values_count, subtree_counts);
353         EIt median = first + median_count;
354 
355         coordinate_type greatest_length;
356         std::size_t greatest_dim_index = 0;
357         pack_utils::biggest_edge<dimension>::apply(hint_box, greatest_length, greatest_dim_index);
358         Box left, right;
359         pack_utils::nth_element_and_half_boxes<0, dimension>
360             ::apply(first, median, last, hint_box, left, right, greatest_dim_index);
361 
362         per_level_packets(first, median, left,
363                           median_count, subtree_counts, next_subtree_counts,
364                           elements, elements_box,
365                           parameters, translator, allocators);
366         per_level_packets(median, last, right,
367                           values_count - median_count, subtree_counts, next_subtree_counts,
368                           elements, elements_box,
369                           parameters, translator, allocators);
370     }
371 
372     inline static
calculate_subtree_elements_counts(std::size_t elements_count,parameters_type const & parameters,size_type & leafs_level)373     subtree_elements_counts calculate_subtree_elements_counts(std::size_t elements_count, parameters_type const& parameters, size_type & leafs_level)
374     {
375         boost::ignore_unused_variable_warning(parameters);
376 
377         subtree_elements_counts res(1, 1);
378         leafs_level = 0;
379 
380         std::size_t smax = parameters.get_max_elements();
381         for ( ; smax < elements_count ; smax *= parameters.get_max_elements(), ++leafs_level )
382             res.maxc = smax;
383 
384         res.minc = parameters.get_min_elements() * (res.maxc / parameters.get_max_elements());
385 
386         return res;
387     }
388 
389     inline static
calculate_nodes_count(std::size_t count,subtree_elements_counts const & subtree_counts)390     std::size_t calculate_nodes_count(std::size_t count,
391                                       subtree_elements_counts const& subtree_counts)
392     {
393         std::size_t n = count / subtree_counts.maxc;
394         std::size_t r = count % subtree_counts.maxc;
395 
396         if ( 0 < r && r < subtree_counts.minc )
397         {
398             std::size_t count_minus_min = count - subtree_counts.minc;
399             n = count_minus_min / subtree_counts.maxc;
400             r = count_minus_min % subtree_counts.maxc;
401             ++n;
402         }
403 
404         if ( 0 < r )
405             ++n;
406 
407         return n;
408     }
409 
410     inline static
calculate_median_count(std::size_t count,subtree_elements_counts const & subtree_counts)411     std::size_t calculate_median_count(std::size_t count,
412                                        subtree_elements_counts const& subtree_counts)
413     {
414         // e.g. for max = 5, min = 2, count = 52, subtree_max = 25, subtree_min = 10
415 
416         std::size_t n = count / subtree_counts.maxc; // e.g. 52 / 25 = 2
417         std::size_t r = count % subtree_counts.maxc; // e.g. 52 % 25 = 2
418         std::size_t median_count = (n / 2) * subtree_counts.maxc; // e.g. 2 / 2 * 25 = 25
419 
420         if ( 0 != r ) // e.g. 0 != 2
421         {
422             if ( subtree_counts.minc <= r ) // e.g. 10 <= 2 == false
423             {
424                 //BOOST_GEOMETRY_INDEX_ASSERT(0 < n, "unexpected value");
425                 median_count = ((n+1)/2) * subtree_counts.maxc; // if calculated ((2+1)/2) * 25 which would be ok, but not in all cases
426             }
427             else // r < subtree_counts.second  // e.g. 2 < 10 == true
428             {
429                 std::size_t count_minus_min = count - subtree_counts.minc; // e.g. 52 - 10 = 42
430                 n = count_minus_min / subtree_counts.maxc; // e.g. 42 / 25 = 1
431                 r = count_minus_min % subtree_counts.maxc; // e.g. 42 % 25 = 17
432                 if ( r == 0 )                               // e.g. false
433                 {
434                     // n can't be equal to 0 because then there wouldn't be any element in the other node
435                     //BOOST_GEOMETRY_INDEX_ASSERT(0 < n, "unexpected value");
436                     median_count = ((n+1)/2) * subtree_counts.maxc;     // if calculated ((1+1)/2) * 25 which would be ok, but not in all cases
437                 }
438                 else
439                 {
440                     if ( n == 0 )                                        // e.g. false
441                         median_count = r;                                // if calculated -> 17 which is wrong!
442                     else
443                         median_count = ((n+2)/2) * subtree_counts.maxc; // e.g. ((1+2)/2) * 25 = 25
444                 }
445             }
446         }
447 
448         return median_count;
449     }
450 };
451 
452 }}}}} // namespace boost::geometry::index::detail::rtree
453 
454 #endif // BOOST_GEOMETRY_INDEX_DETAIL_RTREE_PACK_CREATE_HPP
455