1 // Boost.Geometry Index 2 // 3 // R-tree initial packing 4 // 5 // Copyright (c) 2011-2017 Adam Wulkiewicz, Lodz, Poland. 6 // 7 // Use, modification and distribution is subject to the Boost Software License, 8 // Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at 9 // http://www.boost.org/LICENSE_1_0.txt) 10 11 #ifndef BOOST_GEOMETRY_INDEX_DETAIL_RTREE_PACK_CREATE_HPP 12 #define BOOST_GEOMETRY_INDEX_DETAIL_RTREE_PACK_CREATE_HPP 13 14 #include <boost/core/ignore_unused.hpp> 15 16 #include <boost/geometry/algorithms/expand.hpp> 17 #include <boost/geometry/index/detail/algorithms/bounds.hpp> 18 #include <boost/geometry/index/detail/algorithms/nth_element.hpp> 19 20 #include <boost/geometry/algorithms/detail/expand_by_epsilon.hpp> 21 22 namespace boost { namespace geometry { namespace index { namespace detail { namespace rtree { 23 24 namespace pack_utils { 25 26 template <std::size_t Dimension> 27 struct biggest_edge 28 { 29 BOOST_STATIC_ASSERT(0 < Dimension); 30 template <typename Box> applyboost::geometry::index::detail::rtree::pack_utils::biggest_edge31 static inline void apply(Box const& box, typename coordinate_type<Box>::type & length, std::size_t & dim_index) 32 { 33 biggest_edge<Dimension-1>::apply(box, length, dim_index); 34 typename coordinate_type<Box>::type curr 35 = geometry::get<max_corner, Dimension-1>(box) - geometry::get<min_corner, Dimension-1>(box); 36 if ( length < curr ) 37 { 38 dim_index = Dimension - 1; 39 length = curr; 40 } 41 } 42 }; 43 44 template <> 45 struct biggest_edge<1> 46 { 47 template <typename Box> applyboost::geometry::index::detail::rtree::pack_utils::biggest_edge48 static inline void apply(Box const& box, typename coordinate_type<Box>::type & length, std::size_t & dim_index) 49 { 50 dim_index = 0; 51 length = geometry::get<max_corner, 0>(box) - geometry::get<min_corner, 0>(box); 52 } 53 }; 54 55 template <std::size_t I> 56 struct point_entries_comparer 57 { 58 template <typename PointEntry> operator ()boost::geometry::index::detail::rtree::pack_utils::point_entries_comparer59 bool operator()(PointEntry const& e1, PointEntry const& e2) const 60 { 61 return geometry::get<I>(e1.first) < geometry::get<I>(e2.first); 62 } 63 }; 64 65 template <std::size_t I, std::size_t Dimension> 66 struct nth_element_and_half_boxes 67 { 68 template <typename EIt, typename Box> applyboost::geometry::index::detail::rtree::pack_utils::nth_element_and_half_boxes69 static inline void apply(EIt first, EIt median, EIt last, Box const& box, Box & left, Box & right, std::size_t dim_index) 70 { 71 if ( I == dim_index ) 72 { 73 index::detail::nth_element(first, median, last, point_entries_comparer<I>()); 74 75 geometry::convert(box, left); 76 geometry::convert(box, right); 77 typename coordinate_type<Box>::type edge_len 78 = geometry::get<max_corner, I>(box) - geometry::get<min_corner, I>(box); 79 typename coordinate_type<Box>::type median 80 = geometry::get<min_corner, I>(box) + edge_len / 2; 81 geometry::set<max_corner, I>(left, median); 82 geometry::set<min_corner, I>(right, median); 83 } 84 else 85 nth_element_and_half_boxes<I+1, Dimension>::apply(first, median, last, box, left, right, dim_index); 86 } 87 }; 88 89 template <std::size_t Dimension> 90 struct nth_element_and_half_boxes<Dimension, Dimension> 91 { 92 template <typename EIt, typename Box> applyboost::geometry::index::detail::rtree::pack_utils::nth_element_and_half_boxes93 static inline void apply(EIt , EIt , EIt , Box const& , Box & , Box & , std::size_t ) {} 94 }; 95 96 } // namespace pack_utils 97 98 // STR leafs number are calculated as rcount/max 99 // and the number of splitting planes for each dimension as (count/max)^(1/dimension) 100 // <-> for dimension==2 -> sqrt(count/max) 101 // 102 // The main flaw of this algorithm is that the resulting tree will have bad structure for: 103 // 1. non-uniformly distributed elements 104 // Statistic check could be performed, e.g. based on variance of lengths of elements edges for each dimension 105 // 2. elements distributed mainly along one axis 106 // Calculate bounding box of all elements and then number of dividing planes for a dimension 107 // from the length of BB edge for this dimension (more or less assuming that elements are uniformly-distributed squares) 108 // 109 // Another thing is that the last node may have less elements than Max or even Min. 110 // The number of splitting planes must be chosen more carefully than count/max 111 // 112 // This algorithm is something between STR and TGS 113 // it is more similar to the top-down recursive kd-tree creation algorithm 114 // using object median split and split axis of greatest BB edge 115 // BB is only used as a hint (assuming objects are distributed uniformly) 116 // 117 // Implemented algorithm guarantees that the number of elements in nodes will be between Min and Max 118 // and that nodes are packed as tightly as possible 119 // e.g. for 177 values Max = 5 and Min = 2 it will construct the following tree: 120 // ROOT 177 121 // L1 125 52 122 // L2 25 25 25 25 25 25 17 10 123 // L3 5x5 5x5 5x5 5x5 5x5 5x5 3x5+2 2x5 124 125 template <typename Value, typename Options, typename Translator, typename Box, typename Allocators> 126 class pack 127 { 128 typedef typename rtree::node<Value, typename Options::parameters_type, Box, Allocators, typename Options::node_tag>::type node; 129 typedef typename rtree::internal_node<Value, typename Options::parameters_type, Box, Allocators, typename Options::node_tag>::type internal_node; 130 typedef typename rtree::leaf<Value, typename Options::parameters_type, Box, Allocators, typename Options::node_tag>::type leaf; 131 132 typedef typename Allocators::node_pointer node_pointer; 133 typedef rtree::subtree_destroyer<Value, Options, Translator, Box, Allocators> subtree_destroyer; 134 typedef typename Allocators::size_type size_type; 135 136 typedef typename geometry::point_type<Box>::type point_type; 137 typedef typename geometry::coordinate_type<point_type>::type coordinate_type; 138 typedef typename detail::default_content_result<Box>::type content_type; 139 typedef typename Options::parameters_type parameters_type; 140 static const std::size_t dimension = geometry::dimension<point_type>::value; 141 142 typedef typename rtree::container_from_elements_type< 143 typename rtree::elements_type<leaf>::type, 144 std::size_t 145 >::type values_counts_container; 146 147 typedef typename rtree::elements_type<internal_node>::type internal_elements; 148 typedef typename internal_elements::value_type internal_element; 149 150 public: 151 // Arbitrary iterators 152 template <typename InIt> inline static apply(InIt first,InIt last,size_type & values_count,size_type & leafs_level,parameters_type const & parameters,Translator const & translator,Allocators & allocators)153 node_pointer apply(InIt first, InIt last, size_type & values_count, size_type & leafs_level, 154 parameters_type const& parameters, Translator const& translator, Allocators & allocators) 155 { 156 typedef typename std::iterator_traits<InIt>::difference_type diff_type; 157 158 diff_type diff = std::distance(first, last); 159 if ( diff <= 0 ) 160 return node_pointer(0); 161 162 typedef std::pair<point_type, InIt> entry_type; 163 std::vector<entry_type> entries; 164 165 values_count = static_cast<size_type>(diff); 166 entries.reserve(values_count); 167 168 expandable_box<Box> hint_box; 169 for ( ; first != last ; ++first ) 170 { 171 // NOTE: support for iterators not returning true references adapted 172 // to Geometry concept and default translator returning true reference 173 // An alternative would be to dereference the iterator and translate 174 // in one expression each time the indexable was needed. 175 typename std::iterator_traits<InIt>::reference in_ref = *first; 176 typename Translator::result_type indexable = translator(in_ref); 177 178 // NOTE: added for consistency with insert() 179 // CONSIDER: alternative - ignore invalid indexable or throw an exception 180 BOOST_GEOMETRY_INDEX_ASSERT(detail::is_valid(indexable), "Indexable is invalid"); 181 182 hint_box.expand(indexable); 183 184 point_type pt; 185 geometry::centroid(indexable, pt); 186 entries.push_back(std::make_pair(pt, first)); 187 } 188 189 subtree_elements_counts subtree_counts = calculate_subtree_elements_counts(values_count, parameters, leafs_level); 190 internal_element el = per_level(entries.begin(), entries.end(), hint_box.get(), values_count, subtree_counts, 191 parameters, translator, allocators); 192 193 return el.second; 194 } 195 196 private: 197 template <typename BoxType> 198 class expandable_box 199 { 200 public: expandable_box()201 expandable_box() 202 : m_initialized(false) 203 {} 204 205 template <typename Indexable> expandable_box(Indexable const & indexable)206 explicit expandable_box(Indexable const& indexable) 207 : m_initialized(true) 208 { 209 detail::bounds(indexable, m_box); 210 } 211 212 template <typename Indexable> expand(Indexable const & indexable)213 void expand(Indexable const& indexable) 214 { 215 if ( !m_initialized ) 216 { 217 // it's guaranteed that the Box will be initialized 218 // only for Points, Boxes and Segments but that's ok 219 // since only those Geometries can be stored 220 detail::bounds(indexable, m_box); 221 m_initialized = true; 222 } 223 else 224 { 225 geometry::expand(m_box, indexable); 226 } 227 } 228 expand_by_epsilon()229 void expand_by_epsilon() 230 { 231 geometry::detail::expand_by_epsilon(m_box); 232 } 233 get() const234 BoxType const& get() const 235 { 236 BOOST_GEOMETRY_INDEX_ASSERT(m_initialized, "uninitialized envelope accessed"); 237 return m_box; 238 } 239 240 private: 241 bool m_initialized; 242 BoxType m_box; 243 }; 244 245 struct subtree_elements_counts 246 { subtree_elements_countsboost::geometry::index::detail::rtree::pack::subtree_elements_counts247 subtree_elements_counts(std::size_t ma, std::size_t mi) : maxc(ma), minc(mi) {} 248 std::size_t maxc; 249 std::size_t minc; 250 }; 251 252 template <typename EIt> inline static per_level(EIt first,EIt last,Box const & hint_box,std::size_t values_count,subtree_elements_counts const & subtree_counts,parameters_type const & parameters,Translator const & translator,Allocators & allocators)253 internal_element per_level(EIt first, EIt last, Box const& hint_box, std::size_t values_count, subtree_elements_counts const& subtree_counts, 254 parameters_type const& parameters, Translator const& translator, Allocators & allocators) 255 { 256 BOOST_GEOMETRY_INDEX_ASSERT(0 < std::distance(first, last) && static_cast<std::size_t>(std::distance(first, last)) == values_count, 257 "unexpected parameters"); 258 259 if ( subtree_counts.maxc <= 1 ) 260 { 261 // ROOT or LEAF 262 BOOST_GEOMETRY_INDEX_ASSERT(values_count <= parameters.get_max_elements(), 263 "too big number of elements"); 264 // if !root check m_parameters.get_min_elements() <= count 265 266 // create new leaf node 267 node_pointer n = rtree::create_node<Allocators, leaf>::apply(allocators); // MAY THROW (A) 268 subtree_destroyer auto_remover(n, allocators); 269 leaf & l = rtree::get<leaf>(*n); 270 271 // reserve space for values 272 rtree::elements(l).reserve(values_count); // MAY THROW (A) 273 274 // calculate values box and copy values 275 // initialize the box explicitly to avoid GCC-4.4 uninitialized variable warnings with O2 276 expandable_box<Box> elements_box(translator(*(first->second))); 277 rtree::elements(l).push_back(*(first->second)); // MAY THROW (A?,C) 278 for ( ++first ; first != last ; ++first ) 279 { 280 // NOTE: push_back() must be called at the end in order to support move_iterator. 281 // The iterator is dereferenced 2x (no temporary reference) to support 282 // non-true reference types and move_iterator without boost::forward<>. 283 elements_box.expand(translator(*(first->second))); 284 rtree::elements(l).push_back(*(first->second)); // MAY THROW (A?,C) 285 } 286 287 #ifdef BOOST_GEOMETRY_INDEX_EXPERIMENTAL_ENLARGE_BY_EPSILON 288 // Enlarge bounds of a leaf node. 289 // It's because Points and Segments are compared WRT machine epsilon 290 // This ensures that leafs bounds correspond to the stored elements 291 // NOTE: this is done only if the Indexable is a different kind of Geometry 292 // than the bounds (only Box for now). Spatial predicates are checked 293 // the same way for Geometry of the same kind. 294 if ( BOOST_GEOMETRY_CONDITION(( 295 ! index::detail::is_bounding_geometry 296 < 297 typename indexable_type<Translator>::type 298 >::value )) ) 299 { 300 elements_box.expand_by_epsilon(); 301 } 302 #endif 303 304 auto_remover.release(); 305 return internal_element(elements_box.get(), n); 306 } 307 308 // calculate next max and min subtree counts 309 subtree_elements_counts next_subtree_counts = subtree_counts; 310 next_subtree_counts.maxc /= parameters.get_max_elements(); 311 next_subtree_counts.minc /= parameters.get_max_elements(); 312 313 // create new internal node 314 node_pointer n = rtree::create_node<Allocators, internal_node>::apply(allocators); // MAY THROW (A) 315 subtree_destroyer auto_remover(n, allocators); 316 internal_node & in = rtree::get<internal_node>(*n); 317 318 // reserve space for values 319 std::size_t nodes_count = calculate_nodes_count(values_count, subtree_counts); 320 rtree::elements(in).reserve(nodes_count); // MAY THROW (A) 321 // calculate values box and copy values 322 expandable_box<Box> elements_box; 323 324 per_level_packets(first, last, hint_box, values_count, subtree_counts, next_subtree_counts, 325 rtree::elements(in), elements_box, 326 parameters, translator, allocators); 327 328 auto_remover.release(); 329 return internal_element(elements_box.get(), n); 330 } 331 332 template <typename EIt, typename ExpandableBox> inline static per_level_packets(EIt first,EIt last,Box const & hint_box,std::size_t values_count,subtree_elements_counts const & subtree_counts,subtree_elements_counts const & next_subtree_counts,internal_elements & elements,ExpandableBox & elements_box,parameters_type const & parameters,Translator const & translator,Allocators & allocators)333 void per_level_packets(EIt first, EIt last, Box const& hint_box, 334 std::size_t values_count, 335 subtree_elements_counts const& subtree_counts, 336 subtree_elements_counts const& next_subtree_counts, 337 internal_elements & elements, ExpandableBox & elements_box, 338 parameters_type const& parameters, Translator const& translator, Allocators & allocators) 339 { 340 BOOST_GEOMETRY_INDEX_ASSERT(0 < std::distance(first, last) && static_cast<std::size_t>(std::distance(first, last)) == values_count, 341 "unexpected parameters"); 342 343 BOOST_GEOMETRY_INDEX_ASSERT(subtree_counts.minc <= values_count, 344 "too small number of elements"); 345 346 // only one packet 347 if ( values_count <= subtree_counts.maxc ) 348 { 349 // the end, move to the next level 350 internal_element el = per_level(first, last, hint_box, values_count, next_subtree_counts, 351 parameters, translator, allocators); 352 353 // in case if push_back() do throw here 354 // and even if this is not probable (previously reserved memory, nonthrowing pairs copy) 355 // this case is also tested by exceptions test. 356 subtree_destroyer auto_remover(el.second, allocators); 357 // this container should have memory allocated, reserve() called outside 358 elements.push_back(el); // MAY THROW (A?,C) - however in normal conditions shouldn't 359 auto_remover.release(); 360 361 elements_box.expand(el.first); 362 return; 363 } 364 365 std::size_t median_count = calculate_median_count(values_count, subtree_counts); 366 EIt median = first + median_count; 367 368 coordinate_type greatest_length; 369 std::size_t greatest_dim_index = 0; 370 pack_utils::biggest_edge<dimension>::apply(hint_box, greatest_length, greatest_dim_index); 371 Box left, right; 372 pack_utils::nth_element_and_half_boxes<0, dimension> 373 ::apply(first, median, last, hint_box, left, right, greatest_dim_index); 374 375 per_level_packets(first, median, left, 376 median_count, subtree_counts, next_subtree_counts, 377 elements, elements_box, 378 parameters, translator, allocators); 379 per_level_packets(median, last, right, 380 values_count - median_count, subtree_counts, next_subtree_counts, 381 elements, elements_box, 382 parameters, translator, allocators); 383 } 384 385 inline static calculate_subtree_elements_counts(std::size_t elements_count,parameters_type const & parameters,size_type & leafs_level)386 subtree_elements_counts calculate_subtree_elements_counts(std::size_t elements_count, parameters_type const& parameters, size_type & leafs_level) 387 { 388 boost::ignore_unused(parameters); 389 390 subtree_elements_counts res(1, 1); 391 leafs_level = 0; 392 393 std::size_t smax = parameters.get_max_elements(); 394 for ( ; smax < elements_count ; smax *= parameters.get_max_elements(), ++leafs_level ) 395 res.maxc = smax; 396 397 res.minc = parameters.get_min_elements() * (res.maxc / parameters.get_max_elements()); 398 399 return res; 400 } 401 402 inline static calculate_nodes_count(std::size_t count,subtree_elements_counts const & subtree_counts)403 std::size_t calculate_nodes_count(std::size_t count, 404 subtree_elements_counts const& subtree_counts) 405 { 406 std::size_t n = count / subtree_counts.maxc; 407 std::size_t r = count % subtree_counts.maxc; 408 409 if ( 0 < r && r < subtree_counts.minc ) 410 { 411 std::size_t count_minus_min = count - subtree_counts.minc; 412 n = count_minus_min / subtree_counts.maxc; 413 r = count_minus_min % subtree_counts.maxc; 414 ++n; 415 } 416 417 if ( 0 < r ) 418 ++n; 419 420 return n; 421 } 422 423 inline static calculate_median_count(std::size_t count,subtree_elements_counts const & subtree_counts)424 std::size_t calculate_median_count(std::size_t count, 425 subtree_elements_counts const& subtree_counts) 426 { 427 // e.g. for max = 5, min = 2, count = 52, subtree_max = 25, subtree_min = 10 428 429 std::size_t n = count / subtree_counts.maxc; // e.g. 52 / 25 = 2 430 std::size_t r = count % subtree_counts.maxc; // e.g. 52 % 25 = 2 431 std::size_t median_count = (n / 2) * subtree_counts.maxc; // e.g. 2 / 2 * 25 = 25 432 433 if ( 0 != r ) // e.g. 0 != 2 434 { 435 if ( subtree_counts.minc <= r ) // e.g. 10 <= 2 == false 436 { 437 //BOOST_GEOMETRY_INDEX_ASSERT(0 < n, "unexpected value"); 438 median_count = ((n+1)/2) * subtree_counts.maxc; // if calculated ((2+1)/2) * 25 which would be ok, but not in all cases 439 } 440 else // r < subtree_counts.second // e.g. 2 < 10 == true 441 { 442 std::size_t count_minus_min = count - subtree_counts.minc; // e.g. 52 - 10 = 42 443 n = count_minus_min / subtree_counts.maxc; // e.g. 42 / 25 = 1 444 r = count_minus_min % subtree_counts.maxc; // e.g. 42 % 25 = 17 445 if ( r == 0 ) // e.g. false 446 { 447 // n can't be equal to 0 because then there wouldn't be any element in the other node 448 //BOOST_GEOMETRY_INDEX_ASSERT(0 < n, "unexpected value"); 449 median_count = ((n+1)/2) * subtree_counts.maxc; // if calculated ((1+1)/2) * 25 which would be ok, but not in all cases 450 } 451 else 452 { 453 if ( n == 0 ) // e.g. false 454 median_count = r; // if calculated -> 17 which is wrong! 455 else 456 median_count = ((n+2)/2) * subtree_counts.maxc; // e.g. ((1+2)/2) * 25 = 25 457 } 458 } 459 } 460 461 return median_count; 462 } 463 }; 464 465 }}}}} // namespace boost::geometry::index::detail::rtree 466 467 #endif // BOOST_GEOMETRY_INDEX_DETAIL_RTREE_PACK_CREATE_HPP 468