1 /** 2 * \file NearestNeighbor.hpp 3 * \brief Header for GeographicLib::NearestNeighbor class 4 * 5 * Copyright (c) Charles Karney (2016-2020) <charles@karney.com> and licensed 6 * under the MIT/X11 License. For more information, see 7 * https://geographiclib.sourceforge.io/ 8 **********************************************************************/ 9 10 #if !defined(GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP) 11 #define GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP 1 12 13 #include <algorithm> // for nth_element, max_element, etc. 14 #include <vector> 15 #include <queue> // for priority_queue 16 #include <utility> // for swap + pair 17 #include <cstring> 18 #include <limits> 19 #include <cmath> 20 #include <iostream> 21 #include <sstream> 22 // Only for GeographicLib::GeographicErr 23 #include <GeographicLib/Constants.hpp> 24 25 #if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \ 26 GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION 27 #include <boost/serialization/nvp.hpp> 28 #include <boost/serialization/split_member.hpp> 29 #include <boost/serialization/array.hpp> 30 #include <boost/serialization/vector.hpp> 31 #endif 32 33 #if defined(_MSC_VER) 34 // Squelch warnings about constant conditional expressions 35 # pragma warning (push) 36 # pragma warning (disable: 4127) 37 #endif 38 39 namespace GeographicLib { 40 41 /** 42 * \brief Nearest-neighbor calculations 43 * 44 * This class solves the nearest-neighbor problm using a vantage-point tree 45 * as described in \ref nearest. 46 * 47 * This class is templated so that it can handle arbitrary metric spaces as 48 * follows: 49 * 50 * @tparam dist_t the type used for measuring distances; it can be a real or 51 * signed integer type; in typical geodetic applications, \e dist_t might 52 * be <code>double</code>. 53 * @tparam pos_t the type for specifying the positions of points; geodetic 54 * application might bundled the latitude and longitude into a 55 * <code>std::pair<dist_t, dist_t></code>. 56 * @tparam distfun_t the type of a function object which takes takes two 57 * positions (of type \e pos_t) and returns the distance (of type \e 58 * dist_t); in geodetic applications, this might be a class which is 59 * constructed with a Geodesic object and which implements a member 60 * function with a signature <code>dist_t operator() (const pos_t&, const 61 * pos_t&) const</code>, which returns the geodesic distance between two 62 * points. 63 * 64 * \note The distance measure must satisfy the triangle inequality, \f$ 65 * d(a,c) \le d(a,b) + d(b,c) \f$ for all points \e a, \e b, \e c. The 66 * geodesic distance (given by Geodesic::Inverse) does, while the great 67 * ellipse distance and the rhumb line distance <i>do not</i>. If you use 68 * the ordinary Euclidean distance, i.e., \f$ \sqrt{(x_a-x_b)^2 + 69 * (y_a-y_b)^2} \f$ for two dimensions, don't be tempted to leave out the 70 * square root in the interests of "efficiency"; the squared distance does 71 * not satisfy the triangle inequality! 72 * 73 * \note This is a "header-only" implementation and, as such, depends in a 74 * minimal way on the rest of GeographicLib (the only dependency is through 75 * the use of GeographicLib::GeographicErr for handling compile-time and 76 * run-time exceptions). Therefore, it is easy to extract this class from 77 * the rest of GeographicLib and use it as a stand-alone facility. 78 * 79 * The \e dist_t type must support numeric_limits queries (specifically: 80 * is_signed, is_integer, max(), digits). 81 * 82 * The NearestNeighbor object is constructed with a vector of points (type \e 83 * pos_t) and a distance function (type \e distfun_t). However the object 84 * does \e not store the points. When querying the object with Search(), 85 * it's necessary to supply the same vector of points and the same distance 86 * function. 87 * 88 * There's no capability in this implementation to add or remove points from 89 * the set. Instead Initialize() should be called to re-initialize the 90 * object with the modified vector of points. 91 * 92 * Because of the overhead in constructing a NearestNeighbor object for a 93 * large set of points, functions Save() and Load() are provided to save the 94 * object to an external file. operator<<(), operator>>() and <a 95 * href="https://www.boost.org/libs/serialization/doc"> Boost 96 * serialization</a> can also be used to save and restore a NearestNeighbor 97 * object. This is illustrated in the example. 98 * 99 * Example of use: 100 * \include example-NearestNeighbor.cpp 101 **********************************************************************/ 102 template <typename dist_t, typename pos_t, class distfun_t> 103 class NearestNeighbor { 104 // For tracking changes to the I/O format 105 static const int version = 1; 106 // This is what we get "free"; but if sizeof(dist_t) = 1 (unlikely), allow 107 // 4 slots (and this accommodates the default value bucket = 4). 108 static const int maxbucket = 109 (2 + ((4 * sizeof(dist_t)) / sizeof(int) >= 2 ? 110 (4 * sizeof(dist_t)) / sizeof(int) : 2)); 111 public: 112 113 /** 114 * Default constructor for NearestNeighbor. 115 * 116 * This is equivalent to specifying an empty set of points. 117 **********************************************************************/ NearestNeighbor()118 NearestNeighbor() : _numpoints(0), _bucket(0), _cost(0) {} 119 120 /** 121 * Constructor for NearestNeighbor. 122 * 123 * @param[in] pts a vector of points to include in the set. 124 * @param[in] dist the distance function object. 125 * @param[in] bucket the size of the buckets at the leaf nodes; this must 126 * lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)] (default 4). 127 * @exception GeographicErr if the value of \e bucket is out of bounds or 128 * the size of \e pts is too big for an int. 129 * @exception std::bad_alloc if memory for the tree can't be allocated. 130 * 131 * \e pts may contain coincident points (i.e., the distance between them 132 * vanishes); these are treated as distinct. 133 * 134 * The choice of \e bucket is a tradeoff between space and efficiency. A 135 * larger \e bucket decreases the size of the NearestNeighbor object which 136 * scales as pts.size() / max(1, bucket) and reduces the number of distance 137 * calculations to construct the object by log2(bucket) * pts.size(). 138 * However each search then requires about bucket additional distance 139 * calculations. 140 * 141 * \warning The distances computed by \e dist must satisfy the standard 142 * metric conditions. If not, the results are undefined. Neither the data 143 * in \e pts nor the query points should contain NaNs or infinities because 144 * such data violates the metric conditions. 145 * 146 * \warning The same arguments \e pts and \e dist must be provided 147 * to the Search() function. 148 **********************************************************************/ NearestNeighbor(const std::vector<pos_t> & pts,const distfun_t & dist,int bucket=4)149 NearestNeighbor(const std::vector<pos_t>& pts, const distfun_t& dist, 150 int bucket = 4) { 151 Initialize(pts, dist, bucket); 152 } 153 154 /** 155 * Initialize or re-initialize NearestNeighbor. 156 * 157 * @param[in] pts a vector of points to include in the tree. 158 * @param[in] dist the distance function object. 159 * @param[in] bucket the size of the buckets at the leaf nodes; this must 160 * lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)] (default 4). 161 * @exception GeographicErr if the value of \e bucket is out of bounds or 162 * the size of \e pts is too big for an int. 163 * @exception std::bad_alloc if memory for the tree can't be allocated. 164 * 165 * See also the documentation on the constructor. 166 * 167 * If an exception is thrown, the state of the NearestNeighbor is 168 * unchanged. 169 **********************************************************************/ Initialize(const std::vector<pos_t> & pts,const distfun_t & dist,int bucket=4)170 void Initialize(const std::vector<pos_t>& pts, const distfun_t& dist, 171 int bucket = 4) { 172 static_assert(std::numeric_limits<dist_t>::is_signed, 173 "dist_t must be a signed type"); 174 if (!( 0 <= bucket && bucket <= maxbucket )) 175 throw GeographicLib::GeographicErr 176 ("bucket must lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)]"); 177 if (pts.size() > size_t(std::numeric_limits<int>::max())) 178 throw GeographicLib::GeographicErr("pts array too big"); 179 // the pair contains distance+id 180 std::vector<item> ids(pts.size()); 181 for (int k = int(ids.size()); k--;) 182 ids[k] = std::make_pair(dist_t(0), k); 183 int cost = 0; 184 std::vector<Node> tree; 185 init(pts, dist, bucket, tree, ids, cost, 186 0, int(ids.size()), int(ids.size()/2)); 187 _tree.swap(tree); 188 _numpoints = int(pts.size()); 189 _bucket = bucket; 190 _mc = _sc = 0; 191 _cost = cost; _c1 = _k = _cmax = 0; 192 _cmin = std::numeric_limits<int>::max(); 193 } 194 195 /** 196 * Search the NearestNeighbor. 197 * 198 * @param[in] pts the vector of points used for initialization. 199 * @param[in] dist the distance function object used for initialization. 200 * @param[in] query the query point. 201 * @param[out] ind a vector of indices to the closest points found. 202 * @param[in] k the number of points to search for (default = 1). 203 * @param[in] maxdist only return points with distances of \e maxdist or 204 * less from \e query (default is the maximum \e dist_t). 205 * @param[in] mindist only return points with distances of more than 206 * \e mindist from \e query (default = −1). 207 * @param[in] exhaustive whether to do an exhaustive search (default true). 208 * @param[in] tol the tolerance on the results (default 0). 209 * @return the distance to the closest point found (−1 if no points 210 * are found). 211 * @exception GeographicErr if \e pts has a different size from that used 212 * to construct the object. 213 * 214 * The indices returned in \e ind are sorted by distance from \e query 215 * (closest first). 216 * 217 * The simplest invocation is with just the 4 non-optional arguments. This 218 * returns the closest distance and the index to the closest point in 219 * <i>ind</i><sub>0</sub>. If there are several points equally close, then 220 * <i>ind</i><sub>0</sub> gives the index of an arbirary one of them. If 221 * there's no closest point (because the set of points is empty), then \e 222 * ind is empty and −1 is returned. 223 * 224 * With \e exhaustive = true and \e tol = 0 (their default values), this 225 * finds the indices of \e k closest neighbors to \e query whose distances 226 * to \e query are in (\e mindist, \e maxdist]. If \e mindist and \e 227 * maxdist have their default values, then these bounds have no effect. If 228 * \e query is one of the points in the tree, then set \e mindist = 0 to 229 * prevent this point (and other coincident points) from being returned. 230 * 231 * If \e exhaustive = false, exit as soon as \e k results satisfying the 232 * distance criteria are found. If less than \e k results are returned 233 * then the search was exhaustive even if \e exhaustive = false. 234 * 235 * If \e tol is positive, do an approximate search; in this case the 236 * results are to be interpreted as follows: if the <i>k</i>'th distance is 237 * \e dk, then all results with distances less than or equal \e dk − 238 * \e tol are correct; all others are suspect — there may be other 239 * closer results with distances greater or equal to \e dk − \e tol. 240 * If less than \e k results are found, then the search is exact. 241 * 242 * \e mindist should be used to exclude a "small" neighborhood of the query 243 * point (relative to the average spacing of the data). If \e mindist is 244 * large, the efficiency of the search deteriorates. 245 * 246 * \note Only the shortest distance is returned (as as the function value). 247 * The distances to other points (indexed by <i>ind</i><sub><i>j</i></sub> 248 * for \e j > 0) can be found by invoking \e dist again. 249 * 250 * \warning The arguments \e pts and \e dist must be identical to those 251 * used to initialize the NearestNeighbor; if not, this function will 252 * return some meaningless result (however, if the size of \e pts is wrong, 253 * this function throw an exception). 254 * 255 * \warning The query point cannot be a NaN or infinite because then the 256 * metric conditions are violated. 257 **********************************************************************/ Search(const std::vector<pos_t> & pts,const distfun_t & dist,const pos_t & query,std::vector<int> & ind,int k=1,dist_t maxdist=std::numeric_limits<dist_t>::max (),dist_t mindist=-1,bool exhaustive=true,dist_t tol=0) const258 dist_t Search(const std::vector<pos_t>& pts, const distfun_t& dist, 259 const pos_t& query, 260 std::vector<int>& ind, 261 int k = 1, 262 dist_t maxdist = std::numeric_limits<dist_t>::max(), 263 dist_t mindist = -1, 264 bool exhaustive = true, 265 dist_t tol = 0) const { 266 if (_numpoints != int(pts.size())) 267 throw GeographicLib::GeographicErr("pts array has wrong size"); 268 std::priority_queue<item> results; 269 if (_numpoints > 0 && k > 0 && maxdist > mindist) { 270 // distance to the kth closest point so far 271 dist_t tau = maxdist; 272 // first is negative of how far query is outside boundary of node 273 // +1 if on boundary or inside 274 // second is node index 275 std::priority_queue<item> todo; 276 todo.push(std::make_pair(dist_t(1), int(_tree.size()) - 1)); 277 int c = 0; 278 while (!todo.empty()) { 279 int n = todo.top().second; 280 dist_t d = -todo.top().first; 281 todo.pop(); 282 dist_t tau1 = tau - tol; 283 // compare tau and d again since tau may have become smaller. 284 if (!( n >= 0 && tau1 >= d )) continue; 285 const Node& current = _tree[n]; 286 dist_t dst = 0; // to suppress warning about uninitialized variable 287 bool exitflag = false, leaf = current.index < 0; 288 for (int i = 0; i < (leaf ? _bucket : 1); ++i) { 289 int index = leaf ? current.leaves[i] : current.index; 290 if (index < 0) break; 291 dst = dist(pts[index], query); 292 ++c; 293 294 if (dst > mindist && dst <= tau) { 295 if (int(results.size()) == k) results.pop(); 296 results.push(std::make_pair(dst, index)); 297 if (int(results.size()) == k) { 298 if (exhaustive) 299 tau = results.top().first; 300 else { 301 exitflag = true; 302 break; 303 } 304 if (tau <= tol) { 305 exitflag = true; 306 break; 307 } 308 } 309 } 310 } 311 if (exitflag) break; 312 313 if (current.index < 0) continue; 314 tau1 = tau - tol; 315 for (int l = 0; l < 2; ++l) { 316 if (current.data.child[l] >= 0 && 317 dst + current.data.upper[l] >= mindist) { 318 if (dst < current.data.lower[l]) { 319 d = current.data.lower[l] - dst; 320 if (tau1 >= d) 321 todo.push(std::make_pair(-d, current.data.child[l])); 322 } else if (dst > current.data.upper[l]) { 323 d = dst - current.data.upper[l]; 324 if (tau1 >= d) 325 todo.push(std::make_pair(-d, current.data.child[l])); 326 } else 327 todo.push(std::make_pair(dist_t(1), current.data.child[l])); 328 } 329 } 330 } 331 ++_k; 332 _c1 += c; 333 double omc = _mc; 334 _mc += (c - omc) / _k; 335 _sc += (c - omc) * (c - _mc); 336 if (c > _cmax) _cmax = c; 337 if (c < _cmin) _cmin = c; 338 } 339 340 dist_t d = -1; 341 ind.resize(results.size()); 342 343 for (int i = int(ind.size()); i--;) { 344 ind[i] = int(results.top().second); 345 if (i == 0) d = results.top().first; 346 results.pop(); 347 } 348 return d; 349 350 } 351 352 /** 353 * @return the total number of points in the set. 354 **********************************************************************/ NumPoints() const355 int NumPoints() const { return _numpoints; } 356 357 /** 358 * Write the object to an I/O stream. 359 * 360 * @param[in,out] os the stream to write to. 361 * @param[in] bin if true (the default) save in binary mode. 362 * @exception std::bad_alloc if memory for the string representation of the 363 * object can't be allocated. 364 * 365 * The counters tracking the statistics of searches are not saved; however 366 * the initializtion cost is saved. The format of the binary saves is \e 367 * not portable. 368 * 369 * \note <a href="https://www.boost.org/libs/serialization/doc"> 370 * Boost serialization</a> can also be used to save and restore a 371 * NearestNeighbor object. This requires that the 372 * GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION macro be defined. 373 **********************************************************************/ Save(std::ostream & os,bool bin=true) const374 void Save(std::ostream& os, bool bin = true) const { 375 int realspec = std::numeric_limits<dist_t>::digits * 376 (std::numeric_limits<dist_t>::is_integer ? -1 : 1); 377 if (bin) { 378 char id[] = "NearestNeighbor_"; 379 os.write(id, 16); 380 int buf[6]; 381 buf[0] = version; 382 buf[1] = realspec; 383 buf[2] = _bucket; 384 buf[3] = _numpoints; 385 buf[4] = int(_tree.size()); 386 buf[5] = _cost; 387 os.write(reinterpret_cast<const char *>(buf), 6 * sizeof(int)); 388 for (int i = 0; i < int(_tree.size()); ++i) { 389 const Node& node = _tree[i]; 390 os.write(reinterpret_cast<const char *>(&node.index), sizeof(int)); 391 if (node.index >= 0) { 392 os.write(reinterpret_cast<const char *>(node.data.lower), 393 2 * sizeof(dist_t)); 394 os.write(reinterpret_cast<const char *>(node.data.upper), 395 2 * sizeof(dist_t)); 396 os.write(reinterpret_cast<const char *>(node.data.child), 397 2 * sizeof(int)); 398 } else { 399 os.write(reinterpret_cast<const char *>(node.leaves), 400 _bucket * sizeof(int)); 401 } 402 } 403 } else { 404 std::stringstream ostring; 405 // Ensure enough precision for type dist_t. With C++11, max_digits10 406 // can be used instead. 407 if (!std::numeric_limits<dist_t>::is_integer) { 408 static const int prec 409 = int(std::ceil(std::numeric_limits<dist_t>::digits * 410 std::log10(2.0) + 1)); 411 ostring.precision(prec); 412 } 413 ostring << version << " " << realspec << " " << _bucket << " " 414 << _numpoints << " " << _tree.size() << " " << _cost; 415 for (int i = 0; i < int(_tree.size()); ++i) { 416 const Node& node = _tree[i]; 417 ostring << "\n" << node.index; 418 if (node.index >= 0) { 419 for (int l = 0; l < 2; ++l) 420 ostring << " " << node.data.lower[l] << " " << node.data.upper[l] 421 << " " << node.data.child[l]; 422 } else { 423 for (int l = 0; l < _bucket; ++l) 424 ostring << " " << node.leaves[l]; 425 } 426 } 427 os << ostring.str(); 428 } 429 } 430 431 /** 432 * Read the object from an I/O stream. 433 * 434 * @param[in,out] is the stream to read from 435 * @param[in] bin if true (the default) load in binary mode. 436 * @exception GeographicErr if the state read from \e is is illegal. 437 * @exception std::bad_alloc if memory for the tree can't be allocated. 438 * 439 * The counters tracking the statistics of searches are reset by this 440 * operation. Binary data must have been saved on a machine with the same 441 * architecture. If an exception is thrown, the state of the 442 * NearestNeighbor is unchanged. 443 * 444 * \note <a href="https://www.boost.org/libs/serialization/doc"> 445 * Boost serialization</a> can also be used to save and restore a 446 * NearestNeighbor object. This requires that the 447 * GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION macro be defined. 448 * 449 * \warning The same arguments \e pts and \e dist used for 450 * initialization must be provided to the Search() function. 451 **********************************************************************/ Load(std::istream & is,bool bin=true)452 void Load(std::istream& is, bool bin = true) { 453 int version1, realspec, bucket, numpoints, treesize, cost; 454 if (bin) { 455 char id[17]; 456 is.read(id, 16); 457 id[16] = '\0'; 458 if (!(std::strcmp(id, "NearestNeighbor_") == 0)) 459 throw GeographicLib::GeographicErr("Bad ID"); 460 is.read(reinterpret_cast<char *>(&version1), sizeof(int)); 461 is.read(reinterpret_cast<char *>(&realspec), sizeof(int)); 462 is.read(reinterpret_cast<char *>(&bucket), sizeof(int)); 463 is.read(reinterpret_cast<char *>(&numpoints), sizeof(int)); 464 is.read(reinterpret_cast<char *>(&treesize), sizeof(int)); 465 is.read(reinterpret_cast<char *>(&cost), sizeof(int)); 466 } else { 467 if (!( is >> version1 >> realspec >> bucket >> numpoints >> treesize 468 >> cost )) 469 throw GeographicLib::GeographicErr("Bad header"); 470 } 471 if (!( version1 == version )) 472 throw GeographicLib::GeographicErr("Incompatible version"); 473 if (!( realspec == std::numeric_limits<dist_t>::digits * 474 (std::numeric_limits<dist_t>::is_integer ? -1 : 1) )) 475 throw GeographicLib::GeographicErr("Different dist_t types"); 476 if (!( 0 <= bucket && bucket <= maxbucket )) 477 throw GeographicLib::GeographicErr("Bad bucket size"); 478 if (!( 0 <= treesize && treesize <= numpoints )) 479 throw 480 GeographicLib::GeographicErr("Bad number of points or tree size"); 481 if (!( 0 <= cost )) 482 throw GeographicLib::GeographicErr("Bad value for cost"); 483 std::vector<Node> tree; 484 tree.reserve(treesize); 485 for (int i = 0; i < treesize; ++i) { 486 Node node; 487 if (bin) { 488 is.read(reinterpret_cast<char *>(&node.index), sizeof(int)); 489 if (node.index >= 0) { 490 is.read(reinterpret_cast<char *>(node.data.lower), 491 2 * sizeof(dist_t)); 492 is.read(reinterpret_cast<char *>(node.data.upper), 493 2 * sizeof(dist_t)); 494 is.read(reinterpret_cast<char *>(node.data.child), 495 2 * sizeof(int)); 496 } else { 497 is.read(reinterpret_cast<char *>(node.leaves), 498 bucket * sizeof(int)); 499 for (int l = bucket; l < maxbucket; ++l) 500 node.leaves[l] = 0; 501 } 502 } else { 503 if (!( is >> node.index )) 504 throw GeographicLib::GeographicErr("Bad index"); 505 if (node.index >= 0) { 506 for (int l = 0; l < 2; ++l) { 507 if (!( is >> node.data.lower[l] >> node.data.upper[l] 508 >> node.data.child[l] )) 509 throw GeographicLib::GeographicErr("Bad node data"); 510 } 511 } else { 512 // Must be at least one valid leaf followed by a sequence end 513 // markers (-1). 514 for (int l = 0; l < bucket; ++l) { 515 if (!( is >> node.leaves[l] )) 516 throw GeographicLib::GeographicErr("Bad leaf data"); 517 } 518 for (int l = bucket; l < maxbucket; ++l) 519 node.leaves[l] = 0; 520 } 521 } 522 node.Check(numpoints, treesize, bucket); 523 tree.push_back(node); 524 } 525 _tree.swap(tree); 526 _numpoints = numpoints; 527 _bucket = bucket; 528 _mc = _sc = 0; 529 _cost = cost; _c1 = _k = _cmax = 0; 530 _cmin = std::numeric_limits<int>::max(); 531 } 532 533 /** 534 * Write the object to stream \e os as text. 535 * 536 * @param[in,out] os the output stream. 537 * @param[in] t the NearestNeighbor object to be saved. 538 * @exception std::bad_alloc if memory for the string representation of the 539 * object can't be allocated. 540 **********************************************************************/ operator <<(std::ostream & os,const NearestNeighbor & t)541 friend std::ostream& operator<<(std::ostream& os, const NearestNeighbor& t) 542 { t.Save(os, false); return os; } 543 544 /** 545 * Read the object from stream \e is as text. 546 * 547 * @param[in,out] is the input stream. 548 * @param[out] t the NearestNeighbor object to be loaded. 549 * @exception GeographicErr if the state read from \e is is illegal. 550 * @exception std::bad_alloc if memory for the tree can't be allocated. 551 **********************************************************************/ operator >>(std::istream & is,NearestNeighbor & t)552 friend std::istream& operator>>(std::istream& is, NearestNeighbor& t) 553 { t.Load(is, false); return is; } 554 555 /** 556 * Swap with another NearestNeighbor object. 557 * 558 * @param[in,out] t the NearestNeighbor object to swap with. 559 **********************************************************************/ swap(NearestNeighbor & t)560 void swap(NearestNeighbor& t) { 561 std::swap(_numpoints, t._numpoints); 562 std::swap(_bucket, t._bucket); 563 std::swap(_cost, t._cost); 564 _tree.swap(t._tree); 565 std::swap(_mc, t._mc); 566 std::swap(_sc, t._sc); 567 std::swap(_c1, t._c1); 568 std::swap(_k, t._k); 569 std::swap(_cmin, t._cmin); 570 std::swap(_cmax, t._cmax); 571 } 572 573 /** 574 * The accumulated statistics on the searches so far. 575 * 576 * @param[out] setupcost the cost of initializing the NearestNeighbor. 577 * @param[out] numsearches the number of calls to Search(). 578 * @param[out] searchcost the total cost of the calls to Search(). 579 * @param[out] mincost the minimum cost of a Search(). 580 * @param[out] maxcost the maximum cost of a Search(). 581 * @param[out] mean the mean cost of a Search(). 582 * @param[out] sd the standard deviation in the cost of a Search(). 583 * 584 * Here "cost" measures the number of distance calculations needed. Note 585 * that the accumulation of statistics is \e not thread safe. 586 **********************************************************************/ Statistics(int & setupcost,int & numsearches,int & searchcost,int & mincost,int & maxcost,double & mean,double & sd) const587 void Statistics(int& setupcost, int& numsearches, int& searchcost, 588 int& mincost, int& maxcost, 589 double& mean, double& sd) const { 590 setupcost = _cost; numsearches = _k; searchcost = _c1; 591 mincost = _cmin; maxcost = _cmax; 592 mean = _mc; sd = std::sqrt(_sc / (_k - 1)); 593 } 594 595 /** 596 * Reset the counters for the accumulated statistics on the searches so 597 * far. 598 **********************************************************************/ ResetStatistics() const599 void ResetStatistics() const { 600 _mc = _sc = 0; 601 _c1 = _k = _cmax = 0; 602 _cmin = std::numeric_limits<int>::max(); 603 } 604 605 private: 606 // Package up a dist_t and an int. We will want to sort on the dist_t so 607 // put it first. 608 typedef std::pair<dist_t, int> item; 609 // \cond SKIP 610 class Node { 611 public: 612 struct bounds { 613 dist_t lower[2], upper[2]; // bounds on inner/outer distances 614 int child[2]; 615 }; 616 union { 617 bounds data; 618 int leaves[maxbucket]; 619 }; 620 int index; 621 Node()622 Node() 623 : index(-1) 624 { 625 for (int i = 0; i < 2; ++i) { 626 data.lower[i] = data.upper[i] = 0; 627 data.child[i] = -1; 628 } 629 } 630 631 // Sanity check on a Node Check(int numpoints,int treesize,int bucket) const632 void Check(int numpoints, int treesize, int bucket) const { 633 if (!( -1 <= index && index < numpoints )) 634 throw GeographicLib::GeographicErr("Bad index"); 635 if (index >= 0) { 636 if (!( -1 <= data.child[0] && data.child[0] < treesize && 637 -1 <= data.child[1] && data.child[1] < treesize )) 638 throw GeographicLib::GeographicErr("Bad child pointers"); 639 if (!( 0 <= data.lower[0] && data.lower[0] <= data.upper[0] && 640 data.upper[0] <= data.lower[1] && 641 data.lower[1] <= data.upper[1] )) 642 throw GeographicLib::GeographicErr("Bad bounds"); 643 } else { 644 // Must be at least one valid leaf followed by a sequence end markers 645 // (-1). 646 bool start = true; 647 for (int l = 0; l < bucket; ++l) { 648 if (!( (start ? 649 ((l == 0 ? 0 : -1) <= leaves[l] && leaves[l] < numpoints) : 650 leaves[l] == -1) )) 651 throw GeographicLib::GeographicErr("Bad leaf data"); 652 start = leaves[l] >= 0; 653 } 654 for (int l = bucket; l < maxbucket; ++l) { 655 if (leaves[l] != 0) 656 throw GeographicLib::GeographicErr("Bad leaf data"); 657 } 658 } 659 } 660 661 #if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \ 662 GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION 663 friend class boost::serialization::access; 664 template<class Archive> save(Archive & ar,const unsigned int) const665 void save(Archive& ar, const unsigned int) const { 666 ar & boost::serialization::make_nvp("index", index); 667 if (index < 0) 668 ar & boost::serialization::make_nvp("leaves", leaves); 669 else 670 ar & boost::serialization::make_nvp("lower", data.lower) 671 & boost::serialization::make_nvp("upper", data.upper) 672 & boost::serialization::make_nvp("child", data.child); 673 } 674 template<class Archive> load(Archive & ar,const unsigned int)675 void load(Archive& ar, const unsigned int) { 676 ar & boost::serialization::make_nvp("index", index); 677 if (index < 0) 678 ar & boost::serialization::make_nvp("leaves", leaves); 679 else 680 ar & boost::serialization::make_nvp("lower", data.lower) 681 & boost::serialization::make_nvp("upper", data.upper) 682 & boost::serialization::make_nvp("child", data.child); 683 } 684 template<class Archive> serialize(Archive & ar,const unsigned int file_version)685 void serialize(Archive& ar, const unsigned int file_version) 686 { boost::serialization::split_member(ar, *this, file_version); } 687 #endif 688 }; 689 // \endcond 690 #if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \ 691 GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION 692 friend class boost::serialization::access; save(Archive & ar,const unsigned) const693 template<class Archive> void save(Archive& ar, const unsigned) const { 694 int realspec = std::numeric_limits<dist_t>::digits * 695 (std::numeric_limits<dist_t>::is_integer ? -1 : 1); 696 // Need to use version1, otherwise load error in debug mode on Linux: 697 // undefined reference to GeographicLib::NearestNeighbor<...>::version. 698 int version1 = version; 699 ar & boost::serialization::make_nvp("version", version1) 700 & boost::serialization::make_nvp("realspec", realspec) 701 & boost::serialization::make_nvp("bucket", _bucket) 702 & boost::serialization::make_nvp("numpoints", _numpoints) 703 & boost::serialization::make_nvp("cost", _cost) 704 & boost::serialization::make_nvp("tree", _tree); 705 } load(Archive & ar,const unsigned)706 template<class Archive> void load(Archive& ar, const unsigned) { 707 int version1, realspec, bucket, numpoints, cost; 708 ar & boost::serialization::make_nvp("version", version1); 709 if (version1 != version) 710 throw GeographicLib::GeographicErr("Incompatible version"); 711 std::vector<Node> tree; 712 ar & boost::serialization::make_nvp("realspec", realspec); 713 if (!( realspec == std::numeric_limits<dist_t>::digits * 714 (std::numeric_limits<dist_t>::is_integer ? -1 : 1) )) 715 throw GeographicLib::GeographicErr("Different dist_t types"); 716 ar & boost::serialization::make_nvp("bucket", bucket); 717 if (!( 0 <= bucket && bucket <= maxbucket )) 718 throw GeographicLib::GeographicErr("Bad bucket size"); 719 ar & boost::serialization::make_nvp("numpoints", numpoints) 720 & boost::serialization::make_nvp("cost", cost) 721 & boost::serialization::make_nvp("tree", tree); 722 if (!( 0 <= int(tree.size()) && int(tree.size()) <= numpoints )) 723 throw 724 GeographicLib::GeographicErr("Bad number of points or tree size"); 725 for (int i = 0; i < int(tree.size()); ++i) 726 tree[i].Check(numpoints, int(tree.size()), bucket); 727 _tree.swap(tree); 728 _numpoints = numpoints; 729 _bucket = bucket; 730 _mc = _sc = 0; 731 _cost = cost; _c1 = _k = _cmax = 0; 732 _cmin = std::numeric_limits<int>::max(); 733 } 734 template<class Archive> serialize(Archive & ar,const unsigned int file_version)735 void serialize(Archive& ar, const unsigned int file_version) 736 { boost::serialization::split_member(ar, *this, file_version); } 737 #endif 738 739 int _numpoints, _bucket, _cost; 740 std::vector<Node> _tree; 741 // Counters to track stastistics on the cost of searches 742 mutable double _mc, _sc; 743 mutable int _c1, _k, _cmin, _cmax; 744 init(const std::vector<pos_t> & pts,const distfun_t & dist,int bucket,std::vector<Node> & tree,std::vector<item> & ids,int & cost,int l,int u,int vp)745 int init(const std::vector<pos_t>& pts, const distfun_t& dist, int bucket, 746 std::vector<Node>& tree, std::vector<item>& ids, int& cost, 747 int l, int u, int vp) { 748 749 if (u == l) 750 return -1; 751 Node node; 752 753 if (u - l > (bucket == 0 ? 1 : bucket)) { 754 755 // choose a vantage point and move it to the start 756 int i = vp; 757 std::swap(ids[l], ids[i]); 758 759 int m = (u + l + 1) / 2; 760 761 for (int k = l + 1; k < u; ++k) { 762 ids[k].first = dist(pts[ids[l].second], pts[ids[k].second]); 763 ++cost; 764 } 765 // partition around the median distance 766 std::nth_element(ids.begin() + l + 1, 767 ids.begin() + m, 768 ids.begin() + u); 769 node.index = ids[l].second; 770 if (m > l + 1) { // node.child[0] is possibly empty 771 typename std::vector<item>::iterator 772 t = std::min_element(ids.begin() + l + 1, ids.begin() + m); 773 node.data.lower[0] = t->first; 774 t = std::max_element(ids.begin() + l + 1, ids.begin() + m); 775 node.data.upper[0] = t->first; 776 // Use point with max distance as vantage point; this point act as a 777 // "corner" point and leads to a good partition. 778 node.data.child[0] = init(pts, dist, bucket, tree, ids, cost, 779 l + 1, m, int(t - ids.begin())); 780 } 781 typename std::vector<item>::iterator 782 t = std::max_element(ids.begin() + m, ids.begin() + u); 783 node.data.lower[1] = ids[m].first; 784 node.data.upper[1] = t->first; 785 // Use point with max distance as vantage point here too 786 node.data.child[1] = init(pts, dist, bucket, tree, ids, cost, 787 m, u, int(t - ids.begin())); 788 } else { 789 if (bucket == 0) 790 node.index = ids[l].second; 791 else { 792 node.index = -1; 793 // Sort the bucket entries so that the tree is independent of the 794 // implementation of nth_element. 795 std::sort(ids.begin() + l, ids.begin() + u); 796 for (int i = l; i < u; ++i) 797 node.leaves[i-l] = ids[i].second; 798 for (int i = u - l; i < bucket; ++i) 799 node.leaves[i] = -1; 800 for (int i = bucket; i < maxbucket; ++i) 801 node.leaves[i] = 0; 802 } 803 } 804 805 tree.push_back(node); 806 return int(tree.size()) - 1; 807 } 808 809 }; 810 811 } // namespace GeographicLib 812 813 namespace std { 814 815 /** 816 * Swap two GeographicLib::NearestNeighbor objects. 817 * 818 * @tparam dist_t the type used for measuring distances. 819 * @tparam pos_t the type for specifying the positions of points. 820 * @tparam distfun_t the type for a function object which calculates 821 * distances between points. 822 * @param[in,out] a the first GeographicLib::NearestNeighbor to swap. 823 * @param[in,out] b the second GeographicLib::NearestNeighbor to swap. 824 **********************************************************************/ 825 template <typename dist_t, typename pos_t, class distfun_t> swap(GeographicLib::NearestNeighbor<dist_t,pos_t,distfun_t> & a,GeographicLib::NearestNeighbor<dist_t,pos_t,distfun_t> & b)826 void swap(GeographicLib::NearestNeighbor<dist_t, pos_t, distfun_t>& a, 827 GeographicLib::NearestNeighbor<dist_t, pos_t, distfun_t>& b) { 828 a.swap(b); 829 } 830 831 } // namespace std 832 833 #if defined(_MSC_VER) 834 # pragma warning (pop) 835 #endif 836 837 #endif // GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP 838