1 /**
2  * \file NearestNeighbor.hpp
3  * \brief Header for GeographicLib::NearestNeighbor class
4  *
5  * Copyright (c) Charles Karney (2016-2020) <charles@karney.com> and licensed
6  * under the MIT/X11 License.  For more information, see
7  * https://geographiclib.sourceforge.io/
8  **********************************************************************/
9 
10 #if !defined(GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP)
11 #define GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP 1
12 
13 #include <algorithm>            // for nth_element, max_element, etc.
14 #include <vector>
15 #include <queue>                // for priority_queue
16 #include <utility>              // for swap + pair
17 #include <cstring>
18 #include <limits>
19 #include <cmath>
20 #include <iostream>
21 #include <sstream>
22 // Only for GeographicLib::GeographicErr
23 #include <GeographicLib/Constants.hpp>
24 
25 #if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \
26   GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
27 #include <boost/serialization/nvp.hpp>
28 #include <boost/serialization/split_member.hpp>
29 #include <boost/serialization/array.hpp>
30 #include <boost/serialization/vector.hpp>
31 #endif
32 
33 #if defined(_MSC_VER)
34 // Squelch warnings about constant conditional expressions
35 #  pragma warning (push)
36 #  pragma warning (disable: 4127)
37 #endif
38 
39 namespace GeographicLib {
40 
41   /**
42    * \brief Nearest-neighbor calculations
43    *
44    * This class solves the nearest-neighbor problm using a vantage-point tree
45    * as described in \ref nearest.
46    *
47    * This class is templated so that it can handle arbitrary metric spaces as
48    * follows:
49    *
50    * @tparam dist_t the type used for measuring distances; it can be a real or
51    *   signed integer type; in typical geodetic applications, \e dist_t might
52    *   be <code>double</code>.
53    * @tparam pos_t the type for specifying the positions of points; geodetic
54    *   application might bundled the latitude and longitude into a
55    *   <code>std::pair<dist_t, dist_t></code>.
56    * @tparam distfun_t the type of a function object which takes takes two
57    *   positions (of type \e pos_t) and returns the distance (of type \e
58    *   dist_t); in geodetic applications, this might be a class which is
59    *   constructed with a Geodesic object and which implements a member
60    *   function with a signature <code>dist_t operator() (const pos_t&, const
61    *   pos_t&) const</code>, which returns the geodesic distance between two
62    *   points.
63    *
64    * \note The distance measure must satisfy the triangle inequality, \f$
65    * d(a,c) \le d(a,b) + d(b,c) \f$ for all points \e a, \e b, \e c.  The
66    * geodesic distance (given by Geodesic::Inverse) does, while the great
67    * ellipse distance and the rhumb line distance <i>do not</i>.  If you use
68    * the ordinary Euclidean distance, i.e., \f$ \sqrt{(x_a-x_b)^2 +
69    * (y_a-y_b)^2} \f$ for two dimensions, don't be tempted to leave out the
70    * square root in the interests of "efficiency"; the squared distance does
71    * not satisfy the triangle inequality!
72    *
73    * \note This is a "header-only" implementation and, as such, depends in a
74    * minimal way on the rest of GeographicLib (the only dependency is through
75    * the use of GeographicLib::GeographicErr for handling compile-time and
76    * run-time exceptions).  Therefore, it is easy to extract this class from
77    * the rest of GeographicLib and use it as a stand-alone facility.
78    *
79    * The \e dist_t type must support numeric_limits queries (specifically:
80    * is_signed, is_integer, max(), digits).
81    *
82    * The NearestNeighbor object is constructed with a vector of points (type \e
83    * pos_t) and a distance function (type \e distfun_t).  However the object
84    * does \e not store the points.  When querying the object with Search(),
85    * it's necessary to supply the same vector of points and the same distance
86    * function.
87    *
88    * There's no capability in this implementation to add or remove points from
89    * the set.  Instead Initialize() should be called to re-initialize the
90    * object with the modified vector of points.
91    *
92    * Because of the overhead in constructing a NearestNeighbor object for a
93    * large set of points, functions Save() and Load() are provided to save the
94    * object to an external file.  operator<<(), operator>>() and <a
95    * href="https://www.boost.org/libs/serialization/doc"> Boost
96    * serialization</a> can also be used to save and restore a NearestNeighbor
97    * object.  This is illustrated in the example.
98    *
99    * Example of use:
100    * \include example-NearestNeighbor.cpp
101    **********************************************************************/
102   template <typename dist_t, typename pos_t, class distfun_t>
103   class NearestNeighbor {
104     // For tracking changes to the I/O format
105     static const int version = 1;
106     // This is what we get "free"; but if sizeof(dist_t) = 1 (unlikely), allow
107     // 4 slots (and this accommodates the default value bucket = 4).
108     static const int maxbucket =
109       (2 + ((4 * sizeof(dist_t)) / sizeof(int) >= 2 ?
110             (4 * sizeof(dist_t)) / sizeof(int) : 2));
111   public:
112 
113     /**
114      * Default constructor for NearestNeighbor.
115      *
116      * This is equivalent to specifying an empty set of points.
117      **********************************************************************/
NearestNeighbor()118     NearestNeighbor() : _numpoints(0), _bucket(0), _cost(0) {}
119 
120     /**
121      * Constructor for NearestNeighbor.
122      *
123      * @param[in] pts a vector of points to include in the set.
124      * @param[in] dist the distance function object.
125      * @param[in] bucket the size of the buckets at the leaf nodes; this must
126      *   lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)] (default 4).
127      * @exception GeographicErr if the value of \e bucket is out of bounds or
128      *   the size of \e pts is too big for an int.
129      * @exception std::bad_alloc if memory for the tree can't be allocated.
130      *
131      * \e pts may contain coincident points (i.e., the distance between them
132      * vanishes); these are treated as distinct.
133      *
134      * The choice of \e bucket is a tradeoff between space and efficiency.  A
135      * larger \e bucket decreases the size of the NearestNeighbor object which
136      * scales as pts.size() / max(1, bucket) and reduces the number of distance
137      * calculations to construct the object by log2(bucket) * pts.size().
138      * However each search then requires about bucket additional distance
139      * calculations.
140      *
141      * \warning The distances computed by \e dist must satisfy the standard
142      * metric conditions.  If not, the results are undefined.  Neither the data
143      * in \e pts nor the query points should contain NaNs or infinities because
144      * such data violates the metric conditions.
145      *
146      * \warning The same arguments \e pts and \e dist must be provided
147      * to the Search() function.
148      **********************************************************************/
NearestNeighbor(const std::vector<pos_t> & pts,const distfun_t & dist,int bucket=4)149     NearestNeighbor(const std::vector<pos_t>& pts, const distfun_t& dist,
150                     int bucket = 4) {
151       Initialize(pts, dist, bucket);
152     }
153 
154     /**
155      * Initialize or re-initialize NearestNeighbor.
156      *
157      * @param[in] pts a vector of points to include in the tree.
158      * @param[in] dist the distance function object.
159      * @param[in] bucket the size of the buckets at the leaf nodes; this must
160      *   lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)] (default 4).
161      * @exception GeographicErr if the value of \e bucket is out of bounds or
162      *   the size of \e pts is too big for an int.
163      * @exception std::bad_alloc if memory for the tree can't be allocated.
164      *
165      * See also the documentation on the constructor.
166      *
167      * If an exception is thrown, the state of the NearestNeighbor is
168      * unchanged.
169      **********************************************************************/
Initialize(const std::vector<pos_t> & pts,const distfun_t & dist,int bucket=4)170     void Initialize(const std::vector<pos_t>& pts, const distfun_t& dist,
171                     int bucket = 4) {
172       static_assert(std::numeric_limits<dist_t>::is_signed,
173                     "dist_t must be a signed type");
174       if (!( 0 <= bucket && bucket <= maxbucket ))
175         throw GeographicLib::GeographicErr
176           ("bucket must lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)]");
177       if (pts.size() > size_t(std::numeric_limits<int>::max()))
178         throw GeographicLib::GeographicErr("pts array too big");
179       // the pair contains distance+id
180       std::vector<item> ids(pts.size());
181       for (int k = int(ids.size()); k--;)
182         ids[k] = std::make_pair(dist_t(0), k);
183       int cost = 0;
184       std::vector<Node> tree;
185       init(pts, dist, bucket, tree, ids, cost,
186            0, int(ids.size()), int(ids.size()/2));
187       _tree.swap(tree);
188       _numpoints = int(pts.size());
189       _bucket = bucket;
190       _mc = _sc = 0;
191       _cost = cost; _c1 = _k = _cmax = 0;
192       _cmin = std::numeric_limits<int>::max();
193     }
194 
195     /**
196      * Search the NearestNeighbor.
197      *
198      * @param[in] pts the vector of points used for initialization.
199      * @param[in] dist the distance function object used for initialization.
200      * @param[in] query the query point.
201      * @param[out] ind a vector of indices to the closest points found.
202      * @param[in] k the number of points to search for (default = 1).
203      * @param[in] maxdist only return points with distances of \e maxdist or
204      *   less from \e query (default is the maximum \e dist_t).
205      * @param[in] mindist only return points with distances of more than
206      *   \e mindist from \e query (default = &minus;1).
207      * @param[in] exhaustive whether to do an exhaustive search (default true).
208      * @param[in] tol the tolerance on the results (default 0).
209      * @return the distance to the closest point found (&minus;1 if no points
210      *   are found).
211      * @exception GeographicErr if \e pts has a different size from that used
212      *   to construct the object.
213      *
214      * The indices returned in \e ind are sorted by distance from \e query
215      * (closest first).
216      *
217      * The simplest invocation is with just the 4 non-optional arguments.  This
218      * returns the closest distance and the index to the closest point in
219      * <i>ind</i><sub>0</sub>.  If there are several points equally close, then
220      * <i>ind</i><sub>0</sub> gives the index of an arbirary one of them.  If
221      * there's no closest point (because the set of points is empty), then \e
222      * ind is empty and &minus;1 is returned.
223      *
224      * With \e exhaustive = true and \e tol = 0 (their default values), this
225      * finds the indices of \e k closest neighbors to \e query whose distances
226      * to \e query are in (\e mindist, \e maxdist].  If \e mindist and \e
227      * maxdist have their default values, then these bounds have no effect.  If
228      * \e query is one of the points in the tree, then set \e mindist = 0 to
229      * prevent this point (and other coincident points) from being returned.
230      *
231      * If \e exhaustive = false, exit as soon as \e k results satisfying the
232      * distance criteria are found.  If less than \e k results are returned
233      * then the search was exhaustive even if \e exhaustive = false.
234      *
235      * If \e tol is positive, do an approximate search; in this case the
236      * results are to be interpreted as follows: if the <i>k</i>'th distance is
237      * \e dk, then all results with distances less than or equal \e dk &minus;
238      * \e tol are correct; all others are suspect &mdash; there may be other
239      * closer results with distances greater or equal to \e dk &minus; \e tol.
240      * If less than \e k results are found, then the search is exact.
241      *
242      * \e mindist should be used to exclude a "small" neighborhood of the query
243      * point (relative to the average spacing of the data).  If \e mindist is
244      * large, the efficiency of the search deteriorates.
245      *
246      * \note Only the shortest distance is returned (as as the function value).
247      * The distances to other points (indexed by <i>ind</i><sub><i>j</i></sub>
248      * for \e j > 0) can be found by invoking \e dist again.
249      *
250      * \warning The arguments \e pts and \e dist must be identical to those
251      * used to initialize the NearestNeighbor; if not, this function will
252      * return some meaningless result (however, if the size of \e pts is wrong,
253      * this function throw an exception).
254      *
255      * \warning The query point cannot be a NaN or infinite because then the
256      * metric conditions are violated.
257      **********************************************************************/
Search(const std::vector<pos_t> & pts,const distfun_t & dist,const pos_t & query,std::vector<int> & ind,int k=1,dist_t maxdist=std::numeric_limits<dist_t>::max (),dist_t mindist=-1,bool exhaustive=true,dist_t tol=0) const258     dist_t Search(const std::vector<pos_t>& pts, const distfun_t& dist,
259                   const pos_t& query,
260                   std::vector<int>& ind,
261                   int k = 1,
262                   dist_t maxdist = std::numeric_limits<dist_t>::max(),
263                   dist_t mindist = -1,
264                   bool exhaustive = true,
265                   dist_t tol = 0) const {
266       if (_numpoints != int(pts.size()))
267           throw GeographicLib::GeographicErr("pts array has wrong size");
268       std::priority_queue<item> results;
269       if (_numpoints > 0 && k > 0 && maxdist > mindist) {
270         // distance to the kth closest point so far
271         dist_t tau = maxdist;
272         // first is negative of how far query is outside boundary of node
273         // +1 if on boundary or inside
274         // second is node index
275         std::priority_queue<item> todo;
276         todo.push(std::make_pair(dist_t(1), int(_tree.size()) - 1));
277         int c = 0;
278         while (!todo.empty()) {
279           int n = todo.top().second;
280           dist_t d = -todo.top().first;
281           todo.pop();
282           dist_t tau1 = tau - tol;
283           // compare tau and d again since tau may have become smaller.
284           if (!( n >= 0 && tau1 >= d )) continue;
285           const Node& current = _tree[n];
286           dist_t dst = 0;   // to suppress warning about uninitialized variable
287           bool exitflag = false, leaf = current.index < 0;
288           for (int i = 0; i < (leaf ? _bucket : 1); ++i) {
289             int index = leaf ? current.leaves[i] : current.index;
290             if (index < 0) break;
291             dst = dist(pts[index], query);
292             ++c;
293 
294             if (dst > mindist && dst <= tau) {
295               if (int(results.size()) == k) results.pop();
296               results.push(std::make_pair(dst, index));
297               if (int(results.size()) == k) {
298                 if (exhaustive)
299                   tau = results.top().first;
300                 else {
301                   exitflag = true;
302                   break;
303                 }
304                 if (tau <= tol) {
305                   exitflag = true;
306                   break;
307                 }
308               }
309             }
310           }
311           if (exitflag) break;
312 
313           if (current.index < 0) continue;
314           tau1 = tau - tol;
315           for (int l = 0; l < 2; ++l) {
316             if (current.data.child[l] >= 0 &&
317                 dst + current.data.upper[l] >= mindist) {
318               if (dst < current.data.lower[l]) {
319                 d = current.data.lower[l] - dst;
320                 if (tau1 >= d)
321                   todo.push(std::make_pair(-d, current.data.child[l]));
322               } else if (dst > current.data.upper[l]) {
323                 d = dst - current.data.upper[l];
324                 if (tau1 >= d)
325                   todo.push(std::make_pair(-d, current.data.child[l]));
326               } else
327                 todo.push(std::make_pair(dist_t(1), current.data.child[l]));
328             }
329           }
330         }
331         ++_k;
332         _c1 += c;
333         double omc = _mc;
334         _mc += (c - omc) / _k;
335         _sc += (c - omc) * (c - _mc);
336         if (c > _cmax) _cmax = c;
337         if (c < _cmin) _cmin = c;
338       }
339 
340       dist_t d = -1;
341       ind.resize(results.size());
342 
343       for (int i = int(ind.size()); i--;) {
344         ind[i] = int(results.top().second);
345         if (i == 0) d = results.top().first;
346         results.pop();
347       }
348       return d;
349 
350     }
351 
352     /**
353      * @return the total number of points in the set.
354      **********************************************************************/
NumPoints() const355     int NumPoints() const { return _numpoints; }
356 
357     /**
358      * Write the object to an I/O stream.
359      *
360      * @param[in,out] os the stream to write to.
361      * @param[in] bin if true (the default) save in binary mode.
362      * @exception std::bad_alloc if memory for the string representation of the
363      *   object can't be allocated.
364      *
365      * The counters tracking the statistics of searches are not saved; however
366      * the initializtion cost is saved.  The format of the binary saves is \e
367      * not portable.
368      *
369      * \note <a href="https://www.boost.org/libs/serialization/doc">
370      * Boost serialization</a> can also be used to save and restore a
371      * NearestNeighbor object.  This requires that the
372      * GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION macro be defined.
373      **********************************************************************/
Save(std::ostream & os,bool bin=true) const374     void Save(std::ostream& os, bool bin = true) const {
375       int realspec = std::numeric_limits<dist_t>::digits *
376         (std::numeric_limits<dist_t>::is_integer ? -1 : 1);
377       if (bin) {
378         char id[] = "NearestNeighbor_";
379         os.write(id, 16);
380         int buf[6];
381         buf[0] = version;
382         buf[1] = realspec;
383         buf[2] = _bucket;
384         buf[3] = _numpoints;
385         buf[4] = int(_tree.size());
386         buf[5] = _cost;
387         os.write(reinterpret_cast<const char *>(buf), 6 * sizeof(int));
388         for (int i = 0; i < int(_tree.size()); ++i) {
389           const Node& node = _tree[i];
390           os.write(reinterpret_cast<const char *>(&node.index), sizeof(int));
391           if (node.index >= 0) {
392             os.write(reinterpret_cast<const char *>(node.data.lower),
393                      2 * sizeof(dist_t));
394             os.write(reinterpret_cast<const char *>(node.data.upper),
395                      2 * sizeof(dist_t));
396             os.write(reinterpret_cast<const char *>(node.data.child),
397                      2 * sizeof(int));
398           } else {
399             os.write(reinterpret_cast<const char *>(node.leaves),
400                      _bucket * sizeof(int));
401           }
402         }
403       } else {
404         std::stringstream ostring;
405           // Ensure enough precision for type dist_t.  With C++11, max_digits10
406           // can be used instead.
407         if (!std::numeric_limits<dist_t>::is_integer) {
408           static const int prec
409             = int(std::ceil(std::numeric_limits<dist_t>::digits *
410                             std::log10(2.0) + 1));
411           ostring.precision(prec);
412         }
413         ostring << version << " " << realspec << " " << _bucket << " "
414                 << _numpoints << " " << _tree.size() << " " << _cost;
415         for (int i = 0; i < int(_tree.size()); ++i) {
416           const Node& node = _tree[i];
417           ostring << "\n" << node.index;
418           if (node.index >= 0) {
419             for (int l = 0; l < 2; ++l)
420               ostring << " " << node.data.lower[l] << " " << node.data.upper[l]
421                       << " " << node.data.child[l];
422           } else {
423             for (int l = 0; l < _bucket; ++l)
424               ostring << " " << node.leaves[l];
425           }
426         }
427         os << ostring.str();
428       }
429     }
430 
431     /**
432      * Read the object from an I/O stream.
433      *
434      * @param[in,out] is the stream to read from
435      * @param[in] bin if true (the default) load in binary mode.
436      * @exception GeographicErr if the state read from \e is is illegal.
437      * @exception std::bad_alloc if memory for the tree can't be allocated.
438      *
439      * The counters tracking the statistics of searches are reset by this
440      * operation.  Binary data must have been saved on a machine with the same
441      * architecture.  If an exception is thrown, the state of the
442      * NearestNeighbor is unchanged.
443      *
444      * \note <a href="https://www.boost.org/libs/serialization/doc">
445      * Boost serialization</a> can also be used to save and restore a
446      * NearestNeighbor object.  This requires that the
447      * GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION macro be defined.
448      *
449      * \warning The same arguments \e pts and \e dist used for
450      * initialization must be provided to the Search() function.
451      **********************************************************************/
Load(std::istream & is,bool bin=true)452     void Load(std::istream& is, bool bin = true) {
453       int version1, realspec, bucket, numpoints, treesize, cost;
454       if (bin) {
455         char id[17];
456         is.read(id, 16);
457         id[16] = '\0';
458         if (!(std::strcmp(id, "NearestNeighbor_") == 0))
459           throw GeographicLib::GeographicErr("Bad ID");
460         is.read(reinterpret_cast<char *>(&version1), sizeof(int));
461         is.read(reinterpret_cast<char *>(&realspec), sizeof(int));
462         is.read(reinterpret_cast<char *>(&bucket), sizeof(int));
463         is.read(reinterpret_cast<char *>(&numpoints), sizeof(int));
464         is.read(reinterpret_cast<char *>(&treesize), sizeof(int));
465         is.read(reinterpret_cast<char *>(&cost), sizeof(int));
466       } else {
467         if (!( is >> version1 >> realspec >> bucket >> numpoints >> treesize
468                >> cost ))
469           throw GeographicLib::GeographicErr("Bad header");
470       }
471       if (!( version1 == version ))
472         throw GeographicLib::GeographicErr("Incompatible version");
473       if (!( realspec == std::numeric_limits<dist_t>::digits *
474              (std::numeric_limits<dist_t>::is_integer ? -1 : 1) ))
475         throw GeographicLib::GeographicErr("Different dist_t types");
476       if (!( 0 <= bucket && bucket <= maxbucket ))
477         throw GeographicLib::GeographicErr("Bad bucket size");
478       if (!( 0 <= treesize && treesize <= numpoints ))
479         throw
480           GeographicLib::GeographicErr("Bad number of points or tree size");
481       if (!( 0 <= cost ))
482         throw GeographicLib::GeographicErr("Bad value for cost");
483       std::vector<Node> tree;
484       tree.reserve(treesize);
485       for (int i = 0; i < treesize; ++i) {
486         Node node;
487         if (bin) {
488           is.read(reinterpret_cast<char *>(&node.index), sizeof(int));
489           if (node.index >= 0) {
490             is.read(reinterpret_cast<char *>(node.data.lower),
491                     2 * sizeof(dist_t));
492             is.read(reinterpret_cast<char *>(node.data.upper),
493                     2 * sizeof(dist_t));
494             is.read(reinterpret_cast<char *>(node.data.child),
495                     2 * sizeof(int));
496           } else {
497             is.read(reinterpret_cast<char *>(node.leaves),
498                     bucket * sizeof(int));
499             for (int l = bucket; l < maxbucket; ++l)
500               node.leaves[l] = 0;
501           }
502         } else {
503           if (!( is >> node.index ))
504             throw GeographicLib::GeographicErr("Bad index");
505           if (node.index >= 0) {
506             for (int l = 0; l < 2; ++l) {
507               if (!( is >> node.data.lower[l] >> node.data.upper[l]
508                      >> node.data.child[l] ))
509                 throw GeographicLib::GeographicErr("Bad node data");
510             }
511           } else {
512             // Must be at least one valid leaf followed by a sequence end
513             // markers (-1).
514             for (int l = 0; l < bucket; ++l) {
515               if (!( is >> node.leaves[l] ))
516                 throw GeographicLib::GeographicErr("Bad leaf data");
517             }
518             for (int l = bucket; l < maxbucket; ++l)
519               node.leaves[l] = 0;
520           }
521         }
522         node.Check(numpoints, treesize, bucket);
523         tree.push_back(node);
524       }
525       _tree.swap(tree);
526       _numpoints = numpoints;
527       _bucket = bucket;
528       _mc = _sc = 0;
529       _cost = cost; _c1 = _k = _cmax = 0;
530       _cmin = std::numeric_limits<int>::max();
531     }
532 
533     /**
534      * Write the object to stream \e os as text.
535      *
536      * @param[in,out] os the output stream.
537      * @param[in] t the NearestNeighbor object to be saved.
538      * @exception std::bad_alloc if memory for the string representation of the
539      *   object can't be allocated.
540      **********************************************************************/
operator <<(std::ostream & os,const NearestNeighbor & t)541     friend std::ostream& operator<<(std::ostream& os, const NearestNeighbor& t)
542     { t.Save(os, false); return os; }
543 
544     /**
545      * Read the object from stream \e is as text.
546      *
547      * @param[in,out] is the input stream.
548      * @param[out] t the NearestNeighbor object to be loaded.
549      * @exception GeographicErr if the state read from \e is is illegal.
550      * @exception std::bad_alloc if memory for the tree can't be allocated.
551      **********************************************************************/
operator >>(std::istream & is,NearestNeighbor & t)552     friend std::istream& operator>>(std::istream& is, NearestNeighbor& t)
553     { t.Load(is, false); return is; }
554 
555     /**
556      * Swap with another NearestNeighbor object.
557      *
558      * @param[in,out] t the NearestNeighbor object to swap with.
559      **********************************************************************/
swap(NearestNeighbor & t)560     void swap(NearestNeighbor& t) {
561       std::swap(_numpoints, t._numpoints);
562       std::swap(_bucket, t._bucket);
563       std::swap(_cost, t._cost);
564       _tree.swap(t._tree);
565       std::swap(_mc, t._mc);
566       std::swap(_sc, t._sc);
567       std::swap(_c1, t._c1);
568       std::swap(_k, t._k);
569       std::swap(_cmin, t._cmin);
570       std::swap(_cmax, t._cmax);
571     }
572 
573     /**
574      * The accumulated statistics on the searches so far.
575      *
576      * @param[out] setupcost the cost of initializing the NearestNeighbor.
577      * @param[out] numsearches the number of calls to Search().
578      * @param[out] searchcost the total cost of the calls to Search().
579      * @param[out] mincost the minimum cost of a Search().
580      * @param[out] maxcost the maximum cost of a Search().
581      * @param[out] mean the mean cost of a Search().
582      * @param[out] sd the standard deviation in the cost of a Search().
583      *
584      * Here "cost" measures the number of distance calculations needed.  Note
585      * that the accumulation of statistics is \e not thread safe.
586      **********************************************************************/
Statistics(int & setupcost,int & numsearches,int & searchcost,int & mincost,int & maxcost,double & mean,double & sd) const587     void Statistics(int& setupcost, int& numsearches, int& searchcost,
588                     int& mincost, int& maxcost,
589                     double& mean, double& sd) const {
590       setupcost = _cost; numsearches = _k; searchcost = _c1;
591       mincost = _cmin; maxcost = _cmax;
592       mean = _mc; sd = std::sqrt(_sc / (_k - 1));
593     }
594 
595     /**
596      * Reset the counters for the accumulated statistics on the searches so
597      * far.
598      **********************************************************************/
ResetStatistics() const599     void ResetStatistics() const {
600       _mc = _sc = 0;
601       _c1 = _k = _cmax = 0;
602       _cmin = std::numeric_limits<int>::max();
603     }
604 
605   private:
606     // Package up a dist_t and an int.  We will want to sort on the dist_t so
607     // put it first.
608     typedef std::pair<dist_t, int> item;
609     // \cond SKIP
610     class Node {
611     public:
612       struct bounds {
613         dist_t lower[2], upper[2]; // bounds on inner/outer distances
614         int child[2];
615       };
616       union {
617         bounds data;
618         int leaves[maxbucket];
619       };
620       int index;
621 
Node()622       Node()
623         : index(-1)
624       {
625         for (int i = 0; i < 2; ++i) {
626           data.lower[i] = data.upper[i] = 0;
627           data.child[i] = -1;
628         }
629       }
630 
631       // Sanity check on a Node
Check(int numpoints,int treesize,int bucket) const632       void Check(int numpoints, int treesize, int bucket) const {
633         if (!( -1 <= index && index < numpoints ))
634           throw GeographicLib::GeographicErr("Bad index");
635         if (index >= 0) {
636           if (!( -1 <= data.child[0] && data.child[0] < treesize &&
637                  -1 <= data.child[1] && data.child[1] < treesize ))
638             throw GeographicLib::GeographicErr("Bad child pointers");
639           if (!( 0 <= data.lower[0] && data.lower[0] <= data.upper[0] &&
640                  data.upper[0] <= data.lower[1] &&
641                  data.lower[1] <= data.upper[1] ))
642             throw GeographicLib::GeographicErr("Bad bounds");
643         } else {
644           // Must be at least one valid leaf followed by a sequence end markers
645           // (-1).
646           bool start = true;
647           for (int l = 0; l < bucket; ++l) {
648             if (!( (start ?
649                     ((l == 0 ? 0 : -1) <= leaves[l] && leaves[l] < numpoints) :
650                     leaves[l] == -1) ))
651               throw GeographicLib::GeographicErr("Bad leaf data");
652             start = leaves[l] >= 0;
653           }
654           for (int l = bucket; l < maxbucket; ++l) {
655             if (leaves[l] != 0)
656               throw GeographicLib::GeographicErr("Bad leaf data");
657           }
658         }
659       }
660 
661 #if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \
662   GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
663       friend class boost::serialization::access;
664       template<class Archive>
save(Archive & ar,const unsigned int) const665       void save(Archive& ar, const unsigned int) const {
666         ar & boost::serialization::make_nvp("index", index);
667         if (index < 0)
668           ar & boost::serialization::make_nvp("leaves", leaves);
669         else
670           ar & boost::serialization::make_nvp("lower", data.lower)
671             & boost::serialization::make_nvp("upper", data.upper)
672             & boost::serialization::make_nvp("child", data.child);
673       }
674       template<class Archive>
load(Archive & ar,const unsigned int)675       void load(Archive& ar, const unsigned int) {
676         ar & boost::serialization::make_nvp("index", index);
677         if (index < 0)
678           ar & boost::serialization::make_nvp("leaves", leaves);
679         else
680           ar & boost::serialization::make_nvp("lower", data.lower)
681             & boost::serialization::make_nvp("upper", data.upper)
682             & boost::serialization::make_nvp("child", data.child);
683       }
684       template<class Archive>
serialize(Archive & ar,const unsigned int file_version)685       void serialize(Archive& ar, const unsigned int file_version)
686       { boost::serialization::split_member(ar, *this, file_version); }
687 #endif
688     };
689     // \endcond
690 #if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \
691   GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
692     friend class boost::serialization::access;
save(Archive & ar,const unsigned) const693     template<class Archive> void save(Archive& ar, const unsigned) const {
694       int realspec = std::numeric_limits<dist_t>::digits *
695         (std::numeric_limits<dist_t>::is_integer ? -1 : 1);
696       // Need to use version1, otherwise load error in debug mode on Linux:
697       // undefined reference to GeographicLib::NearestNeighbor<...>::version.
698       int version1 = version;
699       ar & boost::serialization::make_nvp("version", version1)
700         & boost::serialization::make_nvp("realspec", realspec)
701         & boost::serialization::make_nvp("bucket", _bucket)
702         & boost::serialization::make_nvp("numpoints", _numpoints)
703         & boost::serialization::make_nvp("cost", _cost)
704         & boost::serialization::make_nvp("tree", _tree);
705     }
load(Archive & ar,const unsigned)706     template<class Archive> void load(Archive& ar, const unsigned) {
707       int version1, realspec, bucket, numpoints, cost;
708       ar & boost::serialization::make_nvp("version", version1);
709       if (version1 != version)
710         throw GeographicLib::GeographicErr("Incompatible version");
711       std::vector<Node> tree;
712       ar & boost::serialization::make_nvp("realspec", realspec);
713       if (!( realspec == std::numeric_limits<dist_t>::digits *
714              (std::numeric_limits<dist_t>::is_integer ? -1 : 1) ))
715         throw GeographicLib::GeographicErr("Different dist_t types");
716       ar & boost::serialization::make_nvp("bucket", bucket);
717       if (!( 0 <= bucket && bucket <= maxbucket ))
718         throw GeographicLib::GeographicErr("Bad bucket size");
719       ar & boost::serialization::make_nvp("numpoints", numpoints)
720         & boost::serialization::make_nvp("cost", cost)
721         & boost::serialization::make_nvp("tree", tree);
722       if (!( 0 <= int(tree.size()) && int(tree.size()) <= numpoints ))
723         throw
724           GeographicLib::GeographicErr("Bad number of points or tree size");
725       for (int i = 0; i < int(tree.size()); ++i)
726         tree[i].Check(numpoints, int(tree.size()), bucket);
727       _tree.swap(tree);
728       _numpoints = numpoints;
729       _bucket = bucket;
730       _mc = _sc = 0;
731       _cost = cost; _c1 = _k = _cmax = 0;
732       _cmin = std::numeric_limits<int>::max();
733     }
734     template<class Archive>
serialize(Archive & ar,const unsigned int file_version)735     void serialize(Archive& ar, const unsigned int file_version)
736     { boost::serialization::split_member(ar, *this, file_version); }
737 #endif
738 
739     int _numpoints, _bucket, _cost;
740     std::vector<Node> _tree;
741     // Counters to track stastistics on the cost of searches
742     mutable double _mc, _sc;
743     mutable int _c1, _k, _cmin, _cmax;
744 
init(const std::vector<pos_t> & pts,const distfun_t & dist,int bucket,std::vector<Node> & tree,std::vector<item> & ids,int & cost,int l,int u,int vp)745     int init(const std::vector<pos_t>& pts, const distfun_t& dist, int bucket,
746              std::vector<Node>& tree, std::vector<item>& ids, int& cost,
747              int l, int u, int vp) {
748 
749       if (u == l)
750         return -1;
751       Node node;
752 
753       if (u - l > (bucket == 0 ? 1 : bucket)) {
754 
755         // choose a vantage point and move it to the start
756         int i = vp;
757         std::swap(ids[l], ids[i]);
758 
759         int m = (u + l + 1) / 2;
760 
761         for (int k = l + 1; k < u; ++k) {
762           ids[k].first = dist(pts[ids[l].second], pts[ids[k].second]);
763           ++cost;
764         }
765         // partition around the median distance
766         std::nth_element(ids.begin() + l + 1,
767                          ids.begin() + m,
768                          ids.begin() + u);
769         node.index = ids[l].second;
770         if (m > l + 1) {        // node.child[0] is possibly empty
771           typename std::vector<item>::iterator
772             t = std::min_element(ids.begin() + l + 1, ids.begin() + m);
773           node.data.lower[0] = t->first;
774           t = std::max_element(ids.begin() + l + 1, ids.begin() + m);
775           node.data.upper[0] = t->first;
776           // Use point with max distance as vantage point; this point act as a
777           // "corner" point and leads to a good partition.
778           node.data.child[0] = init(pts, dist, bucket, tree, ids, cost,
779                                     l + 1, m, int(t - ids.begin()));
780         }
781         typename std::vector<item>::iterator
782           t = std::max_element(ids.begin() + m, ids.begin() + u);
783         node.data.lower[1] = ids[m].first;
784         node.data.upper[1] = t->first;
785         // Use point with max distance as vantage point here too
786         node.data.child[1] = init(pts, dist, bucket, tree, ids, cost,
787                                   m, u, int(t - ids.begin()));
788       } else {
789         if (bucket == 0)
790           node.index = ids[l].second;
791         else {
792           node.index = -1;
793           // Sort the bucket entries so that the tree is independent of the
794           // implementation of nth_element.
795           std::sort(ids.begin() + l, ids.begin() + u);
796           for (int i = l; i < u; ++i)
797             node.leaves[i-l] = ids[i].second;
798           for (int i = u - l; i < bucket; ++i)
799             node.leaves[i] = -1;
800           for (int i = bucket; i < maxbucket; ++i)
801             node.leaves[i] = 0;
802         }
803       }
804 
805       tree.push_back(node);
806       return int(tree.size()) - 1;
807     }
808 
809   };
810 
811 } // namespace GeographicLib
812 
813 namespace std {
814 
815   /**
816    * Swap two GeographicLib::NearestNeighbor objects.
817    *
818    * @tparam dist_t the type used for measuring distances.
819    * @tparam pos_t the type for specifying the positions of points.
820    * @tparam distfun_t the type for a function object which calculates
821    *   distances between points.
822    * @param[in,out] a the first GeographicLib::NearestNeighbor to swap.
823    * @param[in,out] b the second GeographicLib::NearestNeighbor to swap.
824    **********************************************************************/
825   template <typename dist_t, typename pos_t, class distfun_t>
swap(GeographicLib::NearestNeighbor<dist_t,pos_t,distfun_t> & a,GeographicLib::NearestNeighbor<dist_t,pos_t,distfun_t> & b)826   void swap(GeographicLib::NearestNeighbor<dist_t, pos_t, distfun_t>& a,
827             GeographicLib::NearestNeighbor<dist_t, pos_t, distfun_t>& b) {
828     a.swap(b);
829   }
830 
831 } // namespace std
832 
833 #if defined(_MSC_VER)
834 #  pragma warning (pop)
835 #endif
836 
837 #endif  // GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP
838