1 //---------------------------------------------------------------------- 2 // File: ANN.h 3 // Programmer: Sunil Arya and David Mount 4 // Description: Basic include file for approximate nearest 5 // neighbor searching. 6 // Last modified: 01/27/10 (Version 1.1.2) 7 //---------------------------------------------------------------------- 8 // Copyright (c) 1997-2010 University of Maryland and Sunil Arya and 9 // David Mount. All Rights Reserved. 10 // 11 // This software and related documentation is part of the Approximate 12 // Nearest Neighbor Library (ANN). This software is provided under 13 // the provisions of the Lesser GNU Public License (LGPL). See the 14 // file ../ReadMe.txt for further information. 15 // 16 // The University of Maryland (U.M.) and the authors make no 17 // representations about the suitability or fitness of this software for 18 // any purpose. It is provided "as is" without express or implied 19 // warranty. 20 //---------------------------------------------------------------------- 21 // History: 22 // Revision 0.1 03/04/98 23 // Initial release 24 // Revision 1.0 04/01/05 25 // Added copyright and revision information 26 // Added ANNcoordPrec for coordinate precision. 27 // Added methods theDim, nPoints, maxPoints, thePoints to ANNpointSet. 28 // Cleaned up C++ structure for modern compilers 29 // Revision 1.1 05/03/05 30 // Added fixed-radius k-NN searching 31 // Revision 1.1.2 01/27/10 32 // Fixed minor compilation bugs for new versions of gcc 33 //---------------------------------------------------------------------- 34 35 //---------------------------------------------------------------------- 36 // ANN - approximate nearest neighbor searching 37 // ANN is a library for approximate nearest neighbor searching, 38 // based on the use of standard and priority search in kd-trees 39 // and balanced box-decomposition (bbd) trees. Here are some 40 // references to the main algorithmic techniques used here: 41 // 42 // kd-trees: 43 // Friedman, Bentley, and Finkel, ``An algorithm for finding 44 // best matches in logarithmic expected time,'' ACM 45 // Transactions on Mathematical Software, 3(3):209-226, 1977. 46 // 47 // Priority search in kd-trees: 48 // Arya and Mount, ``Algorithms for fast vector quantization,'' 49 // Proc. of DCC '93: Data Compression Conference, eds. J. A. 50 // Storer and M. Cohn, IEEE Press, 1993, 381-390. 51 // 52 // Approximate nearest neighbor search and bbd-trees: 53 // Arya, Mount, Netanyahu, Silverman, and Wu, ``An optimal 54 // algorithm for approximate nearest neighbor searching,'' 55 // 5th Ann. ACM-SIAM Symposium on Discrete Algorithms, 56 // 1994, 573-582. 57 //---------------------------------------------------------------------- 58 59 #ifndef ANN_H 60 #define ANN_H 61 62 #ifdef WIN32 63 //---------------------------------------------------------------------- 64 // For Microsoft Visual C++, externally accessible symbols must be 65 // explicitly indicated with DLL_API, which is somewhat like "extern." 66 // 67 // The following ifdef block is the standard way of creating macros 68 // which make exporting from a DLL simpler. All files within this DLL 69 // are compiled with the DLL_EXPORTS preprocessor symbol defined on the 70 // command line. In contrast, projects that use (or import) the DLL 71 // objects do not define the DLL_EXPORTS symbol. This way any other 72 // project whose source files include this file see DLL_API functions as 73 // being imported from a DLL, wheras this DLL sees symbols defined with 74 // this macro as being exported. 75 //---------------------------------------------------------------------- 76 //#ifdef DLL_EXPORTS 77 // #define DLL_API __declspec(dllexport) 78 //#else 79 // #define DLL_API __declspec(dllimport) 80 //#endif 81 #define DLL_API 82 //---------------------------------------------------------------------- 83 // DLL_API is ignored for all other systems 84 //---------------------------------------------------------------------- 85 #else 86 #define DLL_API 87 #endif 88 89 //---------------------------------------------------------------------- 90 // basic includes 91 //---------------------------------------------------------------------- 92 93 #include <cstdlib> // standard lib includes 94 #include <cmath> // math includes 95 #include <iostream> // I/O streams 96 #include <cstring> // C-style strings 97 98 //---------------------------------------------------------------------- 99 // Limits 100 // There are a number of places where we use the maximum double value as 101 // default initializers (and others may be used, depending on the 102 // data/distance representation). These can usually be found in limits.h 103 // (as LONG_MAX, INT_MAX) or in float.h (as DBL_MAX, FLT_MAX). 104 // 105 // Not all systems have these files. If you are using such a system, 106 // you should set the preprocessor symbol ANN_NO_LIMITS_H when 107 // compiling, and modify the statements below to generate the 108 // appropriate value. For practical purposes, this does not need to be 109 // the maximum double value. It is sufficient that it be at least as 110 // large than the maximum squared distance between between any two 111 // points. 112 //---------------------------------------------------------------------- 113 #ifdef ANN_NO_LIMITS_H // limits.h unavailable 114 #include <cvalues> // replacement for limits.h 115 const double ANN_DBL_MAX = MAXDOUBLE; // insert maximum double 116 #else 117 #include <climits> 118 #include <cfloat> 119 const double ANN_DBL_MAX = DBL_MAX; 120 #endif 121 122 #define ANNversion "1.1.2" // ANN version and information 123 #define ANNversionCmt "" 124 #define ANNcopyright "David M. Mount and Sunil Arya" 125 #define ANNlatestRev "Jan 27, 2010" 126 127 //---------------------------------------------------------------------- 128 // ANNbool 129 // This is a simple boolean type. Although ANSI C++ is supposed 130 // to support the type bool, some compilers do not have it. 131 //---------------------------------------------------------------------- 132 133 enum ANNbool {ANNfalse = 0, ANNtrue = 1}; // ANN boolean type (non ANSI C++) 134 135 //---------------------------------------------------------------------- 136 // ANNcoord, ANNdist 137 // ANNcoord and ANNdist are the types used for representing 138 // point coordinates and distances. They can be modified by the 139 // user, with some care. It is assumed that they are both numeric 140 // types, and that ANNdist is generally of an equal or higher type 141 // from ANNcoord. A variable of type ANNdist should be large 142 // enough to store the sum of squared components of a variable 143 // of type ANNcoord for the number of dimensions needed in the 144 // application. For example, the following combinations are 145 // legal: 146 // 147 // ANNcoord ANNdist 148 // --------- ------------------------------- 149 // short short, int, long, float, double 150 // int int, long, float, double 151 // long long, float, double 152 // float float, double 153 // double double 154 // 155 // It is the user's responsibility to make sure that overflow does 156 // not occur in distance calculation. 157 //---------------------------------------------------------------------- 158 159 typedef double ANNcoord; // coordinate data type 160 typedef double ANNdist; // distance data type 161 162 //---------------------------------------------------------------------- 163 // ANNidx 164 // ANNidx is a point index. When the data structure is built, the 165 // points are given as an array. Nearest neighbor results are 166 // returned as an integer index into this array. To make it 167 // clearer when this is happening, we define the integer type 168 // ANNidx. Indexing starts from 0. 169 // 170 // For fixed-radius near neighbor searching, it is possible that 171 // there are not k nearest neighbors within the search radius. To 172 // indicate this, the algorithm returns ANN_NULL_IDX as its result. 173 // It should be distinguishable from any valid array index. 174 //---------------------------------------------------------------------- 175 176 typedef int ANNidx; // point index 177 const ANNidx ANN_NULL_IDX = -1; // a NULL point index 178 179 //---------------------------------------------------------------------- 180 // Infinite distance: 181 // The code assumes that there is an "infinite distance" which it 182 // uses to initialize distances before performing nearest neighbor 183 // searches. It should be as larger or larger than any legitimate 184 // nearest neighbor distance. 185 // 186 // On most systems, these should be found in the standard include 187 // file <limits.h> or possibly <float.h>. If you do not have these 188 // file, some suggested values are listed below, assuming 64-bit 189 // long, 32-bit int and 16-bit short. 190 // 191 // ANNdist ANN_DIST_INF Values (see <limits.h> or <float.h>) 192 // ------- ------------ ------------------------------------ 193 // double DBL_MAX 1.79769313486231570e+308 194 // float FLT_MAX 3.40282346638528860e+38 195 // long LONG_MAX 0x7fffffffffffffff 196 // int INT_MAX 0x7fffffff 197 // short SHRT_MAX 0x7fff 198 //---------------------------------------------------------------------- 199 200 const ANNdist ANN_DIST_INF = ANN_DBL_MAX; 201 202 //---------------------------------------------------------------------- 203 // Significant digits for tree dumps: 204 // When floating point coordinates are used, the routine that dumps 205 // a tree needs to know roughly how many significant digits there 206 // are in a ANNcoord, so it can output points to full precision. 207 // This is defined to be ANNcoordPrec. On most systems these 208 // values can be found in the standard include files <limits.h> or 209 // <float.h>. For integer types, the value is essentially ignored. 210 // 211 // ANNcoord ANNcoordPrec Values (see <limits.h> or <float.h>) 212 // -------- ------------ ------------------------------------ 213 // double DBL_DIG 15 214 // float FLT_DIG 6 215 // long doesn't matter 19 216 // int doesn't matter 10 217 // short doesn't matter 5 218 //---------------------------------------------------------------------- 219 220 #ifdef DBL_DIG // number of sig. bits in ANNcoord 221 const int ANNcoordPrec = DBL_DIG; 222 #else 223 const int ANNcoordPrec = 15; // default precision 224 #endif 225 226 //---------------------------------------------------------------------- 227 // Self match? 228 // In some applications, the nearest neighbor of a point is not 229 // allowed to be the point itself. This occurs, for example, when 230 // computing all nearest neighbors in a set. By setting the 231 // parameter ANN_ALLOW_SELF_MATCH to ANNfalse, the nearest neighbor 232 // is the closest point whose distance from the query point is 233 // strictly positive. 234 //---------------------------------------------------------------------- 235 236 const ANNbool ANN_ALLOW_SELF_MATCH = ANNtrue; 237 238 //---------------------------------------------------------------------- 239 // Norms and metrics: 240 // ANN supports any Minkowski norm for defining distance. In 241 // particular, for any p >= 1, the L_p Minkowski norm defines the 242 // length of a d-vector (v0, v1, ..., v(d-1)) to be 243 // 244 // (|v0|^p + |v1|^p + ... + |v(d-1)|^p)^(1/p), 245 // 246 // (where ^ denotes exponentiation, and |.| denotes absolute 247 // value). The distance between two points is defined to be the 248 // norm of the vector joining them. Some common distance metrics 249 // include 250 // 251 // Euclidean metric p = 2 252 // Manhattan metric p = 1 253 // Max metric p = infinity 254 // 255 // In the case of the max metric, the norm is computed by taking 256 // the maxima of the absolute values of the components. ANN is 257 // highly "coordinate-based" and does not support general distances 258 // functions (e.g. those obeying just the triangle inequality). It 259 // also does not support distance functions based on 260 // inner-products. 261 // 262 // For the purpose of computing nearest neighbors, it is not 263 // necessary to compute the final power (1/p). Thus the only 264 // component that is used by the program is |v(i)|^p. 265 // 266 // ANN parameterizes the distance computation through the following 267 // macros. (Macros are used rather than procedures for 268 // efficiency.) Recall that the distance between two points is 269 // given by the length of the vector joining them, and the length 270 // or norm of a vector v is given by formula: 271 // 272 // |v| = ROOT(POW(v0) # POW(v1) # ... # POW(v(d-1))) 273 // 274 // where ROOT, POW are unary functions and # is an associative and 275 // commutative binary operator mapping the following types: 276 // 277 // ** POW: ANNcoord --> ANNdist 278 // ** #: ANNdist x ANNdist --> ANNdist 279 // ** ROOT: ANNdist (>0) --> double 280 // 281 // For early termination in distance calculation (partial distance 282 // calculation) we assume that POW and # together are monotonically 283 // increasing on sequences of arguments, meaning that for all 284 // v0..vk and y: 285 // 286 // POW(v0) #...# POW(vk) <= (POW(v0) #...# POW(vk)) # POW(y). 287 // 288 // Incremental Distance Calculation: 289 // The program uses an optimized method of computing distances for 290 // kd-trees and bd-trees, called incremental distance calculation. 291 // It is used when distances are to be updated when only a single 292 // coordinate of a point has been changed. In order to use this, 293 // we assume that there is an incremental update function DIFF(x,y) 294 // for #, such that if: 295 // 296 // s = x0 # ... # xi # ... # xk 297 // 298 // then if s' is equal to s but with xi replaced by y, that is, 299 // 300 // s' = x0 # ... # y # ... # xk 301 // 302 // then the length of s' can be computed by: 303 // 304 // |s'| = |s| # DIFF(xi,y). 305 // 306 // Thus, if # is + then DIFF(xi,y) is (yi-x). For the L_infinity 307 // norm we make use of the fact that in the program this function 308 // is only invoked when y > xi, and hence DIFF(xi,y)=y. 309 // 310 // Finally, for approximate nearest neighbor queries we assume 311 // that POW and ROOT are related such that 312 // 313 // v*ROOT(x) = ROOT(POW(v)*x) 314 // 315 // Here are the values for the various Minkowski norms: 316 // 317 // L_p: p even: p odd: 318 // ------------------------- ------------------------ 319 // POW(v) = v^p POW(v) = |v|^p 320 // ROOT(x) = x^(1/p) ROOT(x) = x^(1/p) 321 // # = + # = + 322 // DIFF(x,y) = y - x DIFF(x,y) = y - x 323 // 324 // L_inf: 325 // POW(v) = |v| 326 // ROOT(x) = x 327 // # = max 328 // DIFF(x,y) = y 329 // 330 // By default the Euclidean norm is assumed. To change the norm, 331 // uncomment the appropriate set of macros below. 332 //---------------------------------------------------------------------- 333 334 //---------------------------------------------------------------------- 335 // Use the following for the Euclidean norm 336 //---------------------------------------------------------------------- 337 #define ANN_POW(v) ((v)*(v)) 338 #define ANN_ROOT(x) sqrt(x) 339 #define ANN_SUM(x,y) ((x) + (y)) 340 #define ANN_DIFF(x,y) ((y) - (x)) 341 342 //---------------------------------------------------------------------- 343 // Use the following for the L_1 (Manhattan) norm 344 //---------------------------------------------------------------------- 345 // #define ANN_POW(v) fabs(v) 346 // #define ANN_ROOT(x) (x) 347 // #define ANN_SUM(x,y) ((x) + (y)) 348 // #define ANN_DIFF(x,y) ((y) - (x)) 349 350 //---------------------------------------------------------------------- 351 // Use the following for a general L_p norm 352 //---------------------------------------------------------------------- 353 // #define ANN_POW(v) pow(fabs(v),p) 354 // #define ANN_ROOT(x) pow(fabs(x),1/p) 355 // #define ANN_SUM(x,y) ((x) + (y)) 356 // #define ANN_DIFF(x,y) ((y) - (x)) 357 358 //---------------------------------------------------------------------- 359 // Use the following for the L_infinity (Max) norm 360 //---------------------------------------------------------------------- 361 // #define ANN_POW(v) fabs(v) 362 // #define ANN_ROOT(x) (x) 363 // #define ANN_SUM(x,y) ((x) > (y) ? (x) : (y)) 364 // #define ANN_DIFF(x,y) (y) 365 366 //---------------------------------------------------------------------- 367 // Array types 368 // The following array types are of basic interest. A point is 369 // just a dimensionless array of coordinates, a point array is a 370 // dimensionless array of points. A distance array is a 371 // dimensionless array of distances and an index array is a 372 // dimensionless array of point indices. The latter two are used 373 // when returning the results of k-nearest neighbor queries. 374 //---------------------------------------------------------------------- 375 376 typedef ANNcoord* ANNpoint; // a point 377 378 // [Bruno Levy] define an alternative version 379 // of ANNpointArray, that avoids needing to 380 // store pointers if data is contiguous in 381 // memory. 382 #define ANN_CONTIGUOUS_POINT_ARRAY 383 #ifdef ANN_CONTIGUOUS_POINT_ARRAY 384 class ANNpointArray { 385 public: ANNpointArray(const ANNcoord * data,size_t stride)386 ANNpointArray( 387 const ANNcoord* data, size_t stride 388 ) : data_(const_cast<ANNcoord*>(data)), stride_(stride) { 389 } ANNpointArray()390 ANNpointArray() : data_(NULL), stride_(0) { 391 } 392 393 // This constructor is needed for allowing NULL 394 // value (or default argument) for ANNpointArray. 395 #ifdef NDEBUG ANNpointArray(void *)396 ANNpointArray(void*) : data_(NULL), stride_(0) { 397 } 398 #else ANNpointArray(void * p)399 ANNpointArray(void* p) : data_(NULL), stride_(0) { 400 if(p != NULL) { abort() ; } 401 } 402 #endif 403 404 ANNpoint operator[](size_t i) { 405 return data_ + i*stride_ ; 406 } 407 private: 408 friend void annDeallocPts(ANNpointArray &pa) ; 409 ANNcoord* data_ ; 410 size_t stride_ ; 411 } ; 412 #else 413 typedef ANNpoint* ANNpointArray; // an array of points 414 #endif 415 416 typedef ANNdist* ANNdistArray; // an array of distances 417 typedef ANNidx* ANNidxArray; // an array of point indices 418 419 //---------------------------------------------------------------------- 420 // Basic point and array utilities: 421 // The following procedures are useful supplements to ANN's nearest 422 // neighbor capabilities. 423 // 424 // annDist(): 425 // Computes the (squared) distance between a pair of points. 426 // Note that this routine is not used internally by ANN for 427 // computing distance calculations. For reasons of efficiency 428 // this is done using incremental distance calculation. Thus, 429 // this routine cannot be modified as a method of changing the 430 // metric. 431 // 432 // Because points (somewhat like strings in C) are stored as 433 // pointers. Consequently, creating and destroying copies of 434 // points may require storage allocation. These procedures do 435 // this. 436 // 437 // annAllocPt() and annDeallocPt(): 438 // Allocate a deallocate storage for a single point, and 439 // return a pointer to it. The argument to AllocPt() is 440 // used to initialize all components. 441 // 442 // annAllocPts() and annDeallocPts(): 443 // Allocate and deallocate an array of points as well a 444 // place to store their coordinates, and initializes the 445 // points to point to their respective coordinates. It 446 // allocates point storage in a contiguous block large 447 // enough to store all the points. It performs no 448 // initialization. 449 // 450 // annCopyPt(): 451 // Creates a copy of a given point, allocating space for 452 // the new point. It returns a pointer to the newly 453 // allocated copy. 454 //---------------------------------------------------------------------- 455 456 DLL_API ANNdist annDist( 457 int dim, // dimension of space 458 ANNpoint p, // points 459 ANNpoint q); 460 461 DLL_API ANNpoint annAllocPt( 462 int dim, // dimension 463 ANNcoord c = 0); // coordinate value (all equal) 464 465 DLL_API ANNpointArray annAllocPts( 466 int n, // number of points 467 int dim); // dimension 468 469 DLL_API void annDeallocPt( 470 ANNpoint &p); // deallocate 1 point 471 472 DLL_API void annDeallocPts( 473 ANNpointArray &pa); // point array 474 475 DLL_API ANNpoint annCopyPt( 476 int dim, // dimension 477 ANNpoint source); // point to copy 478 479 //---------------------------------------------------------------------- 480 //Overall structure: ANN supports a number of different data structures 481 //for approximate and exact nearest neighbor searching. These are: 482 // 483 // ANNbruteForce A simple brute-force search structure. 484 // ANNkd_tree A kd-tree tree search structure. ANNbd_tree 485 // A bd-tree tree search structure (a kd-tree with shrink 486 // capabilities). 487 // 488 // At a minimum, each of these data structures support k-nearest 489 // neighbor queries. The nearest neighbor query, annkSearch, 490 // returns an integer identifier and the distance to the nearest 491 // neighbor(s) and annRangeSearch returns the nearest points that 492 // lie within a given query ball. 493 // 494 // Each structure is built by invoking the appropriate constructor 495 // and passing it (at a minimum) the array of points, the total 496 // number of points and the dimension of the space. Each structure 497 // is also assumed to support a destructor and member functions 498 // that return basic information about the point set. 499 // 500 // Note that the array of points is not copied by the data 501 // structure (for reasons of space efficiency), and it is assumed 502 // to be constant throughout the lifetime of the search structure. 503 // 504 // The search algorithm, annkSearch, is given the query point (q), 505 // and the desired number of nearest neighbors to report (k), and 506 // the error bound (eps) (whose default value is 0, implying exact 507 // nearest neighbors). It returns two arrays which are assumed to 508 // contain at least k elements: one (nn_idx) contains the indices 509 // (within the point array) of the nearest neighbors and the other 510 // (dd) contains the squared distances to these nearest neighbors. 511 // 512 // The search algorithm, annkFRSearch, is a fixed-radius kNN 513 // search. In addition to a query point, it is given a (squared) 514 // radius bound. (This is done for consistency, because the search 515 // returns distances as squared quantities.) It does two things. 516 // First, it computes the k nearest neighbors within the radius 517 // bound, and second, it returns the total number of points lying 518 // within the radius bound. It is permitted to set k = 0, in which 519 // case it effectively answers a range counting query. If the 520 // error bound epsilon is positive, then the search is approximate 521 // in the sense that it is free to ignore any point that lies 522 // outside a ball of radius r/(1+epsilon), where r is the given 523 // (unsquared) radius bound. 524 // 525 // The generic object from which all the search structures are 526 // dervied is given below. It is a virtual object, and is useless 527 // by itself. 528 //---------------------------------------------------------------------- 529 530 class DLL_API ANNpointSet { 531 public: ~ANNpointSet()532 virtual ~ANNpointSet() {} // virtual distructor 533 534 virtual void annkSearch( // approx k near neighbor search 535 ANNpoint q, // query point 536 int k, // number of near neighbors to return 537 ANNidxArray nn_idx, // nearest neighbor array (modified) 538 ANNdistArray dd, // dist to near neighbors (modified) 539 double eps=0.0 // error bound 540 ) = 0; // pure virtual (defined elsewhere) 541 542 virtual int annkFRSearch( // approx fixed-radius kNN search 543 ANNpoint q, // query point 544 ANNdist sqRad, // squared radius 545 int k = 0, // number of near neighbors to return 546 ANNidxArray nn_idx = NULL, // nearest neighbor array (modified) 547 ANNdistArray dd = NULL, // dist to near neighbors (modified) 548 double eps=0.0 // error bound 549 ) = 0; // pure virtual (defined elsewhere) 550 551 virtual int theDim() = 0; // return dimension of space 552 virtual int nPoints() = 0; // return number of points 553 // return pointer to points 554 virtual ANNpointArray thePoints() = 0; 555 }; 556 557 //---------------------------------------------------------------------- 558 // Brute-force nearest neighbor search: 559 // The brute-force search structure is very simple but inefficient. 560 // It has been provided primarily for the sake of comparison with 561 // and validation of the more complex search structures. 562 // 563 // Query processing is the same as described above, but the value 564 // of epsilon is ignored, since all distance calculations are 565 // performed exactly. 566 // 567 // WARNING: This data structure is very slow, and should not be 568 // used unless the number of points is very small. 569 // 570 // Internal information: 571 // --------------------- 572 // This data structure bascially consists of the array of points 573 // (each a pointer to an array of coordinates). The search is 574 // performed by a simple linear scan of all the points. 575 //---------------------------------------------------------------------- 576 577 class DLL_API ANNbruteForce: public ANNpointSet { 578 int dim; // dimension 579 int n_pts; // number of points 580 ANNpointArray pts; // point array 581 public: 582 ANNbruteForce( // constructor from point array 583 ANNpointArray pa, // point array 584 int n, // number of points 585 int dd); // dimension 586 587 ~ANNbruteForce(); // destructor 588 589 void annkSearch( // approx k near neighbor search 590 ANNpoint q, // query point 591 int k, // number of near neighbors to return 592 ANNidxArray nn_idx, // nearest neighbor array (modified) 593 ANNdistArray dd, // dist to near neighbors (modified) 594 double eps=0.0); // error bound 595 596 int annkFRSearch( // approx fixed-radius kNN search 597 ANNpoint q, // query point 598 ANNdist sqRad, // squared radius 599 int k = 0, // number of near neighbors to return 600 ANNidxArray nn_idx = NULL, // nearest neighbor array (modified) 601 ANNdistArray dd = NULL, // dist to near neighbors (modified) 602 double eps=0.0); // error bound 603 theDim()604 int theDim() // return dimension of space 605 { return dim; } 606 nPoints()607 int nPoints() // return number of points 608 { return n_pts; } 609 thePoints()610 ANNpointArray thePoints() // return pointer to points 611 { return pts; } 612 }; 613 614 //---------------------------------------------------------------------- 615 // kd- and bd-tree splitting and shrinking rules 616 // kd-trees supports a collection of different splitting rules. 617 // In addition to the standard kd-tree splitting rule proposed 618 // by Friedman, Bentley, and Finkel, we have introduced a 619 // number of other splitting rules, which seem to perform 620 // as well or better (for the distributions we have tested). 621 // 622 // The splitting methods given below allow the user to tailor 623 // the data structure to the particular data set. They are 624 // are described in greater details in the kd_split.cc source 625 // file. The method ANN_KD_SUGGEST is the method chosen (rather 626 // subjectively) by the implementors as the one giving the 627 // fastest performance, and is the default splitting method. 628 // 629 // As with splitting rules, there are a number of different 630 // shrinking rules. The shrinking rule ANN_BD_NONE does no 631 // shrinking (and hence produces a kd-tree tree). The rule 632 // ANN_BD_SUGGEST uses the implementors favorite rule. 633 //---------------------------------------------------------------------- 634 635 enum ANNsplitRule { 636 ANN_KD_STD = 0, // the optimized kd-splitting rule 637 ANN_KD_MIDPT = 1, // midpoint split 638 ANN_KD_FAIR = 2, // fair split 639 ANN_KD_SL_MIDPT = 3, // sliding midpoint splitting method 640 ANN_KD_SL_FAIR = 4, // sliding fair split method 641 ANN_KD_SUGGEST = 5}; // the authors' suggestion for best 642 const int ANN_N_SPLIT_RULES = 6; // number of split rules 643 644 enum ANNshrinkRule { 645 ANN_BD_NONE = 0, // no shrinking at all (just kd-tree) 646 ANN_BD_SIMPLE = 1, // simple splitting 647 ANN_BD_CENTROID = 2, // centroid splitting 648 ANN_BD_SUGGEST = 3}; // the authors' suggested choice 649 const int ANN_N_SHRINK_RULES = 4; // number of shrink rules 650 651 //---------------------------------------------------------------------- 652 // kd-tree: 653 // The main search data structure supported by ANN is a kd-tree. 654 // The main constructor is given a set of points and a choice of 655 // splitting method to use in building the tree. 656 // 657 // Construction: 658 // ------------- 659 // The constructor is given the point array, number of points, 660 // dimension, bucket size (default = 1), and the splitting rule 661 // (default = ANN_KD_SUGGEST). The point array is not copied, and 662 // is assumed to be kept constant throughout the lifetime of the 663 // search structure. There is also a "load" constructor that 664 // builds a tree from a file description that was created by the 665 // Dump operation. 666 // 667 // Search: 668 // ------- 669 // There are two search methods: 670 // 671 // Standard search (annkSearch()): 672 // Searches nodes in tree-traversal order, always visiting 673 // the closer child first. 674 // Priority search (annkPriSearch()): 675 // Searches nodes in order of increasing distance of the 676 // associated cell from the query point. For many 677 // distributions the standard search seems to work just 678 // fine, but priority search is safer for worst-case 679 // performance. 680 // 681 // Printing: 682 // --------- 683 // There are two methods provided for printing the tree. Print() 684 // is used to produce a "human-readable" display of the tree, with 685 // indenation, which is handy for debugging. Dump() produces a 686 // format that is suitable reading by another program. There is a 687 // "load" constructor, which constructs a tree which is assumed to 688 // have been saved by the Dump() procedure. 689 // 690 // Performance and Structure Statistics: 691 // ------------------------------------- 692 // The procedure getStats() collects statistics information on the 693 // tree (its size, height, etc.) See ANNperf.h for information on 694 // the stats structure it returns. 695 // 696 // Internal information: 697 // --------------------- 698 // The data structure consists of three major chunks of storage. 699 // The first (implicit) storage are the points themselves (pts), 700 // which have been provided by the users as an argument to the 701 // constructor, or are allocated dynamically if the tree is built 702 // using the load constructor). These should not be changed during 703 // the lifetime of the search structure. It is the user's 704 // responsibility to delete these after the tree is destroyed. 705 // 706 // The second is the tree itself (which is dynamically allocated in 707 // the constructor) and is given as a pointer to its root node 708 // (root). These nodes are automatically deallocated when the tree 709 // is deleted. See the file src/kd_tree.h for further information 710 // on the structure of the tree nodes. 711 // 712 // Each leaf of the tree does not contain a pointer directly to a 713 // point, but rather contains a pointer to a "bucket", which is an 714 // array consisting of point indices. The third major chunk of 715 // storage is an array (pidx), which is a large array in which all 716 // these bucket subarrays reside. (The reason for storing them 717 // separately is the buckets are typically small, but of varying 718 // sizes. This was done to avoid fragmentation.) This array is 719 // also deallocated when the tree is deleted. 720 // 721 // In addition to this, the tree consists of a number of other 722 // pieces of information which are used in searching and for 723 // subsequent tree operations. These consist of the following: 724 // 725 // dim Dimension of space 726 // n_pts Number of points currently in the tree 727 // n_max Maximum number of points that are allowed 728 // in the tree 729 // bkt_size Maximum bucket size (no. of points per leaf) 730 // bnd_box_lo Bounding box low point 731 // bnd_box_hi Bounding box high point 732 // splitRule Splitting method used 733 // 734 //---------------------------------------------------------------------- 735 736 //---------------------------------------------------------------------- 737 // Some types and objects used by kd-tree functions 738 // See src/kd_tree.h and src/kd_tree.cpp for definitions 739 //---------------------------------------------------------------------- 740 class ANNkdStats; // stats on kd-tree 741 class ANNkd_node; // generic node in a kd-tree 742 typedef ANNkd_node* ANNkd_ptr; // pointer to a kd-tree node 743 744 class DLL_API ANNkd_tree: public ANNpointSet { 745 protected: 746 int dim; // dimension of space 747 int n_pts; // number of points in tree 748 int bkt_size; // bucket size 749 ANNpointArray pts; // the points 750 ANNidxArray pidx; // point indices (to pts array) 751 ANNkd_ptr root; // root of kd-tree 752 ANNpoint bnd_box_lo; // bounding box low point 753 ANNpoint bnd_box_hi; // bounding box high point 754 755 void SkeletonTree( // construct skeleton tree 756 int n, // number of points 757 int dd, // dimension 758 int bs, // bucket size 759 ANNpointArray pa = NULL, // point array (optional) 760 ANNidxArray pi = NULL); // point indices (optional) 761 762 public: 763 ANNkd_tree( // build skeleton tree 764 int n = 0, // number of points 765 int dd = 0, // dimension 766 int bs = 1); // bucket size 767 768 ANNkd_tree( // build from point array 769 ANNpointArray pa, // point array 770 int n, // number of points 771 int dd, // dimension 772 int bs = 1, // bucket size 773 ANNsplitRule split = ANN_KD_SUGGEST); // splitting method 774 775 ANNkd_tree( // build from dump file 776 std::istream& in); // input stream for dump file 777 778 ~ANNkd_tree(); // tree destructor 779 780 void annkSearch( // approx k near neighbor search 781 ANNpoint q, // query point 782 int k, // number of near neighbors to return 783 ANNidxArray nn_idx, // nearest neighbor array (modified) 784 ANNdistArray dd, // dist to near neighbors (modified) 785 double eps=0.0); // error bound 786 787 void annkPriSearch( // priority k near neighbor search 788 ANNpoint q, // query point 789 int k, // number of near neighbors to return 790 ANNidxArray nn_idx, // nearest neighbor array (modified) 791 ANNdistArray dd, // dist to near neighbors (modified) 792 double eps=0.0); // error bound 793 794 int annkFRSearch( // approx fixed-radius kNN search 795 ANNpoint q, // the query point 796 ANNdist sqRad, // squared radius of query ball 797 int k, // number of neighbors to return 798 ANNidxArray nn_idx = NULL, // nearest neighbor array (modified) 799 ANNdistArray dd = NULL, // dist to near neighbors (modified) 800 double eps=0.0); // error bound 801 theDim()802 int theDim() // return dimension of space 803 { return dim; } 804 nPoints()805 int nPoints() // return number of points 806 { return n_pts; } 807 thePoints()808 ANNpointArray thePoints() // return pointer to points 809 { return pts; } 810 811 virtual void Print( // print the tree (for debugging) 812 ANNbool with_pts, // print points as well? 813 std::ostream& out); // output stream 814 815 virtual void Dump( // dump entire tree 816 ANNbool with_pts, // print points as well? 817 std::ostream& out); // output stream 818 819 virtual void getStats( // compute tree statistics 820 ANNkdStats& st); // the statistics (modified) 821 }; 822 823 //---------------------------------------------------------------------- 824 // Box decomposition tree (bd-tree) 825 // The bd-tree is inherited from a kd-tree. The main difference 826 // in the bd-tree and the kd-tree is a new type of internal node 827 // called a shrinking node (in the kd-tree there is only one type 828 // of internal node, a splitting node). The shrinking node 829 // makes it possible to generate balanced trees in which the 830 // cells have bounded aspect ratio, by allowing the decomposition 831 // to zoom in on regions of dense point concentration. Although 832 // this is a nice idea in theory, few point distributions are so 833 // densely clustered that this is really needed. 834 //---------------------------------------------------------------------- 835 836 class DLL_API ANNbd_tree: public ANNkd_tree { 837 public: 838 ANNbd_tree( // build skeleton tree 839 int n, // number of points 840 int dd, // dimension 841 int bs = 1) // bucket size ANNkd_tree(n,dd,bs)842 : ANNkd_tree(n, dd, bs) {} // build base kd-tree 843 844 ANNbd_tree( // build from point array 845 ANNpointArray pa, // point array 846 int n, // number of points 847 int dd, // dimension 848 int bs = 1, // bucket size 849 ANNsplitRule split = ANN_KD_SUGGEST, // splitting rule 850 ANNshrinkRule shrink = ANN_BD_SUGGEST); // shrinking rule 851 852 ANNbd_tree( // build from dump file 853 std::istream& in); // input stream for dump file 854 }; 855 856 //---------------------------------------------------------------------- 857 // Other functions 858 // annMaxPtsVisit Sets a limit on the maximum number of points 859 // to visit in the search. 860 // annClose Can be called when all use of ANN is finished. 861 // It clears up a minor memory leak. 862 //---------------------------------------------------------------------- 863 864 DLL_API void annMaxPtsVisit( // max. pts to visit in search 865 int maxPts); // the limit 866 867 DLL_API void annClose(); // called to end use of ANN 868 869 #endif 870