1 //----------------------------------------------------------------------
2 // File:			ANN.h
3 // Programmer:		Sunil Arya and David Mount
4 // Description:		Basic include file for approximate nearest
5 //					neighbor searching.
6 // Last modified:	01/27/10 (Version 1.1.2)
7 //----------------------------------------------------------------------
8 // Copyright (c) 1997-2010 University of Maryland and Sunil Arya and
9 // David Mount.  All Rights Reserved.
10 //
11 // This software and related documentation is part of the Approximate
12 // Nearest Neighbor Library (ANN).  This software is provided under
13 // the provisions of the Lesser GNU Public License (LGPL).  See the
14 // file ../ReadMe.txt for further information.
15 //
16 // The University of Maryland (U.M.) and the authors make no
17 // representations about the suitability or fitness of this software for
18 // any purpose.  It is provided "as is" without express or implied
19 // warranty.
20 //----------------------------------------------------------------------
21 // History:
22 //	Revision 0.1  03/04/98
23 //		Initial release
24 //	Revision 1.0  04/01/05
25 //		Added copyright and revision information
26 //		Added ANNcoordPrec for coordinate precision.
27 //		Added methods theDim, nPoints, maxPoints, thePoints to ANNpointSet.
28 //		Cleaned up C++ structure for modern compilers
29 //	Revision 1.1  05/03/05
30 //		Added fixed-radius k-NN searching
31 //	Revision 1.1.2  01/27/10
32 //		Fixed minor compilation bugs for new versions of gcc
33 //----------------------------------------------------------------------
34 
35 //----------------------------------------------------------------------
36 // ANN - approximate nearest neighbor searching
37 //	ANN is a library for approximate nearest neighbor searching,
38 //	based on the use of standard and priority search in kd-trees
39 //	and balanced box-decomposition (bbd) trees. Here are some
40 //	references to the main algorithmic techniques used here:
41 //
42 //		kd-trees:
43 //			Friedman, Bentley, and Finkel, ``An algorithm for finding
44 //				best matches in logarithmic expected time,'' ACM
45 //				Transactions on Mathematical Software, 3(3):209-226, 1977.
46 //
47 //		Priority search in kd-trees:
48 //			Arya and Mount, ``Algorithms for fast vector quantization,''
49 //				Proc. of DCC '93: Data Compression Conference, eds. J. A.
50 //				Storer and M. Cohn, IEEE Press, 1993, 381-390.
51 //
52 //		Approximate nearest neighbor search and bbd-trees:
53 //			Arya, Mount, Netanyahu, Silverman, and Wu, ``An optimal
54 //				algorithm for approximate nearest neighbor searching,''
55 //				5th Ann. ACM-SIAM Symposium on Discrete Algorithms,
56 //				1994, 573-582.
57 //----------------------------------------------------------------------
58 
59 #ifndef ANN_H
60 #define ANN_H
61 
62 #ifdef WIN32
63   //----------------------------------------------------------------------
64   // For Microsoft Visual C++, externally accessible symbols must be
65   // explicitly indicated with DLL_API, which is somewhat like "extern."
66   //
67   // The following ifdef block is the standard way of creating macros
68   // which make exporting from a DLL simpler. All files within this DLL
69   // are compiled with the DLL_EXPORTS preprocessor symbol defined on the
70   // command line. In contrast, projects that use (or import) the DLL
71   // objects do not define the DLL_EXPORTS symbol. This way any other
72   // project whose source files include this file see DLL_API functions as
73   // being imported from a DLL, wheras this DLL sees symbols defined with
74   // this macro as being exported.
75   //----------------------------------------------------------------------
76   //#ifdef DLL_EXPORTS
77   //	 #define DLL_API __declspec(dllexport)
78   //#else
79   //	#define DLL_API __declspec(dllimport)
80   //#endif
81   #define DLL_API
82   //----------------------------------------------------------------------
83   // DLL_API is ignored for all other systems
84   //----------------------------------------------------------------------
85 #else
86   #define DLL_API
87 #endif
88 
89 //----------------------------------------------------------------------
90 //  basic includes
91 //----------------------------------------------------------------------
92 
93 #include <cstdlib>			// standard lib includes
94 #include <cmath>			// math includes
95 #include <iostream>			// I/O streams
96 #include <cstring>			// C-style strings
97 
98 //----------------------------------------------------------------------
99 // Limits
100 // There are a number of places where we use the maximum double value as
101 // default initializers (and others may be used, depending on the
102 // data/distance representation). These can usually be found in limits.h
103 // (as LONG_MAX, INT_MAX) or in float.h (as DBL_MAX, FLT_MAX).
104 //
105 // Not all systems have these files.  If you are using such a system,
106 // you should set the preprocessor symbol ANN_NO_LIMITS_H when
107 // compiling, and modify the statements below to generate the
108 // appropriate value. For practical purposes, this does not need to be
109 // the maximum double value. It is sufficient that it be at least as
110 // large than the maximum squared distance between between any two
111 // points.
112 //----------------------------------------------------------------------
113 #ifdef ANN_NO_LIMITS_H					// limits.h unavailable
114   #include <cvalues>					// replacement for limits.h
115   const double ANN_DBL_MAX = MAXDOUBLE;	// insert maximum double
116 #else
117   #include <climits>
118   #include <cfloat>
119   const double ANN_DBL_MAX = DBL_MAX;
120 #endif
121 
122 #define ANNversion 		"1.1.2"			// ANN version and information
123 #define ANNversionCmt	""
124 #define ANNcopyright	"David M. Mount and Sunil Arya"
125 #define ANNlatestRev	"Jan 27, 2010"
126 
127 //----------------------------------------------------------------------
128 //	ANNbool
129 //	This is a simple boolean type. Although ANSI C++ is supposed
130 //	to support the type bool, some compilers do not have it.
131 //----------------------------------------------------------------------
132 
133 enum ANNbool {ANNfalse = 0, ANNtrue = 1}; // ANN boolean type (non ANSI C++)
134 
135 //----------------------------------------------------------------------
136 //	ANNcoord, ANNdist
137 //		ANNcoord and ANNdist are the types used for representing
138 //		point coordinates and distances.  They can be modified by the
139 //		user, with some care.  It is assumed that they are both numeric
140 //		types, and that ANNdist is generally of an equal or higher type
141 //		from ANNcoord.	A variable of type ANNdist should be large
142 //		enough to store the sum of squared components of a variable
143 //		of type ANNcoord for the number of dimensions needed in the
144 //		application.  For example, the following combinations are
145 //		legal:
146 //
147 //		ANNcoord		ANNdist
148 //		---------		-------------------------------
149 //		short			short, int, long, float, double
150 //		int				int, long, float, double
151 //		long			long, float, double
152 //		float			float, double
153 //		double			double
154 //
155 //		It is the user's responsibility to make sure that overflow does
156 //		not occur in distance calculation.
157 //----------------------------------------------------------------------
158 
159 typedef double	ANNcoord;				// coordinate data type
160 typedef double	ANNdist;				// distance data type
161 
162 //----------------------------------------------------------------------
163 //	ANNidx
164 //		ANNidx is a point index.  When the data structure is built, the
165 //		points are given as an array.  Nearest neighbor results are
166 //		returned as an integer index into this array.  To make it
167 //		clearer when this is happening, we define the integer type
168 //		ANNidx.	 Indexing starts from 0.
169 //
170 //		For fixed-radius near neighbor searching, it is possible that
171 //		there are not k nearest neighbors within the search radius.  To
172 //		indicate this, the algorithm returns ANN_NULL_IDX as its result.
173 //		It should be distinguishable from any valid array index.
174 //----------------------------------------------------------------------
175 
176 typedef int		ANNidx;					// point index
177 const ANNidx	ANN_NULL_IDX = -1;		// a NULL point index
178 
179 //----------------------------------------------------------------------
180 //	Infinite distance:
181 //		The code assumes that there is an "infinite distance" which it
182 //		uses to initialize distances before performing nearest neighbor
183 //		searches.  It should be as larger or larger than any legitimate
184 //		nearest neighbor distance.
185 //
186 //		On most systems, these should be found in the standard include
187 //		file <limits.h> or possibly <float.h>.  If you do not have these
188 //		file, some suggested values are listed below, assuming 64-bit
189 //		long, 32-bit int and 16-bit short.
190 //
191 //		ANNdist ANN_DIST_INF	Values (see <limits.h> or <float.h>)
192 //		------- ------------	------------------------------------
193 //		double	DBL_MAX			1.79769313486231570e+308
194 //		float	FLT_MAX			3.40282346638528860e+38
195 //		long	LONG_MAX		0x7fffffffffffffff
196 //		int		INT_MAX			0x7fffffff
197 //		short	SHRT_MAX		0x7fff
198 //----------------------------------------------------------------------
199 
200 const ANNdist	ANN_DIST_INF = ANN_DBL_MAX;
201 
202 //----------------------------------------------------------------------
203 //	Significant digits for tree dumps:
204 //		When floating point coordinates are used, the routine that dumps
205 //		a tree needs to know roughly how many significant digits there
206 //		are in a ANNcoord, so it can output points to full precision.
207 //		This is defined to be ANNcoordPrec.  On most systems these
208 //		values can be found in the standard include files <limits.h> or
209 //		<float.h>.  For integer types, the value is essentially ignored.
210 //
211 //		ANNcoord ANNcoordPrec	Values (see <limits.h> or <float.h>)
212 //		-------- ------------	------------------------------------
213 //		double	 DBL_DIG		15
214 //		float	 FLT_DIG		6
215 //		long	 doesn't matter 19
216 //		int		 doesn't matter 10
217 //		short	 doesn't matter 5
218 //----------------------------------------------------------------------
219 
220 #ifdef DBL_DIG							// number of sig. bits in ANNcoord
221 	const int	 ANNcoordPrec	= DBL_DIG;
222 #else
223 	const int	 ANNcoordPrec	= 15;	// default precision
224 #endif
225 
226 //----------------------------------------------------------------------
227 // Self match?
228 //	In some applications, the nearest neighbor of a point is not
229 //	allowed to be the point itself. This occurs, for example, when
230 //	computing all nearest neighbors in a set.  By setting the
231 //	parameter ANN_ALLOW_SELF_MATCH to ANNfalse, the nearest neighbor
232 //	is the closest point whose distance from the query point is
233 //	strictly positive.
234 //----------------------------------------------------------------------
235 
236 const ANNbool	ANN_ALLOW_SELF_MATCH	= ANNtrue;
237 
238 //----------------------------------------------------------------------
239 //	Norms and metrics:
240 //		ANN supports any Minkowski norm for defining distance.  In
241 //		particular, for any p >= 1, the L_p Minkowski norm defines the
242 //		length of a d-vector (v0, v1, ..., v(d-1)) to be
243 //
244 //				(|v0|^p + |v1|^p + ... + |v(d-1)|^p)^(1/p),
245 //
246 //		(where ^ denotes exponentiation, and |.| denotes absolute
247 //		value).  The distance between two points is defined to be the
248 //		norm of the vector joining them.  Some common distance metrics
249 //		include
250 //
251 //				Euclidean metric		p = 2
252 //				Manhattan metric		p = 1
253 //				Max metric				p = infinity
254 //
255 //		In the case of the max metric, the norm is computed by taking
256 //		the maxima of the absolute values of the components.  ANN is
257 //		highly "coordinate-based" and does not support general distances
258 //		functions (e.g. those obeying just the triangle inequality).  It
259 //		also does not support distance functions based on
260 //		inner-products.
261 //
262 //		For the purpose of computing nearest neighbors, it is not
263 //		necessary to compute the final power (1/p).  Thus the only
264 //		component that is used by the program is |v(i)|^p.
265 //
266 //		ANN parameterizes the distance computation through the following
267 //		macros.  (Macros are used rather than procedures for
268 //		efficiency.) Recall that the distance between two points is
269 //		given by the length of the vector joining them, and the length
270 //		or norm of a vector v is given by formula:
271 //
272 //				|v| = ROOT(POW(v0) # POW(v1) # ... # POW(v(d-1)))
273 //
274 //		where ROOT, POW are unary functions and # is an associative and
275 //		commutative binary operator mapping the following types:
276 //
277 //			**	POW:	ANNcoord				--> ANNdist
278 //			**	#:		ANNdist x ANNdist		--> ANNdist
279 //			**	ROOT:	ANNdist (>0)			--> double
280 //
281 //		For early termination in distance calculation (partial distance
282 //		calculation) we assume that POW and # together are monotonically
283 //		increasing on sequences of arguments, meaning that for all
284 //		v0..vk and y:
285 //
286 //		POW(v0) #...# POW(vk) <= (POW(v0) #...# POW(vk)) # POW(y).
287 //
288 //	Incremental Distance Calculation:
289 //		The program uses an optimized method of computing distances for
290 //		kd-trees and bd-trees, called incremental distance calculation.
291 //		It is used when distances are to be updated when only a single
292 //		coordinate of a point has been changed.  In order to use this,
293 //		we assume that there is an incremental update function DIFF(x,y)
294 //		for #, such that if:
295 //
296 //					s = x0 # ... # xi # ... # xk
297 //
298 //		then if s' is equal to s but with xi replaced by y, that is,
299 //
300 //					s' = x0 # ... # y # ... # xk
301 //
302 //		then the length of s' can be computed by:
303 //
304 //					|s'| = |s| # DIFF(xi,y).
305 //
306 //		Thus, if # is + then DIFF(xi,y) is (yi-x).  For the L_infinity
307 //		norm we make use of the fact that in the program this function
308 //		is only invoked when y > xi, and hence DIFF(xi,y)=y.
309 //
310 //		Finally, for approximate nearest neighbor queries we assume
311 //		that POW and ROOT are related such that
312 //
313 //					v*ROOT(x) = ROOT(POW(v)*x)
314 //
315 //		Here are the values for the various Minkowski norms:
316 //
317 //		L_p:	p even:							p odd:
318 //				-------------------------		------------------------
319 //				POW(v)			= v^p			POW(v)			= |v|^p
320 //				ROOT(x)			= x^(1/p)		ROOT(x)			= x^(1/p)
321 //				#				= +				#				= +
322 //				DIFF(x,y)		= y - x			DIFF(x,y)		= y - x
323 //
324 //		L_inf:
325 //				POW(v)			= |v|
326 //				ROOT(x)			= x
327 //				#				= max
328 //				DIFF(x,y)		= y
329 //
330 //		By default the Euclidean norm is assumed.  To change the norm,
331 //		uncomment the appropriate set of macros below.
332 //----------------------------------------------------------------------
333 
334 //----------------------------------------------------------------------
335 //	Use the following for the Euclidean norm
336 //----------------------------------------------------------------------
337 #define ANN_POW(v)			((v)*(v))
338 #define ANN_ROOT(x)			sqrt(x)
339 #define ANN_SUM(x,y)		((x) + (y))
340 #define ANN_DIFF(x,y)		((y) - (x))
341 
342 //----------------------------------------------------------------------
343 //	Use the following for the L_1 (Manhattan) norm
344 //----------------------------------------------------------------------
345 // #define ANN_POW(v)		fabs(v)
346 // #define ANN_ROOT(x)		(x)
347 // #define ANN_SUM(x,y)		((x) + (y))
348 // #define ANN_DIFF(x,y)	((y) - (x))
349 
350 //----------------------------------------------------------------------
351 //	Use the following for a general L_p norm
352 //----------------------------------------------------------------------
353 // #define ANN_POW(v)		pow(fabs(v),p)
354 // #define ANN_ROOT(x)		pow(fabs(x),1/p)
355 // #define ANN_SUM(x,y)		((x) + (y))
356 // #define ANN_DIFF(x,y)	((y) - (x))
357 
358 //----------------------------------------------------------------------
359 //	Use the following for the L_infinity (Max) norm
360 //----------------------------------------------------------------------
361 // #define ANN_POW(v)		fabs(v)
362 // #define ANN_ROOT(x)		(x)
363 // #define ANN_SUM(x,y)		((x) > (y) ? (x) : (y))
364 // #define ANN_DIFF(x,y)	(y)
365 
366 //----------------------------------------------------------------------
367 //	Array types
368 //		The following array types are of basic interest.  A point is
369 //		just a dimensionless array of coordinates, a point array is a
370 //		dimensionless array of points.  A distance array is a
371 //		dimensionless array of distances and an index array is a
372 //		dimensionless array of point indices.  The latter two are used
373 //		when returning the results of k-nearest neighbor queries.
374 //----------------------------------------------------------------------
375 
376 typedef ANNcoord* ANNpoint;			// a point
377 
378 // [Bruno Levy] define an alternative version
379 // of ANNpointArray, that avoids needing to
380 // store pointers if data is contiguous in
381 // memory.
382 #define ANN_CONTIGUOUS_POINT_ARRAY
383 #ifdef ANN_CONTIGUOUS_POINT_ARRAY
384 class ANNpointArray {
385  public:
ANNpointArray(const ANNcoord * data,size_t stride)386    ANNpointArray(
387        const ANNcoord* data, size_t stride
388    ) : data_(const_cast<ANNcoord*>(data)), stride_(stride) {
389    }
ANNpointArray()390    ANNpointArray() : data_(NULL), stride_(0) {
391    }
392 
393    // This constructor is needed for allowing NULL
394    // value (or default argument) for ANNpointArray.
395 #ifdef NDEBUG
ANNpointArray(void *)396    ANNpointArray(void*) : data_(NULL), stride_(0) {
397    }
398 #else
ANNpointArray(void * p)399    ANNpointArray(void* p) : data_(NULL), stride_(0) {
400 	if(p != NULL) { abort() ; }
401    }
402 #endif
403 
404    ANNpoint operator[](size_t i) {
405        return data_ + i*stride_ ;
406    }
407  private:
408    friend void annDeallocPts(ANNpointArray &pa) ;
409    ANNcoord* data_ ;
410    size_t stride_ ;
411 } ;
412 #else
413 typedef ANNpoint* ANNpointArray;	// an array of points
414 #endif
415 
416 typedef ANNdist*  ANNdistArray;		// an array of distances
417 typedef ANNidx*   ANNidxArray;		// an array of point indices
418 
419 //----------------------------------------------------------------------
420 //	Basic point and array utilities:
421 //		The following procedures are useful supplements to ANN's nearest
422 //		neighbor capabilities.
423 //
424 //		annDist():
425 //			Computes the (squared) distance between a pair of points.
426 //			Note that this routine is not used internally by ANN for
427 //			computing distance calculations.  For reasons of efficiency
428 //			this is done using incremental distance calculation.  Thus,
429 //			this routine cannot be modified as a method of changing the
430 //			metric.
431 //
432 //		Because points (somewhat like strings in C) are stored as
433 //		pointers.  Consequently, creating and destroying copies of
434 //		points may require storage allocation.  These procedures do
435 //		this.
436 //
437 //		annAllocPt() and annDeallocPt():
438 //				Allocate a deallocate storage for a single point, and
439 //				return a pointer to it.  The argument to AllocPt() is
440 //				used to initialize all components.
441 //
442 //		annAllocPts() and annDeallocPts():
443 //				Allocate and deallocate an array of points as well a
444 //				place to store their coordinates, and initializes the
445 //				points to point to their respective coordinates.  It
446 //				allocates point storage in a contiguous block large
447 //				enough to store all the points.  It performs no
448 //				initialization.
449 //
450 //		annCopyPt():
451 //				Creates a copy of a given point, allocating space for
452 //				the new point.  It returns a pointer to the newly
453 //				allocated copy.
454 //----------------------------------------------------------------------
455 
456 DLL_API ANNdist annDist(
457 	int				dim,		// dimension of space
458 	ANNpoint		p,			// points
459 	ANNpoint		q);
460 
461 DLL_API ANNpoint annAllocPt(
462 	int				dim,		// dimension
463 	ANNcoord		c = 0);		// coordinate value (all equal)
464 
465 DLL_API ANNpointArray annAllocPts(
466 	int				n,			// number of points
467 	int				dim);		// dimension
468 
469 DLL_API void annDeallocPt(
470 	ANNpoint		&p);		// deallocate 1 point
471 
472 DLL_API void annDeallocPts(
473 	ANNpointArray	&pa);		// point array
474 
475 DLL_API ANNpoint annCopyPt(
476 	int				dim,		// dimension
477 	ANNpoint		source);	// point to copy
478 
479 //----------------------------------------------------------------------
480 //Overall structure: ANN supports a number of different data structures
481 //for approximate and exact nearest neighbor searching.  These are:
482 //
483 //		ANNbruteForce	A simple brute-force search structure.
484 //		ANNkd_tree		A kd-tree tree search structure.  ANNbd_tree
485 //		A bd-tree tree search structure (a kd-tree with shrink
486 //		capabilities).
487 //
488 //		At a minimum, each of these data structures support k-nearest
489 //		neighbor queries.  The nearest neighbor query, annkSearch,
490 //		returns an integer identifier and the distance to the nearest
491 //		neighbor(s) and annRangeSearch returns the nearest points that
492 //		lie within a given query ball.
493 //
494 //		Each structure is built by invoking the appropriate constructor
495 //		and passing it (at a minimum) the array of points, the total
496 //		number of points and the dimension of the space.  Each structure
497 //		is also assumed to support a destructor and member functions
498 //		that return basic information about the point set.
499 //
500 //		Note that the array of points is not copied by the data
501 //		structure (for reasons of space efficiency), and it is assumed
502 //		to be constant throughout the lifetime of the search structure.
503 //
504 //		The search algorithm, annkSearch, is given the query point (q),
505 //		and the desired number of nearest neighbors to report (k), and
506 //		the error bound (eps) (whose default value is 0, implying exact
507 //		nearest neighbors).  It returns two arrays which are assumed to
508 //		contain at least k elements: one (nn_idx) contains the indices
509 //		(within the point array) of the nearest neighbors and the other
510 //		(dd) contains the squared distances to these nearest neighbors.
511 //
512 //		The search algorithm, annkFRSearch, is a fixed-radius kNN
513 //		search.  In addition to a query point, it is given a (squared)
514 //		radius bound.  (This is done for consistency, because the search
515 //		returns distances as squared quantities.) It does two things.
516 //		First, it computes the k nearest neighbors within the radius
517 //		bound, and second, it returns the total number of points lying
518 //		within the radius bound. It is permitted to set k = 0, in which
519 //		case it effectively answers a range counting query.  If the
520 //		error bound epsilon is positive, then the search is approximate
521 //		in the sense that it is free to ignore any point that lies
522 //		outside a ball of radius r/(1+epsilon), where r is the given
523 //		(unsquared) radius bound.
524 //
525 //		The generic object from which all the search structures are
526 //		dervied is given below.  It is a virtual object, and is useless
527 //		by itself.
528 //----------------------------------------------------------------------
529 
530 class DLL_API ANNpointSet {
531 public:
~ANNpointSet()532 	virtual ~ANNpointSet() {}			// virtual distructor
533 
534 	virtual void annkSearch(			// approx k near neighbor search
535 		ANNpoint		q,				// query point
536 		int				k,				// number of near neighbors to return
537 		ANNidxArray		nn_idx,			// nearest neighbor array (modified)
538 		ANNdistArray	dd,				// dist to near neighbors (modified)
539 		double			eps=0.0			// error bound
540 		) = 0;							// pure virtual (defined elsewhere)
541 
542 	virtual int annkFRSearch(			// approx fixed-radius kNN search
543 		ANNpoint		q,				// query point
544 		ANNdist			sqRad,			// squared radius
545 		int				k = 0,			// number of near neighbors to return
546 		ANNidxArray		nn_idx = NULL,	// nearest neighbor array (modified)
547 		ANNdistArray	dd = NULL,		// dist to near neighbors (modified)
548 		double			eps=0.0			// error bound
549 		) = 0;							// pure virtual (defined elsewhere)
550 
551 	virtual int theDim() = 0;			// return dimension of space
552 	virtual int nPoints() = 0;			// return number of points
553 										// return pointer to points
554 	virtual ANNpointArray thePoints() = 0;
555 };
556 
557 //----------------------------------------------------------------------
558 //	Brute-force nearest neighbor search:
559 //		The brute-force search structure is very simple but inefficient.
560 //		It has been provided primarily for the sake of comparison with
561 //		and validation of the more complex search structures.
562 //
563 //		Query processing is the same as described above, but the value
564 //		of epsilon is ignored, since all distance calculations are
565 //		performed exactly.
566 //
567 //		WARNING: This data structure is very slow, and should not be
568 //		used unless the number of points is very small.
569 //
570 //		Internal information:
571 //		---------------------
572 //		This data structure bascially consists of the array of points
573 //		(each a pointer to an array of coordinates).  The search is
574 //		performed by a simple linear scan of all the points.
575 //----------------------------------------------------------------------
576 
577 class DLL_API ANNbruteForce: public ANNpointSet {
578 	int				dim;				// dimension
579 	int				n_pts;				// number of points
580 	ANNpointArray	pts;				// point array
581 public:
582 	ANNbruteForce(						// constructor from point array
583 		ANNpointArray	pa,				// point array
584 		int				n,				// number of points
585 		int				dd);			// dimension
586 
587 	~ANNbruteForce();					// destructor
588 
589 	void annkSearch(					// approx k near neighbor search
590 		ANNpoint		q,				// query point
591 		int				k,				// number of near neighbors to return
592 		ANNidxArray		nn_idx,			// nearest neighbor array (modified)
593 		ANNdistArray	dd,				// dist to near neighbors (modified)
594 		double			eps=0.0);		// error bound
595 
596 	int annkFRSearch(					// approx fixed-radius kNN search
597 		ANNpoint		q,				// query point
598 		ANNdist			sqRad,			// squared radius
599 		int				k = 0,			// number of near neighbors to return
600 		ANNidxArray		nn_idx = NULL,	// nearest neighbor array (modified)
601 		ANNdistArray	dd = NULL,		// dist to near neighbors (modified)
602 		double			eps=0.0);		// error bound
603 
theDim()604 	int theDim()						// return dimension of space
605 		{ return dim; }
606 
nPoints()607 	int nPoints()						// return number of points
608 		{ return n_pts; }
609 
thePoints()610 	ANNpointArray thePoints()			// return pointer to points
611 		{  return pts;  }
612 };
613 
614 //----------------------------------------------------------------------
615 // kd- and bd-tree splitting and shrinking rules
616 //		kd-trees supports a collection of different splitting rules.
617 //		In addition to the standard kd-tree splitting rule proposed
618 //		by Friedman, Bentley, and Finkel, we have introduced a
619 //		number of other splitting rules, which seem to perform
620 //		as well or better (for the distributions we have tested).
621 //
622 //		The splitting methods given below allow the user to tailor
623 //		the data structure to the particular data set.  They are
624 //		are described in greater details in the kd_split.cc source
625 //		file.  The method ANN_KD_SUGGEST is the method chosen (rather
626 //		subjectively) by the implementors as the one giving the
627 //		fastest performance, and is the default splitting method.
628 //
629 //		As with splitting rules, there are a number of different
630 //		shrinking rules.  The shrinking rule ANN_BD_NONE does no
631 //		shrinking (and hence produces a kd-tree tree).  The rule
632 //		ANN_BD_SUGGEST uses the implementors favorite rule.
633 //----------------------------------------------------------------------
634 
635 enum ANNsplitRule {
636 		ANN_KD_STD				= 0,	// the optimized kd-splitting rule
637 		ANN_KD_MIDPT			= 1,	// midpoint split
638 		ANN_KD_FAIR				= 2,	// fair split
639 		ANN_KD_SL_MIDPT			= 3,	// sliding midpoint splitting method
640 		ANN_KD_SL_FAIR			= 4,	// sliding fair split method
641 		ANN_KD_SUGGEST			= 5};	// the authors' suggestion for best
642 const int ANN_N_SPLIT_RULES		= 6;	// number of split rules
643 
644 enum ANNshrinkRule {
645 		ANN_BD_NONE				= 0,	// no shrinking at all (just kd-tree)
646 		ANN_BD_SIMPLE			= 1,	// simple splitting
647 		ANN_BD_CENTROID			= 2,	// centroid splitting
648 		ANN_BD_SUGGEST			= 3};	// the authors' suggested choice
649 const int ANN_N_SHRINK_RULES	= 4;	// number of shrink rules
650 
651 //----------------------------------------------------------------------
652 //	kd-tree:
653 //		The main search data structure supported by ANN is a kd-tree.
654 //		The main constructor is given a set of points and a choice of
655 //		splitting method to use in building the tree.
656 //
657 //		Construction:
658 //		-------------
659 //		The constructor is given the point array, number of points,
660 //		dimension, bucket size (default = 1), and the splitting rule
661 //		(default = ANN_KD_SUGGEST).  The point array is not copied, and
662 //		is assumed to be kept constant throughout the lifetime of the
663 //		search structure.  There is also a "load" constructor that
664 //		builds a tree from a file description that was created by the
665 //		Dump operation.
666 //
667 //		Search:
668 //		-------
669 //		There are two search methods:
670 //
671 //			Standard search (annkSearch()):
672 //				Searches nodes in tree-traversal order, always visiting
673 //				the closer child first.
674 //			Priority search (annkPriSearch()):
675 //				Searches nodes in order of increasing distance of the
676 //				associated cell from the query point.  For many
677 //				distributions the standard search seems to work just
678 //				fine, but priority search is safer for worst-case
679 //				performance.
680 //
681 //		Printing:
682 //		---------
683 //		There are two methods provided for printing the tree.  Print()
684 //		is used to produce a "human-readable" display of the tree, with
685 //		indenation, which is handy for debugging.  Dump() produces a
686 //		format that is suitable reading by another program.  There is a
687 //		"load" constructor, which constructs a tree which is assumed to
688 //		have been saved by the Dump() procedure.
689 //
690 //		Performance and Structure Statistics:
691 //		-------------------------------------
692 //		The procedure getStats() collects statistics information on the
693 //		tree (its size, height, etc.)  See ANNperf.h for information on
694 //		the stats structure it returns.
695 //
696 //		Internal information:
697 //		---------------------
698 //		The data structure consists of three major chunks of storage.
699 //		The first (implicit) storage are the points themselves (pts),
700 //		which have been provided by the users as an argument to the
701 //		constructor, or are allocated dynamically if the tree is built
702 //		using the load constructor).  These should not be changed during
703 //		the lifetime of the search structure.  It is the user's
704 //		responsibility to delete these after the tree is destroyed.
705 //
706 //		The second is the tree itself (which is dynamically allocated in
707 //		the constructor) and is given as a pointer to its root node
708 //		(root).  These nodes are automatically deallocated when the tree
709 //		is deleted.  See the file src/kd_tree.h for further information
710 //		on the structure of the tree nodes.
711 //
712 //		Each leaf of the tree does not contain a pointer directly to a
713 //		point, but rather contains a pointer to a "bucket", which is an
714 //		array consisting of point indices.  The third major chunk of
715 //		storage is an array (pidx), which is a large array in which all
716 //		these bucket subarrays reside.  (The reason for storing them
717 //		separately is the buckets are typically small, but of varying
718 //		sizes.  This was done to avoid fragmentation.)  This array is
719 //		also deallocated when the tree is deleted.
720 //
721 //		In addition to this, the tree consists of a number of other
722 //		pieces of information which are used in searching and for
723 //		subsequent tree operations.  These consist of the following:
724 //
725 //		dim						Dimension of space
726 //		n_pts					Number of points currently in the tree
727 //		n_max					Maximum number of points that are allowed
728 //								in the tree
729 //		bkt_size				Maximum bucket size (no. of points per leaf)
730 //		bnd_box_lo				Bounding box low point
731 //		bnd_box_hi				Bounding box high point
732 //		splitRule				Splitting method used
733 //
734 //----------------------------------------------------------------------
735 
736 //----------------------------------------------------------------------
737 // Some types and objects used by kd-tree functions
738 // See src/kd_tree.h and src/kd_tree.cpp for definitions
739 //----------------------------------------------------------------------
740 class ANNkdStats;				// stats on kd-tree
741 class ANNkd_node;				// generic node in a kd-tree
742 typedef ANNkd_node*	ANNkd_ptr;	// pointer to a kd-tree node
743 
744 class DLL_API ANNkd_tree: public ANNpointSet {
745 protected:
746 	int				dim;				// dimension of space
747 	int				n_pts;				// number of points in tree
748 	int				bkt_size;			// bucket size
749 	ANNpointArray	pts;				// the points
750 	ANNidxArray		pidx;				// point indices (to pts array)
751 	ANNkd_ptr		root;				// root of kd-tree
752 	ANNpoint		bnd_box_lo;			// bounding box low point
753 	ANNpoint		bnd_box_hi;			// bounding box high point
754 
755 	void SkeletonTree(					// construct skeleton tree
756 		int				n,				// number of points
757 		int				dd,				// dimension
758 		int				bs,				// bucket size
759 		ANNpointArray pa = NULL,		// point array (optional)
760 		ANNidxArray pi = NULL);			// point indices (optional)
761 
762 public:
763 	ANNkd_tree(							// build skeleton tree
764 		int				n = 0,			// number of points
765 		int				dd = 0,			// dimension
766 		int				bs = 1);		// bucket size
767 
768 	ANNkd_tree(							// build from point array
769 		ANNpointArray	pa,				// point array
770 		int				n,				// number of points
771 		int				dd,				// dimension
772 		int				bs = 1,			// bucket size
773 		ANNsplitRule	split = ANN_KD_SUGGEST);	// splitting method
774 
775 	ANNkd_tree(							// build from dump file
776 		std::istream&	in);			// input stream for dump file
777 
778 	~ANNkd_tree();						// tree destructor
779 
780 	void annkSearch(					// approx k near neighbor search
781 		ANNpoint		q,				// query point
782 		int				k,				// number of near neighbors to return
783 		ANNidxArray		nn_idx,			// nearest neighbor array (modified)
784 		ANNdistArray	dd,				// dist to near neighbors (modified)
785 		double			eps=0.0);		// error bound
786 
787 	void annkPriSearch( 				// priority k near neighbor search
788 		ANNpoint		q,				// query point
789 		int				k,				// number of near neighbors to return
790 		ANNidxArray		nn_idx,			// nearest neighbor array (modified)
791 		ANNdistArray	dd,				// dist to near neighbors (modified)
792 		double			eps=0.0);		// error bound
793 
794 	int annkFRSearch(					// approx fixed-radius kNN search
795 		ANNpoint		q,				// the query point
796 		ANNdist			sqRad,			// squared radius of query ball
797 		int				k,				// number of neighbors to return
798 		ANNidxArray		nn_idx = NULL,	// nearest neighbor array (modified)
799 		ANNdistArray	dd = NULL,		// dist to near neighbors (modified)
800 		double			eps=0.0);		// error bound
801 
theDim()802 	int theDim()						// return dimension of space
803 		{ return dim; }
804 
nPoints()805 	int nPoints()						// return number of points
806 		{ return n_pts; }
807 
thePoints()808 	ANNpointArray thePoints()			// return pointer to points
809 		{  return pts;  }
810 
811 	virtual void Print(					// print the tree (for debugging)
812 		ANNbool			with_pts,		// print points as well?
813 		std::ostream&	out);			// output stream
814 
815 	virtual void Dump(					// dump entire tree
816 		ANNbool			with_pts,		// print points as well?
817 		std::ostream&	out);			// output stream
818 
819 	virtual void getStats(				// compute tree statistics
820 		ANNkdStats&		st);			// the statistics (modified)
821 };
822 
823 //----------------------------------------------------------------------
824 //	Box decomposition tree (bd-tree)
825 //		The bd-tree is inherited from a kd-tree.  The main difference
826 //		in the bd-tree and the kd-tree is a new type of internal node
827 //		called a shrinking node (in the kd-tree there is only one type
828 //		of internal node, a splitting node).  The shrinking node
829 //		makes it possible to generate balanced trees in which the
830 //		cells have bounded aspect ratio, by allowing the decomposition
831 //		to zoom in on regions of dense point concentration.  Although
832 //		this is a nice idea in theory, few point distributions are so
833 //		densely clustered that this is really needed.
834 //----------------------------------------------------------------------
835 
836 class DLL_API ANNbd_tree: public ANNkd_tree {
837 public:
838 	ANNbd_tree(							// build skeleton tree
839 		int				n,				// number of points
840 		int				dd,				// dimension
841 		int				bs = 1)			// bucket size
ANNkd_tree(n,dd,bs)842 		: ANNkd_tree(n, dd, bs) {}		// build base kd-tree
843 
844 	ANNbd_tree(							// build from point array
845 		ANNpointArray	pa,				// point array
846 		int				n,				// number of points
847 		int				dd,				// dimension
848 		int				bs = 1,			// bucket size
849 		ANNsplitRule	split  = ANN_KD_SUGGEST,	// splitting rule
850 		ANNshrinkRule	shrink = ANN_BD_SUGGEST);	// shrinking rule
851 
852 	ANNbd_tree(							// build from dump file
853 		std::istream&	in);			// input stream for dump file
854 };
855 
856 //----------------------------------------------------------------------
857 //	Other functions
858 //	annMaxPtsVisit		Sets a limit on the maximum number of points
859 //						to visit in the search.
860 //  annClose			Can be called when all use of ANN is finished.
861 //						It clears up a minor memory leak.
862 //----------------------------------------------------------------------
863 
864 DLL_API void annMaxPtsVisit(	// max. pts to visit in search
865 	int				maxPts);	// the limit
866 
867 DLL_API void annClose();		// called to end use of ANN
868 
869 #endif
870