1 //----------------------------------------------------------------------
2 // File:			ANN.h
3 // Programmer:		Sunil Arya and David Mount
4 // Description:		Basic include file for approximate nearest
5 //					neighbor searching.
6 // Last modified:	01/27/10 (Version 1.1.2)
7 //----------------------------------------------------------------------
8 // Copyright (c) 1997-2010 University of Maryland and Sunil Arya and
9 // David Mount.  All Rights Reserved.
10 //
11 // This software and related documentation is part of the Approximate
12 // Nearest Neighbor Library (ANN).  This software is provided under
13 // the provisions of the Lesser GNU Public License (LGPL).  See the
14 // file ../ReadMe.txt for further information.
15 //
16 // The University of Maryland (U.M.) and the authors make no
17 // representations about the suitability or fitness of this software for
18 // any purpose.  It is provided "as is" without express or implied
19 // warranty.
20 //----------------------------------------------------------------------
21 // History:
22 //	Revision 0.1  03/04/98
23 //		Initial release
24 //	Revision 1.0  04/01/05
25 //		Added copyright and revision information
26 //		Added ANNcoordPrec for coordinate precision.
27 //		Added methods theDim, nPoints, maxPoints, thePoints to ANNpointSet.
28 //		Cleaned up C++ structure for modern compilers
29 //	Revision 1.1  05/03/05
30 //		Added fixed-radius k-NN searching
31 //	Revision 1.1.2  01/27/10
32 //		Fixed minor compilation bugs for new versions of gcc
33 //----------------------------------------------------------------------
34 
35 //----------------------------------------------------------------------
36 // ANN - approximate nearest neighbor searching
37 //	ANN is a library for approximate nearest neighbor searching,
38 //	based on the use of standard and priority search in kd-trees
39 //	and balanced box-decomposition (bbd) trees. Here are some
40 //	references to the main algorithmic techniques used here:
41 //
42 //		kd-trees:
43 //			Friedman, Bentley, and Finkel, ``An algorithm for finding
44 //				best matches in logarithmic expected time,'' ACM
45 //				Transactions on Mathematical Software, 3(3):209-226, 1977.
46 //
47 //		Priority search in kd-trees:
48 //			Arya and Mount, ``Algorithms for fast vector quantization,''
49 //				Proc. of DCC '93: Data Compression Conference, eds. J. A.
50 //				Storer and M. Cohn, IEEE Press, 1993, 381-390.
51 //
52 //		Approximate nearest neighbor search and bbd-trees:
53 //			Arya, Mount, Netanyahu, Silverman, and Wu, ``An optimal
54 //				algorithm for approximate nearest neighbor searching,''
55 //				5th Ann. ACM-SIAM Symposium on Discrete Algorithms,
56 //				1994, 573-582.
57 //----------------------------------------------------------------------
58 
59 #ifndef ANN_H
60 #define ANN_H
61 
62 #ifdef WIN32
63   //----------------------------------------------------------------------
64   // For Microsoft Visual C++, externally accessible symbols must be
65   // explicitly indicated with DLL_API, which is somewhat like "extern."
66   //
67   // The following ifdef block is the standard way of creating macros
68   // which make exporting from a DLL simpler. All files within this DLL
69   // are compiled with the DLL_EXPORTS preprocessor symbol defined on the
70   // command line. In contrast, projects that use (or import) the DLL
71   // objects do not define the DLL_EXPORTS symbol. This way any other
72   // project whose source files include this file see DLL_API functions as
73   // being imported from a DLL, wheras this DLL sees symbols defined with
74   // this macro as being exported.
75   //----------------------------------------------------------------------
76   #ifdef DLL_EXPORTS
77 	 #define DLL_API __declspec(dllexport)
78   #else
79 // BMA: This results in link warnings about local Dakota symbols
80 // hiding those from the library (when building static libs)
81 //	#define DLL_API __declspec(dllimport)
82 // BMA: This doesn't help at all:
83 //	#define DLL_API extern
84 // BMA TODO: Dakota builds static libraries, so this works for our use
85 // case, but may need to make this conditional if migrating to DLL:
86 	#define DLL_API
87   #endif
88   //----------------------------------------------------------------------
89   // DLL_API is ignored for all other systems
90   //----------------------------------------------------------------------
91 #else
92   #define DLL_API
93 #endif
94 
95 //----------------------------------------------------------------------
96 //  basic includes
97 //----------------------------------------------------------------------
98 
99 #include <cstdlib>			// standard lib includes
100 #include <cmath>			// math includes
101 #include <iostream>			// I/O streams
102 #include <cstring>			// C-style strings
103 
104 //----------------------------------------------------------------------
105 // Limits
106 // There are a number of places where we use the maximum double value as
107 // default initializers (and others may be used, depending on the
108 // data/distance representation). These can usually be found in limits.h
109 // (as LONG_MAX, INT_MAX) or in float.h (as DBL_MAX, FLT_MAX).
110 //
111 // Not all systems have these files.  If you are using such a system,
112 // you should set the preprocessor symbol ANN_NO_LIMITS_H when
113 // compiling, and modify the statements below to generate the
114 // appropriate value. For practical purposes, this does not need to be
115 // the maximum double value. It is sufficient that it be at least as
116 // large than the maximum squared distance between between any two
117 // points.
118 //----------------------------------------------------------------------
119 #ifdef ANN_NO_LIMITS_H					// limits.h unavailable
120   #include <cvalues>					// replacement for limits.h
121   const double ANN_DBL_MAX = MAXDOUBLE;	// insert maximum double
122 #else
123   #include <climits>
124   #include <cfloat>
125   const double ANN_DBL_MAX = DBL_MAX;
126 #endif
127 
128 
129 // -----
130 // Runtime norm selector; see included header
131 // -----
132 #define ANN_NORM_SELECT 1
133 #include "ANNnormselect.h"
134 
135 #define ANNversion 		"1.1.2"			// ANN version and information
136 #define ANNversionCmt	""
137 #define ANNcopyright	"David M. Mount and Sunil Arya"
138 #define ANNlatestRev	"Jan 27, 2010"
139 
140 //----------------------------------------------------------------------
141 //	ANNbool
142 //	This is a simple boolean type. Although ANSI C++ is supposed
143 //	to support the type bool, some compilers do not have it.
144 //----------------------------------------------------------------------
145 
146 enum ANNbool {ANNfalse = 0, ANNtrue = 1}; // ANN boolean type (non ANSI C++)
147 
148 //----------------------------------------------------------------------
149 //	ANNcoord, ANNdist
150 //		ANNcoord and ANNdist are the types used for representing
151 //		point coordinates and distances.  They can be modified by the
152 //		user, with some care.  It is assumed that they are both numeric
153 //		types, and that ANNdist is generally of an equal or higher type
154 //		from ANNcoord.	A variable of type ANNdist should be large
155 //		enough to store the sum of squared components of a variable
156 //		of type ANNcoord for the number of dimensions needed in the
157 //		application.  For example, the following combinations are
158 //		legal:
159 //
160 //		ANNcoord		ANNdist
161 //		---------		-------------------------------
162 //		short			short, int, long, float, double
163 //		int				int, long, float, double
164 //		long			long, float, double
165 //		float			float, double
166 //		double			double
167 //
168 //		It is the user's responsibility to make sure that overflow does
169 //		not occur in distance calculation.
170 //----------------------------------------------------------------------
171 
172 typedef double	ANNcoord;				// coordinate data type
173 typedef double	ANNdist;				// distance data type
174 
175 //----------------------------------------------------------------------
176 //	ANNidx
177 //		ANNidx is a point index.  When the data structure is built, the
178 //		points are given as an array.  Nearest neighbor results are
179 //		returned as an integer index into this array.  To make it
180 //		clearer when this is happening, we define the integer type
181 //		ANNidx.	 Indexing starts from 0.
182 //
183 //		For fixed-radius near neighbor searching, it is possible that
184 //		there are not k nearest neighbors within the search radius.  To
185 //		indicate this, the algorithm returns ANN_NULL_IDX as its result.
186 //		It should be distinguishable from any valid array index.
187 //----------------------------------------------------------------------
188 
189 typedef int		ANNidx;					// point index
190 const ANNidx	ANN_NULL_IDX = -1;		// a NULL point index
191 
192 //----------------------------------------------------------------------
193 //	Infinite distance:
194 //		The code assumes that there is an "infinite distance" which it
195 //		uses to initialize distances before performing nearest neighbor
196 //		searches.  It should be as larger or larger than any legitimate
197 //		nearest neighbor distance.
198 //
199 //		On most systems, these should be found in the standard include
200 //		file <limits.h> or possibly <float.h>.  If you do not have these
201 //		file, some suggested values are listed below, assuming 64-bit
202 //		long, 32-bit int and 16-bit short.
203 //
204 //		ANNdist ANN_DIST_INF	Values (see <limits.h> or <float.h>)
205 //		------- ------------	------------------------------------
206 //		double	DBL_MAX			1.79769313486231570e+308
207 //		float	FLT_MAX			3.40282346638528860e+38
208 //		long	LONG_MAX		0x7fffffffffffffff
209 //		int		INT_MAX			0x7fffffff
210 //		short	SHRT_MAX		0x7fff
211 //----------------------------------------------------------------------
212 
213 const ANNdist	ANN_DIST_INF = ANN_DBL_MAX;
214 
215 //----------------------------------------------------------------------
216 //	Significant digits for tree dumps:
217 //		When floating point coordinates are used, the routine that dumps
218 //		a tree needs to know roughly how many significant digits there
219 //		are in a ANNcoord, so it can output points to full precision.
220 //		This is defined to be ANNcoordPrec.  On most systems these
221 //		values can be found in the standard include files <limits.h> or
222 //		<float.h>.  For integer types, the value is essentially ignored.
223 //
224 //		ANNcoord ANNcoordPrec	Values (see <limits.h> or <float.h>)
225 //		-------- ------------	------------------------------------
226 //		double	 DBL_DIG		15
227 //		float	 FLT_DIG		6
228 //		long	 doesn't matter 19
229 //		int		 doesn't matter 10
230 //		short	 doesn't matter 5
231 //----------------------------------------------------------------------
232 
233 #ifdef DBL_DIG							// number of sig. bits in ANNcoord
234 	const int	 ANNcoordPrec	= DBL_DIG;
235 #else
236 	const int	 ANNcoordPrec	= 15;	// default precision
237 #endif
238 
239 //----------------------------------------------------------------------
240 // Self match?
241 //	In some applications, the nearest neighbor of a point is not
242 //	allowed to be the point itself. This occurs, for example, when
243 //	computing all nearest neighbors in a set.  By setting the
244 //	parameter ANN_ALLOW_SELF_MATCH to ANNfalse, the nearest neighbor
245 //	is the closest point whose distance from the query point is
246 //	strictly positive.
247 //----------------------------------------------------------------------
248 
249 const ANNbool	ANN_ALLOW_SELF_MATCH	= ANNtrue;
250 
251 //----------------------------------------------------------------------
252 //	Norms and metrics:
253 //		ANN supports any Minkowski norm for defining distance.  In
254 //		particular, for any p >= 1, the L_p Minkowski norm defines the
255 //		length of a d-vector (v0, v1, ..., v(d-1)) to be
256 //
257 //				(|v0|^p + |v1|^p + ... + |v(d-1)|^p)^(1/p),
258 //
259 //		(where ^ denotes exponentiation, and |.| denotes absolute
260 //		value).  The distance between two points is defined to be the
261 //		norm of the vector joining them.  Some common distance metrics
262 //		include
263 //
264 //				Euclidean metric		p = 2
265 //				Manhattan metric		p = 1
266 //				Max metric				p = infinity
267 //
268 //		In the case of the max metric, the norm is computed by taking
269 //		the maxima of the absolute values of the components.  ANN is
270 //		highly "coordinate-based" and does not support general distances
271 //		functions (e.g. those obeying just the triangle inequality).  It
272 //		also does not support distance functions based on
273 //		inner-products.
274 //
275 //		For the purpose of computing nearest neighbors, it is not
276 //		necessary to compute the final power (1/p).  Thus the only
277 //		component that is used by the program is |v(i)|^p.
278 //
279 //		ANN parameterizes the distance computation through the following
280 //		macros.  (Macros are used rather than procedures for
281 //		efficiency.) Recall that the distance between two points is
282 //		given by the length of the vector joining them, and the length
283 //		or norm of a vector v is given by formula:
284 //
285 //				|v| = ROOT(POW(v0) # POW(v1) # ... # POW(v(d-1)))
286 //
287 //		where ROOT, POW are unary functions and # is an associative and
288 //		commutative binary operator mapping the following types:
289 //
290 //			**	POW:	ANNcoord				--> ANNdist
291 //			**	#:		ANNdist x ANNdist		--> ANNdist
292 //			**	ROOT:	ANNdist (>0)			--> double
293 //
294 //		For early termination in distance calculation (partial distance
295 //		calculation) we assume that POW and # together are monotonically
296 //		increasing on sequences of arguments, meaning that for all
297 //		v0..vk and y:
298 //
299 //		POW(v0) #...# POW(vk) <= (POW(v0) #...# POW(vk)) # POW(y).
300 //
301 //	Incremental Distance Calculation:
302 //		The program uses an optimized method of computing distances for
303 //		kd-trees and bd-trees, called incremental distance calculation.
304 //		It is used when distances are to be updated when only a single
305 //		coordinate of a point has been changed.  In order to use this,
306 //		we assume that there is an incremental update function DIFF(x,y)
307 //		for #, such that if:
308 //
309 //					s = x0 # ... # xi # ... # xk
310 //
311 //		then if s' is equal to s but with xi replaced by y, that is,
312 //
313 //					s' = x0 # ... # y # ... # xk
314 //
315 //		then the length of s' can be computed by:
316 //
317 //					|s'| = |s| # DIFF(xi,y).
318 //
319 //		Thus, if # is + then DIFF(xi,y) is (yi-x).  For the L_infinity
320 //		norm we make use of the fact that in the program this function
321 //		is only invoked when y > xi, and hence DIFF(xi,y)=y.
322 //
323 //		Finally, for approximate nearest neighbor queries we assume
324 //		that POW and ROOT are related such that
325 //
326 //					v*ROOT(x) = ROOT(POW(v)*x)
327 //
328 //		Here are the values for the various Minkowski norms:
329 //
330 //		L_p:	p even:							p odd:
331 //				-------------------------		------------------------
332 //				POW(v)			= v^p			POW(v)			= |v|^p
333 //				ROOT(x)			= x^(1/p)		ROOT(x)			= x^(1/p)
334 //				#				= +				#				= +
335 //				DIFF(x,y)		= y - x			DIFF(x,y)		= y - x
336 //
337 //		L_inf:
338 //				POW(v)			= |v|
339 //				ROOT(x)			= x
340 //				#				= max
341 //				DIFF(x,y)		= y
342 //
343 //		By default the Euclidean norm is assumed.  To change the norm,
344 //		uncomment the appropriate set of macros below.
345 //----------------------------------------------------------------------
346 
347 #ifdef ANN_NORM_SELECT
348 
349 #define ANN_POW(v)	(approxnn::normSelector::instance().pow((v)))
350 #define ANN_ROOT(x)	approxnn::normSelector::instance().root((x))
351 #define ANN_SUM(x,y)	(approxnn::normSelector::instance().sum((x),(y)))
352 #define ANN_DIFF(x,y)   (approxnn::normSelector::instance().diff((x),(y)))
353 
354 #else
355 
356 //----------------------------------------------------------------------
357 //	Use the following for the Euclidean norm
358 //----------------------------------------------------------------------
359 #define ANN_POW(v)			((v)*(v))
360 #define ANN_ROOT(x)			sqrt(x)
361 #define ANN_SUM(x,y)		((x) + (y))
362 #define ANN_DIFF(x,y)		((y) - (x))
363 
364 //----------------------------------------------------------------------
365 //	Use the following for the L_1 (Manhattan) norm
366 //----------------------------------------------------------------------
367 // #define ANN_POW(v)		fabs(v)
368 // #define ANN_ROOT(x)		(x)
369 // #define ANN_SUM(x,y)		((x) + (y))
370 // #define ANN_DIFF(x,y)	((y) - (x))
371 
372 //----------------------------------------------------------------------
373 //	Use the following for a general L_p norm
374 //----------------------------------------------------------------------
375 // #define ANN_POW(v)		pow(fabs(v),p)
376 // #define ANN_ROOT(x)		pow(fabs(x),1/p)
377 // #define ANN_SUM(x,y)		((x) + (y))
378 // #define ANN_DIFF(x,y)	((y) - (x))
379 
380 //----------------------------------------------------------------------
381 //	Use the following for the L_infinity (Max) norm
382 //----------------------------------------------------------------------
383 // #define ANN_POW(v)		fabs(v)
384 // #define ANN_ROOT(x)		(x)
385 // #define ANN_SUM(x,y)		((x) > (y) ? (x) : (y))
386 // #define ANN_DIFF(x,y)	(y)
387 
388 #endif  // DAKOTA_ANN_NORM_SELECTOR
389 
390 //----------------------------------------------------------------------
391 //	Array types
392 //		The following array types are of basic interest.  A point is
393 //		just a dimensionless array of coordinates, a point array is a
394 //		dimensionless array of points.  A distance array is a
395 //		dimensionless array of distances and an index array is a
396 //		dimensionless array of point indices.  The latter two are used
397 //		when returning the results of k-nearest neighbor queries.
398 //----------------------------------------------------------------------
399 
400 typedef ANNcoord* ANNpoint;			// a point
401 typedef ANNpoint* ANNpointArray;	// an array of points
402 typedef ANNdist*  ANNdistArray;		// an array of distances
403 typedef ANNidx*   ANNidxArray;		// an array of point indices
404 
405 //----------------------------------------------------------------------
406 //	Basic point and array utilities:
407 //		The following procedures are useful supplements to ANN's nearest
408 //		neighbor capabilities.
409 //
410 //		annDist():
411 //			Computes the (squared) distance between a pair of points.
412 //			Note that this routine is not used internally by ANN for
413 //			computing distance calculations.  For reasons of efficiency
414 //			this is done using incremental distance calculation.  Thus,
415 //			this routine cannot be modified as a method of changing the
416 //			metric.
417 //
418 //		Because points (somewhat like strings in C) are stored as
419 //		pointers.  Consequently, creating and destroying copies of
420 //		points may require storage allocation.  These procedures do
421 //		this.
422 //
423 //		annAllocPt() and annDeallocPt():
424 //				Allocate a deallocate storage for a single point, and
425 //				return a pointer to it.  The argument to AllocPt() is
426 //				used to initialize all components.
427 //
428 //		annAllocPts() and annDeallocPts():
429 //				Allocate and deallocate an array of points as well a
430 //				place to store their coordinates, and initializes the
431 //				points to point to their respective coordinates.  It
432 //				allocates point storage in a contiguous block large
433 //				enough to store all the points.  It performs no
434 //				initialization.
435 //
436 //		annCopyPt():
437 //				Creates a copy of a given point, allocating space for
438 //				the new point.  It returns a pointer to the newly
439 //				allocated copy.
440 //----------------------------------------------------------------------
441 
442 DLL_API ANNdist annDist(
443 	int				dim,		// dimension of space
444 	ANNpoint		p,			// points
445 	ANNpoint		q);
446 
447 DLL_API ANNpoint annAllocPt(
448 	int				dim,		// dimension
449 	ANNcoord		c = 0);		// coordinate value (all equal)
450 
451 DLL_API ANNpointArray annAllocPts(
452 	int				n,			// number of points
453 	int				dim);		// dimension
454 
455 DLL_API void annDeallocPt(
456 	ANNpoint		&p);		// deallocate 1 point
457 
458 DLL_API void annDeallocPts(
459 	ANNpointArray	&pa);		// point array
460 
461 DLL_API ANNpoint annCopyPt(
462 	int				dim,		// dimension
463 	ANNpoint		source);	// point to copy
464 
465 //----------------------------------------------------------------------
466 //Overall structure: ANN supports a number of different data structures
467 //for approximate and exact nearest neighbor searching.  These are:
468 //
469 //		ANNbruteForce	A simple brute-force search structure.
470 //		ANNkd_tree		A kd-tree tree search structure.  ANNbd_tree
471 //		A bd-tree tree search structure (a kd-tree with shrink
472 //		capabilities).
473 //
474 //		At a minimum, each of these data structures support k-nearest
475 //		neighbor queries.  The nearest neighbor query, annkSearch,
476 //		returns an integer identifier and the distance to the nearest
477 //		neighbor(s) and annRangeSearch returns the nearest points that
478 //		lie within a given query ball.
479 //
480 //		Each structure is built by invoking the appropriate constructor
481 //		and passing it (at a minimum) the array of points, the total
482 //		number of points and the dimension of the space.  Each structure
483 //		is also assumed to support a destructor and member functions
484 //		that return basic information about the point set.
485 //
486 //		Note that the array of points is not copied by the data
487 //		structure (for reasons of space efficiency), and it is assumed
488 //		to be constant throughout the lifetime of the search structure.
489 //
490 //		The search algorithm, annkSearch, is given the query point (q),
491 //		and the desired number of nearest neighbors to report (k), and
492 //		the error bound (eps) (whose default value is 0, implying exact
493 //		nearest neighbors).  It returns two arrays which are assumed to
494 //		contain at least k elements: one (nn_idx) contains the indices
495 //		(within the point array) of the nearest neighbors and the other
496 //		(dd) contains the squared distances to these nearest neighbors.
497 //
498 //		The search algorithm, annkFRSearch, is a fixed-radius kNN
499 //		search.  In addition to a query point, it is given a (squared)
500 //		radius bound.  (This is done for consistency, because the search
501 //		returns distances as squared quantities.) It does two things.
502 //		First, it computes the k nearest neighbors within the radius
503 //		bound, and second, it returns the total number of points lying
504 //		within the radius bound. It is permitted to set k = 0, in which
505 //		case it effectively answers a range counting query.  If the
506 //		error bound epsilon is positive, then the search is approximate
507 //		in the sense that it is free to ignore any point that lies
508 //		outside a ball of radius r/(1+epsilon), where r is the given
509 //		(unsquared) radius bound.
510 //
511 //		The generic object from which all the search structures are
512 //		dervied is given below.  It is a virtual object, and is useless
513 //		by itself.
514 //----------------------------------------------------------------------
515 
516 class DLL_API ANNpointSet {
517 public:
~ANNpointSet()518 	virtual ~ANNpointSet() {}			// virtual distructor
519 
520 	virtual void annkSearch(			// approx k near neighbor search
521 		ANNpoint		q,				// query point
522 		int				k,				// number of near neighbors to return
523 		ANNidxArray		nn_idx,			// nearest neighbor array (modified)
524 		ANNdistArray	dd,				// dist to near neighbors (modified)
525 		double			eps=0.0			// error bound
526 		) = 0;							// pure virtual (defined elsewhere)
527 
528 	virtual int annkFRSearch(			// approx fixed-radius kNN search
529 		ANNpoint		q,				// query point
530 		ANNdist			sqRad,			// squared radius
531 		int				k = 0,			// number of near neighbors to return
532 		ANNidxArray		nn_idx = NULL,	// nearest neighbor array (modified)
533 		ANNdistArray	dd = NULL,		// dist to near neighbors (modified)
534 		double			eps=0.0			// error bound
535 		) = 0;							// pure virtual (defined elsewhere)
536 
537 	virtual int theDim() = 0;			// return dimension of space
538 	virtual int nPoints() = 0;			// return number of points
539 										// return pointer to points
540 	virtual ANNpointArray thePoints() = 0;
541 };
542 
543 //----------------------------------------------------------------------
544 //	Brute-force nearest neighbor search:
545 //		The brute-force search structure is very simple but inefficient.
546 //		It has been provided primarily for the sake of comparison with
547 //		and validation of the more complex search structures.
548 //
549 //		Query processing is the same as described above, but the value
550 //		of epsilon is ignored, since all distance calculations are
551 //		performed exactly.
552 //
553 //		WARNING: This data structure is very slow, and should not be
554 //		used unless the number of points is very small.
555 //
556 //		Internal information:
557 //		---------------------
558 //		This data structure bascially consists of the array of points
559 //		(each a pointer to an array of coordinates).  The search is
560 //		performed by a simple linear scan of all the points.
561 //----------------------------------------------------------------------
562 
563 class DLL_API ANNbruteForce: public ANNpointSet {
564 	int				dim;				// dimension
565 	int				n_pts;				// number of points
566 	ANNpointArray	pts;				// point array
567 public:
568 	ANNbruteForce(						// constructor from point array
569 		ANNpointArray	pa,				// point array
570 		int				n,				// number of points
571 		int				dd);			// dimension
572 
573 	~ANNbruteForce();					// destructor
574 
575 	void annkSearch(					// approx k near neighbor search
576 		ANNpoint		q,				// query point
577 		int				k,				// number of near neighbors to return
578 		ANNidxArray		nn_idx,			// nearest neighbor array (modified)
579 		ANNdistArray	dd,				// dist to near neighbors (modified)
580 		double			eps=0.0);		// error bound
581 
582 	int annkFRSearch(					// approx fixed-radius kNN search
583 		ANNpoint		q,				// query point
584 		ANNdist			sqRad,			// squared radius
585 		int				k = 0,			// number of near neighbors to return
586 		ANNidxArray		nn_idx = NULL,	// nearest neighbor array (modified)
587 		ANNdistArray	dd = NULL,		// dist to near neighbors (modified)
588 		double			eps=0.0);		// error bound
589 
theDim()590 	int theDim()						// return dimension of space
591 		{ return dim; }
592 
nPoints()593 	int nPoints()						// return number of points
594 		{ return n_pts; }
595 
thePoints()596 	ANNpointArray thePoints()			// return pointer to points
597 		{  return pts;  }
598 };
599 
600 //----------------------------------------------------------------------
601 // kd- and bd-tree splitting and shrinking rules
602 //		kd-trees supports a collection of different splitting rules.
603 //		In addition to the standard kd-tree splitting rule proposed
604 //		by Friedman, Bentley, and Finkel, we have introduced a
605 //		number of other splitting rules, which seem to perform
606 //		as well or better (for the distributions we have tested).
607 //
608 //		The splitting methods given below allow the user to tailor
609 //		the data structure to the particular data set.  They are
610 //		are described in greater details in the kd_split.cc source
611 //		file.  The method ANN_KD_SUGGEST is the method chosen (rather
612 //		subjectively) by the implementors as the one giving the
613 //		fastest performance, and is the default splitting method.
614 //
615 //		As with splitting rules, there are a number of different
616 //		shrinking rules.  The shrinking rule ANN_BD_NONE does no
617 //		shrinking (and hence produces a kd-tree tree).  The rule
618 //		ANN_BD_SUGGEST uses the implementors favorite rule.
619 //----------------------------------------------------------------------
620 
621 enum ANNsplitRule {
622 		ANN_KD_STD				= 0,	// the optimized kd-splitting rule
623 		ANN_KD_MIDPT			= 1,	// midpoint split
624 		ANN_KD_FAIR				= 2,	// fair split
625 		ANN_KD_SL_MIDPT			= 3,	// sliding midpoint splitting method
626 		ANN_KD_SL_FAIR			= 4,	// sliding fair split method
627 		ANN_KD_SUGGEST			= 5};	// the authors' suggestion for best
628 const int ANN_N_SPLIT_RULES		= 6;	// number of split rules
629 
630 enum ANNshrinkRule {
631 		ANN_BD_NONE				= 0,	// no shrinking at all (just kd-tree)
632 		ANN_BD_SIMPLE			= 1,	// simple splitting
633 		ANN_BD_CENTROID			= 2,	// centroid splitting
634 		ANN_BD_SUGGEST			= 3};	// the authors' suggested choice
635 const int ANN_N_SHRINK_RULES	= 4;	// number of shrink rules
636 
637 //----------------------------------------------------------------------
638 //	kd-tree:
639 //		The main search data structure supported by ANN is a kd-tree.
640 //		The main constructor is given a set of points and a choice of
641 //		splitting method to use in building the tree.
642 //
643 //		Construction:
644 //		-------------
645 //		The constructor is given the point array, number of points,
646 //		dimension, bucket size (default = 1), and the splitting rule
647 //		(default = ANN_KD_SUGGEST).  The point array is not copied, and
648 //		is assumed to be kept constant throughout the lifetime of the
649 //		search structure.  There is also a "load" constructor that
650 //		builds a tree from a file description that was created by the
651 //		Dump operation.
652 //
653 //		Search:
654 //		-------
655 //		There are two search methods:
656 //
657 //			Standard search (annkSearch()):
658 //				Searches nodes in tree-traversal order, always visiting
659 //				the closer child first.
660 //			Priority search (annkPriSearch()):
661 //				Searches nodes in order of increasing distance of the
662 //				associated cell from the query point.  For many
663 //				distributions the standard search seems to work just
664 //				fine, but priority search is safer for worst-case
665 //				performance.
666 //
667 //		Printing:
668 //		---------
669 //		There are two methods provided for printing the tree.  Print()
670 //		is used to produce a "human-readable" display of the tree, with
671 //		indenation, which is handy for debugging.  Dump() produces a
672 //		format that is suitable reading by another program.  There is a
673 //		"load" constructor, which constructs a tree which is assumed to
674 //		have been saved by the Dump() procedure.
675 //
676 //		Performance and Structure Statistics:
677 //		-------------------------------------
678 //		The procedure getStats() collects statistics information on the
679 //		tree (its size, height, etc.)  See ANNperf.h for information on
680 //		the stats structure it returns.
681 //
682 //		Internal information:
683 //		---------------------
684 //		The data structure consists of three major chunks of storage.
685 //		The first (implicit) storage are the points themselves (pts),
686 //		which have been provided by the users as an argument to the
687 //		constructor, or are allocated dynamically if the tree is built
688 //		using the load constructor).  These should not be changed during
689 //		the lifetime of the search structure.  It is the user's
690 //		responsibility to delete these after the tree is destroyed.
691 //
692 //		The second is the tree itself (which is dynamically allocated in
693 //		the constructor) and is given as a pointer to its root node
694 //		(root).  These nodes are automatically deallocated when the tree
695 //		is deleted.  See the file src/kd_tree.h for further information
696 //		on the structure of the tree nodes.
697 //
698 //		Each leaf of the tree does not contain a pointer directly to a
699 //		point, but rather contains a pointer to a "bucket", which is an
700 //		array consisting of point indices.  The third major chunk of
701 //		storage is an array (pidx), which is a large array in which all
702 //		these bucket subarrays reside.  (The reason for storing them
703 //		separately is the buckets are typically small, but of varying
704 //		sizes.  This was done to avoid fragmentation.)  This array is
705 //		also deallocated when the tree is deleted.
706 //
707 //		In addition to this, the tree consists of a number of other
708 //		pieces of information which are used in searching and for
709 //		subsequent tree operations.  These consist of the following:
710 //
711 //		dim						Dimension of space
712 //		n_pts					Number of points currently in the tree
713 //		n_max					Maximum number of points that are allowed
714 //								in the tree
715 //		bkt_size				Maximum bucket size (no. of points per leaf)
716 //		bnd_box_lo				Bounding box low point
717 //		bnd_box_hi				Bounding box high point
718 //		splitRule				Splitting method used
719 //
720 //----------------------------------------------------------------------
721 
722 //----------------------------------------------------------------------
723 // Some types and objects used by kd-tree functions
724 // See src/kd_tree.h and src/kd_tree.cpp for definitions
725 //----------------------------------------------------------------------
726 class ANNkdStats;				// stats on kd-tree
727 class ANNkd_node;				// generic node in a kd-tree
728 typedef ANNkd_node*	ANNkd_ptr;	// pointer to a kd-tree node
729 
730 class DLL_API ANNkd_tree: public ANNpointSet {
731 protected:
732 	int				dim;				// dimension of space
733 	int				n_pts;				// number of points in tree
734 	int				bkt_size;			// bucket size
735 	ANNpointArray	pts;				// the points
736 	ANNidxArray		pidx;				// point indices (to pts array)
737 	ANNkd_ptr		root;				// root of kd-tree
738 	ANNpoint		bnd_box_lo;			// bounding box low point
739 	ANNpoint		bnd_box_hi;			// bounding box high point
740 
741 	void SkeletonTree(					// construct skeleton tree
742 		int				n,				// number of points
743 		int				dd,				// dimension
744 		int				bs,				// bucket size
745 		ANNpointArray pa = NULL,		// point array (optional)
746 		ANNidxArray pi = NULL);			// point indices (optional)
747 
748 public:
749 	ANNkd_tree(							// build skeleton tree
750 		int				n = 0,			// number of points
751 		int				dd = 0,			// dimension
752 		int				bs = 1);		// bucket size
753 
754 	ANNkd_tree(							// build from point array
755 		ANNpointArray	pa,				// point array
756 		int				n,				// number of points
757 		int				dd,				// dimension
758 		int				bs = 1,			// bucket size
759 		ANNsplitRule	split = ANN_KD_SUGGEST);	// splitting method
760 
761 	ANNkd_tree(							// build from dump file
762 		std::istream&	in);			// input stream for dump file
763 
764 	~ANNkd_tree();						// tree destructor
765 
766 	void annkSearch(					// approx k near neighbor search
767 		ANNpoint		q,				// query point
768 		int				k,				// number of near neighbors to return
769 		ANNidxArray		nn_idx,			// nearest neighbor array (modified)
770 		ANNdistArray	dd,				// dist to near neighbors (modified)
771 		double			eps=0.0);		// error bound
772 
773 	void annkPriSearch( 				// priority k near neighbor search
774 		ANNpoint		q,				// query point
775 		int				k,				// number of near neighbors to return
776 		ANNidxArray		nn_idx,			// nearest neighbor array (modified)
777 		ANNdistArray	dd,				// dist to near neighbors (modified)
778 		double			eps=0.0);		// error bound
779 
780 	int annkFRSearch(					// approx fixed-radius kNN search
781 		ANNpoint		q,				// the query point
782 		ANNdist			sqRad,			// squared radius of query ball
783 		int				k,				// number of neighbors to return
784 		ANNidxArray		nn_idx = NULL,	// nearest neighbor array (modified)
785 		ANNdistArray	dd = NULL,		// dist to near neighbors (modified)
786 		double			eps=0.0);		// error bound
787 
theDim()788 	int theDim()						// return dimension of space
789 		{ return dim; }
790 
nPoints()791 	int nPoints()						// return number of points
792 		{ return n_pts; }
793 
thePoints()794 	ANNpointArray thePoints()			// return pointer to points
795 		{  return pts;  }
796 
797 	virtual void Print(					// print the tree (for debugging)
798 		ANNbool			with_pts,		// print points as well?
799 		std::ostream&	out);			// output stream
800 
801 	virtual void Dump(					// dump entire tree
802 		ANNbool			with_pts,		// print points as well?
803 		std::ostream&	out);			// output stream
804 
805 	virtual void getStats(				// compute tree statistics
806 		ANNkdStats&		st);			// the statistics (modified)
807 };
808 
809 //----------------------------------------------------------------------
810 //	Box decomposition tree (bd-tree)
811 //		The bd-tree is inherited from a kd-tree.  The main difference
812 //		in the bd-tree and the kd-tree is a new type of internal node
813 //		called a shrinking node (in the kd-tree there is only one type
814 //		of internal node, a splitting node).  The shrinking node
815 //		makes it possible to generate balanced trees in which the
816 //		cells have bounded aspect ratio, by allowing the decomposition
817 //		to zoom in on regions of dense point concentration.  Although
818 //		this is a nice idea in theory, few point distributions are so
819 //		densely clustered that this is really needed.
820 //----------------------------------------------------------------------
821 
822 class DLL_API ANNbd_tree: public ANNkd_tree {
823 public:
824 	ANNbd_tree(							// build skeleton tree
825 		int				n,				// number of points
826 		int				dd,				// dimension
827 		int				bs = 1)			// bucket size
ANNkd_tree(n,dd,bs)828 		: ANNkd_tree(n, dd, bs) {}		// build base kd-tree
829 
830 	ANNbd_tree(							// build from point array
831 		ANNpointArray	pa,				// point array
832 		int				n,				// number of points
833 		int				dd,				// dimension
834 		int				bs = 1,			// bucket size
835 		ANNsplitRule	split  = ANN_KD_SUGGEST,	// splitting rule
836 		ANNshrinkRule	shrink = ANN_BD_SUGGEST);	// shrinking rule
837 
838 	ANNbd_tree(							// build from dump file
839 		std::istream&	in);			// input stream for dump file
840 };
841 
842 //----------------------------------------------------------------------
843 //	Other functions
844 //	annMaxPtsVisit		Sets a limit on the maximum number of points
845 //						to visit in the search.
846 //  annClose			Can be called when all use of ANN is finished.
847 //						It clears up a minor memory leak.
848 //----------------------------------------------------------------------
849 
850 DLL_API void annMaxPtsVisit(	// max. pts to visit in search
851 	int				maxPts);	// the limit
852 
853 DLL_API void annClose();		// called to end use of ANN
854 
855 #endif
856