1 /**********************************************************************
2 * Software License Agreement (BSD License)
3 *
4 * Copyright 2011 Andreas Muetzel (amuetzel@uni-koblenz.de). All rights reserved.
5 *
6 * THE BSD LICENSE
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *************************************************************************/
29 #ifndef FLANN_UTIL_CUDA_RESULTSET_H
30 #define FLANN_UTIL_CUDA_RESULTSET_H
31
32 #include <flann/util/cuda/heap.h>
33 #include <limits>
34
35 __device__ __forceinline__
infinity()36 float infinity()
37 {
38 return __int_as_float(0x7f800000);
39 }
40
41 #ifndef INFINITY
42 #define INFINITY infinity()
43 #endif
44
45 namespace flann
46 {
47 namespace cuda
48 {
49 //! result set for the 1nn search. Doesn't do any global memory accesses on its own,
50 template< typename DistanceType >
51 struct SingleResultSet
52 {
53 int bestIndex;
54 DistanceType bestDist;
55 const DistanceType epsError;
56
57 __device__ __host__
SingleResultSetSingleResultSet58 SingleResultSet( DistanceType eps ) : bestIndex(-1),bestDist(INFINITY), epsError(eps){ }
59
60 __device__
61 inline float
worstDistSingleResultSet62 worstDist()
63 {
64 return bestDist;
65 }
66
67 __device__
68 inline void
insertSingleResultSet69 insert(int index, DistanceType dist)
70 {
71 if( dist <= bestDist ) {
72 bestIndex=index;
73 bestDist=dist;
74 }
75 }
76
77 DistanceType* resultDist;
78 int* resultIndex;
79
80 __device__
81 inline void
setResultLocationSingleResultSet82 setResultLocation( DistanceType* dists, int* index, int thread, int stride )
83 {
84 resultDist=dists+thread*stride;
85 resultIndex=index+thread*stride;
86 if( stride != 1 ) {
87 for( int i=1; i<stride; i++ ) {
88 resultDist[i]=INFINITY;
89 resultIndex[i]=-1;
90 }
91 }
92 }
93
94 __device__
95 inline void
finishSingleResultSet96 finish()
97 {
98 resultDist[0]=bestDist;
99 resultIndex[0]=bestIndex;
100 }
101 };
102
103 template< typename DistanceType >
104 struct GreaterThan
105 {
106 __device__
operatorGreaterThan107 bool operator()(DistanceType a, DistanceType b)
108 {
109 return a>b;
110 }
111 };
112
113
114 // using this and the template uses 2 or 3 registers more than the direct implementation in the kNearestKernel, but
115 // there is no speed difference.
116 // Setting useHeap as a template parameter leads to a whole lot of things being
117 // optimized away by nvcc.
118 // Register counts are the same as when removing not-needed variables in explicit specializations
119 // and the "if( useHeap )" branches are eliminated at compile time.
120 // The downside of this: a bit more complex kernel launch code.
121 template< typename DistanceType, bool useHeap >
122 struct KnnResultSet
123 {
124 int foundNeighbors;
125 DistanceType largestHeapDist;
126 int maxDistIndex;
127 const int k;
128 const bool sorted;
129 const DistanceType epsError;
130
131
132 __device__ __host__
KnnResultSetKnnResultSet133 KnnResultSet(int knn, bool sortResults, DistanceType eps) : foundNeighbors(0),largestHeapDist(INFINITY),k(knn), sorted(sortResults), epsError(eps){ }
134
135 // __host__ __device__
136 // KnnResultSet(const KnnResultSet& o):foundNeighbors(o.foundNeighbors),largestHeapDist(o.largestHeapDist),k(o.k){ }
137
138 __device__
139 inline DistanceType
worstDistKnnResultSet140 worstDist()
141 {
142 return largestHeapDist;
143 }
144
145 __device__
146 inline void
insertKnnResultSet147 insert(int index, DistanceType dist)
148 {
149 if( foundNeighbors<k ) {
150 resultDist[foundNeighbors]=dist;
151 resultIndex[foundNeighbors]=index;
152 if( foundNeighbors==k-1) {
153 if( useHeap ) {
154 flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>());
155 largestHeapDist=resultDist[0];
156 }
157 else {
158 findLargestDistIndex();
159 }
160
161 }
162 foundNeighbors++;
163 }
164 else if( dist < largestHeapDist ) {
165 if( useHeap ) {
166 resultDist[0]=dist;
167 resultIndex[0]=index;
168 flann::cuda::heap::sift_down(resultDist,resultIndex,0,k,GreaterThan<DistanceType>());
169 largestHeapDist=resultDist[0];
170 }
171 else {
172 resultDist[maxDistIndex]=dist;
173 resultIndex[maxDistIndex]=index;
174 findLargestDistIndex();
175 }
176
177 }
178 }
179
180 __device__
181 void
findLargestDistIndexKnnResultSet182 findLargestDistIndex( )
183 {
184 largestHeapDist=resultDist[0];
185 maxDistIndex=0;
186 for( int i=1; i<k; i++ )
187 if( resultDist[i] > largestHeapDist ) {
188 maxDistIndex=i;
189 largestHeapDist=resultDist[i];
190 }
191 }
192
193 float* resultDist;
194 int* resultIndex;
195
196 __device__
197 inline void
setResultLocationKnnResultSet198 setResultLocation( DistanceType* dists, int* index, int thread, int stride )
199 {
200 resultDist=dists+stride*thread;
201 resultIndex=index+stride*thread;
202 for( int i=0; i<stride; i++ ) {
203 resultDist[i]=INFINITY;
204 resultIndex[i]=-1;
205 // resultIndex[tid+i*blockDim.x]=-1;
206 // resultDist[tid+i*blockDim.x]=INFINITY;
207 }
208 }
209
210 __host__ __device__
211 inline void
finishKnnResultSet212 finish()
213 {
214 if( sorted ) {
215 if( !useHeap ) flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>());
216 for( int i=k-1; i>0; i-- ) {
217 flann::cuda::swap( resultDist[0], resultDist[i] );
218 flann::cuda::swap( resultIndex[0], resultIndex[i] );
219 flann::cuda::heap::sift_down( resultDist,resultIndex, 0, i, GreaterThan<DistanceType>() );
220 }
221 }
222 }
223 };
224
225 template <typename DistanceType>
226 struct CountingRadiusResultSet
227 {
228 int count_;
229 DistanceType radius_sq_;
230 int max_neighbors_;
231
232 __device__ __host__
CountingRadiusResultSetCountingRadiusResultSet233 CountingRadiusResultSet(DistanceType radius, int max_neighbors) : count_(0),radius_sq_(radius), max_neighbors_(max_neighbors){ }
234
235 __device__
236 inline DistanceType
worstDistCountingRadiusResultSet237 worstDist()
238 {
239 return radius_sq_;
240 }
241
242 __device__
243 inline void
insertCountingRadiusResultSet244 insert(int index, float dist)
245 {
246 if( dist < radius_sq_ ) {
247 count_++;
248 }
249 }
250
251 int* resultIndex;
252
253 __device__
254 inline void
setResultLocationCountingRadiusResultSet255 setResultLocation( DistanceType* /*dists*/, int* count, int thread, int stride )
256 {
257 resultIndex=count+thread*stride;
258 }
259
260 __device__
261 inline void
finishCountingRadiusResultSet262 finish()
263 {
264 if(( max_neighbors_<=0) ||( count_<=max_neighbors_) ) resultIndex[0]=count_;
265 else resultIndex[0]=max_neighbors_;
266 }
267 };
268
269 template<typename DistanceType, bool useHeap>
270 struct RadiusKnnResultSet
271 {
272 int foundNeighbors;
273 DistanceType largestHeapDist;
274 int maxDistElem;
275 const int k;
276 const bool sorted;
277 const DistanceType radius_sq_;
278 int* segment_starts_;
279 // int count_;
280
281
282 __device__ __host__
RadiusKnnResultSetRadiusKnnResultSet283 RadiusKnnResultSet(DistanceType radius, int knn, int* segment_starts, bool sortResults) : foundNeighbors(0),largestHeapDist(radius),k(knn), sorted(sortResults), radius_sq_(radius),segment_starts_(segment_starts) { }
284
285 // __host__ __device__
286 // KnnResultSet(const KnnResultSet& o):foundNeighbors(o.foundNeighbors),largestHeapDist(o.largestHeapDist),k(o.k){ }
287
288 __device__
289 inline DistanceType
worstDistRadiusKnnResultSet290 worstDist()
291 {
292 return largestHeapDist;
293 }
294
295 __device__
296 inline void
insertRadiusKnnResultSet297 insert(int index, DistanceType dist)
298 {
299 if( dist < radius_sq_ ) {
300 if( foundNeighbors<k ) {
301 resultDist[foundNeighbors]=dist;
302 resultIndex[foundNeighbors]=index;
303 if(( foundNeighbors==k-1) && useHeap) {
304 if( useHeap ) {
305 flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>());
306 largestHeapDist=resultDist[0];
307 }
308 else {
309 findLargestDistIndex();
310 }
311 }
312 foundNeighbors++;
313
314 }
315 else if( dist < largestHeapDist ) {
316 if( useHeap ) {
317 resultDist[0]=dist;
318 resultIndex[0]=index;
319 flann::cuda::heap::sift_down(resultDist,resultIndex,0,k,GreaterThan<DistanceType>());
320 largestHeapDist=resultDist[0];
321 }
322 else {
323 resultDist[maxDistElem]=dist;
324 resultIndex[maxDistElem]=index;
325 findLargestDistIndex();
326 }
327 }
328 }
329 }
330
331 __device__
332 void
findLargestDistIndexRadiusKnnResultSet333 findLargestDistIndex( )
334 {
335 largestHeapDist=resultDist[0];
336 maxDistElem=0;
337 for( int i=1; i<k; i++ )
338 if( resultDist[i] > largestHeapDist ) {
339 maxDistElem=i;
340 largestHeapDist=resultDist[i];
341 }
342 }
343
344
345 DistanceType* resultDist;
346 int* resultIndex;
347
348 __device__
349 inline void
setResultLocationRadiusKnnResultSet350 setResultLocation( DistanceType* dists, int* index, int thread, int /*stride*/ )
351 {
352 resultDist=dists+segment_starts_[thread];
353 resultIndex=index+segment_starts_[thread];
354 }
355
356 __device__
357 inline void
finishRadiusKnnResultSet358 finish()
359 {
360 if( sorted ) {
361 if( !useHeap ) flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>());
362 for( int i=foundNeighbors-1; i>0; i-- ) {
363 flann::cuda::swap( resultDist[0], resultDist[i] );
364 flann::cuda::swap( resultIndex[0], resultIndex[i] );
365 flann::cuda::heap::sift_down( resultDist,resultIndex, 0, i, GreaterThan<DistanceType>() );
366 }
367 }
368 }
369 };
370
371 // Difference to RadiusKnnResultSet: Works like KnnResultSet, doesn't pack the results densely (as the RadiusResultSet does)
372 template <typename DistanceType, bool useHeap>
373 struct KnnRadiusResultSet
374 {
375 int foundNeighbors;
376 DistanceType largestHeapDist;
377 int maxDistIndex;
378 const int k;
379 const bool sorted;
380 const DistanceType epsError;
381 const DistanceType radius_sq;
382
383
384 __device__ __host__
KnnRadiusResultSetKnnRadiusResultSet385 KnnRadiusResultSet(int knn, bool sortResults, DistanceType eps, DistanceType radius) : foundNeighbors(0),largestHeapDist(radius),k(knn), sorted(sortResults), epsError(eps),radius_sq(radius){ }
386
387 // __host__ __device__
388 // KnnResultSet(const KnnResultSet& o):foundNeighbors(o.foundNeighbors),largestHeapDist(o.largestHeapDist),k(o.k){ }
389
390 __device__
391 inline DistanceType
worstDistKnnRadiusResultSet392 worstDist()
393 {
394 return largestHeapDist;
395 }
396
397 __device__
398 inline void
insertKnnRadiusResultSet399 insert(int index, DistanceType dist)
400 {
401 if( dist < largestHeapDist ) {
402 if( foundNeighbors<k ) {
403 resultDist[foundNeighbors]=dist;
404 resultIndex[foundNeighbors]=index;
405 if( foundNeighbors==k-1 ) {
406 if( useHeap ) {
407 flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>());
408 largestHeapDist=resultDist[0];
409 }
410 else {
411 findLargestDistIndex();
412 }
413 }
414 foundNeighbors++;
415 }
416 else { //if( dist < largestHeapDist )
417 if( useHeap ) {
418 resultDist[0]=dist;
419 resultIndex[0]=index;
420 flann::cuda::heap::sift_down(resultDist,resultIndex,0,k,GreaterThan<DistanceType>());
421 largestHeapDist=resultDist[0];
422 }
423 else {
424 resultDist[maxDistIndex]=dist;
425 resultIndex[maxDistIndex]=index;
426 findLargestDistIndex();
427 }
428 }
429 }
430 }
431 __device__
432 void
findLargestDistIndexKnnRadiusResultSet433 findLargestDistIndex( )
434 {
435 largestHeapDist=resultDist[0];
436 maxDistIndex=0;
437 for( int i=1; i<k; i++ )
438 if( resultDist[i] > largestHeapDist ) {
439 maxDistIndex=i;
440 largestHeapDist=resultDist[i];
441 }
442 }
443
444 DistanceType* resultDist;
445 int* resultIndex;
446
447 __device__
448 inline void
setResultLocationKnnRadiusResultSet449 setResultLocation( DistanceType* dists, int* index, int thread, int stride )
450 {
451 resultDist=dists+stride*thread;
452 resultIndex=index+stride*thread;
453 for( int i=0; i<stride; i++ ) {
454 resultDist[i]=INFINITY;
455 resultIndex[i]=-1;
456 // resultIndex[tid+i*blockDim.x]=-1;
457 // resultDist[tid+i*blockDim.x]=INFINITY;
458 }
459 }
460
461 __device__
462 inline void
finishKnnRadiusResultSet463 finish()
464 {
465 if( sorted ) {
466 if( !useHeap ) flann::cuda::heap::make_heap(resultDist,resultIndex,k,GreaterThan<DistanceType>());
467 for( int i=k-1; i>0; i-- ) {
468 flann::cuda::swap( resultDist[0], resultDist[i] );
469 flann::cuda::swap( resultIndex[0], resultIndex[i] );
470 flann::cuda::heap::sift_down( resultDist,resultIndex, 0, i, GreaterThan<DistanceType>() );
471 }
472 }
473 }
474 };
475
476 //! fills the radius output buffer.
477 //! IMPORTANT ASSERTION: ASSUMES THAT THERE IS ENOUGH SPACE FOR EVERY NEIGHBOR! IF THIS ISN'T
478 //! TRUE, USE KnnRadiusResultSet! (Otherwise, the neighbors of one element might overflow into the next element, or past the buffer.)
479 template< typename DistanceType >
480 struct RadiusResultSet
481 {
482 DistanceType radius_sq_;
483 int* segment_starts_;
484 int count_;
485 bool sorted_;
486
487 __device__ __host__
RadiusResultSetRadiusResultSet488 RadiusResultSet(DistanceType radius, int* segment_starts, bool sorted) : radius_sq_(radius), segment_starts_(segment_starts), count_(0), sorted_(sorted){ }
489
490 __device__
491 inline DistanceType
worstDistRadiusResultSet492 worstDist()
493 {
494 return radius_sq_;
495 }
496
497 __device__
498 inline void
insertRadiusResultSet499 insert(int index, DistanceType dist)
500 {
501 if( dist < radius_sq_ ) {
502 resultIndex[count_]=index;
503 resultDist[count_]=dist;
504 count_++;
505 }
506 }
507
508 int* resultIndex;
509 DistanceType* resultDist;
510
511 __device__
512 inline void
setResultLocationRadiusResultSet513 setResultLocation( DistanceType* dists, int* index, int thread, int /*stride*/ )
514 {
515 resultIndex=index+segment_starts_[thread];
516 resultDist=dists+segment_starts_[thread];
517 }
518
519 __device__
520 inline void
finishRadiusResultSet521 finish()
522 {
523 if( sorted_ ) {
524 flann::cuda::heap::make_heap( resultDist,resultIndex, count_, GreaterThan<DistanceType>() );
525 for( int i=count_-1; i>0; i-- ) {
526 flann::cuda::swap( resultDist[0], resultDist[i] );
527 flann::cuda::swap( resultIndex[0], resultIndex[i] );
528 flann::cuda::heap::sift_down( resultDist,resultIndex, 0, i, GreaterThan<DistanceType>() );
529 }
530 }
531 }
532 };
533 }
534 }
535
536 #endif
537