1 /***********************************************************************
2  * Software License Agreement (BSD License)
3  *
4  * Copyright 2008-2009  Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
5  * Copyright 2008-2009  David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
6  *
7  * THE BSD LICENSE
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *************************************************************************/
30 
31 #ifndef FLANN_DIST_H_
32 #define FLANN_DIST_H_
33 
34 #include <cmath>
35 #include <cstdlib>
36 #include <string.h>
37 #ifdef _MSC_VER
38 typedef unsigned __int32 uint32_t;
39 typedef unsigned __int64 uint64_t;
40 #else
41 #include <stdint.h>
42 #endif
43 
44 #include "flann/defines.h"
45 
46 
47 namespace flann
48 {
49 
50 template<typename T>
51 struct Accumulator { typedef T Type; };
52 template<>
53 struct Accumulator<unsigned char>  { typedef float Type; };
54 template<>
55 struct Accumulator<unsigned short> { typedef float Type; };
56 template<>
57 struct Accumulator<unsigned int> { typedef float Type; };
58 template<>
59 struct Accumulator<char>   { typedef float Type; };
60 template<>
61 struct Accumulator<short>  { typedef float Type; };
62 template<>
63 struct Accumulator<int> { typedef float Type; };
64 
65 
66 
67 /**
68  * Squared Euclidean distance functor.
69  *
70  * This is the simpler, unrolled version. This is preferable for
71  * very low dimensionality data (eg 3D points)
72  */
73 template<class T>
74 struct L2_Simple
75 {
76     typedef bool is_kdtree_distance;
77 
78     typedef T ElementType;
79     typedef typename Accumulator<T>::Type ResultType;
80 
81     template <typename Iterator1, typename Iterator2>
82     ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const
83     {
84         ResultType result = ResultType();
85         ResultType diff;
86         for(size_t i = 0; i < size; ++i ) {
87             diff = *a++ - *b++;
88             result += diff*diff;
89         }
90         return result;
91     }
92 
93     template <typename U, typename V>
94     inline ResultType accum_dist(const U& a, const V& b, int) const
95     {
96         return (a-b)*(a-b);
97     }
98 };
99 
100 template<class T>
101 struct L2_3D
102 {
103     typedef bool is_kdtree_distance;
104 
105     typedef T ElementType;
106     typedef typename Accumulator<T>::Type ResultType;
107 
108     template <typename Iterator1, typename Iterator2>
109     ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const
110     {
111         ResultType result = ResultType();
112         ResultType diff;
113         diff = *a++ - *b++;
114         result += diff*diff;
115         diff = *a++ - *b++;
116         result += diff*diff;
117         diff = *a++ - *b++;
118         result += diff*diff;
119         return result;
120     }
121 
122     template <typename U, typename V>
123     inline ResultType accum_dist(const U& a, const V& b, int) const
124     {
125         return (a-b)*(a-b);
126     }
127 };
128 
129 /**
130  * Squared Euclidean distance functor, optimized version
131  */
132 template<class T>
133 struct L2
134 {
135     typedef bool is_kdtree_distance;
136 
137     typedef T ElementType;
138     typedef typename Accumulator<T>::Type ResultType;
139 
140     /**
141      *  Compute the squared Euclidean distance between two vectors.
142      *
143      *	This is highly optimised, with loop unrolling, as it is one
144      *	of the most expensive inner loops.
145      *
146      *	The computation of squared root at the end is omitted for
147      *	efficiency.
148      */
149     template <typename Iterator1, typename Iterator2>
150     ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const
151     {
152         ResultType result = ResultType();
153         ResultType diff0, diff1, diff2, diff3;
154         Iterator1 last = a + size;
155         Iterator1 lastgroup = last - 3;
156 
157         /* Process 4 items with each loop for efficiency. */
158         while (a < lastgroup) {
159             diff0 = (ResultType)(a[0] - b[0]);
160             diff1 = (ResultType)(a[1] - b[1]);
161             diff2 = (ResultType)(a[2] - b[2]);
162             diff3 = (ResultType)(a[3] - b[3]);
163             result += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3;
164             a += 4;
165             b += 4;
166 
167             if ((worst_dist>0)&&(result>worst_dist)) {
168                 return result;
169             }
170         }
171         /* Process last 0-3 pixels.  Not needed for standard vector lengths. */
172         while (a < last) {
173             diff0 = (ResultType)(*a++ - *b++);
174             result += diff0 * diff0;
175         }
176         return result;
177     }
178 
179     /**
180      *	Partial euclidean distance, using just one dimension. This is used by the
181      *	kd-tree when computing partial distances while traversing the tree.
182      *
183      *	Squared root is omitted for efficiency.
184      */
185     template <typename U, typename V>
186     inline ResultType accum_dist(const U& a, const V& b, int) const
187     {
188         return (a-b)*(a-b);
189     }
190 };
191 
192 
193 /*
194  * Manhattan distance functor, optimized version
195  */
196 template<class T>
197 struct L1
198 {
199     typedef bool is_kdtree_distance;
200 
201     typedef T ElementType;
202     typedef typename Accumulator<T>::Type ResultType;
203 
204     /**
205      *  Compute the Manhattan (L_1) distance between two vectors.
206      *
207      *	This is highly optimised, with loop unrolling, as it is one
208      *	of the most expensive inner loops.
209      */
210     template <typename Iterator1, typename Iterator2>
211     ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const
212     {
213         ResultType result = ResultType();
214         ResultType diff0, diff1, diff2, diff3;
215         Iterator1 last = a + size;
216         Iterator1 lastgroup = last - 3;
217 
218         /* Process 4 items with each loop for efficiency. */
219         while (a < lastgroup) {
220             diff0 = (ResultType)std::abs(a[0] - b[0]);
221             diff1 = (ResultType)std::abs(a[1] - b[1]);
222             diff2 = (ResultType)std::abs(a[2] - b[2]);
223             diff3 = (ResultType)std::abs(a[3] - b[3]);
224             result += diff0 + diff1 + diff2 + diff3;
225             a += 4;
226             b += 4;
227 
228             if ((worst_dist>0)&&(result>worst_dist)) {
229                 return result;
230             }
231         }
232         /* Process last 0-3 pixels.  Not needed for standard vector lengths. */
233         while (a < last) {
234             diff0 = (ResultType)std::abs(*a++ - *b++);
235             result += diff0;
236         }
237         return result;
238     }
239 
240     /**
241      * Partial distance, used by the kd-tree.
242      */
243     template <typename U, typename V>
244     inline ResultType accum_dist(const U& a, const V& b, int) const
245     {
246         return std::abs(a-b);
247     }
248 };
249 
250 
251 
252 template<class T>
253 struct MinkowskiDistance
254 {
255     typedef bool is_kdtree_distance;
256 
257     typedef T ElementType;
258     typedef typename Accumulator<T>::Type ResultType;
259 
260     int order;
261 
262     MinkowskiDistance(int order_) : order(order_) {}
263 
264     /**
265      *  Compute the Minkowsky (L_p) distance between two vectors.
266      *
267      *	This is highly optimised, with loop unrolling, as it is one
268      *	of the most expensive inner loops.
269      *
270      *	The computation of squared root at the end is omitted for
271      *	efficiency.
272      */
273     template <typename Iterator1, typename Iterator2>
274     ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const
275     {
276         ResultType result = ResultType();
277         ResultType diff0, diff1, diff2, diff3;
278         Iterator1 last = a + size;
279         Iterator1 lastgroup = last - 3;
280 
281         /* Process 4 items with each loop for efficiency. */
282         while (a < lastgroup) {
283             diff0 = (ResultType)std::abs(a[0] - b[0]);
284             diff1 = (ResultType)std::abs(a[1] - b[1]);
285             diff2 = (ResultType)std::abs(a[2] - b[2]);
286             diff3 = (ResultType)std::abs(a[3] - b[3]);
287             result += pow(diff0,order) + pow(diff1,order) + pow(diff2,order) + pow(diff3,order);
288             a += 4;
289             b += 4;
290 
291             if ((worst_dist>0)&&(result>worst_dist)) {
292                 return result;
293             }
294         }
295         /* Process last 0-3 pixels.  Not needed for standard vector lengths. */
296         while (a < last) {
297             diff0 = (ResultType)std::abs(*a++ - *b++);
298             result += pow(diff0,order);
299         }
300         return result;
301     }
302 
303     /**
304      * Partial distance, used by the kd-tree.
305      */
306     template <typename U, typename V>
307     inline ResultType accum_dist(const U& a, const V& b, int) const
308     {
309         return pow(static_cast<ResultType>(std::abs(a-b)),order);
310     }
311 };
312 
313 
314 
315 template<class T>
316 struct MaxDistance
317 {
318     typedef bool is_vector_space_distance;
319 
320     typedef T ElementType;
321     typedef typename Accumulator<T>::Type ResultType;
322 
323     /**
324      *  Compute the max distance (L_infinity) between two vectors.
325      *
326      *  This distance is not a valid kdtree distance, it's not dimensionwise additive.
327      */
328     template <typename Iterator1, typename Iterator2>
329     ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const
330     {
331         ResultType result = ResultType();
332         ResultType diff0, diff1, diff2, diff3;
333         Iterator1 last = a + size;
334         Iterator1 lastgroup = last - 3;
335 
336         /* Process 4 items with each loop for efficiency. */
337         while (a < lastgroup) {
338             diff0 = std::abs(a[0] - b[0]);
339             diff1 = std::abs(a[1] - b[1]);
340             diff2 = std::abs(a[2] - b[2]);
341             diff3 = std::abs(a[3] - b[3]);
342             if (diff0>result) {result = diff0; }
343             if (diff1>result) {result = diff1; }
344             if (diff2>result) {result = diff2; }
345             if (diff3>result) {result = diff3; }
346             a += 4;
347             b += 4;
348 
349             if ((worst_dist>0)&&(result>worst_dist)) {
350                 return result;
351             }
352         }
353         /* Process last 0-3 pixels.  Not needed for standard vector lengths. */
354         while (a < last) {
355             diff0 = std::abs(*a++ - *b++);
356             result = (diff0>result) ? diff0 : result;
357         }
358         return result;
359     }
360 
361     /* This distance functor is not dimension-wise additive, which
362      * makes it an invalid kd-tree distance, not implementing the accum_dist method */
363 
364 };
365 
366 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
367 
368 /**
369  * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
370  * bit count of A exclusive XOR'ed with B
371  */
372 struct HammingLUT
373 {
374     typedef unsigned char ElementType;
375     typedef int ResultType;
376 
377     /** this will count the bits in a ^ b
378      */
379     ResultType operator()(const unsigned char* a, const unsigned char* b, int size) const
380     {
381         ResultType result = 0;
382         for (int i = 0; i < size; i++) {
383             result += byteBitsLookUp(a[i] ^ b[i]);
384         }
385         return result;
386     }
387 
388 
389     /** \brief given a byte, count the bits using a look up table
390      *  \param b the byte to count bits.  The look up table has an entry for all
391      *  values of b, where that entry is the number of bits.
392      *  \return the number of bits in byte b
393      */
394     static unsigned char byteBitsLookUp(unsigned char b)
395     {
396         static const unsigned char table[256]  = {
397             /* 0 */ 0, /* 1 */ 1, /* 2 */ 1, /* 3 */ 2,
398             /* 4 */ 1, /* 5 */ 2, /* 6 */ 2, /* 7 */ 3,
399             /* 8 */ 1, /* 9 */ 2, /* a */ 2, /* b */ 3,
400             /* c */ 2, /* d */ 3, /* e */ 3, /* f */ 4,
401             /* 10 */ 1, /* 11 */ 2, /* 12 */ 2, /* 13 */ 3,
402             /* 14 */ 2, /* 15 */ 3, /* 16 */ 3, /* 17 */ 4,
403             /* 18 */ 2, /* 19 */ 3, /* 1a */ 3, /* 1b */ 4,
404             /* 1c */ 3, /* 1d */ 4, /* 1e */ 4, /* 1f */ 5,
405             /* 20 */ 1, /* 21 */ 2, /* 22 */ 2, /* 23 */ 3,
406             /* 24 */ 2, /* 25 */ 3, /* 26 */ 3, /* 27 */ 4,
407             /* 28 */ 2, /* 29 */ 3, /* 2a */ 3, /* 2b */ 4,
408             /* 2c */ 3, /* 2d */ 4, /* 2e */ 4, /* 2f */ 5,
409             /* 30 */ 2, /* 31 */ 3, /* 32 */ 3, /* 33 */ 4,
410             /* 34 */ 3, /* 35 */ 4, /* 36 */ 4, /* 37 */ 5,
411             /* 38 */ 3, /* 39 */ 4, /* 3a */ 4, /* 3b */ 5,
412             /* 3c */ 4, /* 3d */ 5, /* 3e */ 5, /* 3f */ 6,
413             /* 40 */ 1, /* 41 */ 2, /* 42 */ 2, /* 43 */ 3,
414             /* 44 */ 2, /* 45 */ 3, /* 46 */ 3, /* 47 */ 4,
415             /* 48 */ 2, /* 49 */ 3, /* 4a */ 3, /* 4b */ 4,
416             /* 4c */ 3, /* 4d */ 4, /* 4e */ 4, /* 4f */ 5,
417             /* 50 */ 2, /* 51 */ 3, /* 52 */ 3, /* 53 */ 4,
418             /* 54 */ 3, /* 55 */ 4, /* 56 */ 4, /* 57 */ 5,
419             /* 58 */ 3, /* 59 */ 4, /* 5a */ 4, /* 5b */ 5,
420             /* 5c */ 4, /* 5d */ 5, /* 5e */ 5, /* 5f */ 6,
421             /* 60 */ 2, /* 61 */ 3, /* 62 */ 3, /* 63 */ 4,
422             /* 64 */ 3, /* 65 */ 4, /* 66 */ 4, /* 67 */ 5,
423             /* 68 */ 3, /* 69 */ 4, /* 6a */ 4, /* 6b */ 5,
424             /* 6c */ 4, /* 6d */ 5, /* 6e */ 5, /* 6f */ 6,
425             /* 70 */ 3, /* 71 */ 4, /* 72 */ 4, /* 73 */ 5,
426             /* 74 */ 4, /* 75 */ 5, /* 76 */ 5, /* 77 */ 6,
427             /* 78 */ 4, /* 79 */ 5, /* 7a */ 5, /* 7b */ 6,
428             /* 7c */ 5, /* 7d */ 6, /* 7e */ 6, /* 7f */ 7,
429             /* 80 */ 1, /* 81 */ 2, /* 82 */ 2, /* 83 */ 3,
430             /* 84 */ 2, /* 85 */ 3, /* 86 */ 3, /* 87 */ 4,
431             /* 88 */ 2, /* 89 */ 3, /* 8a */ 3, /* 8b */ 4,
432             /* 8c */ 3, /* 8d */ 4, /* 8e */ 4, /* 8f */ 5,
433             /* 90 */ 2, /* 91 */ 3, /* 92 */ 3, /* 93 */ 4,
434             /* 94 */ 3, /* 95 */ 4, /* 96 */ 4, /* 97 */ 5,
435             /* 98 */ 3, /* 99 */ 4, /* 9a */ 4, /* 9b */ 5,
436             /* 9c */ 4, /* 9d */ 5, /* 9e */ 5, /* 9f */ 6,
437             /* a0 */ 2, /* a1 */ 3, /* a2 */ 3, /* a3 */ 4,
438             /* a4 */ 3, /* a5 */ 4, /* a6 */ 4, /* a7 */ 5,
439             /* a8 */ 3, /* a9 */ 4, /* aa */ 4, /* ab */ 5,
440             /* ac */ 4, /* ad */ 5, /* ae */ 5, /* af */ 6,
441             /* b0 */ 3, /* b1 */ 4, /* b2 */ 4, /* b3 */ 5,
442             /* b4 */ 4, /* b5 */ 5, /* b6 */ 5, /* b7 */ 6,
443             /* b8 */ 4, /* b9 */ 5, /* ba */ 5, /* bb */ 6,
444             /* bc */ 5, /* bd */ 6, /* be */ 6, /* bf */ 7,
445             /* c0 */ 2, /* c1 */ 3, /* c2 */ 3, /* c3 */ 4,
446             /* c4 */ 3, /* c5 */ 4, /* c6 */ 4, /* c7 */ 5,
447             /* c8 */ 3, /* c9 */ 4, /* ca */ 4, /* cb */ 5,
448             /* cc */ 4, /* cd */ 5, /* ce */ 5, /* cf */ 6,
449             /* d0 */ 3, /* d1 */ 4, /* d2 */ 4, /* d3 */ 5,
450             /* d4 */ 4, /* d5 */ 5, /* d6 */ 5, /* d7 */ 6,
451             /* d8 */ 4, /* d9 */ 5, /* da */ 5, /* db */ 6,
452             /* dc */ 5, /* dd */ 6, /* de */ 6, /* df */ 7,
453             /* e0 */ 3, /* e1 */ 4, /* e2 */ 4, /* e3 */ 5,
454             /* e4 */ 4, /* e5 */ 5, /* e6 */ 5, /* e7 */ 6,
455             /* e8 */ 4, /* e9 */ 5, /* ea */ 5, /* eb */ 6,
456             /* ec */ 5, /* ed */ 6, /* ee */ 6, /* ef */ 7,
457             /* f0 */ 4, /* f1 */ 5, /* f2 */ 5, /* f3 */ 6,
458             /* f4 */ 5, /* f5 */ 6, /* f6 */ 6, /* f7 */ 7,
459             /* f8 */ 5, /* f9 */ 6, /* fa */ 6, /* fb */ 7,
460             /* fc */ 6, /* fd */ 7, /* fe */ 7, /* ff */ 8
461         };
462         return table[b];
463     }
464 };
465 
466 /**
467  * Hamming distance functor (pop count between two binary vectors, i.e. xor them and count the number of bits set)
468  * That code was taken from brief.cpp in OpenCV
469  */
470 template<class T>
471 struct HammingPopcnt
472 {
473     typedef T ElementType;
474     typedef int ResultType;
475 
476     template<typename Iterator1, typename Iterator2>
477     ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const
478     {
479         ResultType result = 0;
480 #if __GNUC__
481 #if ANDROID && HAVE_NEON
482         static uint64_t features = android_getCpuFeatures();
483         if ((features& ANDROID_CPU_ARM_FEATURE_NEON)) {
484             for (size_t i = 0; i < size; i += 16) {
485                 uint8x16_t A_vec = vld1q_u8 (a + i);
486                 uint8x16_t B_vec = vld1q_u8 (b + i);
487                 //uint8x16_t veorq_u8 (uint8x16_t, uint8x16_t)
488                 uint8x16_t AxorB = veorq_u8 (A_vec, B_vec);
489 
490                 uint8x16_t bitsSet += vcntq_u8 (AxorB);
491                 //uint16x8_t vpadalq_u8 (uint16x8_t, uint8x16_t)
492                 uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
493                 uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
494 
495                 uint64x2_t bitSet2 = vpaddlq_u32 (bitSet4);
496                 result += vgetq_lane_u64 (bitSet2,0);
497                 result += vgetq_lane_u64 (bitSet2,1);
498             }
499         }
500         else
501 #endif
502         //for portability just use unsigned long -- and use the __builtin_popcountll (see docs for __builtin_popcountll)
503         typedef unsigned long long pop_t;
504         const size_t modulo = size % sizeof(pop_t);
505         const pop_t* a2 = reinterpret_cast<const pop_t*> (a);
506         const pop_t* b2 = reinterpret_cast<const pop_t*> (b);
507         const pop_t* a2_end = a2 + (size / sizeof(pop_t));
508 
509         for (; a2 != a2_end; ++a2, ++b2) result += __builtin_popcountll((*a2) ^ (*b2));
510 
511         if (modulo) {
512             //in the case where size is not dividable by sizeof(size_t)
513             //need to mask off the bits at the end
514             pop_t a_final = 0, b_final = 0;
515             memcpy(&a_final, a2, modulo);
516             memcpy(&b_final, b2, modulo);
517             result += __builtin_popcountll(a_final ^ b_final);
518         }
519 #else
520         HammingLUT lut;
521         result = lut(reinterpret_cast<const unsigned char*> (a),
522                      reinterpret_cast<const unsigned char*> (b), size * sizeof(pop_t));
523 #endif
524         return result;
525     }
526 };
527 
528 template<typename T>
529 struct Hamming
530 {
531     typedef T ElementType;
532     typedef unsigned int ResultType;
533 
534     /** This is popcount_3() from:
535      * http://en.wikipedia.org/wiki/Hamming_weight */
536     unsigned int popcnt32(uint32_t n) const
537     {
538         n -= ((n >> 1) & 0x55555555);
539         n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
540         return (((n + (n >> 4))& 0xF0F0F0F)* 0x1010101) >> 24;
541     }
542 
543     unsigned int popcnt64(uint64_t n) const
544     {
545         n -= ((n >> 1) & 0x5555555555555555LL);
546         n = (n & 0x3333333333333333LL) + ((n >> 2) & 0x3333333333333333LL);
547         return (((n + (n >> 4))& 0x0f0f0f0f0f0f0f0fLL)* 0x0101010101010101LL) >> 56;
548     }
549 
550     template <typename Iterator1, typename Iterator2>
551     ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType /*worst_dist*/ = 0) const
552     {
553 #ifdef FLANN_PLATFORM_64_BIT
554         const uint64_t* pa = reinterpret_cast<const uint64_t*>(a);
555         const uint64_t* pb = reinterpret_cast<const uint64_t*>(b);
556         ResultType result = 0;
557         size /= (sizeof(uint64_t)/sizeof(unsigned char));
558         for(size_t i = 0; i < size; ++i ) {
559             result += popcnt64(*pa ^ *pb);
560             ++pa;
561             ++pb;
562         }
563 #else
564         const uint32_t* pa = reinterpret_cast<const uint32_t*>(a);
565         const uint32_t* pb = reinterpret_cast<const uint32_t*>(b);
566         ResultType result = 0;
567         size /= (sizeof(uint32_t)/sizeof(unsigned char));
568         for(size_t i = 0; i < size; ++i ) {
569         	result += popcnt32(*pa ^ *pb);
570         	++pa;
571         	++pb;
572         }
573 #endif
574         return result;
575     }
576 };
577 
578 
579 
580 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
581 
582 template<class T>
583 struct HistIntersectionDistance
584 {
585     typedef bool is_kdtree_distance;
586 
587     typedef T ElementType;
588     typedef typename Accumulator<T>::Type ResultType;
589 
590     /**
591      *  Compute the histogram intersection distance
592      */
593     template <typename Iterator1, typename Iterator2>
594     ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const
595     {
596         ResultType result = ResultType();
597         ResultType min0, min1, min2, min3;
598         Iterator1 last = a + size;
599         Iterator1 lastgroup = last - 3;
600 
601         /* Process 4 items with each loop for efficiency. */
602         while (a < lastgroup) {
603             min0 = (ResultType)(a[0] < b[0] ? a[0] : b[0]);
604             min1 = (ResultType)(a[1] < b[1] ? a[1] : b[1]);
605             min2 = (ResultType)(a[2] < b[2] ? a[2] : b[2]);
606             min3 = (ResultType)(a[3] < b[3] ? a[3] : b[3]);
607             result += min0 + min1 + min2 + min3;
608             a += 4;
609             b += 4;
610             if ((worst_dist>0)&&(result>worst_dist)) {
611                 return result;
612             }
613         }
614         /* Process last 0-3 pixels.  Not needed for standard vector lengths. */
615         while (a < last) {
616             min0 = (ResultType)(*a < *b ? *a : *b);
617             result += min0;
618             ++a;
619             ++b;
620         }
621         return result;
622     }
623 
624     /**
625      * Partial distance, used by the kd-tree.
626      */
627     template <typename U, typename V>
628     inline ResultType accum_dist(const U& a, const V& b, int) const
629     {
630         return a<b ? a : b;
631     }
632 };
633 
634 
635 
636 template<class T>
637 struct HellingerDistance
638 {
639     typedef bool is_kdtree_distance;
640 
641     typedef T ElementType;
642     typedef typename Accumulator<T>::Type ResultType;
643 
644     /**
645      *  Compute the histogram intersection distance
646      */
647     template <typename Iterator1, typename Iterator2>
648     ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const
649     {
650         ResultType result = ResultType();
651         ResultType diff0, diff1, diff2, diff3;
652         Iterator1 last = a + size;
653         Iterator1 lastgroup = last - 3;
654 
655         /* Process 4 items with each loop for efficiency. */
656         while (a < lastgroup) {
657             diff0 = sqrt(static_cast<ResultType>(a[0])) - sqrt(static_cast<ResultType>(b[0]));
658             diff1 = sqrt(static_cast<ResultType>(a[1])) - sqrt(static_cast<ResultType>(b[1]));
659             diff2 = sqrt(static_cast<ResultType>(a[2])) - sqrt(static_cast<ResultType>(b[2]));
660             diff3 = sqrt(static_cast<ResultType>(a[3])) - sqrt(static_cast<ResultType>(b[3]));
661             result += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3;
662             a += 4;
663             b += 4;
664         }
665         while (a < last) {
666             diff0 = sqrt(static_cast<ResultType>(*a++)) - sqrt(static_cast<ResultType>(*b++));
667             result += diff0 * diff0;
668         }
669         return result;
670     }
671 
672     /**
673      * Partial distance, used by the kd-tree.
674      */
675     template <typename U, typename V>
676     inline ResultType accum_dist(const U& a, const V& b, int) const
677     {
678         return sqrt(static_cast<ResultType>(a)) - sqrt(static_cast<ResultType>(b));
679     }
680 };
681 
682 
683 template<class T>
684 struct ChiSquareDistance
685 {
686     typedef bool is_kdtree_distance;
687 
688     typedef T ElementType;
689     typedef typename Accumulator<T>::Type ResultType;
690 
691     /**
692      *  Compute the chi-square distance
693      */
694     template <typename Iterator1, typename Iterator2>
695     ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const
696     {
697         ResultType result = ResultType();
698         ResultType sum, diff;
699         Iterator1 last = a + size;
700 
701         while (a < last) {
702             sum = (ResultType)(*a + *b);
703             if (sum>0) {
704                 diff = (ResultType)(*a - *b);
705                 result += diff*diff/sum;
706             }
707             ++a;
708             ++b;
709 
710             if ((worst_dist>0)&&(result>worst_dist)) {
711                 return result;
712             }
713         }
714         return result;
715     }
716 
717     /**
718      * Partial distance, used by the kd-tree.
719      */
720     template <typename U, typename V>
721     inline ResultType accum_dist(const U& a, const V& b, int) const
722     {
723         ResultType result = ResultType();
724         ResultType sum, diff;
725 
726         sum = (ResultType)(a+b);
727         if (sum>0) {
728             diff = (ResultType)(a-b);
729             result = diff*diff/sum;
730         }
731         return result;
732     }
733 };
734 
735 
736 template<class T>
737 struct KL_Divergence
738 {
739     typedef bool is_kdtree_distance;
740 
741     typedef T ElementType;
742     typedef typename Accumulator<T>::Type ResultType;
743 
744     /**
745      *  Compute the Kullback–Leibler divergence
746      */
747     template <typename Iterator1, typename Iterator2>
748     ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType worst_dist = -1) const
749     {
750         ResultType result = ResultType();
751         Iterator1 last = a + size;
752 
753         while (a < last) {
754             if (* a != 0) {
755                 ResultType ratio = (ResultType)(*a / *b);
756                 if (ratio>0) {
757                     result += *a * log(ratio);
758                 }
759             }
760             ++a;
761             ++b;
762 
763             if ((worst_dist>0)&&(result>worst_dist)) {
764                 return result;
765             }
766         }
767         return result;
768     }
769 
770     /**
771      * Partial distance, used by the kd-tree.
772      */
773     template <typename U, typename V>
774     inline ResultType accum_dist(const U& a, const V& b, int) const
775     {
776         ResultType result = ResultType();
777         ResultType ratio = (ResultType)(a / b);
778         if (ratio>0) {
779             result = a * log(ratio);
780         }
781         return result;
782     }
783 };
784 
785 }
786 
787 #endif //FLANN_DIST_H_
788