1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 /*
9  * C++ support for heaps. The set of functions is tailored for efficient
10  * similarity search.
11  *
12  * There is no specific object for a heap, and the functions that operate on a
13  * single heap are inlined, because heaps are often small. More complex
14  * functions are implemented in Heaps.cpp
15  *
16  * All heap functions rely on a C template class that define the type of the
17  * keys and values and their ordering (increasing with CMax and decreasing with
18  * Cmin). The C types are defined in ordered_key_value.h
19  */
20 
21 #ifndef FAISS_Heap_h
22 #define FAISS_Heap_h
23 
24 #include <climits>
25 #include <cmath>
26 #include <cstring>
27 
28 #include <stdint.h>
29 #include <cassert>
30 #include <cstdio>
31 
32 #include <limits>
33 
34 #include <faiss/utils/ordered_key_value.h>
35 
36 namespace faiss {
37 
38 /*******************************************************************
39  * Basic heap ops: push and pop
40  *******************************************************************/
41 
42 /** Pops the top element from the heap defined by bh_val[0..k-1] and
43  * bh_ids[0..k-1].  on output the element at k-1 is undefined.
44  */
45 template <class C>
heap_pop(size_t k,typename C::T * bh_val,typename C::TI * bh_ids)46 inline void heap_pop(size_t k, typename C::T* bh_val, typename C::TI* bh_ids) {
47     bh_val--; /* Use 1-based indexing for easier node->child translation */
48     bh_ids--;
49     typename C::T val = bh_val[k];
50     size_t i = 1, i1, i2;
51     while (1) {
52         i1 = i << 1;
53         i2 = i1 + 1;
54         if (i1 > k)
55             break;
56         if (i2 == k + 1 || C::cmp(bh_val[i1], bh_val[i2])) {
57             if (C::cmp(val, bh_val[i1]))
58                 break;
59             bh_val[i] = bh_val[i1];
60             bh_ids[i] = bh_ids[i1];
61             i = i1;
62         } else {
63             if (C::cmp(val, bh_val[i2]))
64                 break;
65             bh_val[i] = bh_val[i2];
66             bh_ids[i] = bh_ids[i2];
67             i = i2;
68         }
69     }
70     bh_val[i] = bh_val[k];
71     bh_ids[i] = bh_ids[k];
72 }
73 
74 /** Pushes the element (val, ids) into the heap bh_val[0..k-2] and
75  * bh_ids[0..k-2].  on output the element at k-1 is defined.
76  */
77 template <class C>
heap_push(size_t k,typename C::T * bh_val,typename C::TI * bh_ids,typename C::T val,typename C::TI ids)78 inline void heap_push(
79         size_t k,
80         typename C::T* bh_val,
81         typename C::TI* bh_ids,
82         typename C::T val,
83         typename C::TI ids) {
84     bh_val--; /* Use 1-based indexing for easier node->child translation */
85     bh_ids--;
86     size_t i = k, i_father;
87     while (i > 1) {
88         i_father = i >> 1;
89         if (!C::cmp(val, bh_val[i_father])) /* the heap structure is ok */
90             break;
91         bh_val[i] = bh_val[i_father];
92         bh_ids[i] = bh_ids[i_father];
93         i = i_father;
94     }
95     bh_val[i] = val;
96     bh_ids[i] = ids;
97 }
98 
99 /** Replace the top element from the heap defined by bh_val[0..k-1] and
100  * bh_ids[0..k-1].
101  */
102 template <class C>
heap_replace_top(size_t k,typename C::T * bh_val,typename C::TI * bh_ids,typename C::T val,typename C::TI ids)103 inline void heap_replace_top(
104         size_t k,
105         typename C::T* bh_val,
106         typename C::TI* bh_ids,
107         typename C::T val,
108         typename C::TI ids) {
109     bh_val--; /* Use 1-based indexing for easier node->child translation */
110     bh_ids--;
111     size_t i = 1, i1, i2;
112     while (1) {
113         i1 = i << 1;
114         i2 = i1 + 1;
115         if (i1 > k)
116             break;
117         if (i2 == k + 1 || C::cmp(bh_val[i1], bh_val[i2])) {
118             if (C::cmp(val, bh_val[i1]))
119                 break;
120             bh_val[i] = bh_val[i1];
121             bh_ids[i] = bh_ids[i1];
122             i = i1;
123         } else {
124             if (C::cmp(val, bh_val[i2]))
125                 break;
126             bh_val[i] = bh_val[i2];
127             bh_ids[i] = bh_ids[i2];
128             i = i2;
129         }
130     }
131     bh_val[i] = val;
132     bh_ids[i] = ids;
133 }
134 
135 /* Partial instanciation for heaps with TI = int64_t */
136 
137 template <typename T>
minheap_pop(size_t k,T * bh_val,int64_t * bh_ids)138 inline void minheap_pop(size_t k, T* bh_val, int64_t* bh_ids) {
139     heap_pop<CMin<T, int64_t>>(k, bh_val, bh_ids);
140 }
141 
142 template <typename T>
minheap_push(size_t k,T * bh_val,int64_t * bh_ids,T val,int64_t ids)143 inline void minheap_push(
144         size_t k,
145         T* bh_val,
146         int64_t* bh_ids,
147         T val,
148         int64_t ids) {
149     heap_push<CMin<T, int64_t>>(k, bh_val, bh_ids, val, ids);
150 }
151 
152 template <typename T>
minheap_replace_top(size_t k,T * bh_val,int64_t * bh_ids,T val,int64_t ids)153 inline void minheap_replace_top(
154         size_t k,
155         T* bh_val,
156         int64_t* bh_ids,
157         T val,
158         int64_t ids) {
159     heap_replace_top<CMin<T, int64_t>>(k, bh_val, bh_ids, val, ids);
160 }
161 
162 template <typename T>
maxheap_pop(size_t k,T * bh_val,int64_t * bh_ids)163 inline void maxheap_pop(size_t k, T* bh_val, int64_t* bh_ids) {
164     heap_pop<CMax<T, int64_t>>(k, bh_val, bh_ids);
165 }
166 
167 template <typename T>
maxheap_push(size_t k,T * bh_val,int64_t * bh_ids,T val,int64_t ids)168 inline void maxheap_push(
169         size_t k,
170         T* bh_val,
171         int64_t* bh_ids,
172         T val,
173         int64_t ids) {
174     heap_push<CMax<T, int64_t>>(k, bh_val, bh_ids, val, ids);
175 }
176 
177 template <typename T>
maxheap_replace_top(size_t k,T * bh_val,int64_t * bh_ids,T val,int64_t ids)178 inline void maxheap_replace_top(
179         size_t k,
180         T* bh_val,
181         int64_t* bh_ids,
182         T val,
183         int64_t ids) {
184     heap_replace_top<CMax<T, int64_t>>(k, bh_val, bh_ids, val, ids);
185 }
186 
187 /*******************************************************************
188  * Heap initialization
189  *******************************************************************/
190 
191 /* Initialization phase for the heap (with unconditionnal pushes).
192  * Store k0 elements in a heap containing up to k values. Note that
193  * (bh_val, bh_ids) can be the same as (x, ids) */
194 template <class C>
195 inline void heap_heapify(
196         size_t k,
197         typename C::T* bh_val,
198         typename C::TI* bh_ids,
199         const typename C::T* x = nullptr,
200         const typename C::TI* ids = nullptr,
201         size_t k0 = 0) {
202     if (k0 > 0)
203         assert(x);
204 
205     if (ids) {
206         for (size_t i = 0; i < k0; i++)
207             heap_push<C>(i + 1, bh_val, bh_ids, x[i], ids[i]);
208     } else {
209         for (size_t i = 0; i < k0; i++)
210             heap_push<C>(i + 1, bh_val, bh_ids, x[i], i);
211     }
212 
213     for (size_t i = k0; i < k; i++) {
214         bh_val[i] = C::neutral();
215         bh_ids[i] = -1;
216     }
217 }
218 
219 template <typename T>
220 inline void minheap_heapify(
221         size_t k,
222         T* bh_val,
223         int64_t* bh_ids,
224         const T* x = nullptr,
225         const int64_t* ids = nullptr,
226         size_t k0 = 0) {
227     heap_heapify<CMin<T, int64_t>>(k, bh_val, bh_ids, x, ids, k0);
228 }
229 
230 template <typename T>
231 inline void maxheap_heapify(
232         size_t k,
233         T* bh_val,
234         int64_t* bh_ids,
235         const T* x = nullptr,
236         const int64_t* ids = nullptr,
237         size_t k0 = 0) {
238     heap_heapify<CMax<T, int64_t>>(k, bh_val, bh_ids, x, ids, k0);
239 }
240 
241 /*******************************************************************
242  * Add n elements to the heap
243  *******************************************************************/
244 
245 /* Add some elements to the heap  */
246 template <class C>
heap_addn(size_t k,typename C::T * bh_val,typename C::TI * bh_ids,const typename C::T * x,const typename C::TI * ids,size_t n)247 inline void heap_addn(
248         size_t k,
249         typename C::T* bh_val,
250         typename C::TI* bh_ids,
251         const typename C::T* x,
252         const typename C::TI* ids,
253         size_t n) {
254     size_t i;
255     if (ids)
256         for (i = 0; i < n; i++) {
257             if (C::cmp(bh_val[0], x[i])) {
258                 heap_replace_top<C>(k, bh_val, bh_ids, x[i], ids[i]);
259             }
260         }
261     else
262         for (i = 0; i < n; i++) {
263             if (C::cmp(bh_val[0], x[i])) {
264                 heap_replace_top<C>(k, bh_val, bh_ids, x[i], i);
265             }
266         }
267 }
268 
269 /* Partial instanciation for heaps with TI = int64_t */
270 
271 template <typename T>
minheap_addn(size_t k,T * bh_val,int64_t * bh_ids,const T * x,const int64_t * ids,size_t n)272 inline void minheap_addn(
273         size_t k,
274         T* bh_val,
275         int64_t* bh_ids,
276         const T* x,
277         const int64_t* ids,
278         size_t n) {
279     heap_addn<CMin<T, int64_t>>(k, bh_val, bh_ids, x, ids, n);
280 }
281 
282 template <typename T>
maxheap_addn(size_t k,T * bh_val,int64_t * bh_ids,const T * x,const int64_t * ids,size_t n)283 inline void maxheap_addn(
284         size_t k,
285         T* bh_val,
286         int64_t* bh_ids,
287         const T* x,
288         const int64_t* ids,
289         size_t n) {
290     heap_addn<CMax<T, int64_t>>(k, bh_val, bh_ids, x, ids, n);
291 }
292 
293 /*******************************************************************
294  * Heap finalization (reorder elements)
295  *******************************************************************/
296 
297 /* This function maps a binary heap into an sorted structure.
298    It returns the number  */
299 template <typename C>
heap_reorder(size_t k,typename C::T * bh_val,typename C::TI * bh_ids)300 inline size_t heap_reorder(
301         size_t k,
302         typename C::T* bh_val,
303         typename C::TI* bh_ids) {
304     size_t i, ii;
305 
306     for (i = 0, ii = 0; i < k; i++) {
307         /* top element should be put at the end of the list */
308         typename C::T val = bh_val[0];
309         typename C::TI id = bh_ids[0];
310 
311         /* boundary case: we will over-ride this value if not a true element */
312         heap_pop<C>(k - i, bh_val, bh_ids);
313         bh_val[k - ii - 1] = val;
314         bh_ids[k - ii - 1] = id;
315         if (id != -1)
316             ii++;
317     }
318     /* Count the number of elements which are effectively returned */
319     size_t nel = ii;
320 
321     memmove(bh_val, bh_val + k - ii, ii * sizeof(*bh_val));
322     memmove(bh_ids, bh_ids + k - ii, ii * sizeof(*bh_ids));
323 
324     for (; ii < k; ii++) {
325         bh_val[ii] = C::neutral();
326         bh_ids[ii] = -1;
327     }
328     return nel;
329 }
330 
331 template <typename T>
minheap_reorder(size_t k,T * bh_val,int64_t * bh_ids)332 inline size_t minheap_reorder(size_t k, T* bh_val, int64_t* bh_ids) {
333     return heap_reorder<CMin<T, int64_t>>(k, bh_val, bh_ids);
334 }
335 
336 template <typename T>
maxheap_reorder(size_t k,T * bh_val,int64_t * bh_ids)337 inline size_t maxheap_reorder(size_t k, T* bh_val, int64_t* bh_ids) {
338     return heap_reorder<CMax<T, int64_t>>(k, bh_val, bh_ids);
339 }
340 
341 /*******************************************************************
342  * Operations on heap arrays
343  *******************************************************************/
344 
345 /** a template structure for a set of [min|max]-heaps it is tailored
346  * so that the actual data of the heaps can just live in compact
347  * arrays.
348  */
349 template <typename C>
350 struct HeapArray {
351     typedef typename C::TI TI;
352     typedef typename C::T T;
353 
354     size_t nh; ///< number of heaps
355     size_t k;  ///< allocated size per heap
356     TI* ids;   ///< identifiers (size nh * k)
357     T* val;    ///< values (distances or similarities), size nh * k
358 
359     /// Return the list of values for a heap
get_valHeapArray360     T* get_val(size_t key) {
361         return val + key * k;
362     }
363 
364     /// Correspponding identifiers
get_idsHeapArray365     TI* get_ids(size_t key) {
366         return ids + key * k;
367     }
368 
369     /// prepare all the heaps before adding
370     void heapify();
371 
372     /** add nj elements to heaps i0:i0+ni, with sequential ids
373      *
374      * @param nj    nb of elements to add to each heap
375      * @param vin   elements to add, size ni * nj
376      * @param j0    add this to the ids that are added
377      * @param i0    first heap to update
378      * @param ni    nb of elements to update (-1 = use nh)
379      */
380     void addn(
381             size_t nj,
382             const T* vin,
383             TI j0 = 0,
384             size_t i0 = 0,
385             int64_t ni = -1);
386 
387     /** same as addn
388      *
389      * @param id_in     ids of the elements to add, size ni * nj
390      * @param id_stride stride for id_in
391      */
392     void addn_with_ids(
393             size_t nj,
394             const T* vin,
395             const TI* id_in = nullptr,
396             int64_t id_stride = 0,
397             size_t i0 = 0,
398             int64_t ni = -1);
399 
400     /// reorder all the heaps
401     void reorder();
402 
403     /** this is not really a heap function. It just finds the per-line
404      *   extrema of each line of array D
405      * @param vals_out    extreme value of each line (size nh, or NULL)
406      * @param idx_out     index of extreme value (size nh or NULL)
407      */
408     void per_line_extrema(T* vals_out, TI* idx_out) const;
409 };
410 
411 /* Define useful heaps */
412 typedef HeapArray<CMin<float, int64_t>> float_minheap_array_t;
413 typedef HeapArray<CMin<int, int64_t>> int_minheap_array_t;
414 
415 typedef HeapArray<CMax<float, int64_t>> float_maxheap_array_t;
416 typedef HeapArray<CMax<int, int64_t>> int_maxheap_array_t;
417 
418 // The heap templates are instanciated explicitly in Heap.cpp
419 
420 /*********************************************************************
421  * Indirect heaps: instead of having
422  *
423  *          node i = (bh_ids[i], bh_val[i]),
424  *
425  * in indirect heaps,
426  *
427  *          node i = (bh_ids[i], bh_val[bh_ids[i]]),
428  *
429  *********************************************************************/
430 
431 template <class C>
indirect_heap_pop(size_t k,const typename C::T * bh_val,typename C::TI * bh_ids)432 inline void indirect_heap_pop(
433         size_t k,
434         const typename C::T* bh_val,
435         typename C::TI* bh_ids) {
436     bh_ids--; /* Use 1-based indexing for easier node->child translation */
437     typename C::T val = bh_val[bh_ids[k]];
438     size_t i = 1;
439     while (1) {
440         size_t i1 = i << 1;
441         size_t i2 = i1 + 1;
442         if (i1 > k)
443             break;
444         typename C::TI id1 = bh_ids[i1], id2 = bh_ids[i2];
445         if (i2 == k + 1 || C::cmp(bh_val[id1], bh_val[id2])) {
446             if (C::cmp(val, bh_val[id1]))
447                 break;
448             bh_ids[i] = id1;
449             i = i1;
450         } else {
451             if (C::cmp(val, bh_val[id2]))
452                 break;
453             bh_ids[i] = id2;
454             i = i2;
455         }
456     }
457     bh_ids[i] = bh_ids[k];
458 }
459 
460 template <class C>
indirect_heap_push(size_t k,const typename C::T * bh_val,typename C::TI * bh_ids,typename C::TI id)461 inline void indirect_heap_push(
462         size_t k,
463         const typename C::T* bh_val,
464         typename C::TI* bh_ids,
465         typename C::TI id) {
466     bh_ids--; /* Use 1-based indexing for easier node->child translation */
467     typename C::T val = bh_val[id];
468     size_t i = k;
469     while (i > 1) {
470         size_t i_father = i >> 1;
471         if (!C::cmp(val, bh_val[bh_ids[i_father]]))
472             break;
473         bh_ids[i] = bh_ids[i_father];
474         i = i_father;
475     }
476     bh_ids[i] = id;
477 }
478 
479 } // namespace faiss
480 
481 #endif /* FAISS_Heap_h */
482