1 /**
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8 /*
9 * C++ support for heaps. The set of functions is tailored for efficient
10 * similarity search.
11 *
12 * There is no specific object for a heap, and the functions that operate on a
13 * single heap are inlined, because heaps are often small. More complex
14 * functions are implemented in Heaps.cpp
15 *
16 * All heap functions rely on a C template class that define the type of the
17 * keys and values and their ordering (increasing with CMax and decreasing with
18 * Cmin). The C types are defined in ordered_key_value.h
19 */
20
21 #ifndef FAISS_Heap_h
22 #define FAISS_Heap_h
23
24 #include <climits>
25 #include <cmath>
26 #include <cstring>
27
28 #include <stdint.h>
29 #include <cassert>
30 #include <cstdio>
31
32 #include <limits>
33
34 #include <faiss/utils/ordered_key_value.h>
35
36 namespace faiss {
37
38 /*******************************************************************
39 * Basic heap ops: push and pop
40 *******************************************************************/
41
42 /** Pops the top element from the heap defined by bh_val[0..k-1] and
43 * bh_ids[0..k-1]. on output the element at k-1 is undefined.
44 */
45 template <class C>
heap_pop(size_t k,typename C::T * bh_val,typename C::TI * bh_ids)46 inline void heap_pop(size_t k, typename C::T* bh_val, typename C::TI* bh_ids) {
47 bh_val--; /* Use 1-based indexing for easier node->child translation */
48 bh_ids--;
49 typename C::T val = bh_val[k];
50 size_t i = 1, i1, i2;
51 while (1) {
52 i1 = i << 1;
53 i2 = i1 + 1;
54 if (i1 > k)
55 break;
56 if (i2 == k + 1 || C::cmp(bh_val[i1], bh_val[i2])) {
57 if (C::cmp(val, bh_val[i1]))
58 break;
59 bh_val[i] = bh_val[i1];
60 bh_ids[i] = bh_ids[i1];
61 i = i1;
62 } else {
63 if (C::cmp(val, bh_val[i2]))
64 break;
65 bh_val[i] = bh_val[i2];
66 bh_ids[i] = bh_ids[i2];
67 i = i2;
68 }
69 }
70 bh_val[i] = bh_val[k];
71 bh_ids[i] = bh_ids[k];
72 }
73
74 /** Pushes the element (val, ids) into the heap bh_val[0..k-2] and
75 * bh_ids[0..k-2]. on output the element at k-1 is defined.
76 */
77 template <class C>
heap_push(size_t k,typename C::T * bh_val,typename C::TI * bh_ids,typename C::T val,typename C::TI ids)78 inline void heap_push(
79 size_t k,
80 typename C::T* bh_val,
81 typename C::TI* bh_ids,
82 typename C::T val,
83 typename C::TI ids) {
84 bh_val--; /* Use 1-based indexing for easier node->child translation */
85 bh_ids--;
86 size_t i = k, i_father;
87 while (i > 1) {
88 i_father = i >> 1;
89 if (!C::cmp(val, bh_val[i_father])) /* the heap structure is ok */
90 break;
91 bh_val[i] = bh_val[i_father];
92 bh_ids[i] = bh_ids[i_father];
93 i = i_father;
94 }
95 bh_val[i] = val;
96 bh_ids[i] = ids;
97 }
98
99 /** Replace the top element from the heap defined by bh_val[0..k-1] and
100 * bh_ids[0..k-1].
101 */
102 template <class C>
heap_replace_top(size_t k,typename C::T * bh_val,typename C::TI * bh_ids,typename C::T val,typename C::TI ids)103 inline void heap_replace_top(
104 size_t k,
105 typename C::T* bh_val,
106 typename C::TI* bh_ids,
107 typename C::T val,
108 typename C::TI ids) {
109 bh_val--; /* Use 1-based indexing for easier node->child translation */
110 bh_ids--;
111 size_t i = 1, i1, i2;
112 while (1) {
113 i1 = i << 1;
114 i2 = i1 + 1;
115 if (i1 > k)
116 break;
117 if (i2 == k + 1 || C::cmp(bh_val[i1], bh_val[i2])) {
118 if (C::cmp(val, bh_val[i1]))
119 break;
120 bh_val[i] = bh_val[i1];
121 bh_ids[i] = bh_ids[i1];
122 i = i1;
123 } else {
124 if (C::cmp(val, bh_val[i2]))
125 break;
126 bh_val[i] = bh_val[i2];
127 bh_ids[i] = bh_ids[i2];
128 i = i2;
129 }
130 }
131 bh_val[i] = val;
132 bh_ids[i] = ids;
133 }
134
135 /* Partial instanciation for heaps with TI = int64_t */
136
137 template <typename T>
minheap_pop(size_t k,T * bh_val,int64_t * bh_ids)138 inline void minheap_pop(size_t k, T* bh_val, int64_t* bh_ids) {
139 heap_pop<CMin<T, int64_t>>(k, bh_val, bh_ids);
140 }
141
142 template <typename T>
minheap_push(size_t k,T * bh_val,int64_t * bh_ids,T val,int64_t ids)143 inline void minheap_push(
144 size_t k,
145 T* bh_val,
146 int64_t* bh_ids,
147 T val,
148 int64_t ids) {
149 heap_push<CMin<T, int64_t>>(k, bh_val, bh_ids, val, ids);
150 }
151
152 template <typename T>
minheap_replace_top(size_t k,T * bh_val,int64_t * bh_ids,T val,int64_t ids)153 inline void minheap_replace_top(
154 size_t k,
155 T* bh_val,
156 int64_t* bh_ids,
157 T val,
158 int64_t ids) {
159 heap_replace_top<CMin<T, int64_t>>(k, bh_val, bh_ids, val, ids);
160 }
161
162 template <typename T>
maxheap_pop(size_t k,T * bh_val,int64_t * bh_ids)163 inline void maxheap_pop(size_t k, T* bh_val, int64_t* bh_ids) {
164 heap_pop<CMax<T, int64_t>>(k, bh_val, bh_ids);
165 }
166
167 template <typename T>
maxheap_push(size_t k,T * bh_val,int64_t * bh_ids,T val,int64_t ids)168 inline void maxheap_push(
169 size_t k,
170 T* bh_val,
171 int64_t* bh_ids,
172 T val,
173 int64_t ids) {
174 heap_push<CMax<T, int64_t>>(k, bh_val, bh_ids, val, ids);
175 }
176
177 template <typename T>
maxheap_replace_top(size_t k,T * bh_val,int64_t * bh_ids,T val,int64_t ids)178 inline void maxheap_replace_top(
179 size_t k,
180 T* bh_val,
181 int64_t* bh_ids,
182 T val,
183 int64_t ids) {
184 heap_replace_top<CMax<T, int64_t>>(k, bh_val, bh_ids, val, ids);
185 }
186
187 /*******************************************************************
188 * Heap initialization
189 *******************************************************************/
190
191 /* Initialization phase for the heap (with unconditionnal pushes).
192 * Store k0 elements in a heap containing up to k values. Note that
193 * (bh_val, bh_ids) can be the same as (x, ids) */
194 template <class C>
195 inline void heap_heapify(
196 size_t k,
197 typename C::T* bh_val,
198 typename C::TI* bh_ids,
199 const typename C::T* x = nullptr,
200 const typename C::TI* ids = nullptr,
201 size_t k0 = 0) {
202 if (k0 > 0)
203 assert(x);
204
205 if (ids) {
206 for (size_t i = 0; i < k0; i++)
207 heap_push<C>(i + 1, bh_val, bh_ids, x[i], ids[i]);
208 } else {
209 for (size_t i = 0; i < k0; i++)
210 heap_push<C>(i + 1, bh_val, bh_ids, x[i], i);
211 }
212
213 for (size_t i = k0; i < k; i++) {
214 bh_val[i] = C::neutral();
215 bh_ids[i] = -1;
216 }
217 }
218
219 template <typename T>
220 inline void minheap_heapify(
221 size_t k,
222 T* bh_val,
223 int64_t* bh_ids,
224 const T* x = nullptr,
225 const int64_t* ids = nullptr,
226 size_t k0 = 0) {
227 heap_heapify<CMin<T, int64_t>>(k, bh_val, bh_ids, x, ids, k0);
228 }
229
230 template <typename T>
231 inline void maxheap_heapify(
232 size_t k,
233 T* bh_val,
234 int64_t* bh_ids,
235 const T* x = nullptr,
236 const int64_t* ids = nullptr,
237 size_t k0 = 0) {
238 heap_heapify<CMax<T, int64_t>>(k, bh_val, bh_ids, x, ids, k0);
239 }
240
241 /*******************************************************************
242 * Add n elements to the heap
243 *******************************************************************/
244
245 /* Add some elements to the heap */
246 template <class C>
heap_addn(size_t k,typename C::T * bh_val,typename C::TI * bh_ids,const typename C::T * x,const typename C::TI * ids,size_t n)247 inline void heap_addn(
248 size_t k,
249 typename C::T* bh_val,
250 typename C::TI* bh_ids,
251 const typename C::T* x,
252 const typename C::TI* ids,
253 size_t n) {
254 size_t i;
255 if (ids)
256 for (i = 0; i < n; i++) {
257 if (C::cmp(bh_val[0], x[i])) {
258 heap_replace_top<C>(k, bh_val, bh_ids, x[i], ids[i]);
259 }
260 }
261 else
262 for (i = 0; i < n; i++) {
263 if (C::cmp(bh_val[0], x[i])) {
264 heap_replace_top<C>(k, bh_val, bh_ids, x[i], i);
265 }
266 }
267 }
268
269 /* Partial instanciation for heaps with TI = int64_t */
270
271 template <typename T>
minheap_addn(size_t k,T * bh_val,int64_t * bh_ids,const T * x,const int64_t * ids,size_t n)272 inline void minheap_addn(
273 size_t k,
274 T* bh_val,
275 int64_t* bh_ids,
276 const T* x,
277 const int64_t* ids,
278 size_t n) {
279 heap_addn<CMin<T, int64_t>>(k, bh_val, bh_ids, x, ids, n);
280 }
281
282 template <typename T>
maxheap_addn(size_t k,T * bh_val,int64_t * bh_ids,const T * x,const int64_t * ids,size_t n)283 inline void maxheap_addn(
284 size_t k,
285 T* bh_val,
286 int64_t* bh_ids,
287 const T* x,
288 const int64_t* ids,
289 size_t n) {
290 heap_addn<CMax<T, int64_t>>(k, bh_val, bh_ids, x, ids, n);
291 }
292
293 /*******************************************************************
294 * Heap finalization (reorder elements)
295 *******************************************************************/
296
297 /* This function maps a binary heap into an sorted structure.
298 It returns the number */
299 template <typename C>
heap_reorder(size_t k,typename C::T * bh_val,typename C::TI * bh_ids)300 inline size_t heap_reorder(
301 size_t k,
302 typename C::T* bh_val,
303 typename C::TI* bh_ids) {
304 size_t i, ii;
305
306 for (i = 0, ii = 0; i < k; i++) {
307 /* top element should be put at the end of the list */
308 typename C::T val = bh_val[0];
309 typename C::TI id = bh_ids[0];
310
311 /* boundary case: we will over-ride this value if not a true element */
312 heap_pop<C>(k - i, bh_val, bh_ids);
313 bh_val[k - ii - 1] = val;
314 bh_ids[k - ii - 1] = id;
315 if (id != -1)
316 ii++;
317 }
318 /* Count the number of elements which are effectively returned */
319 size_t nel = ii;
320
321 memmove(bh_val, bh_val + k - ii, ii * sizeof(*bh_val));
322 memmove(bh_ids, bh_ids + k - ii, ii * sizeof(*bh_ids));
323
324 for (; ii < k; ii++) {
325 bh_val[ii] = C::neutral();
326 bh_ids[ii] = -1;
327 }
328 return nel;
329 }
330
331 template <typename T>
minheap_reorder(size_t k,T * bh_val,int64_t * bh_ids)332 inline size_t minheap_reorder(size_t k, T* bh_val, int64_t* bh_ids) {
333 return heap_reorder<CMin<T, int64_t>>(k, bh_val, bh_ids);
334 }
335
336 template <typename T>
maxheap_reorder(size_t k,T * bh_val,int64_t * bh_ids)337 inline size_t maxheap_reorder(size_t k, T* bh_val, int64_t* bh_ids) {
338 return heap_reorder<CMax<T, int64_t>>(k, bh_val, bh_ids);
339 }
340
341 /*******************************************************************
342 * Operations on heap arrays
343 *******************************************************************/
344
345 /** a template structure for a set of [min|max]-heaps it is tailored
346 * so that the actual data of the heaps can just live in compact
347 * arrays.
348 */
349 template <typename C>
350 struct HeapArray {
351 typedef typename C::TI TI;
352 typedef typename C::T T;
353
354 size_t nh; ///< number of heaps
355 size_t k; ///< allocated size per heap
356 TI* ids; ///< identifiers (size nh * k)
357 T* val; ///< values (distances or similarities), size nh * k
358
359 /// Return the list of values for a heap
get_valHeapArray360 T* get_val(size_t key) {
361 return val + key * k;
362 }
363
364 /// Correspponding identifiers
get_idsHeapArray365 TI* get_ids(size_t key) {
366 return ids + key * k;
367 }
368
369 /// prepare all the heaps before adding
370 void heapify();
371
372 /** add nj elements to heaps i0:i0+ni, with sequential ids
373 *
374 * @param nj nb of elements to add to each heap
375 * @param vin elements to add, size ni * nj
376 * @param j0 add this to the ids that are added
377 * @param i0 first heap to update
378 * @param ni nb of elements to update (-1 = use nh)
379 */
380 void addn(
381 size_t nj,
382 const T* vin,
383 TI j0 = 0,
384 size_t i0 = 0,
385 int64_t ni = -1);
386
387 /** same as addn
388 *
389 * @param id_in ids of the elements to add, size ni * nj
390 * @param id_stride stride for id_in
391 */
392 void addn_with_ids(
393 size_t nj,
394 const T* vin,
395 const TI* id_in = nullptr,
396 int64_t id_stride = 0,
397 size_t i0 = 0,
398 int64_t ni = -1);
399
400 /// reorder all the heaps
401 void reorder();
402
403 /** this is not really a heap function. It just finds the per-line
404 * extrema of each line of array D
405 * @param vals_out extreme value of each line (size nh, or NULL)
406 * @param idx_out index of extreme value (size nh or NULL)
407 */
408 void per_line_extrema(T* vals_out, TI* idx_out) const;
409 };
410
411 /* Define useful heaps */
412 typedef HeapArray<CMin<float, int64_t>> float_minheap_array_t;
413 typedef HeapArray<CMin<int, int64_t>> int_minheap_array_t;
414
415 typedef HeapArray<CMax<float, int64_t>> float_maxheap_array_t;
416 typedef HeapArray<CMax<int, int64_t>> int_maxheap_array_t;
417
418 // The heap templates are instanciated explicitly in Heap.cpp
419
420 /*********************************************************************
421 * Indirect heaps: instead of having
422 *
423 * node i = (bh_ids[i], bh_val[i]),
424 *
425 * in indirect heaps,
426 *
427 * node i = (bh_ids[i], bh_val[bh_ids[i]]),
428 *
429 *********************************************************************/
430
431 template <class C>
indirect_heap_pop(size_t k,const typename C::T * bh_val,typename C::TI * bh_ids)432 inline void indirect_heap_pop(
433 size_t k,
434 const typename C::T* bh_val,
435 typename C::TI* bh_ids) {
436 bh_ids--; /* Use 1-based indexing for easier node->child translation */
437 typename C::T val = bh_val[bh_ids[k]];
438 size_t i = 1;
439 while (1) {
440 size_t i1 = i << 1;
441 size_t i2 = i1 + 1;
442 if (i1 > k)
443 break;
444 typename C::TI id1 = bh_ids[i1], id2 = bh_ids[i2];
445 if (i2 == k + 1 || C::cmp(bh_val[id1], bh_val[id2])) {
446 if (C::cmp(val, bh_val[id1]))
447 break;
448 bh_ids[i] = id1;
449 i = i1;
450 } else {
451 if (C::cmp(val, bh_val[id2]))
452 break;
453 bh_ids[i] = id2;
454 i = i2;
455 }
456 }
457 bh_ids[i] = bh_ids[k];
458 }
459
460 template <class C>
indirect_heap_push(size_t k,const typename C::T * bh_val,typename C::TI * bh_ids,typename C::TI id)461 inline void indirect_heap_push(
462 size_t k,
463 const typename C::T* bh_val,
464 typename C::TI* bh_ids,
465 typename C::TI id) {
466 bh_ids--; /* Use 1-based indexing for easier node->child translation */
467 typename C::T val = bh_val[id];
468 size_t i = k;
469 while (i > 1) {
470 size_t i_father = i >> 1;
471 if (!C::cmp(val, bh_val[bh_ids[i_father]]))
472 break;
473 bh_ids[i] = bh_ids[i_father];
474 i = i_father;
475 }
476 bh_ids[i] = id;
477 }
478
479 } // namespace faiss
480
481 #endif /* FAISS_Heap_h */
482