1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 // -*- c++ -*-
9 
10 #ifndef FAISS_IVFLIB_H
11 #define FAISS_IVFLIB_H
12 
13 /** Since IVF (inverted file) indexes are of so much use for
14  * large-scale use cases, we group a few functions related to them in
15  * this small library. Most functions work both on IndexIVFs and
16  * IndexIVFs embedded within an IndexPreTransform.
17  */
18 
19 #include <faiss/IndexIVF.h>
20 #include <vector>
21 
22 namespace faiss {
23 namespace ivflib {
24 
25 /** check if two indexes have the same parameters and are trained in
26  * the same way, otherwise throw. */
27 void check_compatible_for_merge(const Index* index1, const Index* index2);
28 
29 /** get an IndexIVF from an index. The index may be an IndexIVF or
30  * some wrapper class that encloses an IndexIVF
31  *
32  * throws an exception if this is not the case.
33  */
34 const IndexIVF* extract_index_ivf(const Index* index);
35 IndexIVF* extract_index_ivf(Index* index);
36 
37 /// same as above but returns nullptr instead of throwing on failure
38 const IndexIVF* try_extract_index_ivf(const Index* index);
39 IndexIVF* try_extract_index_ivf(Index* index);
40 
41 /** Merge index1 into index0. Works on IndexIVF's and IndexIVF's
42  *  embedded in a IndexPreTransform. On output, the index1 is empty.
43  *
44  * @param shift_ids: translate the ids from index1 to index0->prev_ntotal
45  */
46 void merge_into(Index* index0, Index* index1, bool shift_ids);
47 
48 typedef Index::idx_t idx_t;
49 
50 /* Returns the cluster the embeddings belong to.
51  *
52  * @param index      Index, which should be an IVF index
53  *                   (otherwise there are no clusters)
54  * @param embeddings object descriptors for which the centroids should be found,
55  *                   size num_objects * d
56  * @param centroid_ids
57  *                   cluster id each object belongs to, size num_objects
58  */
59 void search_centroid(Index* index, const float* x, int n, idx_t* centroid_ids);
60 
61 /* Returns the cluster the embeddings belong to.
62  *
63  * @param index      Index, which should be an IVF index
64  *                   (otherwise there are no clusters)
65  * @param query_centroid_ids
66  *                   centroid ids corresponding to the query vectors (size n)
67  * @param result_centroid_ids
68  *                   centroid ids corresponding to the results (size n * k)
69  * other arguments are the same as the standard search function
70  */
71 void search_and_return_centroids(
72         Index* index,
73         size_t n,
74         const float* xin,
75         long k,
76         float* distances,
77         idx_t* labels,
78         idx_t* query_centroid_ids,
79         idx_t* result_centroid_ids);
80 
81 /** A set of IndexIVFs concatenated together in a FIFO fashion.
82  * at each "step", the oldest index slice is removed and a new index is added.
83  */
84 struct SlidingIndexWindow {
85     /// common index that contains the sliding window
86     Index* index;
87 
88     /// InvertedLists of index
89     ArrayInvertedLists* ils;
90 
91     /// number of slices currently in index
92     int n_slice;
93 
94     /// same as index->nlist
95     size_t nlist;
96 
97     /// cumulative list sizes at each slice
98     std::vector<std::vector<size_t>> sizes;
99 
100     /// index should be initially empty and trained
101     SlidingIndexWindow(Index* index);
102 
103     /** Add one index to the current index and remove the oldest one.
104      *
105      * @param sub_index        slice to swap in (can be NULL)
106      * @param remove_oldest    if true, remove the oldest slices */
107     void step(const Index* sub_index, bool remove_oldest);
108 };
109 
110 /// Get a subset of inverted lists [i0, i1)
111 ArrayInvertedLists* get_invlist_range(const Index* index, long i0, long i1);
112 
113 /// Set a subset of inverted lists
114 void set_invlist_range(Index* index, long i0, long i1, ArrayInvertedLists* src);
115 
116 /** search an IndexIVF, possibly embedded in an IndexPreTransform with
117  * given parameters. This is a way to set the nprobe and get
118  * statdistics in a thread-safe way.
119  *
120  * Optionally returns (if non-nullptr):
121  * - nb_dis: number of distances computed
122  * - ms_per_stage: [0]: preprocessing time
123  *                 [1]: coarse quantization,
124  *                 [2]: list scanning
125  */
126 void search_with_parameters(
127         const Index* index,
128         idx_t n,
129         const float* x,
130         idx_t k,
131         float* distances,
132         idx_t* labels,
133         const IVFSearchParameters* params,
134         size_t* nb_dis = nullptr,
135         double* ms_per_stage = nullptr);
136 
137 /** same as search_with_parameters but for range search */
138 void range_search_with_parameters(
139         const Index* index,
140         idx_t n,
141         const float* x,
142         float radius,
143         RangeSearchResult* result,
144         const IVFSearchParameters* params,
145         size_t* nb_dis = nullptr,
146         double* ms_per_stage = nullptr);
147 
148 } // namespace ivflib
149 } // namespace faiss
150 
151 #endif
152