1 /** 2 * Copyright (c) Facebook, Inc. and its affiliates. 3 * 4 * This source code is licensed under the MIT license found in the 5 * LICENSE file in the root directory of this source tree. 6 */ 7 8 // -*- c++ -*- 9 10 #ifndef FAISS_IVFLIB_H 11 #define FAISS_IVFLIB_H 12 13 /** Since IVF (inverted file) indexes are of so much use for 14 * large-scale use cases, we group a few functions related to them in 15 * this small library. Most functions work both on IndexIVFs and 16 * IndexIVFs embedded within an IndexPreTransform. 17 */ 18 19 #include <faiss/IndexIVF.h> 20 #include <vector> 21 22 namespace faiss { 23 namespace ivflib { 24 25 /** check if two indexes have the same parameters and are trained in 26 * the same way, otherwise throw. */ 27 void check_compatible_for_merge(const Index* index1, const Index* index2); 28 29 /** get an IndexIVF from an index. The index may be an IndexIVF or 30 * some wrapper class that encloses an IndexIVF 31 * 32 * throws an exception if this is not the case. 33 */ 34 const IndexIVF* extract_index_ivf(const Index* index); 35 IndexIVF* extract_index_ivf(Index* index); 36 37 /// same as above but returns nullptr instead of throwing on failure 38 const IndexIVF* try_extract_index_ivf(const Index* index); 39 IndexIVF* try_extract_index_ivf(Index* index); 40 41 /** Merge index1 into index0. Works on IndexIVF's and IndexIVF's 42 * embedded in a IndexPreTransform. On output, the index1 is empty. 43 * 44 * @param shift_ids: translate the ids from index1 to index0->prev_ntotal 45 */ 46 void merge_into(Index* index0, Index* index1, bool shift_ids); 47 48 typedef Index::idx_t idx_t; 49 50 /* Returns the cluster the embeddings belong to. 51 * 52 * @param index Index, which should be an IVF index 53 * (otherwise there are no clusters) 54 * @param embeddings object descriptors for which the centroids should be found, 55 * size num_objects * d 56 * @param centroid_ids 57 * cluster id each object belongs to, size num_objects 58 */ 59 void search_centroid(Index* index, const float* x, int n, idx_t* centroid_ids); 60 61 /* Returns the cluster the embeddings belong to. 62 * 63 * @param index Index, which should be an IVF index 64 * (otherwise there are no clusters) 65 * @param query_centroid_ids 66 * centroid ids corresponding to the query vectors (size n) 67 * @param result_centroid_ids 68 * centroid ids corresponding to the results (size n * k) 69 * other arguments are the same as the standard search function 70 */ 71 void search_and_return_centroids( 72 Index* index, 73 size_t n, 74 const float* xin, 75 long k, 76 float* distances, 77 idx_t* labels, 78 idx_t* query_centroid_ids, 79 idx_t* result_centroid_ids); 80 81 /** A set of IndexIVFs concatenated together in a FIFO fashion. 82 * at each "step", the oldest index slice is removed and a new index is added. 83 */ 84 struct SlidingIndexWindow { 85 /// common index that contains the sliding window 86 Index* index; 87 88 /// InvertedLists of index 89 ArrayInvertedLists* ils; 90 91 /// number of slices currently in index 92 int n_slice; 93 94 /// same as index->nlist 95 size_t nlist; 96 97 /// cumulative list sizes at each slice 98 std::vector<std::vector<size_t>> sizes; 99 100 /// index should be initially empty and trained 101 SlidingIndexWindow(Index* index); 102 103 /** Add one index to the current index and remove the oldest one. 104 * 105 * @param sub_index slice to swap in (can be NULL) 106 * @param remove_oldest if true, remove the oldest slices */ 107 void step(const Index* sub_index, bool remove_oldest); 108 }; 109 110 /// Get a subset of inverted lists [i0, i1) 111 ArrayInvertedLists* get_invlist_range(const Index* index, long i0, long i1); 112 113 /// Set a subset of inverted lists 114 void set_invlist_range(Index* index, long i0, long i1, ArrayInvertedLists* src); 115 116 /** search an IndexIVF, possibly embedded in an IndexPreTransform with 117 * given parameters. This is a way to set the nprobe and get 118 * statdistics in a thread-safe way. 119 * 120 * Optionally returns (if non-nullptr): 121 * - nb_dis: number of distances computed 122 * - ms_per_stage: [0]: preprocessing time 123 * [1]: coarse quantization, 124 * [2]: list scanning 125 */ 126 void search_with_parameters( 127 const Index* index, 128 idx_t n, 129 const float* x, 130 idx_t k, 131 float* distances, 132 idx_t* labels, 133 const IVFSearchParameters* params, 134 size_t* nb_dis = nullptr, 135 double* ms_per_stage = nullptr); 136 137 /** same as search_with_parameters but for range search */ 138 void range_search_with_parameters( 139 const Index* index, 140 idx_t n, 141 const float* x, 142 float radius, 143 RangeSearchResult* result, 144 const IVFSearchParameters* params, 145 size_t* nb_dis = nullptr, 146 double* ms_per_stage = nullptr); 147 148 } // namespace ivflib 149 } // namespace faiss 150 151 #endif 152