/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <faiss/Index.h>
#include <faiss/MetricType.h>
#include <faiss/gpu/GpuIndicesOptions.h>
#include <thrust/device_vector.h>
#include <faiss/gpu/utils/DeviceTensor.cuh>
#include <faiss/gpu/utils/DeviceVector.cuh>
#include <memory>
#include <vector>

namespace faiss {
// Forward declaration of the CPU-side inverted list representation, used by
// copyInvertedListsFrom / copyInvertedListsTo below
struct InvertedLists;
}

namespace faiss {
namespace gpu {

class GpuResources;
class FlatIndex;

/// Base inverted list functionality for IVFFlat and IVFPQ
class IVFBase {
   public:
    /// Construct the IVF base over `numLists` inverted lists (list count and
    /// dimensionality are taken from the quantizer).
    /// @param resources        GPU resource provider (streams, temp memory)
    /// @param metric           distance metric used for coarse assignment
    /// @param metricArg        extra metric argument (e.g. for parameterized
    ///                         metrics; exact semantics depend on `metric`)
    /// @param quantizer        coarse quantizer; we do not own this reference
    /// @param interleavedLayout whether list data uses the interleaved-by-32
    ///                         layout (see `interleavedLayout_` below)
    /// @param indicesOptions   where/how user indices are stored
    /// @param space            memory space for inverted list storage
    IVFBase(GpuResources* resources,
            faiss::MetricType metric,
            float metricArg,
            /// We do not own this reference
            FlatIndex* quantizer,
            bool interleavedLayout,
            IndicesOptions indicesOptions,
            MemorySpace space);

    virtual ~IVFBase();

    /// Reserve GPU memory in our inverted lists for this number of vectors
    void reserveMemory(size_t numVecs);

    /// Clear out all inverted lists, but retain the coarse quantizer
    /// and the product quantizer info
    void reset();

    /// Return the number of dimensions we are indexing
    int getDim() const;

    /// After adding vectors, one can call this to reclaim device memory
    /// to exactly the amount needed. Returns space reclaimed in bytes
    size_t reclaimMemory();

    /// Returns the number of inverted lists
    size_t getNumLists() const;

    /// For debugging purposes, return the list length of a particular
    /// list
    int getListLength(int listId) const;

    /// Return the list indices of a particular list back to the CPU
    std::vector<Index::idx_t> getListIndices(int listId) const;

    /// Return the encoded vectors of a particular list back to the CPU
    /// @param gpuFormat if true, the data is returned in the GPU-side
    ///        encoding; otherwise it is translated to the CPU encoding
    ///        (see translateCodesFromGpu_)
    std::vector<uint8_t> getListVectorData(int listId, bool gpuFormat) const;

    /// Copy all inverted lists from a CPU representation to ourselves
    void copyInvertedListsFrom(const InvertedLists* ivf);

    /// Copy all inverted lists from ourselves to a CPU representation
    void copyInvertedListsTo(InvertedLists* ivf);

    /// Classify and encode/add vectors to our IVF lists.
    /// The input data must be on our current device.
    /// Returns the number of vectors successfully added. Vectors may
    /// not be able to be added because they contain NaNs.
    int addVectors(
            Tensor<float, 2, true>& vecs,
            Tensor<Index::idx_t, 1, true>& indices);

   protected:
    /// Adds a set of codes and indices to a list, with the representation
    /// coming from the CPU equivalent
    void addEncodedVectorsToList_(
            int listId,
            // resident on the host
            const void* codes,
            // resident on the host
            const Index::idx_t* indices,
            size_t numVecs);

    /// Returns the number of bytes in which an IVF list containing numVecs
    /// vectors is encoded on the device. Note that due to padding this is not
    /// the same as the encoding size for a subset of vectors in an IVF list;
    /// this is the size for an entire IVF list
    virtual size_t getGpuVectorsEncodingSize_(int numVecs) const = 0;

    /// As above, but the size of the CPU-side encoding of an IVF list with
    /// numVecs vectors (the format produced by translateCodesFromGpu_)
    virtual size_t getCpuVectorsEncodingSize_(int numVecs) const = 0;

    /// Translate to our preferred GPU encoding
    virtual std::vector<uint8_t> translateCodesToGpu_(
            std::vector<uint8_t> codes,
            size_t numVecs) const = 0;

    /// Translate from our preferred GPU encoding
    virtual std::vector<uint8_t> translateCodesFromGpu_(
            std::vector<uint8_t> codes,
            size_t numVecs) const = 0;

    /// Append vectors to our on-device lists
    /// NOTE(review): parameter semantics inferred from names — the unique*
    /// tensors appear to describe, per distinct destination list, which
    /// vectors go to it and at what offset; confirm against the
    /// implementation in the derived classes.
    virtual void appendVectors_(
            Tensor<float, 2, true>& vecs,
            Tensor<Index::idx_t, 1, true>& indices,
            Tensor<int, 1, true>& uniqueLists,
            Tensor<int, 1, true>& vectorsByUniqueList,
            Tensor<int, 1, true>& uniqueListVectorStart,
            Tensor<int, 1, true>& uniqueListStartOffset,
            Tensor<int, 1, true>& listIds,
            Tensor<int, 1, true>& listOffset,
            cudaStream_t stream) = 0;

    /// Reclaim memory consumed on the device for our inverted lists
    /// `exact` means we trim exactly to the memory needed
    size_t reclaimMemory_(bool exact);

    /// Update all device-side list pointer and size information
    void updateDeviceListInfo_(cudaStream_t stream);

    /// For a set of list IDs, update device-side list pointer and size
    /// information
    void updateDeviceListInfo_(
            const std::vector<int>& listIds,
            cudaStream_t stream);

    /// Shared function to copy indices from CPU to GPU
    void addIndicesFromCpu_(
            int listId,
            const Index::idx_t* indices,
            size_t numVecs);

   protected:
    /// Collection of GPU resources that we use
    GpuResources* resources_;

    /// Metric type of the index
    faiss::MetricType metric_;

    /// Metric arg
    float metricArg_;

    /// Quantizer object (not owned by us; see constructor)
    FlatIndex* quantizer_;

    /// Expected dimensionality of the vectors
    const int dim_;

    /// Number of inverted lists we maintain
    const int numLists_;

    /// Whether or not our index uses an interleaved by 32 layout:
    /// The default memory layout is [vector][PQ/SQ component]:
    /// (v0 d0) (v0 d1) ... (v0 dD-1) (v1 d0) (v1 d1) ...
    ///
    /// The interleaved by 32 memory layout is:
    /// [vector / 32][PQ/SQ component][vector % 32] with padding:
    /// (v0 d0) (v1 d0) ... (v31 d0) (v0 d1) (v1 d1) ... (v31 dD-1) (v32 d0)
    /// (v33 d0) ... so the list length is always a multiple of num quantizers *
    /// 32
    bool interleavedLayout_;

    /// How are user indices stored on the GPU?
    const IndicesOptions indicesOptions_;

    /// What memory space our inverted list storage is in
    const MemorySpace space_;

    /// Device representation of all inverted list data
    /// id -> data
    thrust::device_vector<void*> deviceListDataPointers_;

    /// Device representation of all inverted list index pointers
    /// id -> data
    thrust::device_vector<void*> deviceListIndexPointers_;

    /// Device representation of all inverted list lengths
    /// id -> length in number of vectors
    thrust::device_vector<int> deviceListLengths_;

    /// Maximum list length seen (in number of vectors)
    int maxListLength_;

    /// Host-side handle to one on-device IVF list allocation
    struct DeviceIVFList {
        DeviceIVFList(GpuResources* res, const AllocInfo& info);

        /// The on-device memory for this particular IVF list
        DeviceVector<uint8_t> data;

        /// The number of vectors encoded in this list, which may be unrelated
        /// to the above allocated data size
        int numVecs;
    };

    /// Device memory for each separate list, as managed by the host.
    /// Device memory as stored in DeviceVector is stored as unique_ptr
    /// since deviceList*Pointers_ must remain valid despite
    /// resizing (and potential re-allocation) of deviceList*_
    std::vector<std::unique_ptr<DeviceIVFList>> deviceListData_;
    std::vector<std::unique_ptr<DeviceIVFList>> deviceListIndices_;

    /// If we are storing indices on the CPU (indicesOptions_ is
    /// INDICES_CPU), then this maintains a CPU-side map of what
    /// (inverted list id, offset) maps to which user index
    std::vector<std::vector<Index::idx_t>> listOffsetToUserIndex_;
};

} // namespace gpu
} // namespace faiss