1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 #pragma once
9 
10 #include <faiss/Index.h>
11 #include <faiss/MetricType.h>
12 #include <faiss/gpu/GpuIndicesOptions.h>
13 #include <thrust/device_vector.h>
14 #include <faiss/gpu/utils/DeviceTensor.cuh>
15 #include <faiss/gpu/utils/DeviceVector.cuh>
16 #include <memory>
17 #include <vector>
18 
19 namespace faiss {
20 struct InvertedLists;
21 }
22 
23 namespace faiss {
24 namespace gpu {
25 
26 class GpuResources;
27 class FlatIndex;
28 
29 /// Base inverted list functionality for IVFFlat and IVFPQ
/// Base inverted list functionality for IVFFlat and IVFPQ.
/// Manages per-list device storage (codes + user indices), host-side
/// mirrors of list metadata, and CPU<->GPU inverted list conversion.
/// Encoding-specific behavior is supplied by subclasses via the pure
/// virtual hooks below.
class IVFBase {
   public:
    /// Construct the IVF base state.
    /// @param resources        GPU resource provider (streams, memory);
    ///                         not owned
    /// @param metric           distance metric used by the index
    /// @param metricArg        extra argument for the metric (meaning
    ///                         depends on the metric type)
    IVFBase(GpuResources* resources,
            faiss::MetricType metric,
            float metricArg,
            /// We do not own this reference
            FlatIndex* quantizer,
            bool interleavedLayout,
            IndicesOptions indicesOptions,
            MemorySpace space);

    virtual ~IVFBase();

    /// Reserve GPU memory in our inverted lists for this number of vectors
    /// (total across all lists), to reduce reallocation during adds
    void reserveMemory(size_t numVecs);

    /// Clear out all inverted lists, but retain the coarse quantizer
    /// and the product quantizer info
    void reset();

    /// Return the number of dimensions we are indexing
    int getDim() const;

    /// After adding vectors, one can call this to reclaim device memory
    /// to exactly the amount needed. Returns space reclaimed in bytes
    size_t reclaimMemory();

    /// Returns the number of inverted lists
    size_t getNumLists() const;

    /// For debugging purposes, return the list length (in number of
    /// vectors) of a particular list
    int getListLength(int listId) const;

    /// Return the list indices of a particular list back to the CPU
    std::vector<Index::idx_t> getListIndices(int listId) const;

    /// Return the encoded vectors of a particular list back to the CPU.
    /// If gpuFormat is set, the data is returned in the device-native
    /// layout; otherwise it is translated to the CPU-side encoding
    std::vector<uint8_t> getListVectorData(int listId, bool gpuFormat) const;

    /// Copy all inverted lists from a CPU representation to ourselves
    void copyInvertedListsFrom(const InvertedLists* ivf);

    /// Copy all inverted lists from ourselves to a CPU representation
    void copyInvertedListsTo(InvertedLists* ivf);

    /// Classify and encode/add vectors to our IVF lists.
    /// The input data must be on our current device.
    /// Returns the number of vectors successfully added. Vectors may
    /// not be able to be added because they contain NaNs.
    int addVectors(
            Tensor<float, 2, true>& vecs,
            Tensor<Index::idx_t, 1, true>& indices);

   protected:
    /// Adds a set of codes and indices to a list, with the representation
    /// coming from the CPU equivalent
    void addEncodedVectorsToList_(
            int listId,
            // resident on the host
            const void* codes,
            // resident on the host
            const Index::idx_t* indices,
            size_t numVecs);

    /// Returns the number of bytes in which an IVF list containing numVecs
    /// vectors is encoded on the device. Note that due to padding this is not
    /// the same as the encoding size for a subset of vectors in an IVF list;
    /// this is the size for an entire IVF list
    virtual size_t getGpuVectorsEncodingSize_(int numVecs) const = 0;

    /// Returns the number of bytes for numVecs vectors in the CPU-side
    /// (faiss::InvertedLists) encoding
    virtual size_t getCpuVectorsEncodingSize_(int numVecs) const = 0;

    /// Translate to our preferred GPU encoding
    virtual std::vector<uint8_t> translateCodesToGpu_(
            std::vector<uint8_t> codes,
            size_t numVecs) const = 0;

    /// Translate from our preferred GPU encoding
    virtual std::vector<uint8_t> translateCodesFromGpu_(
            std::vector<uint8_t> codes,
            size_t numVecs) const = 0;

    /// Append vectors to our on-device lists. Implemented by subclasses
    /// with encoding-specific append kernels; all work is ordered on
    /// `stream`
    virtual void appendVectors_(
            Tensor<float, 2, true>& vecs,
            Tensor<Index::idx_t, 1, true>& indices,
            Tensor<int, 1, true>& uniqueLists,
            Tensor<int, 1, true>& vectorsByUniqueList,
            Tensor<int, 1, true>& uniqueListVectorStart,
            Tensor<int, 1, true>& uniqueListStartOffset,
            Tensor<int, 1, true>& listIds,
            Tensor<int, 1, true>& listOffset,
            cudaStream_t stream) = 0;

    /// Reclaim memory consumed on the device for our inverted lists
    /// `exact` means we trim exactly to the memory needed.
    /// Returns space reclaimed in bytes
    size_t reclaimMemory_(bool exact);

    /// Update all device-side list pointer and size information
    void updateDeviceListInfo_(cudaStream_t stream);

    /// For a set of list IDs, update device-side list pointer and size
    /// information
    void updateDeviceListInfo_(
            const std::vector<int>& listIds,
            cudaStream_t stream);

    /// Shared function to copy indices from CPU to GPU
    void addIndicesFromCpu_(
            int listId,
            const Index::idx_t* indices,
            size_t numVecs);

   protected:
    /// Collection of GPU resources that we use; not owned by us
    GpuResources* resources_;

    /// Metric type of the index
    faiss::MetricType metric_;

    /// Metric argument (interpretation depends on metric_)
    float metricArg_;

    /// Coarse quantizer object; not owned by us (see constructor)
    FlatIndex* quantizer_;

    /// Expected dimensionality of the vectors
    const int dim_;

    /// Number of inverted lists we maintain
    const int numLists_;

    /// Whether or not our index uses an interleaved by 32 layout:
    /// The default memory layout is [vector][PQ/SQ component]:
    /// (v0 d0) (v0 d1) ... (v0 dD-1) (v1 d0) (v1 d1) ...
    ///
    /// The interleaved by 32 memory layout is:
    /// [vector / 32][PQ/SQ component][vector % 32] with padding:
    /// (v0 d0) (v1 d0) ... (v31 d0) (v0 d1) (v1 d1) ... (v31 dD-1) (v32 d0)
    /// (v33 d0) ... so the list length is always a multiple of num quantizers *
    /// 32
    bool interleavedLayout_;

    /// How are user indices stored on the GPU?
    const IndicesOptions indicesOptions_;

    /// What memory space our inverted list storage is in
    const MemorySpace space_;

    /// Device representation of all inverted list data
    /// id -> data
    thrust::device_vector<void*> deviceListDataPointers_;

    /// Device representation of all inverted list index pointers
    /// id -> data
    thrust::device_vector<void*> deviceListIndexPointers_;

    /// Device representation of all inverted list lengths
    /// id -> length in number of vectors
    thrust::device_vector<int> deviceListLengths_;

    /// Maximum list length seen (in number of vectors)
    int maxListLength_;

    /// Host-managed storage for a single IVF list's device memory
    struct DeviceIVFList {
        DeviceIVFList(GpuResources* res, const AllocInfo& info);

        /// The on-device memory for this particular IVF list
        DeviceVector<uint8_t> data;

        /// The number of vectors encoded in this list, which may be unrelated
        /// to the above allocated data size
        int numVecs;
    };

    /// Device memory for each separate list, as managed by the host.
    /// Device memory as stored in DeviceVector is stored as unique_ptr
    /// since deviceList*Pointers_ must remain valid despite
    /// resizing (and potential re-allocation) of deviceList*_
    std::vector<std::unique_ptr<DeviceIVFList>> deviceListData_;
    std::vector<std::unique_ptr<DeviceIVFList>> deviceListIndices_;

    /// If we are storing indices on the CPU (indicesOptions_ is
    /// INDICES_CPU), then this maintains a CPU-side map of what
    /// (inverted list id, offset) maps to which user index
    std::vector<std::vector<Index::idx_t>> listOffsetToUserIndex_;
};
217 
218 } // namespace gpu
219 } // namespace faiss
220