/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <faiss/gpu/utils/DeviceUtils.h>
#include <faiss/gpu/impl/Distance.cuh>
#include <faiss/gpu/impl/FlatIndex.cuh>
#include <faiss/gpu/impl/L2Norm.cuh>
#include <faiss/gpu/impl/VectorResidual.cuh>
#include <faiss/gpu/utils/ConversionOperators.cuh>
#include <faiss/gpu/utils/CopyUtils.cuh>
#include <faiss/gpu/utils/Transpose.cuh>

namespace faiss {
namespace gpu {

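// Raw vector data lives in rawData_ (allocated from the FlatData pool on the
// current device); typed views over it (vectors_ / vectorsHalf_) are rebuilt
// after each add()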
FlatIndex::FlatIndex(
        GpuResources* res,
        int dim,
        bool useFloat16,
        bool storeTransposed,
        MemorySpace space)
        : resources_(res),
          dim_(dim),
          useFloat16_(useFloat16),
          storeTransposed_(storeTransposed),
          space_(space),
          num_(0),
          rawData_(
                  res,
                  AllocInfo(
                          AllocType::FlatData,
                          getCurrentDevice(),
                          space,
                          res->getDefaultStreamCurrentDevice())) {}

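/// Returns whether this index stores its vectors as float16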
bool FlatIndex::getUseFloat16() const {
    return useFloat16_;
}

/// Returns the number of vectors we contain
int FlatIndex::getSize() const {
    if (useFloat16_) {
        return vectorsHalf_.getSize(0);
    } else {
        return vectors_.getSize(0);
    }
}

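/// Returns the dimensionality of the vectors we contain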
int FlatIndex::getDim() const {
    if (useFloat16_) {
        return vectorsHalf_.getSize(1);
    } else {
        return vectors_.getSize(1);
    }
}

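/// Reserves (pre-allocates) raw storage for at least numVecs vectors without
/// adding any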
void FlatIndex::reserve(size_t numVecs, cudaStream_t stream) {
    if (useFloat16_) {
        rawData_.reserve(numVecs * dim_ * sizeof(half), stream);
    } else {
        rawData_.reserve(numVecs * dim_ * sizeof(float), stream);
    }
}

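/// Returns a reference to our stored vectors in whichever precision (float32
/// or float16) is in use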
template <>
Tensor<float, 2, true>& FlatIndex::getVectorsRef<float>() {
    // Should not call this unless we are in float32 mode
    FAISS_ASSERT(!useFloat16_);
    return getVectorsFloat32Ref();
}

template <>
Tensor<half, 2, true>& FlatIndex::getVectorsRef<half>() {
    // Should not call this unless we are in float16 mode
    FAISS_ASSERT(useFloat16_);
    return getVectorsFloat16Ref();
}

Tensor<float, 2, true>& FlatIndex::getVectorsFloat32Ref() {
    // Should not call this unless we are in float32 mode
    FAISS_ASSERT(!useFloat16_);

    return vectors_;
}

Tensor<half, 2, true>& FlatIndex::getVectorsFloat16Ref() {
    // Should not call this unless we are in float16 mode
    FAISS_ASSERT(useFloat16_);

    return vectorsHalf_;
}

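/// Returns a float32 copy of all of our vectors, converting from float16 if
/// necessary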
DeviceTensor<float, 2, true> FlatIndex::getVectorsFloat32Copy(
        cudaStream_t stream) {
    return getVectorsFloat32Copy(0, num_, stream);
}

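/// Returns a float32 copy of the vectors in the range [from, from + num)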
DeviceTensor<float, 2, true> FlatIndex::getVectorsFloat32Copy(
        int from,
        int num,
        cudaStream_t stream) {
    DeviceTensor<float, 2, true> vecFloat32(
            resources_, makeDevAlloc(AllocType::Other, stream), {num, dim_});

    if (useFloat16_) {
        auto halfNarrow = vectorsHalf_.narrowOutermost(from, num);
        convertTensor<half, float, 2>(stream, halfNarrow, vecFloat32);
    } else {
        // Narrow to the requested range, mirroring the float16 path above
        auto floatNarrow = vectors_.narrowOutermost(from, num);
        floatNarrow.copyTo(vecFloat32, stream);
    }

    return vecFloat32;
}

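/// Performs a brute-force k-NN query of `input` against our stored vectors,
/// writing the k best distances and indices per query. In float16 mode the
/// input is first converted to half precision.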
void FlatIndex::query(
        Tensor<float, 2, true>& input,
        int k,
        faiss::MetricType metric,
        float metricArg,
        Tensor<float, 2, true>& outDistances,
        Tensor<int, 2, true>& outIndices,
        bool exactDistance) {
    auto stream = resources_->getDefaultStreamCurrentDevice();

    if (useFloat16_) {
        // We need to convert the input to float16 for comparison to ourselves
        auto inputHalf = convertTensorTemporary<float, half, 2>(
                resources_, stream, input);

        query(inputHalf,
              k,
              metric,
              metricArg,
              outDistances,
              outIndices,
              exactDistance);
    } else {
        bfKnnOnDevice(
                resources_,
                getCurrentDevice(),
                stream,
                storeTransposed_ ? vectorsTransposed_ : vectors_,
                !storeTransposed_, // is vectors row major?
                &norms_,
                input,
                true, // input is row major
                k,
                metric,
                metricArg,
                outDistances,
                outIndices,
                !exactDistance);
    }
}

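/// float16 query path; only valid when this index stores float16 vectors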
void FlatIndex::query(
        Tensor<half, 2, true>& input,
        int k,
        faiss::MetricType metric,
        float metricArg,
        Tensor<float, 2, true>& outDistances,
        Tensor<int, 2, true>& outIndices,
        bool exactDistance) {
    FAISS_ASSERT(useFloat16_);

    bfKnnOnDevice(
            resources_,
            getCurrentDevice(),
            resources_->getDefaultStreamCurrentDevice(),
            storeTransposed_ ? vectorsHalfTransposed_ : vectorsHalf_,
            !storeTransposed_, // is vectors row major?
            &norms_,
            input,
            true, // input is row major
            k,
            metric,
            metricArg,
            outDistances,
            outIndices,
            !exactDistance);
}

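/// Computes residuals[i] = vecs[i] - storedVectors[listIds[i]]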
void FlatIndex::computeResidual(
        Tensor<float, 2, true>& vecs,
        Tensor<int, 1, true>& listIds,
        Tensor<float, 2, true>& residuals) {
    if (useFloat16_) {
        runCalcResidual(
                vecs,
                getVectorsFloat16Ref(),
                listIds,
                residuals,
                resources_->getDefaultStreamCurrentDevice());
    } else {
        runCalcResidual(
                vecs,
                getVectorsFloat32Ref(),
                listIds,
                residuals,
                resources_->getDefaultStreamCurrentDevice());
    }
}

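/// Gathers the stored vectors at listIds into vecs, converting to float32 if
/// stored as float16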
void FlatIndex::reconstruct(
        Tensor<int, 1, true>& listIds,
        Tensor<float, 2, true>& vecs) {
    if (useFloat16_) {
        runReconstruct(
                listIds,
                getVectorsFloat16Ref(),
                vecs,
                resources_->getDefaultStreamCurrentDevice());
    } else {
        runReconstruct(
                listIds,
                getVectorsFloat32Ref(),
                vecs,
                resources_->getDefaultStreamCurrentDevice());
    }
}

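/// Batched variant: flattens the 2-d id and 3-d output tensors and forwards to
/// the 1-d version above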
void FlatIndex::reconstruct(
        Tensor<int, 2, true>& listIds,
        Tensor<float, 3, true>& vecs) {
    auto listIds1 = listIds.downcastOuter<1>();
    auto vecs2 = vecs.downcastOuter<2>();

    reconstruct(listIds1, vecs2);
}

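/// Appends numVecs vectors from `data` (which may be on the host or another
/// device) to our storage, converting to float16 if needed. Transposed copies
/// and precomputed L2 norms are then rebuilt over the enlarged database.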
void FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) {
    if (numVecs == 0) {
        return;
    }

    if (useFloat16_) {
        // Make sure that `data` is on our device; we'll run the
        // conversion on our device
        auto devData = toDeviceTemporary<float, 2>(
                resources_,
                getCurrentDevice(),
                (float*)data,
                stream,
                {numVecs, dim_});

        auto devDataHalf = convertTensorTemporary<float, half, 2>(
                resources_, stream, devData);

        rawData_.append(
                (char*)devDataHalf.data(),
                devDataHalf.getSizeInBytes(),
                stream,
                true /* reserve exactly */);
    } else {
        rawData_.append(
                (char*)data,
                (size_t)dim_ * numVecs * sizeof(float),
                stream,
                true /* reserve exactly */);
    }

    num_ += numVecs;

    if (useFloat16_) {
        DeviceTensor<half, 2, true> vectorsHalf(
                (half*)rawData_.data(), {(int)num_, dim_});
        vectorsHalf_ = std::move(vectorsHalf);
    } else {
        DeviceTensor<float, 2, true> vectors(
                (float*)rawData_.data(), {(int)num_, dim_});
        vectors_ = std::move(vectors);
    }

    if (storeTransposed_) {
        if (useFloat16_) {
            vectorsHalfTransposed_ = DeviceTensor<half, 2, true>(
                    resources_,
                    makeSpaceAlloc(AllocType::FlatData, space_, stream),
                    {dim_, (int)num_});
            runTransposeAny(vectorsHalf_, 0, 1, vectorsHalfTransposed_, stream);
        } else {
            vectorsTransposed_ = DeviceTensor<float, 2, true>(
                    resources_,
                    makeSpaceAlloc(AllocType::FlatData, space_, stream),
                    {dim_, (int)num_});
            runTransposeAny(vectors_, 0, 1, vectorsTransposed_, stream);
        }
    }

    // Precompute L2 norms of our database
    if (useFloat16_) {
        DeviceTensor<float, 1, true> norms(
                resources_,
                makeSpaceAlloc(AllocType::FlatData, space_, stream),
                {(int)num_});
        runL2Norm(vectorsHalf_, true, norms, true, stream);
        norms_ = std::move(norms);
    } else {
        DeviceTensor<float, 1, true> norms(
                resources_,
                makeSpaceAlloc(AllocType::FlatData, space_, stream),
                {(int)num_});
        runL2Norm(vectors_, true, norms, true, stream);
        norms_ = std::move(norms);
    }
}

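/// Frees all stored vectors and derived data, returning the index to an empty
/// state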
void FlatIndex::reset() {
    rawData_.clear();
    vectors_ = DeviceTensor<float, 2, true>();
    vectorsTransposed_ = DeviceTensor<float, 2, true>();
    vectorsHalf_ = DeviceTensor<half, 2, true>();
    vectorsHalfTransposed_ = DeviceTensor<half, 2, true>();
    norms_ = DeviceTensor<float, 1, true>();
    num_ = 0;
}

} // namespace gpu
} // namespace faiss