/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <faiss/gpu/utils/DeviceUtils.h>
#include <faiss/gpu/impl/Distance.cuh>
#include <faiss/gpu/impl/FlatIndex.cuh>
#include <faiss/gpu/impl/L2Norm.cuh>
#include <faiss/gpu/impl/VectorResidual.cuh>
#include <faiss/gpu/utils/ConversionOperators.cuh>
#include <faiss/gpu/utils/CopyUtils.cuh>
#include <faiss/gpu/utils/Transpose.cuh>

namespace faiss {
namespace gpu {

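// Usage (a minimal sketch; within FAISS this class is normally driven
// by GpuIndexFlat rather than used directly):
//
//   FlatIndex index(resources, dim,
//                   /*useFloat16=*/false,
//                   /*storeTransposed=*/false,
//                   MemorySpace::Device);
//   index.add(vectorData, numVecs, stream);
//   index.query(queries, k, metric, metricArg,
//               outDistances, outIndices, /*exactDistance=*/true);
//
// All vector data lives in rawData_, a single untyped device
// allocation; typed views (vectors_ / vectorsHalf_) are re-created
// over it each time vectors are added.
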
FlatIndex::FlatIndex(
        GpuResources* res,
        int dim,
        bool useFloat16,
        bool storeTransposed,
        MemorySpace space)
        : resources_(res),
          dim_(dim),
          useFloat16_(useFloat16),
          storeTransposed_(storeTransposed),
          space_(space),
          num_(0),
          rawData_(
                  res,
                  AllocInfo(
                          AllocType::FlatData,
                          getCurrentDevice(),
                          space,
                          res->getDefaultStreamCurrentDevice())) {}

bool FlatIndex::getUseFloat16() const {
    return useFloat16_;
}

/// Returns the number of vectors we contain
int FlatIndex::getSize() const {
    if (useFloat16_) {
        return vectorsHalf_.getSize(0);
    } else {
        return vectors_.getSize(0);
    }
}

int FlatIndex::getDim() const {
    if (useFloat16_) {
        return vectorsHalf_.getSize(1);
    } else {
        return vectors_.getSize(1);
    }
}

void FlatIndex::reserve(size_t numVecs, cudaStream_t stream) {
    if (useFloat16_) {
        rawData_.reserve(numVecs * dim_ * sizeof(half), stream);
    } else {
        rawData_.reserve(numVecs * dim_ * sizeof(float), stream);
    }
}

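// getVectorsRef<T> is specialized for the two supported storage types;
// each specialization asserts that the index is in the matching mode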
template <>
Tensor<float, 2, true>& FlatIndex::getVectorsRef<float>() {
    // Should not call this unless we are in float32 mode
    FAISS_ASSERT(!useFloat16_);
    return getVectorsFloat32Ref();
}

template <>
Tensor<half, 2, true>& FlatIndex::getVectorsRef<half>() {
    // Should not call this unless we are in float16 mode
    FAISS_ASSERT(useFloat16_);
    return getVectorsFloat16Ref();
}

Tensor<float, 2, true>& FlatIndex::getVectorsFloat32Ref() {
    // Should not call this unless we are in float32 mode
    FAISS_ASSERT(!useFloat16_);

    return vectors_;
}

Tensor<half, 2, true>& FlatIndex::getVectorsFloat16Ref() {
    // Should not call this unless we are in float16 mode
    FAISS_ASSERT(useFloat16_);

    return vectorsHalf_;
}

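// Returns a float32 copy of the stored vectors, converting from
// float16 on the fly when that is the storage format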
DeviceTensor<float, 2, true> FlatIndex::getVectorsFloat32Copy(
        cudaStream_t stream) {
    return getVectorsFloat32Copy(0, num_, stream);
}

DeviceTensor<float, 2, true> FlatIndex::getVectorsFloat32Copy(
        int from,
        int num,
        cudaStream_t stream) {
    DeviceTensor<float, 2, true> vecFloat32(
            resources_, makeDevAlloc(AllocType::Other, stream), {num, dim_});

    if (useFloat16_) {
        auto halfNarrow = vectorsHalf_.narrowOutermost(from, num);
        convertTensor<half, float, 2>(stream, halfNarrow, vecFloat32);
    } else {
        // Narrow to the requested [from, from + num) range; copying all
        // of vectors_ here would ignore `from` and `num`
        auto floatNarrow = vectors_.narrowOutermost(from, num);
        floatNarrow.copyTo(vecFloat32, stream);
    }

    return vecFloat32;
}

void FlatIndex::query(
        Tensor<float, 2, true>& input,
        int k,
        faiss::MetricType metric,
        float metricArg,
        Tensor<float, 2, true>& outDistances,
        Tensor<int, 2, true>& outIndices,
        bool exactDistance) {
    auto stream = resources_->getDefaultStreamCurrentDevice();

    if (useFloat16_) {
        // We need to convert the input to float16 for comparison to ourselves
        auto inputHalf = convertTensorTemporary<float, half, 2>(
                resources_, stream, input);

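        // Recurse into the float16 overload below; distances are still
        // computed into the float32 output tensors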
        query(inputHalf,
              k,
              metric,
              metricArg,
              outDistances,
              outIndices,
              exactDistance);
    } else {
        bfKnnOnDevice(
                resources_,
                getCurrentDevice(),
                stream,
                storeTransposed_ ? vectorsTransposed_ : vectors_,
                !storeTransposed_, // is vectors row major?
                &norms_,
                input,
                true, // input is row major
                k,
                metric,
                metricArg,
                outDistances,
                outIndices,
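                // when exactDistance is false, the caller only needs
                // the indices, so brute-force kNN is free to report
                // approximate distances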
                !exactDistance);
    }
}

void FlatIndex::query(
        Tensor<half, 2, true>& input,
        int k,
        faiss::MetricType metric,
        float metricArg,
        Tensor<float, 2, true>& outDistances,
        Tensor<int, 2, true>& outIndices,
        bool exactDistance) {
    FAISS_ASSERT(useFloat16_);

    bfKnnOnDevice(
            resources_,
            getCurrentDevice(),
            resources_->getDefaultStreamCurrentDevice(),
            storeTransposed_ ? vectorsHalfTransposed_ : vectorsHalf_,
            !storeTransposed_, // is vectors row major?
            &norms_,
            input,
            true, // input is row major
            k,
            metric,
            metricArg,
            outDistances,
            outIndices,
            !exactDistance);
}

void FlatIndex::computeResidual(
        Tensor<float, 2, true>& vecs,
        Tensor<int, 1, true>& listIds,
        Tensor<float, 2, true>& residuals) {
    if (useFloat16_) {
        runCalcResidual(
                vecs,
                getVectorsFloat16Ref(),
                listIds,
                residuals,
                resources_->getDefaultStreamCurrentDevice());
    } else {
        runCalcResidual(
                vecs,
                getVectorsFloat32Ref(),
                listIds,
                residuals,
                resources_->getDefaultStreamCurrentDevice());
    }
}

void FlatIndex::reconstruct(
        Tensor<int, 1, true>& listIds,
        Tensor<float, 2, true>& vecs) {
    if (useFloat16_) {
        runReconstruct(
                listIds,
                getVectorsFloat16Ref(),
                vecs,
                resources_->getDefaultStreamCurrentDevice());
    } else {
        runReconstruct(
                listIds,
                getVectorsFloat32Ref(),
                vecs,
                resources_->getDefaultStreamCurrentDevice());
    }
}

void FlatIndex::reconstruct(
        Tensor<int, 2, true>& listIds,
        Tensor<float, 3, true>& vecs) {
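    // Flatten the batch dimension and defer to the 1-d overload above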
    auto listIds1 = listIds.downcastOuter<1>();
    auto vecs2 = vecs.downcastOuter<2>();

    reconstruct(listIds1, vecs2);
}

void FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) {
    if (numVecs == 0) {
        return;
    }

    if (useFloat16_) {
        // Make sure that `data` is on our device; we'll run the
        // conversion on our device
        auto devData = toDeviceTemporary<float, 2>(
                resources_,
                getCurrentDevice(),
                (float*)data,
                stream,
                {numVecs, dim_});

        auto devDataHalf = convertTensorTemporary<float, half, 2>(
                resources_, stream, devData);

        rawData_.append(
                (char*)devDataHalf.data(),
                devDataHalf.getSizeInBytes(),
                stream,
                true /* reserve exactly */);
    } else {
        rawData_.append(
                (char*)data,
                (size_t)dim_ * numVecs * sizeof(float),
                stream,
                true /* reserve exactly */);
    }

    num_ += numVecs;

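    // Rebuild the typed tensor views over the (possibly reallocated)
    // raw storage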
    if (useFloat16_) {
        DeviceTensor<half, 2, true> vectorsHalf(
                (half*)rawData_.data(), {(int)num_, dim_});
        vectorsHalf_ = std::move(vectorsHalf);
    } else {
        DeviceTensor<float, 2, true> vectors(
                (float*)rawData_.data(), {(int)num_, dim_});
        vectors_ = std::move(vectors);
    }

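    // If requested, also maintain a transposed (dim x num) copy of the
    // data, regenerated in full on each add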
    if (storeTransposed_) {
        if (useFloat16_) {
            vectorsHalfTransposed_ = DeviceTensor<half, 2, true>(
                    resources_,
                    makeSpaceAlloc(AllocType::FlatData, space_, stream),
                    {dim_, (int)num_});
            runTransposeAny(vectorsHalf_, 0, 1, vectorsHalfTransposed_, stream);
        } else {
            vectorsTransposed_ = DeviceTensor<float, 2, true>(
                    resources_,
                    makeSpaceAlloc(AllocType::FlatData, space_, stream),
                    {dim_, (int)num_});
            runTransposeAny(vectors_, 0, 1, vectorsTransposed_, stream);
        }
    }

    // Precompute L2 norms of our database
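    // (the boolean arguments to runL2Norm request a row-major input
    // and squared norms, respectively)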
    if (useFloat16_) {
        DeviceTensor<float, 1, true> norms(
                resources_,
                makeSpaceAlloc(AllocType::FlatData, space_, stream),
                {(int)num_});
        runL2Norm(vectorsHalf_, true, norms, true, stream);
        norms_ = std::move(norms);
    } else {
        DeviceTensor<float, 1, true> norms(
                resources_,
                makeSpaceAlloc(AllocType::FlatData, space_, stream),
                {(int)num_});
        runL2Norm(vectors_, true, norms, true, stream);
        norms_ = std::move(norms);
    }
}

void FlatIndex::reset() {
    rawData_.clear();
    vectors_ = DeviceTensor<float, 2, true>();
    vectorsTransposed_ = DeviceTensor<float, 2, true>();
    vectorsHalf_ = DeviceTensor<half, 2, true>();
    vectorsHalfTransposed_ = DeviceTensor<half, 2, true>();
    norms_ = DeviceTensor<float, 1, true>();
    num_ = 0;
}

} // namespace gpu
} // namespace faiss