/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <faiss/MetricType.h>
#include <faiss/gpu/GpuResources.h>
#include <faiss/gpu/utils/DeviceTensor.cuh>
#include <faiss/gpu/utils/DeviceVector.cuh>

namespace faiss {
namespace gpu {

class GpuResources;

/// Holder of GPU resources for a particular flat index
class FlatIndex {
   public:
    FlatIndex(
            GpuResources* res,
            int dim,
            bool useFloat16,
            bool storeTransposed,
            MemorySpace space);
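    // Example (a sketch; assumes a StandardGpuResources instance named
    // `provider` and 128-dimensional vectors kept as float32 in ordinary
    // device memory):
    //   FlatIndex index(
    //           provider.getResources().get(),
    //           128,
    //           false,
    //           false,
    //           MemorySpace::Device);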

    /// Whether or not this flat index primarily stores data in float16
    bool getUseFloat16() const;

    /// Returns the number of vectors we contain
    int getSize() const;

    /// Returns the dimensionality of the vectors
    int getDim() const;

    /// Reserve storage that can contain at least this many vectors
    void reserve(size_t numVecs, cudaStream_t stream);
    /// Returns a reference to our vectors in the requested type; the FlatIndex
    /// must store that type (float16 or float32), otherwise this asserts
    template <typename T>
    Tensor<T, 2, true>& getVectorsRef();
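    // Example (a sketch; `index` is assumed to have been constructed with
    // useFloat16 = false, so the float32 instantiation is the valid one):
    //   Tensor<float, 2, true>& vecs = index.getVectorsRef<float>();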

    /// Returns a reference to our vectors currently in use
    Tensor<float, 2, true>& getVectorsFloat32Ref();

    /// Returns a reference to our vectors currently in use (useFloat16 mode)
    Tensor<half, 2, true>& getVectorsFloat16Ref();

    /// Performs a copy of the vectors on the given device, converting
    /// as needed from float16
    DeviceTensor<float, 2, true> getVectorsFloat32Copy(cudaStream_t stream);
    /// Returns a copy of only a subset of the vectors, namely
    /// [from, from + num), converting as needed from float16
    DeviceTensor<float, 2, true> getVectorsFloat32Copy(
            int from,
            int num,
            cudaStream_t stream);
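    // Example (a sketch; copies the 8 stored vectors starting at index 16 as
    // float32; `stream` is the CUDA stream to run the copy on):
    //   DeviceTensor<float, 2, true> chunk =
    //           index.getVectorsFloat32Copy(16, 8, stream);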

    /// For each query vector, finds the k nearest stored vectors under the
    /// given metric, writing the resulting distances and indices of the
    /// stored vectors to the output tensors
    void query(
            Tensor<float, 2, true>& vecs,
            int k,
            faiss::MetricType metric,
            float metricArg,
            Tensor<float, 2, true>& outDistances,
            Tensor<int, 2, true>& outIndices,
            bool exactDistance);

    /// Same, but for half-precision (float16) query vectors
    void query(
            Tensor<half, 2, true>& vecs,
            int k,
            faiss::MetricType metric,
            float metricArg,
            Tensor<float, 2, true>& outDistances,
            Tensor<int, 2, true>& outIndices,
            bool exactDistance);
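    // Example (a sketch; `queries` [numQueries x dim], `distances`
    // [numQueries x 10] and `labels` [numQueries x 10] are assumed to be
    // pre-allocated device tensors):
    //   index.query(queries, 10, faiss::METRIC_L2, 0, distances, labels, true);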

    /// Compute the residual of each given vector with respect to the stored
    /// vector selected by the corresponding entry of listIds
    void computeResidual(
            Tensor<float, 2, true>& vecs,
            Tensor<int, 1, true>& listIds,
            Tensor<float, 2, true>& residuals);
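    // Example (a sketch; `queries`, `assignedIds` and `residuals` are assumed
    // to be pre-allocated device tensors, with residuals[i] receiving
    // queries[i] minus the stored vector at assignedIds[i]):
    //   index.computeResidual(queries, assignedIds, residuals);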

    /// Gather stored vectors given a set of IDs
    void reconstruct(
            Tensor<int, 1, true>& listIds,
            Tensor<float, 2, true>& vecs);

    /// Gather stored vectors given a 2-d table of IDs
    void reconstruct(
            Tensor<int, 2, true>& listIds,
            Tensor<float, 3, true>& vecs);
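    // Example (a sketch; gathers the stored vectors at `ids` [n] into `out`
    // [n x dim], both assumed to be pre-allocated device tensors):
    //   index.reconstruct(ids, out);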

    /// Add vectors to ourselves; the pointer passed can be on the host
    /// or the device
    void add(const float* data, int numVecs, cudaStream_t stream);
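    // Example (a sketch; `data` points to numVecs * dim contiguous float32
    // values on either the host or the device, and `stream` is the CUDA
    // stream to run the copy on):
    //   index.add(data, numVecs, stream);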

    /// Free all storage
    void reset();

   private:
    /// Collection of GPU resources that we use
    GpuResources* resources_;

    /// Dimensionality of our vectors
    const int dim_;
    /// Whether the vectors are stored as float16
    const bool useFloat16_;

    /// Store vectors in transposed layout for speed; makes addition to
    /// the index slower
    const bool storeTransposed_;

    /// Memory space for our allocations
    MemorySpace space_;

    /// How many vectors we have
    int num_;

    /// The underlying expandable storage
    DeviceVector<char> rawData_;

    /// Vectors currently in rawData_
    DeviceTensor<float, 2, true> vectors_;
    DeviceTensor<float, 2, true> vectorsTransposed_;

    /// Vectors currently in rawData_, float16 form
    DeviceTensor<half, 2, true> vectorsHalf_;
    DeviceTensor<half, 2, true> vectorsHalfTransposed_;

    /// Precomputed L2 norms
    DeviceTensor<float, 1, true> norms_;
};

} // namespace gpu
} // namespace faiss