/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <faiss/MetricType.h>
#include <faiss/gpu/GpuResources.h>
#include <faiss/gpu/utils/DeviceTensor.cuh>
#include <faiss/gpu/utils/DeviceVector.cuh>

namespace faiss {
namespace gpu {

// NOTE(review): redundant with the GpuResources.h include above; harmless,
// kept as-is.
class GpuResources;

/// Holder of GPU resources for a particular flat index
class FlatIndex {
   public:
    /// @param res             GPU resource provider used for allocations
    /// @param dim             dimensionality of each stored vector
    /// @param useFloat16      store vector data as float16 instead of float32
    /// @param storeTransposed also keep the vectors in a transposed layout
    ///                        (see storeTransposed_ below: faster queries,
    ///                        slower additions)
    /// @param space           memory space used for our allocations
    FlatIndex(
            GpuResources* res,
            int dim,
            bool useFloat16,
            bool storeTransposed,
            MemorySpace space);

    /// Whether or not this flat index primarily stores data in float16
    bool getUseFloat16() const;

    /// Returns the number of vectors we contain
    int getSize() const;

    /// Returns the dimensionality of the vectors
    int getDim() const;

    /// Reserve storage that can contain at least this many vectors
    void reserve(size_t numVecs, cudaStream_t stream);

    /// Returns the vectors based on the type desired; the FlatIndex must be of
    /// the same type (float16 or float32) to not assert
    template <typename T>
    Tensor<T, 2, true>& getVectorsRef();

    /// Returns a reference to our vectors currently in use
    /// (float32 storage mode)
    Tensor<float, 2, true>& getVectorsFloat32Ref();

    /// Returns a reference to our vectors currently in use (useFloat16 mode)
    Tensor<half, 2, true>& getVectorsFloat16Ref();

    /// Performs a copy of the vectors on the given device, converting
    /// as needed from float16
    DeviceTensor<float, 2, true> getVectorsFloat32Copy(cudaStream_t stream);

    /// Returns only a subset of the vectors: `num` vectors starting at
    /// index `from`, as a float32 copy (converting as needed from float16)
    DeviceTensor<float, 2, true> getVectorsFloat32Copy(
            int from,
            int num,
            cudaStream_t stream);

    /// Search our vectors with float32 query vectors under the given metric;
    /// per-query results are written to outDistances / outIndices, k results
    /// per query.
    /// NOTE(review): the exact semantics of `exactDistance` are not visible
    /// in this header — presumably it controls whether reported distances are
    /// exact rather than approximate/partial values; confirm against the
    /// implementation in FlatIndex.cu.
    void query(
            Tensor<float, 2, true>& vecs,
            int k,
            faiss::MetricType metric,
            float metricArg,
            Tensor<float, 2, true>& outDistances,
            Tensor<int, 2, true>& outIndices,
            bool exactDistance);

    /// Same as above, but with float16 query vectors
    void query(
            Tensor<half, 2, true>& vecs,
            int k,
            faiss::MetricType metric,
            float metricArg,
            Tensor<float, 2, true>& outDistances,
            Tensor<int, 2, true>& outIndices,
            bool exactDistance);

    /// Compute residual for set of vectors
    void computeResidual(
            Tensor<float, 2, true>& vecs,
            Tensor<int, 1, true>& listIds,
            Tensor<float, 2, true>& residuals);

    /// Gather vectors given the set of IDs
    void reconstruct(
            Tensor<int, 1, true>& listIds,
            Tensor<float, 2, true>& vecs);

    /// Gather vectors given a 2-d tensor of IDs; output is 3-d, one
    /// (num-IDs x dim) slice per row of listIds
    void reconstruct(
            Tensor<int, 2, true>& listIds,
            Tensor<float, 3, true>& vecs);

    /// Add vectors to ourselves; the pointer passed can be on the host
    /// or the device
    void add(const float* data, int numVecs, cudaStream_t stream);

    /// Free all storage
    void reset();

   private:
    /// Collection of GPU resources that we use
    GpuResources* resources_;

    /// Dimensionality of our vectors
    const int dim_;

    /// Float16 data format
    const bool useFloat16_;

    /// Store vectors in transposed layout for speed; makes addition to
    /// the index slower
    const bool storeTransposed_;

    /// Memory space for our allocations
    MemorySpace space_;

    /// How many vectors we have
    int num_;

    /// The underlying expandable storage
    DeviceVector<char> rawData_;

    /// Vectors currently in rawData_
    DeviceTensor<float, 2, true> vectors_;
    DeviceTensor<float, 2, true> vectorsTransposed_;

    /// Vectors currently in rawData_, float16 form
    DeviceTensor<half, 2, true> vectorsHalf_;
    DeviceTensor<half, 2, true> vectorsHalfTransposed_;

    /// Precomputed L2 norms
    DeviceTensor<float, 1, true> norms_;
};

} // namespace gpu
} // namespace faiss