1 /**
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
#include <faiss/IndexFlat.h>
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <faiss/gpu/test/TestUtils.h>
#include <faiss/gpu/utils/DeviceUtils.h>
#include <gtest/gtest.h>
#include <algorithm>
#include <limits>
#include <sstream>
#include <vector>
16
// Relative-error tolerances handed to faiss::gpu::compareIndices by the tests
// in this file: one for float16 GPU indices, one for float32 GPU indices.
// FIXME: figure out a better way to test fp16
constexpr float kF16MaxRelErr{0.07f};
constexpr float kF32MaxRelErr{6e-3f};
20
21 struct TestFlatOptions {
TestFlatOptionsTestFlatOptions22 TestFlatOptions()
23 : metric(faiss::MetricType::METRIC_L2),
24 metricArg(0),
25 useFloat16(false),
26 useTransposed(false),
27 numVecsOverride(-1),
28 numQueriesOverride(-1),
29 kOverride(-1),
30 dimOverride(-1) {}
31
32 faiss::MetricType metric;
33 float metricArg;
34
35 bool useFloat16;
36 bool useTransposed;
37 int numVecsOverride;
38 int numQueriesOverride;
39 int kOverride;
40 int dimOverride;
41 };
42
testFlat(const TestFlatOptions & opt)43 void testFlat(const TestFlatOptions& opt) {
44 int numVecs = opt.numVecsOverride > 0 ? opt.numVecsOverride
45 : faiss::gpu::randVal(1000, 5000);
46 int dim = opt.dimOverride > 0 ? opt.dimOverride
47 : faiss::gpu::randVal(50, 800);
48 int numQuery = opt.numQueriesOverride > 0 ? opt.numQueriesOverride
49 : faiss::gpu::randVal(1, 512);
50
51 // Due to loss of precision in a float16 accumulator, for large k,
52 // the number of differences is pretty huge. Restrict ourselves to a
53 // fairly small `k` for float16
54 int k = opt.useFloat16
55 ? std::min(faiss::gpu::randVal(1, 50), numVecs)
56 : std::min(
57 faiss::gpu::randVal(1, faiss::gpu::getMaxKSelection()),
58 numVecs);
59 if (opt.kOverride > 0) {
60 k = opt.kOverride;
61 }
62
63 faiss::IndexFlat cpuIndex(dim, opt.metric);
64 cpuIndex.metric_arg = opt.metricArg;
65
66 // Construct on a random device to test multi-device, if we have
67 // multiple devices
68 int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
69
70 faiss::gpu::StandardGpuResources res;
71 res.noTempMemory();
72
73 faiss::gpu::GpuIndexFlatConfig config;
74 config.device = device;
75 config.useFloat16 = opt.useFloat16;
76 config.storeTransposed = opt.useTransposed;
77
78 faiss::gpu::GpuIndexFlat gpuIndex(&res, dim, opt.metric, config);
79 gpuIndex.metric_arg = opt.metricArg;
80
81 std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
82 cpuIndex.add(numVecs, vecs.data());
83 gpuIndex.add(numVecs, vecs.data());
84
85 std::stringstream str;
86 str << "metric " << opt.metric << " marg " << opt.metricArg << " numVecs "
87 << numVecs << " dim " << dim << " useFloat16 " << opt.useFloat16
88 << " transposed " << opt.useTransposed << " numQuery " << numQuery
89 << " k " << k;
90
91 // To some extent, we depend upon the relative error for the test
92 // for float16
93 faiss::gpu::compareIndices(
94 cpuIndex,
95 gpuIndex,
96 numQuery,
97 dim,
98 k,
99 str.str(),
100 opt.useFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
101 // FIXME: the fp16 bounds are
102 // useless when math (the accumulator) is
103 // in fp16. Figure out another way to test
104 opt.useFloat16 ? 0.99f : 0.1f,
105 opt.useFloat16 ? 0.65f : 0.015f);
106 }
107
// Inner-product metric with float32 storage, run three times, each time
// without and then with transposed storage.
TEST(TestGpuIndexFlat, IP_Float32) {
    const bool transposedChoices[] = {false, true};

    for (int attempt = 0; attempt < 3; ++attempt) {
        TestFlatOptions opt;
        opt.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
        opt.useFloat16 = false;

        for (bool transposed : transposedChoices) {
            opt.useTransposed = transposed;
            testFlat(opt);
        }
    }
}
121
// L1 metric with float32 storage: one run without and one run with
// transposed storage.
TEST(TestGpuIndexFlat, L1_Float32) {
    const bool transposedChoices[] = {false, true};

    TestFlatOptions opt;
    opt.metric = faiss::MetricType::METRIC_L1;
    opt.useFloat16 = false;

    for (bool transposed : transposedChoices) {
        opt.useTransposed = transposed;
        testFlat(opt);
    }
}
133
// Lp metric (metric argument 5) with float32, non-transposed storage.
TEST(TestGpuIndexFlat, Lp_Float32) {
    TestFlatOptions opt;
    opt.useTransposed = false;
    opt.useFloat16 = false;
    opt.metricArg = 5;
    opt.metric = faiss::MetricType::METRIC_Lp;

    // Don't bother testing the transposed version, the L1 test should be good
    // enough for that
    testFlat(opt);
}
146
// L2 metric with float32 storage, run three times, each time without and
// then with transposed storage.
TEST(TestGpuIndexFlat, L2_Float32) {
    const bool transposedChoices[] = {false, true};

    for (int attempt = 0; attempt < 3; ++attempt) {
        TestFlatOptions opt;
        opt.metric = faiss::MetricType::METRIC_L2;
        opt.useFloat16 = false;

        for (bool transposed : transposedChoices) {
            opt.useTransposed = transposed;
            testFlat(opt);
        }
    }
}
161
162 // test specialized k == 1 codepath
TEST(TestGpuIndexFlat,L2_Float32_K1)163 TEST(TestGpuIndexFlat, L2_Float32_K1) {
164 for (int tries = 0; tries < 3; ++tries) {
165 TestFlatOptions opt;
166 opt.metric = faiss::MetricType::METRIC_L2;
167 opt.useFloat16 = false;
168 opt.useTransposed = false;
169 opt.kOverride = 1;
170
171 testFlat(opt);
172 }
173 }
174
// Inner-product metric with float16 GPU storage, run three times, each time
// without and then with transposed storage.
TEST(TestGpuIndexFlat, IP_Float16) {
    const bool transposedChoices[] = {false, true};

    for (int attempt = 0; attempt < 3; ++attempt) {
        TestFlatOptions opt;
        opt.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
        opt.useFloat16 = true;

        for (bool transposed : transposedChoices) {
            opt.useTransposed = transposed;
            testFlat(opt);
        }
    }
}
188
// L2 metric with float16 GPU storage, run three times, each time without and
// then with transposed storage.
TEST(TestGpuIndexFlat, L2_Float16) {
    const bool transposedChoices[] = {false, true};

    for (int attempt = 0; attempt < 3; ++attempt) {
        TestFlatOptions opt;
        opt.metric = faiss::MetricType::METRIC_L2;
        opt.useFloat16 = true;

        for (bool transposed : transposedChoices) {
            opt.useTransposed = transposed;
            testFlat(opt);
        }
    }
}
202
203 // test specialized k == 1 codepath
TEST(TestGpuIndexFlat,L2_Float16_K1)204 TEST(TestGpuIndexFlat, L2_Float16_K1) {
205 for (int tries = 0; tries < 3; ++tries) {
206 TestFlatOptions opt;
207 opt.metric = faiss::MetricType::METRIC_L2;
208 opt.useFloat16 = true;
209 opt.useTransposed = false;
210 opt.kOverride = 1;
211
212 testFlat(opt);
213 }
214 }
215
216 // test tiling along a huge vector set
TEST(TestGpuIndexFlat,L2_Tiling)217 TEST(TestGpuIndexFlat, L2_Tiling) {
218 for (int tries = 0; tries < 2; ++tries) {
219 TestFlatOptions opt;
220 opt.metric = faiss::MetricType::METRIC_L2;
221 opt.useFloat16 = false;
222 opt.useTransposed = false;
223 opt.numVecsOverride = 1000000;
224
225 // keep the rest of the problem reasonably small
226 opt.numQueriesOverride = 4;
227 opt.dimOverride = 64;
228 opt.kOverride = 64;
229
230 testFlat(opt);
231 }
232 }
233
// Searching a GpuIndexFlatL2 that holds no vectors must fill every result
// slot with (max float distance, index -1).
TEST(TestGpuIndexFlat, QueryEmpty) {
    faiss::gpu::StandardGpuResources res;
    res.noTempMemory();

    faiss::gpu::GpuIndexFlatConfig config;
    config.storeTransposed = false;
    config.useFloat16 = false;
    config.device = 0;

    const int dim = 128;
    faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);

    // Querying an empty index should not blow up, and just return
    // (FLT_MAX, -1)
    const int numQuery = 10;
    const int k = 50;
    const std::vector<float> queries(numQuery * dim, 1.0f);

    // Output buffers start out zeroed, i.e. different from the values
    // expected after the search.
    std::vector<float> dist(numQuery * k, 0);
    std::vector<faiss::Index::idx_t> ind(numQuery * k);

    gpuIndex.search(numQuery, queries.data(), k, dist.data(), ind.data());

    for (auto distance : dist) {
        EXPECT_EQ(distance, std::numeric_limits<float>::max());
    }

    for (auto label : ind) {
        EXPECT_EQ(label, -1);
    }
}
265
// Copies a populated CPU IndexFlatL2 into a GPU index created with a
// different dimension, then checks size, dimension and one reconstructed
// vector.
TEST(TestGpuIndexFlat, CopyFrom) {
    const int numVecs = faiss::gpu::randVal(100, 200);
    const int dim = faiss::gpu::randVal(1, 1000);

    faiss::IndexFlatL2 cpuIndex(dim);

    const std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
    cpuIndex.add(numVecs, vecs.data());

    faiss::gpu::StandardGpuResources res;
    res.noTempMemory();

    const int device =
            faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);

    faiss::gpu::GpuIndexFlatConfig config;
    config.storeTransposed = false;
    config.useFloat16 = false;
    config.device = device;

    // Fill with garbage values: the GPU index starts with dimension 2000,
    // which is not the dimension used for cpuIndex, before copyFrom runs.
    faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, 2000, config);
    gpuIndex.copyFrom(&cpuIndex);

    // Vector count must match on both sides and equal what was added.
    EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
    EXPECT_EQ(gpuIndex.ntotal, numVecs);

    // Dimension must have been taken over from the CPU index.
    EXPECT_EQ(cpuIndex.d, gpuIndex.d);
    EXPECT_EQ(cpuIndex.d, dim);

    // Spot-check one randomly chosen stored vector.
    const int idx = faiss::gpu::randVal(0, numVecs - 1);

    std::vector<float> gpuVals(dim);
    gpuIndex.reconstruct(idx, gpuVals.data());

    std::vector<float> cpuVals(dim);
    cpuIndex.reconstruct(idx, cpuVals.data());

    EXPECT_EQ(gpuVals, cpuVals);
}
305
// Copies a populated GPU index into a CPU IndexFlatL2 created with a
// different dimension, then checks size, dimension and one reconstructed
// vector.
TEST(TestGpuIndexFlat, CopyTo) {
    faiss::gpu::StandardGpuResources res;
    res.noTempMemory();

    const int numVecs = faiss::gpu::randVal(100, 200);
    const int dim = faiss::gpu::randVal(1, 1000);

    const int device =
            faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);

    faiss::gpu::GpuIndexFlatConfig config;
    config.storeTransposed = false;
    config.useFloat16 = false;
    config.device = device;

    faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);

    const std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
    gpuIndex.add(numVecs, vecs.data());

    // Fill with garbage values: the CPU index starts with dimension 2000,
    // which is not the dimension used for gpuIndex, before copyTo runs.
    faiss::IndexFlatL2 cpuIndex(2000);
    gpuIndex.copyTo(&cpuIndex);

    // Vector count must match on both sides and equal what was added.
    EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
    EXPECT_EQ(gpuIndex.ntotal, numVecs);

    // Dimension must have been taken over from the GPU index.
    EXPECT_EQ(cpuIndex.d, gpuIndex.d);
    EXPECT_EQ(cpuIndex.d, dim);

    // Spot-check one randomly chosen stored vector.
    const int idx = faiss::gpu::randVal(0, numVecs - 1);

    std::vector<float> gpuVals(dim);
    gpuIndex.reconstruct(idx, gpuVals.data());

    std::vector<float> cpuVals(dim);
    cpuIndex.reconstruct(idx, cpuVals.data());

    EXPECT_EQ(gpuVals, cpuVals);
}
345
// Compares a CPU IndexFlatL2 against a GpuIndexFlatL2 configured with
// MemorySpace::Unified. Returns without checking anything when the chosen
// device reports no full unified memory support.
TEST(TestGpuIndexFlat, UnifiedMemory) {
    // Construct on a random device to test multi-device, if we have
    // multiple devices
    const int device =
            faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);

    if (!faiss::gpu::getFullUnifiedMemSupport(device)) {
        return;
    }

    const int dim = 256;

    // FIXME: GpuIndexFlat doesn't support > 2^31 (vecs * dims) due to
    // kernel indexing, so we can't test unified memory for memory
    // oversubscription.
    const size_t numVecs = 50000;
    const int numQuery = 10;
    const int k = 10;

    faiss::IndexFlatL2 cpuIndexL2(dim);

    faiss::gpu::StandardGpuResources res;
    res.noTempMemory();

    faiss::gpu::GpuIndexFlatConfig config;
    config.device = device;
    config.memorySpace = faiss::gpu::MemorySpace::Unified;

    faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config);

    // Both indices receive exactly the same data.
    const std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
    cpuIndexL2.add(numVecs, vecs.data());
    gpuIndexL2.add(numVecs, vecs.data());

    // float16 is not enabled in this config, so the float32 tolerances
    // apply.
    faiss::gpu::compareIndices(
            cpuIndexL2,
            gpuIndexL2,
            numQuery,
            dim,
            k,
            "Unified Memory",
            kF32MaxRelErr,
            0.1f,
            0.015f);
}
392
main(int argc,char ** argv)393 int main(int argc, char** argv) {
394 testing::InitGoogleTest(&argc, argv);
395
396 // just run with a fixed test seed
397 faiss::gpu::setTestSeed(100);
398
399 return RUN_ALL_TESTS();
400 }
401