1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
#include <faiss/IndexFlat.h>
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <faiss/gpu/test/TestUtils.h>
#include <faiss/gpu/utils/DeviceUtils.h>
#include <gtest/gtest.h>
#include <algorithm>
#include <limits>
#include <sstream>
#include <vector>
16 
// Relative-error tolerances passed to faiss::gpu::compareIndices: the F16
// value is used when a test enables useFloat16, the F32 value otherwise.
// FIXME: figure out a better way to test fp16
constexpr float kF32MaxRelErr{6e-3f};
constexpr float kF16MaxRelErr{0.07f};
20 
21 struct TestFlatOptions {
TestFlatOptionsTestFlatOptions22     TestFlatOptions()
23             : metric(faiss::MetricType::METRIC_L2),
24               metricArg(0),
25               useFloat16(false),
26               useTransposed(false),
27               numVecsOverride(-1),
28               numQueriesOverride(-1),
29               kOverride(-1),
30               dimOverride(-1) {}
31 
32     faiss::MetricType metric;
33     float metricArg;
34 
35     bool useFloat16;
36     bool useTransposed;
37     int numVecsOverride;
38     int numQueriesOverride;
39     int kOverride;
40     int dimOverride;
41 };
42 
testFlat(const TestFlatOptions & opt)43 void testFlat(const TestFlatOptions& opt) {
44     int numVecs = opt.numVecsOverride > 0 ? opt.numVecsOverride
45                                           : faiss::gpu::randVal(1000, 5000);
46     int dim = opt.dimOverride > 0 ? opt.dimOverride
47                                   : faiss::gpu::randVal(50, 800);
48     int numQuery = opt.numQueriesOverride > 0 ? opt.numQueriesOverride
49                                               : faiss::gpu::randVal(1, 512);
50 
51     // Due to loss of precision in a float16 accumulator, for large k,
52     // the number of differences is pretty huge. Restrict ourselves to a
53     // fairly small `k` for float16
54     int k = opt.useFloat16
55             ? std::min(faiss::gpu::randVal(1, 50), numVecs)
56             : std::min(
57                       faiss::gpu::randVal(1, faiss::gpu::getMaxKSelection()),
58                       numVecs);
59     if (opt.kOverride > 0) {
60         k = opt.kOverride;
61     }
62 
63     faiss::IndexFlat cpuIndex(dim, opt.metric);
64     cpuIndex.metric_arg = opt.metricArg;
65 
66     // Construct on a random device to test multi-device, if we have
67     // multiple devices
68     int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
69 
70     faiss::gpu::StandardGpuResources res;
71     res.noTempMemory();
72 
73     faiss::gpu::GpuIndexFlatConfig config;
74     config.device = device;
75     config.useFloat16 = opt.useFloat16;
76     config.storeTransposed = opt.useTransposed;
77 
78     faiss::gpu::GpuIndexFlat gpuIndex(&res, dim, opt.metric, config);
79     gpuIndex.metric_arg = opt.metricArg;
80 
81     std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
82     cpuIndex.add(numVecs, vecs.data());
83     gpuIndex.add(numVecs, vecs.data());
84 
85     std::stringstream str;
86     str << "metric " << opt.metric << " marg " << opt.metricArg << " numVecs "
87         << numVecs << " dim " << dim << " useFloat16 " << opt.useFloat16
88         << " transposed " << opt.useTransposed << " numQuery " << numQuery
89         << " k " << k;
90 
91     // To some extent, we depend upon the relative error for the test
92     // for float16
93     faiss::gpu::compareIndices(
94             cpuIndex,
95             gpuIndex,
96             numQuery,
97             dim,
98             k,
99             str.str(),
100             opt.useFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
101             // FIXME: the fp16 bounds are
102             // useless when math (the accumulator) is
103             // in fp16. Figure out another way to test
104             opt.useFloat16 ? 0.99f : 0.1f,
105             opt.useFloat16 ? 0.65f : 0.015f);
106 }
107 
// Inner-product metric with float32 storage: three passes, each running the
// non-transposed layout first and the transposed layout second.
TEST(TestGpuIndexFlat, IP_Float32) {
    TestFlatOptions options;
    options.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
    options.useFloat16 = false;

    for (int pass = 0; pass != 3; ++pass) {
        for (const bool transposed : {false, true}) {
            options.useTransposed = transposed;
            testFlat(options);
        }
    }
}
121 
// L1 metric with float32 storage: one run with the non-transposed layout
// followed by one run with the transposed layout.
TEST(TestGpuIndexFlat, L1_Float32) {
    TestFlatOptions options;
    options.metric = faiss::MetricType::METRIC_L1;
    options.useFloat16 = false;

    for (const bool transposed : {false, true}) {
        options.useTransposed = transposed;
        testFlat(options);
    }
}
133 
// Lp metric (metric argument 5) with float32 storage, non-transposed only.
TEST(TestGpuIndexFlat, Lp_Float32) {
    TestFlatOptions options;
    options.useFloat16 = false;
    options.useTransposed = false;
    options.metric = faiss::MetricType::METRIC_Lp;
    options.metricArg = 5;

    // Don't bother testing the transposed version, the L1 test should be good
    // enough for that
    testFlat(options);
}
146 
// L2 metric with float32 storage: three passes, each running the
// non-transposed layout first and the transposed layout second.
TEST(TestGpuIndexFlat, L2_Float32) {
    TestFlatOptions options;
    options.metric = faiss::MetricType::METRIC_L2;
    options.useFloat16 = false;

    for (int pass = 0; pass != 3; ++pass) {
        for (const bool transposed : {false, true}) {
            options.useTransposed = transposed;
            testFlat(options);
        }
    }
}
161 
162 // test specialized k == 1 codepath
TEST(TestGpuIndexFlat,L2_Float32_K1)163 TEST(TestGpuIndexFlat, L2_Float32_K1) {
164     for (int tries = 0; tries < 3; ++tries) {
165         TestFlatOptions opt;
166         opt.metric = faiss::MetricType::METRIC_L2;
167         opt.useFloat16 = false;
168         opt.useTransposed = false;
169         opt.kOverride = 1;
170 
171         testFlat(opt);
172     }
173 }
174 
// Inner-product metric with float16 enabled: three passes, each running the
// non-transposed layout first and the transposed layout second.
TEST(TestGpuIndexFlat, IP_Float16) {
    TestFlatOptions options;
    options.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
    options.useFloat16 = true;

    for (int pass = 0; pass != 3; ++pass) {
        for (const bool transposed : {false, true}) {
            options.useTransposed = transposed;
            testFlat(options);
        }
    }
}
188 
// L2 metric with float16 enabled: three passes, each running the
// non-transposed layout first and the transposed layout second.
TEST(TestGpuIndexFlat, L2_Float16) {
    TestFlatOptions options;
    options.metric = faiss::MetricType::METRIC_L2;
    options.useFloat16 = true;

    for (int pass = 0; pass != 3; ++pass) {
        for (const bool transposed : {false, true}) {
            options.useTransposed = transposed;
            testFlat(options);
        }
    }
}
202 
203 // test specialized k == 1 codepath
TEST(TestGpuIndexFlat,L2_Float16_K1)204 TEST(TestGpuIndexFlat, L2_Float16_K1) {
205     for (int tries = 0; tries < 3; ++tries) {
206         TestFlatOptions opt;
207         opt.metric = faiss::MetricType::METRIC_L2;
208         opt.useFloat16 = true;
209         opt.useTransposed = false;
210         opt.kOverride = 1;
211 
212         testFlat(opt);
213     }
214 }
215 
216 // test tiling along a huge vector set
TEST(TestGpuIndexFlat,L2_Tiling)217 TEST(TestGpuIndexFlat, L2_Tiling) {
218     for (int tries = 0; tries < 2; ++tries) {
219         TestFlatOptions opt;
220         opt.metric = faiss::MetricType::METRIC_L2;
221         opt.useFloat16 = false;
222         opt.useTransposed = false;
223         opt.numVecsOverride = 1000000;
224 
225         // keep the rest of the problem reasonably small
226         opt.numQueriesOverride = 4;
227         opt.dimOverride = 64;
228         opt.kOverride = 64;
229 
230         testFlat(opt);
231     }
232 }
233 
// Querying an empty index should not blow up, and just return
// (FLT_MAX, -1) in every result slot.
TEST(TestGpuIndexFlat, QueryEmpty) {
    const int dim = 128;
    const int numQuery = 10;
    const int k = 50;

    faiss::gpu::StandardGpuResources res;
    res.noTempMemory();

    faiss::gpu::GpuIndexFlatConfig config;
    config.device = 0;
    config.useFloat16 = false;
    config.storeTransposed = false;

    // No vectors are ever added to this index
    faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);

    // All-ones queries; both output buffers start out zero-filled, which
    // differs from the values expected after the search
    const std::vector<float> queries(numQuery * dim, 1.0f);
    std::vector<float> distances(numQuery * k, 0);
    std::vector<faiss::Index::idx_t> labels(numQuery * k);

    gpuIndex.search(
            numQuery, queries.data(), k, distances.data(), labels.data());

    for (const float distance : distances) {
        EXPECT_EQ(distance, std::numeric_limits<float>::max());
    }

    for (const auto label : labels) {
        EXPECT_EQ(label, -1);
    }
}
265 
// After copyFrom(), the GPU index must report the CPU index's vector count
// and dimension, and reconstruct the same vector for a randomly chosen id.
TEST(TestGpuIndexFlat, CopyFrom) {
    const int numVecs = faiss::gpu::randVal(100, 200);
    const int dim = faiss::gpu::randVal(1, 1000);

    // Source: CPU L2 index filled with random vectors
    faiss::IndexFlatL2 cpuIndex(dim);
    const std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
    cpuIndex.add(numVecs, vecs.data());

    faiss::gpu::StandardGpuResources res;
    res.noTempMemory();

    faiss::gpu::GpuIndexFlatConfig config;
    config.device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
    config.useFloat16 = false;
    config.storeTransposed = false;

    // Fill with garbage values: the destination is deliberately created
    // with an unrelated dimension (2000) before being overwritten
    faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, 2000, config);
    gpuIndex.copyFrom(&cpuIndex);

    EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
    EXPECT_EQ(gpuIndex.ntotal, numVecs);

    EXPECT_EQ(cpuIndex.d, gpuIndex.d);
    EXPECT_EQ(cpuIndex.d, dim);

    // Spot-check a single randomly selected vector on both sides
    const int idx = faiss::gpu::randVal(0, numVecs - 1);

    std::vector<float> gpuVals(dim);
    std::vector<float> cpuVals(dim);
    gpuIndex.reconstruct(idx, gpuVals.data());
    cpuIndex.reconstruct(idx, cpuVals.data());

    EXPECT_EQ(gpuVals, cpuVals);
}
305 
// After copyTo(), the CPU index must report the GPU index's vector count
// and dimension, and reconstruct the same vector for a randomly chosen id.
TEST(TestGpuIndexFlat, CopyTo) {
    faiss::gpu::StandardGpuResources res;
    res.noTempMemory();

    const int numVecs = faiss::gpu::randVal(100, 200);
    const int dim = faiss::gpu::randVal(1, 1000);

    faiss::gpu::GpuIndexFlatConfig config;
    config.device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
    config.useFloat16 = false;
    config.storeTransposed = false;

    // Source: GPU L2 index filled with random vectors
    faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
    const std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
    gpuIndex.add(numVecs, vecs.data());

    // Fill with garbage values: the destination is deliberately created
    // with an unrelated dimension (2000) before being overwritten
    faiss::IndexFlatL2 cpuIndex(2000);
    gpuIndex.copyTo(&cpuIndex);

    EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
    EXPECT_EQ(gpuIndex.ntotal, numVecs);

    EXPECT_EQ(cpuIndex.d, gpuIndex.d);
    EXPECT_EQ(cpuIndex.d, dim);

    // Spot-check a single randomly selected vector on both sides
    const int idx = faiss::gpu::randVal(0, numVecs - 1);

    std::vector<float> gpuVals(dim);
    std::vector<float> cpuVals(dim);
    gpuIndex.reconstruct(idx, gpuVals.data());
    cpuIndex.reconstruct(idx, cpuVals.data());

    EXPECT_EQ(gpuVals, cpuVals);
}
345 
// Compares a CPU L2 index against a GpuIndexFlatL2 whose config requests
// MemorySpace::Unified. Returns without checking anything when the chosen
// device does not report full unified memory support.
TEST(TestGpuIndexFlat, UnifiedMemory) {
    // Construct on a random device to test multi-device, if we have
    // multiple devices
    const int device =
            faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);

    const bool unifiedSupported =
            faiss::gpu::getFullUnifiedMemSupport(device);
    if (!unifiedSupported) {
        return;
    }

    // FIXME: GpuIndexFlat doesn't support > 2^31 (vecs * dims) due to
    // kernel indexing, so we can't test unified memory for memory
    // oversubscription.
    const int dim = 256;
    const size_t numVecs = 50000;
    const int numQuery = 10;
    const int k = 10;

    faiss::IndexFlatL2 cpuIndexL2(dim);

    faiss::gpu::StandardGpuResources res;
    res.noTempMemory();

    faiss::gpu::GpuIndexFlatConfig config;
    config.device = device;
    config.memorySpace = faiss::gpu::MemorySpace::Unified;

    faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config);

    // Both indices receive exactly the same data
    const std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
    cpuIndexL2.add(numVecs, vecs.data());
    gpuIndexL2.add(numVecs, vecs.data());

    // Float32 tolerances, the same values testFlat() uses without float16
    faiss::gpu::compareIndices(
            cpuIndexL2,
            gpuIndexL2,
            numQuery,
            dim,
            k,
            "Unified Memory",
            kF32MaxRelErr,
            0.1f,
            0.015f);
}
392 
main(int argc,char ** argv)393 int main(int argc, char** argv) {
394     testing::InitGoogleTest(&argc, argv);
395 
396     // just run with a fixed test seed
397     faiss::gpu::setTestSeed(100);
398 
399     return RUN_ALL_TESTS();
400 }
401