1 /**
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8 #include <faiss/gpu/test/TestUtils.h>
9 #include <faiss/utils/random.h>
10 #include <gtest/gtest.h>
11 #include <time.h>
12 #include <cmath>
13 #include <set>
14 #include <sstream>
15 #include <unordered_map>
16
17 namespace faiss {
18 namespace gpu {
19
relativeError(float a,float b)20 inline float relativeError(float a, float b) {
21 return std::abs(a - b) / (0.5f * (std::abs(a) + std::abs(b)));
22 }
23
24 // This seed is also used for the faiss float_rand API; in a test it
25 // is all within a single thread, so it is ok
26 long s_seed = 1;
27 std::mt19937 rng(1);
28 std::uniform_int_distribution<> distrib;
29
newTestSeed()30 void newTestSeed() {
31 struct timespec t;
32 clock_gettime(CLOCK_REALTIME, &t);
33
34 setTestSeed(t.tv_nsec);
35 }
36
setTestSeed(long seed)37 void setTestSeed(long seed) {
38 printf("testing with random seed %ld\n", seed);
39
40 rng = std::mt19937(seed);
41 s_seed = seed;
42 }
43
randVal(int a,int b)44 int randVal(int a, int b) {
45 EXPECT_GE(a, 0);
46 EXPECT_LE(a, b);
47
48 return a + (distrib(rng) % (b + 1 - a));
49 }
50
randBool()51 bool randBool() {
52 return randSelect<bool>({true, false});
53 }
54
randVecs(size_t num,size_t dim)55 std::vector<float> randVecs(size_t num, size_t dim) {
56 std::vector<float> v(num * dim);
57
58 faiss::float_rand(v.data(), v.size(), s_seed);
59 // unfortunately we generate separate sets of vectors, and don't
60 // want the same values
61 ++s_seed;
62
63 return v;
64 }
65
randBinaryVecs(size_t num,size_t dim)66 std::vector<unsigned char> randBinaryVecs(size_t num, size_t dim) {
67 std::vector<unsigned char> v(num * (dim / 8));
68
69 faiss::byte_rand(v.data(), v.size(), s_seed);
70 // unfortunately we generate separate sets of vectors, and don't
71 // want the same values
72 ++s_seed;
73
74 return v;
75 }
76
compareIndices(const std::vector<float> & queryVecs,faiss::Index & refIndex,faiss::Index & testIndex,int numQuery,int,int k,const std::string & configMsg,float maxRelativeError,float pctMaxDiff1,float pctMaxDiffN)77 void compareIndices(
78 const std::vector<float>& queryVecs,
79 faiss::Index& refIndex,
80 faiss::Index& testIndex,
81 int numQuery,
82 int /*dim*/,
83 int k,
84 const std::string& configMsg,
85 float maxRelativeError,
86 float pctMaxDiff1,
87 float pctMaxDiffN) {
88 // Compare
89 std::vector<float> refDistance(numQuery * k, 0);
90 std::vector<faiss::Index::idx_t> refIndices(numQuery * k, -1);
91 refIndex.search(
92 numQuery,
93 queryVecs.data(),
94 k,
95 refDistance.data(),
96 refIndices.data());
97
98 std::vector<float> testDistance(numQuery * k, 0);
99 std::vector<faiss::Index::idx_t> testIndices(numQuery * k, -1);
100 testIndex.search(
101 numQuery,
102 queryVecs.data(),
103 k,
104 testDistance.data(),
105 testIndices.data());
106
107 faiss::gpu::compareLists(
108 refDistance.data(),
109 refIndices.data(),
110 testDistance.data(),
111 testIndices.data(),
112 numQuery,
113 k,
114 configMsg,
115 true,
116 false,
117 true,
118 maxRelativeError,
119 pctMaxDiff1,
120 pctMaxDiffN);
121 }
122
compareIndices(faiss::Index & refIndex,faiss::Index & testIndex,int numQuery,int dim,int k,const std::string & configMsg,float maxRelativeError,float pctMaxDiff1,float pctMaxDiffN)123 void compareIndices(
124 faiss::Index& refIndex,
125 faiss::Index& testIndex,
126 int numQuery,
127 int dim,
128 int k,
129 const std::string& configMsg,
130 float maxRelativeError,
131 float pctMaxDiff1,
132 float pctMaxDiffN) {
133 auto queryVecs = faiss::gpu::randVecs(numQuery, dim);
134
135 compareIndices(
136 queryVecs,
137 refIndex,
138 testIndex,
139 numQuery,
140 dim,
141 k,
142 configMsg,
143 maxRelativeError,
144 pctMaxDiff1,
145 pctMaxDiffN);
146 }
147
148 template <typename T>
lookup(const T * p,int i,int j,int,int dim2)149 inline T lookup(const T* p, int i, int j, int /*dim1*/, int dim2) {
150 return p[i * dim2 + j];
151 }
152
compareLists(const float * refDist,const faiss::Index::idx_t * refInd,const float * testDist,const faiss::Index::idx_t * testInd,int dim1,int dim2,const std::string & configMsg,bool printBasicStats,bool printDiffs,bool assertOnErr,float maxRelativeError,float pctMaxDiff1,float pctMaxDiffN)153 void compareLists(
154 const float* refDist,
155 const faiss::Index::idx_t* refInd,
156 const float* testDist,
157 const faiss::Index::idx_t* testInd,
158 int dim1,
159 int dim2,
160 const std::string& configMsg,
161 bool printBasicStats,
162 bool printDiffs,
163 bool assertOnErr,
164 float maxRelativeError,
165 float pctMaxDiff1,
166 float pctMaxDiffN) {
167 float maxAbsErr = 0.0f;
168 for (int i = 0; i < dim1 * dim2; ++i) {
169 maxAbsErr = std::max(maxAbsErr, std::abs(refDist[i] - testDist[i]));
170 }
171 int numResults = dim1 * dim2;
172
173 // query -> {index -> result position}
174 std::vector<std::unordered_map<faiss::Index::idx_t, int>> refIndexMap;
175
176 for (int query = 0; query < dim1; ++query) {
177 std::unordered_map<faiss::Index::idx_t, int> indices;
178
179 for (int result = 0; result < dim2; ++result) {
180 indices[lookup(refInd, query, result, dim1, dim2)] = result;
181 }
182
183 refIndexMap.emplace_back(std::move(indices));
184 }
185
186 // See how far off the indices are
187 // Keep track of the difference for each entry
188 std::vector<std::vector<int>> indexDiffs;
189
190 int diff1 = 0; // index differs by 1
191 int diffN = 0; // index differs by >1
192 int diffInf = 0; // index not found in the other
193 int nonUniqueIndices = 0;
194
195 double avgDiff = 0.0;
196 int maxDiff = 0;
197 float maxRelErr = 0.0f;
198
199 for (int query = 0; query < dim1; ++query) {
200 std::vector<int> diffs;
201 std::set<faiss::Index::idx_t> uniqueIndices;
202
203 auto& indices = refIndexMap[query];
204
205 for (int result = 0; result < dim2; ++result) {
206 auto t = lookup(testInd, query, result, dim1, dim2);
207
208 // All indices reported within a query should be unique; this is
209 // a serious error if is otherwise the case.
210 // If -1 is reported (no result due to IVF partitioning or not
211 // enough entries in the index), then duplicates are allowed, but
212 // both the reference and test must have -1 in the same position.
213 if (t == -1) {
214 EXPECT_EQ(lookup(refInd, query, result, dim1, dim2), t);
215 } else {
216 bool uniqueIndex = uniqueIndices.count(t) == 0;
217 if (assertOnErr) {
218 EXPECT_TRUE(uniqueIndex) << configMsg << " " << query << " "
219 << result << " " << t;
220 }
221
222 if (!uniqueIndex) {
223 ++nonUniqueIndices;
224 } else {
225 uniqueIndices.insert(t);
226 }
227
228 auto it = indices.find(t);
229 if (it != indices.end()) {
230 int diff = std::abs(result - it->second);
231 diffs.push_back(diff);
232
233 if (diff == 1) {
234 ++diff1;
235 maxDiff = std::max(diff, maxDiff);
236 } else if (diff > 1) {
237 ++diffN;
238 maxDiff = std::max(diff, maxDiff);
239 }
240
241 avgDiff += (double)diff;
242 } else {
243 ++diffInf;
244 diffs.push_back(-1);
245 // don't count this for maxDiff
246 }
247 }
248
249 auto refD = lookup(refDist, query, result, dim1, dim2);
250 auto testD = lookup(testDist, query, result, dim1, dim2);
251
252 float relErr = relativeError(refD, testD);
253
254 if (assertOnErr) {
255 EXPECT_LE(relErr, maxRelativeError)
256 << configMsg << " (" << query << ", " << result
257 << ") refD: " << refD << " testD: " << testD;
258 }
259
260 maxRelErr = std::max(maxRelErr, relErr);
261 }
262
263 indexDiffs.emplace_back(std::move(diffs));
264 }
265
266 if (assertOnErr) {
267 EXPECT_LE(
268 (float)(diff1 + diffN + diffInf),
269 (float)numResults * pctMaxDiff1)
270 << configMsg;
271
272 // Don't count diffInf because that could be diff1 as far as we
273 // know
274 EXPECT_LE((float)diffN, (float)numResults * pctMaxDiffN) << configMsg;
275 }
276
277 avgDiff /= (double)numResults;
278
279 if (printBasicStats) {
280 if (!configMsg.empty()) {
281 printf("Config\n"
282 "----------------------------\n"
283 "%s\n",
284 configMsg.c_str());
285 }
286
287 printf("Result error and differences\n"
288 "----------------------------\n"
289 "max abs diff %.7f rel diff %.7f\n"
290 "idx diff avg: %.5g max: %d\n"
291 "idx diff of 1: %d (%.3f%% of queries)\n"
292 "idx diff of >1: %d (%.3f%% of queries)\n"
293 "idx diff not found: %d (%.3f%% of queries)"
294 " [typically a last element inversion]\n"
295 "non-unique indices: %d (a serious error if >0)\n",
296 maxAbsErr,
297 maxRelErr,
298 avgDiff,
299 maxDiff,
300 diff1,
301 100.0f * (float)diff1 / (float)numResults,
302 diffN,
303 100.0f * (float)diffN / (float)numResults,
304 diffInf,
305 100.0f * (float)diffInf / (float)numResults,
306 nonUniqueIndices);
307 }
308
309 if (printDiffs) {
310 printf("differences:\n");
311 printf("==================\n");
312 for (int query = 0; query < dim1; ++query) {
313 for (int result = 0; result < dim2; ++result) {
314 long refI = lookup(refInd, query, result, dim1, dim2);
315 long testI = lookup(testInd, query, result, dim1, dim2);
316
317 if (refI != testI) {
318 float refD = lookup(refDist, query, result, dim1, dim2);
319 float testD = lookup(testDist, query, result, dim1, dim2);
320
321 float maxDist = std::max(refD, testD);
322 float delta = std::abs(refD - testD);
323
324 float relErr = delta / maxDist;
325
326 if (refD == testD) {
327 printf("(%d, %d [%d]) (ref %ld tst %ld dist ==)\n",
328 query,
329 result,
330 indexDiffs[query][result],
331 refI,
332 testI);
333 } else {
334 printf("(%d, %d [%d]) (ref %ld tst %ld abs %.8f "
335 "rel %.8f ref %a tst %a)\n",
336 query,
337 result,
338 indexDiffs[query][result],
339 refI,
340 testI,
341 delta,
342 relErr,
343 refD,
344 testD);
345 }
346 }
347 }
348 }
349 }
350 }
351
352 } // namespace gpu
353 } // namespace faiss
354