1 /**
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8 #include <faiss/gpu/GpuResources.h>
9 #include <faiss/gpu/impl/RemapIndices.h>
10 #include <faiss/gpu/utils/DeviceUtils.h>
11 #include <faiss/invlists/InvertedLists.h>
12 #include <thrust/host_vector.h>
13 #include <faiss/gpu/impl/FlatIndex.cuh>
14 #include <faiss/gpu/impl/IVFAppend.cuh>
15 #include <faiss/gpu/impl/IVFBase.cuh>
16 #include <faiss/gpu/utils/CopyUtils.cuh>
17 #include <faiss/gpu/utils/DeviceDefs.cuh>
18 #include <faiss/gpu/utils/HostTensor.cuh>
19 #include <limits>
20 #include <unordered_map>
21
22 namespace faiss {
23 namespace gpu {
24
// A single inverted list resident on the GPU: raw encoded bytes plus the
// count of vectors currently stored in it. Starts empty.
IVFBase::DeviceIVFList::DeviceIVFList(GpuResources* res, const AllocInfo& info)
        : data(res, info), numVecs(0) {}
27
// Construct the IVF base over an existing coarse quantizer. Dimensionality
// and the number of inverted lists are taken from the quantizer itself; all
// per-list state is initialized empty via reset().
IVFBase::IVFBase(
        GpuResources* resources,
        faiss::MetricType metric,
        float metricArg,
        FlatIndex* quantizer,
        bool interleavedLayout,
        IndicesOptions indicesOptions,
        MemorySpace space)
        : resources_(resources),
          metric_(metric),
          metricArg_(metricArg),
          quantizer_(quantizer),
          dim_(quantizer->getDim()),
          numLists_(quantizer->getSize()),
          interleavedLayout_(interleavedLayout),
          indicesOptions_(indicesOptions),
          space_(space),
          maxListLength_(0) {
    // Allocate the empty per-list structures
    reset();
}
48
~IVFBase()49 IVFBase::~IVFBase() {}
50
// Pre-reserve device storage for roughly `numVecs` total vectors, assuming
// an even spread over all inverted lists. No-op if the per-list share would
// be less than one vector.
void IVFBase::reserveMemory(size_t numVecs) {
    auto stream = resources_->getDefaultStreamCurrentDevice();

    // Guard: with no inverted lists there is nothing to reserve, and the
    // division below would be undefined behavior (divide by zero)
    if (deviceListData_.empty()) {
        return;
    }

    auto vecsPerList = numVecs / deviceListData_.size();
    if (vecsPerList < 1) {
        return;
    }

    auto bytesPerDataList = getGpuVectorsEncodingSize_(vecsPerList);

    for (auto& list : deviceListData_) {
        list->data.reserve(bytesPerDataList, stream);
    }

    if ((indicesOptions_ == INDICES_32_BIT) ||
        (indicesOptions_ == INDICES_64_BIT)) {
        // Reserve for index lists as well
        size_t bytesPerIndexList = vecsPerList *
                (indicesOptions_ == INDICES_32_BIT ? sizeof(int)
                                                   : sizeof(Index::idx_t));

        for (auto& list : deviceListIndices_) {
            list->data.reserve(bytesPerIndexList, stream);
        }
    }

    // Update device info for all lists, since the base pointers may
    // have changed after reservation
    updateDeviceListInfo_(stream);
}
81
// Drop all stored vectors/indices and recreate one empty inverted list
// (codes + indices + host-side id map) per coarse centroid.
void IVFBase::reset() {
    deviceListData_.clear();
    deviceListIndices_.clear();
    deviceListDataPointers_.clear();
    deviceListIndexPointers_.clear();
    deviceListLengths_.clear();
    listOffsetToUserIndex_.clear();

    AllocInfo info(
            AllocType::IVFLists,
            getCurrentDevice(),
            space_,
            resources_->getDefaultStreamCurrentDevice());

    for (size_t i = 0; i < numLists_; ++i) {
        deviceListData_.push_back(std::unique_ptr<DeviceIVFList>(
                new DeviceIVFList(resources_, info)));

        deviceListIndices_.push_back(std::unique_ptr<DeviceIVFList>(
                new DeviceIVFList(resources_, info)));

        listOffsetToUserIndex_.push_back(std::vector<Index::idx_t>());
    }

    // Device-visible mirrors of the per-list base pointers and lengths
    deviceListDataPointers_.resize(numLists_, nullptr);
    deviceListIndexPointers_.resize(numLists_, nullptr);
    deviceListLengths_.resize(numLists_, 0);
    maxListLength_ = 0;
}
111
// Dimensionality of the indexed vectors, fixed at construction time.
int IVFBase::getDim() const {
    return dim_;
}
115
// Shrink every list allocation down to exactly its used size; returns the
// number of bytes released.
size_t IVFBase::reclaimMemory() {
    return reclaimMemory_(true);
}
120
// Release unused capacity in every list's code and index storage. If
// `exact`, trim to the precise used size. Returns total bytes reclaimed.
size_t IVFBase::reclaimMemory_(bool exact) {
    auto stream = resources_->getDefaultStreamCurrentDevice();

    size_t bytesFreed = 0;

    // Shrink the encoded-vector storage for each list
    for (size_t i = 0; i < deviceListData_.size(); ++i) {
        auto& data = deviceListData_[i]->data;
        bytesFreed += data.reclaim(exact, stream);

        // reclaim may reallocate, so refresh the cached base pointer
        deviceListDataPointers_[i] = data.data();
    }

    // Shrink the index storage for each list
    for (size_t i = 0; i < deviceListIndices_.size(); ++i) {
        auto& indices = deviceListIndices_[i]->data;
        bytesFreed += indices.reclaim(exact, stream);

        deviceListIndexPointers_[i] = indices.data();
    }

    // Push the (possibly changed) base pointers/lengths to the device
    updateDeviceListInfo_(stream);

    return bytesFreed;
}
146
// Refresh device-side metadata (pointers, lengths) for every list.
void IVFBase::updateDeviceListInfo_(cudaStream_t stream) {
    std::vector<int> allListIds(deviceListData_.size());
    for (size_t i = 0; i < allListIds.size(); ++i) {
        allListIds[i] = (int)i;
    }

    updateDeviceListInfo_(allListIds, stream);
}
155
// Push the current host-side view of the given lists (length, code base
// pointer, index base pointer) to the device in one batched kernel.
void IVFBase::updateDeviceListInfo_(
        const std::vector<int>& listIds,
        cudaStream_t stream) {
    int numUpdates = (int)listIds.size();

    // Stage the update set on the host
    HostTensor<int, 1, true> updateIds({numUpdates});
    HostTensor<int, 1, true> updateLengths({numUpdates});
    HostTensor<void*, 1, true> updateDataPtrs({numUpdates});
    HostTensor<void*, 1, true> updateIndexPtrs({numUpdates});

    for (int i = 0; i < numUpdates; ++i) {
        auto listId = listIds[i];
        auto& data = deviceListData_[listId];
        auto& indices = deviceListIndices_[listId];

        updateIds[i] = listId;
        updateLengths[i] = data->numVecs;
        updateDataPtrs[i] = data->data.data();
        updateIndexPtrs[i] = indices->data.data();
    }

    // Transfer the staged update set to the GPU
    DeviceTensor<int, 1, true> listsToUpdate(
            resources_,
            makeTempAlloc(AllocType::Other, stream),
            updateIds);
    DeviceTensor<int, 1, true> newListLength(
            resources_,
            makeTempAlloc(AllocType::Other, stream),
            updateLengths);
    DeviceTensor<void*, 1, true> newDataPointers(
            resources_,
            makeTempAlloc(AllocType::Other, stream),
            updateDataPtrs);
    DeviceTensor<void*, 1, true> newIndexPointers(
            resources_,
            makeTempAlloc(AllocType::Other, stream),
            updateIndexPtrs);

    // Scatter the new values into the device-resident metadata arrays
    runUpdateListPointers(
            listsToUpdate,
            newListLength,
            newDataPointers,
            newIndexPointers,
            deviceListLengths_,
            deviceListDataPointers_,
            deviceListIndexPointers_,
            stream);
}
205
// Number of inverted lists, equal to the coarse quantizer's size.
size_t IVFBase::getNumLists() const {
    return numLists_;
}
209
// Number of vectors currently stored in the given list. Throws if listId
// is out of range.
int IVFBase::getListLength(int listId) const {
    FAISS_THROW_IF_NOT_FMT(
            listId < numLists_,
            "IVF list %d is out of bounds (%d lists total)",
            listId,
            numLists_);
    FAISS_ASSERT(listId < deviceListLengths_.size());
    FAISS_ASSERT(listId < deviceListData_.size());

    // The GPU-resident length (LHS) must agree with the CPU-resident
    // length (RHS)
    FAISS_ASSERT(
            deviceListLengths_[listId] == deviceListData_[listId]->numVecs);

    return deviceListData_[listId]->numVecs;
}
225
// Return the user indices of all vectors in the given list, as a host-side
// copy. Behavior depends on where/how indices are stored (indicesOptions_).
std::vector<Index::idx_t> IVFBase::getListIndices(int listId) const {
    FAISS_THROW_IF_NOT_FMT(
            listId < numLists_,
            "IVF list %d is out of bounds (%d lists total)",
            listId,
            numLists_);
    FAISS_ASSERT(listId < deviceListData_.size());
    FAISS_ASSERT(listId < deviceListLengths_.size());

    auto stream = resources_->getDefaultStreamCurrentDevice();

    if (indicesOptions_ == INDICES_32_BIT) {
        // Stored as int32 on the GPU; widen to idx_t on the host
        FAISS_ASSERT(listId < deviceListIndices_.size());

        auto ind32 = deviceListIndices_[listId]->data.copyToHost<int>(stream);

        std::vector<Index::idx_t> out;
        out.reserve(ind32.size());
        for (auto v : ind32) {
            out.push_back((Index::idx_t)v);
        }

        return out;
    }

    if (indicesOptions_ == INDICES_64_BIT) {
        // Stored as int64 on the GPU; copy directly
        FAISS_ASSERT(listId < deviceListIndices_.size());

        return deviceListIndices_[listId]->data.copyToHost<Index::idx_t>(
                stream);
    }

    if (indicesOptions_ == INDICES_CPU) {
        // Indices live only on the CPU
        FAISS_ASSERT(listId < listOffsetToUserIndex_.size());

        auto& userIds = listOffsetToUserIndex_[listId];

        // We must have exactly one user id per vector encoded on the GPU
        FAISS_ASSERT(userIds.size() == deviceListData_[listId]->numVecs);

        // Returned by value, i.e. as a copy
        return userIds;
    }

    // Unhandled indices type (includes INDICES_IVF, where the user ids
    // are not retained)
    FAISS_ASSERT(false);
    return std::vector<Index::idx_t>();
}
273
// Return the encoded vector bytes of the given list as a host-side copy,
// either in the raw GPU layout (gpuFormat == true) or translated back into
// the CPU layout.
std::vector<uint8_t> IVFBase::getListVectorData(int listId, bool gpuFormat)
        const {
    FAISS_THROW_IF_NOT_FMT(
            listId < numLists_,
            "IVF list %d is out of bounds (%d lists total)",
            listId,
            numLists_);
    FAISS_ASSERT(listId < deviceListData_.size());
    FAISS_ASSERT(listId < deviceListLengths_.size());

    auto stream = resources_->getDefaultStreamCurrentDevice();

    auto& list = deviceListData_[listId];

    // Pull the raw encoded bytes off the device
    auto gpuCodes = list->data.copyToHost<uint8_t>(stream);

    if (!gpuFormat) {
        // The GPU layout may differ from the CPU layout (e.g., vectors
        // rather than dimensions interleaved); translate back
        return translateCodesFromGpu_(std::move(gpuCodes), list->numVecs);
    }

    return gpuCodes;
}
297
// Populate our (currently empty) lists from a CPU InvertedLists instance.
// A null `ivf` is treated as "nothing to copy".
void IVFBase::copyInvertedListsFrom(const InvertedLists* ivf) {
    size_t nlist = ivf ? ivf->nlist : 0;

    for (size_t i = 0; i < nlist; ++i) {
        size_t listSize = ivf->list_size(i);

        // Per-list lengths are int32 on the GPU, so larger lists cannot
        // be represented
        FAISS_THROW_IF_NOT_FMT(
                listSize <= (size_t)std::numeric_limits<int>::max(),
                "GPU inverted list can only support "
                "%zu entries; %zu found",
                (size_t)std::numeric_limits<int>::max(),
                listSize);

        addEncodedVectorsToList_(
                i, ivf->get_codes(i), ivf->get_ids(i), listSize);
    }
}
315
// Export every list (codes in CPU layout, plus user indices) into a CPU
// InvertedLists instance.
void IVFBase::copyInvertedListsTo(InvertedLists* ivf) {
    for (int i = 0; i < numLists_; ++i) {
        auto ids = getListIndices(i);
        auto codes = getListVectorData(i, false);

        ivf->add_entries(i, ids.size(), ids.data(), codes.data());
    }
}
325
// Append `numVecs` CPU-layout encoded vectors (and their user indices) to
// a list that must currently be empty, translating the codes into the GPU
// layout first.
void IVFBase::addEncodedVectorsToList_(
        int listId,
        const void* codes,
        const Index::idx_t* indices,
        size_t numVecs) {
    auto stream = resources_->getDefaultStreamCurrentDevice();

    // The target list must already exist...
    FAISS_ASSERT(listId < deviceListData_.size());

    // ...and must currently be empty
    auto& listCodes = deviceListData_[listId];
    FAISS_ASSERT(listCodes->data.size() == 0);
    FAISS_ASSERT(listCodes->numVecs == 0);

    // Nothing to add, nothing to do
    if (numVecs == 0) {
        return;
    }

    // The GPU may lay out the encoded bytes differently than the CPU
    auto gpuListSizeInBytes = getGpuVectorsEncodingSize_(numVecs);
    auto cpuListSizeInBytes = getCpuVectorsEncodingSize_(numVecs);

    // We only have int32 length representations on the GPU per each
    // list; the length is in sizeof(char)
    FAISS_ASSERT(gpuListSizeInBytes <= (size_t)std::numeric_limits<int>::max());

    // Translate the CPU-layout codes into our preferred GPU form
    std::vector<uint8_t> codesV(cpuListSizeInBytes);
    std::memcpy(codesV.data(), codes, cpuListSizeInBytes);
    auto translatedCodes = translateCodesToGpu_(std::move(codesV), numVecs);

    listCodes->data.append(
            translatedCodes.data(),
            gpuListSizeInBytes,
            stream,
            true /* exact reserved size */);
    listCodes->numVecs = numVecs;

    // Append the corresponding user indices as well
    addIndicesFromCpu_(listId, indices, numVecs);

    deviceListDataPointers_[listId] = listCodes->data.data();
    deviceListLengths_[listId] = numVecs;

    // The multi-pass query algorithm sizes scratch space by the longest
    // list, so keep that up to date too
    maxListLength_ = std::max(maxListLength_, (int)numVecs);

    // The device_vector append may be happening on a different stream
    // than our default stream; make stream 0 wait on it
    if (resources_->getDefaultStreamCurrentDevice() != 0) {
        streamWait({stream}, {0});
    }
}
381
// Store `numVecs` user indices for a list whose index storage must
// currently be empty. Depending on indicesOptions_, indices are kept on
// the GPU as int32 or int64, on the CPU, or not at all.
void IVFBase::addIndicesFromCpu_(
        int listId,
        const Index::idx_t* indices,
        size_t numVecs) {
    auto stream = resources_->getDefaultStreamCurrentDevice();

    // The target list must currently hold no indices
    auto& listIndices = deviceListIndices_[listId];
    FAISS_ASSERT(listIndices->data.size() == 0);
    FAISS_ASSERT(listIndices->numVecs == 0);

    if (indicesOptions_ == INDICES_32_BIT) {
        // Narrow each id to int32, asserting it fits
        std::vector<int> indices32(numVecs);
        for (size_t i = 0; i < numVecs; ++i) {
            auto ind = indices[i];
            FAISS_ASSERT(ind <= (Index::idx_t)std::numeric_limits<int>::max());
            indices32[i] = (int)ind;
        }

        static_assert(sizeof(int) == 4, "");

        listIndices->data.append(
                (uint8_t*)indices32.data(),
                numVecs * sizeof(int),
                stream,
                true /* exact reserved size */);

    } else if (indicesOptions_ == INDICES_64_BIT) {
        // idx_t ids are stored on the device as-is
        listIndices->data.append(
                (uint8_t*)indices,
                numVecs * sizeof(Index::idx_t),
                stream,
                true /* exact reserved size */);
    } else if (indicesOptions_ == INDICES_CPU) {
        // The ids are kept on the CPU side only
        FAISS_ASSERT(listId < listOffsetToUserIndex_.size());

        auto& userIndices = listOffsetToUserIndex_[listId];
        userIndices.insert(userIndices.begin(), indices, indices + numVecs);
    } else {
        // INDICES_IVF: user ids are not retained anywhere
        FAISS_ASSERT(indicesOptions_ == INDICES_IVF);
    }

    // Refresh the cached base pointer in case the append reallocated
    deviceListIndexPointers_[listId] = listIndices->data.data();
}
429
// Quantize the input vectors to their closest IVF lists, grow the
// per-list storage, then encode and append the vectors (and their user
// indices). Vectors the quantizer rejects (negative list id, e.g. NaN
// inputs) are skipped. Returns the number of vectors actually added.
int IVFBase::addVectors(
        Tensor<float, 2, true>& vecs,
        Tensor<Index::idx_t, 1, true>& indices) {
    FAISS_ASSERT(vecs.getSize(0) == indices.getSize(0));
    FAISS_ASSERT(vecs.getSize(1) == dim_);

    auto stream = resources_->getDefaultStreamCurrentDevice();

    // Find the single closest list for each input vector. The distance
    // output is required by the query API but not consumed here.
    DeviceTensor<float, 2, true> unusedDistance(
            resources_,
            makeTempAlloc(AllocType::Other, stream),
            {vecs.getSize(0), 1});
    DeviceTensor<int, 2, true> assignedLists2d(
            resources_,
            makeTempAlloc(AllocType::Other, stream),
            {vecs.getSize(0), 1});

    quantizer_->query(
            vecs, 1, metric_, metricArg_, unusedDistance, assignedLists2d, false);

    // Bring the list assignments back to the CPU.
    // FIXME: really this can be into pinned memory and a true async
    // copy on a different stream; we can start the copy early, but it's
    // tiny
    auto assignedListsHost = assignedLists2d.copyToVector(stream);

    // Now we add the encoded vectors to the individual lists. First,
    // figure out per-list membership and per-vector offsets.

    // list id -> ids of the vectors going into that list
    std::unordered_map<int, std::vector<int>> vectorsPerList;

    // vector id -> list it is appended to (-1 for invalid vectors)
    std::vector<int> vectorIdToList(vecs.getSize(0));

    // vector id -> offset within its destination list
    std::vector<int> listOffsetHost(assignedListsHost.size());

    // Number of valid vectors actually added; this is the return value
    int numAdded = 0;

    for (int i = 0; i < assignedListsHost.size(); ++i) {
        int listId = assignedListsHost[i];

        // A vector may be unassignable (e.g., it contains NaNs)
        if (listId < 0) {
            listOffsetHost[i] = -1;
            vectorIdToList[i] = -1;
            continue;
        }

        FAISS_ASSERT(listId < numLists_);
        ++numAdded;
        vectorIdToList[i] = listId;

        // Offset = vectors already in the list + vectors queued before us
        int offset = deviceListData_[listId]->numVecs;

        auto it = vectorsPerList.find(listId);
        if (it != vectorsPerList.end()) {
            offset += it->second.size();
            it->second.push_back(i);
        } else {
            vectorsPerList[listId] = std::vector<int>{i};
        }

        listOffsetHost[i] = offset;
    }

    // Everything was invalid (nothing mapped to an IVF cluster); done
    if (numAdded == 0) {
        return 0;
    }

    // The distinct lists being appended to, in ascending order
    std::vector<int> uniqueLists;

    for (auto& entry : vectorsPerList) {
        uniqueLists.push_back(entry.first);
    }

    std::sort(uniqueLists.begin(), uniqueLists.end());

    // Vector ids grouped contiguously by destination list, following the
    // order of uniqueLists: (list A's vectors ...)(list B's vectors ...)
    std::vector<int> vectorsByUniqueList;

    // For each unique list, where its group starts in vectorsByUniqueList
    std::vector<int> uniqueListVectorStart;

    // For each unique list, the vector offset at which appending begins
    std::vector<int> uniqueListStartOffset;

    for (auto listId : uniqueLists) {
        uniqueListVectorStart.push_back(vectorsByUniqueList.size());

        FAISS_ASSERT(vectorsPerList.count(listId) != 0);

        // The vectors destined for this list
        auto& members = vectorsPerList[listId];
        vectorsByUniqueList.insert(
                vectorsByUniqueList.end(), members.begin(), members.end());

        // Appending starts after the vectors already resident on device
        uniqueListStartOffset.push_back(deviceListData_[listId]->numVecs);
    }

    // Terminate uniqueListVectorStart with the total count being added,
    // which can be less than vecs.getSize(0) due to invalid vectors
    uniqueListVectorStart.push_back(vectorsByUniqueList.size());

    // Resizing the device-side list storage may reallocate and therefore
    // move base addresses; figure out the new state and batch-update the
    // device afterwards
    {
        for (auto& entry : vectorsPerList) {
            auto listId = entry.first;
            int numVecsToAdd = entry.second.size();

            auto& codes = deviceListData_[listId];
            int oldNumVecs = codes->numVecs;
            int newNumVecs = codes->numVecs + numVecsToAdd;

            auto newSizeBytes = getGpuVectorsEncodingSize_(newNumVecs);
            codes->data.resize(newSizeBytes, stream);
            codes->numVecs = newNumVecs;

            auto& indicesList = deviceListIndices_[listId];
            if ((indicesOptions_ == INDICES_32_BIT) ||
                (indicesOptions_ == INDICES_64_BIT)) {
                size_t indexSize = (indicesOptions_ == INDICES_32_BIT)
                        ? sizeof(int)
                        : sizeof(Index::idx_t);

                indicesList->data.resize(
                        indicesList->data.size() + numVecsToAdd * indexSize,
                        stream);
                FAISS_ASSERT(indicesList->numVecs == oldNumVecs);
                indicesList->numVecs = newNumVecs;

            } else if (indicesOptions_ == INDICES_CPU) {
                // User ids are kept on the CPU side
                FAISS_ASSERT(listId < listOffsetToUserIndex_.size());

                auto& userIndices = listOffsetToUserIndex_[listId];
                userIndices.resize(newNumVecs);
            } else {
                // User ids are not retained on the GPU or CPU side
                FAISS_ASSERT(indicesOptions_ == INDICES_IVF);
            }

            // The multi-pass query sizes intermediate scratch space by
            // the longest list
            maxListLength_ = std::max(maxListLength_, newNumVecs);
        }

        // Batch-update pointers and lengths on the device for the lists
        // we appended to
        updateDeviceListInfo_(uniqueLists, stream);
    }

    // If user ids are maintained on the CPU side, record them into the
    // map (already resized above)
    if (indicesOptions_ == INDICES_CPU) {
        HostTensor<Index::idx_t, 1, true> hostIndices(indices, stream);

        for (int i = 0; i < hostIndices.getSize(0); ++i) {
            int listId = assignedListsHost[i];

            // Skip invalid vectors (e.g., NaNs)
            if (listId < 0) {
                continue;
            }

            int offset = listOffsetHost[i];
            FAISS_ASSERT(offset >= 0);

            FAISS_ASSERT(listId < listOffsetToUserIndex_.size());
            auto& userIndices = listOffsetToUserIndex_[listId];

            FAISS_ASSERT(offset < userIndices.size());
            userIndices[offset] = hostIndices[i];
        }
    }

    // Stage the append bookkeeping on the GPU
    auto assignedListsDevice = assignedLists2d.downcastOuter<1>();
    auto listOffsetDevice =
            toDeviceTemporary(resources_, listOffsetHost, stream);
    auto uniqueListsDevice = toDeviceTemporary(resources_, uniqueLists, stream);
    auto vectorsByUniqueListDevice =
            toDeviceTemporary(resources_, vectorsByUniqueList, stream);
    auto uniqueListVectorStartDevice =
            toDeviceTemporary(resources_, uniqueListVectorStart, stream);
    auto uniqueListStartOffsetDevice =
            toDeviceTemporary(resources_, uniqueListStartOffset, stream);

    // Actually encode the vectors and write them (plus their indices)
    // into the lists
    appendVectors_(
            vecs,
            indices,
            uniqueListsDevice,
            vectorsByUniqueListDevice,
            uniqueListVectorStartDevice,
            uniqueListStartOffsetDevice,
            assignedListsDevice,
            listOffsetDevice,
            stream);

    // We added this number
    return numAdded;
}
657
658 } // namespace gpu
659 } // namespace faiss
660