/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <faiss/gpu/GpuResources.h>
#include <faiss/gpu/impl/RemapIndices.h>
#include <faiss/gpu/utils/DeviceUtils.h>
#include <faiss/invlists/InvertedLists.h>
#include <thrust/host_vector.h>
#include <faiss/gpu/impl/FlatIndex.cuh>
#include <faiss/gpu/impl/IVFAppend.cuh>
#include <faiss/gpu/impl/IVFBase.cuh>
#include <faiss/gpu/utils/CopyUtils.cuh>
#include <faiss/gpu/utils/DeviceDefs.cuh>
#include <faiss/gpu/utils/HostTensor.cuh>
#include <cstring>
#include <limits>
#include <unordered_map>

namespace faiss {
namespace gpu {
IVFBase::DeviceIVFList::DeviceIVFList(GpuResources* res, const AllocInfo& info)
        : data(res, info), numVecs(0) {}

IVFBase::IVFBase(
        GpuResources* resources,
        faiss::MetricType metric,
        float metricArg,
        FlatIndex* quantizer,
        bool interleavedLayout,
        IndicesOptions indicesOptions,
        MemorySpace space)
        : resources_(resources),
          metric_(metric),
          metricArg_(metricArg),
          quantizer_(quantizer),
          dim_(quantizer->getDim()),
          numLists_(quantizer->getSize()),
          interleavedLayout_(interleavedLayout),
          indicesOptions_(indicesOptions),
          space_(space),
          maxListLength_(0) {
    reset();
}

IVFBase::~IVFBase() {}

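// Pre-allocates device memory for an expected total number of vectors,
// split evenly across all inverted lists. This is a hint only; lists
// still grow on demand as vectors are appended.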
void IVFBase::reserveMemory(size_t numVecs) {
    auto stream = resources_->getDefaultStreamCurrentDevice();

    auto vecsPerList = numVecs / deviceListData_.size();
    if (vecsPerList < 1) {
        return;
    }

    auto bytesPerDataList = getGpuVectorsEncodingSize_(vecsPerList);

    for (auto& list : deviceListData_) {
        list->data.reserve(bytesPerDataList, stream);
    }

    if ((indicesOptions_ == INDICES_32_BIT) ||
        (indicesOptions_ == INDICES_64_BIT)) {
        // Reserve for index lists as well
        size_t bytesPerIndexList = vecsPerList *
                (indicesOptions_ == INDICES_32_BIT ? sizeof(int)
                                                   : sizeof(Index::idx_t));

        for (auto& list : deviceListIndices_) {
            list->data.reserve(bytesPerIndexList, stream);
        }
    }

    // Update device info for all lists, since the base pointers may
    // have changed
    updateDeviceListInfo_(stream);
}

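// Clears all list contents and rebuilds empty per-list storage, one data
// list and one index list per coarse centroid.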
void IVFBase::reset() {
    deviceListData_.clear();
    deviceListIndices_.clear();
    deviceListDataPointers_.clear();
    deviceListIndexPointers_.clear();
    deviceListLengths_.clear();
    listOffsetToUserIndex_.clear();

    auto info = AllocInfo(
            AllocType::IVFLists,
            getCurrentDevice(),
            space_,
            resources_->getDefaultStreamCurrentDevice());

    for (size_t i = 0; i < numLists_; ++i) {
        deviceListData_.emplace_back(std::unique_ptr<DeviceIVFList>(
                new DeviceIVFList(resources_, info)));

        deviceListIndices_.emplace_back(std::unique_ptr<DeviceIVFList>(
                new DeviceIVFList(resources_, info)));

        listOffsetToUserIndex_.emplace_back(std::vector<Index::idx_t>());
    }

    deviceListDataPointers_.resize(numLists_, nullptr);
    deviceListIndexPointers_.resize(numLists_, nullptr);
    deviceListLengths_.resize(numLists_, 0);
    maxListLength_ = 0;
}

int IVFBase::getDim() const {
    return dim_;
}

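// Trims each list's allocation down to its used size, returning the
// number of bytes reclaimed across all lists.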
size_t IVFBase::reclaimMemory() {
    // Reclaim all unused memory exactly
    return reclaimMemory_(true);
}

size_t IVFBase::reclaimMemory_(bool exact) {
    auto stream = resources_->getDefaultStreamCurrentDevice();

    size_t totalReclaimed = 0;

    for (int i = 0; i < deviceListData_.size(); ++i) {
        auto& data = deviceListData_[i]->data;
        totalReclaimed += data.reclaim(exact, stream);

        deviceListDataPointers_[i] = data.data();
    }

    for (int i = 0; i < deviceListIndices_.size(); ++i) {
        auto& indices = deviceListIndices_[i]->data;
        totalReclaimed += indices.reclaim(exact, stream);

        deviceListIndexPointers_[i] = indices.data();
    }

    // Update device info for all lists, since the base pointers may
    // have changed
    updateDeviceListInfo_(stream);

    return totalReclaimed;
}

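// Refreshes the device-side pointer and length arrays for every list.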
void IVFBase::updateDeviceListInfo_(cudaStream_t stream) {
    std::vector<int> listIds(deviceListData_.size());
    for (int i = 0; i < deviceListData_.size(); ++i) {
        listIds[i] = i;
    }

    updateDeviceListInfo_(listIds, stream);
}

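// Refreshes the device-side pointer and length arrays for just the given
// lists. The updates are staged in host tensors and applied by a single
// batched kernel rather than one small copy per list.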
void IVFBase::updateDeviceListInfo_(
        const std::vector<int>& listIds,
        cudaStream_t stream) {
    HostTensor<int, 1, true> hostListsToUpdate({(int)listIds.size()});
    HostTensor<int, 1, true> hostNewListLength({(int)listIds.size()});
    HostTensor<void*, 1, true> hostNewDataPointers({(int)listIds.size()});
    HostTensor<void*, 1, true> hostNewIndexPointers({(int)listIds.size()});

    for (int i = 0; i < listIds.size(); ++i) {
        auto listId = listIds[i];
        auto& data = deviceListData_[listId];
        auto& indices = deviceListIndices_[listId];

        hostListsToUpdate[i] = listId;
        hostNewListLength[i] = data->numVecs;
        hostNewDataPointers[i] = data->data.data();
        hostNewIndexPointers[i] = indices->data.data();
    }

    // Copy the above update sets to the GPU
    DeviceTensor<int, 1, true> listsToUpdate(
            resources_,
            makeTempAlloc(AllocType::Other, stream),
            hostListsToUpdate);
    DeviceTensor<int, 1, true> newListLength(
            resources_,
            makeTempAlloc(AllocType::Other, stream),
            hostNewListLength);
    DeviceTensor<void*, 1, true> newDataPointers(
            resources_,
            makeTempAlloc(AllocType::Other, stream),
            hostNewDataPointers);
    DeviceTensor<void*, 1, true> newIndexPointers(
            resources_,
            makeTempAlloc(AllocType::Other, stream),
            hostNewIndexPointers);

    // Update all pointers to the lists on the device that may have
    // changed
    runUpdateListPointers(
            listsToUpdate,
            newListLength,
            newDataPointers,
            newIndexPointers,
            deviceListLengths_,
            deviceListDataPointers_,
            deviceListIndexPointers_,
            stream);
}

size_t IVFBase::getNumLists() const {
    return numLists_;
}

int IVFBase::getListLength(int listId) const {
    FAISS_THROW_IF_NOT_FMT(
            listId < numLists_,
            "IVF list %d is out of bounds (%d lists total)",
            listId,
            numLists_);
    FAISS_ASSERT(listId < deviceListLengths_.size());
    FAISS_ASSERT(listId < deviceListData_.size());

    // LHS is the GPU resident value, RHS is the CPU resident value
    FAISS_ASSERT(
            deviceListLengths_[listId] == deviceListData_[listId]->numVecs);

    return deviceListData_[listId]->numVecs;
}

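// Returns a list's user indices as int64, regardless of storage: widened
// from int32 on the GPU, copied directly if int64 on the GPU, or read
// from the CPU-side map for INDICES_CPU.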
std::vector<Index::idx_t> IVFBase::getListIndices(int listId) const {
    FAISS_THROW_IF_NOT_FMT(
            listId < numLists_,
            "IVF list %d is out of bounds (%d lists total)",
            listId,
            numLists_);
    FAISS_ASSERT(listId < deviceListData_.size());
    FAISS_ASSERT(listId < deviceListLengths_.size());

    auto stream = resources_->getDefaultStreamCurrentDevice();

    if (indicesOptions_ == INDICES_32_BIT) {
        // The data is stored as int32 on the GPU
        FAISS_ASSERT(listId < deviceListIndices_.size());

        auto intInd = deviceListIndices_[listId]->data.copyToHost<int>(stream);

        std::vector<Index::idx_t> out(intInd.size());
        for (size_t i = 0; i < intInd.size(); ++i) {
            out[i] = (Index::idx_t)intInd[i];
        }

        return out;
    } else if (indicesOptions_ == INDICES_64_BIT) {
        // The data is stored as int64 on the GPU
        FAISS_ASSERT(listId < deviceListIndices_.size());

        return deviceListIndices_[listId]->data.copyToHost<Index::idx_t>(
                stream);
    } else if (indicesOptions_ == INDICES_CPU) {
        // The data is not stored on the GPU
        FAISS_ASSERT(listId < listOffsetToUserIndex_.size());

        auto& userIds = listOffsetToUserIndex_[listId];

        // We should have the same number of indices on the CPU as we have
        // vectors encoded on the GPU
        FAISS_ASSERT(userIds.size() == deviceListData_[listId]->numVecs);

        // this will return a copy
        return userIds;
    } else {
        // unhandled indices type (includes INDICES_IVF)
        FAISS_ASSERT(false);
        return std::vector<Index::idx_t>();
    }
}

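// Returns a list's encoded vectors, either in the raw GPU layout or
// translated back to the CPU (InvertedLists) layout.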
std::vector<uint8_t> IVFBase::getListVectorData(int listId, bool gpuFormat)
        const {
    FAISS_THROW_IF_NOT_FMT(
            listId < numLists_,
            "IVF list %d is out of bounds (%d lists total)",
            listId,
            numLists_);
    FAISS_ASSERT(listId < deviceListData_.size());
    FAISS_ASSERT(listId < deviceListLengths_.size());

    auto stream = resources_->getDefaultStreamCurrentDevice();

    auto& list = deviceListData_[listId];
    auto gpuCodes = list->data.copyToHost<uint8_t>(stream);

    if (gpuFormat) {
        return gpuCodes;
    } else {
        // The GPU layout may be different from the CPU layout (e.g., vectors
        // rather than dimensions interleaved); translate back if necessary
        return translateCodesFromGpu_(std::move(gpuCodes), list->numVecs);
    }
}

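// Populates our inverted lists from a CPU InvertedLists object; every
// destination list must currently be empty.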
void IVFBase::copyInvertedListsFrom(const InvertedLists* ivf) {
    size_t nlist = ivf ? ivf->nlist : 0;
    for (size_t i = 0; i < nlist; ++i) {
        size_t listSize = ivf->list_size(i);

        // The GPU index supports at most int max entries per list
        FAISS_THROW_IF_NOT_FMT(
                listSize <= (size_t)std::numeric_limits<int>::max(),
                "GPU inverted list can only support "
                "%zu entries; %zu found",
                (size_t)std::numeric_limits<int>::max(),
                listSize);

        addEncodedVectorsToList_(
                i, ivf->get_codes(i), ivf->get_ids(i), listSize);
    }
}

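// Exports all lists (indices plus codes in CPU layout) into a CPU
// InvertedLists object.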
void IVFBase::copyInvertedListsTo(InvertedLists* ivf) {
    for (int i = 0; i < numLists_; ++i) {
        auto listIndices = getListIndices(i);
        auto listData = getListVectorData(i, false);

        ivf->add_entries(
                i, listIndices.size(), listIndices.data(), listData.data());
    }
}

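// Appends already-encoded vectors and their indices to a single,
// currently empty list, translating the codes into the GPU layout.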
void IVFBase::addEncodedVectorsToList_(
        int listId,
        const void* codes,
        const Index::idx_t* indices,
        size_t numVecs) {
    auto stream = resources_->getDefaultStreamCurrentDevice();

    // This list must already exist
    FAISS_ASSERT(listId < deviceListData_.size());

    // This list must currently be empty
    auto& listCodes = deviceListData_[listId];
    FAISS_ASSERT(listCodes->data.size() == 0);
    FAISS_ASSERT(listCodes->numVecs == 0);

    // If there's nothing to add, then there's nothing we have to do
    if (numVecs == 0) {
        return;
    }

    // The GPU might have a different layout of the memory
    auto gpuListSizeInBytes = getGpuVectorsEncodingSize_(numVecs);
    auto cpuListSizeInBytes = getCpuVectorsEncodingSize_(numVecs);

    // We only have int32 length representations on the GPU for each
    // list; the length is in sizeof(char)
    FAISS_ASSERT(gpuListSizeInBytes <= (size_t)std::numeric_limits<int>::max());

    // Translate the codes as needed to our preferred form
    std::vector<uint8_t> codesV(cpuListSizeInBytes);
    std::memcpy(codesV.data(), codes, cpuListSizeInBytes);
    auto translatedCodes = translateCodesToGpu_(std::move(codesV), numVecs);

    listCodes->data.append(
            translatedCodes.data(),
            gpuListSizeInBytes,
            stream,
            true /* exact reserved size */);
    listCodes->numVecs = numVecs;

    // Handle the indices as well
    addIndicesFromCpu_(listId, indices, numVecs);

    deviceListDataPointers_[listId] = listCodes->data.data();
    deviceListLengths_[listId] = numVecs;

    // We update this as well, since the multi-pass algorithm uses it
    maxListLength_ = std::max(maxListLength_, (int)numVecs);

    // The device_vector append may be happening on a different stream
    // from our default stream
    if (resources_->getDefaultStreamCurrentDevice() != 0) {
        streamWait({stream}, {0});
    }
}

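// Stores a list's user indices according to indicesOptions_: narrowed to
// int32 or kept as int64 on the device, mirrored into the CPU-side map,
// or dropped entirely for INDICES_IVF.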
void IVFBase::addIndicesFromCpu_(
        int listId,
        const Index::idx_t* indices,
        size_t numVecs) {
    auto stream = resources_->getDefaultStreamCurrentDevice();

    // This list must currently be empty
    auto& listIndices = deviceListIndices_[listId];
    FAISS_ASSERT(listIndices->data.size() == 0);
    FAISS_ASSERT(listIndices->numVecs == 0);

    if (indicesOptions_ == INDICES_32_BIT) {
        // Make sure that all indices are in bounds
        std::vector<int> indices32(numVecs);
        for (size_t i = 0; i < numVecs; ++i) {
            auto ind = indices[i];
            FAISS_ASSERT(ind <= (Index::idx_t)std::numeric_limits<int>::max());
            indices32[i] = (int)ind;
        }

        static_assert(sizeof(int) == 4, "");

        listIndices->data.append(
                (uint8_t*)indices32.data(),
                numVecs * sizeof(int),
                stream,
                true /* exact reserved size */);

    } else if (indicesOptions_ == INDICES_64_BIT) {
        listIndices->data.append(
                (uint8_t*)indices,
                numVecs * sizeof(Index::idx_t),
                stream,
                true /* exact reserved size */);
    } else if (indicesOptions_ == INDICES_CPU) {
        // indices are stored on the CPU
        FAISS_ASSERT(listId < listOffsetToUserIndex_.size());

        auto& userIndices = listOffsetToUserIndex_[listId];
        userIndices.insert(userIndices.begin(), indices, indices + numVecs);
    } else {
        // indices are not stored
        FAISS_ASSERT(indicesOptions_ == INDICES_IVF);
    }

    deviceListIndexPointers_[listId] = listIndices->data.data();
}

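// Appends new vectors in four steps: (1) run the coarse quantizer to
// assign each vector to a list; (2) on the CPU, bucket the assignments
// by list and compute each vector's offset within its list; (3) resize
// the affected lists' device storage and batch-update the device-side
// pointers; (4) launch the append kernel to encode and write the vectors
// and indices in place.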
int IVFBase::addVectors(
        Tensor<float, 2, true>& vecs,
        Tensor<Index::idx_t, 1, true>& indices) {
    FAISS_ASSERT(vecs.getSize(0) == indices.getSize(0));
    FAISS_ASSERT(vecs.getSize(1) == dim_);

    auto stream = resources_->getDefaultStreamCurrentDevice();

    // Determine which IVF lists we need to append to

    // We don't actually need this
    DeviceTensor<float, 2, true> listDistance(
            resources_,
            makeTempAlloc(AllocType::Other, stream),
            {vecs.getSize(0), 1});
    // We use this
    DeviceTensor<int, 2, true> listIds2d(
            resources_,
            makeTempAlloc(AllocType::Other, stream),
            {vecs.getSize(0), 1});

    quantizer_->query(
            vecs, 1, metric_, metricArg_, listDistance, listIds2d, false);

    // Copy the lists that we wish to append to back to the CPU
    // FIXME: really this can be into pinned memory and a true async
    // copy on a different stream; we can start the copy early, but it's
    // tiny
    auto listIdsHost = listIds2d.copyToVector(stream);

    // Now we add the encoded vectors to the individual lists
    // First, make sure that there is space available for adding the new
    // encoded vectors and indices

    // list id -> vectors being added
    std::unordered_map<int, std::vector<int>> listToVectorIds;

    // vector id -> which list it is being appended to
    std::vector<int> vectorIdToList(vecs.getSize(0));

    // vector id -> offset in list
    // (we already have vector id -> list id in listIds)
    std::vector<int> listOffsetHost(listIdsHost.size());

    // Number of valid vectors that we actually add; we return this
    int numAdded = 0;

    for (int i = 0; i < listIdsHost.size(); ++i) {
        int listId = listIdsHost[i];

        // The vector being added could be invalid (contains NaNs, etc.)
        if (listId < 0) {
            listOffsetHost[i] = -1;
            vectorIdToList[i] = -1;
            continue;
        }

        FAISS_ASSERT(listId < numLists_);
        ++numAdded;
        vectorIdToList[i] = listId;

        int offset = deviceListData_[listId]->numVecs;

        auto it = listToVectorIds.find(listId);
        if (it != listToVectorIds.end()) {
            offset += it->second.size();
            it->second.push_back(i);
        } else {
            listToVectorIds[listId] = std::vector<int>{i};
        }

        listOffsetHost[i] = offset;
    }

    // If we didn't add anything (all invalid vectors that didn't map to IVF
    // clusters), there is no need to continue
    if (numAdded == 0) {
        return 0;
    }

    // unique lists being added to
    std::vector<int> uniqueLists;

    for (auto& vecs : listToVectorIds) {
        uniqueLists.push_back(vecs.first);
    }

    std::sort(uniqueLists.begin(), uniqueLists.end());

    // In the same order as uniqueLists, list the vectors being added to that
    // list contiguously (unique list 0 vectors ...)(unique list 1 vectors ...)
    // ...
    std::vector<int> vectorsByUniqueList;

    // For each of the unique lists, the start offset in vectorsByUniqueList
    std::vector<int> uniqueListVectorStart;

    // For each of the unique lists, where we start appending in that list by
    // the vector offset
    std::vector<int> uniqueListStartOffset;

    // For each of the unique lists, find the vectors which should be appended
    // to that list
    for (auto ul : uniqueLists) {
        uniqueListVectorStart.push_back(vectorsByUniqueList.size());

        FAISS_ASSERT(listToVectorIds.count(ul) != 0);

        // The vectors we are adding to this list
        auto& vecs = listToVectorIds[ul];
        vectorsByUniqueList.insert(
                vectorsByUniqueList.end(), vecs.begin(), vecs.end());

        // How many vectors we previously had (which is where we start appending
        // on the device)
        uniqueListStartOffset.push_back(deviceListData_[ul]->numVecs);
    }

    // We terminate uniqueListVectorStart with the overall number of vectors
    // being added, which could be different from vecs.getSize(0) as some
    // vectors could be invalid
    uniqueListVectorStart.push_back(vectorsByUniqueList.size());

    // We need to resize the data structures for the inverted lists on
    // the GPUs, which means that they might need reallocation, which
    // means that their base address may change. Figure out the new base
    // addresses, and update those in a batch on the device
    {
        // Resize all of the lists that we are appending to
        for (auto& counts : listToVectorIds) {
            auto listId = counts.first;
            int numVecsToAdd = counts.second.size();

            auto& codes = deviceListData_[listId];
            int oldNumVecs = codes->numVecs;
            int newNumVecs = codes->numVecs + numVecsToAdd;

            auto newSizeBytes = getGpuVectorsEncodingSize_(newNumVecs);
            codes->data.resize(newSizeBytes, stream);
            codes->numVecs = newNumVecs;

            auto& indices = deviceListIndices_[listId];
            if ((indicesOptions_ == INDICES_32_BIT) ||
                (indicesOptions_ == INDICES_64_BIT)) {
                size_t indexSize = (indicesOptions_ == INDICES_32_BIT)
                        ? sizeof(int)
                        : sizeof(Index::idx_t);

                indices->data.resize(
                        indices->data.size() + numVecsToAdd * indexSize,
                        stream);
                FAISS_ASSERT(indices->numVecs == oldNumVecs);
                indices->numVecs = newNumVecs;

            } else if (indicesOptions_ == INDICES_CPU) {
                // indices are stored on the CPU side
                FAISS_ASSERT(listId < listOffsetToUserIndex_.size());

                auto& userIndices = listOffsetToUserIndex_[listId];
                userIndices.resize(newNumVecs);
            } else {
                // indices are not stored on the GPU or CPU side
                FAISS_ASSERT(indicesOptions_ == INDICES_IVF);
            }

            // This is used by the multi-pass query to decide how much scratch
            // space to allocate for intermediate results
            maxListLength_ = std::max(maxListLength_, newNumVecs);
        }

        // Update all pointers and sizes on the device for lists that we
        // appended to
        updateDeviceListInfo_(uniqueLists, stream);
    }

    // If we're maintaining the indices on the CPU side, update our
    // map. We already resized our map above.
    if (indicesOptions_ == INDICES_CPU) {
        // We need to maintain the indices on the CPU side
        HostTensor<Index::idx_t, 1, true> hostIndices(indices, stream);

        for (int i = 0; i < hostIndices.getSize(0); ++i) {
            int listId = listIdsHost[i];

            // The vector being added could be invalid (contains NaNs, etc.)
            if (listId < 0) {
                continue;
            }

            int offset = listOffsetHost[i];
            FAISS_ASSERT(offset >= 0);

            FAISS_ASSERT(listId < listOffsetToUserIndex_.size());
            auto& userIndices = listOffsetToUserIndex_[listId];

            FAISS_ASSERT(offset < userIndices.size());
            userIndices[offset] = hostIndices[i];
        }
    }

    // Copy the offsets to the GPU
    auto listIdsDevice = listIds2d.downcastOuter<1>();
    auto listOffsetDevice =
            toDeviceTemporary(resources_, listOffsetHost, stream);
    auto uniqueListsDevice = toDeviceTemporary(resources_, uniqueLists, stream);
    auto vectorsByUniqueListDevice =
            toDeviceTemporary(resources_, vectorsByUniqueList, stream);
    auto uniqueListVectorStartDevice =
            toDeviceTemporary(resources_, uniqueListVectorStart, stream);
    auto uniqueListStartOffsetDevice =
            toDeviceTemporary(resources_, uniqueListStartOffset, stream);

    // Actually encode and append the vectors
    appendVectors_(
            vecs,
            indices,
            uniqueListsDevice,
            vectorsByUniqueListDevice,
            uniqueListVectorStartDevice,
            uniqueListStartOffsetDevice,
            listIdsDevice,
            listOffsetDevice,
            stream);

    // We added this number of vectors
    return numAdded;
}

} // namespace gpu
} // namespace faiss