/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

// -*- c++ -*-

#include <faiss/invlists/OnDiskInvertedLists.h>

#include <pthread.h>

#include <cassert>
#include <cerrno>
#include <cstdio>
#include <cstring>
#include <unordered_set>

#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <faiss/impl/FaissAssert.h>
#include <faiss/utils/utils.h>

#include <faiss/impl/io.h>
#include <faiss/impl/io_macros.h>

namespace faiss {

/**********************************************
 * LockLevels
 **********************************************/
struct LockLevels {
    /* There are several lock1(n)'s (one per inverted list), one lock2
     * and one lock3.
     * Invariants:
     *    a single thread can hold one lock1(n) for some n
     *    a single thread can hold lock2, if it holds lock1(n) for some n
     *    a single thread can hold lock3, if it holds lock1(n) for some n
     *      AND lock2 AND no other thread holds lock1(m) for m != n
     */
    pthread_mutex_t mutex1;
    pthread_cond_t level1_cv;
    pthread_cond_t level2_cv;
    pthread_cond_t level3_cv;

    std::unordered_set<int> level1_holders; // which level1 locks are held
    int n_level2;       // nb threads that wait on level2
    bool level3_in_use; // a thread waits on level3
    bool level2_in_use;
    LockLevels() {
        pthread_mutex_init(&mutex1, nullptr);
        pthread_cond_init(&level1_cv, nullptr);
        pthread_cond_init(&level2_cv, nullptr);
        pthread_cond_init(&level3_cv, nullptr);
        n_level2 = 0;
        level2_in_use = false;
        level3_in_use = false;
    }

    ~LockLevels() {
        pthread_cond_destroy(&level1_cv);
        pthread_cond_destroy(&level2_cv);
        pthread_cond_destroy(&level3_cv);
        pthread_mutex_destroy(&mutex1);
    }

    void lock_1(int no) {
        pthread_mutex_lock(&mutex1);
        while (level3_in_use || level1_holders.count(no) > 0) {
            pthread_cond_wait(&level1_cv, &mutex1);
        }
        level1_holders.insert(no);
        pthread_mutex_unlock(&mutex1);
    }

    void unlock_1(int no) {
        pthread_mutex_lock(&mutex1);
        assert(level1_holders.count(no) == 1);
        level1_holders.erase(no);
        if (level3_in_use) { // a writer is waiting
            pthread_cond_signal(&level3_cv);
        } else {
            pthread_cond_broadcast(&level1_cv);
        }
        pthread_mutex_unlock(&mutex1);
    }

    void lock_2() {
        pthread_mutex_lock(&mutex1);
        n_level2++;
        if (level3_in_use) { // tell waiting level3 that we are blocked
            pthread_cond_signal(&level3_cv);
        }
        while (level2_in_use) {
            pthread_cond_wait(&level2_cv, &mutex1);
        }
        level2_in_use = true;
        pthread_mutex_unlock(&mutex1);
    }

    void unlock_2() {
        pthread_mutex_lock(&mutex1);
        level2_in_use = false;
        n_level2--;
        pthread_cond_signal(&level2_cv);
        pthread_mutex_unlock(&mutex1);
    }

    void lock_3() {
        pthread_mutex_lock(&mutex1);
        level3_in_use = true;
        // wait until there are no level1 holders anymore except the
        // ones that are waiting on level2 (we are holding lock2)
        while (level1_holders.size() > size_t(n_level2)) {
            pthread_cond_wait(&level3_cv, &mutex1);
        }
        // don't release the lock!
    }

    void unlock_3() {
        level3_in_use = false;
        // wake up all level1_holders
        pthread_cond_broadcast(&level1_cv);
        pthread_mutex_unlock(&mutex1);
    }

    void print() {
        pthread_mutex_lock(&mutex1);
        printf("State: level3_in_use=%d n_level2=%d level1_holders: [",
               int(level3_in_use),
               n_level2);
        for (int k : level1_holders) {
            printf("%d ", k);
        }
        printf("]\n");
        pthread_mutex_unlock(&mutex1);
    }
};
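
/* Illustrative sketch, not part of the library: how the three levels combine
 * when a list has to be reallocated and the whole file remapped. This mirrors
 * the call order used by add_entries() / resize_locked() / allocate_slot()
 * below; the scenario itself is hypothetical.
 *
 *   LockLevels locks;
 *   locks.lock_1(list_no); // per-list access (readers and writers)
 *   locks.lock_2();        // serialize the slot allocator
 *   locks.lock_3();        // wait out the other lists' holders...
 *   // ... remap the file (update_totsize) ...
 *   locks.unlock_3();      // releases mutex1 acquired by lock_3()
 *   locks.unlock_2();
 *   locks.unlock_1(list_no);
 */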
140
/**********************************************
 * OngoingPrefetch
 **********************************************/

struct OnDiskInvertedLists::OngoingPrefetch {
    struct Thread {
        pthread_t pth;
        OngoingPrefetch* pf;

        // touch all the data of one list so the OS pages it in; the
        // checksum is computed only so the reads cannot be elided
        bool one_list() {
            idx_t list_no = pf->get_next_list();
            if (list_no == -1)
                return false;
            const OnDiskInvertedLists* od = pf->od;
            od->locks->lock_1(list_no);
            size_t n = od->list_size(list_no);
            const Index::idx_t* idx = od->get_ids(list_no);
            const uint8_t* codes = od->get_codes(list_no);
            int cs = 0;
            for (size_t i = 0; i < n; i++) {
                cs += idx[i];
            }
            const idx_t* codes8 = (const idx_t*)codes;
            size_t n8 = n * od->code_size / 8;

            for (size_t i = 0; i < n8; i++) {
                cs += codes8[i];
            }
            od->locks->unlock_1(list_no);

            global_cs += cs & 1;
            return true;
        }
    };

    std::vector<Thread> threads;

    pthread_mutex_t list_ids_mutex;
    std::vector<idx_t> list_ids;
    int cur_list;

    // mutex for the list of tasks
    pthread_mutex_t mutex;

    // pretext to keep the checksum computation from being optimized out
    static int global_cs;

    const OnDiskInvertedLists* od;

    explicit OngoingPrefetch(const OnDiskInvertedLists* od) : od(od) {
        pthread_mutex_init(&mutex, nullptr);
        pthread_mutex_init(&list_ids_mutex, nullptr);
        cur_list = 0;
    }
195
    static void* prefetch_list(void* arg) {
        Thread* th = static_cast<Thread*>(arg);

        while (th->one_list())
            ;

        return nullptr;
    }

    idx_t get_next_list() {
        idx_t list_no = -1;
        pthread_mutex_lock(&list_ids_mutex);
        if (cur_list >= 0 && cur_list < int(list_ids.size())) {
            list_no = list_ids[cur_list++];
        }
        pthread_mutex_unlock(&list_ids_mutex);
        return list_no;
    }

    void prefetch_lists(const idx_t* list_nos, int n) {
        pthread_mutex_lock(&mutex);
        pthread_mutex_lock(&list_ids_mutex);
        list_ids.clear();
        pthread_mutex_unlock(&list_ids_mutex);
        for (auto& th : threads) {
            pthread_join(th.pth, nullptr);
        }

        threads.resize(0);
        cur_list = 0;
        int nt = std::min(n, od->prefetch_nthread);

        if (nt > 0) {
            // prepare tasks
            for (int i = 0; i < n; i++) {
                idx_t list_no = list_nos[i];
                if (list_no >= 0 && od->list_size(list_no) > 0) {
                    list_ids.push_back(list_no);
                }
            }
            // prepare threads
            threads.resize(nt);
            for (Thread& th : threads) {
                th.pf = this;
                pthread_create(&th.pth, nullptr, prefetch_list, &th);
            }
        }
        pthread_mutex_unlock(&mutex);
    }

    ~OngoingPrefetch() {
        pthread_mutex_lock(&mutex);
        for (auto& th : threads) {
            pthread_join(th.pth, nullptr);
        }
        pthread_mutex_unlock(&mutex);
        pthread_mutex_destroy(&mutex);
        pthread_mutex_destroy(&list_ids_mutex);
    }
};
256
int OnDiskInvertedLists::OngoingPrefetch::global_cs = 0;

void OnDiskInvertedLists::prefetch_lists(const idx_t* list_nos, int n) const {
    pf->prefetch_lists(list_nos, n);
}
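
/* Illustrative usage, with made-up list numbers: an IVF search would pass the
 * nprobe lists it is about to scan. The call returns immediately; the Thread
 * workers fault the pages in the background.
 *
 *   idx_t to_visit[] = {12, 45, 78};
 *   od->prefetch_lists(to_visit, 3);
 */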
262
/**********************************************
 * OnDiskInvertedLists: mmapping
 **********************************************/

void OnDiskInvertedLists::do_mmap() {
    const char* rw_flags = read_only ? "r" : "r+";
    int prot = read_only ? PROT_READ : PROT_WRITE | PROT_READ;
    FILE* f = fopen(filename.c_str(), rw_flags);
    FAISS_THROW_IF_NOT_FMT(
            f,
            "could not open %s in mode %s: %s",
            filename.c_str(),
            rw_flags,
            strerror(errno));

    uint8_t* ptro =
            (uint8_t*)mmap(nullptr, totsize, prot, MAP_SHARED, fileno(f), 0);

    FAISS_THROW_IF_NOT_FMT(
            ptro != MAP_FAILED,
            "could not mmap %s: %s",
            filename.c_str(),
            strerror(errno));
    ptr = ptro;
    // closing the FILE does not invalidate the mapping
    fclose(f);
}
289
void OnDiskInvertedLists::update_totsize(size_t new_size) {
    // unmap file
    if (ptr != nullptr) {
        int err = munmap(ptr, totsize);
        FAISS_THROW_IF_NOT_FMT(err == 0, "munmap error: %s", strerror(errno));
    }
    if (totsize == 0) {
        // must create the file before truncating it
        FILE* f = fopen(filename.c_str(), "w");
        FAISS_THROW_IF_NOT_FMT(
                f,
                "could not open %s in mode w: %s",
                filename.c_str(),
                strerror(errno));
        fclose(f);
    }

    if (new_size > totsize) {
        if (!slots.empty() &&
            slots.back().offset + slots.back().capacity == totsize) {
            slots.back().capacity += new_size - totsize;
        } else {
            slots.push_back(Slot(totsize, new_size - totsize));
        }
    } else {
        assert(!"not implemented");
    }

    totsize = new_size;

    // resize the file
    printf("resizing %s to %zu bytes\n", filename.c_str(), totsize);

    int err = truncate(filename.c_str(), totsize);

    FAISS_THROW_IF_NOT_FMT(
            err == 0,
            "truncate %s to %zu: %s",
            filename.c_str(),
            totsize,
            strerror(errno));
    do_mmap();
}
333
/**********************************************
 * OnDiskInvertedLists
 **********************************************/

#define INVALID_OFFSET (size_t)(-1)

OnDiskOneList::OnDiskOneList() : size(0), capacity(0), offset(INVALID_OFFSET) {}

OnDiskInvertedLists::Slot::Slot(size_t offset, size_t capacity)
        : offset(offset), capacity(capacity) {}

OnDiskInvertedLists::Slot::Slot() : offset(0), capacity(0) {}
346
OnDiskInvertedLists::OnDiskInvertedLists(
        size_t nlist,
        size_t code_size,
        const char* filename)
        : InvertedLists(nlist, code_size),
          filename(filename),
          totsize(0),
          ptr(nullptr),
          read_only(false),
          locks(new LockLevels()),
          pf(new OngoingPrefetch(this)),
          prefetch_nthread(32) {
    lists.resize(nlist);

    // the slots list starts empty
}
363
OnDiskInvertedLists::OnDiskInvertedLists() : OnDiskInvertedLists(0, 0, "") {}

OnDiskInvertedLists::~OnDiskInvertedLists() {
    delete pf;

    // unmap all lists
    if (ptr != nullptr) {
        int err = munmap(ptr, totsize);
        if (err != 0) {
            fprintf(stderr, "munmap error: %s\n", strerror(errno));
        }
    }
    delete locks;
}
378
size_t OnDiskInvertedLists::list_size(size_t list_no) const {
    return lists[list_no].size;
}

const uint8_t* OnDiskInvertedLists::get_codes(size_t list_no) const {
    if (lists[list_no].offset == INVALID_OFFSET) {
        return nullptr;
    }

    return ptr + lists[list_no].offset;
}

const Index::idx_t* OnDiskInvertedLists::get_ids(size_t list_no) const {
    if (lists[list_no].offset == INVALID_OFFSET) {
        return nullptr;
    }

    return (const idx_t*)(ptr + lists[list_no].offset +
                          code_size * lists[list_no].capacity);
}
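
/* Layout of one list inside the mmapped file, as implied by the two accessors
 * above: codes first, then ids, both dimensioned by the list *capacity*, not
 * its current size:
 *
 *   offset
 *   |- codes: capacity * code_size bytes -|- ids: capacity * sizeof(idx_t) -|
 */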
399
void OnDiskInvertedLists::update_entries(
        size_t list_no,
        size_t offset,
        size_t n_entry,
        const idx_t* ids_in,
        const uint8_t* codes_in) {
    FAISS_THROW_IF_NOT(!read_only);
    if (n_entry == 0)
        return;
    const List& l = lists[list_no];
    assert(n_entry + offset <= l.size);
    idx_t* ids = const_cast<idx_t*>(get_ids(list_no));
    memcpy(ids + offset, ids_in, sizeof(ids_in[0]) * n_entry);
    uint8_t* codes = const_cast<uint8_t*>(get_codes(list_no));
    memcpy(codes + offset * code_size, codes_in, code_size * n_entry);
}

size_t OnDiskInvertedLists::add_entries(
        size_t list_no,
        size_t n_entry,
        const idx_t* ids,
        const uint8_t* code) {
    FAISS_THROW_IF_NOT(!read_only);
    locks->lock_1(list_no);
    size_t o = list_size(list_no);
    resize_locked(list_no, n_entry + o);
    update_entries(list_no, o, n_entry, ids, code);
    locks->unlock_1(list_no);
    return o;
}
430
void OnDiskInvertedLists::resize(size_t list_no, size_t new_size) {
    FAISS_THROW_IF_NOT(!read_only);
    locks->lock_1(list_no);
    resize_locked(list_no, new_size);
    locks->unlock_1(list_no);
}

void OnDiskInvertedLists::resize_locked(size_t list_no, size_t new_size) {
    List& l = lists[list_no];

    if (new_size <= l.capacity && new_size > l.capacity / 2) {
        l.size = new_size;
        return;
    }

    // otherwise we release the current slot, and find a new one

    locks->lock_2();
    free_slot(l.offset, l.capacity);

    List new_l;

    if (new_size == 0) {
        new_l = List();
    } else {
        new_l.size = new_size;
        new_l.capacity = 1;
        while (new_l.capacity < new_size) {
            new_l.capacity *= 2;
        }
        new_l.offset =
                allocate_slot(new_l.capacity * (sizeof(idx_t) + code_size));
    }

    // copy common data
    if (l.offset != new_l.offset) {
        size_t n = std::min(new_size, l.size);
        if (n > 0) {
            memcpy(ptr + new_l.offset, get_codes(list_no), n * code_size);
            memcpy(ptr + new_l.offset + new_l.capacity * code_size,
                   get_ids(list_no),
                   n * sizeof(idx_t));
        }
    }

    lists[list_no] = new_l;
    locks->unlock_2();
}
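
/* Worked example with made-up numbers (code_size = 16): growing a list from
 * size 5 (capacity 8) to size 9 fails the in-place test above, so the old
 * slot is freed and a new slot of the next power of two is allocated:
 * capacity 16, i.e. 16 * (sizeof(idx_t) + 16) = 16 * 24 = 384 bytes.
 * Shrinking to below capacity / 2 reallocates the same way. */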
479
size_t OnDiskInvertedLists::allocate_slot(size_t capacity) {
    // should hold lock2

    // first-fit scan of the free list
    auto it = slots.begin();
    while (it != slots.end() && it->capacity < capacity) {
        it++;
    }

    if (it == slots.end()) {
        // not enough capacity: grow the file and retry
        size_t new_size = totsize == 0 ? 32 : totsize * 2;
        while (new_size - totsize < capacity) {
            new_size *= 2;
        }
        locks->lock_3();
        update_totsize(new_size);
        locks->unlock_3();
        it = slots.begin();
        while (it != slots.end() && it->capacity < capacity) {
            it++;
        }
        assert(it != slots.end());
    }

    size_t o = it->offset;
    if (it->capacity == capacity) {
        slots.erase(it);
    } else {
        // take from beginning of slot
        it->capacity -= capacity;
        it->offset += capacity;
    }

    return o;
}
515
void OnDiskInvertedLists::free_slot(size_t offset, size_t capacity) {
    // should hold lock2
    if (capacity == 0)
        return;

    auto it = slots.begin();
    while (it != slots.end() && it->offset <= offset) {
        it++;
    }

    size_t inf = 1UL << 60;

    size_t end_prev = inf;
    if (it != slots.begin()) {
        auto prev = it;
        prev--;
        end_prev = prev->offset + prev->capacity;
    }

    size_t begin_next = inf;
    if (it != slots.end()) {
        begin_next = it->offset;
    }

    assert(end_prev == inf || offset >= end_prev);
    assert(offset + capacity <= begin_next);

    if (offset == end_prev) {
        // merge with the previous slot
        auto prev = it;
        prev--;
        if (offset + capacity == begin_next) {
            prev->capacity += capacity + it->capacity;
            slots.erase(it);
        } else {
            prev->capacity += capacity;
        }
    } else {
        if (offset + capacity == begin_next) {
            // merge with the next slot
            it->offset -= capacity;
            it->capacity += capacity;
        } else {
            slots.insert(it, Slot(offset, capacity));
        }
    }

    // TODO shrink global storage if needed
}
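
/* Worked example with made-up offsets: the free list is kept sorted by offset
 * and coalesced on both sides. With slots (50, 50) and (150, 30) in the list,
 * free_slot(100, 50) merges all three into the single slot (50, 130). */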
563
/*****************************************
 * Compact form
 *****************************************/

size_t OnDiskInvertedLists::merge_from(
        const InvertedLists** ils,
        int n_il,
        bool verbose) {
    FAISS_THROW_IF_NOT_MSG(
            totsize == 0, "works only on an empty InvertedLists");

    std::vector<size_t> sizes(nlist);
    for (int i = 0; i < n_il; i++) {
        const InvertedLists* il = ils[i];
        FAISS_THROW_IF_NOT(il->nlist == nlist && il->code_size == code_size);

        for (size_t j = 0; j < nlist; j++) {
            sizes[j] += il->list_size(j);
        }
    }

    size_t cums = 0;
    size_t ntotal = 0;
    for (size_t j = 0; j < nlist; j++) {
        ntotal += sizes[j];
        lists[j].size = 0;
        lists[j].capacity = sizes[j];
        lists[j].offset = cums;
        cums += lists[j].capacity * (sizeof(idx_t) + code_size);
    }

    update_totsize(cums);

    size_t nmerged = 0;
    double t0 = getmillisecs(), last_t = t0;

#pragma omp parallel for
    for (size_t j = 0; j < nlist; j++) {
        List& l = lists[j];
        for (int i = 0; i < n_il; i++) {
            const InvertedLists* il = ils[i];
            size_t n_entry = il->list_size(j);
            l.size += n_entry;
            update_entries(
                    j,
                    l.size - n_entry,
                    n_entry,
                    ScopedIds(il, j).get(),
                    ScopedCodes(il, j).get());
        }
        assert(l.size == l.capacity);
        if (verbose) {
#pragma omp critical
            {
                nmerged++;
                double t1 = getmillisecs();
                if (t1 - last_t > 500) {
                    printf("merged %zu lists in %.3f s\r",
                           nmerged,
                           (t1 - t0) / 1000.0);
                    fflush(stdout);
                    last_t = t1;
                }
            }
        }
    }
    if (verbose) {
        printf("\n");
    }

    return ntotal;
}

size_t OnDiskInvertedLists::merge_from_1(
        const InvertedLists* ils,
        bool verbose) {
    return merge_from(&ils, 1, verbose);
}
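
/* Illustrative sketch (hypothetical path): the typical use of merge_from_1 is
 * to move the lists of a trained, populated IndexIVF to disk and attach the
 * result to the index:
 *
 *   OnDiskInvertedLists ondisk(
 *           index->nlist, index->code_size, "/tmp/merged_index.ivfdata");
 *   ondisk.merge_from_1(index->invlists, true);
 *   index->replace_invlists(&ondisk, false); // false: index does not own it
 */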
642
void OnDiskInvertedLists::crop_invlists(size_t l0, size_t l1) {
    FAISS_THROW_IF_NOT(0 <= l0 && l0 <= l1 && l1 <= nlist);

    std::vector<List> new_lists(l1 - l0);
    memcpy(new_lists.data(), &lists[l0], (l1 - l0) * sizeof(List));

    lists.swap(new_lists);

    nlist = l1 - l0;
}

void OnDiskInvertedLists::set_all_lists_sizes(const size_t* sizes) {
    size_t ofs = 0;
    for (size_t i = 0; i < nlist; i++) {
        lists[i].offset = ofs;
        lists[i].capacity = lists[i].size = sizes[i];
        ofs += sizes[i] * (sizeof(idx_t) + code_size);
    }
}
662
/*******************************************************
 * I/O support via callbacks
 *******************************************************/

OnDiskInvertedListsIOHook::OnDiskInvertedListsIOHook()
        : InvertedListsIOHook("ilod", typeid(OnDiskInvertedLists).name()) {}

void OnDiskInvertedListsIOHook::write(const InvertedLists* ils, IOWriter* f)
        const {
    uint32_t h = fourcc("ilod");
    WRITE1(h);
    WRITE1(ils->nlist);
    WRITE1(ils->code_size);
    const OnDiskInvertedLists* od =
            dynamic_cast<const OnDiskInvertedLists*>(ils);
    // this is a POD object
    WRITEVECTOR(od->lists);

    {
        std::vector<OnDiskInvertedLists::Slot> v(
                od->slots.begin(), od->slots.end());
        WRITEVECTOR(v);
    }
    {
        std::vector<char> x(od->filename.begin(), od->filename.end());
        WRITEVECTOR(x);
    }
    WRITE1(od->totsize);
}
692
InvertedLists* OnDiskInvertedListsIOHook::read(IOReader* f, int io_flags)
        const {
    OnDiskInvertedLists* od = new OnDiskInvertedLists();
    od->read_only = io_flags & IO_FLAG_READ_ONLY;
    READ1(od->nlist);
    READ1(od->code_size);
    // this is a POD object
    READVECTOR(od->lists);
    {
        std::vector<OnDiskInvertedLists::Slot> v;
        READVECTOR(v);
        od->slots.assign(v.begin(), v.end());
    }
    {
        std::vector<char> x;
        READVECTOR(x);
        od->filename.assign(x.begin(), x.end());

        if (io_flags & IO_FLAG_ONDISK_SAME_DIR) {
            FileIOReader* reader = dynamic_cast<FileIOReader*>(f);
            FAISS_THROW_IF_NOT_MSG(
                    reader,
                    "IO_FLAG_ONDISK_SAME_DIR only supported "
                    "when reading from file");
            std::string indexname = reader->name;
            std::string dirname = "./";
            size_t slash = indexname.find_last_of('/');
            if (slash != std::string::npos) {
                dirname = indexname.substr(0, slash + 1);
            }
            std::string filename = od->filename;
            slash = filename.find_last_of('/');
            if (slash != std::string::npos) {
                filename = filename.substr(slash + 1);
            }
            filename = dirname + filename;
            printf("IO_FLAG_ONDISK_SAME_DIR: "
                   "updating ondisk filename from %s to %s\n",
                   od->filename.c_str(),
                   filename.c_str());
            od->filename = filename;
        }
    }
    READ1(od->totsize);
    if (!(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
        od->do_mmap();
    }
    return od;
}
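
/* Illustrative sketch (hypothetical path): this hook runs inside
 * faiss::read_index(); the flags below are the ones handled above.
 *
 *   faiss::Index* index = faiss::read_index(
 *           "/path/to/populated.index",
 *           faiss::IO_FLAG_ONDISK_SAME_DIR | faiss::IO_FLAG_READ_ONLY);
 */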
742
/** read from an ArrayInvertedLists into this inverted list type */
InvertedLists* OnDiskInvertedListsIOHook::read_ArrayInvertedLists(
        IOReader* f,
        int /* io_flags */,
        size_t nlist,
        size_t code_size,
        const std::vector<size_t>& sizes) const {
    auto ails = new OnDiskInvertedLists();
    ails->nlist = nlist;
    ails->code_size = code_size;
    ails->read_only = true;
    ails->lists.resize(nlist);

    FileIOReader* reader = dynamic_cast<FileIOReader*>(f);
    FAISS_THROW_IF_NOT_MSG(reader, "mmap only supported for File objects");
    FILE* fdesc = reader->f;
    size_t o0 = ftell(fdesc);
    size_t o = o0;
    { // do the mmap
        struct stat buf;
        int ret = fstat(fileno(fdesc), &buf);
        FAISS_THROW_IF_NOT_FMT(ret == 0, "fstat failed: %s", strerror(errno));
        ails->totsize = buf.st_size;
        ails->ptr = (uint8_t*)mmap(
                nullptr,
                ails->totsize,
                PROT_READ,
                MAP_SHARED,
                fileno(fdesc),
                0);
        FAISS_THROW_IF_NOT_FMT(
                ails->ptr != MAP_FAILED,
                "could not mmap: %s",
                strerror(errno));
    }

    FAISS_THROW_IF_NOT(o <= ails->totsize);

    for (size_t i = 0; i < ails->nlist; i++) {
        OnDiskInvertedLists::List& l = ails->lists[i];
        l.size = l.capacity = sizes[i];
        l.offset = o;
        o += l.size * (sizeof(OnDiskInvertedLists::idx_t) + ails->code_size);
    }
    // resume normal reading of the file
    fseek(fdesc, o, SEEK_SET);

    return ails;
}

} // namespace faiss