1 /** 2 * Copyright (c) Facebook, Inc. and its affiliates. 3 * 4 * This source code is licensed under the MIT license found in the 5 * LICENSE file in the root directory of this source tree. 6 */ 7 8 // -*- c++ -*- 9 10 #ifndef FAISS_ON_DISK_INVERTED_LISTS_H 11 #define FAISS_ON_DISK_INVERTED_LISTS_H 12 13 #include <list> 14 #include <typeinfo> 15 #include <vector> 16 17 #include <faiss/IndexIVF.h> 18 #include <faiss/index_io.h> 19 #include <faiss/invlists/InvertedListsIOHook.h> 20 21 namespace faiss { 22 23 struct LockLevels; 24 25 struct OnDiskOneList { 26 size_t size; // size of inverted list (entries) 27 size_t capacity; // allocated size (entries) 28 size_t offset; // offset in buffer (bytes) 29 OnDiskOneList(); 30 }; 31 32 /** On-disk storage of inverted lists. 33 * 34 * The data is stored in a mmapped chunk of memory (base ptointer ptr, 35 * size totsize). Each list is a range of memory that contains (object 36 * List) that contains: 37 * 38 * - uint8_t codes[capacity * code_size] 39 * - followed by idx_t ids[capacity] 40 * 41 * in each of the arrays, the size <= capacity first elements are 42 * used, the rest is not initialized. 43 * 44 * Addition and resize are supported by: 45 * - roundind up the capacity of the lists to a power of two 46 * - maintaining a list of empty slots, sorted by size. 47 * - resizing the mmapped block is adjusted as needed. 48 * 49 * An OnDiskInvertedLists is compact if the size == capacity for all 50 * lists and there are no available slots. 51 * 52 * Addition to the invlists is slow. For incremental add it is better 53 * to use a default ArrayInvertedLists object and convert it to an 54 * OnDisk with merge_from. 55 * 56 * When it is known that a set of lists will be accessed, it is useful 57 * to call prefetch_lists, that launches a set of threads to read the 58 * lists in parallel. 59 */ 60 struct OnDiskInvertedLists : InvertedLists { 61 using List = OnDiskOneList; 62 63 // size nlist 64 std::vector<List> lists; 65 66 struct Slot { 67 size_t offset; // bytes 68 size_t capacity; // bytes 69 Slot(size_t offset, size_t capacity); 70 Slot(); 71 }; 72 73 // size whatever space remains 74 std::list<Slot> slots; 75 76 std::string filename; 77 size_t totsize; 78 uint8_t* ptr; // mmap base pointer 79 bool read_only; /// are inverted lists mapped read-only 80 81 OnDiskInvertedLists(size_t nlist, size_t code_size, const char* filename); 82 83 size_t list_size(size_t list_no) const override; 84 const uint8_t* get_codes(size_t list_no) const override; 85 const idx_t* get_ids(size_t list_no) const override; 86 87 size_t add_entries( 88 size_t list_no, 89 size_t n_entry, 90 const idx_t* ids, 91 const uint8_t* code) override; 92 93 void update_entries( 94 size_t list_no, 95 size_t offset, 96 size_t n_entry, 97 const idx_t* ids, 98 const uint8_t* code) override; 99 100 void resize(size_t list_no, size_t new_size) override; 101 102 // copy all inverted lists into *this, in compact form (without 103 // allocating slots) 104 size_t merge_from( 105 const InvertedLists** ils, 106 int n_il, 107 bool verbose = false); 108 109 /// same as merge_from for a single invlist 110 size_t merge_from_1(const InvertedLists* il, bool verbose = false); 111 112 /// restrict the inverted lists to l0:l1 without touching the mmapped region 113 void crop_invlists(size_t l0, size_t l1); 114 115 void prefetch_lists(const idx_t* list_nos, int nlist) const override; 116 117 ~OnDiskInvertedLists() override; 118 119 // private 120 121 LockLevels* locks; 122 123 // encapsulates the threads that are busy prefeteching 124 struct OngoingPrefetch; 125 OngoingPrefetch* pf; 126 int prefetch_nthread; 127 128 void do_mmap(); 129 void update_totsize(size_t new_totsize); 130 void resize_locked(size_t list_no, size_t new_size); 131 size_t allocate_slot(size_t capacity); 132 void free_slot(size_t offset, size_t capacity); 133 134 /// override all list sizes and make a packed storage 135 void set_all_lists_sizes(const size_t* sizes); 136 137 // empty constructor for the I/O functions 138 OnDiskInvertedLists(); 139 }; 140 141 struct OnDiskInvertedListsIOHook : InvertedListsIOHook { 142 OnDiskInvertedListsIOHook(); 143 void write(const InvertedLists* ils, IOWriter* f) const override; 144 InvertedLists* read(IOReader* f, int io_flags) const override; 145 InvertedLists* read_ArrayInvertedLists( 146 IOReader* f, 147 int io_flags, 148 size_t nlist, 149 size_t code_size, 150 const std::vector<size_t>& sizes) const override; 151 }; 152 153 } // namespace faiss 154 155 #endif 156