1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 // -*- c++ -*-
9 
10 #ifndef FAISS_ON_DISK_INVERTED_LISTS_H
11 #define FAISS_ON_DISK_INVERTED_LISTS_H
12 
13 #include <list>
14 #include <typeinfo>
15 #include <vector>
16 
17 #include <faiss/IndexIVF.h>
18 #include <faiss/index_io.h>
19 #include <faiss/invlists/InvertedListsIOHook.h>
20 
21 namespace faiss {
22 
23 struct LockLevels;
24 
25 struct OnDiskOneList {
26     size_t size;     // size of inverted list (entries)
27     size_t capacity; // allocated size (entries)
28     size_t offset;   // offset in buffer (bytes)
29     OnDiskOneList();
30 };
31 
32 /** On-disk storage of inverted lists.
33  *
34  * The data is stored in a mmapped chunk of memory (base ptointer ptr,
35  * size totsize). Each list is a range of memory that contains (object
36  * List) that contains:
37  *
38  * - uint8_t codes[capacity * code_size]
39  * - followed by idx_t ids[capacity]
40  *
41  * in each of the arrays, the size <= capacity first elements are
42  * used, the rest is not initialized.
43  *
44  * Addition and resize are supported by:
45  * - roundind up the capacity of the lists to a power of two
46  * - maintaining a list of empty slots, sorted by size.
47  * - resizing the mmapped block is adjusted as needed.
48  *
49  * An OnDiskInvertedLists is compact if the size == capacity for all
50  * lists and there are no available slots.
51  *
52  * Addition to the invlists is slow. For incremental add it is better
53  * to use a default ArrayInvertedLists object and convert it to an
54  * OnDisk with merge_from.
55  *
56  * When it is known that a set of lists will be accessed, it is useful
57  * to call prefetch_lists, that launches a set of threads to read the
58  * lists in parallel.
59  */
60 struct OnDiskInvertedLists : InvertedLists {
61     using List = OnDiskOneList;
62 
63     // size nlist
64     std::vector<List> lists;
65 
66     struct Slot {
67         size_t offset;   // bytes
68         size_t capacity; // bytes
69         Slot(size_t offset, size_t capacity);
70         Slot();
71     };
72 
73     // size whatever space remains
74     std::list<Slot> slots;
75 
76     std::string filename;
77     size_t totsize;
78     uint8_t* ptr;   // mmap base pointer
79     bool read_only; /// are inverted lists mapped read-only
80 
81     OnDiskInvertedLists(size_t nlist, size_t code_size, const char* filename);
82 
83     size_t list_size(size_t list_no) const override;
84     const uint8_t* get_codes(size_t list_no) const override;
85     const idx_t* get_ids(size_t list_no) const override;
86 
87     size_t add_entries(
88             size_t list_no,
89             size_t n_entry,
90             const idx_t* ids,
91             const uint8_t* code) override;
92 
93     void update_entries(
94             size_t list_no,
95             size_t offset,
96             size_t n_entry,
97             const idx_t* ids,
98             const uint8_t* code) override;
99 
100     void resize(size_t list_no, size_t new_size) override;
101 
102     // copy all inverted lists into *this, in compact form (without
103     // allocating slots)
104     size_t merge_from(
105             const InvertedLists** ils,
106             int n_il,
107             bool verbose = false);
108 
109     /// same as merge_from for a single invlist
110     size_t merge_from_1(const InvertedLists* il, bool verbose = false);
111 
112     /// restrict the inverted lists to l0:l1 without touching the mmapped region
113     void crop_invlists(size_t l0, size_t l1);
114 
115     void prefetch_lists(const idx_t* list_nos, int nlist) const override;
116 
117     ~OnDiskInvertedLists() override;
118 
119     // private
120 
121     LockLevels* locks;
122 
123     // encapsulates the threads that are busy prefeteching
124     struct OngoingPrefetch;
125     OngoingPrefetch* pf;
126     int prefetch_nthread;
127 
128     void do_mmap();
129     void update_totsize(size_t new_totsize);
130     void resize_locked(size_t list_no, size_t new_size);
131     size_t allocate_slot(size_t capacity);
132     void free_slot(size_t offset, size_t capacity);
133 
134     /// override all list sizes and make a packed storage
135     void set_all_lists_sizes(const size_t* sizes);
136 
137     // empty constructor for the I/O functions
138     OnDiskInvertedLists();
139 };
140 
141 struct OnDiskInvertedListsIOHook : InvertedListsIOHook {
142     OnDiskInvertedListsIOHook();
143     void write(const InvertedLists* ils, IOWriter* f) const override;
144     InvertedLists* read(IOReader* f, int io_flags) const override;
145     InvertedLists* read_ArrayInvertedLists(
146             IOReader* f,
147             int io_flags,
148             size_t nlist,
149             size_t code_size,
150             const std::vector<size_t>& sizes) const override;
151 };
152 
153 } // namespace faiss
154 
155 #endif
156