// Copyright (c) 2018 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#include <map>

#include <dbwrapper.h>
#include <index/blockfilterindex.h>
#include <util/system.h>
#include <validation.h>

/* The index database stores three items for each block: the disk location of the encoded filter,
 * its dSHA256 hash, and the header. Those belonging to blocks on the active chain are indexed by
 * height, and those belonging to blocks that have been reorganized out of the active chain are
 * indexed by block hash. This ensures that filter data for any block that becomes part of the
 * active chain can always be retrieved, alleviating timing concerns.
 *
 * The filters themselves are stored in flat files and referenced by the LevelDB entries. This
 * minimizes the amount of data written to LevelDB and keeps the database values constant size. The
 * disk location of the next block filter to be written (represented as a FlatFilePos) is stored
 * under the DB_FILTER_POS key.
 *
 * Keys for the height index have the type [DB_BLOCK_HEIGHT, uint32 (BE)]. The height is represented
 * as big-endian so that sequential reads of filters by height are fast.
 * Keys for the hash index have the type [DB_BLOCK_HASH, uint256].
 */
constexpr char DB_BLOCK_HASH = 's';
constexpr char DB_BLOCK_HEIGHT = 't';
constexpr char DB_FILTER_POS = 'P';

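/** Maximum size of a fltr?????.dat file before a new one is started */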
constexpr unsigned int MAX_FLTR_FILE_SIZE = 0x1000000; // 16 MiB
/** The pre-allocation chunk size for fltr?????.dat files */
constexpr unsigned int FLTR_FILE_CHUNK_SIZE = 0x100000; // 1 MiB
/** Maximum size of the cfheaders cache.
 *  We have a limit to prevent a bug in filling this cache
 *  potentially turning into an OOM. At 2000 entries, this cache
 *  is big enough for a 2,000,000-block chain, which
 *  should be enough until ~2047. */
constexpr size_t CF_HEADERS_CACHE_MAX_SZ{2000};

namespace {

struct DBVal {
    uint256 hash;
    uint256 header;
    FlatFilePos pos;

    SERIALIZE_METHODS(DBVal, obj) { READWRITE(obj.hash, obj.header, obj.pos); }
};

struct DBHeightKey {
    int height;

    DBHeightKey() : height(0) {}
    explicit DBHeightKey(int height_in) : height(height_in) {}

    template<typename Stream>
    void Serialize(Stream& s) const
    {
        ser_writedata8(s, DB_BLOCK_HEIGHT);
        ser_writedata32be(s, height);
    }

    template<typename Stream>
    void Unserialize(Stream& s)
    {
        char prefix = ser_readdata8(s);
        if (prefix != DB_BLOCK_HEIGHT) {
            throw std::ios_base::failure("Invalid format for block filter index DB height key");
        }
        height = ser_readdata32be(s);
    }
};

struct DBHashKey {
    uint256 hash;

    explicit DBHashKey(const uint256& hash_in) : hash(hash_in) {}

    SERIALIZE_METHODS(DBHashKey, obj) {
        char prefix = DB_BLOCK_HASH;
        READWRITE(prefix);
        if (prefix != DB_BLOCK_HASH) {
            throw std::ios_base::failure("Invalid format for block filter index DB hash key");
        }

        READWRITE(obj.hash);
    }
};

}; // namespace

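/** Map of all instantiated block filter indexes, keyed by filter type. */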
static std::map<BlockFilterType, BlockFilterIndex> g_filter_indexes;

BlockFilterIndex::BlockFilterIndex(BlockFilterType filter_type,
                                   size_t n_cache_size, bool f_memory, bool f_wipe)
    : m_filter_type(filter_type)
{
    const std::string& filter_name = BlockFilterTypeName(filter_type);
    if (filter_name.empty()) throw std::invalid_argument("unknown filter_type");

    fs::path path = GetDataDir() / "indexes" / "blockfilter" / filter_name;
    fs::create_directories(path);

    m_name = filter_name + " block filter index";
    m_db = MakeUnique<BaseIndex::DB>(path / "db", n_cache_size, f_memory, f_wipe);
    m_filter_fileseq = MakeUnique<FlatFileSeq>(std::move(path), "fltr", FLTR_FILE_CHUNK_SIZE);
}

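/** Read the position of the next filter to write from the DB, starting at the first location if
 *  the key is absent, then delegate to BaseIndex::Init(). */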
bool BlockFilterIndex::Init()
{
    if (!m_db->Read(DB_FILTER_POS, m_next_filter_pos)) {
        // Check that the cause of the read failure is that the key does not exist. Any other errors
        // indicate database corruption or a disk failure, and starting the index would cause
        // further corruption.
        if (m_db->Exists(DB_FILTER_POS)) {
            return error("%s: Cannot read current %s state; index may be corrupted",
                         __func__, GetName());
        }

        // If the DB_FILTER_POS is not set, then initialize to the first location.
        m_next_filter_pos.nFile = 0;
        m_next_filter_pos.nPos = 0;
    }
    return BaseIndex::Init();
}

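/** Flush the currently open filter file to disk and record the next filter position in the same
 *  batch as the base index state, so the DB never references filter data that has not been
 *  persisted. */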
bool BlockFilterIndex::CommitInternal(CDBBatch& batch)
{
    const FlatFilePos& pos = m_next_filter_pos;

    // Flush current filter file to disk.
    CAutoFile file(m_filter_fileseq->Open(pos), SER_DISK, CLIENT_VERSION);
    if (file.IsNull()) {
        return error("%s: Failed to open filter file %d", __func__, pos.nFile);
    }
    if (!FileCommit(file.Get())) {
        return error("%s: Failed to commit filter file %d", __func__, pos.nFile);
    }

    batch.Write(DB_FILTER_POS, pos);
    return BaseIndex::CommitInternal(batch);
}

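/** Read and deserialize a block filter from the flat file at the given position. */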
bool BlockFilterIndex::ReadFilterFromDisk(const FlatFilePos& pos, BlockFilter& filter) const
{
    CAutoFile filein(m_filter_fileseq->Open(pos, true), SER_DISK, CLIENT_VERSION);
    if (filein.IsNull()) {
        return false;
    }

    uint256 block_hash;
    std::vector<unsigned char> encoded_filter;
    try {
        filein >> block_hash >> encoded_filter;
        filter = BlockFilter(GetFilterType(), block_hash, std::move(encoded_filter));
    }
    catch (const std::exception& e) {
        return error("%s: Failed to deserialize block filter from disk: %s", __func__, e.what());
    }

    return true;
}

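/** Append the serialized filter to the flat file at pos, rolling over to a new file if the write
 *  would exceed MAX_FLTR_FILE_SIZE. Returns the number of bytes written, or 0 on failure. */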
size_t BlockFilterIndex::WriteFilterToDisk(FlatFilePos& pos, const BlockFilter& filter)
{
    assert(filter.GetFilterType() == GetFilterType());

    size_t data_size =
        GetSerializeSize(filter.GetBlockHash(), CLIENT_VERSION) +
        GetSerializeSize(filter.GetEncodedFilter(), CLIENT_VERSION);

    // If writing the filter would overflow the file, flush and move to the next one.
    if (pos.nPos + data_size > MAX_FLTR_FILE_SIZE) {
        CAutoFile last_file(m_filter_fileseq->Open(pos), SER_DISK, CLIENT_VERSION);
        if (last_file.IsNull()) {
            LogPrintf("%s: Failed to open filter file %d\n", __func__, pos.nFile);
            return 0;
        }
        if (!TruncateFile(last_file.Get(), pos.nPos)) {
            LogPrintf("%s: Failed to truncate filter file %d\n", __func__, pos.nFile);
            return 0;
        }
        if (!FileCommit(last_file.Get())) {
            LogPrintf("%s: Failed to commit filter file %d\n", __func__, pos.nFile);
            return 0;
        }

        pos.nFile++;
        pos.nPos = 0;
    }

    // Pre-allocate sufficient space for filter data.
    bool out_of_space;
    m_filter_fileseq->Allocate(pos, data_size, out_of_space);
    if (out_of_space) {
        LogPrintf("%s: out of disk space\n", __func__);
        return 0;
    }

    CAutoFile fileout(m_filter_fileseq->Open(pos), SER_DISK, CLIENT_VERSION);
    if (fileout.IsNull()) {
        LogPrintf("%s: Failed to open filter file %d\n", __func__, pos.nFile);
        return 0;
    }

    fileout << filter.GetBlockHash() << filter.GetEncodedFilter();
    return data_size;
}

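/** Build the filter for a newly connected block, chain its header onto the previous block's filter
 *  header, write the filter to disk, and index the result by height. */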
bool BlockFilterIndex::WriteBlock(const CBlock& block, const CBlockIndex* pindex)
{
    CBlockUndo block_undo;
    uint256 prev_header;

    if (pindex->nHeight > 0) {
        if (!UndoReadFromDisk(block_undo, pindex)) {
            return false;
        }

        std::pair<uint256, DBVal> read_out;
        if (!m_db->Read(DBHeightKey(pindex->nHeight - 1), read_out)) {
            return false;
        }

        uint256 expected_block_hash = pindex->pprev->GetBlockHash();
        if (read_out.first != expected_block_hash) {
            return error("%s: previous block header belongs to unexpected block %s; expected %s",
                         __func__, read_out.first.ToString(), expected_block_hash.ToString());
        }

        prev_header = read_out.second.header;
    }

    BlockFilter filter(m_filter_type, block, block_undo);

    size_t bytes_written = WriteFilterToDisk(m_next_filter_pos, filter);
    if (bytes_written == 0) return false;

    std::pair<uint256, DBVal> value;
    value.first = pindex->GetBlockHash();
    value.second.hash = filter.GetHash();
    value.second.header = filter.ComputeHeader(prev_header);
    value.second.pos = m_next_filter_pos;

    if (!m_db->Write(DBHeightKey(pindex->nHeight), value)) {
        return false;
    }

    m_next_filter_pos.nPos += bytes_written;
    return true;
}

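/** Copy the height-indexed entries in the range [start_height, stop_height] into the hash index,
 *  so they remain reachable after the height entries are overwritten during a reorg. */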
static bool CopyHeightIndexToHashIndex(CDBIterator& db_it, CDBBatch& batch,
                                       const std::string& index_name,
                                       int start_height, int stop_height)
{
    DBHeightKey key(start_height);
    db_it.Seek(key);

    for (int height = start_height; height <= stop_height; ++height) {
        if (!db_it.GetKey(key) || key.height != height) {
            return error("%s: unexpected key in %s: expected (%c, %d)",
                         __func__, index_name, DB_BLOCK_HEIGHT, height);
        }

        std::pair<uint256, DBVal> value;
        if (!db_it.GetValue(value)) {
            return error("%s: unable to read value in %s at key (%c, %d)",
                         __func__, index_name, DB_BLOCK_HEIGHT, height);
        }

        batch.Write(DBHashKey(value.first), std::move(value.second));

        db_it.Next();
    }
    return true;
}

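/** Handle a reorg from current_tip back to new_tip: copy the entries for the disconnected blocks
 *  into the hash index so they remain retrievable once their heights are reused by the new chain. */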
bool BlockFilterIndex::Rewind(const CBlockIndex* current_tip, const CBlockIndex* new_tip)
{
    assert(current_tip->GetAncestor(new_tip->nHeight) == new_tip);

    CDBBatch batch(*m_db);
    std::unique_ptr<CDBIterator> db_it(m_db->NewIterator());

    // During a reorg, we need to copy all filters for blocks that are getting disconnected from the
    // height index to the hash index so we can still find them when the height index entries are
    // overwritten.
    if (!CopyHeightIndexToHashIndex(*db_it, batch, m_name, new_tip->nHeight, current_tip->nHeight)) {
        return false;
    }

    // The latest filter position gets written in Commit by the call to BaseIndex::Rewind.
    // But since this creates new references to the filter, the position should get updated here
    // atomically as well in case Commit fails.
    batch.Write(DB_FILTER_POS, m_next_filter_pos);
    if (!m_db->WriteBatch(batch)) return false;

    return BaseIndex::Rewind(current_tip, new_tip);
}

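/** Look up the DB entry for a single block: try the height index first and fall back to the hash
 *  index if the height entry belongs to a different block. */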
static bool LookupOne(const CDBWrapper& db, const CBlockIndex* block_index, DBVal& result)
{
    // First check if the result is stored under the height index and the value there matches the
    // block hash. This should be the case if the block is on the active chain.
    std::pair<uint256, DBVal> read_out;
    if (!db.Read(DBHeightKey(block_index->nHeight), read_out)) {
        return false;
    }
    if (read_out.first == block_index->GetBlockHash()) {
        result = std::move(read_out.second);
        return true;
    }

    // If the value at the height index corresponds to a different block, the result will be stored
    // in the hash index.
    return db.Read(DBHashKey(block_index->GetBlockHash()), result);
}

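/** Look up the DB entries for all blocks from start_height to stop_index, reading the height index
 *  sequentially and falling back to the hash index for any block not on the indexed chain. */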
static bool LookupRange(CDBWrapper& db, const std::string& index_name, int start_height,
                        const CBlockIndex* stop_index, std::vector<DBVal>& results)
{
    if (start_height < 0) {
        return error("%s: start height (%d) is negative", __func__, start_height);
    }
    if (start_height > stop_index->nHeight) {
        return error("%s: start height (%d) is greater than stop height (%d)",
                     __func__, start_height, stop_index->nHeight);
    }

    size_t results_size = static_cast<size_t>(stop_index->nHeight - start_height + 1);
    std::vector<std::pair<uint256, DBVal>> values(results_size);

    DBHeightKey key(start_height);
    std::unique_ptr<CDBIterator> db_it(db.NewIterator());
    db_it->Seek(DBHeightKey(start_height));
    for (int height = start_height; height <= stop_index->nHeight; ++height) {
        if (!db_it->Valid() || !db_it->GetKey(key) || key.height != height) {
            return false;
        }

        size_t i = static_cast<size_t>(height - start_height);
        if (!db_it->GetValue(values[i])) {
            return error("%s: unable to read value in %s at key (%c, %d)",
                         __func__, index_name, DB_BLOCK_HEIGHT, height);
        }

        db_it->Next();
    }

    results.resize(results_size);

    // Iterate backwards through block indexes collecting results in order to access the block hash
    // of each entry in case we need to look it up in the hash index.
    for (const CBlockIndex* block_index = stop_index;
         block_index && block_index->nHeight >= start_height;
         block_index = block_index->pprev) {
        uint256 block_hash = block_index->GetBlockHash();

        size_t i = static_cast<size_t>(block_index->nHeight - start_height);
        if (block_hash == values[i].first) {
            results[i] = std::move(values[i].second);
            continue;
        }

        if (!db.Read(DBHashKey(block_hash), results[i])) {
            return error("%s: unable to read value in %s at key (%c, %s)",
                         __func__, index_name, DB_BLOCK_HASH, block_hash.ToString());
        }
    }

    return true;
}

bool BlockFilterIndex::LookupFilter(const CBlockIndex* block_index, BlockFilter& filter_out) const
{
    DBVal entry;
    if (!LookupOne(*m_db, block_index, entry)) {
        return false;
    }

    return ReadFilterFromDisk(entry.pos, filter_out);
}

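/** Look up the filter header for a block, consulting a small in-memory cache at checkpoint heights
 *  (multiples of CFCHECKPT_INTERVAL) before falling back to the DB. */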
bool BlockFilterIndex::LookupFilterHeader(const CBlockIndex* block_index, uint256& header_out)
{
    LOCK(m_cs_headers_cache);

    bool is_checkpoint{block_index->nHeight % CFCHECKPT_INTERVAL == 0};

    if (is_checkpoint) {
        // Try to find the block in the headers cache if this is a checkpoint height.
        auto header = m_headers_cache.find(block_index->GetBlockHash());
        if (header != m_headers_cache.end()) {
            header_out = header->second;
            return true;
        }
    }

    DBVal entry;
    if (!LookupOne(*m_db, block_index, entry)) {
        return false;
    }

    if (is_checkpoint &&
        m_headers_cache.size() < CF_HEADERS_CACHE_MAX_SZ) {
        // Add to the headers cache if this is a checkpoint height.
        m_headers_cache.emplace(block_index->GetBlockHash(), entry.header);
    }

    header_out = entry.header;
    return true;
}

bool BlockFilterIndex::LookupFilterRange(int start_height, const CBlockIndex* stop_index,
                                         std::vector<BlockFilter>& filters_out) const
{
    std::vector<DBVal> entries;
    if (!LookupRange(*m_db, m_name, start_height, stop_index, entries)) {
        return false;
    }

    filters_out.resize(entries.size());
    auto filter_pos_it = filters_out.begin();
    for (const auto& entry : entries) {
        if (!ReadFilterFromDisk(entry.pos, *filter_pos_it)) {
            return false;
        }
        ++filter_pos_it;
    }

    return true;
}

bool BlockFilterIndex::LookupFilterHashRange(int start_height, const CBlockIndex* stop_index,
                                             std::vector<uint256>& hashes_out) const
{
    std::vector<DBVal> entries;
    if (!LookupRange(*m_db, m_name, start_height, stop_index, entries)) {
        return false;
    }

    hashes_out.clear();
    hashes_out.reserve(entries.size());
    for (const auto& entry : entries) {
        hashes_out.push_back(entry.hash);
    }
    return true;
}

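/** Retrieve the index for a given filter type, or nullptr if it has not been initialized.
 *
 *  A minimal usage sketch (assumes a BASIC filter index was initialized and synced, and that
 *  `pindex` is a CBlockIndex* for the block of interest):
 *
 *      if (BlockFilterIndex* index = GetBlockFilterIndex(BlockFilterType::BASIC)) {
 *          BlockFilter filter;
 *          if (index->LookupFilter(pindex, filter)) {
 *              // filter.GetEncodedFilter() now holds the serialized BIP 158 filter.
 *          }
 *      }
 */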
BlockFilterIndex* GetBlockFilterIndex(BlockFilterType filter_type)
{
    auto it = g_filter_indexes.find(filter_type);
    return it != g_filter_indexes.end() ? &it->second : nullptr;
}

void ForEachBlockFilterIndex(std::function<void (BlockFilterIndex&)> fn)
{
    for (auto& entry : g_filter_indexes) fn(entry.second);
}

bool InitBlockFilterIndex(BlockFilterType filter_type,
                          size_t n_cache_size, bool f_memory, bool f_wipe)
{
    auto result = g_filter_indexes.emplace(std::piecewise_construct,
                                           std::forward_as_tuple(filter_type),
                                           std::forward_as_tuple(filter_type,
                                                                 n_cache_size, f_memory, f_wipe));
    return result.second;
}

bool DestroyBlockFilterIndex(BlockFilterType filter_type)
{
    return g_filter_indexes.erase(filter_type);
}

void DestroyAllBlockFilterIndexes()
{
    g_filter_indexes.clear();
}