1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 #include <cstdlib>
10 #include <functional>
11 #include <memory>
12
13 #include "cache/cache_entry_roles.h"
14 #include "cache/lru_cache.h"
15 #include "db/column_family.h"
16 #include "db/db_test_util.h"
17 #include "port/stack_trace.h"
18 #include "rocksdb/statistics.h"
19 #include "rocksdb/table.h"
20 #include "util/compression.h"
21 #include "util/defer.h"
22 #include "util/random.h"
23 #include "utilities/fault_injection_fs.h"
24
25 namespace ROCKSDB_NAMESPACE {
26
// Test fixture for block-cache-related DB tests. It snapshots block cache
// ticker values (misses/hits/inserts/failures for both the uncompressed and
// compressed caches, plus compression-dictionary counters) so individual
// tests can assert on per-operation *deltas* rather than absolute counts.
class DBBlockCacheTest : public DBTestBase {
 private:
  // Last-recorded tickers for the (uncompressed) block cache.
  size_t miss_count_ = 0;
  size_t hit_count_ = 0;
  size_t insert_count_ = 0;
  size_t failure_count_ = 0;
  // Last-recorded tickers for compression-dictionary blocks.
  size_t compression_dict_miss_count_ = 0;
  size_t compression_dict_hit_count_ = 0;
  size_t compression_dict_insert_count_ = 0;
  // Last-recorded tickers for the compressed block cache.
  size_t compressed_miss_count_ = 0;
  size_t compressed_hit_count_ = 0;
  size_t compressed_insert_count_ = 0;
  size_t compressed_failure_count_ = 0;

 public:
  // Number of keys written by InitTable(); with block_size=1 each key-value
  // pair gets its own data block.
  const size_t kNumBlocks = 10;
  const size_t kValueSize = 100;

  DBBlockCacheTest()
      : DBTestBase("db_block_cache_test", /*env_do_fsync=*/true) {}

  // Table options tuned so that every key-value pair lands in its own block.
  BlockBasedTableOptions GetTableOptions() {
    BlockBasedTableOptions table_options;
    // Set a small enough block size so that each key-value get its own block.
    table_options.block_size = 1;
    return table_options;
  }

  // DB options with statistics enabled and the given table options installed.
  Options GetOptions(const BlockBasedTableOptions& table_options) {
    Options options = CurrentOptions();
    options.create_if_missing = true;
    options.avoid_flush_during_recovery = false;
    // options.compression = kNoCompression;
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    return options;
  }

  // Populates the DB with kNumBlocks keys ("0".."9"), each with a
  // kValueSize-byte value. Data stays in the memtable until a test flushes.
  void InitTable(const Options& /*options*/) {
    std::string value(kValueSize, 'a');
    for (size_t i = 0; i < kNumBlocks; i++) {
      ASSERT_OK(Put(ToString(i), value.c_str()));
    }
  }

  // Snapshots the current block cache tickers (both caches) as the baseline
  // that subsequent CheckCacheCounters()/CheckCompressedCacheCounters()
  // calls compare against.
  void RecordCacheCounters(const Options& options) {
    miss_count_ = TestGetTickerCount(options, BLOCK_CACHE_MISS);
    hit_count_ = TestGetTickerCount(options, BLOCK_CACHE_HIT);
    insert_count_ = TestGetTickerCount(options, BLOCK_CACHE_ADD);
    failure_count_ = TestGetTickerCount(options, BLOCK_CACHE_ADD_FAILURES);
    compressed_miss_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS);
    compressed_hit_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_HIT);
    compressed_insert_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD);
    compressed_failure_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD_FAILURES);
  }

  // Snapshots the compression-dictionary cache tickers as the baseline for
  // CheckCacheCountersForCompressionDict().
  void RecordCacheCountersForCompressionDict(const Options& options) {
    compression_dict_miss_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS);
    compression_dict_hit_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_HIT);
    compression_dict_insert_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_ADD);
  }

  // Asserts that each uncompressed-cache ticker advanced by exactly the
  // expected delta since the last snapshot, then re-baselines.
  void CheckCacheCounters(const Options& options, size_t expected_misses,
                          size_t expected_hits, size_t expected_inserts,
                          size_t expected_failures) {
    size_t new_miss_count = TestGetTickerCount(options, BLOCK_CACHE_MISS);
    size_t new_hit_count = TestGetTickerCount(options, BLOCK_CACHE_HIT);
    size_t new_insert_count = TestGetTickerCount(options, BLOCK_CACHE_ADD);
    size_t new_failure_count =
        TestGetTickerCount(options, BLOCK_CACHE_ADD_FAILURES);
    ASSERT_EQ(miss_count_ + expected_misses, new_miss_count);
    ASSERT_EQ(hit_count_ + expected_hits, new_hit_count);
    ASSERT_EQ(insert_count_ + expected_inserts, new_insert_count);
    ASSERT_EQ(failure_count_ + expected_failures, new_failure_count);
    miss_count_ = new_miss_count;
    hit_count_ = new_hit_count;
    insert_count_ = new_insert_count;
    failure_count_ = new_failure_count;
  }

  // Same delta-check-then-rebaseline pattern for compression-dictionary
  // tickers (no failure counter is tracked for these).
  void CheckCacheCountersForCompressionDict(
      const Options& options, size_t expected_compression_dict_misses,
      size_t expected_compression_dict_hits,
      size_t expected_compression_dict_inserts) {
    size_t new_compression_dict_miss_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS);
    size_t new_compression_dict_hit_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_HIT);
    size_t new_compression_dict_insert_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_ADD);
    ASSERT_EQ(compression_dict_miss_count_ + expected_compression_dict_misses,
              new_compression_dict_miss_count);
    ASSERT_EQ(compression_dict_hit_count_ + expected_compression_dict_hits,
              new_compression_dict_hit_count);
    ASSERT_EQ(
        compression_dict_insert_count_ + expected_compression_dict_inserts,
        new_compression_dict_insert_count);
    compression_dict_miss_count_ = new_compression_dict_miss_count;
    compression_dict_hit_count_ = new_compression_dict_hit_count;
    compression_dict_insert_count_ = new_compression_dict_insert_count;
  }

  // Same delta-check-then-rebaseline pattern for the compressed block cache.
  void CheckCompressedCacheCounters(const Options& options,
                                    size_t expected_misses,
                                    size_t expected_hits,
                                    size_t expected_inserts,
                                    size_t expected_failures) {
    size_t new_miss_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS);
    size_t new_hit_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_HIT);
    size_t new_insert_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD);
    size_t new_failure_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD_FAILURES);
    ASSERT_EQ(compressed_miss_count_ + expected_misses, new_miss_count);
    ASSERT_EQ(compressed_hit_count_ + expected_hits, new_hit_count);
    ASSERT_EQ(compressed_insert_count_ + expected_inserts, new_insert_count);
    ASSERT_EQ(compressed_failure_count_ + expected_failures, new_failure_count);
    compressed_miss_count_ = new_miss_count;
    compressed_hit_count_ = new_hit_count;
    compressed_insert_count_ = new_insert_count;
    compressed_failure_count_ = new_failure_count;
  }

#ifndef ROCKSDB_LITE
  // Fetches per-role cache entry counts via the background (cached) stats
  // collection path of InternalStats.
  const std::array<size_t, kNumCacheEntryRoles> GetCacheEntryRoleCountsBg() {
    // Verify in cache entry role stats
    ColumnFamilyHandleImpl* cfh =
        static_cast<ColumnFamilyHandleImpl*>(dbfull()->DefaultColumnFamily());
    InternalStats* internal_stats_ptr = cfh->cfd()->internal_stats();
    InternalStats::CacheEntryRoleStats stats;
    internal_stats_ptr->TEST_GetCacheEntryRoleStats(&stats,
                                                    /*foreground=*/false);
    return stats.entry_counts;
  }
#endif  // ROCKSDB_LITE
};
172
// A live iterator pins the blocks it reads in the block cache even with
// fill_cache=false; destroying the iterator releases them (usage drops back
// to zero). Fix: drop the unused `iterators` vector and manage the iterator
// with std::unique_ptr instead of raw new/delete.
TEST_F(DBBlockCacheTest, IteratorBlockCacheUsage) {
  ReadOptions read_options;
  read_options.fill_cache = false;
  auto table_options = GetTableOptions();
  auto options = GetOptions(table_options);
  InitTable(options);

  LRUCacheOptions co;
  co.capacity = 0;
  co.num_shard_bits = 0;
  co.strict_capacity_limit = false;
  // Needed not to count entry stats collector
  co.metadata_charge_policy = kDontChargeCacheMetadata;
  std::shared_ptr<Cache> cache = NewLRUCache(co);
  table_options.block_cache = cache;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  RecordCacheCounters(options);

  ASSERT_EQ(0, cache->GetUsage());
  // The seek reads a block, which is pinned while the iterator is alive.
  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  iter->Seek(ToString(0));
  ASSERT_LT(0, cache->GetUsage());
  // Destroying the iterator unpins its blocks; with fill_cache=false nothing
  // stays resident, so usage returns to zero.
  iter.reset();
  ASSERT_EQ(0, cache->GetUsage());
}
203
// Exercises the uncompressed block cache only: blocks read by live iterators
// stay pinned; a full cache with strict_capacity_limit causes new reads to
// fail with Status::Incomplete; unpinned blocks remain cached and serve hits.
TEST_F(DBBlockCacheTest, TestWithoutCompressedBlockCache) {
  ReadOptions read_options;
  auto table_options = GetTableOptions();
  auto options = GetOptions(table_options);
  InitTable(options);

  LRUCacheOptions co;
  co.capacity = 0;
  co.num_shard_bits = 0;
  co.strict_capacity_limit = false;
  // Needed not to count entry stats collector
  co.metadata_charge_policy = kDontChargeCacheMetadata;
  std::shared_ptr<Cache> cache = NewLRUCache(co);
  table_options.block_cache = cache;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  RecordCacheCounters(options);

  std::vector<std::unique_ptr<Iterator>> iterators(kNumBlocks - 1);
  Iterator* iter = nullptr;

  // Load blocks into cache.
  for (size_t i = 0; i + 1 < kNumBlocks; i++) {
    iter = db_->NewIterator(read_options);
    iter->Seek(ToString(i));
    ASSERT_OK(iter->status());
    // Each seek reads one new block: exactly one miss and one insert.
    CheckCacheCounters(options, 1, 0, 1, 0);
    iterators[i].reset(iter);
  }
  size_t usage = cache->GetUsage();
  ASSERT_LT(0, usage);
  // Shrink capacity to the current usage; everything in the cache is pinned
  // by the live iterators above.
  cache->SetCapacity(usage);
  ASSERT_EQ(usage, cache->GetPinnedUsage());

  // Test with strict capacity limit.
  cache->SetStrictCapacityLimit(true);
  iter = db_->NewIterator(read_options);
  iter->Seek(ToString(kNumBlocks - 1));
  // The cache is full of pinned blocks, so the insert of the new block fails
  // and the read surfaces Status::Incomplete.
  ASSERT_TRUE(iter->status().IsIncomplete());
  CheckCacheCounters(options, 1, 0, 0, 1);
  delete iter;
  iter = nullptr;

  // Release iterators and access cache again.
  for (size_t i = 0; i + 1 < kNumBlocks; i++) {
    iterators[i].reset();
    // Releasing an iterator only unpins entries; no ticker moves.
    CheckCacheCounters(options, 0, 0, 0, 0);
  }
  ASSERT_EQ(0, cache->GetPinnedUsage());
  // The blocks are still resident (just unpinned), so re-reading each key is
  // a pure cache hit: no miss, no insert.
  for (size_t i = 0; i + 1 < kNumBlocks; i++) {
    iter = db_->NewIterator(read_options);
    iter->Seek(ToString(i));
    ASSERT_OK(iter->status());
    CheckCacheCounters(options, 0, 1, 0, 0);
    iterators[i].reset(iter);
  }
}
261
262 #ifdef SNAPPY
// Exercises the interaction between the (zero-capacity) uncompressed block
// cache and a large compressed block cache: a read that cannot be inserted
// into the strict-capacity-limited uncompressed cache fails, but the block
// still lands in the compressed cache and can later be served from there.
TEST_F(DBBlockCacheTest, TestWithCompressedBlockCache) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

  // First write the data with no block cache at all, Snappy-compressed.
  BlockBasedTableOptions table_options;
  table_options.no_block_cache = true;
  table_options.block_cache_compressed = nullptr;
  table_options.block_size = 1;
  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
  table_options.cache_index_and_filter_blocks = false;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  options.compression = CompressionType::kSnappyCompression;

  DestroyAndReopen(options);

  // One flushed SST file per key.
  std::string value(kValueSize, 'a');
  for (size_t i = 0; i < kNumBlocks; i++) {
    ASSERT_OK(Put(ToString(i), value));
    ASSERT_OK(Flush());
  }

  // Reopen with both caches: a zero-capacity uncompressed cache and a large
  // compressed cache.
  ReadOptions read_options;
  std::shared_ptr<Cache> compressed_cache = NewLRUCache(1 << 25, 0, false);
  LRUCacheOptions co;
  co.capacity = 0;
  co.num_shard_bits = 0;
  co.strict_capacity_limit = false;
  // Needed not to count entry stats collector
  co.metadata_charge_policy = kDontChargeCacheMetadata;
  std::shared_ptr<Cache> cache = NewLRUCache(co);
  table_options.block_cache = cache;
  table_options.no_block_cache = false;
  table_options.block_cache_compressed = compressed_cache;
  table_options.max_auto_readahead_size = 0;
  table_options.cache_index_and_filter_blocks = false;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  RecordCacheCounters(options);

  // Load blocks into cache.
  for (size_t i = 0; i < kNumBlocks - 1; i++) {
    ASSERT_EQ(value, Get(ToString(i)));
    // Each Get misses and inserts in both caches.
    CheckCacheCounters(options, 1, 0, 1, 0);
    CheckCompressedCacheCounters(options, 1, 0, 1, 0);
  }

  // Zero-capacity uncompressed cache retains nothing ...
  size_t usage = cache->GetUsage();
  ASSERT_EQ(0, usage);
  ASSERT_EQ(usage, cache->GetPinnedUsage());
  // ... while the compressed cache holds the compressed blocks.
  size_t compressed_usage = compressed_cache->GetUsage();
  ASSERT_LT(0, compressed_usage);
  // Compressed block cache cannot be pinned.
  ASSERT_EQ(0, compressed_cache->GetPinnedUsage());

  // Set strict capacity limit flag. Now block will only load into compressed
  // block cache.
  cache->SetCapacity(usage);
  cache->SetStrictCapacityLimit(true);
  ASSERT_EQ(usage, cache->GetPinnedUsage());

  // Load last key block.
  ASSERT_EQ("Result incomplete: Insert failed due to LRU cache being full.",
            Get(ToString(kNumBlocks - 1)));
  // Failure will also record the miss counter.
  CheckCacheCounters(options, 1, 0, 0, 1);
  CheckCompressedCacheCounters(options, 1, 0, 1, 0);

  // Clear strict capacity limit flag. This time we shall hit compressed block
  // cache and load into block cache.
  cache->SetStrictCapacityLimit(false);
  // Load last key block.
  ASSERT_EQ(value, Get(ToString(kNumBlocks - 1)));
  CheckCacheCounters(options, 1, 0, 1, 0);
  CheckCompressedCacheCounters(options, 0, 1, 0, 0);
}
339 #endif // SNAPPY
340
341 #ifndef ROCKSDB_LITE
342
343 // Make sure that when options.block_cache is set, after a new table is
344 // created its index/filter blocks are added to block cache.
TEST_F(DBBlockCacheTest,IndexAndFilterBlocksOfNewTableAddedToCache)345 TEST_F(DBBlockCacheTest, IndexAndFilterBlocksOfNewTableAddedToCache) {
346 Options options = CurrentOptions();
347 options.create_if_missing = true;
348 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
349 BlockBasedTableOptions table_options;
350 table_options.cache_index_and_filter_blocks = true;
351 table_options.filter_policy.reset(NewBloomFilterPolicy(20));
352 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
353 CreateAndReopenWithCF({"pikachu"}, options);
354
355 ASSERT_OK(Put(1, "key", "val"));
356 // Create a new table.
357 ASSERT_OK(Flush(1));
358
359 // index/filter blocks added to block cache right after table creation.
360 ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
361 ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
362 ASSERT_EQ(2, /* only index/filter were added */
363 TestGetTickerCount(options, BLOCK_CACHE_ADD));
364 ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));
365 uint64_t int_num;
366 ASSERT_TRUE(
367 dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num));
368 ASSERT_EQ(int_num, 0U);
369
370 // Make sure filter block is in cache.
371 std::string value;
372 ReadOptions ropt;
373 db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value);
374
375 // Miss count should remain the same.
376 ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
377 ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
378
379 db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value);
380 ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
381 ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));
382
383 // Make sure index block is in cache.
384 auto index_block_hit = TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT);
385 value = Get(1, "key");
386 ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
387 ASSERT_EQ(index_block_hit + 1,
388 TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
389
390 value = Get(1, "key");
391 ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
392 ASSERT_EQ(index_block_hit + 2,
393 TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
394 }
395
396 // With fill_cache = false, fills up the cache, then iterates over the entire
397 // db, verify dummy entries inserted in `BlockBasedTable::NewDataBlockIterator`
398 // does not cause heap-use-after-free errors in COMPILE_WITH_ASAN=1 runs
TEST_F(DBBlockCacheTest,FillCacheAndIterateDB)399 TEST_F(DBBlockCacheTest, FillCacheAndIterateDB) {
400 ReadOptions read_options;
401 read_options.fill_cache = false;
402 auto table_options = GetTableOptions();
403 auto options = GetOptions(table_options);
404 InitTable(options);
405
406 std::shared_ptr<Cache> cache = NewLRUCache(10, 0, true);
407 table_options.block_cache = cache;
408 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
409 Reopen(options);
410 ASSERT_OK(Put("key1", "val1"));
411 ASSERT_OK(Put("key2", "val2"));
412 ASSERT_OK(Flush());
413 ASSERT_OK(Put("key3", "val3"));
414 ASSERT_OK(Put("key4", "val4"));
415 ASSERT_OK(Flush());
416 ASSERT_OK(Put("key5", "val5"));
417 ASSERT_OK(Put("key6", "val6"));
418 ASSERT_OK(Flush());
419
420 Iterator* iter = nullptr;
421
422 iter = db_->NewIterator(read_options);
423 iter->Seek(ToString(0));
424 while (iter->Valid()) {
425 iter->Next();
426 }
427 delete iter;
428 iter = nullptr;
429 }
430
// Verifies the BLOCK_CACHE_{INDEX,FILTER}_BYTES_INSERT byte counters: after a
// flush they reflect the cached index/filter sizes, and a second flush into a
// right-sized cache grows them again (eviction byte counters are currently
// disabled; see comments below).
TEST_F(DBBlockCacheTest, IndexAndFilterBlocksStats) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  BlockBasedTableOptions table_options;
  table_options.cache_index_and_filter_blocks = true;
  LRUCacheOptions co;
  // 500 bytes are enough to hold the first two blocks
  co.capacity = 500;
  co.num_shard_bits = 0;
  co.strict_capacity_limit = false;
  co.metadata_charge_policy = kDontChargeCacheMetadata;
  std::shared_ptr<Cache> cache = NewLRUCache(co);
  table_options.block_cache = cache;
  table_options.filter_policy.reset(NewBloomFilterPolicy(20, true));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "longer_key", "val"));
  // Create a new table
  ASSERT_OK(Flush(1));
  size_t index_bytes_insert =
      TestGetTickerCount(options, BLOCK_CACHE_INDEX_BYTES_INSERT);
  size_t filter_bytes_insert =
      TestGetTickerCount(options, BLOCK_CACHE_FILTER_BYTES_INSERT);
  ASSERT_GT(index_bytes_insert, 0);
  ASSERT_GT(filter_bytes_insert, 0);
  // Flush warmed exactly the index and filter blocks; cache usage equals the
  // sum of their inserted bytes.
  ASSERT_EQ(cache->GetUsage(), index_bytes_insert + filter_bytes_insert);
  // set the cache capacity to the current usage
  cache->SetCapacity(index_bytes_insert + filter_bytes_insert);
  // The index and filter eviction statistics were broken by the refactoring
  // that moved the readers out of the block cache. Disabling these until we can
  // bring the stats back.
  // ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_INDEX_BYTES_EVICT), 0);
  // ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_FILTER_BYTES_EVICT), 0);
  // Note that the second key needs to be no longer than the first one.
  // Otherwise the second index block may not fit in cache.
  ASSERT_OK(Put(1, "key", "val"));
  // Create a new table
  ASSERT_OK(Flush(1));
  // cache evicted old index and block entries
  ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_INDEX_BYTES_INSERT),
            index_bytes_insert);
  ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_FILTER_BYTES_INSERT),
            filter_bytes_insert);
  // The index and filter eviction statistics were broken by the refactoring
  // that moved the readers out of the block cache. Disabling these until we can
  // bring the stats back.
  // ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_INDEX_BYTES_EVICT),
  //           index_bytes_insert);
  // ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_FILTER_BYTES_EVICT),
  //           filter_bytes_insert);
}
484
485 #if (defined OS_LINUX || defined OS_WIN)
// With prepopulate_block_cache=kFlushOnly, every flush inserts its data block
// into the cache, so the first read of each freshly-flushed key is a pure
// cache hit. Compaction output is not prepopulated.
TEST_F(DBBlockCacheTest, WarmCacheWithDataBlocksDuringFlush) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

  BlockBasedTableOptions table_options;
  table_options.block_cache = NewLRUCache(1 << 25, 0, false);
  table_options.cache_index_and_filter_blocks = false;
  table_options.prepopulate_block_cache =
      BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  DestroyAndReopen(options);

  const std::string payload(kValueSize, 'a');
  for (size_t key_idx = 1; key_idx <= kNumBlocks; key_idx++) {
    ASSERT_OK(Put(ToString(key_idx), payload));
    ASSERT_OK(Flush());
    // The flush itself warmed the data block: one add per flushed file.
    ASSERT_EQ(key_idx,
              options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
    // The follow-up read is served entirely from the warmed cache.
    ASSERT_EQ(payload, Get(ToString(key_idx)));
    ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_DATA_MISS));
    ASSERT_EQ(key_idx,
              options.statistics->getTickerCount(BLOCK_CACHE_DATA_HIT));
  }
  // Verify compaction not counted
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
                              /*end=*/nullptr));
  EXPECT_EQ(kNumBlocks,
            options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
}
514
// This test caches data, index and filter blocks during flush.
TEST_F(DBBlockCacheTest, WarmCacheWithBlocksDuringFlush) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

  BlockBasedTableOptions table_options;
  table_options.block_cache = NewLRUCache(1 << 25, 0, false);
  table_options.cache_index_and_filter_blocks = true;
  table_options.prepopulate_block_cache =
      BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly;
  table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  DestroyAndReopen(options);

  std::string value(kValueSize, 'a');
  for (size_t i = 1; i <= kNumBlocks; i++) {
    ASSERT_OK(Put(ToString(i), value));
    ASSERT_OK(Flush());
    // Flush warmed one data, one index and one filter block per file.
    ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
    ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
    ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));

    ASSERT_EQ(value, Get(ToString(i)));

    // The read is served from the warmed cache: no misses at all.
    ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_DATA_MISS));
    ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_HIT));

    ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS));
    // NOTE(review): observed ratios — each Get appears to touch the index
    // three times and the filter twice per live file; confirm against the
    // read path if these multipliers ever change.
    ASSERT_EQ(i * 3, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT));

    ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(i * 2,
              options.statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT));
  }
  // Verify compaction not counted
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
                              /*end=*/nullptr));
  EXPECT_EQ(kNumBlocks,
            options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
  // Index and filter blocks are automatically warmed when the new table file
  // is automatically opened at the end of compaction. This is not easily
  // disabled so results in the new index and filter blocks being warmed.
  EXPECT_EQ(1 + kNumBlocks,
            options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
  EXPECT_EQ(1 + kNumBlocks,
            options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
}
563
// Verifies prepopulate_block_cache can be toggled at runtime via SetOptions:
// flushes warm the cache before the toggle and stop warming afterwards.
TEST_F(DBBlockCacheTest, DynamicallyWarmCacheDuringFlush) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

  BlockBasedTableOptions table_options;
  table_options.block_cache = NewLRUCache(1 << 25, 0, false);
  table_options.cache_index_and_filter_blocks = false;
  table_options.prepopulate_block_cache =
      BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly;

  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  DestroyAndReopen(options);

  const std::string payload(kValueSize, 'a');

  // Phase 1: warming enabled. Each flush adds its data block; the subsequent
  // read is a pure hit (no add, no miss).
  for (size_t key_idx = 1; key_idx <= 5; key_idx++) {
    ASSERT_OK(Put(ToString(key_idx), payload));
    ASSERT_OK(Flush());
    ASSERT_EQ(
        1, options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));

    ASSERT_EQ(payload, Get(ToString(key_idx)));
    ASSERT_EQ(
        0, options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));
    ASSERT_EQ(
        0, options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_MISS));
    ASSERT_EQ(
        1, options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_HIT));
  }

  // Disable cache warming without reopening the DB.
  ASSERT_OK(dbfull()->SetOptions(
      {{"block_based_table_factory", "{prepopulate_block_cache=kDisable;}"}}));

  // Phase 2: warming disabled. Flush adds nothing; the first read now misses
  // and performs the insert itself.
  for (size_t key_idx = 6; key_idx <= kNumBlocks; key_idx++) {
    ASSERT_OK(Put(ToString(key_idx), payload));
    ASSERT_OK(Flush());
    ASSERT_EQ(
        0, options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));

    ASSERT_EQ(payload, Get(ToString(key_idx)));
    ASSERT_EQ(
        1, options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));
    ASSERT_EQ(
        1, options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_MISS));
    ASSERT_EQ(
        0, options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_HIT));
  }
}
613 #endif
614
615 namespace {
616
617 // A mock cache wraps LRUCache, and record how many entries have been
618 // inserted for each priority.
619 class MockCache : public LRUCache {
620 public:
621 static uint32_t high_pri_insert_count;
622 static uint32_t low_pri_insert_count;
623
MockCache()624 MockCache()
625 : LRUCache((size_t)1 << 25 /*capacity*/, 0 /*num_shard_bits*/,
626 false /*strict_capacity_limit*/, 0.0 /*high_pri_pool_ratio*/) {
627 }
628
629 using ShardedCache::Insert;
630
Insert(const Slice & key,void * value,const Cache::CacheItemHelper * helper_cb,size_t charge,Handle ** handle,Priority priority)631 Status Insert(const Slice& key, void* value,
632 const Cache::CacheItemHelper* helper_cb, size_t charge,
633 Handle** handle, Priority priority) override {
634 DeleterFn delete_cb = helper_cb->del_cb;
635 if (priority == Priority::LOW) {
636 low_pri_insert_count++;
637 } else {
638 high_pri_insert_count++;
639 }
640 return LRUCache::Insert(key, value, charge, delete_cb, handle, priority);
641 }
642 };
643
644 uint32_t MockCache::high_pri_insert_count = 0;
645 uint32_t MockCache::low_pri_insert_count = 0;
646
647 } // anonymous namespace
648
// Index/filter blocks are inserted with the priority selected by
// cache_index_and_filter_blocks_with_high_priority, while data blocks are
// always inserted with low priority. Uses MockCache's static counters.
TEST_F(DBBlockCacheTest, IndexAndFilterBlocksCachePriority) {
  for (auto priority : {Cache::Priority::LOW, Cache::Priority::HIGH}) {
    Options options = CurrentOptions();
    options.create_if_missing = true;
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
    BlockBasedTableOptions table_options;
    table_options.cache_index_and_filter_blocks = true;
    table_options.block_cache.reset(new MockCache());
    table_options.filter_policy.reset(NewBloomFilterPolicy(20));
    table_options.cache_index_and_filter_blocks_with_high_priority =
        priority == Cache::Priority::HIGH ? true : false;
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    DestroyAndReopen(options);

    // Reset the static tallies before each priority configuration.
    MockCache::high_pri_insert_count = 0;
    MockCache::low_pri_insert_count = 0;

    // Create a new table.
    ASSERT_OK(Put("foo", "value"));
    ASSERT_OK(Put("bar", "value"));
    ASSERT_OK(Flush());
    ASSERT_EQ(1, NumTableFilesAtLevel(0));

    // index/filter blocks added to block cache right after table creation.
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(2, /* only index/filter were added */
              TestGetTickerCount(options, BLOCK_CACHE_ADD));
    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));
    // Both of those inserts carried the configured index/filter priority.
    if (priority == Cache::Priority::LOW) {
      ASSERT_EQ(0u, MockCache::high_pri_insert_count);
      ASSERT_EQ(2u, MockCache::low_pri_insert_count);
    } else {
      ASSERT_EQ(2u, MockCache::high_pri_insert_count);
      ASSERT_EQ(0u, MockCache::low_pri_insert_count);
    }

    // Access data block.
    ASSERT_EQ("value", Get("foo"));

    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(3, /*adding data block*/
              TestGetTickerCount(options, BLOCK_CACHE_ADD));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));

    // Data block should be inserted with low priority.
    if (priority == Cache::Priority::LOW) {
      ASSERT_EQ(0u, MockCache::high_pri_insert_count);
      ASSERT_EQ(3u, MockCache::low_pri_insert_count);
    } else {
      ASSERT_EQ(2u, MockCache::high_pri_insert_count);
      ASSERT_EQ(1u, MockCache::low_pri_insert_count);
    }
  }
}
705
706 namespace {
707
708 // An LRUCache wrapper that can falsely report "not found" on Lookup.
709 // This allows us to manipulate BlockBasedTableReader into thinking
710 // another thread inserted the data in between Lookup and Insert,
711 // while mostly preserving the LRUCache interface/behavior.
712 class LookupLiarCache : public CacheWrapper {
713 int nth_lookup_not_found_ = 0;
714
715 public:
LookupLiarCache(std::shared_ptr<Cache> target)716 explicit LookupLiarCache(std::shared_ptr<Cache> target)
717 : CacheWrapper(std::move(target)) {}
718
719 using Cache::Lookup;
Lookup(const Slice & key,Statistics * stats)720 Handle* Lookup(const Slice& key, Statistics* stats) override {
721 if (nth_lookup_not_found_ == 1) {
722 nth_lookup_not_found_ = 0;
723 return nullptr;
724 }
725 if (nth_lookup_not_found_ > 1) {
726 --nth_lookup_not_found_;
727 }
728 return CacheWrapper::Lookup(key, stats);
729 }
730
731 // 1 == next lookup, 2 == after next, etc.
SetNthLookupNotFound(int n)732 void SetNthLookupNotFound(int n) { nth_lookup_not_found_ = n; }
733 };
734
735 } // anonymous namespace
736
// Verifies that inserts which merely re-add an entry already present in the
// block cache are counted in the *_ADD_REDUNDANT tickers, separately for
// index, filter, and data blocks. LookupLiarCache forces the redundant path
// by pretending selected lookups missed.
TEST_F(DBBlockCacheTest, AddRedundantStats) {
  const size_t capacity = size_t{1} << 25;
  const int num_shard_bits = 0;  // 1 shard
  int iterations_tested = 0;
  for (std::shared_ptr<Cache> base_cache :
       {NewLRUCache(capacity, num_shard_bits),
        NewClockCache(capacity, num_shard_bits)}) {
    if (!base_cache) {
      // Skip clock cache when not supported
      continue;
    }
    ++iterations_tested;
    Options options = CurrentOptions();
    options.create_if_missing = true;
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

    // Wrap the real cache so the test can force a "not found" on demand.
    std::shared_ptr<LookupLiarCache> cache =
        std::make_shared<LookupLiarCache>(base_cache);

    BlockBasedTableOptions table_options;
    table_options.cache_index_and_filter_blocks = true;
    table_options.block_cache = cache;
    table_options.filter_policy.reset(NewBloomFilterPolicy(50));
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    DestroyAndReopen(options);

    // Create a new table.
    ASSERT_OK(Put("foo", "value"));
    ASSERT_OK(Put("bar", "value"));
    ASSERT_OK(Flush());
    ASSERT_EQ(1, NumTableFilesAtLevel(0));

    // Normal access filter+index+data: one (non-redundant) add each.
    ASSERT_EQ("value", Get("foo"));

    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD));
    // --------
    ASSERT_EQ(3, TestGetTickerCount(options, BLOCK_CACHE_ADD));

    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT));
    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT));
    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT));
    // --------
    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT));

    // Again access filter+index+data, but force redundant load+insert on index
    // (the second lookup in this Get is the index block).
    cache->SetNthLookupNotFound(2);
    ASSERT_EQ("value", Get("bar"));

    ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD));
    // --------
    ASSERT_EQ(4, TestGetTickerCount(options, BLOCK_CACHE_ADD));

    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT));
    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT));
    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT));
    // --------
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT));

    // Access just filter (with high probability), and force redundant
    // load+insert
    cache->SetNthLookupNotFound(1);
    ASSERT_EQ("NOT_FOUND", Get("this key was not added"));

    EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD));
    EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD));
    EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD));
    // --------
    EXPECT_EQ(5, TestGetTickerCount(options, BLOCK_CACHE_ADD));

    EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT));
    EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT));
    EXPECT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT));
    // --------
    EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT));

    // Access just data, forcing redundant load+insert
    ReadOptions read_options;
    std::unique_ptr<Iterator> iter{db_->NewIterator(read_options)};
    cache->SetNthLookupNotFound(1);
    iter->SeekToFirst();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key(), "bar");

    EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD));
    EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD));
    EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD));
    // --------
    EXPECT_EQ(6, TestGetTickerCount(options, BLOCK_CACHE_ADD));

    EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT));
    EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT));
    EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT));
    // --------
    EXPECT_EQ(3, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT));
  }
  // Ensure at least one cache implementation was actually exercised.
  EXPECT_GE(iterations_tested, 1);
}
839
// Verifies that paranoid_file_checks causes newly written files to be read
// back (populating the block cache), and that disabling the option via
// SetOptions stops those extra reads.
TEST_F(DBBlockCacheTest, ParanoidFileChecks) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  options.level0_file_num_compaction_trigger = 2;
  options.paranoid_file_checks = true;
  BlockBasedTableOptions table_options;
  table_options.cache_index_and_filter_blocks = false;
  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "1_key", "val"));
  ASSERT_OK(Put(1, "9_key", "val"));
  // Create a new table.
  ASSERT_OK(Flush(1));
  ASSERT_EQ(1, /* read and cache data block */
            TestGetTickerCount(options, BLOCK_CACHE_ADD));

  ASSERT_OK(Put(1, "1_key2", "val2"));
  ASSERT_OK(Put(1, "9_key2", "val2"));
  // Create a new SST file. This will further trigger a compaction
  // and generate another file.
  ASSERT_OK(Flush(1));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(3, /* Totally 3 files created up to now */
            TestGetTickerCount(options, BLOCK_CACHE_ADD));

  // After disabling options.paranoid_file_checks. NO further block
  // is added after generating a new file.
  ASSERT_OK(
      dbfull()->SetOptions(handles_[1], {{"paranoid_file_checks", "false"}}));

  ASSERT_OK(Put(1, "1_key3", "val3"));
  ASSERT_OK(Put(1, "9_key3", "val3"));
  ASSERT_OK(Flush(1));
  ASSERT_OK(Put(1, "1_key4", "val4"));
  ASSERT_OK(Put(1, "9_key4", "val4"));
  ASSERT_OK(Flush(1));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  // Ticker unchanged: new files were not read back for verification.
  ASSERT_EQ(3, /* Totally 3 files created up to now */
            TestGetTickerCount(options, BLOCK_CACHE_ADD));
}
883
// Exercises the compressed and uncompressed block caches in various
// combinations and checks that the corresponding miss/hit tickers fire.
TEST_F(DBBlockCacheTest, CompressedCache) {
  if (!Snappy_Supported()) {
    return;
  }
  int num_iter = 80;

  // Run this test four iterations.
  // Iteration 1: only an uncompressed block cache
  // Iteration 2: only a compressed block cache
  // Iteration 3: both block cache and compressed cache
  // Iteration 4: both block cache and compressed cache, but DB is not
  // compressed
  for (int iter = 0; iter < 4; iter++) {
    Options options = CurrentOptions();
    options.write_buffer_size = 64 * 1024;  // small write buffer
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

    BlockBasedTableOptions table_options;
    switch (iter) {
      case 0:
        // only uncompressed block cache
        table_options.block_cache = NewLRUCache(8 * 1024);
        table_options.block_cache_compressed = nullptr;
        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
        break;
      case 1:
        // no block cache, only compressed cache
        table_options.no_block_cache = true;
        table_options.block_cache = nullptr;
        table_options.block_cache_compressed = NewLRUCache(8 * 1024);
        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
        break;
      case 2:
        // both compressed and uncompressed block cache
        table_options.block_cache = NewLRUCache(1024);
        table_options.block_cache_compressed = NewLRUCache(8 * 1024);
        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
        break;
      case 3:
        // both block cache and compressed cache, but DB is not compressed
        // also, make block cache sizes bigger, to trigger block cache hits
        table_options.block_cache = NewLRUCache(1024 * 1024);
        table_options.block_cache_compressed = NewLRUCache(8 * 1024 * 1024);
        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
        options.compression = kNoCompression;
        break;
      default:
        FAIL();
    }
    CreateAndReopenWithCF({"pikachu"}, options);
    // default column family doesn't have block cache
    Options no_block_cache_opts;
    no_block_cache_opts.statistics = options.statistics;
    no_block_cache_opts = CurrentOptions(no_block_cache_opts);
    BlockBasedTableOptions table_options_no_bc;
    table_options_no_bc.no_block_cache = true;
    no_block_cache_opts.table_factory.reset(
        NewBlockBasedTableFactory(table_options_no_bc));
    ReopenWithColumnFamilies(
        {"default", "pikachu"},
        std::vector<Options>({no_block_cache_opts, options}));

    Random rnd(301);

    // Write 8MB (80 values, each 100K)
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
    std::vector<std::string> values;
    std::string str;
    for (int i = 0; i < num_iter; i++) {
      if (i % 4 == 0) {  // high compression ratio
        // Reuse the same random string for 4 consecutive values so the
        // data compresses well.
        str = rnd.RandomString(1000);
      }
      values.push_back(str);
      ASSERT_OK(Put(1, Key(i), values[i]));
    }

    // flush all data from memtable so that reads are from block cache
    ASSERT_OK(Flush(1));

    for (int i = 0; i < num_iter; i++) {
      ASSERT_EQ(Get(1, Key(i)), values[i]);
    }

    // check that we triggered the appropriate code paths in the cache
    switch (iter) {
      case 0:
        // only uncompressed block cache
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
        break;
      case 1:
        // no block cache, only compressed cache
        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
        break;
      case 2:
        // both compressed and uncompressed block cache
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
        break;
      case 3:
        // both compressed and uncompressed block cache
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_HIT), 0);
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
        // compressed doesn't have any hits since blocks are not compressed on
        // storage
        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_HIT), 0);
        break;
      default:
        FAIL();
    }

    options.create_if_missing = true;
    DestroyAndReopen(options);
  }
}
1001
// Verifies that compression dictionary blocks are inserted into (and later
// served from) the block cache, for every dictionary-capable compression
// library available in this build.
TEST_F(DBBlockCacheTest, CacheCompressionDict) {
  const int kNumFiles = 4;
  const int kNumEntriesPerFile = 128;
  const int kNumBytesPerEntry = 1024;

  // Try all the available libraries that support dictionary compression
  std::vector<CompressionType> compression_types;
  if (Zlib_Supported()) {
    compression_types.push_back(kZlibCompression);
  }
  if (LZ4_Supported()) {
    compression_types.push_back(kLZ4Compression);
    compression_types.push_back(kLZ4HCCompression);
  }
  if (ZSTD_Supported()) {
    compression_types.push_back(kZSTD);
  } else if (ZSTDNotFinal_Supported()) {
    compression_types.push_back(kZSTDNotFinalCompression);
  }
  Random rnd(301);
  for (auto compression_type : compression_types) {
    Options options = CurrentOptions();
    // Dictionary compression applies at the bottommost level only.
    options.bottommost_compression = compression_type;
    options.bottommost_compression_opts.max_dict_bytes = 4096;
    options.bottommost_compression_opts.enabled = true;
    options.create_if_missing = true;
    options.num_levels = 2;
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
    options.target_file_size_base = kNumEntriesPerFile * kNumBytesPerEntry;
    BlockBasedTableOptions table_options;
    table_options.cache_index_and_filter_blocks = true;
    table_options.block_cache.reset(new MockCache());
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    DestroyAndReopen(options);

    RecordCacheCountersForCompressionDict(options);

    for (int i = 0; i < kNumFiles; ++i) {
      ASSERT_EQ(i, NumTableFilesAtLevel(0, 0));
      for (int j = 0; j < kNumEntriesPerFile; ++j) {
        std::string value = rnd.RandomString(kNumBytesPerEntry);
        ASSERT_OK(Put(Key(j * kNumFiles + i), value.c_str()));
      }
      ASSERT_OK(Flush());
    }
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_EQ(0, NumTableFilesAtLevel(0));
    ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(1));

    // Compression dictionary blocks are preloaded.
    CheckCacheCountersForCompressionDict(
        options, kNumFiles /* expected_compression_dict_misses */,
        0 /* expected_compression_dict_hits */,
        kNumFiles /* expected_compression_dict_inserts */);

    // Seek to a key in a file. It should cause the SST's dictionary meta-block
    // to be read.
    RecordCacheCounters(options);
    RecordCacheCountersForCompressionDict(options);
    ReadOptions read_options;
    ASSERT_NE("NOT_FOUND", Get(Key(kNumFiles * kNumEntriesPerFile - 1)));
    // Two block hits: index and dictionary since they are prefetched
    // One block missed/added: data block
    CheckCacheCounters(options, 1 /* expected_misses */, 2 /* expected_hits */,
                       1 /* expected_inserts */, 0 /* expected_failures */);
    CheckCacheCountersForCompressionDict(
        options, 0 /* expected_compression_dict_misses */,
        1 /* expected_compression_dict_hits */,
        0 /* expected_compression_dict_inserts */);
  }
}
1073
ClearCache(Cache * cache)1074 static void ClearCache(Cache* cache) {
1075 auto roles = CopyCacheDeleterRoleMap();
1076 std::deque<std::string> keys;
1077 Cache::ApplyToAllEntriesOptions opts;
1078 auto callback = [&](const Slice& key, void* /*value*/, size_t /*charge*/,
1079 Cache::DeleterFn deleter) {
1080 if (roles.find(deleter) == roles.end()) {
1081 // Keep the stats collector
1082 return;
1083 }
1084 keys.push_back(key.ToString());
1085 };
1086 cache->ApplyToAllEntries(callback, opts);
1087 for (auto& k : keys) {
1088 cache->Erase(k);
1089 }
1090 }
1091
// Verifies per-role block cache entry statistics (index/filter/data/etc.),
// including the time-based caching of collected stats, the GetProperty /
// GetMapProperty interfaces, and that stat collection does not hold the DB
// mutex. Uses a mock clock to control when cached stats expire.
TEST_F(DBBlockCacheTest, CacheEntryRoleStats) {
  const size_t capacity = size_t{1} << 25;
  int iterations_tested = 0;
  for (bool partition : {false, true}) {
    for (std::shared_ptr<Cache> cache :
         {NewLRUCache(capacity), NewClockCache(capacity)}) {
      if (!cache) {
        // Skip clock cache when not supported
        continue;
      }
      ++iterations_tested;

      Options options = CurrentOptions();
      SetTimeElapseOnlySleepOnReopen(&options);
      options.create_if_missing = true;
      options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
      options.max_open_files = 13;
      options.table_cache_numshardbits = 0;
      // If this wakes up, it could interfere with test
      options.stats_dump_period_sec = 0;

      BlockBasedTableOptions table_options;
      table_options.block_cache = cache;
      table_options.cache_index_and_filter_blocks = true;
      table_options.filter_policy.reset(NewBloomFilterPolicy(50));
      if (partition) {
        table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
        table_options.partition_filters = true;
      }
      // Disable pinning so ClearCache() can fully empty the cache.
      table_options.metadata_cache_options.top_level_index_pinning =
          PinningTier::kNone;
      table_options.metadata_cache_options.partition_pinning =
          PinningTier::kNone;
      table_options.metadata_cache_options.unpartitioned_pinning =
          PinningTier::kNone;
      options.table_factory.reset(NewBlockBasedTableFactory(table_options));
      DestroyAndReopen(options);

      // Create a new table.
      ASSERT_OK(Put("foo", "value"));
      ASSERT_OK(Put("bar", "value"));
      ASSERT_OK(Flush());

      ASSERT_OK(Put("zfoo", "value"));
      ASSERT_OK(Put("zbar", "value"));
      ASSERT_OK(Flush());

      ASSERT_EQ(2, NumTableFilesAtLevel(0));

      // Fresh cache
      ClearCache(cache.get());

      std::array<size_t, kNumCacheEntryRoles> expected{};
      // For CacheEntryStatsCollector
      expected[static_cast<size_t>(CacheEntryRole::kMisc)] = 1;
      EXPECT_EQ(expected, GetCacheEntryRoleCountsBg());

      std::array<size_t, kNumCacheEntryRoles> prev_expected = expected;

      // First access only filters
      ASSERT_EQ("NOT_FOUND", Get("different from any key added"));
      expected[static_cast<size_t>(CacheEntryRole::kFilterBlock)] += 2;
      if (partition) {
        expected[static_cast<size_t>(CacheEntryRole::kFilterMetaBlock)] += 2;
      }
      // Within some time window, we will get cached entry stats
      EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg());
      // Not enough to force a miss
      env_->MockSleepForSeconds(45);
      EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg());
      // Enough to force a miss
      env_->MockSleepForSeconds(601);
      EXPECT_EQ(expected, GetCacheEntryRoleCountsBg());

      // Now access index and data block
      ASSERT_EQ("value", Get("foo"));
      expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]++;
      if (partition) {
        // top-level
        expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]++;
      }
      expected[static_cast<size_t>(CacheEntryRole::kDataBlock)]++;
      // Enough to force a miss
      env_->MockSleepForSeconds(601);
      // But inject a simulated long scan so that we need a longer
      // interval to force a miss next time.
      SyncPoint::GetInstance()->SetCallBack(
          "CacheEntryStatsCollector::GetStats:AfterApplyToAllEntries",
          [this](void*) {
            // To spend no more than 0.2% of time scanning, we would need
            // interval of at least 10000s
            env_->MockSleepForSeconds(20);
          });
      SyncPoint::GetInstance()->EnableProcessing();
      EXPECT_EQ(expected, GetCacheEntryRoleCountsBg());
      prev_expected = expected;
      SyncPoint::GetInstance()->DisableProcessing();
      SyncPoint::GetInstance()->ClearAllCallBacks();

      // The same for other file
      ASSERT_EQ("value", Get("zfoo"));
      expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]++;
      if (partition) {
        // top-level
        expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]++;
      }
      expected[static_cast<size_t>(CacheEntryRole::kDataBlock)]++;
      // Because of the simulated long scan, this is not enough to force
      // a miss
      env_->MockSleepForSeconds(601);
      EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg());
      // But this is enough
      env_->MockSleepForSeconds(10000);
      EXPECT_EQ(expected, GetCacheEntryRoleCountsBg());
      prev_expected = expected;

      // Also check the GetProperty interface
      std::map<std::string, std::string> values;
      ASSERT_TRUE(
          db_->GetMapProperty(DB::Properties::kBlockCacheEntryStats, &values));

      EXPECT_EQ(
          ToString(expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]),
          values["count.index-block"]);
      EXPECT_EQ(
          ToString(expected[static_cast<size_t>(CacheEntryRole::kDataBlock)]),
          values["count.data-block"]);
      EXPECT_EQ(
          ToString(expected[static_cast<size_t>(CacheEntryRole::kFilterBlock)]),
          values["count.filter-block"]);
      EXPECT_EQ(
          ToString(
              prev_expected[static_cast<size_t>(CacheEntryRole::kWriteBuffer)]),
          values["count.write-buffer"]);
      EXPECT_EQ(ToString(expected[static_cast<size_t>(CacheEntryRole::kMisc)]),
                values["count.misc"]);

      // Add one for kWriteBuffer
      {
        WriteBufferManager wbm(size_t{1} << 20, cache);
        wbm.ReserveMem(1024);
        expected[static_cast<size_t>(CacheEntryRole::kWriteBuffer)]++;
        // Now we check that the GetProperty interface is more aggressive about
        // re-scanning stats, but not totally aggressive.
        // Within some time window, we will get cached entry stats
        env_->MockSleepForSeconds(1);
        EXPECT_EQ(ToString(prev_expected[static_cast<size_t>(
                      CacheEntryRole::kWriteBuffer)]),
                  values["count.write-buffer"]);
        // Not enough for a "background" miss but enough for a "foreground" miss
        env_->MockSleepForSeconds(45);

        ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kBlockCacheEntryStats,
                                        &values));
        EXPECT_EQ(
            ToString(
                expected[static_cast<size_t>(CacheEntryRole::kWriteBuffer)]),
            values["count.write-buffer"]);
      }
      prev_expected = expected;

      // With collector pinned in cache, we should be able to hit
      // even if the cache is full
      ClearCache(cache.get());
      Cache::Handle* h = nullptr;
      // Oversized entry fills the cache beyond capacity.
      ASSERT_OK(cache->Insert("Fill-it-up", nullptr, capacity + 1,
                              GetNoopDeleterForRole<CacheEntryRole::kMisc>(),
                              &h, Cache::Priority::HIGH));
      ASSERT_GT(cache->GetUsage(), cache->GetCapacity());
      expected = {};
      // For CacheEntryStatsCollector
      expected[static_cast<size_t>(CacheEntryRole::kMisc)] = 1;
      // For Fill-it-up
      expected[static_cast<size_t>(CacheEntryRole::kMisc)]++;
      // Still able to hit on saved stats
      EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg());
      // Enough to force a miss
      env_->MockSleepForSeconds(1000);
      EXPECT_EQ(expected, GetCacheEntryRoleCountsBg());

      cache->Release(h);

      // Now we test that the DB mutex is not held during scans, for the ways
      // we know how to (possibly) trigger them. Without a better good way to
      // check this, we simply inject an acquire & release of the DB mutex
      // deep in the stat collection code. If we were already holding the
      // mutex, that is UB that would at least be found by TSAN.
      int scan_count = 0;
      SyncPoint::GetInstance()->SetCallBack(
          "CacheEntryStatsCollector::GetStats:AfterApplyToAllEntries",
          [this, &scan_count](void*) {
            dbfull()->TEST_LockMutex();
            dbfull()->TEST_UnlockMutex();
            ++scan_count;
          });
      SyncPoint::GetInstance()->EnableProcessing();

      // Different things that might trigger a scan, with mock sleeps to
      // force a miss.
      env_->MockSleepForSeconds(10000);
      dbfull()->DumpStats();
      ASSERT_EQ(scan_count, 1);

      env_->MockSleepForSeconds(10000);
      ASSERT_TRUE(
          db_->GetMapProperty(DB::Properties::kBlockCacheEntryStats, &values));
      ASSERT_EQ(scan_count, 2);

      env_->MockSleepForSeconds(10000);
      std::string value_str;
      ASSERT_TRUE(
          db_->GetProperty(DB::Properties::kBlockCacheEntryStats, &value_str));
      ASSERT_EQ(scan_count, 3);

      env_->MockSleepForSeconds(10000);
      ASSERT_TRUE(db_->GetProperty(DB::Properties::kCFStats, &value_str));
      // To match historical speed, querying this property no longer triggers
      // a scan, even if results are old. But periodic dump stats should keep
      // things reasonably updated.
      ASSERT_EQ(scan_count, /*unchanged*/ 3);

      SyncPoint::GetInstance()->DisableProcessing();
      SyncPoint::GetInstance()->ClearAllCallBacks();
    }
    EXPECT_GE(iterations_tested, 1);
  }
}
1319
1320 #endif // ROCKSDB_LITE
1321
1322 class DBBlockCacheKeyTest
1323 : public DBTestBase,
1324 public testing::WithParamInterface<std::tuple<bool, bool>> {
1325 public:
DBBlockCacheKeyTest()1326 DBBlockCacheKeyTest()
1327 : DBTestBase("db_block_cache_test", /*env_do_fsync=*/false) {}
1328
SetUp()1329 void SetUp() override {
1330 use_compressed_cache_ = std::get<0>(GetParam());
1331 exclude_file_numbers_ = std::get<1>(GetParam());
1332 }
1333
1334 bool use_compressed_cache_;
1335 bool exclude_file_numbers_;
1336 };
1337
1338 // Disable LinkFile so that we can physically copy a DB using Checkpoint.
1339 // Disable file GetUniqueId to enable stable cache keys.
1340 class StableCacheKeyTestFS : public FaultInjectionTestFS {
1341 public:
StableCacheKeyTestFS(const std::shared_ptr<FileSystem> & base)1342 explicit StableCacheKeyTestFS(const std::shared_ptr<FileSystem>& base)
1343 : FaultInjectionTestFS(base) {
1344 SetFailGetUniqueId(true);
1345 }
1346
~StableCacheKeyTestFS()1347 virtual ~StableCacheKeyTestFS() override {}
1348
LinkFile(const std::string &,const std::string &,const IOOptions &,IODebugContext *)1349 IOStatus LinkFile(const std::string&, const std::string&, const IOOptions&,
1350 IODebugContext*) override {
1351 return IOStatus::NotSupported("Disabled");
1352 }
1353 };
1354
// Verifies that block cache keys remain stable (enabling cache hits) across
// DB re-open, a full physical copy of the DB via Checkpoint, and
// export/import + re-ingestion of SST files -- and, as a control, that keys
// do NOT collide when file numbers are excluded from table properties.
TEST_P(DBBlockCacheKeyTest, StableCacheKeys) {
  std::shared_ptr<StableCacheKeyTestFS> test_fs{
      new StableCacheKeyTestFS(env_->GetFileSystem())};
  std::unique_ptr<CompositeEnvWrapper> test_env{
      new CompositeEnvWrapper(env_, test_fs)};

  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  options.env = test_env.get();

  BlockBasedTableOptions table_options;

  int key_count = 0;
  uint64_t expected_stat = 0;

  // Ticker-checking closure, chosen per cache configuration below.
  std::function<void()> verify_stats;
  if (use_compressed_cache_) {
    if (!Snappy_Supported()) {
      ROCKSDB_GTEST_SKIP("Compressed cache test requires snappy support");
      return;
    }
    options.compression = CompressionType::kSnappyCompression;
    table_options.no_block_cache = true;
    table_options.block_cache_compressed = NewLRUCache(1 << 25, 0, false);
    verify_stats = [&options, &expected_stat] {
      // One for ordinary SST file and one for external SST file
      ASSERT_EQ(expected_stat,
                options.statistics->getTickerCount(BLOCK_CACHE_COMPRESSED_ADD));
    };
  } else {
    table_options.cache_index_and_filter_blocks = true;
    table_options.block_cache = NewLRUCache(1 << 25, 0, false);
    verify_stats = [&options, &expected_stat] {
      ASSERT_EQ(expected_stat,
                options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
      ASSERT_EQ(expected_stat,
                options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
      ASSERT_EQ(expected_stat,
                options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
    };
  }

  table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  CreateAndReopenWithCF({"koko"}, options);

  if (exclude_file_numbers_) {
    // Simulate something like old behavior without file numbers in properties.
    // This is a "control" side of the test that also ensures safely degraded
    // behavior on old files.
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
        "PropertyBlockBuilder::AddTableProperty:Start", [&](void* arg) {
          TableProperties* props = reinterpret_cast<TableProperties*>(arg);
          props->orig_file_number = 0;
        });
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  }

  // Reads back every key and updates the expected ticker count accordingly.
  std::function<void()> perform_gets = [&key_count, &expected_stat, this]() {
    if (exclude_file_numbers_) {
      // No cache key reuse should happen, because we can't rely on current
      // file number being stable
      expected_stat += key_count;
    } else {
      // Cache keys should be stable
      expected_stat = key_count;
    }
    for (int i = 0; i < key_count; ++i) {
      ASSERT_EQ(Get(1, Key(i)), "abc");
    }
  };

  // Ordinary SST files with same session id
  const std::string something_compressible(500U, 'x');
  for (int i = 0; i < 2; ++i) {
    ASSERT_OK(Put(1, Key(key_count), "abc"));
    ASSERT_OK(Put(1, Key(key_count) + "a", something_compressible));
    ASSERT_OK(Flush(1));
    ++key_count;
  }

#ifndef ROCKSDB_LITE
  // Save an export of those ordinary SST files for later
  std::string export_files_dir = dbname_ + "/exported";
  ExportImportFilesMetaData* metadata_ptr_ = nullptr;
  Checkpoint* checkpoint;
  ASSERT_OK(Checkpoint::Create(db_, &checkpoint));
  ASSERT_OK(checkpoint->ExportColumnFamily(handles_[1], export_files_dir,
                                           &metadata_ptr_));
  ASSERT_NE(metadata_ptr_, nullptr);
  delete checkpoint;
  checkpoint = nullptr;

  // External SST files with same session id
  SstFileWriter sst_file_writer(EnvOptions(), options);
  std::vector<std::string> external;
  for (int i = 0; i < 2; ++i) {
    std::string f = dbname_ + "/external" + ToString(i) + ".sst";
    external.push_back(f);
    ASSERT_OK(sst_file_writer.Open(f));
    ASSERT_OK(sst_file_writer.Put(Key(key_count), "abc"));
    ASSERT_OK(
        sst_file_writer.Put(Key(key_count) + "a", something_compressible));
    ++key_count;
    ExternalSstFileInfo external_info;
    ASSERT_OK(sst_file_writer.Finish(&external_info));
    IngestExternalFileOptions ingest_opts;
    ASSERT_OK(db_->IngestExternalFile(handles_[1], {f}, ingest_opts));
  }

  if (exclude_file_numbers_) {
    // FIXME(peterd): figure out where these extra two ADDs are coming from
    // (recordTick with a negative delta subtracts 2 from each ticker).
    options.statistics->recordTick(BLOCK_CACHE_INDEX_ADD,
                                   uint64_t{0} - uint64_t{2});
    options.statistics->recordTick(BLOCK_CACHE_FILTER_ADD,
                                   uint64_t{0} - uint64_t{2});
    options.statistics->recordTick(BLOCK_CACHE_COMPRESSED_ADD,
                                   uint64_t{0} - uint64_t{2});
  }
#endif

  perform_gets();
  verify_stats();

  // Make sure we can cache hit after re-open
  ReopenWithColumnFamilies({"default", "koko"}, options);

  perform_gets();
  verify_stats();

  // Make sure we can cache hit even on a full copy of the DB. Using
  // StableCacheKeyTestFS, Checkpoint will resort to full copy not hard link.
  // (Checkpoint not available in LITE mode to test this.)
#ifndef ROCKSDB_LITE
  auto db_copy_name = dbname_ + "-copy";
  ASSERT_OK(Checkpoint::Create(db_, &checkpoint));
  ASSERT_OK(checkpoint->CreateCheckpoint(db_copy_name));
  delete checkpoint;

  Close();
  Destroy(options);

  // Switch to the DB copy
  SaveAndRestore<std::string> save_dbname(&dbname_, db_copy_name);
  ReopenWithColumnFamilies({"default", "koko"}, options);

  perform_gets();
  verify_stats();

  // And ensure that re-importing + ingesting the same files into a
  // different DB uses same cache keys
  DestroyAndReopen(options);

  ColumnFamilyHandle* cfh = nullptr;
  ASSERT_OK(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo",
                                              ImportColumnFamilyOptions(),
                                              *metadata_ptr_, &cfh));
  ASSERT_NE(cfh, nullptr);
  delete cfh;
  cfh = nullptr;
  delete metadata_ptr_;
  metadata_ptr_ = nullptr;

  // NOTE(review): DestroyDB's Status is ignored here; consider ASSERT_OK.
  DestroyDB(export_files_dir, options);

  ReopenWithColumnFamilies({"default", "yoyo"}, options);

  IngestExternalFileOptions ingest_opts;
  ASSERT_OK(db_->IngestExternalFile(handles_[1], {external}, ingest_opts));

  if (exclude_file_numbers_) {
    // FIXME(peterd): figure out where these extra two ADDs are coming from
    options.statistics->recordTick(BLOCK_CACHE_INDEX_ADD,
                                   uint64_t{0} - uint64_t{2});
    options.statistics->recordTick(BLOCK_CACHE_FILTER_ADD,
                                   uint64_t{0} - uint64_t{2});
  }

  perform_gets();
  verify_stats();
#endif  // !ROCKSDB_LITE

  Close();
  Destroy(options);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
1542
// Run StableCacheKeys over all four combinations of
// (use_compressed_cache_, exclude_file_numbers_).
INSTANTIATE_TEST_CASE_P(DBBlockCacheKeyTest, DBBlockCacheKeyTest,
                        ::testing::Combine(::testing::Bool(),
                                           ::testing::Bool()));
1546
1547 class DBBlockCachePinningTest
1548 : public DBTestBase,
1549 public testing::WithParamInterface<
1550 std::tuple<bool, PinningTier, PinningTier, PinningTier>> {
1551 public:
DBBlockCachePinningTest()1552 DBBlockCachePinningTest()
1553 : DBTestBase("db_block_cache_test", /*env_do_fsync=*/false) {}
1554
SetUp()1555 void SetUp() override {
1556 partition_index_and_filters_ = std::get<0>(GetParam());
1557 top_level_index_pinning_ = std::get<1>(GetParam());
1558 partition_pinning_ = std::get<2>(GetParam());
1559 unpartitioned_pinning_ = std::get<3>(GetParam());
1560 }
1561
1562 bool partition_index_and_filters_;
1563 PinningTier top_level_index_pinning_;
1564 PinningTier partition_pinning_;
1565 PinningTier unpartitioned_pinning_;
1566 };
1567
// Verifies that each pinning tier pins exactly the expected metadata blocks.
// Strategy: build an L0 file and an L1 file, erase all unpinned entries from
// the block cache, read a key from each level, and check that the cache-miss
// tickers grew only for the block categories whose pinning tier does not
// cover that level's file.
TEST_P(DBBlockCachePinningTest, TwoLevelDB) {
  // Creates one file in L0 and one file in L1. Both files have enough data that
  // their index and filter blocks are partitioned. The L1 file will also have
  // a compression dictionary (those are trained only during compaction), which
  // must be unpartitioned.
  const int kKeySize = 32;
  const int kBlockSize = 128;
  const int kNumBlocksPerFile = 128;
  const int kNumKeysPerFile = kBlockSize * kNumBlocksPerFile / kKeySize;

  Options options = CurrentOptions();
  // `kNoCompression` makes the unit test more portable. But it relies on the
  // current behavior of persisting/accessing dictionary even when there's no
  // (de)compression happening, which seems fairly likely to change over time.
  options.compression = kNoCompression;
  options.compression_opts.max_dict_bytes = 4 << 10;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  BlockBasedTableOptions table_options;
  table_options.block_cache = NewLRUCache(1 << 20 /* capacity */);
  table_options.block_size = kBlockSize;
  // Same size for metadata blocks so index/filter partitioning kicks in.
  table_options.metadata_block_size = kBlockSize;
  table_options.cache_index_and_filter_blocks = true;
  // Apply the pinning tiers under test (from the fixture's parameters).
  table_options.metadata_cache_options.top_level_index_pinning =
      top_level_index_pinning_;
  table_options.metadata_cache_options.partition_pinning = partition_pinning_;
  table_options.metadata_cache_options.unpartitioned_pinning =
      unpartitioned_pinning_;
  table_options.filter_policy.reset(
      NewBloomFilterPolicy(10 /* bits_per_key */));
  if (partition_index_and_filters_) {
    table_options.index_type =
        BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
    table_options.partition_filters = true;
  }
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);

  // Write two files: keys [0, kNumKeysPerFile) end up in L1 (compacted after
  // the first flush), keys [kNumKeysPerFile, 2*kNumKeysPerFile) stay in L0.
  Random rnd(301);
  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < kNumKeysPerFile; ++j) {
      ASSERT_OK(Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kKeySize)));
    }
    ASSERT_OK(Flush());
    if (i == 0) {
      // Prevent trivial move so file will be rewritten with dictionary and
      // reopened with L1's pinning settings.
      CompactRangeOptions cro;
      cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
      ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
    }
  }

  // Clear all unpinned blocks so unpinned blocks will show up as cache misses
  // when reading a key from a file.
  table_options.block_cache->EraseUnRefEntries();

  // Get base cache values
  uint64_t filter_misses = TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
  uint64_t index_misses = TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS);
  uint64_t compression_dict_misses =
      TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS);

  // Read a key from the L0 file
  Get(Key(kNumKeysPerFile));
  uint64_t expected_filter_misses = filter_misses;
  uint64_t expected_index_misses = index_misses;
  uint64_t expected_compression_dict_misses = compression_dict_misses;
  // For the L0 (flushed) file, only kNone leaves blocks unpinned; both
  // kFlushedAndSimilar and kAll are expected to keep them in cache.
  if (partition_index_and_filters_) {
    if (top_level_index_pinning_ == PinningTier::kNone) {
      ++expected_filter_misses;
      ++expected_index_misses;
    }
    if (partition_pinning_ == PinningTier::kNone) {
      ++expected_filter_misses;
      ++expected_index_misses;
    }
  } else {
    // Unpartitioned index/filter blocks fall under unpartitioned_pinning_.
    if (unpartitioned_pinning_ == PinningTier::kNone) {
      ++expected_filter_misses;
      ++expected_index_misses;
    }
  }
  // The compression dictionary is always unpartitioned, regardless of the
  // index/filter partitioning parameter.
  if (unpartitioned_pinning_ == PinningTier::kNone) {
    ++expected_compression_dict_misses;
  }
  ASSERT_EQ(expected_filter_misses,
            TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(expected_index_misses,
            TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(expected_compression_dict_misses,
            TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS));

  // Clear all unpinned blocks so unpinned blocks will show up as cache misses
  // when reading a key from a file.
  table_options.block_cache->EraseUnRefEntries();

  // Read a key from the L1 file
  Get(Key(0));
  // For the L1 (compacted) file, only kAll is expected to pin; both kNone and
  // kFlushedAndSimilar should produce misses.
  if (partition_index_and_filters_) {
    if (top_level_index_pinning_ == PinningTier::kNone ||
        top_level_index_pinning_ == PinningTier::kFlushedAndSimilar) {
      ++expected_filter_misses;
      ++expected_index_misses;
    }
    if (partition_pinning_ == PinningTier::kNone ||
        partition_pinning_ == PinningTier::kFlushedAndSimilar) {
      ++expected_filter_misses;
      ++expected_index_misses;
    }
  } else {
    if (unpartitioned_pinning_ == PinningTier::kNone ||
        unpartitioned_pinning_ == PinningTier::kFlushedAndSimilar) {
      ++expected_filter_misses;
      ++expected_index_misses;
    }
  }
  if (unpartitioned_pinning_ == PinningTier::kNone ||
      unpartitioned_pinning_ == PinningTier::kFlushedAndSimilar) {
    ++expected_compression_dict_misses;
  }
  ASSERT_EQ(expected_filter_misses,
            TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(expected_index_misses,
            TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(expected_compression_dict_misses,
            TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS));
}
1695
// Runs TwoLevelDB over every combination of: partitioned vs. unpartitioned
// index+filters (2) and the three pinning tiers for each of the three
// metadata categories (3 x 3 x 3) -- 54 parameterizations total.
INSTANTIATE_TEST_CASE_P(
    DBBlockCachePinningTest, DBBlockCachePinningTest,
    ::testing::Combine(
        ::testing::Bool(),
        ::testing::Values(PinningTier::kNone, PinningTier::kFlushedAndSimilar,
                          PinningTier::kAll),
        ::testing::Values(PinningTier::kNone, PinningTier::kFlushedAndSimilar,
                          PinningTier::kAll),
        ::testing::Values(PinningTier::kNone, PinningTier::kFlushedAndSimilar,
                          PinningTier::kAll)));
1706
1707 } // namespace ROCKSDB_NAMESPACE
1708
// Test binary entry point: install RocksDB's stack-trace handler for better
// crash diagnostics, initialize gtest (consuming gtest flags from argv), and
// run all registered tests.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}
1714