//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <cstdlib>
#include <functional>
#include <memory>

#include "cache/cache_entry_roles.h"
#include "cache/lru_cache.h"
#include "db/column_family.h"
#include "db/db_test_util.h"
#include "port/stack_trace.h"
#include "rocksdb/statistics.h"
#include "rocksdb/table.h"
#include "util/compression.h"
#include "util/defer.h"
#include "util/random.h"
#include "utilities/fault_injection_fs.h"

namespace ROCKSDB_NAMESPACE {

class DBBlockCacheTest : public DBTestBase {
 private:
  size_t miss_count_ = 0;
  size_t hit_count_ = 0;
  size_t insert_count_ = 0;
  size_t failure_count_ = 0;
  size_t compression_dict_miss_count_ = 0;
  size_t compression_dict_hit_count_ = 0;
  size_t compression_dict_insert_count_ = 0;
  size_t compressed_miss_count_ = 0;
  size_t compressed_hit_count_ = 0;
  size_t compressed_insert_count_ = 0;
  size_t compressed_failure_count_ = 0;

 public:
  const size_t kNumBlocks = 10;
  const size_t kValueSize = 100;

  DBBlockCacheTest()
      : DBTestBase("db_block_cache_test", /*env_do_fsync=*/true) {}

  BlockBasedTableOptions GetTableOptions() {
    BlockBasedTableOptions table_options;
    // Set a small enough block size so that each key-value pair gets its own
    // block.
    table_options.block_size = 1;
    return table_options;
  }

  Options GetOptions(const BlockBasedTableOptions& table_options) {
    Options options = CurrentOptions();
    options.create_if_missing = true;
    options.avoid_flush_during_recovery = false;
    // options.compression = kNoCompression;
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    return options;
  }

  void InitTable(const Options& /*options*/) {
    std::string value(kValueSize, 'a');
    for (size_t i = 0; i < kNumBlocks; i++) {
      ASSERT_OK(Put(ToString(i), value.c_str()));
    }
  }

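  // Snapshot the current block cache ticker values (hits, misses, adds,
  // failures, and their compressed-cache counterparts) so that later
  // CheckCacheCounters() / CheckCompressedCacheCounters() calls can assert
  // on exact deltas.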
  void RecordCacheCounters(const Options& options) {
    miss_count_ = TestGetTickerCount(options, BLOCK_CACHE_MISS);
    hit_count_ = TestGetTickerCount(options, BLOCK_CACHE_HIT);
    insert_count_ = TestGetTickerCount(options, BLOCK_CACHE_ADD);
    failure_count_ = TestGetTickerCount(options, BLOCK_CACHE_ADD_FAILURES);
    compressed_miss_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS);
    compressed_hit_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_HIT);
    compressed_insert_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD);
    compressed_failure_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD_FAILURES);
  }

  void RecordCacheCountersForCompressionDict(const Options& options) {
    compression_dict_miss_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS);
    compression_dict_hit_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_HIT);
    compression_dict_insert_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_ADD);
  }

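  // Assert that each block cache ticker advanced by exactly the expected
  // delta since the last snapshot, then roll the snapshot forward.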
  void CheckCacheCounters(const Options& options, size_t expected_misses,
                          size_t expected_hits, size_t expected_inserts,
                          size_t expected_failures) {
    size_t new_miss_count = TestGetTickerCount(options, BLOCK_CACHE_MISS);
    size_t new_hit_count = TestGetTickerCount(options, BLOCK_CACHE_HIT);
    size_t new_insert_count = TestGetTickerCount(options, BLOCK_CACHE_ADD);
    size_t new_failure_count =
        TestGetTickerCount(options, BLOCK_CACHE_ADD_FAILURES);
    ASSERT_EQ(miss_count_ + expected_misses, new_miss_count);
    ASSERT_EQ(hit_count_ + expected_hits, new_hit_count);
    ASSERT_EQ(insert_count_ + expected_inserts, new_insert_count);
    ASSERT_EQ(failure_count_ + expected_failures, new_failure_count);
    miss_count_ = new_miss_count;
    hit_count_ = new_hit_count;
    insert_count_ = new_insert_count;
    failure_count_ = new_failure_count;
  }

  void CheckCacheCountersForCompressionDict(
      const Options& options, size_t expected_compression_dict_misses,
      size_t expected_compression_dict_hits,
      size_t expected_compression_dict_inserts) {
    size_t new_compression_dict_miss_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS);
    size_t new_compression_dict_hit_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_HIT);
    size_t new_compression_dict_insert_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_ADD);
    ASSERT_EQ(compression_dict_miss_count_ + expected_compression_dict_misses,
              new_compression_dict_miss_count);
    ASSERT_EQ(compression_dict_hit_count_ + expected_compression_dict_hits,
              new_compression_dict_hit_count);
    ASSERT_EQ(
        compression_dict_insert_count_ + expected_compression_dict_inserts,
        new_compression_dict_insert_count);
    compression_dict_miss_count_ = new_compression_dict_miss_count;
    compression_dict_hit_count_ = new_compression_dict_hit_count;
    compression_dict_insert_count_ = new_compression_dict_insert_count;
  }

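  // Same delta-checking scheme as CheckCacheCounters(), but for the
  // compressed block cache tickers.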
  void CheckCompressedCacheCounters(const Options& options,
                                    size_t expected_misses,
                                    size_t expected_hits,
                                    size_t expected_inserts,
                                    size_t expected_failures) {
    size_t new_miss_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS);
    size_t new_hit_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_HIT);
    size_t new_insert_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD);
    size_t new_failure_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD_FAILURES);
    ASSERT_EQ(compressed_miss_count_ + expected_misses, new_miss_count);
    ASSERT_EQ(compressed_hit_count_ + expected_hits, new_hit_count);
    ASSERT_EQ(compressed_insert_count_ + expected_inserts, new_insert_count);
    ASSERT_EQ(compressed_failure_count_ + expected_failures, new_failure_count);
    compressed_miss_count_ = new_miss_count;
    compressed_hit_count_ = new_hit_count;
    compressed_insert_count_ = new_insert_count;
    compressed_failure_count_ = new_failure_count;
  }

#ifndef ROCKSDB_LITE
  const std::array<size_t, kNumCacheEntryRoles> GetCacheEntryRoleCountsBg() {
    // Fetch the per-role entry counts from the background cache entry role
    // stats.
    ColumnFamilyHandleImpl* cfh =
        static_cast<ColumnFamilyHandleImpl*>(dbfull()->DefaultColumnFamily());
    InternalStats* internal_stats_ptr = cfh->cfd()->internal_stats();
    InternalStats::CacheEntryRoleStats stats;
    internal_stats_ptr->TEST_GetCacheEntryRoleStats(&stats,
                                                    /*foreground=*/false);
    return stats.entry_counts;
  }
#endif  // ROCKSDB_LITE
};

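// Verify that blocks pinned by a live iterator are charged to the block
// cache and that usage drops back to zero once the iterator is released.
// (With capacity 0 and strict_capacity_limit=false, inserts still succeed;
// unpinned entries are simply evicted immediately.)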
TEST_F(DBBlockCacheTest, IteratorBlockCacheUsage) {
  ReadOptions read_options;
  read_options.fill_cache = false;
  auto table_options = GetTableOptions();
  auto options = GetOptions(table_options);
  InitTable(options);

  LRUCacheOptions co;
  co.capacity = 0;
  co.num_shard_bits = 0;
  co.strict_capacity_limit = false;
  // Needed so that the entry stats collector is not counted in usage.
  co.metadata_charge_policy = kDontChargeCacheMetadata;
  std::shared_ptr<Cache> cache = NewLRUCache(co);
  table_options.block_cache = cache;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  RecordCacheCounters(options);

  std::vector<std::unique_ptr<Iterator>> iterators(kNumBlocks - 1);
  Iterator* iter = nullptr;

  ASSERT_EQ(0, cache->GetUsage());
  iter = db_->NewIterator(read_options);
  iter->Seek(ToString(0));
  ASSERT_LT(0, cache->GetUsage());
  delete iter;
  iter = nullptr;
  ASSERT_EQ(0, cache->GetUsage());
}

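// Verify hit/miss/insert/failure accounting with only an uncompressed block
// cache: a first pass of misses and inserts, a failed insert (Incomplete)
// under a strict capacity limit while all blocks are pinned by iterators,
// then a second pass of pure hits.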
TEST_F(DBBlockCacheTest, TestWithoutCompressedBlockCache) {
  ReadOptions read_options;
  auto table_options = GetTableOptions();
  auto options = GetOptions(table_options);
  InitTable(options);

  LRUCacheOptions co;
  co.capacity = 0;
  co.num_shard_bits = 0;
  co.strict_capacity_limit = false;
  // Needed so that the entry stats collector is not counted in usage.
  co.metadata_charge_policy = kDontChargeCacheMetadata;
  std::shared_ptr<Cache> cache = NewLRUCache(co);
  table_options.block_cache = cache;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  RecordCacheCounters(options);

  std::vector<std::unique_ptr<Iterator>> iterators(kNumBlocks - 1);
  Iterator* iter = nullptr;

  // Load blocks into cache.
  for (size_t i = 0; i + 1 < kNumBlocks; i++) {
    iter = db_->NewIterator(read_options);
    iter->Seek(ToString(i));
    ASSERT_OK(iter->status());
    CheckCacheCounters(options, 1, 0, 1, 0);
    iterators[i].reset(iter);
  }
  size_t usage = cache->GetUsage();
  ASSERT_LT(0, usage);
  cache->SetCapacity(usage);
  ASSERT_EQ(usage, cache->GetPinnedUsage());

  // Test with strict capacity limit.
  cache->SetStrictCapacityLimit(true);
  iter = db_->NewIterator(read_options);
  iter->Seek(ToString(kNumBlocks - 1));
  ASSERT_TRUE(iter->status().IsIncomplete());
  CheckCacheCounters(options, 1, 0, 0, 1);
  delete iter;
  iter = nullptr;

  // Release iterators and access cache again.
  for (size_t i = 0; i + 1 < kNumBlocks; i++) {
    iterators[i].reset();
    CheckCacheCounters(options, 0, 0, 0, 0);
  }
  ASSERT_EQ(0, cache->GetPinnedUsage());
  for (size_t i = 0; i + 1 < kNumBlocks; i++) {
    iter = db_->NewIterator(read_options);
    iter->Seek(ToString(i));
    ASSERT_OK(iter->status());
    CheckCacheCounters(options, 0, 1, 0, 0);
    iterators[i].reset(iter);
  }
}

#ifdef SNAPPY
TEST_F(DBBlockCacheTest, TestWithCompressedBlockCache) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

  BlockBasedTableOptions table_options;
  table_options.no_block_cache = true;
  table_options.block_cache_compressed = nullptr;
  table_options.block_size = 1;
  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
  table_options.cache_index_and_filter_blocks = false;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  options.compression = CompressionType::kSnappyCompression;

  DestroyAndReopen(options);

  std::string value(kValueSize, 'a');
  for (size_t i = 0; i < kNumBlocks; i++) {
    ASSERT_OK(Put(ToString(i), value));
    ASSERT_OK(Flush());
  }

  ReadOptions read_options;
  std::shared_ptr<Cache> compressed_cache = NewLRUCache(1 << 25, 0, false);
  LRUCacheOptions co;
  co.capacity = 0;
  co.num_shard_bits = 0;
  co.strict_capacity_limit = false;
  // Needed so that the entry stats collector is not counted in usage.
  co.metadata_charge_policy = kDontChargeCacheMetadata;
  std::shared_ptr<Cache> cache = NewLRUCache(co);
  table_options.block_cache = cache;
  table_options.no_block_cache = false;
  table_options.block_cache_compressed = compressed_cache;
  table_options.max_auto_readahead_size = 0;
  table_options.cache_index_and_filter_blocks = false;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  RecordCacheCounters(options);

  // Load blocks into cache.
  for (size_t i = 0; i < kNumBlocks - 1; i++) {
    ASSERT_EQ(value, Get(ToString(i)));
    CheckCacheCounters(options, 1, 0, 1, 0);
    CheckCompressedCacheCounters(options, 1, 0, 1, 0);
  }

  size_t usage = cache->GetUsage();
  ASSERT_EQ(0, usage);
  ASSERT_EQ(usage, cache->GetPinnedUsage());
  size_t compressed_usage = compressed_cache->GetUsage();
  ASSERT_LT(0, compressed_usage);
  // Compressed block cache cannot be pinned.
  ASSERT_EQ(0, compressed_cache->GetPinnedUsage());

  // Set the strict capacity limit flag. Now blocks will only load into the
  // compressed block cache.
  cache->SetCapacity(usage);
  cache->SetStrictCapacityLimit(true);
  ASSERT_EQ(usage, cache->GetPinnedUsage());

  // Load the last key's block.
  ASSERT_EQ("Result incomplete: Insert failed due to LRU cache being full.",
            Get(ToString(kNumBlocks - 1)));
  // A failed insert still records a cache miss.
  CheckCacheCounters(options, 1, 0, 0, 1);
  CheckCompressedCacheCounters(options, 1, 0, 1, 0);

  // Clear the strict capacity limit flag. This time we shall hit the
  // compressed block cache and load into the block cache.
  cache->SetStrictCapacityLimit(false);
  // Load the last key's block.
  ASSERT_EQ(value, Get(ToString(kNumBlocks - 1)));
  CheckCacheCounters(options, 1, 0, 1, 0);
  CheckCompressedCacheCounters(options, 0, 1, 0, 0);
}
#endif  // SNAPPY

#ifndef ROCKSDB_LITE

// Make sure that when options.block_cache is set, after a new table is
// created its index/filter blocks are added to block cache.
TEST_F(DBBlockCacheTest, IndexAndFilterBlocksOfNewTableAddedToCache) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  BlockBasedTableOptions table_options;
  table_options.cache_index_and_filter_blocks = true;
  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "key", "val"));
  // Create a new table.
  ASSERT_OK(Flush(1));

  // index/filter blocks added to block cache right after table creation.
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(2, /* only index/filter were added */
            TestGetTickerCount(options, BLOCK_CACHE_ADD));
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));
  uint64_t int_num;
  ASSERT_TRUE(
      dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num));
  ASSERT_EQ(int_num, 0U);

  // Make sure filter block is in cache.
  std::string value;
  ReadOptions ropt;
  db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value);

  // Miss count should remain the same.
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));

  db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value);
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));

  // Make sure index block is in cache.
  auto index_block_hit = TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT);
  value = Get(1, "key");
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(index_block_hit + 1,
            TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));

  value = Get(1, "key");
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(index_block_hit + 2,
            TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
}

// With fill_cache = false, fill up the cache, then iterate over the entire
// DB, and verify that the dummy entries inserted in
// `BlockBasedTable::NewDataBlockIterator` do not cause heap-use-after-free
// errors in COMPILE_WITH_ASAN=1 runs.
TEST_F(DBBlockCacheTest, FillCacheAndIterateDB) {
  ReadOptions read_options;
  read_options.fill_cache = false;
  auto table_options = GetTableOptions();
  auto options = GetOptions(table_options);
  InitTable(options);

  std::shared_ptr<Cache> cache = NewLRUCache(10, 0, true);
  table_options.block_cache = cache;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  ASSERT_OK(Put("key1", "val1"));
  ASSERT_OK(Put("key2", "val2"));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("key3", "val3"));
  ASSERT_OK(Put("key4", "val4"));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("key5", "val5"));
  ASSERT_OK(Put("key6", "val6"));
  ASSERT_OK(Flush());

  Iterator* iter = nullptr;

  iter = db_->NewIterator(read_options);
  iter->Seek(ToString(0));
  while (iter->Valid()) {
    iter->Next();
  }
  delete iter;
  iter = nullptr;
}

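// Verify BLOCK_CACHE_{INDEX,FILTER}_BYTES_INSERT accounting across two
// flushes, with the cache capacity tightened to current usage in between.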
TEST_F(DBBlockCacheTest, IndexAndFilterBlocksStats) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  BlockBasedTableOptions table_options;
  table_options.cache_index_and_filter_blocks = true;
  LRUCacheOptions co;
  // 500 bytes are enough to hold the first two blocks
  co.capacity = 500;
  co.num_shard_bits = 0;
  co.strict_capacity_limit = false;
  co.metadata_charge_policy = kDontChargeCacheMetadata;
  std::shared_ptr<Cache> cache = NewLRUCache(co);
  table_options.block_cache = cache;
  table_options.filter_policy.reset(NewBloomFilterPolicy(20, true));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "longer_key", "val"));
  // Create a new table
  ASSERT_OK(Flush(1));
  size_t index_bytes_insert =
      TestGetTickerCount(options, BLOCK_CACHE_INDEX_BYTES_INSERT);
  size_t filter_bytes_insert =
      TestGetTickerCount(options, BLOCK_CACHE_FILTER_BYTES_INSERT);
  ASSERT_GT(index_bytes_insert, 0);
  ASSERT_GT(filter_bytes_insert, 0);
  ASSERT_EQ(cache->GetUsage(), index_bytes_insert + filter_bytes_insert);
  // set the cache capacity to the current usage
  cache->SetCapacity(index_bytes_insert + filter_bytes_insert);
  // The index and filter eviction statistics were broken by the refactoring
  // that moved the readers out of the block cache. Disabling these until we
  // can bring the stats back.
  // ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_INDEX_BYTES_EVICT), 0);
  // ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_FILTER_BYTES_EVICT), 0);
  // Note that the second key needs to be no longer than the first one.
  // Otherwise the second index block may not fit in cache.
  ASSERT_OK(Put(1, "key", "val"));
  // Create a new table
  ASSERT_OK(Flush(1));
  // The cache evicted the old index and filter entries.
  ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_INDEX_BYTES_INSERT),
            index_bytes_insert);
  ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_FILTER_BYTES_INSERT),
            filter_bytes_insert);
  // The index and filter eviction statistics were broken by the refactoring
  // that moved the readers out of the block cache. Disabling these until we
  // can bring the stats back.
  // ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_INDEX_BYTES_EVICT),
  //           index_bytes_insert);
  // ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_FILTER_BYTES_EVICT),
  //           filter_bytes_insert);
}

#if (defined OS_LINUX || defined OS_WIN)
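// Verify that with prepopulate_block_cache=kFlushOnly, data blocks written
// during flush are pre-loaded into the block cache, so the first Get() after
// each flush is a cache hit; blocks written by compaction are not counted.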
TEST_F(DBBlockCacheTest, WarmCacheWithDataBlocksDuringFlush) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

  BlockBasedTableOptions table_options;
  table_options.block_cache = NewLRUCache(1 << 25, 0, false);
  table_options.cache_index_and_filter_blocks = false;
  table_options.prepopulate_block_cache =
      BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  DestroyAndReopen(options);

  std::string value(kValueSize, 'a');
  for (size_t i = 1; i <= kNumBlocks; i++) {
    ASSERT_OK(Put(ToString(i), value));
    ASSERT_OK(Flush());
    ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
    ASSERT_EQ(value, Get(ToString(i)));
    ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_DATA_MISS));
    ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_HIT));
  }
  // Verify compaction not counted
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
                              /*end=*/nullptr));
  EXPECT_EQ(kNumBlocks,
            options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
}

// This test caches data, index, and filter blocks during flush.
TEST_F(DBBlockCacheTest, WarmCacheWithBlocksDuringFlush) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

  BlockBasedTableOptions table_options;
  table_options.block_cache = NewLRUCache(1 << 25, 0, false);
  table_options.cache_index_and_filter_blocks = true;
  table_options.prepopulate_block_cache =
      BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly;
  table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  DestroyAndReopen(options);

  std::string value(kValueSize, 'a');
  for (size_t i = 1; i <= kNumBlocks; i++) {
    ASSERT_OK(Put(ToString(i), value));
    ASSERT_OK(Flush());
    ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
    ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
    ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));

    ASSERT_EQ(value, Get(ToString(i)));

    ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_DATA_MISS));
    ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_HIT));

    ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(i * 3, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT));

    ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(i * 2,
              options.statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT));
  }
  // Verify compaction not counted
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
                              /*end=*/nullptr));
  EXPECT_EQ(kNumBlocks,
            options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
  // Index and filter blocks are automatically warmed when the new table file
  // is automatically opened at the end of compaction. This is not easily
  // disabled, so it results in the new index and filter blocks being warmed.
  EXPECT_EQ(1 + kNumBlocks,
            options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
  EXPECT_EQ(1 + kNumBlocks,
            options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
}

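// Verify that prepopulate_block_cache can be changed dynamically via
// SetOptions(): flushes warm the cache while kFlushOnly is set and stop
// doing so after switching to kDisable.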
TEST_F(DBBlockCacheTest, DynamicallyWarmCacheDuringFlush) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

  BlockBasedTableOptions table_options;
  table_options.block_cache = NewLRUCache(1 << 25, 0, false);
  table_options.cache_index_and_filter_blocks = false;
  table_options.prepopulate_block_cache =
      BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly;

  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  DestroyAndReopen(options);

  std::string value(kValueSize, 'a');

  for (size_t i = 1; i <= 5; i++) {
    ASSERT_OK(Put(ToString(i), value));
    ASSERT_OK(Flush());
    ASSERT_EQ(1,
              options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));

    ASSERT_EQ(value, Get(ToString(i)));
    ASSERT_EQ(0,
              options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));
    ASSERT_EQ(
        0, options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_MISS));
    ASSERT_EQ(1,
              options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_HIT));
  }

  ASSERT_OK(dbfull()->SetOptions(
      {{"block_based_table_factory", "{prepopulate_block_cache=kDisable;}"}}));

  for (size_t i = 6; i <= kNumBlocks; i++) {
    ASSERT_OK(Put(ToString(i), value));
    ASSERT_OK(Flush());
    ASSERT_EQ(0,
              options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));

    ASSERT_EQ(value, Get(ToString(i)));
    ASSERT_EQ(1,
              options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));
    ASSERT_EQ(
        1, options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_MISS));
    ASSERT_EQ(0,
              options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_HIT));
  }
}
#endif

namespace {

// A mock cache that wraps LRUCache and records how many entries have been
// inserted at each priority.
class MockCache : public LRUCache {
 public:
  static uint32_t high_pri_insert_count;
  static uint32_t low_pri_insert_count;

  MockCache()
      : LRUCache((size_t)1 << 25 /*capacity*/, 0 /*num_shard_bits*/,
                 false /*strict_capacity_limit*/, 0.0 /*high_pri_pool_ratio*/) {
  }

  using ShardedCache::Insert;

  Status Insert(const Slice& key, void* value,
                const Cache::CacheItemHelper* helper_cb, size_t charge,
                Handle** handle, Priority priority) override {
    DeleterFn delete_cb = helper_cb->del_cb;
    if (priority == Priority::LOW) {
      low_pri_insert_count++;
    } else {
      high_pri_insert_count++;
    }
    return LRUCache::Insert(key, value, charge, delete_cb, handle, priority);
  }
};

uint32_t MockCache::high_pri_insert_count = 0;
uint32_t MockCache::low_pri_insert_count = 0;

}  // anonymous namespace

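// Verify that cache_index_and_filter_blocks_with_high_priority routes
// index/filter inserts to the configured priority, while data blocks are
// always inserted with low priority.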
TEST_F(DBBlockCacheTest, IndexAndFilterBlocksCachePriority) {
  for (auto priority : {Cache::Priority::LOW, Cache::Priority::HIGH}) {
    Options options = CurrentOptions();
    options.create_if_missing = true;
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
    BlockBasedTableOptions table_options;
    table_options.cache_index_and_filter_blocks = true;
    table_options.block_cache.reset(new MockCache());
    table_options.filter_policy.reset(NewBloomFilterPolicy(20));
    table_options.cache_index_and_filter_blocks_with_high_priority =
        (priority == Cache::Priority::HIGH);
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    DestroyAndReopen(options);

    MockCache::high_pri_insert_count = 0;
    MockCache::low_pri_insert_count = 0;

    // Create a new table.
    ASSERT_OK(Put("foo", "value"));
    ASSERT_OK(Put("bar", "value"));
    ASSERT_OK(Flush());
    ASSERT_EQ(1, NumTableFilesAtLevel(0));

    // index/filter blocks added to block cache right after table creation.
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(2, /* only index/filter were added */
              TestGetTickerCount(options, BLOCK_CACHE_ADD));
    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));
    if (priority == Cache::Priority::LOW) {
      ASSERT_EQ(0u, MockCache::high_pri_insert_count);
      ASSERT_EQ(2u, MockCache::low_pri_insert_count);
    } else {
      ASSERT_EQ(2u, MockCache::high_pri_insert_count);
      ASSERT_EQ(0u, MockCache::low_pri_insert_count);
    }

    // Access data block.
    ASSERT_EQ("value", Get("foo"));

    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(3, /*adding data block*/
              TestGetTickerCount(options, BLOCK_CACHE_ADD));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));

    // Data block should be inserted with low priority.
    if (priority == Cache::Priority::LOW) {
      ASSERT_EQ(0u, MockCache::high_pri_insert_count);
      ASSERT_EQ(3u, MockCache::low_pri_insert_count);
    } else {
      ASSERT_EQ(2u, MockCache::high_pri_insert_count);
      ASSERT_EQ(1u, MockCache::low_pri_insert_count);
    }
  }
}

namespace {

// An LRUCache wrapper that can falsely report "not found" on Lookup.
// This allows us to manipulate BlockBasedTableReader into thinking
// another thread inserted the data in between Lookup and Insert,
// while mostly preserving the LRUCache interface/behavior.
class LookupLiarCache : public CacheWrapper {
  int nth_lookup_not_found_ = 0;

 public:
  explicit LookupLiarCache(std::shared_ptr<Cache> target)
      : CacheWrapper(std::move(target)) {}

  using Cache::Lookup;
  Handle* Lookup(const Slice& key, Statistics* stats) override {
    if (nth_lookup_not_found_ == 1) {
      nth_lookup_not_found_ = 0;
      return nullptr;
    }
    if (nth_lookup_not_found_ > 1) {
      --nth_lookup_not_found_;
    }
    return CacheWrapper::Lookup(key, stats);
  }

  // 1 == next lookup, 2 == after next, etc.
  void SetNthLookupNotFound(int n) { nth_lookup_not_found_ = n; }
};

}  // anonymous namespace

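// Verify the BLOCK_CACHE_*_ADD_REDUNDANT tickers: LookupLiarCache fakes a
// cache miss so the reader loads and inserts a block that is already cached,
// which should be counted as a redundant add.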
TEST_F(DBBlockCacheTest, AddRedundantStats) {
  const size_t capacity = size_t{1} << 25;
  const int num_shard_bits = 0;  // 1 shard
  int iterations_tested = 0;
  for (std::shared_ptr<Cache> base_cache :
       {NewLRUCache(capacity, num_shard_bits),
        NewClockCache(capacity, num_shard_bits)}) {
    if (!base_cache) {
      // Skip clock cache when not supported
      continue;
    }
    ++iterations_tested;
    Options options = CurrentOptions();
    options.create_if_missing = true;
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

    std::shared_ptr<LookupLiarCache> cache =
        std::make_shared<LookupLiarCache>(base_cache);

    BlockBasedTableOptions table_options;
    table_options.cache_index_and_filter_blocks = true;
    table_options.block_cache = cache;
    table_options.filter_policy.reset(NewBloomFilterPolicy(50));
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    DestroyAndReopen(options);

    // Create a new table.
    ASSERT_OK(Put("foo", "value"));
    ASSERT_OK(Put("bar", "value"));
    ASSERT_OK(Flush());
    ASSERT_EQ(1, NumTableFilesAtLevel(0));

    // Normal access filter+index+data.
    ASSERT_EQ("value", Get("foo"));

    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD));
    // --------
    ASSERT_EQ(3, TestGetTickerCount(options, BLOCK_CACHE_ADD));

    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT));
    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT));
    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT));
    // --------
    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT));

    // Again access filter+index+data, but force redundant load+insert on index
    cache->SetNthLookupNotFound(2);
    ASSERT_EQ("value", Get("bar"));

    ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD));
    // --------
    ASSERT_EQ(4, TestGetTickerCount(options, BLOCK_CACHE_ADD));

    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT));
    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT));
    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT));
    // --------
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT));

    // Access just filter (with high probability), and force redundant
    // load+insert
    cache->SetNthLookupNotFound(1);
    ASSERT_EQ("NOT_FOUND", Get("this key was not added"));

    EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD));
    EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD));
    EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD));
    // --------
    EXPECT_EQ(5, TestGetTickerCount(options, BLOCK_CACHE_ADD));

    EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT));
    EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT));
    EXPECT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT));
    // --------
    EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT));

    // Access just data, forcing redundant load+insert
    ReadOptions read_options;
    std::unique_ptr<Iterator> iter{db_->NewIterator(read_options)};
    cache->SetNthLookupNotFound(1);
    iter->SeekToFirst();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key(), "bar");

    EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD));
    EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD));
    EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD));
    // --------
    EXPECT_EQ(6, TestGetTickerCount(options, BLOCK_CACHE_ADD));

    EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT));
    EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT));
    EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT));
    // --------
    EXPECT_EQ(3, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT));
  }
  EXPECT_GE(iterations_tested, 1);
}

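// Verify that paranoid_file_checks reads (and thus caches) blocks of newly
// written files, and that no further blocks are added once it is disabled.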
TEST_F(DBBlockCacheTest, ParanoidFileChecks) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  options.level0_file_num_compaction_trigger = 2;
  options.paranoid_file_checks = true;
  BlockBasedTableOptions table_options;
  table_options.cache_index_and_filter_blocks = false;
  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "1_key", "val"));
  ASSERT_OK(Put(1, "9_key", "val"));
  // Create a new table.
  ASSERT_OK(Flush(1));
  ASSERT_EQ(1, /* read and cache data block */
            TestGetTickerCount(options, BLOCK_CACHE_ADD));

  ASSERT_OK(Put(1, "1_key2", "val2"));
  ASSERT_OK(Put(1, "9_key2", "val2"));
  // Create a new SST file. This will further trigger a compaction
  // and generate another file.
  ASSERT_OK(Flush(1));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(3, /* 3 files created in total so far */
            TestGetTickerCount(options, BLOCK_CACHE_ADD));

  // After disabling options.paranoid_file_checks, no further blocks are
  // added when new files are generated.
  ASSERT_OK(
      dbfull()->SetOptions(handles_[1], {{"paranoid_file_checks", "false"}}));

  ASSERT_OK(Put(1, "1_key3", "val3"));
  ASSERT_OK(Put(1, "9_key3", "val3"));
  ASSERT_OK(Flush(1));
  ASSERT_OK(Put(1, "1_key4", "val4"));
  ASSERT_OK(Put(1, "9_key4", "val4"));
  ASSERT_OK(Flush(1));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(3, /* still 3 files' worth of blocks cached */
            TestGetTickerCount(options, BLOCK_CACHE_ADD));
}

TEST_F(DBBlockCacheTest, CompressedCache) {
  if (!Snappy_Supported()) {
    return;
  }
  int num_iter = 80;

  // Run this test four iterations:
  // Iteration 1: only an uncompressed block cache
  // Iteration 2: only a compressed block cache
  // Iteration 3: both block cache and compressed cache
  // Iteration 4: both block cache and compressed cache, but DB is not
  // compressed
  for (int iter = 0; iter < 4; iter++) {
    Options options = CurrentOptions();
    options.write_buffer_size = 64 * 1024;  // small write buffer
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

    BlockBasedTableOptions table_options;
    switch (iter) {
      case 0:
        // only uncompressed block cache
        table_options.block_cache = NewLRUCache(8 * 1024);
        table_options.block_cache_compressed = nullptr;
        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
        break;
      case 1:
        // no block cache, only compressed cache
        table_options.no_block_cache = true;
        table_options.block_cache = nullptr;
        table_options.block_cache_compressed = NewLRUCache(8 * 1024);
        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
        break;
      case 2:
        // both compressed and uncompressed block cache
        table_options.block_cache = NewLRUCache(1024);
        table_options.block_cache_compressed = NewLRUCache(8 * 1024);
        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
        break;
      case 3:
        // both block cache and compressed cache, but DB is not compressed
        // also, make block cache sizes bigger, to trigger block cache hits
        table_options.block_cache = NewLRUCache(1024 * 1024);
        table_options.block_cache_compressed = NewLRUCache(8 * 1024 * 1024);
        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
        options.compression = kNoCompression;
        break;
      default:
        FAIL();
    }
    CreateAndReopenWithCF({"pikachu"}, options);
    // The default column family doesn't have a block cache.
    Options no_block_cache_opts;
    no_block_cache_opts.statistics = options.statistics;
    no_block_cache_opts = CurrentOptions(no_block_cache_opts);
    BlockBasedTableOptions table_options_no_bc;
    table_options_no_bc.no_block_cache = true;
    no_block_cache_opts.table_factory.reset(
        NewBlockBasedTableFactory(table_options_no_bc));
    ReopenWithColumnFamilies(
        {"default", "pikachu"},
        std::vector<Options>({no_block_cache_opts, options}));

    Random rnd(301);

    // Write 80 values, each ~1KB (each distinct value is reused four times,
    // giving a high compression ratio).
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
    std::vector<std::string> values;
    std::string str;
    for (int i = 0; i < num_iter; i++) {
      if (i % 4 == 0) {  // high compression ratio
        str = rnd.RandomString(1000);
      }
      values.push_back(str);
      ASSERT_OK(Put(1, Key(i), values[i]));
    }

    // flush all data from memtable so that reads are from block cache
    ASSERT_OK(Flush(1));

    for (int i = 0; i < num_iter; i++) {
      ASSERT_EQ(Get(1, Key(i)), values[i]);
    }

    // check that we triggered the appropriate code paths in the cache
    switch (iter) {
      case 0:
        // only uncompressed block cache
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
        break;
      case 1:
        // no block cache, only compressed cache
        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
        break;
      case 2:
        // both compressed and uncompressed block cache
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
        break;
      case 3:
        // both compressed and uncompressed block cache, DB not compressed
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_HIT), 0);
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
        // compressed cache doesn't have any hits since blocks are not
        // compressed on storage
        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_HIT), 0);
        break;
      default:
        FAIL();
    }

    options.create_if_missing = true;
    DestroyAndReopen(options);
  }
}

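// Verify block cache accounting for compression dictionary blocks: they are
// preloaded (miss + insert) when each table is opened and hit on later reads.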
TEST_F(DBBlockCacheTest, CacheCompressionDict) {
  const int kNumFiles = 4;
  const int kNumEntriesPerFile = 128;
  const int kNumBytesPerEntry = 1024;

  // Try all the available libraries that support dictionary compression
  std::vector<CompressionType> compression_types;
  if (Zlib_Supported()) {
    compression_types.push_back(kZlibCompression);
  }
  if (LZ4_Supported()) {
    compression_types.push_back(kLZ4Compression);
    compression_types.push_back(kLZ4HCCompression);
  }
  if (ZSTD_Supported()) {
    compression_types.push_back(kZSTD);
  } else if (ZSTDNotFinal_Supported()) {
    compression_types.push_back(kZSTDNotFinalCompression);
  }
  Random rnd(301);
  for (auto compression_type : compression_types) {
    Options options = CurrentOptions();
    options.bottommost_compression = compression_type;
    options.bottommost_compression_opts.max_dict_bytes = 4096;
    options.bottommost_compression_opts.enabled = true;
    options.create_if_missing = true;
    options.num_levels = 2;
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
    options.target_file_size_base = kNumEntriesPerFile * kNumBytesPerEntry;
    BlockBasedTableOptions table_options;
    table_options.cache_index_and_filter_blocks = true;
    table_options.block_cache.reset(new MockCache());
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    DestroyAndReopen(options);

    RecordCacheCountersForCompressionDict(options);

    for (int i = 0; i < kNumFiles; ++i) {
      ASSERT_EQ(i, NumTableFilesAtLevel(0, 0));
      for (int j = 0; j < kNumEntriesPerFile; ++j) {
        std::string value = rnd.RandomString(kNumBytesPerEntry);
        ASSERT_OK(Put(Key(j * kNumFiles + i), value.c_str()));
      }
      ASSERT_OK(Flush());
    }
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_EQ(0, NumTableFilesAtLevel(0));
    ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(1));

    // Compression dictionary blocks are preloaded.
    CheckCacheCountersForCompressionDict(
        options, kNumFiles /* expected_compression_dict_misses */,
        0 /* expected_compression_dict_hits */,
        kNumFiles /* expected_compression_dict_inserts */);

    // Seek to a key in a file. It should cause the SST's dictionary meta-block
    // to be read.
    RecordCacheCounters(options);
    RecordCacheCountersForCompressionDict(options);
    ReadOptions read_options;
    ASSERT_NE("NOT_FOUND", Get(Key(kNumFiles * kNumEntriesPerFile - 1)));
    // Two block hits: index and dictionary since they are prefetched
    // One block missed/added: data block
    CheckCacheCounters(options, 1 /* expected_misses */, 2 /* expected_hits */,
                       1 /* expected_inserts */, 0 /* expected_failures */);
    CheckCacheCountersForCompressionDict(
        options, 0 /* expected_compression_dict_misses */,
        1 /* expected_compression_dict_hits */,
        0 /* expected_compression_dict_inserts */);
  }
}

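// Erase all block cache entries whose deleter is registered in the cache
// entry role map; entries with unrecognized deleters (such as the
// CacheEntryStatsCollector) are deliberately kept.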
static void ClearCache(Cache* cache) {
  auto roles = CopyCacheDeleterRoleMap();
  std::deque<std::string> keys;
  Cache::ApplyToAllEntriesOptions opts;
  auto callback = [&](const Slice& key, void* /*value*/, size_t /*charge*/,
                      Cache::DeleterFn deleter) {
    if (roles.find(deleter) == roles.end()) {
      // Keep the stats collector
      return;
    }
    keys.push_back(key.ToString());
  };
  cache->ApplyToAllEntries(callback, opts);
  for (auto& k : keys) {
    cache->Erase(k);
  }
}

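// End-to-end test of cache entry role stats: per-role entry counts reported
// through the background stats collector and the kBlockCacheEntryStats
// property, plus checks that stats scans are rate-limited and do not hold
// the DB mutex.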
TEST_F(DBBlockCacheTest,CacheEntryRoleStats)1092 TEST_F(DBBlockCacheTest, CacheEntryRoleStats) {
1093   const size_t capacity = size_t{1} << 25;
1094   int iterations_tested = 0;
1095   for (bool partition : {false, true}) {
1096     for (std::shared_ptr<Cache> cache :
1097          {NewLRUCache(capacity), NewClockCache(capacity)}) {
1098       if (!cache) {
1099         // Skip clock cache when not supported
1100         continue;
1101       }
1102       ++iterations_tested;
1103 
1104       Options options = CurrentOptions();
1105       SetTimeElapseOnlySleepOnReopen(&options);
1106       options.create_if_missing = true;
1107       options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
1108       options.max_open_files = 13;
1109       options.table_cache_numshardbits = 0;
1110       // If this wakes up, it could interfere with test
1111       options.stats_dump_period_sec = 0;
1112 
1113       BlockBasedTableOptions table_options;
1114       table_options.block_cache = cache;
1115       table_options.cache_index_and_filter_blocks = true;
1116       table_options.filter_policy.reset(NewBloomFilterPolicy(50));
1117       if (partition) {
1118         table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
1119         table_options.partition_filters = true;
1120       }
1121       table_options.metadata_cache_options.top_level_index_pinning =
1122           PinningTier::kNone;
1123       table_options.metadata_cache_options.partition_pinning =
1124           PinningTier::kNone;
1125       table_options.metadata_cache_options.unpartitioned_pinning =
1126           PinningTier::kNone;
1127       options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1128       DestroyAndReopen(options);
1129 
1130       // Create a new table.
1131       ASSERT_OK(Put("foo", "value"));
1132       ASSERT_OK(Put("bar", "value"));
1133       ASSERT_OK(Flush());
1134 
1135       ASSERT_OK(Put("zfoo", "value"));
1136       ASSERT_OK(Put("zbar", "value"));
1137       ASSERT_OK(Flush());
1138 
1139       ASSERT_EQ(2, NumTableFilesAtLevel(0));
1140 
1141       // Fresh cache
1142       ClearCache(cache.get());
1143 
1144       std::array<size_t, kNumCacheEntryRoles> expected{};
1145       // For CacheEntryStatsCollector
1146       expected[static_cast<size_t>(CacheEntryRole::kMisc)] = 1;
1147       EXPECT_EQ(expected, GetCacheEntryRoleCountsBg());
1148 
1149       std::array<size_t, kNumCacheEntryRoles> prev_expected = expected;
1150 
1151       // First access only filters
1152       ASSERT_EQ("NOT_FOUND", Get("different from any key added"));
1153       expected[static_cast<size_t>(CacheEntryRole::kFilterBlock)] += 2;
1154       if (partition) {
1155         expected[static_cast<size_t>(CacheEntryRole::kFilterMetaBlock)] += 2;
1156       }
1157       // Within some time window, we will get cached entry stats
1158       EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg());
1159       // Not enough to force a miss
1160       env_->MockSleepForSeconds(45);
1161       EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg());
1162       // Enough to force a miss
1163       env_->MockSleepForSeconds(601);
1164       EXPECT_EQ(expected, GetCacheEntryRoleCountsBg());
1165 
1166       // Now access index and data block
1167       ASSERT_EQ("value", Get("foo"));
1168       expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]++;
1169       if (partition) {
1170         // top-level
1171         expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]++;
1172       }
1173       expected[static_cast<size_t>(CacheEntryRole::kDataBlock)]++;
1174       // Enough to force a miss
1175       env_->MockSleepForSeconds(601);
1176       // But inject a simulated long scan so that we need a longer
1177       // interval to force a miss next time.
1178       SyncPoint::GetInstance()->SetCallBack(
1179           "CacheEntryStatsCollector::GetStats:AfterApplyToAllEntries",
1180           [this](void*) {
1181             // To spend no more than 0.2% of time scanning, we would need
1182             // interval of at least 10000s
1183             env_->MockSleepForSeconds(20);
1184           });
1185       SyncPoint::GetInstance()->EnableProcessing();
1186       EXPECT_EQ(expected, GetCacheEntryRoleCountsBg());
1187       prev_expected = expected;
1188       SyncPoint::GetInstance()->DisableProcessing();
1189       SyncPoint::GetInstance()->ClearAllCallBacks();
1190 
1191       // The same for other file
1192       ASSERT_EQ("value", Get("zfoo"));
1193       expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]++;
1194       if (partition) {
1195         // top-level
1196         expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]++;
1197       }
1198       expected[static_cast<size_t>(CacheEntryRole::kDataBlock)]++;
1199       // Because of the simulated long scan, this is not enough to force
1200       // a miss
1201       env_->MockSleepForSeconds(601);
1202       EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg());
1203       // But this is enough
1204       env_->MockSleepForSeconds(10000);
1205       EXPECT_EQ(expected, GetCacheEntryRoleCountsBg());
1206       prev_expected = expected;
1207 
1208       // Also check the GetProperty interface
1209       std::map<std::string, std::string> values;
1210       ASSERT_TRUE(
1211           db_->GetMapProperty(DB::Properties::kBlockCacheEntryStats, &values));
1212 
1213       EXPECT_EQ(
1214           ToString(expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]),
1215           values["count.index-block"]);
1216       EXPECT_EQ(
1217           ToString(expected[static_cast<size_t>(CacheEntryRole::kDataBlock)]),
1218           values["count.data-block"]);
1219       EXPECT_EQ(
1220           ToString(expected[static_cast<size_t>(CacheEntryRole::kFilterBlock)]),
1221           values["count.filter-block"]);
1222       EXPECT_EQ(
1223           ToString(
1224               prev_expected[static_cast<size_t>(CacheEntryRole::kWriteBuffer)]),
1225           values["count.write-buffer"]);
1226       EXPECT_EQ(ToString(expected[static_cast<size_t>(CacheEntryRole::kMisc)]),
1227                 values["count.misc"]);
1228 
1229       // Add one for kWriteBuffer
1230       {
1231         WriteBufferManager wbm(size_t{1} << 20, cache);
1232         wbm.ReserveMem(1024);
1233         expected[static_cast<size_t>(CacheEntryRole::kWriteBuffer)]++;
1234         // Now we check that the GetProperty interface is more agressive about
1235         // re-scanning stats, but not totally aggressive.
1236         // Within some time window, we will get cached entry stats
1237         env_->MockSleepForSeconds(1);
1238         EXPECT_EQ(ToString(prev_expected[static_cast<size_t>(
1239                       CacheEntryRole::kWriteBuffer)]),
1240                   values["count.write-buffer"]);
1241         // Not enough for a "background" miss but enough for a "foreground" miss
1242         env_->MockSleepForSeconds(45);
1243 
1244         ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kBlockCacheEntryStats,
1245                                         &values));
1246         EXPECT_EQ(
1247             ToString(
1248                 expected[static_cast<size_t>(CacheEntryRole::kWriteBuffer)]),
1249             values["count.write-buffer"]);
1250       }
1251       prev_expected = expected;
1252 
1253       // With collector pinned in cache, we should be able to hit
1254       // even if the cache is full
1255       ClearCache(cache.get());
1256       Cache::Handle* h = nullptr;
1257       ASSERT_OK(cache->Insert("Fill-it-up", nullptr, capacity + 1,
1258                               GetNoopDeleterForRole<CacheEntryRole::kMisc>(),
1259                               &h, Cache::Priority::HIGH));
1260       ASSERT_GT(cache->GetUsage(), cache->GetCapacity());
1261       expected = {};
1262       // For CacheEntryStatsCollector
1263       expected[static_cast<size_t>(CacheEntryRole::kMisc)] = 1;
1264       // For Fill-it-up
1265       expected[static_cast<size_t>(CacheEntryRole::kMisc)]++;
1266       // Still able to hit on saved stats
1267       EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg());
1268       // Enough to force a miss
1269       env_->MockSleepForSeconds(1000);
1270       EXPECT_EQ(expected, GetCacheEntryRoleCountsBg());
1271 
1272       cache->Release(h);
1273 
1274       // Now we test that the DB mutex is not held during scans, for the ways
1275       // we know how to (possibly) trigger them. Without a better way to
1276       // check this, we simply inject an acquire & release of the DB mutex
1277       // deep in the stat collection code. If we were already holding the
1278       // mutex, that is UB that would at least be found by TSAN.
1279       int scan_count = 0;
1280       SyncPoint::GetInstance()->SetCallBack(
1281           "CacheEntryStatsCollector::GetStats:AfterApplyToAllEntries",
1282           [this, &scan_count](void*) {
1283             dbfull()->TEST_LockMutex();
1284             dbfull()->TEST_UnlockMutex();
1285             ++scan_count;
1286           });
1287       SyncPoint::GetInstance()->EnableProcessing();
1288 
1289       // Different things that might trigger a scan, with mock sleeps to
1290       // force a miss.
1291       env_->MockSleepForSeconds(10000);
1292       dbfull()->DumpStats();
1293       ASSERT_EQ(scan_count, 1);
1294 
1295       env_->MockSleepForSeconds(10000);
1296       ASSERT_TRUE(
1297           db_->GetMapProperty(DB::Properties::kBlockCacheEntryStats, &values));
1298       ASSERT_EQ(scan_count, 2);
1299 
1300       env_->MockSleepForSeconds(10000);
1301       std::string value_str;
1302       ASSERT_TRUE(
1303           db_->GetProperty(DB::Properties::kBlockCacheEntryStats, &value_str));
1304       ASSERT_EQ(scan_count, 3);
1305 
1306       env_->MockSleepForSeconds(10000);
1307       ASSERT_TRUE(db_->GetProperty(DB::Properties::kCFStats, &value_str));
1308       // To match historical speed, querying this property no longer triggers
1309       // a scan, even if results are old. But periodic stats dumping should keep
1310       // things reasonably updated.
1311       ASSERT_EQ(scan_count, /*unchanged*/ 3);
1312 
1313       SyncPoint::GetInstance()->DisableProcessing();
1314       SyncPoint::GetInstance()->ClearAllCallBacks();
1315     }
1316     EXPECT_GE(iterations_tested, 1);
1317   }
1318 }
1319 
1320 #endif  // ROCKSDB_LITE
1321 
1322 class DBBlockCacheKeyTest
1323     : public DBTestBase,
1324       public testing::WithParamInterface<std::tuple<bool, bool>> {
1325  public:
1326   DBBlockCacheKeyTest()
1327       : DBTestBase("db_block_cache_test", /*env_do_fsync=*/false) {}
1328 
1329   void SetUp() override {
1330     use_compressed_cache_ = std::get<0>(GetParam());
1331     exclude_file_numbers_ = std::get<1>(GetParam());
1332   }
1333 
1334   bool use_compressed_cache_;
1335   bool exclude_file_numbers_;
1336 };
1337 
1338 // Disable LinkFile so that we can physically copy a DB using Checkpoint.
1339 // Disable file GetUniqueId to enable stable cache keys.
1340 class StableCacheKeyTestFS : public FaultInjectionTestFS {
1341  public:
1342   explicit StableCacheKeyTestFS(const std::shared_ptr<FileSystem>& base)
1343       : FaultInjectionTestFS(base) {
1344     SetFailGetUniqueId(true);
1345   }
1346 
1347   virtual ~StableCacheKeyTestFS() override {}
1348 
1349   IOStatus LinkFile(const std::string&, const std::string&, const IOOptions&,
1350                     IODebugContext*) override {
1351     return IOStatus::NotSupported("Disabled");
1352   }
1353 };
1354 
1355 TEST_P(DBBlockCacheKeyTest, StableCacheKeys) {
1356   std::shared_ptr<StableCacheKeyTestFS> test_fs{
1357       new StableCacheKeyTestFS(env_->GetFileSystem())};
1358   std::unique_ptr<CompositeEnvWrapper> test_env{
1359       new CompositeEnvWrapper(env_, test_fs)};
1360 
1361   Options options = CurrentOptions();
1362   options.create_if_missing = true;
1363   options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
1364   options.env = test_env.get();
1365 
1366   BlockBasedTableOptions table_options;
1367 
1368   int key_count = 0;
1369   uint64_t expected_stat = 0;
1370 
1371   std::function<void()> verify_stats;
1372   if (use_compressed_cache_) {
1373     if (!Snappy_Supported()) {
1374       ROCKSDB_GTEST_SKIP("Compressed cache test requires snappy support");
1375       return;
1376     }
1377     options.compression = CompressionType::kSnappyCompression;
1378     table_options.no_block_cache = true;
1379     table_options.block_cache_compressed = NewLRUCache(1 << 25, 0, false);
1380     verify_stats = [&options, &expected_stat] {
1381       // One for ordinary SST file and one for external SST file
1382       ASSERT_EQ(expected_stat,
1383                 options.statistics->getTickerCount(BLOCK_CACHE_COMPRESSED_ADD));
1384     };
1385   } else {
1386     table_options.cache_index_and_filter_blocks = true;
1387     table_options.block_cache = NewLRUCache(1 << 25, 0, false);
1388     verify_stats = [&options, &expected_stat] {
1389       ASSERT_EQ(expected_stat,
1390                 options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
1391       ASSERT_EQ(expected_stat,
1392                 options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
1393       ASSERT_EQ(expected_stat,
1394                 options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
1395     };
1396   }
1397 
1398   table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
1399   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1400   CreateAndReopenWithCF({"koko"}, options);
1401 
1402   if (exclude_file_numbers_) {
1403     // Simulate something like old behavior without file numbers in properties.
1404     // This is a "control" side of the test that also ensures safely degraded
1405     // behavior on old files.
1406     ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
1407         "PropertyBlockBuilder::AddTableProperty:Start", [&](void* arg) {
1408           TableProperties* props = reinterpret_cast<TableProperties*>(arg);
1409           props->orig_file_number = 0;
1410         });
1411     ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
1412   }
1413 
1414   std::function<void()> perform_gets = [&key_count, &expected_stat, this]() {
1415     if (exclude_file_numbers_) {
1416       // No cache key reuse should happen, because we can't rely on the
1417       // current file number being stable
1418       expected_stat += key_count;
1419     } else {
1420       // Cache keys should be stable
1421       expected_stat = key_count;
1422     }
1423     for (int i = 0; i < key_count; ++i) {
1424       ASSERT_EQ(Get(1, Key(i)), "abc");
1425     }
1426   };
1427 
1428   // Ordinary SST files with same session id
1429   const std::string something_compressible(500U, 'x');
1430   for (int i = 0; i < 2; ++i) {
1431     ASSERT_OK(Put(1, Key(key_count), "abc"));
1432     ASSERT_OK(Put(1, Key(key_count) + "a", something_compressible));
1433     ASSERT_OK(Flush(1));
1434     ++key_count;
1435   }
1436 
1437 #ifndef ROCKSDB_LITE
1438   // Save an export of those ordinary SST files for later
1439   std::string export_files_dir = dbname_ + "/exported";
1440   ExportImportFilesMetaData* metadata_ptr_ = nullptr;
1441   Checkpoint* checkpoint;
1442   ASSERT_OK(Checkpoint::Create(db_, &checkpoint));
1443   ASSERT_OK(checkpoint->ExportColumnFamily(handles_[1], export_files_dir,
1444                                            &metadata_ptr_));
1445   ASSERT_NE(metadata_ptr_, nullptr);
1446   delete checkpoint;
1447   checkpoint = nullptr;
1448 
1449   // External SST files with same session id
1450   SstFileWriter sst_file_writer(EnvOptions(), options);
1451   std::vector<std::string> external;
1452   for (int i = 0; i < 2; ++i) {
1453     std::string f = dbname_ + "/external" + ToString(i) + ".sst";
1454     external.push_back(f);
1455     ASSERT_OK(sst_file_writer.Open(f));
1456     ASSERT_OK(sst_file_writer.Put(Key(key_count), "abc"));
1457     ASSERT_OK(
1458         sst_file_writer.Put(Key(key_count) + "a", something_compressible));
1459     ++key_count;
1460     ExternalSstFileInfo external_info;
1461     ASSERT_OK(sst_file_writer.Finish(&external_info));
1462     IngestExternalFileOptions ingest_opts;
1463     ASSERT_OK(db_->IngestExternalFile(handles_[1], {f}, ingest_opts));
1464   }
1465 
1466   if (exclude_file_numbers_) {
1467     // FIXME(peterd): figure out where these extra two ADDs are coming from
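    // Note: recordTick() only adds, so these calls rely on unsigned
    // wraparound: adding 2^64 - 2 effectively subtracts 2 from each ticker.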
1468     options.statistics->recordTick(BLOCK_CACHE_INDEX_ADD,
1469                                    uint64_t{0} - uint64_t{2});
1470     options.statistics->recordTick(BLOCK_CACHE_FILTER_ADD,
1471                                    uint64_t{0} - uint64_t{2});
1472     options.statistics->recordTick(BLOCK_CACHE_COMPRESSED_ADD,
1473                                    uint64_t{0} - uint64_t{2});
1474   }
1475 #endif  // !ROCKSDB_LITE
1476 
1477   perform_gets();
1478   verify_stats();
1479 
1480   // Make sure we can cache hit after re-open
1481   ReopenWithColumnFamilies({"default", "koko"}, options);
1482 
1483   perform_gets();
1484   verify_stats();
1485 
1486   // Make sure we can cache hit even on a full copy of the DB. Using
1487   // StableCacheKeyTestFS, Checkpoint will resort to a full copy rather than
1488   // hard links. (Checkpoint is not available in LITE mode to test this.)
1489 #ifndef ROCKSDB_LITE
1490   auto db_copy_name = dbname_ + "-copy";
1491   ASSERT_OK(Checkpoint::Create(db_, &checkpoint));
1492   ASSERT_OK(checkpoint->CreateCheckpoint(db_copy_name));
1493   delete checkpoint;
1494 
1495   Close();
1496   Destroy(options);
1497 
1498   // Switch to the DB copy
1499   SaveAndRestore<std::string> save_dbname(&dbname_, db_copy_name);
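  // (SaveAndRestore sets dbname_ to the copy's path for the remainder of this
  // scope and restores the original value when it is destroyed.)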
1500   ReopenWithColumnFamilies({"default", "koko"}, options);
1501 
1502   perform_gets();
1503   verify_stats();
1504 
1505   // And ensure that re-importing + ingesting the same files into a
1506   // different DB uses the same cache keys
1507   DestroyAndReopen(options);
1508 
1509   ColumnFamilyHandle* cfh = nullptr;
1510   ASSERT_OK(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo",
1511                                               ImportColumnFamilyOptions(),
1512                                               *metadata_ptr_, &cfh));
1513   ASSERT_NE(cfh, nullptr);
1514   delete cfh;
1515   cfh = nullptr;
1516   delete metadata_ptr_;
1517   metadata_ptr_ = nullptr;
1518 
1519   ASSERT_OK(DestroyDB(export_files_dir, options));
1520 
1521   ReopenWithColumnFamilies({"default", "yoyo"}, options);
1522 
1523   IngestExternalFileOptions ingest_opts;
1524   ASSERT_OK(db_->IngestExternalFile(handles_[1], {external}, ingest_opts));
1525 
1526   if (exclude_file_numbers_) {
1527     // FIXME(peterd): figure out where these extra two ADDs are coming from
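    // Same unsigned-wraparound trick as above: effectively subtract 2.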
1528     options.statistics->recordTick(BLOCK_CACHE_INDEX_ADD,
1529                                    uint64_t{0} - uint64_t{2});
1530     options.statistics->recordTick(BLOCK_CACHE_FILTER_ADD,
1531                                    uint64_t{0} - uint64_t{2});
1532   }
1533 
1534   perform_gets();
1535   verify_stats();
1536 #endif  // !ROCKSDB_LITE
1537 
1538   Close();
1539   Destroy(options);
1540   ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
1541 }
1542 
1543 INSTANTIATE_TEST_CASE_P(DBBlockCacheKeyTest, DBBlockCacheKeyTest,
1544                         ::testing::Combine(::testing::Bool(),
1545                                            ::testing::Bool()));
1546 
1547 class DBBlockCachePinningTest
1548     : public DBTestBase,
1549       public testing::WithParamInterface<
1550           std::tuple<bool, PinningTier, PinningTier, PinningTier>> {
1551  public:
1552   DBBlockCachePinningTest()
1553       : DBTestBase("db_block_cache_test", /*env_do_fsync=*/false) {}
1554 
1555   void SetUp() override {
1556     partition_index_and_filters_ = std::get<0>(GetParam());
1557     top_level_index_pinning_ = std::get<1>(GetParam());
1558     partition_pinning_ = std::get<2>(GetParam());
1559     unpartitioned_pinning_ = std::get<3>(GetParam());
1560   }
1561 
1562   bool partition_index_and_filters_;
1563   PinningTier top_level_index_pinning_;
1564   PinningTier partition_pinning_;
1565   PinningTier unpartitioned_pinning_;
1566 };
1567 
1568 TEST_P(DBBlockCachePinningTest, TwoLevelDB) {
1569   // Creates one file in L0 and one file in L1. Both files have enough data that
1570   // their index and filter blocks are partitioned. The L1 file will also have
1571   // a compression dictionary (those are trained only during compaction), which
1572   // must be unpartitioned.
1573   const int kKeySize = 32;
1574   const int kBlockSize = 128;
1575   const int kNumBlocksPerFile = 128;
1576   const int kNumKeysPerFile = kBlockSize * kNumBlocksPerFile / kKeySize;
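  // i.e. 128 * 128 / 32 = 512 keys per file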
1577 
1578   Options options = CurrentOptions();
1579   // `kNoCompression` makes the unit test more portable. But it relies on the
1580   // current behavior of persisting/accessing the dictionary even when there's
1581   // no (de)compression happening, which seems fairly likely to change over time.
1582   options.compression = kNoCompression;
1583   options.compression_opts.max_dict_bytes = 4 << 10;
1584   options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
1585   BlockBasedTableOptions table_options;
1586   table_options.block_cache = NewLRUCache(1 << 20 /* capacity */);
1587   table_options.block_size = kBlockSize;
1588   table_options.metadata_block_size = kBlockSize;
1589   table_options.cache_index_and_filter_blocks = true;
1590   table_options.metadata_cache_options.top_level_index_pinning =
1591       top_level_index_pinning_;
1592   table_options.metadata_cache_options.partition_pinning = partition_pinning_;
1593   table_options.metadata_cache_options.unpartitioned_pinning =
1594       unpartitioned_pinning_;
1595   table_options.filter_policy.reset(
1596       NewBloomFilterPolicy(10 /* bits_per_key */));
1597   if (partition_index_and_filters_) {
1598     table_options.index_type =
1599         BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
1600     table_options.partition_filters = true;
1601   }
1602   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1603   Reopen(options);
1604 
1605   Random rnd(301);
1606   for (int i = 0; i < 2; ++i) {
1607     for (int j = 0; j < kNumKeysPerFile; ++j) {
1608       ASSERT_OK(Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kKeySize)));
1609     }
1610     ASSERT_OK(Flush());
1611     if (i == 0) {
1612       // Prevent trivial move so the file will be rewritten with a dictionary
1613       // and reopened with L1's pinning settings.
1614       CompactRangeOptions cro;
1615       cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
1616       ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
1617     }
1618   }
1619 
1620   // Clear all unpinned blocks so unpinned blocks will show up as cache misses
1621   // when reading a key from a file.
1622   table_options.block_cache->EraseUnRefEntries();
1623 
1624   // Get base cache values
1625   uint64_t filter_misses = TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
1626   uint64_t index_misses = TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS);
1627   uint64_t compression_dict_misses =
1628       TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS);
1629 
1630   // Read a key from the L0 file
1631   Get(Key(kNumKeysPerFile));
1632   uint64_t expected_filter_misses = filter_misses;
1633   uint64_t expected_index_misses = index_misses;
1634   uint64_t expected_compression_dict_misses = compression_dict_misses;
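  // The L0 file is a product of flush, so kFlushedAndSimilar pins its blocks;
  // only PinningTier::kNone leaves them unpinned and able to miss.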
1635   if (partition_index_and_filters_) {
1636     if (top_level_index_pinning_ == PinningTier::kNone) {
1637       ++expected_filter_misses;
1638       ++expected_index_misses;
1639     }
1640     if (partition_pinning_ == PinningTier::kNone) {
1641       ++expected_filter_misses;
1642       ++expected_index_misses;
1643     }
1644   } else {
1645     if (unpartitioned_pinning_ == PinningTier::kNone) {
1646       ++expected_filter_misses;
1647       ++expected_index_misses;
1648     }
1649   }
1650   if (unpartitioned_pinning_ == PinningTier::kNone) {
1651     ++expected_compression_dict_misses;
1652   }
1653   ASSERT_EQ(expected_filter_misses,
1654             TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
1655   ASSERT_EQ(expected_index_misses,
1656             TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
1657   ASSERT_EQ(expected_compression_dict_misses,
1658             TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS));
1659 
1660   // Clear all unpinned blocks so unpinned blocks will show up as cache misses
1661   // when reading a key from a file.
1662   table_options.block_cache->EraseUnRefEntries();
1663 
1664   // Read a key from the L1 file
1665   Get(Key(0));
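  // The L1 file is a product of compaction, so kFlushedAndSimilar does not
  // pin its blocks; both kNone and kFlushedAndSimilar can miss here.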
1666   if (partition_index_and_filters_) {
1667     if (top_level_index_pinning_ == PinningTier::kNone ||
1668         top_level_index_pinning_ == PinningTier::kFlushedAndSimilar) {
1669       ++expected_filter_misses;
1670       ++expected_index_misses;
1671     }
1672     if (partition_pinning_ == PinningTier::kNone ||
1673         partition_pinning_ == PinningTier::kFlushedAndSimilar) {
1674       ++expected_filter_misses;
1675       ++expected_index_misses;
1676     }
1677   } else {
1678     if (unpartitioned_pinning_ == PinningTier::kNone ||
1679         unpartitioned_pinning_ == PinningTier::kFlushedAndSimilar) {
1680       ++expected_filter_misses;
1681       ++expected_index_misses;
1682     }
1683   }
1684   if (unpartitioned_pinning_ == PinningTier::kNone ||
1685       unpartitioned_pinning_ == PinningTier::kFlushedAndSimilar) {
1686     ++expected_compression_dict_misses;
1687   }
1688   ASSERT_EQ(expected_filter_misses,
1689             TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
1690   ASSERT_EQ(expected_index_misses,
1691             TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
1692   ASSERT_EQ(expected_compression_dict_misses,
1693             TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS));
1694 }
1695 
1696 INSTANTIATE_TEST_CASE_P(
1697     DBBlockCachePinningTest, DBBlockCachePinningTest,
1698     ::testing::Combine(
1699         ::testing::Bool(),
1700         ::testing::Values(PinningTier::kNone, PinningTier::kFlushedAndSimilar,
1701                           PinningTier::kAll),
1702         ::testing::Values(PinningTier::kNone, PinningTier::kFlushedAndSimilar,
1703                           PinningTier::kAll),
1704         ::testing::Values(PinningTier::kNone, PinningTier::kFlushedAndSimilar,
1705                           PinningTier::kAll)));
1706 
1707 }  // namespace ROCKSDB_NAMESPACE
1708 
1709 int main(int argc, char** argv) {
1710   ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
1711   ::testing::InitGoogleTest(&argc, argv);
1712   return RUN_ALL_TESTS();
1713 }
1714