1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 
6 #include <map>
7 
8 #include "rocksdb/filter_policy.h"
9 
10 #include "table/block_based/block_based_table_reader.h"
11 #include "table/block_based/partitioned_filter_block.h"
12 #include "table/block_based/filter_policy_internal.h"
13 
14 #include "index_builder.h"
15 #include "logging/logging.h"
16 #include "test_util/testharness.h"
17 #include "test_util/testutil.h"
18 #include "util/coding.h"
19 #include "util/hash.h"
20 
21 namespace ROCKSDB_NAMESPACE {
22 
23 std::map<uint64_t, std::string> blooms;
24 
25 class MockedBlockBasedTable : public BlockBasedTable {
26  public:
MockedBlockBasedTable(Rep * rep,PartitionedIndexBuilder * pib)27   MockedBlockBasedTable(Rep* rep, PartitionedIndexBuilder* pib)
28       : BlockBasedTable(rep, /*block_cache_tracer=*/nullptr) {
29     // Initialize what Open normally does as much as necessary for the test
30     rep->index_key_includes_seq = pib->seperator_is_key_plus_seq();
31     rep->index_value_is_full = !pib->get_use_value_delta_encoding();
32   }
33 };
34 
35 class MyPartitionedFilterBlockReader : public PartitionedFilterBlockReader {
36  public:
MyPartitionedFilterBlockReader(BlockBasedTable * t,CachableEntry<Block> && filter_block)37   MyPartitionedFilterBlockReader(BlockBasedTable* t,
38                                  CachableEntry<Block>&& filter_block)
39       : PartitionedFilterBlockReader(t, std::move(filter_block)) {
40     for (const auto& pair : blooms) {
41       const uint64_t offset = pair.first;
42       const std::string& bloom = pair.second;
43 
44       assert(t);
45       assert(t->get_rep());
46       CachableEntry<ParsedFullFilterBlock> block(
47           new ParsedFullFilterBlock(
48               t->get_rep()->table_options.filter_policy.get(),
49               BlockContents(Slice(bloom))),
50           nullptr /* cache */, nullptr /* cache_handle */,
51           true /* own_value */);
52       filter_map_[offset] = std::move(block);
53     }
54   }
55 };
56 
57 class PartitionedFilterBlockTest
58     : public testing::Test,
59       virtual public ::testing::WithParamInterface<uint32_t> {
60  public:
61   Options options_;
62   ImmutableCFOptions ioptions_;
63   EnvOptions env_options_;
64   BlockBasedTableOptions table_options_;
65   InternalKeyComparator icomp_;
66   std::unique_ptr<BlockBasedTable> table_;
67   std::shared_ptr<Cache> cache_;
68   int bits_per_key_;
69 
PartitionedFilterBlockTest()70   PartitionedFilterBlockTest()
71       : ioptions_(options_),
72         env_options_(options_),
73         icomp_(options_.comparator),
74         bits_per_key_(10) {
75     table_options_.filter_policy.reset(
76         NewBloomFilterPolicy(bits_per_key_, false));
77     table_options_.format_version = GetParam();
78     table_options_.index_block_restart_interval = 3;
79   }
80 
~PartitionedFilterBlockTest()81   ~PartitionedFilterBlockTest() override {}
82 
83   const std::string keys[4] = {"afoo", "bar", "box", "hello"};
84   const std::string missing_keys[2] = {"missing", "other"};
85 
MaxIndexSize()86   uint64_t MaxIndexSize() {
87     int num_keys = sizeof(keys) / sizeof(*keys);
88     uint64_t max_key_size = 0;
89     for (int i = 1; i < num_keys; i++) {
90       max_key_size = std::max(max_key_size, static_cast<uint64_t>(keys[i].size()));
91     }
92     uint64_t max_index_size = num_keys * (max_key_size + 8 /*handle*/);
93     return max_index_size;
94   }
95 
MaxFilterSize()96   uint64_t MaxFilterSize() {
97     int num_keys = sizeof(keys) / sizeof(*keys);
98     // General, rough over-approximation
99     return num_keys * bits_per_key_ + (CACHE_LINE_SIZE * 8 + /*metadata*/ 5);
100   }
101 
102   uint64_t last_offset = 10;
Write(const Slice & slice)103   BlockHandle Write(const Slice& slice) {
104     BlockHandle bh(last_offset + 1, slice.size());
105     blooms[bh.offset()] = slice.ToString();
106     last_offset += bh.size();
107     return bh;
108   }
109 
NewIndexBuilder()110   PartitionedIndexBuilder* NewIndexBuilder() {
111     const bool kValueDeltaEncoded = true;
112     return PartitionedIndexBuilder::CreateIndexBuilder(
113         &icomp_, !kValueDeltaEncoded, table_options_);
114   }
115 
NewBuilder(PartitionedIndexBuilder * const p_index_builder,const SliceTransform * prefix_extractor=nullptr)116   PartitionedFilterBlockBuilder* NewBuilder(
117       PartitionedIndexBuilder* const p_index_builder,
118       const SliceTransform* prefix_extractor = nullptr) {
119     assert(table_options_.block_size_deviation <= 100);
120     auto partition_size = static_cast<uint32_t>(
121              ((table_options_.metadata_block_size *
122                (100 - table_options_.block_size_deviation)) +
123               99) /
124              100);
125     partition_size = std::max(partition_size, static_cast<uint32_t>(1));
126     const bool kValueDeltaEncoded = true;
127     return new PartitionedFilterBlockBuilder(
128         prefix_extractor, table_options_.whole_key_filtering,
129         BloomFilterPolicy::GetBuilderFromContext(
130             FilterBuildingContext(table_options_)),
131         table_options_.index_block_restart_interval, !kValueDeltaEncoded,
132         p_index_builder, partition_size);
133   }
134 
NewReader(PartitionedFilterBlockBuilder * builder,PartitionedIndexBuilder * pib)135   PartitionedFilterBlockReader* NewReader(
136       PartitionedFilterBlockBuilder* builder, PartitionedIndexBuilder* pib) {
137     BlockHandle bh;
138     Status status;
139     Slice slice;
140     do {
141       slice = builder->Finish(bh, &status);
142       bh = Write(slice);
143     } while (status.IsIncomplete());
144 
145     constexpr bool skip_filters = false;
146     constexpr int level = 0;
147     constexpr bool immortal_table = false;
148     table_.reset(new MockedBlockBasedTable(
149         new BlockBasedTable::Rep(ioptions_, env_options_, table_options_,
150                                  icomp_, skip_filters, level, immortal_table),
151         pib));
152     BlockContents contents(slice);
153     CachableEntry<Block> block(
154         new Block(std::move(contents), kDisableGlobalSequenceNumber,
155                   0 /* read_amp_bytes_per_bit */, nullptr),
156         nullptr /* cache */, nullptr /* cache_handle */, true /* own_value */);
157     auto reader =
158         new MyPartitionedFilterBlockReader(table_.get(), std::move(block));
159     return reader;
160   }
161 
VerifyReader(PartitionedFilterBlockBuilder * builder,PartitionedIndexBuilder * pib,bool empty=false,const SliceTransform * prefix_extractor=nullptr)162   void VerifyReader(PartitionedFilterBlockBuilder* builder,
163                     PartitionedIndexBuilder* pib, bool empty = false,
164                     const SliceTransform* prefix_extractor = nullptr) {
165     std::unique_ptr<PartitionedFilterBlockReader> reader(
166         NewReader(builder, pib));
167     // Querying added keys
168     const bool no_io = true;
169     for (auto key : keys) {
170       auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
171       const Slice ikey_slice = Slice(*ikey.rep());
172       ASSERT_TRUE(reader->KeyMayMatch(key, prefix_extractor, kNotValid, !no_io,
173                                       &ikey_slice, /*get_context=*/nullptr,
174                                       /*lookup_context=*/nullptr));
175     }
176     {
177       // querying a key twice
178       auto ikey = InternalKey(keys[0], 0, ValueType::kTypeValue);
179       const Slice ikey_slice = Slice(*ikey.rep());
180       ASSERT_TRUE(reader->KeyMayMatch(
181           keys[0], prefix_extractor, kNotValid, !no_io, &ikey_slice,
182           /*get_context=*/nullptr, /*lookup_context=*/nullptr));
183     }
184     // querying missing keys
185     for (auto key : missing_keys) {
186       auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
187       const Slice ikey_slice = Slice(*ikey.rep());
188       if (empty) {
189         ASSERT_TRUE(reader->KeyMayMatch(
190             key, prefix_extractor, kNotValid, !no_io, &ikey_slice,
191             /*get_context=*/nullptr, /*lookup_context=*/nullptr));
192       } else {
193         // assuming a good hash function
194         ASSERT_FALSE(reader->KeyMayMatch(
195             key, prefix_extractor, kNotValid, !no_io, &ikey_slice,
196             /*get_context=*/nullptr, /*lookup_context=*/nullptr));
197       }
198     }
199   }
200 
TestBlockPerKey()201   int TestBlockPerKey() {
202     std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
203     std::unique_ptr<PartitionedFilterBlockBuilder> builder(
204         NewBuilder(pib.get()));
205     int i = 0;
206     builder->Add(keys[i]);
207     CutABlock(pib.get(), keys[i], keys[i + 1]);
208     i++;
209     builder->Add(keys[i]);
210     CutABlock(pib.get(), keys[i], keys[i + 1]);
211     i++;
212     builder->Add(keys[i]);
213     builder->Add(keys[i]);
214     CutABlock(pib.get(), keys[i], keys[i + 1]);
215     i++;
216     builder->Add(keys[i]);
217     CutABlock(pib.get(), keys[i]);
218 
219     VerifyReader(builder.get(), pib.get());
220     return CountNumOfIndexPartitions(pib.get());
221   }
222 
TestBlockPerTwoKeys(const SliceTransform * prefix_extractor=nullptr)223   void TestBlockPerTwoKeys(const SliceTransform* prefix_extractor = nullptr) {
224     std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
225     std::unique_ptr<PartitionedFilterBlockBuilder> builder(
226         NewBuilder(pib.get(), prefix_extractor));
227     int i = 0;
228     builder->Add(keys[i]);
229     i++;
230     builder->Add(keys[i]);
231     CutABlock(pib.get(), keys[i], keys[i + 1]);
232     i++;
233     builder->Add(keys[i]);
234     builder->Add(keys[i]);
235     i++;
236     builder->Add(keys[i]);
237     CutABlock(pib.get(), keys[i]);
238 
239     VerifyReader(builder.get(), pib.get(), prefix_extractor);
240   }
241 
TestBlockPerAllKeys()242   void TestBlockPerAllKeys() {
243     std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
244     std::unique_ptr<PartitionedFilterBlockBuilder> builder(
245         NewBuilder(pib.get()));
246     int i = 0;
247     builder->Add(keys[i]);
248     i++;
249     builder->Add(keys[i]);
250     i++;
251     builder->Add(keys[i]);
252     builder->Add(keys[i]);
253     i++;
254     builder->Add(keys[i]);
255     CutABlock(pib.get(), keys[i]);
256 
257     VerifyReader(builder.get(), pib.get());
258   }
259 
CutABlock(PartitionedIndexBuilder * builder,const std::string & user_key)260   void CutABlock(PartitionedIndexBuilder* builder,
261                  const std::string& user_key) {
262     // Assuming a block is cut, add an entry to the index
263     std::string key =
264         std::string(*InternalKey(user_key, 0, ValueType::kTypeValue).rep());
265     BlockHandle dont_care_block_handle(1, 1);
266     builder->AddIndexEntry(&key, nullptr, dont_care_block_handle);
267   }
268 
CutABlock(PartitionedIndexBuilder * builder,const std::string & user_key,const std::string & next_user_key)269   void CutABlock(PartitionedIndexBuilder* builder, const std::string& user_key,
270                  const std::string& next_user_key) {
271     // Assuming a block is cut, add an entry to the index
272     std::string key =
273         std::string(*InternalKey(user_key, 0, ValueType::kTypeValue).rep());
274     std::string next_key = std::string(
275         *InternalKey(next_user_key, 0, ValueType::kTypeValue).rep());
276     BlockHandle dont_care_block_handle(1, 1);
277     Slice slice = Slice(next_key.data(), next_key.size());
278     builder->AddIndexEntry(&key, &slice, dont_care_block_handle);
279   }
280 
CountNumOfIndexPartitions(PartitionedIndexBuilder * builder)281   int CountNumOfIndexPartitions(PartitionedIndexBuilder* builder) {
282     IndexBuilder::IndexBlocks dont_care_ib;
283     BlockHandle dont_care_bh(10, 10);
284     Status s;
285     int cnt = 0;
286     do {
287       s = builder->Finish(&dont_care_ib, dont_care_bh);
288       cnt++;
289     } while (s.IsIncomplete());
290     return cnt - 1;  // 1 is 2nd level index
291   }
292 };
293 
294 INSTANTIATE_TEST_CASE_P(FormatDef, PartitionedFilterBlockTest,
295                         testing::Values(test::kDefaultFormatVersion));
296 INSTANTIATE_TEST_CASE_P(FormatLatest, PartitionedFilterBlockTest,
297                         testing::Values(test::kLatestFormatVersion));
298 
TEST_P(PartitionedFilterBlockTest,EmptyBuilder)299 TEST_P(PartitionedFilterBlockTest, EmptyBuilder) {
300   std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
301   std::unique_ptr<PartitionedFilterBlockBuilder> builder(NewBuilder(pib.get()));
302   const bool empty = true;
303   VerifyReader(builder.get(), pib.get(), empty);
304 }
305 
TEST_P(PartitionedFilterBlockTest,OneBlock)306 TEST_P(PartitionedFilterBlockTest, OneBlock) {
307   uint64_t max_index_size = MaxIndexSize();
308   for (uint64_t i = 1; i < max_index_size + 1; i++) {
309     table_options_.metadata_block_size = i;
310     TestBlockPerAllKeys();
311   }
312 }
313 
TEST_P(PartitionedFilterBlockTest,TwoBlocksPerKey)314 TEST_P(PartitionedFilterBlockTest, TwoBlocksPerKey) {
315   uint64_t max_index_size = MaxIndexSize();
316   for (uint64_t i = 1; i < max_index_size + 1; i++) {
317     table_options_.metadata_block_size = i;
318     TestBlockPerTwoKeys();
319   }
320 }
321 
322 // This reproduces the bug that a prefix is the same among multiple consecutive
323 // blocks but the bug would add it only to the first block.
TEST_P(PartitionedFilterBlockTest,SamePrefixInMultipleBlocks)324 TEST_P(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) {
325   // some small number to cause partition cuts
326   table_options_.metadata_block_size = 1;
327   std::unique_ptr<const SliceTransform> prefix_extractor(
328       ROCKSDB_NAMESPACE::NewFixedPrefixTransform(1));
329   std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
330   std::unique_ptr<PartitionedFilterBlockBuilder> builder(
331       NewBuilder(pib.get(), prefix_extractor.get()));
332   const std::string pkeys[3] = {"p-key10", "p-key20", "p-key30"};
333   builder->Add(pkeys[0]);
334   CutABlock(pib.get(), pkeys[0], pkeys[1]);
335   builder->Add(pkeys[1]);
336   CutABlock(pib.get(), pkeys[1], pkeys[2]);
337   builder->Add(pkeys[2]);
338   CutABlock(pib.get(), pkeys[2]);
339   std::unique_ptr<PartitionedFilterBlockReader> reader(
340       NewReader(builder.get(), pib.get()));
341   for (auto key : pkeys) {
342     auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
343     const Slice ikey_slice = Slice(*ikey.rep());
344     ASSERT_TRUE(reader->PrefixMayMatch(
345         prefix_extractor->Transform(key), prefix_extractor.get(), kNotValid,
346         /*no_io=*/false, &ikey_slice, /*get_context=*/nullptr,
347         /*lookup_context=*/nullptr));
348   }
349   // Non-existent keys but with the same prefix
350   const std::string pnonkeys[4] = {"p-key9", "p-key11", "p-key21", "p-key31"};
351   for (auto key : pnonkeys) {
352     auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
353     const Slice ikey_slice = Slice(*ikey.rep());
354     ASSERT_TRUE(reader->PrefixMayMatch(
355         prefix_extractor->Transform(key), prefix_extractor.get(), kNotValid,
356         /*no_io=*/false, &ikey_slice, /*get_context=*/nullptr,
357         /*lookup_context=*/nullptr));
358   }
359 }
360 
361 // This reproduces the bug in format_version=3 that the seeking the prefix will
362 // lead us to the partition before the one that has filter for the prefix.
TEST_P(PartitionedFilterBlockTest,PrefixInWrongPartitionBug)363 TEST_P(PartitionedFilterBlockTest, PrefixInWrongPartitionBug) {
364   // some small number to cause partition cuts
365   table_options_.metadata_block_size = 1;
366   std::unique_ptr<const SliceTransform> prefix_extractor(
367       ROCKSDB_NAMESPACE::NewFixedPrefixTransform(2));
368   std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
369   std::unique_ptr<PartitionedFilterBlockBuilder> builder(
370       NewBuilder(pib.get(), prefix_extractor.get()));
371   // In the bug, searching for prefix "p3" on an index with format version 3,
372   // will give the key "p3" and the partition of the keys that are <= p3, i.e.,
373   // p2-keys, where the filter for prefix "p3" does not exist.
374   const std::string pkeys[] = {"p1-key1", "p2-key2", "p3-key3", "p4-key3",
375                                "p5-key3"};
376   builder->Add(pkeys[0]);
377   CutABlock(pib.get(), pkeys[0], pkeys[1]);
378   builder->Add(pkeys[1]);
379   CutABlock(pib.get(), pkeys[1], pkeys[2]);
380   builder->Add(pkeys[2]);
381   CutABlock(pib.get(), pkeys[2], pkeys[3]);
382   builder->Add(pkeys[3]);
383   CutABlock(pib.get(), pkeys[3], pkeys[4]);
384   builder->Add(pkeys[4]);
385   CutABlock(pib.get(), pkeys[4]);
386   std::unique_ptr<PartitionedFilterBlockReader> reader(
387       NewReader(builder.get(), pib.get()));
388   for (auto key : pkeys) {
389     auto prefix = prefix_extractor->Transform(key);
390     auto ikey = InternalKey(prefix, 0, ValueType::kTypeValue);
391     const Slice ikey_slice = Slice(*ikey.rep());
392     ASSERT_TRUE(reader->PrefixMayMatch(
393         prefix, prefix_extractor.get(), kNotValid,
394         /*no_io=*/false, &ikey_slice, /*get_context=*/nullptr,
395         /*lookup_context=*/nullptr));
396   }
397 }
398 
TEST_P(PartitionedFilterBlockTest,OneBlockPerKey)399 TEST_P(PartitionedFilterBlockTest, OneBlockPerKey) {
400   uint64_t max_index_size = MaxIndexSize();
401   for (uint64_t i = 1; i < max_index_size + 1; i++) {
402     table_options_.metadata_block_size = i;
403     TestBlockPerKey();
404   }
405 }
406 
TEST_P(PartitionedFilterBlockTest,PartitionCount)407 TEST_P(PartitionedFilterBlockTest, PartitionCount) {
408   int num_keys = sizeof(keys) / sizeof(*keys);
409   table_options_.metadata_block_size =
410       std::max(MaxIndexSize(), MaxFilterSize());
411   int partitions = TestBlockPerKey();
412   ASSERT_EQ(partitions, 1);
413   // A low number ensures cutting a block after each key
414   table_options_.metadata_block_size = 1;
415   partitions = TestBlockPerKey();
416   ASSERT_EQ(partitions, num_keys - 1 /* last two keys make one flush */);
417 }
418 
419 }  // namespace ROCKSDB_NAMESPACE
420 
main(int argc,char ** argv)421 int main(int argc, char** argv) {
422   ::testing::InitGoogleTest(&argc, argv);
423   return RUN_ALL_TESTS();
424 }
425