1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10 #ifndef GFLAGS
11 #include <cstdio>
main()12 int main() {
13 fprintf(stderr, "Please install gflags to run this test... Skipping...\n");
14 return 0;
15 }
16 #else
17
18 #include <array>
19 #include <cmath>
20 #include <vector>
21
22 #include "memory/arena.h"
23 #include "port/jemalloc_helper.h"
24 #include "rocksdb/filter_policy.h"
25 #include "table/block_based/filter_policy_internal.h"
26 #include "test_util/testharness.h"
27 #include "test_util/testutil.h"
28 #include "util/gflags_compat.h"
29 #include "util/hash.h"
30
31 using GFLAGS_NAMESPACE::ParseCommandLineFlags;
32
33 // The test is not fully designed for bits_per_key other than 10, but with
34 // this parameter you can easily explore the behavior of other bits_per_key.
35 // See also filter_bench.
36 DEFINE_int32(bits_per_key, 10, "");
37
38 namespace ROCKSDB_NAMESPACE {
39
40 static const int kVerbose = 1;
41
Key(int i,char * buffer)42 static Slice Key(int i, char* buffer) {
43 std::string s;
44 PutFixed32(&s, static_cast<uint32_t>(i));
45 memcpy(buffer, s.c_str(), sizeof(i));
46 return Slice(buffer, sizeof(i));
47 }
48
NextLength(int length)49 static int NextLength(int length) {
50 if (length < 10) {
51 length += 1;
52 } else if (length < 100) {
53 length += 10;
54 } else if (length < 1000) {
55 length += 100;
56 } else {
57 length += 1000;
58 }
59 return length;
60 }
61
62 class BlockBasedBloomTest : public testing::Test {
63 private:
64 std::unique_ptr<const FilterPolicy> policy_;
65 std::string filter_;
66 std::vector<std::string> keys_;
67
68 public:
BlockBasedBloomTest()69 BlockBasedBloomTest() { ResetPolicy(); }
70
Reset()71 void Reset() {
72 keys_.clear();
73 filter_.clear();
74 }
75
ResetPolicy(double bits_per_key)76 void ResetPolicy(double bits_per_key) {
77 policy_.reset(new BloomFilterPolicy(bits_per_key,
78 BloomFilterPolicy::kDeprecatedBlock));
79 Reset();
80 }
81
ResetPolicy()82 void ResetPolicy() { ResetPolicy(FLAGS_bits_per_key); }
83
Add(const Slice & s)84 void Add(const Slice& s) {
85 keys_.push_back(s.ToString());
86 }
87
Build()88 void Build() {
89 std::vector<Slice> key_slices;
90 for (size_t i = 0; i < keys_.size(); i++) {
91 key_slices.push_back(Slice(keys_[i]));
92 }
93 filter_.clear();
94 policy_->CreateFilter(&key_slices[0], static_cast<int>(key_slices.size()),
95 &filter_);
96 keys_.clear();
97 if (kVerbose >= 2) DumpFilter();
98 }
99
FilterSize() const100 size_t FilterSize() const {
101 return filter_.size();
102 }
103
FilterData() const104 Slice FilterData() const { return Slice(filter_); }
105
DumpFilter()106 void DumpFilter() {
107 fprintf(stderr, "F(");
108 for (size_t i = 0; i+1 < filter_.size(); i++) {
109 const unsigned int c = static_cast<unsigned int>(filter_[i]);
110 for (int j = 0; j < 8; j++) {
111 fprintf(stderr, "%c", (c & (1 <<j)) ? '1' : '.');
112 }
113 }
114 fprintf(stderr, ")\n");
115 }
116
Matches(const Slice & s)117 bool Matches(const Slice& s) {
118 if (!keys_.empty()) {
119 Build();
120 }
121 return policy_->KeyMayMatch(s, filter_);
122 }
123
FalsePositiveRate()124 double FalsePositiveRate() {
125 char buffer[sizeof(int)];
126 int result = 0;
127 for (int i = 0; i < 10000; i++) {
128 if (Matches(Key(i + 1000000000, buffer))) {
129 result++;
130 }
131 }
132 return result / 10000.0;
133 }
134 };
135
TEST_F(BlockBasedBloomTest,EmptyFilter)136 TEST_F(BlockBasedBloomTest, EmptyFilter) {
137 ASSERT_TRUE(! Matches("hello"));
138 ASSERT_TRUE(! Matches("world"));
139 }
140
TEST_F(BlockBasedBloomTest,Small)141 TEST_F(BlockBasedBloomTest, Small) {
142 Add("hello");
143 Add("world");
144 ASSERT_TRUE(Matches("hello"));
145 ASSERT_TRUE(Matches("world"));
146 ASSERT_TRUE(! Matches("x"));
147 ASSERT_TRUE(! Matches("foo"));
148 }
149
TEST_F(BlockBasedBloomTest,VaryingLengths)150 TEST_F(BlockBasedBloomTest, VaryingLengths) {
151 char buffer[sizeof(int)];
152
153 // Count number of filters that significantly exceed the false positive rate
154 int mediocre_filters = 0;
155 int good_filters = 0;
156
157 for (int length = 1; length <= 10000; length = NextLength(length)) {
158 Reset();
159 for (int i = 0; i < length; i++) {
160 Add(Key(i, buffer));
161 }
162 Build();
163
164 ASSERT_LE(FilterSize(), (size_t)((length * FLAGS_bits_per_key / 8) + 40))
165 << length;
166
167 // All added keys must match
168 for (int i = 0; i < length; i++) {
169 ASSERT_TRUE(Matches(Key(i, buffer)))
170 << "Length " << length << "; key " << i;
171 }
172
173 // Check false positive rate
174 double rate = FalsePositiveRate();
175 if (kVerbose >= 1) {
176 fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
177 rate*100.0, length, static_cast<int>(FilterSize()));
178 }
179 if (FLAGS_bits_per_key == 10) {
180 ASSERT_LE(rate, 0.02); // Must not be over 2%
181 if (rate > 0.0125) {
182 mediocre_filters++; // Allowed, but not too often
183 } else {
184 good_filters++;
185 }
186 }
187 }
188 if (FLAGS_bits_per_key == 10 && kVerbose >= 1) {
189 fprintf(stderr, "Filters: %d good, %d mediocre\n",
190 good_filters, mediocre_filters);
191 }
192 ASSERT_LE(mediocre_filters, good_filters/5);
193 }
194
195 // Ensure the implementation doesn't accidentally change in an
196 // incompatible way
TEST_F(BlockBasedBloomTest,Schema)197 TEST_F(BlockBasedBloomTest, Schema) {
198 char buffer[sizeof(int)];
199
200 ResetPolicy(8); // num_probes = 5
201 for (int key = 0; key < 87; key++) {
202 Add(Key(key, buffer));
203 }
204 Build();
205 ASSERT_EQ(BloomHash(FilterData()), 3589896109U);
206
207 ResetPolicy(9); // num_probes = 6
208 for (int key = 0; key < 87; key++) {
209 Add(Key(key, buffer));
210 }
211 Build();
212 ASSERT_EQ(BloomHash(FilterData()), 969445585U);
213
214 ResetPolicy(11); // num_probes = 7
215 for (int key = 0; key < 87; key++) {
216 Add(Key(key, buffer));
217 }
218 Build();
219 ASSERT_EQ(BloomHash(FilterData()), 1694458207U);
220
221 ResetPolicy(10); // num_probes = 6
222 for (int key = 0; key < 87; key++) {
223 Add(Key(key, buffer));
224 }
225 Build();
226 ASSERT_EQ(BloomHash(FilterData()), 2373646410U);
227
228 ResetPolicy(10);
229 for (int key = /*CHANGED*/ 1; key < 87; key++) {
230 Add(Key(key, buffer));
231 }
232 Build();
233 ASSERT_EQ(BloomHash(FilterData()), 1908442116U);
234
235 ResetPolicy(10);
236 for (int key = 1; key < /*CHANGED*/ 88; key++) {
237 Add(Key(key, buffer));
238 }
239 Build();
240 ASSERT_EQ(BloomHash(FilterData()), 3057004015U);
241
242 // With new fractional bits_per_key, check that we are rounding to
243 // whole bits per key for old Bloom filters.
244 ResetPolicy(9.5); // Treated as 10
245 for (int key = 1; key < 88; key++) {
246 Add(Key(key, buffer));
247 }
248 Build();
249 ASSERT_EQ(BloomHash(FilterData()), /*SAME*/ 3057004015U);
250
251 ResetPolicy(10.499); // Treated as 10
252 for (int key = 1; key < 88; key++) {
253 Add(Key(key, buffer));
254 }
255 Build();
256 ASSERT_EQ(BloomHash(FilterData()), /*SAME*/ 3057004015U);
257
258 ResetPolicy();
259 }
260
261 // Different bits-per-byte
262
263 class FullBloomTest : public testing::TestWithParam<BloomFilterPolicy::Mode> {
264 protected:
265 BlockBasedTableOptions table_options_;
266
267 private:
268 std::shared_ptr<const FilterPolicy>& policy_;
269 std::unique_ptr<FilterBitsBuilder> bits_builder_;
270 std::unique_ptr<FilterBitsReader> bits_reader_;
271 std::unique_ptr<const char[]> buf_;
272 size_t filter_size_;
273
274 public:
FullBloomTest()275 FullBloomTest() : policy_(table_options_.filter_policy), filter_size_(0) {
276 ResetPolicy();
277 }
278
GetBuiltinFilterBitsBuilder()279 BuiltinFilterBitsBuilder* GetBuiltinFilterBitsBuilder() {
280 // Throws on bad cast
281 return &dynamic_cast<BuiltinFilterBitsBuilder&>(*bits_builder_);
282 }
283
GetBloomFilterPolicy()284 const BloomFilterPolicy* GetBloomFilterPolicy() {
285 // Throws on bad cast
286 return &dynamic_cast<const BloomFilterPolicy&>(*policy_);
287 }
288
Reset()289 void Reset() {
290 bits_builder_.reset(BloomFilterPolicy::GetBuilderFromContext(
291 FilterBuildingContext(table_options_)));
292 bits_reader_.reset(nullptr);
293 buf_.reset(nullptr);
294 filter_size_ = 0;
295 }
296
ResetPolicy(double bits_per_key)297 void ResetPolicy(double bits_per_key) {
298 policy_.reset(new BloomFilterPolicy(bits_per_key, GetParam()));
299 Reset();
300 }
301
ResetPolicy()302 void ResetPolicy() { ResetPolicy(FLAGS_bits_per_key); }
303
Add(const Slice & s)304 void Add(const Slice& s) {
305 bits_builder_->AddKey(s);
306 }
307
OpenRaw(const Slice & s)308 void OpenRaw(const Slice& s) {
309 bits_reader_.reset(policy_->GetFilterBitsReader(s));
310 }
311
Build()312 void Build() {
313 Slice filter = bits_builder_->Finish(&buf_);
314 bits_reader_.reset(policy_->GetFilterBitsReader(filter));
315 filter_size_ = filter.size();
316 }
317
FilterSize() const318 size_t FilterSize() const {
319 return filter_size_;
320 }
321
FilterData()322 Slice FilterData() { return Slice(buf_.get(), filter_size_); }
323
GetNumProbesFromFilterData()324 int GetNumProbesFromFilterData() {
325 assert(filter_size_ >= 5);
326 int8_t raw_num_probes = static_cast<int8_t>(buf_.get()[filter_size_ - 5]);
327 if (raw_num_probes == -1) { // New bloom filter marker
328 return static_cast<uint8_t>(buf_.get()[filter_size_ - 3]);
329 } else {
330 return raw_num_probes;
331 }
332 }
333
GetRibbonSeedFromFilterData()334 int GetRibbonSeedFromFilterData() {
335 assert(filter_size_ >= 5);
336 // Check for ribbon marker
337 assert(-2 == static_cast<int8_t>(buf_.get()[filter_size_ - 5]));
338 return static_cast<uint8_t>(buf_.get()[filter_size_ - 4]);
339 }
340
Matches(const Slice & s)341 bool Matches(const Slice& s) {
342 if (bits_reader_ == nullptr) {
343 Build();
344 }
345 return bits_reader_->MayMatch(s);
346 }
347
348 // Provides a kind of fingerprint on the Bloom filter's
349 // behavior, for reasonbly high FP rates.
PackedMatches()350 uint64_t PackedMatches() {
351 char buffer[sizeof(int)];
352 uint64_t result = 0;
353 for (int i = 0; i < 64; i++) {
354 if (Matches(Key(i + 12345, buffer))) {
355 result |= uint64_t{1} << i;
356 }
357 }
358 return result;
359 }
360
361 // Provides a kind of fingerprint on the Bloom filter's
362 // behavior, for lower FP rates.
FirstFPs(int count)363 std::string FirstFPs(int count) {
364 char buffer[sizeof(int)];
365 std::string rv;
366 int fp_count = 0;
367 for (int i = 0; i < 1000000; i++) {
368 // Pack four match booleans into each hexadecimal digit
369 if (Matches(Key(i + 1000000, buffer))) {
370 ++fp_count;
371 rv += std::to_string(i);
372 if (fp_count == count) {
373 break;
374 }
375 rv += ',';
376 }
377 }
378 return rv;
379 }
380
FalsePositiveRate()381 double FalsePositiveRate() {
382 char buffer[sizeof(int)];
383 int result = 0;
384 for (int i = 0; i < 10000; i++) {
385 if (Matches(Key(i + 1000000000, buffer))) {
386 result++;
387 }
388 }
389 return result / 10000.0;
390 }
391 };
392
TEST_P(FullBloomTest,FilterSize)393 TEST_P(FullBloomTest, FilterSize) {
394 // In addition to checking the consistency of space computation, we are
395 // checking that denoted and computed doubles are interpreted as expected
396 // as bits_per_key values.
397 bool some_computed_less_than_denoted = false;
398 // Note: enforced minimum is 1 bit per key (1000 millibits), and enforced
399 // maximum is 100 bits per key (100000 millibits).
400 for (auto bpk :
401 std::vector<std::pair<double, int> >{{-HUGE_VAL, 1000},
402 {-INFINITY, 1000},
403 {0.0, 1000},
404 {1.234, 1234},
405 {3.456, 3456},
406 {9.5, 9500},
407 {10.0, 10000},
408 {10.499, 10499},
409 {21.345, 21345},
410 {99.999, 99999},
411 {1234.0, 100000},
412 {HUGE_VAL, 100000},
413 {INFINITY, 100000},
414 {NAN, 100000}}) {
415 ResetPolicy(bpk.first);
416 auto bfp = GetBloomFilterPolicy();
417 EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey());
418 EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey());
419
420 double computed = bpk.first;
421 // This transforms e.g. 9.5 -> 9.499999999999998, which we still
422 // round to 10 for whole bits per key.
423 computed += 0.5;
424 computed /= 1234567.0;
425 computed *= 1234567.0;
426 computed -= 0.5;
427 some_computed_less_than_denoted |= (computed < bpk.first);
428 ResetPolicy(computed);
429 bfp = GetBloomFilterPolicy();
430 EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey());
431 EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey());
432
433 auto bits_builder = GetBuiltinFilterBitsBuilder();
434
435 size_t n = 1;
436 size_t space = 0;
437 for (; n < 1000000; n += 1 + n / 1000) {
438 // Ensure consistency between CalculateSpace and ApproximateNumEntries
439 space = bits_builder->CalculateSpace(n);
440 size_t n2 = bits_builder->ApproximateNumEntries(space);
441 EXPECT_GE(n2, n);
442 size_t space2 = bits_builder->CalculateSpace(n2);
443 if (n > 12000 && GetParam() == BloomFilterPolicy::kStandard128Ribbon) {
444 // TODO(peterd): better approximation?
445 EXPECT_GE(space2, space);
446 EXPECT_LE(space2 * 0.998, space * 1.0);
447 } else {
448 EXPECT_EQ(space2, space);
449 }
450 }
451 // Until size_t overflow
452 for (; n < (n + n / 3); n += n / 3) {
453 // Ensure space computation is not overflowing; capped is OK
454 size_t space2 = bits_builder->CalculateSpace(n);
455 EXPECT_GE(space2, space);
456 space = space2;
457 }
458 }
459 // Check that the compiler hasn't optimized our computation into nothing
460 EXPECT_TRUE(some_computed_less_than_denoted);
461 ResetPolicy();
462 }
463
TEST_P(FullBloomTest,FullEmptyFilter)464 TEST_P(FullBloomTest, FullEmptyFilter) {
465 // Empty filter is not match, at this level
466 ASSERT_TRUE(!Matches("hello"));
467 ASSERT_TRUE(!Matches("world"));
468 }
469
TEST_P(FullBloomTest,FullSmall)470 TEST_P(FullBloomTest, FullSmall) {
471 Add("hello");
472 Add("world");
473 ASSERT_TRUE(Matches("hello"));
474 ASSERT_TRUE(Matches("world"));
475 ASSERT_TRUE(!Matches("x"));
476 ASSERT_TRUE(!Matches("foo"));
477 }
478
TEST_P(FullBloomTest,FullVaryingLengths)479 TEST_P(FullBloomTest, FullVaryingLengths) {
480 char buffer[sizeof(int)];
481
482 // Count number of filters that significantly exceed the false positive rate
483 int mediocre_filters = 0;
484 int good_filters = 0;
485
486 for (int length = 1; length <= 10000; length = NextLength(length)) {
487 Reset();
488 for (int i = 0; i < length; i++) {
489 Add(Key(i, buffer));
490 }
491 Build();
492
493 EXPECT_LE(FilterSize(), (size_t)((length * FLAGS_bits_per_key / 8) +
494 CACHE_LINE_SIZE * 2 + 5));
495
496 // All added keys must match
497 for (int i = 0; i < length; i++) {
498 ASSERT_TRUE(Matches(Key(i, buffer)))
499 << "Length " << length << "; key " << i;
500 }
501
502 // Check false positive rate
503 double rate = FalsePositiveRate();
504 if (kVerbose >= 1) {
505 fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
506 rate*100.0, length, static_cast<int>(FilterSize()));
507 }
508 if (FLAGS_bits_per_key == 10) {
509 EXPECT_LE(rate, 0.02); // Must not be over 2%
510 if (rate > 0.0125) {
511 mediocre_filters++; // Allowed, but not too often
512 } else {
513 good_filters++;
514 }
515 }
516 }
517 if (kVerbose >= 1) {
518 fprintf(stderr, "Filters: %d good, %d mediocre\n",
519 good_filters, mediocre_filters);
520 }
521 EXPECT_LE(mediocre_filters, good_filters / 5);
522 }
523
TEST_P(FullBloomTest,OptimizeForMemory)524 TEST_P(FullBloomTest, OptimizeForMemory) {
525 char buffer[sizeof(int)];
526 for (bool offm : {true, false}) {
527 table_options_.optimize_filters_for_memory = offm;
528 ResetPolicy();
529 Random32 rnd(12345);
530 uint64_t total_size = 0;
531 uint64_t total_mem = 0;
532 int64_t total_keys = 0;
533 double total_fp_rate = 0;
534 constexpr int nfilters = 100;
535 for (int i = 0; i < nfilters; ++i) {
536 int nkeys = static_cast<int>(rnd.Uniformish(10000)) + 100;
537 Reset();
538 for (int j = 0; j < nkeys; ++j) {
539 Add(Key(j, buffer));
540 }
541 Build();
542 size_t size = FilterData().size();
543 total_size += size;
544 // optimize_filters_for_memory currently depends on malloc_usable_size
545 // but we run the rest of the test to ensure no bad behavior without it.
546 #ifdef ROCKSDB_MALLOC_USABLE_SIZE
547 size = malloc_usable_size(const_cast<char*>(FilterData().data()));
548 #endif // ROCKSDB_MALLOC_USABLE_SIZE
549 total_mem += size;
550 total_keys += nkeys;
551 total_fp_rate += FalsePositiveRate();
552 }
553 if (FLAGS_bits_per_key == 10) {
554 EXPECT_LE(total_fp_rate / double{nfilters}, 0.011);
555 EXPECT_GE(total_fp_rate / double{nfilters}, 0.008);
556 }
557
558 int64_t ex_min_total_size = int64_t{FLAGS_bits_per_key} * total_keys / 8;
559 if (GetParam() == BloomFilterPolicy::kStandard128Ribbon) {
560 // ~ 30% savings vs. Bloom filter
561 ex_min_total_size = 7 * ex_min_total_size / 10;
562 }
563 EXPECT_GE(static_cast<int64_t>(total_size), ex_min_total_size);
564
565 int64_t blocked_bloom_overhead = nfilters * (CACHE_LINE_SIZE + 5);
566 if (GetParam() == BloomFilterPolicy::kLegacyBloom) {
567 // this config can add extra cache line to make odd number
568 blocked_bloom_overhead += nfilters * CACHE_LINE_SIZE;
569 }
570
571 EXPECT_GE(total_mem, total_size);
572
573 // optimize_filters_for_memory not implemented with legacy Bloom
574 if (offm && GetParam() != BloomFilterPolicy::kLegacyBloom) {
575 // This value can include a small extra penalty for kExtraPadding
576 fprintf(stderr, "Internal fragmentation (optimized): %g%%\n",
577 (total_mem - total_size) * 100.0 / total_size);
578 // Less than 1% internal fragmentation
579 EXPECT_LE(total_mem, total_size * 101 / 100);
580 // Up to 2% storage penalty
581 EXPECT_LE(static_cast<int64_t>(total_size),
582 ex_min_total_size * 102 / 100 + blocked_bloom_overhead);
583 } else {
584 fprintf(stderr, "Internal fragmentation (not optimized): %g%%\n",
585 (total_mem - total_size) * 100.0 / total_size);
586 // TODO: add control checks for more allocators?
587 #ifdef ROCKSDB_JEMALLOC
588 fprintf(stderr, "Jemalloc detected? %d\n", HasJemalloc());
589 if (HasJemalloc()) {
590 #ifdef ROCKSDB_MALLOC_USABLE_SIZE
591 // More than 5% internal fragmentation
592 EXPECT_GE(total_mem, total_size * 105 / 100);
593 #endif // ROCKSDB_MALLOC_USABLE_SIZE
594 }
595 #endif // ROCKSDB_JEMALLOC
596 // No storage penalty, just usual overhead
597 EXPECT_LE(static_cast<int64_t>(total_size),
598 ex_min_total_size + blocked_bloom_overhead);
599 }
600 }
601 }
602
603 namespace {
SelectByCacheLineSize(uint32_t for64,uint32_t for128,uint32_t for256)604 inline uint32_t SelectByCacheLineSize(uint32_t for64, uint32_t for128,
605 uint32_t for256) {
606 (void)for64;
607 (void)for128;
608 (void)for256;
609 #if CACHE_LINE_SIZE == 64
610 return for64;
611 #elif CACHE_LINE_SIZE == 128
612 return for128;
613 #elif CACHE_LINE_SIZE == 256
614 return for256;
615 #else
616 #error "CACHE_LINE_SIZE unknown or unrecognized"
617 #endif
618 }
619 } // namespace
620
621 // Ensure the implementation doesn't accidentally change in an
622 // incompatible way. This test doesn't check the reading side
623 // (FirstFPs/PackedMatches) for LegacyBloom because it requires the
624 // ability to read filters generated using other cache line sizes.
625 // See RawSchema.
TEST_P(FullBloomTest,Schema)626 TEST_P(FullBloomTest, Schema) {
627 #define EXPECT_EQ_Bloom(a, b) \
628 { \
629 if (GetParam() != BloomFilterPolicy::kStandard128Ribbon) { \
630 EXPECT_EQ(a, b); \
631 } \
632 }
633 #define EXPECT_EQ_Ribbon(a, b) \
634 { \
635 if (GetParam() == BloomFilterPolicy::kStandard128Ribbon) { \
636 EXPECT_EQ(a, b); \
637 } \
638 }
639 #define EXPECT_EQ_FastBloom(a, b) \
640 { \
641 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) { \
642 EXPECT_EQ(a, b); \
643 } \
644 }
645 #define EXPECT_EQ_LegacyBloom(a, b) \
646 { \
647 if (GetParam() == BloomFilterPolicy::kLegacyBloom) { \
648 EXPECT_EQ(a, b); \
649 } \
650 }
651 #define EXPECT_EQ_NotLegacy(a, b) \
652 { \
653 if (GetParam() != BloomFilterPolicy::kLegacyBloom) { \
654 EXPECT_EQ(a, b); \
655 } \
656 }
657
658 char buffer[sizeof(int)];
659
660 // First do a small number of keys, where Ribbon config will fall back on
661 // fast Bloom filter and generate the same data
662 ResetPolicy(5); // num_probes = 3
663 for (int key = 0; key < 87; key++) {
664 Add(Key(key, buffer));
665 }
666 Build();
667 EXPECT_EQ(GetNumProbesFromFilterData(), 3);
668
669 EXPECT_EQ_NotLegacy(BloomHash(FilterData()), 4130687756U);
670
671 EXPECT_EQ_NotLegacy("31,38,40,43,61,83,86,112,125,131", FirstFPs(10));
672
673 // Now use enough keys so that changing bits / key by 1 is guaranteed to
674 // change number of allocated cache lines. So keys > max cache line bits.
675
676 // Note that the first attempted Ribbon seed is determined by the hash
677 // of the first key added (for pseudorandomness in practice, determinism in
678 // testing)
679
680 ResetPolicy(2); // num_probes = 1
681 for (int key = 0; key < 2087; key++) {
682 Add(Key(key, buffer));
683 }
684 Build();
685 EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 1);
686 EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
687
688 EXPECT_EQ_LegacyBloom(
689 BloomHash(FilterData()),
690 SelectByCacheLineSize(1567096579, 1964771444, 2659542661U));
691 EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3817481309U);
692 EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1705851228);
693
694 EXPECT_EQ_FastBloom("11,13,17,25,29,30,35,37,45,53", FirstFPs(10));
695 EXPECT_EQ_Ribbon("3,8,10,17,19,20,23,28,31,32", FirstFPs(10));
696
697 ResetPolicy(3); // num_probes = 2
698 for (int key = 0; key < 2087; key++) {
699 Add(Key(key, buffer));
700 }
701 Build();
702 EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 2);
703 EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
704
705 EXPECT_EQ_LegacyBloom(
706 BloomHash(FilterData()),
707 SelectByCacheLineSize(2707206547U, 2571983456U, 218344685));
708 EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2807269961U);
709 EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1095342358);
710
711 EXPECT_EQ_FastBloom("4,15,17,24,27,28,29,53,63,70", FirstFPs(10));
712 EXPECT_EQ_Ribbon("3,17,20,28,32,33,36,43,49,54", FirstFPs(10));
713
714 ResetPolicy(5); // num_probes = 3
715 for (int key = 0; key < 2087; key++) {
716 Add(Key(key, buffer));
717 }
718 Build();
719 EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 3);
720 EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
721
722 EXPECT_EQ_LegacyBloom(
723 BloomHash(FilterData()),
724 SelectByCacheLineSize(515748486, 94611728, 2436112214U));
725 EXPECT_EQ_FastBloom(BloomHash(FilterData()), 204628445);
726 EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3971337699U);
727
728 EXPECT_EQ_FastBloom("15,24,29,39,53,87,89,100,103,104", FirstFPs(10));
729 EXPECT_EQ_Ribbon("3,33,36,43,67,70,76,78,84,102", FirstFPs(10));
730
731 ResetPolicy(8); // num_probes = 5
732 for (int key = 0; key < 2087; key++) {
733 Add(Key(key, buffer));
734 }
735 Build();
736 EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 5);
737 EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
738
739 EXPECT_EQ_LegacyBloom(
740 BloomHash(FilterData()),
741 SelectByCacheLineSize(1302145999, 2811644657U, 756553699));
742 EXPECT_EQ_FastBloom(BloomHash(FilterData()), 355564975);
743 EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3651449053U);
744
745 EXPECT_EQ_FastBloom("16,60,66,126,220,238,244,256,265,287", FirstFPs(10));
746 EXPECT_EQ_Ribbon("33,187,203,296,300,322,411,419,547,582", FirstFPs(10));
747
748 ResetPolicy(9); // num_probes = 6
749 for (int key = 0; key < 2087; key++) {
750 Add(Key(key, buffer));
751 }
752 Build();
753 EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
754 EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
755
756 EXPECT_EQ_LegacyBloom(
757 BloomHash(FilterData()),
758 SelectByCacheLineSize(2092755149, 661139132, 1182970461));
759 EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2137566013U);
760 EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1005676675);
761
762 EXPECT_EQ_FastBloom("156,367,791,872,945,1015,1139,1159,1265", FirstFPs(9));
763 EXPECT_EQ_Ribbon("33,187,203,296,411,419,604,612,615,619", FirstFPs(10));
764
765 ResetPolicy(11); // num_probes = 7
766 for (int key = 0; key < 2087; key++) {
767 Add(Key(key, buffer));
768 }
769 Build();
770 EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 7);
771 EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
772
773 EXPECT_EQ_LegacyBloom(
774 BloomHash(FilterData()),
775 SelectByCacheLineSize(3755609649U, 1812694762, 1449142939));
776 EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2561502687U);
777 EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3129900846U);
778
779 EXPECT_EQ_FastBloom("34,74,130,236,643,882,962,1015,1035,1110", FirstFPs(10));
780 EXPECT_EQ_Ribbon("411,419,623,665,727,794,955,1052,1323,1330", FirstFPs(10));
781
782 // This used to be 9 probes, but 8 is a better choice for speed,
783 // especially with SIMD groups of 8 probes, with essentially no
784 // change in FP rate.
785 // FP rate @ 9 probes, old Bloom: 0.4321%
786 // FP rate @ 9 probes, new Bloom: 0.1846%
787 // FP rate @ 8 probes, new Bloom: 0.1843%
788 ResetPolicy(14); // num_probes = 8 (new), 9 (old)
789 for (int key = 0; key < 2087; key++) {
790 Add(Key(key, buffer));
791 }
792 Build();
793 EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 9);
794 EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 8);
795 EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
796
797 EXPECT_EQ_LegacyBloom(
798 BloomHash(FilterData()),
799 SelectByCacheLineSize(178861123, 379087593, 2574136516U));
800 EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3709876890U);
801 EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1855638875);
802
803 EXPECT_EQ_FastBloom("130,240,522,565,989,2002,2526,3147,3543", FirstFPs(9));
804 EXPECT_EQ_Ribbon("665,727,1323,1755,3866,4232,4442,4492,4736", FirstFPs(9));
805
806 // This used to be 11 probes, but 9 is a better choice for speed
807 // AND accuracy.
808 // FP rate @ 11 probes, old Bloom: 0.3571%
809 // FP rate @ 11 probes, new Bloom: 0.0884%
810 // FP rate @ 9 probes, new Bloom: 0.0843%
811 ResetPolicy(16); // num_probes = 9 (new), 11 (old)
812 for (int key = 0; key < 2087; key++) {
813 Add(Key(key, buffer));
814 }
815 Build();
816 EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 11);
817 EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 9);
818 EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
819
820 EXPECT_EQ_LegacyBloom(
821 BloomHash(FilterData()),
822 SelectByCacheLineSize(1129406313, 3049154394U, 1727750964));
823 EXPECT_EQ_FastBloom(BloomHash(FilterData()), 1087138490);
824 EXPECT_EQ_Ribbon(BloomHash(FilterData()), 459379967);
825
826 EXPECT_EQ_FastBloom("3299,3611,3916,6620,7822,8079,8482,8942", FirstFPs(8));
827 EXPECT_EQ_Ribbon("727,1323,1755,4442,4736,5386,6974,7154,8222", FirstFPs(9));
828
829 ResetPolicy(10); // num_probes = 6, but different memory ratio vs. 9
830 for (int key = 0; key < 2087; key++) {
831 Add(Key(key, buffer));
832 }
833 Build();
834 EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
835 EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
836
837 EXPECT_EQ_LegacyBloom(
838 BloomHash(FilterData()),
839 SelectByCacheLineSize(1478976371, 2910591341U, 1182970461));
840 EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2498541272U);
841 EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1273231667);
842
843 EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9));
844 EXPECT_EQ_Ribbon("296,411,419,612,619,623,630,665,686,727", FirstFPs(10));
845
846 ResetPolicy(10);
847 for (int key = /*CHANGED*/ 1; key < 2087; key++) {
848 Add(Key(key, buffer));
849 }
850 Build();
851 EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
852 EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), /*CHANGED*/ 184);
853
854 EXPECT_EQ_LegacyBloom(
855 BloomHash(FilterData()),
856 SelectByCacheLineSize(4205696321U, 1132081253U, 2385981855U));
857 EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2058382345U);
858 EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3007790572U);
859
860 EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9));
861 EXPECT_EQ_Ribbon("33,152,383,497,589,633,737,781,911,990", FirstFPs(10));
862
863 ResetPolicy(10);
864 for (int key = 1; key < /*CHANGED*/ 2088; key++) {
865 Add(Key(key, buffer));
866 }
867 Build();
868 EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
869 EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184);
870
871 EXPECT_EQ_LegacyBloom(
872 BloomHash(FilterData()),
873 SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
874 EXPECT_EQ_FastBloom(BloomHash(FilterData()), 23699164);
875 EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1942323379);
876
877 EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9));
878 EXPECT_EQ_Ribbon("33,95,360,589,737,911,990,1048,1081,1414", FirstFPs(10));
879
880 // With new fractional bits_per_key, check that we are rounding to
881 // whole bits per key for old Bloom filters but fractional for
882 // new Bloom filter.
883 ResetPolicy(9.5);
884 for (int key = 1; key < 2088; key++) {
885 Add(Key(key, buffer));
886 }
887 Build();
888 EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
889 EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184);
890
891 EXPECT_EQ_LegacyBloom(
892 BloomHash(FilterData()),
893 /*SAME*/ SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
894 EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3166884174U);
895 EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1148258663);
896
897 EXPECT_EQ_FastBloom("126,156,367,444,458,791,813,976,1015", FirstFPs(9));
898 EXPECT_EQ_Ribbon("33,54,95,360,589,693,737,911,990,1048", FirstFPs(10));
899
900 ResetPolicy(10.499);
901 for (int key = 1; key < 2088; key++) {
902 Add(Key(key, buffer));
903 }
904 Build();
905 EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 6);
906 EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 7);
907 EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184);
908
909 EXPECT_EQ_LegacyBloom(
910 BloomHash(FilterData()),
911 /*SAME*/ SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
912 EXPECT_EQ_FastBloom(BloomHash(FilterData()), 4098502778U);
913 EXPECT_EQ_Ribbon(BloomHash(FilterData()), 792138188);
914
915 EXPECT_EQ_FastBloom("16,236,240,472,1015,1045,1111,1409,1465", FirstFPs(9));
916 EXPECT_EQ_Ribbon("33,95,360,589,737,990,1048,1081,1414,1643", FirstFPs(10));
917
918 ResetPolicy();
919 }
920
921 // A helper class for testing custom or corrupt filter bits as read by
922 // built-in FilterBitsReaders.
923 struct RawFilterTester {
924 // Buffer, from which we always return a tail Slice, so the
925 // last five bytes are always the metadata bytes.
926 std::array<char, 3000> data_;
927 // Points five bytes from the end
928 char* metadata_ptr_;
929
RawFilterTesterROCKSDB_NAMESPACE::RawFilterTester930 RawFilterTester() : metadata_ptr_(&*(data_.end() - 5)) {}
931
ResetNoFillROCKSDB_NAMESPACE::RawFilterTester932 Slice ResetNoFill(uint32_t len_without_metadata, uint32_t num_lines,
933 uint32_t num_probes) {
934 metadata_ptr_[0] = static_cast<char>(num_probes);
935 EncodeFixed32(metadata_ptr_ + 1, num_lines);
936 uint32_t len = len_without_metadata + /*metadata*/ 5;
937 assert(len <= data_.size());
938 return Slice(metadata_ptr_ - len_without_metadata, len);
939 }
940
ResetROCKSDB_NAMESPACE::RawFilterTester941 Slice Reset(uint32_t len_without_metadata, uint32_t num_lines,
942 uint32_t num_probes, bool fill_ones) {
943 data_.fill(fill_ones ? 0xff : 0);
944 return ResetNoFill(len_without_metadata, num_lines, num_probes);
945 }
946
ResetWeirdFillROCKSDB_NAMESPACE::RawFilterTester947 Slice ResetWeirdFill(uint32_t len_without_metadata, uint32_t num_lines,
948 uint32_t num_probes) {
949 for (uint32_t i = 0; i < data_.size(); ++i) {
950 data_[i] = static_cast<char>(0x7b7b >> (i % 7));
951 }
952 return ResetNoFill(len_without_metadata, num_lines, num_probes);
953 }
954 };
955
TEST_P(FullBloomTest,RawSchema)956 TEST_P(FullBloomTest, RawSchema) {
957 RawFilterTester cft;
958 // Legacy Bloom configurations
959 // Two probes, about 3/4 bits set: ~50% "FP" rate
960 // One 256-byte cache line.
961 OpenRaw(cft.ResetWeirdFill(256, 1, 2));
962 EXPECT_EQ(uint64_t{11384799501900898790U}, PackedMatches());
963
964 // Two 128-byte cache lines.
965 OpenRaw(cft.ResetWeirdFill(256, 2, 2));
966 EXPECT_EQ(uint64_t{10157853359773492589U}, PackedMatches());
967
968 // Four 64-byte cache lines.
969 OpenRaw(cft.ResetWeirdFill(256, 4, 2));
970 EXPECT_EQ(uint64_t{7123594913907464682U}, PackedMatches());
971
972 // Fast local Bloom configurations (marker 255 -> -1)
973 // Two probes, about 3/4 bits set: ~50% "FP" rate
974 // Four 64-byte cache lines.
975 OpenRaw(cft.ResetWeirdFill(256, 2U << 8, 255));
976 EXPECT_EQ(uint64_t{9957045189927952471U}, PackedMatches());
977
978 // Ribbon configurations (marker 254 -> -2)
979
980 // Even though the builder never builds configurations this
981 // small (preferring Bloom), we can test that the configuration
982 // can be read, for possible future-proofing.
983
984 // 256 slots, one result column = 32 bytes (2 blocks, seed 0)
985 // ~50% FP rate:
986 // 0b0101010111110101010000110000011011011111100100001110010011101010
987 OpenRaw(cft.ResetWeirdFill(32, 2U << 8, 254));
988 EXPECT_EQ(uint64_t{6193930559317665002U}, PackedMatches());
989
990 // 256 slots, three-to-four result columns = 112 bytes
991 // ~ 1 in 10 FP rate:
992 // 0b0000000000100000000000000000000001000001000000010000101000000000
993 OpenRaw(cft.ResetWeirdFill(112, 2U << 8, 254));
994 EXPECT_EQ(uint64_t{9007200345328128U}, PackedMatches());
995 }
996
TEST_P(FullBloomTest,CorruptFilters)997 TEST_P(FullBloomTest, CorruptFilters) {
998 RawFilterTester cft;
999
1000 for (bool fill : {false, true}) {
1001 // Legacy Bloom configurations
1002 // Good filter bits - returns same as fill
1003 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 6, fill));
1004 ASSERT_EQ(fill, Matches("hello"));
1005 ASSERT_EQ(fill, Matches("world"));
1006
1007 // Good filter bits - returns same as fill
1008 OpenRaw(cft.Reset(CACHE_LINE_SIZE * 3, 3, 6, fill));
1009 ASSERT_EQ(fill, Matches("hello"));
1010 ASSERT_EQ(fill, Matches("world"));
1011
1012 // Good filter bits - returns same as fill
1013 // 256 is unusual but legal cache line size
1014 OpenRaw(cft.Reset(256 * 3, 3, 6, fill));
1015 ASSERT_EQ(fill, Matches("hello"));
1016 ASSERT_EQ(fill, Matches("world"));
1017
1018 // Good filter bits - returns same as fill
1019 // 30 should be max num_probes
1020 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 30, fill));
1021 ASSERT_EQ(fill, Matches("hello"));
1022 ASSERT_EQ(fill, Matches("world"));
1023
1024 // Good filter bits - returns same as fill
1025 // 1 should be min num_probes
1026 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 1, fill));
1027 ASSERT_EQ(fill, Matches("hello"));
1028 ASSERT_EQ(fill, Matches("world"));
1029
1030 // Type 1 trivial filter bits - returns true as if FP by zero probes
1031 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 0, fill));
1032 ASSERT_TRUE(Matches("hello"));
1033 ASSERT_TRUE(Matches("world"));
1034
1035 // Type 2 trivial filter bits - returns false as if built from zero keys
1036 OpenRaw(cft.Reset(0, 0, 6, fill));
1037 ASSERT_FALSE(Matches("hello"));
1038 ASSERT_FALSE(Matches("world"));
1039
1040 // Type 2 trivial filter bits - returns false as if built from zero keys
1041 OpenRaw(cft.Reset(0, 37, 6, fill));
1042 ASSERT_FALSE(Matches("hello"));
1043 ASSERT_FALSE(Matches("world"));
1044
1045 // Type 2 trivial filter bits - returns false as 0 size trumps 0 probes
1046 OpenRaw(cft.Reset(0, 0, 0, fill));
1047 ASSERT_FALSE(Matches("hello"));
1048 ASSERT_FALSE(Matches("world"));
1049
1050 // Bad filter bits - returns true for safety
1051 // No solution to 0 * x == CACHE_LINE_SIZE
1052 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 0, 6, fill));
1053 ASSERT_TRUE(Matches("hello"));
1054 ASSERT_TRUE(Matches("world"));
1055
1056 // Bad filter bits - returns true for safety
1057 // Can't have 3 * x == 4 for integer x
1058 OpenRaw(cft.Reset(4, 3, 6, fill));
1059 ASSERT_TRUE(Matches("hello"));
1060 ASSERT_TRUE(Matches("world"));
1061
1062 // Bad filter bits - returns true for safety
1063 // 97 bytes is not a power of two, so not a legal cache line size
1064 OpenRaw(cft.Reset(97 * 3, 3, 6, fill));
1065 ASSERT_TRUE(Matches("hello"));
1066 ASSERT_TRUE(Matches("world"));
1067
1068 // Bad filter bits - returns true for safety
1069 // 65 bytes is not a power of two, so not a legal cache line size
1070 OpenRaw(cft.Reset(65 * 3, 3, 6, fill));
1071 ASSERT_TRUE(Matches("hello"));
1072 ASSERT_TRUE(Matches("world"));
1073
1074 // Bad filter bits - returns false as if built from zero keys
1075 // < 5 bytes overall means missing even metadata
1076 OpenRaw(cft.Reset(static_cast<uint32_t>(-1), 3, 6, fill));
1077 ASSERT_FALSE(Matches("hello"));
1078 ASSERT_FALSE(Matches("world"));
1079
1080 OpenRaw(cft.Reset(static_cast<uint32_t>(-5), 3, 6, fill));
1081 ASSERT_FALSE(Matches("hello"));
1082 ASSERT_FALSE(Matches("world"));
1083
1084 // Dubious filter bits - returns same as fill (for now)
1085 // 31 is not a useful num_probes, nor generated by RocksDB unless directly
1086 // using filter bits API without BloomFilterPolicy.
1087 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 31, fill));
1088 ASSERT_EQ(fill, Matches("hello"));
1089 ASSERT_EQ(fill, Matches("world"));
1090
1091 // Dubious filter bits - returns same as fill (for now)
1092 // Similar, with 127, largest positive char
1093 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 127, fill));
1094 ASSERT_EQ(fill, Matches("hello"));
1095 ASSERT_EQ(fill, Matches("world"));
1096
1097 // Dubious filter bits - returns true (for now)
1098 // num_probes set to 128 / -128, lowest negative char
1099 // NB: Bug in implementation interprets this as negative and has same
1100 // effect as zero probes, but effectively reserves negative char values
1101 // for future use.
1102 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 128, fill));
1103 ASSERT_TRUE(Matches("hello"));
1104 ASSERT_TRUE(Matches("world"));
1105
1106 // Dubious filter bits - returns true (for now)
1107 // Similar, with 253 / -3
1108 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 253, fill));
1109 ASSERT_TRUE(Matches("hello"));
1110 ASSERT_TRUE(Matches("world"));
1111
1112 // #########################################################
1113 // Fast local Bloom configurations (marker 255 -> -1)
1114 // Good config with six probes
1115 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 6U << 8, 255, fill));
1116 ASSERT_EQ(fill, Matches("hello"));
1117 ASSERT_EQ(fill, Matches("world"));
1118
1119 // Becomes bad/reserved config (always true) if any other byte set
1120 OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | 1U, 255, fill));
1121 ASSERT_TRUE(Matches("hello"));
1122 ASSERT_TRUE(Matches("world"));
1123
1124 OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | (1U << 16), 255, fill));
1125 ASSERT_TRUE(Matches("hello"));
1126 ASSERT_TRUE(Matches("world"));
1127
1128 OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | (1U << 24), 255, fill));
1129 ASSERT_TRUE(Matches("hello"));
1130 ASSERT_TRUE(Matches("world"));
1131
1132 // Good config, max 30 probes
1133 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 30U << 8, 255, fill));
1134 ASSERT_EQ(fill, Matches("hello"));
1135 ASSERT_EQ(fill, Matches("world"));
1136
1137 // Bad/reserved config (always true) if more than 30
1138 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 31U << 8, 255, fill));
1139 ASSERT_TRUE(Matches("hello"));
1140 ASSERT_TRUE(Matches("world"));
1141
1142 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 33U << 8, 255, fill));
1143 ASSERT_TRUE(Matches("hello"));
1144 ASSERT_TRUE(Matches("world"));
1145
1146 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 66U << 8, 255, fill));
1147 ASSERT_TRUE(Matches("hello"));
1148 ASSERT_TRUE(Matches("world"));
1149
1150 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 130U << 8, 255, fill));
1151 ASSERT_TRUE(Matches("hello"));
1152 ASSERT_TRUE(Matches("world"));
1153 }
1154
1155 // #########################################################
1156 // Ribbon configurations (marker 254 -> -2)
1157 // ("fill" doesn't work to detect good configurations, we just
1158 // have to rely on TN probability)
1159
1160 // Good: 2 blocks * 16 bytes / segment * 4 columns = 128 bytes
1161 // seed = 123
1162 OpenRaw(cft.Reset(128, (2U << 8) + 123U, 254, false));
1163 ASSERT_FALSE(Matches("hello"));
1164 ASSERT_FALSE(Matches("world"));
1165
1166 // Good: 2 blocks * 16 bytes / segment * 8 columns = 256 bytes
1167 OpenRaw(cft.Reset(256, (2U << 8) + 123U, 254, false));
1168 ASSERT_FALSE(Matches("hello"));
1169 ASSERT_FALSE(Matches("world"));
1170
1171 // Surprisingly OK: 5000 blocks (640,000 slots) in only 1024 bits
1172 // -> average close to 0 columns
1173 OpenRaw(cft.Reset(128, (5000U << 8) + 123U, 254, false));
1174 // *Almost* all FPs
1175 ASSERT_TRUE(Matches("hello"));
1176 ASSERT_TRUE(Matches("world"));
1177 // Need many queries to find a "true negative"
1178 for (int i = 0; Matches(ToString(i)); ++i) {
1179 ASSERT_LT(i, 1000);
1180 }
1181
1182 // Bad: 1 block not allowed (for implementation detail reasons)
1183 OpenRaw(cft.Reset(128, (1U << 8) + 123U, 254, false));
1184 ASSERT_TRUE(Matches("hello"));
1185 ASSERT_TRUE(Matches("world"));
1186
1187 // Bad: 0 blocks not allowed
1188 OpenRaw(cft.Reset(128, (0U << 8) + 123U, 254, false));
1189 ASSERT_TRUE(Matches("hello"));
1190 ASSERT_TRUE(Matches("world"));
1191 }
1192
1193 INSTANTIATE_TEST_CASE_P(Full, FullBloomTest,
1194 testing::Values(BloomFilterPolicy::kLegacyBloom,
1195 BloomFilterPolicy::kFastLocalBloom,
1196 BloomFilterPolicy::kStandard128Ribbon));
1197
1198 } // namespace ROCKSDB_NAMESPACE
1199
main(int argc,char ** argv)1200 int main(int argc, char** argv) {
1201 ::testing::InitGoogleTest(&argc, argv);
1202 ParseCommandLineFlags(&argc, &argv, true);
1203
1204 return RUN_ALL_TESTS();
1205 }
1206
1207 #endif // GFLAGS
1208