1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 
10 #ifndef GFLAGS
11 #include <cstdio>
main()12 int main() {
13   fprintf(stderr, "Please install gflags to run this test... Skipping...\n");
14   return 0;
15 }
16 #else
17 
18 #include <array>
19 #include <cmath>
20 #include <vector>
21 
22 #include "memory/arena.h"
23 #include "port/jemalloc_helper.h"
24 #include "rocksdb/filter_policy.h"
25 #include "table/block_based/filter_policy_internal.h"
26 #include "test_util/testharness.h"
27 #include "test_util/testutil.h"
28 #include "util/gflags_compat.h"
29 #include "util/hash.h"
30 
31 using GFLAGS_NAMESPACE::ParseCommandLineFlags;
32 
33 // The test is not fully designed for bits_per_key other than 10, but with
34 // this parameter you can easily explore the behavior of other bits_per_key.
35 // See also filter_bench.
36 DEFINE_int32(bits_per_key, 10, "");
37 
38 namespace ROCKSDB_NAMESPACE {
39 
40 static const int kVerbose = 1;
41 
Key(int i,char * buffer)42 static Slice Key(int i, char* buffer) {
43   std::string s;
44   PutFixed32(&s, static_cast<uint32_t>(i));
45   memcpy(buffer, s.c_str(), sizeof(i));
46   return Slice(buffer, sizeof(i));
47 }
48 
NextLength(int length)49 static int NextLength(int length) {
50   if (length < 10) {
51     length += 1;
52   } else if (length < 100) {
53     length += 10;
54   } else if (length < 1000) {
55     length += 100;
56   } else {
57     length += 1000;
58   }
59   return length;
60 }
61 
62 class BlockBasedBloomTest : public testing::Test {
63  private:
64   std::unique_ptr<const FilterPolicy> policy_;
65   std::string filter_;
66   std::vector<std::string> keys_;
67 
68  public:
BlockBasedBloomTest()69   BlockBasedBloomTest() { ResetPolicy(); }
70 
Reset()71   void Reset() {
72     keys_.clear();
73     filter_.clear();
74   }
75 
ResetPolicy(double bits_per_key)76   void ResetPolicy(double bits_per_key) {
77     policy_.reset(new BloomFilterPolicy(bits_per_key,
78                                         BloomFilterPolicy::kDeprecatedBlock));
79     Reset();
80   }
81 
ResetPolicy()82   void ResetPolicy() { ResetPolicy(FLAGS_bits_per_key); }
83 
Add(const Slice & s)84   void Add(const Slice& s) {
85     keys_.push_back(s.ToString());
86   }
87 
Build()88   void Build() {
89     std::vector<Slice> key_slices;
90     for (size_t i = 0; i < keys_.size(); i++) {
91       key_slices.push_back(Slice(keys_[i]));
92     }
93     filter_.clear();
94     policy_->CreateFilter(&key_slices[0], static_cast<int>(key_slices.size()),
95                           &filter_);
96     keys_.clear();
97     if (kVerbose >= 2) DumpFilter();
98   }
99 
FilterSize() const100   size_t FilterSize() const {
101     return filter_.size();
102   }
103 
FilterData() const104   Slice FilterData() const { return Slice(filter_); }
105 
DumpFilter()106   void DumpFilter() {
107     fprintf(stderr, "F(");
108     for (size_t i = 0; i+1 < filter_.size(); i++) {
109       const unsigned int c = static_cast<unsigned int>(filter_[i]);
110       for (int j = 0; j < 8; j++) {
111         fprintf(stderr, "%c", (c & (1 <<j)) ? '1' : '.');
112       }
113     }
114     fprintf(stderr, ")\n");
115   }
116 
Matches(const Slice & s)117   bool Matches(const Slice& s) {
118     if (!keys_.empty()) {
119       Build();
120     }
121     return policy_->KeyMayMatch(s, filter_);
122   }
123 
FalsePositiveRate()124   double FalsePositiveRate() {
125     char buffer[sizeof(int)];
126     int result = 0;
127     for (int i = 0; i < 10000; i++) {
128       if (Matches(Key(i + 1000000000, buffer))) {
129         result++;
130       }
131     }
132     return result / 10000.0;
133   }
134 };
135 
TEST_F(BlockBasedBloomTest,EmptyFilter)136 TEST_F(BlockBasedBloomTest, EmptyFilter) {
137   ASSERT_TRUE(! Matches("hello"));
138   ASSERT_TRUE(! Matches("world"));
139 }
140 
TEST_F(BlockBasedBloomTest,Small)141 TEST_F(BlockBasedBloomTest, Small) {
142   Add("hello");
143   Add("world");
144   ASSERT_TRUE(Matches("hello"));
145   ASSERT_TRUE(Matches("world"));
146   ASSERT_TRUE(! Matches("x"));
147   ASSERT_TRUE(! Matches("foo"));
148 }
149 
TEST_F(BlockBasedBloomTest,VaryingLengths)150 TEST_F(BlockBasedBloomTest, VaryingLengths) {
151   char buffer[sizeof(int)];
152 
153   // Count number of filters that significantly exceed the false positive rate
154   int mediocre_filters = 0;
155   int good_filters = 0;
156 
157   for (int length = 1; length <= 10000; length = NextLength(length)) {
158     Reset();
159     for (int i = 0; i < length; i++) {
160       Add(Key(i, buffer));
161     }
162     Build();
163 
164     ASSERT_LE(FilterSize(), (size_t)((length * FLAGS_bits_per_key / 8) + 40))
165         << length;
166 
167     // All added keys must match
168     for (int i = 0; i < length; i++) {
169       ASSERT_TRUE(Matches(Key(i, buffer)))
170           << "Length " << length << "; key " << i;
171     }
172 
173     // Check false positive rate
174     double rate = FalsePositiveRate();
175     if (kVerbose >= 1) {
176       fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
177               rate*100.0, length, static_cast<int>(FilterSize()));
178     }
179     if (FLAGS_bits_per_key == 10) {
180       ASSERT_LE(rate, 0.02);  // Must not be over 2%
181       if (rate > 0.0125) {
182         mediocre_filters++;  // Allowed, but not too often
183       } else {
184         good_filters++;
185       }
186     }
187   }
188   if (FLAGS_bits_per_key == 10 && kVerbose >= 1) {
189     fprintf(stderr, "Filters: %d good, %d mediocre\n",
190             good_filters, mediocre_filters);
191   }
192   ASSERT_LE(mediocre_filters, good_filters/5);
193 }
194 
195 // Ensure the implementation doesn't accidentally change in an
196 // incompatible way
TEST_F(BlockBasedBloomTest,Schema)197 TEST_F(BlockBasedBloomTest, Schema) {
198   char buffer[sizeof(int)];
199 
200   ResetPolicy(8);  // num_probes = 5
201   for (int key = 0; key < 87; key++) {
202     Add(Key(key, buffer));
203   }
204   Build();
205   ASSERT_EQ(BloomHash(FilterData()), 3589896109U);
206 
207   ResetPolicy(9);  // num_probes = 6
208   for (int key = 0; key < 87; key++) {
209     Add(Key(key, buffer));
210   }
211   Build();
212   ASSERT_EQ(BloomHash(FilterData()), 969445585U);
213 
214   ResetPolicy(11);  // num_probes = 7
215   for (int key = 0; key < 87; key++) {
216     Add(Key(key, buffer));
217   }
218   Build();
219   ASSERT_EQ(BloomHash(FilterData()), 1694458207U);
220 
221   ResetPolicy(10);  // num_probes = 6
222   for (int key = 0; key < 87; key++) {
223     Add(Key(key, buffer));
224   }
225   Build();
226   ASSERT_EQ(BloomHash(FilterData()), 2373646410U);
227 
228   ResetPolicy(10);
229   for (int key = /*CHANGED*/ 1; key < 87; key++) {
230     Add(Key(key, buffer));
231   }
232   Build();
233   ASSERT_EQ(BloomHash(FilterData()), 1908442116U);
234 
235   ResetPolicy(10);
236   for (int key = 1; key < /*CHANGED*/ 88; key++) {
237     Add(Key(key, buffer));
238   }
239   Build();
240   ASSERT_EQ(BloomHash(FilterData()), 3057004015U);
241 
242   // With new fractional bits_per_key, check that we are rounding to
243   // whole bits per key for old Bloom filters.
244   ResetPolicy(9.5);  // Treated as 10
245   for (int key = 1; key < 88; key++) {
246     Add(Key(key, buffer));
247   }
248   Build();
249   ASSERT_EQ(BloomHash(FilterData()), /*SAME*/ 3057004015U);
250 
251   ResetPolicy(10.499);  // Treated as 10
252   for (int key = 1; key < 88; key++) {
253     Add(Key(key, buffer));
254   }
255   Build();
256   ASSERT_EQ(BloomHash(FilterData()), /*SAME*/ 3057004015U);
257 
258   ResetPolicy();
259 }
260 
261 // Different bits-per-byte
262 
263 class FullBloomTest : public testing::TestWithParam<BloomFilterPolicy::Mode> {
264  protected:
265   BlockBasedTableOptions table_options_;
266 
267  private:
268   std::shared_ptr<const FilterPolicy>& policy_;
269   std::unique_ptr<FilterBitsBuilder> bits_builder_;
270   std::unique_ptr<FilterBitsReader> bits_reader_;
271   std::unique_ptr<const char[]> buf_;
272   size_t filter_size_;
273 
274  public:
FullBloomTest()275   FullBloomTest() : policy_(table_options_.filter_policy), filter_size_(0) {
276     ResetPolicy();
277   }
278 
GetBuiltinFilterBitsBuilder()279   BuiltinFilterBitsBuilder* GetBuiltinFilterBitsBuilder() {
280     // Throws on bad cast
281     return &dynamic_cast<BuiltinFilterBitsBuilder&>(*bits_builder_);
282   }
283 
GetBloomFilterPolicy()284   const BloomFilterPolicy* GetBloomFilterPolicy() {
285     // Throws on bad cast
286     return &dynamic_cast<const BloomFilterPolicy&>(*policy_);
287   }
288 
Reset()289   void Reset() {
290     bits_builder_.reset(BloomFilterPolicy::GetBuilderFromContext(
291         FilterBuildingContext(table_options_)));
292     bits_reader_.reset(nullptr);
293     buf_.reset(nullptr);
294     filter_size_ = 0;
295   }
296 
ResetPolicy(double bits_per_key)297   void ResetPolicy(double bits_per_key) {
298     policy_.reset(new BloomFilterPolicy(bits_per_key, GetParam()));
299     Reset();
300   }
301 
ResetPolicy()302   void ResetPolicy() { ResetPolicy(FLAGS_bits_per_key); }
303 
Add(const Slice & s)304   void Add(const Slice& s) {
305     bits_builder_->AddKey(s);
306   }
307 
OpenRaw(const Slice & s)308   void OpenRaw(const Slice& s) {
309     bits_reader_.reset(policy_->GetFilterBitsReader(s));
310   }
311 
Build()312   void Build() {
313     Slice filter = bits_builder_->Finish(&buf_);
314     bits_reader_.reset(policy_->GetFilterBitsReader(filter));
315     filter_size_ = filter.size();
316   }
317 
FilterSize() const318   size_t FilterSize() const {
319     return filter_size_;
320   }
321 
FilterData()322   Slice FilterData() { return Slice(buf_.get(), filter_size_); }
323 
GetNumProbesFromFilterData()324   int GetNumProbesFromFilterData() {
325     assert(filter_size_ >= 5);
326     int8_t raw_num_probes = static_cast<int8_t>(buf_.get()[filter_size_ - 5]);
327     if (raw_num_probes == -1) {  // New bloom filter marker
328       return static_cast<uint8_t>(buf_.get()[filter_size_ - 3]);
329     } else {
330       return raw_num_probes;
331     }
332   }
333 
GetRibbonSeedFromFilterData()334   int GetRibbonSeedFromFilterData() {
335     assert(filter_size_ >= 5);
336     // Check for ribbon marker
337     assert(-2 == static_cast<int8_t>(buf_.get()[filter_size_ - 5]));
338     return static_cast<uint8_t>(buf_.get()[filter_size_ - 4]);
339   }
340 
Matches(const Slice & s)341   bool Matches(const Slice& s) {
342     if (bits_reader_ == nullptr) {
343       Build();
344     }
345     return bits_reader_->MayMatch(s);
346   }
347 
348   // Provides a kind of fingerprint on the Bloom filter's
349   // behavior, for reasonbly high FP rates.
PackedMatches()350   uint64_t PackedMatches() {
351     char buffer[sizeof(int)];
352     uint64_t result = 0;
353     for (int i = 0; i < 64; i++) {
354       if (Matches(Key(i + 12345, buffer))) {
355         result |= uint64_t{1} << i;
356       }
357     }
358     return result;
359   }
360 
361   // Provides a kind of fingerprint on the Bloom filter's
362   // behavior, for lower FP rates.
FirstFPs(int count)363   std::string FirstFPs(int count) {
364     char buffer[sizeof(int)];
365     std::string rv;
366     int fp_count = 0;
367     for (int i = 0; i < 1000000; i++) {
368       // Pack four match booleans into each hexadecimal digit
369       if (Matches(Key(i + 1000000, buffer))) {
370         ++fp_count;
371         rv += std::to_string(i);
372         if (fp_count == count) {
373           break;
374         }
375         rv += ',';
376       }
377     }
378     return rv;
379   }
380 
FalsePositiveRate()381   double FalsePositiveRate() {
382     char buffer[sizeof(int)];
383     int result = 0;
384     for (int i = 0; i < 10000; i++) {
385       if (Matches(Key(i + 1000000000, buffer))) {
386         result++;
387       }
388     }
389     return result / 10000.0;
390   }
391 };
392 
TEST_P(FullBloomTest,FilterSize)393 TEST_P(FullBloomTest, FilterSize) {
394   // In addition to checking the consistency of space computation, we are
395   // checking that denoted and computed doubles are interpreted as expected
396   // as bits_per_key values.
397   bool some_computed_less_than_denoted = false;
398   // Note: enforced minimum is 1 bit per key (1000 millibits), and enforced
399   // maximum is 100 bits per key (100000 millibits).
400   for (auto bpk :
401        std::vector<std::pair<double, int> >{{-HUGE_VAL, 1000},
402                                             {-INFINITY, 1000},
403                                             {0.0, 1000},
404                                             {1.234, 1234},
405                                             {3.456, 3456},
406                                             {9.5, 9500},
407                                             {10.0, 10000},
408                                             {10.499, 10499},
409                                             {21.345, 21345},
410                                             {99.999, 99999},
411                                             {1234.0, 100000},
412                                             {HUGE_VAL, 100000},
413                                             {INFINITY, 100000},
414                                             {NAN, 100000}}) {
415     ResetPolicy(bpk.first);
416     auto bfp = GetBloomFilterPolicy();
417     EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey());
418     EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey());
419 
420     double computed = bpk.first;
421     // This transforms e.g. 9.5 -> 9.499999999999998, which we still
422     // round to 10 for whole bits per key.
423     computed += 0.5;
424     computed /= 1234567.0;
425     computed *= 1234567.0;
426     computed -= 0.5;
427     some_computed_less_than_denoted |= (computed < bpk.first);
428     ResetPolicy(computed);
429     bfp = GetBloomFilterPolicy();
430     EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey());
431     EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey());
432 
433     auto bits_builder = GetBuiltinFilterBitsBuilder();
434 
435     size_t n = 1;
436     size_t space = 0;
437     for (; n < 1000000; n += 1 + n / 1000) {
438       // Ensure consistency between CalculateSpace and ApproximateNumEntries
439       space = bits_builder->CalculateSpace(n);
440       size_t n2 = bits_builder->ApproximateNumEntries(space);
441       EXPECT_GE(n2, n);
442       size_t space2 = bits_builder->CalculateSpace(n2);
443       if (n > 12000 && GetParam() == BloomFilterPolicy::kStandard128Ribbon) {
444         // TODO(peterd): better approximation?
445         EXPECT_GE(space2, space);
446         EXPECT_LE(space2 * 0.998, space * 1.0);
447       } else {
448         EXPECT_EQ(space2, space);
449       }
450     }
451     // Until size_t overflow
452     for (; n < (n + n / 3); n += n / 3) {
453       // Ensure space computation is not overflowing; capped is OK
454       size_t space2 = bits_builder->CalculateSpace(n);
455       EXPECT_GE(space2, space);
456       space = space2;
457     }
458   }
459   // Check that the compiler hasn't optimized our computation into nothing
460   EXPECT_TRUE(some_computed_less_than_denoted);
461   ResetPolicy();
462 }
463 
TEST_P(FullBloomTest,FullEmptyFilter)464 TEST_P(FullBloomTest, FullEmptyFilter) {
465   // Empty filter is not match, at this level
466   ASSERT_TRUE(!Matches("hello"));
467   ASSERT_TRUE(!Matches("world"));
468 }
469 
TEST_P(FullBloomTest,FullSmall)470 TEST_P(FullBloomTest, FullSmall) {
471   Add("hello");
472   Add("world");
473   ASSERT_TRUE(Matches("hello"));
474   ASSERT_TRUE(Matches("world"));
475   ASSERT_TRUE(!Matches("x"));
476   ASSERT_TRUE(!Matches("foo"));
477 }
478 
TEST_P(FullBloomTest,FullVaryingLengths)479 TEST_P(FullBloomTest, FullVaryingLengths) {
480   char buffer[sizeof(int)];
481 
482   // Count number of filters that significantly exceed the false positive rate
483   int mediocre_filters = 0;
484   int good_filters = 0;
485 
486   for (int length = 1; length <= 10000; length = NextLength(length)) {
487     Reset();
488     for (int i = 0; i < length; i++) {
489       Add(Key(i, buffer));
490     }
491     Build();
492 
493     EXPECT_LE(FilterSize(), (size_t)((length * FLAGS_bits_per_key / 8) +
494                                      CACHE_LINE_SIZE * 2 + 5));
495 
496     // All added keys must match
497     for (int i = 0; i < length; i++) {
498       ASSERT_TRUE(Matches(Key(i, buffer)))
499           << "Length " << length << "; key " << i;
500     }
501 
502     // Check false positive rate
503     double rate = FalsePositiveRate();
504     if (kVerbose >= 1) {
505       fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
506               rate*100.0, length, static_cast<int>(FilterSize()));
507     }
508     if (FLAGS_bits_per_key == 10) {
509       EXPECT_LE(rate, 0.02);  // Must not be over 2%
510       if (rate > 0.0125) {
511         mediocre_filters++;  // Allowed, but not too often
512       } else {
513         good_filters++;
514       }
515     }
516   }
517   if (kVerbose >= 1) {
518     fprintf(stderr, "Filters: %d good, %d mediocre\n",
519             good_filters, mediocre_filters);
520   }
521   EXPECT_LE(mediocre_filters, good_filters / 5);
522 }
523 
TEST_P(FullBloomTest,OptimizeForMemory)524 TEST_P(FullBloomTest, OptimizeForMemory) {
525   char buffer[sizeof(int)];
526   for (bool offm : {true, false}) {
527     table_options_.optimize_filters_for_memory = offm;
528     ResetPolicy();
529     Random32 rnd(12345);
530     uint64_t total_size = 0;
531     uint64_t total_mem = 0;
532     int64_t total_keys = 0;
533     double total_fp_rate = 0;
534     constexpr int nfilters = 100;
535     for (int i = 0; i < nfilters; ++i) {
536       int nkeys = static_cast<int>(rnd.Uniformish(10000)) + 100;
537       Reset();
538       for (int j = 0; j < nkeys; ++j) {
539         Add(Key(j, buffer));
540       }
541       Build();
542       size_t size = FilterData().size();
543       total_size += size;
544       // optimize_filters_for_memory currently depends on malloc_usable_size
545       // but we run the rest of the test to ensure no bad behavior without it.
546 #ifdef ROCKSDB_MALLOC_USABLE_SIZE
547       size = malloc_usable_size(const_cast<char*>(FilterData().data()));
548 #endif  // ROCKSDB_MALLOC_USABLE_SIZE
549       total_mem += size;
550       total_keys += nkeys;
551       total_fp_rate += FalsePositiveRate();
552     }
553     if (FLAGS_bits_per_key == 10) {
554       EXPECT_LE(total_fp_rate / double{nfilters}, 0.011);
555       EXPECT_GE(total_fp_rate / double{nfilters}, 0.008);
556     }
557 
558     int64_t ex_min_total_size = int64_t{FLAGS_bits_per_key} * total_keys / 8;
559     if (GetParam() == BloomFilterPolicy::kStandard128Ribbon) {
560       // ~ 30% savings vs. Bloom filter
561       ex_min_total_size = 7 * ex_min_total_size / 10;
562     }
563     EXPECT_GE(static_cast<int64_t>(total_size), ex_min_total_size);
564 
565     int64_t blocked_bloom_overhead = nfilters * (CACHE_LINE_SIZE + 5);
566     if (GetParam() == BloomFilterPolicy::kLegacyBloom) {
567       // this config can add extra cache line to make odd number
568       blocked_bloom_overhead += nfilters * CACHE_LINE_SIZE;
569     }
570 
571     EXPECT_GE(total_mem, total_size);
572 
573     // optimize_filters_for_memory not implemented with legacy Bloom
574     if (offm && GetParam() != BloomFilterPolicy::kLegacyBloom) {
575       // This value can include a small extra penalty for kExtraPadding
576       fprintf(stderr, "Internal fragmentation (optimized): %g%%\n",
577               (total_mem - total_size) * 100.0 / total_size);
578       // Less than 1% internal fragmentation
579       EXPECT_LE(total_mem, total_size * 101 / 100);
580       // Up to 2% storage penalty
581       EXPECT_LE(static_cast<int64_t>(total_size),
582                 ex_min_total_size * 102 / 100 + blocked_bloom_overhead);
583     } else {
584       fprintf(stderr, "Internal fragmentation (not optimized): %g%%\n",
585               (total_mem - total_size) * 100.0 / total_size);
586       // TODO: add control checks for more allocators?
587 #ifdef ROCKSDB_JEMALLOC
588       fprintf(stderr, "Jemalloc detected? %d\n", HasJemalloc());
589       if (HasJemalloc()) {
590 #ifdef ROCKSDB_MALLOC_USABLE_SIZE
591         // More than 5% internal fragmentation
592         EXPECT_GE(total_mem, total_size * 105 / 100);
593 #endif  // ROCKSDB_MALLOC_USABLE_SIZE
594       }
595 #endif  // ROCKSDB_JEMALLOC
596       // No storage penalty, just usual overhead
597       EXPECT_LE(static_cast<int64_t>(total_size),
598                 ex_min_total_size + blocked_bloom_overhead);
599     }
600   }
601 }
602 
603 namespace {
SelectByCacheLineSize(uint32_t for64,uint32_t for128,uint32_t for256)604 inline uint32_t SelectByCacheLineSize(uint32_t for64, uint32_t for128,
605                                       uint32_t for256) {
606   (void)for64;
607   (void)for128;
608   (void)for256;
609 #if CACHE_LINE_SIZE == 64
610   return for64;
611 #elif CACHE_LINE_SIZE == 128
612   return for128;
613 #elif CACHE_LINE_SIZE == 256
614   return for256;
615 #else
616 #error "CACHE_LINE_SIZE unknown or unrecognized"
617 #endif
618 }
619 }  // namespace
620 
621 // Ensure the implementation doesn't accidentally change in an
622 // incompatible way. This test doesn't check the reading side
623 // (FirstFPs/PackedMatches) for LegacyBloom because it requires the
624 // ability to read filters generated using other cache line sizes.
625 // See RawSchema.
TEST_P(FullBloomTest,Schema)626 TEST_P(FullBloomTest, Schema) {
627 #define EXPECT_EQ_Bloom(a, b)                                  \
628   {                                                            \
629     if (GetParam() != BloomFilterPolicy::kStandard128Ribbon) { \
630       EXPECT_EQ(a, b);                                         \
631     }                                                          \
632   }
633 #define EXPECT_EQ_Ribbon(a, b)                                 \
634   {                                                            \
635     if (GetParam() == BloomFilterPolicy::kStandard128Ribbon) { \
636       EXPECT_EQ(a, b);                                         \
637     }                                                          \
638   }
639 #define EXPECT_EQ_FastBloom(a, b)                           \
640   {                                                         \
641     if (GetParam() == BloomFilterPolicy::kFastLocalBloom) { \
642       EXPECT_EQ(a, b);                                      \
643     }                                                       \
644   }
645 #define EXPECT_EQ_LegacyBloom(a, b)                      \
646   {                                                      \
647     if (GetParam() == BloomFilterPolicy::kLegacyBloom) { \
648       EXPECT_EQ(a, b);                                   \
649     }                                                    \
650   }
651 #define EXPECT_EQ_NotLegacy(a, b)                        \
652   {                                                      \
653     if (GetParam() != BloomFilterPolicy::kLegacyBloom) { \
654       EXPECT_EQ(a, b);                                   \
655     }                                                    \
656   }
657 
658   char buffer[sizeof(int)];
659 
660   // First do a small number of keys, where Ribbon config will fall back on
661   // fast Bloom filter and generate the same data
662   ResetPolicy(5);  // num_probes = 3
663   for (int key = 0; key < 87; key++) {
664     Add(Key(key, buffer));
665   }
666   Build();
667   EXPECT_EQ(GetNumProbesFromFilterData(), 3);
668 
669   EXPECT_EQ_NotLegacy(BloomHash(FilterData()), 4130687756U);
670 
671   EXPECT_EQ_NotLegacy("31,38,40,43,61,83,86,112,125,131", FirstFPs(10));
672 
673   // Now use enough keys so that changing bits / key by 1 is guaranteed to
674   // change number of allocated cache lines. So keys > max cache line bits.
675 
676   // Note that the first attempted Ribbon seed is determined by the hash
677   // of the first key added (for pseudorandomness in practice, determinism in
678   // testing)
679 
680   ResetPolicy(2);  // num_probes = 1
681   for (int key = 0; key < 2087; key++) {
682     Add(Key(key, buffer));
683   }
684   Build();
685   EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 1);
686   EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
687 
688   EXPECT_EQ_LegacyBloom(
689       BloomHash(FilterData()),
690       SelectByCacheLineSize(1567096579, 1964771444, 2659542661U));
691   EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3817481309U);
692   EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1705851228);
693 
694   EXPECT_EQ_FastBloom("11,13,17,25,29,30,35,37,45,53", FirstFPs(10));
695   EXPECT_EQ_Ribbon("3,8,10,17,19,20,23,28,31,32", FirstFPs(10));
696 
697   ResetPolicy(3);  // num_probes = 2
698   for (int key = 0; key < 2087; key++) {
699     Add(Key(key, buffer));
700   }
701   Build();
702   EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 2);
703   EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
704 
705   EXPECT_EQ_LegacyBloom(
706       BloomHash(FilterData()),
707       SelectByCacheLineSize(2707206547U, 2571983456U, 218344685));
708   EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2807269961U);
709   EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1095342358);
710 
711   EXPECT_EQ_FastBloom("4,15,17,24,27,28,29,53,63,70", FirstFPs(10));
712   EXPECT_EQ_Ribbon("3,17,20,28,32,33,36,43,49,54", FirstFPs(10));
713 
714   ResetPolicy(5);  // num_probes = 3
715   for (int key = 0; key < 2087; key++) {
716     Add(Key(key, buffer));
717   }
718   Build();
719   EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 3);
720   EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
721 
722   EXPECT_EQ_LegacyBloom(
723       BloomHash(FilterData()),
724       SelectByCacheLineSize(515748486, 94611728, 2436112214U));
725   EXPECT_EQ_FastBloom(BloomHash(FilterData()), 204628445);
726   EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3971337699U);
727 
728   EXPECT_EQ_FastBloom("15,24,29,39,53,87,89,100,103,104", FirstFPs(10));
729   EXPECT_EQ_Ribbon("3,33,36,43,67,70,76,78,84,102", FirstFPs(10));
730 
731   ResetPolicy(8);  // num_probes = 5
732   for (int key = 0; key < 2087; key++) {
733     Add(Key(key, buffer));
734   }
735   Build();
736   EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 5);
737   EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
738 
739   EXPECT_EQ_LegacyBloom(
740       BloomHash(FilterData()),
741       SelectByCacheLineSize(1302145999, 2811644657U, 756553699));
742   EXPECT_EQ_FastBloom(BloomHash(FilterData()), 355564975);
743   EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3651449053U);
744 
745   EXPECT_EQ_FastBloom("16,60,66,126,220,238,244,256,265,287", FirstFPs(10));
746   EXPECT_EQ_Ribbon("33,187,203,296,300,322,411,419,547,582", FirstFPs(10));
747 
748   ResetPolicy(9);  // num_probes = 6
749   for (int key = 0; key < 2087; key++) {
750     Add(Key(key, buffer));
751   }
752   Build();
753   EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
754   EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
755 
756   EXPECT_EQ_LegacyBloom(
757       BloomHash(FilterData()),
758       SelectByCacheLineSize(2092755149, 661139132, 1182970461));
759   EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2137566013U);
760   EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1005676675);
761 
762   EXPECT_EQ_FastBloom("156,367,791,872,945,1015,1139,1159,1265", FirstFPs(9));
763   EXPECT_EQ_Ribbon("33,187,203,296,411,419,604,612,615,619", FirstFPs(10));
764 
765   ResetPolicy(11);  // num_probes = 7
766   for (int key = 0; key < 2087; key++) {
767     Add(Key(key, buffer));
768   }
769   Build();
770   EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 7);
771   EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
772 
773   EXPECT_EQ_LegacyBloom(
774       BloomHash(FilterData()),
775       SelectByCacheLineSize(3755609649U, 1812694762, 1449142939));
776   EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2561502687U);
777   EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3129900846U);
778 
779   EXPECT_EQ_FastBloom("34,74,130,236,643,882,962,1015,1035,1110", FirstFPs(10));
780   EXPECT_EQ_Ribbon("411,419,623,665,727,794,955,1052,1323,1330", FirstFPs(10));
781 
782   // This used to be 9 probes, but 8 is a better choice for speed,
783   // especially with SIMD groups of 8 probes, with essentially no
784   // change in FP rate.
785   // FP rate @ 9 probes, old Bloom: 0.4321%
786   // FP rate @ 9 probes, new Bloom: 0.1846%
787   // FP rate @ 8 probes, new Bloom: 0.1843%
788   ResetPolicy(14);  // num_probes = 8 (new), 9 (old)
789   for (int key = 0; key < 2087; key++) {
790     Add(Key(key, buffer));
791   }
792   Build();
793   EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 9);
794   EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 8);
795   EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
796 
797   EXPECT_EQ_LegacyBloom(
798       BloomHash(FilterData()),
799       SelectByCacheLineSize(178861123, 379087593, 2574136516U));
800   EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3709876890U);
801   EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1855638875);
802 
803   EXPECT_EQ_FastBloom("130,240,522,565,989,2002,2526,3147,3543", FirstFPs(9));
804   EXPECT_EQ_Ribbon("665,727,1323,1755,3866,4232,4442,4492,4736", FirstFPs(9));
805 
806   // This used to be 11 probes, but 9 is a better choice for speed
807   // AND accuracy.
808   // FP rate @ 11 probes, old Bloom: 0.3571%
809   // FP rate @ 11 probes, new Bloom: 0.0884%
810   // FP rate @  9 probes, new Bloom: 0.0843%
811   ResetPolicy(16);  // num_probes = 9 (new), 11 (old)
812   for (int key = 0; key < 2087; key++) {
813     Add(Key(key, buffer));
814   }
815   Build();
816   EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 11);
817   EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 9);
818   EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
819 
820   EXPECT_EQ_LegacyBloom(
821       BloomHash(FilterData()),
822       SelectByCacheLineSize(1129406313, 3049154394U, 1727750964));
823   EXPECT_EQ_FastBloom(BloomHash(FilterData()), 1087138490);
824   EXPECT_EQ_Ribbon(BloomHash(FilterData()), 459379967);
825 
826   EXPECT_EQ_FastBloom("3299,3611,3916,6620,7822,8079,8482,8942", FirstFPs(8));
827   EXPECT_EQ_Ribbon("727,1323,1755,4442,4736,5386,6974,7154,8222", FirstFPs(9));
828 
829   ResetPolicy(10);  // num_probes = 6, but different memory ratio vs. 9
830   for (int key = 0; key < 2087; key++) {
831     Add(Key(key, buffer));
832   }
833   Build();
834   EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
835   EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
836 
837   EXPECT_EQ_LegacyBloom(
838       BloomHash(FilterData()),
839       SelectByCacheLineSize(1478976371, 2910591341U, 1182970461));
840   EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2498541272U);
841   EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1273231667);
842 
843   EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9));
844   EXPECT_EQ_Ribbon("296,411,419,612,619,623,630,665,686,727", FirstFPs(10));
845 
846   ResetPolicy(10);
847   for (int key = /*CHANGED*/ 1; key < 2087; key++) {
848     Add(Key(key, buffer));
849   }
850   Build();
851   EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
852   EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), /*CHANGED*/ 184);
853 
854   EXPECT_EQ_LegacyBloom(
855       BloomHash(FilterData()),
856       SelectByCacheLineSize(4205696321U, 1132081253U, 2385981855U));
857   EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2058382345U);
858   EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3007790572U);
859 
860   EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9));
861   EXPECT_EQ_Ribbon("33,152,383,497,589,633,737,781,911,990", FirstFPs(10));
862 
863   ResetPolicy(10);
864   for (int key = 1; key < /*CHANGED*/ 2088; key++) {
865     Add(Key(key, buffer));
866   }
867   Build();
868   EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
869   EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184);
870 
871   EXPECT_EQ_LegacyBloom(
872       BloomHash(FilterData()),
873       SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
874   EXPECT_EQ_FastBloom(BloomHash(FilterData()), 23699164);
875   EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1942323379);
876 
877   EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9));
878   EXPECT_EQ_Ribbon("33,95,360,589,737,911,990,1048,1081,1414", FirstFPs(10));
879 
880   // With new fractional bits_per_key, check that we are rounding to
881   // whole bits per key for old Bloom filters but fractional for
882   // new Bloom filter.
883   ResetPolicy(9.5);
884   for (int key = 1; key < 2088; key++) {
885     Add(Key(key, buffer));
886   }
887   Build();
888   EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
889   EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184);
890 
891   EXPECT_EQ_LegacyBloom(
892       BloomHash(FilterData()),
893       /*SAME*/ SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
894   EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3166884174U);
895   EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1148258663);
896 
897   EXPECT_EQ_FastBloom("126,156,367,444,458,791,813,976,1015", FirstFPs(9));
898   EXPECT_EQ_Ribbon("33,54,95,360,589,693,737,911,990,1048", FirstFPs(10));
899 
900   ResetPolicy(10.499);
901   for (int key = 1; key < 2088; key++) {
902     Add(Key(key, buffer));
903   }
904   Build();
905   EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 6);
906   EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 7);
907   EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184);
908 
909   EXPECT_EQ_LegacyBloom(
910       BloomHash(FilterData()),
911       /*SAME*/ SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
912   EXPECT_EQ_FastBloom(BloomHash(FilterData()), 4098502778U);
913   EXPECT_EQ_Ribbon(BloomHash(FilterData()), 792138188);
914 
915   EXPECT_EQ_FastBloom("16,236,240,472,1015,1045,1111,1409,1465", FirstFPs(9));
916   EXPECT_EQ_Ribbon("33,95,360,589,737,990,1048,1081,1414,1643", FirstFPs(10));
917 
918   ResetPolicy();
919 }
920 
921 // A helper class for testing custom or corrupt filter bits as read by
922 // built-in FilterBitsReaders.
923 struct RawFilterTester {
924   // Buffer, from which we always return a tail Slice, so the
925   // last five bytes are always the metadata bytes.
926   std::array<char, 3000> data_;
927   // Points five bytes from the end
928   char* metadata_ptr_;
929 
RawFilterTesterROCKSDB_NAMESPACE::RawFilterTester930   RawFilterTester() : metadata_ptr_(&*(data_.end() - 5)) {}
931 
ResetNoFillROCKSDB_NAMESPACE::RawFilterTester932   Slice ResetNoFill(uint32_t len_without_metadata, uint32_t num_lines,
933                      uint32_t num_probes) {
934     metadata_ptr_[0] = static_cast<char>(num_probes);
935     EncodeFixed32(metadata_ptr_ + 1, num_lines);
936     uint32_t len = len_without_metadata + /*metadata*/ 5;
937     assert(len <= data_.size());
938     return Slice(metadata_ptr_ - len_without_metadata, len);
939   }
940 
ResetROCKSDB_NAMESPACE::RawFilterTester941   Slice Reset(uint32_t len_without_metadata, uint32_t num_lines,
942                uint32_t num_probes, bool fill_ones) {
943     data_.fill(fill_ones ? 0xff : 0);
944     return ResetNoFill(len_without_metadata, num_lines, num_probes);
945   }
946 
ResetWeirdFillROCKSDB_NAMESPACE::RawFilterTester947   Slice ResetWeirdFill(uint32_t len_without_metadata, uint32_t num_lines,
948                         uint32_t num_probes) {
949     for (uint32_t i = 0; i < data_.size(); ++i) {
950       data_[i] = static_cast<char>(0x7b7b >> (i % 7));
951     }
952     return ResetNoFill(len_without_metadata, num_lines, num_probes);
953   }
954 };
955 
TEST_P(FullBloomTest,RawSchema)956 TEST_P(FullBloomTest, RawSchema) {
957   RawFilterTester cft;
958   // Legacy Bloom configurations
959   // Two probes, about 3/4 bits set: ~50% "FP" rate
960   // One 256-byte cache line.
961   OpenRaw(cft.ResetWeirdFill(256, 1, 2));
962   EXPECT_EQ(uint64_t{11384799501900898790U}, PackedMatches());
963 
964   // Two 128-byte cache lines.
965   OpenRaw(cft.ResetWeirdFill(256, 2, 2));
966   EXPECT_EQ(uint64_t{10157853359773492589U}, PackedMatches());
967 
968   // Four 64-byte cache lines.
969   OpenRaw(cft.ResetWeirdFill(256, 4, 2));
970   EXPECT_EQ(uint64_t{7123594913907464682U}, PackedMatches());
971 
972   // Fast local Bloom configurations (marker 255 -> -1)
973   // Two probes, about 3/4 bits set: ~50% "FP" rate
974   // Four 64-byte cache lines.
975   OpenRaw(cft.ResetWeirdFill(256, 2U << 8, 255));
976   EXPECT_EQ(uint64_t{9957045189927952471U}, PackedMatches());
977 
978   // Ribbon configurations (marker 254 -> -2)
979 
980   // Even though the builder never builds configurations this
981   // small (preferring Bloom), we can test that the configuration
982   // can be read, for possible future-proofing.
983 
984   // 256 slots, one result column = 32 bytes (2 blocks, seed 0)
985   // ~50% FP rate:
986   // 0b0101010111110101010000110000011011011111100100001110010011101010
987   OpenRaw(cft.ResetWeirdFill(32, 2U << 8, 254));
988   EXPECT_EQ(uint64_t{6193930559317665002U}, PackedMatches());
989 
990   // 256 slots, three-to-four result columns = 112 bytes
991   // ~ 1 in 10 FP rate:
992   // 0b0000000000100000000000000000000001000001000000010000101000000000
993   OpenRaw(cft.ResetWeirdFill(112, 2U << 8, 254));
994   EXPECT_EQ(uint64_t{9007200345328128U}, PackedMatches());
995 }
996 
TEST_P(FullBloomTest,CorruptFilters)997 TEST_P(FullBloomTest, CorruptFilters) {
998   RawFilterTester cft;
999 
1000   for (bool fill : {false, true}) {
1001     // Legacy Bloom configurations
1002     // Good filter bits - returns same as fill
1003     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 6, fill));
1004     ASSERT_EQ(fill, Matches("hello"));
1005     ASSERT_EQ(fill, Matches("world"));
1006 
1007     // Good filter bits - returns same as fill
1008     OpenRaw(cft.Reset(CACHE_LINE_SIZE * 3, 3, 6, fill));
1009     ASSERT_EQ(fill, Matches("hello"));
1010     ASSERT_EQ(fill, Matches("world"));
1011 
1012     // Good filter bits - returns same as fill
1013     // 256 is unusual but legal cache line size
1014     OpenRaw(cft.Reset(256 * 3, 3, 6, fill));
1015     ASSERT_EQ(fill, Matches("hello"));
1016     ASSERT_EQ(fill, Matches("world"));
1017 
1018     // Good filter bits - returns same as fill
1019     // 30 should be max num_probes
1020     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 30, fill));
1021     ASSERT_EQ(fill, Matches("hello"));
1022     ASSERT_EQ(fill, Matches("world"));
1023 
1024     // Good filter bits - returns same as fill
1025     // 1 should be min num_probes
1026     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 1, fill));
1027     ASSERT_EQ(fill, Matches("hello"));
1028     ASSERT_EQ(fill, Matches("world"));
1029 
1030     // Type 1 trivial filter bits - returns true as if FP by zero probes
1031     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 0, fill));
1032     ASSERT_TRUE(Matches("hello"));
1033     ASSERT_TRUE(Matches("world"));
1034 
1035     // Type 2 trivial filter bits - returns false as if built from zero keys
1036     OpenRaw(cft.Reset(0, 0, 6, fill));
1037     ASSERT_FALSE(Matches("hello"));
1038     ASSERT_FALSE(Matches("world"));
1039 
1040     // Type 2 trivial filter bits - returns false as if built from zero keys
1041     OpenRaw(cft.Reset(0, 37, 6, fill));
1042     ASSERT_FALSE(Matches("hello"));
1043     ASSERT_FALSE(Matches("world"));
1044 
1045     // Type 2 trivial filter bits - returns false as 0 size trumps 0 probes
1046     OpenRaw(cft.Reset(0, 0, 0, fill));
1047     ASSERT_FALSE(Matches("hello"));
1048     ASSERT_FALSE(Matches("world"));
1049 
1050     // Bad filter bits - returns true for safety
1051     // No solution to 0 * x == CACHE_LINE_SIZE
1052     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 0, 6, fill));
1053     ASSERT_TRUE(Matches("hello"));
1054     ASSERT_TRUE(Matches("world"));
1055 
1056     // Bad filter bits - returns true for safety
1057     // Can't have 3 * x == 4 for integer x
1058     OpenRaw(cft.Reset(4, 3, 6, fill));
1059     ASSERT_TRUE(Matches("hello"));
1060     ASSERT_TRUE(Matches("world"));
1061 
1062     // Bad filter bits - returns true for safety
1063     // 97 bytes is not a power of two, so not a legal cache line size
1064     OpenRaw(cft.Reset(97 * 3, 3, 6, fill));
1065     ASSERT_TRUE(Matches("hello"));
1066     ASSERT_TRUE(Matches("world"));
1067 
1068     // Bad filter bits - returns true for safety
1069     // 65 bytes is not a power of two, so not a legal cache line size
1070     OpenRaw(cft.Reset(65 * 3, 3, 6, fill));
1071     ASSERT_TRUE(Matches("hello"));
1072     ASSERT_TRUE(Matches("world"));
1073 
1074     // Bad filter bits - returns false as if built from zero keys
1075     // < 5 bytes overall means missing even metadata
1076     OpenRaw(cft.Reset(static_cast<uint32_t>(-1), 3, 6, fill));
1077     ASSERT_FALSE(Matches("hello"));
1078     ASSERT_FALSE(Matches("world"));
1079 
1080     OpenRaw(cft.Reset(static_cast<uint32_t>(-5), 3, 6, fill));
1081     ASSERT_FALSE(Matches("hello"));
1082     ASSERT_FALSE(Matches("world"));
1083 
1084     // Dubious filter bits - returns same as fill (for now)
1085     // 31 is not a useful num_probes, nor generated by RocksDB unless directly
1086     // using filter bits API without BloomFilterPolicy.
1087     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 31, fill));
1088     ASSERT_EQ(fill, Matches("hello"));
1089     ASSERT_EQ(fill, Matches("world"));
1090 
1091     // Dubious filter bits - returns same as fill (for now)
1092     // Similar, with 127, largest positive char
1093     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 127, fill));
1094     ASSERT_EQ(fill, Matches("hello"));
1095     ASSERT_EQ(fill, Matches("world"));
1096 
1097     // Dubious filter bits - returns true (for now)
1098     // num_probes set to 128 / -128, lowest negative char
1099     // NB: Bug in implementation interprets this as negative and has same
1100     // effect as zero probes, but effectively reserves negative char values
1101     // for future use.
1102     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 128, fill));
1103     ASSERT_TRUE(Matches("hello"));
1104     ASSERT_TRUE(Matches("world"));
1105 
1106     // Dubious filter bits - returns true (for now)
1107     // Similar, with 253 / -3
1108     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 253, fill));
1109     ASSERT_TRUE(Matches("hello"));
1110     ASSERT_TRUE(Matches("world"));
1111 
1112     // #########################################################
1113     // Fast local Bloom configurations (marker 255 -> -1)
1114     // Good config with six probes
1115     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 6U << 8, 255, fill));
1116     ASSERT_EQ(fill, Matches("hello"));
1117     ASSERT_EQ(fill, Matches("world"));
1118 
1119     // Becomes bad/reserved config (always true) if any other byte set
1120     OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | 1U, 255, fill));
1121     ASSERT_TRUE(Matches("hello"));
1122     ASSERT_TRUE(Matches("world"));
1123 
1124     OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | (1U << 16), 255, fill));
1125     ASSERT_TRUE(Matches("hello"));
1126     ASSERT_TRUE(Matches("world"));
1127 
1128     OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | (1U << 24), 255, fill));
1129     ASSERT_TRUE(Matches("hello"));
1130     ASSERT_TRUE(Matches("world"));
1131 
1132     // Good config, max 30 probes
1133     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 30U << 8, 255, fill));
1134     ASSERT_EQ(fill, Matches("hello"));
1135     ASSERT_EQ(fill, Matches("world"));
1136 
1137     // Bad/reserved config (always true) if more than 30
1138     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 31U << 8, 255, fill));
1139     ASSERT_TRUE(Matches("hello"));
1140     ASSERT_TRUE(Matches("world"));
1141 
1142     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 33U << 8, 255, fill));
1143     ASSERT_TRUE(Matches("hello"));
1144     ASSERT_TRUE(Matches("world"));
1145 
1146     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 66U << 8, 255, fill));
1147     ASSERT_TRUE(Matches("hello"));
1148     ASSERT_TRUE(Matches("world"));
1149 
1150     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 130U << 8, 255, fill));
1151     ASSERT_TRUE(Matches("hello"));
1152     ASSERT_TRUE(Matches("world"));
1153   }
1154 
1155   // #########################################################
1156   // Ribbon configurations (marker 254 -> -2)
1157   // ("fill" doesn't work to detect good configurations, we just
1158   // have to rely on TN probability)
1159 
1160   // Good: 2 blocks * 16 bytes / segment * 4 columns = 128 bytes
1161   // seed = 123
1162   OpenRaw(cft.Reset(128, (2U << 8) + 123U, 254, false));
1163   ASSERT_FALSE(Matches("hello"));
1164   ASSERT_FALSE(Matches("world"));
1165 
1166   // Good: 2 blocks * 16 bytes / segment * 8 columns = 256 bytes
1167   OpenRaw(cft.Reset(256, (2U << 8) + 123U, 254, false));
1168   ASSERT_FALSE(Matches("hello"));
1169   ASSERT_FALSE(Matches("world"));
1170 
1171   // Surprisingly OK: 5000 blocks (640,000 slots) in only 1024 bits
1172   // -> average close to 0 columns
1173   OpenRaw(cft.Reset(128, (5000U << 8) + 123U, 254, false));
1174   // *Almost* all FPs
1175   ASSERT_TRUE(Matches("hello"));
1176   ASSERT_TRUE(Matches("world"));
1177   // Need many queries to find a "true negative"
1178   for (int i = 0; Matches(ToString(i)); ++i) {
1179     ASSERT_LT(i, 1000);
1180   }
1181 
1182   // Bad: 1 block not allowed (for implementation detail reasons)
1183   OpenRaw(cft.Reset(128, (1U << 8) + 123U, 254, false));
1184   ASSERT_TRUE(Matches("hello"));
1185   ASSERT_TRUE(Matches("world"));
1186 
1187   // Bad: 0 blocks not allowed
1188   OpenRaw(cft.Reset(128, (0U << 8) + 123U, 254, false));
1189   ASSERT_TRUE(Matches("hello"));
1190   ASSERT_TRUE(Matches("world"));
1191 }
1192 
1193 INSTANTIATE_TEST_CASE_P(Full, FullBloomTest,
1194                         testing::Values(BloomFilterPolicy::kLegacyBloom,
1195                                         BloomFilterPolicy::kFastLocalBloom,
1196                                         BloomFilterPolicy::kStandard128Ribbon));
1197 
1198 }  // namespace ROCKSDB_NAMESPACE
1199 
main(int argc,char ** argv)1200 int main(int argc, char** argv) {
1201   ::testing::InitGoogleTest(&argc, argv);
1202   ParseCommandLineFlags(&argc, &argv, true);
1203 
1204   return RUN_ALL_TESTS();
1205 }
1206 
1207 #endif  // GFLAGS
1208