1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 
10 #ifndef GFLAGS
11 #include <cstdio>
main()12 int main() {
13   fprintf(stderr, "Please install gflags to run this test... Skipping...\n");
14   return 0;
15 }
16 #else
17 
18 #include <array>
19 #include <cmath>
20 #include <vector>
21 
22 #include "logging/logging.h"
23 #include "memory/arena.h"
24 #include "rocksdb/filter_policy.h"
25 #include "table/block_based/filter_policy_internal.h"
26 #include "test_util/testharness.h"
27 #include "test_util/testutil.h"
28 #include "util/gflags_compat.h"
29 #include "util/hash.h"
30 
31 using GFLAGS_NAMESPACE::ParseCommandLineFlags;
32 
33 DEFINE_int32(bits_per_key, 10, "");
34 
35 namespace ROCKSDB_NAMESPACE {
36 
37 static const int kVerbose = 1;
38 
Key(int i,char * buffer)39 static Slice Key(int i, char* buffer) {
40   std::string s;
41   PutFixed32(&s, static_cast<uint32_t>(i));
42   memcpy(buffer, s.c_str(), sizeof(i));
43   return Slice(buffer, sizeof(i));
44 }
45 
NextLength(int length)46 static int NextLength(int length) {
47   if (length < 10) {
48     length += 1;
49   } else if (length < 100) {
50     length += 10;
51   } else if (length < 1000) {
52     length += 100;
53   } else {
54     length += 1000;
55   }
56   return length;
57 }
58 
59 class BlockBasedBloomTest : public testing::Test {
60  private:
61   std::unique_ptr<const FilterPolicy> policy_;
62   std::string filter_;
63   std::vector<std::string> keys_;
64 
65  public:
BlockBasedBloomTest()66   BlockBasedBloomTest() { ResetPolicy(); }
67 
Reset()68   void Reset() {
69     keys_.clear();
70     filter_.clear();
71   }
72 
ResetPolicy(double bits_per_key)73   void ResetPolicy(double bits_per_key) {
74     policy_.reset(new BloomFilterPolicy(bits_per_key,
75                                         BloomFilterPolicy::kDeprecatedBlock));
76     Reset();
77   }
78 
ResetPolicy()79   void ResetPolicy() { ResetPolicy(FLAGS_bits_per_key); }
80 
Add(const Slice & s)81   void Add(const Slice& s) {
82     keys_.push_back(s.ToString());
83   }
84 
Build()85   void Build() {
86     std::vector<Slice> key_slices;
87     for (size_t i = 0; i < keys_.size(); i++) {
88       key_slices.push_back(Slice(keys_[i]));
89     }
90     filter_.clear();
91     policy_->CreateFilter(&key_slices[0], static_cast<int>(key_slices.size()),
92                           &filter_);
93     keys_.clear();
94     if (kVerbose >= 2) DumpFilter();
95   }
96 
FilterSize() const97   size_t FilterSize() const {
98     return filter_.size();
99   }
100 
FilterData() const101   Slice FilterData() const { return Slice(filter_); }
102 
DumpFilter()103   void DumpFilter() {
104     fprintf(stderr, "F(");
105     for (size_t i = 0; i+1 < filter_.size(); i++) {
106       const unsigned int c = static_cast<unsigned int>(filter_[i]);
107       for (int j = 0; j < 8; j++) {
108         fprintf(stderr, "%c", (c & (1 <<j)) ? '1' : '.');
109       }
110     }
111     fprintf(stderr, ")\n");
112   }
113 
Matches(const Slice & s)114   bool Matches(const Slice& s) {
115     if (!keys_.empty()) {
116       Build();
117     }
118     return policy_->KeyMayMatch(s, filter_);
119   }
120 
FalsePositiveRate()121   double FalsePositiveRate() {
122     char buffer[sizeof(int)];
123     int result = 0;
124     for (int i = 0; i < 10000; i++) {
125       if (Matches(Key(i + 1000000000, buffer))) {
126         result++;
127       }
128     }
129     return result / 10000.0;
130   }
131 };
132 
TEST_F(BlockBasedBloomTest,EmptyFilter)133 TEST_F(BlockBasedBloomTest, EmptyFilter) {
134   ASSERT_TRUE(! Matches("hello"));
135   ASSERT_TRUE(! Matches("world"));
136 }
137 
TEST_F(BlockBasedBloomTest,Small)138 TEST_F(BlockBasedBloomTest, Small) {
139   Add("hello");
140   Add("world");
141   ASSERT_TRUE(Matches("hello"));
142   ASSERT_TRUE(Matches("world"));
143   ASSERT_TRUE(! Matches("x"));
144   ASSERT_TRUE(! Matches("foo"));
145 }
146 
TEST_F(BlockBasedBloomTest,VaryingLengths)147 TEST_F(BlockBasedBloomTest, VaryingLengths) {
148   char buffer[sizeof(int)];
149 
150   // Count number of filters that significantly exceed the false positive rate
151   int mediocre_filters = 0;
152   int good_filters = 0;
153 
154   for (int length = 1; length <= 10000; length = NextLength(length)) {
155     Reset();
156     for (int i = 0; i < length; i++) {
157       Add(Key(i, buffer));
158     }
159     Build();
160 
161     ASSERT_LE(FilterSize(), (size_t)((length * 10 / 8) + 40)) << length;
162 
163     // All added keys must match
164     for (int i = 0; i < length; i++) {
165       ASSERT_TRUE(Matches(Key(i, buffer)))
166           << "Length " << length << "; key " << i;
167     }
168 
169     // Check false positive rate
170     double rate = FalsePositiveRate();
171     if (kVerbose >= 1) {
172       fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
173               rate*100.0, length, static_cast<int>(FilterSize()));
174     }
175     ASSERT_LE(rate, 0.02);   // Must not be over 2%
176     if (rate > 0.0125) mediocre_filters++;  // Allowed, but not too often
177     else good_filters++;
178   }
179   if (kVerbose >= 1) {
180     fprintf(stderr, "Filters: %d good, %d mediocre\n",
181             good_filters, mediocre_filters);
182   }
183   ASSERT_LE(mediocre_filters, good_filters/5);
184 }
185 
186 // Ensure the implementation doesn't accidentally change in an
187 // incompatible way
TEST_F(BlockBasedBloomTest,Schema)188 TEST_F(BlockBasedBloomTest, Schema) {
189   char buffer[sizeof(int)];
190 
191   ResetPolicy(8);  // num_probes = 5
192   for (int key = 0; key < 87; key++) {
193     Add(Key(key, buffer));
194   }
195   Build();
196   ASSERT_EQ(BloomHash(FilterData()), 3589896109U);
197 
198   ResetPolicy(9);  // num_probes = 6
199   for (int key = 0; key < 87; key++) {
200     Add(Key(key, buffer));
201   }
202   Build();
203   ASSERT_EQ(BloomHash(FilterData()), 969445585);
204 
205   ResetPolicy(11);  // num_probes = 7
206   for (int key = 0; key < 87; key++) {
207     Add(Key(key, buffer));
208   }
209   Build();
210   ASSERT_EQ(BloomHash(FilterData()), 1694458207);
211 
212   ResetPolicy(10);  // num_probes = 6
213   for (int key = 0; key < 87; key++) {
214     Add(Key(key, buffer));
215   }
216   Build();
217   ASSERT_EQ(BloomHash(FilterData()), 2373646410U);
218 
219   ResetPolicy(10);
220   for (int key = /*CHANGED*/ 1; key < 87; key++) {
221     Add(Key(key, buffer));
222   }
223   Build();
224   ASSERT_EQ(BloomHash(FilterData()), 1908442116);
225 
226   ResetPolicy(10);
227   for (int key = 1; key < /*CHANGED*/ 88; key++) {
228     Add(Key(key, buffer));
229   }
230   Build();
231   ASSERT_EQ(BloomHash(FilterData()), 3057004015U);
232 
233   // With new fractional bits_per_key, check that we are rounding to
234   // whole bits per key for old Bloom filters.
235   ResetPolicy(9.5);  // Treated as 10
236   for (int key = 1; key < 88; key++) {
237     Add(Key(key, buffer));
238   }
239   Build();
240   ASSERT_EQ(BloomHash(FilterData()), /*SAME*/ 3057004015U);
241 
242   ResetPolicy(10.499);  // Treated as 10
243   for (int key = 1; key < 88; key++) {
244     Add(Key(key, buffer));
245   }
246   Build();
247   ASSERT_EQ(BloomHash(FilterData()), /*SAME*/ 3057004015U);
248 
249   ResetPolicy();
250 }
251 
252 // Different bits-per-byte
253 
254 class FullBloomTest : public testing::TestWithParam<BloomFilterPolicy::Mode> {
255  private:
256   BlockBasedTableOptions table_options_;
257   std::shared_ptr<const FilterPolicy>& policy_;
258   std::unique_ptr<FilterBitsBuilder> bits_builder_;
259   std::unique_ptr<FilterBitsReader> bits_reader_;
260   std::unique_ptr<const char[]> buf_;
261   size_t filter_size_;
262 
263  public:
FullBloomTest()264   FullBloomTest() : policy_(table_options_.filter_policy), filter_size_(0) {
265     ResetPolicy();
266   }
267 
GetBuiltinFilterBitsBuilder()268   BuiltinFilterBitsBuilder* GetBuiltinFilterBitsBuilder() {
269     // Throws on bad cast
270     return &dynamic_cast<BuiltinFilterBitsBuilder&>(*bits_builder_);
271   }
272 
GetBloomFilterPolicy()273   const BloomFilterPolicy* GetBloomFilterPolicy() {
274     // Throws on bad cast
275     return &dynamic_cast<const BloomFilterPolicy&>(*policy_);
276   }
277 
Reset()278   void Reset() {
279     bits_builder_.reset(BloomFilterPolicy::GetBuilderFromContext(
280         FilterBuildingContext(table_options_)));
281     bits_reader_.reset(nullptr);
282     buf_.reset(nullptr);
283     filter_size_ = 0;
284   }
285 
ResetPolicy(double bits_per_key)286   void ResetPolicy(double bits_per_key) {
287     policy_.reset(new BloomFilterPolicy(bits_per_key, GetParam()));
288     Reset();
289   }
290 
ResetPolicy()291   void ResetPolicy() { ResetPolicy(FLAGS_bits_per_key); }
292 
Add(const Slice & s)293   void Add(const Slice& s) {
294     bits_builder_->AddKey(s);
295   }
296 
OpenRaw(const Slice & s)297   void OpenRaw(const Slice& s) {
298     bits_reader_.reset(policy_->GetFilterBitsReader(s));
299   }
300 
Build()301   void Build() {
302     Slice filter = bits_builder_->Finish(&buf_);
303     bits_reader_.reset(policy_->GetFilterBitsReader(filter));
304     filter_size_ = filter.size();
305   }
306 
FilterSize() const307   size_t FilterSize() const {
308     return filter_size_;
309   }
310 
FilterData()311   Slice FilterData() { return Slice(buf_.get(), filter_size_); }
312 
GetNumProbesFromFilterData()313   int GetNumProbesFromFilterData() {
314     assert(filter_size_ >= 5);
315     int8_t raw_num_probes = static_cast<int8_t>(buf_.get()[filter_size_ - 5]);
316     if (raw_num_probes == -1) {  // New bloom filter marker
317       return static_cast<uint8_t>(buf_.get()[filter_size_ - 3]);
318     } else {
319       return raw_num_probes;
320     }
321   }
322 
Matches(const Slice & s)323   bool Matches(const Slice& s) {
324     if (bits_reader_ == nullptr) {
325       Build();
326     }
327     return bits_reader_->MayMatch(s);
328   }
329 
330   // Provides a kind of fingerprint on the Bloom filter's
331   // behavior, for reasonbly high FP rates.
PackedMatches()332   uint64_t PackedMatches() {
333     char buffer[sizeof(int)];
334     uint64_t result = 0;
335     for (int i = 0; i < 64; i++) {
336       if (Matches(Key(i + 12345, buffer))) {
337         result |= uint64_t{1} << i;
338       }
339     }
340     return result;
341   }
342 
343   // Provides a kind of fingerprint on the Bloom filter's
344   // behavior, for lower FP rates.
FirstFPs(int count)345   std::string FirstFPs(int count) {
346     char buffer[sizeof(int)];
347     std::string rv;
348     int fp_count = 0;
349     for (int i = 0; i < 1000000; i++) {
350       // Pack four match booleans into each hexadecimal digit
351       if (Matches(Key(i + 1000000, buffer))) {
352         ++fp_count;
353         rv += std::to_string(i);
354         if (fp_count == count) {
355           break;
356         }
357         rv += ',';
358       }
359     }
360     return rv;
361   }
362 
FalsePositiveRate()363   double FalsePositiveRate() {
364     char buffer[sizeof(int)];
365     int result = 0;
366     for (int i = 0; i < 10000; i++) {
367       if (Matches(Key(i + 1000000000, buffer))) {
368         result++;
369       }
370     }
371     return result / 10000.0;
372   }
373 
SelectByImpl(uint32_t for_legacy_bloom,uint32_t for_fast_local_bloom)374   uint32_t SelectByImpl(uint32_t for_legacy_bloom,
375                         uint32_t for_fast_local_bloom) {
376     switch (GetParam()) {
377       case BloomFilterPolicy::kLegacyBloom:
378         return for_legacy_bloom;
379       case BloomFilterPolicy::kFastLocalBloom:
380         return for_fast_local_bloom;
381       case BloomFilterPolicy::kDeprecatedBlock:
382       case BloomFilterPolicy::kAuto:
383           /* N/A */;
384     }
385     // otherwise
386     assert(false);
387     return 0;
388   }
389 };
390 
TEST_P(FullBloomTest,FilterSize)391 TEST_P(FullBloomTest, FilterSize) {
392   // In addition to checking the consistency of space computation, we are
393   // checking that denoted and computed doubles are interpreted as expected
394   // as bits_per_key values.
395   bool some_computed_less_than_denoted = false;
396   // Note: enforced minimum is 1 bit per key (1000 millibits), and enforced
397   // maximum is 100 bits per key (100000 millibits).
398   for (auto bpk :
399        std::vector<std::pair<double, int> >{{-HUGE_VAL, 1000},
400                                             {-INFINITY, 1000},
401                                             {0.0, 1000},
402                                             {1.234, 1234},
403                                             {3.456, 3456},
404                                             {9.5, 9500},
405                                             {10.0, 10000},
406                                             {10.499, 10499},
407                                             {21.345, 21345},
408                                             {99.999, 99999},
409                                             {1234.0, 100000},
410                                             {HUGE_VAL, 100000},
411                                             {INFINITY, 100000},
412                                             {NAN, 100000}}) {
413     ResetPolicy(bpk.first);
414     auto bfp = GetBloomFilterPolicy();
415     EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey());
416     EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey());
417 
418     double computed = bpk.first;
419     // This transforms e.g. 9.5 -> 9.499999999999998, which we still
420     // round to 10 for whole bits per key.
421     computed += 0.5;
422     computed /= 1234567.0;
423     computed *= 1234567.0;
424     computed -= 0.5;
425     some_computed_less_than_denoted |= (computed < bpk.first);
426     ResetPolicy(computed);
427     bfp = GetBloomFilterPolicy();
428     EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey());
429     EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey());
430 
431     auto bits_builder = GetBuiltinFilterBitsBuilder();
432     for (int n = 1; n < 100; n++) {
433       auto space = bits_builder->CalculateSpace(n);
434       auto n2 = bits_builder->CalculateNumEntry(space);
435       EXPECT_GE(n2, n);
436       auto space2 = bits_builder->CalculateSpace(n2);
437       EXPECT_EQ(space, space2);
438     }
439   }
440   // Check that the compiler hasn't optimized our computation into nothing
441   EXPECT_TRUE(some_computed_less_than_denoted);
442   ResetPolicy();
443 }
444 
TEST_P(FullBloomTest,FullEmptyFilter)445 TEST_P(FullBloomTest, FullEmptyFilter) {
446   // Empty filter is not match, at this level
447   ASSERT_TRUE(!Matches("hello"));
448   ASSERT_TRUE(!Matches("world"));
449 }
450 
TEST_P(FullBloomTest,FullSmall)451 TEST_P(FullBloomTest, FullSmall) {
452   Add("hello");
453   Add("world");
454   ASSERT_TRUE(Matches("hello"));
455   ASSERT_TRUE(Matches("world"));
456   ASSERT_TRUE(!Matches("x"));
457   ASSERT_TRUE(!Matches("foo"));
458 }
459 
TEST_P(FullBloomTest,FullVaryingLengths)460 TEST_P(FullBloomTest, FullVaryingLengths) {
461   char buffer[sizeof(int)];
462 
463   // Count number of filters that significantly exceed the false positive rate
464   int mediocre_filters = 0;
465   int good_filters = 0;
466 
467   for (int length = 1; length <= 10000; length = NextLength(length)) {
468     Reset();
469     for (int i = 0; i < length; i++) {
470       Add(Key(i, buffer));
471     }
472     Build();
473 
474     ASSERT_LE(FilterSize(),
475               (size_t)((length * 10 / 8) + CACHE_LINE_SIZE * 2 + 5));
476 
477     // All added keys must match
478     for (int i = 0; i < length; i++) {
479       ASSERT_TRUE(Matches(Key(i, buffer)))
480           << "Length " << length << "; key " << i;
481     }
482 
483     // Check false positive rate
484     double rate = FalsePositiveRate();
485     if (kVerbose >= 1) {
486       fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
487               rate*100.0, length, static_cast<int>(FilterSize()));
488     }
489     ASSERT_LE(rate, 0.02);   // Must not be over 2%
490     if (rate > 0.0125)
491       mediocre_filters++;  // Allowed, but not too often
492     else
493       good_filters++;
494   }
495   if (kVerbose >= 1) {
496     fprintf(stderr, "Filters: %d good, %d mediocre\n",
497             good_filters, mediocre_filters);
498   }
499   ASSERT_LE(mediocre_filters, good_filters/5);
500 }
501 
502 namespace {
SelectByCacheLineSize(uint32_t for64,uint32_t for128,uint32_t for256)503 inline uint32_t SelectByCacheLineSize(uint32_t for64, uint32_t for128,
504                                       uint32_t for256) {
505   (void)for64;
506   (void)for128;
507   (void)for256;
508 #if CACHE_LINE_SIZE == 64
509   return for64;
510 #elif CACHE_LINE_SIZE == 128
511   return for128;
512 #elif CACHE_LINE_SIZE == 256
513   return for256;
514 #else
515 #error "CACHE_LINE_SIZE unknown or unrecognized"
516 #endif
517 }
518 }  // namespace
519 
520 // Ensure the implementation doesn't accidentally change in an
521 // incompatible way. This test doesn't check the reading side
522 // (FirstFPs/PackedMatches) for LegacyBloom because it requires the
523 // ability to read filters generated using other cache line sizes.
524 // See RawSchema.
TEST_P(FullBloomTest,Schema)525 TEST_P(FullBloomTest, Schema) {
526   char buffer[sizeof(int)];
527 
528   // Use enough keys so that changing bits / key by 1 is guaranteed to
529   // change number of allocated cache lines. So keys > max cache line bits.
530 
531   ResetPolicy(2);  // num_probes = 1
532   for (int key = 0; key < 2087; key++) {
533     Add(Key(key, buffer));
534   }
535   Build();
536   EXPECT_EQ(GetNumProbesFromFilterData(), 1);
537   EXPECT_EQ(
538       BloomHash(FilterData()),
539       SelectByImpl(SelectByCacheLineSize(1567096579, 1964771444, 2659542661U),
540                    3817481309U));
541   if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
542     EXPECT_EQ("11,13,17,25,29,30,35,37,45,53", FirstFPs(10));
543   }
544 
545   ResetPolicy(3);  // num_probes = 2
546   for (int key = 0; key < 2087; key++) {
547     Add(Key(key, buffer));
548   }
549   Build();
550   EXPECT_EQ(GetNumProbesFromFilterData(), 2);
551   EXPECT_EQ(
552       BloomHash(FilterData()),
553       SelectByImpl(SelectByCacheLineSize(2707206547U, 2571983456U, 218344685),
554                    2807269961U));
555   if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
556     EXPECT_EQ("4,15,17,24,27,28,29,53,63,70", FirstFPs(10));
557   }
558 
559   ResetPolicy(5);  // num_probes = 3
560   for (int key = 0; key < 2087; key++) {
561     Add(Key(key, buffer));
562   }
563   Build();
564   EXPECT_EQ(GetNumProbesFromFilterData(), 3);
565   EXPECT_EQ(
566       BloomHash(FilterData()),
567       SelectByImpl(SelectByCacheLineSize(515748486, 94611728, 2436112214U),
568                    204628445));
569   if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
570     EXPECT_EQ("15,24,29,39,53,87,89,100,103,104", FirstFPs(10));
571   }
572 
573   ResetPolicy(8);  // num_probes = 5
574   for (int key = 0; key < 2087; key++) {
575     Add(Key(key, buffer));
576   }
577   Build();
578   EXPECT_EQ(GetNumProbesFromFilterData(), 5);
579   EXPECT_EQ(
580       BloomHash(FilterData()),
581       SelectByImpl(SelectByCacheLineSize(1302145999, 2811644657U, 756553699),
582                    355564975));
583   if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
584     EXPECT_EQ("16,60,66,126,220,238,244,256,265,287", FirstFPs(10));
585   }
586 
587   ResetPolicy(9);  // num_probes = 6
588   for (int key = 0; key < 2087; key++) {
589     Add(Key(key, buffer));
590   }
591   Build();
592   EXPECT_EQ(GetNumProbesFromFilterData(), 6);
593   EXPECT_EQ(
594       BloomHash(FilterData()),
595       SelectByImpl(SelectByCacheLineSize(2092755149, 661139132, 1182970461),
596                    2137566013U));
597   if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
598     EXPECT_EQ("156,367,791,872,945,1015,1139,1159,1265,1435", FirstFPs(10));
599   }
600 
601   ResetPolicy(11);  // num_probes = 7
602   for (int key = 0; key < 2087; key++) {
603     Add(Key(key, buffer));
604   }
605   Build();
606   EXPECT_EQ(GetNumProbesFromFilterData(), 7);
607   EXPECT_EQ(
608       BloomHash(FilterData()),
609       SelectByImpl(SelectByCacheLineSize(3755609649U, 1812694762, 1449142939),
610                    2561502687U));
611   if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
612     EXPECT_EQ("34,74,130,236,643,882,962,1015,1035,1110", FirstFPs(10));
613   }
614 
615   // This used to be 9 probes, but 8 is a better choice for speed,
616   // especially with SIMD groups of 8 probes, with essentially no
617   // change in FP rate.
618   // FP rate @ 9 probes, old Bloom: 0.4321%
619   // FP rate @ 9 probes, new Bloom: 0.1846%
620   // FP rate @ 8 probes, new Bloom: 0.1843%
621   ResetPolicy(14);  // num_probes = 8 (new), 9 (old)
622   for (int key = 0; key < 2087; key++) {
623     Add(Key(key, buffer));
624   }
625   Build();
626   EXPECT_EQ(GetNumProbesFromFilterData(), SelectByImpl(9, 8));
627   EXPECT_EQ(
628       BloomHash(FilterData()),
629       SelectByImpl(SelectByCacheLineSize(178861123, 379087593, 2574136516U),
630                    3709876890U));
631   if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
632     EXPECT_EQ("130,240,522,565,989,2002,2526,3147,3543", FirstFPs(9));
633   }
634 
635   // This used to be 11 probes, but 9 is a better choice for speed
636   // AND accuracy.
637   // FP rate @ 11 probes, old Bloom: 0.3571%
638   // FP rate @ 11 probes, new Bloom: 0.0884%
639   // FP rate @  9 probes, new Bloom: 0.0843%
640   ResetPolicy(16);  // num_probes = 9 (new), 11 (old)
641   for (int key = 0; key < 2087; key++) {
642     Add(Key(key, buffer));
643   }
644   Build();
645   EXPECT_EQ(GetNumProbesFromFilterData(), SelectByImpl(11, 9));
646   EXPECT_EQ(
647       BloomHash(FilterData()),
648       SelectByImpl(SelectByCacheLineSize(1129406313, 3049154394U, 1727750964),
649                    1087138490));
650   if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
651     EXPECT_EQ("3299,3611,3916,6620,7822,8079,8482,8942,10167", FirstFPs(9));
652   }
653 
654   ResetPolicy(10);  // num_probes = 6, but different memory ratio vs. 9
655   for (int key = 0; key < 2087; key++) {
656     Add(Key(key, buffer));
657   }
658   Build();
659   EXPECT_EQ(GetNumProbesFromFilterData(), 6);
660   EXPECT_EQ(
661       BloomHash(FilterData()),
662       SelectByImpl(SelectByCacheLineSize(1478976371, 2910591341U, 1182970461),
663                    2498541272U));
664   if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
665     EXPECT_EQ("16,126,133,422,466,472,813,1002,1035,1159", FirstFPs(10));
666   }
667 
668   ResetPolicy(10);
669   for (int key = /*CHANGED*/ 1; key < 2087; key++) {
670     Add(Key(key, buffer));
671   }
672   Build();
673   EXPECT_EQ(GetNumProbesFromFilterData(), 6);
674   EXPECT_EQ(
675       BloomHash(FilterData()),
676       SelectByImpl(SelectByCacheLineSize(4205696321U, 1132081253U, 2385981855U),
677                    2058382345U));
678   if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
679     EXPECT_EQ("16,126,133,422,466,472,813,1002,1035,1159", FirstFPs(10));
680   }
681 
682   ResetPolicy(10);
683   for (int key = 1; key < /*CHANGED*/ 2088; key++) {
684     Add(Key(key, buffer));
685   }
686   Build();
687   EXPECT_EQ(GetNumProbesFromFilterData(), 6);
688   EXPECT_EQ(
689       BloomHash(FilterData()),
690       SelectByImpl(SelectByCacheLineSize(2885052954U, 769447944, 4175124908U),
691                    23699164));
692   if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
693     EXPECT_EQ("16,126,133,422,466,472,813,1002,1035,1159", FirstFPs(10));
694   }
695 
696   // With new fractional bits_per_key, check that we are rounding to
697   // whole bits per key for old Bloom filters but fractional for
698   // new Bloom filter.
699   ResetPolicy(9.5);
700   for (int key = 1; key < 2088; key++) {
701     Add(Key(key, buffer));
702   }
703   Build();
704   EXPECT_EQ(GetNumProbesFromFilterData(), 6);
705   EXPECT_EQ(BloomHash(FilterData()),
706             SelectByImpl(/*SAME*/ SelectByCacheLineSize(2885052954U, 769447944,
707                                                         4175124908U),
708                          /*CHANGED*/ 3166884174U));
709   if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
710     EXPECT_EQ(/*CHANGED*/ "126,156,367,444,458,791,813,976,1015,1035",
711               FirstFPs(10));
712   }
713 
714   ResetPolicy(10.499);
715   for (int key = 1; key < 2088; key++) {
716     Add(Key(key, buffer));
717   }
718   Build();
719   EXPECT_EQ(GetNumProbesFromFilterData(), SelectByImpl(6, 7));
720   EXPECT_EQ(BloomHash(FilterData()),
721             SelectByImpl(/*SAME*/ SelectByCacheLineSize(2885052954U, 769447944,
722                                                         4175124908U),
723                          /*CHANGED*/ 4098502778U));
724   if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
725     EXPECT_EQ(/*CHANGED*/ "16,236,240,472,1015,1045,1111,1409,1465,1612",
726               FirstFPs(10));
727   }
728 
729   ResetPolicy();
730 }
731 
732 // A helper class for testing custom or corrupt filter bits as read by
733 // built-in FilterBitsReaders.
734 struct RawFilterTester {
735   // Buffer, from which we always return a tail Slice, so the
736   // last five bytes are always the metadata bytes.
737   std::array<char, 3000> data_;
738   // Points five bytes from the end
739   char* metadata_ptr_;
740 
RawFilterTesterROCKSDB_NAMESPACE::RawFilterTester741   RawFilterTester() : metadata_ptr_(&*(data_.end() - 5)) {}
742 
ResetNoFillROCKSDB_NAMESPACE::RawFilterTester743   Slice ResetNoFill(uint32_t len_without_metadata, uint32_t num_lines,
744                      uint32_t num_probes) {
745     metadata_ptr_[0] = static_cast<char>(num_probes);
746     EncodeFixed32(metadata_ptr_ + 1, num_lines);
747     uint32_t len = len_without_metadata + /*metadata*/ 5;
748     assert(len <= data_.size());
749     return Slice(metadata_ptr_ - len_without_metadata, len);
750   }
751 
ResetROCKSDB_NAMESPACE::RawFilterTester752   Slice Reset(uint32_t len_without_metadata, uint32_t num_lines,
753                uint32_t num_probes, bool fill_ones) {
754     data_.fill(fill_ones ? 0xff : 0);
755     return ResetNoFill(len_without_metadata, num_lines, num_probes);
756   }
757 
ResetWeirdFillROCKSDB_NAMESPACE::RawFilterTester758   Slice ResetWeirdFill(uint32_t len_without_metadata, uint32_t num_lines,
759                         uint32_t num_probes) {
760     for (uint32_t i = 0; i < data_.size(); ++i) {
761       data_[i] = static_cast<char>(0x7b7b >> (i % 7));
762     }
763     return ResetNoFill(len_without_metadata, num_lines, num_probes);
764   }
765 };
766 
TEST_P(FullBloomTest,RawSchema)767 TEST_P(FullBloomTest, RawSchema) {
768   RawFilterTester cft;
769   // Two probes, about 3/4 bits set: ~50% "FP" rate
770   // One 256-byte cache line.
771   OpenRaw(cft.ResetWeirdFill(256, 1, 2));
772   EXPECT_EQ(uint64_t{11384799501900898790U}, PackedMatches());
773 
774   // Two 128-byte cache lines.
775   OpenRaw(cft.ResetWeirdFill(256, 2, 2));
776   EXPECT_EQ(uint64_t{10157853359773492589U}, PackedMatches());
777 
778   // Four 64-byte cache lines.
779   OpenRaw(cft.ResetWeirdFill(256, 4, 2));
780   EXPECT_EQ(uint64_t{7123594913907464682U}, PackedMatches());
781 }
782 
TEST_P(FullBloomTest,CorruptFilters)783 TEST_P(FullBloomTest, CorruptFilters) {
784   RawFilterTester cft;
785 
786   for (bool fill : {false, true}) {
787     // Good filter bits - returns same as fill
788     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 6, fill));
789     ASSERT_EQ(fill, Matches("hello"));
790     ASSERT_EQ(fill, Matches("world"));
791 
792     // Good filter bits - returns same as fill
793     OpenRaw(cft.Reset(CACHE_LINE_SIZE * 3, 3, 6, fill));
794     ASSERT_EQ(fill, Matches("hello"));
795     ASSERT_EQ(fill, Matches("world"));
796 
797     // Good filter bits - returns same as fill
798     // 256 is unusual but legal cache line size
799     OpenRaw(cft.Reset(256 * 3, 3, 6, fill));
800     ASSERT_EQ(fill, Matches("hello"));
801     ASSERT_EQ(fill, Matches("world"));
802 
803     // Good filter bits - returns same as fill
804     // 30 should be max num_probes
805     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 30, fill));
806     ASSERT_EQ(fill, Matches("hello"));
807     ASSERT_EQ(fill, Matches("world"));
808 
809     // Good filter bits - returns same as fill
810     // 1 should be min num_probes
811     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 1, fill));
812     ASSERT_EQ(fill, Matches("hello"));
813     ASSERT_EQ(fill, Matches("world"));
814 
815     // Type 1 trivial filter bits - returns true as if FP by zero probes
816     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 0, fill));
817     ASSERT_TRUE(Matches("hello"));
818     ASSERT_TRUE(Matches("world"));
819 
820     // Type 2 trivial filter bits - returns false as if built from zero keys
821     OpenRaw(cft.Reset(0, 0, 6, fill));
822     ASSERT_FALSE(Matches("hello"));
823     ASSERT_FALSE(Matches("world"));
824 
825     // Type 2 trivial filter bits - returns false as if built from zero keys
826     OpenRaw(cft.Reset(0, 37, 6, fill));
827     ASSERT_FALSE(Matches("hello"));
828     ASSERT_FALSE(Matches("world"));
829 
830     // Type 2 trivial filter bits - returns false as 0 size trumps 0 probes
831     OpenRaw(cft.Reset(0, 0, 0, fill));
832     ASSERT_FALSE(Matches("hello"));
833     ASSERT_FALSE(Matches("world"));
834 
835     // Bad filter bits - returns true for safety
836     // No solution to 0 * x == CACHE_LINE_SIZE
837     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 0, 6, fill));
838     ASSERT_TRUE(Matches("hello"));
839     ASSERT_TRUE(Matches("world"));
840 
841     // Bad filter bits - returns true for safety
842     // Can't have 3 * x == 4 for integer x
843     OpenRaw(cft.Reset(4, 3, 6, fill));
844     ASSERT_TRUE(Matches("hello"));
845     ASSERT_TRUE(Matches("world"));
846 
847     // Bad filter bits - returns true for safety
848     // 97 bytes is not a power of two, so not a legal cache line size
849     OpenRaw(cft.Reset(97 * 3, 3, 6, fill));
850     ASSERT_TRUE(Matches("hello"));
851     ASSERT_TRUE(Matches("world"));
852 
853     // Bad filter bits - returns true for safety
854     // 65 bytes is not a power of two, so not a legal cache line size
855     OpenRaw(cft.Reset(65 * 3, 3, 6, fill));
856     ASSERT_TRUE(Matches("hello"));
857     ASSERT_TRUE(Matches("world"));
858 
859     // Bad filter bits - returns false as if built from zero keys
860     // < 5 bytes overall means missing even metadata
861     OpenRaw(cft.Reset(-1, 3, 6, fill));
862     ASSERT_FALSE(Matches("hello"));
863     ASSERT_FALSE(Matches("world"));
864 
865     OpenRaw(cft.Reset(-5, 3, 6, fill));
866     ASSERT_FALSE(Matches("hello"));
867     ASSERT_FALSE(Matches("world"));
868 
869     // Dubious filter bits - returns same as fill (for now)
870     // 31 is not a useful num_probes, nor generated by RocksDB unless directly
871     // using filter bits API without BloomFilterPolicy.
872     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 31, fill));
873     ASSERT_EQ(fill, Matches("hello"));
874     ASSERT_EQ(fill, Matches("world"));
875 
876     // Dubious filter bits - returns same as fill (for now)
877     // Similar, with 127, largest positive char
878     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 127, fill));
879     ASSERT_EQ(fill, Matches("hello"));
880     ASSERT_EQ(fill, Matches("world"));
881 
882     // Dubious filter bits - returns true (for now)
883     // num_probes set to 128 / -128, lowest negative char
884     // NB: Bug in implementation interprets this as negative and has same
885     // effect as zero probes, but effectively reserves negative char values
886     // for future use.
887     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 128, fill));
888     ASSERT_TRUE(Matches("hello"));
889     ASSERT_TRUE(Matches("world"));
890 
891     // Dubious filter bits - returns true (for now)
892     // Similar, with 255 / -1
893     OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 255, fill));
894     ASSERT_TRUE(Matches("hello"));
895     ASSERT_TRUE(Matches("world"));
896   }
897 }
898 
899 INSTANTIATE_TEST_CASE_P(Full, FullBloomTest,
900                         testing::Values(BloomFilterPolicy::kLegacyBloom,
901                                         BloomFilterPolicy::kFastLocalBloom));
902 
903 }  // namespace ROCKSDB_NAMESPACE
904 
main(int argc,char ** argv)905 int main(int argc, char** argv) {
906   ::testing::InitGoogleTest(&argc, argv);
907   ParseCommandLineFlags(&argc, &argv, true);
908 
909   return RUN_ALL_TESTS();
910 }
911 
912 #endif  // GFLAGS
913