1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 
6 #pragma once
7 
8 #include <array>
9 #include <string>
10 #include "port/port.h"
11 #include "rocksdb/slice.h"
12 #include "table/multiget_context.h"
13 #include "util/hash.h"
14 
15 #include <atomic>
16 #include <memory>
17 
18 namespace ROCKSDB_NAMESPACE {
19 
20 class Slice;
21 class Allocator;
22 class Logger;
23 
24 // A Bloom filter intended only to be used in memory, never serialized in a way
25 // that could lead to schema incompatibility. Supports opt-in lock-free
26 // concurrent access.
27 //
28 // This implementation is also intended for applications generally preferring
29 // speed vs. maximum accuracy: roughly 0.9x BF op latency for 1.1x FP rate.
30 // For 1% FP rate, that means that the latency of a look-up triggered by an FP
31 // should be less than roughly 100x the cost of a Bloom filter op.
32 //
33 // For simplicity and performance, the current implementation requires
34 // num_probes to be a multiple of two and <= 10.
35 //
36 class DynamicBloom {
37  public:
38   // allocator: pass allocator to bloom filter, hence trace the usage of memory
39   // total_bits: fixed total bits for the bloom
40   // num_probes: number of hash probes for a single key
41   // hash_func:  customized hash function
42   // huge_page_tlb_size:  if >0, try to allocate bloom bytes from huge page TLB
43   //                      within this page size. Need to reserve huge pages for
44   //                      it to be allocated, like:
45   //                         sysctl -w vm.nr_hugepages=20
46   //                     See linux doc Documentation/vm/hugetlbpage.txt
47   explicit DynamicBloom(Allocator* allocator, uint32_t total_bits,
48                         uint32_t num_probes = 6, size_t huge_page_tlb_size = 0,
49                         Logger* logger = nullptr);
50 
~DynamicBloom()51   ~DynamicBloom() {}
52 
53   // Assuming single threaded access to this function.
54   void Add(const Slice& key);
55 
56   // Like Add, but may be called concurrent with other functions.
57   void AddConcurrently(const Slice& key);
58 
59   // Assuming single threaded access to this function.
60   void AddHash(uint32_t hash);
61 
62   // Like AddHash, but may be called concurrent with other functions.
63   void AddHashConcurrently(uint32_t hash);
64 
65   // Multithreaded access to this function is OK
66   bool MayContain(const Slice& key) const;
67 
68   void MayContain(int num_keys, Slice** keys, bool* may_match) const;
69 
70   // Multithreaded access to this function is OK
71   bool MayContainHash(uint32_t hash) const;
72 
73   void Prefetch(uint32_t h);
74 
75  private:
76   // Length of the structure, in 64-bit words. For this structure, "word"
77   // will always refer to 64-bit words.
78   uint32_t kLen;
79   // We make the k probes in pairs, two for each 64-bit read/write. Thus,
80   // this stores k/2, the number of words to double-probe.
81   const uint32_t kNumDoubleProbes;
82 
83   std::atomic<uint64_t>* data_;
84 
85   // or_func(ptr, mask) should effect *ptr |= mask with the appropriate
86   // concurrency safety, working with bytes.
87   template <typename OrFunc>
88   void AddHash(uint32_t hash, const OrFunc& or_func);
89 
90   bool DoubleProbe(uint32_t h32, size_t a) const;
91 };
92 
Add(const Slice & key)93 inline void DynamicBloom::Add(const Slice& key) { AddHash(BloomHash(key)); }
94 
AddConcurrently(const Slice & key)95 inline void DynamicBloom::AddConcurrently(const Slice& key) {
96   AddHashConcurrently(BloomHash(key));
97 }
98 
AddHash(uint32_t hash)99 inline void DynamicBloom::AddHash(uint32_t hash) {
100   AddHash(hash, [](std::atomic<uint64_t>* ptr, uint64_t mask) {
101     ptr->store(ptr->load(std::memory_order_relaxed) | mask,
102                std::memory_order_relaxed);
103   });
104 }
105 
AddHashConcurrently(uint32_t hash)106 inline void DynamicBloom::AddHashConcurrently(uint32_t hash) {
107   AddHash(hash, [](std::atomic<uint64_t>* ptr, uint64_t mask) {
108     // Happens-before between AddHash and MaybeContains is handled by
109     // access to versions_->LastSequence(), so all we have to do here is
110     // avoid races (so we don't give the compiler a license to mess up
111     // our code) and not lose bits.  std::memory_order_relaxed is enough
112     // for that.
113     if ((mask & ptr->load(std::memory_order_relaxed)) != mask) {
114       ptr->fetch_or(mask, std::memory_order_relaxed);
115     }
116   });
117 }
118 
MayContain(const Slice & key)119 inline bool DynamicBloom::MayContain(const Slice& key) const {
120   return (MayContainHash(BloomHash(key)));
121 }
122 
MayContain(int num_keys,Slice ** keys,bool * may_match)123 inline void DynamicBloom::MayContain(int num_keys, Slice** keys,
124                                      bool* may_match) const {
125   std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> hashes;
126   std::array<size_t, MultiGetContext::MAX_BATCH_SIZE> byte_offsets;
127   for (int i = 0; i < num_keys; ++i) {
128     hashes[i] = BloomHash(*keys[i]);
129     size_t a = fastrange32(kLen, hashes[i]);
130     PREFETCH(data_ + a, 0, 3);
131     byte_offsets[i] = a;
132   }
133 
134   for (int i = 0; i < num_keys; i++) {
135     may_match[i] = DoubleProbe(hashes[i], byte_offsets[i]);
136   }
137 }
138 
139 #if defined(_MSC_VER)
140 #pragma warning(push)
141 // local variable is initialized but not referenced
142 #pragma warning(disable : 4189)
143 #endif
Prefetch(uint32_t h32)144 inline void DynamicBloom::Prefetch(uint32_t h32) {
145   size_t a = fastrange32(kLen, h32);
146   PREFETCH(data_ + a, 0, 3);
147 }
148 #if defined(_MSC_VER)
149 #pragma warning(pop)
150 #endif
151 
// Speed hacks in this implementation:
// * Uses fastrange instead of %
// * Minimum logic to determine first (and all) probed memory addresses.
//   (Uses constant bit-xor offsets from the starting probe address.)
// * (Major) Two probes per 64-bit memory fetch/write.
//   Code simplification / optimization: only allow even number of probes.
// * Very fast and effective (murmur-like) hash expansion/re-mixing. (At
//   least on recent CPUs, integer multiplication is very cheap. Each 64-bit
//   remix provides five pairs of bit addresses within a uint64_t.)
//   Code simplification / optimization: only allow up to 10 probes, from a
//   single 64-bit remix.
//
// The FP rate penalty for this implementation, vs. standard Bloom filter, is
// roughly 1.12x on top of the 1.15x penalty for a 512-bit cache-local Bloom.
// This implementation does not explicitly use the cache line size, but is
// effectively cache-local (up to 16 probes) because of the bit-xor offsetting.
//
// NB: could easily be upgraded to support a 64-bit hash and
// total_bits > 2^32 (512MB). (The latter is a bad idea without the former,
// because of false positives.)
172 
MayContainHash(uint32_t h32)173 inline bool DynamicBloom::MayContainHash(uint32_t h32) const {
174   size_t a = fastrange32(kLen, h32);
175   PREFETCH(data_ + a, 0, 3);
176   return DoubleProbe(h32, a);
177 }
178 
DoubleProbe(uint32_t h32,size_t byte_offset)179 inline bool DynamicBloom::DoubleProbe(uint32_t h32, size_t byte_offset) const {
180   // Expand/remix with 64-bit golden ratio
181   uint64_t h = 0x9e3779b97f4a7c13ULL * h32;
182   for (unsigned i = 0;; ++i) {
183     // Two bit probes per uint64_t probe
184     uint64_t mask =
185         ((uint64_t)1 << (h & 63)) | ((uint64_t)1 << ((h >> 6) & 63));
186     uint64_t val = data_[byte_offset ^ i].load(std::memory_order_relaxed);
187     if (i + 1 >= kNumDoubleProbes) {
188       return (val & mask) == mask;
189     } else if ((val & mask) != mask) {
190       return false;
191     }
192     h = (h >> 12) | (h << 52);
193   }
194 }
195 
196 template <typename OrFunc>
AddHash(uint32_t h32,const OrFunc & or_func)197 inline void DynamicBloom::AddHash(uint32_t h32, const OrFunc& or_func) {
198   size_t a = fastrange32(kLen, h32);
199   PREFETCH(data_ + a, 0, 3);
200   // Expand/remix with 64-bit golden ratio
201   uint64_t h = 0x9e3779b97f4a7c13ULL * h32;
202   for (unsigned i = 0;; ++i) {
203     // Two bit probes per uint64_t probe
204     uint64_t mask =
205         ((uint64_t)1 << (h & 63)) | ((uint64_t)1 << ((h >> 6) & 63));
206     or_func(&data_[a ^ i], mask);
207     if (i + 1 >= kNumDoubleProbes) {
208       return;
209     }
210     h = (h >> 12) | (h << 52);
211   }
212 }
213 
214 }  // namespace ROCKSDB_NAMESPACE
215