1 /*-------------------------------------------------------------------------
2  *
3  * bloomfilter.c
4  *		Space-efficient set membership testing
5  *
6  * A Bloom filter is a probabilistic data structure that is used to test an
7  * element's membership of a set.  False positives are possible, but false
8  * negatives are not; a test of membership of the set returns either "possibly
9  * in set" or "definitely not in set".  This is typically very space efficient,
10  * which can be a decisive advantage.
11  *
12  * Elements can be added to the set, but not removed.  The more elements that
13  * are added, the larger the probability of false positives.  Caller must hint
14  * an estimated total size of the set when the Bloom filter is initialized.
15  * This is used to balance the use of memory against the final false positive
16  * rate.
17  *
18  * The implementation is well suited to data synchronization problems between
19  * unordered sets, especially where predictable performance is important and
20  * some false positives are acceptable.  It's also well suited to cache
21  * filtering problems where a relatively small and/or low cardinality set is
22  * fingerprinted, especially when many subsequent membership tests end up
23  * indicating that values of interest are not present.  That should save the
24  * caller many authoritative lookups, such as expensive probes of a much larger
25  * on-disk structure.
26  *
27  * Copyright (c) 2018-2021, PostgreSQL Global Development Group
28  *
29  * IDENTIFICATION
30  *	  src/backend/lib/bloomfilter.c
31  *
32  *-------------------------------------------------------------------------
33  */
34 #include "postgres.h"
35 
36 #include <math.h>
37 
38 #include "common/hashfn.h"
39 #include "lib/bloomfilter.h"
40 #include "port/pg_bitutils.h"
41 
42 #define MAX_HASH_FUNCS		10
43 
44 struct bloom_filter
45 {
46 	/* K hash functions are used, seeded by caller's seed */
47 	int			k_hash_funcs;
48 	uint64		seed;
49 	/* m is bitset size, in bits.  Must be a power of two <= 2^32.  */
50 	uint64		m;
51 	unsigned char bitset[FLEXIBLE_ARRAY_MEMBER];
52 };
53 
54 static int	my_bloom_power(uint64 target_bitset_bits);
55 static int	optimal_k(uint64 bitset_bits, int64 total_elems);
56 static void k_hashes(bloom_filter *filter, uint32 *hashes, unsigned char *elem,
57 					 size_t len);
58 static inline uint32 mod_m(uint32 a, uint64 m);
59 
60 /*
61  * Create Bloom filter in caller's memory context.  We aim for a false positive
62  * rate of between 1% and 2% when bitset size is not constrained by memory
63  * availability.
64  *
65  * total_elems is an estimate of the final size of the set.  It should be
66  * approximately correct, but the implementation can cope well with it being
67  * off by perhaps a factor of five or more.  See "Bloom Filters in
68  * Probabilistic Verification" (Dillinger & Manolios, 2004) for details of why
69  * this is the case.
70  *
71  * bloom_work_mem is sized in KB, in line with the general work_mem convention.
72  * This determines the size of the underlying bitset (trivial bookkeeping space
73  * isn't counted).  The bitset is always sized as a power of two number of
74  * bits, and the largest possible bitset is 512MB (2^32 bits).  The
75  * implementation allocates only enough memory to target its standard false
76  * positive rate, using a simple formula with caller's total_elems estimate as
77  * an input.  The bitset might be as small as 1MB, even when bloom_work_mem is
78  * much higher.
79  *
80  * The Bloom filter is seeded using a value provided by the caller.  Using a
81  * distinct seed value on every call makes it unlikely that the same false
82  * positives will reoccur when the same set is fingerprinted a second time.
83  * Callers that don't care about this pass a constant as their seed, typically
84  * 0.  Callers can use a pseudo-random seed in the range of 0 - INT_MAX by
85  * calling random().
86  */
87 bloom_filter *
bloom_create(int64 total_elems,int bloom_work_mem,uint64 seed)88 bloom_create(int64 total_elems, int bloom_work_mem, uint64 seed)
89 {
90 	bloom_filter *filter;
91 	int			bloom_power;
92 	uint64		bitset_bytes;
93 	uint64		bitset_bits;
94 
95 	/*
96 	 * Aim for two bytes per element; this is sufficient to get a false
97 	 * positive rate below 1%, independent of the size of the bitset or total
98 	 * number of elements.  Also, if rounding down the size of the bitset to
99 	 * the next lowest power of two turns out to be a significant drop, the
100 	 * false positive rate still won't exceed 2% in almost all cases.
101 	 */
102 	bitset_bytes = Min(bloom_work_mem * UINT64CONST(1024), total_elems * 2);
103 	bitset_bytes = Max(1024 * 1024, bitset_bytes);
104 
105 	/*
106 	 * Size in bits should be the highest power of two <= target.  bitset_bits
107 	 * is uint64 because PG_UINT32_MAX is 2^32 - 1, not 2^32
108 	 */
109 	bloom_power = my_bloom_power(bitset_bytes * BITS_PER_BYTE);
110 	bitset_bits = UINT64CONST(1) << bloom_power;
111 	bitset_bytes = bitset_bits / BITS_PER_BYTE;
112 
113 	/* Allocate bloom filter with unset bitset */
114 	filter = palloc0(offsetof(bloom_filter, bitset) +
115 					 sizeof(unsigned char) * bitset_bytes);
116 	filter->k_hash_funcs = optimal_k(bitset_bits, total_elems);
117 	filter->seed = seed;
118 	filter->m = bitset_bits;
119 
120 	return filter;
121 }
122 
123 /*
124  * Free Bloom filter
125  */
126 void
bloom_free(bloom_filter * filter)127 bloom_free(bloom_filter *filter)
128 {
129 	pfree(filter);
130 }
131 
132 /*
133  * Add element to Bloom filter
134  */
135 void
bloom_add_element(bloom_filter * filter,unsigned char * elem,size_t len)136 bloom_add_element(bloom_filter *filter, unsigned char *elem, size_t len)
137 {
138 	uint32		hashes[MAX_HASH_FUNCS];
139 	int			i;
140 
141 	k_hashes(filter, hashes, elem, len);
142 
143 	/* Map a bit-wise address to a byte-wise address + bit offset */
144 	for (i = 0; i < filter->k_hash_funcs; i++)
145 	{
146 		filter->bitset[hashes[i] >> 3] |= 1 << (hashes[i] & 7);
147 	}
148 }
149 
150 /*
151  * Test if Bloom filter definitely lacks element.
152  *
153  * Returns true if the element is definitely not in the set of elements
154  * observed by bloom_add_element().  Otherwise, returns false, indicating that
155  * element is probably present in set.
156  */
157 bool
bloom_lacks_element(bloom_filter * filter,unsigned char * elem,size_t len)158 bloom_lacks_element(bloom_filter *filter, unsigned char *elem, size_t len)
159 {
160 	uint32		hashes[MAX_HASH_FUNCS];
161 	int			i;
162 
163 	k_hashes(filter, hashes, elem, len);
164 
165 	/* Map a bit-wise address to a byte-wise address + bit offset */
166 	for (i = 0; i < filter->k_hash_funcs; i++)
167 	{
168 		if (!(filter->bitset[hashes[i] >> 3] & (1 << (hashes[i] & 7))))
169 			return true;
170 	}
171 
172 	return false;
173 }
174 
175 /*
176  * What proportion of bits are currently set?
177  *
178  * Returns proportion, expressed as a multiplier of filter size.  That should
179  * generally be close to 0.5, even when we have more than enough memory to
180  * ensure a false positive rate within target 1% to 2% band, since more hash
181  * functions are used as more memory is available per element.
182  *
183  * This is the only instrumentation that is low overhead enough to appear in
184  * debug traces.  When debugging Bloom filter code, it's likely to be far more
185  * interesting to directly test the false positive rate.
186  */
187 double
bloom_prop_bits_set(bloom_filter * filter)188 bloom_prop_bits_set(bloom_filter *filter)
189 {
190 	int			bitset_bytes = filter->m / BITS_PER_BYTE;
191 	uint64		bits_set = pg_popcount((char *) filter->bitset, bitset_bytes);
192 
193 	return bits_set / (double) filter->m;
194 }
195 
196 /*
197  * Which element in the sequence of powers of two is less than or equal to
198  * target_bitset_bits?
199  *
200  * Value returned here must be generally safe as the basis for actual bitset
201  * size.
202  *
203  * Bitset is never allowed to exceed 2 ^ 32 bits (512MB).  This is sufficient
204  * for the needs of all current callers, and allows us to use 32-bit hash
205  * functions.  It also makes it easy to stay under the MaxAllocSize restriction
206  * (caller needs to leave room for non-bitset fields that appear before
207  * flexible array member, so a 1GB bitset would use an allocation that just
208  * exceeds MaxAllocSize).
209  */
210 static int
my_bloom_power(uint64 target_bitset_bits)211 my_bloom_power(uint64 target_bitset_bits)
212 {
213 	int			bloom_power = -1;
214 
215 	while (target_bitset_bits > 0 && bloom_power < 32)
216 	{
217 		bloom_power++;
218 		target_bitset_bits >>= 1;
219 	}
220 
221 	return bloom_power;
222 }
223 
224 /*
225  * Determine optimal number of hash functions based on size of filter in bits,
226  * and projected total number of elements.  The optimal number is the number
227  * that minimizes the false positive rate.
228  */
229 static int
optimal_k(uint64 bitset_bits,int64 total_elems)230 optimal_k(uint64 bitset_bits, int64 total_elems)
231 {
232 	int			k = rint(log(2.0) * bitset_bits / total_elems);
233 
234 	return Max(1, Min(k, MAX_HASH_FUNCS));
235 }
236 
237 /*
238  * Generate k hash values for element.
239  *
240  * Caller passes array, which is filled-in with k values determined by hashing
241  * caller's element.
242  *
243  * Only 2 real independent hash functions are actually used to support an
244  * interface of up to MAX_HASH_FUNCS hash functions; enhanced double hashing is
245  * used to make this work.  The main reason we prefer enhanced double hashing
246  * to classic double hashing is that the latter has an issue with collisions
247  * when using power of two sized bitsets.  See Dillinger & Manolios for full
248  * details.
249  */
250 static void
k_hashes(bloom_filter * filter,uint32 * hashes,unsigned char * elem,size_t len)251 k_hashes(bloom_filter *filter, uint32 *hashes, unsigned char *elem, size_t len)
252 {
253 	uint64		hash;
254 	uint32		x,
255 				y;
256 	uint64		m;
257 	int			i;
258 
259 	/* Use 64-bit hashing to get two independent 32-bit hashes */
260 	hash = DatumGetUInt64(hash_any_extended(elem, len, filter->seed));
261 	x = (uint32) hash;
262 	y = (uint32) (hash >> 32);
263 	m = filter->m;
264 
265 	x = mod_m(x, m);
266 	y = mod_m(y, m);
267 
268 	/* Accumulate hashes */
269 	hashes[0] = x;
270 	for (i = 1; i < filter->k_hash_funcs; i++)
271 	{
272 		x = mod_m(x + y, m);
273 		y = mod_m(y + i, m);
274 
275 		hashes[i] = x;
276 	}
277 }
278 
279 /*
280  * Calculate "val MOD m" inexpensively.
281  *
282  * Assumes that m (which is bitset size) is a power of two.
283  *
284  * Using a power of two number of bits for bitset size allows us to use bitwise
285  * AND operations to calculate the modulo of a hash value.  It's also a simple
286  * way of avoiding the modulo bias effect.
287  */
288 static inline uint32
mod_m(uint32 val,uint64 m)289 mod_m(uint32 val, uint64 m)
290 {
291 	Assert(m <= PG_UINT32_MAX + UINT64CONST(1));
292 	Assert(((m - 1) & m) == 0);
293 
294 	return val & (m - 1);
295 }
296