1 /*
2  * Bloom filter tracking which scores are present in our arenas
3  * and (more importantly) which are not.
4  */
5 
6 #include "stdinc.h"
7 #include "dat.h"
8 #include "fns.h"
9 
10 int ignorebloom;
11 
12 int
bloominit(Bloom * b,vlong vsize,u8int * data)13 bloominit(Bloom *b, vlong vsize, u8int *data)
14 {
15 	ulong size;
16 
17 	size = vsize;
18 	if(size != vsize){	/* truncation */
19 		werrstr("bloom data too big");
20 		return -1;
21 	}
22 
23 	b->size = size;
24 	b->nhash = 32;	/* will be fixed by caller on initialization */
25 	if(data != nil)
26 		if(unpackbloomhead(b, data) < 0)
27 			return -1;
28 
29 	b->bitmask = (b->size<<3) - 1;
30 	b->data = data;
31 	return 0;
32 }
33 
34 void
wbbloomhead(Bloom * b)35 wbbloomhead(Bloom *b)
36 {
37 	packbloomhead(b, b->data);
38 }
39 
40 Bloom*
readbloom(Part * p)41 readbloom(Part *p)
42 {
43 	uchar buf[512];
44 	Bloom *b;
45 
46 	b = vtmallocz(sizeof *b);
47 	if(readpart(p, 0, buf, sizeof buf) < 0)
48 		return nil;
49 	/*
50 	 * pass buf as b->data so that bloominit
51 	 * can parse header.  won't be used for
52 	 * accessing bits (cleared below).
53 	 */
54 	if(bloominit(b, 0, buf) < 0){
55 		vtfree(b);
56 		return nil;
57 	}else{
58 		/*
59 		 * default block size is system page size.
60 		 * the bloom filter is usually very big.
61 		 * bump the block size up to speed i/o.
62 		 */
63 		if(p->blocksize < (1<<20)){
64 			p->blocksize = 1<<20;
65 			if(p->blocksize > p->size)
66 				p->blocksize = p->size;
67 		}
68 	}
69 	b->part = p;
70 	b->data = nil;
71 	return b;
72 }
73 
74 int
resetbloom(Bloom * b)75 resetbloom(Bloom *b)
76 {
77 	uchar *data;
78 
79 	data = vtmallocz(b->size);
80 	b->data = data;
81 	if(b->size == MaxBloomSize)	/* 2^32 overflows ulong */
82 		addstat(StatBloomBits, b->size*8-1);
83 	else
84 		addstat(StatBloomBits, b->size*8);
85 	return 0;
86 }
87 
88 int
loadbloom(Bloom * b)89 loadbloom(Bloom *b)
90 {
91 	int i, n;
92 	uint ones;
93 	uchar *data;
94 	u32int *a;
95 
96 	data = vtmallocz(b->size);
97 	if(readpart(b->part, 0, data, b->size) < 0){
98 		vtfree(b);
99 		vtfree(data);
100 		return -1;
101 	}
102 	b->data = data;
103 
104 	a = (u32int*)b->data;
105 	n = b->size/4;
106 	ones = 0;
107 	for(i=0; i<n; i++)
108 		ones += countbits(a[i]);
109 	addstat(StatBloomOnes, ones);
110 
111 	if(b->size == MaxBloomSize)	/* 2^32 overflows ulong */
112 		addstat(StatBloomBits, b->size*8-1);
113 	else
114 		addstat(StatBloomBits, b->size*8);
115 
116 	return 0;
117 }
118 
119 int
writebloom(Bloom * b)120 writebloom(Bloom *b)
121 {
122 	wbbloomhead(b);
123 	if(writepart(b->part, 0, b->data, b->size) < 0)
124 		return -1;
125 	if(flushpart(b->part) < 0)
126 		return -1;
127 	return 0;
128 }
129 
130 /*
131  * Derive two random 32-bit quantities a, b from the score
132  * and then use a+b*i as a sequence of bloom filter indices.
133  * Michael Mitzenmacher has a recent (2005) paper saying this is okay.
134  * We reserve the bottom bytes (BloomHeadSize*8 bits) for the header.
135  */
136 static void
gethashes(u8int * score,ulong * h)137 gethashes(u8int *score, ulong *h)
138 {
139 	int i;
140 	u32int a, b;
141 
142 	a = 0;
143 	b = 0;
144 	for(i=4; i+8<=VtScoreSize; i+=8){
145 		a ^= *(u32int*)(score+i);
146 		b ^= *(u32int*)(score+i+4);
147 	}
148 	if(i+4 <= VtScoreSize)	/* 20 is not 4-aligned */
149 		a ^= *(u32int*)(score+i);
150 	for(i=0; i<BloomMaxHash; i++, a+=b)
151 		h[i] = a < BloomHeadSize*8 ? BloomHeadSize*8 : a;
152 }
153 
154 static void
_markbloomfilter(Bloom * b,u8int * score)155 _markbloomfilter(Bloom *b, u8int *score)
156 {
157 	int i, nnew;
158 	ulong h[BloomMaxHash];
159 	u32int x, *y, z, *tab;
160 
161 	trace("markbloomfilter", "markbloomfilter %V", score);
162 	gethashes(score, h);
163 	nnew = 0;
164 	tab = (u32int*)b->data;
165 	for(i=0; i<b->nhash; i++){
166 		x = h[i];
167 		y = &tab[(x&b->bitmask)>>5];
168 		z = 1<<(x&31);
169 		if(!(*y&z)){
170 			nnew++;
171 			*y |= z;
172 		}
173 	}
174 	if(nnew)
175 		addstat(StatBloomOnes, nnew);
176 
177 	trace("markbloomfilter", "markbloomfilter exit");
178 }
179 
180 static int
_inbloomfilter(Bloom * b,u8int * score)181 _inbloomfilter(Bloom *b, u8int *score)
182 {
183 	int i;
184 	ulong h[BloomMaxHash], x;
185 	u32int *tab;
186 
187 	gethashes(score, h);
188 	tab = (u32int*)b->data;
189 	for(i=0; i<b->nhash; i++){
190 		x = h[i];
191 		if(!(tab[(x&b->bitmask)>>5] & (1<<(x&31))))
192 			return 0;
193 	}
194 	return 1;
195 }
196 
197 int
inbloomfilter(Bloom * b,u8int * score)198 inbloomfilter(Bloom *b, u8int *score)
199 {
200 	int r;
201 
202 	if(b == nil || b->data == nil)
203 		return 1;
204 
205 	if(ignorebloom)
206 		return 1;
207 
208 	rlock(&b->lk);
209 	r = _inbloomfilter(b, score);
210 	runlock(&b->lk);
211 	addstat(StatBloomLookup, 1);
212 	if(r)
213 		addstat(StatBloomMiss, 1);
214 	else
215 		addstat(StatBloomHit, 1);
216 	return r;
217 }
218 
219 void
markbloomfilter(Bloom * b,u8int * score)220 markbloomfilter(Bloom *b, u8int *score)
221 {
222 	if(b == nil || b->data == nil)
223 		return;
224 
225 	rlock(&b->lk);
226 	qlock(&b->mod);
227 	_markbloomfilter(b, score);
228 	qunlock(&b->mod);
229 	runlock(&b->lk);
230 }
231 
232 void
markbloomfiltern(Bloom * b,u8int score[][20],int n)233 markbloomfiltern(Bloom *b, u8int score[][20], int n)
234 {
235 	int i;
236 
237 	if(b == nil || b->data == nil)
238 		return;
239 
240 	rlock(&b->lk);
241 	qlock(&b->mod);
242 	for(i=0; i<n; i++)
243 		_markbloomfilter(b, score[i]);
244 	qunlock(&b->mod);
245 	runlock(&b->lk);
246 }
247 
248 static void
bloomwriteproc(void * v)249 bloomwriteproc(void *v)
250 {
251 	int ret;
252 	Bloom *b;
253 
254 	threadsetname("bloomwriteproc");
255 	b = v;
256 	for(;;){
257 		recv(b->writechan, 0);
258 		if((ret=writebloom(b)) < 0)
259 			fprint(2, "oops! writing bloom: %r\n");
260 		else
261 			ret = 0;
262 		sendul(b->writedonechan, ret);
263 	}
264 }
265 
266 void
startbloomproc(Bloom * b)267 startbloomproc(Bloom *b)
268 {
269 	b->writechan = chancreate(sizeof(void*), 0);
270 	b->writedonechan = chancreate(sizeof(ulong), 0);
271 	vtproc(bloomwriteproc, b);
272 }
273