1 /*
2 * Bloom filter tracking which scores are present in our arenas
3 * and (more importantly) which are not.
4 */
5
6 #include "stdinc.h"
7 #include "dat.h"
8 #include "fns.h"
9
10 int ignorebloom;
11
12 int
bloominit(Bloom * b,vlong vsize,u8int * data)13 bloominit(Bloom *b, vlong vsize, u8int *data)
14 {
15 ulong size;
16
17 size = vsize;
18 if(size != vsize){ /* truncation */
19 werrstr("bloom data too big");
20 return -1;
21 }
22
23 b->size = size;
24 b->nhash = 32; /* will be fixed by caller on initialization */
25 if(data != nil)
26 if(unpackbloomhead(b, data) < 0)
27 return -1;
28
29 b->bitmask = (b->size<<3) - 1;
30 b->data = data;
31 return 0;
32 }
33
34 void
wbbloomhead(Bloom * b)35 wbbloomhead(Bloom *b)
36 {
37 packbloomhead(b, b->data);
38 }
39
40 Bloom*
readbloom(Part * p)41 readbloom(Part *p)
42 {
43 uchar buf[512];
44 Bloom *b;
45
46 b = vtmallocz(sizeof *b);
47 if(readpart(p, 0, buf, sizeof buf) < 0)
48 return nil;
49 /*
50 * pass buf as b->data so that bloominit
51 * can parse header. won't be used for
52 * accessing bits (cleared below).
53 */
54 if(bloominit(b, 0, buf) < 0){
55 vtfree(b);
56 return nil;
57 }else{
58 /*
59 * default block size is system page size.
60 * the bloom filter is usually very big.
61 * bump the block size up to speed i/o.
62 */
63 if(p->blocksize < (1<<20)){
64 p->blocksize = 1<<20;
65 if(p->blocksize > p->size)
66 p->blocksize = p->size;
67 }
68 }
69 b->part = p;
70 b->data = nil;
71 return b;
72 }
73
74 int
resetbloom(Bloom * b)75 resetbloom(Bloom *b)
76 {
77 uchar *data;
78
79 data = vtmallocz(b->size);
80 b->data = data;
81 if(b->size == MaxBloomSize) /* 2^32 overflows ulong */
82 addstat(StatBloomBits, b->size*8-1);
83 else
84 addstat(StatBloomBits, b->size*8);
85 return 0;
86 }
87
88 int
loadbloom(Bloom * b)89 loadbloom(Bloom *b)
90 {
91 int i, n;
92 uint ones;
93 uchar *data;
94 u32int *a;
95
96 data = vtmallocz(b->size);
97 if(readpart(b->part, 0, data, b->size) < 0){
98 vtfree(b);
99 vtfree(data);
100 return -1;
101 }
102 b->data = data;
103
104 a = (u32int*)b->data;
105 n = b->size/4;
106 ones = 0;
107 for(i=0; i<n; i++)
108 ones += countbits(a[i]);
109 addstat(StatBloomOnes, ones);
110
111 if(b->size == MaxBloomSize) /* 2^32 overflows ulong */
112 addstat(StatBloomBits, b->size*8-1);
113 else
114 addstat(StatBloomBits, b->size*8);
115
116 return 0;
117 }
118
119 int
writebloom(Bloom * b)120 writebloom(Bloom *b)
121 {
122 wbbloomhead(b);
123 if(writepart(b->part, 0, b->data, b->size) < 0)
124 return -1;
125 if(flushpart(b->part) < 0)
126 return -1;
127 return 0;
128 }
129
130 /*
131 * Derive two random 32-bit quantities a, b from the score
132 * and then use a+b*i as a sequence of bloom filter indices.
133 * Michael Mitzenmacher has a recent (2005) paper saying this is okay.
134 * We reserve the bottom bytes (BloomHeadSize*8 bits) for the header.
135 */
136 static void
gethashes(u8int * score,ulong * h)137 gethashes(u8int *score, ulong *h)
138 {
139 int i;
140 u32int a, b;
141
142 a = 0;
143 b = 0;
144 for(i=4; i+8<=VtScoreSize; i+=8){
145 a ^= *(u32int*)(score+i);
146 b ^= *(u32int*)(score+i+4);
147 }
148 if(i+4 <= VtScoreSize) /* 20 is not 4-aligned */
149 a ^= *(u32int*)(score+i);
150 for(i=0; i<BloomMaxHash; i++, a+=b)
151 h[i] = a < BloomHeadSize*8 ? BloomHeadSize*8 : a;
152 }
153
154 static void
_markbloomfilter(Bloom * b,u8int * score)155 _markbloomfilter(Bloom *b, u8int *score)
156 {
157 int i, nnew;
158 ulong h[BloomMaxHash];
159 u32int x, *y, z, *tab;
160
161 trace("markbloomfilter", "markbloomfilter %V", score);
162 gethashes(score, h);
163 nnew = 0;
164 tab = (u32int*)b->data;
165 for(i=0; i<b->nhash; i++){
166 x = h[i];
167 y = &tab[(x&b->bitmask)>>5];
168 z = 1<<(x&31);
169 if(!(*y&z)){
170 nnew++;
171 *y |= z;
172 }
173 }
174 if(nnew)
175 addstat(StatBloomOnes, nnew);
176
177 trace("markbloomfilter", "markbloomfilter exit");
178 }
179
180 static int
_inbloomfilter(Bloom * b,u8int * score)181 _inbloomfilter(Bloom *b, u8int *score)
182 {
183 int i;
184 ulong h[BloomMaxHash], x;
185 u32int *tab;
186
187 gethashes(score, h);
188 tab = (u32int*)b->data;
189 for(i=0; i<b->nhash; i++){
190 x = h[i];
191 if(!(tab[(x&b->bitmask)>>5] & (1<<(x&31))))
192 return 0;
193 }
194 return 1;
195 }
196
197 int
inbloomfilter(Bloom * b,u8int * score)198 inbloomfilter(Bloom *b, u8int *score)
199 {
200 int r;
201
202 if(b == nil || b->data == nil)
203 return 1;
204
205 if(ignorebloom)
206 return 1;
207
208 rlock(&b->lk);
209 r = _inbloomfilter(b, score);
210 runlock(&b->lk);
211 addstat(StatBloomLookup, 1);
212 if(r)
213 addstat(StatBloomMiss, 1);
214 else
215 addstat(StatBloomHit, 1);
216 return r;
217 }
218
219 void
markbloomfilter(Bloom * b,u8int * score)220 markbloomfilter(Bloom *b, u8int *score)
221 {
222 if(b == nil || b->data == nil)
223 return;
224
225 rlock(&b->lk);
226 qlock(&b->mod);
227 _markbloomfilter(b, score);
228 qunlock(&b->mod);
229 runlock(&b->lk);
230 }
231
232 void
markbloomfiltern(Bloom * b,u8int score[][20],int n)233 markbloomfiltern(Bloom *b, u8int score[][20], int n)
234 {
235 int i;
236
237 if(b == nil || b->data == nil)
238 return;
239
240 rlock(&b->lk);
241 qlock(&b->mod);
242 for(i=0; i<n; i++)
243 _markbloomfilter(b, score[i]);
244 qunlock(&b->mod);
245 runlock(&b->lk);
246 }
247
248 static void
bloomwriteproc(void * v)249 bloomwriteproc(void *v)
250 {
251 int ret;
252 Bloom *b;
253
254 threadsetname("bloomwriteproc");
255 b = v;
256 for(;;){
257 recv(b->writechan, 0);
258 if((ret=writebloom(b)) < 0)
259 fprint(2, "oops! writing bloom: %r\n");
260 else
261 ret = 0;
262 sendul(b->writedonechan, ret);
263 }
264 }
265
266 void
startbloomproc(Bloom * b)267 startbloomproc(Bloom *b)
268 {
269 b->writechan = chancreate(sizeof(void*), 0);
270 b->writedonechan = chancreate(sizeof(ulong), 0);
271 vtproc(bloomwriteproc, b);
272 }
273