1 /*
2  * Index, mapping scores to log positions.
3  *
4  * The index is made up of some number of index sections, each of
5  * which is typically stored on a different disk.  The blocks in all the
6  * index sections are logically numbered, with each index section
7  * responsible for a range of blocks.  Blocks are typically 8kB.
8  *
9  * The N index blocks are treated as a giant hash table.  The top 32 bits
10  * of score are used as the key for a lookup.  Each index block holds
11  * one hash bucket, which is responsible for ceil(2^32 / N) of the key space.
12  *
13  * The index is sized so that a particular bucket is extraordinarily
14  * unlikely to overflow: assuming compressed data blocks are 4kB
15  * on disk, and assuming each block has a 40 byte index entry,
16  * the index data will be 1% of the total data.  Since scores are essentially
17  * random, all buckets should be about the same fullness.
18  * A factor of 5 gives us a wide comfort boundary to account for
19  * random variation.  So the index disk space should be 5% of the arena disk space.
20  */
21 
22 #include "stdinc.h"
23 #include "dat.h"
24 #include "fns.h"
25 
26 static int	initindex1(Index*);
27 static ISect	*initisect1(ISect *is);
28 
29 #define KEY(k,d)	((d) ? (k)>>(32-(d)) : 0)
30 
31 static char IndexMagic[] = "venti index configuration";
32 
33 Index*
initindex(char * name,ISect ** sects,int n)34 initindex(char *name, ISect **sects, int n)
35 {
36 	IFile f;
37 	Index *ix;
38 	ISect *is;
39 	u32int last, blocksize, tabsize;
40 	int i;
41 
42 	if(n <= 0){
43 fprint(2, "bad n\n");
44 		seterr(EOk, "no index sections to initialize index");
45 		return nil;
46 	}
47 	ix = MKZ(Index);
48 	if(ix == nil){
49 fprint(2, "no mem\n");
50 		seterr(EOk, "can't initialize index: out of memory");
51 		freeindex(ix);
52 		return nil;
53 	}
54 
55 	tabsize = sects[0]->tabsize;
56 	if(partifile(&f, sects[0]->part, sects[0]->tabbase, tabsize) < 0)
57 		return nil;
58 	if(parseindex(&f, ix) < 0){
59 		freeifile(&f);
60 		freeindex(ix);
61 		return nil;
62 	}
63 	freeifile(&f);
64 	if(namecmp(ix->name, name) != 0){
65 		seterr(ECorrupt, "mismatched index name: found %s expected %s", ix->name, name);
66 		return nil;
67 	}
68 	if(ix->nsects != n){
69 		seterr(ECorrupt, "mismatched number index sections: found %d expected %d", n, ix->nsects);
70 		freeindex(ix);
71 		return nil;
72 	}
73 	ix->sects = sects;
74 	last = 0;
75 	blocksize = ix->blocksize;
76 	for(i = 0; i < ix->nsects; i++){
77 		is = sects[i];
78 		if(namecmp(is->index, ix->name) != 0) {
79 			seterr(ECorrupt, "%s: index name is %s, not %s",
80 				sects[i]->part->name, is->index, ix->name);
81 		bad:
82 			freeindex(ix);
83 			return nil;
84 		}
85 		if(is->blocksize != blocksize) {
86 			seterr(ECorrupt, "%s: blocksize is %d, not %d",
87 				sects[i]->part->name, (int)is->blocksize, (int)blocksize);
88 			goto bad;
89 		}
90 		if(is->tabsize != tabsize) {
91 			seterr(ECorrupt, "%s: tabsize is %d, not %d",
92 				sects[i]->part->name, (int)is->tabsize, (int)tabsize);
93 			goto bad;
94 		}
95 		if(namecmp(is->name, ix->smap[i].name) != 0) {
96 			seterr(ECorrupt, "%s: name is %s, not %s",
97 				sects[i]->part->name, is->name, ix->smap[i].name);
98 			goto bad;
99 		}
100 		if(is->start != ix->smap[i].start || is->stop != ix->smap[i].stop) {
101 			seterr(ECorrupt, "%s: range is %lld,%lld, not %lld,%lld",
102 				sects[i]->part->name, is->start, is->stop,
103 				ix->smap[i].start, ix->smap[i].stop);
104 			goto bad;
105 		}
106 		if(is->start > is->stop) {
107 			seterr(ECorrupt, "%s: invalid range %lld,%lld",
108 				sects[i]->part->name, is->start, is->stop);
109 			goto bad;
110 		}
111 		if(is->start != last || is->start > is->stop) {
112 			seterr(ECorrupt, "%s: range %lld-%lld, but last section ended at %lld",
113 				sects[i]->part->name, is->start, is->stop, last);
114 			goto bad;
115 		}
116 		last = is->stop;
117 	}
118 	ix->tabsize = tabsize;
119 	ix->buckets = last;
120 
121 	if(initindex1(ix) < 0){
122 		freeindex(ix);
123 		return nil;
124 	}
125 
126 	ix->arenas = MKNZ(Arena*, ix->narenas);
127 	if(maparenas(ix->amap, ix->arenas, ix->narenas, ix->name) < 0){
128 		freeindex(ix);
129 		return nil;
130 	}
131 
132 	return ix;
133 }
134 
135 static int
initindex1(Index * ix)136 initindex1(Index *ix)
137 {
138 	u32int buckets;
139 
140 	ix->div = (((u64int)1 << 32) + ix->buckets - 1) / ix->buckets;
141 	buckets = (((u64int)1 << 32) - 1) / ix->div + 1;
142 	if(buckets != ix->buckets){
143 		seterr(ECorrupt, "inconsistent math for divisor and buckets in %s", ix->name);
144 		return -1;
145 	}
146 
147 	return 0;
148 }
149 
150 int
wbindex(Index * ix)151 wbindex(Index *ix)
152 {
153 	Fmt f;
154 	ZBlock *b;
155 	int i;
156 
157 	if(ix->nsects == 0){
158 		seterr(EOk, "no sections in index %s", ix->name);
159 		return -1;
160 	}
161 	b = alloczblock(ix->tabsize, 1, ix->blocksize);
162 	if(b == nil){
163 		seterr(EOk, "can't write index configuration: out of memory");
164 		return -1;
165 	}
166 	fmtzbinit(&f, b);
167 	if(outputindex(&f, ix) < 0){
168 		seterr(EOk, "can't make index configuration: table storage too small %d", ix->tabsize);
169 		freezblock(b);
170 		return -1;
171 	}
172 	for(i = 0; i < ix->nsects; i++){
173 		if(writepart(ix->sects[i]->part, ix->sects[i]->tabbase, b->data, ix->tabsize) < 0
174 		|| flushpart(ix->sects[i]->part) < 0){
175 			seterr(EOk, "can't write index: %r");
176 			freezblock(b);
177 			return -1;
178 		}
179 	}
180 	freezblock(b);
181 
182 	for(i = 0; i < ix->nsects; i++)
183 		if(wbisect(ix->sects[i]) < 0)
184 			return -1;
185 
186 	return 0;
187 }
188 
189 /*
190  * index: IndexMagic '\n' version '\n' name '\n' blocksize '\n' [V2: bitblocks '\n'] sections arenas
191  * version, blocksize: u32int
192  * name: max. ANameSize string
193  * sections, arenas: AMap
194  */
195 int
outputindex(Fmt * f,Index * ix)196 outputindex(Fmt *f, Index *ix)
197 {
198 	if(fmtprint(f, "%s\n%ud\n%s\n%ud\n", IndexMagic, ix->version, ix->name, ix->blocksize) < 0
199 	|| outputamap(f, ix->smap, ix->nsects) < 0
200 	|| outputamap(f, ix->amap, ix->narenas) < 0)
201 		return -1;
202 	return 0;
203 }
204 
205 int
parseindex(IFile * f,Index * ix)206 parseindex(IFile *f, Index *ix)
207 {
208 	AMapN amn;
209 	u32int v;
210 	char *s;
211 
212 	/*
213 	 * magic
214 	 */
215 	s = ifileline(f);
216 	if(s == nil || strcmp(s, IndexMagic) != 0){
217 		seterr(ECorrupt, "bad index magic for %s", f->name);
218 		return -1;
219 	}
220 
221 	/*
222 	 * version
223 	 */
224 	if(ifileu32int(f, &v) < 0){
225 		seterr(ECorrupt, "syntax error: bad version number in %s", f->name);
226 		return -1;
227 	}
228 	ix->version = v;
229 	if(ix->version != IndexVersion){
230 		seterr(ECorrupt, "bad version number in %s", f->name);
231 		return -1;
232 	}
233 
234 	/*
235 	 * name
236 	 */
237 	if(ifilename(f, ix->name) < 0){
238 		seterr(ECorrupt, "syntax error: bad index name in %s", f->name);
239 		return -1;
240 	}
241 
242 	/*
243 	 * block size
244 	 */
245 	if(ifileu32int(f, &v) < 0){
246 		seterr(ECorrupt, "syntax error: bad block size number in %s", f->name);
247 		return -1;
248 	}
249 	ix->blocksize = v;
250 
251 	if(parseamap(f, &amn) < 0)
252 		return -1;
253 	ix->nsects = amn.n;
254 	ix->smap = amn.map;
255 
256 	if(parseamap(f, &amn) < 0)
257 		return -1;
258 	ix->narenas = amn.n;
259 	ix->amap = amn.map;
260 
261 	return 0;
262 }
263 
264 /*
265  * initialize an entirely new index
266  */
267 Index *
newindex(char * name,ISect ** sects,int n)268 newindex(char *name, ISect **sects, int n)
269 {
270 	Index *ix;
271 	AMap *smap;
272 	u64int nb;
273 	u32int div, ub, xb, start, stop, blocksize, tabsize;
274 	int i, j;
275 
276 	if(n < 1){
277 		seterr(EOk, "creating index with no index sections");
278 		return nil;
279 	}
280 
281 	/*
282 	 * compute the total buckets available in the index,
283 	 * and the total buckets which are used.
284 	 */
285 	nb = 0;
286 	blocksize = sects[0]->blocksize;
287 	tabsize = sects[0]->tabsize;
288 	for(i = 0; i < n; i++){
289 		/*
290 		 * allow index, start, and stop to be set if index is correct
291 		 * and start and stop are what we would have picked.
292 		 * this allows calling fmtindex to reformat the index after
293 		 * replacing a bad index section with a freshly formatted one.
294 		 * start and stop are checked below.
295 		 */
296 		if(sects[i]->index[0] != '\0' && strcmp(sects[i]->index, name) != 0){
297 			seterr(EOk, "creating new index using non-empty section %s", sects[i]->name);
298 			return nil;
299 		}
300 		if(blocksize != sects[i]->blocksize){
301 			seterr(EOk, "%s has block size %d, but %s has %d",
302 				sects[0]->part->name, (int)blocksize,
303 				sects[i]->part->name, (int)sects[i]->blocksize);
304 			return nil;
305 		}
306 		if(tabsize != sects[i]->tabsize){
307 			seterr(EOk, "%s has table size %d, but %s has %d",
308 				sects[0]->part->name, (int)tabsize,
309 				sects[i]->part->name, (int)sects[i]->tabsize);
310 			return nil;
311 		}
312 		nb += sects[i]->blocks;
313 	}
314 
315 	/*
316 	 * check for duplicate names
317 	 */
318 	for(i = 0; i < n; i++){
319 		for(j = i + 1; j < n; j++){
320 			if(namecmp(sects[i]->name, sects[j]->name) == 0){
321 				seterr(EOk, "%s and %s both have section name %s",
322 					sects[i]->part->name,
323 					sects[j]->part->name,
324 					sects[i]->name);
325 				return nil;
326 			}
327 		}
328 	}
329 
330 	if(nb >= ((u64int)1 << 32)){
331 		fprint(2, "%s: index is 2^32 blocks or more; ignoring some of it\n",
332 			argv0);
333 		nb = ((u64int)1 << 32) - 1;
334 	}
335 
336 	div = (((u64int)1 << 32) + nb - 1) / nb;
337 	if(div < 100){
338 		fprint(2, "%s: index divisor %d too coarse; "
339 			"index larger than needed, ignoring some of it\n",
340 			argv0, div);
341 		div = 100;
342 		nb = (((u64int)1 << 32) - 1) / (100 - 1);
343 	}
344 	ub = (((u64int)1 << 32) - 1) / div + 1;
345 	if(ub > nb){
346 		seterr(EBug, "index initialization math wrong");
347 		return nil;
348 	}
349 	xb = nb - ub;
350 
351 	/*
352 	 * initialize each of the index sections
353 	 * and the section map table
354 	 */
355 	smap = MKNZ(AMap, n);
356 	if(smap == nil){
357 		seterr(EOk, "can't create new index: out of memory");
358 		return nil;
359 	}
360 	start = 0;
361 	for(i = 0; i < n; i++){
362 		stop = start + sects[i]->blocks - xb / n;
363 		if(i == n - 1)
364 			stop = ub;
365 
366 		if(sects[i]->start != 0 || sects[i]->stop != 0)
367 		if(sects[i]->start != start || sects[i]->stop != stop){
368 			seterr(EOk, "creating new index using non-empty section %s", sects[i]->name);
369 			return nil;
370 		}
371 
372 		sects[i]->start = start;
373 		sects[i]->stop = stop;
374 		namecp(sects[i]->index, name);
375 
376 		smap[i].start = start;
377 		smap[i].stop = stop;
378 		namecp(smap[i].name, sects[i]->name);
379 		start = stop;
380 	}
381 
382 	/*
383 	 * initialize the index itself
384 	 */
385 	ix = MKZ(Index);
386 	if(ix == nil){
387 		seterr(EOk, "can't create new index: out of memory");
388 		free(smap);
389 		return nil;
390 	}
391 	ix->version = IndexVersion;
392 	namecp(ix->name, name);
393 	ix->sects = sects;
394 	ix->smap = smap;
395 	ix->nsects = n;
396 	ix->blocksize = blocksize;
397 	ix->buckets = ub;
398 	ix->tabsize = tabsize;
399 	ix->div = div;
400 
401 	if(initindex1(ix) < 0){
402 		free(smap);
403 		return nil;
404 	}
405 
406 	return ix;
407 }
408 
409 ISect*
initisect(Part * part)410 initisect(Part *part)
411 {
412 	ISect *is;
413 	ZBlock *b;
414 	int ok;
415 
416 	b = alloczblock(HeadSize, 0, 0);
417 	if(b == nil || readpart(part, PartBlank, b->data, HeadSize) < 0){
418 		seterr(EAdmin, "can't read index section header: %r");
419 		return nil;
420 	}
421 
422 	is = MKZ(ISect);
423 	if(is == nil){
424 		freezblock(b);
425 		return nil;
426 	}
427 	is->part = part;
428 	ok = unpackisect(is, b->data);
429 	freezblock(b);
430 	if(ok < 0){
431 		seterr(ECorrupt, "corrupted index section header: %r");
432 		freeisect(is);
433 		return nil;
434 	}
435 
436 	if(is->version != ISectVersion1 && is->version != ISectVersion2){
437 		seterr(EAdmin, "unknown index section version %d", is->version);
438 		freeisect(is);
439 		return nil;
440 	}
441 
442 	return initisect1(is);
443 }
444 
445 ISect*
newisect(Part * part,u32int vers,char * name,u32int blocksize,u32int tabsize)446 newisect(Part *part, u32int vers, char *name, u32int blocksize, u32int tabsize)
447 {
448 	ISect *is;
449 	u32int tabbase;
450 
451 	is = MKZ(ISect);
452 	if(is == nil)
453 		return nil;
454 
455 	namecp(is->name, name);
456 	is->version = vers;
457 	is->part = part;
458 	is->blocksize = blocksize;
459 	is->start = 0;
460 	is->stop = 0;
461 	tabbase = (PartBlank + HeadSize + blocksize - 1) & ~(blocksize - 1);
462 	is->blockbase = (tabbase + tabsize + blocksize - 1) & ~(blocksize - 1);
463 	is->blocks = is->part->size / blocksize - is->blockbase / blocksize;
464 	is->bucketmagic = 0;
465 	if(is->version == ISectVersion2){
466 		do{
467 			is->bucketmagic = fastrand();
468 		}while(is->bucketmagic==0);
469 	}
470 	is = initisect1(is);
471 	if(is == nil)
472 		return nil;
473 
474 	return is;
475 }
476 
477 /*
478  * initialize the computed parameters for an index
479  */
480 static ISect*
initisect1(ISect * is)481 initisect1(ISect *is)
482 {
483 	u64int v;
484 
485 	is->buckmax = (is->blocksize - IBucketSize) / IEntrySize;
486 	is->blocklog = u64log2(is->blocksize);
487 	if(is->blocksize != (1 << is->blocklog)){
488 		seterr(ECorrupt, "illegal non-power-of-2 bucket size %d\n", is->blocksize);
489 		freeisect(is);
490 		return nil;
491 	}
492 	partblocksize(is->part, is->blocksize);
493 	is->tabbase = (PartBlank + HeadSize + is->blocksize - 1) & ~(is->blocksize - 1);
494 	if(is->tabbase >= is->blockbase){
495 		seterr(ECorrupt, "index section config table overlaps bucket storage");
496 		freeisect(is);
497 		return nil;
498 	}
499 	is->tabsize = is->blockbase - is->tabbase;
500 	v = is->part->size & ~(u64int)(is->blocksize - 1);
501 	if(is->blockbase + (u64int)is->blocks * is->blocksize != v){
502 		seterr(ECorrupt, "invalid blocks in index section %s", is->name);
503 		/* ZZZ what to do?
504 		freeisect(is);
505 		return nil;
506 		*/
507 	}
508 
509 	if(is->stop - is->start > is->blocks){
510 		seterr(ECorrupt, "index section overflows available space");
511 		freeisect(is);
512 		return nil;
513 	}
514 	if(is->start > is->stop){
515 		seterr(ECorrupt, "invalid index section range");
516 		freeisect(is);
517 		return nil;
518 	}
519 
520 	return is;
521 }
522 
523 int
wbisect(ISect * is)524 wbisect(ISect *is)
525 {
526 	ZBlock *b;
527 
528 	b = alloczblock(HeadSize, 1, 0);
529 	if(b == nil){
530 		/* ZZZ set error? */
531 		return -1;
532 	}
533 
534 	if(packisect(is, b->data) < 0){
535 		seterr(ECorrupt, "can't make index section header: %r");
536 		freezblock(b);
537 		return -1;
538 	}
539 	if(writepart(is->part, PartBlank, b->data, HeadSize) < 0 || flushpart(is->part) < 0){
540 		seterr(EAdmin, "can't write index section header: %r");
541 		freezblock(b);
542 		return -1;
543 	}
544 	freezblock(b);
545 
546 	return 0;
547 }
548 
549 void
freeisect(ISect * is)550 freeisect(ISect *is)
551 {
552 	if(is == nil)
553 		return;
554 	free(is);
555 }
556 
557 void
freeindex(Index * ix)558 freeindex(Index *ix)
559 {
560 	int i;
561 
562 	if(ix == nil)
563 		return;
564 	free(ix->amap);
565 	free(ix->arenas);
566 	if(ix->sects)
567 		for(i = 0; i < ix->nsects; i++)
568 			freeisect(ix->sects[i]);
569 	free(ix->sects);
570 	free(ix->smap);
571 	free(ix);
572 }
573 
574 /*
575  * write a clump to an available arena in the index
576  * and return the address of the clump within the index.
577 ZZZ question: should this distinguish between an arena
578 filling up and real errors writing the clump?
579  */
580 u64int
writeiclump(Index * ix,Clump * c,u8int * clbuf)581 writeiclump(Index *ix, Clump *c, u8int *clbuf)
582 {
583 	u64int a;
584 	int i;
585 	IAddr ia;
586 	AState as;
587 
588 	trace(TraceLump, "writeiclump enter");
589 	qlock(&ix->writing);
590 	for(i = ix->mapalloc; i < ix->narenas; i++){
591 		a = writeaclump(ix->arenas[i], c, clbuf);
592 		if(a != TWID64){
593 			ix->mapalloc = i;
594 			ia.addr = ix->amap[i].start + a;
595 			ia.type = c->info.type;
596 			ia.size = c->info.uncsize;
597 			ia.blocks = (c->info.size + ClumpSize + (1<<ABlockLog) - 1) >> ABlockLog;
598 			as.arena = ix->arenas[i];
599 			as.aa = ia.addr;
600 			as.stats = as.arena->memstats;
601 			insertscore(c->info.score, &ia, IEDirty, &as);
602 			qunlock(&ix->writing);
603 			trace(TraceLump, "writeiclump exit");
604 			return ia.addr;
605 		}
606 	}
607 	qunlock(&ix->writing);
608 
609 	seterr(EAdmin, "no space left in arenas");
610 	trace(TraceLump, "writeiclump failed");
611 	return TWID64;
612 }
613 
614 /*
615  * convert an arena index to an relative arena address
616  */
617 Arena*
amapitoa(Index * ix,u64int a,u64int * aa)618 amapitoa(Index *ix, u64int a, u64int *aa)
619 {
620 	int i, r, l, m;
621 
622 	l = 1;
623 	r = ix->narenas - 1;
624 	while(l <= r){
625 		m = (r + l) / 2;
626 		if(ix->amap[m].start <= a)
627 			l = m + 1;
628 		else
629 			r = m - 1;
630 	}
631 	l--;
632 
633 	if(a > ix->amap[l].stop){
634 for(i=0; i<ix->narenas; i++)
635 	print("arena %d: %llux - %llux\n", i, ix->amap[i].start, ix->amap[i].stop);
636 print("want arena %d for %llux\n", l, a);
637 		seterr(ECrash, "unmapped address passed to amapitoa");
638 		return nil;
639 	}
640 
641 	if(ix->arenas[l] == nil){
642 		seterr(ECrash, "unmapped arena selected in amapitoa");
643 		return nil;
644 	}
645 	*aa = a - ix->amap[l].start;
646 	return ix->arenas[l];
647 }
648 
649 /*
650  * convert an arena index to the bounds of the containing arena group.
651  */
652 Arena*
amapitoag(Index * ix,u64int a,u64int * gstart,u64int * glimit,int * g)653 amapitoag(Index *ix, u64int a, u64int *gstart, u64int *glimit, int *g)
654 {
655 	u64int aa;
656 	Arena *arena;
657 
658 	arena = amapitoa(ix, a, &aa);
659 	if(arena == nil)
660 		return nil;
661 	if(arenatog(arena, aa, gstart, glimit, g) < 0)
662 		return nil;
663 	*gstart += a - aa;
664 	*glimit += a - aa;
665 	return arena;
666 }
667 
668 int
iaddrcmp(IAddr * ia1,IAddr * ia2)669 iaddrcmp(IAddr *ia1, IAddr *ia2)
670 {
671 	return ia1->type != ia2->type
672 		|| ia1->size != ia2->size
673 		|| ia1->blocks != ia2->blocks
674 		|| ia1->addr != ia2->addr;
675 }
676 
677 /*
678  * lookup the score in the partition
679  *
680  * nothing needs to be explicitly locked:
681  * only static parts of ix are used, and
682  * the bucket is locked by the DBlock lock.
683  */
684 int
loadientry(Index * ix,u8int * score,int type,IEntry * ie)685 loadientry(Index *ix, u8int *score, int type, IEntry *ie)
686 {
687 	ISect *is;
688 	DBlock *b;
689 	IBucket ib;
690 	u32int buck;
691 	int h, ok;
692 
693 	ok = -1;
694 
695 	trace(TraceLump, "loadientry enter");
696 
697 	/*
698 	qlock(&stats.lock);
699 	stats.indexreads++;
700 	qunlock(&stats.lock);
701 	*/
702 
703 	if(!inbloomfilter(mainindex->bloom, score)){
704 		trace(TraceLump, "loadientry bloomhit");
705 		return -1;
706 	}
707 
708 	trace(TraceLump, "loadientry loadibucket");
709 	b = loadibucket(ix, score, &is, &buck, &ib);
710 	trace(TraceLump, "loadientry loadedibucket");
711 	if(b == nil)
712 		return -1;
713 
714 	if(okibucket(&ib, is) < 0){
715 		trace(TraceLump, "loadientry badbucket");
716 		goto out;
717 	}
718 
719 	h = bucklook(score, type, ib.data, ib.n);
720 	if(h & 1){
721 		h ^= 1;
722 		trace(TraceLump, "loadientry found");
723 		unpackientry(ie, &ib.data[h]);
724 		ok = 0;
725 		goto out;
726 	}
727 	trace(TraceLump, "loadientry notfound");
728 	addstat(StatBloomFalseMiss, 1);
729 out:
730 	putdblock(b);
731 	trace(TraceLump, "loadientry exit");
732 	return ok;
733 }
734 
735 int
okibucket(IBucket * ib,ISect * is)736 okibucket(IBucket *ib, ISect *is)
737 {
738 	if(ib->n <= is->buckmax)
739 		return 0;
740 
741 	seterr(EICorrupt, "corrupted disk index bucket: n=%ud max=%ud, range=[%lud,%lud)",
742 		ib->n, is->buckmax, is->start, is->stop);
743 	return -1;
744 }
745 
746 /*
747  * look for score within data;
748  * return 1 | byte index of matching index,
749  * or 0 | index of least element > score
750  */
751 int
bucklook(u8int * score,int otype,u8int * data,int n)752 bucklook(u8int *score, int otype, u8int *data, int n)
753 {
754 	int i, r, l, m, h, c, cc, type;
755 
756 	if(otype == -1)
757 		type = -1;
758 	else
759 		type = vttodisktype(otype);
760 	l = 0;
761 	r = n - 1;
762 	while(l <= r){
763 		m = (r + l) >> 1;
764 		h = m * IEntrySize;
765 		for(i = 0; i < VtScoreSize; i++){
766 			c = score[i];
767 			cc = data[h + i];
768 			if(c != cc){
769 				if(c > cc)
770 					l = m + 1;
771 				else
772 					r = m - 1;
773 				goto cont;
774 			}
775 		}
776 		cc = data[h + IEntryTypeOff];
777 		if(type != cc && type != -1){
778 			if(type > cc)
779 				l = m + 1;
780 			else
781 				r = m - 1;
782 			goto cont;
783 		}
784 		return h | 1;
785 	cont:;
786 	}
787 
788 	return l * IEntrySize;
789 }
790 
791 /*
792  * compare two IEntries; consistent with bucklook
793  */
794 int
ientrycmp(const void * vie1,const void * vie2)795 ientrycmp(const void *vie1, const void *vie2)
796 {
797 	u8int *ie1, *ie2;
798 	int i, v1, v2;
799 
800 	ie1 = (u8int*)vie1;
801 	ie2 = (u8int*)vie2;
802 	for(i = 0; i < VtScoreSize; i++){
803 		v1 = ie1[i];
804 		v2 = ie2[i];
805 		if(v1 != v2){
806 			if(v1 < v2)
807 				return -1;
808 			return 1;
809 		}
810 	}
811 	v1 = ie1[IEntryTypeOff];
812 	v2 = ie2[IEntryTypeOff];
813 	if(v1 != v2){
814 		if(v1 < v2)
815 			return -1;
816 		return 1;
817 	}
818 	return 0;
819 }
820 
821 /*
822  * find the number of the index section holding bucket #buck
823  */
824 int
indexsect0(Index * ix,u32int buck)825 indexsect0(Index *ix, u32int buck)
826 {
827 	int r, l, m;
828 
829 	l = 1;
830 	r = ix->nsects - 1;
831 	while(l <= r){
832 		m = (r + l) >> 1;
833 		if(ix->sects[m]->start <= buck)
834 			l = m + 1;
835 		else
836 			r = m - 1;
837 	}
838 	return l - 1;
839 }
840 
841 /*
842  * load the index block at bucket #buck
843  */
844 static DBlock*
loadibucket0(Index * ix,u32int buck,ISect ** pis,u32int * pbuck,IBucket * ib,int mode)845 loadibucket0(Index *ix, u32int buck, ISect **pis, u32int *pbuck, IBucket *ib, int mode)
846 {
847 	ISect *is;
848 	DBlock *b;
849 
850 	is = ix->sects[indexsect0(ix, buck)];
851 	if(buck < is->start || is->stop <= buck){
852 		seterr(EAdmin, "index lookup out of range: %ud not found in index\n", buck);
853 		return nil;
854 	}
855 
856 	buck -= is->start;
857 	if((b = getdblock(is->part, is->blockbase + ((u64int)buck << is->blocklog), mode)) == nil)
858 		return nil;
859 
860 	if(pis)
861 		*pis = is;
862 	if(pbuck)
863 		*pbuck = buck;
864 	if(ib)
865 		unpackibucket(ib, b->data, is->bucketmagic);
866 	return b;
867 }
868 
869 /*
870  * find the number of the index section holding score
871  */
872 int
indexsect1(Index * ix,u8int * score)873 indexsect1(Index *ix, u8int *score)
874 {
875 	return indexsect0(ix, hashbits(score, 32) / ix->div);
876 }
877 
878 /*
879  * load the index block responsible for score.
880  */
881 static DBlock*
loadibucket1(Index * ix,u8int * score,ISect ** pis,u32int * pbuck,IBucket * ib)882 loadibucket1(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
883 {
884 	return loadibucket0(ix, hashbits(score, 32)/ix->div, pis, pbuck, ib, OREAD);
885 }
886 
887 int
indexsect(Index * ix,u8int * score)888 indexsect(Index *ix, u8int *score)
889 {
890 	return indexsect1(ix, score);
891 }
892 
893 DBlock*
loadibucket(Index * ix,u8int * score,ISect ** pis,u32int * pbuck,IBucket * ib)894 loadibucket(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
895 {
896 	return loadibucket1(ix, score, pis, pbuck, ib);
897 }
898