1 #include "stdinc.h"
2 #include "dat.h"
3 #include "fns.h"
4
typedef struct ASum ASum;

/* queue node: one arena waiting for background checksumming by sumproc */
struct ASum
{
	Arena *arena;
	ASum *next;
};

static void sealarena(Arena *arena);
static int okarena(Arena *arena);
static int loadarena(Arena *arena);
static CIBlock *getcib(Arena *arena, int clump, int writing, CIBlock *rock);
static void putcib(Arena *arena, CIBlock *cib);
static void sumproc(void *);
static void loadcig(Arena *arena);

/* sumlock protects sumq/sumqtail; sumwait wakes sumproc when work arrives */
static QLock sumlock;
static Rendez sumwait;
static ASum *sumq;
static ASum *sumqtail;
/* block of zeros used to initialize the start of a new arena's clump storage */
static uchar zero[8192];

/* ms to sleep between checksum reads; SleepForever pauses summing entirely */
int arenasumsleeptime;
28
29 int
initarenasum(void)30 initarenasum(void)
31 {
32 needzeroscore(); /* OS X */
33
34 qlock(&sumlock);
35 sumwait.l = &sumlock;
36 qunlock(&sumlock);
37
38 if(vtproc(sumproc, nil) < 0){
39 seterr(EOk, "can't start arena checksum slave: %r");
40 return -1;
41 }
42 return 0;
43 }
44
45 /*
46 * make an Arena, and initialize it based upon the disk header and trailer.
47 */
48 Arena*
initarena(Part * part,u64int base,u64int size,u32int blocksize)49 initarena(Part *part, u64int base, u64int size, u32int blocksize)
50 {
51 Arena *arena;
52
53 arena = MKZ(Arena);
54 arena->part = part;
55 arena->blocksize = blocksize;
56 arena->clumpmax = arena->blocksize / ClumpInfoSize;
57 arena->base = base + blocksize;
58 arena->size = size - 2 * blocksize;
59
60 if(loadarena(arena) < 0){
61 seterr(ECorrupt, "arena header or trailer corrupted");
62 freearena(arena);
63 return nil;
64 }
65 if(okarena(arena) < 0){
66 freearena(arena);
67 return nil;
68 }
69
70 if(arena->diskstats.sealed && scorecmp(zeroscore, arena->score)==0)
71 sealarena(arena);
72
73 return arena;
74 }
75
76 void
freearena(Arena * arena)77 freearena(Arena *arena)
78 {
79 if(arena == nil)
80 return;
81 free(arena);
82 }
83
84 Arena*
newarena(Part * part,u32int vers,char * name,u64int base,u64int size,u32int blocksize)85 newarena(Part *part, u32int vers, char *name, u64int base, u64int size, u32int blocksize)
86 {
87 int bsize;
88 Arena *arena;
89
90 if(nameok(name) < 0){
91 seterr(EOk, "illegal arena name", name);
92 return nil;
93 }
94 arena = MKZ(Arena);
95 arena->part = part;
96 arena->version = vers;
97 if(vers == ArenaVersion4)
98 arena->clumpmagic = _ClumpMagic;
99 else{
100 do
101 arena->clumpmagic = fastrand();
102 while(arena->clumpmagic==_ClumpMagic || arena->clumpmagic==0);
103 }
104 arena->blocksize = blocksize;
105 arena->clumpmax = arena->blocksize / ClumpInfoSize;
106 arena->base = base + blocksize;
107 arena->size = size - 2 * blocksize;
108
109 namecp(arena->name, name);
110
111 bsize = sizeof zero;
112 if(bsize > arena->blocksize)
113 bsize = arena->blocksize;
114
115 if(wbarena(arena)<0 || wbarenahead(arena)<0
116 || writepart(arena->part, arena->base, zero, bsize)<0){
117 freearena(arena);
118 return nil;
119 }
120
121 return arena;
122 }
123
124 int
readclumpinfo(Arena * arena,int clump,ClumpInfo * ci)125 readclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
126 {
127 CIBlock *cib, r;
128
129 cib = getcib(arena, clump, 0, &r);
130 if(cib == nil)
131 return -1;
132 unpackclumpinfo(ci, &cib->data->data[cib->offset]);
133 putcib(arena, cib);
134 return 0;
135 }
136
/*
 * read up to n consecutive clump directory entries starting at clump.
 * returns the number of leading entries successfully read, which may
 * be smaller than n if part of the directory is unreadable.
 */
int
readclumpinfos(Arena *arena, int clump, ClumpInfo *cis, int n)
{
	CIBlock *cib, r;
	int i;

	/*
	 * because the clump blocks are laid out
	 * in reverse order at the end of the arena,
	 * it can be a few percent faster to read
	 * the clumps backwards, which reads the
	 * disk blocks forwards.
	 */
	for(i = n-1; i >= 0; i--){
		cib = getcib(arena, clump + i, 0, &r);
		if(cib == nil){
			/* entry i failed: only the first i entries count as read */
			n = i;
			continue;
		}
		unpackclumpinfo(&cis[i], &cib->data->data[cib->offset]);
		putcib(arena, cib);
	}
	return n;
}
161
162 /*
163 * write directory information for one clump
164 * must be called the arena locked
165 */
166 int
writeclumpinfo(Arena * arena,int clump,ClumpInfo * ci)167 writeclumpinfo(Arena *arena, int clump, ClumpInfo *ci)
168 {
169 CIBlock *cib, r;
170
171 cib = getcib(arena, clump, 1, &r);
172 if(cib == nil)
173 return -1;
174 dirtydblock(cib->data, DirtyArenaCib);
175 packclumpinfo(ci, &cib->data->data[cib->offset]);
176 putcib(arena, cib);
177 return 0;
178 }
179
180 u64int
arenadirsize(Arena * arena,u32int clumps)181 arenadirsize(Arena *arena, u32int clumps)
182 {
183 return ((clumps / arena->clumpmax) + 1) * arena->blocksize;
184 }
185
/*
 * read a clump of data
 * n is a hint of the size of the data, not including the header
 * make sure it won't run off the end, then return the number of bytes actually read
 */
u32int
readarena(Arena *arena, u64int aa, u8int *buf, long n)
{
	DBlock *b;
	u64int a;
	u32int blocksize, off, m;
	long nn;

	if(n == 0)
		return -1;

	/* a = end of clump storage; the directory occupies the space beyond */
	qlock(&arena->lock);
	a = arena->size - arenadirsize(arena, arena->memstats.clumps);
	qunlock(&arena->lock);
	if(aa >= a){
		seterr(EOk, "reading beyond arena clump storage: clumps=%d aa=%lld a=%lld -1 clumps=%lld\n", arena->memstats.clumps, aa, a, arena->size - arenadirsize(arena, arena->memstats.clumps - 1));
		return -1;
	}
	/* clip the read so it stays within clump storage */
	if(aa + n > a)
		n = a - aa;

	/* translate arena offset to partition address and block-align it */
	blocksize = arena->blocksize;
	a = arena->base + aa;
	off = a & (blocksize - 1);	/* assumes blocksize is a power of two */
	a -= off;
	nn = 0;
	/* copy out block by block through the disk-block cache */
	for(;;){
		b = getdblock(arena->part, a, OREAD);
		if(b == nil)
			return -1;
		m = blocksize - off;
		if(m > n - nn)
			m = n - nn;
		memmove(&buf[nn], &b->data[off], m);
		putdblock(b);
		nn += m;
		if(nn == n)
			break;
		off = 0;	/* later blocks are copied from their start */
		a += blocksize;
	}
	return n;
}
234
/*
 * write some data to the clump section at a given offset
 * used to fix up corrupted arenas.
 * returns the number of bytes written, or -1 (as u32int) on error.
 */
u32int
writearena(Arena *arena, u64int aa, u8int *clbuf, u32int n)
{
	DBlock *b;
	u64int a;
	u32int blocksize, off, m;
	long nn;
	int ok;

	if(n == 0)
		return -1;

	/* the write must lie entirely inside clump storage, before the directory */
	qlock(&arena->lock);
	a = arena->size - arenadirsize(arena, arena->memstats.clumps);
	if(aa >= a || aa + n > a){
		qunlock(&arena->lock);
		seterr(EOk, "writing beyond arena clump storage");
		return -1;
	}

	blocksize = arena->blocksize;
	a = arena->base + aa;
	off = a & (blocksize - 1);	/* assumes blocksize is a power of two */
	a -= off;
	nn = 0;
	for(;;){
		/* a partially-overwritten block must be read-modify-written */
		b = getdblock(arena->part, a, off != 0 || off + n < blocksize ? ORDWR : OWRITE);
		if(b == nil){
			qunlock(&arena->lock);
			return -1;
		}
		dirtydblock(b, DirtyArena);
		m = blocksize - off;
		if(m > n - nn)
			m = n - nn;
		memmove(&b->data[off], &clbuf[nn], m);
		ok = 0;	/* NOTE(review): dead check below — ok is never set negative; looks like a leftover from a synchronous-write era, confirm */
		putdblock(b);
		if(ok < 0){
			qunlock(&arena->lock);
			return -1;
		}
		nn += m;
		if(nn == n)
			break;
		off = 0;
		a += blocksize;
	}
	qunlock(&arena->lock);
	return n;
}
290
/*
 * allocate space for the clump and write it,
 * updating the arena directory
 ZZZ question: should this distinguish between an arena
 filling up and real errors writing the clump?
 * returns the arena-relative address of the new clump, or TWID64
 * if the arena is (or becomes) sealed or a write fails.
 */
u64int
writeaclump(Arena *arena, Clump *c, u8int *clbuf)
{
	DBlock *b;
	u64int a, aa;
	u32int clump, n, nn, m, off, blocksize;
	int ok;

	/* total bytes to lay down: clump header + data + trailing magic word */
	n = c->info.size + ClumpSize + U32Size;
	qlock(&arena->lock);
	aa = arena->memstats.used;	/* new clump goes at the end of used space */
	/* seal the arena if this clump plus directory growth would not fit */
	if(arena->memstats.sealed
	|| aa + n + U32Size + arenadirsize(arena, arena->memstats.clumps + 1) > arena->size){
		if(!arena->memstats.sealed){
			logerr(EOk, "seal memstats %s", arena->name);
			arena->memstats.sealed = 1;
			wbarena(arena);
		}
		qunlock(&arena->lock);
		return TWID64;
	}
	if(packclump(c, &clbuf[0], arena->clumpmagic) < 0){
		qunlock(&arena->lock);
		return TWID64;
	}

	/*
	 * write the data out one block at a time
	 */
	blocksize = arena->blocksize;
	a = arena->base + aa;
	off = a & (blocksize - 1);	/* assumes blocksize is a power of two */
	a -= off;
	nn = 0;
	for(;;){
		/* a partial first block must be read-modify-written */
		b = getdblock(arena->part, a, off != 0 ? ORDWR : OWRITE);
		if(b == nil){
			qunlock(&arena->lock);
			return TWID64;
		}
		dirtydblock(b, DirtyArena);
		m = blocksize - off;
		if(m > n - nn)
			m = n - nn;
		memmove(&b->data[off], &clbuf[nn], m);
		ok = 0;	/* NOTE(review): dead check below — ok is never set negative; confirm intent */
		putdblock(b);
		if(ok < 0){
			qunlock(&arena->lock);
			return TWID64;
		}
		nn += m;
		if(nn == n)
			break;
		off = 0;
		a += blocksize;
	}

	/* account for the clump; used excludes the trailing magic (U32Size) */
	arena->memstats.used += c->info.size + ClumpSize;
	arena->memstats.uncsize += c->info.uncsize;
	if(c->info.size < c->info.uncsize)
		arena->memstats.cclumps++;

	clump = arena->memstats.clumps;
	/* first clump of a new clump-info group: record its data offset */
	if(clump % ArenaCIGSize == 0){
		if(arena->cig == nil){
			loadcig(arena);
			if(arena->cig == nil)
				goto NoCIG;
		}
		/* add aa as start of next cig */
		if(clump/ArenaCIGSize != arena->ncig){
			fprint(2, "bad arena cig computation %s: writing clump %d but %d cigs\n",
				arena->name, clump, arena->ncig);
			arena->ncig = -1;	/* disable cig tracking for this arena */
			vtfree(arena->cig);
			arena->cig = nil;
			goto NoCIG;
		}
		arena->cig = vtrealloc(arena->cig, (arena->ncig+1)*sizeof arena->cig[0]);
		arena->cig[arena->ncig++].offset = aa;
	}
NoCIG:
	arena->memstats.clumps++;

	if(arena->memstats.clumps == 0)
		sysfatal("clumps wrapped");
	arena->wtime = now();
	if(arena->ctime == 0)
		arena->ctime = arena->wtime;

	/* record the clump in the directory and flush the trailer stats */
	writeclumpinfo(arena, clump, &c->info);
	wbarena(arena);

	qunlock(&arena->lock);

	return aa;
}
395
396 int
atailcmp(ATailStats * a,ATailStats * b)397 atailcmp(ATailStats *a, ATailStats *b)
398 {
399 /* good test */
400 if(a->used < b->used)
401 return -1;
402 if(a->used > b->used)
403 return 1;
404
405 /* suspect tests - why order this way? (no one cares) */
406 if(a->clumps < b->clumps)
407 return -1;
408 if(a->clumps > b->clumps)
409 return 1;
410 if(a->cclumps < b->cclumps)
411 return -1;
412 if(a->cclumps > b->cclumps)
413 return 1;
414 if(a->uncsize < b->uncsize)
415 return -1;
416 if(a->uncsize > b->uncsize)
417 return 1;
418 if(a->sealed < b->sealed)
419 return -1;
420 if(a->sealed > b->sealed)
421 return 1;
422
423 /* everything matches */
424 return 0;
425 }
426
/*
 * push the in-memory tail statistics out to disk, up to and including
 * the arena containing the address recorded in *as.
 */
void
setatailstate(AState *as)
{
	int i, j, osealed;
	Arena *a;
	Index *ix;

	trace(0, "setatailstate %s 0x%llux clumps %d", as->arena->name, as->aa, as->stats.clumps);

	/*
	 * Look up as->arena to find index.
	 */
	needmainindex();	/* OS X linker */
	ix = mainindex;
	for(i=0; i<ix->narenas; i++)
		if(ix->arenas[i] == as->arena)
			break;
	/* sanity: the address must fall inside the arena's mapped range */
	if(i==ix->narenas || as->aa < ix->amap[i].start || as->aa >= ix->amap[i].stop || as->arena != ix->arenas[i]){
		fprint(2, "funny settailstate 0x%llux\n", as->aa);
		return;
	}

	/*
	 * arenas before i get their full memstats written back
	 * (presumably they are completely flushed by now — the recorded
	 * state applies only to arena i, which gets exactly as->stats).
	 */
	for(j=0; j<=i; j++){
		a = ix->arenas[j];
		if(atailcmp(&a->diskstats, &a->memstats) == 0)
			continue;	/* disk already matches memory */
		qlock(&a->lock);
		osealed = a->diskstats.sealed;
		if(j == i)
			a->diskstats = as->stats;
		else
			a->diskstats = a->memstats;
		wbarena(a);
		/* if this write sealed the arena, queue it for checksumming */
		if(a->diskstats.sealed != osealed && !a->inqueue)
			sealarena(a);
		qunlock(&a->lock);
	}
}
465
466 /*
467 * once sealed, an arena never has any data added to it.
468 * it should only be changed to fix errors.
469 * this also syncs the clump directory.
470 */
471 static void
sealarena(Arena * arena)472 sealarena(Arena *arena)
473 {
474 arena->inqueue = 1;
475 backsumarena(arena);
476 }
477
478 void
backsumarena(Arena * arena)479 backsumarena(Arena *arena)
480 {
481 ASum *as;
482
483 as = MK(ASum);
484 if(as == nil)
485 return;
486 qlock(&sumlock);
487 as->arena = arena;
488 as->next = nil;
489 if(sumq)
490 sumqtail->next = as;
491 else
492 sumq = as;
493 sumqtail = as;
494 /*
495 * Might get here while initializing arenas,
496 * before initarenasum has been called.
497 */
498 if(sumwait.l)
499 rwakeup(&sumwait);
500 qunlock(&sumlock);
501 }
502
503 static void
sumproc(void * unused)504 sumproc(void *unused)
505 {
506 ASum *as;
507 Arena *arena;
508
509 USED(unused);
510
511 for(;;){
512 qlock(&sumlock);
513 while(sumq == nil)
514 rsleep(&sumwait);
515 as = sumq;
516 sumq = as->next;
517 qunlock(&sumlock);
518 arena = as->arena;
519 free(as);
520 sumarena(arena);
521 }
522 }
523
/*
 * compute the SHA1 score of an entire arena (header, data, directory,
 * trailer) and write it back via the trailer. run from the background
 * sumproc after an arena is sealed.
 */
void
sumarena(Arena *arena)
{
	ZBlock *b;
	DigestState s;
	u64int a, e;
	u32int bs;
	int t;
	u8int score[VtScoreSize];

	/* read in chunks of MaxIoSize, but never less than one arena block */
	bs = MaxIoSize;
	if(bs < arena->blocksize)
		bs = arena->blocksize;

	/*
	 * read & sum all blocks except the last one
	 */
	flushdcache();
	memset(&s, 0, sizeof s);
	b = alloczblock(bs, 0, arena->part->blocksize);
	e = arena->base + arena->size;	/* partition address of the trailer block */
	for(a = arena->base - arena->blocksize; a + arena->blocksize <= e; a += bs){
		disksched();
		/* SleepForever pauses summing until the scheduler re-enables it */
		while((t=arenasumsleeptime) == SleepForever){
			sleep(1000);
			disksched();
		}
		sleep(t);	/* throttle to limit disk load */
		if(a + bs > e)
			bs = arena->blocksize;	/* shrink the final read to block size */
		if(readpart(arena->part, a, b->data, bs) < 0)
			goto ReadErr;
		addstat(StatSumRead, 1);
		addstat(StatSumReadBytes, bs);
		sha1(b->data, bs, nil, &s);
	}

	/*
	 * the last one is special, since it may already have the checksum included
	 */
	bs = arena->blocksize;
	if(readpart(arena->part, e, b->data, bs) < 0){
ReadErr:
		logerr(EOk, "sumarena can't sum %s, read at %lld failed: %r", arena->name, a);
		freezblock(b);
		return;
	}
	addstat(StatSumRead, 1);
	addstat(StatSumReadBytes, bs);

	/* sum the trailer with zeroscore in place of the stored score */
	sha1(b->data, bs-VtScoreSize, nil, &s);
	sha1(zeroscore, VtScoreSize, nil, &s);
	sha1(nil, 0, score, &s);

	/*
	 * check for no checksum or the same
	 */
	if(scorecmp(score, &b->data[bs - VtScoreSize]) != 0
	&& scorecmp(zeroscore, &b->data[bs - VtScoreSize]) != 0)
		logerr(EOk, "overwriting mismatched checksums for arena=%s, found=%V calculated=%V",
			arena->name, &b->data[bs - VtScoreSize], score);
	freezblock(b);

	/* install the new score and rewrite the trailer */
	qlock(&arena->lock);
	scorecp(arena->score, score);
	wbarena(arena);
	qunlock(&arena->lock);
}
592
593 /*
594 * write the arena trailer block to the partition
595 */
596 int
wbarena(Arena * arena)597 wbarena(Arena *arena)
598 {
599 DBlock *b;
600 int bad;
601
602 if((b = getdblock(arena->part, arena->base + arena->size, OWRITE)) == nil){
603 logerr(EAdmin, "can't write arena trailer: %r");
604 return -1;
605 }
606 dirtydblock(b, DirtyArenaTrailer);
607 bad = okarena(arena)<0 || packarena(arena, b->data)<0;
608 scorecp(b->data + arena->blocksize - VtScoreSize, arena->score);
609 putdblock(b);
610 if(bad)
611 return -1;
612 return 0;
613 }
614
615 int
wbarenahead(Arena * arena)616 wbarenahead(Arena *arena)
617 {
618 ZBlock *b;
619 ArenaHead head;
620 int bad;
621
622 namecp(head.name, arena->name);
623 head.version = arena->version;
624 head.size = arena->size + 2 * arena->blocksize;
625 head.blocksize = arena->blocksize;
626 head.clumpmagic = arena->clumpmagic;
627 b = alloczblock(arena->blocksize, 1, arena->part->blocksize);
628 if(b == nil){
629 logerr(EAdmin, "can't write arena header: %r");
630 /* ZZZ add error message? */
631 return -1;
632 }
633 /*
634 * this writepart is okay because it only happens
635 * during initialization.
636 */
637 bad = packarenahead(&head, b->data)<0 ||
638 writepart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize)<0 ||
639 flushpart(arena->part)<0;
640 freezblock(b);
641 if(bad)
642 return -1;
643 return 0;
644 }
645
/*
 * read the arena header and trailer blocks from disk.
 * the trailer is authoritative and must unpack cleanly;
 * the header is only cross-checked, with mismatches logged.
 */
static int
loadarena(Arena *arena)
{
	ArenaHead head;
	ZBlock *b;

	b = alloczblock(arena->blocksize, 0, arena->part->blocksize);
	if(b == nil)
		return -1;
	/* trailer sits in the block just past the arena data */
	if(readpart(arena->part, arena->base + arena->size, b->data, arena->blocksize) < 0){
		freezblock(b);
		return -1;
	}
	if(unpackarena(arena, b->data) < 0){
		freezblock(b);
		return -1;
	}
	if(arena->version != ArenaVersion4 && arena->version != ArenaVersion5){
		seterr(EAdmin, "unknown arena version %d", arena->version);
		freezblock(b);
		return -1;
	}
	/* the arena score is stored at the very end of the trailer block */
	scorecp(arena->score, &b->data[arena->blocksize - VtScoreSize]);

	/* header problems are logged but not fatal: note the return 0 below */
	if(readpart(arena->part, arena->base - arena->blocksize, b->data, arena->blocksize) < 0){
		logerr(EAdmin, "can't read arena header: %r");
		freezblock(b);
		return 0;
	}
	if(unpackarenahead(&head, b->data) < 0)
		logerr(ECorrupt, "corrupted arena header: %r");
	else if(namecmp(arena->name, head.name)!=0
	|| arena->clumpmagic != head.clumpmagic
	|| arena->version != head.version
	|| arena->blocksize != head.blocksize
	|| arena->size + 2 * arena->blocksize != head.size){
		/* re-test each field so the log says which one disagreed */
		if(namecmp(arena->name, head.name)!=0)
			logerr(ECorrupt, "arena tail name %s head %s",
				arena->name, head.name);
		else if(arena->clumpmagic != head.clumpmagic)
			logerr(ECorrupt, "arena tail clumpmagic 0x%lux head 0x%lux",
				(ulong)arena->clumpmagic, (ulong)head.clumpmagic);
		else if(arena->version != head.version)
			logerr(ECorrupt, "arena tail version %d head version %d",
				arena->version, head.version);
		else if(arena->blocksize != head.blocksize)
			logerr(ECorrupt, "arena tail block size %d head %d",
				arena->blocksize, head.blocksize);
		else if(arena->size+2*arena->blocksize != head.size)
			logerr(ECorrupt, "arena tail size %lud head %lud",
				(ulong)arena->size+2*arena->blocksize, head.size);
		else
			logerr(ECorrupt, "arena header inconsistent with arena data");
	}
	freezblock(b);

	return 0;
}
707
708 static int
okarena(Arena * arena)709 okarena(Arena *arena)
710 {
711 u64int dsize;
712 int ok;
713
714 ok = 0;
715 dsize = arenadirsize(arena, arena->diskstats.clumps);
716 if(arena->diskstats.used + dsize > arena->size){
717 seterr(ECorrupt, "arena %s used > size", arena->name);
718 ok = -1;
719 }
720
721 if(arena->diskstats.cclumps > arena->diskstats.clumps)
722 logerr(ECorrupt, "arena %s has more compressed clumps than total clumps", arena->name);
723
724 /*
725 * This need not be true if some of the disk is corrupted.
726 *
727 if(arena->diskstats.uncsize + arena->diskstats.clumps * ClumpSize + arena->blocksize < arena->diskstats.used)
728 logerr(ECorrupt, "arena %s uncompressed size inconsistent with used space %lld %d %lld", arena->name, arena->diskstats.uncsize, arena->diskstats.clumps, arena->diskstats.used);
729 */
730
731 /*
732 * this happens; it's harmless.
733 *
734 if(arena->ctime > arena->wtime)
735 logerr(ECorrupt, "arena %s creation time after last write time", arena->name);
736 */
737 return ok;
738 }
739
740 static CIBlock*
getcib(Arena * arena,int clump,int writing,CIBlock * rock)741 getcib(Arena *arena, int clump, int writing, CIBlock *rock)
742 {
743 int mode;
744 CIBlock *cib;
745 u32int block, off;
746
747 if(clump >= arena->memstats.clumps){
748 seterr(EOk, "clump directory access out of range");
749 return nil;
750 }
751 block = clump / arena->clumpmax;
752 off = (clump - block * arena->clumpmax) * ClumpInfoSize;
753 cib = rock;
754 cib->block = block;
755 cib->offset = off;
756
757 if(writing){
758 if(off == 0 && clump == arena->memstats.clumps-1)
759 mode = OWRITE;
760 else
761 mode = ORDWR;
762 }else
763 mode = OREAD;
764
765 cib->data = getdblock(arena->part,
766 arena->base + arena->size - (block + 1) * arena->blocksize, mode);
767 if(cib->data == nil)
768 return nil;
769 return cib;
770 }
771
772 static void
putcib(Arena * arena,CIBlock * cib)773 putcib(Arena *arena, CIBlock *cib)
774 {
775 USED(arena);
776
777 putdblock(cib->data);
778 cib->data = nil;
779 }
780
781
782 /*
783 * For index entry readahead purposes, the arenas are
784 * broken into smaller subpieces, called clump info groups
785 * or cigs. Each cig has ArenaCIGSize clumps (ArenaCIGSize
786 * is chosen to make the index entries take up about half
787 * a megabyte). The index entries do not contain enough
788 * information to determine what the clump index is for
789 * a given address in an arena. That info is needed both for
790 * figuring out which clump group an address belongs to
791 * and for prefetching a clump group's index entries from
792 * the arena table of contents. The first time clump groups
793 * are accessed, we scan the entire arena table of contents
794 * (which might be 10s of megabytes), recording the data
795 * offset of each clump group.
796 */
797
/*
 * load clump info group information by scanning entire toc.
 * on success sets arena->cig/ncig; on failure sets ncig = -1
 * so later calls don't retry. must be called with the arena locked.
 */
static void
loadcig(Arena *arena)
{
	u32int i, j, ncig, nci;
	ArenaCIG *cig;
	ClumpInfo *ci;
	u64int offset;
	int ms;

	/* already loaded, or a previous attempt failed */
	if(arena->cig || arena->ncig < 0)
		return;

//	fprint(2, "loadcig %s\n", arena->name);

	ncig = (arena->memstats.clumps+ArenaCIGSize-1) / ArenaCIGSize;
	if(ncig == 0){
		/* empty arena: token allocation so cig != nil marks success */
		arena->cig = vtmalloc(1);
		arena->ncig = 0;
		return;
	}

	ms = msec();
	cig = vtmalloc(ncig*sizeof cig[0]);
	ci = vtmalloc(ArenaCIGSize*sizeof ci[0]);
	offset = 0;
	/* walk the toc a group at a time, accumulating each group's data offset */
	for(i=0; i<ncig; i++){
		nci = readclumpinfos(arena, i*ArenaCIGSize, ci, ArenaCIGSize);
		cig[i].offset = offset;
		for(j=0; j<nci; j++)
			offset += ClumpSize + ci[j].size;
		if(nci < ArenaCIGSize){
			/* a short group is legal only at the very end */
			if(i != ncig-1){
				vtfree(ci);
				vtfree(cig);
				arena->ncig = -1;	/* remember the failure */
				fprint(2, "loadcig %s: got %ud cigs, expected %ud\n", arena->name, i+1, ncig);
				goto out;
			}
		}
	}
	vtfree(ci);

	arena->ncig = ncig;
	arena->cig = cig;

out:
	ms = msec() - ms;
	addstat2(StatCigLoad, 1, StatCigLoadTime, ms);
}
850
/*
 * convert arena address into arena group + data boundaries.
 * returns 0 with *g, *gstart, *glimit filled in, or -1 if the
 * group table can't be loaded or the arena is empty.
 */
int
arenatog(Arena *arena, u64int addr, u64int *gstart, u64int *glimit, int *g)
{
	int r, l, m;

	qlock(&arena->lock);
	if(arena->cig == nil)
		loadcig(arena);
	if(arena->cig == nil || arena->ncig == 0){
		qunlock(&arena->lock);
		return -1;
	}

	/* binary search for the last group whose offset is <= addr */
	l = 1;	/* group 0 starts at offset 0 and always qualifies */
	r = arena->ncig - 1;
	while(l <= r){
		m = (r + l) / 2;
		if(arena->cig[m].offset <= addr)
			l = m + 1;
		else
			r = m - 1;
	}
	l--;	/* l is now the group containing addr */

	*g = l;
	*gstart = arena->cig[l].offset;
	/* a group's data runs to the next group's offset, or to used space */
	if(l+1 < arena->ncig)
		*glimit = arena->cig[l+1].offset;
	else
		*glimit = arena->memstats.used;
	qunlock(&arena->lock);
	return 0;
}
887
/*
 * load the clump info for group g into the index entries.
 * returns the number of entries filled, or -1 on error.
 * entries must have room for at least ArenaCIGSize records.
 */
int
asumload(Arena *arena, int g, IEntry *entries, int nentries)
{
	int i, base, limit;
	u64int addr;
	ClumpInfo ci;
	IEntry *ie;

	if(nentries < ArenaCIGSize){
		fprint(2, "asking for too few entries\n");
		return -1;
	}

	qlock(&arena->lock);
	if(arena->cig == nil)
		loadcig(arena);
	if(arena->cig == nil || arena->ncig == 0 || g >= arena->ncig){
		qunlock(&arena->lock);
		return -1;
	}

	/* addr is group-relative; caller presumably adds the group's start offset (see arenatog) — confirm */
	addr = 0;
	base = g*ArenaCIGSize;
	limit = base + ArenaCIGSize;
	if(base > arena->memstats.clumps)
		base = arena->memstats.clumps;
	ie = entries;
	for(i=base; i<limit; i++){
		if(readclumpinfo(arena, i, &ci) < 0)
			break;
		/* skip clumps marked corrupt, but still account for their space */
		if(ci.type != VtCorruptType){
			scorecp(ie->score, ci.score);
			ie->ia.type = ci.type;
			ie->ia.size = ci.uncsize;
			/* stored size rounded up to whole address-space blocks */
			ie->ia.blocks = (ci.size + ClumpSize + (1<<ABlockLog) - 1) >> ABlockLog;
			ie->ia.addr = addr;
			ie++;
		}
		addr += ClumpSize + ci.size;
	}
	qunlock(&arena->lock);
	return ie - entries;
}
934