1 /*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 */ 7 8 #if defined(LIBC_SCCS) && !defined(lint) 9 static char sccsid[] = "@(#)mpool.c 8.2 (Berkeley) 02/21/94"; 10 #endif /* LIBC_SCCS and not lint */ 11 12 #include <sys/param.h> 13 #include <sys/stat.h> 14 15 #include <errno.h> 16 #include <stdio.h> 17 #include <stdlib.h> 18 #include <string.h> 19 #include <unistd.h> 20 21 #include <db.h> 22 #define __MPOOLINTERFACE_PRIVATE 23 #include "mpool.h" 24 25 static BKT *mpool_bkt __P((MPOOL *)); 26 static BKT *mpool_look __P((MPOOL *, pgno_t)); 27 static int mpool_write __P((MPOOL *, BKT *)); 28 #ifdef DEBUG 29 static void __mpoolerr __P((const char *fmt, ...)); 30 #endif 31 32 /* 33 * MPOOL_OPEN -- initialize a memory pool. 34 * 35 * Parameters: 36 * key: Shared buffer key. 37 * fd: File descriptor. 38 * pagesize: File page size. 39 * maxcache: Max number of cached pages. 40 * 41 * Returns: 42 * MPOOL pointer, NULL on error. 43 */ 44 MPOOL * 45 mpool_open(key, fd, pagesize, maxcache) 46 DBT *key; 47 int fd; 48 pgno_t pagesize, maxcache; 49 { 50 struct stat sb; 51 MPOOL *mp; 52 int entry; 53 54 if (fstat(fd, &sb)) 55 return (NULL); 56 /* XXX 57 * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so 58 * that stat(2) returns true for ISSOCK on pipes. Until then, this is 59 * fairly close. 60 */ 61 if (!S_ISREG(sb.st_mode)) { 62 errno = ESPIPE; 63 return (NULL); 64 } 65 66 if ((mp = (MPOOL *)malloc(sizeof(MPOOL))) == NULL) 67 return (NULL); 68 mp->free.cnext = mp->free.cprev = (BKT *)&mp->free; 69 mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru; 70 for (entry = 0; entry < HASHSIZE; ++entry) 71 mp->hashtable[entry].hnext = mp->hashtable[entry].hprev = 72 mp->hashtable[entry].cnext = mp->hashtable[entry].cprev = 73 (BKT *)&mp->hashtable[entry]; 74 mp->curcache = 0; 75 mp->maxcache = maxcache; 76 mp->pagesize = pagesize; 77 mp->npages = sb.st_size / pagesize; 78 mp->fd = fd; 79 mp->pgcookie = NULL; 80 mp->pgin = mp->pgout = NULL; 81 82 #ifdef STATISTICS 83 mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush = 84 mp->pageget = mp->pagenew = mp->pageput = mp->pageread = 85 mp->pagewrite = 0; 86 #endif 87 return (mp); 88 } 89 90 /* 91 * MPOOL_FILTER -- initialize input/output filters. 92 * 93 * Parameters: 94 * pgin: Page in conversion routine. 95 * pgout: Page out conversion routine. 96 * pgcookie: Cookie for page in/out routines. 97 */ 98 void 99 mpool_filter(mp, pgin, pgout, pgcookie) 100 MPOOL *mp; 101 void (*pgin) __P((void *, pgno_t, void *)); 102 void (*pgout) __P((void *, pgno_t, void *)); 103 void *pgcookie; 104 { 105 mp->pgin = pgin; 106 mp->pgout = pgout; 107 mp->pgcookie = pgcookie; 108 } 109 110 /* 111 * MPOOL_NEW -- get a new page 112 * 113 * Parameters: 114 * mp: mpool cookie 115 * pgnoadddr: place to store new page number 116 * Returns: 117 * RET_ERROR, RET_SUCCESS 118 */ 119 void * 120 mpool_new(mp, pgnoaddr) 121 MPOOL *mp; 122 pgno_t *pgnoaddr; 123 { 124 BKT *b; 125 BKTHDR *hp; 126 127 #ifdef STATISTICS 128 ++mp->pagenew; 129 #endif 130 /* 131 * Get a BKT from the cache. Assign a new page number, attach it to 132 * the hash and lru chains and return. 133 */ 134 if ((b = mpool_bkt(mp)) == NULL) 135 return (NULL); 136 *pgnoaddr = b->pgno = mp->npages++; 137 b->flags = MPOOL_PINNED; 138 inshash(b, b->pgno); 139 inschain(b, &mp->lru); 140 return (b->page); 141 } 142 143 /* 144 * MPOOL_GET -- get a page from the pool 145 * 146 * Parameters: 147 * mp: mpool cookie 148 * pgno: page number 149 * flags: not used 150 * 151 * Returns: 152 * RET_ERROR, RET_SUCCESS 153 */ 154 void * 155 mpool_get(mp, pgno, flags) 156 MPOOL *mp; 157 pgno_t pgno; 158 u_int flags; /* XXX not used? */ 159 { 160 BKT *b; 161 BKTHDR *hp; 162 off_t off; 163 int nr; 164 165 /* 166 * If asking for a specific page that is already in the cache, find 167 * it and return it. 168 */ 169 if (b = mpool_look(mp, pgno)) { 170 #ifdef STATISTICS 171 ++mp->pageget; 172 #endif 173 #ifdef DEBUG 174 if (b->flags & MPOOL_PINNED) 175 __mpoolerr("mpool_get: page %d already pinned", 176 b->pgno); 177 #endif 178 rmchain(b); 179 inschain(b, &mp->lru); 180 b->flags |= MPOOL_PINNED; 181 return (b->page); 182 } 183 184 /* Not allowed to retrieve a non-existent page. */ 185 if (pgno >= mp->npages) { 186 errno = EINVAL; 187 return (NULL); 188 } 189 190 /* Get a page from the cache. */ 191 if ((b = mpool_bkt(mp)) == NULL) 192 return (NULL); 193 b->pgno = pgno; 194 b->flags = MPOOL_PINNED; 195 196 #ifdef STATISTICS 197 ++mp->pageread; 198 #endif 199 /* Read in the contents. */ 200 off = mp->pagesize * pgno; 201 if (lseek(mp->fd, off, SEEK_SET) != off) 202 return (NULL); 203 if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) { 204 if (nr >= 0) 205 errno = EFTYPE; 206 return (NULL); 207 } 208 if (mp->pgin) 209 (mp->pgin)(mp->pgcookie, b->pgno, b->page); 210 211 inshash(b, b->pgno); 212 inschain(b, &mp->lru); 213 #ifdef STATISTICS 214 ++mp->pageget; 215 #endif 216 return (b->page); 217 } 218 219 /* 220 * MPOOL_PUT -- return a page to the pool 221 * 222 * Parameters: 223 * mp: mpool cookie 224 * page: page pointer 225 * pgno: page number 226 * 227 * Returns: 228 * RET_ERROR, RET_SUCCESS 229 */ 230 int 231 mpool_put(mp, page, flags) 232 MPOOL *mp; 233 void *page; 234 u_int flags; 235 { 236 BKT *baddr; 237 #ifdef DEBUG 238 BKT *b; 239 #endif 240 241 #ifdef STATISTICS 242 ++mp->pageput; 243 #endif 244 baddr = (BKT *)((char *)page - sizeof(BKT)); 245 #ifdef DEBUG 246 if (!(baddr->flags & MPOOL_PINNED)) 247 __mpoolerr("mpool_put: page %d not pinned", b->pgno); 248 for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 249 if (b == (BKT *)&mp->lru) 250 __mpoolerr("mpool_put: %0x: bad address", baddr); 251 if (b == baddr) 252 break; 253 } 254 #endif 255 baddr->flags &= ~MPOOL_PINNED; 256 baddr->flags |= flags & MPOOL_DIRTY; 257 return (RET_SUCCESS); 258 } 259 260 /* 261 * MPOOL_CLOSE -- close the buffer pool 262 * 263 * Parameters: 264 * mp: mpool cookie 265 * 266 * Returns: 267 * RET_ERROR, RET_SUCCESS 268 */ 269 int 270 mpool_close(mp) 271 MPOOL *mp; 272 { 273 BKT *b, *next; 274 275 /* Free up any space allocated to the lru pages. */ 276 for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) { 277 next = b->cprev; 278 free(b); 279 } 280 free(mp); 281 return (RET_SUCCESS); 282 } 283 284 /* 285 * MPOOL_SYNC -- sync the file to disk. 286 * 287 * Parameters: 288 * mp: mpool cookie 289 * 290 * Returns: 291 * RET_ERROR, RET_SUCCESS 292 */ 293 int 294 mpool_sync(mp) 295 MPOOL *mp; 296 { 297 BKT *b; 298 299 for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 300 if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR) 301 return (RET_ERROR); 302 return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS); 303 } 304 305 /* 306 * MPOOL_BKT -- get/create a BKT from the cache 307 * 308 * Parameters: 309 * mp: mpool cookie 310 * 311 * Returns: 312 * NULL on failure and a pointer to the BKT on success 313 */ 314 static BKT * 315 mpool_bkt(mp) 316 MPOOL *mp; 317 { 318 BKT *b; 319 320 if (mp->curcache < mp->maxcache) 321 goto new; 322 323 /* 324 * If the cache is maxxed out, search the lru list for a buffer we 325 * can flush. If we find one, write it if necessary and take it off 326 * any lists. If we don't find anything we grow the cache anyway. 327 * The cache never shrinks. 328 */ 329 for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 330 if (!(b->flags & MPOOL_PINNED)) { 331 if (b->flags & MPOOL_DIRTY && 332 mpool_write(mp, b) == RET_ERROR) 333 return (NULL); 334 rmhash(b); 335 rmchain(b); 336 #ifdef STATISTICS 337 ++mp->pageflush; 338 #endif 339 #ifdef DEBUG 340 { 341 void *spage; 342 spage = b->page; 343 memset(b, 0xff, sizeof(BKT) + mp->pagesize); 344 b->page = spage; 345 } 346 #endif 347 return (b); 348 } 349 350 new: if ((b = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL) 351 return (NULL); 352 #ifdef STATISTICS 353 ++mp->pagealloc; 354 #endif 355 #ifdef DEBUG 356 memset(b, 0xff, sizeof(BKT) + mp->pagesize); 357 #endif 358 b->page = (char *)b + sizeof(BKT); 359 ++mp->curcache; 360 return (b); 361 } 362 363 /* 364 * MPOOL_WRITE -- sync a page to disk 365 * 366 * Parameters: 367 * mp: mpool cookie 368 * 369 * Returns: 370 * RET_ERROR, RET_SUCCESS 371 */ 372 static int 373 mpool_write(mp, b) 374 MPOOL *mp; 375 BKT *b; 376 { 377 off_t off; 378 379 if (mp->pgout) 380 (mp->pgout)(mp->pgcookie, b->pgno, b->page); 381 382 #ifdef STATISTICS 383 ++mp->pagewrite; 384 #endif 385 off = mp->pagesize * b->pgno; 386 if (lseek(mp->fd, off, SEEK_SET) != off) 387 return (RET_ERROR); 388 if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize) 389 return (RET_ERROR); 390 b->flags &= ~MPOOL_DIRTY; 391 return (RET_SUCCESS); 392 } 393 394 /* 395 * MPOOL_LOOK -- lookup a page 396 * 397 * Parameters: 398 * mp: mpool cookie 399 * pgno: page number 400 * 401 * Returns: 402 * NULL on failure and a pointer to the BKT on success 403 */ 404 static BKT * 405 mpool_look(mp, pgno) 406 MPOOL *mp; 407 pgno_t pgno; 408 { 409 register BKT *b; 410 register BKTHDR *tb; 411 412 /* XXX 413 * If find the buffer, put it first on the hash chain so can 414 * find it again quickly. 415 */ 416 tb = &mp->hashtable[HASHKEY(pgno)]; 417 for (b = tb->hnext; b != (BKT *)tb; b = b->hnext) 418 if (b->pgno == pgno) { 419 #ifdef STATISTICS 420 ++mp->cachehit; 421 #endif 422 return (b); 423 } 424 #ifdef STATISTICS 425 ++mp->cachemiss; 426 #endif 427 return (NULL); 428 } 429 430 #ifdef STATISTICS 431 /* 432 * MPOOL_STAT -- cache statistics 433 * 434 * Parameters: 435 * mp: mpool cookie 436 */ 437 void 438 mpool_stat(mp) 439 MPOOL *mp; 440 { 441 BKT *b; 442 int cnt; 443 char *sep; 444 445 (void)fprintf(stderr, "%lu pages in the file\n", mp->npages); 446 (void)fprintf(stderr, 447 "page size %lu, cacheing %lu pages of %lu page max cache\n", 448 mp->pagesize, mp->curcache, mp->maxcache); 449 (void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n", 450 mp->pageput, mp->pageget, mp->pagenew); 451 (void)fprintf(stderr, "%lu page allocs, %lu page flushes\n", 452 mp->pagealloc, mp->pageflush); 453 if (mp->cachehit + mp->cachemiss) 454 (void)fprintf(stderr, 455 "%.0f%% cache hit rate (%lu hits, %lu misses)\n", 456 ((double)mp->cachehit / (mp->cachehit + mp->cachemiss)) 457 * 100, mp->cachehit, mp->cachemiss); 458 (void)fprintf(stderr, "%lu page reads, %lu page writes\n", 459 mp->pageread, mp->pagewrite); 460 461 sep = ""; 462 cnt = 0; 463 for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 464 (void)fprintf(stderr, "%s%d", sep, b->pgno); 465 if (b->flags & MPOOL_DIRTY) 466 (void)fprintf(stderr, "d"); 467 if (b->flags & MPOOL_PINNED) 468 (void)fprintf(stderr, "P"); 469 if (++cnt == 10) { 470 sep = "\n"; 471 cnt = 0; 472 } else 473 sep = ", "; 474 475 } 476 (void)fprintf(stderr, "\n"); 477 } 478 #endif 479 480 #ifdef DEBUG 481 #if __STDC__ 482 #include <stdarg.h> 483 #else 484 #include <varargs.h> 485 #endif 486 487 static void 488 #if __STDC__ 489 __mpoolerr(const char *fmt, ...) 490 #else 491 __mpoolerr(fmt, va_alist) 492 char *fmt; 493 va_dcl 494 #endif 495 { 496 va_list ap; 497 #if __STDC__ 498 va_start(ap, fmt); 499 #else 500 va_start(ap); 501 #endif 502 (void)vfprintf(stderr, fmt, ap); 503 va_end(ap); 504 (void)fprintf(stderr, "\n"); 505 abort(); 506 /* NOTREACHED */ 507 } 508 #endif 509