1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 */ 7 8 #if defined(LIBC_SCCS) && !defined(lint) 9 static char sccsid[] = "@(#)mpool.c 5.5 (Berkeley) 02/19/93"; 10 #endif /* LIBC_SCCS and not lint */ 11 12 #include <sys/param.h> 13 #include <sys/stat.h> 14 15 #include <errno.h> 16 #include <stdio.h> 17 #include <stdlib.h> 18 #include <string.h> 19 #include <unistd.h> 20 21 #include <db.h> 22 #define __MPOOLINTERFACE_PRIVATE 23 #include "mpool.h" 24 25 static BKT *mpool_bkt __P((MPOOL *)); 26 static BKT *mpool_look __P((MPOOL *, pgno_t)); 27 static int mpool_write __P((MPOOL *, BKT *)); 28 #ifdef DEBUG 29 static void err __P((const char *fmt, ...)); 30 #endif 31 32 /* 33 * MPOOL_OPEN -- initialize a memory pool. 34 * 35 * Parameters: 36 * key: Shared buffer key. 37 * fd: File descriptor. 38 * pagesize: File page size. 39 * maxcache: Max number of cached pages. 40 * 41 * Returns: 42 * MPOOL pointer, NULL on error. 43 */ 44 MPOOL * 45 mpool_open(key, fd, pagesize, maxcache) 46 DBT *key; 47 int fd; 48 pgno_t pagesize, maxcache; 49 { 50 struct stat sb; 51 MPOOL *mp; 52 int entry; 53 54 if (fstat(fd, &sb)) 55 return (NULL); 56 /* XXX 57 * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so 58 * that stat(2) returns true for ISSOCK on pipes. Until then, this is 59 * fairly close. 60 */ 61 if (!S_ISREG(sb.st_mode)) { 62 errno = ESPIPE; 63 return (NULL); 64 } 65 66 if ((mp = malloc(sizeof(MPOOL))) == NULL) 67 return (NULL); 68 mp->free.cnext = mp->free.cprev = (BKT *)&mp->free; 69 mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru; 70 for (entry = 0; entry < HASHSIZE; ++entry) 71 mp->hashtable[entry].hnext = mp->hashtable[entry].hprev = 72 mp->hashtable[entry].cnext = mp->hashtable[entry].cprev = 73 (BKT *)&mp->hashtable[entry]; 74 mp->curcache = 0; 75 mp->maxcache = maxcache; 76 mp->pagesize = pagesize; 77 mp->npages = sb.st_size / pagesize; 78 mp->fd = fd; 79 mp->pgcookie = NULL; 80 mp->pgin = mp->pgout = NULL; 81 82 #ifdef STATISTICS 83 mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush = 84 mp->pageget = mp->pagenew = mp->pageput = mp->pageread = 85 mp->pagewrite = 0; 86 #endif 87 return (mp); 88 } 89 90 /* 91 * MPOOL_FILTER -- initialize input/output filters. 92 * 93 * Parameters: 94 * pgin: Page in conversion routine. 95 * pgout: Page out conversion routine. 96 * pgcookie: Cookie for page in/out routines. 97 */ 98 void 99 mpool_filter(mp, pgin, pgout, pgcookie) 100 MPOOL *mp; 101 void (*pgin) __P((void *, pgno_t, void *)); 102 void (*pgout) __P((void *, pgno_t, void *)); 103 void *pgcookie; 104 { 105 mp->pgin = pgin; 106 mp->pgout = pgout; 107 mp->pgcookie = pgcookie; 108 } 109 110 /* 111 * MPOOL_NEW -- get a new page 112 * 113 * Parameters: 114 * mp: mpool cookie 115 * pgnoadddr: place to store new page number 116 * Returns: 117 * RET_ERROR, RET_SUCCESS 118 */ 119 void * 120 mpool_new(mp, pgnoaddr) 121 MPOOL *mp; 122 pgno_t *pgnoaddr; 123 { 124 BKT *b; 125 BKTHDR *hp; 126 127 #ifdef STATISTICS 128 ++mp->pagenew; 129 #endif 130 /* 131 * Get a BKT from the cache. Assign a new page number, attach it to 132 * the hash and lru chains and return. 133 */ 134 if ((b = mpool_bkt(mp)) == NULL) 135 return (NULL); 136 *pgnoaddr = b->pgno = mp->npages++; 137 b->flags = MPOOL_PINNED; 138 inshash(b, b->pgno); 139 inschain(b, &mp->lru); 140 return (b->page); 141 } 142 143 /* 144 * MPOOL_GET -- get a page from the pool 145 * 146 * Parameters: 147 * mp: mpool cookie 148 * pgno: page number 149 * flags: not used 150 * 151 * Returns: 152 * RET_ERROR, RET_SUCCESS 153 */ 154 void * 155 mpool_get(mp, pgno, flags) 156 MPOOL *mp; 157 pgno_t pgno; 158 u_int flags; /* XXX not used? */ 159 { 160 BKT *b; 161 BKTHDR *hp; 162 off_t off; 163 int nr; 164 165 /* 166 * If asking for a specific page that is already in the cache, find 167 * it and return it. 168 */ 169 if (b = mpool_look(mp, pgno)) { 170 #ifdef STATISTICS 171 ++mp->pageget; 172 #endif 173 #ifdef DEBUG 174 if (b->flags & MPOOL_PINNED) 175 err("mpool_get: page %d already pinned", b->pgno); 176 #endif 177 rmchain(b); 178 inschain(b, &mp->lru); 179 b->flags |= MPOOL_PINNED; 180 return (b->page); 181 } 182 183 /* Not allowed to retrieve a non-existent page. */ 184 if (pgno >= mp->npages) { 185 errno = EINVAL; 186 return (NULL); 187 } 188 189 /* Get a page from the cache. */ 190 if ((b = mpool_bkt(mp)) == NULL) 191 return (NULL); 192 b->pgno = pgno; 193 b->flags = MPOOL_PINNED; 194 195 #ifdef STATISTICS 196 ++mp->pageread; 197 #endif 198 /* Read in the contents. */ 199 off = mp->pagesize * pgno; 200 if (lseek(mp->fd, off, SEEK_SET) != off) 201 return (NULL); 202 if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) { 203 if (nr >= 0) 204 errno = EFTYPE; 205 return (NULL); 206 } 207 if (mp->pgin) 208 (mp->pgin)(mp->pgcookie, b->pgno, b->page); 209 210 inshash(b, b->pgno); 211 inschain(b, &mp->lru); 212 #ifdef STATISTICS 213 ++mp->pageget; 214 #endif 215 return (b->page); 216 } 217 218 /* 219 * MPOOL_PUT -- return a page to the pool 220 * 221 * Parameters: 222 * mp: mpool cookie 223 * page: page pointer 224 * pgno: page number 225 * 226 * Returns: 227 * RET_ERROR, RET_SUCCESS 228 */ 229 int 230 mpool_put(mp, page, flags) 231 MPOOL *mp; 232 void *page; 233 u_int flags; 234 { 235 BKT *baddr; 236 #ifdef DEBUG 237 BKT *b; 238 #endif 239 240 #ifdef STATISTICS 241 ++mp->pageput; 242 #endif 243 baddr = (BKT *)((char *)page - sizeof(BKT)); 244 #ifdef DEBUG 245 if (!(baddr->flags & MPOOL_PINNED)) 246 err("mpool_put: page %d not pinned", b->pgno); 247 for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 248 if (b == (BKT *)&mp->lru) 249 err("mpool_put: %0x: bad address", baddr); 250 if (b == baddr) 251 break; 252 } 253 #endif 254 baddr->flags &= ~MPOOL_PINNED; 255 baddr->flags |= flags & MPOOL_DIRTY; 256 return (RET_SUCCESS); 257 } 258 259 /* 260 * MPOOL_CLOSE -- close the buffer pool 261 * 262 * Parameters: 263 * mp: mpool cookie 264 * 265 * Returns: 266 * RET_ERROR, RET_SUCCESS 267 */ 268 int 269 mpool_close(mp) 270 MPOOL *mp; 271 { 272 BKT *b, *next; 273 274 /* Free up any space allocated to the lru pages. */ 275 for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) { 276 next = b->cprev; 277 free(b); 278 } 279 free(mp); 280 return (RET_SUCCESS); 281 } 282 283 /* 284 * MPOOL_SYNC -- sync the file to disk. 285 * 286 * Parameters: 287 * mp: mpool cookie 288 * 289 * Returns: 290 * RET_ERROR, RET_SUCCESS 291 */ 292 int 293 mpool_sync(mp) 294 MPOOL *mp; 295 { 296 BKT *b; 297 298 for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 299 if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR) 300 return (RET_ERROR); 301 return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS); 302 } 303 304 /* 305 * MPOOL_BKT -- get/create a BKT from the cache 306 * 307 * Parameters: 308 * mp: mpool cookie 309 * 310 * Returns: 311 * NULL on failure and a pointer to the BKT on success 312 */ 313 static BKT * 314 mpool_bkt(mp) 315 MPOOL *mp; 316 { 317 BKT *b; 318 319 if (mp->curcache < mp->maxcache) 320 goto new; 321 322 /* 323 * If the cache is maxxed out, search the lru list for a buffer we 324 * can flush. If we find one, write it if necessary and take it off 325 * any lists. If we don't find anything we grow the cache anyway. 326 * The cache never shrinks. 327 */ 328 for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 329 if (!(b->flags & MPOOL_PINNED)) { 330 if (b->flags & MPOOL_DIRTY && 331 mpool_write(mp, b) == RET_ERROR) 332 return (NULL); 333 rmhash(b); 334 rmchain(b); 335 #ifdef STATISTICS 336 ++mp->pageflush; 337 #endif 338 #ifdef DEBUG 339 { 340 void *spage; 341 spage = b->page; 342 memset(b, 0xff, sizeof(BKT) + mp->pagesize); 343 b->page = spage; 344 } 345 #endif 346 return (b); 347 } 348 349 new: if ((b = malloc(sizeof(BKT) + mp->pagesize)) == NULL) 350 return (NULL); 351 #ifdef STATISTICS 352 ++mp->pagealloc; 353 #endif 354 #ifdef DEBUG 355 memset(b, 0xff, sizeof(BKT) + mp->pagesize); 356 #endif 357 b->page = (char *)b + sizeof(BKT); 358 ++mp->curcache; 359 return (b); 360 } 361 362 /* 363 * MPOOL_WRITE -- sync a page to disk 364 * 365 * Parameters: 366 * mp: mpool cookie 367 * 368 * Returns: 369 * RET_ERROR, RET_SUCCESS 370 */ 371 static int 372 mpool_write(mp, b) 373 MPOOL *mp; 374 BKT *b; 375 { 376 off_t off; 377 378 if (mp->pgout) 379 (mp->pgout)(mp->pgcookie, b->pgno, b->page); 380 381 #ifdef STATISTICS 382 ++mp->pagewrite; 383 #endif 384 off = mp->pagesize * b->pgno; 385 if (lseek(mp->fd, off, SEEK_SET) != off) 386 return (RET_ERROR); 387 if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize) 388 return (RET_ERROR); 389 b->flags &= ~MPOOL_DIRTY; 390 return (RET_SUCCESS); 391 } 392 393 /* 394 * MPOOL_LOOK -- lookup a page 395 * 396 * Parameters: 397 * mp: mpool cookie 398 * pgno: page number 399 * 400 * Returns: 401 * NULL on failure and a pointer to the BKT on success 402 */ 403 static BKT * 404 mpool_look(mp, pgno) 405 MPOOL *mp; 406 pgno_t pgno; 407 { 408 register BKT *b; 409 register BKTHDR *tb; 410 411 /* XXX 412 * If find the buffer, put it first on the hash chain so can 413 * find it again quickly. 414 */ 415 tb = &mp->hashtable[HASHKEY(pgno)]; 416 for (b = tb->hnext; b != (BKT *)tb; b = b->hnext) 417 if (b->pgno == pgno) { 418 #ifdef STATISTICS 419 ++mp->cachehit; 420 #endif 421 return (b); 422 } 423 #ifdef STATISTICS 424 ++mp->cachemiss; 425 #endif 426 return (NULL); 427 } 428 429 #ifdef STATISTICS 430 /* 431 * MPOOL_STAT -- cache statistics 432 * 433 * Parameters: 434 * mp: mpool cookie 435 */ 436 void 437 mpool_stat(mp) 438 MPOOL *mp; 439 { 440 BKT *b; 441 int cnt; 442 char *sep; 443 444 (void)fprintf(stderr, "%lu pages in the file\n", mp->npages); 445 (void)fprintf(stderr, 446 "page size %lu, cacheing %lu pages of %lu page max cache\n", 447 mp->pagesize, mp->curcache, mp->maxcache); 448 (void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n", 449 mp->pageput, mp->pageget, mp->pagenew); 450 (void)fprintf(stderr, "%lu page allocs, %lu page flushes\n", 451 mp->pagealloc, mp->pageflush); 452 if (mp->cachehit + mp->cachemiss) 453 (void)fprintf(stderr, 454 "%.0f%% cache hit rate (%lu hits, %lu misses)\n", 455 ((double)mp->cachehit / (mp->cachehit + mp->cachemiss)) 456 * 100, mp->cachehit, mp->cachemiss); 457 (void)fprintf(stderr, "%lu page reads, %lu page writes\n", 458 mp->pageread, mp->pagewrite); 459 460 sep = ""; 461 cnt = 0; 462 for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 463 (void)fprintf(stderr, "%s%d", sep, b->pgno); 464 if (b->flags & MPOOL_DIRTY) 465 (void)fprintf(stderr, "d"); 466 if (b->flags & MPOOL_PINNED) 467 (void)fprintf(stderr, "P"); 468 if (++cnt == 10) { 469 sep = "\n"; 470 cnt = 0; 471 } else 472 sep = ", "; 473 474 } 475 (void)fprintf(stderr, "\n"); 476 } 477 #endif 478 479 #ifdef DEBUG 480 #if __STDC__ 481 #include <stdarg.h> 482 #else 483 #include <varargs.h> 484 #endif 485 486 static void 487 #if __STDC__ 488 err(const char *fmt, ...) 489 #else 490 err(fmt, va_alist) 491 char *fmt; 492 va_dcl 493 #endif 494 { 495 va_list ap; 496 #if __STDC__ 497 va_start(ap, fmt); 498 #else 499 va_start(ap); 500 #endif 501 (void)vfprintf(stderr, fmt, ap); 502 va_end(ap); 503 (void)fprintf(stderr, "\n"); 504 abort(); 505 /* NOTREACHED */ 506 } 507 #endif 508