1 /* $NetBSD: mpool.c,v 1.23 2016/09/24 21:31:25 christos Exp $ */
2
3 /*-
4 * Copyright (c) 1990, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #if HAVE_NBTOOL_CONFIG_H
33 #include "nbtool_config.h"
34 #endif
35
36 #include <sys/cdefs.h>
37 __RCSID("$NetBSD: mpool.c,v 1.23 2016/09/24 21:31:25 christos Exp $");
38
39 #include "namespace.h"
40 #include <sys/queue.h>
41 #include <sys/stat.h>
42
43 #include <errno.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48
49 #include <db.h>
50
51 #define __MPOOLINTERFACE_PRIVATE
52 #include <mpool.h>
53
54 #ifdef __weak_alias
55 __weak_alias(mpool_close,_mpool_close)
56 __weak_alias(mpool_filter,_mpool_filter)
57 __weak_alias(mpool_get,_mpool_get)
58 __weak_alias(mpool_new,_mpool_new)
59 __weak_alias(mpool_newf,_mpool_newf)
60 __weak_alias(mpool_open,_mpool_open)
61 __weak_alias(mpool_put,_mpool_put)
62 __weak_alias(mpool_sync,_mpool_sync)
63 #endif
64
65 static BKT *mpool_bkt(MPOOL *);
66 static BKT *mpool_look(MPOOL *, pgno_t);
67 static int mpool_write(MPOOL *, BKT *);
68
69 /*
70 * mpool_open --
71 * Initialize a memory pool.
72 */
73 /*ARGSUSED*/
74 MPOOL *
mpool_open(void * key,int fd,pgno_t pagesize,pgno_t maxcache)75 mpool_open(void *key, int fd, pgno_t pagesize, pgno_t maxcache)
76 {
77 struct stat sb;
78 MPOOL *mp;
79 int entry;
80
81 /*
82 * Get information about the file.
83 *
84 * XXX
85 * We don't currently handle pipes, although we should.
86 */
87 if (fstat(fd, &sb))
88 return NULL;
89 if (!S_ISREG(sb.st_mode)) {
90 errno = ESPIPE;
91 return NULL;
92 }
93
94 /* Allocate and initialize the MPOOL cookie. */
95 if ((mp = calloc(1, sizeof(*mp))) == NULL)
96 return (NULL);
97 TAILQ_INIT(&mp->lqh);
98 for (entry = 0; entry < HASHSIZE; ++entry)
99 TAILQ_INIT(&mp->hqh[entry]);
100 mp->maxcache = maxcache;
101 mp->npages = (pgno_t)(sb.st_size / pagesize);
102 mp->pagesize = pagesize;
103 mp->fd = fd;
104 return mp;
105 }
106
107 /*
108 * mpool_filter --
109 * Initialize input/output filters.
110 */
111 void
mpool_filter(MPOOL * mp,void (* pgin)(void *,pgno_t,void *),void (* pgout)(void *,pgno_t,void *),void * pgcookie)112 mpool_filter(MPOOL *mp, void (*pgin)(void *, pgno_t, void *),
113 void (*pgout)(void *, pgno_t, void *), void *pgcookie)
114 {
115 mp->pgin = pgin;
116 mp->pgout = pgout;
117 mp->pgcookie = pgcookie;
118 }
119
120 /*
121 * mpool_new --
122 * Get a new page of memory.
123 */
124 void *
mpool_newf(MPOOL * mp,pgno_t * pgnoaddr,unsigned int flags)125 mpool_newf(MPOOL *mp, pgno_t *pgnoaddr, unsigned int flags)
126 {
127 struct _hqh *head;
128 BKT *bp;
129
130 if (mp->npages == MAX_PAGE_NUMBER) {
131 (void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
132 abort();
133 }
134 #ifdef STATISTICS
135 ++mp->pagenew;
136 #endif
137 /*
138 * Get a BKT from the cache. Assign a new page number, attach
139 * it to the head of the hash chain, the tail of the lru chain,
140 * and return.
141 */
142 if ((bp = mpool_bkt(mp)) == NULL)
143 return NULL;
144
145 if (flags == MPOOL_PAGE_REQUEST) {
146 mp->npages++;
147 bp->pgno = *pgnoaddr;
148 } else
149 bp->pgno = *pgnoaddr = mp->npages++;
150
151 bp->flags = MPOOL_PINNED | MPOOL_INUSE;
152
153 head = &mp->hqh[HASHKEY(bp->pgno)];
154 TAILQ_INSERT_HEAD(head, bp, hq);
155 TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
156 return bp->page;
157 }
158
159 void *
mpool_new(MPOOL * mp,pgno_t * pgnoaddr)160 mpool_new(MPOOL *mp, pgno_t *pgnoaddr)
161 {
162 return mpool_newf(mp, pgnoaddr, 0);
163 }
164
165 int
mpool_delete(MPOOL * mp,void * page)166 mpool_delete(MPOOL *mp, void *page)
167 {
168 struct _hqh *head;
169 BKT *bp;
170
171 bp = (void *)((char *)page - sizeof(BKT));
172
173 #ifdef DEBUG
174 if (!(bp->flags & MPOOL_PINNED)) {
175 (void)fprintf(stderr,
176 "%s: page %d not pinned\n", __func__, bp->pgno);
177 abort();
178 }
179 #endif
180
181 /* Remove from the hash and lru queues. */
182 head = &mp->hqh[HASHKEY(bp->pgno)];
183 TAILQ_REMOVE(head, bp, hq);
184 TAILQ_REMOVE(&mp->lqh, bp, q);
185
186 free(bp);
187 return RET_SUCCESS;
188 }
189
190 /*
191 * mpool_get
192 * Get a page.
193 */
194 /*ARGSUSED*/
195 void *
mpool_get(MPOOL * mp,pgno_t pgno,unsigned int flags)196 mpool_get(MPOOL *mp, pgno_t pgno, unsigned int flags)
197 {
198 struct _hqh *head;
199 BKT *bp;
200 off_t off;
201 ssize_t nr;
202
203 /* Check for attempt to retrieve a non-existent page. */
204 if (pgno >= mp->npages) {
205 errno = EINVAL;
206 return NULL;
207 }
208
209 #ifdef STATISTICS
210 ++mp->pageget;
211 #endif
212
213 /* Check for a page that is cached. */
214 if ((bp = mpool_look(mp, pgno)) != NULL) {
215 #ifdef DEBUG
216 if (!(flags & MPOOL_IGNOREPIN) && bp->flags & MPOOL_PINNED) {
217 (void)fprintf(stderr,
218 "mpool_get: page %d already pinned\n", bp->pgno);
219 abort();
220 }
221 #endif
222 /*
223 * Move the page to the head of the hash chain and the tail
224 * of the lru chain.
225 */
226 head = &mp->hqh[HASHKEY(bp->pgno)];
227 TAILQ_REMOVE(head, bp, hq);
228 TAILQ_INSERT_HEAD(head, bp, hq);
229 TAILQ_REMOVE(&mp->lqh, bp, q);
230 TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
231
232 /* Return a pinned page. */
233 if (!(flags & MPOOL_IGNOREPIN))
234 bp->flags |= MPOOL_PINNED;
235 return bp->page;
236 }
237
238 /* Get a page from the cache. */
239 if ((bp = mpool_bkt(mp)) == NULL)
240 return NULL;
241
242 /* Read in the contents. */
243 #ifdef STATISTICS
244 ++mp->pageread;
245 #endif
246 off = mp->pagesize * pgno;
247 if (off / mp->pagesize != pgno) {
248 /* Run past the end of the file, or at least the part we
249 can address without large-file support? */
250 errno = E2BIG;
251 return NULL;
252 }
253
254 if ((nr = pread(mp->fd, bp->page, (size_t)mp->pagesize, off)) != (int)mp->pagesize) {
255 if (nr > 0) {
256 errno = EFTYPE;
257 return NULL;
258 } else if (nr == 0) {
259 /*
260 * A zero-length reads, means you need to create a
261 * new page.
262 */
263 memset(bp->page, 0, mp->pagesize);
264 } else
265 return NULL;
266 }
267
268 /* Set the page number, pin the page. */
269 bp->pgno = pgno;
270 if (!(flags & MPOOL_IGNOREPIN))
271 bp->flags = MPOOL_PINNED;
272 bp->flags |= MPOOL_INUSE;
273
274 /*
275 * Add the page to the head of the hash chain and the tail
276 * of the lru chain.
277 */
278 head = &mp->hqh[HASHKEY(bp->pgno)];
279 TAILQ_INSERT_HEAD(head, bp, hq);
280 TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
281
282 /* Run through the user's filter. */
283 if (mp->pgin != NULL)
284 (mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
285
286 return bp->page;
287 }
288
289 /*
290 * mpool_put
291 * Return a page.
292 */
293 /*ARGSUSED*/
294 int
mpool_put(MPOOL * mp,void * page,u_int flags)295 mpool_put(MPOOL *mp, void *page, u_int flags)
296 {
297 BKT *bp;
298
299 #ifdef STATISTICS
300 ++mp->pageput;
301 #endif
302 bp = (void *)((intptr_t)page - sizeof(BKT));
303 #ifdef DEBUG
304 if (!(bp->flags & MPOOL_PINNED)) {
305 (void)fprintf(stderr,
306 "mpool_put: page %d not pinned\n", bp->pgno);
307 abort();
308 }
309 #endif
310 bp->flags &= ~MPOOL_PINNED;
311 if (flags & MPOOL_DIRTY)
312 bp->flags |= flags & MPOOL_DIRTY;
313 return (RET_SUCCESS);
314 }
315
316 /*
317 * mpool_close
318 * Close the buffer pool.
319 */
320 int
mpool_close(MPOOL * mp)321 mpool_close(MPOOL *mp)
322 {
323 BKT *bp;
324
325 /* Free up any space allocated to the lru pages. */
326 while (!TAILQ_EMPTY(&mp->lqh)) {
327 bp = TAILQ_FIRST(&mp->lqh);
328 TAILQ_REMOVE(&mp->lqh, bp, q);
329 free(bp);
330 }
331
332 /* Free the MPOOL cookie. */
333 free(mp);
334 return RET_SUCCESS;
335 }
336
337 /*
338 * mpool_sync
339 * Sync the pool to disk.
340 */
341 int
mpool_sync(MPOOL * mp)342 mpool_sync(MPOOL *mp)
343 {
344 BKT *bp;
345
346 /* Walk the lru chain, flushing any dirty pages to disk. */
347 TAILQ_FOREACH(bp, &mp->lqh, q)
348 if (bp->flags & MPOOL_DIRTY &&
349 mpool_write(mp, bp) == RET_ERROR)
350 return RET_ERROR;
351
352 /* Sync the file descriptor. */
353 return fsync(mp->fd) ? RET_ERROR : RET_SUCCESS;
354 }
355
356 /*
357 * mpool_bkt
358 * Get a page from the cache (or create one).
359 */
360 static BKT *
mpool_bkt(MPOOL * mp)361 mpool_bkt(MPOOL *mp)
362 {
363 struct _hqh *head;
364 BKT *bp;
365
366 /* If under the max cached, always create a new page. */
367 if (mp->curcache < mp->maxcache)
368 goto new;
369
370 /*
371 * If the cache is max'd out, walk the lru list for a buffer we
372 * can flush. If we find one, write it (if necessary) and take it
373 * off any lists. If we don't find anything we grow the cache anyway.
374 * The cache never shrinks.
375 */
376 TAILQ_FOREACH(bp, &mp->lqh, q)
377 if (!(bp->flags & MPOOL_PINNED)) {
378 /* Flush if dirty. */
379 if (bp->flags & MPOOL_DIRTY &&
380 mpool_write(mp, bp) == RET_ERROR)
381 return NULL;
382 #ifdef STATISTICS
383 ++mp->pageflush;
384 #endif
385 /* Remove from the hash and lru queues. */
386 head = &mp->hqh[HASHKEY(bp->pgno)];
387 TAILQ_REMOVE(head, bp, hq);
388 TAILQ_REMOVE(&mp->lqh, bp, q);
389 #ifdef DEBUG
390 {
391 void *spage = bp->page;
392 (void)memset(bp, 0xff,
393 (size_t)(sizeof(BKT) + mp->pagesize));
394 bp->page = spage;
395 }
396 #endif
397 return bp;
398 }
399
400 new: if ((bp = calloc(1, (size_t)(sizeof(BKT) + mp->pagesize))) == NULL)
401 return NULL;
402 #ifdef STATISTICS
403 ++mp->pagealloc;
404 #endif
405 #if defined(DEBUG) || defined(PURIFY)
406 (void)memset(bp, 0xff, (size_t)(sizeof(BKT) + mp->pagesize));
407 #endif
408 bp->page = (void *)((intptr_t)bp + sizeof(BKT));
409 ++mp->curcache;
410 return bp;
411 }
412
413 /*
414 * mpool_write
415 * Write a page to disk.
416 */
417 static int
mpool_write(MPOOL * mp,BKT * bp)418 mpool_write(MPOOL *mp, BKT *bp)
419 {
420 off_t off;
421
422 #ifdef STATISTICS
423 ++mp->pagewrite;
424 #endif
425
426 /* Run through the user's filter. */
427 if (mp->pgout)
428 (mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
429
430 off = mp->pagesize * bp->pgno;
431 if (off / mp->pagesize != bp->pgno) {
432 /* Run past the end of the file, or at least the part we
433 can address without large-file support? */
434 errno = E2BIG;
435 return RET_ERROR;
436 }
437
438 if (pwrite(mp->fd, bp->page, (size_t)mp->pagesize, off) !=
439 (ssize_t)mp->pagesize)
440 return RET_ERROR;
441
442 /*
443 * Re-run through the input filter since this page may soon be
444 * accessed via the cache, and whatever the user's output filter
445 * did may screw things up if we don't let the input filter
446 * restore the in-core copy.
447 */
448 if (mp->pgin)
449 (mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
450
451 bp->flags &= ~MPOOL_DIRTY;
452 return RET_SUCCESS;
453 }
454
455 /*
456 * mpool_look
457 * Lookup a page in the cache.
458 */
459 static BKT *
mpool_look(MPOOL * mp,pgno_t pgno)460 mpool_look(MPOOL *mp, pgno_t pgno)
461 {
462 struct _hqh *head;
463 BKT *bp;
464
465 head = &mp->hqh[HASHKEY(pgno)];
466 TAILQ_FOREACH(bp, head, hq)
467 if (bp->pgno == pgno) {
468 #ifdef STATISTICS
469 ++mp->cachehit;
470 #endif
471 return bp;
472 }
473 #ifdef STATISTICS
474 ++mp->cachemiss;
475 #endif
476 return NULL;
477 }
478
#ifdef STATISTICS
/*
 * mpool_stat --
 *	Print out cache statistics.
 *
 * Writes the pool geometry, the operation counters and the hit rate to
 * stderr, followed by the page numbers on the lru chain, ten per line.
 * A page number is suffixed with "d" when dirty and "P" when pinned.
 */
void
mpool_stat(MPOOL *mp)
{
	BKT *bp;
	int cnt;
	const char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n", (u_long)mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    (u_long)mp->pagesize, (u_long)mp->curcache, (u_long)mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    mp->pageput, mp->pageget, mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    mp->pagealloc, mp->pageflush);
	/* Skip the hit rate when no lookup happened (avoids 0/0). */
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, mp->cachehit, mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    mp->pageread, mp->pagewrite);

	sep = "";
	cnt = 0;
	TAILQ_FOREACH(bp, &mp->lqh, q) {
		/* Page numbers are unsigned; print them as such. */
		(void)fprintf(stderr, "%s%u", sep, (unsigned int)bp->pgno);
		if (bp->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (bp->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		/* Start a new output line after every tenth page. */
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";

	}
	(void)fprintf(stderr, "\n");
}
#endif
526