xref: /netbsd/lib/libc/db/mpool/mpool.c (revision bf9ec67e)
1 /*	$NetBSD: mpool.c,v 1.13 2002/01/22 20:41:22 thorpej Exp $	*/
2 
3 /*-
4  * Copyright (c) 1990, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/cdefs.h>
37 #if defined(LIBC_SCCS) && !defined(lint)
38 #if 0
39 static char sccsid[] = "@(#)mpool.c	8.5 (Berkeley) 7/26/94";
40 #else
41 __RCSID("$NetBSD: mpool.c,v 1.13 2002/01/22 20:41:22 thorpej Exp $");
42 #endif
43 #endif /* LIBC_SCCS and not lint */
44 
45 #include "namespace.h"
46 #include <sys/queue.h>
47 #include <sys/stat.h>
48 
49 #include <errno.h>
50 #include <stdio.h>
51 #include <stdlib.h>
52 #include <string.h>
53 #include <unistd.h>
54 
55 #include <db.h>
56 
57 #define	__MPOOLINTERFACE_PRIVATE
58 #include <mpool.h>
59 
60 #ifdef __weak_alias
61 __weak_alias(mpool_close,_mpool_close)
62 __weak_alias(mpool_filter,_mpool_filter)
63 __weak_alias(mpool_get,_mpool_get)
64 __weak_alias(mpool_new,_mpool_new)
65 __weak_alias(mpool_open,_mpool_open)
66 __weak_alias(mpool_put,_mpool_put)
67 __weak_alias(mpool_sync,_mpool_sync)
68 #endif
69 
70 static BKT *mpool_bkt __P((MPOOL *));
71 static BKT *mpool_look __P((MPOOL *, pgno_t));
72 static int  mpool_write __P((MPOOL *, BKT *));
73 
74 /*
75  * mpool_open --
76  *	Initialize a memory pool.
77  */
78 /*ARGSUSED*/
79 MPOOL *
80 mpool_open(key, fd, pagesize, maxcache)
81 	void *key;
82 	int fd;
83 	pgno_t pagesize, maxcache;
84 {
85 	struct stat sb;
86 	MPOOL *mp;
87 	int entry;
88 
89 	/*
90 	 * Get information about the file.
91 	 *
92 	 * XXX
93 	 * We don't currently handle pipes, although we should.
94 	 */
95 	if (fstat(fd, &sb))
96 		return (NULL);
97 	if (!S_ISREG(sb.st_mode)) {
98 		errno = ESPIPE;
99 		return (NULL);
100 	}
101 
102 	/* Allocate and initialize the MPOOL cookie. */
103 	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
104 		return (NULL);
105 	CIRCLEQ_INIT(&mp->lqh);
106 	for (entry = 0; entry < HASHSIZE; ++entry)
107 		CIRCLEQ_INIT(&mp->hqh[entry]);
108 	mp->maxcache = maxcache;
109 	mp->npages = (pgno_t)(sb.st_size / pagesize);
110 	mp->pagesize = pagesize;
111 	mp->fd = fd;
112 	return (mp);
113 }
114 
115 /*
116  * mpool_filter --
117  *	Initialize input/output filters.
118  */
119 void
120 mpool_filter(mp, pgin, pgout, pgcookie)
121 	MPOOL *mp;
122 	void (*pgin) __P((void *, pgno_t, void *));
123 	void (*pgout) __P((void *, pgno_t, void *));
124 	void *pgcookie;
125 {
126 	mp->pgin = pgin;
127 	mp->pgout = pgout;
128 	mp->pgcookie = pgcookie;
129 }
130 
131 /*
132  * mpool_new --
133  *	Get a new page of memory.
134  */
135 void *
136 mpool_new(mp, pgnoaddr)
137 	MPOOL *mp;
138 	pgno_t *pgnoaddr;
139 {
140 	struct _hqh *head;
141 	BKT *bp;
142 
143 	if (mp->npages == MAX_PAGE_NUMBER) {
144 		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
145 		abort();
146 	}
147 #ifdef STATISTICS
148 	++mp->pagenew;
149 #endif
150 	/*
151 	 * Get a BKT from the cache.  Assign a new page number, attach
152 	 * it to the head of the hash chain, the tail of the lru chain,
153 	 * and return.
154 	 */
155 	if ((bp = mpool_bkt(mp)) == NULL)
156 		return (NULL);
157 	*pgnoaddr = bp->pgno = mp->npages++;
158 	bp->flags = MPOOL_PINNED;
159 
160 	head = &mp->hqh[HASHKEY(bp->pgno)];
161 	CIRCLEQ_INSERT_HEAD(head, bp, hq);
162 	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
163 	return (bp->page);
164 }
165 
166 /*
167  * mpool_get
168  *	Get a page.
169  */
170 /*ARGSUSED*/
171 void *
172 mpool_get(mp, pgno, flags)
173 	MPOOL *mp;
174 	pgno_t pgno;
175 	u_int flags;				/* XXX not used? */
176 {
177 	struct _hqh *head;
178 	BKT *bp;
179 	off_t off;
180 	int nr;
181 
182 	/* Check for attempt to retrieve a non-existent page. */
183 	if (pgno >= mp->npages) {
184 		errno = EINVAL;
185 		return (NULL);
186 	}
187 
188 #ifdef STATISTICS
189 	++mp->pageget;
190 #endif
191 
192 	/* Check for a page that is cached. */
193 	if ((bp = mpool_look(mp, pgno)) != NULL) {
194 #ifdef DEBUG
195 		if (bp->flags & MPOOL_PINNED) {
196 			(void)fprintf(stderr,
197 			    "mpool_get: page %d already pinned\n", bp->pgno);
198 			abort();
199 		}
200 #endif
201 		/*
202 		 * Move the page to the head of the hash chain and the tail
203 		 * of the lru chain.
204 		 */
205 		head = &mp->hqh[HASHKEY(bp->pgno)];
206 		CIRCLEQ_REMOVE(head, bp, hq);
207 		CIRCLEQ_INSERT_HEAD(head, bp, hq);
208 		CIRCLEQ_REMOVE(&mp->lqh, bp, q);
209 		CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
210 
211 		/* Return a pinned page. */
212 		bp->flags |= MPOOL_PINNED;
213 		return (bp->page);
214 	}
215 
216 	/* Get a page from the cache. */
217 	if ((bp = mpool_bkt(mp)) == NULL)
218 		return (NULL);
219 
220 	/* Read in the contents. */
221 #ifdef STATISTICS
222 	++mp->pageread;
223 #endif
224 	off = mp->pagesize * pgno;
225 	if ((nr = pread(mp->fd, bp->page, (size_t)mp->pagesize, off)) != (int)mp->pagesize) {
226 		if (nr >= 0)
227 			errno = EFTYPE;
228 		return (NULL);
229 	}
230 
231 	/* Set the page number, pin the page. */
232 	bp->pgno = pgno;
233 	bp->flags = MPOOL_PINNED;
234 
235 	/*
236 	 * Add the page to the head of the hash chain and the tail
237 	 * of the lru chain.
238 	 */
239 	head = &mp->hqh[HASHKEY(bp->pgno)];
240 	CIRCLEQ_INSERT_HEAD(head, bp, hq);
241 	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
242 
243 	/* Run through the user's filter. */
244 	if (mp->pgin != NULL)
245 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
246 
247 	return (bp->page);
248 }
249 
250 /*
251  * mpool_put
252  *	Return a page.
253  */
254 /*ARGSUSED*/
255 int
256 mpool_put(mp, page, flags)
257 	MPOOL *mp;
258 	void *page;
259 	u_int flags;
260 {
261 	BKT *bp;
262 
263 #ifdef STATISTICS
264 	++mp->pageput;
265 #endif
266 	bp = (BKT *)(void *)((char *)page - sizeof(BKT));
267 #ifdef DEBUG
268 	if (!(bp->flags & MPOOL_PINNED)) {
269 		(void)fprintf(stderr,
270 		    "mpool_put: page %d not pinned\n", bp->pgno);
271 		abort();
272 	}
273 #endif
274 	bp->flags &= ~MPOOL_PINNED;
275 	bp->flags |= flags & MPOOL_DIRTY;
276 	return (RET_SUCCESS);
277 }
278 
279 /*
280  * mpool_close
281  *	Close the buffer pool.
282  */
283 int
284 mpool_close(mp)
285 	MPOOL *mp;
286 {
287 	BKT *bp;
288 
289 	/* Free up any space allocated to the lru pages. */
290 	while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) {
291 		CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q);
292 		free(bp);
293 	}
294 
295 	/* Free the MPOOL cookie. */
296 	free(mp);
297 	return (RET_SUCCESS);
298 }
299 
300 /*
301  * mpool_sync
302  *	Sync the pool to disk.
303  */
304 int
305 mpool_sync(mp)
306 	MPOOL *mp;
307 {
308 	BKT *bp;
309 
310 	/* Walk the lru chain, flushing any dirty pages to disk. */
311 	for (bp = mp->lqh.cqh_first;
312 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
313 		if (bp->flags & MPOOL_DIRTY &&
314 		    mpool_write(mp, bp) == RET_ERROR)
315 			return (RET_ERROR);
316 
317 	/* Sync the file descriptor. */
318 	return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
319 }
320 
321 /*
322  * mpool_bkt
323  *	Get a page from the cache (or create one).
324  */
325 static BKT *
326 mpool_bkt(mp)
327 	MPOOL *mp;
328 {
329 	struct _hqh *head;
330 	BKT *bp;
331 
332 	/* If under the max cached, always create a new page. */
333 	if (mp->curcache < mp->maxcache)
334 		goto new;
335 
336 	/*
337 	 * If the cache is max'd out, walk the lru list for a buffer we
338 	 * can flush.  If we find one, write it (if necessary) and take it
339 	 * off any lists.  If we don't find anything we grow the cache anyway.
340 	 * The cache never shrinks.
341 	 */
342 	for (bp = mp->lqh.cqh_first;
343 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
344 		if (!(bp->flags & MPOOL_PINNED)) {
345 			/* Flush if dirty. */
346 			if (bp->flags & MPOOL_DIRTY &&
347 			    mpool_write(mp, bp) == RET_ERROR)
348 				return (NULL);
349 #ifdef STATISTICS
350 			++mp->pageflush;
351 #endif
352 			/* Remove from the hash and lru queues. */
353 			head = &mp->hqh[HASHKEY(bp->pgno)];
354 			CIRCLEQ_REMOVE(head, bp, hq);
355 			CIRCLEQ_REMOVE(&mp->lqh, bp, q);
356 #ifdef DEBUG
357 			{ void *spage;
358 				spage = bp->page;
359 				memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
360 				bp->page = spage;
361 			}
362 #endif
363 			return (bp);
364 		}
365 
366 new:	if ((bp = (BKT *)malloc((size_t)(sizeof(BKT) + mp->pagesize))) == NULL)
367 		return (NULL);
368 #ifdef STATISTICS
369 	++mp->pagealloc;
370 #endif
371 #if defined(DEBUG) || defined(PURIFY)
372 	memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
373 #endif
374 	bp->page = (char *)(void *)bp + sizeof(BKT);
375 	++mp->curcache;
376 	return (bp);
377 }
378 
379 /*
380  * mpool_write
381  *	Write a page to disk.
382  */
383 static int
384 mpool_write(mp, bp)
385 	MPOOL *mp;
386 	BKT *bp;
387 {
388 	off_t off;
389 
390 #ifdef STATISTICS
391 	++mp->pagewrite;
392 #endif
393 
394 	/* Run through the user's filter. */
395 	if (mp->pgout)
396 		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
397 
398 	off = mp->pagesize * bp->pgno;
399 	if (pwrite(mp->fd, bp->page, (size_t)mp->pagesize, off) != (int)mp->pagesize)
400 		return (RET_ERROR);
401 
402 	/*
403 	 * Re-run through the input filter since this page may soon be
404 	 * accessed via the cache, and whatever the user's output filter
405 	 * did may screw things up if we don't let the input filter
406 	 * restore the in-core copy.
407 	 */
408 	if (mp->pgin)
409 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
410 
411 	bp->flags &= ~MPOOL_DIRTY;
412 	return (RET_SUCCESS);
413 }
414 
415 /*
416  * mpool_look
417  *	Lookup a page in the cache.
418  */
419 static BKT *
420 mpool_look(mp, pgno)
421 	MPOOL *mp;
422 	pgno_t pgno;
423 {
424 	struct _hqh *head;
425 	BKT *bp;
426 
427 	head = &mp->hqh[HASHKEY(pgno)];
428 	for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next)
429 		if (bp->pgno == pgno) {
430 #ifdef STATISTICS
431 			++mp->cachehit;
432 #endif
433 			return (bp);
434 		}
435 #ifdef STATISTICS
436 	++mp->cachemiss;
437 #endif
438 	return (NULL);
439 }
440 
#ifdef STATISTICS
/*
 * mpool_stat
 *	Dump the pool's counters to stderr.
 *
 *	After the counters, the page number of every buffer on the lru chain
 *	is listed in chain order, ten per line, with a "d" suffix for dirty
 *	and a "P" suffix for pinned pages.
 */
void
mpool_stat(mp)
	MPOOL *mp;
{
	BKT *cur;
	int col;
	const char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n", mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    mp->pagesize, mp->curcache, mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    mp->pageput, mp->pageget, mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    mp->pagealloc, mp->pageflush);
	/* Skip the hit rate when there were no lookups at all. */
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, mp->cachehit, mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    mp->pageread, mp->pagewrite);

	sep = "";
	col = 0;
	cur = mp->lqh.cqh_first;
	while (cur != (void *)&mp->lqh) {
		(void)fprintf(stderr, "%s%d", sep, cur->pgno);
		if (cur->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (cur->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");

		/* Start a new output line after every tenth entry. */
		col = (col + 1) % 10;
		sep = (col == 0) ? "\n" : ", ";

		cur = cur->q.cqe_next;
	}
	(void)fprintf(stderr, "\n");
}
#endif
489