xref: /dragonfly/lib/libc/db/mpool/mpool.c (revision 9bb2a92d)
1 /*-
2  * Copyright (c) 1990, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  * $FreeBSD: src/lib/libc/db/mpool/mpool.c,v 1.5.2.1 2001/03/05 23:05:01 obrien Exp $
34  * $DragonFly: src/lib/libc/db/mpool/mpool.c,v 1.3 2003/11/12 20:21:23 eirikn Exp $
35  *
36  * @(#)mpool.c	8.5 (Berkeley) 7/26/94
37  */
38 
39 #include <sys/param.h>
40 #include <sys/queue.h>
41 #include <sys/stat.h>
42 
43 #include <errno.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 
49 #include <db.h>
50 
51 #define	__MPOOLINTERFACE_PRIVATE
52 #include <mpool.h>
53 
54 static BKT *mpool_bkt (MPOOL *);
55 static BKT *mpool_look (MPOOL *, pgno_t);
56 static int  mpool_write (MPOOL *, BKT *);
57 
58 /*
59  * mpool_open --
60  *	Initialize a memory pool.
61  */
62 MPOOL *
63 mpool_open(key, fd, pagesize, maxcache)
64 	void *key;
65 	int fd;
66 	pgno_t pagesize, maxcache;
67 {
68 	struct stat sb;
69 	MPOOL *mp;
70 	int entry;
71 
72 	/*
73 	 * Get information about the file.
74 	 *
75 	 * XXX
76 	 * We don't currently handle pipes, although we should.
77 	 */
78 	if (fstat(fd, &sb))
79 		return (NULL);
80 	if (!S_ISREG(sb.st_mode)) {
81 		errno = ESPIPE;
82 		return (NULL);
83 	}
84 
85 	/* Allocate and initialize the MPOOL cookie. */
86 	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
87 		return (NULL);
88 	TAILQ_INIT(&mp->lqh);
89 	for (entry = 0; entry < HASHSIZE; ++entry)
90 		TAILQ_INIT(&mp->hqh[entry]);
91 	mp->maxcache = maxcache;
92 	mp->npages = sb.st_size / pagesize;
93 	mp->pagesize = pagesize;
94 	mp->fd = fd;
95 	return (mp);
96 }
97 
98 /*
99  * mpool_filter --
100  *	Initialize input/output filters.
101  */
102 void
103 mpool_filter(mp, pgin, pgout, pgcookie)
104 	MPOOL *mp;
105 	void (*pgin) (void *, pgno_t, void *);
106 	void (*pgout) (void *, pgno_t, void *);
107 	void *pgcookie;
108 {
109 	mp->pgin = pgin;
110 	mp->pgout = pgout;
111 	mp->pgcookie = pgcookie;
112 }
113 
114 /*
115  * mpool_new --
116  *	Get a new page of memory.
117  */
118 void *
119 mpool_new(mp, pgnoaddr)
120 	MPOOL *mp;
121 	pgno_t *pgnoaddr;
122 {
123 	struct _hqh *head;
124 	BKT *bp;
125 
126 	if (mp->npages == MAX_PAGE_NUMBER) {
127 		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
128 		abort();
129 	}
130 #ifdef STATISTICS
131 	++mp->pagenew;
132 #endif
133 	/*
134 	 * Get a BKT from the cache.  Assign a new page number, attach
135 	 * it to the head of the hash chain, the tail of the lru chain,
136 	 * and return.
137 	 */
138 	if ((bp = mpool_bkt(mp)) == NULL)
139 		return (NULL);
140 	*pgnoaddr = bp->pgno = mp->npages++;
141 	bp->flags = MPOOL_PINNED;
142 
143 	head = &mp->hqh[HASHKEY(bp->pgno)];
144 	TAILQ_INSERT_HEAD(head, bp, hq);
145 	TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
146 	return (bp->page);
147 }
148 
149 /*
150  * mpool_get
151  *	Get a page.
152  */
153 void *
154 mpool_get(mp, pgno, flags)
155 	MPOOL *mp;
156 	pgno_t pgno;
157 	u_int flags;				/* XXX not used? */
158 {
159 	struct _hqh *head;
160 	BKT *bp;
161 	off_t off;
162 	int nr;
163 
164 	/* Check for attempt to retrieve a non-existent page. */
165 	if (pgno >= mp->npages) {
166 		errno = EINVAL;
167 		return (NULL);
168 	}
169 
170 #ifdef STATISTICS
171 	++mp->pageget;
172 #endif
173 
174 	/* Check for a page that is cached. */
175 	if ((bp = mpool_look(mp, pgno)) != NULL) {
176 #ifdef DEBUG
177 		if (bp->flags & MPOOL_PINNED) {
178 			(void)fprintf(stderr,
179 			    "mpool_get: page %d already pinned\n", bp->pgno);
180 			abort();
181 		}
182 #endif
183 		/*
184 		 * Move the page to the head of the hash chain and the tail
185 		 * of the lru chain.
186 		 */
187 		head = &mp->hqh[HASHKEY(bp->pgno)];
188 		TAILQ_REMOVE(head, bp, hq);
189 		TAILQ_INSERT_HEAD(head, bp, hq);
190 		TAILQ_REMOVE(&mp->lqh, bp, q);
191 		TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
192 
193 		/* Return a pinned page. */
194 		bp->flags |= MPOOL_PINNED;
195 		return (bp->page);
196 	}
197 
198 	/* Get a page from the cache. */
199 	if ((bp = mpool_bkt(mp)) == NULL)
200 		return (NULL);
201 
202 	/* Read in the contents. */
203 #ifdef STATISTICS
204 	++mp->pageread;
205 #endif
206 	off = mp->pagesize * pgno;
207 	if (lseek(mp->fd, off, SEEK_SET) != off)
208 		return (NULL);
209 	if ((nr = _read(mp->fd, bp->page, mp->pagesize)) != mp->pagesize) {
210 		if (nr >= 0)
211 			errno = EFTYPE;
212 		return (NULL);
213 	}
214 
215 	/* Set the page number, pin the page. */
216 	bp->pgno = pgno;
217 	bp->flags = MPOOL_PINNED;
218 
219 	/*
220 	 * Add the page to the head of the hash chain and the tail
221 	 * of the lru chain.
222 	 */
223 	head = &mp->hqh[HASHKEY(bp->pgno)];
224 	TAILQ_INSERT_HEAD(head, bp, hq);
225 	TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
226 
227 	/* Run through the user's filter. */
228 	if (mp->pgin != NULL)
229 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
230 
231 	return (bp->page);
232 }
233 
234 /*
235  * mpool_put
236  *	Return a page.
237  */
238 int
239 mpool_put(mp, page, flags)
240 	MPOOL *mp;
241 	void *page;
242 	u_int flags;
243 {
244 	BKT *bp;
245 
246 #ifdef STATISTICS
247 	++mp->pageput;
248 #endif
249 	bp = (BKT *)((char *)page - sizeof(BKT));
250 #ifdef DEBUG
251 	if (!(bp->flags & MPOOL_PINNED)) {
252 		(void)fprintf(stderr,
253 		    "mpool_put: page %d not pinned\n", bp->pgno);
254 		abort();
255 	}
256 #endif
257 	bp->flags &= ~MPOOL_PINNED;
258 	bp->flags |= flags & MPOOL_DIRTY;
259 	return (RET_SUCCESS);
260 }
261 
262 /*
263  * mpool_close
264  *	Close the buffer pool.
265  */
266 int
267 mpool_close(mp)
268 	MPOOL *mp;
269 {
270 	BKT *bp;
271 
272 	/* Free up any space allocated to the lru pages. */
273 	while (!TAILQ_EMPTY(&mp->lqh)) {
274 		bp = TAILQ_FIRST(&mp->lqh);
275 		TAILQ_REMOVE(&mp->lqh, bp, q);
276 		free(bp);
277 	}
278 
279 	/* Free the MPOOL cookie. */
280 	free(mp);
281 	return (RET_SUCCESS);
282 }
283 
284 /*
285  * mpool_sync
286  *	Sync the pool to disk.
287  */
288 int
289 mpool_sync(mp)
290 	MPOOL *mp;
291 {
292 	BKT *bp;
293 
294 	/* Walk the lru chain, flushing any dirty pages to disk. */
295 	TAILQ_FOREACH(bp, &mp->lqh, q)
296 		if (bp->flags & MPOOL_DIRTY &&
297 		    mpool_write(mp, bp) == RET_ERROR)
298 			return (RET_ERROR);
299 
300 	/* Sync the file descriptor. */
301 	return (_fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
302 }
303 
304 /*
305  * mpool_bkt
306  *	Get a page from the cache (or create one).
307  */
308 static BKT *
309 mpool_bkt(mp)
310 	MPOOL *mp;
311 {
312 	struct _hqh *head;
313 	BKT *bp;
314 
315 	/* If under the max cached, always create a new page. */
316 	if (mp->curcache < mp->maxcache)
317 		goto new;
318 
319 	/*
320 	 * If the cache is max'd out, walk the lru list for a buffer we
321 	 * can flush.  If we find one, write it (if necessary) and take it
322 	 * off any lists.  If we don't find anything we grow the cache anyway.
323 	 * The cache never shrinks.
324 	 */
325 	TAILQ_FOREACH(bp, &mp->lqh, q)
326 		if (!(bp->flags & MPOOL_PINNED)) {
327 			/* Flush if dirty. */
328 			if (bp->flags & MPOOL_DIRTY &&
329 			    mpool_write(mp, bp) == RET_ERROR)
330 				return (NULL);
331 #ifdef STATISTICS
332 			++mp->pageflush;
333 #endif
334 			/* Remove from the hash and lru queues. */
335 			head = &mp->hqh[HASHKEY(bp->pgno)];
336 			TAILQ_REMOVE(head, bp, hq);
337 			TAILQ_REMOVE(&mp->lqh, bp, q);
338 #ifdef DEBUG
339 			{ void *spage;
340 				spage = bp->page;
341 				memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
342 				bp->page = spage;
343 			}
344 #endif
345 			return (bp);
346 		}
347 
348 new:	if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
349 		return (NULL);
350 #ifdef STATISTICS
351 	++mp->pagealloc;
352 #endif
353 #if defined(DEBUG) || defined(PURIFY)
354 	memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
355 #endif
356 	bp->page = (char *)bp + sizeof(BKT);
357 	++mp->curcache;
358 	return (bp);
359 }
360 
361 /*
362  * mpool_write
363  *	Write a page to disk.
364  */
365 static int
366 mpool_write(mp, bp)
367 	MPOOL *mp;
368 	BKT *bp;
369 {
370 	off_t off;
371 
372 #ifdef STATISTICS
373 	++mp->pagewrite;
374 #endif
375 
376 	/* Run through the user's filter. */
377 	if (mp->pgout)
378 		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
379 
380 	off = mp->pagesize * bp->pgno;
381 	if (lseek(mp->fd, off, SEEK_SET) != off)
382 		return (RET_ERROR);
383 	if (_write(mp->fd, bp->page, mp->pagesize) != mp->pagesize)
384 		return (RET_ERROR);
385 
386 	bp->flags &= ~MPOOL_DIRTY;
387 	return (RET_SUCCESS);
388 }
389 
390 /*
391  * mpool_look
392  *	Lookup a page in the cache.
393  */
394 static BKT *
395 mpool_look(mp, pgno)
396 	MPOOL *mp;
397 	pgno_t pgno;
398 {
399 	struct _hqh *head;
400 	BKT *bp;
401 
402 	head = &mp->hqh[HASHKEY(pgno)];
403 	TAILQ_FOREACH(bp, head, hq)
404 		if (bp->pgno == pgno) {
405 #ifdef STATISTICS
406 			++mp->cachehit;
407 #endif
408 			return (bp);
409 		}
410 #ifdef STATISTICS
411 	++mp->cachemiss;
412 #endif
413 	return (NULL);
414 }
415 
416 #ifdef STATISTICS
417 /*
418  * mpool_stat
419  *	Print out cache statistics.
420  */
421 void
422 mpool_stat(mp)
423 	MPOOL *mp;
424 {
425 	BKT *bp;
426 	int cnt;
427 	char *sep;
428 
429 	(void)fprintf(stderr, "%lu pages in the file\n", mp->npages);
430 	(void)fprintf(stderr,
431 	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
432 	    mp->pagesize, mp->curcache, mp->maxcache);
433 	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
434 	    mp->pageput, mp->pageget, mp->pagenew);
435 	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
436 	    mp->pagealloc, mp->pageflush);
437 	if (mp->cachehit + mp->cachemiss)
438 		(void)fprintf(stderr,
439 		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
440 		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
441 		    * 100, mp->cachehit, mp->cachemiss);
442 	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
443 	    mp->pageread, mp->pagewrite);
444 
445 	sep = "";
446 	cnt = 0;
447 	TAILQ_FOREACH(bp, &mp->lqh, q) {
448 		(void)fprintf(stderr, "%s%d", sep, bp->pgno);
449 		if (bp->flags & MPOOL_DIRTY)
450 			(void)fprintf(stderr, "d");
451 		if (bp->flags & MPOOL_PINNED)
452 			(void)fprintf(stderr, "P");
453 		if (++cnt == 10) {
454 			sep = "\n";
455 			cnt = 0;
456 		} else
457 			sep = ", ";
458 
459 	}
460 	(void)fprintf(stderr, "\n");
461 }
462 #endif
463