xref: /dragonfly/lib/libc/db/mpool/mpool.c (revision 6bd457ed)
1 /*-
2  * Copyright (c) 1990, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  * $FreeBSD: src/lib/libc/db/mpool/mpool.c,v 1.5.2.1 2001/03/05 23:05:01 obrien Exp $
34  * $DragonFly: src/lib/libc/db/mpool/mpool.c,v 1.4 2005/01/31 22:29:11 dillon Exp $
35  *
36  * @(#)mpool.c	8.5 (Berkeley) 7/26/94
37  */
38 
39 #include "namespace.h"
40 #include <sys/param.h>
41 #include <sys/queue.h>
42 #include <sys/stat.h>
43 
44 #include <errno.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <unistd.h>
49 #include "un-namespace.h"
50 
51 #include <db.h>
52 
53 #define	__MPOOLINTERFACE_PRIVATE
54 #include <mpool.h>
55 
56 static BKT *mpool_bkt (MPOOL *);
57 static BKT *mpool_look (MPOOL *, pgno_t);
58 static int  mpool_write (MPOOL *, BKT *);
59 
60 /*
61  * mpool_open --
62  *	Initialize a memory pool.
63  */
64 MPOOL *
65 mpool_open(key, fd, pagesize, maxcache)
66 	void *key;
67 	int fd;
68 	pgno_t pagesize, maxcache;
69 {
70 	struct stat sb;
71 	MPOOL *mp;
72 	int entry;
73 
74 	/*
75 	 * Get information about the file.
76 	 *
77 	 * XXX
78 	 * We don't currently handle pipes, although we should.
79 	 */
80 	if (_fstat(fd, &sb))
81 		return (NULL);
82 	if (!S_ISREG(sb.st_mode)) {
83 		errno = ESPIPE;
84 		return (NULL);
85 	}
86 
87 	/* Allocate and initialize the MPOOL cookie. */
88 	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
89 		return (NULL);
90 	TAILQ_INIT(&mp->lqh);
91 	for (entry = 0; entry < HASHSIZE; ++entry)
92 		TAILQ_INIT(&mp->hqh[entry]);
93 	mp->maxcache = maxcache;
94 	mp->npages = sb.st_size / pagesize;
95 	mp->pagesize = pagesize;
96 	mp->fd = fd;
97 	return (mp);
98 }
99 
100 /*
101  * mpool_filter --
102  *	Initialize input/output filters.
103  */
104 void
105 mpool_filter(mp, pgin, pgout, pgcookie)
106 	MPOOL *mp;
107 	void (*pgin) (void *, pgno_t, void *);
108 	void (*pgout) (void *, pgno_t, void *);
109 	void *pgcookie;
110 {
111 	mp->pgin = pgin;
112 	mp->pgout = pgout;
113 	mp->pgcookie = pgcookie;
114 }
115 
116 /*
117  * mpool_new --
118  *	Get a new page of memory.
119  */
120 void *
121 mpool_new(mp, pgnoaddr)
122 	MPOOL *mp;
123 	pgno_t *pgnoaddr;
124 {
125 	struct _hqh *head;
126 	BKT *bp;
127 
128 	if (mp->npages == MAX_PAGE_NUMBER) {
129 		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
130 		abort();
131 	}
132 #ifdef STATISTICS
133 	++mp->pagenew;
134 #endif
135 	/*
136 	 * Get a BKT from the cache.  Assign a new page number, attach
137 	 * it to the head of the hash chain, the tail of the lru chain,
138 	 * and return.
139 	 */
140 	if ((bp = mpool_bkt(mp)) == NULL)
141 		return (NULL);
142 	*pgnoaddr = bp->pgno = mp->npages++;
143 	bp->flags = MPOOL_PINNED;
144 
145 	head = &mp->hqh[HASHKEY(bp->pgno)];
146 	TAILQ_INSERT_HEAD(head, bp, hq);
147 	TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
148 	return (bp->page);
149 }
150 
151 /*
152  * mpool_get
153  *	Get a page.
154  */
155 void *
156 mpool_get(mp, pgno, flags)
157 	MPOOL *mp;
158 	pgno_t pgno;
159 	u_int flags;				/* XXX not used? */
160 {
161 	struct _hqh *head;
162 	BKT *bp;
163 	off_t off;
164 	int nr;
165 
166 	/* Check for attempt to retrieve a non-existent page. */
167 	if (pgno >= mp->npages) {
168 		errno = EINVAL;
169 		return (NULL);
170 	}
171 
172 #ifdef STATISTICS
173 	++mp->pageget;
174 #endif
175 
176 	/* Check for a page that is cached. */
177 	if ((bp = mpool_look(mp, pgno)) != NULL) {
178 #ifdef DEBUG
179 		if (bp->flags & MPOOL_PINNED) {
180 			(void)fprintf(stderr,
181 			    "mpool_get: page %d already pinned\n", bp->pgno);
182 			abort();
183 		}
184 #endif
185 		/*
186 		 * Move the page to the head of the hash chain and the tail
187 		 * of the lru chain.
188 		 */
189 		head = &mp->hqh[HASHKEY(bp->pgno)];
190 		TAILQ_REMOVE(head, bp, hq);
191 		TAILQ_INSERT_HEAD(head, bp, hq);
192 		TAILQ_REMOVE(&mp->lqh, bp, q);
193 		TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
194 
195 		/* Return a pinned page. */
196 		bp->flags |= MPOOL_PINNED;
197 		return (bp->page);
198 	}
199 
200 	/* Get a page from the cache. */
201 	if ((bp = mpool_bkt(mp)) == NULL)
202 		return (NULL);
203 
204 	/* Read in the contents. */
205 #ifdef STATISTICS
206 	++mp->pageread;
207 #endif
208 	off = mp->pagesize * pgno;
209 	if (lseek(mp->fd, off, SEEK_SET) != off)
210 		return (NULL);
211 	if ((nr = _read(mp->fd, bp->page, mp->pagesize)) != mp->pagesize) {
212 		if (nr >= 0)
213 			errno = EFTYPE;
214 		return (NULL);
215 	}
216 
217 	/* Set the page number, pin the page. */
218 	bp->pgno = pgno;
219 	bp->flags = MPOOL_PINNED;
220 
221 	/*
222 	 * Add the page to the head of the hash chain and the tail
223 	 * of the lru chain.
224 	 */
225 	head = &mp->hqh[HASHKEY(bp->pgno)];
226 	TAILQ_INSERT_HEAD(head, bp, hq);
227 	TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
228 
229 	/* Run through the user's filter. */
230 	if (mp->pgin != NULL)
231 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
232 
233 	return (bp->page);
234 }
235 
236 /*
237  * mpool_put
238  *	Return a page.
239  */
240 int
241 mpool_put(mp, page, flags)
242 	MPOOL *mp;
243 	void *page;
244 	u_int flags;
245 {
246 	BKT *bp;
247 
248 #ifdef STATISTICS
249 	++mp->pageput;
250 #endif
251 	bp = (BKT *)((char *)page - sizeof(BKT));
252 #ifdef DEBUG
253 	if (!(bp->flags & MPOOL_PINNED)) {
254 		(void)fprintf(stderr,
255 		    "mpool_put: page %d not pinned\n", bp->pgno);
256 		abort();
257 	}
258 #endif
259 	bp->flags &= ~MPOOL_PINNED;
260 	bp->flags |= flags & MPOOL_DIRTY;
261 	return (RET_SUCCESS);
262 }
263 
264 /*
265  * mpool_close
266  *	Close the buffer pool.
267  */
268 int
269 mpool_close(mp)
270 	MPOOL *mp;
271 {
272 	BKT *bp;
273 
274 	/* Free up any space allocated to the lru pages. */
275 	while (!TAILQ_EMPTY(&mp->lqh)) {
276 		bp = TAILQ_FIRST(&mp->lqh);
277 		TAILQ_REMOVE(&mp->lqh, bp, q);
278 		free(bp);
279 	}
280 
281 	/* Free the MPOOL cookie. */
282 	free(mp);
283 	return (RET_SUCCESS);
284 }
285 
286 /*
287  * mpool_sync
288  *	Sync the pool to disk.
289  */
290 int
291 mpool_sync(mp)
292 	MPOOL *mp;
293 {
294 	BKT *bp;
295 
296 	/* Walk the lru chain, flushing any dirty pages to disk. */
297 	TAILQ_FOREACH(bp, &mp->lqh, q)
298 		if (bp->flags & MPOOL_DIRTY &&
299 		    mpool_write(mp, bp) == RET_ERROR)
300 			return (RET_ERROR);
301 
302 	/* Sync the file descriptor. */
303 	return (_fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
304 }
305 
306 /*
307  * mpool_bkt
308  *	Get a page from the cache (or create one).
309  */
310 static BKT *
311 mpool_bkt(mp)
312 	MPOOL *mp;
313 {
314 	struct _hqh *head;
315 	BKT *bp;
316 
317 	/* If under the max cached, always create a new page. */
318 	if (mp->curcache < mp->maxcache)
319 		goto new;
320 
321 	/*
322 	 * If the cache is max'd out, walk the lru list for a buffer we
323 	 * can flush.  If we find one, write it (if necessary) and take it
324 	 * off any lists.  If we don't find anything we grow the cache anyway.
325 	 * The cache never shrinks.
326 	 */
327 	TAILQ_FOREACH(bp, &mp->lqh, q)
328 		if (!(bp->flags & MPOOL_PINNED)) {
329 			/* Flush if dirty. */
330 			if (bp->flags & MPOOL_DIRTY &&
331 			    mpool_write(mp, bp) == RET_ERROR)
332 				return (NULL);
333 #ifdef STATISTICS
334 			++mp->pageflush;
335 #endif
336 			/* Remove from the hash and lru queues. */
337 			head = &mp->hqh[HASHKEY(bp->pgno)];
338 			TAILQ_REMOVE(head, bp, hq);
339 			TAILQ_REMOVE(&mp->lqh, bp, q);
340 #ifdef DEBUG
341 			{ void *spage;
342 				spage = bp->page;
343 				memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
344 				bp->page = spage;
345 			}
346 #endif
347 			return (bp);
348 		}
349 
350 new:	if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
351 		return (NULL);
352 #ifdef STATISTICS
353 	++mp->pagealloc;
354 #endif
355 #if defined(DEBUG) || defined(PURIFY)
356 	memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
357 #endif
358 	bp->page = (char *)bp + sizeof(BKT);
359 	++mp->curcache;
360 	return (bp);
361 }
362 
363 /*
364  * mpool_write
365  *	Write a page to disk.
366  */
367 static int
368 mpool_write(mp, bp)
369 	MPOOL *mp;
370 	BKT *bp;
371 {
372 	off_t off;
373 
374 #ifdef STATISTICS
375 	++mp->pagewrite;
376 #endif
377 
378 	/* Run through the user's filter. */
379 	if (mp->pgout)
380 		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
381 
382 	off = mp->pagesize * bp->pgno;
383 	if (lseek(mp->fd, off, SEEK_SET) != off)
384 		return (RET_ERROR);
385 	if (_write(mp->fd, bp->page, mp->pagesize) != mp->pagesize)
386 		return (RET_ERROR);
387 
388 	bp->flags &= ~MPOOL_DIRTY;
389 	return (RET_SUCCESS);
390 }
391 
392 /*
393  * mpool_look
394  *	Lookup a page in the cache.
395  */
396 static BKT *
397 mpool_look(mp, pgno)
398 	MPOOL *mp;
399 	pgno_t pgno;
400 {
401 	struct _hqh *head;
402 	BKT *bp;
403 
404 	head = &mp->hqh[HASHKEY(pgno)];
405 	TAILQ_FOREACH(bp, head, hq)
406 		if (bp->pgno == pgno) {
407 #ifdef STATISTICS
408 			++mp->cachehit;
409 #endif
410 			return (bp);
411 		}
412 #ifdef STATISTICS
413 	++mp->cachemiss;
414 #endif
415 	return (NULL);
416 }
417 
418 #ifdef STATISTICS
419 /*
420  * mpool_stat
421  *	Print out cache statistics.
422  */
423 void
424 mpool_stat(mp)
425 	MPOOL *mp;
426 {
427 	BKT *bp;
428 	int cnt;
429 	char *sep;
430 
431 	(void)fprintf(stderr, "%lu pages in the file\n", mp->npages);
432 	(void)fprintf(stderr,
433 	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
434 	    mp->pagesize, mp->curcache, mp->maxcache);
435 	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
436 	    mp->pageput, mp->pageget, mp->pagenew);
437 	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
438 	    mp->pagealloc, mp->pageflush);
439 	if (mp->cachehit + mp->cachemiss)
440 		(void)fprintf(stderr,
441 		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
442 		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
443 		    * 100, mp->cachehit, mp->cachemiss);
444 	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
445 	    mp->pageread, mp->pagewrite);
446 
447 	sep = "";
448 	cnt = 0;
449 	TAILQ_FOREACH(bp, &mp->lqh, q) {
450 		(void)fprintf(stderr, "%s%d", sep, bp->pgno);
451 		if (bp->flags & MPOOL_DIRTY)
452 			(void)fprintf(stderr, "d");
453 		if (bp->flags & MPOOL_PINNED)
454 			(void)fprintf(stderr, "P");
455 		if (++cnt == 10) {
456 			sep = "\n";
457 			cnt = 0;
458 		} else
459 			sep = ", ";
460 
461 	}
462 	(void)fprintf(stderr, "\n");
463 }
464 #endif
465