xref: /dragonfly/lib/libc/db/mpool/mpool.c (revision 36a3d1d6)
1 /*-
2  * Copyright (c) 1990, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD: src/lib/libc/db/mpool/mpool.c,v 1.5.2.1 2001/03/05 23:05:01 obrien Exp $
30  * $DragonFly: src/lib/libc/db/mpool/mpool.c,v 1.7 2005/11/19 20:46:32 swildner Exp $
31  *
32  * @(#)mpool.c	8.5 (Berkeley) 7/26/94
33  */
34 
35 #include "namespace.h"
36 #include <sys/param.h>
37 #include <sys/queue.h>
38 #include <sys/stat.h>
39 
40 #include <errno.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <unistd.h>
45 #include "un-namespace.h"
46 
47 #include <db.h>
48 
49 #define	__MPOOLINTERFACE_PRIVATE
50 #include <mpool.h>
51 
52 static BKT *mpool_bkt (MPOOL *);
53 static BKT *mpool_look (MPOOL *, pgno_t);
54 static int  mpool_write (MPOOL *, BKT *);
55 
56 /*
57  * mpool_open --
58  *	Initialize a memory pool.
59  */
60 MPOOL *
61 mpool_open(void *key __unused, int fd, pgno_t pagesize, pgno_t maxcache)
62 {
63 	struct stat sb;
64 	MPOOL *mp;
65 	int entry;
66 
67 	/*
68 	 * Get information about the file.
69 	 *
70 	 * XXX
71 	 * We don't currently handle pipes, although we should.
72 	 */
73 	if (_fstat(fd, &sb))
74 		return (NULL);
75 	if (!S_ISREG(sb.st_mode)) {
76 		errno = ESPIPE;
77 		return (NULL);
78 	}
79 
80 	/* Allocate and initialize the MPOOL cookie. */
81 	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
82 		return (NULL);
83 	TAILQ_INIT(&mp->lqh);
84 	for (entry = 0; entry < HASHSIZE; ++entry)
85 		TAILQ_INIT(&mp->hqh[entry]);
86 	mp->maxcache = maxcache;
87 	mp->npages = sb.st_size / pagesize;
88 	mp->pagesize = pagesize;
89 	mp->fd = fd;
90 	return (mp);
91 }
92 
93 /*
94  * mpool_filter --
95  *	Initialize input/output filters.
96  */
97 void
98 mpool_filter(MPOOL *mp, void (*pgin)(void *, pgno_t, void *),
99 	     void (*pgout)(void *, pgno_t, void *), void *pgcookie)
100 {
101 	mp->pgin = pgin;
102 	mp->pgout = pgout;
103 	mp->pgcookie = pgcookie;
104 }
105 
106 /*
107  * mpool_new --
108  *	Get a new page of memory.
109  */
110 void *
111 mpool_new(MPOOL *mp, pgno_t *pgnoaddr)
112 {
113 	struct _hqh *head;
114 	BKT *bp;
115 
116 	if (mp->npages == MAX_PAGE_NUMBER) {
117 		fprintf(stderr, "mpool_new: page allocation overflow.\n");
118 		abort();
119 	}
120 #ifdef STATISTICS
121 	++mp->pagenew;
122 #endif
123 	/*
124 	 * Get a BKT from the cache.  Assign a new page number, attach
125 	 * it to the head of the hash chain, the tail of the lru chain,
126 	 * and return.
127 	 */
128 	if ((bp = mpool_bkt(mp)) == NULL)
129 		return (NULL);
130 	*pgnoaddr = bp->pgno = mp->npages++;
131 	bp->flags = MPOOL_PINNED;
132 
133 	head = &mp->hqh[HASHKEY(bp->pgno)];
134 	TAILQ_INSERT_HEAD(head, bp, hq);
135 	TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
136 	return (bp->page);
137 }
138 
139 /*
140  * mpool_get
141  *	Get a page.
142  */
143 void *
144 mpool_get(MPOOL *mp, pgno_t pgno, u_int flags __unused)
145 {
146 	struct _hqh *head;
147 	BKT *bp;
148 	off_t off;
149 	int nr;
150 
151 	/* Check for attempt to retrieve a non-existent page. */
152 	if (pgno >= mp->npages) {
153 		errno = EINVAL;
154 		return (NULL);
155 	}
156 
157 #ifdef STATISTICS
158 	++mp->pageget;
159 #endif
160 
161 	/* Check for a page that is cached. */
162 	if ((bp = mpool_look(mp, pgno)) != NULL) {
163 #ifdef DEBUG
164 		if (bp->flags & MPOOL_PINNED) {
165 			fprintf(stderr,
166 			    "mpool_get: page %d already pinned\n", bp->pgno);
167 			abort();
168 		}
169 #endif
170 		/*
171 		 * Move the page to the head of the hash chain and the tail
172 		 * of the lru chain.
173 		 */
174 		head = &mp->hqh[HASHKEY(bp->pgno)];
175 		TAILQ_REMOVE(head, bp, hq);
176 		TAILQ_INSERT_HEAD(head, bp, hq);
177 		TAILQ_REMOVE(&mp->lqh, bp, q);
178 		TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
179 
180 		/* Return a pinned page. */
181 		bp->flags |= MPOOL_PINNED;
182 		return (bp->page);
183 	}
184 
185 	/* Get a page from the cache. */
186 	if ((bp = mpool_bkt(mp)) == NULL)
187 		return (NULL);
188 
189 	/* Read in the contents. */
190 #ifdef STATISTICS
191 	++mp->pageread;
192 #endif
193 	off = mp->pagesize * pgno;
194 	nr = pread(mp->fd, bp->page, mp->pagesize, off);
195 	if (nr != mp->pagesize) {
196 		if (nr >= 0)
197 			errno = EFTYPE;
198 		return (NULL);
199 	}
200 
201 	/* Set the page number, pin the page. */
202 	bp->pgno = pgno;
203 	bp->flags = MPOOL_PINNED;
204 
205 	/*
206 	 * Add the page to the head of the hash chain and the tail
207 	 * of the lru chain.
208 	 */
209 	head = &mp->hqh[HASHKEY(bp->pgno)];
210 	TAILQ_INSERT_HEAD(head, bp, hq);
211 	TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
212 
213 	/* Run through the user's filter. */
214 	if (mp->pgin != NULL)
215 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
216 
217 	return (bp->page);
218 }
219 
220 /*
221  * mpool_put
222  *	Return a page.
223  */
224 int
225 mpool_put(MPOOL *mp __unused, void *page, u_int flags)
226 {
227 	BKT *bp;
228 
229 #ifdef STATISTICS
230 	++mp->pageput;
231 #endif
232 	bp = (BKT *)((char *)page - sizeof(BKT));
233 #ifdef DEBUG
234 	if (!(bp->flags & MPOOL_PINNED)) {
235 		fprintf(stderr,
236 		    "mpool_put: page %d not pinned\n", bp->pgno);
237 		abort();
238 	}
239 #endif
240 	bp->flags &= ~MPOOL_PINNED;
241 	bp->flags |= flags & MPOOL_DIRTY;
242 	return (RET_SUCCESS);
243 }
244 
245 /*
246  * mpool_close
247  *	Close the buffer pool.
248  */
249 int
250 mpool_close(MPOOL *mp)
251 {
252 	BKT *bp;
253 
254 	/* Free up any space allocated to the lru pages. */
255 	while (!TAILQ_EMPTY(&mp->lqh)) {
256 		bp = TAILQ_FIRST(&mp->lqh);
257 		TAILQ_REMOVE(&mp->lqh, bp, q);
258 		free(bp);
259 	}
260 
261 	/* Free the MPOOL cookie. */
262 	free(mp);
263 	return (RET_SUCCESS);
264 }
265 
266 /*
267  * mpool_sync
268  *	Sync the pool to disk.
269  */
270 int
271 mpool_sync(MPOOL *mp)
272 {
273 	BKT *bp;
274 
275 	/* Walk the lru chain, flushing any dirty pages to disk. */
276 	TAILQ_FOREACH(bp, &mp->lqh, q)
277 		if (bp->flags & MPOOL_DIRTY &&
278 		    mpool_write(mp, bp) == RET_ERROR)
279 			return (RET_ERROR);
280 
281 	/* Sync the file descriptor. */
282 	return (_fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
283 }
284 
285 /*
286  * mpool_bkt
287  *	Get a page from the cache (or create one).
288  */
289 static BKT *
290 mpool_bkt(MPOOL *mp)
291 {
292 	struct _hqh *head;
293 	BKT *bp;
294 
295 	/* If under the max cached, always create a new page. */
296 	if (mp->curcache < mp->maxcache)
297 		goto new;
298 
299 	/*
300 	 * If the cache is max'd out, walk the lru list for a buffer we
301 	 * can flush.  If we find one, write it (if necessary) and take it
302 	 * off any lists.  If we don't find anything we grow the cache anyway.
303 	 * The cache never shrinks.
304 	 */
305 	TAILQ_FOREACH(bp, &mp->lqh, q)
306 		if (!(bp->flags & MPOOL_PINNED)) {
307 			/* Flush if dirty. */
308 			if (bp->flags & MPOOL_DIRTY &&
309 			    mpool_write(mp, bp) == RET_ERROR)
310 				return (NULL);
311 #ifdef STATISTICS
312 			++mp->pageflush;
313 #endif
314 			/* Remove from the hash and lru queues. */
315 			head = &mp->hqh[HASHKEY(bp->pgno)];
316 			TAILQ_REMOVE(head, bp, hq);
317 			TAILQ_REMOVE(&mp->lqh, bp, q);
318 #ifdef DEBUG
319 			{ void *spage;
320 				spage = bp->page;
321 				memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
322 				bp->page = spage;
323 			}
324 #endif
325 			return (bp);
326 		}
327 
328 new:	if ((bp = (BKT *)calloc(1, sizeof(BKT) + mp->pagesize)) == NULL)
329 		return (NULL);
330 #ifdef STATISTICS
331 	++mp->pagealloc;
332 #endif
333 #if defined(DEBUG) || defined(PURIFY)
334 	memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
335 #endif
336 	bp->page = (char *)bp + sizeof(BKT);
337 	++mp->curcache;
338 	return (bp);
339 }
340 
341 /*
342  * mpool_write
343  *	Write a page to disk.
344  */
345 static int
346 mpool_write(MPOOL *mp, BKT *bp)
347 {
348 	off_t off;
349 
350 #ifdef STATISTICS
351 	++mp->pagewrite;
352 #endif
353 
354 	/* Run through the user's filter. */
355 	if (mp->pgout)
356 		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
357 
358 	off = mp->pagesize * bp->pgno;
359 	if (pwrite(mp->fd, bp->page, mp->pagesize, off) != mp->pagesize)
360 		return (RET_ERROR);
361 
362 	bp->flags &= ~MPOOL_DIRTY;
363 	return (RET_SUCCESS);
364 }
365 
366 /*
367  * mpool_look
368  *	Lookup a page in the cache.
369  */
370 static BKT *
371 mpool_look(MPOOL *mp, pgno_t pgno)
372 {
373 	struct _hqh *head;
374 	BKT *bp;
375 
376 	head = &mp->hqh[HASHKEY(pgno)];
377 	TAILQ_FOREACH(bp, head, hq)
378 		if (bp->pgno == pgno) {
379 #ifdef STATISTICS
380 			++mp->cachehit;
381 #endif
382 			return (bp);
383 		}
384 #ifdef STATISTICS
385 	++mp->cachemiss;
386 #endif
387 	return (NULL);
388 }
389 
390 #ifdef STATISTICS
391 /*
392  * mpool_stat
393  *	Print out cache statistics.
394  */
395 void
396 mpool_stat(MPOOL *mp)
397 {
398 	BKT *bp;
399 	int cnt;
400 	char *sep;
401 
402 	fprintf(stderr, "%u pages in the file\n", mp->npages);
403 	fprintf(stderr,
404 	    "page size %lu, caching %u pages of %u page max cache\n",
405 	    mp->pagesize, mp->curcache, mp->maxcache);
406 	fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
407 	    mp->pageput, mp->pageget, mp->pagenew);
408 	fprintf(stderr, "%lu page allocs, %lu page flushes\n",
409 	    mp->pagealloc, mp->pageflush);
410 	if (mp->cachehit + mp->cachemiss)
411 		fprintf(stderr,
412 		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
413 		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
414 		    * 100, mp->cachehit, mp->cachemiss);
415 	fprintf(stderr, "%lu page reads, %lu page writes\n",
416 	    mp->pageread, mp->pagewrite);
417 
418 	sep = "";
419 	cnt = 0;
420 	TAILQ_FOREACH(bp, &mp->lqh, q) {
421 		fprintf(stderr, "%s%d", sep, bp->pgno);
422 		if (bp->flags & MPOOL_DIRTY)
423 			fprintf(stderr, "d");
424 		if (bp->flags & MPOOL_PINNED)
425 			fprintf(stderr, "P");
426 		if (++cnt == 10) {
427 			sep = "\n";
428 			cnt = 0;
429 		} else
430 			sep = ", ";
431 
432 	}
433 	fprintf(stderr, "\n");
434 }
435 #endif
436