xref: /original-bsd/sys/vm/swap_pager.c (revision a6d8c59f)
1 /*
2  * Copyright (c) 1990 University of Utah.
3  * Copyright (c) 1991 The Regents of the University of California.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * the Systems Programming Group of the University of Utah Computer
8  * Science Department.
9  *
10  * %sccs.include.redist.c%
11  *
12  * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
13  *
14  *	@(#)swap_pager.c	7.17 (Berkeley) 11/29/92
15  */
16 
17 /*
18  * Quick hack to page to dedicated partition(s).
19  * TODO:
20  *	Add multiprocessor locks
21  *	Deal with async writes in a better fashion
22  */
23 
24 #include "swappager.h"
25 #if NSWAPPAGER > 0
26 
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/proc.h>
30 #include <sys/buf.h>
31 #include <sys/map.h>
32 #include <sys/vnode.h>
33 #include <sys/malloc.h>
34 
35 #include <miscfs/specfs/specdev.h>
36 
37 #include <vm/vm.h>
38 #include <vm/vm_page.h>
39 #include <vm/vm_pageout.h>
40 #include <vm/swap_pager.h>
41 
42 #define NSWSIZES	16	/* size of swtab */
43 #define NPENDINGIO	64	/* max # of pending cleans */
44 #define MAXDADDRS	64	/* max # of disk addrs for fixed allocations */
45 
46 #ifdef DEBUG
47 int	swpagerdebug = 0x100;
48 #define	SDB_FOLLOW	0x001
49 #define SDB_INIT	0x002
50 #define SDB_ALLOC	0x004
51 #define SDB_IO		0x008
52 #define SDB_WRITE	0x010
53 #define SDB_FAIL	0x020
54 #define SDB_ALLOCBLK	0x040
55 #define SDB_FULL	0x080
56 #define SDB_ANOM	0x100
57 #define SDB_ANOMPANIC	0x200
58 #endif
59 
60 struct swpagerclean {
61 	queue_head_t		spc_list;
62 	int			spc_flags;
63 	struct buf		*spc_bp;
64 	sw_pager_t		spc_swp;
65 	vm_offset_t		spc_kva;
66 	vm_page_t		spc_m;
67 } swcleanlist[NPENDINGIO];
68 typedef struct swpagerclean *swp_clean_t;
69 
70 
71 /* spc_flags values */
72 #define SPC_FREE	0x00
73 #define SPC_BUSY	0x01
74 #define SPC_DONE	0x02
75 #define SPC_ERROR	0x04
76 #define SPC_DIRTY	0x08
77 
78 struct swtab {
79 	vm_size_t st_osize;	/* size of object (bytes) */
80 	int	  st_bsize;	/* vs. size of swap block (DEV_BSIZE units) */
81 #ifdef DEBUG
82 	u_long	  st_inuse;	/* number in this range in use */
83 	u_long	  st_usecnt;	/* total used of this size */
84 #endif
85 } swtab[NSWSIZES+1];
86 
87 #ifdef DEBUG
88 int		swap_pager_pendingio;	/* max pending async "clean" ops */
89 int		swap_pager_poip;	/* pageouts in progress */
90 int		swap_pager_piip;	/* pageins in progress */
91 #endif
92 
93 queue_head_t	swap_pager_inuse;	/* list of pending page cleans */
94 queue_head_t	swap_pager_free;	/* list of free pager clean structs */
95 queue_head_t	swap_pager_list;	/* list of "named" anon regions */
96 
97 static int		swap_pager_finish __P((swp_clean_t));
98 static void 		swap_pager_init __P((void));
99 static vm_pager_t	swap_pager_alloc __P((caddr_t, vm_size_t, vm_prot_t));
100 static boolean_t	swap_pager_clean __P((vm_page_t, int));
101 static void		swap_pager_dealloc __P((vm_pager_t));
102 static int		swap_pager_getpage
103 			    __P((vm_pager_t, vm_page_t, boolean_t));
104 static boolean_t	swap_pager_haspage __P((vm_pager_t, vm_offset_t));
105 static int		swap_pager_io __P((sw_pager_t, vm_page_t, int));
106 static void		swap_pager_iodone __P((struct buf *));
107 static int		swap_pager_putpage
108 			    __P((vm_pager_t, vm_page_t, boolean_t));
109 
/*
 * Operations vector for the swap pager.  swap_pager_init() installs it
 * as dfltpagerops and swap_pager_alloc() stores it in each pager's
 * pg_ops.  Entries are positional, so their order must match the
 * member order of struct pagerops.
 */
struct pagerops swappagerops = {
	swap_pager_init,
	swap_pager_alloc,
	swap_pager_dealloc,
	swap_pager_getpage,
	swap_pager_putpage,
	swap_pager_haspage
};
118 
119 static void
120 swap_pager_init()
121 {
122 	register swp_clean_t spc;
123 	register int i, bsize;
124 	extern int dmmin, dmmax;
125 	int maxbsize;
126 
127 #ifdef DEBUG
128 	if (swpagerdebug & (SDB_FOLLOW|SDB_INIT))
129 		printf("swpg_init()\n");
130 #endif
131 	dfltpagerops = &swappagerops;
132 	queue_init(&swap_pager_list);
133 
134 	/*
135 	 * Initialize clean lists
136 	 */
137 	queue_init(&swap_pager_inuse);
138 	queue_init(&swap_pager_free);
139 	for (i = 0, spc = swcleanlist; i < NPENDINGIO; i++, spc++) {
140 		queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
141 		spc->spc_flags = SPC_FREE;
142 	}
143 
144 	/*
145 	 * Calculate the swap allocation constants.
146 	 */
147         if (dmmin == 0) {
148                 dmmin = DMMIN;
149 		if (dmmin < CLBYTES/DEV_BSIZE)
150 			dmmin = CLBYTES/DEV_BSIZE;
151 	}
152         if (dmmax == 0)
153                 dmmax = DMMAX;
154 
155 	/*
156 	 * Fill in our table of object size vs. allocation size
157 	 */
158 	bsize = btodb(PAGE_SIZE);
159 	if (bsize < dmmin)
160 		bsize = dmmin;
161 	maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE);
162 	if (maxbsize > dmmax)
163 		maxbsize = dmmax;
164 	for (i = 0; i < NSWSIZES; i++) {
165 		swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize));
166 		swtab[i].st_bsize = bsize;
167 #ifdef DEBUG
168 		if (swpagerdebug & SDB_INIT)
169 			printf("swpg_init: ix %d, size %x, bsize %x\n",
170 			       i, swtab[i].st_osize, swtab[i].st_bsize);
171 #endif
172 		if (bsize >= maxbsize)
173 			break;
174 		bsize *= 2;
175 	}
176 	swtab[i].st_osize = 0;
177 	swtab[i].st_bsize = bsize;
178 }
179 
180 /*
181  * Allocate a pager structure and associated resources.
182  * Note that if we are called from the pageout daemon (handle == NULL)
183  * we should not wait for memory as it could resulting in deadlock.
184  */
185 static vm_pager_t
186 swap_pager_alloc(handle, size, prot)
187 	caddr_t handle;
188 	register vm_size_t size;
189 	vm_prot_t prot;
190 {
191 	register vm_pager_t pager;
192 	register sw_pager_t swp;
193 	struct swtab *swt;
194 	int waitok;
195 
196 #ifdef DEBUG
197 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
198 		printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot);
199 #endif
200 	/*
201 	 * If this is a "named" anonymous region, look it up and
202 	 * return the appropriate pager if it exists.
203 	 */
204 	if (handle) {
205 		pager = vm_pager_lookup(&swap_pager_list, handle);
206 		if (pager != NULL) {
207 			/*
208 			 * Use vm_object_lookup to gain a reference
209 			 * to the object and also to remove from the
210 			 * object cache.
211 			 */
212 			if (vm_object_lookup(pager) == NULL)
213 				panic("swap_pager_alloc: bad object");
214 			return(pager);
215 		}
216 	}
217 	/*
218 	 * Pager doesn't exist, allocate swap management resources
219 	 * and initialize.
220 	 */
221 	waitok = handle ? M_WAITOK : M_NOWAIT;
222 	pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
223 	if (pager == NULL)
224 		return(NULL);
225 	swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
226 	if (swp == NULL) {
227 #ifdef DEBUG
228 		if (swpagerdebug & SDB_FAIL)
229 			printf("swpg_alloc: swpager malloc failed\n");
230 #endif
231 		free((caddr_t)pager, M_VMPAGER);
232 		return(NULL);
233 	}
234 	size = round_page(size);
235 	for (swt = swtab; swt->st_osize; swt++)
236 		if (size <= swt->st_osize)
237 			break;
238 #ifdef DEBUG
239 	swt->st_inuse++;
240 	swt->st_usecnt++;
241 #endif
242 	swp->sw_osize = size;
243 	swp->sw_bsize = swt->st_bsize;
244 	swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize;
245 	swp->sw_blocks = (sw_blk_t)
246 		malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
247 		       M_VMPGDATA, M_NOWAIT);
248 	if (swp->sw_blocks == NULL) {
249 		free((caddr_t)swp, M_VMPGDATA);
250 		free((caddr_t)pager, M_VMPAGER);
251 #ifdef DEBUG
252 		if (swpagerdebug & SDB_FAIL)
253 			printf("swpg_alloc: sw_blocks malloc failed\n");
254 		swt->st_inuse--;
255 		swt->st_usecnt--;
256 #endif
257 		return(FALSE);
258 	}
259 	bzero((caddr_t)swp->sw_blocks,
260 	      swp->sw_nblocks * sizeof(*swp->sw_blocks));
261 	swp->sw_poip = 0;
262 	if (handle) {
263 		vm_object_t object;
264 
265 		swp->sw_flags = SW_NAMED;
266 		queue_enter(&swap_pager_list, pager, vm_pager_t, pg_list);
267 		/*
268 		 * Consistant with other pagers: return with object
269 		 * referenced.  Can't do this with handle == NULL
270 		 * since it might be the pageout daemon calling.
271 		 */
272 		object = vm_object_allocate(size);
273 		vm_object_enter(object, pager);
274 		vm_object_setpager(object, pager, 0, FALSE);
275 	} else {
276 		swp->sw_flags = 0;
277 		queue_init(&pager->pg_list);
278 	}
279 	pager->pg_handle = handle;
280 	pager->pg_ops = &swappagerops;
281 	pager->pg_type = PG_SWAP;
282 	pager->pg_data = (caddr_t)swp;
283 
284 #ifdef DEBUG
285 	if (swpagerdebug & SDB_ALLOC)
286 		printf("swpg_alloc: pg_data %x, %x of %x at %x\n",
287 		       swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks);
288 #endif
289 	return(pager);
290 }
291 
292 static void
293 swap_pager_dealloc(pager)
294 	vm_pager_t pager;
295 {
296 	register int i;
297 	register sw_blk_t bp;
298 	register sw_pager_t swp;
299 	struct swtab *swt;
300 	int s;
301 
302 #ifdef DEBUG
303 	/* save panic time state */
304 	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
305 		return;
306 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
307 		printf("swpg_dealloc(%x)\n", pager);
308 #endif
309 	/*
310 	 * Remove from list right away so lookups will fail if we
311 	 * block for pageout completion.
312 	 */
313 	swp = (sw_pager_t) pager->pg_data;
314 	if (swp->sw_flags & SW_NAMED) {
315 		queue_remove(&swap_pager_list, pager, vm_pager_t, pg_list);
316 		swp->sw_flags &= ~SW_NAMED;
317 	}
318 #ifdef DEBUG
319 	for (swt = swtab; swt->st_osize; swt++)
320 		if (swp->sw_osize <= swt->st_osize)
321 			break;
322 	swt->st_inuse--;
323 #endif
324 
325 	/*
326 	 * Wait for all pageouts to finish and remove
327 	 * all entries from cleaning list.
328 	 */
329 	s = splbio();
330 	while (swp->sw_poip) {
331 		swp->sw_flags |= SW_WANTED;
332 		assert_wait((int)swp, 0);
333 		thread_block();
334 	}
335 	splx(s);
336 	(void) swap_pager_clean(NULL, B_WRITE);
337 
338 	/*
339 	 * Free left over swap blocks
340 	 */
341 	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++)
342 		if (bp->swb_block) {
343 #ifdef DEBUG
344 			if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL))
345 				printf("swpg_dealloc: blk %x\n",
346 				       bp->swb_block);
347 #endif
348 			rmfree(swapmap, swp->sw_bsize, bp->swb_block);
349 		}
350 	/*
351 	 * Free swap management resources
352 	 */
353 	free((caddr_t)swp->sw_blocks, M_VMPGDATA);
354 	free((caddr_t)swp, M_VMPGDATA);
355 	free((caddr_t)pager, M_VMPAGER);
356 }
357 
358 static int
359 swap_pager_getpage(pager, m, sync)
360 	vm_pager_t pager;
361 	vm_page_t m;
362 	boolean_t sync;
363 {
364 #ifdef DEBUG
365 	if (swpagerdebug & SDB_FOLLOW)
366 		printf("swpg_getpage(%x, %x, %d)\n", pager, m, sync);
367 #endif
368 	return(swap_pager_io((sw_pager_t)pager->pg_data, m, B_READ));
369 }
370 
371 static int
372 swap_pager_putpage(pager, m, sync)
373 	vm_pager_t pager;
374 	vm_page_t m;
375 	boolean_t sync;
376 {
377 	int flags;
378 
379 #ifdef DEBUG
380 	if (swpagerdebug & SDB_FOLLOW)
381 		printf("swpg_putpage(%x, %x, %d)\n", pager, m, sync);
382 #endif
383 	if (pager == NULL) {
384 		(void) swap_pager_clean(NULL, B_WRITE);
385 		return (VM_PAGER_OK);		/* ??? */
386 	}
387 	flags = B_WRITE;
388 	if (!sync)
389 		flags |= B_ASYNC;
390 	return(swap_pager_io((sw_pager_t)pager->pg_data, m, flags));
391 }
392 
393 static boolean_t
394 swap_pager_haspage(pager, offset)
395 	vm_pager_t pager;
396 	vm_offset_t offset;
397 {
398 	register sw_pager_t swp;
399 	register sw_blk_t swb;
400 	int ix;
401 
402 #ifdef DEBUG
403 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
404 		printf("swpg_haspage(%x, %x) ", pager, offset);
405 #endif
406 	swp = (sw_pager_t) pager->pg_data;
407 	ix = offset / dbtob(swp->sw_bsize);
408 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
409 #ifdef DEBUG
410 		if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK))
411 			printf("swpg_haspage: %x bad offset %x, ix %x\n",
412 			       swp->sw_blocks, offset, ix);
413 #endif
414 		return(FALSE);
415 	}
416 	swb = &swp->sw_blocks[ix];
417 	if (swb->swb_block)
418 		ix = atop(offset % dbtob(swp->sw_bsize));
419 #ifdef DEBUG
420 	if (swpagerdebug & SDB_ALLOCBLK)
421 		printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix);
422 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
423 		printf("-> %c\n",
424 		       "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]);
425 #endif
426 	if (swb->swb_block && (swb->swb_mask & (1 << ix)))
427 		return(TRUE);
428 	return(FALSE);
429 }
430 
431 /*
432  * Scaled down version of swap().
433  * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed.
434  * BOGUS:  lower level IO routines expect a KVA so we have to map our
435  * provided physical page into the KVA to keep them happy.
436  */
437 static int
438 swap_pager_io(swp, m, flags)
439 	register sw_pager_t swp;
440 	vm_page_t m;
441 	int flags;
442 {
443 	register struct buf *bp;
444 	register sw_blk_t swb;
445 	register int s;
446 	int ix;
447 	boolean_t rv;
448 	vm_offset_t kva, off;
449 	swp_clean_t spc;
450 
451 #ifdef DEBUG
452 	/* save panic time state */
453 	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
454 		return (VM_PAGER_FAIL);		/* XXX: correct return? */
455 	if (swpagerdebug & (SDB_FOLLOW|SDB_IO))
456 		printf("swpg_io(%x, %x, %x)\n", swp, m, flags);
457 #endif
458 
459 	/*
460 	 * For reads (pageins) and synchronous writes, we clean up
461 	 * all completed async pageouts.
462 	 */
463 	if ((flags & B_ASYNC) == 0) {
464 		s = splbio();
465 #ifdef DEBUG
466 		/*
467 		 * Check to see if this page is currently being cleaned.
468 		 * If it is, we just wait til the operation is done before
469 		 * continuing.
470 		 */
471 		while (swap_pager_clean(m, flags&B_READ)) {
472 			if (swpagerdebug & SDB_ANOM)
473 				printf("swap_pager_io: page %x cleaning\n", m);
474 
475 			swp->sw_flags |= SW_WANTED;
476 			assert_wait((int)swp, 0);
477 			thread_block();
478 		}
479 #else
480 		(void) swap_pager_clean(m, flags&B_READ);
481 #endif
482 		splx(s);
483 	}
484 	/*
485 	 * For async writes (pageouts), we cleanup completed pageouts so
486 	 * that all available resources are freed.  Also tells us if this
487 	 * page is already being cleaned.  If it is, or no resources
488 	 * are available, we try again later.
489 	 */
490 	else if (swap_pager_clean(m, B_WRITE) ||
491 		 queue_empty(&swap_pager_free)) {
492 #ifdef DEBUG
493 		if ((swpagerdebug & SDB_ANOM) &&
494 		    !queue_empty(&swap_pager_free))
495 			printf("swap_pager_io: page %x already cleaning\n", m);
496 #endif
497 		return(VM_PAGER_FAIL);
498 	}
499 
500 	/*
501 	 * Determine swap block and allocate as necessary.
502 	 */
503 	off = m->offset + m->object->paging_offset;
504 	ix = off / dbtob(swp->sw_bsize);
505 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
506 #ifdef DEBUG
507 		if (swpagerdebug & SDB_FAIL)
508 			printf("swpg_io: bad offset %x+%x(%d) in %x\n",
509 			       m->offset, m->object->paging_offset,
510 			       ix, swp->sw_blocks);
511 #endif
512 		return(VM_PAGER_FAIL);
513 	}
514 	swb = &swp->sw_blocks[ix];
515 	off = off % dbtob(swp->sw_bsize);
516 	if (flags & B_READ) {
517 		if (swb->swb_block == 0 ||
518 		    (swb->swb_mask & (1 << atop(off))) == 0) {
519 #ifdef DEBUG
520 			if (swpagerdebug & (SDB_ALLOCBLK|SDB_FAIL))
521 				printf("swpg_io: %x bad read: blk %x+%x, mask %x, off %x+%x\n",
522 				       swp->sw_blocks,
523 				       swb->swb_block, atop(off),
524 				       swb->swb_mask,
525 				       m->offset, m->object->paging_offset);
526 #endif
527 			/* XXX: should we zero page here?? */
528 			return(VM_PAGER_FAIL);
529 		}
530 	} else if (swb->swb_block == 0) {
531 		swb->swb_block = rmalloc(swapmap, swp->sw_bsize);
532 		if (swb->swb_block == 0) {
533 #ifdef DEBUG
534 			if (swpagerdebug & SDB_FAIL)
535 				printf("swpg_io: rmalloc of %x failed\n",
536 				       swp->sw_bsize);
537 #endif
538 			return(VM_PAGER_FAIL);
539 		}
540 #ifdef DEBUG
541 		if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK))
542 			printf("swpg_io: %x alloc blk %x at ix %x\n",
543 			       swp->sw_blocks, swb->swb_block, ix);
544 #endif
545 	}
546 
547 	/*
548 	 * Allocate a kernel virtual address and initialize so that PTE
549 	 * is available for lower level IO drivers.
550 	 */
551 	kva = vm_pager_map_page(m);
552 
553 	/*
554 	 * Get a swap buffer header and perform the IO
555 	 */
556 	s = splbio();
557 	while (bswlist.b_actf == NULL) {
558 #ifdef DEBUG
559 		if (swpagerdebug & SDB_ANOM)
560 			printf("swap_pager_io: wait on swbuf for %x (%d)\n",
561 			       m, flags);
562 #endif
563 		bswlist.b_flags |= B_WANTED;
564 		sleep((caddr_t)&bswlist, PSWP+1);
565 	}
566 	bp = bswlist.b_actf;
567 	bswlist.b_actf = bp->b_actf;
568 	splx(s);
569 	bp->b_flags = B_BUSY | (flags & B_READ);
570 	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
571 	bp->b_un.b_addr = (caddr_t)kva;
572 	bp->b_blkno = swb->swb_block + btodb(off);
573 	VHOLD(swapdev_vp);
574 	bp->b_vp = swapdev_vp;
575 	if (swapdev_vp->v_type == VBLK)
576 		bp->b_dev = swapdev_vp->v_rdev;
577 	bp->b_bcount = PAGE_SIZE;
578 	if ((bp->b_flags & B_READ) == 0) {
579 		bp->b_dirtyoff = 0;
580 		bp->b_dirtyend = PAGE_SIZE;
581 		swapdev_vp->v_numoutput++;
582 	}
583 
584 	/*
585 	 * If this is an async write we set up additional buffer fields
586 	 * and place a "cleaning" entry on the inuse queue.
587 	 */
588 	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
589 #ifdef DEBUG
590 		if (queue_empty(&swap_pager_free))
591 			panic("swpg_io: lost spc");
592 #endif
593 		queue_remove_first(&swap_pager_free,
594 				   spc, swp_clean_t, spc_list);
595 #ifdef DEBUG
596 		if (spc->spc_flags != SPC_FREE)
597 			panic("swpg_io: bad free spc");
598 #endif
599 		spc->spc_flags = SPC_BUSY;
600 		spc->spc_bp = bp;
601 		spc->spc_swp = swp;
602 		spc->spc_kva = kva;
603 		spc->spc_m = m;
604 		bp->b_flags |= B_CALL;
605 		bp->b_iodone = swap_pager_iodone;
606 		s = splbio();
607 		swp->sw_poip++;
608 		queue_enter(&swap_pager_inuse, spc, swp_clean_t, spc_list);
609 
610 #ifdef DEBUG
611 		swap_pager_poip++;
612 		if (swpagerdebug & SDB_WRITE)
613 			printf("swpg_io: write: bp=%x swp=%x spc=%x poip=%d\n",
614 			       bp, swp, spc, swp->sw_poip);
615 		if ((swpagerdebug & SDB_ALLOCBLK) &&
616 		    (swb->swb_mask & (1 << atop(off))) == 0)
617 			printf("swpg_io: %x write blk %x+%x\n",
618 			       swp->sw_blocks, swb->swb_block, atop(off));
619 #endif
620 		swb->swb_mask |= (1 << atop(off));
621 		splx(s);
622 	}
623 #ifdef DEBUG
624 	if (swpagerdebug & SDB_IO)
625 		printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n",
626 		       bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m));
627 #endif
628 	VOP_STRATEGY(bp);
629 	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
630 #ifdef DEBUG
631 		if (swpagerdebug & SDB_IO)
632 			printf("swpg_io:  IO started: bp %x\n", bp);
633 #endif
634 		return(VM_PAGER_PEND);
635 	}
636 	s = splbio();
637 #ifdef DEBUG
638 	if (flags & B_READ)
639 		swap_pager_piip++;
640 	else
641 		swap_pager_poip++;
642 #endif
643 	while ((bp->b_flags & B_DONE) == 0) {
644 		assert_wait((int)bp, 0);
645 		thread_block();
646 	}
647 #ifdef DEBUG
648 	if (flags & B_READ)
649 		--swap_pager_piip;
650 	else
651 		--swap_pager_poip;
652 #endif
653 	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK;
654 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
655 	bp->b_actf = bswlist.b_actf;
656 	bswlist.b_actf = bp;
657 	if (bp->b_vp)
658 		brelvp(bp);
659 	if (bswlist.b_flags & B_WANTED) {
660 		bswlist.b_flags &= ~B_WANTED;
661 		thread_wakeup((int)&bswlist);
662 	}
663 	if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) {
664 		m->flags |= PG_CLEAN;
665 		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
666 	}
667 	splx(s);
668 #ifdef DEBUG
669 	if (swpagerdebug & SDB_IO)
670 		printf("swpg_io:  IO done: bp %x, rv %d\n", bp, rv);
671 	if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_ERROR)
672 		printf("swpg_io: IO error\n");
673 #endif
674 	vm_pager_unmap_page(kva);
675 	return(rv);
676 }
677 
678 static boolean_t
679 swap_pager_clean(m, rw)
680 	vm_page_t m;
681 	int rw;
682 {
683 	register swp_clean_t spc, tspc;
684 	register int s;
685 
686 #ifdef DEBUG
687 	/* save panic time state */
688 	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
689 		return (FALSE);			/* ??? */
690 	if (swpagerdebug & SDB_FOLLOW)
691 		printf("swpg_clean(%x, %d)\n", m, rw);
692 #endif
693 	tspc = NULL;
694 	for (;;) {
695 		/*
696 		 * Look up and removal from inuse list must be done
697 		 * at splbio() to avoid conflicts with swap_pager_iodone.
698 		 */
699 		s = splbio();
700 		spc = (swp_clean_t) queue_first(&swap_pager_inuse);
701 		while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
702 			if ((spc->spc_flags & SPC_DONE) &&
703 			    swap_pager_finish(spc)) {
704 				queue_remove(&swap_pager_inuse, spc,
705 					     swp_clean_t, spc_list);
706 				break;
707 			}
708 			if (m && m == spc->spc_m) {
709 #ifdef DEBUG
710 				if (swpagerdebug & SDB_ANOM)
711 					printf("swap_pager_clean: page %x on list, flags %x\n",
712 					       m, spc->spc_flags);
713 #endif
714 				tspc = spc;
715 			}
716 			spc = (swp_clean_t) queue_next(&spc->spc_list);
717 		}
718 
719 		/*
720 		 * No operations done, thats all we can do for now.
721 		 */
722 		if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
723 			break;
724 		splx(s);
725 
726 		/*
727 		 * The desired page was found to be busy earlier in
728 		 * the scan but has since completed.
729 		 */
730 		if (tspc && tspc == spc) {
731 #ifdef DEBUG
732 			if (swpagerdebug & SDB_ANOM)
733 				printf("swap_pager_clean: page %x done while looking\n",
734 				       m);
735 #endif
736 			tspc = NULL;
737 		}
738 		spc->spc_flags = SPC_FREE;
739 		vm_pager_unmap_page(spc->spc_kva);
740 		queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
741 #ifdef DEBUG
742 		if (swpagerdebug & SDB_WRITE)
743 			printf("swpg_clean: free spc %x\n", spc);
744 #endif
745 	}
746 #ifdef DEBUG
747 	/*
748 	 * If we found that the desired page is already being cleaned
749 	 * mark it so that swap_pager_iodone() will not set the clean
750 	 * flag before the pageout daemon has another chance to clean it.
751 	 */
752 	if (tspc && rw == B_WRITE) {
753 		if (swpagerdebug & SDB_ANOM)
754 			printf("swap_pager_clean: page %x on clean list\n",
755 			       tspc);
756 		tspc->spc_flags |= SPC_DIRTY;
757 	}
758 #endif
759 	splx(s);
760 
761 #ifdef DEBUG
762 	if (swpagerdebug & SDB_WRITE)
763 		printf("swpg_clean: return %d\n", tspc ? TRUE : FALSE);
764 	if ((swpagerdebug & SDB_ANOM) && tspc)
765 		printf("swpg_clean: %s of cleaning page %x\n",
766 		       rw == B_READ ? "get" : "put", m);
767 #endif
768 	return(tspc ? TRUE : FALSE);
769 }
770 
771 static int
772 swap_pager_finish(spc)
773 	register swp_clean_t spc;
774 {
775 	vm_object_t object = spc->spc_m->object;
776 
777 	/*
778 	 * Mark the paging operation as done.
779 	 * (XXX) If we cannot get the lock, leave it til later.
780 	 * (XXX) Also we are assuming that an async write is a
781 	 *       pageout operation that has incremented the counter.
782 	 */
783 	if (!vm_object_lock_try(object))
784 		return(0);
785 
786 	if (--object->paging_in_progress == 0)
787 		thread_wakeup((int) object);
788 
789 #ifdef DEBUG
790 	/*
791 	 * XXX: this isn't even close to the right thing to do,
792 	 * introduces a variety of race conditions.
793 	 *
794 	 * If dirty, vm_pageout() has attempted to clean the page
795 	 * again.  In this case we do not do anything as we will
796 	 * see the page again shortly.
797 	 */
798 	if (spc->spc_flags & SPC_DIRTY) {
799 		if (swpagerdebug & SDB_ANOM)
800 			printf("swap_pager_finish: page %x dirty again\n",
801 			       spc->spc_m);
802 		spc->spc_m->flags &= ~PG_BUSY;
803 		PAGE_WAKEUP(spc->spc_m);
804 		vm_object_unlock(object);
805 		return(1);
806 	}
807 #endif
808 	/*
809 	 * If no error mark as clean and inform the pmap system.
810 	 * If error, mark as dirty so we will try again.
811 	 * (XXX could get stuck doing this, should give up after awhile)
812 	 */
813 	if (spc->spc_flags & SPC_ERROR) {
814 		printf("swap_pager_finish: clean of page %x failed\n",
815 		       VM_PAGE_TO_PHYS(spc->spc_m));
816 		spc->spc_m->flags |= PG_LAUNDRY;
817 	} else {
818 		spc->spc_m->flags |= PG_CLEAN;
819 		pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m));
820 	}
821 	spc->spc_m->flags &= ~PG_BUSY;
822 	PAGE_WAKEUP(spc->spc_m);
823 
824 	vm_object_unlock(object);
825 	return(1);
826 }
827 
828 static void
829 swap_pager_iodone(bp)
830 	register struct buf *bp;
831 {
832 	register swp_clean_t spc;
833 	daddr_t blk;
834 	int s;
835 
836 #ifdef DEBUG
837 	/* save panic time state */
838 	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
839 		return;
840 	if (swpagerdebug & SDB_FOLLOW)
841 		printf("swpg_iodone(%x)\n", bp);
842 #endif
843 	s = splbio();
844 	spc = (swp_clean_t) queue_first(&swap_pager_inuse);
845 	while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
846 		if (spc->spc_bp == bp)
847 			break;
848 		spc = (swp_clean_t) queue_next(&spc->spc_list);
849 	}
850 #ifdef DEBUG
851 	if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
852 		panic("swap_pager_iodone: bp not found");
853 #endif
854 
855 	spc->spc_flags &= ~SPC_BUSY;
856 	spc->spc_flags |= SPC_DONE;
857 	if (bp->b_flags & B_ERROR)
858 		spc->spc_flags |= SPC_ERROR;
859 	spc->spc_bp = NULL;
860 	blk = bp->b_blkno;
861 
862 #ifdef DEBUG
863 	--swap_pager_poip;
864 	if (swpagerdebug & SDB_WRITE)
865 		printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n",
866 		       bp, spc->spc_swp, spc->spc_swp->sw_flags,
867 		       spc, spc->spc_swp->sw_poip);
868 #endif
869 
870 	spc->spc_swp->sw_poip--;
871 	if (spc->spc_swp->sw_flags & SW_WANTED) {
872 		spc->spc_swp->sw_flags &= ~SW_WANTED;
873 		thread_wakeup((int)spc->spc_swp);
874 	}
875 
876 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
877 	bp->b_actf = bswlist.b_actf;
878 	bswlist.b_actf = bp;
879 	if (bp->b_vp)
880 		brelvp(bp);
881 	if (bswlist.b_flags & B_WANTED) {
882 		bswlist.b_flags &= ~B_WANTED;
883 		thread_wakeup((int)&bswlist);
884 	}
885 	/*
886 	 * Only kick the pageout daemon if we are really hurting
887 	 * for pages, otherwise this page will be picked up later.
888 	 */
889 	if (cnt.v_free_count < cnt.v_free_min)
890 		thread_wakeup((int) &vm_pages_needed);
891 	splx(s);
892 }
893 #endif
894