xref: /original-bsd/sys/vm/swap_pager.c (revision b12f46df)
1 /*
2  * Copyright (c) 1990 University of Utah.
3  * Copyright (c) 1991 The Regents of the University of California.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * the Systems Programming Group of the University of Utah Computer
8  * Science Department.
9  *
10  * %sccs.include.redist.c%
11  *
12  * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
13  *
14  *	@(#)swap_pager.c	7.13 (Berkeley) 09/21/92
15  */
16 
17 /*
18  * Quick hack to page to dedicated partition(s).
19  * TODO:
20  *	Add multiprocessor locks
21  *	Deal with async writes in a better fashion
22  */
23 
24 #include "swappager.h"
25 #if NSWAPPAGER > 0
26 
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/proc.h>
30 #include <sys/buf.h>
31 #include <sys/map.h>
32 #include <sys/vnode.h>
33 #include <sys/malloc.h>
34 
35 #include <miscfs/specfs/specdev.h>
36 
37 #include <vm/vm.h>
38 #include <vm/queue.h>
39 #include <vm/vm_page.h>
40 #include <vm/vm_pageout.h>
41 #include <vm/swap_pager.h>
42 
43 #define NSWSIZES	16	/* size of swtab */
44 #define NPENDINGIO	64	/* max # of pending cleans */
45 #define MAXDADDRS	64	/* max # of disk addrs for fixed allocations */
46 
47 #ifdef DEBUG
48 int	swpagerdebug = 0x100;
49 #define	SDB_FOLLOW	0x001
50 #define SDB_INIT	0x002
51 #define SDB_ALLOC	0x004
52 #define SDB_IO		0x008
53 #define SDB_WRITE	0x010
54 #define SDB_FAIL	0x020
55 #define SDB_ALLOCBLK	0x040
56 #define SDB_FULL	0x080
57 #define SDB_ANOM	0x100
58 #define SDB_ANOMPANIC	0x200
59 #endif
60 
61 struct swpagerclean {
62 	queue_head_t		spc_list;
63 	int			spc_flags;
64 	struct buf		*spc_bp;
65 	sw_pager_t		spc_swp;
66 	vm_offset_t		spc_kva;
67 	vm_page_t		spc_m;
68 } swcleanlist[NPENDINGIO];
69 typedef struct swpagerclean *swp_clean_t;
70 
71 
72 /* spc_flags values */
73 #define SPC_FREE	0x00
74 #define SPC_BUSY	0x01
75 #define SPC_DONE	0x02
76 #define SPC_ERROR	0x04
77 #define SPC_DIRTY	0x08
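
/*
 * Rough lifecycle of a swpagerclean entry, as used below: an entry is
 * taken off swap_pager_free and marked SPC_BUSY when an async pageout
 * is started in swap_pager_io(); swap_pager_iodone() marks it SPC_DONE
 * (plus SPC_ERROR on a failed write); swap_pager_clean() later runs
 * swap_pager_finish() on it and returns it to swap_pager_free as
 * SPC_FREE.  SPC_DIRTY is only used under DEBUG to note that the page
 * was dirtied again while its pageout was still in flight.
 */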
78 
79 struct swtab {
80 	vm_size_t st_osize;	/* size of object (bytes) */
81 	int	  st_bsize;	/* vs. size of swap block (DEV_BSIZE units) */
82 #ifdef DEBUG
83 	u_long	  st_inuse;	/* number in this range in use */
84 	u_long	  st_usecnt;	/* total used of this size */
85 #endif
86 } swtab[NSWSIZES+1];
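
/*
 * swap_pager_alloc() walks this table and picks the first entry whose
 * st_osize is at least the (page-rounded) object size; the final entry,
 * with st_osize of 0, acts as a catch-all for larger objects.  The
 * chosen st_bsize becomes the pager's sw_bsize, i.e. the size (in
 * DEV_BSIZE units) of each contiguous chunk rmalloc'd from swapmap.
 */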
87 
88 #ifdef DEBUG
89 int		swap_pager_pendingio;	/* max pending async "clean" ops */
90 int		swap_pager_poip;	/* pageouts in progress */
91 int		swap_pager_piip;	/* pageins in progress */
92 #endif
93 
94 queue_head_t	swap_pager_inuse;	/* list of pending page cleans */
95 queue_head_t	swap_pager_free;	/* list of free pager clean structs */
96 queue_head_t	swap_pager_list;	/* list of "named" anon regions */
97 
98 static int		swap_pager_finish __P((swp_clean_t));
99 static void 		swap_pager_init __P((void));
100 static vm_pager_t	swap_pager_alloc __P((caddr_t, vm_size_t, vm_prot_t));
101 static boolean_t	swap_pager_clean __P((vm_page_t, int));
102 static void		swap_pager_dealloc __P((vm_pager_t));
103 static int		swap_pager_getpage
104 			    __P((vm_pager_t, vm_page_t, boolean_t));
105 static boolean_t	swap_pager_haspage __P((vm_pager_t, vm_offset_t));
106 static int		swap_pager_io __P((sw_pager_t, vm_page_t, int));
107 static void		swap_pager_iodone __P((struct buf *));
108 static int		swap_pager_putpage
109 			    __P((vm_pager_t, vm_page_t, boolean_t));
110 
111 struct pagerops swappagerops = {
112 	swap_pager_init,
113 	swap_pager_alloc,
114 	swap_pager_dealloc,
115 	swap_pager_getpage,
116 	swap_pager_putpage,
117 	swap_pager_haspage
118 };
119 
120 static void
121 swap_pager_init()
122 {
123 	register swp_clean_t spc;
124 	register int i, bsize;
125 	extern int dmmin, dmmax;
126 	int maxbsize;
127 
128 #ifdef DEBUG
129 	if (swpagerdebug & (SDB_FOLLOW|SDB_INIT))
130 		printf("swpg_init()\n");
131 #endif
132 	dfltpagerops = &swappagerops;
133 	queue_init(&swap_pager_list);
134 
135 	/*
136 	 * Initialize clean lists
137 	 */
138 	queue_init(&swap_pager_inuse);
139 	queue_init(&swap_pager_free);
140 	for (i = 0, spc = swcleanlist; i < NPENDINGIO; i++, spc++) {
141 		queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
142 		spc->spc_flags = SPC_FREE;
143 	}
144 
145 	/*
146 	 * Calculate the swap allocation constants.
147 	 */
148 	if (dmmin == 0) {
149 		dmmin = DMMIN;
150 		if (dmmin < CLBYTES/DEV_BSIZE)
151 			dmmin = CLBYTES/DEV_BSIZE;
152 	}
153 	if (dmmax == 0)
154 		dmmax = DMMAX;
155 
156 	/*
157 	 * Fill in our table of object size vs. allocation size
158 	 */
159 	bsize = btodb(PAGE_SIZE);
160 	if (bsize < dmmin)
161 		bsize = dmmin;
162 	maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE);
163 	if (maxbsize > dmmax)
164 		maxbsize = dmmax;
165 	for (i = 0; i < NSWSIZES; i++) {
166 		swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize));
167 		swtab[i].st_bsize = bsize;
168 #ifdef DEBUG
169 		if (swpagerdebug & SDB_INIT)
170 			printf("swpg_init: ix %d, size %x, bsize %x\n",
171 			       i, swtab[i].st_osize, swtab[i].st_bsize);
172 #endif
173 		if (bsize >= maxbsize)
174 			break;
175 		bsize *= 2;
176 	}
177 	swtab[i].st_osize = 0;
178 	swtab[i].st_bsize = bsize;
179 }
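
/*
 * For illustration only: with a hypothetical 4K PAGE_SIZE and 512-byte
 * DEV_BSIZE, and assuming dmmin/dmmax do not clamp the range, the loop
 * above starts at bsize = btodb(4096) = 8 and doubles it each row, so
 * swtab reads roughly:
 *
 *	st_osize (bytes)	st_bsize (DEV_BSIZE units)
 *	64 * 8 * 512  = 256K	 8
 *	64 * 16 * 512 = 512K	16
 *	64 * 32 * 512 =   1M	32
 *	...			...
 *
 * i.e. each row covers objects of up to MAXDADDRS blocks of that size.
 */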
180 
181 /*
182  * Allocate a pager structure and associated resources.
183  * Note that if we are called from the pageout daemon (handle == NULL)
184  * we should not wait for memory as it could result in deadlock.
185  */
186 static vm_pager_t
187 swap_pager_alloc(handle, size, prot)
188 	caddr_t handle;
189 	register vm_size_t size;
190 	vm_prot_t prot;
191 {
192 	register vm_pager_t pager;
193 	register sw_pager_t swp;
194 	struct swtab *swt;
195 	int waitok;
196 
197 #ifdef DEBUG
198 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
199 		printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot);
200 #endif
201 	/*
202 	 * If this is a "named" anonymous region, look it up and
203 	 * return the appropriate pager if it exists.
204 	 */
205 	if (handle) {
206 		pager = vm_pager_lookup(&swap_pager_list, handle);
207 		if (pager != NULL) {
208 			/*
209 			 * Use vm_object_lookup to gain a reference
210 			 * to the object and also to remove from the
211 			 * object cache.
212 			 */
213 			if (vm_object_lookup(pager) == NULL)
214 				panic("swap_pager_alloc: bad object");
215 			return(pager);
216 		}
217 	}
218 	/*
219 	 * The pager doesn't exist; allocate swap management resources
220 	 * and initialize them.
221 	 */
222 	waitok = handle ? M_WAITOK : M_NOWAIT;
223 	pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
224 	if (pager == NULL)
225 		return(NULL);
226 	swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
227 	if (swp == NULL) {
228 #ifdef DEBUG
229 		if (swpagerdebug & SDB_FAIL)
230 			printf("swpg_alloc: swpager malloc failed\n");
231 #endif
232 		free((caddr_t)pager, M_VMPAGER);
233 		return(NULL);
234 	}
235 	size = round_page(size);
236 	for (swt = swtab; swt->st_osize; swt++)
237 		if (size <= swt->st_osize)
238 			break;
239 #ifdef DEBUG
240 	swt->st_inuse++;
241 	swt->st_usecnt++;
242 #endif
243 	swp->sw_osize = size;
244 	swp->sw_bsize = swt->st_bsize;
245 	swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize;
246 	swp->sw_blocks = (sw_blk_t)
247 		malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
248 		       M_VMPGDATA, M_NOWAIT);
249 	if (swp->sw_blocks == NULL) {
250 		free((caddr_t)swp, M_VMPGDATA);
251 		free((caddr_t)pager, M_VMPAGER);
252 #ifdef DEBUG
253 		if (swpagerdebug & SDB_FAIL)
254 			printf("swpg_alloc: sw_blocks malloc failed\n");
255 		swt->st_inuse--;
256 		swt->st_usecnt--;
257 #endif
258 		return(NULL);
259 	}
260 	bzero((caddr_t)swp->sw_blocks,
261 	      swp->sw_nblocks * sizeof(*swp->sw_blocks));
262 	swp->sw_poip = 0;
263 	if (handle) {
264 		vm_object_t object;
265 
266 		swp->sw_flags = SW_NAMED;
267 		queue_enter(&swap_pager_list, pager, vm_pager_t, pg_list);
268 		/*
269 		 * Consistent with other pagers: return with object
270 		 * referenced.  Can't do this with handle == NULL
271 		 * since it might be the pageout daemon calling.
272 		 */
273 		object = vm_object_allocate(size);
274 		vm_object_enter(object, pager);
275 		vm_object_setpager(object, pager, 0, FALSE);
276 	} else {
277 		swp->sw_flags = 0;
278 		queue_init(&pager->pg_list);
279 	}
280 	pager->pg_handle = handle;
281 	pager->pg_ops = &swappagerops;
282 	pager->pg_type = PG_SWAP;
283 	pager->pg_data = (caddr_t)swp;
284 
285 #ifdef DEBUG
286 	if (swpagerdebug & SDB_ALLOC)
287 		printf("swpg_alloc: pg_data %x, %x of %x at %x\n",
288 		       swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks);
289 #endif
290 	return(pager);
291 }
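
/*
 * A sketch of the resulting block map, under the same hypothetical 4K
 * page / 512-byte DEV_BSIZE numbers as above: a 1MB object that lands
 * on the sw_bsize = 32 row gets sw_nblocks = (2048 + 31) / 32 = 64
 * sw_blk_t entries, each describing dbtob(32) = 16K (four pages) of the
 * object.  swb_block is the base disk block rmalloc'd for that chunk
 * (0 until first written) and swb_mask has one bit per page recording
 * which pages of the chunk actually reside on swap.
 */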
292 
293 static void
294 swap_pager_dealloc(pager)
295 	vm_pager_t pager;
296 {
297 	register int i;
298 	register sw_blk_t bp;
299 	register sw_pager_t swp;
300 	struct swtab *swt;
301 	int s;
302 
303 #ifdef DEBUG
304 	/* save panic time state */
305 	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
306 		return;
307 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
308 		printf("swpg_dealloc(%x)\n", pager);
309 #endif
310 	/*
311 	 * Remove from list right away so lookups will fail if we
312 	 * block for pageout completion.
313 	 */
314 	swp = (sw_pager_t) pager->pg_data;
315 	if (swp->sw_flags & SW_NAMED) {
316 		queue_remove(&swap_pager_list, pager, vm_pager_t, pg_list);
317 		swp->sw_flags &= ~SW_NAMED;
318 	}
319 #ifdef DEBUG
320 	for (swt = swtab; swt->st_osize; swt++)
321 		if (swp->sw_osize <= swt->st_osize)
322 			break;
323 	swt->st_inuse--;
324 #endif
325 
326 	/*
327 	 * Wait for all pageouts to finish and remove
328 	 * all entries from cleaning list.
329 	 */
330 	s = splbio();
331 	while (swp->sw_poip) {
332 		swp->sw_flags |= SW_WANTED;
333 		assert_wait((int)swp, 0);
334 		thread_block();
335 	}
336 	splx(s);
337 	(void) swap_pager_clean(NULL, B_WRITE);
338 
339 	/*
340 	 * Free left over swap blocks
341 	 */
342 	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++)
343 		if (bp->swb_block) {
344 #ifdef DEBUG
345 			if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL))
346 				printf("swpg_dealloc: blk %x\n",
347 				       bp->swb_block);
348 #endif
349 			rmfree(swapmap, swp->sw_bsize, bp->swb_block);
350 		}
351 	/*
352 	 * Free swap management resources
353 	 */
354 	free((caddr_t)swp->sw_blocks, M_VMPGDATA);
355 	free((caddr_t)swp, M_VMPGDATA);
356 	free((caddr_t)pager, M_VMPAGER);
357 }
358 
359 static int
360 swap_pager_getpage(pager, m, sync)
361 	vm_pager_t pager;
362 	vm_page_t m;
363 	boolean_t sync;
364 {
365 #ifdef DEBUG
366 	if (swpagerdebug & SDB_FOLLOW)
367 		printf("swpg_getpage(%x, %x, %d)\n", pager, m, sync);
368 #endif
369 	return(swap_pager_io((sw_pager_t)pager->pg_data, m, B_READ));
370 }
371 
372 static int
373 swap_pager_putpage(pager, m, sync)
374 	vm_pager_t pager;
375 	vm_page_t m;
376 	boolean_t sync;
377 {
378 	int flags;
379 
380 #ifdef DEBUG
381 	if (swpagerdebug & SDB_FOLLOW)
382 		printf("swpg_putpage(%x, %x, %d)\n", pager, m, sync);
383 #endif
384 	if (pager == NULL) {
385 		(void) swap_pager_clean(NULL, B_WRITE);
386 		return (VM_PAGER_OK);		/* ??? */
387 	}
388 	flags = B_WRITE;
389 	if (!sync)
390 		flags |= B_ASYNC;
391 	return(swap_pager_io((sw_pager_t)pager->pg_data, m, flags));
392 }
393 
394 static boolean_t
395 swap_pager_haspage(pager, offset)
396 	vm_pager_t pager;
397 	vm_offset_t offset;
398 {
399 	register sw_pager_t swp;
400 	register sw_blk_t swb;
401 	int ix;
402 
403 #ifdef DEBUG
404 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
405 		printf("swpg_haspage(%x, %x) ", pager, offset);
406 #endif
407 	swp = (sw_pager_t) pager->pg_data;
408 	ix = offset / dbtob(swp->sw_bsize);
409 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
410 #ifdef DEBUG
411 		if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK))
412 			printf("swpg_haspage: %x bad offset %x, ix %x\n",
413 			       swp->sw_blocks, offset, ix);
414 #endif
415 		return(FALSE);
416 	}
417 	swb = &swp->sw_blocks[ix];
418 	if (swb->swb_block)
419 		ix = atop(offset % dbtob(swp->sw_bsize));
420 #ifdef DEBUG
421 	if (swpagerdebug & SDB_ALLOCBLK)
422 		printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix);
423 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
424 		printf("-> %c\n",
425 		       "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]);
426 #endif
427 	if (swb->swb_block && (swb->swb_mask & (1 << ix)))
428 		return(TRUE);
429 	return(FALSE);
430 }
431 
432 /*
433  * Scaled down version of swap().
434  * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed.
435  * BOGUS:  lower level IO routines expect a KVA so we have to map our
436  * provided physical page into the KVA to keep them happy.
437  */
438 static int
439 swap_pager_io(swp, m, flags)
440 	register sw_pager_t swp;
441 	vm_page_t m;
442 	int flags;
443 {
444 	register struct buf *bp;
445 	register sw_blk_t swb;
446 	register int s;
447 	int ix;
448 	int rv;
449 	vm_offset_t kva, off;
450 	swp_clean_t spc;
451 
452 #ifdef DEBUG
453 	/* save panic time state */
454 	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
455 		return (VM_PAGER_FAIL);		/* XXX: correct return? */
456 	if (swpagerdebug & (SDB_FOLLOW|SDB_IO))
457 		printf("swpg_io(%x, %x, %x)\n", swp, m, flags);
458 #endif
459 
460 	/*
461 	 * For reads (pageins) and synchronous writes, we clean up
462 	 * all completed async pageouts.
463 	 */
464 	if ((flags & B_ASYNC) == 0) {
465 		s = splbio();
466 #ifdef DEBUG
467 		/*
468 		 * Check to see if this page is currently being cleaned.
469 		 * If it is, we just wait until the operation is done before
470 		 * continuing.
471 		 */
472 		while (swap_pager_clean(m, flags&B_READ)) {
473 			if (swpagerdebug & SDB_ANOM)
474 				printf("swap_pager_io: page %x cleaning\n", m);
475 
476 			swp->sw_flags |= SW_WANTED;
477 			assert_wait((int)swp, 0);
478 			thread_block();
479 		}
480 #else
481 		(void) swap_pager_clean(m, flags&B_READ);
482 #endif
483 		splx(s);
484 	}
485 	/*
486 	 * For async writes (pageouts), we clean up completed pageouts so
487 	 * that all available resources are freed.  This also tells us if the
488 	 * page is already being cleaned.  If it is, or if no resources
489 	 * are available, we try again later.
490 	 */
491 	else if (swap_pager_clean(m, B_WRITE) ||
492 		 queue_empty(&swap_pager_free)) {
493 #ifdef DEBUG
494 		if ((swpagerdebug & SDB_ANOM) &&
495 		    !queue_empty(&swap_pager_free))
496 			printf("swap_pager_io: page %x already cleaning\n", m);
497 #endif
498 		return(VM_PAGER_FAIL);
499 	}
500 
501 	/*
502 	 * Determine swap block and allocate as necessary.
503 	 */
504 	off = m->offset + m->object->paging_offset;
505 	ix = off / dbtob(swp->sw_bsize);
506 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
507 #ifdef DEBUG
508 		if (swpagerdebug & SDB_FAIL)
509 			printf("swpg_io: bad offset %x+%x(%d) in %x\n",
510 			       m->offset, m->object->paging_offset,
511 			       ix, swp->sw_blocks);
512 #endif
513 		return(VM_PAGER_FAIL);
514 	}
515 	swb = &swp->sw_blocks[ix];
516 	off = off % dbtob(swp->sw_bsize);
517 	if (flags & B_READ) {
518 		if (swb->swb_block == 0 ||
519 		    (swb->swb_mask & (1 << atop(off))) == 0) {
520 #ifdef DEBUG
521 			if (swpagerdebug & (SDB_ALLOCBLK|SDB_FAIL))
522 				printf("swpg_io: %x bad read: blk %x+%x, mask %x, off %x+%x\n",
523 				       swp->sw_blocks,
524 				       swb->swb_block, atop(off),
525 				       swb->swb_mask,
526 				       m->offset, m->object->paging_offset);
527 #endif
528 			/* XXX: should we zero page here?? */
529 			return(VM_PAGER_FAIL);
530 		}
531 	} else if (swb->swb_block == 0) {
532 		swb->swb_block = rmalloc(swapmap, swp->sw_bsize);
533 		if (swb->swb_block == 0) {
534 #ifdef DEBUG
535 			if (swpagerdebug & SDB_FAIL)
536 				printf("swpg_io: rmalloc of %x failed\n",
537 				       swp->sw_bsize);
538 #endif
539 			return(VM_PAGER_FAIL);
540 		}
541 #ifdef DEBUG
542 		if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK))
543 			printf("swpg_io: %x alloc blk %x at ix %x\n",
544 			       swp->sw_blocks, swb->swb_block, ix);
545 #endif
546 	}
547 
548 	/*
549 	 * Allocate a kernel virtual address and initialize it so that a PTE
550 	 * is available for the lower level IO drivers.
551 	 */
552 	kva = vm_pager_map_page(m);
553 
554 	/*
555 	 * Get a swap buffer header and perform the IO
556 	 */
557 	s = splbio();
558 	while (bswlist.av_forw == NULL) {
559 #ifdef DEBUG
560 		if (swpagerdebug & SDB_ANOM)
561 			printf("swap_pager_io: wait on swbuf for %x (%d)\n",
562 			       m, flags);
563 #endif
564 		bswlist.b_flags |= B_WANTED;
565 		sleep((caddr_t)&bswlist, PSWP+1);
566 	}
567 	bp = bswlist.av_forw;
568 	bswlist.av_forw = bp->av_forw;
569 	splx(s);
570 	bp->b_flags = B_BUSY | (flags & B_READ);
571 	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
572 	bp->b_un.b_addr = (caddr_t)kva;
573 	bp->b_blkno = swb->swb_block + btodb(off);
574 	VHOLD(swapdev_vp);
575 	bp->b_vp = swapdev_vp;
576 	if (swapdev_vp->v_type == VBLK)
577 		bp->b_dev = swapdev_vp->v_rdev;
578 	bp->b_bcount = PAGE_SIZE;
579 	if ((bp->b_flags & B_READ) == 0) {
580 		bp->b_dirtyoff = 0;
581 		bp->b_dirtyend = PAGE_SIZE;
582 		swapdev_vp->v_numoutput++;
583 	}
584 
585 	/*
586 	 * If this is an async write we set up additional buffer fields
587 	 * and place a "cleaning" entry on the inuse queue.
588 	 */
589 	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
590 #ifdef DEBUG
591 		if (queue_empty(&swap_pager_free))
592 			panic("swpg_io: lost spc");
593 #endif
594 		queue_remove_first(&swap_pager_free,
595 				   spc, swp_clean_t, spc_list);
596 #ifdef DEBUG
597 		if (spc->spc_flags != SPC_FREE)
598 			panic("swpg_io: bad free spc");
599 #endif
600 		spc->spc_flags = SPC_BUSY;
601 		spc->spc_bp = bp;
602 		spc->spc_swp = swp;
603 		spc->spc_kva = kva;
604 		spc->spc_m = m;
605 		bp->b_flags |= B_CALL;
606 		bp->b_iodone = swap_pager_iodone;
607 		s = splbio();
608 		swp->sw_poip++;
609 		queue_enter(&swap_pager_inuse, spc, swp_clean_t, spc_list);
610 
611 #ifdef DEBUG
612 		swap_pager_poip++;
613 		if (swpagerdebug & SDB_WRITE)
614 			printf("swpg_io: write: bp=%x swp=%x spc=%x poip=%d\n",
615 			       bp, swp, spc, swp->sw_poip);
616 		if ((swpagerdebug & SDB_ALLOCBLK) &&
617 		    (swb->swb_mask & (1 << atop(off))) == 0)
618 			printf("swpg_io: %x write blk %x+%x\n",
619 			       swp->sw_blocks, swb->swb_block, atop(off));
620 #endif
621 		swb->swb_mask |= (1 << atop(off));
622 		splx(s);
623 	}
624 #ifdef DEBUG
625 	if (swpagerdebug & SDB_IO)
626 		printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n",
627 		       bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m));
628 #endif
629 	VOP_STRATEGY(bp);
630 	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
631 #ifdef DEBUG
632 		if (swpagerdebug & SDB_IO)
633 			printf("swpg_io:  IO started: bp %x\n", bp);
634 #endif
635 		return(VM_PAGER_PEND);
636 	}
637 	s = splbio();
638 #ifdef DEBUG
639 	if (flags & B_READ)
640 		swap_pager_piip++;
641 	else
642 		swap_pager_poip++;
643 #endif
644 	while ((bp->b_flags & B_DONE) == 0) {
645 		assert_wait((int)bp, 0);
646 		thread_block();
647 	}
648 #ifdef DEBUG
649 	if (flags & B_READ)
650 		--swap_pager_piip;
651 	else
652 		--swap_pager_poip;
653 #endif
654 	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK;
655 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
656 	bp->av_forw = bswlist.av_forw;
657 	bswlist.av_forw = bp;
658 	if (bp->b_vp)
659 		brelvp(bp);
660 	if (bswlist.b_flags & B_WANTED) {
661 		bswlist.b_flags &= ~B_WANTED;
662 		thread_wakeup((int)&bswlist);
663 	}
664 	if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) {
665 		m->clean = TRUE;
666 		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
667 	}
668 	splx(s);
669 #ifdef DEBUG
670 	if (swpagerdebug & SDB_IO)
671 		printf("swpg_io:  IO done: bp %x, rv %d\n", bp, rv);
672 	if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_ERROR)
673 		printf("swpg_io: IO error\n");
674 #endif
675 	vm_pager_unmap_page(kva);
676 	return(rv);
677 }
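
/*
 * To summarize the return protocol above: reads and synchronous writes
 * sleep on the buffer and come back with VM_PAGER_OK or VM_PAGER_ERROR
 * (the kva is unmapped here); asynchronous writes return VM_PAGER_PEND
 * immediately after VOP_STRATEGY, and the spc entry queued above is
 * finished later by swap_pager_iodone() and swap_pager_clean(), which
 * also unmaps the kva.  VM_PAGER_FAIL is used for the cases in which no
 * I/O was started at all.
 */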
678 
679 static boolean_t
680 swap_pager_clean(m, rw)
681 	vm_page_t m;
682 	int rw;
683 {
684 	register swp_clean_t spc, tspc;
685 	register int s;
686 
687 #ifdef DEBUG
688 	/* save panic time state */
689 	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
690 		return (FALSE);			/* ??? */
691 	if (swpagerdebug & SDB_FOLLOW)
692 		printf("swpg_clean(%x, %d)\n", m, rw);
693 #endif
694 	tspc = NULL;
695 	for (;;) {
696 		/*
697 		 * Lookup and removal from the inuse list must be done
698 		 * at splbio() to avoid conflicts with swap_pager_iodone.
699 		 */
700 		s = splbio();
701 		spc = (swp_clean_t) queue_first(&swap_pager_inuse);
702 		while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
703 			if ((spc->spc_flags & SPC_DONE) &&
704 			    swap_pager_finish(spc)) {
705 				queue_remove(&swap_pager_inuse, spc,
706 					     swp_clean_t, spc_list);
707 				break;
708 			}
709 			if (m && m == spc->spc_m) {
710 #ifdef DEBUG
711 				if (swpagerdebug & SDB_ANOM)
712 					printf("swap_pager_clean: page %x on list, flags %x\n",
713 					       m, spc->spc_flags);
714 #endif
715 				tspc = spc;
716 			}
717 			spc = (swp_clean_t) queue_next(&spc->spc_list);
718 		}
719 
720 		/*
721 		 * No operations done; that's all we can do for now.
722 		 */
723 		if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
724 			break;
725 		splx(s);
726 
727 		/*
728 		 * The desired page was found to be busy earlier in
729 		 * the scan but has since completed.
730 		 */
731 		if (tspc && tspc == spc) {
732 #ifdef DEBUG
733 			if (swpagerdebug & SDB_ANOM)
734 				printf("swap_pager_clean: page %x done while looking\n",
735 				       m);
736 #endif
737 			tspc = NULL;
738 		}
739 		spc->spc_flags = SPC_FREE;
740 		vm_pager_unmap_page(spc->spc_kva);
741 		queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
742 #ifdef DEBUG
743 		if (swpagerdebug & SDB_WRITE)
744 			printf("swpg_clean: free spc %x\n", spc);
745 #endif
746 	}
747 #ifdef DEBUG
748 	/*
749 	 * If we found that the desired page is already being cleaned,
750 	 * mark it so that swap_pager_iodone() will not set the clean
751 	 * flag before the pageout daemon has another chance to clean it.
752 	 */
753 	if (tspc && rw == B_WRITE) {
754 		if (swpagerdebug & SDB_ANOM)
755 			printf("swap_pager_clean: page %x on clean list\n",
756 			       tspc);
757 		tspc->spc_flags |= SPC_DIRTY;
758 	}
759 #endif
760 	splx(s);
761 
762 #ifdef DEBUG
763 	if (swpagerdebug & SDB_WRITE)
764 		printf("swpg_clean: return %d\n", tspc ? TRUE : FALSE);
765 	if ((swpagerdebug & SDB_ANOM) && tspc)
766 		printf("swpg_clean: %s of cleaning page %x\n",
767 		       rw == B_READ ? "get" : "put", m);
768 #endif
769 	return(tspc ? TRUE : FALSE);
770 }
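
/*
 * Note that the return value above only reports whether the requested
 * page m was found still outstanding on the inuse list; reaping of
 * completed entries happens as a side effect regardless of m, which is
 * why swap_pager_dealloc() and swap_pager_putpage() call this with
 * m == NULL simply to flush the list.
 */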
771 
772 static int
773 swap_pager_finish(spc)
774 	register swp_clean_t spc;
775 {
776 	vm_object_t object = spc->spc_m->object;
777 
778 	/*
779 	 * Mark the paging operation as done.
780 	 * (XXX) If we cannot get the lock, leave it til later.
781 	 * (XXX) If we cannot get the lock, leave it until later.
782 	 *       pageout operation that has incremented the counter.
783 	 */
784 	if (!vm_object_lock_try(object))
785 		return(0);
786 
787 	if (--object->paging_in_progress == 0)
788 		thread_wakeup((int) object);
789 
790 #ifdef DEBUG
791 	/*
792 	 * XXX: this isn't even close to the right thing to do,
793 	 * introduces a variety of race conditions.
794 	 *
795 	 * If dirty, vm_pageout() has attempted to clean the page
796 	 * again.  In this case we do not do anything as we will
797 	 * see the page again shortly.
798 	 */
799 	if (spc->spc_flags & SPC_DIRTY) {
800 		if (swpagerdebug & SDB_ANOM)
801 			printf("swap_pager_finish: page %x dirty again\n",
802 			       spc->spc_m);
803 		spc->spc_m->busy = FALSE;
804 		PAGE_WAKEUP(spc->spc_m);
805 		vm_object_unlock(object);
806 		return(1);
807 	}
808 #endif
809 	/*
810 	 * If there was no error, mark the page clean and inform the pmap system.
811 	 * On error, mark it dirty so we will try again.
812 	 * (XXX could get stuck doing this, should give up after a while)
813 	 */
814 	if (spc->spc_flags & SPC_ERROR) {
815 		printf("swap_pager_finish: clean of page %x failed\n",
816 		       VM_PAGE_TO_PHYS(spc->spc_m));
817 		spc->spc_m->laundry = TRUE;
818 	} else {
819 		spc->spc_m->clean = TRUE;
820 		pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m));
821 	}
822 	spc->spc_m->busy = FALSE;
823 	PAGE_WAKEUP(spc->spc_m);
824 
825 	vm_object_unlock(object);
826 	return(1);
827 }
828 
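/*
 * Called through biodone() (B_CALL), possibly at interrupt level, so
 * this routine confines itself to flagging the spc entry, returning the
 * swap buffer to bswlist and waking up waiters; the page and object
 * bookkeeping is deferred to swap_pager_clean()/swap_pager_finish()
 * running at top level.
 */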
829 static void
830 swap_pager_iodone(bp)
831 	register struct buf *bp;
832 {
833 	register swp_clean_t spc;
834 	daddr_t blk;
835 	int s;
836 
837 #ifdef DEBUG
838 	/* save panic time state */
839 	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
840 		return;
841 	if (swpagerdebug & SDB_FOLLOW)
842 		printf("swpg_iodone(%x)\n", bp);
843 #endif
844 	s = splbio();
845 	spc = (swp_clean_t) queue_first(&swap_pager_inuse);
846 	while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
847 		if (spc->spc_bp == bp)
848 			break;
849 		spc = (swp_clean_t) queue_next(&spc->spc_list);
850 	}
851 #ifdef DEBUG
852 	if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
853 		panic("swap_pager_iodone: bp not found");
854 #endif
855 
856 	spc->spc_flags &= ~SPC_BUSY;
857 	spc->spc_flags |= SPC_DONE;
858 	if (bp->b_flags & B_ERROR)
859 		spc->spc_flags |= SPC_ERROR;
860 	spc->spc_bp = NULL;
861 	blk = bp->b_blkno;
862 
863 #ifdef DEBUG
864 	--swap_pager_poip;
865 	if (swpagerdebug & SDB_WRITE)
866 		printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n",
867 		       bp, spc->spc_swp, spc->spc_swp->sw_flags,
868 		       spc, spc->spc_swp->sw_poip);
869 #endif
870 
871 	spc->spc_swp->sw_poip--;
872 	if (spc->spc_swp->sw_flags & SW_WANTED) {
873 		spc->spc_swp->sw_flags &= ~SW_WANTED;
874 		thread_wakeup((int)spc->spc_swp);
875 	}
876 
877 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
878 	bp->av_forw = bswlist.av_forw;
879 	bswlist.av_forw = bp;
880 	if (bp->b_vp)
881 		brelvp(bp);
882 	if (bswlist.b_flags & B_WANTED) {
883 		bswlist.b_flags &= ~B_WANTED;
884 		thread_wakeup((int)&bswlist);
885 	}
886 	thread_wakeup((int) &vm_pages_needed);
887 	splx(s);
888 }
889 #endif
890