xref: /original-bsd/sys/vm/swap_pager.c (revision 703f6d5d)
1 /*
2  * Copyright (c) 1990 University of Utah.
3  * Copyright (c) 1991 The Regents of the University of California.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * the Systems Programming Group of the University of Utah Computer
8  * Science Department.
9  *
10  * %sccs.include.redist.c%
11  *
12  * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
13  *
14  *	@(#)swap_pager.c	7.4 (Berkeley) 05/07/91
15  */
16 
17 /*
18  * Quick hack to page to dedicated partition(s).
19  * TODO:
20  *	Add multiprocessor locks
21  *	Deal with async writes in a better fashion
22  */
23 
24 #include "swappager.h"
25 #if NSWAPPAGER > 0
26 
27 #include "param.h"
28 #include "proc.h"
29 #include "buf.h"
30 #include "map.h"
31 #include "systm.h"
32 #include "specdev.h"
33 #include "vnode.h"
34 #include "malloc.h"
35 #include "queue.h"
36 
37 #include "vm_param.h"
38 #include "queue.h"
39 #include "lock.h"
40 #include "vm_prot.h"
41 #include "vm_object.h"
42 #include "vm_page.h"
43 #include "vm_pageout.h"
44 #include "swap_pager.h"
45 
/*
 * Compile-time sizing of the swap pager's static tables.
 */
#define NSWSIZES	16	/* usable swtab entries (one extra slot terminates the table) */
#define NPENDINGIO	64	/* entries in swcleanlist, i.e. max async cleans in flight */
#define MAXDADDRS	64	/* swap blocks per object at each swtab size step */
49 
#ifdef DEBUG
/*
 * Debug trace selectors.  OR the wanted bits into swpagerdebug to
 * enable the corresponding printf()s in this file.
 */
#define	SDB_FOLLOW	0x001	/* trace entry to each pager routine */
#define SDB_INIT	0x002	/* swap_pager_init table setup */
#define SDB_ALLOC	0x004	/* pager allocation and deallocation */
#define SDB_IO		0x008	/* start and completion of each IO */
#define SDB_WRITE	0x010	/* async write / clean-list bookkeeping */
#define SDB_FAIL	0x020	/* failures (malloc, rmalloc, bad offset, IO error) */
#define SDB_ALLOCBLK	0x040	/* swap block lookup, allocation and release */
#define SDB_FULL	0x080	/* swap block allocation and release only */
#define SDB_ANOM	0x100	/* anomalies (page already cleaning, waits) */
#define SDB_ANOMPANIC	0x200	/* once panicstr is set, return early to keep state */

int	swpagerdebug = SDB_ANOM;	/* by default report anomalies only */
#endif
63 
64 struct swpagerclean {
65 	queue_head_t		spc_list;
66 	int			spc_flags;
67 	struct buf		*spc_bp;
68 	sw_pager_t		spc_swp;
69 	vm_offset_t		spc_kva;
70 	vm_page_t		spc_m;
71 } swcleanlist[NPENDINGIO];
72 typedef	struct swpagerclean	*swp_clean_t;
73 
/*
 * Values for spc_flags.
 */
#define SPC_FREE	0x00	/* entry is unused (on swap_pager_free) */
#define SPC_BUSY	0x01	/* async write has been started */
#define SPC_DONE	0x02	/* swap_pager_iodone has seen the write complete */
#define SPC_ERROR	0x04	/* the write finished with B_ERROR */
#define SPC_DIRTY	0x08	/* page was offered for cleaning again meanwhile */
80 
81 struct swtab {
82 	vm_size_t st_osize;	/* size of object (bytes) */
83 	int	  st_bsize;	/* vs. size of swap block (DEV_BSIZE units) */
84 #ifdef DEBUG
85 	u_long	  st_inuse;	/* number in this range in use */
86 	u_long	  st_usecnt;	/* total used of this size */
87 #endif
88 } swtab[NSWSIZES+1];
89 
90 #ifdef DEBUG
91 int		swap_pager_pendingio;	/* max pending async "clean" ops */
92 int		swap_pager_poip;	/* pageouts in progress */
93 int		swap_pager_piip;	/* pageins in progress */
94 #endif
95 
96 queue_head_t	swap_pager_inuse;	/* list of pending page cleans */
97 queue_head_t	swap_pager_free;	/* list of free pager clean structs */
98 queue_head_t	swap_pager_list;	/* list of "named" anon regions */
99 
100 void
101 swap_pager_init()
102 {
103 	register swp_clean_t spc;
104 	register int i, bsize;
105 	extern int dmmin, dmmax;
106 	int maxbsize;
107 
108 #ifdef DEBUG
109 	if (swpagerdebug & (SDB_FOLLOW|SDB_INIT))
110 		printf("swpg_init()\n");
111 #endif
112 	dfltpagerops = &swappagerops;
113 	queue_init(&swap_pager_list);
114 
115 	/*
116 	 * Initialize clean lists
117 	 */
118 	queue_init(&swap_pager_inuse);
119 	queue_init(&swap_pager_free);
120 	for (i = 0, spc = swcleanlist; i < NPENDINGIO; i++, spc++) {
121 		queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
122 		spc->spc_flags = SPC_FREE;
123 	}
124 
125 	/*
126 	 * Calculate the swap allocation constants.
127 	 */
128         if (dmmin == 0) {
129                 dmmin = DMMIN;
130 		if (dmmin < CLBYTES/DEV_BSIZE)
131 			dmmin = CLBYTES/DEV_BSIZE;
132 	}
133         if (dmmax == 0)
134                 dmmax = DMMAX;
135 
136 	/*
137 	 * Fill in our table of object size vs. allocation size
138 	 */
139 	bsize = btodb(PAGE_SIZE);
140 	if (bsize < dmmin)
141 		bsize = dmmin;
142 	maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE);
143 	if (maxbsize > dmmax)
144 		maxbsize = dmmax;
145 	for (i = 0; i < NSWSIZES; i++) {
146 		swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize));
147 		swtab[i].st_bsize = bsize;
148 #ifdef DEBUG
149 		if (swpagerdebug & SDB_INIT)
150 			printf("swpg_init: ix %d, size %x, bsize %x\n",
151 			       i, swtab[i].st_osize, swtab[i].st_bsize);
152 #endif
153 		if (bsize >= maxbsize)
154 			break;
155 		bsize *= 2;
156 	}
157 	swtab[i].st_osize = 0;
158 	swtab[i].st_bsize = bsize;
159 }
160 
161 /*
162  * Allocate a pager structure and associated resources.
163  * Note that if we are called from the pageout daemon (handle == NULL)
164  * we should not wait for memory as it could resulting in deadlock.
165  */
166 vm_pager_t
167 swap_pager_alloc(handle, size, prot)
168 	caddr_t handle;
169 	register vm_size_t size;
170 	vm_prot_t prot;
171 {
172 	register vm_pager_t pager;
173 	register sw_pager_t swp;
174 	struct swtab *swt;
175 	int waitok;
176 
177 #ifdef DEBUG
178 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
179 		printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot);
180 #endif
181 	/*
182 	 * If this is a "named" anonymous region, look it up and
183 	 * return the appropriate pager if it exists.
184 	 */
185 	if (handle) {
186 		pager = vm_pager_lookup(&swap_pager_list, handle);
187 		if (pager != NULL) {
188 			/*
189 			 * Use vm_object_lookup to gain a reference
190 			 * to the object and also to remove from the
191 			 * object cache.
192 			 */
193 			if (vm_object_lookup(pager) == NULL)
194 				panic("swap_pager_alloc: bad object");
195 			return(pager);
196 		}
197 	}
198 	/*
199 	 * Pager doesn't exist, allocate swap management resources
200 	 * and initialize.
201 	 */
202 	waitok = handle ? M_WAITOK : M_NOWAIT;
203 	pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
204 	if (pager == NULL)
205 		return(NULL);
206 	swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
207 	if (swp == NULL) {
208 #ifdef DEBUG
209 		if (swpagerdebug & SDB_FAIL)
210 			printf("swpg_alloc: swpager malloc failed\n");
211 #endif
212 		free((caddr_t)pager, M_VMPAGER);
213 		return(NULL);
214 	}
215 	size = round_page(size);
216 	for (swt = swtab; swt->st_osize; swt++)
217 		if (size <= swt->st_osize)
218 			break;
219 #ifdef DEBUG
220 	swt->st_inuse++;
221 	swt->st_usecnt++;
222 #endif
223 	swp->sw_osize = size;
224 	swp->sw_bsize = swt->st_bsize;
225 	swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize;
226 	swp->sw_blocks = (sw_blk_t)
227 		malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
228 		       M_VMPGDATA, M_NOWAIT);
229 	if (swp->sw_blocks == NULL) {
230 		free((caddr_t)swp, M_VMPGDATA);
231 		free((caddr_t)pager, M_VMPAGER);
232 #ifdef DEBUG
233 		if (swpagerdebug & SDB_FAIL)
234 			printf("swpg_alloc: sw_blocks malloc failed\n");
235 		swt->st_inuse--;
236 		swt->st_usecnt--;
237 #endif
238 		return(FALSE);
239 	}
240 	bzero((caddr_t)swp->sw_blocks,
241 	      swp->sw_nblocks * sizeof(*swp->sw_blocks));
242 	swp->sw_poip = 0;
243 	if (handle) {
244 		vm_object_t object;
245 
246 		swp->sw_flags = SW_NAMED;
247 		queue_enter(&swap_pager_list, pager, vm_pager_t, pg_list);
248 		/*
249 		 * Consistant with other pagers: return with object
250 		 * referenced.  Can't do this with handle == NULL
251 		 * since it might be the pageout daemon calling.
252 		 */
253 		object = vm_object_allocate(size);
254 		vm_object_enter(object, pager);
255 		vm_object_setpager(object, pager, 0, FALSE);
256 	} else {
257 		swp->sw_flags = 0;
258 		queue_init(&pager->pg_list);
259 	}
260 	pager->pg_handle = handle;
261 	pager->pg_ops = &swappagerops;
262 	pager->pg_type = PG_SWAP;
263 	pager->pg_data = (caddr_t)swp;
264 
265 #ifdef DEBUG
266 	if (swpagerdebug & SDB_ALLOC)
267 		printf("swpg_alloc: pg_data %x, %x of %x at %x\n",
268 		       swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks);
269 #endif
270 	return(pager);
271 }
272 
273 void
274 swap_pager_dealloc(pager)
275 	vm_pager_t pager;
276 {
277 	register int i;
278 	register sw_blk_t bp;
279 	register sw_pager_t swp;
280 	struct swtab *swt;
281 	int s;
282 
283 #ifdef DEBUG
284 	/* save panic time state */
285 	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
286 		return;
287 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
288 		printf("swpg_dealloc(%x)\n", pager);
289 #endif
290 	/*
291 	 * Remove from list right away so lookups will fail if we
292 	 * block for pageout completion.
293 	 */
294 	swp = (sw_pager_t) pager->pg_data;
295 	if (swp->sw_flags & SW_NAMED) {
296 		queue_remove(&swap_pager_list, pager, vm_pager_t, pg_list);
297 		swp->sw_flags &= ~SW_NAMED;
298 	}
299 #ifdef DEBUG
300 	for (swt = swtab; swt->st_osize; swt++)
301 		if (swp->sw_osize <= swt->st_osize)
302 			break;
303 	swt->st_inuse--;
304 #endif
305 
306 	/*
307 	 * Wait for all pageouts to finish and remove
308 	 * all entries from cleaning list.
309 	 */
310 	s = splbio();
311 	while (swp->sw_poip) {
312 		swp->sw_flags |= SW_WANTED;
313 		assert_wait((int)swp);
314 		thread_block();
315 	}
316 	splx(s);
317 	(void) swap_pager_clean(NULL, B_WRITE);
318 
319 	/*
320 	 * Free left over swap blocks
321 	 */
322 	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++)
323 		if (bp->swb_block) {
324 #ifdef DEBUG
325 			if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL))
326 				printf("swpg_dealloc: blk %x\n",
327 				       bp->swb_block);
328 #endif
329 			rmfree(swapmap, swp->sw_bsize, bp->swb_block);
330 		}
331 	/*
332 	 * Free swap management resources
333 	 */
334 	free((caddr_t)swp->sw_blocks, M_VMPGDATA);
335 	free((caddr_t)swp, M_VMPGDATA);
336 	free((caddr_t)pager, M_VMPAGER);
337 }
338 
339 swap_pager_getpage(pager, m, sync)
340 	vm_pager_t pager;
341 	vm_page_t m;
342 	boolean_t sync;
343 {
344 #ifdef DEBUG
345 	if (swpagerdebug & SDB_FOLLOW)
346 		printf("swpg_getpage(%x, %x, %d)\n", pager, m, sync);
347 #endif
348 	return(swap_pager_io((sw_pager_t)pager->pg_data, m, B_READ));
349 }
350 
351 swap_pager_putpage(pager, m, sync)
352 	vm_pager_t pager;
353 	vm_page_t m;
354 	boolean_t sync;
355 {
356 	int flags;
357 
358 #ifdef DEBUG
359 	if (swpagerdebug & SDB_FOLLOW)
360 		printf("swpg_putpage(%x, %x, %d)\n", pager, m, sync);
361 #endif
362 	if (pager == NULL) {
363 		(void) swap_pager_clean(NULL, B_WRITE);
364 		return;
365 	}
366 	flags = B_WRITE;
367 	if (!sync)
368 		flags |= B_ASYNC;
369 	return(swap_pager_io((sw_pager_t)pager->pg_data, m, flags));
370 }
371 
372 boolean_t
373 swap_pager_haspage(pager, offset)
374 	vm_pager_t pager;
375 	vm_offset_t offset;
376 {
377 	register sw_pager_t swp;
378 	register sw_blk_t swb;
379 	int ix;
380 
381 #ifdef DEBUG
382 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
383 		printf("swpg_haspage(%x, %x) ", pager, offset);
384 #endif
385 	swp = (sw_pager_t) pager->pg_data;
386 	ix = offset / dbtob(swp->sw_bsize);
387 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
388 #ifdef DEBUG
389 		if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK))
390 			printf("swpg_haspage: %x bad offset %x, ix %x\n",
391 			       swp->sw_blocks, offset, ix);
392 #endif
393 		return(FALSE);
394 	}
395 	swb = &swp->sw_blocks[ix];
396 	if (swb->swb_block)
397 		ix = atop(offset % dbtob(swp->sw_bsize));
398 #ifdef DEBUG
399 	if (swpagerdebug & SDB_ALLOCBLK)
400 		printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix);
401 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
402 		printf("-> %c\n",
403 		       "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]);
404 #endif
405 	if (swb->swb_block && (swb->swb_mask & (1 << ix)))
406 		return(TRUE);
407 	return(FALSE);
408 }
409 
410 /*
411  * Scaled down version of swap().
412  * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed.
413  * BOGUS:  lower level IO routines expect a KVA so we have to map our
414  * provided physical page into the KVA to keep them happy.
415  */
416 swap_pager_io(swp, m, flags)
417 	register sw_pager_t swp;
418 	vm_page_t m;
419 	int flags;
420 {
421 	register struct buf *bp;
422 	register sw_blk_t swb;
423 	register int s;
424 	int ix;
425 	boolean_t rv;
426 	vm_offset_t kva, off;
427 	swp_clean_t spc;
428 
429 #ifdef DEBUG
430 	/* save panic time state */
431 	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
432 		return;
433 	if (swpagerdebug & (SDB_FOLLOW|SDB_IO))
434 		printf("swpg_io(%x, %x, %x)\n", swp, m, flags);
435 #endif
436 
437 	/*
438 	 * For reads (pageins) and synchronous writes, we clean up
439 	 * all completed async pageouts.
440 	 */
441 	if ((flags & B_ASYNC) == 0) {
442 		s = splbio();
443 #ifdef DEBUG
444 		/*
445 		 * Check to see if this page is currently being cleaned.
446 		 * If it is, we just wait til the operation is done before
447 		 * continuing.
448 		 */
449 		while (swap_pager_clean(m, flags&B_READ)) {
450 			if (swpagerdebug & SDB_ANOM)
451 				printf("swap_pager_io: page %x cleaning\n", m);
452 
453 			swp->sw_flags |= SW_WANTED;
454 			assert_wait((int)swp);
455 			thread_block();
456 		}
457 #else
458 		(void) swap_pager_clean(m, flags&B_READ);
459 #endif
460 		splx(s);
461 	}
462 	/*
463 	 * For async writes (pageouts), we cleanup completed pageouts so
464 	 * that all available resources are freed.  Also tells us if this
465 	 * page is already being cleaned.  If it is, or no resources
466 	 * are available, we try again later.
467 	 */
468 	else if (swap_pager_clean(m, B_WRITE) ||
469 		 queue_empty(&swap_pager_free)) {
470 #ifdef DEBUG
471 		if ((swpagerdebug & SDB_ANOM) &&
472 		    !queue_empty(&swap_pager_free))
473 			printf("swap_pager_io: page %x already cleaning\n", m);
474 #endif
475 		return(VM_PAGER_FAIL);
476 	}
477 
478 	/*
479 	 * Determine swap block and allocate as necessary.
480 	 */
481 	off = m->offset + m->object->paging_offset;
482 	ix = off / dbtob(swp->sw_bsize);
483 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
484 #ifdef DEBUG
485 		if (swpagerdebug & SDB_FAIL)
486 			printf("swpg_io: bad offset %x+%x(%d) in %x\n",
487 			       m->offset, m->object->paging_offset,
488 			       ix, swp->sw_blocks);
489 #endif
490 		return(VM_PAGER_FAIL);
491 	}
492 	swb = &swp->sw_blocks[ix];
493 	off = off % dbtob(swp->sw_bsize);
494 	if (flags & B_READ) {
495 		if (swb->swb_block == 0 ||
496 		    (swb->swb_mask & (1 << atop(off))) == 0) {
497 #ifdef DEBUG
498 			if (swpagerdebug & (SDB_ALLOCBLK|SDB_FAIL))
499 				printf("swpg_io: %x bad read: blk %x+%x, mask %x, off %x+%x\n",
500 				       swp->sw_blocks,
501 				       swb->swb_block, atop(off),
502 				       swb->swb_mask,
503 				       m->offset, m->object->paging_offset);
504 #endif
505 			/* XXX: should we zero page here?? */
506 			return(VM_PAGER_FAIL);
507 		}
508 	} else if (swb->swb_block == 0) {
509 		swb->swb_block = rmalloc(swapmap, swp->sw_bsize);
510 		if (swb->swb_block == 0) {
511 #ifdef DEBUG
512 			if (swpagerdebug & SDB_FAIL)
513 				printf("swpg_io: rmalloc of %x failed\n",
514 				       swp->sw_bsize);
515 #endif
516 			return(VM_PAGER_FAIL);
517 		}
518 #ifdef DEBUG
519 		if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK))
520 			printf("swpg_io: %x alloc blk %x at ix %x\n",
521 			       swp->sw_blocks, swb->swb_block, ix);
522 #endif
523 	}
524 
525 	/*
526 	 * Allocate a kernel virtual address and initialize so that PTE
527 	 * is available for lower level IO drivers.
528 	 */
529 	kva = vm_pager_map_page(m);
530 
531 	/*
532 	 * Get a swap buffer header and perform the IO
533 	 */
534 	s = splbio();
535 	while (bswlist.av_forw == NULL) {
536 #ifdef DEBUG
537 		if (swpagerdebug & SDB_ANOM)
538 			printf("swap_pager_io: wait on swbuf for %x (%d)\n",
539 			       m, flags);
540 #endif
541 		bswlist.b_flags |= B_WANTED;
542 		sleep((caddr_t)&bswlist, PSWP+1);
543 	}
544 	bp = bswlist.av_forw;
545 	bswlist.av_forw = bp->av_forw;
546 	splx(s);
547 	bp->b_flags = B_BUSY | (flags & B_READ);
548 	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
549 	bp->b_un.b_addr = (caddr_t)kva;
550 	bp->b_blkno = swb->swb_block + btodb(off);
551 	VHOLD(swapdev_vp);
552 	bp->b_vp = swapdev_vp;
553 	if (swapdev_vp->v_type == VBLK)
554 		bp->b_dev = swapdev_vp->v_rdev;
555 	bp->b_bcount = PAGE_SIZE;
556 	if ((bp->b_flags & B_READ) == 0)
557 		swapdev_vp->v_numoutput++;
558 
559 	/*
560 	 * If this is an async write we set up additional buffer fields
561 	 * and place a "cleaning" entry on the inuse queue.
562 	 */
563 	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
564 #ifdef DEBUG
565 		if (queue_empty(&swap_pager_free))
566 			panic("swpg_io: lost spc");
567 #endif
568 		queue_remove_first(&swap_pager_free,
569 				   spc, swp_clean_t, spc_list);
570 #ifdef DEBUG
571 		if (spc->spc_flags != SPC_FREE)
572 			panic("swpg_io: bad free spc");
573 #endif
574 		spc->spc_flags = SPC_BUSY;
575 		spc->spc_bp = bp;
576 		spc->spc_swp = swp;
577 		spc->spc_kva = kva;
578 		spc->spc_m = m;
579 		bp->b_flags |= B_CALL;
580 		bp->b_iodone = swap_pager_iodone;
581 		s = splbio();
582 		swp->sw_poip++;
583 		queue_enter(&swap_pager_inuse, spc, swp_clean_t, spc_list);
584 
585 #ifdef DEBUG
586 		swap_pager_poip++;
587 		if (swpagerdebug & SDB_WRITE)
588 			printf("swpg_io: write: bp=%x swp=%x spc=%x poip=%d\n",
589 			       bp, swp, spc, swp->sw_poip);
590 		if ((swpagerdebug & SDB_ALLOCBLK) &&
591 		    (swb->swb_mask & (1 << atop(off))) == 0)
592 			printf("swpg_io: %x write blk %x+%x\n",
593 			       swp->sw_blocks, swb->swb_block, atop(off));
594 #endif
595 		swb->swb_mask |= (1 << atop(off));
596 		splx(s);
597 	}
598 #ifdef DEBUG
599 	if (swpagerdebug & SDB_IO)
600 		printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n",
601 		       bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m));
602 #endif
603 	VOP_STRATEGY(bp);
604 	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
605 #ifdef DEBUG
606 		if (swpagerdebug & SDB_IO)
607 			printf("swpg_io:  IO started: bp %x\n", bp);
608 #endif
609 		return(VM_PAGER_PEND);
610 	}
611 	s = splbio();
612 #ifdef DEBUG
613 	if (flags & B_READ)
614 		swap_pager_piip++;
615 	else
616 		swap_pager_poip++;
617 #endif
618 	while ((bp->b_flags & B_DONE) == 0) {
619 		assert_wait((int)bp);
620 		thread_block();
621 	}
622 #ifdef DEBUG
623 	if (flags & B_READ)
624 		--swap_pager_piip;
625 	else
626 		--swap_pager_poip;
627 #endif
628 	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
629 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
630 	bp->av_forw = bswlist.av_forw;
631 	bswlist.av_forw = bp;
632 	if (bp->b_vp)
633 		brelvp(bp);
634 	if (bswlist.b_flags & B_WANTED) {
635 		bswlist.b_flags &= ~B_WANTED;
636 		thread_wakeup((int)&bswlist);
637 	}
638 	if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) {
639 		m->clean = TRUE;
640 		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
641 	}
642 	splx(s);
643 #ifdef DEBUG
644 	if (swpagerdebug & SDB_IO)
645 		printf("swpg_io:  IO done: bp %x, rv %d\n", bp, rv);
646 	if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_FAIL)
647 		printf("swpg_io: IO error\n");
648 #endif
649 	vm_pager_unmap_page(kva);
650 	return(rv);
651 }
652 
653 boolean_t
654 swap_pager_clean(m, rw)
655 	vm_page_t m;
656 	int rw;
657 {
658 	register swp_clean_t spc, tspc;
659 	register int s;
660 
661 #ifdef DEBUG
662 	/* save panic time state */
663 	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
664 		return;
665 	if (swpagerdebug & SDB_FOLLOW)
666 		printf("swpg_clean(%x, %d)\n", m, rw);
667 #endif
668 	tspc = NULL;
669 	for (;;) {
670 		/*
671 		 * Look up and removal from inuse list must be done
672 		 * at splbio() to avoid conflicts with swap_pager_iodone.
673 		 */
674 		s = splbio();
675 		spc = (swp_clean_t) queue_first(&swap_pager_inuse);
676 		while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
677 			if ((spc->spc_flags & SPC_DONE) &&
678 			    swap_pager_finish(spc)) {
679 				queue_remove(&swap_pager_inuse, spc,
680 					     swp_clean_t, spc_list);
681 				break;
682 			}
683 			if (m && m == spc->spc_m) {
684 #ifdef DEBUG
685 				if (swpagerdebug & SDB_ANOM)
686 					printf("swap_pager_clean: page %x on list, flags %x\n",
687 					       m, spc->spc_flags);
688 #endif
689 				tspc = spc;
690 			}
691 			spc = (swp_clean_t) queue_next(&spc->spc_list);
692 		}
693 
694 		/*
695 		 * No operations done, thats all we can do for now.
696 		 */
697 		if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
698 			break;
699 		splx(s);
700 
701 		/*
702 		 * The desired page was found to be busy earlier in
703 		 * the scan but has since completed.
704 		 */
705 		if (tspc && tspc == spc) {
706 #ifdef DEBUG
707 			if (swpagerdebug & SDB_ANOM)
708 				printf("swap_pager_clean: page %x done while looking\n",
709 				       m);
710 #endif
711 			tspc = NULL;
712 		}
713 		spc->spc_flags = SPC_FREE;
714 		vm_pager_unmap_page(spc->spc_kva);
715 		queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
716 #ifdef DEBUG
717 		if (swpagerdebug & SDB_WRITE)
718 			printf("swpg_clean: free spc %x\n", spc);
719 #endif
720 	}
721 #ifdef DEBUG
722 	/*
723 	 * If we found that the desired page is already being cleaned
724 	 * mark it so that swap_pager_iodone() will not set the clean
725 	 * flag before the pageout daemon has another chance to clean it.
726 	 */
727 	if (tspc && rw == B_WRITE) {
728 		if (swpagerdebug & SDB_ANOM)
729 			printf("swap_pager_clean: page %x on clean list\n",
730 			       tspc);
731 		tspc->spc_flags |= SPC_DIRTY;
732 	}
733 #endif
734 	splx(s);
735 
736 #ifdef DEBUG
737 	if (swpagerdebug & SDB_WRITE)
738 		printf("swpg_clean: return %d\n", tspc ? TRUE : FALSE);
739 	if ((swpagerdebug & SDB_ANOM) && tspc)
740 		printf("swpg_clean: %s of cleaning page %x\n",
741 		       rw == B_READ ? "get" : "put", m);
742 #endif
743 	return(tspc ? TRUE : FALSE);
744 }
745 
746 swap_pager_finish(spc)
747 	register swp_clean_t spc;
748 {
749 	vm_object_t object = spc->spc_m->object;
750 
751 	/*
752 	 * Mark the paging operation as done.
753 	 * (XXX) If we cannot get the lock, leave it til later.
754 	 * (XXX) Also we are assuming that an async write is a
755 	 *       pageout operation that has incremented the counter.
756 	 */
757 	if (!vm_object_lock_try(object))
758 		return(0);
759 
760 	if (--object->paging_in_progress == 0)
761 		thread_wakeup((int) object);
762 
763 #ifdef DEBUG
764 	/*
765 	 * XXX: this isn't even close to the right thing to do,
766 	 * introduces a variety of race conditions.
767 	 *
768 	 * If dirty, vm_pageout() has attempted to clean the page
769 	 * again.  In this case we do not do anything as we will
770 	 * see the page again shortly.
771 	 */
772 	if (spc->spc_flags & SPC_DIRTY) {
773 		if (swpagerdebug & SDB_ANOM)
774 			printf("swap_pager_finish: page %x dirty again\n",
775 			       spc->spc_m);
776 		spc->spc_m->busy = FALSE;
777 		PAGE_WAKEUP(spc->spc_m);
778 		vm_object_unlock(object);
779 		return(1);
780 	}
781 #endif
782 	/*
783 	 * If no error mark as clean and inform the pmap system.
784 	 * If error, mark as dirty so we will try again.
785 	 * (XXX could get stuck doing this, should give up after awhile)
786 	 */
787 	if (spc->spc_flags & SPC_ERROR) {
788 		printf("swap_pager_finish: clean of page %x failed\n",
789 		       VM_PAGE_TO_PHYS(spc->spc_m));
790 		spc->spc_m->laundry = TRUE;
791 	} else {
792 		spc->spc_m->clean = TRUE;
793 		pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m));
794 	}
795 	spc->spc_m->busy = FALSE;
796 	PAGE_WAKEUP(spc->spc_m);
797 
798 	vm_object_unlock(object);
799 	return(1);
800 }
801 
802 swap_pager_iodone(bp)
803 	register struct buf *bp;
804 {
805 	register swp_clean_t spc;
806 	daddr_t blk;
807 	int s;
808 
809 #ifdef DEBUG
810 	/* save panic time state */
811 	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
812 		return;
813 	if (swpagerdebug & SDB_FOLLOW)
814 		printf("swpg_iodone(%x)\n", bp);
815 #endif
816 	s = splbio();
817 	spc = (swp_clean_t) queue_first(&swap_pager_inuse);
818 	while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
819 		if (spc->spc_bp == bp)
820 			break;
821 		spc = (swp_clean_t) queue_next(&spc->spc_list);
822 	}
823 #ifdef DEBUG
824 	if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
825 		panic("swap_pager_iodone: bp not found");
826 #endif
827 
828 	spc->spc_flags &= ~SPC_BUSY;
829 	spc->spc_flags |= SPC_DONE;
830 	if (bp->b_flags & B_ERROR)
831 		spc->spc_flags |= SPC_ERROR;
832 	spc->spc_bp = NULL;
833 	blk = bp->b_blkno;
834 
835 #ifdef DEBUG
836 	--swap_pager_poip;
837 	if (swpagerdebug & SDB_WRITE)
838 		printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n",
839 		       bp, spc->spc_swp, spc->spc_swp->sw_flags,
840 		       spc, spc->spc_swp->sw_poip);
841 #endif
842 
843 	spc->spc_swp->sw_poip--;
844 	if (spc->spc_swp->sw_flags & SW_WANTED) {
845 		spc->spc_swp->sw_flags &= ~SW_WANTED;
846 		thread_wakeup((int)spc->spc_swp);
847 	}
848 
849 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
850 	bp->av_forw = bswlist.av_forw;
851 	bswlist.av_forw = bp;
852 	if (bp->b_vp)
853 		brelvp(bp);
854 	if (bswlist.b_flags & B_WANTED) {
855 		bswlist.b_flags &= ~B_WANTED;
856 		thread_wakeup((int)&bswlist);
857 	}
858 	thread_wakeup((int) &vm_pages_needed);
859 	splx(s);
860 }
861 #endif
862