xref: /original-bsd/sys/vm/swap_pager.c (revision 321b8d38)
1 /*
2  * Copyright (c) 1990 University of Utah.
3  * Copyright (c) 1991 The Regents of the University of California.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * the Systems Programming Group of the University of Utah Computer
8  * Science Department.
9  *
10  * %sccs.include.redist.c%
11  *
12  *	@(#)swap_pager.c	7.1 (Berkeley) 12/05/90
13  */
14 
15 /*
16  * Quick hack to page to dedicated partition(s).
17  * TODO:
18  *	Add multiprocessor locks
19  *	Deal with async writes in a better fashion
20  */
21 
22 #include "swappager.h"
23 #if NSWAPPAGER > 0
24 
25 #include "param.h"
26 #include "user.h"
27 #include "proc.h"
28 #include "buf.h"
29 #include "map.h"
30 #include "systm.h"
31 #include "specdev.h"
32 #include "vnode.h"
33 #include "malloc.h"
34 #include "queue.h"
35 
36 #include "../vm/vm_param.h"
37 #include "../vm/vm_pager.h"
38 #include "../vm/vm_page.h"
39 #include "../vm/vm_pageout.h"
40 #include "../vm/swap_pager.h"
41 
42 #define NSWSIZES	16	/* size of swtab */
43 #define NPENDINGIO	64	/* max # of pending cleans */
44 #define MAXDADDRS	64	/* max # of disk addrs for fixed allocations */
45 
46 #ifdef DEBUG
47 int	swpagerdebug = 0x100;
48 #define	SDB_FOLLOW	0x001
49 #define SDB_INIT	0x002
50 #define SDB_ALLOC	0x004
51 #define SDB_IO		0x008
52 #define SDB_WRITE	0x010
53 #define SDB_FAIL	0x020
54 #define SDB_ALLOCBLK	0x040
55 #define SDB_FULL	0x080
56 #define SDB_ANOM	0x100
57 #define SDB_ANOMPANIC	0x200
58 #endif
59 
60 struct swpagerclean {
61 	queue_head_t		spc_list;
62 	int			spc_flags;
63 	struct buf		*spc_bp;
64 	sw_pager_t		spc_swp;
65 	vm_offset_t		spc_kva;
66 	vm_page_t		spc_m;
67 } swcleanlist[NPENDINGIO];
68 typedef	struct swpagerclean	*swp_clean_t;
69 
70 #define SWP_CLEAN_NULL		((swp_clean_t)0)
71 
72 /* spc_flags values */
73 #define SPC_FREE	0x00
74 #define SPC_BUSY	0x01
75 #define SPC_DONE	0x02
76 #define SPC_ERROR	0x04
77 #define SPC_DIRTY	0x08
78 
79 struct swtab {
80 	vm_size_t st_osize;	/* size of object (bytes) */
81 	int	  st_bsize;	/* vs. size of swap block (DEV_BSIZE units) */
82 #ifdef DEBUG
83 	u_long	  st_inuse;	/* number in this range in use */
84 	u_long	  st_usecnt;	/* total used of this size */
85 #endif
86 } swtab[NSWSIZES+1];
87 
88 #ifdef DEBUG
89 int		swap_pager_pendingio;	/* max pending async "clean" ops */
90 int		swap_pager_poip;	/* pageouts in progress */
91 int		swap_pager_piip;	/* pageins in progress */
92 #endif
93 
94 queue_head_t	swap_pager_inuse;	/* list of pending page cleans */
95 queue_head_t	swap_pager_free;	/* list of free pager clean structs */
96 queue_head_t	swap_pager_list;	/* list of "named" anon regions */
97 
/*
 * One-time initialization of the swap pager: install it as the
 * default pager, set up the free/inuse cleaning lists and compute
 * the object-size vs. swap-block-size allocation table (swtab).
 * Called once at VM startup.
 */
void
swap_pager_init()
{
	register swp_clean_t spc;
	register int i, bsize;
	extern int dmmin, dmmax;	/* swap allocation min/max (DEV_BSIZE units) */
	int maxbsize;

#ifdef DEBUG
	if (swpagerdebug & (SDB_FOLLOW|SDB_INIT))
		printf("swpg_init()\n");
#endif
	dfltpagerops = &swappagerops;
	queue_init(&swap_pager_list);

	/*
	 * Initialize clean lists: all NPENDINGIO static cleaning
	 * structures start out on the free list.
	 */
	queue_init(&swap_pager_inuse);
	queue_init(&swap_pager_free);
	for (i = 0, spc = swcleanlist; i < NPENDINGIO; i++, spc++) {
		queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
		spc->spc_flags = SPC_FREE;
	}

	/*
	 * Calculate the swap allocation constants.
	 * dmmin must cover at least one page cluster (CLBYTES).
	 */
        if (dmmin == 0) {
                dmmin = DMMIN;
		if (dmmin < CLBYTES/DEV_BSIZE)
			dmmin = CLBYTES/DEV_BSIZE;
	}
        if (dmmax == 0)
                dmmax = DMMAX;

	/*
	 * Fill in our table of object size vs. allocation size.
	 * Block sizes double per entry, clamped to [dmmin, maxbsize];
	 * maxbsize is the largest block whose pages fit in one swb_mask
	 * bitmap.  The table ends with a zero st_osize sentinel whose
	 * st_bsize is the largest block size reached.
	 */
	bsize = btodb(PAGE_SIZE);
	if (bsize < dmmin)
		bsize = dmmin;
	maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE);
	if (maxbsize > dmmax)
		maxbsize = dmmax;
	for (i = 0; i < NSWSIZES; i++) {
		swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize));
		swtab[i].st_bsize = bsize;
#ifdef DEBUG
		if (swpagerdebug & SDB_INIT)
			printf("swpg_init: ix %d, size %x, bsize %x\n",
			       i, swtab[i].st_osize, swtab[i].st_bsize);
#endif
		if (bsize >= maxbsize)
			break;
		bsize *= 2;
	}
	swtab[i].st_osize = 0;
	swtab[i].st_bsize = bsize;
}
158 
159 /*
160  * Allocate a pager structure and associated resources.
161  * Note that if we are called from the pageout daemon (handle == NULL)
162  * we should not wait for memory as it could resulting in deadlock.
163  */
164 vm_pager_t
165 swap_pager_alloc(handle, size, prot)
166 	caddr_t handle;
167 	register vm_size_t size;
168 	vm_prot_t prot;
169 {
170 	register vm_pager_t pager;
171 	register sw_pager_t swp;
172 	struct swtab *swt;
173 	int waitok;
174 
175 #ifdef DEBUG
176 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
177 		printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot);
178 #endif
179 	/*
180 	 * If this is a "named" anonymous region, look it up and
181 	 * return the appropriate pager if it exists.
182 	 */
183 	if (handle) {
184 		pager = vm_pager_lookup(&swap_pager_list, handle);
185 		if (pager != VM_PAGER_NULL) {
186 			/*
187 			 * Use vm_object_lookup to gain a reference
188 			 * to the object and also to remove from the
189 			 * object cache.
190 			 */
191 			if (vm_object_lookup(pager) == VM_OBJECT_NULL)
192 				panic("swap_pager_alloc: bad object");
193 			return(pager);
194 		}
195 	}
196 	/*
197 	 * Pager doesn't exist, allocate swap management resources
198 	 * and initialize.
199 	 */
200 	waitok = handle ? M_WAITOK : M_NOWAIT;
201 	pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
202 	if (pager == VM_PAGER_NULL)
203 		return(VM_PAGER_NULL);
204 	swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
205 	if (swp == NULL) {
206 #ifdef DEBUG
207 		if (swpagerdebug & SDB_FAIL)
208 			printf("swpg_alloc: swpager malloc failed\n");
209 #endif
210 		free((caddr_t)pager, M_VMPAGER);
211 		return(VM_PAGER_NULL);
212 	}
213 	size = round_page(size);
214 	for (swt = swtab; swt->st_osize; swt++)
215 		if (size <= swt->st_osize)
216 			break;
217 #ifdef DEBUG
218 	swt->st_inuse++;
219 	swt->st_usecnt++;
220 #endif
221 	swp->sw_osize = size;
222 	swp->sw_bsize = swt->st_bsize;
223 	swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize;
224 	swp->sw_blocks = (sw_blk_t)
225 		malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
226 		       M_VMPGDATA, M_NOWAIT);
227 	if (swp->sw_blocks == NULL) {
228 		free((caddr_t)swp, M_VMPGDATA);
229 		free((caddr_t)pager, M_VMPAGER);
230 #ifdef DEBUG
231 		if (swpagerdebug & SDB_FAIL)
232 			printf("swpg_alloc: sw_blocks malloc failed\n");
233 		swt->st_inuse--;
234 		swt->st_usecnt--;
235 #endif
236 		return(FALSE);
237 	}
238 	bzero((caddr_t)swp->sw_blocks,
239 	      swp->sw_nblocks * sizeof(*swp->sw_blocks));
240 	swp->sw_poip = 0;
241 	if (handle) {
242 		vm_object_t object;
243 
244 		swp->sw_flags = SW_NAMED;
245 		queue_enter(&swap_pager_list, pager, vm_pager_t, pg_list);
246 		/*
247 		 * Consistant with other pagers: return with object
248 		 * referenced.  Can't do this with handle == NULL
249 		 * since it might be the pageout daemon calling.
250 		 */
251 		object = vm_object_allocate(size);
252 		vm_object_enter(object, pager);
253 		vm_object_setpager(object, pager, 0, FALSE);
254 	} else {
255 		swp->sw_flags = 0;
256 		queue_init(&pager->pg_list);
257 	}
258 	pager->pg_handle = handle;
259 	pager->pg_ops = &swappagerops;
260 	pager->pg_type = PG_SWAP;
261 	pager->pg_data = (caddr_t)swp;
262 
263 #ifdef DEBUG
264 	if (swpagerdebug & SDB_ALLOC)
265 		printf("swpg_alloc: pg_data %x, %x of %x at %x\n",
266 		       swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks);
267 #endif
268 	return(pager);
269 }
270 
/*
 * Tear down a swap pager: unlink it from the named-region list,
 * wait for all pending pageouts to drain, return any allocated
 * swap blocks to swapmap, and free the management structures.
 */
void
swap_pager_dealloc(pager)
	vm_pager_t pager;
{
	register int i;
	register sw_blk_t bp;
	register sw_pager_t swp;
	struct swtab *swt;
	int s;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return;
	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
		printf("swpg_dealloc(%x)\n", pager);
#endif
	/*
	 * Remove from list right away so lookups will fail if we
	 * block for pageout completion.
	 */
	swp = (sw_pager_t) pager->pg_data;
	if (swp->sw_flags & SW_NAMED) {
		queue_remove(&swap_pager_list, pager, vm_pager_t, pg_list);
		swp->sw_flags &= ~SW_NAMED;
	}
#ifdef DEBUG
	/* statistics: one fewer pager in this size class */
	for (swt = swtab; swt->st_osize; swt++)
		if (swp->sw_osize <= swt->st_osize)
			break;
	swt->st_inuse--;
#endif

	/*
	 * Wait for all pageouts to finish and remove
	 * all entries from cleaning list.  sw_poip is decremented
	 * (and SW_WANTED woken) at interrupt time by swap_pager_iodone,
	 * hence the splbio protection here.
	 */
	s = splbio();
	while (swp->sw_poip) {
		swp->sw_flags |= SW_WANTED;
		assert_wait((int)swp);
		thread_block();
	}
	splx(s);
	(void) swap_pager_clean(VM_PAGE_NULL, B_WRITE);

	/*
	 * Free left over swap blocks
	 */
	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++)
		if (bp->swb_block) {
#ifdef DEBUG
			if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL))
				printf("swpg_dealloc: blk %x\n",
				       bp->swb_block);
#endif
			rmfree(swapmap, swp->sw_bsize, bp->swb_block);
		}
	/*
	 * Free swap management resources
	 */
	free((caddr_t)swp->sw_blocks, M_VMPGDATA);
	free((caddr_t)swp, M_VMPGDATA);
	free((caddr_t)pager, M_VMPAGER);
}
336 
337 swap_pager_getpage(pager, m, sync)
338 	vm_pager_t pager;
339 	vm_page_t m;
340 	boolean_t sync;
341 {
342 #ifdef DEBUG
343 	if (swpagerdebug & SDB_FOLLOW)
344 		printf("swpg_getpage(%x, %x, %d)\n", pager, m, sync);
345 #endif
346 	return(swap_pager_io((sw_pager_t)pager->pg_data, m, B_READ));
347 }
348 
349 swap_pager_putpage(pager, m, sync)
350 	vm_pager_t pager;
351 	vm_page_t m;
352 	boolean_t sync;
353 {
354 	int flags;
355 
356 #ifdef DEBUG
357 	if (swpagerdebug & SDB_FOLLOW)
358 		printf("swpg_putpage(%x, %x, %d)\n", pager, m, sync);
359 #endif
360 	if (pager == VM_PAGER_NULL) {
361 		(void) swap_pager_clean(VM_PAGE_NULL, B_WRITE);
362 		return;
363 	}
364 	flags = B_WRITE;
365 	if (!sync)
366 		flags |= B_ASYNC;
367 	return(swap_pager_io((sw_pager_t)pager->pg_data, m, flags));
368 }
369 
370 boolean_t
371 swap_pager_haspage(pager, offset)
372 	vm_pager_t pager;
373 	vm_offset_t offset;
374 {
375 	register sw_pager_t swp;
376 	register sw_blk_t swb;
377 	int ix;
378 
379 #ifdef DEBUG
380 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
381 		printf("swpg_haspage(%x, %x) ", pager, offset);
382 #endif
383 	swp = (sw_pager_t) pager->pg_data;
384 	ix = offset / dbtob(swp->sw_bsize);
385 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
386 #ifdef DEBUG
387 		if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK))
388 			printf("swpg_haspage: %x bad offset %x, ix %x\n",
389 			       swp->sw_blocks, offset, ix);
390 #endif
391 		return(FALSE);
392 	}
393 	swb = &swp->sw_blocks[ix];
394 	if (swb->swb_block)
395 		ix = atop(offset % dbtob(swp->sw_bsize));
396 #ifdef DEBUG
397 	if (swpagerdebug & SDB_ALLOCBLK)
398 		printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix);
399 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
400 		printf("-> %c\n",
401 		       "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]);
402 #endif
403 	if (swb->swb_block && (swb->swb_mask & (1 << ix)))
404 		return(TRUE);
405 	return(FALSE);
406 }
407 
408 /*
409  * Scaled down version of swap().
410  * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed.
411  * BOGUS:  lower level IO routines expect a KVA so we have to map our
412  * provided physical page into the KVA to keep them happy.
413  */
414 swap_pager_io(swp, m, flags)
415 	register sw_pager_t swp;
416 	vm_page_t m;
417 	int flags;
418 {
419 	register struct buf *bp;
420 	register sw_blk_t swb;
421 	register int s;
422 	int ix;
423 	boolean_t rv;
424 	vm_offset_t kva, off;
425 	swp_clean_t spc;
426 
427 #ifdef DEBUG
428 	/* save panic time state */
429 	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
430 		return;
431 	if (swpagerdebug & (SDB_FOLLOW|SDB_IO))
432 		printf("swpg_io(%x, %x, %x)\n", swp, m, flags);
433 #endif
434 
435 	/*
436 	 * For reads (pageins) and synchronous writes, we clean up
437 	 * all completed async pageouts and check to see if this
438 	 * page is currently being cleaned.  If it is, we just wait
439 	 * til the operation is done before continuing.
440 	 */
441 	if ((flags & B_ASYNC) == 0) {
442 		s = splbio();
443 		while (swap_pager_clean(m, flags&B_READ)) {
444 			swp->sw_flags |= SW_WANTED;
445 			assert_wait((int)swp);
446 			thread_block();
447 		}
448 		splx(s);
449 	}
450 	/*
451 	 * For async writes (pageouts), we cleanup completed pageouts so
452 	 * that all available resources are freed.  Also tells us if this
453 	 * page is already being cleaned.  If it is, or no resources
454 	 * are available, we try again later.
455 	 */
456 	else if (swap_pager_clean(m, B_WRITE) || queue_empty(&swap_pager_free))
457 		return(VM_PAGER_FAIL);
458 
459 	/*
460 	 * Determine swap block and allocate as necessary.
461 	 */
462 	off = m->offset + m->object->paging_offset;
463 	ix = off / dbtob(swp->sw_bsize);
464 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
465 #ifdef DEBUG
466 		if (swpagerdebug & SDB_FAIL)
467 			printf("swpg_io: bad offset %x+%x(%d) in %x\n",
468 			       m->offset, m->object->paging_offset,
469 			       ix, swp->sw_blocks);
470 #endif
471 		return(VM_PAGER_FAIL);
472 	}
473 	swb = &swp->sw_blocks[ix];
474 	off = off % dbtob(swp->sw_bsize);
475 	if (flags & B_READ) {
476 		if (swb->swb_block == 0 ||
477 		    (swb->swb_mask & (1 << atop(off))) == 0) {
478 #ifdef DEBUG
479 			if (swpagerdebug & (SDB_ALLOCBLK|SDB_FAIL))
480 				printf("swpg_io: %x bad read: blk %x+%x, mask %x, off %x+%x\n",
481 				       swp->sw_blocks,
482 				       swb->swb_block, atop(off),
483 				       swb->swb_mask,
484 				       m->offset, m->object->paging_offset);
485 #endif
486 			/* XXX: should we zero page here?? */
487 			return(VM_PAGER_FAIL);
488 		}
489 	} else if (swb->swb_block == 0) {
490 		swb->swb_block = rmalloc(swapmap, swp->sw_bsize);
491 		if (swb->swb_block == 0) {
492 #ifdef DEBUG
493 			if (swpagerdebug & SDB_FAIL)
494 				printf("swpg_io: rmalloc of %x failed\n",
495 				       swp->sw_bsize);
496 #endif
497 			return(VM_PAGER_FAIL);
498 		}
499 #ifdef DEBUG
500 		if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK))
501 			printf("swpg_io: %x alloc blk %x at ix %x\n",
502 			       swp->sw_blocks, swb->swb_block, ix);
503 #endif
504 	}
505 
506 	/*
507 	 * Allocate a kernel virtual address and initialize so that PTE
508 	 * is available for lower level IO drivers.
509 	 */
510 	kva = vm_pager_map_page(m);
511 
512 	/*
513 	 * Get a swap buffer header and perform the IO
514 	 */
515 	s = splbio();
516 	while (bswlist.av_forw == NULL) {
517 #ifdef DEBUG
518 		if (swpagerdebug & SDB_ANOM)
519 			printf("swpg_io: wait on swbuf for %x (%d)\n",
520 			       m, flags);
521 #endif
522 		bswlist.b_flags |= B_WANTED;
523 		sleep((caddr_t)&bswlist, PSWP+1);
524 	}
525 	bp = bswlist.av_forw;
526 	bswlist.av_forw = bp->av_forw;
527 	splx(s);
528 	bp->b_flags = B_BUSY | (flags & B_READ);
529 	bp->b_proc = &proc[0];	/* XXX (but without B_PHYS set this is ok) */
530 	bp->b_un.b_addr = (caddr_t)kva;
531 	bp->b_blkno = swb->swb_block + btodb(off);
532 	VHOLD(swapdev_vp);
533 	bp->b_vp = swapdev_vp;
534 	bp->b_dev = swapdev_vp->v_rdev;
535 	bp->b_bcount = PAGE_SIZE;
536 	if ((bp->b_flags & B_READ) == 0)
537 		swapdev_vp->v_numoutput++;
538 
539 	/*
540 	 * If this is an async write we set up additional buffer fields
541 	 * and place a "cleaning" entry on the inuse queue.
542 	 */
543 	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
544 #ifdef DEBUG
545 		if (queue_empty(&swap_pager_free))
546 			panic("swpg_io: lost spc");
547 #endif
548 		queue_remove_first(&swap_pager_free,
549 				   spc, swp_clean_t, spc_list);
550 #ifdef DEBUG
551 		if (spc->spc_flags != SPC_FREE)
552 			panic("swpg_io: bad free spc");
553 #endif
554 		spc->spc_flags = SPC_BUSY;
555 		spc->spc_bp = bp;
556 		spc->spc_swp = swp;
557 		spc->spc_kva = kva;
558 		spc->spc_m = m;
559 #ifdef DEBUG
560 		m->pagerowned = 1;
561 #endif
562 		bp->b_flags |= B_CALL;
563 		bp->b_iodone = swap_pager_iodone;
564 		s = splbio();
565 		swp->sw_poip++;
566 		queue_enter(&swap_pager_inuse, spc, swp_clean_t, spc_list);
567 
568 #ifdef DEBUG
569 		swap_pager_poip++;
570 		if (swpagerdebug & SDB_WRITE)
571 			printf("swpg_io: write: bp=%x swp=%x spc=%x poip=%d\n",
572 			       bp, swp, spc, swp->sw_poip);
573 		if ((swpagerdebug & SDB_ALLOCBLK) &&
574 		    (swb->swb_mask & (1 << atop(off))) == 0)
575 			printf("swpg_io: %x write blk %x+%x\n",
576 			       swp->sw_blocks, swb->swb_block, atop(off));
577 #endif
578 		swb->swb_mask |= (1 << atop(off));
579 		/*
580 		 * XXX: Block write faults til we are done.
581 		 */
582 		m->page_lock = VM_PROT_WRITE;
583 		m->unlock_request = VM_PROT_ALL;
584 		pmap_copy_on_write(VM_PAGE_TO_PHYS(m));
585 		splx(s);
586 	}
587 #ifdef DEBUG
588 	if (swpagerdebug & SDB_IO)
589 		printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n",
590 		       bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m));
591 #endif
592 	VOP_STRATEGY(bp);
593 	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
594 #ifdef DEBUG
595 		if (swpagerdebug & SDB_IO)
596 			printf("swpg_io:  IO started: bp %x\n", bp);
597 #endif
598 		return(VM_PAGER_PEND);
599 	}
600 	s = splbio();
601 #ifdef DEBUG
602 	if (flags & B_READ)
603 		swap_pager_piip++;
604 	else
605 		swap_pager_poip++;
606 #endif
607 	while ((bp->b_flags & B_DONE) == 0) {
608 		assert_wait((int)bp);
609 		thread_block();
610 	}
611 #ifdef DEBUG
612 	if (flags & B_READ)
613 		--swap_pager_piip;
614 	else
615 		--swap_pager_poip;
616 #endif
617 	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
618 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
619 	bp->av_forw = bswlist.av_forw;
620 	bswlist.av_forw = bp;
621 	if (bp->b_vp)
622 		brelvp(bp);
623 	if (bswlist.b_flags & B_WANTED) {
624 		bswlist.b_flags &= ~B_WANTED;
625 		thread_wakeup((int)&bswlist);
626 	}
627 	if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) {
628 		m->clean = 1;
629 		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
630 	}
631 	splx(s);
632 #ifdef DEBUG
633 	if (swpagerdebug & SDB_IO)
634 		printf("swpg_io:  IO done: bp %x, rv %d\n", bp, rv);
635 	if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_FAIL)
636 		printf("swpg_io: IO error\n");
637 #endif
638 	vm_pager_unmap_page(kva);
639 	return(rv);
640 }
641 
642 boolean_t
643 swap_pager_clean(m, rw)
644 	vm_page_t m;
645 	int rw;
646 {
647 	register swp_clean_t spc, tspc;
648 	register int s;
649 
650 #ifdef DEBUG
651 	/* save panic time state */
652 	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
653 		return;
654 	if (swpagerdebug & SDB_FOLLOW)
655 		printf("swpg_clean(%x, %d)\n", m, rw);
656 #endif
657 	tspc = SWP_CLEAN_NULL;
658 	for (;;) {
659 		/*
660 		 * Look up and removal from inuse list must be done
661 		 * at splbio() to avoid conflicts with swap_pager_iodone.
662 		 */
663 		s = splbio();
664 		spc = (swp_clean_t) queue_first(&swap_pager_inuse);
665 		while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
666 			if ((spc->spc_flags & SPC_DONE) &&
667 			    swap_pager_finish(spc)) {
668 				queue_remove(&swap_pager_inuse, spc,
669 					     swp_clean_t, spc_list);
670 				break;
671 			}
672 			if (m && m == spc->spc_m) {
673 #ifdef DEBUG
674 				if (swpagerdebug & SDB_ANOM)
675 					printf("swpg_clean: %x on list, flags %x\n",
676 					       m, spc->spc_flags);
677 #endif
678 				tspc = spc;
679 			}
680 			spc = (swp_clean_t) queue_next(&spc->spc_list);
681 		}
682 
683 		/*
684 		 * No operations done, thats all we can do for now.
685 		 */
686 		if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
687 			break;
688 		splx(s);
689 
690 		/*
691 		 * The desired page was found to be busy earlier in
692 		 * the scan but has since completed.
693 		 */
694 		if (tspc && tspc == spc) {
695 #ifdef DEBUG
696 			if (swpagerdebug & SDB_ANOM)
697 				printf("swpg_clean: %x done while looking\n",
698 				       m);
699 #endif
700 			tspc = SWP_CLEAN_NULL;
701 		}
702 		spc->spc_flags = SPC_FREE;
703 		vm_pager_unmap_page(spc->spc_kva);
704 		queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
705 #ifdef DEBUG
706 		if (swpagerdebug & SDB_WRITE)
707 			printf("swpg_clean: free spc %x\n", spc);
708 #endif
709 	}
710 	/*
711 	 * If we found that the desired page is already being cleaned
712 	 * mark it so that swap_pager_iodone() will not set the clean
713 	 * flag before the pageout daemon has another chance to clean it.
714 	 */
715 	if (tspc && rw == B_WRITE) {
716 #ifdef DEBUG
717 		if (swpagerdebug & SDB_ANOM)
718 			printf("swpg_clean: %x on clean list\n", tspc);
719 #endif
720 		tspc->spc_flags |= SPC_DIRTY;
721 	}
722 	splx(s);
723 
724 #ifdef DEBUG
725 	if (swpagerdebug & SDB_WRITE)
726 		printf("swpg_clean: return %d\n", tspc ? TRUE : FALSE);
727 	if ((swpagerdebug & SDB_ANOM) && tspc)
728 		printf("swpg_clean: %s of cleaning page %x\n",
729 		       rw == B_READ ? "get" : "put", m);
730 #endif
731 	return(tspc ? TRUE : FALSE);
732 }
733 
/*
 * Complete a finished async pageout described by cleaning entry spc:
 * decrement the object's paging_in_progress count, update the page's
 * clean/laundry state according to the IO result, and release the
 * blocked write fault.  Returns 1 on success, 0 if the object lock
 * could not be acquired (caller retries later).  Implicit int
 * return type.
 */
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	vm_object_t object = spc->spc_m->object;

	/*
	 * Mark the paging operation as done.
	 * (XXX) If we cannot get the lock, leave it til later.
	 * (XXX) Also we are assuming that an async write is a
	 *       pageout operation that has incremented the counter.
	 */
	if (!vm_object_lock_try(object))
		return(0);

#ifdef DEBUG
	spc->spc_m->pagerowned = 0;
#endif

	if (--object->paging_in_progress == 0)
		thread_wakeup((int) object);

	/*
	 * XXX: this isn't even close to the right thing to do,
	 * introduces a variety of race conditions.
	 *
	 * If dirty, vm_pageout() has attempted to clean the page
	 * again.  In this case we do not do anything as we will
	 * see the page again shortly.  Otherwise, if no error mark
	 * as clean and inform the pmap system.  If error, mark as
	 * dirty so we will try again (XXX: could get stuck doing
	 * this, should give up after awhile).
	 */
	if ((spc->spc_flags & SPC_DIRTY) == 0) {
		if (spc->spc_flags & SPC_ERROR) {
			printf("swap_pager: clean of %x failed\n",
			       VM_PAGE_TO_PHYS(spc->spc_m));
			/* laundry: page still needs to be written out */
			spc->spc_m->laundry = TRUE;
		} else {
			spc->spc_m->clean = TRUE;
			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m));
		}
	}
	/*
	 * XXX: allow blocked write faults to continue
	 * (undoes the page_lock set in swap_pager_io)
	 */
	spc->spc_m->page_lock = spc->spc_m->unlock_request = VM_PROT_NONE;
	PAGE_WAKEUP(spc->spc_m);

	vm_object_unlock(object);
	return(1);
}
785 
/*
 * Buffer IO completion routine (installed as b_iodone) for async
 * pageouts.  Runs at interrupt time under splbio: finds the cleaning
 * entry that owns bp, marks it SPC_DONE (plus SPC_ERROR on IO error),
 * releases the buffer header back to bswlist, and wakes any waiters
 * on the pager (SW_WANTED), the buffer list (B_WANTED), and the
 * pageout daemon.  Final page cleanup is deferred to
 * swap_pager_clean()/swap_pager_finish().
 */
swap_pager_iodone(bp)
	register struct buf *bp;
{
	register swp_clean_t spc;
	daddr_t blk;	/* only used by the disabled #if 0 code below */
	int s;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return;
	if (swpagerdebug & SDB_FOLLOW)
		printf("swpg_iodone(%x)\n", bp);
#endif
	s = splbio();
	/* locate the cleaning entry that owns this buffer */
	spc = (swp_clean_t) queue_first(&swap_pager_inuse);
	while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
		if (spc->spc_bp == bp)
			break;
		spc = (swp_clean_t) queue_next(&spc->spc_list);
	}
#ifdef DEBUG
	if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
		panic("swpg_iodone: bp not found");
#endif

	spc->spc_flags &= ~SPC_BUSY;
	spc->spc_flags |= SPC_DONE;
	if (bp->b_flags & B_ERROR)
		spc->spc_flags |= SPC_ERROR;
	spc->spc_bp = NULL;
	blk = bp->b_blkno;

#ifdef DEBUG
	--swap_pager_poip;
	if (swpagerdebug & SDB_WRITE)
		printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n",
		       bp, spc->spc_swp, spc->spc_swp->sw_flags,
		       spc, spc->spc_swp->sw_poip);
#endif

	/* one fewer pageout in progress; wake dealloc/io waiters */
	spc->spc_swp->sw_poip--;
	if (spc->spc_swp->sw_flags & SW_WANTED) {
		spc->spc_swp->sw_flags &= ~SW_WANTED;
		thread_wakeup((int)spc->spc_swp);
	}

	/* return the buffer header to the swap buffer pool */
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->av_forw = bswlist.av_forw;
	bswlist.av_forw = bp;
	if (bp->b_vp)
		brelvp(bp);
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		thread_wakeup((int)&bswlist);
	}
#if 0
	/*
	 * XXX: this isn't even close to the right thing to do,
	 * introduces a variety of race conditions.
	 *
	 * If dirty, vm_pageout() has attempted to clean the page
	 * again.  In this case we do not do anything as we will
	 * see the page again shortly.  Otherwise, if no error mark
	 * as clean and inform the pmap system.  If error, mark as
	 * dirty so we will try again (XXX: could get stuck doing
	 * this, should give up after awhile).
	 */
	if ((spc->spc_flags & SPC_DIRTY) == 0) {
		if (spc->spc_flags & SPC_ERROR) {
			printf("swap_pager: clean of %x (block %x) failed\n",
			       VM_PAGE_TO_PHYS(spc->spc_m), blk);
			spc->spc_m->laundry = TRUE;
		} else {
			spc->spc_m->clean = TRUE;
			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m));
		}
	}
	/*
	 * XXX: allow blocked write faults to continue
	 */
	spc->spc_m->page_lock = spc->spc_m->unlock_request = VM_PROT_NONE;
	PAGE_WAKEUP(spc->spc_m);
#endif

	thread_wakeup((int) &vm_pages_needed);
	splx(s);
}
874 #endif
875