xref: /original-bsd/sys/vm/swap_pager.c (revision e59fb703)
1 /*
2  * Copyright (c) 1990 University of Utah.
3  * Copyright (c) 1991 The Regents of the University of California.
4  * All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * the Systems Programming Group of the University of Utah Computer
8  * Science Department.
9  *
10  * %sccs.include.redist.c%
11  *
12  * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
13  *
14  *	@(#)swap_pager.c	7.5 (Berkeley) 07/25/91
15  */
16 
17 /*
18  * Quick hack to page to dedicated partition(s).
19  * TODO:
20  *	Add multiprocessor locks
21  *	Deal with async writes in a better fashion
22  */
23 
24 #include "swappager.h"
25 #if NSWAPPAGER > 0
26 
27 #include "param.h"
28 #include "proc.h"
29 #include "buf.h"
30 #include "map.h"
31 #include "systm.h"
32 #include "specdev.h"
33 #include "vnode.h"
34 #include "malloc.h"
35 #include "queue.h"
36 
37 #include "vm.h"
38 #include "vm_page.h"
39 #include "vm_pageout.h"
40 #include "swap_pager.h"
41 
42 #define NSWSIZES	16	/* size of swtab */
43 #define NPENDINGIO	64	/* max # of pending cleans */
44 #define MAXDADDRS	64	/* max # of disk addrs for fixed allocations */
45 
46 #ifdef DEBUG
47 int	swpagerdebug = 0x100;
48 #define	SDB_FOLLOW	0x001
49 #define SDB_INIT	0x002
50 #define SDB_ALLOC	0x004
51 #define SDB_IO		0x008
52 #define SDB_WRITE	0x010
53 #define SDB_FAIL	0x020
54 #define SDB_ALLOCBLK	0x040
55 #define SDB_FULL	0x080
56 #define SDB_ANOM	0x100
57 #define SDB_ANOMPANIC	0x200
58 #endif
59 
/*
 * One "clean" entry tracks a single pending asynchronous pageout from
 * the time it is queued in swap_pager_io() until swap_pager_iodone()
 * marks it done and swap_pager_clean() reclaims it.
 */
struct swpagerclean {
	queue_head_t		spc_list;	/* link on free or inuse list */
	int			spc_flags;	/* SPC_* state bits below */
	struct buf		*spc_bp;	/* buffer used for the write */
	sw_pager_t		spc_swp;	/* pager the page belongs to */
	vm_offset_t		spc_kva;	/* kernel VA the page is mapped at */
	vm_page_t		spc_m;		/* the page being cleaned */
} swcleanlist[NPENDINGIO];
typedef	struct swpagerclean	*swp_clean_t;
69 
70 /* spc_flags values */
71 #define SPC_FREE	0x00
72 #define SPC_BUSY	0x01
73 #define SPC_DONE	0x02
74 #define SPC_ERROR	0x04
75 #define SPC_DIRTY	0x08
76 
/*
 * Table mapping object-size ranges to the swap block size used for
 * objects in that range; filled in by swap_pager_init() and terminated
 * by an entry with st_osize == 0 (which matches any larger object).
 */
struct swtab {
	vm_size_t st_osize;	/* size of object (bytes) */
	int	  st_bsize;	/* vs. size of swap block (DEV_BSIZE units) */
#ifdef DEBUG
	u_long	  st_inuse;	/* number in this range in use */
	u_long	  st_usecnt;	/* total used of this size */
#endif
} swtab[NSWSIZES+1];
85 
86 #ifdef DEBUG
87 int		swap_pager_pendingio;	/* max pending async "clean" ops */
88 int		swap_pager_poip;	/* pageouts in progress */
89 int		swap_pager_piip;	/* pageins in progress */
90 #endif
91 
92 queue_head_t	swap_pager_inuse;	/* list of pending page cleans */
93 queue_head_t	swap_pager_free;	/* list of free pager clean structs */
94 queue_head_t	swap_pager_list;	/* list of "named" anon regions */
95 
96 void
97 swap_pager_init()
98 {
99 	register swp_clean_t spc;
100 	register int i, bsize;
101 	extern int dmmin, dmmax;
102 	int maxbsize;
103 
104 #ifdef DEBUG
105 	if (swpagerdebug & (SDB_FOLLOW|SDB_INIT))
106 		printf("swpg_init()\n");
107 #endif
108 	dfltpagerops = &swappagerops;
109 	queue_init(&swap_pager_list);
110 
111 	/*
112 	 * Initialize clean lists
113 	 */
114 	queue_init(&swap_pager_inuse);
115 	queue_init(&swap_pager_free);
116 	for (i = 0, spc = swcleanlist; i < NPENDINGIO; i++, spc++) {
117 		queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
118 		spc->spc_flags = SPC_FREE;
119 	}
120 
121 	/*
122 	 * Calculate the swap allocation constants.
123 	 */
124         if (dmmin == 0) {
125                 dmmin = DMMIN;
126 		if (dmmin < CLBYTES/DEV_BSIZE)
127 			dmmin = CLBYTES/DEV_BSIZE;
128 	}
129         if (dmmax == 0)
130                 dmmax = DMMAX;
131 
132 	/*
133 	 * Fill in our table of object size vs. allocation size
134 	 */
135 	bsize = btodb(PAGE_SIZE);
136 	if (bsize < dmmin)
137 		bsize = dmmin;
138 	maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE);
139 	if (maxbsize > dmmax)
140 		maxbsize = dmmax;
141 	for (i = 0; i < NSWSIZES; i++) {
142 		swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize));
143 		swtab[i].st_bsize = bsize;
144 #ifdef DEBUG
145 		if (swpagerdebug & SDB_INIT)
146 			printf("swpg_init: ix %d, size %x, bsize %x\n",
147 			       i, swtab[i].st_osize, swtab[i].st_bsize);
148 #endif
149 		if (bsize >= maxbsize)
150 			break;
151 		bsize *= 2;
152 	}
153 	swtab[i].st_osize = 0;
154 	swtab[i].st_bsize = bsize;
155 }
156 
157 /*
158  * Allocate a pager structure and associated resources.
159  * Note that if we are called from the pageout daemon (handle == NULL)
 * we should not wait for memory as it could result in deadlock.
161  */
162 vm_pager_t
163 swap_pager_alloc(handle, size, prot)
164 	caddr_t handle;
165 	register vm_size_t size;
166 	vm_prot_t prot;
167 {
168 	register vm_pager_t pager;
169 	register sw_pager_t swp;
170 	struct swtab *swt;
171 	int waitok;
172 
173 #ifdef DEBUG
174 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
175 		printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot);
176 #endif
177 	/*
178 	 * If this is a "named" anonymous region, look it up and
179 	 * return the appropriate pager if it exists.
180 	 */
181 	if (handle) {
182 		pager = vm_pager_lookup(&swap_pager_list, handle);
183 		if (pager != NULL) {
184 			/*
185 			 * Use vm_object_lookup to gain a reference
186 			 * to the object and also to remove from the
187 			 * object cache.
188 			 */
189 			if (vm_object_lookup(pager) == NULL)
190 				panic("swap_pager_alloc: bad object");
191 			return(pager);
192 		}
193 	}
194 	/*
195 	 * Pager doesn't exist, allocate swap management resources
196 	 * and initialize.
197 	 */
198 	waitok = handle ? M_WAITOK : M_NOWAIT;
199 	pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
200 	if (pager == NULL)
201 		return(NULL);
202 	swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
203 	if (swp == NULL) {
204 #ifdef DEBUG
205 		if (swpagerdebug & SDB_FAIL)
206 			printf("swpg_alloc: swpager malloc failed\n");
207 #endif
208 		free((caddr_t)pager, M_VMPAGER);
209 		return(NULL);
210 	}
211 	size = round_page(size);
212 	for (swt = swtab; swt->st_osize; swt++)
213 		if (size <= swt->st_osize)
214 			break;
215 #ifdef DEBUG
216 	swt->st_inuse++;
217 	swt->st_usecnt++;
218 #endif
219 	swp->sw_osize = size;
220 	swp->sw_bsize = swt->st_bsize;
221 	swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize;
222 	swp->sw_blocks = (sw_blk_t)
223 		malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
224 		       M_VMPGDATA, M_NOWAIT);
225 	if (swp->sw_blocks == NULL) {
226 		free((caddr_t)swp, M_VMPGDATA);
227 		free((caddr_t)pager, M_VMPAGER);
228 #ifdef DEBUG
229 		if (swpagerdebug & SDB_FAIL)
230 			printf("swpg_alloc: sw_blocks malloc failed\n");
231 		swt->st_inuse--;
232 		swt->st_usecnt--;
233 #endif
234 		return(FALSE);
235 	}
236 	bzero((caddr_t)swp->sw_blocks,
237 	      swp->sw_nblocks * sizeof(*swp->sw_blocks));
238 	swp->sw_poip = 0;
239 	if (handle) {
240 		vm_object_t object;
241 
242 		swp->sw_flags = SW_NAMED;
243 		queue_enter(&swap_pager_list, pager, vm_pager_t, pg_list);
244 		/*
245 		 * Consistant with other pagers: return with object
246 		 * referenced.  Can't do this with handle == NULL
247 		 * since it might be the pageout daemon calling.
248 		 */
249 		object = vm_object_allocate(size);
250 		vm_object_enter(object, pager);
251 		vm_object_setpager(object, pager, 0, FALSE);
252 	} else {
253 		swp->sw_flags = 0;
254 		queue_init(&pager->pg_list);
255 	}
256 	pager->pg_handle = handle;
257 	pager->pg_ops = &swappagerops;
258 	pager->pg_type = PG_SWAP;
259 	pager->pg_data = (caddr_t)swp;
260 
261 #ifdef DEBUG
262 	if (swpagerdebug & SDB_ALLOC)
263 		printf("swpg_alloc: pg_data %x, %x of %x at %x\n",
264 		       swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks);
265 #endif
266 	return(pager);
267 }
268 
/*
 * Tear down a swap pager: wait for pending pageouts, return its swap
 * blocks to the swap map, and free all management structures.
 */
void
swap_pager_dealloc(pager)
	vm_pager_t pager;
{
	register int i;
	register sw_blk_t bp;
	register sw_pager_t swp;
	struct swtab *swt;
	int s;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return;
	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
		printf("swpg_dealloc(%x)\n", pager);
#endif
	/*
	 * Remove from list right away so lookups will fail if we
	 * block for pageout completion.
	 */
	swp = (sw_pager_t) pager->pg_data;
	if (swp->sw_flags & SW_NAMED) {
		queue_remove(&swap_pager_list, pager, vm_pager_t, pg_list);
		swp->sw_flags &= ~SW_NAMED;
	}
#ifdef DEBUG
	/* account this pager out of its swtab size bucket */
	for (swt = swtab; swt->st_osize; swt++)
		if (swp->sw_osize <= swt->st_osize)
			break;
	swt->st_inuse--;
#endif

	/*
	 * Wait for all pageouts to finish and remove
	 * all entries from cleaning list.
	 * swap_pager_iodone() decrements sw_poip and wakes us via
	 * the SW_WANTED flag.
	 */
	s = splbio();
	while (swp->sw_poip) {
		swp->sw_flags |= SW_WANTED;
		assert_wait((int)swp);
		thread_block();
	}
	splx(s);
	(void) swap_pager_clean(NULL, B_WRITE);

	/*
	 * Free left over swap blocks back to the global swap map.
	 */
	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++)
		if (bp->swb_block) {
#ifdef DEBUG
			if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL))
				printf("swpg_dealloc: blk %x\n",
				       bp->swb_block);
#endif
			rmfree(swapmap, swp->sw_bsize, bp->swb_block);
		}
	/*
	 * Free swap management resources
	 */
	free((caddr_t)swp->sw_blocks, M_VMPGDATA);
	free((caddr_t)swp, M_VMPGDATA);
	free((caddr_t)pager, M_VMPAGER);
}
334 
335 swap_pager_getpage(pager, m, sync)
336 	vm_pager_t pager;
337 	vm_page_t m;
338 	boolean_t sync;
339 {
340 #ifdef DEBUG
341 	if (swpagerdebug & SDB_FOLLOW)
342 		printf("swpg_getpage(%x, %x, %d)\n", pager, m, sync);
343 #endif
344 	return(swap_pager_io((sw_pager_t)pager->pg_data, m, B_READ));
345 }
346 
347 swap_pager_putpage(pager, m, sync)
348 	vm_pager_t pager;
349 	vm_page_t m;
350 	boolean_t sync;
351 {
352 	int flags;
353 
354 #ifdef DEBUG
355 	if (swpagerdebug & SDB_FOLLOW)
356 		printf("swpg_putpage(%x, %x, %d)\n", pager, m, sync);
357 #endif
358 	if (pager == NULL) {
359 		(void) swap_pager_clean(NULL, B_WRITE);
360 		return;
361 	}
362 	flags = B_WRITE;
363 	if (!sync)
364 		flags |= B_ASYNC;
365 	return(swap_pager_io((sw_pager_t)pager->pg_data, m, flags));
366 }
367 
368 boolean_t
369 swap_pager_haspage(pager, offset)
370 	vm_pager_t pager;
371 	vm_offset_t offset;
372 {
373 	register sw_pager_t swp;
374 	register sw_blk_t swb;
375 	int ix;
376 
377 #ifdef DEBUG
378 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
379 		printf("swpg_haspage(%x, %x) ", pager, offset);
380 #endif
381 	swp = (sw_pager_t) pager->pg_data;
382 	ix = offset / dbtob(swp->sw_bsize);
383 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
384 #ifdef DEBUG
385 		if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK))
386 			printf("swpg_haspage: %x bad offset %x, ix %x\n",
387 			       swp->sw_blocks, offset, ix);
388 #endif
389 		return(FALSE);
390 	}
391 	swb = &swp->sw_blocks[ix];
392 	if (swb->swb_block)
393 		ix = atop(offset % dbtob(swp->sw_bsize));
394 #ifdef DEBUG
395 	if (swpagerdebug & SDB_ALLOCBLK)
396 		printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix);
397 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
398 		printf("-> %c\n",
399 		       "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]);
400 #endif
401 	if (swb->swb_block && (swb->swb_mask & (1 << ix)))
402 		return(TRUE);
403 	return(FALSE);
404 }
405 
406 /*
407  * Scaled down version of swap().
408  * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed.
409  * BOGUS:  lower level IO routines expect a KVA so we have to map our
410  * provided physical page into the KVA to keep them happy.
411  */
412 swap_pager_io(swp, m, flags)
413 	register sw_pager_t swp;
414 	vm_page_t m;
415 	int flags;
416 {
417 	register struct buf *bp;
418 	register sw_blk_t swb;
419 	register int s;
420 	int ix;
421 	boolean_t rv;
422 	vm_offset_t kva, off;
423 	swp_clean_t spc;
424 
425 #ifdef DEBUG
426 	/* save panic time state */
427 	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
428 		return;
429 	if (swpagerdebug & (SDB_FOLLOW|SDB_IO))
430 		printf("swpg_io(%x, %x, %x)\n", swp, m, flags);
431 #endif
432 
433 	/*
434 	 * For reads (pageins) and synchronous writes, we clean up
435 	 * all completed async pageouts.
436 	 */
437 	if ((flags & B_ASYNC) == 0) {
438 		s = splbio();
439 #ifdef DEBUG
440 		/*
441 		 * Check to see if this page is currently being cleaned.
442 		 * If it is, we just wait til the operation is done before
443 		 * continuing.
444 		 */
445 		while (swap_pager_clean(m, flags&B_READ)) {
446 			if (swpagerdebug & SDB_ANOM)
447 				printf("swap_pager_io: page %x cleaning\n", m);
448 
449 			swp->sw_flags |= SW_WANTED;
450 			assert_wait((int)swp);
451 			thread_block();
452 		}
453 #else
454 		(void) swap_pager_clean(m, flags&B_READ);
455 #endif
456 		splx(s);
457 	}
458 	/*
459 	 * For async writes (pageouts), we cleanup completed pageouts so
460 	 * that all available resources are freed.  Also tells us if this
461 	 * page is already being cleaned.  If it is, or no resources
462 	 * are available, we try again later.
463 	 */
464 	else if (swap_pager_clean(m, B_WRITE) ||
465 		 queue_empty(&swap_pager_free)) {
466 #ifdef DEBUG
467 		if ((swpagerdebug & SDB_ANOM) &&
468 		    !queue_empty(&swap_pager_free))
469 			printf("swap_pager_io: page %x already cleaning\n", m);
470 #endif
471 		return(VM_PAGER_FAIL);
472 	}
473 
474 	/*
475 	 * Determine swap block and allocate as necessary.
476 	 */
477 	off = m->offset + m->object->paging_offset;
478 	ix = off / dbtob(swp->sw_bsize);
479 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
480 #ifdef DEBUG
481 		if (swpagerdebug & SDB_FAIL)
482 			printf("swpg_io: bad offset %x+%x(%d) in %x\n",
483 			       m->offset, m->object->paging_offset,
484 			       ix, swp->sw_blocks);
485 #endif
486 		return(VM_PAGER_FAIL);
487 	}
488 	swb = &swp->sw_blocks[ix];
489 	off = off % dbtob(swp->sw_bsize);
490 	if (flags & B_READ) {
491 		if (swb->swb_block == 0 ||
492 		    (swb->swb_mask & (1 << atop(off))) == 0) {
493 #ifdef DEBUG
494 			if (swpagerdebug & (SDB_ALLOCBLK|SDB_FAIL))
495 				printf("swpg_io: %x bad read: blk %x+%x, mask %x, off %x+%x\n",
496 				       swp->sw_blocks,
497 				       swb->swb_block, atop(off),
498 				       swb->swb_mask,
499 				       m->offset, m->object->paging_offset);
500 #endif
501 			/* XXX: should we zero page here?? */
502 			return(VM_PAGER_FAIL);
503 		}
504 	} else if (swb->swb_block == 0) {
505 		swb->swb_block = rmalloc(swapmap, swp->sw_bsize);
506 		if (swb->swb_block == 0) {
507 #ifdef DEBUG
508 			if (swpagerdebug & SDB_FAIL)
509 				printf("swpg_io: rmalloc of %x failed\n",
510 				       swp->sw_bsize);
511 #endif
512 			return(VM_PAGER_FAIL);
513 		}
514 #ifdef DEBUG
515 		if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK))
516 			printf("swpg_io: %x alloc blk %x at ix %x\n",
517 			       swp->sw_blocks, swb->swb_block, ix);
518 #endif
519 	}
520 
521 	/*
522 	 * Allocate a kernel virtual address and initialize so that PTE
523 	 * is available for lower level IO drivers.
524 	 */
525 	kva = vm_pager_map_page(m);
526 
527 	/*
528 	 * Get a swap buffer header and perform the IO
529 	 */
530 	s = splbio();
531 	while (bswlist.av_forw == NULL) {
532 #ifdef DEBUG
533 		if (swpagerdebug & SDB_ANOM)
534 			printf("swap_pager_io: wait on swbuf for %x (%d)\n",
535 			       m, flags);
536 #endif
537 		bswlist.b_flags |= B_WANTED;
538 		sleep((caddr_t)&bswlist, PSWP+1);
539 	}
540 	bp = bswlist.av_forw;
541 	bswlist.av_forw = bp->av_forw;
542 	splx(s);
543 	bp->b_flags = B_BUSY | (flags & B_READ);
544 	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
545 	bp->b_un.b_addr = (caddr_t)kva;
546 	bp->b_blkno = swb->swb_block + btodb(off);
547 	VHOLD(swapdev_vp);
548 	bp->b_vp = swapdev_vp;
549 	if (swapdev_vp->v_type == VBLK)
550 		bp->b_dev = swapdev_vp->v_rdev;
551 	bp->b_bcount = PAGE_SIZE;
552 	if ((bp->b_flags & B_READ) == 0)
553 		swapdev_vp->v_numoutput++;
554 
555 	/*
556 	 * If this is an async write we set up additional buffer fields
557 	 * and place a "cleaning" entry on the inuse queue.
558 	 */
559 	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
560 #ifdef DEBUG
561 		if (queue_empty(&swap_pager_free))
562 			panic("swpg_io: lost spc");
563 #endif
564 		queue_remove_first(&swap_pager_free,
565 				   spc, swp_clean_t, spc_list);
566 #ifdef DEBUG
567 		if (spc->spc_flags != SPC_FREE)
568 			panic("swpg_io: bad free spc");
569 #endif
570 		spc->spc_flags = SPC_BUSY;
571 		spc->spc_bp = bp;
572 		spc->spc_swp = swp;
573 		spc->spc_kva = kva;
574 		spc->spc_m = m;
575 		bp->b_flags |= B_CALL;
576 		bp->b_iodone = swap_pager_iodone;
577 		s = splbio();
578 		swp->sw_poip++;
579 		queue_enter(&swap_pager_inuse, spc, swp_clean_t, spc_list);
580 
581 #ifdef DEBUG
582 		swap_pager_poip++;
583 		if (swpagerdebug & SDB_WRITE)
584 			printf("swpg_io: write: bp=%x swp=%x spc=%x poip=%d\n",
585 			       bp, swp, spc, swp->sw_poip);
586 		if ((swpagerdebug & SDB_ALLOCBLK) &&
587 		    (swb->swb_mask & (1 << atop(off))) == 0)
588 			printf("swpg_io: %x write blk %x+%x\n",
589 			       swp->sw_blocks, swb->swb_block, atop(off));
590 #endif
591 		swb->swb_mask |= (1 << atop(off));
592 		splx(s);
593 	}
594 #ifdef DEBUG
595 	if (swpagerdebug & SDB_IO)
596 		printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n",
597 		       bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m));
598 #endif
599 	VOP_STRATEGY(bp);
600 	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
601 #ifdef DEBUG
602 		if (swpagerdebug & SDB_IO)
603 			printf("swpg_io:  IO started: bp %x\n", bp);
604 #endif
605 		return(VM_PAGER_PEND);
606 	}
607 	s = splbio();
608 #ifdef DEBUG
609 	if (flags & B_READ)
610 		swap_pager_piip++;
611 	else
612 		swap_pager_poip++;
613 #endif
614 	while ((bp->b_flags & B_DONE) == 0) {
615 		assert_wait((int)bp);
616 		thread_block();
617 	}
618 #ifdef DEBUG
619 	if (flags & B_READ)
620 		--swap_pager_piip;
621 	else
622 		--swap_pager_poip;
623 #endif
624 	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
625 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
626 	bp->av_forw = bswlist.av_forw;
627 	bswlist.av_forw = bp;
628 	if (bp->b_vp)
629 		brelvp(bp);
630 	if (bswlist.b_flags & B_WANTED) {
631 		bswlist.b_flags &= ~B_WANTED;
632 		thread_wakeup((int)&bswlist);
633 	}
634 	if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) {
635 		m->clean = TRUE;
636 		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
637 	}
638 	splx(s);
639 #ifdef DEBUG
640 	if (swpagerdebug & SDB_IO)
641 		printf("swpg_io:  IO done: bp %x, rv %d\n", bp, rv);
642 	if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_FAIL)
643 		printf("swpg_io: IO error\n");
644 #endif
645 	vm_pager_unmap_page(kva);
646 	return(rv);
647 }
648 
649 boolean_t
650 swap_pager_clean(m, rw)
651 	vm_page_t m;
652 	int rw;
653 {
654 	register swp_clean_t spc, tspc;
655 	register int s;
656 
657 #ifdef DEBUG
658 	/* save panic time state */
659 	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
660 		return;
661 	if (swpagerdebug & SDB_FOLLOW)
662 		printf("swpg_clean(%x, %d)\n", m, rw);
663 #endif
664 	tspc = NULL;
665 	for (;;) {
666 		/*
667 		 * Look up and removal from inuse list must be done
668 		 * at splbio() to avoid conflicts with swap_pager_iodone.
669 		 */
670 		s = splbio();
671 		spc = (swp_clean_t) queue_first(&swap_pager_inuse);
672 		while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
673 			if ((spc->spc_flags & SPC_DONE) &&
674 			    swap_pager_finish(spc)) {
675 				queue_remove(&swap_pager_inuse, spc,
676 					     swp_clean_t, spc_list);
677 				break;
678 			}
679 			if (m && m == spc->spc_m) {
680 #ifdef DEBUG
681 				if (swpagerdebug & SDB_ANOM)
682 					printf("swap_pager_clean: page %x on list, flags %x\n",
683 					       m, spc->spc_flags);
684 #endif
685 				tspc = spc;
686 			}
687 			spc = (swp_clean_t) queue_next(&spc->spc_list);
688 		}
689 
690 		/*
691 		 * No operations done, thats all we can do for now.
692 		 */
693 		if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
694 			break;
695 		splx(s);
696 
697 		/*
698 		 * The desired page was found to be busy earlier in
699 		 * the scan but has since completed.
700 		 */
701 		if (tspc && tspc == spc) {
702 #ifdef DEBUG
703 			if (swpagerdebug & SDB_ANOM)
704 				printf("swap_pager_clean: page %x done while looking\n",
705 				       m);
706 #endif
707 			tspc = NULL;
708 		}
709 		spc->spc_flags = SPC_FREE;
710 		vm_pager_unmap_page(spc->spc_kva);
711 		queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
712 #ifdef DEBUG
713 		if (swpagerdebug & SDB_WRITE)
714 			printf("swpg_clean: free spc %x\n", spc);
715 #endif
716 	}
717 #ifdef DEBUG
718 	/*
719 	 * If we found that the desired page is already being cleaned
720 	 * mark it so that swap_pager_iodone() will not set the clean
721 	 * flag before the pageout daemon has another chance to clean it.
722 	 */
723 	if (tspc && rw == B_WRITE) {
724 		if (swpagerdebug & SDB_ANOM)
725 			printf("swap_pager_clean: page %x on clean list\n",
726 			       tspc);
727 		tspc->spc_flags |= SPC_DIRTY;
728 	}
729 #endif
730 	splx(s);
731 
732 #ifdef DEBUG
733 	if (swpagerdebug & SDB_WRITE)
734 		printf("swpg_clean: return %d\n", tspc ? TRUE : FALSE);
735 	if ((swpagerdebug & SDB_ANOM) && tspc)
736 		printf("swpg_clean: %s of cleaning page %x\n",
737 		       rw == B_READ ? "get" : "put", m);
738 #endif
739 	return(tspc ? TRUE : FALSE);
740 }
741 
/*
 * Complete the VM bookkeeping for one finished async pageout.
 * Returns 1 when the entry has been fully processed, 0 when the
 * object lock could not be obtained (caller will retry later).
 */
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	vm_object_t object = spc->spc_m->object;

	/*
	 * Mark the paging operation as done.
	 * (XXX) If we cannot get the lock, leave it til later.
	 * (XXX) Also we are assuming that an async write is a
	 *       pageout operation that has incremented the counter.
	 */
	if (!vm_object_lock_try(object))
		return(0);

	if (--object->paging_in_progress == 0)
		thread_wakeup((int) object);

#ifdef DEBUG
	/*
	 * XXX: this isn't even close to the right thing to do,
	 * introduces a variety of race conditions.
	 *
	 * If dirty, vm_pageout() has attempted to clean the page
	 * again.  In this case we do not do anything as we will
	 * see the page again shortly.
	 */
	if (spc->spc_flags & SPC_DIRTY) {
		if (swpagerdebug & SDB_ANOM)
			printf("swap_pager_finish: page %x dirty again\n",
			       spc->spc_m);
		spc->spc_m->busy = FALSE;
		PAGE_WAKEUP(spc->spc_m);
		vm_object_unlock(object);
		return(1);
	}
#endif
	/*
	 * If no error mark as clean and inform the pmap system.
	 * If error, mark as dirty so we will try again.
	 * (XXX could get stuck doing this, should give up after awhile)
	 */
	if (spc->spc_flags & SPC_ERROR) {
		printf("swap_pager_finish: clean of page %x failed\n",
		       VM_PAGE_TO_PHYS(spc->spc_m));
		/* laundry: pageout daemon will pick the page up again */
		spc->spc_m->laundry = TRUE;
	} else {
		spc->spc_m->clean = TRUE;
		pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m));
	}
	spc->spc_m->busy = FALSE;
	PAGE_WAKEUP(spc->spc_m);

	vm_object_unlock(object);
	return(1);
}
797 
/*
 * Async pageout completion handler, called via B_CALL/b_iodone at
 * splbio time: mark the matching clean-list entry done, recycle the
 * swap buffer header, and wake anyone waiting on the pager or the
 * pageout daemon.
 */
swap_pager_iodone(bp)
	register struct buf *bp;
{
	register swp_clean_t spc;
	daddr_t blk;
	int s;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return;
	if (swpagerdebug & SDB_FOLLOW)
		printf("swpg_iodone(%x)\n", bp);
#endif
	s = splbio();
	/* find the inuse entry that owns this buffer */
	spc = (swp_clean_t) queue_first(&swap_pager_inuse);
	while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
		if (spc->spc_bp == bp)
			break;
		spc = (swp_clean_t) queue_next(&spc->spc_list);
	}
#ifdef DEBUG
	/* NOTE(review): without DEBUG a missing entry goes undetected here */
	if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
		panic("swap_pager_iodone: bp not found");
#endif

	spc->spc_flags &= ~SPC_BUSY;
	spc->spc_flags |= SPC_DONE;
	if (bp->b_flags & B_ERROR)
		spc->spc_flags |= SPC_ERROR;
	spc->spc_bp = NULL;
	blk = bp->b_blkno;

#ifdef DEBUG
	--swap_pager_poip;
	if (swpagerdebug & SDB_WRITE)
		printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n",
		       bp, spc->spc_swp, spc->spc_swp->sw_flags,
		       spc, spc->spc_swp->sw_poip);
#endif

	/* one fewer pageout in progress; dealloc may be waiting on this */
	spc->spc_swp->sw_poip--;
	if (spc->spc_swp->sw_flags & SW_WANTED) {
		spc->spc_swp->sw_flags &= ~SW_WANTED;
		thread_wakeup((int)spc->spc_swp);
	}

	/* return the buffer header to the swap buffer free list */
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->av_forw = bswlist.av_forw;
	bswlist.av_forw = bp;
	if (bp->b_vp)
		brelvp(bp);
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		thread_wakeup((int)&bswlist);
	}
	/* a clean structure is now free; nudge the pageout daemon */
	thread_wakeup((int) &vm_pages_needed);
	splx(s);
}
857 #endif
858