xref: /original-bsd/sys/vm/swap_pager.c (revision 3705696b)
1 /*
2  * Copyright (c) 1990 University of Utah.
3  * Copyright (c) 1991, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * the Systems Programming Group of the University of Utah Computer
8  * Science Department.
9  *
10  * %sccs.include.redist.c%
11  *
12  * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
13  *
14  *	@(#)swap_pager.c	8.1 (Berkeley) 06/11/93
15  */
16 
17 /*
18  * Quick hack to page to dedicated partition(s).
19  * TODO:
20  *	Add multiprocessor locks
21  *	Deal with async writes in a better fashion
22  */
23 
24 #include <sys/param.h>
25 #include <sys/systm.h>
26 #include <sys/proc.h>
27 #include <sys/buf.h>
28 #include <sys/map.h>
29 #include <sys/vnode.h>
30 #include <sys/malloc.h>
31 
32 #include <miscfs/specfs/specdev.h>
33 
34 #include <vm/vm.h>
35 #include <vm/vm_page.h>
36 #include <vm/vm_pageout.h>
37 #include <vm/swap_pager.h>
38 
39 #define NSWSIZES	16	/* size of swtab */
40 #define NPENDINGIO	64	/* max # of pending cleans */
41 #define MAXDADDRS	64	/* max # of disk addrs for fixed allocations */
42 
43 #ifdef DEBUG
44 int	swpagerdebug = 0x100;
45 #define	SDB_FOLLOW	0x001
46 #define SDB_INIT	0x002
47 #define SDB_ALLOC	0x004
48 #define SDB_IO		0x008
49 #define SDB_WRITE	0x010
50 #define SDB_FAIL	0x020
51 #define SDB_ALLOCBLK	0x040
52 #define SDB_FULL	0x080
53 #define SDB_ANOM	0x100
54 #define SDB_ANOMPANIC	0x200
55 #endif
56 
/*
 * A swap pager "clean" record tracks one pending asynchronous
 * pageout.  Records live in the fixed pool swcleanlist[], which
 * bounds the number of simultaneously outstanding pageouts at
 * NPENDINGIO; free records sit on swap_pager_free, active ones
 * on swap_pager_inuse.
 */
struct swpagerclean {
	queue_head_t		spc_list;	/* links on free/inuse lists */
	int			spc_flags;	/* SPC_* state flags (below) */
	struct buf		*spc_bp;	/* buffer performing the I/O */
	sw_pager_t		spc_swp;	/* pager the page belongs to */
	vm_offset_t		spc_kva;	/* kernel va page is mapped at */
	vm_page_t		spc_m;		/* the page being paged out */
} swcleanlist[NPENDINGIO];
typedef struct swpagerclean *swp_clean_t;
66 
67 
68 /* spc_flags values */
69 #define SPC_FREE	0x00
70 #define SPC_BUSY	0x01
71 #define SPC_DONE	0x02
72 #define SPC_ERROR	0x04
73 #define SPC_DIRTY	0x08
74 
75 struct swtab {
76 	vm_size_t st_osize;	/* size of object (bytes) */
77 	int	  st_bsize;	/* vs. size of swap block (DEV_BSIZE units) */
78 #ifdef DEBUG
79 	u_long	  st_inuse;	/* number in this range in use */
80 	u_long	  st_usecnt;	/* total used of this size */
81 #endif
82 } swtab[NSWSIZES+1];
83 
84 #ifdef DEBUG
85 int		swap_pager_pendingio;	/* max pending async "clean" ops */
86 int		swap_pager_poip;	/* pageouts in progress */
87 int		swap_pager_piip;	/* pageins in progress */
88 #endif
89 
90 queue_head_t	swap_pager_inuse;	/* list of pending page cleans */
91 queue_head_t	swap_pager_free;	/* list of free pager clean structs */
92 queue_head_t	swap_pager_list;	/* list of "named" anon regions */
93 
94 static int		swap_pager_finish __P((swp_clean_t));
95 static void 		swap_pager_init __P((void));
96 static vm_pager_t	swap_pager_alloc __P((caddr_t, vm_size_t, vm_prot_t));
97 static boolean_t	swap_pager_clean __P((vm_page_t, int));
98 static void		swap_pager_dealloc __P((vm_pager_t));
99 static int		swap_pager_getpage
100 			    __P((vm_pager_t, vm_page_t, boolean_t));
101 static boolean_t	swap_pager_haspage __P((vm_pager_t, vm_offset_t));
102 static int		swap_pager_io __P((sw_pager_t, vm_page_t, int));
103 static void		swap_pager_iodone __P((struct buf *));
104 static int		swap_pager_putpage
105 			    __P((vm_pager_t, vm_page_t, boolean_t));
106 
/*
 * Pager operations vector exported to the VM system
 * (installed as dfltpagerops in swap_pager_init).
 */
struct pagerops swappagerops = {
	swap_pager_init,
	swap_pager_alloc,
	swap_pager_dealloc,
	swap_pager_getpage,
	swap_pager_putpage,
	swap_pager_haspage
};
115 
/*
 * One-time pager initialization: install swappagerops as the default
 * pager, set up the pageout-clean lists, fix up the dmmin/dmmax swap
 * allocation constants, and build the swtab[] table mapping object
 * size to the per-object swap block size (in DEV_BSIZE units).
 */
static void
swap_pager_init()
{
	register swp_clean_t spc;
	register int i, bsize;
	extern int dmmin, dmmax;
	int maxbsize;

#ifdef DEBUG
	if (swpagerdebug & (SDB_FOLLOW|SDB_INIT))
		printf("swpg_init()\n");
#endif
	dfltpagerops = &swappagerops;
	queue_init(&swap_pager_list);

	/*
	 * Initialize clean lists: all records start on the free list.
	 */
	queue_init(&swap_pager_inuse);
	queue_init(&swap_pager_free);
	for (i = 0, spc = swcleanlist; i < NPENDINGIO; i++, spc++) {
		queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
		spc->spc_flags = SPC_FREE;
	}

	/*
	 * Calculate the swap allocation constants.
	 * dmmin must cover at least one VM cluster.
	 */
        if (dmmin == 0) {
                dmmin = DMMIN;
		if (dmmin < CLBYTES/DEV_BSIZE)
			dmmin = CLBYTES/DEV_BSIZE;
	}
        if (dmmax == 0)
                dmmax = DMMAX;

	/*
	 * Fill in our table of object size vs. allocation size.
	 * Block sizes double per entry, from one page's worth (but at
	 * least dmmin) up to maxbsize, bounded so that swb_mask (one
	 * bit per page) can still cover a whole block.
	 */
	bsize = btodb(PAGE_SIZE);
	if (bsize < dmmin)
		bsize = dmmin;
	maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE);
	if (maxbsize > dmmax)
		maxbsize = dmmax;
	for (i = 0; i < NSWSIZES; i++) {
		swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize));
		swtab[i].st_bsize = bsize;
#ifdef DEBUG
		if (swpagerdebug & SDB_INIT)
			printf("swpg_init: ix %d, size %x, bsize %x\n",
			       i, swtab[i].st_osize, swtab[i].st_bsize);
#endif
		if (bsize >= maxbsize)
			break;
		bsize *= 2;
	}
	/* sentinel entry: osize 0 terminates lookups, catches any size */
	swtab[i].st_osize = 0;
	swtab[i].st_bsize = bsize;
}
176 
177 /*
178  * Allocate a pager structure and associated resources.
179  * Note that if we are called from the pageout daemon (handle == NULL)
180  * we should not wait for memory as it could resulting in deadlock.
181  */
182 static vm_pager_t
183 swap_pager_alloc(handle, size, prot)
184 	caddr_t handle;
185 	register vm_size_t size;
186 	vm_prot_t prot;
187 {
188 	register vm_pager_t pager;
189 	register sw_pager_t swp;
190 	struct swtab *swt;
191 	int waitok;
192 
193 #ifdef DEBUG
194 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
195 		printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot);
196 #endif
197 	/*
198 	 * If this is a "named" anonymous region, look it up and
199 	 * return the appropriate pager if it exists.
200 	 */
201 	if (handle) {
202 		pager = vm_pager_lookup(&swap_pager_list, handle);
203 		if (pager != NULL) {
204 			/*
205 			 * Use vm_object_lookup to gain a reference
206 			 * to the object and also to remove from the
207 			 * object cache.
208 			 */
209 			if (vm_object_lookup(pager) == NULL)
210 				panic("swap_pager_alloc: bad object");
211 			return(pager);
212 		}
213 	}
214 	/*
215 	 * Pager doesn't exist, allocate swap management resources
216 	 * and initialize.
217 	 */
218 	waitok = handle ? M_WAITOK : M_NOWAIT;
219 	pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
220 	if (pager == NULL)
221 		return(NULL);
222 	swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
223 	if (swp == NULL) {
224 #ifdef DEBUG
225 		if (swpagerdebug & SDB_FAIL)
226 			printf("swpg_alloc: swpager malloc failed\n");
227 #endif
228 		free((caddr_t)pager, M_VMPAGER);
229 		return(NULL);
230 	}
231 	size = round_page(size);
232 	for (swt = swtab; swt->st_osize; swt++)
233 		if (size <= swt->st_osize)
234 			break;
235 #ifdef DEBUG
236 	swt->st_inuse++;
237 	swt->st_usecnt++;
238 #endif
239 	swp->sw_osize = size;
240 	swp->sw_bsize = swt->st_bsize;
241 	swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize;
242 	swp->sw_blocks = (sw_blk_t)
243 		malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
244 		       M_VMPGDATA, M_NOWAIT);
245 	if (swp->sw_blocks == NULL) {
246 		free((caddr_t)swp, M_VMPGDATA);
247 		free((caddr_t)pager, M_VMPAGER);
248 #ifdef DEBUG
249 		if (swpagerdebug & SDB_FAIL)
250 			printf("swpg_alloc: sw_blocks malloc failed\n");
251 		swt->st_inuse--;
252 		swt->st_usecnt--;
253 #endif
254 		return(FALSE);
255 	}
256 	bzero((caddr_t)swp->sw_blocks,
257 	      swp->sw_nblocks * sizeof(*swp->sw_blocks));
258 	swp->sw_poip = 0;
259 	if (handle) {
260 		vm_object_t object;
261 
262 		swp->sw_flags = SW_NAMED;
263 		queue_enter(&swap_pager_list, pager, vm_pager_t, pg_list);
264 		/*
265 		 * Consistant with other pagers: return with object
266 		 * referenced.  Can't do this with handle == NULL
267 		 * since it might be the pageout daemon calling.
268 		 */
269 		object = vm_object_allocate(size);
270 		vm_object_enter(object, pager);
271 		vm_object_setpager(object, pager, 0, FALSE);
272 	} else {
273 		swp->sw_flags = 0;
274 		queue_init(&pager->pg_list);
275 	}
276 	pager->pg_handle = handle;
277 	pager->pg_ops = &swappagerops;
278 	pager->pg_type = PG_SWAP;
279 	pager->pg_data = (caddr_t)swp;
280 
281 #ifdef DEBUG
282 	if (swpagerdebug & SDB_ALLOC)
283 		printf("swpg_alloc: pg_data %x, %x of %x at %x\n",
284 		       swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks);
285 #endif
286 	return(pager);
287 }
288 
/*
 * Release a swap pager: wait out any pending pageouts, reap the
 * completed ones, return its swap blocks to the swap map, and free
 * the pager structures.
 */
static void
swap_pager_dealloc(pager)
	vm_pager_t pager;
{
	register int i;
	register sw_blk_t bp;
	register sw_pager_t swp;
	struct swtab *swt;
	int s;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return;
	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
		printf("swpg_dealloc(%x)\n", pager);
#endif
	/*
	 * Remove from list right away so lookups will fail if we
	 * block for pageout completion.
	 */
	swp = (sw_pager_t) pager->pg_data;
	if (swp->sw_flags & SW_NAMED) {
		queue_remove(&swap_pager_list, pager, vm_pager_t, pg_list);
		swp->sw_flags &= ~SW_NAMED;
	}
#ifdef DEBUG
	/* statistics: one fewer object of this size class in use */
	for (swt = swtab; swt->st_osize; swt++)
		if (swp->sw_osize <= swt->st_osize)
			break;
	swt->st_inuse--;
#endif

	/*
	 * Wait for all pageouts to finish and remove
	 * all entries from cleaning list.
	 * swap_pager_iodone() wakes us as sw_poip drains.
	 */
	s = splbio();
	while (swp->sw_poip) {
		swp->sw_flags |= SW_WANTED;
		assert_wait((int)swp, 0);
		thread_block();
	}
	splx(s);
	(void) swap_pager_clean(NULL, B_WRITE);

	/*
	 * Free left over swap blocks
	 */
	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++)
		if (bp->swb_block) {
#ifdef DEBUG
			if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL))
				printf("swpg_dealloc: blk %x\n",
				       bp->swb_block);
#endif
			rmfree(swapmap, swp->sw_bsize, bp->swb_block);
		}
	/*
	 * Free swap management resources
	 */
	free((caddr_t)swp->sw_blocks, M_VMPGDATA);
	free((caddr_t)swp, M_VMPGDATA);
	free((caddr_t)pager, M_VMPAGER);
}
354 
355 static int
356 swap_pager_getpage(pager, m, sync)
357 	vm_pager_t pager;
358 	vm_page_t m;
359 	boolean_t sync;
360 {
361 #ifdef DEBUG
362 	if (swpagerdebug & SDB_FOLLOW)
363 		printf("swpg_getpage(%x, %x, %d)\n", pager, m, sync);
364 #endif
365 	return(swap_pager_io((sw_pager_t)pager->pg_data, m, B_READ));
366 }
367 
368 static int
369 swap_pager_putpage(pager, m, sync)
370 	vm_pager_t pager;
371 	vm_page_t m;
372 	boolean_t sync;
373 {
374 	int flags;
375 
376 #ifdef DEBUG
377 	if (swpagerdebug & SDB_FOLLOW)
378 		printf("swpg_putpage(%x, %x, %d)\n", pager, m, sync);
379 #endif
380 	if (pager == NULL) {
381 		(void) swap_pager_clean(NULL, B_WRITE);
382 		return (VM_PAGER_OK);		/* ??? */
383 	}
384 	flags = B_WRITE;
385 	if (!sync)
386 		flags |= B_ASYNC;
387 	return(swap_pager_io((sw_pager_t)pager->pg_data, m, flags));
388 }
389 
390 static boolean_t
391 swap_pager_haspage(pager, offset)
392 	vm_pager_t pager;
393 	vm_offset_t offset;
394 {
395 	register sw_pager_t swp;
396 	register sw_blk_t swb;
397 	int ix;
398 
399 #ifdef DEBUG
400 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
401 		printf("swpg_haspage(%x, %x) ", pager, offset);
402 #endif
403 	swp = (sw_pager_t) pager->pg_data;
404 	ix = offset / dbtob(swp->sw_bsize);
405 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
406 #ifdef DEBUG
407 		if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK))
408 			printf("swpg_haspage: %x bad offset %x, ix %x\n",
409 			       swp->sw_blocks, offset, ix);
410 #endif
411 		return(FALSE);
412 	}
413 	swb = &swp->sw_blocks[ix];
414 	if (swb->swb_block)
415 		ix = atop(offset % dbtob(swp->sw_bsize));
416 #ifdef DEBUG
417 	if (swpagerdebug & SDB_ALLOCBLK)
418 		printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix);
419 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
420 		printf("-> %c\n",
421 		       "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]);
422 #endif
423 	if (swb->swb_block && (swb->swb_mask & (1 << ix)))
424 		return(TRUE);
425 	return(FALSE);
426 }
427 
428 /*
429  * Scaled down version of swap().
430  * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed.
431  * BOGUS:  lower level IO routines expect a KVA so we have to map our
432  * provided physical page into the KVA to keep them happy.
433  */
static int
swap_pager_io(swp, m, flags)
	register sw_pager_t swp;
	vm_page_t m;
	int flags;
{
	register struct buf *bp;
	register sw_blk_t swb;
	register int s;
	int ix;
	boolean_t rv;		/* XXX holds VM_PAGER_* codes, not a boolean */
	vm_offset_t kva, off;
	swp_clean_t spc;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return (VM_PAGER_FAIL);		/* XXX: correct return? */
	if (swpagerdebug & (SDB_FOLLOW|SDB_IO))
		printf("swpg_io(%x, %x, %x)\n", swp, m, flags);
#endif

	/*
	 * For reads (pageins) and synchronous writes, we clean up
	 * all completed async pageouts.
	 */
	if ((flags & B_ASYNC) == 0) {
		s = splbio();
#ifdef DEBUG
		/*
		 * Check to see if this page is currently being cleaned.
		 * If it is, we just wait til the operation is done before
		 * continuing.
		 */
		while (swap_pager_clean(m, flags&B_READ)) {
			if (swpagerdebug & SDB_ANOM)
				printf("swap_pager_io: page %x cleaning\n", m);

			swp->sw_flags |= SW_WANTED;
			assert_wait((int)swp, 0);
			thread_block();
		}
#else
		(void) swap_pager_clean(m, flags&B_READ);
#endif
		splx(s);
	}
	/*
	 * For async writes (pageouts), we cleanup completed pageouts so
	 * that all available resources are freed.  Also tells us if this
	 * page is already being cleaned.  If it is, or no resources
	 * are available, we try again later.
	 */
	else if (swap_pager_clean(m, B_WRITE) ||
		 queue_empty(&swap_pager_free)) {
#ifdef DEBUG
		if ((swpagerdebug & SDB_ANOM) &&
		    !queue_empty(&swap_pager_free))
			printf("swap_pager_io: page %x already cleaning\n", m);
#endif
		return(VM_PAGER_FAIL);
	}

	/*
	 * Determine swap block and allocate as necessary.
	 */
	off = m->offset + m->object->paging_offset;
	ix = off / dbtob(swp->sw_bsize);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
#ifdef DEBUG
		if (swpagerdebug & SDB_FAIL)
			printf("swpg_io: bad offset %x+%x(%d) in %x\n",
			       m->offset, m->object->paging_offset,
			       ix, swp->sw_blocks);
#endif
		return(VM_PAGER_FAIL);
	}
	swb = &swp->sw_blocks[ix];
	off = off % dbtob(swp->sw_bsize);	/* byte offset within the block */
	if (flags & B_READ) {
		/*
		 * A pagein requires that the block exist and the per-page
		 * bit in swb_mask be set (page actually written to swap).
		 */
		if (swb->swb_block == 0 ||
		    (swb->swb_mask & (1 << atop(off))) == 0) {
#ifdef DEBUG
			if (swpagerdebug & (SDB_ALLOCBLK|SDB_FAIL))
				printf("swpg_io: %x bad read: blk %x+%x, mask %x, off %x+%x\n",
				       swp->sw_blocks,
				       swb->swb_block, atop(off),
				       swb->swb_mask,
				       m->offset, m->object->paging_offset);
#endif
			/* XXX: should we zero page here?? */
			return(VM_PAGER_FAIL);
		}
	} else if (swb->swb_block == 0) {
		/* first write to this block: carve space out of the swap map */
		swb->swb_block = rmalloc(swapmap, swp->sw_bsize);
		if (swb->swb_block == 0) {
#ifdef DEBUG
			if (swpagerdebug & SDB_FAIL)
				printf("swpg_io: rmalloc of %x failed\n",
				       swp->sw_bsize);
#endif
			return(VM_PAGER_FAIL);
		}
#ifdef DEBUG
		if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK))
			printf("swpg_io: %x alloc blk %x at ix %x\n",
			       swp->sw_blocks, swb->swb_block, ix);
#endif
	}

	/*
	 * Allocate a kernel virtual address and initialize so that PTE
	 * is available for lower level IO drivers.
	 */
	kva = vm_pager_map_page(m);

	/*
	 * Get a swap buffer header and perform the IO.
	 * bswlist.b_actf heads the free swap buffer list; sleep until
	 * a buffer is released if none is available.
	 */
	s = splbio();
	while (bswlist.b_actf == NULL) {
#ifdef DEBUG
		if (swpagerdebug & SDB_ANOM)
			printf("swap_pager_io: wait on swbuf for %x (%d)\n",
			       m, flags);
#endif
		bswlist.b_flags |= B_WANTED;
		sleep((caddr_t)&bswlist, PSWP+1);
	}
	bp = bswlist.b_actf;
	bswlist.b_actf = bp->b_actf;
	splx(s);
	bp->b_flags = B_BUSY | (flags & B_READ);
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_un.b_addr = (caddr_t)kva;
	bp->b_blkno = swb->swb_block + btodb(off);
	VHOLD(swapdev_vp);
	bp->b_vp = swapdev_vp;
	if (swapdev_vp->v_type == VBLK)
		bp->b_dev = swapdev_vp->v_rdev;
	bp->b_bcount = PAGE_SIZE;
	if ((bp->b_flags & B_READ) == 0) {
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = PAGE_SIZE;
		swapdev_vp->v_numoutput++;
	}

	/*
	 * If this is an async write we set up additional buffer fields
	 * and place a "cleaning" entry on the inuse queue.
	 */
	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
#ifdef DEBUG
		if (queue_empty(&swap_pager_free))
			panic("swpg_io: lost spc");
#endif
		queue_remove_first(&swap_pager_free,
				   spc, swp_clean_t, spc_list);
#ifdef DEBUG
		if (spc->spc_flags != SPC_FREE)
			panic("swpg_io: bad free spc");
#endif
		spc->spc_flags = SPC_BUSY;
		spc->spc_bp = bp;
		spc->spc_swp = swp;
		spc->spc_kva = kva;
		spc->spc_m = m;
		/* arrange for swap_pager_iodone() to run at I/O completion */
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		s = splbio();
		swp->sw_poip++;
		queue_enter(&swap_pager_inuse, spc, swp_clean_t, spc_list);

#ifdef DEBUG
		swap_pager_poip++;
		if (swpagerdebug & SDB_WRITE)
			printf("swpg_io: write: bp=%x swp=%x spc=%x poip=%d\n",
			       bp, swp, spc, swp->sw_poip);
		if ((swpagerdebug & SDB_ALLOCBLK) &&
		    (swb->swb_mask & (1 << atop(off))) == 0)
			printf("swpg_io: %x write blk %x+%x\n",
			       swp->sw_blocks, swb->swb_block, atop(off));
#endif
		/* mark the page as now present in swap */
		swb->swb_mask |= (1 << atop(off));
		splx(s);
	}
#ifdef DEBUG
	if (swpagerdebug & SDB_IO)
		printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n",
		       bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m));
#endif
	VOP_STRATEGY(bp);
	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
		/* async: caller learns completion later via swap_pager_clean() */
#ifdef DEBUG
		if (swpagerdebug & SDB_IO)
			printf("swpg_io:  IO started: bp %x\n", bp);
#endif
		return(VM_PAGER_PEND);
	}
	/*
	 * Synchronous: wait for B_DONE, then release the buffer back to
	 * the free swap buffer list ourselves.
	 */
	s = splbio();
#ifdef DEBUG
	if (flags & B_READ)
		swap_pager_piip++;
	else
		swap_pager_poip++;
#endif
	while ((bp->b_flags & B_DONE) == 0) {
		assert_wait((int)bp, 0);
		thread_block();
	}
#ifdef DEBUG
	if (flags & B_READ)
		--swap_pager_piip;
	else
		--swap_pager_poip;
#endif
	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK;
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->b_actf = bswlist.b_actf;
	bswlist.b_actf = bp;
	if (bp->b_vp)
		brelvp(bp);
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		thread_wakeup((int)&bswlist);
	}
	if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) {
		/* successful sync pageout: page is now clean */
		m->flags |= PG_CLEAN;
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	}
	splx(s);
#ifdef DEBUG
	if (swpagerdebug & SDB_IO)
		printf("swpg_io:  IO done: bp %x, rv %d\n", bp, rv);
	if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_ERROR)
		printf("swpg_io: IO error\n");
#endif
	vm_pager_unmap_page(kva);
	return(rv);
}
674 
/*
 * Reap completed asynchronous pageouts.  Walk the inuse list; for
 * each entry whose I/O is done and whose object lock could be taken
 * (swap_pager_finish), unmap the page and return the clean record to
 * the free list.  Returns TRUE iff the given page 'm' (if non-NULL)
 * was seen on the inuse list, i.e. it is (or just was) being cleaned.
 */
static boolean_t
swap_pager_clean(m, rw)
	vm_page_t m;
	int rw;
{
	register swp_clean_t spc, tspc;
	register int s;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return (FALSE);			/* ??? */
	if (swpagerdebug & SDB_FOLLOW)
		printf("swpg_clean(%x, %d)\n", m, rw);
#endif
	tspc = NULL;
	for (;;) {
		/*
		 * Look up and removal from inuse list must be done
		 * at splbio() to avoid conflicts with swap_pager_iodone.
		 */
		s = splbio();
		spc = (swp_clean_t) queue_first(&swap_pager_inuse);
		while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
			if ((spc->spc_flags & SPC_DONE) &&
			    swap_pager_finish(spc)) {
				queue_remove(&swap_pager_inuse, spc,
					     swp_clean_t, spc_list);
				break;
			}
			if (m && m == spc->spc_m) {
#ifdef DEBUG
				if (swpagerdebug & SDB_ANOM)
					printf("swap_pager_clean: page %x on list, flags %x\n",
					       m, spc->spc_flags);
#endif
				tspc = spc;
			}
			spc = (swp_clean_t) queue_next(&spc->spc_list);
		}

		/*
		 * No operations done, that's all we can do for now.
		 */
		if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
			break;
		splx(s);

		/*
		 * The desired page was found to be busy earlier in
		 * the scan but has since completed.
		 */
		if (tspc && tspc == spc) {
#ifdef DEBUG
			if (swpagerdebug & SDB_ANOM)
				printf("swap_pager_clean: page %x done while looking\n",
				       m);
#endif
			tspc = NULL;
		}
		/* recycle the record: unmap the page, put spc on the free list */
		spc->spc_flags = SPC_FREE;
		vm_pager_unmap_page(spc->spc_kva);
		queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
#ifdef DEBUG
		if (swpagerdebug & SDB_WRITE)
			printf("swpg_clean: free spc %x\n", spc);
#endif
	}
#ifdef DEBUG
	/*
	 * If we found that the desired page is already being cleaned
	 * mark it so that swap_pager_iodone() will not set the clean
	 * flag before the pageout daemon has another chance to clean it.
	 */
	if (tspc && rw == B_WRITE) {
		if (swpagerdebug & SDB_ANOM)
			printf("swap_pager_clean: page %x on clean list\n",
			       tspc);
		tspc->spc_flags |= SPC_DIRTY;
	}
#endif
	splx(s);

#ifdef DEBUG
	if (swpagerdebug & SDB_WRITE)
		printf("swpg_clean: return %d\n", tspc ? TRUE : FALSE);
	if ((swpagerdebug & SDB_ANOM) && tspc)
		printf("swpg_clean: %s of cleaning page %x\n",
		       rw == B_READ ? "get" : "put", m);
#endif
	return(tspc ? TRUE : FALSE);
}
767 
/*
 * Finish up one completed pageout: decrement the object's
 * paging_in_progress count and mark the page clean (or leave it
 * PG_LAUNDRY for another try on error).  Returns 0 if the object
 * lock could not be obtained -- the caller retries later -- and 1
 * once the pageout is fully accounted for.
 */
static int
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	vm_object_t object = spc->spc_m->object;

	/*
	 * Mark the paging operation as done.
	 * (XXX) If we cannot get the lock, leave it til later.
	 * (XXX) Also we are assuming that an async write is a
	 *       pageout operation that has incremented the counter.
	 */
	if (!vm_object_lock_try(object))
		return(0);

	if (--object->paging_in_progress == 0)
		thread_wakeup((int) object);

#ifdef DEBUG
	/*
	 * XXX: this isn't even close to the right thing to do,
	 * introduces a variety of race conditions.
	 *
	 * If dirty, vm_pageout() has attempted to clean the page
	 * again.  In this case we do not do anything as we will
	 * see the page again shortly.
	 */
	if (spc->spc_flags & SPC_DIRTY) {
		if (swpagerdebug & SDB_ANOM)
			printf("swap_pager_finish: page %x dirty again\n",
			       spc->spc_m);
		spc->spc_m->flags &= ~PG_BUSY;
		PAGE_WAKEUP(spc->spc_m);
		vm_object_unlock(object);
		return(1);
	}
#endif
	/*
	 * If no error mark as clean and inform the pmap system.
	 * If error, mark as dirty so we will try again.
	 * (XXX could get stuck doing this, should give up after awhile)
	 */
	if (spc->spc_flags & SPC_ERROR) {
		printf("swap_pager_finish: clean of page %x failed\n",
		       VM_PAGE_TO_PHYS(spc->spc_m));
		spc->spc_m->flags |= PG_LAUNDRY;
	} else {
		spc->spc_m->flags |= PG_CLEAN;
		pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m));
	}
	spc->spc_m->flags &= ~PG_BUSY;
	PAGE_WAKEUP(spc->spc_m);

	vm_object_unlock(object);
	return(1);
}
824 
/*
 * Buffer completion routine for async swap writes (installed via
 * B_CALL/b_iodone in swap_pager_io).  Finds the clean list entry for
 * the finished buffer, marks it SPC_DONE, returns the buffer to the
 * free swap buffer list, and wakes any waiters on the pager or on
 * bswlist.  The entry itself is reaped later by swap_pager_clean().
 */
static void
swap_pager_iodone(bp)
	register struct buf *bp;
{
	register swp_clean_t spc;
	daddr_t blk;
	int s;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return;
	if (swpagerdebug & SDB_FOLLOW)
		printf("swpg_iodone(%x)\n", bp);
#endif
	s = splbio();
	/* locate the inuse entry whose buffer just completed */
	spc = (swp_clean_t) queue_first(&swap_pager_inuse);
	while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
		if (spc->spc_bp == bp)
			break;
		spc = (swp_clean_t) queue_next(&spc->spc_list);
	}
#ifdef DEBUG
	if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
		panic("swap_pager_iodone: bp not found");
#endif

	spc->spc_flags &= ~SPC_BUSY;
	spc->spc_flags |= SPC_DONE;
	if (bp->b_flags & B_ERROR)
		spc->spc_flags |= SPC_ERROR;
	spc->spc_bp = NULL;
	blk = bp->b_blkno;	/* XXX set but never used below */

#ifdef DEBUG
	--swap_pager_poip;
	if (swpagerdebug & SDB_WRITE)
		printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n",
		       bp, spc->spc_swp, spc->spc_swp->sw_flags,
		       spc, spc->spc_swp->sw_poip);
#endif

	/* one fewer pageout in progress; wake anyone waiting on this pager */
	spc->spc_swp->sw_poip--;
	if (spc->spc_swp->sw_flags & SW_WANTED) {
		spc->spc_swp->sw_flags &= ~SW_WANTED;
		thread_wakeup((int)spc->spc_swp);
	}

	/* release the swap buffer back to the head of bswlist */
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->b_actf = bswlist.b_actf;
	bswlist.b_actf = bp;
	if (bp->b_vp)
		brelvp(bp);
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		thread_wakeup((int)&bswlist);
	}
	/*
	 * Only kick the pageout daemon if we are really hurting
	 * for pages, otherwise this page will be picked up later.
	 */
	if (cnt.v_free_count < cnt.v_free_min)
		thread_wakeup((int) &vm_pages_needed);
	splx(s);
}
890