xref: /original-bsd/sys/vm/swap_pager.c (revision 56058e1d)
1 /*
2  * Copyright (c) 1990 University of Utah.
3  * Copyright (c) 1991, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * the Systems Programming Group of the University of Utah Computer
8  * Science Department.
9  *
10  * %sccs.include.redist.c%
11  *
12  * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
13  *
14  *	@(#)swap_pager.c	8.5 (Berkeley) 12/30/93
15  */
16 
17 /*
18  * Quick hack to page to dedicated partition(s).
19  * TODO:
20  *	Add multiprocessor locks
21  *	Deal with async writes in a better fashion
22  */
23 
24 #include <sys/param.h>
25 #include <sys/systm.h>
26 #include <sys/proc.h>
27 #include <sys/buf.h>
28 #include <sys/map.h>
29 #include <sys/vnode.h>
30 #include <sys/malloc.h>
31 
32 #include <miscfs/specfs/specdev.h>
33 
34 #include <vm/vm.h>
35 #include <vm/vm_page.h>
36 #include <vm/vm_pageout.h>
37 #include <vm/swap_pager.h>
38 
#define NSWSIZES	16	/* size of swtab */
#define NPENDINGIO	64	/* max # of pending cleans */
#define MAXDADDRS	64	/* max # of disk addrs for fixed allocations */

#ifdef DEBUG
/* Debug trace mask; default (0x100) enables anomaly reporting only. */
int	swpagerdebug = 0x100;
#define	SDB_FOLLOW	0x001	/* trace routine entry */
#define SDB_INIT	0x002	/* trace initialization */
#define SDB_ALLOC	0x004	/* trace pager allocation/deallocation */
#define SDB_IO		0x008	/* trace I/O start and completion */
#define SDB_WRITE	0x010	/* trace pageout (write) path details */
#define SDB_FAIL	0x020	/* report failure paths */
#define SDB_ALLOCBLK	0x040	/* trace swap block allocation */
#define SDB_FULL	0x080	/* verbose reporting */
#define SDB_ANOM	0x100	/* report anomalous conditions */
#define SDB_ANOMPANIC	0x200	/* bail out of pager routines when panicking */
#endif
56 
/*
 * A swpagerclean entry ("spc") records one asynchronous pageout in
 * flight.  Entries sit on swap_pager_free when idle, move to
 * swap_pager_inuse while their write is pending; swap_pager_iodone()
 * marks them SPC_DONE at interrupt time and swap_pager_clean()
 * finishes them and returns them to the free list.
 */
TAILQ_HEAD(swpclean, swpagerclean);

struct swpagerclean {
	TAILQ_ENTRY(swpagerclean)	spc_list;	/* free/inuse list linkage */
	int				spc_flags;	/* SPC_* state bits below */
	struct buf			*spc_bp;	/* buffer used for the write */
	sw_pager_t			spc_swp;	/* pager owning the page */
	vm_offset_t			spc_kva;	/* KVA the page is mapped at */
	vm_page_t			spc_m;		/* the page being paged out */
} swcleanlist[NPENDINGIO];
typedef struct swpagerclean *swp_clean_t;


/* spc_flags values */
#define SPC_FREE	0x00	/* on free list, available */
#define SPC_BUSY	0x01	/* write in progress */
#define SPC_DONE	0x02	/* write finished, awaiting cleanup */
#define SPC_ERROR	0x04	/* write failed; page gets relaundered */
#define SPC_DIRTY	0x08	/* (DEBUG) page redirtied while cleaning */
76 
/*
 * Table mapping object size to the swap block size used for objects
 * in that size range.  Terminated by a sentinel entry with st_osize
 * of 0, which matches any larger object.
 */
struct swtab {
	vm_size_t st_osize;	/* size of object (bytes) */
	int	  st_bsize;	/* vs. size of swap block (DEV_BSIZE units) */
#ifdef DEBUG
	u_long	  st_inuse;	/* number in this range in use */
	u_long	  st_usecnt;	/* total used of this size */
#endif
} swtab[NSWSIZES+1];

#ifdef DEBUG
int		swap_pager_pendingio;	/* max pending async "clean" ops */
int		swap_pager_poip;	/* pageouts in progress */
int		swap_pager_piip;	/* pageins in progress */
#endif

struct swpclean	swap_pager_inuse;	/* list of pending page cleans */
struct swpclean	swap_pager_free;	/* list of free pager clean structs */
struct pagerlst	swap_pager_list;	/* list of "named" anon regions */
95 
/* Internal pager routines; exported only through swappagerops below. */
static int		swap_pager_finish __P((swp_clean_t));
static void 		swap_pager_init __P((void));
static vm_pager_t	swap_pager_alloc
			    __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
static boolean_t	swap_pager_clean __P((vm_page_t, int));
static void		swap_pager_dealloc __P((vm_pager_t));
static int		swap_pager_getpage
			    __P((vm_pager_t, vm_page_t, boolean_t));
static boolean_t	swap_pager_haspage __P((vm_pager_t, vm_offset_t));
static int		swap_pager_io __P((sw_pager_t, vm_page_t, int));
static void		swap_pager_iodone __P((struct buf *));
static int		swap_pager_putpage
			    __P((vm_pager_t, vm_page_t, boolean_t));

/* Operations vector handed to the machine-independent pager layer. */
struct pagerops swappagerops = {
	swap_pager_init,
	swap_pager_alloc,
	swap_pager_dealloc,
	swap_pager_getpage,
	swap_pager_putpage,
	swap_pager_haspage
};
118 
119 static void
120 swap_pager_init()
121 {
122 	register swp_clean_t spc;
123 	register int i, bsize;
124 	extern int dmmin, dmmax;
125 	int maxbsize;
126 
127 #ifdef DEBUG
128 	if (swpagerdebug & (SDB_FOLLOW|SDB_INIT))
129 		printf("swpg_init()\n");
130 #endif
131 	dfltpagerops = &swappagerops;
132 	TAILQ_INIT(&swap_pager_list);
133 
134 	/*
135 	 * Initialize clean lists
136 	 */
137 	TAILQ_INIT(&swap_pager_inuse);
138 	TAILQ_INIT(&swap_pager_free);
139 	for (i = 0, spc = swcleanlist; i < NPENDINGIO; i++, spc++) {
140 		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
141 		spc->spc_flags = SPC_FREE;
142 	}
143 
144 	/*
145 	 * Calculate the swap allocation constants.
146 	 */
147         if (dmmin == 0) {
148                 dmmin = DMMIN;
149 		if (dmmin < CLBYTES/DEV_BSIZE)
150 			dmmin = CLBYTES/DEV_BSIZE;
151 	}
152         if (dmmax == 0)
153                 dmmax = DMMAX;
154 
155 	/*
156 	 * Fill in our table of object size vs. allocation size
157 	 */
158 	bsize = btodb(PAGE_SIZE);
159 	if (bsize < dmmin)
160 		bsize = dmmin;
161 	maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE);
162 	if (maxbsize > dmmax)
163 		maxbsize = dmmax;
164 	for (i = 0; i < NSWSIZES; i++) {
165 		swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize));
166 		swtab[i].st_bsize = bsize;
167 #ifdef DEBUG
168 		if (swpagerdebug & SDB_INIT)
169 			printf("swpg_init: ix %d, size %x, bsize %x\n",
170 			       i, swtab[i].st_osize, swtab[i].st_bsize);
171 #endif
172 		if (bsize >= maxbsize)
173 			break;
174 		bsize *= 2;
175 	}
176 	swtab[i].st_osize = 0;
177 	swtab[i].st_bsize = bsize;
178 }
179 
180 /*
181  * Allocate a pager structure and associated resources.
182  * Note that if we are called from the pageout daemon (handle == NULL)
183  * we should not wait for memory as it could resulting in deadlock.
184  */
185 static vm_pager_t
186 swap_pager_alloc(handle, size, prot, foff)
187 	caddr_t handle;
188 	register vm_size_t size;
189 	vm_prot_t prot;
190 	vm_offset_t foff;
191 {
192 	register vm_pager_t pager;
193 	register sw_pager_t swp;
194 	struct swtab *swt;
195 	int waitok;
196 
197 #ifdef DEBUG
198 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
199 		printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot);
200 #endif
201 	/*
202 	 * If this is a "named" anonymous region, look it up and
203 	 * return the appropriate pager if it exists.
204 	 */
205 	if (handle) {
206 		pager = vm_pager_lookup(&swap_pager_list, handle);
207 		if (pager != NULL) {
208 			/*
209 			 * Use vm_object_lookup to gain a reference
210 			 * to the object and also to remove from the
211 			 * object cache.
212 			 */
213 			if (vm_object_lookup(pager) == NULL)
214 				panic("swap_pager_alloc: bad object");
215 			return(pager);
216 		}
217 	}
218 	/*
219 	 * Pager doesn't exist, allocate swap management resources
220 	 * and initialize.
221 	 */
222 	waitok = handle ? M_WAITOK : M_NOWAIT;
223 	pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
224 	if (pager == NULL)
225 		return(NULL);
226 	swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
227 	if (swp == NULL) {
228 #ifdef DEBUG
229 		if (swpagerdebug & SDB_FAIL)
230 			printf("swpg_alloc: swpager malloc failed\n");
231 #endif
232 		free((caddr_t)pager, M_VMPAGER);
233 		return(NULL);
234 	}
235 	size = round_page(size);
236 	for (swt = swtab; swt->st_osize; swt++)
237 		if (size <= swt->st_osize)
238 			break;
239 #ifdef DEBUG
240 	swt->st_inuse++;
241 	swt->st_usecnt++;
242 #endif
243 	swp->sw_osize = size;
244 	swp->sw_bsize = swt->st_bsize;
245 	swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize;
246 	swp->sw_blocks = (sw_blk_t)
247 		malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
248 		       M_VMPGDATA, M_NOWAIT);
249 	if (swp->sw_blocks == NULL) {
250 		free((caddr_t)swp, M_VMPGDATA);
251 		free((caddr_t)pager, M_VMPAGER);
252 #ifdef DEBUG
253 		if (swpagerdebug & SDB_FAIL)
254 			printf("swpg_alloc: sw_blocks malloc failed\n");
255 		swt->st_inuse--;
256 		swt->st_usecnt--;
257 #endif
258 		return(FALSE);
259 	}
260 	bzero((caddr_t)swp->sw_blocks,
261 	      swp->sw_nblocks * sizeof(*swp->sw_blocks));
262 	swp->sw_poip = 0;
263 	if (handle) {
264 		vm_object_t object;
265 
266 		swp->sw_flags = SW_NAMED;
267 		TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
268 		/*
269 		 * Consistant with other pagers: return with object
270 		 * referenced.  Can't do this with handle == NULL
271 		 * since it might be the pageout daemon calling.
272 		 */
273 		object = vm_object_allocate(size);
274 		vm_object_enter(object, pager);
275 		vm_object_setpager(object, pager, 0, FALSE);
276 	} else {
277 		swp->sw_flags = 0;
278 		pager->pg_list.tqe_next = NULL;
279 		pager->pg_list.tqe_prev = NULL;
280 	}
281 	pager->pg_handle = handle;
282 	pager->pg_ops = &swappagerops;
283 	pager->pg_type = PG_SWAP;
284 	pager->pg_data = swp;
285 
286 #ifdef DEBUG
287 	if (swpagerdebug & SDB_ALLOC)
288 		printf("swpg_alloc: pg_data %x, %x of %x at %x\n",
289 		       swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks);
290 #endif
291 	return(pager);
292 }
293 
/*
 * Tear down a swap pager: wait for all pending pageouts to drain,
 * return any allocated swap blocks to the swap map, then free the
 * pager's management structures.
 */
static void
swap_pager_dealloc(pager)
	vm_pager_t pager;
{
	register int i;
	register sw_blk_t bp;
	register sw_pager_t swp;
	struct swtab *swt;
	int s;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return;
	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
		printf("swpg_dealloc(%x)\n", pager);
#endif
	/*
	 * Remove from list right away so lookups will fail if we
	 * block for pageout completion.
	 */
	swp = (sw_pager_t) pager->pg_data;
	if (swp->sw_flags & SW_NAMED) {
		TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
		swp->sw_flags &= ~SW_NAMED;
	}
#ifdef DEBUG
	/* Find the swtab entry this pager was charged to and release it. */
	for (swt = swtab; swt->st_osize; swt++)
		if (swp->sw_osize <= swt->st_osize)
			break;
	swt->st_inuse--;
#endif

	/*
	 * Wait for all pageouts to finish and remove
	 * all entries from cleaning list.  sw_poip is decremented
	 * and SW_WANTED waiters are woken by swap_pager_iodone().
	 */
	s = splbio();
	while (swp->sw_poip) {
		swp->sw_flags |= SW_WANTED;
		assert_wait((int)swp, 0);
		thread_block();
	}
	splx(s);
	(void) swap_pager_clean(NULL, B_WRITE);

	/*
	 * Free left over swap blocks
	 */
	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++)
		if (bp->swb_block) {
#ifdef DEBUG
			if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL))
				printf("swpg_dealloc: blk %x\n",
				       bp->swb_block);
#endif
			rmfree(swapmap, swp->sw_bsize, bp->swb_block);
		}
	/*
	 * Free swap management resources
	 */
	free((caddr_t)swp->sw_blocks, M_VMPGDATA);
	free((caddr_t)swp, M_VMPGDATA);
	free((caddr_t)pager, M_VMPAGER);
}
359 
360 static int
361 swap_pager_getpage(pager, m, sync)
362 	vm_pager_t pager;
363 	vm_page_t m;
364 	boolean_t sync;
365 {
366 #ifdef DEBUG
367 	if (swpagerdebug & SDB_FOLLOW)
368 		printf("swpg_getpage(%x, %x, %d)\n", pager, m, sync);
369 #endif
370 	return(swap_pager_io((sw_pager_t)pager->pg_data, m, B_READ));
371 }
372 
373 static int
374 swap_pager_putpage(pager, m, sync)
375 	vm_pager_t pager;
376 	vm_page_t m;
377 	boolean_t sync;
378 {
379 	int flags;
380 
381 #ifdef DEBUG
382 	if (swpagerdebug & SDB_FOLLOW)
383 		printf("swpg_putpage(%x, %x, %d)\n", pager, m, sync);
384 #endif
385 	if (pager == NULL) {
386 		(void) swap_pager_clean(NULL, B_WRITE);
387 		return (VM_PAGER_OK);		/* ??? */
388 	}
389 	flags = B_WRITE;
390 	if (!sync)
391 		flags |= B_ASYNC;
392 	return(swap_pager_io((sw_pager_t)pager->pg_data, m, flags));
393 }
394 
395 static boolean_t
396 swap_pager_haspage(pager, offset)
397 	vm_pager_t pager;
398 	vm_offset_t offset;
399 {
400 	register sw_pager_t swp;
401 	register sw_blk_t swb;
402 	int ix;
403 
404 #ifdef DEBUG
405 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
406 		printf("swpg_haspage(%x, %x) ", pager, offset);
407 #endif
408 	swp = (sw_pager_t) pager->pg_data;
409 	ix = offset / dbtob(swp->sw_bsize);
410 	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
411 #ifdef DEBUG
412 		if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK))
413 			printf("swpg_haspage: %x bad offset %x, ix %x\n",
414 			       swp->sw_blocks, offset, ix);
415 #endif
416 		return(FALSE);
417 	}
418 	swb = &swp->sw_blocks[ix];
419 	if (swb->swb_block)
420 		ix = atop(offset % dbtob(swp->sw_bsize));
421 #ifdef DEBUG
422 	if (swpagerdebug & SDB_ALLOCBLK)
423 		printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix);
424 	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
425 		printf("-> %c\n",
426 		       "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]);
427 #endif
428 	if (swb->swb_block && (swb->swb_mask & (1 << ix)))
429 		return(TRUE);
430 	return(FALSE);
431 }
432 
/*
 * Scaled down version of swap().
 * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed.
 * BOGUS:  lower level IO routines expect a KVA so we have to map our
 * provided physical page into the KVA to keep them happy.
 *
 * Performs a pagein (flags & B_READ) or pageout of page "m" through
 * pager "swp".  Returns VM_PAGER_OK/FAIL/ERROR, or VM_PAGER_PEND for
 * an async write that will be completed later by swap_pager_iodone()
 * and swap_pager_clean().  Async reads are not supported.
 */
static int
swap_pager_io(swp, m, flags)
	register sw_pager_t swp;
	vm_page_t m;
	int flags;
{
	register struct buf *bp;
	register sw_blk_t swb;
	register int s;
	int ix;
	boolean_t rv;
	vm_offset_t kva, off;
	swp_clean_t spc;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return (VM_PAGER_FAIL);		/* XXX: correct return? */
	if (swpagerdebug & (SDB_FOLLOW|SDB_IO))
		printf("swpg_io(%x, %x, %x)\n", swp, m, flags);
	if ((flags & (B_READ|B_ASYNC)) == (B_READ|B_ASYNC))
		panic("swap_pager_io: cannot do ASYNC reads");
#endif

	/*
	 * First determine if the page exists in the pager if this is
	 * a sync read.  This quickly handles cases where we are
	 * following shadow chains looking for the top level object
	 * with the page.
	 */
	off = m->offset + m->object->paging_offset;
	ix = off / dbtob(swp->sw_bsize);
	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks)
		return(VM_PAGER_FAIL);
	swb = &swp->sw_blocks[ix];
	/* off becomes the byte offset within the swap block */
	off = off % dbtob(swp->sw_bsize);
	if ((flags & B_READ) &&
	    (swb->swb_block == 0 || (swb->swb_mask & (1 << atop(off))) == 0))
		return(VM_PAGER_FAIL);

	/*
	 * For reads (pageins) and synchronous writes, we clean up
	 * all completed async pageouts.
	 */
	if ((flags & B_ASYNC) == 0) {
		s = splbio();
#ifdef DEBUG
		/*
		 * Check to see if this page is currently being cleaned.
		 * If it is, we just wait til the operation is done before
		 * continuing.
		 */
		while (swap_pager_clean(m, flags&B_READ)) {
			if (swpagerdebug & SDB_ANOM)
				printf("swap_pager_io: page %x cleaning\n", m);

			swp->sw_flags |= SW_WANTED;
			assert_wait((int)swp, 0);
			thread_block();
		}
#else
		(void) swap_pager_clean(m, flags&B_READ);
#endif
		splx(s);
	}
	/*
	 * For async writes (pageouts), we cleanup completed pageouts so
	 * that all available resources are freed.  Also tells us if this
	 * page is already being cleaned.  If it is, or no resources
	 * are available, we try again later.
	 */
	else if (swap_pager_clean(m, B_WRITE) ||
		 swap_pager_free.tqh_first == NULL) {
#ifdef DEBUG
		if ((swpagerdebug & SDB_ANOM) &&
		    swap_pager_free.tqh_first != NULL)
			printf("swap_pager_io: page %x already cleaning\n", m);
#endif
		return(VM_PAGER_FAIL);
	}

	/*
	 * Allocate a swap block if necessary.
	 */
	if (swb->swb_block == 0) {
		swb->swb_block = rmalloc(swapmap, swp->sw_bsize);
		if (swb->swb_block == 0) {
#ifdef DEBUG
			if (swpagerdebug & SDB_FAIL)
				printf("swpg_io: rmalloc of %x failed\n",
				       swp->sw_bsize);
#endif
			return(VM_PAGER_FAIL);
		}
#ifdef DEBUG
		if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK))
			printf("swpg_io: %x alloc blk %x at ix %x\n",
			       swp->sw_blocks, swb->swb_block, ix);
#endif
	}

	/*
	 * Allocate a kernel virtual address and initialize so that PTE
	 * is available for lower level IO drivers.
	 */
	kva = vm_pager_map_page(m);

	/*
	 * Get a swap buffer header and perform the IO
	 */
	s = splbio();
	while (bswlist.b_actf == NULL) {
#ifdef DEBUG
		if (swpagerdebug & SDB_ANOM)
			printf("swap_pager_io: wait on swbuf for %x (%d)\n",
			       m, flags);
#endif
		bswlist.b_flags |= B_WANTED;
		tsleep((caddr_t)&bswlist, PSWP+1, "swpgio", 0);
	}
	/* pop a buffer off the swap buffer freelist */
	bp = bswlist.b_actf;
	bswlist.b_actf = bp->b_actf;
	splx(s);
	bp->b_flags = B_BUSY | (flags & B_READ);
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_data = (caddr_t)kva;
	bp->b_blkno = swb->swb_block + btodb(off);
	VHOLD(swapdev_vp);
	bp->b_vp = swapdev_vp;
	if (swapdev_vp->v_type == VBLK)
		bp->b_dev = swapdev_vp->v_rdev;
	bp->b_bcount = PAGE_SIZE;
	if ((bp->b_flags & B_READ) == 0) {
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = PAGE_SIZE;
		swapdev_vp->v_numoutput++;
	}

	/*
	 * If this is an async write we set up additional buffer fields
	 * and place a "cleaning" entry on the inuse queue.
	 */
	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
#ifdef DEBUG
		if (swap_pager_free.tqh_first == NULL)
			panic("swpg_io: lost spc");
#endif
		spc = swap_pager_free.tqh_first;
		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
#ifdef DEBUG
		if (spc->spc_flags != SPC_FREE)
			panic("swpg_io: bad free spc");
#endif
		spc->spc_flags = SPC_BUSY;
		spc->spc_bp = bp;
		spc->spc_swp = swp;
		spc->spc_kva = kva;
		spc->spc_m = m;
		/* arrange for swap_pager_iodone() at interrupt time */
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		s = splbio();
		swp->sw_poip++;
		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);

#ifdef DEBUG
		swap_pager_poip++;
		if (swpagerdebug & SDB_WRITE)
			printf("swpg_io: write: bp=%x swp=%x spc=%x poip=%d\n",
			       bp, swp, spc, swp->sw_poip);
		if ((swpagerdebug & SDB_ALLOCBLK) &&
		    (swb->swb_mask & (1 << atop(off))) == 0)
			printf("swpg_io: %x write blk %x+%x\n",
			       swp->sw_blocks, swb->swb_block, atop(off));
#endif
		/* mark the page as present on swap */
		swb->swb_mask |= (1 << atop(off));
		splx(s);
	}
#ifdef DEBUG
	if (swpagerdebug & SDB_IO)
		printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n",
		       bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m));
#endif
	VOP_STRATEGY(bp);
	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
#ifdef DEBUG
		if (swpagerdebug & SDB_IO)
			printf("swpg_io:  IO started: bp %x\n", bp);
#endif
		return(VM_PAGER_PEND);
	}
	/*
	 * Synchronous operation: wait for completion, then release
	 * the buffer back to the swap buffer pool ourselves.
	 */
	s = splbio();
#ifdef DEBUG
	if (flags & B_READ)
		swap_pager_piip++;
	else
		swap_pager_poip++;
#endif
	while ((bp->b_flags & B_DONE) == 0) {
		assert_wait((int)bp, 0);
		thread_block();
	}
#ifdef DEBUG
	if (flags & B_READ)
		--swap_pager_piip;
	else
		--swap_pager_poip;
#endif
	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK;
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->b_actf = bswlist.b_actf;
	bswlist.b_actf = bp;
	if (bp->b_vp)
		brelvp(bp);
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		thread_wakeup((int)&bswlist);
	}
	/* a successful pageout leaves the page clean */
	if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) {
		m->flags |= PG_CLEAN;
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	}
	splx(s);
#ifdef DEBUG
	if (swpagerdebug & SDB_IO)
		printf("swpg_io:  IO done: bp %x, rv %d\n", bp, rv);
	if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_ERROR)
		printf("swpg_io: IO error\n");
#endif
	vm_pager_unmap_page(kva);
	return(rv);
}
670 
/*
 * Reap completed asynchronous pageouts: repeatedly scan the inuse
 * list, finishing (swap_pager_finish) any SPC_DONE entry and moving
 * it back to the free list, until no more can be finished.  If "m"
 * is non-NULL, also report whether that page currently has a clean
 * in progress (TRUE if so).  "rw" is B_READ or B_WRITE, the operation
 * the caller is about to perform on the page (used only under DEBUG).
 */
static boolean_t
swap_pager_clean(m, rw)
	vm_page_t m;
	int rw;
{
	register swp_clean_t spc, tspc;
	register int s;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return (FALSE);			/* ??? */
	if (swpagerdebug & SDB_FOLLOW)
		printf("swpg_clean(%x, %d)\n", m, rw);
#endif
	tspc = NULL;
	for (;;) {
		/*
		 * Look up and removal from inuse list must be done
		 * at splbio() to avoid conflicts with swap_pager_iodone.
		 */
		s = splbio();
		for (spc = swap_pager_inuse.tqh_first;
		     spc != NULL;
		     spc = spc->spc_list.tqe_next) {
			if ((spc->spc_flags & SPC_DONE) &&
			    swap_pager_finish(spc)) {
				TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
				break;
			}
			if (m && m == spc->spc_m) {
#ifdef DEBUG
				if (swpagerdebug & SDB_ANOM)
					printf("swap_pager_clean: page %x on list, flags %x\n",
					       m, spc->spc_flags);
#endif
				/* remember: the caller's page is being cleaned */
				tspc = spc;
			}
		}

		/*
		 * No operations done, thats all we can do for now.
		 * (Note: we leave the loop still at splbio; the
		 * final splx below drops it.)
		 */
		if (spc == NULL)
			break;
		splx(s);

		/*
		 * The desired page was found to be busy earlier in
		 * the scan but has since completed.
		 */
		if (tspc && tspc == spc) {
#ifdef DEBUG
			if (swpagerdebug & SDB_ANOM)
				printf("swap_pager_clean: page %x done while looking\n",
				       m);
#endif
			tspc = NULL;
		}
		/* return the finished entry to the free list */
		spc->spc_flags = SPC_FREE;
		vm_pager_unmap_page(spc->spc_kva);
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
#ifdef DEBUG
		if (swpagerdebug & SDB_WRITE)
			printf("swpg_clean: free spc %x\n", spc);
#endif
	}
#ifdef DEBUG
	/*
	 * If we found that the desired page is already being cleaned
	 * mark it so that swap_pager_iodone() will not set the clean
	 * flag before the pageout daemon has another chance to clean it.
	 */
	if (tspc && rw == B_WRITE) {
		if (swpagerdebug & SDB_ANOM)
			printf("swap_pager_clean: page %x on clean list\n",
			       tspc);
		tspc->spc_flags |= SPC_DIRTY;
	}
#endif
	splx(s);

#ifdef DEBUG
	if (swpagerdebug & SDB_WRITE)
		printf("swpg_clean: return %d\n", tspc ? TRUE : FALSE);
	if ((swpagerdebug & SDB_ANOM) && tspc)
		printf("swpg_clean: %s of cleaning page %x\n",
		       rw == B_READ ? "get" : "put", m);
#endif
	return(tspc ? TRUE : FALSE);
}
762 
/*
 * Complete one asynchronous pageout: decrement the owning object's
 * paging_in_progress count, mark the page clean on success (or
 * relaunder it on error), and wake anyone waiting for the page.
 * Returns 1 when the entry was finished, 0 if the object lock could
 * not be acquired (caller retries later).
 */
static int
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	vm_object_t object = spc->spc_m->object;

	/*
	 * Mark the paging operation as done.
	 * (XXX) If we cannot get the lock, leave it til later.
	 * (XXX) Also we are assuming that an async write is a
	 *       pageout operation that has incremented the counter.
	 */
	if (!vm_object_lock_try(object))
		return(0);

	if (--object->paging_in_progress == 0)
		thread_wakeup((int) object);

#ifdef DEBUG
	/*
	 * XXX: this isn't even close to the right thing to do,
	 * introduces a variety of race conditions.
	 *
	 * If dirty, vm_pageout() has attempted to clean the page
	 * again.  In this case we do not do anything as we will
	 * see the page again shortly.
	 */
	if (spc->spc_flags & SPC_DIRTY) {
		if (swpagerdebug & SDB_ANOM)
			printf("swap_pager_finish: page %x dirty again\n",
			       spc->spc_m);
		spc->spc_m->flags &= ~PG_BUSY;
		PAGE_WAKEUP(spc->spc_m);
		vm_object_unlock(object);
		return(1);
	}
#endif
	/*
	 * If no error mark as clean and inform the pmap system.
	 * If error, mark as dirty so we will try again.
	 * (XXX could get stuck doing this, should give up after awhile)
	 */
	if (spc->spc_flags & SPC_ERROR) {
		printf("swap_pager_finish: clean of page %x failed\n",
		       VM_PAGE_TO_PHYS(spc->spc_m));
		spc->spc_m->flags |= PG_LAUNDRY;
	} else {
		spc->spc_m->flags |= PG_CLEAN;
		pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m));
	}
	spc->spc_m->flags &= ~PG_BUSY;
	PAGE_WAKEUP(spc->spc_m);

	vm_object_unlock(object);
	return(1);
}
819 
/*
 * Interrupt-time completion handler for async swap writes (B_CALL).
 * Finds the spc for this buffer, marks it SPC_DONE (SPC_ERROR on
 * failure), releases the buffer back to the swap buffer pool, and
 * wakes any waiters on the pager or the buffer list.  The spc itself
 * is finished later by swap_pager_clean() at non-interrupt time.
 */
static void
swap_pager_iodone(bp)
	register struct buf *bp;
{
	register swp_clean_t spc;
	daddr_t blk;
	int s;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return;
	if (swpagerdebug & SDB_FOLLOW)
		printf("swpg_iodone(%x)\n", bp);
#endif
	s = splbio();
	/* locate the cleaning entry this buffer belongs to */
	for (spc = swap_pager_inuse.tqh_first;
	     spc != NULL;
	     spc = spc->spc_list.tqe_next)
		if (spc->spc_bp == bp)
			break;
#ifdef DEBUG
	if (spc == NULL)
		panic("swap_pager_iodone: bp not found");
#endif

	spc->spc_flags &= ~SPC_BUSY;
	spc->spc_flags |= SPC_DONE;
	if (bp->b_flags & B_ERROR)
		spc->spc_flags |= SPC_ERROR;
	spc->spc_bp = NULL;
	blk = bp->b_blkno;

#ifdef DEBUG
	--swap_pager_poip;
	if (swpagerdebug & SDB_WRITE)
		printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n",
		       bp, spc->spc_swp, spc->spc_swp->sw_flags,
		       spc, spc->spc_swp->sw_poip);
#endif

	/* one fewer pageout in progress; wake a dealloc/io waiter */
	spc->spc_swp->sw_poip--;
	if (spc->spc_swp->sw_flags & SW_WANTED) {
		spc->spc_swp->sw_flags &= ~SW_WANTED;
		thread_wakeup((int)spc->spc_swp);
	}

	/* return the buffer to the swap buffer freelist */
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->b_actf = bswlist.b_actf;
	bswlist.b_actf = bp;
	if (bp->b_vp)
		brelvp(bp);
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		thread_wakeup((int)&bswlist);
	}
	/*
	 * Only kick the pageout daemon if we are really hurting
	 * for pages, otherwise this page will be picked up later.
	 */
	if (cnt.v_free_count < cnt.v_free_min)
		thread_wakeup((int) &vm_pages_needed);
	splx(s);
}
884