1 /*
2 * Copyright (c) 1990 University of Utah.
3 * Copyright (c) 1991, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * the Systems Programming Group of the University of Utah Computer
8 * Science Department.
9 *
10 * %sccs.include.redist.c%
11 *
12 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
13 *
14 * @(#)swap_pager.c 8.9 (Berkeley) 03/21/94
15 */
16
17 /*
18 * Quick hack to page to dedicated partition(s).
19 * TODO:
20 * Add multiprocessor locks
21 * Deal with async writes in a better fashion
22 */
23
24 #include <sys/param.h>
25 #include <sys/systm.h>
26 #include <sys/proc.h>
27 #include <sys/buf.h>
28 #include <sys/map.h>
29 #include <sys/vnode.h>
30 #include <sys/malloc.h>
31
32 #include <miscfs/specfs/specdev.h>
33
34 #include <vm/vm.h>
35 #include <vm/vm_page.h>
36 #include <vm/vm_pageout.h>
37 #include <vm/swap_pager.h>
38
39 #define NSWSIZES 16 /* size of swtab */
40 #define MAXDADDRS 64 /* max # of disk addrs for fixed allocations */
41 #ifndef NPENDINGIO
42 #define NPENDINGIO 64 /* max # of pending cleans */
43 #endif
44
45 #ifdef DEBUG
46 int swpagerdebug = 0x100;
47 #define SDB_FOLLOW 0x001
48 #define SDB_INIT 0x002
49 #define SDB_ALLOC 0x004
50 #define SDB_IO 0x008
51 #define SDB_WRITE 0x010
52 #define SDB_FAIL 0x020
53 #define SDB_ALLOCBLK 0x040
54 #define SDB_FULL 0x080
55 #define SDB_ANOM 0x100
56 #define SDB_ANOMPANIC 0x200
57 #define SDB_CLUSTER 0x400
58 #define SDB_PARANOIA 0x800
59 #endif
60
TAILQ_HEAD(swpclean, swpagerclean);

/*
 * Async-pageout ("cleaning") descriptor.  One entry tracks a single
 * in-flight asynchronous write so swap_pager_clean() can finish it
 * later.  All entries live in the statically sized swcleanlist array
 * and migrate between swap_pager_free and swap_pager_inuse.
 */
struct swpagerclean {
	TAILQ_ENTRY(swpagerclean) spc_list;	/* free/inuse list linkage */
	int spc_flags;				/* SPC_* state bits below */
	struct buf *spc_bp;			/* buf header driving the I/O */
	sw_pager_t spc_swp;			/* pager the pages belong to */
	vm_offset_t spc_kva;			/* KVA pages are mapped at */
	vm_page_t spc_m;			/* first page of the cluster */
	int spc_npages;				/* pages in this cluster */
} swcleanlist[NPENDINGIO];
typedef struct swpagerclean *swp_clean_t;
73
74 /* spc_flags values */
75 #define SPC_FREE 0x00
76 #define SPC_BUSY 0x01
77 #define SPC_DONE 0x02
78 #define SPC_ERROR 0x04
79
/*
 * Table mapping object-size ranges to the swap block size used for
 * objects in that range.  Filled in by swap_pager_init(); the final
 * entry has st_osize == 0 and acts as the catch-all for larger objects.
 */
struct swtab {
	vm_size_t st_osize;	/* size of object (bytes) */
	int	  st_bsize;	/* vs. size of swap block (DEV_BSIZE units) */
#ifdef DEBUG
	u_long	  st_inuse;	/* number in this range in use */
	u_long	  st_usecnt;	/* total used of this size */
#endif
} swtab[NSWSIZES+1];
88
#ifdef DEBUG
int swap_pager_poip;		/* pageouts in progress */
int swap_pager_piip;		/* pageins in progress */
#endif

int swap_pager_maxcluster;	/* maximum cluster size (bytes) */
int swap_pager_npendingio;	/* number of pager clean structs */

struct swpclean swap_pager_inuse;	/* list of pending page cleans */
struct swpclean swap_pager_free;	/* list of free pager clean structs */
struct pagerlst swap_pager_list;	/* list of "named" anon regions */
100
static void		swap_pager_init __P((void));
static vm_pager_t	swap_pager_alloc
			    __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
static void		swap_pager_clean __P((int));
#ifdef DEBUG
static void		swap_pager_clean_check __P((vm_page_t *, int, int));
#endif
static void		swap_pager_cluster
			    __P((vm_pager_t, vm_offset_t,
				 vm_offset_t *, vm_offset_t *));
static void		swap_pager_dealloc __P((vm_pager_t));
static int		swap_pager_getpage
			    __P((vm_pager_t, vm_page_t *, int, boolean_t));
static boolean_t	swap_pager_haspage __P((vm_pager_t, vm_offset_t));
static int		swap_pager_io __P((sw_pager_t, vm_page_t *, int, int));
static void		swap_pager_iodone __P((struct buf *));
static int		swap_pager_putpage
			    __P((vm_pager_t, vm_page_t *, int, boolean_t));

/*
 * Operations vector exported to the generic pager layer; entries
 * correspond one-for-one to the pager operations (init, alloc,
 * dealloc, getpage, putpage, haspage, cluster).
 */
struct pagerops swappagerops = {
	swap_pager_init,
	swap_pager_alloc,
	swap_pager_dealloc,
	swap_pager_getpage,
	swap_pager_putpage,
	swap_pager_haspage,
	swap_pager_cluster
};
129
/*
 * One-time initialization of the swap pager: installs swappagerops as
 * the default pager, sets up the free/inuse cleaning lists from the
 * static swcleanlist array, establishes the swap allocation constants
 * (dmmin/dmmax) and fills the swtab size table.
 */
static void
swap_pager_init()
{
	register swp_clean_t spc;
	register int i, bsize;
	extern int dmmin, dmmax;
	int maxbsize;

#ifdef DEBUG
	if (swpagerdebug & (SDB_FOLLOW|SDB_INIT))
		printf("swpg_init()\n");
#endif
	dfltpagerops = &swappagerops;
	TAILQ_INIT(&swap_pager_list);

	/*
	 * Allocate async IO structures.
	 *
	 * XXX it would be nice if we could do this dynamically based on
	 * the value of nswbuf (since we are ultimately limited by that)
	 * but neither nswbuf or malloc has been initialized yet.  So the
	 * structs are statically allocated above.
	 */
	swap_pager_npendingio = NPENDINGIO;

	/*
	 * Initialize clean lists: every cleaning descriptor starts out
	 * free.
	 */
	TAILQ_INIT(&swap_pager_inuse);
	TAILQ_INIT(&swap_pager_free);
	for (i = 0, spc = swcleanlist; i < swap_pager_npendingio; i++, spc++) {
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		spc->spc_flags = SPC_FREE;
	}

	/*
	 * Calculate the swap allocation constants.
	 * dmmin is clamped up to at least one page cluster's worth of
	 * DEV_BSIZE blocks.
	 */
	if (dmmin == 0) {
		dmmin = DMMIN;
		if (dmmin < CLBYTES/DEV_BSIZE)
			dmmin = CLBYTES/DEV_BSIZE;
	}
	if (dmmax == 0)
		dmmax = DMMAX;

	/*
	 * Fill in our table of object size vs. allocation size.
	 * Block sizes double at each entry, starting at one page
	 * (but at least dmmin) and capped by both dmmax and the
	 * number of pages a single swb_mask can describe.
	 */
	bsize = btodb(PAGE_SIZE);
	if (bsize < dmmin)
		bsize = dmmin;
	maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE);
	if (maxbsize > dmmax)
		maxbsize = dmmax;
	for (i = 0; i < NSWSIZES; i++) {
		swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize));
		swtab[i].st_bsize = bsize;
		/* remember the largest block size a single I/O can cover */
		if (bsize <= btodb(MAXPHYS))
			swap_pager_maxcluster = dbtob(bsize);
#ifdef DEBUG
		if (swpagerdebug & SDB_INIT)
			printf("swpg_init: ix %d, size %x, bsize %x\n",
			       i, swtab[i].st_osize, swtab[i].st_bsize);
#endif
		if (bsize >= maxbsize)
			break;
		bsize *= 2;
	}
	/* terminating catch-all entry for objects larger than any range */
	swtab[i].st_osize = 0;
	swtab[i].st_bsize = bsize;
}
202
203 /*
204 * Allocate a pager structure and associated resources.
205 * Note that if we are called from the pageout daemon (handle == NULL)
206 * we should not wait for memory as it could resulting in deadlock.
207 */
208 static vm_pager_t
swap_pager_alloc(handle,size,prot,foff)209 swap_pager_alloc(handle, size, prot, foff)
210 caddr_t handle;
211 register vm_size_t size;
212 vm_prot_t prot;
213 vm_offset_t foff;
214 {
215 register vm_pager_t pager;
216 register sw_pager_t swp;
217 struct swtab *swt;
218 int waitok;
219
220 #ifdef DEBUG
221 if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
222 printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot);
223 #endif
224 /*
225 * If this is a "named" anonymous region, look it up and
226 * return the appropriate pager if it exists.
227 */
228 if (handle) {
229 pager = vm_pager_lookup(&swap_pager_list, handle);
230 if (pager != NULL) {
231 /*
232 * Use vm_object_lookup to gain a reference
233 * to the object and also to remove from the
234 * object cache.
235 */
236 if (vm_object_lookup(pager) == NULL)
237 panic("swap_pager_alloc: bad object");
238 return(pager);
239 }
240 }
241 /*
242 * Pager doesn't exist, allocate swap management resources
243 * and initialize.
244 */
245 waitok = handle ? M_WAITOK : M_NOWAIT;
246 pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
247 if (pager == NULL)
248 return(NULL);
249 swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
250 if (swp == NULL) {
251 #ifdef DEBUG
252 if (swpagerdebug & SDB_FAIL)
253 printf("swpg_alloc: swpager malloc failed\n");
254 #endif
255 free((caddr_t)pager, M_VMPAGER);
256 return(NULL);
257 }
258 size = round_page(size);
259 for (swt = swtab; swt->st_osize; swt++)
260 if (size <= swt->st_osize)
261 break;
262 #ifdef DEBUG
263 swt->st_inuse++;
264 swt->st_usecnt++;
265 #endif
266 swp->sw_osize = size;
267 swp->sw_bsize = swt->st_bsize;
268 swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize;
269 swp->sw_blocks = (sw_blk_t)
270 malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
271 M_VMPGDATA, M_NOWAIT);
272 if (swp->sw_blocks == NULL) {
273 free((caddr_t)swp, M_VMPGDATA);
274 free((caddr_t)pager, M_VMPAGER);
275 #ifdef DEBUG
276 if (swpagerdebug & SDB_FAIL)
277 printf("swpg_alloc: sw_blocks malloc failed\n");
278 swt->st_inuse--;
279 swt->st_usecnt--;
280 #endif
281 return(FALSE);
282 }
283 bzero((caddr_t)swp->sw_blocks,
284 swp->sw_nblocks * sizeof(*swp->sw_blocks));
285 swp->sw_poip = 0;
286 if (handle) {
287 vm_object_t object;
288
289 swp->sw_flags = SW_NAMED;
290 TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
291 /*
292 * Consistant with other pagers: return with object
293 * referenced. Can't do this with handle == NULL
294 * since it might be the pageout daemon calling.
295 */
296 object = vm_object_allocate(size);
297 vm_object_enter(object, pager);
298 vm_object_setpager(object, pager, 0, FALSE);
299 } else {
300 swp->sw_flags = 0;
301 pager->pg_list.tqe_next = NULL;
302 pager->pg_list.tqe_prev = NULL;
303 }
304 pager->pg_handle = handle;
305 pager->pg_ops = &swappagerops;
306 pager->pg_type = PG_SWAP;
307 pager->pg_flags = PG_CLUSTERPUT;
308 pager->pg_data = swp;
309
310 #ifdef DEBUG
311 if (swpagerdebug & SDB_ALLOC)
312 printf("swpg_alloc: pg_data %x, %x of %x at %x\n",
313 swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks);
314 #endif
315 return(pager);
316 }
317
/*
 * Tear down a swap pager: unlink it from the named-pager list, wait
 * for any in-progress pageouts, return its swap blocks to the swap
 * map and free all management structures.
 */
static void
swap_pager_dealloc(pager)
	vm_pager_t pager;
{
	register int i;
	register sw_blk_t bp;
	register sw_pager_t swp;
	struct swtab *swt;
	int s;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return;
	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
		printf("swpg_dealloc(%x)\n", pager);
#endif
	/*
	 * Remove from list right away so lookups will fail if we
	 * block for pageout completion.
	 */
	swp = (sw_pager_t) pager->pg_data;
	if (swp->sw_flags & SW_NAMED) {
		TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
		swp->sw_flags &= ~SW_NAMED;
	}
#ifdef DEBUG
	/* find this object's size class again to balance st_inuse */
	for (swt = swtab; swt->st_osize; swt++)
		if (swp->sw_osize <= swt->st_osize)
			break;
	swt->st_inuse--;
#endif

	/*
	 * Wait for all pageouts to finish and remove
	 * all entries from cleaning list.
	 * swap_pager_iodone wakes us when sw_poip drops.
	 */
	s = splbio();
	while (swp->sw_poip) {
		swp->sw_flags |= SW_WANTED;
		(void) tsleep(swp, PVM, "swpgdealloc", 0);
	}
	splx(s);
	swap_pager_clean(B_WRITE);

	/*
	 * Free left over swap blocks
	 * (swb_block == 0 means the block was never allocated).
	 */
	for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++)
		if (bp->swb_block) {
#ifdef DEBUG
			if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL))
				printf("swpg_dealloc: blk %x\n",
				       bp->swb_block);
#endif
			rmfree(swapmap, swp->sw_bsize, bp->swb_block);
		}
	/*
	 * Free swap management resources
	 */
	free((caddr_t)swp->sw_blocks, M_VMPGDATA);
	free((caddr_t)swp, M_VMPGDATA);
	free((caddr_t)pager, M_VMPAGER);
}
382
383 static int
swap_pager_getpage(pager,mlist,npages,sync)384 swap_pager_getpage(pager, mlist, npages, sync)
385 vm_pager_t pager;
386 vm_page_t *mlist;
387 int npages;
388 boolean_t sync;
389 {
390 #ifdef DEBUG
391 if (swpagerdebug & SDB_FOLLOW)
392 printf("swpg_getpage(%x, %x, %x, %x)\n",
393 pager, mlist, npages, sync);
394 #endif
395 return(swap_pager_io((sw_pager_t)pager->pg_data,
396 mlist, npages, B_READ));
397 }
398
399 static int
swap_pager_putpage(pager,mlist,npages,sync)400 swap_pager_putpage(pager, mlist, npages, sync)
401 vm_pager_t pager;
402 vm_page_t *mlist;
403 int npages;
404 boolean_t sync;
405 {
406 int flags;
407
408 #ifdef DEBUG
409 if (swpagerdebug & SDB_FOLLOW)
410 printf("swpg_putpage(%x, %x, %x, %x)\n",
411 pager, mlist, npages, sync);
412 #endif
413 if (pager == NULL) {
414 swap_pager_clean(B_WRITE);
415 return (VM_PAGER_OK); /* ??? */
416 }
417 flags = B_WRITE;
418 if (!sync)
419 flags |= B_ASYNC;
420 return(swap_pager_io((sw_pager_t)pager->pg_data,
421 mlist, npages, flags));
422 }
423
424 static boolean_t
swap_pager_haspage(pager,offset)425 swap_pager_haspage(pager, offset)
426 vm_pager_t pager;
427 vm_offset_t offset;
428 {
429 register sw_pager_t swp;
430 register sw_blk_t swb;
431 int ix;
432
433 #ifdef DEBUG
434 if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
435 printf("swpg_haspage(%x, %x) ", pager, offset);
436 #endif
437 swp = (sw_pager_t) pager->pg_data;
438 ix = offset / dbtob(swp->sw_bsize);
439 if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
440 #ifdef DEBUG
441 if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK))
442 printf("swpg_haspage: %x bad offset %x, ix %x\n",
443 swp->sw_blocks, offset, ix);
444 #endif
445 return(FALSE);
446 }
447 swb = &swp->sw_blocks[ix];
448 if (swb->swb_block)
449 ix = atop(offset % dbtob(swp->sw_bsize));
450 #ifdef DEBUG
451 if (swpagerdebug & SDB_ALLOCBLK)
452 printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix);
453 if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
454 printf("-> %c\n",
455 "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]);
456 #endif
457 if (swb->swb_block && (swb->swb_mask & (1 << ix)))
458 return(TRUE);
459 return(FALSE);
460 }
461
462 static void
swap_pager_cluster(pager,offset,loffset,hoffset)463 swap_pager_cluster(pager, offset, loffset, hoffset)
464 vm_pager_t pager;
465 vm_offset_t offset;
466 vm_offset_t *loffset;
467 vm_offset_t *hoffset;
468 {
469 sw_pager_t swp;
470 register int bsize;
471 vm_offset_t loff, hoff;
472
473 #ifdef DEBUG
474 if (swpagerdebug & (SDB_FOLLOW|SDB_CLUSTER))
475 printf("swpg_cluster(%x, %x) ", pager, offset);
476 #endif
477 swp = (sw_pager_t) pager->pg_data;
478 bsize = dbtob(swp->sw_bsize);
479 if (bsize > swap_pager_maxcluster)
480 bsize = swap_pager_maxcluster;
481
482 loff = offset - (offset % bsize);
483 if (loff >= swp->sw_osize)
484 panic("swap_pager_cluster: bad offset");
485
486 hoff = loff + bsize;
487 if (hoff > swp->sw_osize)
488 hoff = swp->sw_osize;
489
490 *loffset = loff;
491 *hoffset = hoff;
492 #ifdef DEBUG
493 if (swpagerdebug & (SDB_FOLLOW|SDB_CLUSTER))
494 printf("returns [%x-%x]\n", loff, hoff);
495 #endif
496 }
497
498 /*
499 * Scaled down version of swap().
500 * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed.
501 * BOGUS: lower level IO routines expect a KVA so we have to map our
502 * provided physical page into the KVA to keep them happy.
503 */
504 static int
swap_pager_io(swp,mlist,npages,flags)505 swap_pager_io(swp, mlist, npages, flags)
506 register sw_pager_t swp;
507 vm_page_t *mlist;
508 int npages;
509 int flags;
510 {
511 register struct buf *bp;
512 register sw_blk_t swb;
513 register int s;
514 int ix, mask;
515 boolean_t rv;
516 vm_offset_t kva, off;
517 swp_clean_t spc;
518 vm_page_t m;
519
520 #ifdef DEBUG
521 /* save panic time state */
522 if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
523 return (VM_PAGER_FAIL); /* XXX: correct return? */
524 if (swpagerdebug & (SDB_FOLLOW|SDB_IO))
525 printf("swpg_io(%x, %x, %x, %x)\n", swp, mlist, npages, flags);
526 if (flags & B_READ) {
527 if (flags & B_ASYNC)
528 panic("swap_pager_io: cannot do ASYNC reads");
529 if (npages != 1)
530 panic("swap_pager_io: cannot do clustered reads");
531 }
532 #endif
533
534 /*
535 * First determine if the page exists in the pager if this is
536 * a sync read. This quickly handles cases where we are
537 * following shadow chains looking for the top level object
538 * with the page.
539 */
540 m = *mlist;
541 off = m->offset + m->object->paging_offset;
542 ix = off / dbtob(swp->sw_bsize);
543 if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
544 #ifdef DEBUG
545 if ((flags & B_READ) == 0 && (swpagerdebug & SDB_ANOM)) {
546 printf("swap_pager_io: no swap block on write\n");
547 return(VM_PAGER_BAD);
548 }
549 #endif
550 return(VM_PAGER_FAIL);
551 }
552 swb = &swp->sw_blocks[ix];
553 off = off % dbtob(swp->sw_bsize);
554 if ((flags & B_READ) &&
555 (swb->swb_block == 0 || (swb->swb_mask & (1 << atop(off))) == 0))
556 return(VM_PAGER_FAIL);
557
558 /*
559 * For reads (pageins) and synchronous writes, we clean up
560 * all completed async pageouts.
561 */
562 if ((flags & B_ASYNC) == 0) {
563 s = splbio();
564 swap_pager_clean(flags&B_READ);
565 #ifdef DEBUG
566 if (swpagerdebug & SDB_PARANOIA)
567 swap_pager_clean_check(mlist, npages, flags&B_READ);
568 #endif
569 splx(s);
570 }
571 /*
572 * For async writes (pageouts), we cleanup completed pageouts so
573 * that all available resources are freed. Also tells us if this
574 * page is already being cleaned. If it is, or no resources
575 * are available, we try again later.
576 */
577 else {
578 swap_pager_clean(B_WRITE);
579 #ifdef DEBUG
580 if (swpagerdebug & SDB_PARANOIA)
581 swap_pager_clean_check(mlist, npages, B_WRITE);
582 #endif
583 if (swap_pager_free.tqh_first == NULL) {
584 #ifdef DEBUG
585 if (swpagerdebug & SDB_FAIL)
586 printf("%s: no available io headers\n",
587 "swap_pager_io");
588 #endif
589 return(VM_PAGER_AGAIN);
590 }
591 }
592
593 /*
594 * Allocate a swap block if necessary.
595 */
596 if (swb->swb_block == 0) {
597 swb->swb_block = rmalloc(swapmap, swp->sw_bsize);
598 if (swb->swb_block == 0) {
599 #ifdef DEBUG
600 if (swpagerdebug & SDB_FAIL)
601 printf("swpg_io: rmalloc of %x failed\n",
602 swp->sw_bsize);
603 #endif
604 /*
605 * XXX this is technically a resource shortage that
606 * should return AGAIN, but the situation isn't likely
607 * to be remedied just by delaying a little while and
608 * trying again (the pageout daemon's current response
609 * to AGAIN) so we just return FAIL.
610 */
611 return(VM_PAGER_FAIL);
612 }
613 #ifdef DEBUG
614 if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK))
615 printf("swpg_io: %x alloc blk %x at ix %x\n",
616 swp->sw_blocks, swb->swb_block, ix);
617 #endif
618 }
619
620 /*
621 * Allocate a kernel virtual address and initialize so that PTE
622 * is available for lower level IO drivers.
623 */
624 kva = vm_pager_map_pages(mlist, npages, !(flags & B_ASYNC));
625 if (kva == NULL) {
626 #ifdef DEBUG
627 if (swpagerdebug & SDB_FAIL)
628 printf("%s: no KVA space to map pages\n",
629 "swap_pager_io");
630 #endif
631 return(VM_PAGER_AGAIN);
632 }
633
634 /*
635 * Get a swap buffer header and initialize it.
636 */
637 s = splbio();
638 while (bswlist.b_actf == NULL) {
639 #ifdef DEBUG
640 if (swpagerdebug & SDB_ANOM)
641 printf("swap_pager_io: wait on swbuf for %x (%d)\n",
642 m, flags);
643 #endif
644 bswlist.b_flags |= B_WANTED;
645 tsleep((caddr_t)&bswlist, PSWP+1, "swpgiobuf", 0);
646 }
647 bp = bswlist.b_actf;
648 bswlist.b_actf = bp->b_actf;
649 splx(s);
650 bp->b_flags = B_BUSY | (flags & B_READ);
651 bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */
652 bp->b_data = (caddr_t)kva;
653 bp->b_blkno = swb->swb_block + btodb(off);
654 VHOLD(swapdev_vp);
655 bp->b_vp = swapdev_vp;
656 if (swapdev_vp->v_type == VBLK)
657 bp->b_dev = swapdev_vp->v_rdev;
658 bp->b_bcount = npages * PAGE_SIZE;
659
660 /*
661 * For writes we set up additional buffer fields, record a pageout
662 * in progress and mark that these swap blocks are now allocated.
663 */
664 if ((bp->b_flags & B_READ) == 0) {
665 bp->b_dirtyoff = 0;
666 bp->b_dirtyend = npages * PAGE_SIZE;
667 swapdev_vp->v_numoutput++;
668 s = splbio();
669 swp->sw_poip++;
670 splx(s);
671 mask = (~(~0 << npages)) << atop(off);
672 #ifdef DEBUG
673 swap_pager_poip++;
674 if (swpagerdebug & SDB_WRITE)
675 printf("swpg_io: write: bp=%x swp=%x poip=%d\n",
676 bp, swp, swp->sw_poip);
677 if ((swpagerdebug & SDB_ALLOCBLK) &&
678 (swb->swb_mask & mask) != mask)
679 printf("swpg_io: %x write %d pages at %x+%x\n",
680 swp->sw_blocks, npages, swb->swb_block,
681 atop(off));
682 if (swpagerdebug & SDB_CLUSTER)
683 printf("swpg_io: off=%x, npg=%x, mask=%x, bmask=%x\n",
684 off, npages, mask, swb->swb_mask);
685 #endif
686 swb->swb_mask |= mask;
687 }
688 /*
689 * If this is an async write we set up still more buffer fields
690 * and place a "cleaning" entry on the inuse queue.
691 */
692 if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
693 #ifdef DEBUG
694 if (swap_pager_free.tqh_first == NULL)
695 panic("swpg_io: lost spc");
696 #endif
697 spc = swap_pager_free.tqh_first;
698 TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
699 #ifdef DEBUG
700 if (spc->spc_flags != SPC_FREE)
701 panic("swpg_io: bad free spc");
702 #endif
703 spc->spc_flags = SPC_BUSY;
704 spc->spc_bp = bp;
705 spc->spc_swp = swp;
706 spc->spc_kva = kva;
707 /*
708 * Record the first page. This allows swap_pager_clean
709 * to efficiently handle the common case of a single page.
710 * For clusters, it allows us to locate the object easily
711 * and we then reconstruct the rest of the mlist from spc_kva.
712 */
713 spc->spc_m = m;
714 spc->spc_npages = npages;
715 bp->b_flags |= B_CALL;
716 bp->b_iodone = swap_pager_iodone;
717 s = splbio();
718 TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
719 splx(s);
720 }
721
722 /*
723 * Finally, start the IO operation.
724 * If it is async we are all done, otherwise we must wait for
725 * completion and cleanup afterwards.
726 */
727 #ifdef DEBUG
728 if (swpagerdebug & SDB_IO)
729 printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n",
730 bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m));
731 #endif
732 VOP_STRATEGY(bp);
733 if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
734 #ifdef DEBUG
735 if (swpagerdebug & SDB_IO)
736 printf("swpg_io: IO started: bp %x\n", bp);
737 #endif
738 return(VM_PAGER_PEND);
739 }
740 s = splbio();
741 #ifdef DEBUG
742 if (flags & B_READ)
743 swap_pager_piip++;
744 else
745 swap_pager_poip++;
746 #endif
747 while ((bp->b_flags & B_DONE) == 0)
748 (void) tsleep(bp, PVM, "swpgio", 0);
749 if ((flags & B_READ) == 0)
750 --swp->sw_poip;
751 #ifdef DEBUG
752 if (flags & B_READ)
753 --swap_pager_piip;
754 else
755 --swap_pager_poip;
756 #endif
757 rv = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK;
758 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
759 bp->b_actf = bswlist.b_actf;
760 bswlist.b_actf = bp;
761 if (bp->b_vp)
762 brelvp(bp);
763 if (bswlist.b_flags & B_WANTED) {
764 bswlist.b_flags &= ~B_WANTED;
765 wakeup(&bswlist);
766 }
767 if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) {
768 m->flags |= PG_CLEAN;
769 pmap_clear_modify(VM_PAGE_TO_PHYS(m));
770 }
771 splx(s);
772 #ifdef DEBUG
773 if (swpagerdebug & SDB_IO)
774 printf("swpg_io: IO done: bp %x, rv %d\n", bp, rv);
775 if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_ERROR)
776 printf("swpg_io: IO error\n");
777 #endif
778 vm_pager_unmap_pages(kva, npages);
779 return(rv);
780 }
781
/*
 * Finish up completed asynchronous pageouts: for every cleaning entry
 * marked SPC_DONE whose object lock can be acquired, mark its pages
 * clean (or dirty again on error), release the KVA mapping and return
 * the entry to the free list.  Loops until no more completed entries
 * can be processed.  The rw argument is used only for debug tracing.
 */
static void
swap_pager_clean(rw)
	int rw;
{
	register swp_clean_t spc;
	register int s, i;
	vm_object_t object;
	vm_page_t m;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return;
	if (swpagerdebug & SDB_FOLLOW)
		printf("swpg_clean(%x)\n", rw);
#endif

	for (;;) {
		/*
		 * Look up and removal from inuse list must be done
		 * at splbio() to avoid conflicts with swap_pager_iodone.
		 */
		s = splbio();
		for (spc = swap_pager_inuse.tqh_first;
		     spc != NULL;
		     spc = spc->spc_list.tqe_next) {
			/*
			 * If the operation is done, remove it from the
			 * list and process it.
			 *
			 * XXX if we can't get the object lock we also
			 * leave it on the list and try again later.
			 * Is there something better we could do?
			 */
			if ((spc->spc_flags & SPC_DONE) &&
			    vm_object_lock_try(spc->spc_m->object)) {
				TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
				break;
			}
		}
		splx(s);

		/*
		 * No operations done, thats all we can do for now.
		 */
		if (spc == NULL)
			break;

		/*
		 * Found a completed operation so finish it off.
		 * Note: no longer at splbio since entry is off the list.
		 */
		m = spc->spc_m;
		object = m->object;

		/*
		 * Process each page in the cluster.
		 * The first page is explicitly kept in the cleaning
		 * entry, others must be reconstructed from the KVA.
		 */
		for (i = 0; i < spc->spc_npages; i++) {
			if (i)
				m = vm_pager_atop(spc->spc_kva + ptoa(i));
			/*
			 * If no error mark as clean and inform the pmap
			 * system.  If there was an error, mark as dirty
			 * so we will try again.
			 *
			 * XXX could get stuck doing this, should give up
			 * after awhile.
			 */
			if (spc->spc_flags & SPC_ERROR) {
				printf("%s: clean of page %x failed\n",
				       "swap_pager_clean",
				       VM_PAGE_TO_PHYS(m));
				m->flags |= PG_LAUNDRY;
			} else {
				m->flags |= PG_CLEAN;
				pmap_clear_modify(VM_PAGE_TO_PHYS(m));
			}
			m->flags &= ~PG_BUSY;
			PAGE_WAKEUP(m);
		}

		/*
		 * Done with the object, decrement the paging count
		 * and unlock it.
		 */
		if (--object->paging_in_progress == 0)
			wakeup(object);
		vm_object_unlock(object);

		/*
		 * Free up KVM used and put the entry back on the list.
		 */
		vm_pager_unmap_pages(spc->spc_kva, spc->spc_npages);
		spc->spc_flags = SPC_FREE;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
#ifdef DEBUG
		if (swpagerdebug & SDB_WRITE)
			printf("swpg_clean: free spc %x\n", spc);
#endif
	}
}
886
887 #ifdef DEBUG
888 static void
swap_pager_clean_check(mlist,npages,rw)889 swap_pager_clean_check(mlist, npages, rw)
890 vm_page_t *mlist;
891 int npages;
892 int rw;
893 {
894 register swp_clean_t spc;
895 boolean_t bad;
896 int i, j, s;
897 vm_page_t m;
898
899 if (panicstr)
900 return;
901
902 bad = FALSE;
903 s = splbio();
904 for (spc = swap_pager_inuse.tqh_first;
905 spc != NULL;
906 spc = spc->spc_list.tqe_next) {
907 for (j = 0; j < spc->spc_npages; j++) {
908 m = vm_pager_atop(spc->spc_kva + ptoa(j));
909 for (i = 0; i < npages; i++)
910 if (m == mlist[i]) {
911 if (swpagerdebug & SDB_ANOM)
912 printf(
913 "swpg_clean_check: %s: page %x on list, flags %x\n",
914 rw == B_WRITE ? "write" : "read", mlist[i], spc->spc_flags);
915 bad = TRUE;
916 }
917 }
918 }
919 splx(s);
920 if (bad)
921 panic("swpg_clean_check");
922 }
923 #endif
924
/*
 * Async write completion, called at interrupt time via B_CALL.
 * Marks the matching cleaning entry done (or in error), recycles the
 * buffer header onto bswlist and wakes up anyone waiting on the pager
 * (swap_pager_dealloc), the buffer free list, or the pageout daemon.
 * Runs entirely at splbio; must not block.
 */
static void
swap_pager_iodone(bp)
	register struct buf *bp;
{
	register swp_clean_t spc;
	daddr_t blk;
	int s;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return;
	if (swpagerdebug & SDB_FOLLOW)
		printf("swpg_iodone(%x)\n", bp);
#endif
	s = splbio();
	/* locate the cleaning entry that owns this buffer */
	for (spc = swap_pager_inuse.tqh_first;
	     spc != NULL;
	     spc = spc->spc_list.tqe_next)
		if (spc->spc_bp == bp)
			break;
#ifdef DEBUG
	if (spc == NULL)
		panic("swap_pager_iodone: bp not found");
#endif

	spc->spc_flags &= ~SPC_BUSY;
	spc->spc_flags |= SPC_DONE;
	if (bp->b_flags & B_ERROR)
		spc->spc_flags |= SPC_ERROR;
	spc->spc_bp = NULL;
	blk = bp->b_blkno;

#ifdef DEBUG
	--swap_pager_poip;
	if (swpagerdebug & SDB_WRITE)
		printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n",
		       bp, spc->spc_swp, spc->spc_swp->sw_flags,
		       spc, spc->spc_swp->sw_poip);
#endif

	spc->spc_swp->sw_poip--;
	/* wake swap_pager_dealloc if it is waiting for pageouts to drain */
	if (spc->spc_swp->sw_flags & SW_WANTED) {
		spc->spc_swp->sw_flags &= ~SW_WANTED;
		wakeup(spc->spc_swp);
	}

	/* return the buffer header to the swap buffer free list */
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->b_actf = bswlist.b_actf;
	bswlist.b_actf = bp;
	if (bp->b_vp)
		brelvp(bp);
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		wakeup(&bswlist);
	}
	/* nudge the pageout daemon -- a cleaning entry just became free */
	wakeup(&vm_pages_needed);
	splx(s);
}
984