1 /* 2 * Copyright (c) 1990 University of Utah. 3 * Copyright (c) 1991, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * the Systems Programming Group of the University of Utah Computer 8 * Science Department. 9 * 10 * %sccs.include.redist.c% 11 * 12 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ 13 * 14 * @(#)swap_pager.c 8.8 (Berkeley) 01/13/94 15 */ 16 17 /* 18 * Quick hack to page to dedicated partition(s). 19 * TODO: 20 * Add multiprocessor locks 21 * Deal with async writes in a better fashion 22 */ 23 24 #include <sys/param.h> 25 #include <sys/systm.h> 26 #include <sys/proc.h> 27 #include <sys/buf.h> 28 #include <sys/map.h> 29 #include <sys/vnode.h> 30 #include <sys/malloc.h> 31 32 #include <miscfs/specfs/specdev.h> 33 34 #include <vm/vm.h> 35 #include <vm/vm_page.h> 36 #include <vm/vm_pageout.h> 37 #include <vm/swap_pager.h> 38 39 #define NSWSIZES 16 /* size of swtab */ 40 #define MAXDADDRS 64 /* max # of disk addrs for fixed allocations */ 41 #ifndef NPENDINGIO 42 #define NPENDINGIO 64 /* max # of pending cleans */ 43 #endif 44 45 #ifdef DEBUG 46 int swpagerdebug = 0x100; 47 #define SDB_FOLLOW 0x001 48 #define SDB_INIT 0x002 49 #define SDB_ALLOC 0x004 50 #define SDB_IO 0x008 51 #define SDB_WRITE 0x010 52 #define SDB_FAIL 0x020 53 #define SDB_ALLOCBLK 0x040 54 #define SDB_FULL 0x080 55 #define SDB_ANOM 0x100 56 #define SDB_ANOMPANIC 0x200 57 #define SDB_CLUSTER 0x400 58 #define SDB_PARANOIA 0x800 59 #endif 60 61 TAILQ_HEAD(swpclean, swpagerclean); 62 63 struct swpagerclean { 64 TAILQ_ENTRY(swpagerclean) spc_list; 65 int spc_flags; 66 struct buf *spc_bp; 67 sw_pager_t spc_swp; 68 vm_offset_t spc_kva; 69 vm_page_t spc_m; 70 int spc_npages; 71 } swcleanlist[NPENDINGIO]; 72 typedef struct swpagerclean *swp_clean_t; 73 74 /* spc_flags values */ 75 #define SPC_FREE 0x00 76 #define SPC_BUSY 0x01 77 #define SPC_DONE 0x02 78 #define SPC_ERROR 0x04 79 80 struct swtab { 81 vm_size_t st_osize; /* size of object (bytes) */ 82 int st_bsize; /* vs. size of swap block (DEV_BSIZE units) */ 83 #ifdef DEBUG 84 u_long st_inuse; /* number in this range in use */ 85 u_long st_usecnt; /* total used of this size */ 86 #endif 87 } swtab[NSWSIZES+1]; 88 89 #ifdef DEBUG 90 int swap_pager_poip; /* pageouts in progress */ 91 int swap_pager_piip; /* pageins in progress */ 92 #endif 93 94 int swap_pager_maxcluster; /* maximum cluster size */ 95 int swap_pager_npendingio; /* number of pager clean structs */ 96 97 struct swpclean swap_pager_inuse; /* list of pending page cleans */ 98 struct swpclean swap_pager_free; /* list of free pager clean structs */ 99 struct pagerlst swap_pager_list; /* list of "named" anon regions */ 100 101 static void swap_pager_init __P((void)); 102 static vm_pager_t swap_pager_alloc 103 __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t)); 104 static void swap_pager_clean __P((int)); 105 #ifdef DEBUG 106 static void swap_pager_clean_check __P((vm_page_t *, int, int)); 107 #endif 108 static void swap_pager_cluster 109 __P((vm_pager_t, vm_offset_t, 110 vm_offset_t *, vm_offset_t *)); 111 static void swap_pager_dealloc __P((vm_pager_t)); 112 static int swap_pager_getpage 113 __P((vm_pager_t, vm_page_t *, int, boolean_t)); 114 static boolean_t swap_pager_haspage __P((vm_pager_t, vm_offset_t)); 115 static int swap_pager_io __P((sw_pager_t, vm_page_t *, int, int)); 116 static void swap_pager_iodone __P((struct buf *)); 117 static int swap_pager_putpage 118 __P((vm_pager_t, vm_page_t *, int, boolean_t)); 119 120 struct pagerops swappagerops = { 121 swap_pager_init, 122 swap_pager_alloc, 123 swap_pager_dealloc, 124 swap_pager_getpage, 125 swap_pager_putpage, 126 swap_pager_haspage, 127 swap_pager_cluster 128 }; 129 130 static void 131 swap_pager_init() 132 { 133 register swp_clean_t spc; 134 register int i, bsize; 135 extern int dmmin, dmmax; 136 int maxbsize; 137 138 #ifdef DEBUG 139 if (swpagerdebug & (SDB_FOLLOW|SDB_INIT)) 140 printf("swpg_init()\n"); 141 #endif 142 dfltpagerops = &swappagerops; 143 TAILQ_INIT(&swap_pager_list); 144 145 /* 146 * Allocate async IO structures. 147 * 148 * XXX it would be nice if we could do this dynamically based on 149 * the value of nswbuf (since we are ultimately limited by that) 150 * but neither nswbuf or malloc has been initialized yet. So the 151 * structs are statically allocated above. 152 */ 153 swap_pager_npendingio = NPENDINGIO; 154 155 /* 156 * Initialize clean lists 157 */ 158 TAILQ_INIT(&swap_pager_inuse); 159 TAILQ_INIT(&swap_pager_free); 160 for (i = 0, spc = swcleanlist; i < swap_pager_npendingio; i++, spc++) { 161 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 162 spc->spc_flags = SPC_FREE; 163 } 164 165 /* 166 * Calculate the swap allocation constants. 167 */ 168 if (dmmin == 0) { 169 dmmin = DMMIN; 170 if (dmmin < CLBYTES/DEV_BSIZE) 171 dmmin = CLBYTES/DEV_BSIZE; 172 } 173 if (dmmax == 0) 174 dmmax = DMMAX; 175 176 /* 177 * Fill in our table of object size vs. allocation size 178 */ 179 bsize = btodb(PAGE_SIZE); 180 if (bsize < dmmin) 181 bsize = dmmin; 182 maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE); 183 if (maxbsize > dmmax) 184 maxbsize = dmmax; 185 for (i = 0; i < NSWSIZES; i++) { 186 swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize)); 187 swtab[i].st_bsize = bsize; 188 if (bsize <= btodb(MAXPHYS)) 189 swap_pager_maxcluster = dbtob(bsize); 190 #ifdef DEBUG 191 if (swpagerdebug & SDB_INIT) 192 printf("swpg_init: ix %d, size %x, bsize %x\n", 193 i, swtab[i].st_osize, swtab[i].st_bsize); 194 #endif 195 if (bsize >= maxbsize) 196 break; 197 bsize *= 2; 198 } 199 swtab[i].st_osize = 0; 200 swtab[i].st_bsize = bsize; 201 } 202 203 /* 204 * Allocate a pager structure and associated resources. 205 * Note that if we are called from the pageout daemon (handle == NULL) 206 * we should not wait for memory as it could resulting in deadlock. 207 */ 208 static vm_pager_t 209 swap_pager_alloc(handle, size, prot, foff) 210 caddr_t handle; 211 register vm_size_t size; 212 vm_prot_t prot; 213 vm_offset_t foff; 214 { 215 register vm_pager_t pager; 216 register sw_pager_t swp; 217 struct swtab *swt; 218 int waitok; 219 220 #ifdef DEBUG 221 if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC)) 222 printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot); 223 #endif 224 /* 225 * If this is a "named" anonymous region, look it up and 226 * return the appropriate pager if it exists. 227 */ 228 if (handle) { 229 pager = vm_pager_lookup(&swap_pager_list, handle); 230 if (pager != NULL) { 231 /* 232 * Use vm_object_lookup to gain a reference 233 * to the object and also to remove from the 234 * object cache. 235 */ 236 if (vm_object_lookup(pager) == NULL) 237 panic("swap_pager_alloc: bad object"); 238 return(pager); 239 } 240 } 241 /* 242 * Pager doesn't exist, allocate swap management resources 243 * and initialize. 244 */ 245 waitok = handle ? M_WAITOK : M_NOWAIT; 246 pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok); 247 if (pager == NULL) 248 return(NULL); 249 swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok); 250 if (swp == NULL) { 251 #ifdef DEBUG 252 if (swpagerdebug & SDB_FAIL) 253 printf("swpg_alloc: swpager malloc failed\n"); 254 #endif 255 free((caddr_t)pager, M_VMPAGER); 256 return(NULL); 257 } 258 size = round_page(size); 259 for (swt = swtab; swt->st_osize; swt++) 260 if (size <= swt->st_osize) 261 break; 262 #ifdef DEBUG 263 swt->st_inuse++; 264 swt->st_usecnt++; 265 #endif 266 swp->sw_osize = size; 267 swp->sw_bsize = swt->st_bsize; 268 swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize; 269 swp->sw_blocks = (sw_blk_t) 270 malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks), 271 M_VMPGDATA, M_NOWAIT); 272 if (swp->sw_blocks == NULL) { 273 free((caddr_t)swp, M_VMPGDATA); 274 free((caddr_t)pager, M_VMPAGER); 275 #ifdef DEBUG 276 if (swpagerdebug & SDB_FAIL) 277 printf("swpg_alloc: sw_blocks malloc failed\n"); 278 swt->st_inuse--; 279 swt->st_usecnt--; 280 #endif 281 return(FALSE); 282 } 283 bzero((caddr_t)swp->sw_blocks, 284 swp->sw_nblocks * sizeof(*swp->sw_blocks)); 285 swp->sw_poip = 0; 286 if (handle) { 287 vm_object_t object; 288 289 swp->sw_flags = SW_NAMED; 290 TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list); 291 /* 292 * Consistant with other pagers: return with object 293 * referenced. Can't do this with handle == NULL 294 * since it might be the pageout daemon calling. 295 */ 296 object = vm_object_allocate(size); 297 vm_object_enter(object, pager); 298 vm_object_setpager(object, pager, 0, FALSE); 299 } else { 300 swp->sw_flags = 0; 301 pager->pg_list.tqe_next = NULL; 302 pager->pg_list.tqe_prev = NULL; 303 } 304 pager->pg_handle = handle; 305 pager->pg_ops = &swappagerops; 306 pager->pg_type = PG_SWAP; 307 pager->pg_flags = PG_CLUSTERPUT; 308 pager->pg_data = swp; 309 310 #ifdef DEBUG 311 if (swpagerdebug & SDB_ALLOC) 312 printf("swpg_alloc: pg_data %x, %x of %x at %x\n", 313 swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks); 314 #endif 315 return(pager); 316 } 317 318 static void 319 swap_pager_dealloc(pager) 320 vm_pager_t pager; 321 { 322 register int i; 323 register sw_blk_t bp; 324 register sw_pager_t swp; 325 struct swtab *swt; 326 int s; 327 328 #ifdef DEBUG 329 /* save panic time state */ 330 if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) 331 return; 332 if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC)) 333 printf("swpg_dealloc(%x)\n", pager); 334 #endif 335 /* 336 * Remove from list right away so lookups will fail if we 337 * block for pageout completion. 338 */ 339 swp = (sw_pager_t) pager->pg_data; 340 if (swp->sw_flags & SW_NAMED) { 341 TAILQ_REMOVE(&swap_pager_list, pager, pg_list); 342 swp->sw_flags &= ~SW_NAMED; 343 } 344 #ifdef DEBUG 345 for (swt = swtab; swt->st_osize; swt++) 346 if (swp->sw_osize <= swt->st_osize) 347 break; 348 swt->st_inuse--; 349 #endif 350 351 /* 352 * Wait for all pageouts to finish and remove 353 * all entries from cleaning list. 354 */ 355 s = splbio(); 356 while (swp->sw_poip) { 357 swp->sw_flags |= SW_WANTED; 358 (void) tsleep(swp, PVM, "swpgdealloc", 0); 359 } 360 splx(s); 361 swap_pager_clean(B_WRITE); 362 363 /* 364 * Free left over swap blocks 365 */ 366 for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) 367 if (bp->swb_block) { 368 #ifdef DEBUG 369 if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL)) 370 printf("swpg_dealloc: blk %x\n", 371 bp->swb_block); 372 #endif 373 rmfree(swapmap, swp->sw_bsize, bp->swb_block); 374 } 375 /* 376 * Free swap management resources 377 */ 378 free((caddr_t)swp->sw_blocks, M_VMPGDATA); 379 free((caddr_t)swp, M_VMPGDATA); 380 free((caddr_t)pager, M_VMPAGER); 381 } 382 383 static int 384 swap_pager_getpage(pager, mlist, npages, sync) 385 vm_pager_t pager; 386 vm_page_t *mlist; 387 int npages; 388 boolean_t sync; 389 { 390 #ifdef DEBUG 391 if (swpagerdebug & SDB_FOLLOW) 392 printf("swpg_getpage(%x, %x, %x, %x)\n", 393 pager, mlist, npages, sync); 394 #endif 395 return(swap_pager_io((sw_pager_t)pager->pg_data, 396 mlist, npages, B_READ)); 397 } 398 399 static int 400 swap_pager_putpage(pager, mlist, npages, sync) 401 vm_pager_t pager; 402 vm_page_t *mlist; 403 int npages; 404 boolean_t sync; 405 { 406 int flags; 407 408 #ifdef DEBUG 409 if (swpagerdebug & SDB_FOLLOW) 410 printf("swpg_putpage(%x, %x, %x, %x)\n", 411 pager, mlist, npages, sync); 412 #endif 413 if (pager == NULL) { 414 swap_pager_clean(B_WRITE); 415 return (VM_PAGER_OK); /* ??? */ 416 } 417 flags = B_WRITE; 418 if (!sync) 419 flags |= B_ASYNC; 420 return(swap_pager_io((sw_pager_t)pager->pg_data, 421 mlist, npages, flags)); 422 } 423 424 static boolean_t 425 swap_pager_haspage(pager, offset) 426 vm_pager_t pager; 427 vm_offset_t offset; 428 { 429 register sw_pager_t swp; 430 register sw_blk_t swb; 431 int ix; 432 433 #ifdef DEBUG 434 if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK)) 435 printf("swpg_haspage(%x, %x) ", pager, offset); 436 #endif 437 swp = (sw_pager_t) pager->pg_data; 438 ix = offset / dbtob(swp->sw_bsize); 439 if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 440 #ifdef DEBUG 441 if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK)) 442 printf("swpg_haspage: %x bad offset %x, ix %x\n", 443 swp->sw_blocks, offset, ix); 444 #endif 445 return(FALSE); 446 } 447 swb = &swp->sw_blocks[ix]; 448 if (swb->swb_block) 449 ix = atop(offset % dbtob(swp->sw_bsize)); 450 #ifdef DEBUG 451 if (swpagerdebug & SDB_ALLOCBLK) 452 printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix); 453 if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK)) 454 printf("-> %c\n", 455 "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]); 456 #endif 457 if (swb->swb_block && (swb->swb_mask & (1 << ix))) 458 return(TRUE); 459 return(FALSE); 460 } 461 462 static void 463 swap_pager_cluster(pager, offset, loffset, hoffset) 464 vm_pager_t pager; 465 vm_offset_t offset; 466 vm_offset_t *loffset; 467 vm_offset_t *hoffset; 468 { 469 sw_pager_t swp; 470 register int bsize; 471 vm_offset_t loff, hoff; 472 473 #ifdef DEBUG 474 if (swpagerdebug & (SDB_FOLLOW|SDB_CLUSTER)) 475 printf("swpg_cluster(%x, %x) ", pager, offset); 476 #endif 477 swp = (sw_pager_t) pager->pg_data; 478 bsize = dbtob(swp->sw_bsize); 479 if (bsize > swap_pager_maxcluster) 480 bsize = swap_pager_maxcluster; 481 482 loff = offset - (offset % bsize); 483 if (loff >= swp->sw_osize) 484 panic("swap_pager_cluster: bad offset"); 485 486 hoff = loff + bsize; 487 if (hoff > swp->sw_osize) 488 hoff = swp->sw_osize; 489 490 *loffset = loff; 491 *hoffset = hoff; 492 #ifdef DEBUG 493 if (swpagerdebug & (SDB_FOLLOW|SDB_CLUSTER)) 494 printf("returns [%x-%x]\n", loff, hoff); 495 #endif 496 } 497 498 /* 499 * Scaled down version of swap(). 500 * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed. 501 * BOGUS: lower level IO routines expect a KVA so we have to map our 502 * provided physical page into the KVA to keep them happy. 503 */ 504 static int 505 swap_pager_io(swp, mlist, npages, flags) 506 register sw_pager_t swp; 507 vm_page_t *mlist; 508 int npages; 509 int flags; 510 { 511 register struct buf *bp; 512 register sw_blk_t swb; 513 register int s; 514 int ix, mask; 515 boolean_t rv; 516 vm_offset_t kva, off; 517 swp_clean_t spc; 518 vm_page_t m; 519 520 #ifdef DEBUG 521 /* save panic time state */ 522 if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) 523 return (VM_PAGER_FAIL); /* XXX: correct return? */ 524 if (swpagerdebug & (SDB_FOLLOW|SDB_IO)) 525 printf("swpg_io(%x, %x, %x, %x)\n", swp, mlist, npages, flags); 526 if (flags & B_READ) { 527 if (flags & B_ASYNC) 528 panic("swap_pager_io: cannot do ASYNC reads"); 529 if (npages != 1) 530 panic("swap_pager_io: cannot do clustered reads"); 531 } 532 #endif 533 534 /* 535 * First determine if the page exists in the pager if this is 536 * a sync read. This quickly handles cases where we are 537 * following shadow chains looking for the top level object 538 * with the page. 539 */ 540 m = *mlist; 541 off = m->offset + m->object->paging_offset; 542 ix = off / dbtob(swp->sw_bsize); 543 if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 544 #ifdef DEBUG 545 if ((flags & B_READ) == 0 && (swpagerdebug & SDB_ANOM)) { 546 printf("swap_pager_io: no swap block on write\n"); 547 return(VM_PAGER_BAD); 548 } 549 #endif 550 return(VM_PAGER_FAIL); 551 } 552 swb = &swp->sw_blocks[ix]; 553 off = off % dbtob(swp->sw_bsize); 554 if ((flags & B_READ) && 555 (swb->swb_block == 0 || (swb->swb_mask & (1 << atop(off))) == 0)) 556 return(VM_PAGER_FAIL); 557 558 /* 559 * For reads (pageins) and synchronous writes, we clean up 560 * all completed async pageouts. 561 */ 562 if ((flags & B_ASYNC) == 0) { 563 s = splbio(); 564 swap_pager_clean(flags&B_READ); 565 #ifdef DEBUG 566 if (swpagerdebug & SDB_PARANOIA) 567 swap_pager_clean_check(mlist, npages, flags&B_READ); 568 #endif 569 splx(s); 570 } 571 /* 572 * For async writes (pageouts), we cleanup completed pageouts so 573 * that all available resources are freed. Also tells us if this 574 * page is already being cleaned. If it is, or no resources 575 * are available, we try again later. 576 */ 577 else { 578 swap_pager_clean(B_WRITE); 579 #ifdef DEBUG 580 if (swpagerdebug & SDB_PARANOIA) 581 swap_pager_clean_check(mlist, npages, B_WRITE); 582 #endif 583 if (swap_pager_free.tqh_first == NULL) { 584 #ifdef DEBUG 585 if (swpagerdebug & SDB_FAIL) 586 printf("%s: no available io headers\n", 587 "swap_pager_io"); 588 #endif 589 return(VM_PAGER_AGAIN); 590 } 591 } 592 593 /* 594 * Allocate a swap block if necessary. 595 */ 596 if (swb->swb_block == 0) { 597 swb->swb_block = rmalloc(swapmap, swp->sw_bsize); 598 if (swb->swb_block == 0) { 599 #ifdef DEBUG 600 if (swpagerdebug & SDB_FAIL) 601 printf("swpg_io: rmalloc of %x failed\n", 602 swp->sw_bsize); 603 #endif 604 /* 605 * XXX this is technically a resource shortage that 606 * should return AGAIN, but the situation isn't likely 607 * to be remedied just by delaying a little while and 608 * trying again (the pageout daemon's current response 609 * to AGAIN) so we just return FAIL. 610 */ 611 return(VM_PAGER_FAIL); 612 } 613 #ifdef DEBUG 614 if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK)) 615 printf("swpg_io: %x alloc blk %x at ix %x\n", 616 swp->sw_blocks, swb->swb_block, ix); 617 #endif 618 } 619 620 /* 621 * Allocate a kernel virtual address and initialize so that PTE 622 * is available for lower level IO drivers. 623 */ 624 kva = vm_pager_map_pages(mlist, npages, !(flags & B_ASYNC)); 625 if (kva == NULL) { 626 #ifdef DEBUG 627 if (swpagerdebug & SDB_FAIL) 628 printf("%s: no KVA space to map pages\n", 629 "swap_pager_io"); 630 #endif 631 return(VM_PAGER_AGAIN); 632 } 633 634 /* 635 * Get a swap buffer header and initialize it. 636 */ 637 s = splbio(); 638 while (bswlist.b_actf == NULL) { 639 #ifdef DEBUG 640 if (swpagerdebug & SDB_ANOM) 641 printf("swap_pager_io: wait on swbuf for %x (%d)\n", 642 m, flags); 643 #endif 644 bswlist.b_flags |= B_WANTED; 645 tsleep((caddr_t)&bswlist, PSWP+1, "swpgiobuf", 0); 646 } 647 bp = bswlist.b_actf; 648 bswlist.b_actf = bp->b_actf; 649 splx(s); 650 bp->b_flags = B_BUSY | (flags & B_READ); 651 bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ 652 bp->b_data = (caddr_t)kva; 653 bp->b_blkno = swb->swb_block + btodb(off); 654 VHOLD(swapdev_vp); 655 bp->b_vp = swapdev_vp; 656 if (swapdev_vp->v_type == VBLK) 657 bp->b_dev = swapdev_vp->v_rdev; 658 bp->b_bcount = npages * PAGE_SIZE; 659 660 /* 661 * For writes we set up additional buffer fields, record a pageout 662 * in progress and mark that these swap blocks are now allocated. 663 */ 664 if ((bp->b_flags & B_READ) == 0) { 665 bp->b_dirtyoff = 0; 666 bp->b_dirtyend = npages * PAGE_SIZE; 667 swapdev_vp->v_numoutput++; 668 s = splbio(); 669 swp->sw_poip++; 670 splx(s); 671 mask = (~(~0 << npages)) << atop(off); 672 #ifdef DEBUG 673 swap_pager_poip++; 674 if (swpagerdebug & SDB_WRITE) 675 printf("swpg_io: write: bp=%x swp=%x poip=%d\n", 676 bp, swp, swp->sw_poip); 677 if ((swpagerdebug & SDB_ALLOCBLK) && 678 (swb->swb_mask & mask) != mask) 679 printf("swpg_io: %x write %d pages at %x+%x\n", 680 swp->sw_blocks, npages, swb->swb_block, 681 atop(off)); 682 if (swpagerdebug & SDB_CLUSTER) 683 printf("swpg_io: off=%x, npg=%x, mask=%x, bmask=%x\n", 684 off, npages, mask, swb->swb_mask); 685 #endif 686 swb->swb_mask |= mask; 687 } 688 /* 689 * If this is an async write we set up still more buffer fields 690 * and place a "cleaning" entry on the inuse queue. 691 */ 692 if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) { 693 #ifdef DEBUG 694 if (swap_pager_free.tqh_first == NULL) 695 panic("swpg_io: lost spc"); 696 #endif 697 spc = swap_pager_free.tqh_first; 698 TAILQ_REMOVE(&swap_pager_free, spc, spc_list); 699 #ifdef DEBUG 700 if (spc->spc_flags != SPC_FREE) 701 panic("swpg_io: bad free spc"); 702 #endif 703 spc->spc_flags = SPC_BUSY; 704 spc->spc_bp = bp; 705 spc->spc_swp = swp; 706 spc->spc_kva = kva; 707 /* 708 * Record the first page. This allows swap_pager_clean 709 * to efficiently handle the common case of a single page. 710 * For clusters, it allows us to locate the object easily 711 * and we then reconstruct the rest of the mlist from spc_kva. 712 */ 713 spc->spc_m = m; 714 spc->spc_npages = npages; 715 bp->b_flags |= B_CALL; 716 bp->b_iodone = swap_pager_iodone; 717 s = splbio(); 718 TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list); 719 splx(s); 720 } 721 722 /* 723 * Finally, start the IO operation. 724 * If it is async we are all done, otherwise we must wait for 725 * completion and cleanup afterwards. 726 */ 727 #ifdef DEBUG 728 if (swpagerdebug & SDB_IO) 729 printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n", 730 bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m)); 731 #endif 732 VOP_STRATEGY(bp); 733 if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) { 734 #ifdef DEBUG 735 if (swpagerdebug & SDB_IO) 736 printf("swpg_io: IO started: bp %x\n", bp); 737 #endif 738 return(VM_PAGER_PEND); 739 } 740 s = splbio(); 741 #ifdef DEBUG 742 if (flags & B_READ) 743 swap_pager_piip++; 744 else 745 swap_pager_poip++; 746 #endif 747 while ((bp->b_flags & B_DONE) == 0) 748 (void) tsleep(bp, PVM, "swpgio", 0); 749 if ((flags & B_READ) == 0) 750 --swp->sw_poip; 751 #ifdef DEBUG 752 if (flags & B_READ) 753 --swap_pager_piip; 754 else 755 --swap_pager_poip; 756 #endif 757 rv = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK; 758 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY); 759 bp->b_actf = bswlist.b_actf; 760 bswlist.b_actf = bp; 761 if (bp->b_vp) 762 brelvp(bp); 763 if (bswlist.b_flags & B_WANTED) { 764 bswlist.b_flags &= ~B_WANTED; 765 wakeup(&bswlist); 766 } 767 if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) { 768 m->flags |= PG_CLEAN; 769 pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 770 } 771 splx(s); 772 #ifdef DEBUG 773 if (swpagerdebug & SDB_IO) 774 printf("swpg_io: IO done: bp %x, rv %d\n", bp, rv); 775 if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_ERROR) 776 printf("swpg_io: IO error\n"); 777 #endif 778 vm_pager_unmap_pages(kva, npages); 779 return(rv); 780 } 781 782 static void 783 swap_pager_clean(rw) 784 int rw; 785 { 786 register swp_clean_t spc; 787 register int s, i; 788 vm_object_t object; 789 vm_page_t m; 790 791 #ifdef DEBUG 792 /* save panic time state */ 793 if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) 794 return; 795 if (swpagerdebug & SDB_FOLLOW) 796 printf("swpg_clean(%x)\n", rw); 797 #endif 798 799 for (;;) { 800 /* 801 * Look up and removal from inuse list must be done 802 * at splbio() to avoid conflicts with swap_pager_iodone. 803 */ 804 s = splbio(); 805 for (spc = swap_pager_inuse.tqh_first; 806 spc != NULL; 807 spc = spc->spc_list.tqe_next) { 808 /* 809 * If the operation is done, remove it from the 810 * list and process it. 811 * 812 * XXX if we can't get the object lock we also 813 * leave it on the list and try again later. 814 * Is there something better we could do? 815 */ 816 if ((spc->spc_flags & SPC_DONE) && 817 vm_object_lock_try(spc->spc_m->object)) { 818 TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list); 819 break; 820 } 821 } 822 823 /* 824 * No operations done, thats all we can do for now. 825 */ 826 if (spc == NULL) { 827 splx(s); 828 break; 829 } 830 splx(s); 831 832 /* 833 * Found a completed operation so finish it off. 834 * Note: no longer at splbio since entry is off the list. 835 */ 836 m = spc->spc_m; 837 object = m->object; 838 839 /* 840 * Process each page in the cluster. 841 * The first page is explicitly kept in the cleaning 842 * entry, others must be reconstructed from the KVA. 843 */ 844 for (i = 0; i < spc->spc_npages; i++) { 845 if (i) 846 m = vm_pager_atop(spc->spc_kva + ptoa(i)); 847 /* 848 * If no error mark as clean and inform the pmap 849 * system. If there was an error, mark as dirty 850 * so we will try again. 851 * 852 * XXX could get stuck doing this, should give up 853 * after awhile. 854 */ 855 if (spc->spc_flags & SPC_ERROR) { 856 printf("%s: clean of page %x failed\n", 857 "swap_pager_clean", 858 VM_PAGE_TO_PHYS(m)); 859 m->flags |= PG_LAUNDRY; 860 } else { 861 m->flags |= PG_CLEAN; 862 pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 863 } 864 m->flags &= ~PG_BUSY; 865 PAGE_WAKEUP(m); 866 } 867 868 /* 869 * Done with the object, decrement the paging count 870 * and unlock it. 871 */ 872 if (--object->paging_in_progress == 0) 873 wakeup(object); 874 vm_object_unlock(object); 875 876 /* 877 * Free up KVM used and put the entry back on the list. 878 */ 879 vm_pager_unmap_pages(spc->spc_kva, spc->spc_npages); 880 spc->spc_flags = SPC_FREE; 881 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list); 882 #ifdef DEBUG 883 if (swpagerdebug & SDB_WRITE) 884 printf("swpg_clean: free spc %x\n", spc); 885 #endif 886 } 887 } 888 889 #ifdef DEBUG 890 static void 891 swap_pager_clean_check(mlist, npages, rw) 892 vm_page_t *mlist; 893 int npages; 894 int rw; 895 { 896 register swp_clean_t spc; 897 boolean_t bad; 898 int i, j, s; 899 vm_page_t m; 900 901 if (panicstr) 902 return; 903 904 bad = FALSE; 905 s = splbio(); 906 for (spc = swap_pager_inuse.tqh_first; 907 spc != NULL; 908 spc = spc->spc_list.tqe_next) { 909 for (j = 0; j < spc->spc_npages; j++) { 910 m = vm_pager_atop(spc->spc_kva + ptoa(j)); 911 for (i = 0; i < npages; i++) 912 if (m == mlist[i]) { 913 if (swpagerdebug & SDB_ANOM) 914 printf( 915 "swpg_clean_check: %s: page %x on list, flags %x\n", 916 rw == B_WRITE ? "write" : "read", mlist[i], spc->spc_flags); 917 bad = TRUE; 918 } 919 } 920 } 921 splx(s); 922 if (bad) 923 panic("swpg_clean_check"); 924 } 925 #endif 926 927 static void 928 swap_pager_iodone(bp) 929 register struct buf *bp; 930 { 931 register swp_clean_t spc; 932 daddr_t blk; 933 int s; 934 935 #ifdef DEBUG 936 /* save panic time state */ 937 if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) 938 return; 939 if (swpagerdebug & SDB_FOLLOW) 940 printf("swpg_iodone(%x)\n", bp); 941 #endif 942 s = splbio(); 943 for (spc = swap_pager_inuse.tqh_first; 944 spc != NULL; 945 spc = spc->spc_list.tqe_next) 946 if (spc->spc_bp == bp) 947 break; 948 #ifdef DEBUG 949 if (spc == NULL) 950 panic("swap_pager_iodone: bp not found"); 951 #endif 952 953 spc->spc_flags &= ~SPC_BUSY; 954 spc->spc_flags |= SPC_DONE; 955 if (bp->b_flags & B_ERROR) 956 spc->spc_flags |= SPC_ERROR; 957 spc->spc_bp = NULL; 958 blk = bp->b_blkno; 959 960 #ifdef DEBUG 961 --swap_pager_poip; 962 if (swpagerdebug & SDB_WRITE) 963 printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n", 964 bp, spc->spc_swp, spc->spc_swp->sw_flags, 965 spc, spc->spc_swp->sw_poip); 966 #endif 967 968 spc->spc_swp->sw_poip--; 969 if (spc->spc_swp->sw_flags & SW_WANTED) { 970 spc->spc_swp->sw_flags &= ~SW_WANTED; 971 wakeup(spc->spc_swp); 972 } 973 974 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY); 975 bp->b_actf = bswlist.b_actf; 976 bswlist.b_actf = bp; 977 if (bp->b_vp) 978 brelvp(bp); 979 if (bswlist.b_flags & B_WANTED) { 980 bswlist.b_flags &= ~B_WANTED; 981 wakeup(&bswlist); 982 } 983 wakeup(&vm_pages_needed); 984 splx(s); 985 } 986