/*	$OpenBSD: uvm_pager.c,v 1.60 2011/07/03 18:34:14 oga Exp $	*/
/*	$NetBSD: uvm_pager.c,v 1.36 2000/11/27 18:26:41 chs Exp $	*/

/*
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor and
 *      Washington University.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_pager.c,v 1.1.2.23 1998/02/02 20:38:06 chuck Exp
 */

/*
 * uvm_pager.c: generic functions used to assist the pagers.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/vnode.h>
#include <sys/buf.h>

#include <uvm/uvm.h>

struct pool *uvm_aiobuf_pool;

struct uvm_pagerops *uvmpagerops[] = {
	&aobj_pager,
	&uvm_deviceops,
	&uvm_vnodeops,
};

/*
 * the pager map: provides KVA for I/O
 *
 * Each uvm_pseg has room for MAX_PAGER_SEGS pager I/O segments of
 * MAXBSIZE bytes each.
 *
 * The number of uvm_pseg instances is dynamic using the array psegs.
 * At most PSEG_NUMSEGS instances can exist.
 *
 * psegs[0] always exists (so that the pager can always map in pages).
 * psegs[0] element 0 is always reserved for the pagedaemon.
 *
 * Any other pseg is automatically created when no space is available
 * and automatically destroyed when it is no longer in use.
 */
#define MAX_PAGER_SEGS	16
#define PSEG_NUMSEGS	(PAGER_MAP_SIZE / MAX_PAGER_SEGS / MAXBSIZE)
struct uvm_pseg {
	/* Start of virtual space; 0 if not inited. */
	vaddr_t	start;
	/* Bitmap of the segments in use in this pseg. */
	int	use;
};
struct	mutex uvm_pseg_lck;
struct	uvm_pseg psegs[PSEG_NUMSEGS];

#define UVM_PSEG_FULL(pseg)	((pseg)->use == (1 << MAX_PAGER_SEGS) - 1)
#define UVM_PSEG_EMPTY(pseg)	((pseg)->use == 0)
#define UVM_PSEG_INUSE(pseg,id)	(((pseg)->use & (1 << (id))) != 0)
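
/*
 * For example, with MAX_PAGER_SEGS == 16 a pseg with segments 0 and 3
 * handed out has use == 0x0009, UVM_PSEG_FULL is then use == 0xffff,
 * and a segment id maps to KVA as pseg->start + id * MAXBSIZE.
 */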

void	uvm_pseg_init(struct uvm_pseg *);
vaddr_t	uvm_pseg_get(int);
void	uvm_pseg_release(vaddr_t);

/*
 * uvm_pager_init: init pagers (at boot time)
 */

void
uvm_pager_init(void)
{
	int lcv;

	/*
	 * init pager map
	 */

	uvm_pseg_init(&psegs[0]);
	mtx_init(&uvm_pseg_lck, IPL_VM);

	/*
	 * init ASYNC I/O queue
	 */

	TAILQ_INIT(&uvm.aio_done);

	/*
	 * call pager init functions
	 */
	for (lcv = 0; lcv < sizeof(uvmpagerops) / sizeof(struct uvm_pagerops *);
	    lcv++) {
		if (uvmpagerops[lcv]->pgo_init)
			uvmpagerops[lcv]->pgo_init();
	}
}

/*
 * Initialize a uvm_pseg.
 *
 * May fail, in which case seg->start == 0.
 *
 * Caller locks uvm_pseg_lck.
 */
void
uvm_pseg_init(struct uvm_pseg *pseg)
{
	KASSERT(pseg->start == 0);
	KASSERT(pseg->use == 0);
	pseg->start = uvm_km_valloc_try(kernel_map, MAX_PAGER_SEGS * MAXBSIZE);
}

/*
 * Acquire a pager map segment.
 *
 * Returns a vaddr for paging. 0 on failure.
 *
 * Caller does not lock.
 */
vaddr_t
uvm_pseg_get(int flags)
{
	int i;
	struct uvm_pseg *pseg;

	mtx_enter(&uvm_pseg_lck);

pager_seg_restart:
	/* Find first pseg that has room. */
	for (pseg = &psegs[0]; pseg != &psegs[PSEG_NUMSEGS]; pseg++) {
		if (UVM_PSEG_FULL(pseg))
			continue;

		if (pseg->start == 0) {
			/* Need initialization. */
			uvm_pseg_init(pseg);
			if (pseg->start == 0)
				goto pager_seg_fail;
		}

		/* Keep index 0 reserved for pagedaemon. */
		if (pseg == &psegs[0] && curproc != uvm.pagedaemon_proc)
			i = 1;
		else
			i = 0;

		for (; i < MAX_PAGER_SEGS; i++) {
			if (!UVM_PSEG_INUSE(pseg, i)) {
				pseg->use |= 1 << i;
				mtx_leave(&uvm_pseg_lck);
				return pseg->start + i * MAXBSIZE;
			}
		}
	}

pager_seg_fail:
	if ((flags & UVMPAGER_MAPIN_WAITOK) != 0) {
		msleep(&psegs, &uvm_pseg_lck, PVM, "pagerseg", 0);
		goto pager_seg_restart;
	}

	mtx_leave(&uvm_pseg_lck);
	return 0;
}
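
/*
 * Note that callers passing UVMPAGER_MAPIN_WAITOK sleep on &psegs and
 * are woken by the wakeup(&psegs) in uvm_pseg_release() below, so a
 * waiting allocation retries as soon as any segment is returned.  Only
 * the pagedaemon may be handed segment 0 of psegs[0]; e.g. a regular
 * process scanning psegs[0] starts its search at id 1.
 */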

/*
 * Release a pager map segment.
 *
 * Caller does not lock.
 *
 * Deallocates pseg if it is no longer in use.
 */
void
uvm_pseg_release(vaddr_t segaddr)
{
	int id;
	struct uvm_pseg *pseg;
	vaddr_t va = 0;

	for (pseg = &psegs[0]; pseg != &psegs[PSEG_NUMSEGS]; pseg++) {
		if (pseg->start <= segaddr &&
		    segaddr < pseg->start + MAX_PAGER_SEGS * MAXBSIZE)
			break;
	}
	KASSERT(pseg != &psegs[PSEG_NUMSEGS]);

	id = (segaddr - pseg->start) / MAXBSIZE;
	KASSERT(id >= 0 && id < MAX_PAGER_SEGS);

	/* test for no remainder */
	KDASSERT(segaddr == pseg->start + id * MAXBSIZE);

	mtx_enter(&uvm_pseg_lck);

	KASSERT(UVM_PSEG_INUSE(pseg, id));

	pseg->use &= ~(1 << id);
	wakeup(&psegs);

	if (pseg != &psegs[0] && UVM_PSEG_EMPTY(pseg)) {
		va = pseg->start;
		pseg->start = 0;
	}

	mtx_leave(&uvm_pseg_lck);

	if (va)
		uvm_km_free(kernel_map, va, MAX_PAGER_SEGS * MAXBSIZE);
}

/*
 * uvm_pagermapin: map pages into KVA for I/O that needs mappings
 *
 * We basically just km_valloc a blank map entry to reserve the space in the
 * kernel map and then use pmap_enter() to put the mappings in by hand.
 */
vaddr_t
uvm_pagermapin(struct vm_page **pps, int npages, int flags)
{
	vaddr_t kva, cva;
	vm_prot_t prot;
	vsize_t size;
	struct vm_page *pp;

	/* a "read" I/O fills the pages, so the mapping must be writable */
	prot = VM_PROT_READ;
	if (flags & UVMPAGER_MAPIN_READ)
		prot |= VM_PROT_WRITE;
	size = ptoa(npages);

	KASSERT(size <= MAXBSIZE);

	kva = uvm_pseg_get(flags);
	if (kva == 0)
		return 0;

	for (cva = kva; size != 0; size -= PAGE_SIZE, cva += PAGE_SIZE) {
		pp = *pps++;
		KASSERT(pp);
		KASSERT(pp->pg_flags & PG_BUSY);
		/* Allow pmap_enter to fail. */
		if (pmap_enter(pmap_kernel(), cva, VM_PAGE_TO_PHYS(pp),
		    prot, PMAP_WIRED | PMAP_CANFAIL | prot) != 0) {
			pmap_remove(pmap_kernel(), kva, cva);
			pmap_update(pmap_kernel());
			uvm_pseg_release(kva);
			return 0;
		}
	}
	pmap_update(pmap_kernel());
	return kva;
}

/*
 * uvm_pagermapout: remove KVA mapping
 *
 * We remove our mappings by hand and then release the KVA segment.
 */
void
uvm_pagermapout(vaddr_t kva, int npages)
{
	pmap_remove(pmap_kernel(), kva, kva + (npages << PAGE_SHIFT));
	pmap_update(pmap_kernel());
	uvm_pseg_release(kva);
}
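
/*
 * A pager I/O path might use the pair above roughly like this
 * (illustrative sketch; "pps" holds npages busy pages):
 *
 *	vaddr_t kva;
 *
 *	kva = uvm_pagermapin(pps, npages, 0);	// pageout: read-only mapping
 *	if (kva == 0)
 *		return VM_PAGER_AGAIN;		// no KVA, try again later
 *	... do device I/O on [kva, kva + ptoa(npages)) ...
 *	uvm_pagermapout(kva, npages);
 *
 * Passing UVMPAGER_MAPIN_WAITOK instead would sleep for a free segment
 * rather than fail.
 */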

/*
 * uvm_mk_pcluster
 *
 * generic "make 'pager put' cluster" function.  a pager can either
 * [1] set pgo_mk_pcluster to NULL (never cluster), [2] set it to this
 * generic function, or [3] set it to a pager specific function.
 *
 * => caller must lock object _and_ pagequeues (since we need to look
 *    at active vs. inactive bits, etc.)
 * => caller must make center page busy and write-protect it
 * => we mark all cluster pages busy for the caller
 * => the caller must unbusy all pages (and check wanted/released
 *    status if it drops the object lock)
 * => flags:
 *	PGO_ALLPAGES:	all pages in object are valid targets
 *	!PGO_ALLPAGES:	use "lo" and "hi" to limit range of cluster
 *	PGO_DOACTCLUST:	include active pages in cluster.
 *	PGO_FREE:	set the PG_RELEASED bits on the cluster so they'll
 *		be freed in async io (caller must clean on error).
 *	NOTE: the caller should clear PG_CLEANCHK bits if PGO_DOACTCLUST.
 *	      PG_CLEANCHK is only a hint, but clearing will help reduce
 *	      the number of calls we make to the pmap layer.
 */

struct vm_page **
uvm_mk_pcluster(struct uvm_object *uobj, struct vm_page **pps, int *npages,
    struct vm_page *center, int flags, voff_t mlo, voff_t mhi)
{
	struct vm_page **ppsp, *pclust;
	voff_t lo, hi, curoff;
	int center_idx, forward, incr;

	/*
	 * center page should already be busy and write protected.  XXX:
	 * suppose page is wired?  if we lock, then a process could
	 * fault/block on it.  if we don't lock, a process could write the
	 * pages in the middle of an I/O.  (consider an msync()).  let's
	 * lock it for now (better to delay than corrupt data?).
	 */

	/*
	 * get cluster boundaries, check sanity, and apply our limits as well.
	 */

	uobj->pgops->pgo_cluster(uobj, center->offset, &lo, &hi);
	if ((flags & PGO_ALLPAGES) == 0) {
		if (lo < mlo)
			lo = mlo;
		if (hi > mhi)
			hi = mhi;
	}
	if ((hi - lo) >> PAGE_SHIFT > *npages) { /* pps too small, bail out! */
		pps[0] = center;
		*npages = 1;
		return(pps);
	}

	/*
	 * now determine the center and attempt to cluster around the
	 * edges
	 */

	center_idx = (center->offset - lo) >> PAGE_SHIFT;
	pps[center_idx] = center;	/* plug in the center page */
	ppsp = &pps[center_idx];
	*npages = 1;

	/*
	 * attempt to cluster around the left [backward], and then
	 * the right side [forward].
	 *
	 * note that for inactive pages (pages that have been deactivated)
	 * there are no valid mappings and PG_CLEAN should be up to date.
	 * [i.e. there is no need to query the pmap with pmap_is_modified
	 * since there are no mappings].
	 */

	for (forward = 0; forward <= 1; forward++) {
		incr = forward ? PAGE_SIZE : -PAGE_SIZE;
		curoff = center->offset + incr;
		for (; (forward == 0 && curoff >= lo) ||
		    (forward && curoff < hi);
		    curoff += incr) {

			pclust = uvm_pagelookup(uobj, curoff); /* lookup page */
			if (pclust == NULL) {
				break;			/* no page */
			}
			/* handle active pages */
			/* NOTE: inactive pages don't have pmap mappings */
			if ((pclust->pg_flags & PQ_INACTIVE) == 0) {
				if ((flags & PGO_DOACTCLUST) == 0) {
					/* don't want mapped pages at all */
					break;
				}

				/* make sure "clean" bit is sync'd */
				if ((pclust->pg_flags & PG_CLEANCHK) == 0) {
					if ((pclust->pg_flags & (PG_CLEAN|PG_BUSY))
					    == PG_CLEAN &&
					    pmap_is_modified(pclust))
						atomic_clearbits_int(
						    &pclust->pg_flags,
						    PG_CLEAN);
					/* now checked */
					atomic_setbits_int(&pclust->pg_flags,
					    PG_CLEANCHK);
				}
			}

			/* is page available for cleaning and does it need it */
			if ((pclust->pg_flags & (PG_CLEAN|PG_BUSY)) != 0) {
				break;	/* page is already clean or is busy */
			}

			/* yes!  enroll the page in our array */
			atomic_setbits_int(&pclust->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(pclust, "uvm_mk_pcluster");

			/*
			 * If we want to free after io is done, and we're
			 * async, set the released flag
			 */
			if ((flags & (PGO_FREE|PGO_SYNCIO)) == PGO_FREE)
				atomic_setbits_int(&pclust->pg_flags,
				    PG_RELEASED);

			/* XXX: protect wired page?  see above comment. */
			pmap_page_protect(pclust, VM_PROT_READ);
			if (!forward) {
				ppsp--;			/* back up one page */
				*ppsp = pclust;
			} else {
				/* move forward one page */
				ppsp[*npages] = pclust;
			}
			(*npages)++;
		}
	}

	/*
	 * done!  return the cluster array to the caller!!!
	 */

	return(ppsp);
}
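
/*
 * For example, with 4k pages, lo == 0x2000 and center->offset == 0x5000,
 * center_idx is 3: the center page is planted at pps[3], ppsp backs up
 * one slot for every page clustered to the left, and pages clustered to
 * the right land at ppsp[*npages].  The returned ppsp therefore points
 * at the first page of the cluster, not necessarily at pps[0].
 */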

/*
 * uvm_pager_put: high level pageout routine
 *
 * we want to pageout page "pg" to backing store, clustering if
 * possible.
 *
 * => page queues must be locked by caller
 * => if page is not swap-backed, then "uobj" points to the object
 *	backing it.   this object should be locked by the caller.
 * => if page is swap-backed, then "uobj" should be NULL.
 * => "pg" should be PG_BUSY (by caller), and !PG_CLEAN
 *    for swap-backed memory, "pg" can be NULL if there is no page
 *    of interest [sometimes the case for the pagedaemon]
 * => "ppsp_ptr" should point to an array of npages vm_page pointers
 *	for possible cluster building
 * => flags (first two for non-swap-backed pages)
 *	PGO_ALLPAGES: all pages in uobj are valid targets
 *	PGO_DOACTCLUST: include "PQ_ACTIVE" pages as valid targets
 *	PGO_SYNCIO: do SYNC I/O (no async)
 *	PGO_PDFREECLUST: pagedaemon: drop cluster on successful I/O
 *	PGO_FREE: tell the aio daemon to free pages in the async case.
 * => start/stop: if (uobj && !PGO_ALLPAGES) limit targets to this range
 *		  if (!uobj) start is the (daddr64_t) of the starting swapblk
 * => return state:
 *	1. we return the VM_PAGER status code of the pageout
 *	2. we return with the page queues unlocked
 *	3. if (uobj != NULL) [!swap_backed] we return with
 *		uobj locked _only_ if PGO_PDFREECLUST is set
 *		AND result != VM_PAGER_PEND.   in all other cases
 *		we return with uobj unlocked.   [this is a hack
 *		that allows the pagedaemon to save one lock/unlock
 *		pair in the !swap_backed case since we have to
 *		lock the uobj to drop the cluster anyway]
 *	4. on errors we always drop the cluster.   thus, if we return
 *		!PEND, !OK, then the caller only has to worry about
 *		un-busying the main page (not the cluster pages).
 *	5. on success, if !PGO_PDFREECLUST, we return the cluster
 *		with all pages busy (caller must un-busy and check
 *		wanted/released flags).
 */
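
/*
 * An object pageout caller might thus look roughly like this
 * (illustrative sketch only):
 *
 *	struct vm_page *pps[8], **ppsp = pps;
 *	int result, npages = 8;
 *
 *	... lock uobj and the page queues, busy and un-clean "pg" ...
 *	result = uvm_pager_put(uobj, pg, &ppsp, &npages,
 *	    PGO_ALLPAGES | PGO_SYNCIO, 0, 0);
 *	... the page queues are now unlocked; on VM_PAGER_OK the caller
 *	    un-busies the npages pages left in ppsp, while on error only
 *	    "pg" itself remains busy ...
 */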

int
uvm_pager_put(struct uvm_object *uobj, struct vm_page *pg,
    struct vm_page ***ppsp_ptr, int *npages, int flags,
    voff_t start, voff_t stop)
{
	int result;
	daddr64_t swblk;
	struct vm_page **ppsp = *ppsp_ptr;

	/*
	 * note that uobj is null if we are doing a swap-backed pageout.
	 * note that uobj is !null if we are doing normal object pageout.
	 * note that the page queues must be locked to cluster.
	 */

	if (uobj) {	/* if !swap-backed */

		/*
		 * attempt to build a cluster for pageout using its
		 * make-put-cluster function (if it has one).
		 */

		if (uobj->pgops->pgo_mk_pcluster) {
			ppsp = uobj->pgops->pgo_mk_pcluster(uobj, ppsp,
			    npages, pg, flags, start, stop);
			*ppsp_ptr = ppsp;	/* update caller's pointer */
		} else {
			ppsp[0] = pg;
			*npages = 1;
		}

		swblk = 0;		/* XXX: keep gcc happy */

	} else {

		/*
		 * for swap-backed pageout, the caller (the pagedaemon) has
		 * already built the cluster for us.   the starting swap
		 * block we are writing to has been passed in as "start."
		 * "pg" could be NULL if there is no page we are especially
		 * interested in (in which case the whole cluster gets dropped
		 * in the event of an error or a sync "done").
		 */
		swblk = (daddr64_t) start;
		/* ppsp and npages should be ok */
	}

	/* now that we've clustered we can unlock the page queues */
	uvm_unlock_pageq();

	/*
	 * now attempt the I/O.   if we have a failure and we are
	 * clustered, we will drop the cluster and try again.
	 */

ReTry:
	if (uobj) {
		/* object is locked */
		result = uobj->pgops->pgo_put(uobj, ppsp, *npages, flags);
		/* object is now unlocked */
	} else {
		/* nothing locked */
		/* XXX daddr64_t -> int */
		result = uvm_swap_put(swblk, ppsp, *npages, flags);
		/* nothing locked */
	}

	/*
	 * we have attempted the I/O.
	 *
	 * if the I/O was a success then:
	 *	if !PGO_PDFREECLUST, we return the cluster to the
	 *		caller (who must un-busy all pages)
	 *	else we un-busy cluster pages for the pagedaemon
	 *
	 * if I/O is pending (async i/o) then we return the pending code.
	 * [in this case the async i/o done function must clean up when
	 *  i/o is done...]
	 */

	if (result == VM_PAGER_PEND || result == VM_PAGER_OK) {
		if (result == VM_PAGER_OK && (flags & PGO_PDFREECLUST)) {
			/*
			 * drop cluster and relock object (only if I/O is
			 * not pending)
			 */
			if (uobj)
				/* required for dropcluster */
				simple_lock(&uobj->vmobjlock);
			if (*npages > 1 || pg == NULL)
				uvm_pager_dropcluster(uobj, pg, ppsp, npages,
				    PGO_PDFREECLUST);
			/* if (uobj): object still locked, as per
			 * return-state item #3 */
		}
		return (result);
	}

	/*
	 * a pager error occurred (even after dropping the cluster, if there
	 * was one).  give up!  the caller only has one page ("pg")
	 * to worry about.
	 */

	if (*npages > 1 || pg == NULL) {
		if (uobj) {
			simple_lock(&uobj->vmobjlock);
		}
		uvm_pager_dropcluster(uobj, pg, ppsp, npages, PGO_REALLOCSWAP);

		/*
		 * for failed swap-backed pageouts with a "pg",
		 * we need to reset pg's swslot to either:
		 * "swblk" (for transient errors, so we can retry),
		 * or 0 (for hard errors).
		 */

		if (uobj == NULL && pg != NULL) {
			/* XXX daddr64_t -> int */
			int nswblk = (result == VM_PAGER_AGAIN) ? swblk : 0;
			if (pg->pg_flags & PQ_ANON) {
				simple_lock(&pg->uanon->an_lock);
				pg->uanon->an_swslot = nswblk;
				simple_unlock(&pg->uanon->an_lock);
			} else {
				simple_lock(&pg->uobject->vmobjlock);
				uao_set_swslot(pg->uobject,
				    pg->offset >> PAGE_SHIFT,
				    nswblk);
				simple_unlock(&pg->uobject->vmobjlock);
			}
		}
		if (result == VM_PAGER_AGAIN) {

			/*
			 * for transient failures, free all the swslots that
			 * we're not going to retry with.
			 */

			if (uobj == NULL) {
				if (pg) {
					/* XXX daddr64_t -> int */
					uvm_swap_free(swblk + 1, *npages - 1);
				} else {
					/* XXX daddr64_t -> int */
					uvm_swap_free(swblk, *npages);
				}
			}
			if (pg) {
				ppsp[0] = pg;
				*npages = 1;
				goto ReTry;
			}
		} else if (uobj == NULL) {

			/*
			 * for hard errors on swap-backed pageouts,
			 * mark the swslots as bad.   note that we do not
			 * free swslots that we mark bad.
			 */

			/* XXX daddr64_t -> int */
			uvm_swap_markbad(swblk, *npages);
		}
	}

	/*
	 * on error, if the caller asked for PGO_PDFREECLUST we still
	 * re-lock uobj before returning, as per return-state item #3.
	 */

	if (uobj && (flags & PGO_PDFREECLUST) != 0)
		simple_lock(&uobj->vmobjlock);
	return(result);
}
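
/*
 * For example, a 4-page swap-backed cluster written at swblk 100 that
 * fails with VM_PAGER_AGAIN and a non-NULL "pg" frees slots 101-103,
 * resets pg's swslot to 100, shrinks the cluster to "pg" alone, and
 * retries; on a hard error the same cluster would instead have slots
 * 100-103 marked bad and pg's swslot cleared to 0.
 */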

/*
 * uvm_pager_dropcluster: drop a cluster we have built (because we
 * got an error, or, if PGO_PDFREECLUST we are un-busying the
 * cluster pages on behalf of the pagedaemon).
 *
 * => uobj, if non-null, is a non-swap-backed object that is
 *	locked by the caller.   we return with this object still
 *	locked.
 * => page queues are not locked
 * => pg is our page of interest (the one we clustered around, can be null)
 * => ppsp/npages is our current cluster
 * => flags: PGO_PDFREECLUST: pageout was a success: un-busy cluster
 *	pages on behalf of the pagedaemon.
 *	PGO_REALLOCSWAP: drop previously allocated swap slots for
 *		clustered swap-backed pages (except for "pg" if !NULL)
 *		"swblk" is the start of swap alloc (e.g. for ppsp[0])
 *		[only meaningful if swap-backed (uobj == NULL)]
 */

void
uvm_pager_dropcluster(struct uvm_object *uobj, struct vm_page *pg,
    struct vm_page **ppsp, int *npages, int flags)
{
	int lcv;

	/*
	 * drop all pages but "pg"
	 */

	for (lcv = 0; lcv < *npages; lcv++) {

		/* skip "pg" or empty slot */
		if (ppsp[lcv] == pg || ppsp[lcv] == NULL)
			continue;

		/*
		 * if swap-backed, gain lock on object that owns page.  note
		 * that PQ_ANON bit can't change as long as we are holding
		 * the PG_BUSY bit (so there is no need to lock the page
		 * queues to test it).
		 *
		 * once we have the lock, dispose of the pointer to swap, if
		 * requested
		 */
		if (!uobj) {
			if (ppsp[lcv]->pg_flags & PQ_ANON) {
				simple_lock(&ppsp[lcv]->uanon->an_lock);
				if (flags & PGO_REALLOCSWAP)
					/* zap swap block */
					ppsp[lcv]->uanon->an_swslot = 0;
			} else {
				simple_lock(&ppsp[lcv]->uobject->vmobjlock);
				if (flags & PGO_REALLOCSWAP)
					uao_set_swslot(ppsp[lcv]->uobject,
					    ppsp[lcv]->offset >> PAGE_SHIFT, 0);
			}
		}

		/* did someone want the page while we had it busy-locked? */
		if (ppsp[lcv]->pg_flags & PG_WANTED) {
			/* still holding obj lock */
			wakeup(ppsp[lcv]);
		}

		/* if page was released, release it.  otherwise un-busy it */
		if (ppsp[lcv]->pg_flags & PG_RELEASED &&
		    ppsp[lcv]->pg_flags & PQ_ANON) {
			/* so that anfree will free */
			atomic_clearbits_int(&ppsp[lcv]->pg_flags,
			    PG_BUSY);
			UVM_PAGE_OWN(ppsp[lcv], NULL);

			simple_unlock(&ppsp[lcv]->uanon->an_lock);
			/* kills anon and frees pg */
			uvm_anfree(ppsp[lcv]->uanon);

			continue;
		} else {
			/*
			 * if we were planning on async io then we would
			 * have PG_RELEASED set, clear that with the others.
			 */
			atomic_clearbits_int(&ppsp[lcv]->pg_flags,
			    PG_BUSY|PG_WANTED|PG_FAKE|PG_RELEASED);
			UVM_PAGE_OWN(ppsp[lcv], NULL);
		}

		/*
		 * if we are operating on behalf of the pagedaemon and we
		 * had a successful pageout update the page!
		 */
		if (flags & PGO_PDFREECLUST) {
			pmap_clear_reference(ppsp[lcv]);
			pmap_clear_modify(ppsp[lcv]);
			atomic_setbits_int(&ppsp[lcv]->pg_flags, PG_CLEAN);
		}

		/* if anonymous cluster, unlock object and move on */
		if (!uobj) {
			if (ppsp[lcv]->pg_flags & PQ_ANON)
				simple_unlock(&ppsp[lcv]->uanon->an_lock);
			else
				simple_unlock(&ppsp[lcv]->uobject->vmobjlock);
		}
	}
}
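
/*
 * So after a successful pagedaemon pageout, for instance,
 * uvm_pager_dropcluster(uobj, pg, ppsp, &npages, PGO_PDFREECLUST)
 * un-busies every cluster page except "pg" itself and marks each one
 * PG_CLEAN with its pmap reference/modify bits cleared, while an error
 * path passing PGO_REALLOCSWAP additionally zaps the swap slots of the
 * swap-backed cluster pages.
 */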

/*
 * interrupt-context iodone handler for single-buf i/os
 * or the top-level buf of a nested-buf i/o.
 *
 * => must be at splbio().
 */

void
uvm_aio_biodone(struct buf *bp)
{
	splassert(IPL_BIO);

	/* reset b_iodone for when this is a single-buf i/o. */
	bp->b_iodone = uvm_aio_aiodone;

	mtx_enter(&uvm.aiodoned_lock);	/* locks uvm.aio_done */
	TAILQ_INSERT_TAIL(&uvm.aio_done, bp, b_freelist);
	wakeup(&uvm.aiodoned);
	mtx_leave(&uvm.aiodoned_lock);
}

/*
 * uvm_aio_aiodone: do iodone processing for async i/os.
 * this should be called in thread context, not interrupt context.
 */

void
uvm_aio_aiodone(struct buf *bp)
{
	int npages = bp->b_bufsize >> PAGE_SHIFT;
	struct vm_page *pg, *pgs[MAXPHYS >> PAGE_SHIFT];
	struct uvm_object *uobj;
	int i, error;
	boolean_t write, swap;

	KASSERT(npages <= MAXPHYS >> PAGE_SHIFT);
	splassert(IPL_BIO);

	error = (bp->b_flags & B_ERROR) ? (bp->b_error ? bp->b_error : EIO) : 0;
	write = (bp->b_flags & B_READ) == 0;

	uobj = NULL;
	for (i = 0; i < npages; i++)
		pgs[i] = uvm_atopg((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
	uvm_pagermapout((vaddr_t)bp->b_data, npages);
#ifdef UVM_SWAP_ENCRYPT
	/*
	 * XXX - assumes that we only get ASYNC writes.  used to be above.
	 */
	if (pgs[0]->pg_flags & PQ_ENCRYPT) {
		uvm_swap_freepages(pgs, npages);
		goto freed;
	}
#endif /* UVM_SWAP_ENCRYPT */
	for (i = 0; i < npages; i++) {
		pg = pgs[i];

		if (i == 0) {
			swap = (pg->pg_flags & PQ_SWAPBACKED) != 0;
			if (!swap) {
				uobj = pg->uobject;
				simple_lock(&uobj->vmobjlock);
			}
		}
		KASSERT(swap || pg->uobject == uobj);
		if (swap) {
			if (pg->pg_flags & PQ_ANON) {
				simple_lock(&pg->uanon->an_lock);
			} else {
				simple_lock(&pg->uobject->vmobjlock);
			}
		}

		/*
		 * if this is a read and we got an error, mark the pages
		 * PG_RELEASED so that uvm_page_unbusy() will free them.
		 */
		if (!write && error) {
			atomic_setbits_int(&pg->pg_flags, PG_RELEASED);
			continue;
		}
		KASSERT(!write || (pgs[i]->pg_flags & PG_FAKE) == 0);

		/*
		 * if this is a read and the page is PG_FAKE,
		 * or this was a write that did not fail with ENOMEM,
		 * mark the page PG_CLEAN and not PG_FAKE.
		 */

		if ((pgs[i]->pg_flags & PG_FAKE) || (write && error != ENOMEM)) {
			pmap_clear_reference(pgs[i]);
			pmap_clear_modify(pgs[i]);
			atomic_setbits_int(&pgs[i]->pg_flags, PG_CLEAN);
			atomic_clearbits_int(&pgs[i]->pg_flags, PG_FAKE);
		}
		if (swap) {
			if (pg->pg_flags & PQ_ANON) {
				simple_unlock(&pg->uanon->an_lock);
			} else {
				simple_unlock(&pg->uobject->vmobjlock);
			}
		}
	}
	uvm_page_unbusy(pgs, npages);
	if (!swap) {
		simple_unlock(&uobj->vmobjlock);
	}

#ifdef UVM_SWAP_ENCRYPT
freed:
#endif
	if (write && (bp->b_flags & B_AGE) != 0 && bp->b_vp != NULL) {
		vwakeup(bp->b_vp);
	}
	pool_put(&bufpool, bp);
}
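
/*
 * The two functions above split async iodone work across contexts:
 * uvm_aio_biodone() runs at interrupt time and only queues the buf on
 * uvm.aio_done, while a helper thread later dequeues it and calls
 * uvm_aio_aiodone() via b_iodone.  That consumer loop looks roughly
 * like this (illustrative sketch only):
 *
 *	mtx_enter(&uvm.aiodoned_lock);
 *	while ((bp = TAILQ_FIRST(&uvm.aio_done)) == NULL)
 *		msleep(&uvm.aiodoned, &uvm.aiodoned_lock, PVM, "aiodoned", 0);
 *	TAILQ_REMOVE(&uvm.aio_done, bp, b_freelist);
 *	mtx_leave(&uvm.aiodoned_lock);
 *	(*bp->b_iodone)(bp);
 */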