1 /* $OpenBSD: uvm_aobj.c,v 1.108 2023/05/13 09:24:59 mpi Exp $ */ 2 /* $NetBSD: uvm_aobj.c,v 1.39 2001/02/18 21:19:08 chs Exp $ */ 3 4 /* 5 * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and 6 * Washington University. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 * 29 * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp 30 */ 31 /* 32 * uvm_aobj.c: anonymous memory uvm_object pager 33 * 34 * author: Chuck Silvers <chuq@chuq.com> 35 * started: Jan-1998 36 * 37 * - design mostly from Chuck Cranor 38 */ 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/malloc.h> 43 #include <sys/kernel.h> 44 #include <sys/pool.h> 45 #include <sys/stdint.h> 46 #include <sys/atomic.h> 47 48 #include <uvm/uvm.h> 49 50 /* 51 * An anonymous UVM object (aobj) manages anonymous-memory. In addition to 52 * keeping the list of resident pages, it may also keep a list of allocated 53 * swap blocks. Depending on the size of the object, this list is either 54 * stored in an array (small objects) or in a hash table (large objects). 55 */ 56 57 /* 58 * Note: for hash tables, we break the address space of the aobj into blocks 59 * of UAO_SWHASH_CLUSTER_SIZE pages, which shall be a power of two. 60 */ 61 #define UAO_SWHASH_CLUSTER_SHIFT 4 62 #define UAO_SWHASH_CLUSTER_SIZE (1 << UAO_SWHASH_CLUSTER_SHIFT) 63 64 /* Get the "tag" for this page index. */ 65 #define UAO_SWHASH_ELT_TAG(idx) ((idx) >> UAO_SWHASH_CLUSTER_SHIFT) 66 #define UAO_SWHASH_ELT_PAGESLOT_IDX(idx) \ 67 ((idx) & (UAO_SWHASH_CLUSTER_SIZE - 1)) 68 69 /* Given an ELT and a page index, find the swap slot. */ 70 #define UAO_SWHASH_ELT_PAGESLOT(elt, idx) \ 71 ((elt)->slots[UAO_SWHASH_ELT_PAGESLOT_IDX(idx)]) 72 73 /* Given an ELT, return its pageidx base. */ 74 #define UAO_SWHASH_ELT_PAGEIDX_BASE(elt) \ 75 ((elt)->tag << UAO_SWHASH_CLUSTER_SHIFT) 76 77 /* The hash function. */ 78 #define UAO_SWHASH_HASH(aobj, idx) \ 79 (&(aobj)->u_swhash[(((idx) >> UAO_SWHASH_CLUSTER_SHIFT) \ 80 & (aobj)->u_swhashmask)]) 81 82 /* 83 * The threshold which determines whether we will use an array or a 84 * hash table to store the list of allocated swap blocks. 
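 *
 * With the current UAO_SWHASH_CLUSTER_SHIFT of 4 this means clusters of
 * 16 slots and a threshold of 64 pages (256KB with 4KB pages).  As an
 * example, page index 0x123 carries tag 0x12, lives in cluster slot 0x3
 * and hashes to bucket u_swhash[0x12 & u_swhashmask].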
85 */ 86 #define UAO_SWHASH_THRESHOLD (UAO_SWHASH_CLUSTER_SIZE * 4) 87 #define UAO_USES_SWHASH(aobj) \ 88 ((aobj)->u_pages > UAO_SWHASH_THRESHOLD) 89 90 /* The number of buckets in a hash, with an upper bound. */ 91 #define UAO_SWHASH_MAXBUCKETS 256 92 #define UAO_SWHASH_BUCKETS(pages) \ 93 (min((pages) >> UAO_SWHASH_CLUSTER_SHIFT, UAO_SWHASH_MAXBUCKETS)) 94 95 96 /* 97 * uao_swhash_elt: when a hash table is being used, this structure defines 98 * the format of an entry in the bucket list. 99 */ 100 struct uao_swhash_elt { 101 LIST_ENTRY(uao_swhash_elt) list; /* the hash list */ 102 voff_t tag; /* our 'tag' */ 103 int count; /* our number of active slots */ 104 int slots[UAO_SWHASH_CLUSTER_SIZE]; /* the slots */ 105 }; 106 107 /* 108 * uao_swhash: the swap hash table structure 109 */ 110 LIST_HEAD(uao_swhash, uao_swhash_elt); 111 112 /* 113 * uao_swhash_elt_pool: pool of uao_swhash_elt structures 114 */ 115 struct pool uao_swhash_elt_pool; 116 117 /* 118 * uvm_aobj: the actual anon-backed uvm_object 119 * 120 * => the uvm_object is at the top of the structure, this allows 121 * (struct uvm_aobj *) == (struct uvm_object *) 122 * => only one of u_swslots and u_swhash is used in any given aobj 123 */ 124 struct uvm_aobj { 125 struct uvm_object u_obj; /* has: pgops, memt, #pages, #refs */ 126 int u_pages; /* number of pages in entire object */ 127 int u_flags; /* the flags (see uvm_aobj.h) */ 128 /* 129 * Either an array or hashtable (array of bucket heads) of 130 * offset -> swapslot mappings for the aobj. 131 */ 132 #define u_swslots u_swap.slot_array 133 #define u_swhash u_swap.slot_hash 134 union swslots { 135 int *slot_array; 136 struct uao_swhash *slot_hash; 137 } u_swap; 138 u_long u_swhashmask; /* mask for hashtable */ 139 LIST_ENTRY(uvm_aobj) u_list; /* global list of aobjs */ 140 }; 141 142 struct pool uvm_aobj_pool; 143 144 static struct uao_swhash_elt *uao_find_swhash_elt(struct uvm_aobj *, int, 145 boolean_t); 146 static boolean_t uao_flush(struct uvm_object *, voff_t, 147 voff_t, int); 148 static void uao_free(struct uvm_aobj *); 149 static int uao_get(struct uvm_object *, voff_t, 150 vm_page_t *, int *, int, vm_prot_t, 151 int, int); 152 static boolean_t uao_pagein(struct uvm_aobj *, int, int); 153 static boolean_t uao_pagein_page(struct uvm_aobj *, int); 154 155 void uao_dropswap_range(struct uvm_object *, voff_t, voff_t); 156 void uao_shrink_flush(struct uvm_object *, int, int); 157 int uao_shrink_hash(struct uvm_object *, int); 158 int uao_shrink_array(struct uvm_object *, int); 159 int uao_shrink_convert(struct uvm_object *, int); 160 161 int uao_grow_hash(struct uvm_object *, int); 162 int uao_grow_array(struct uvm_object *, int); 163 int uao_grow_convert(struct uvm_object *, int); 164 165 /* 166 * aobj_pager 167 * 168 * note that some functions (e.g. put) are handled elsewhere 169 */ 170 const struct uvm_pagerops aobj_pager = { 171 .pgo_reference = uao_reference, 172 .pgo_detach = uao_detach, 173 .pgo_flush = uao_flush, 174 .pgo_get = uao_get, 175 }; 176 177 /* 178 * uao_list: global list of active aobjs, locked by uao_list_lock 179 * 180 * Lock ordering: generally the locking order is object lock, then list lock. 181 * in the case of swap off we have to iterate over the list, and thus the 182 * ordering is reversed. In that case we must use trylocking to prevent 183 * deadlock. 
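 * (uao_swap_off() below also takes a reference on both the current and
 * the next list entry before dropping the list lock, so that neither can
 * disappear while an object is being processed.)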
 */
static LIST_HEAD(aobjlist, uvm_aobj) uao_list = LIST_HEAD_INITIALIZER(uao_list);
static struct mutex uao_list_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);


/*
 * functions
 */
/*
 * hash table/array related functions
 */
/*
 * uao_find_swhash_elt: find (or create) a hash table entry for a page
 * offset.
 */
static struct uao_swhash_elt *
uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, boolean_t create)
{
	struct uao_swhash *swhash;
	struct uao_swhash_elt *elt;
	voff_t page_tag;

	swhash = UAO_SWHASH_HASH(aobj, pageidx);	/* first hash to get bucket */
	page_tag = UAO_SWHASH_ELT_TAG(pageidx);		/* tag to search for */

	/*
	 * now search the bucket for the requested tag
	 */
	LIST_FOREACH(elt, swhash, list) {
		if (elt->tag == page_tag)
			return elt;
	}

	if (!create)
		return NULL;

	/*
	 * allocate a new entry for the bucket and init/insert it in
	 */
	elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT | PR_ZERO);
	/*
	 * XXX We cannot sleep here as the hash table might disappear
	 * from under our feet.  And we run the risk of deadlocking
	 * the pagedaemon.  In fact this code will only be called by
	 * the pagedaemon and allocation will only fail if we
	 * exhausted the pagedaemon reserve.  In that case we're
	 * doomed anyway, so panic.
	 */
	if (elt == NULL)
		panic("%s: can't allocate entry", __func__);
	LIST_INSERT_HEAD(swhash, elt, list);
	elt->tag = page_tag;

	return elt;
}

/*
 * uao_find_swslot: find the swap slot number for an aobj/pageidx
 */
int
uao_find_swslot(struct uvm_object *uobj, int pageidx)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));

	/*
	 * if noswap flag is set, then we never return a slot
	 */
	if (aobj->u_flags & UAO_FLAG_NOSWAP)
		return 0;

	/*
	 * if hashing, look in hash table.
	 */
	if (UAO_USES_SWHASH(aobj)) {
		struct uao_swhash_elt *elt =
		    uao_find_swhash_elt(aobj, pageidx, FALSE);

		if (elt)
			return UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
		else
			return 0;
	}

	/*
	 * otherwise, look in the array
	 */
	return aobj->u_swslots[pageidx];
}

/*
 * uao_set_swslot: set the swap slot for a page in an aobj.
 *
 * => setting a slot to zero frees the slot
 * => object must be locked by caller
 * => we return the old slot number, or -1 if we failed to allocate
 *    memory to record the new slot number
 */
int
uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	int oldslot;

	KASSERT(rw_write_held(uobj->vmobjlock) || uobj->uo_refs == 0);
	KASSERT(UVM_OBJ_IS_AOBJ(uobj));

	/*
	 * if noswap flag is set, then we can't set a slot
	 */
	if (aobj->u_flags & UAO_FLAG_NOSWAP) {
		if (slot == 0)
			return 0;		/* a clear is ok */

		/* but a set is not */
		printf("uao_set_swslot: uobj = %p\n", uobj);
		panic("uao_set_swslot: attempt to set a slot on a NOSWAP object");
	}

	/*
	 * are we using a hash table?  if so, add it in the hash.
	 */
	if (UAO_USES_SWHASH(aobj)) {
		/*
		 * Avoid allocating an entry just to free it again if
		 * the page had no swap slot in the first place, and
		 * we are freeing.
		 */
		struct uao_swhash_elt *elt =
		    uao_find_swhash_elt(aobj, pageidx, slot ?
TRUE : FALSE); 315 if (elt == NULL) { 316 KASSERT(slot == 0); 317 return 0; 318 } 319 320 oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx); 321 UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot; 322 323 /* 324 * now adjust the elt's reference counter and free it if we've 325 * dropped it to zero. 326 */ 327 if (slot) { 328 if (oldslot == 0) 329 elt->count++; 330 } else { 331 if (oldslot) 332 elt->count--; 333 334 if (elt->count == 0) { 335 LIST_REMOVE(elt, list); 336 pool_put(&uao_swhash_elt_pool, elt); 337 } 338 } 339 } else { 340 /* we are using an array */ 341 oldslot = aobj->u_swslots[pageidx]; 342 aobj->u_swslots[pageidx] = slot; 343 } 344 return oldslot; 345 } 346 /* 347 * end of hash/array functions 348 */ 349 350 /* 351 * uao_free: free all resources held by an aobj, and then free the aobj 352 * 353 * => the aobj should be dead 354 */ 355 static void 356 uao_free(struct uvm_aobj *aobj) 357 { 358 struct uvm_object *uobj = &aobj->u_obj; 359 360 KASSERT(UVM_OBJ_IS_AOBJ(uobj)); 361 KASSERT(rw_write_held(uobj->vmobjlock)); 362 uao_dropswap_range(uobj, 0, 0); 363 rw_exit(uobj->vmobjlock); 364 365 if (UAO_USES_SWHASH(aobj)) { 366 /* 367 * free the hash table itself. 368 */ 369 hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ); 370 } else { 371 free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int)); 372 } 373 374 /* 375 * finally free the aobj itself 376 */ 377 uvm_obj_destroy(uobj); 378 pool_put(&uvm_aobj_pool, aobj); 379 } 380 381 /* 382 * pager functions 383 */ 384 385 #ifdef TMPFS 386 /* 387 * Shrink an aobj to a given number of pages. The procedure is always the same: 388 * assess the necessity of data structure conversion (hash to array), secure 389 * resources, flush pages and drop swap slots. 390 * 391 */ 392 393 void 394 uao_shrink_flush(struct uvm_object *uobj, int startpg, int endpg) 395 { 396 KASSERT(startpg < endpg); 397 KASSERT(uobj->uo_refs == 1); 398 uao_flush(uobj, (voff_t)startpg << PAGE_SHIFT, 399 (voff_t)endpg << PAGE_SHIFT, PGO_FREE); 400 uao_dropswap_range(uobj, startpg, endpg); 401 } 402 403 int 404 uao_shrink_hash(struct uvm_object *uobj, int pages) 405 { 406 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; 407 struct uao_swhash *new_swhash; 408 struct uao_swhash_elt *elt; 409 unsigned long new_hashmask; 410 int i; 411 412 KASSERT(UAO_USES_SWHASH(aobj)); 413 414 /* 415 * If the size of the hash table doesn't change, all we need to do is 416 * to adjust the page count. 417 */ 418 if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) { 419 uao_shrink_flush(uobj, pages, aobj->u_pages); 420 aobj->u_pages = pages; 421 return 0; 422 } 423 424 new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ, 425 M_WAITOK | M_CANFAIL, &new_hashmask); 426 if (new_swhash == NULL) 427 return ENOMEM; 428 429 uao_shrink_flush(uobj, pages, aobj->u_pages); 430 431 /* 432 * Even though the hash table size is changing, the hash of the buckets 433 * we are interested in copying should not change. 
434 */ 435 for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) { 436 while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) { 437 elt = LIST_FIRST(&aobj->u_swhash[i]); 438 LIST_REMOVE(elt, list); 439 LIST_INSERT_HEAD(&new_swhash[i], elt, list); 440 } 441 } 442 443 hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ); 444 445 aobj->u_swhash = new_swhash; 446 aobj->u_pages = pages; 447 aobj->u_swhashmask = new_hashmask; 448 449 return 0; 450 } 451 452 int 453 uao_shrink_convert(struct uvm_object *uobj, int pages) 454 { 455 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; 456 struct uao_swhash_elt *elt; 457 int i, *new_swslots; 458 459 new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ, 460 M_WAITOK | M_CANFAIL | M_ZERO); 461 if (new_swslots == NULL) 462 return ENOMEM; 463 464 uao_shrink_flush(uobj, pages, aobj->u_pages); 465 466 /* Convert swap slots from hash to array. */ 467 for (i = 0; i < pages; i++) { 468 elt = uao_find_swhash_elt(aobj, i, FALSE); 469 if (elt != NULL) { 470 new_swslots[i] = UAO_SWHASH_ELT_PAGESLOT(elt, i); 471 if (new_swslots[i] != 0) 472 elt->count--; 473 if (elt->count == 0) { 474 LIST_REMOVE(elt, list); 475 pool_put(&uao_swhash_elt_pool, elt); 476 } 477 } 478 } 479 480 hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ); 481 482 aobj->u_swslots = new_swslots; 483 aobj->u_pages = pages; 484 485 return 0; 486 } 487 488 int 489 uao_shrink_array(struct uvm_object *uobj, int pages) 490 { 491 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; 492 int i, *new_swslots; 493 494 new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ, 495 M_WAITOK | M_CANFAIL | M_ZERO); 496 if (new_swslots == NULL) 497 return ENOMEM; 498 499 uao_shrink_flush(uobj, pages, aobj->u_pages); 500 501 for (i = 0; i < pages; i++) 502 new_swslots[i] = aobj->u_swslots[i]; 503 504 free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int)); 505 506 aobj->u_swslots = new_swslots; 507 aobj->u_pages = pages; 508 509 return 0; 510 } 511 512 int 513 uao_shrink(struct uvm_object *uobj, int pages) 514 { 515 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; 516 517 KASSERT(pages < aobj->u_pages); 518 519 /* 520 * Distinguish between three possible cases: 521 * 1. aobj uses hash and must be converted to array. 522 * 2. aobj uses array and array size needs to be adjusted. 523 * 3. aobj uses hash and hash size needs to be adjusted. 524 */ 525 if (pages > UAO_SWHASH_THRESHOLD) 526 return uao_shrink_hash(uobj, pages); /* case 3 */ 527 else if (aobj->u_pages > UAO_SWHASH_THRESHOLD) 528 return uao_shrink_convert(uobj, pages); /* case 1 */ 529 else 530 return uao_shrink_array(uobj, pages); /* case 2 */ 531 } 532 533 /* 534 * Grow an aobj to a given number of pages. Right now we only adjust the swap 535 * slots. We could additionally handle page allocation directly, so that they 536 * don't happen through uvm_fault(). That would allow us to use another 537 * mechanism for the swap slots other than malloc(). It is thus mandatory that 538 * the caller of these functions does not allow faults to happen in case of 539 * growth error. 
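 * (All three helpers below return ENOMEM and leave u_pages untouched when
 * allocation fails.)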
540 */ 541 int 542 uao_grow_array(struct uvm_object *uobj, int pages) 543 { 544 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; 545 int i, *new_swslots; 546 547 KASSERT(aobj->u_pages <= UAO_SWHASH_THRESHOLD); 548 549 new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ, 550 M_WAITOK | M_CANFAIL | M_ZERO); 551 if (new_swslots == NULL) 552 return ENOMEM; 553 554 for (i = 0; i < aobj->u_pages; i++) 555 new_swslots[i] = aobj->u_swslots[i]; 556 557 free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int)); 558 559 aobj->u_swslots = new_swslots; 560 aobj->u_pages = pages; 561 562 return 0; 563 } 564 565 int 566 uao_grow_hash(struct uvm_object *uobj, int pages) 567 { 568 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; 569 struct uao_swhash *new_swhash; 570 struct uao_swhash_elt *elt; 571 unsigned long new_hashmask; 572 int i; 573 574 KASSERT(pages > UAO_SWHASH_THRESHOLD); 575 576 /* 577 * If the size of the hash table doesn't change, all we need to do is 578 * to adjust the page count. 579 */ 580 if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) { 581 aobj->u_pages = pages; 582 return 0; 583 } 584 585 KASSERT(UAO_SWHASH_BUCKETS(aobj->u_pages) < UAO_SWHASH_BUCKETS(pages)); 586 587 new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ, 588 M_WAITOK | M_CANFAIL, &new_hashmask); 589 if (new_swhash == NULL) 590 return ENOMEM; 591 592 for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) { 593 while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) { 594 elt = LIST_FIRST(&aobj->u_swhash[i]); 595 LIST_REMOVE(elt, list); 596 LIST_INSERT_HEAD(&new_swhash[i], elt, list); 597 } 598 } 599 600 hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ); 601 602 aobj->u_swhash = new_swhash; 603 aobj->u_pages = pages; 604 aobj->u_swhashmask = new_hashmask; 605 606 return 0; 607 } 608 609 int 610 uao_grow_convert(struct uvm_object *uobj, int pages) 611 { 612 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; 613 struct uao_swhash *new_swhash; 614 struct uao_swhash_elt *elt; 615 unsigned long new_hashmask; 616 int i, *old_swslots; 617 618 new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ, 619 M_WAITOK | M_CANFAIL, &new_hashmask); 620 if (new_swhash == NULL) 621 return ENOMEM; 622 623 /* Set these now, so we can use uao_find_swhash_elt(). */ 624 old_swslots = aobj->u_swslots; 625 aobj->u_swhash = new_swhash; 626 aobj->u_swhashmask = new_hashmask; 627 628 for (i = 0; i < aobj->u_pages; i++) { 629 if (old_swslots[i] != 0) { 630 elt = uao_find_swhash_elt(aobj, i, TRUE); 631 elt->count++; 632 UAO_SWHASH_ELT_PAGESLOT(elt, i) = old_swslots[i]; 633 } 634 } 635 636 free(old_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int)); 637 aobj->u_pages = pages; 638 639 return 0; 640 } 641 642 int 643 uao_grow(struct uvm_object *uobj, int pages) 644 { 645 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; 646 647 KASSERT(pages > aobj->u_pages); 648 649 /* 650 * Distinguish between three possible cases: 651 * 1. aobj uses hash and hash size needs to be adjusted. 652 * 2. aobj uses array and array size needs to be adjusted. 653 * 3. aobj uses array and must be converted to hash. 654 */ 655 if (pages <= UAO_SWHASH_THRESHOLD) 656 return uao_grow_array(uobj, pages); /* case 2 */ 657 else if (aobj->u_pages > UAO_SWHASH_THRESHOLD) 658 return uao_grow_hash(uobj, pages); /* case 1 */ 659 else 660 return uao_grow_convert(uobj, pages); 661 } 662 #endif /* TMPFS */ 663 664 /* 665 * uao_create: create an aobj of the given size and return its uvm_object. 
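 * (The kernel object is set up in two steps: uao_create() is called once
 * with UAO_FLAG_KERNOBJ, which leaves UAO_FLAG_NOSWAP set, and later a
 * second time with UAO_FLAG_KERNSWAP to allocate the swap slot structures
 * and clear NOSWAP.)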
666 * 667 * => for normal use, flags are zero or UAO_FLAG_CANFAIL. 668 * => for the kernel object, the flags are: 669 * UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once) 670 * UAO_FLAG_KERNSWAP - enable swapping of kernel object (" ") 671 */ 672 struct uvm_object * 673 uao_create(vsize_t size, int flags) 674 { 675 static struct uvm_aobj kernel_object_store; 676 static struct rwlock bootstrap_kernel_object_lock; 677 static int kobj_alloced = 0; 678 int pages = round_page(size) >> PAGE_SHIFT; 679 struct uvm_aobj *aobj; 680 int refs; 681 682 /* 683 * Allocate a new aobj, unless kernel object is requested. 684 */ 685 if (flags & UAO_FLAG_KERNOBJ) { 686 KASSERT(!kobj_alloced); 687 aobj = &kernel_object_store; 688 aobj->u_pages = pages; 689 aobj->u_flags = UAO_FLAG_NOSWAP; 690 refs = UVM_OBJ_KERN; 691 kobj_alloced = UAO_FLAG_KERNOBJ; 692 } else if (flags & UAO_FLAG_KERNSWAP) { 693 KASSERT(kobj_alloced == UAO_FLAG_KERNOBJ); 694 aobj = &kernel_object_store; 695 kobj_alloced = UAO_FLAG_KERNSWAP; 696 } else { 697 aobj = pool_get(&uvm_aobj_pool, PR_WAITOK); 698 aobj->u_pages = pages; 699 aobj->u_flags = 0; 700 refs = 1; 701 } 702 703 /* 704 * allocate hash/array if necessary 705 */ 706 if (flags == 0 || (flags & (UAO_FLAG_KERNSWAP | UAO_FLAG_CANFAIL))) { 707 int mflags; 708 709 if (flags) 710 mflags = M_NOWAIT; 711 else 712 mflags = M_WAITOK; 713 714 /* allocate hash table or array depending on object size */ 715 if (UAO_USES_SWHASH(aobj)) { 716 aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), 717 M_UVMAOBJ, mflags, &aobj->u_swhashmask); 718 if (aobj->u_swhash == NULL) { 719 if (flags & UAO_FLAG_CANFAIL) { 720 pool_put(&uvm_aobj_pool, aobj); 721 return NULL; 722 } 723 panic("uao_create: hashinit swhash failed"); 724 } 725 } else { 726 aobj->u_swslots = mallocarray(pages, sizeof(int), 727 M_UVMAOBJ, mflags|M_ZERO); 728 if (aobj->u_swslots == NULL) { 729 if (flags & UAO_FLAG_CANFAIL) { 730 pool_put(&uvm_aobj_pool, aobj); 731 return NULL; 732 } 733 panic("uao_create: malloc swslots failed"); 734 } 735 } 736 737 if (flags & UAO_FLAG_KERNSWAP) { 738 aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */ 739 return &aobj->u_obj; 740 /* done! */ 741 } 742 } 743 744 /* 745 * Initialise UVM object. 746 */ 747 uvm_obj_init(&aobj->u_obj, &aobj_pager, refs); 748 if (flags & UAO_FLAG_KERNOBJ) { 749 /* Use a temporary static lock for kernel_object. */ 750 rw_init(&bootstrap_kernel_object_lock, "kobjlk"); 751 uvm_obj_setlock(&aobj->u_obj, &bootstrap_kernel_object_lock); 752 } 753 754 /* 755 * now that aobj is ready, add it to the global list 756 */ 757 mtx_enter(&uao_list_lock); 758 LIST_INSERT_HEAD(&uao_list, aobj, u_list); 759 mtx_leave(&uao_list_lock); 760 761 return &aobj->u_obj; 762 } 763 764 765 766 /* 767 * uao_init: set up aobj pager subsystem 768 * 769 * => called at boot time from uvm_pager_init() 770 */ 771 void 772 uao_init(void) 773 { 774 /* 775 * NOTE: Pages for this pool must not come from a pageable 776 * kernel map! 777 */ 778 pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt), 0, 779 IPL_NONE, PR_WAITOK, "uaoeltpl", NULL); 780 pool_init(&uvm_aobj_pool, sizeof(struct uvm_aobj), 0, 781 IPL_NONE, PR_WAITOK, "aobjpl", NULL); 782 } 783 784 /* 785 * uao_reference: hold a reference to an anonymous UVM object. 786 */ 787 void 788 uao_reference(struct uvm_object *uobj) 789 { 790 /* Kernel object is persistent. 
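	 * It was created with UVM_OBJ_KERN as its reference count and is
	 * never freed; uao_detach() ignores it in the same way.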
*/ 791 if (UVM_OBJ_IS_KERN_OBJECT(uobj)) 792 return; 793 794 atomic_inc_int(&uobj->uo_refs); 795 } 796 797 798 /* 799 * uao_detach: drop a reference to an anonymous UVM object. 800 */ 801 void 802 uao_detach(struct uvm_object *uobj) 803 { 804 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; 805 struct vm_page *pg; 806 807 /* 808 * Detaching from kernel_object is a NOP. 809 */ 810 if (UVM_OBJ_IS_KERN_OBJECT(uobj)) 811 return; 812 813 /* 814 * Drop the reference. If it was the last one, destroy the object. 815 */ 816 if (atomic_dec_int_nv(&uobj->uo_refs) > 0) { 817 return; 818 } 819 820 /* 821 * Remove the aobj from the global list. 822 */ 823 mtx_enter(&uao_list_lock); 824 LIST_REMOVE(aobj, u_list); 825 mtx_leave(&uao_list_lock); 826 827 /* 828 * Free all the pages left in the aobj. For each page, when the 829 * page is no longer busy (and thus after any disk I/O that it is 830 * involved in is complete), release any swap resources and free 831 * the page itself. 832 */ 833 rw_enter(uobj->vmobjlock, RW_WRITE); 834 while ((pg = RBT_ROOT(uvm_objtree, &uobj->memt)) != NULL) { 835 pmap_page_protect(pg, PROT_NONE); 836 if (pg->pg_flags & PG_BUSY) { 837 uvm_pagewait(pg, uobj->vmobjlock, "uao_det"); 838 rw_enter(uobj->vmobjlock, RW_WRITE); 839 continue; 840 } 841 uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT); 842 uvm_lock_pageq(); 843 uvm_pagefree(pg); 844 uvm_unlock_pageq(); 845 } 846 847 /* 848 * Finally, free the anonymous UVM object itself. 849 */ 850 uao_free(aobj); 851 } 852 853 /* 854 * uao_flush: flush pages out of a uvm object 855 * 856 * => if PGO_CLEANIT is not set, then we will not block. 857 * => if PGO_ALLPAGE is set, then all pages in the object are valid targets 858 * for flushing. 859 * => NOTE: we are allowed to lock the page queues, so the caller 860 * must not be holding the lock on them [e.g. pagedaemon had 861 * better not call us with the queues locked] 862 * => we return TRUE unless we encountered some sort of I/O error 863 * XXXJRT currently never happens, as we never directly initiate 864 * XXXJRT I/O 865 */ 866 boolean_t 867 uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags) 868 { 869 struct uvm_aobj *aobj = (struct uvm_aobj *) uobj; 870 struct vm_page *pg; 871 voff_t curoff; 872 873 KASSERT(UVM_OBJ_IS_AOBJ(uobj)); 874 KASSERT(rw_write_held(uobj->vmobjlock)); 875 876 if (flags & PGO_ALLPAGES) { 877 start = 0; 878 stop = (voff_t)aobj->u_pages << PAGE_SHIFT; 879 } else { 880 start = trunc_page(start); 881 stop = round_page(stop); 882 if (stop > ((voff_t)aobj->u_pages << PAGE_SHIFT)) { 883 printf("uao_flush: strange, got an out of range " 884 "flush (fixed)\n"); 885 stop = (voff_t)aobj->u_pages << PAGE_SHIFT; 886 } 887 } 888 889 /* 890 * Don't need to do any work here if we're not freeing 891 * or deactivating pages. 892 */ 893 if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) { 894 return TRUE; 895 } 896 897 curoff = start; 898 for (;;) { 899 if (curoff < stop) { 900 pg = uvm_pagelookup(uobj, curoff); 901 curoff += PAGE_SIZE; 902 if (pg == NULL) 903 continue; 904 } else { 905 break; 906 } 907 908 /* Make sure page is unbusy, else wait for it. */ 909 if (pg->pg_flags & PG_BUSY) { 910 uvm_pagewait(pg, uobj->vmobjlock, "uaoflsh"); 911 rw_enter(uobj->vmobjlock, RW_WRITE); 912 curoff -= PAGE_SIZE; 913 continue; 914 } 915 916 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { 917 /* 918 * XXX In these first 3 cases, we always just 919 * XXX deactivate the page. 
We may want to 920 * XXX handle the different cases more specifically 921 * XXX in the future. 922 */ 923 case PGO_CLEANIT|PGO_FREE: 924 /* FALLTHROUGH */ 925 case PGO_CLEANIT|PGO_DEACTIVATE: 926 /* FALLTHROUGH */ 927 case PGO_DEACTIVATE: 928 deactivate_it: 929 if (pg->wire_count != 0) 930 continue; 931 932 uvm_lock_pageq(); 933 pmap_page_protect(pg, PROT_NONE); 934 uvm_pagedeactivate(pg); 935 uvm_unlock_pageq(); 936 937 continue; 938 case PGO_FREE: 939 /* 940 * If there are multiple references to 941 * the object, just deactivate the page. 942 */ 943 if (uobj->uo_refs > 1) 944 goto deactivate_it; 945 946 /* XXX skip the page if it's wired */ 947 if (pg->wire_count != 0) 948 continue; 949 950 /* 951 * free the swap slot and the page. 952 */ 953 pmap_page_protect(pg, PROT_NONE); 954 955 /* 956 * freeing swapslot here is not strictly necessary. 957 * however, leaving it here doesn't save much 958 * because we need to update swap accounting anyway. 959 */ 960 uao_dropswap(uobj, pg->offset >> PAGE_SHIFT); 961 uvm_lock_pageq(); 962 uvm_pagefree(pg); 963 uvm_unlock_pageq(); 964 965 continue; 966 default: 967 panic("uao_flush: weird flags"); 968 } 969 } 970 971 return TRUE; 972 } 973 974 /* 975 * uao_get: fetch me a page 976 * 977 * we have three cases: 978 * 1: page is resident -> just return the page. 979 * 2: page is zero-fill -> allocate a new page and zero it. 980 * 3: page is swapped out -> fetch the page from swap. 981 * 982 * cases 1 can be handled with PGO_LOCKED, cases 2 and 3 cannot. 983 * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES), 984 * then we will need to return VM_PAGER_UNLOCK. 985 * 986 * => flags: PGO_ALLPAGES: get all of the pages 987 * PGO_LOCKED: fault data structures are locked 988 * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx] 989 * => NOTE: caller must check for released pages!! 990 */ 991 static int 992 uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps, 993 int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags) 994 { 995 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; 996 voff_t current_offset; 997 vm_page_t ptmp; 998 int lcv, gotpages, maxpages, swslot, rv, pageidx; 999 boolean_t done; 1000 1001 KASSERT(UVM_OBJ_IS_AOBJ(uobj)); 1002 KASSERT(rw_write_held(uobj->vmobjlock)); 1003 1004 /* 1005 * get number of pages 1006 */ 1007 maxpages = *npagesp; 1008 1009 if (flags & PGO_LOCKED) { 1010 /* 1011 * step 1a: get pages that are already resident. only do 1012 * this if the data structures are locked (i.e. the first 1013 * time through). 1014 */ 1015 1016 done = TRUE; /* be optimistic */ 1017 gotpages = 0; /* # of pages we got so far */ 1018 1019 for (lcv = 0, current_offset = offset ; lcv < maxpages ; 1020 lcv++, current_offset += PAGE_SIZE) { 1021 /* do we care about this page? if not, skip it */ 1022 if (pps[lcv] == PGO_DONTCARE) 1023 continue; 1024 1025 ptmp = uvm_pagelookup(uobj, current_offset); 1026 1027 /* 1028 * if page is new, attempt to allocate the page, 1029 * zero-fill'd. 
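			 * (PGO_LOCKED callers cannot sleep, so the page is
			 * allocated with UVM_PGA_ZERO and the offset is
			 * simply skipped if the allocation fails; the center
			 * page then forces VM_PAGER_UNLOCK below.)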
			 */
			if (ptmp == NULL && uao_find_swslot(uobj,
			    current_offset >> PAGE_SHIFT) == 0) {
				ptmp = uvm_pagealloc(uobj, current_offset,
				    NULL, UVM_PGA_ZERO);
				if (ptmp) {
					/* new page */
					atomic_clearbits_int(&ptmp->pg_flags,
					    PG_BUSY|PG_FAKE);
					atomic_setbits_int(&ptmp->pg_flags,
					    PQ_AOBJ);
					UVM_PAGE_OWN(ptmp, NULL);
				}
			}

			/*
			 * to be useful must get a non-busy page
			 */
			if (ptmp == NULL ||
			    (ptmp->pg_flags & PG_BUSY) != 0) {
				if (lcv == centeridx ||
				    (flags & PGO_ALLPAGES) != 0)
					/* need to do a wait or I/O! */
					done = FALSE;
				continue;
			}

			/*
			 * useful page: plug it in our result array
			 */
			atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(ptmp, "uao_get1");
			pps[lcv] = ptmp;
			gotpages++;

		}

		/*
		 * step 1b: now we've either done everything needed or we
		 * need to unlock and do some waiting or I/O.
		 */
		*npagesp = gotpages;
		if (done)
			/* bingo! */
			return VM_PAGER_OK;
		else
			/* EEK! Need to unlock and I/O */
			return VM_PAGER_UNLOCK;
	}

	/*
	 * step 2: get non-resident or busy pages.
	 * data structures are unlocked.
	 */
	for (lcv = 0, current_offset = offset ; lcv < maxpages ;
	    lcv++, current_offset += PAGE_SIZE) {
		/*
		 * - skip over pages we've already gotten or don't want
		 * - skip over pages we don't _have_ to get
		 */
		if (pps[lcv] != NULL ||
		    (lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
			continue;

		pageidx = current_offset >> PAGE_SHIFT;

		/*
		 * we have yet to locate the current page (pps[lcv]).  we
		 * first look for a page that is already at the current offset.
		 * if we find a page, we check to see if it is busy or
		 * released.  if that is the case, then we sleep on the page
		 * until it is no longer busy or released and repeat the lookup.
		 * if the page we found is neither busy nor released, then we
		 * busy it (so we own it) and plug it into pps[lcv].  this
		 * 'break's the following while loop and indicates we are
		 * ready to move on to the next page in the "lcv" loop above.
		 *
		 * if we exit the while loop with pps[lcv] still set to NULL,
		 * then it means that we allocated a new busy/fake/clean page
		 * ptmp in the object and we need to do I/O to fill in the data.
		 */

		/* top of "pps" while loop */
		while (pps[lcv] == NULL) {
			/* look for a resident page */
			ptmp = uvm_pagelookup(uobj, current_offset);

			/* not resident?  allocate one now (if we can) */
			if (ptmp == NULL) {

				ptmp = uvm_pagealloc(uobj, current_offset,
				    NULL, 0);

				/* out of RAM? */
				if (ptmp == NULL) {
					rw_exit(uobj->vmobjlock);
					uvm_wait("uao_getpage");
					rw_enter(uobj->vmobjlock, RW_WRITE);
					/* goto top of pps while loop */
					continue;
				}

				/*
				 * safe with PQ's unlocked: because we just
				 * alloc'd the page
				 */
				atomic_setbits_int(&ptmp->pg_flags, PQ_AOBJ);

				/*
				 * got new page ready for I/O.  break pps while
				 * loop.  pps[lcv] is still NULL.
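				 * (The actual I/O happens further down:
				 * uvm_pagezero() for a fresh page or
				 * uvm_swap_get() for a swapped-out one.)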
1141 */ 1142 break; 1143 } 1144 1145 /* page is there, see if we need to wait on it */ 1146 if ((ptmp->pg_flags & PG_BUSY) != 0) { 1147 uvm_pagewait(ptmp, uobj->vmobjlock, "uao_get"); 1148 rw_enter(uobj->vmobjlock, RW_WRITE); 1149 continue; /* goto top of pps while loop */ 1150 } 1151 1152 /* 1153 * if we get here then the page is resident and 1154 * unbusy. we busy it now (so we own it). 1155 */ 1156 /* we own it, caller must un-busy */ 1157 atomic_setbits_int(&ptmp->pg_flags, PG_BUSY); 1158 UVM_PAGE_OWN(ptmp, "uao_get2"); 1159 pps[lcv] = ptmp; 1160 } 1161 1162 /* 1163 * if we own the valid page at the correct offset, pps[lcv] will 1164 * point to it. nothing more to do except go to the next page. 1165 */ 1166 if (pps[lcv]) 1167 continue; /* next lcv */ 1168 1169 /* 1170 * we have a "fake/busy/clean" page that we just allocated. 1171 * do the needed "i/o", either reading from swap or zeroing. 1172 */ 1173 swslot = uao_find_swslot(uobj, pageidx); 1174 1175 /* just zero the page if there's nothing in swap. */ 1176 if (swslot == 0) { 1177 /* page hasn't existed before, just zero it. */ 1178 uvm_pagezero(ptmp); 1179 } else { 1180 /* 1181 * page in the swapped-out page. 1182 * unlock object for i/o, relock when done. 1183 */ 1184 1185 rw_exit(uobj->vmobjlock); 1186 rv = uvm_swap_get(ptmp, swslot, PGO_SYNCIO); 1187 rw_enter(uobj->vmobjlock, RW_WRITE); 1188 1189 /* 1190 * I/O done. check for errors. 1191 */ 1192 if (rv != VM_PAGER_OK) { 1193 /* 1194 * remove the swap slot from the aobj 1195 * and mark the aobj as having no real slot. 1196 * don't free the swap slot, thus preventing 1197 * it from being used again. 1198 */ 1199 swslot = uao_set_swslot(&aobj->u_obj, pageidx, 1200 SWSLOT_BAD); 1201 uvm_swap_markbad(swslot, 1); 1202 1203 if (ptmp->pg_flags & PG_WANTED) 1204 wakeup(ptmp); 1205 atomic_clearbits_int(&ptmp->pg_flags, 1206 PG_WANTED|PG_BUSY); 1207 UVM_PAGE_OWN(ptmp, NULL); 1208 uvm_lock_pageq(); 1209 uvm_pagefree(ptmp); 1210 uvm_unlock_pageq(); 1211 rw_exit(uobj->vmobjlock); 1212 1213 return rv; 1214 } 1215 } 1216 1217 /* 1218 * we got the page! clear the fake flag (indicates valid 1219 * data now in page) and plug into our result array. note 1220 * that page is still busy. 1221 * 1222 * it is the callers job to: 1223 * => check if the page is released 1224 * => unbusy the page 1225 * => activate the page 1226 */ 1227 atomic_clearbits_int(&ptmp->pg_flags, PG_FAKE); 1228 pmap_clear_modify(ptmp); /* ... and clean */ 1229 pps[lcv] = ptmp; 1230 1231 } /* lcv loop */ 1232 1233 rw_exit(uobj->vmobjlock); 1234 return VM_PAGER_OK; 1235 } 1236 1237 /* 1238 * uao_dropswap: release any swap resources from this aobj page. 1239 * 1240 * => aobj must be locked or have a reference count of 0. 1241 */ 1242 int 1243 uao_dropswap(struct uvm_object *uobj, int pageidx) 1244 { 1245 int slot; 1246 1247 KASSERT(UVM_OBJ_IS_AOBJ(uobj)); 1248 1249 slot = uao_set_swslot(uobj, pageidx, 0); 1250 if (slot) { 1251 uvm_swap_free(slot, 1); 1252 } 1253 return slot; 1254 } 1255 1256 /* 1257 * page in every page in every aobj that is paged-out to a range of swslots. 1258 * 1259 * => aobj must be locked and is returned locked. 1260 * => returns TRUE if pagein was aborted due to lack of memory. 1261 */ 1262 boolean_t 1263 uao_swap_off(int startslot, int endslot) 1264 { 1265 struct uvm_aobj *aobj; 1266 1267 /* 1268 * Walk the list of all anonymous UVM objects. Grab the first. 
	 */
	mtx_enter(&uao_list_lock);
	if ((aobj = LIST_FIRST(&uao_list)) == NULL) {
		mtx_leave(&uao_list_lock);
		return FALSE;
	}
	uao_reference(&aobj->u_obj);

	do {
		struct uvm_aobj *nextaobj;
		boolean_t rv;

		/*
		 * Prefetch the next object and immediately hold a reference
		 * on it, so neither the current nor the next entry could
		 * disappear while we are iterating.
		 */
		if ((nextaobj = LIST_NEXT(aobj, u_list)) != NULL) {
			uao_reference(&nextaobj->u_obj);
		}
		mtx_leave(&uao_list_lock);

		/*
		 * Page in all pages in the swap slot range.
		 */
		rw_enter(aobj->u_obj.vmobjlock, RW_WRITE);
		rv = uao_pagein(aobj, startslot, endslot);
		rw_exit(aobj->u_obj.vmobjlock);

		/* Drop the reference of the current object. */
		uao_detach(&aobj->u_obj);
		if (rv) {
			if (nextaobj) {
				uao_detach(&nextaobj->u_obj);
			}
			return rv;
		}

		aobj = nextaobj;
		mtx_enter(&uao_list_lock);
	} while (aobj);

	/*
	 * done with traversal, unlock the list
	 */
	mtx_leave(&uao_list_lock);
	return FALSE;
}

/*
 * page in any pages from aobj in the given range.
 *
 * => returns TRUE if pagein was aborted due to lack of memory.
 */
static boolean_t
uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot)
{
	boolean_t rv;

	if (UAO_USES_SWHASH(aobj)) {
		struct uao_swhash_elt *elt;
		int bucket;

restart:
		for (bucket = aobj->u_swhashmask; bucket >= 0; bucket--) {
			for (elt = LIST_FIRST(&aobj->u_swhash[bucket]);
			    elt != NULL;
			    elt = LIST_NEXT(elt, list)) {
				int i;

				for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
					int slot = elt->slots[i];

					/*
					 * if the slot isn't in range, skip it.
					 */
					if (slot < startslot ||
					    slot >= endslot) {
						continue;
					}

					/*
					 * process the page,
					 * then start over on this object
					 * since the swhash elt
					 * may have been freed.
					 */
					rv = uao_pagein_page(aobj,
					    UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
					if (rv) {
						return rv;
					}
					goto restart;
				}
			}
		}
	} else {
		int i;

		for (i = 0; i < aobj->u_pages; i++) {
			int slot = aobj->u_swslots[i];

			/*
			 * if the slot isn't in range, skip it
			 */
			if (slot < startslot || slot >= endslot) {
				continue;
			}

			/*
			 * process the page.
			 */
			rv = uao_pagein_page(aobj, i);
			if (rv) {
				return rv;
			}
		}
	}

	return FALSE;
}

/*
 * uao_pagein_page: page in a single page from an anonymous UVM object.
 *
 * => Returns TRUE if pagein was aborted due to lack of memory.
 */
static boolean_t
uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
{
	struct uvm_object *uobj = &aobj->u_obj;
	struct vm_page *pg;
	int rv, slot, npages;

	pg = NULL;
	npages = 1;

	KASSERT(rw_write_held(uobj->vmobjlock));
	rv = uao_get(&aobj->u_obj, (voff_t)pageidx << PAGE_SHIFT,
	    &pg, &npages, 0, PROT_READ | PROT_WRITE, 0, 0);

	/*
	 * relock and finish up.
	 */
	rw_enter(uobj->vmobjlock, RW_WRITE);
	switch (rv) {
	case VM_PAGER_OK:
		break;

	case VM_PAGER_ERROR:
	case VM_PAGER_REFAULT:
		/*
		 * nothing more to do on errors.
1422 * VM_PAGER_REFAULT can only mean that the anon was freed, 1423 * so again there's nothing to do. 1424 */ 1425 return FALSE; 1426 } 1427 1428 /* 1429 * ok, we've got the page now. 1430 * mark it as dirty, clear its swslot and un-busy it. 1431 */ 1432 slot = uao_set_swslot(&aobj->u_obj, pageidx, 0); 1433 uvm_swap_free(slot, 1); 1434 atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_CLEAN|PG_FAKE); 1435 UVM_PAGE_OWN(pg, NULL); 1436 1437 /* 1438 * deactivate the page (to put it on a page queue). 1439 */ 1440 pmap_clear_reference(pg); 1441 uvm_lock_pageq(); 1442 uvm_pagedeactivate(pg); 1443 uvm_unlock_pageq(); 1444 1445 return FALSE; 1446 } 1447 1448 /* 1449 * uao_dropswap_range: drop swapslots in the range. 1450 * 1451 * => aobj must be locked and is returned locked. 1452 * => start is inclusive. end is exclusive. 1453 */ 1454 void 1455 uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end) 1456 { 1457 struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; 1458 int swpgonlydelta = 0; 1459 1460 KASSERT(UVM_OBJ_IS_AOBJ(uobj)); 1461 KASSERT(rw_write_held(uobj->vmobjlock)); 1462 1463 if (end == 0) { 1464 end = INT64_MAX; 1465 } 1466 1467 if (UAO_USES_SWHASH(aobj)) { 1468 int i, hashbuckets = aobj->u_swhashmask + 1; 1469 voff_t taghi; 1470 voff_t taglo; 1471 1472 taglo = UAO_SWHASH_ELT_TAG(start); 1473 taghi = UAO_SWHASH_ELT_TAG(end); 1474 1475 for (i = 0; i < hashbuckets; i++) { 1476 struct uao_swhash_elt *elt, *next; 1477 1478 for (elt = LIST_FIRST(&aobj->u_swhash[i]); 1479 elt != NULL; 1480 elt = next) { 1481 int startidx, endidx; 1482 int j; 1483 1484 next = LIST_NEXT(elt, list); 1485 1486 if (elt->tag < taglo || taghi < elt->tag) { 1487 continue; 1488 } 1489 1490 if (elt->tag == taglo) { 1491 startidx = 1492 UAO_SWHASH_ELT_PAGESLOT_IDX(start); 1493 } else { 1494 startidx = 0; 1495 } 1496 1497 if (elt->tag == taghi) { 1498 endidx = 1499 UAO_SWHASH_ELT_PAGESLOT_IDX(end); 1500 } else { 1501 endidx = UAO_SWHASH_CLUSTER_SIZE; 1502 } 1503 1504 for (j = startidx; j < endidx; j++) { 1505 int slot = elt->slots[j]; 1506 1507 KASSERT(uvm_pagelookup(&aobj->u_obj, 1508 (voff_t)(UAO_SWHASH_ELT_PAGEIDX_BASE(elt) 1509 + j) << PAGE_SHIFT) == NULL); 1510 1511 if (slot > 0) { 1512 uvm_swap_free(slot, 1); 1513 swpgonlydelta++; 1514 KASSERT(elt->count > 0); 1515 elt->slots[j] = 0; 1516 elt->count--; 1517 } 1518 } 1519 1520 if (elt->count == 0) { 1521 LIST_REMOVE(elt, list); 1522 pool_put(&uao_swhash_elt_pool, elt); 1523 } 1524 } 1525 } 1526 } else { 1527 int i; 1528 1529 if (aobj->u_pages < end) { 1530 end = aobj->u_pages; 1531 } 1532 for (i = start; i < end; i++) { 1533 int slot = aobj->u_swslots[i]; 1534 1535 if (slot > 0) { 1536 uvm_swap_free(slot, 1); 1537 swpgonlydelta++; 1538 } 1539 } 1540 } 1541 1542 /* 1543 * adjust the counter of pages only in swap for all 1544 * the swap slots we've freed. 1545 */ 1546 if (swpgonlydelta > 0) { 1547 KASSERT(uvmexp.swpgonly >= swpgonlydelta); 1548 atomic_add_int(&uvmexp.swpgonly, -swpgonlydelta); 1549 } 1550 } 1551
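
/*
 * Illustrative sketch (deliberately not compiled): the typical life cycle
 * of an aobj as seen by client code, using only functions defined in this
 * file.  The function name is made up for the example and the mapping and
 * fault steps are elided.
 */
#if 0
static void
uao_example_lifecycle(void)
{
	struct uvm_object *uobj;

	/* Create a 16-page anonymous object; may sleep for memory. */
	uobj = uao_create(16 * PAGE_SIZE, 0);

	/* Each additional user takes its own reference... */
	uao_reference(uobj);

	/* ...and drops it when done. */
	uao_detach(uobj);

	/* Dropping the last reference frees pages, swap slots and the aobj. */
	uao_detach(uobj);
}
#endif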