/*	$NetBSD: uvm_amap.c,v 1.48 2002/11/30 18:28:04 bouyer Exp $	*/

/*
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor and
 *      Washington University.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_amap.c: amap operations
 */

/*
 * this file contains functions that perform operations on amaps.  see
 * uvm_amap.h for a brief explanation of the role of amaps in uvm.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.48 2002/11/30 18:28:04 bouyer Exp $");

#undef UVM_AMAP_INLINE		/* enable/disable amap inlines */

#include "opt_uvmhist.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/pool.h>

#define UVM_AMAP_C		/* ensure disabled inlines are in */
#include <uvm/uvm.h>
#include <uvm/uvm_swap.h>

/*
 * pool for allocation of vm_amap structures.  note that the pool has
 * its own simplelock for its protection.  also note that in order to
 * avoid an endless loop, the amap pool's allocator cannot allocate
 * memory from an amap (it currently goes through the kernel uobj, so
 * we are ok).
 */

struct pool uvm_amap_pool;

/*
 * local functions
 */

static struct vm_amap *amap_alloc1 __P((int, int, int));

#ifdef UVM_AMAP_PPREF
/*
 * what is ppref?   ppref is an _optional_ amap feature which is used
 * to keep track of reference counts on a per-page basis.  it is enabled
 * when UVM_AMAP_PPREF is defined.
 *
 * when enabled, an array of ints is allocated for the pprefs.  this
 * array is allocated only when a partial reference is added to the
 * map (either by unmapping part of the amap, or gaining a reference
 * to only a part of an amap).  if the malloc of the array fails
 * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate
 * that we tried to do ppref's but couldn't alloc the array so just
 * give up (after all, this is an optional feature!).
 *
 * the array is divided into page sized "chunks."   for chunks of length 1,
 * the chunk reference count plus one is stored in that chunk's slot.
 * for chunks of length > 1 the first slot contains (the reference count
 * plus one) * -1.    [the negative value indicates that the length is
 * greater than one.]    the second slot of the chunk contains the length
 * of the chunk.   here is an example:
 *
 * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
 *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
 *              <----------><-><----><-------><----><-><------->
 * (x = don't care)
 *
 * this allows one int to contain the ref count for the whole chunk.
 * note that the "plus one" part is needed because a reference count of
 * zero is neither positive nor negative (we need a way to tell if we've
 * got one zero or a bunch of them).
 *
 * here are some in-line functions to help us.
 */

static __inline void pp_getreflen __P((int *, int, int *, int *));
static __inline void pp_setreflen __P((int *, int, int, int));

/*
 * pp_getreflen: get the reference and length for a specific offset
 *
 * => ppref's amap must be locked
 */
static __inline void
pp_getreflen(ppref, offset, refp, lenp)
	int *ppref, offset, *refp, *lenp;
{

	if (ppref[offset] > 0) {		/* chunk size must be 1 */
		*refp = ppref[offset] - 1;	/* don't forget to adjust */
		*lenp = 1;
	} else {
		*refp = (ppref[offset] * -1) - 1;
		*lenp = ppref[offset+1];
	}
}

/*
 * pp_setreflen: set the reference and length for a specific offset
 *
 * => ppref's amap must be locked
 */
static __inline void
pp_setreflen(ppref, offset, ref, len)
	int *ppref, offset, ref, len;
{
	if (len == 1) {
		ppref[offset] = ref + 1;
	} else {
		ppref[offset] = (ref + 1) * -1;
		ppref[offset+1] = len;
	}
}
#endif
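
/*
 * illustrative use of the helpers above (values taken from the example
 * table): pp_setreflen(ppref, 0, 2, 4) encodes the first chunk as
 * ppref[0] == -3 and ppref[1] == 4, and pp_getreflen(ppref, 0, &ref, &len)
 * reads back ref == 2 and len == 4.  a chunk of length one, like the one
 * at slot 4, stores ref + 1 directly: pp_setreflen(ppref, 4, 3, 1)
 * yields ppref[4] == 4.
 */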

/*
 * amap_init: called at boot time to init global amap data structures
 */

void
amap_init(void)
{

	/*
	 * Initialize the vm_amap pool.
	 */

	pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0,
	    "amappl", &pool_allocator_nointr);
}

/*
 * amap_alloc1: internal function that allocates an amap, but does not
 *	init the overlay.
 *
 * => lock on returned amap is init'd
 */
static inline struct vm_amap *
amap_alloc1(slots, padslots, waitf)
	int slots, padslots, waitf;
{
	struct vm_amap *amap;
	int totalslots;

	amap = pool_get(&uvm_amap_pool, (waitf == M_WAITOK) ? PR_WAITOK : 0);
	if (amap == NULL)
		return(NULL);

	totalslots = malloc_roundup((slots + padslots) * sizeof(int)) /
	    sizeof(int);
	simple_lock_init(&amap->am_l);
	amap->am_ref = 1;
	amap->am_flags = 0;
#ifdef UVM_AMAP_PPREF
	amap->am_ppref = NULL;
#endif
	amap->am_maxslot = totalslots;
	amap->am_nslot = slots;
	amap->am_nused = 0;

	amap->am_slots = malloc(totalslots * sizeof(int), M_UVMAMAP,
	    waitf);
	if (amap->am_slots == NULL)
		goto fail1;

	amap->am_bckptr = malloc(totalslots * sizeof(int), M_UVMAMAP, waitf);
	if (amap->am_bckptr == NULL)
		goto fail2;

	amap->am_anon = malloc(totalslots * sizeof(struct vm_anon *),
	    M_UVMAMAP, waitf);
	if (amap->am_anon == NULL)
		goto fail3;

	return(amap);

fail3:
	free(amap->am_bckptr, M_UVMAMAP);
fail2:
	free(amap->am_slots, M_UVMAMAP);
fail1:
	pool_put(&uvm_amap_pool, amap);
	return (NULL);
}

/*
 * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
 *
 * => caller should ensure sz is a multiple of PAGE_SIZE
 * => reference count to new amap is set to one
 * => new amap is returned unlocked
 */

struct vm_amap *
amap_alloc(sz, padsz, waitf)
	vaddr_t sz, padsz;
	int waitf;
{
	struct vm_amap *amap;
	int slots, padslots;
	UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist);

	AMAP_B2SLOT(slots, sz);
	AMAP_B2SLOT(padslots, padsz);

	amap = amap_alloc1(slots, padslots, waitf);
	if (amap)
		memset(amap->am_anon, 0,
		    amap->am_maxslot * sizeof(struct vm_anon *));

	UVMHIST_LOG(maphist,"<- done, amap = 0x%x, sz=%d", amap, sz, 0, 0);
	return(amap);
}
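
/*
 * sizing example (hypothetical values): a caller that wants three pages
 * of anonymous VM with one page of slack would call
 * amap_alloc(3 * PAGE_SIZE, PAGE_SIZE, M_WAITOK).  AMAP_B2SLOT turns the
 * byte counts into slots = 3 and padslots = 1, so am_nslot is 3 and
 * am_maxslot is at least 4 (malloc_roundup may round it up further).
 */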

/*
 * amap_free: free an amap
 *
 * => the amap must be unlocked
 * => the amap should have a zero reference count and be empty
 */
void
amap_free(amap)
	struct vm_amap *amap;
{
	UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);

	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
	LOCK_ASSERT(!simple_lock_held(&amap->am_l));
	free(amap->am_slots, M_UVMAMAP);
	free(amap->am_bckptr, M_UVMAMAP);
	free(amap->am_anon, M_UVMAMAP);
#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
		free(amap->am_ppref, M_UVMAMAP);
#endif
	pool_put(&uvm_amap_pool, amap);
	UVMHIST_LOG(maphist,"<- done, freed amap = 0x%x", amap, 0, 0, 0);
}

/*
 * amap_extend: extend the size of an amap (if needed)
 *
 * => called from uvm_map when we want to extend an amap to cover
 *    a new mapping (rather than allocate a new one)
 * => amap should be unlocked (we will lock it)
 * => to safely extend an amap it should have a reference count of
 *    one (thus it can't be shared)
 */
int
amap_extend(entry, addsize, flags)
	struct vm_map_entry *entry;
	vsize_t addsize;
	int flags;
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int slotoff = entry->aref.ar_pageoff;
	int slotmapped, slotadd, slotneed, slotadded, slotalloc;
	int slotadj, slotspace;
#ifdef UVM_AMAP_PPREF
	int *newppref, *oldppref;
#endif
	int i, *newsl, *newbck, *oldsl, *oldbck;
	struct vm_anon **newover, **oldover;
	int mflag = (flags & AMAP_EXTEND_NOWAIT) ? M_NOWAIT :
	    (M_WAITOK | M_CANFAIL);

	UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "  (entry=0x%x, addsize=0x%x, flags=0x%x)",
	    entry, addsize, flags, 0);

	/*
	 * first, determine how many slots we need in the amap.  don't
	 * forget that ar_pageoff could be non-zero: this means that
	 * there are some unused slots before us in the amap.
	 */

	amap_lock(amap);
	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
	AMAP_B2SLOT(slotadd, addsize);			    /* slots to add */
	if (flags & AMAP_EXTEND_FORWARDS) {
		slotneed = slotoff + slotmapped + slotadd;
		slotadj = 0;
		slotspace = 0;
	}
	else {
		slotneed = slotadd + slotmapped;
		slotadj = slotadd - slotoff;
		slotspace = amap->am_maxslot - slotmapped;
	}
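
	/*
	 * worked example (hypothetical values): extending backwards by
	 * two pages (slotadd = 2) with slotoff = 3, slotmapped = 4 and
	 * am_maxslot = 10 gives slotneed = 6, slotadj = -1 and
	 * slotspace = 6.  since slotadj <= 0, the unused slots in front
	 * of ar_pageoff already cover the extension and case 1b below
	 * simply moves ar_pageoff back to 1.
	 */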

	/*
	 * case 1: we already have enough slots in the map and thus
	 * only need to bump the reference counts on the slots we are
	 * adding.
	 */

	if (flags & AMAP_EXTEND_FORWARDS) {
		if (amap->am_nslot >= slotneed) {
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				amap_pp_adjref(amap, slotoff + slotmapped,
				    slotadd, 1);
			}
#endif
			amap_unlock(amap);
			UVMHIST_LOG(maphist,
			    "<- done (case 1f), amap = 0x%x, sltneed=%d",
			    amap, slotneed, 0, 0);
			return 0;
		}
	} else {
		if (slotadj <= 0) {
			slotoff -= slotadd;
			entry->aref.ar_pageoff = slotoff;
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				amap_pp_adjref(amap, slotoff, slotadd, 1);
			}
#endif
			amap_unlock(amap);
			UVMHIST_LOG(maphist,
			    "<- done (case 1b), amap = 0x%x, sltneed=%d",
			    amap, slotneed, 0, 0);
			return 0;
		}
	}

	/*
	 * case 2: we pre-allocated slots for use and we just need to
	 * bump nslot up to take account for these slots.
	 */

	if (amap->am_maxslot >= slotneed) {
		if (flags & AMAP_EXTEND_FORWARDS) {
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				if ((slotoff + slotmapped) < amap->am_nslot)
					amap_pp_adjref(amap,
					    slotoff + slotmapped,
					    (amap->am_nslot -
					    (slotoff + slotmapped)), 1);
				pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
				    slotneed - amap->am_nslot);
			}
#endif
			amap->am_nslot = slotneed;
			amap_unlock(amap);

			/*
			 * no need to zero am_anon since that was done at
			 * alloc time and we never shrink an allocation.
			 */

			UVMHIST_LOG(maphist,"<- done (case 2f), amap = 0x%x, "
			    "slotneed=%d", amap, slotneed, 0, 0);
			return 0;
		} else {
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				/*
				 * Slide up the ref counts on the pages that
				 * are actually in use.
				 */
				memmove(amap->am_ppref + slotspace,
				    amap->am_ppref + slotoff,
				    slotmapped * sizeof(int));
				/*
				 * Mark the (adjusted) gap at the front as
				 * referenced/not referenced.
				 */
				pp_setreflen(amap->am_ppref,
				    0, 0, slotspace - slotadd);
				pp_setreflen(amap->am_ppref,
				    slotspace - slotadd, 1, slotadd);
			}
#endif

			/*
			 * Slide the anon pointers up and clear out
			 * the space we just made.
			 */
			memmove(amap->am_anon + slotspace,
			    amap->am_anon + slotoff,
			    slotmapped * sizeof(struct vm_anon*));
			memset(amap->am_anon + slotoff, 0,
			    (slotspace - slotoff) * sizeof(struct vm_anon *));

			/*
			 * Slide the backpointers up, but don't bother
			 * wiping out the old slots.
			 */
			memmove(amap->am_bckptr + slotspace,
			    amap->am_bckptr + slotoff,
			    slotmapped * sizeof(int));

			/*
			 * Adjust all the useful active slot numbers.
			 */
			for (i = 0; i < amap->am_nused; i++)
				amap->am_slots[i] += (slotspace - slotoff);

			/*
			 * We just filled all the empty space in the
			 * front of the amap by activating a few new
			 * slots.
			 */
			amap->am_nslot = amap->am_maxslot;
			entry->aref.ar_pageoff = slotspace - slotadd;
			amap_unlock(amap);

			UVMHIST_LOG(maphist,"<- done (case 2b), amap = 0x%x, "
			    "slotneed=%d", amap, slotneed, 0, 0);
			return 0;
		}
	}
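
	/*
	 * illustrative layout for case 2b above (hypothetical values):
	 * with am_maxslot = 10, slotoff = 3, slotmapped = 4 and
	 * slotadd = 4, slotspace is 6, so the four live anon pointers
	 * move from slots 3..6 to slots 6..9, the vacated slots 3..5
	 * are zeroed, and the entry is left with ar_pageoff = 2 and
	 * am_nslot = am_maxslot = 10.
	 */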

	/*
	 * case 3: we need to malloc a new amap and copy all the amap
	 * data over from old amap to the new one.
	 *
	 * XXXCDC: could we take advantage of a kernel realloc()?
	 */

	amap_unlock(amap);	/* unlock in case we sleep in malloc */
	slotalloc = malloc_roundup(slotneed * sizeof(int)) / sizeof(int);
#ifdef UVM_AMAP_PPREF
	newppref = NULL;
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
		newppref = malloc(slotalloc * sizeof(int), M_UVMAMAP, mflag);
#endif
	newsl = malloc(slotalloc * sizeof(int), M_UVMAMAP, mflag);
	newbck = malloc(slotalloc * sizeof(int), M_UVMAMAP, mflag);
	newover = malloc(slotalloc * sizeof(struct vm_anon *), M_UVMAMAP,
	    mflag);
	if (newsl == NULL || newbck == NULL || newover == NULL) {
#ifdef UVM_AMAP_PPREF
		if (newppref != NULL) {
			free(newppref, M_UVMAMAP);
		}
#endif
		if (newsl != NULL) {
			free(newsl, M_UVMAMAP);
		}
		if (newbck != NULL) {
			free(newbck, M_UVMAMAP);
		}
		if (newover != NULL) {
			free(newover, M_UVMAMAP);
		}
		return ENOMEM;
	}
	amap_lock(amap);
	KASSERT(amap->am_maxslot < slotneed);

	/*
	 * now copy everything over to new malloc'd areas...
	 */

	slotadded = slotalloc - amap->am_nslot;
	if (!(flags & AMAP_EXTEND_FORWARDS))
		slotspace = slotalloc - slotmapped;

	/* do am_slots */
	oldsl = amap->am_slots;
	if (flags & AMAP_EXTEND_FORWARDS)
		memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
	else
		for (i = 0; i < amap->am_nused; i++)
			newsl[i] = oldsl[i] + slotspace - slotoff;
	amap->am_slots = newsl;

	/* do am_anon */
	oldover = amap->am_anon;
	if (flags & AMAP_EXTEND_FORWARDS) {
		memcpy(newover, oldover,
		    sizeof(struct vm_anon *) * amap->am_nslot);
		memset(newover + amap->am_nslot, 0,
		    sizeof(struct vm_anon *) * slotadded);
	} else {
		memcpy(newover + slotspace, oldover + slotoff,
		    sizeof(struct vm_anon *) * slotmapped);
		memset(newover, 0,
		    sizeof(struct vm_anon *) * slotspace);
	}
	amap->am_anon = newover;

	/* do am_bckptr */
	oldbck = amap->am_bckptr;
	if (flags & AMAP_EXTEND_FORWARDS)
		memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
	else
		memcpy(newbck + slotspace, oldbck + slotoff,
		    sizeof(int) * slotmapped);
	amap->am_bckptr = newbck;

#ifdef UVM_AMAP_PPREF
	/* do ppref */
	oldppref = amap->am_ppref;
	if (newppref) {
		if (flags & AMAP_EXTEND_FORWARDS) {
			memcpy(newppref, oldppref,
			    sizeof(int) * amap->am_nslot);
			memset(newppref + amap->am_nslot, 0,
			    sizeof(int) * slotadded);
		} else {
			memcpy(newppref + slotspace, oldppref + slotoff,
			    sizeof(int) * slotmapped);
		}
		amap->am_ppref = newppref;
		if ((flags & AMAP_EXTEND_FORWARDS) &&
		    (slotoff + slotmapped) < amap->am_nslot)
			amap_pp_adjref(amap, slotoff + slotmapped,
			    (amap->am_nslot - (slotoff + slotmapped)), 1);
		if (flags & AMAP_EXTEND_FORWARDS)
			pp_setreflen(newppref, amap->am_nslot, 1,
			    slotneed - amap->am_nslot);
		else {
			pp_setreflen(newppref, 0, 0,
			    slotalloc - slotneed);
			pp_setreflen(newppref, slotalloc - slotneed, 1,
			    slotneed - slotmapped);
		}
	} else {
		if (amap->am_ppref)
			amap->am_ppref = PPREF_NONE;
	}
#endif

	/* update master values */
	if (flags & AMAP_EXTEND_FORWARDS)
		amap->am_nslot = slotneed;
	else {
		entry->aref.ar_pageoff = slotspace - slotadd;
		amap->am_nslot = slotalloc;
	}
	amap->am_maxslot = slotalloc;

	amap_unlock(amap);
	free(oldsl, M_UVMAMAP);
	free(oldbck, M_UVMAMAP);
	free(oldover, M_UVMAMAP);
#ifdef UVM_AMAP_PPREF
	if (oldppref && oldppref != PPREF_NONE)
		free(oldppref, M_UVMAMAP);
#endif
	UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d",
	    amap, slotneed, 0, 0);
	return 0;
}
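
/*
 * example call (hypothetical caller): growing a mapping forwards by one
 * page without sleeping, falling back if memory is tight:
 *
 *	error = amap_extend(entry, PAGE_SIZE,
 *	    AMAP_EXTEND_FORWARDS | AMAP_EXTEND_NOWAIT);
 *	if (error)
 *		... allocate a fresh amap instead ...
 */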

/*
 * amap_share_protect: change protection of anons in a shared amap
 *
 * for shared amaps, given the current data structure layout, it is
 * not possible for us to directly locate all maps referencing the
 * shared anon (to change the protection).  in order to protect data
 * in shared maps we use pmap_page_protect().  [this is useful for IPC
 * mechanisms like map entry passing that may want to write-protect
 * all mappings of a shared amap.]  we traverse am_anon or am_slots
 * depending on the current state of the amap.
 *
 * => entry's map and amap must be locked by the caller
 */
void
amap_share_protect(entry, prot)
	struct vm_map_entry *entry;
	vm_prot_t prot;
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int slots, lcv, slot, stop;

	LOCK_ASSERT(simple_lock_held(&amap->am_l));

	AMAP_B2SLOT(slots, (entry->end - entry->start));
	stop = entry->aref.ar_pageoff + slots;

	if (slots < amap->am_nused) {
		/* cheaper to traverse am_anon */
		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
			if (amap->am_anon[lcv] == NULL)
				continue;
			if (amap->am_anon[lcv]->u.an_page != NULL)
				pmap_page_protect(amap->am_anon[lcv]->u.an_page,
				    prot);
		}
		return;
	}

	/* cheaper to traverse am_slots */
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		slot = amap->am_slots[lcv];
		if (slot < entry->aref.ar_pageoff || slot >= stop)
			continue;
		if (amap->am_anon[slot]->u.an_page != NULL)
			pmap_page_protect(amap->am_anon[slot]->u.an_page, prot);
	}
}
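
/*
 * cost note (illustrative): if the entry covers 2 slots but the amap has
 * 100 anons in use, the first loop in amap_share_protect() touches only
 * those 2 slots via am_anon; if the entry covers 100 slots but only 2
 * anons are in use, the second loop walks just the 2 live entries of
 * am_slots.  either way the work done is about min(slots, am_nused).
 */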

/*
 * amap_wipeout: wipeout all anons in an amap; then free the amap!
 *
 * => called from amap_unref when the final reference to an amap is
 *    discarded (i.e. when reference count == 1)
 * => the amap should be locked (by the caller)
 */

void
amap_wipeout(amap)
	struct vm_amap *amap;
{
	int lcv, slot;
	struct vm_anon *anon;
	UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
	UVMHIST_LOG(maphist,"(amap=0x%x)", amap, 0,0,0);

	amap_unlock(amap);
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		int refs;

		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];

		if (anon == NULL || anon->an_ref == 0)
			panic("amap_wipeout: corrupt amap");

		simple_lock(&anon->an_lock);
		UVMHIST_LOG(maphist,"  processing anon 0x%x, ref=%d", anon,
		    anon->an_ref, 0, 0);
		refs = --anon->an_ref;
		simple_unlock(&anon->an_lock);
		if (refs == 0) {

			/*
			 * we had the last reference to a vm_anon. free it.
			 */

			uvm_anfree(anon);
		}

		/*
		 * XXX
		 * releasing the swap space held by N anons is an O(N^2)
		 * operation because of the implementation of extents.
		 * if there are many anons, tearing down an exiting process'
		 * address space can take many seconds, which causes very
		 * annoying pauses.  we yield here to give other processes
		 * a chance to run.  this should be removed once the
		 * performance of swap space management is improved.
		 */

		if (curproc->p_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
			preempt(NULL);
	}

	/*
	 * now we free the map
	 */

	amap->am_ref = 0;	/* ... was one */
	amap->am_nused = 0;
	amap_free(amap);	/* will free amap */
	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
}

/*
 * amap_copy: ensure that a map entry's "needs_copy" flag is false
 *	by copying the amap if necessary.
 *
 * => an entry with a null amap pointer will get a new (blank) one.
 * => the map that the map entry belongs to must be locked by caller.
 * => the amap currently attached to "entry" (if any) must be unlocked.
 * => if canchunk is true, then we may clip the entry into a chunk
 * => "startva" and "endva" are used only if canchunk is true.  they are
 *    used to limit chunking (e.g. if you have a large space that you
 *    know you are going to need to allocate amaps for, there is no point
 *    in allowing that to be chunked)
 */

void
amap_copy(map, entry, waitf, canchunk, startva, endva)
	struct vm_map *map;
	struct vm_map_entry *entry;
	int waitf;
	boolean_t canchunk;
	vaddr_t startva, endva;
{
	struct vm_amap *amap, *srcamap;
	int slots, lcv;
	vaddr_t chunksize;
	UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist);
	UVMHIST_LOG(maphist, "  (map=%p, entry=%p, waitf=%d)",
	    map, entry, waitf, 0);

	/*
	 * is there a map to copy?   if not, create one from scratch.
	 */

	if (entry->aref.ar_amap == NULL) {

		/*
		 * check to see if we have a large amap that we can
		 * chunk.  we align startva/endva to chunk-sized
		 * boundaries and then clip to them.
		 */

		if (canchunk && atop(entry->end - entry->start) >=
		    UVM_AMAP_LARGE) {
			/* convert slots to bytes */
			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
			startva = (startva / chunksize) * chunksize;
			endva = roundup(endva, chunksize);
			UVMHIST_LOG(maphist, "  chunk amap ==> clip 0x%x->0x%x"
			    "to 0x%x->0x%x", entry->start, entry->end, startva,
			    endva);
			UVM_MAP_CLIP_START(map, entry, startva);
			/* watch out for endva wrap-around! */
			if (endva >= startva)
				UVM_MAP_CLIP_END(map, entry, endva);
		}
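
		/*
		 * worked example (hypothetical addresses, assuming
		 * UVM_AMAP_CHUNK is 16 and PAGE_SHIFT is 12): chunksize
		 * is then 0x10000, so startva = 0x12345000 rounds down
		 * to 0x12340000 and endva = 0x12346000 rounds up to
		 * 0x12350000 before the entry is clipped.
		 */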

		UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]",
		    entry->start, entry->end, 0, 0);
		entry->aref.ar_pageoff = 0;
		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
		    waitf);
		if (entry->aref.ar_amap != NULL)
			entry->etype &= ~UVM_ET_NEEDSCOPY;
		return;
	}

	/*
	 * first check and see if we are the only map entry
	 * referencing the amap we currently have.  if so, then we can
	 * just take it over rather than copying it.  note that we are
	 * reading am_ref with the amap unlocked... the value can only
	 * be one if we have the only reference to the amap (via our
	 * locked map).  if we are greater than one we fall through to
	 * the next case (where we double check the value).
	 */

	if (entry->aref.ar_amap->am_ref == 1) {
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
		    0, 0, 0, 0);
		return;
	}

	/*
	 * looks like we need to copy the map.
	 */

	UVMHIST_LOG(maphist,"  amap=%p, ref=%d, must copy it",
	    entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0);
	AMAP_B2SLOT(slots, entry->end - entry->start);
	amap = amap_alloc1(slots, 0, waitf);
	if (amap == NULL) {
		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
		return;
	}
	srcamap = entry->aref.ar_amap;
	amap_lock(srcamap);

	/*
	 * need to double check reference count now that we've got the
	 * src amap locked down.  the reference count could have
	 * changed while we were in malloc.  if the reference count
	 * dropped down to one we take over the old map rather than
	 * copying the amap.
	 */

	if (srcamap->am_ref == 1) {		/* take it over? */
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		amap->am_ref--;		/* drop final reference to map */
		amap_unlock(amap);
		amap_free(amap);	/* dispose of new (unused) amap */
		amap_unlock(srcamap);
		return;
	}

	/*
	 * we must copy it now.
	 */

	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
	for (lcv = 0 ; lcv < slots; lcv++) {
		amap->am_anon[lcv] =
		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
		if (amap->am_anon[lcv] == NULL)
			continue;
		simple_lock(&amap->am_anon[lcv]->an_lock);
		amap->am_anon[lcv]->an_ref++;
		simple_unlock(&amap->am_anon[lcv]->an_lock);
		amap->am_bckptr[lcv] = amap->am_nused;
		amap->am_slots[amap->am_nused] = lcv;
		amap->am_nused++;
	}
	memset(&amap->am_anon[lcv], 0,
	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));

	/*
	 * drop our reference to the old amap (srcamap) and unlock.
	 * we know that the reference count on srcamap is greater than
	 * one (we checked above), so there is no way we could drop
	 * the count to zero.  [and no need to worry about freeing it]
	 */

	srcamap->am_ref--;
	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
#ifdef UVM_AMAP_PPREF
	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
	}
#endif

	amap_unlock(srcamap);

	/*
	 * install new amap.
	 */

	entry->aref.ar_pageoff = 0;
	entry->aref.ar_amap = amap;
	entry->etype &= ~UVM_ET_NEEDSCOPY;
	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
}
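
/*
 * illustrative bookkeeping (values assumed): if only slots 0 and 2 of
 * the source hold anons, the copy loop in amap_copy() leaves the new
 * amap with am_nused = 2, am_slots = { 0, 2, ... }, am_bckptr[0] == 0
 * and am_bckptr[2] == 1, preserving the invariant that
 * am_slots[am_bckptr[n]] == n for every slot n with an anon.
 */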

/*
 * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
 *
 *	called during fork(2) when the parent process has a wired map
 *	entry.   in that case we want to avoid write-protecting pages
 *	in the parent's map (e.g. like what you'd do for a COW page)
 *	so we resolve the COW here.
 *
 * => assume parent's entry was wired, thus all pages are resident.
 * => assume pages that are loaned out (loan_count) are already mapped
 *    read-only in all maps, and thus no need for us to worry about them
 * => assume both parent and child vm_map's are locked
 * => caller passes child's map/entry in to us
 * => if we run out of memory we will unlock the amap and sleep _with_ the
 *    parent and child vm_map's locked(!).  we have to do this since
 *    we are in the middle of a fork(2) and we can't let the parent
 *    map change until we are done copying all the map entries.
 * => XXXCDC: out of memory should cause fork to fail, but there is
 *    currently no easy way to do this (needs fix)
 * => page queues must be unlocked (we may lock them)
 */

void
amap_cow_now(map, entry)
	struct vm_map *map;
	struct vm_map_entry *entry;
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int lcv, slot;
	struct vm_anon *anon, *nanon;
	struct vm_page *pg, *npg;

	/*
	 * note that if we unlock the amap then we must ReStart the "lcv" for
	 * loop because some other process could reorder the anons in the
	 * am_anon[] array on us while the lock is dropped.
	 */

ReStart:
	amap_lock(amap);

	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {

		/*
		 * get the page
		 */

		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];
		simple_lock(&anon->an_lock);
		pg = anon->u.an_page;

		/*
		 * page must be resident since parent is wired
		 */

		if (pg == NULL)
			panic("amap_cow_now: non-resident wired page in anon %p",
			    anon);

		/*
		 * if the anon ref count is one and the page is not loaned,
		 * then we are safe (the child has exclusive access to the
		 * page).  if the page is loaned, then it must already be
		 * mapped read-only.
		 *
		 * we only need to get involved when these are not true.
		 * [note: if loan_count == 0, then the anon must own the page]
		 */

		if (anon->an_ref > 1 && pg->loan_count == 0) {

			/*
			 * if the page is busy then we have to unlock, wait for
			 * it and then restart.
			 */
			if (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				amap_unlock(amap);
				UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, FALSE,
				    "cownow", 0);
				goto ReStart;
			}

			/*
			 * ok, time to do a copy-on-write to a new anon
			 */
			nanon = uvm_analloc();
			if (nanon) {
				/* nanon is locked! */
				npg = uvm_pagealloc(NULL, 0, nanon, 0);
			} else
				npg = NULL;	/* XXX: quiet gcc warning */

			if (nanon == NULL || npg == NULL) {
				/* out of memory */
				/*
				 * XXXCDC: we should cause fork to fail, but
				 * we can't ...
				 */
				if (nanon) {
					nanon->an_ref--;
					simple_unlock(&nanon->an_lock);
					uvm_anfree(nanon);
				}
				simple_unlock(&anon->an_lock);
				amap_unlock(amap);
				uvm_wait("cownowpage");
				goto ReStart;
			}

			/*
			 * got it... now we can copy the data and replace anon
			 * with our new one...
			 */

			uvm_pagecopy(pg, npg);		/* old -> new */
			anon->an_ref--;			/* can't drop to zero */
			amap->am_anon[slot] = nanon;	/* replace */

			/*
			 * drop PG_BUSY on new page ... since we have had its
			 * owner locked the whole time it can't be
			 * PG_RELEASED | PG_WANTED.
			 */

			uvm_lock_pageq();
			uvm_pageactivate(npg);
			uvm_unlock_pageq();
			npg->flags &= ~(PG_BUSY|PG_FAKE);
			UVM_PAGE_OWN(npg, NULL);
			simple_unlock(&nanon->an_lock);
		}
		simple_unlock(&anon->an_lock);
	}
	amap_unlock(amap);
}
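
/*
 * example sequence (illustrative): for an anon with an_ref == 2 and
 * loan_count == 0, the loop in amap_cow_now() allocates a fresh anon
 * and page, copies the old page with uvm_pagecopy(), drops one
 * reference on the shared anon and installs the new anon in the
 * child's am_anon[slot], giving the child a private copy of the page.
 */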

/*
 * amap_splitref: split a single reference into two separate references
 *
 * => called from uvm_map's clip routines
 * => origref's map should be locked
 * => origref->ar_amap should be unlocked (we will lock)
 */
void
amap_splitref(origref, splitref, offset)
	struct vm_aref *origref, *splitref;
	vaddr_t offset;
{
	int leftslots;

	AMAP_B2SLOT(leftslots, offset);
	if (leftslots == 0)
		panic("amap_splitref: split at zero offset");

	amap_lock(origref->ar_amap);

	/*
	 * now: amap is locked and we have a valid am_anon array.
	 */

	if (origref->ar_amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
		panic("amap_splitref: map size check failed");

#ifdef UVM_AMAP_PPREF
	/*
	 * establish ppref before we add a duplicate reference to the amap
	 */
	if (origref->ar_amap->am_ppref == NULL)
		amap_pp_establish(origref->ar_amap);
#endif

	splitref->ar_amap = origref->ar_amap;
	splitref->ar_amap->am_ref++;		/* not a share reference */
	splitref->ar_pageoff = origref->ar_pageoff + leftslots;

	amap_unlock(origref->ar_amap);
}

#ifdef UVM_AMAP_PPREF

/*
 * amap_pp_establish: add a ppref array to an amap, if possible
 *
 * => amap locked by caller
 */
void
amap_pp_establish(amap)
	struct vm_amap *amap;
{
	amap->am_ppref = malloc(sizeof(int) * amap->am_maxslot,
	    M_UVMAMAP, M_NOWAIT);

	/*
	 * if we fail then we just won't use ppref for this amap
	 */

	if (amap->am_ppref == NULL) {
		amap->am_ppref = PPREF_NONE;	/* not using it */
		return;
	}
	memset(amap->am_ppref, 0, sizeof(int) * amap->am_maxslot);
	pp_setreflen(amap->am_ppref, 0, amap->am_ref, amap->am_nslot);
	return;
}

/*
 * amap_pp_adjref: adjust reference count to a part of an amap using the
 * per-page reference count array.
 *
 * => map and amap locked by caller
 * => caller must check that ppref != PPREF_NONE before calling
 */
void
amap_pp_adjref(amap, curslot, slotlen, adjval)
	struct vm_amap *amap;
	int curslot;
	vsize_t slotlen;
	int adjval;
{
	int stopslot, *ppref, lcv, prevlcv;
	int ref, len, prevref, prevlen;

	stopslot = curslot + slotlen;
	ppref = amap->am_ppref;
	prevlcv = 0;

	/*
	 * first advance to the correct place in the ppref array,
	 * fragment if needed.
	 */

	for (lcv = 0 ; lcv < curslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > curslot) {     /* goes past start? */
			pp_setreflen(ppref, lcv, ref, curslot - lcv);
			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
			len = curslot - lcv;   /* new length of entry @ lcv */
		}
		prevlcv = lcv;
	}
	if (lcv != 0)
		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
	else {
		/*
		 * Ensure that the "prevref == ref" test below always
		 * fails, since we're starting from the beginning of
		 * the ppref array; that is, there is no previous chunk.
		 */
		prevref = -1;
		prevlen = 0;
	}

	/*
	 * now adjust reference counts in range.  merge the first
	 * changed entry with the last unchanged entry if possible.
	 */

	if (lcv != curslot)
		panic("amap_pp_adjref: overshot target");

	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > stopslot) {     /* goes past end? */
			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
			pp_setreflen(ppref, stopslot, ref,
			    len - (stopslot - lcv));
			len = stopslot - lcv;
		}
		ref += adjval;
		if (ref < 0)
			panic("amap_pp_adjref: negative reference count");
		if (lcv == prevlcv + prevlen && ref == prevref) {
			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
		} else {
			pp_setreflen(ppref, lcv, ref, len);
		}
		if (ref == 0)
			amap_wiperange(amap, lcv, len);
	}
}
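
/*
 * worked example (hypothetical state): starting from a single chunk
 * ppref = { -3, 8, ... } (ref 2, length 8), amap_pp_adjref(amap, 2, 4, -1)
 * first fragments the chunk at slot 2, then splits again at the stop
 * slot and drops the middle range, leaving slots 0-1 at ref 2,
 * slots 2-5 at ref 1, and slots 6-7 at ref 2.
 */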

/*
 * amap_wiperange: wipe out a range of an amap
 * [different from amap_wipeout because the amap is kept intact]
 *
 * => both map and amap must be locked by caller.
 */
void
amap_wiperange(amap, slotoff, slots)
	struct vm_amap *amap;
	int slotoff, slots;
{
	int byanon, lcv, stop, curslot, ptr, slotend;
	struct vm_anon *anon;

	/*
	 * we can either traverse the amap by am_anon or by am_slots depending
	 * on which is cheaper.    decide now.
	 */

	if (slots < amap->am_nused) {
		byanon = TRUE;
		lcv = slotoff;
		stop = slotoff + slots;
		slotend = 0;
	} else {
		byanon = FALSE;
		lcv = 0;
		stop = amap->am_nused;
		slotend = slotoff + slots;
	}

	while (lcv < stop) {
		int refs;

		if (byanon) {
			curslot = lcv++;	/* lcv advances here */
			if (amap->am_anon[curslot] == NULL)
				continue;
		} else {
			curslot = amap->am_slots[lcv];
			if (curslot < slotoff || curslot >= slotend) {
				lcv++;		/* lcv advances here */
				continue;
			}
			stop--;	/* drop stop, since anon will be removed */
		}
		anon = amap->am_anon[curslot];

		/*
		 * remove it from the amap
		 */

		amap->am_anon[curslot] = NULL;
		ptr = amap->am_bckptr[curslot];
		if (ptr != (amap->am_nused - 1)) {
			amap->am_slots[ptr] =
			    amap->am_slots[amap->am_nused - 1];
			amap->am_bckptr[amap->am_slots[ptr]] =
			    ptr;	/* back ptr. */
		}
		amap->am_nused--;

		/*
		 * drop anon reference count
		 */

		simple_lock(&anon->an_lock);
		refs = --anon->an_ref;
		simple_unlock(&anon->an_lock);
		if (refs == 0) {

			/*
			 * we just eliminated the last reference to an anon.
			 * free it.
			 */

			uvm_anfree(anon);
		}
	}
}

#endif