/*	$OpenBSD: uvm_amap.c,v 1.50 2013/05/30 16:39:26 tedu Exp $	*/
/*	$NetBSD: uvm_amap.c,v 1.27 2000/11/25 06:27:59 chs Exp $	*/

/*
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor and
 *      Washington University.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_amap.c: amap operations
 */

/*
 * this file contains functions that perform operations on amaps.  see
 * uvm_amap.h for a brief explanation of the role of amaps in uvm.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/pool.h>

#include <uvm/uvm.h>
#include <uvm/uvm_swap.h>

/*
 * pool for allocation of vm_amap structures.  note that in order to
 * avoid an endless loop, the amap pool's allocator cannot allocate
 * memory from an amap (it currently goes through the kernel uobj, so
 * we are ok).
 */

struct pool uvm_amap_pool;

LIST_HEAD(, vm_amap) amap_list;

#define MALLOC_SLOT_UNIT (2 * sizeof(int) + sizeof(struct vm_anon *))

/*
 * local functions
 */

static struct vm_amap *amap_alloc1(int, int, int);
static __inline void amap_list_insert(struct vm_amap *);
static __inline void amap_list_remove(struct vm_amap *);

static __inline void
amap_list_insert(struct vm_amap *amap)
{
	LIST_INSERT_HEAD(&amap_list, amap, am_list);
}

static __inline void
amap_list_remove(struct vm_amap *amap)
{
	LIST_REMOVE(amap, am_list);
}

#ifdef UVM_AMAP_PPREF
/*
 * what is ppref?   ppref is an _optional_ amap feature which is used
 * to keep track of reference counts on a per-page basis.  it is enabled
 * when UVM_AMAP_PPREF is defined.
 *
 * when enabled, an array of ints is allocated for the pprefs.  this
 * array is allocated only when a partial reference is added to the
 * map (either by unmapping part of the amap, or gaining a reference
 * to only a part of an amap).  if the malloc of the array fails
 * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate
 * that we tried to do ppref's but couldn't alloc the array so just
 * give up (after all, this is an optional feature!).
 *
 * the array is divided into "chunks": runs of consecutive slots that
 * share the same reference count.  for chunks of length 1, the chunk
 * reference count plus one is stored in that chunk's slot.
 * for chunks of length > 1 the first slot contains (the reference count
 * plus one) * -1.    [the negative value indicates that the length is
 * greater than one.]  the second slot of the chunk contains the length
 * of the chunk.   here is an example:
 *
 * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
 *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
 *              <----------><-><----><-------><----><-><------->
 * (x = don't care)
 *
 * this allows one int to contain the ref count for the whole chunk.
 * note that the "plus one" part is needed because a reference count of
 * zero is neither positive nor negative (we need a way to tell whether
 * we have one zero or a run of them).
 *
 * here are some in-line functions to help us.
 */

static __inline void pp_getreflen(int *, int, int *, int *);
static __inline void pp_setreflen(int *, int, int, int);

/*
 * pp_getreflen: get the reference and length for a specific offset
 */
static __inline void
pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
{

	if (ppref[offset] > 0) {		/* chunk size must be 1 */
		*refp = ppref[offset] - 1;	/* don't forget to adjust */
		*lenp = 1;
	} else {
		*refp = (ppref[offset] * -1) - 1;
		*lenp = ppref[offset+1];
	}
}

/*
 * pp_setreflen: set the reference and length for a specific offset
 */
static __inline void
pp_setreflen(int *ppref, int offset, int ref, int len)
{
	if (len == 1) {
		ppref[offset] = ref + 1;
	} else {
		ppref[offset] = (ref + 1) * -1;
		ppref[offset+1] = len;
	}
}
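
/*
 * illustrative sketch (editorial, not part of the original code and not
 * meant to be compiled): building and reading back the first three chunks
 * of the example above, assuming "ppref" points at an int array of at
 * least 16 entries and "ref"/"len" are ints:
 *
 *	pp_setreflen(ppref, 0, 2, 4);		ppref[0] == -3, ppref[1] == 4
 *	pp_setreflen(ppref, 4, 3, 1);		ppref[4] == 4
 *	pp_setreflen(ppref, 5, 1, 2);		ppref[5] == -2, ppref[6] == 2
 *
 *	pp_getreflen(ppref, 0, &ref, &len);	ref == 2, len == 4
 *	pp_getreflen(ppref, 4, &ref, &len);	ref == 3, len == 1
 */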
#endif

/*
 * amap_init: called at boot time to init global amap data structures
 */

void
amap_init(void)
{
	/*
	 * Initialize the vm_amap pool.
	 */
	pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0,
	    "amappl", &pool_allocator_nointr);
	pool_sethiwat(&uvm_amap_pool, 4096);
}

/*
 * amap_alloc1: internal function that allocates an amap, but does not
 *	init the overlay.
 */
static inline struct vm_amap *
amap_alloc1(int slots, int padslots, int waitf)
{
	struct vm_amap *amap;
	int totalslots;

	amap = pool_get(&uvm_amap_pool, (waitf == M_WAITOK) ? PR_WAITOK
	    : PR_NOWAIT);
	if (amap == NULL)
		return(NULL);

	totalslots = malloc_roundup((slots + padslots) * MALLOC_SLOT_UNIT) /
	    MALLOC_SLOT_UNIT;
	amap->am_ref = 1;
	amap->am_flags = 0;
#ifdef UVM_AMAP_PPREF
	amap->am_ppref = NULL;
#endif
	amap->am_maxslot = totalslots;
	amap->am_nslot = slots;
	amap->am_nused = 0;

	amap->am_slots = malloc(totalslots * MALLOC_SLOT_UNIT, M_UVMAMAP,
	    waitf);
	if (amap->am_slots == NULL)
		goto fail1;

	amap->am_bckptr = (int *)(((char *)amap->am_slots) + totalslots *
	    sizeof(int));
	amap->am_anon = (struct vm_anon **)(((char *)amap->am_bckptr) +
	    totalslots * sizeof(int));

	return(amap);

fail1:
	pool_put(&uvm_amap_pool, amap);
	return (NULL);
}
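
/*
 * layout note (editorial): amap_alloc1() carves the single malloc'd block
 * of totalslots * MALLOC_SLOT_UNIT bytes into three parallel arrays:
 *
 *	am_slots	totalslots ints:  dense list of the slots in use
 *	am_bckptr	totalslots ints:  am_bckptr[am_slots[i]] == i
 *	am_anon		totalslots ptrs:  anon (or NULL) for each slot
 *
 * the am_slots/am_bckptr pair lets callers walk only the am_nused active
 * slots and remove any slot in constant time (see amap_unadd()).
 */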

/*
 * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
 *
 * => caller should ensure sz is a multiple of PAGE_SIZE
 * => reference count to new amap is set to one
 */

struct vm_amap *
amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
{
	struct vm_amap *amap;
	int slots, padslots;

	AMAP_B2SLOT(slots, sz);		/* load slots */
	AMAP_B2SLOT(padslots, padsz);

	amap = amap_alloc1(slots, padslots, waitf);
	if (amap) {
		memset(amap->am_anon, 0,
		    amap->am_maxslot * sizeof(struct vm_anon *));
		amap_list_insert(amap);
	}

	return(amap);
}


/*
 * amap_free: free an amap
 *
 * => the amap should have a zero reference count and be empty
 */
void
amap_free(struct vm_amap *amap)
{

	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);

	free(amap->am_slots, M_UVMAMAP);
#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
		free(amap->am_ppref, M_UVMAMAP);
#endif
	pool_put(&uvm_amap_pool, amap);

}

/*
 * amap_extend: extend the size of an amap (if needed)
 *
 * => called from uvm_map when we want to extend an amap to cover
 *    a new mapping (rather than allocate a new one)
 * => to safely extend an amap it should have a reference count of
 *    one (thus it can't be shared)
 * => XXXCDC: support padding at this level?
 */
int
amap_extend(struct vm_map_entry *entry, vsize_t addsize)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int slotoff = entry->aref.ar_pageoff;
	int slotmapped, slotadd, slotneed, slotalloc;
#ifdef UVM_AMAP_PPREF
	int *newppref, *oldppref;
#endif
	u_int *newsl, *newbck, *oldsl, *oldbck;
	struct vm_anon **newover, **oldover;
	int slotadded;

	/*
	 * first, determine how many slots we need in the amap.  don't
	 * forget that ar_pageoff could be non-zero: this means that
	 * there are some unused slots before us in the amap.
	 */

	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
	slotneed = slotoff + slotmapped + slotadd;

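	/*
	 * worked example (editorial note): with ar_pageoff == 2, an entry
	 * mapping 3 pages and an addsize of 2 pages, slotneed == 2 + 3 + 2
	 * == 7.  if am_nslot >= 7 the amap already covers the new range
	 * (case 1 below); else if am_maxslot >= 7 we only bump am_nslot
	 * (case 2); otherwise we must allocate a larger amap and copy
	 * (case 3).
	 */
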
	/*
	 * case 1: we already have enough slots in the map and thus
	 * only need to bump the reference counts on the slots we are
	 * adding.
	 */

	if (amap->am_nslot >= slotneed) {
#ifdef UVM_AMAP_PPREF
		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
			amap_pp_adjref(amap, slotoff + slotmapped, slotadd, 1);
		}
#endif
		return (0);
	}

	/*
	 * case 2: we pre-allocated slots for use and we just need to
	 * bump nslot up to take account for these slots.
	 */

	if (amap->am_maxslot >= slotneed) {
#ifdef UVM_AMAP_PPREF
		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
			if ((slotoff + slotmapped) < amap->am_nslot)
				amap_pp_adjref(amap, slotoff + slotmapped,
				    (amap->am_nslot - (slotoff + slotmapped)),
				    1);
			pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
			    slotneed - amap->am_nslot);
		}
#endif
		amap->am_nslot = slotneed;

		/*
		 * no need to zero am_anon since that was done at
		 * alloc time and we never shrink an allocation.
		 */
		return (0);
	}

	/*
	 * case 3: we need to malloc a new amap and copy all the amap
	 * data over from old amap to the new one.
	 *
	 * XXXCDC: could we take advantage of a kernel realloc()?
	 */

	if (slotneed >= UVM_AMAP_LARGE)
		return E2BIG;

	slotalloc = malloc_roundup(slotneed * MALLOC_SLOT_UNIT) /
	    MALLOC_SLOT_UNIT;
#ifdef UVM_AMAP_PPREF
	newppref = NULL;
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
		newppref = malloc(slotalloc * sizeof(int), M_UVMAMAP,
		    M_WAITOK | M_CANFAIL);
		if (newppref == NULL) {
			/* give up if malloc fails */
			free(amap->am_ppref, M_UVMAMAP);
			amap->am_ppref = PPREF_NONE;
		}
	}
#endif
	newsl = malloc(slotalloc * MALLOC_SLOT_UNIT, M_UVMAMAP,
	    M_WAITOK | M_CANFAIL);
	if (newsl == NULL) {
#ifdef UVM_AMAP_PPREF
		if (newppref != NULL) {
			free(newppref, M_UVMAMAP);
		}
#endif
		return (ENOMEM);
	}
	newbck = (int *)(((char *)newsl) + slotalloc * sizeof(int));
	newover = (struct vm_anon **)(((char *)newbck) + slotalloc *
	    sizeof(int));
	KASSERT(amap->am_maxslot < slotneed);

	/*
	 * now copy everything over to new malloc'd areas...
	 */

	slotadded = slotalloc - amap->am_nslot;

	/* do am_slots */
	oldsl = amap->am_slots;
	memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
	amap->am_slots = newsl;

	/* do am_anon */
	oldover = amap->am_anon;
	memcpy(newover, oldover, sizeof(struct vm_anon *) * amap->am_nslot);
	memset(newover + amap->am_nslot, 0, sizeof(struct vm_anon *) *
	    slotadded);
	amap->am_anon = newover;

	/* do am_bckptr */
	oldbck = amap->am_bckptr;
	memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
	memset(newbck + amap->am_nslot, 0, sizeof(int) * slotadded); /* XXX: needed? */
	amap->am_bckptr = newbck;

#ifdef UVM_AMAP_PPREF
	/* do ppref */
	oldppref = amap->am_ppref;
	if (newppref) {
		memcpy(newppref, oldppref, sizeof(int) * amap->am_nslot);
		memset(newppref + amap->am_nslot, 0, sizeof(int) * slotadded);
		amap->am_ppref = newppref;
		if ((slotoff + slotmapped) < amap->am_nslot)
			amap_pp_adjref(amap, slotoff + slotmapped,
			    (amap->am_nslot - (slotoff + slotmapped)), 1);
		pp_setreflen(newppref, amap->am_nslot, 1,
		    slotneed - amap->am_nslot);
	}
#endif

	/* update master values */
	amap->am_nslot = slotneed;
	amap->am_maxslot = slotalloc;

	/* and free */
	free(oldsl, M_UVMAMAP);
#ifdef UVM_AMAP_PPREF
	if (oldppref && oldppref != PPREF_NONE)
		free(oldppref, M_UVMAMAP);
#endif
	return (0);
}

/*
 * amap_share_protect: change protection of anons in a shared amap
 *
 * for shared amaps, given the current data structure layout, it is
 * not possible for us to directly locate all maps referencing the
 * shared anon (to change the protection).  in order to protect data
 * in shared maps we use pmap_page_protect().  [this is useful for IPC
 * mechanisms like map entry passing that may want to write-protect
 * all mappings of a shared amap.]  we traverse am_anon or am_slots
 * depending on the current state of the amap.
 */
void
amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int slots, lcv, slot, stop;

	AMAP_B2SLOT(slots, (entry->end - entry->start));
	stop = entry->aref.ar_pageoff + slots;

	if (slots < amap->am_nused) {
		/* cheaper to traverse am_anon */
		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
			if (amap->am_anon[lcv] == NULL)
				continue;
			if (amap->am_anon[lcv]->an_page != NULL)
				pmap_page_protect(amap->am_anon[lcv]->an_page,
				    prot);
		}
		return;
	}

	/* cheaper to traverse am_slots */
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		slot = amap->am_slots[lcv];
		if (slot < entry->aref.ar_pageoff || slot >= stop)
			continue;
		if (amap->am_anon[slot]->an_page != NULL)
			pmap_page_protect(amap->am_anon[slot]->an_page, prot);
	}
	return;
}

/*
 * amap_wipeout: wipeout all anon's in an amap; then free the amap!
 *
 * => called from amap_unref when the final reference to an amap is
 *    discarded (i.e. the reference count has dropped to zero)
 */

void
amap_wipeout(struct vm_amap *amap)
{
	int lcv, slot;
	struct vm_anon *anon;

	KASSERT(amap->am_ref == 0);

	if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) {
		/*
		 * amap_swap_off will call us again.
		 */
		return;
	}
	amap_list_remove(amap);

	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		int refs;

		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];

		if (anon == NULL || anon->an_ref == 0)
			panic("amap_wipeout: corrupt amap");

		refs = --anon->an_ref;
		if (refs == 0) {
			/*
			 * we had the last reference to a vm_anon.  free it.
			 */
			uvm_anfree(anon);
		}
	}

	/*
	 * now we free the map
	 */

	amap->am_ref = 0;	/* ... was one */
	amap->am_nused = 0;
	amap_free(amap);	/* will free amap */
}

/*
 * amap_copy: ensure that a map entry's "needs_copy" flag is false
 *	by copying the amap if necessary.
 *
 * => an entry with a null amap pointer will get a new (blank) one.
 * => if canchunk is true, then we may clip the entry into a chunk
 * => "startva" and "endva" are used only if canchunk is true.  they are
 *     used to limit chunking (e.g. if you have a large space that you
 *     know you are going to need to allocate amaps for, there is no point
 *     in allowing that to be chunked)
 */

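/*
 * editorial illustration of the chunking described above (the constants
 * are assumptions, not taken from this file): if UVM_AMAP_CHUNK were 16
 * slots and PAGE_SHIFT 12, chunksize would be 0x10000; a startva of
 * 0x12345678 would be rounded down to 0x12340000 and an endva of
 * 0x12346000 rounded up to 0x12350000, so the new amap covers only that
 * 64KB window rather than the whole (possibly huge) entry.
 */
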
void
amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf,
    boolean_t canchunk, vaddr_t startva, vaddr_t endva)
{
	struct vm_amap *amap, *srcamap;
	int slots, lcv;
	vaddr_t chunksize;

	/*
	 * is there a map to copy?   if not, create one from scratch.
	 */

	if (entry->aref.ar_amap == NULL) {

		/*
		 * check to see if we have a large amap that we can
		 * chunk.  we align startva/endva to chunk-sized
		 * boundaries and then clip to them.
		 */

		if (canchunk && atop(entry->end - entry->start) >=
		    UVM_AMAP_LARGE) {
			/* convert slots to bytes */
			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
			startva = (startva / chunksize) * chunksize;
			endva = roundup(endva, chunksize);
			UVM_MAP_CLIP_START(map, entry, startva);
			/* watch out for endva wrap-around! */
			if (endva >= startva)
				UVM_MAP_CLIP_END(map, entry, endva);
		}

		entry->aref.ar_pageoff = 0;
		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
		    waitf);
		if (entry->aref.ar_amap != NULL)
			entry->etype &= ~UVM_ET_NEEDSCOPY;
		return;
	}

	/*
	 * first check and see if we are the only map entry
	 * referencing the amap we currently have.  if so, then we can
	 * just take it over rather than copying it.  the value can only
	 * be one if we have the only reference to the amap
	 */

	if (entry->aref.ar_amap->am_ref == 1) {
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		return;
	}

	/*
	 * looks like we need to copy the map.
	 */

	AMAP_B2SLOT(slots, entry->end - entry->start);
	amap = amap_alloc1(slots, 0, waitf);
	if (amap == NULL)
		return;
	srcamap = entry->aref.ar_amap;

	/*
	 * need to double check reference count now.  the reference count
	 * could have changed while we were in malloc.  if the reference count
	 * dropped down to one we take over the old map rather than
	 * copying the amap.
	 */

	if (srcamap->am_ref == 1) {		/* take it over? */
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		amap->am_ref--;		/* drop final reference to map */
		amap_free(amap);	/* dispose of new (unused) amap */
		return;
	}

	/*
	 * we must copy it now.
	 */

	for (lcv = 0 ; lcv < slots; lcv++) {
		amap->am_anon[lcv] =
		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
		if (amap->am_anon[lcv] == NULL)
			continue;
		amap->am_anon[lcv]->an_ref++;
		amap->am_bckptr[lcv] = amap->am_nused;
		amap->am_slots[amap->am_nused] = lcv;
		amap->am_nused++;
	}
	memset(&amap->am_anon[lcv], 0,
	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));

	/*
	 * drop our reference to the old amap (srcamap).
	 * we know that the reference count on srcamap is greater than
	 * one (we checked above), so there is no way we could drop
	 * the count to zero.  [and no need to worry about freeing it]
	 */

	srcamap->am_ref--;
	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
#ifdef UVM_AMAP_PPREF
	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
	}
#endif

	/*
	 * install new amap.
	 */

	entry->aref.ar_pageoff = 0;
	entry->aref.ar_amap = amap;
	entry->etype &= ~UVM_ET_NEEDSCOPY;

	amap_list_insert(amap);
}

/*
 * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
 *
 *	called during fork(2) when the parent process has a wired map
 *	entry.   in that case we want to avoid write-protecting pages
 *	in the parent's map (e.g. like what you'd do for a COW page)
 *	so we resolve the COW here.
 *
 * => assume parent's entry was wired, thus all pages are resident.
 * => assume pages that are loaned out (loan_count) are already mapped
 *    read-only in all maps, and thus no need for us to worry about them
 * => caller passes child's map/entry in to us
 * => XXXCDC: out of memory should cause fork to fail, but there is
 *	currently no easy way to do this (needs fix)
 */

void
amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int lcv, slot;
	struct vm_anon *anon, *nanon;
	struct vm_page *pg, *npg;

	/*
	 * note that if we wait, we must ReStart the "lcv" for loop because
	 * some other process could reorder the anon's in the
	 * am_anon[] array on us.
	 */
ReStart:
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {

		/*
		 * get the page
		 */

		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];
		pg = anon->an_page;

		/*
		 * page must be resident since parent is wired
		 */

		if (pg == NULL)
			panic("amap_cow_now: non-resident wired page in anon %p",
			    anon);

		/*
		 * if the anon ref count is one and the page is not loaned,
		 * then we are safe (the child has exclusive access to the
		 * page).  if the page is loaned, then it must already be
		 * mapped read-only.
		 *
		 * we only need to get involved when these are not true.
		 * [note: if loan_count == 0, then the anon must own the page]
		 */

		if (anon->an_ref > 1 && pg->loan_count == 0) {

			/*
			 * if the page is busy then we have to wait for
			 * it and then restart.
			 */
			if (pg->pg_flags & PG_BUSY) {
				atomic_setbits_int(&pg->pg_flags, PG_WANTED);
				UVM_WAIT(pg, FALSE, "cownow", 0);
				goto ReStart;
			}

			/*
			 * ok, time to do a copy-on-write to a new anon
			 */
			nanon = uvm_analloc();
			if (nanon) {
				npg = uvm_pagealloc(NULL, 0, nanon, 0);
			} else
				npg = NULL;	/* XXX: quiet gcc warning */

			if (nanon == NULL || npg == NULL) {
				/* out of memory */
				/*
				 * XXXCDC: we should cause fork to fail, but
				 * we can't ...
				 */
				if (nanon) {
					uvm_anfree(nanon);
				}
				uvm_wait("cownowpage");
				goto ReStart;
			}

			/*
			 * got it... now we can copy the data and replace anon
			 * with our new one...
			 */
			uvm_pagecopy(pg, npg);		/* old -> new */
			anon->an_ref--;			/* can't drop to zero */
			amap->am_anon[slot] = nanon;	/* replace */

			/*
			 * drop PG_BUSY on new page ... since we have had its
			 * owner locked the whole time it can't be
			 * PG_RELEASED | PG_WANTED.
			 */
			atomic_clearbits_int(&npg->pg_flags, PG_BUSY|PG_FAKE);
			UVM_PAGE_OWN(npg, NULL);
			uvm_lock_pageq();
			uvm_pageactivate(npg);
			uvm_unlock_pageq();
		}

		/*
		 * done with this anon, next ...!
		 */

	}	/* end of 'for' loop */
}

/*
 * amap_splitref: split a single reference into two separate references
 *
 * => called from uvm_map's clip routines
 */
void
amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
{
	int leftslots;

	AMAP_B2SLOT(leftslots, offset);
	if (leftslots == 0)
		panic("amap_splitref: split at zero offset");

	/*
	 * make sure the split point leaves at least one slot on the
	 * right-hand side of the amap.
	 */

	if (origref->ar_amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
		panic("amap_splitref: map size check failed");

#ifdef UVM_AMAP_PPREF
	/*
	 * establish ppref before we add a duplicate reference to the amap
	 */
	if (origref->ar_amap->am_ppref == NULL)
		amap_pp_establish(origref->ar_amap);
#endif

	splitref->ar_amap = origref->ar_amap;
	splitref->ar_amap->am_ref++;		/* not a share reference */
	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
}

#ifdef UVM_AMAP_PPREF

/*
 * amap_pp_establish: add a ppref array to an amap, if possible
 */
void
amap_pp_establish(struct vm_amap *amap)
{

	amap->am_ppref = malloc(sizeof(int) * amap->am_maxslot,
	    M_UVMAMAP, M_NOWAIT|M_ZERO);

	/*
	 * if we fail then we just won't use ppref for this amap
	 */
	if (amap->am_ppref == NULL) {
		amap->am_ppref = PPREF_NONE;	/* not using it */
		return;
	}

	/*
	 * init ppref
	 */
	pp_setreflen(amap->am_ppref, 0, amap->am_ref, amap->am_nslot);
}

/*
 * amap_pp_adjref: adjust reference count to a part of an amap using the
 * per-page reference count array.
 *
 * => caller must check that ppref != PPREF_NONE before calling
 */
void
amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
{
	int stopslot, *ppref, lcv, prevlcv;
	int ref, len, prevref, prevlen;

	stopslot = curslot + slotlen;
	ppref = amap->am_ppref;
	prevlcv = 0;

	/*
	 * first advance to the correct place in the ppref array,
	 * fragment if needed.
	 */

	for (lcv = 0 ; lcv < curslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > curslot) {     /* goes past start? */
			pp_setreflen(ppref, lcv, ref, curslot - lcv);
			pp_setreflen(ppref, curslot, ref, len - (curslot - lcv));
			len = curslot - lcv;   /* new length of entry @ lcv */
		}
		prevlcv = lcv;
	}
	if (lcv != 0)
		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
	else {
		/* Ensure that the "prevref == ref" test below always
		 * fails, since we're starting from the beginning of
		 * the ppref array; that is, there is no previous
		 * chunk.
		 */
		prevref = -1;
		prevlen = 0;
	}

	/*
	 * now adjust reference counts in range.  merge the first
	 * changed entry with the last unchanged entry if possible.
	 */

	if (lcv != curslot)
		panic("amap_pp_adjref: overshot target");

	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > stopslot) {     /* goes past end? */
			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
			pp_setreflen(ppref, stopslot, ref,
			    len - (stopslot - lcv));
			len = stopslot - lcv;
		}
		ref += adjval;
		if (ref < 0)
			panic("amap_pp_adjref: negative reference count");
		if (lcv == prevlcv + prevlen && ref == prevref) {
			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
		} else {
			pp_setreflen(ppref, lcv, ref, len);
		}
		if (ref == 0)
			amap_wiperange(amap, lcv, len);
	}

}

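/*
 * worked example for amap_pp_adjref() (editorial): starting from a single
 * chunk {ref 2, len 8} at slot 0, the call amap_pp_adjref(amap, 2, 4, -1)
 * first fragments the chunk at slot 2, then again at the stop slot 6 and
 * adjusts the middle piece, leaving:
 *
 *	slot 0: ref 2, len 2
 *	slot 2: ref 1, len 4
 *	slot 6: ref 2, len 2
 *
 * had the middle piece dropped to ref 0, amap_wiperange() would have been
 * called on it.
 */
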
/*
 * amap_wiperange: wipe out a range of an amap
 * [different from amap_wipeout because the amap is kept intact]
 */
void
amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
{
	int byanon, lcv, stop, curslot, ptr, slotend;
	struct vm_anon *anon;

	/*
	 * we can either traverse the amap by am_anon or by am_slots depending
	 * on which is cheaper.    decide now.
	 */

	if (slots < amap->am_nused) {
		byanon = TRUE;
		lcv = slotoff;
		stop = slotoff + slots;
	} else {
		byanon = FALSE;
		lcv = 0;
		stop = amap->am_nused;
		slotend = slotoff + slots;
	}

	while (lcv < stop) {
		int refs;

		if (byanon) {
			curslot = lcv++;	/* lcv advances here */
			if (amap->am_anon[curslot] == NULL)
				continue;
		} else {
			curslot = amap->am_slots[lcv];
			if (curslot < slotoff || curslot >= slotend) {
				lcv++;		/* lcv advances here */
				continue;
			}
			stop--;	/* drop stop, since anon will be removed */
		}
		anon = amap->am_anon[curslot];

		/*
		 * remove it from the amap
		 */
		amap->am_anon[curslot] = NULL;
		ptr = amap->am_bckptr[curslot];
		if (ptr != (amap->am_nused - 1)) {
			amap->am_slots[ptr] =
			    amap->am_slots[amap->am_nused - 1];
			amap->am_bckptr[amap->am_slots[ptr]] =
			    ptr;	/* back ptr. */
		}
		amap->am_nused--;

		/*
		 * drop anon reference count
		 */
		refs = --anon->an_ref;
		if (refs == 0) {
			/*
			 * we just eliminated the last reference to an anon.
			 * free it.
			 */
			uvm_anfree(anon);
		}
	}
}

#endif

/*
 * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
 *
 * => note that we don't always traverse all anons.
 *    eg. amaps being wiped out, released anons.
 * => return TRUE if failed.
 */
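
/*
 * implementation note (editorial): uvm_anon_pagein() can sleep, and while
 * we sleep other amaps (including the current one) may be removed from
 * amap_list by amap_wipeout().  to keep our place, two stack-local marker
 * amaps are inserted around the amap being processed and removed once we
 * are done with it; iteration then resumes from the entry following
 * marker_next, which is stable even if the current amap disappears.  the
 * AMAP_SWAPOFF flag makes amap_wipeout() defer freeing the amap while its
 * anons are being paged in.
 */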

boolean_t
amap_swap_off(int startslot, int endslot)
{
	struct vm_amap *am;
	struct vm_amap *am_next;
	struct vm_amap marker_prev;
	struct vm_amap marker_next;
	boolean_t rv = FALSE;

#if defined(DIAGNOSTIC)
	memset(&marker_prev, 0, sizeof(marker_prev));
	memset(&marker_next, 0, sizeof(marker_next));
#endif /* defined(DIAGNOSTIC) */

	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
		int i;

		LIST_INSERT_BEFORE(am, &marker_prev, am_list);
		LIST_INSERT_AFTER(am, &marker_next, am_list);

		if (am->am_nused <= 0) {
			goto next;
		}

		for (i = 0; i < am->am_nused; i++) {
			int slot;
			int swslot;
			struct vm_anon *anon;

			slot = am->am_slots[i];
			anon = am->am_anon[slot];

			swslot = anon->an_swslot;
			if (swslot < startslot || endslot <= swslot) {
				continue;
			}

			am->am_flags |= AMAP_SWAPOFF;

			rv = uvm_anon_pagein(anon);

			am->am_flags &= ~AMAP_SWAPOFF;
			if (amap_refs(am) == 0) {
				amap_wipeout(am);
				am = NULL;
				break;
			}
			if (rv) {
				break;
			}
			i = 0;
		}

next:
		KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
		    LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
		    &marker_next);
		am_next = LIST_NEXT(&marker_next, am_list);
		LIST_REMOVE(&marker_prev, am_list);
		LIST_REMOVE(&marker_next, am_list);
	}

	return rv;
}

/*
 * amap_lookup: look up a page in an amap
 */
struct vm_anon *
amap_lookup(struct vm_aref *aref, vaddr_t offset)
{
	int slot;
	struct vm_amap *amap = aref->ar_amap;

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	if (slot >= amap->am_nslot)
		panic("amap_lookup: offset out of range");

	return(amap->am_anon[slot]);
}

/*
 * amap_lookups: look up a range of pages in an amap
 *
 * => XXXCDC: this interface is biased toward array-based amaps.  fix.
 */
void
amap_lookups(struct vm_aref *aref, vaddr_t offset,
    struct vm_anon **anons, int npages)
{
	int slot;
	struct vm_amap *amap = aref->ar_amap;

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	if ((slot + (npages - 1)) >= amap->am_nslot)
		panic("amap_lookups: offset out of range");

	memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));

	return;
}

/*
 * amap_add: add (or replace) a page to an amap
 *
 * => the same "offset" is later passed to amap_unadd() to remove the page.
 */
void
amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
    boolean_t replace)
{
	int slot;
	struct vm_amap *amap = aref->ar_amap;

	AMAP_B2SLOT(slot, offset);
	slot += aref->ar_pageoff;

	if (slot >= amap->am_nslot)
		panic("amap_add: offset out of range");

	if (replace) {

		if (amap->am_anon[slot] == NULL)
			panic("amap_add: replacing null anon");
		if (amap->am_anon[slot]->an_page != NULL &&
		    (amap->am_flags & AMAP_SHARED) != 0) {
			pmap_page_protect(amap->am_anon[slot]->an_page,
			    VM_PROT_NONE);
			/*
			 * XXX: suppose page is supposed to be wired somewhere?
			 */
		}
	} else {	/* !replace */
		if (amap->am_anon[slot] != NULL)
			panic("amap_add: slot in use");

		amap->am_bckptr[slot] = amap->am_nused;
		amap->am_slots[amap->am_nused] = slot;
		amap->am_nused++;
	}
	amap->am_anon[slot] = anon;
}

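/*
 * editorial illustration of the constant-time removal used by amap_unadd()
 * below (and by amap_wiperange() above): suppose am_nused == 3, am_slots
 * == { 5, 9, 2 } and slot 9 is removed.  am_bckptr[9] == 1, so the last
 * entry (slot 2) is copied into position 1, am_bckptr[2] is updated to 1,
 * and am_nused drops to 2.  am_slots stays dense and the back pointers
 * stay consistent without shifting the whole array.
 */
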
1116 */ 1117 } 1118 } else { /* !replace */ 1119 if (amap->am_anon[slot] != NULL) 1120 panic("amap_add: slot in use"); 1121 1122 amap->am_bckptr[slot] = amap->am_nused; 1123 amap->am_slots[amap->am_nused] = slot; 1124 amap->am_nused++; 1125 } 1126 amap->am_anon[slot] = anon; 1127 } 1128 1129 /* 1130 * amap_unadd: remove a page from an amap 1131 */ 1132 void 1133 amap_unadd(struct vm_aref *aref, vaddr_t offset) 1134 { 1135 int ptr, slot; 1136 struct vm_amap *amap = aref->ar_amap; 1137 1138 AMAP_B2SLOT(slot, offset); 1139 slot += aref->ar_pageoff; 1140 1141 if (slot >= amap->am_nslot) 1142 panic("amap_unadd: offset out of range"); 1143 1144 if (amap->am_anon[slot] == NULL) 1145 panic("amap_unadd: nothing there"); 1146 1147 amap->am_anon[slot] = NULL; 1148 ptr = amap->am_bckptr[slot]; 1149 1150 if (ptr != (amap->am_nused - 1)) { /* swap to keep slots contig? */ 1151 amap->am_slots[ptr] = amap->am_slots[amap->am_nused - 1]; 1152 amap->am_bckptr[amap->am_slots[ptr]] = ptr; /* back link */ 1153 } 1154 amap->am_nused--; 1155 } 1156 1157 /* 1158 * amap_ref: gain a reference to an amap 1159 * 1160 * => "offset" and "len" are in units of pages 1161 * => called at fork time to gain the child's reference 1162 */ 1163 void 1164 amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags) 1165 { 1166 1167 amap->am_ref++; 1168 if (flags & AMAP_SHARED) 1169 amap->am_flags |= AMAP_SHARED; 1170 #ifdef UVM_AMAP_PPREF 1171 if (amap->am_ppref == NULL && (flags & AMAP_REFALL) == 0 && 1172 len != amap->am_nslot) 1173 amap_pp_establish(amap); 1174 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) { 1175 if (flags & AMAP_REFALL) 1176 amap_pp_adjref(amap, 0, amap->am_nslot, 1); 1177 else 1178 amap_pp_adjref(amap, offset, len, 1); 1179 } 1180 #endif 1181 } 1182 1183 /* 1184 * amap_unref: remove a reference to an amap 1185 * 1186 * => caller must remove all pmap-level references to this amap before 1187 * dropping the reference 1188 * => called from uvm_unmap_detach [only] ... note that entry is no 1189 * longer part of a map 1190 */ 1191 void 1192 amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, boolean_t all) 1193 { 1194 1195 /* 1196 * if we are the last reference, free the amap and return. 1197 */ 1198 1199 if (amap->am_ref-- == 1) { 1200 amap_wipeout(amap); /* drops final ref and frees */ 1201 return; 1202 } 1203 1204 /* 1205 * otherwise just drop the reference count(s) 1206 */ 1207 if (amap->am_ref == 1 && (amap->am_flags & AMAP_SHARED) != 0) 1208 amap->am_flags &= ~AMAP_SHARED; /* clear shared flag */ 1209 #ifdef UVM_AMAP_PPREF 1210 if (amap->am_ppref == NULL && all == 0 && len != amap->am_nslot) 1211 amap_pp_establish(amap); 1212 if (amap->am_ppref && amap->am_ppref != PPREF_NONE) { 1213 if (all) 1214 amap_pp_adjref(amap, 0, amap->am_nslot, -1); 1215 else 1216 amap_pp_adjref(amap, offset, len, -1); 1217 } 1218 #endif 1219 } 1220