1 /* 2 * Copyright (c) 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * The Mach Operating System project at Carnegie-Mellon University. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * from: @(#)vm_map.c 8.3 (Berkeley) 1/12/94 37 * 38 * 39 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 40 * All rights reserved. 41 * 42 * Authors: Avadis Tevanian, Jr., Michael Wayne Young 43 * 44 * Permission to use, copy, modify and distribute this software and 45 * its documentation is hereby granted, provided that both the copyright 46 * notice and this permission notice appear in all copies of the 47 * software, derivative works or modified versions, and any portions 48 * thereof, and that both notices appear in supporting documentation. 49 * 50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 53 * 54 * Carnegie Mellon requests users of this software to return to 55 * 56 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 57 * School of Computer Science 58 * Carnegie Mellon University 59 * Pittsburgh PA 15213-3890 60 * 61 * any improvements or extensions that they make and grant Carnegie the 62 * rights to redistribute these changes. 63 * 64 * $FreeBSD: src/sys/vm/vm_map.c,v 1.187.2.19 2003/05/27 00:47:02 alc Exp $ 65 * $DragonFly: src/sys/vm/vm_map.c,v 1.7 2003/07/23 07:14:19 dillon Exp $ 66 */ 67 68 /* 69 * Virtual memory mapping module. 
70 */ 71 72 #include <sys/param.h> 73 #include <sys/systm.h> 74 #include <sys/proc.h> 75 #include <sys/lock.h> 76 #include <sys/vmmeter.h> 77 #include <sys/mman.h> 78 #include <sys/vnode.h> 79 #include <sys/resourcevar.h> 80 #include <sys/shm.h> 81 82 #include <vm/vm.h> 83 #include <vm/vm_param.h> 84 #include <vm/pmap.h> 85 #include <vm/vm_map.h> 86 #include <vm/vm_page.h> 87 #include <vm/vm_object.h> 88 #include <vm/vm_pager.h> 89 #include <vm/vm_kern.h> 90 #include <vm/vm_extern.h> 91 #include <vm/swap_pager.h> 92 #include <vm/vm_zone.h> 93 94 /* 95 * Virtual memory maps provide for the mapping, protection, 96 * and sharing of virtual memory objects. In addition, 97 * this module provides for an efficient virtual copy of 98 * memory from one map to another. 99 * 100 * Synchronization is required prior to most operations. 101 * 102 * Maps consist of an ordered doubly-linked list of simple 103 * entries; a single hint is used to speed up lookups. 104 * 105 * Since portions of maps are specified by start/end addresses, 106 * which may not align with existing map entries, all 107 * routines merely "clip" entries to these start/end values. 108 * [That is, an entry is split into two, bordering at a 109 * start or end value.] Note that these clippings may not 110 * always be necessary (as the two resulting entries are then 111 * not changed); however, the clipping is done for convenience. 112 * 113 * As mentioned above, virtual copy operations are performed 114 * by copying VM object references from one map to 115 * another, and then marking both regions as copy-on-write. 116 */ 117 118 /* 119 * vm_map_startup: 120 * 121 * Initialize the vm_map module. Must be called before 122 * any other vm_map routines. 123 * 124 * Map and entry structures are allocated from the general 125 * purpose memory pool with some exceptions: 126 * 127 * - The kernel map and kmem submap are allocated statically. 128 * - Kernel map entries are allocated out of a static pool. 129 * 130 * These restrictions are necessary since malloc() uses the 131 * maps and requires map entries. 
132 */ 133 134 static struct vm_zone kmapentzone_store, mapentzone_store, mapzone_store; 135 static vm_zone_t mapentzone, kmapentzone, mapzone, vmspace_zone; 136 static struct vm_object kmapentobj, mapentobj, mapobj; 137 138 static struct vm_map_entry map_entry_init[MAX_MAPENT]; 139 static struct vm_map_entry kmap_entry_init[MAX_KMAPENT]; 140 static struct vm_map map_init[MAX_KMAP]; 141 142 static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t)); 143 static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t)); 144 static vm_map_entry_t vm_map_entry_create __P((vm_map_t)); 145 static void vm_map_entry_delete __P((vm_map_t, vm_map_entry_t)); 146 static void vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t)); 147 static void vm_map_entry_unwire __P((vm_map_t, vm_map_entry_t)); 148 static void vm_map_copy_entry __P((vm_map_t, vm_map_t, vm_map_entry_t, 149 vm_map_entry_t)); 150 static void vm_map_split __P((vm_map_entry_t)); 151 static void vm_map_unclip_range __P((vm_map_t map, vm_map_entry_t start_entry, vm_offset_t start, vm_offset_t end, int flags)); 152 153 void 154 vm_map_startup() 155 { 156 mapzone = &mapzone_store; 157 zbootinit(mapzone, "MAP", sizeof (struct vm_map), 158 map_init, MAX_KMAP); 159 kmapentzone = &kmapentzone_store; 160 zbootinit(kmapentzone, "KMAP ENTRY", sizeof (struct vm_map_entry), 161 kmap_entry_init, MAX_KMAPENT); 162 mapentzone = &mapentzone_store; 163 zbootinit(mapentzone, "MAP ENTRY", sizeof (struct vm_map_entry), 164 map_entry_init, MAX_MAPENT); 165 } 166 167 /* 168 * Allocate a vmspace structure, including a vm_map and pmap, 169 * and initialize those structures. The refcnt is set to 1. 170 * The remaining fields must be initialized by the caller. 171 */ 172 struct vmspace * 173 vmspace_alloc(min, max) 174 vm_offset_t min, max; 175 { 176 struct vmspace *vm; 177 178 vm = zalloc(vmspace_zone); 179 vm_map_init(&vm->vm_map, min, max); 180 pmap_pinit(vmspace_pmap(vm)); 181 vm->vm_map.pmap = vmspace_pmap(vm); /* XXX */ 182 vm->vm_refcnt = 1; 183 vm->vm_shm = NULL; 184 vm->vm_exitingcnt = 0; 185 return (vm); 186 } 187 188 void 189 vm_init2(void) { 190 zinitna(kmapentzone, &kmapentobj, 191 NULL, 0, lmin((VM_MAX_KERNEL_ADDRESS - KERNBASE) / PAGE_SIZE, 192 vmstats.v_page_count) / 8, ZONE_INTERRUPT, 1); 193 zinitna(mapentzone, &mapentobj, 194 NULL, 0, 0, 0, 1); 195 zinitna(mapzone, &mapobj, 196 NULL, 0, 0, 0, 1); 197 vmspace_zone = zinit("VMSPACE", sizeof (struct vmspace), 0, 0, 3); 198 pmap_init2(); 199 vm_object_init2(); 200 } 201 202 static __inline void 203 vmspace_dofree(struct vmspace *vm) 204 { 205 /* 206 * Make sure any SysV shm is freed, it might not have in 207 * exit1() 208 */ 209 shmexit(vm); 210 211 /* 212 * Lock the map, to wait out all other references to it. 213 * Delete all of the mappings and pages they hold, then call 214 * the pmap module to reclaim anything left. 
	 */
	vm_map_lock(&vm->vm_map);
	(void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
		vm->vm_map.max_offset);
	vm_map_unlock(&vm->vm_map);

	pmap_release(vmspace_pmap(vm));
	zfree(vmspace_zone, vm);
}

void
vmspace_free(struct vmspace *vm)
{
	if (vm->vm_refcnt == 0)
		panic("vmspace_free: attempt to free already freed vmspace");

	if (--vm->vm_refcnt == 0 && vm->vm_exitingcnt == 0)
		vmspace_dofree(vm);
}

void
vmspace_exitfree(struct proc *p)
{
	struct vmspace *vm;

	vm = p->p_vmspace;
	p->p_vmspace = NULL;

	/*
	 * cleanup by parent process wait()ing on exiting child.  vm_refcnt
	 * may not be 0 (e.g. fork() and child exits without exec()ing).
	 * exitingcnt may increment above 0 and drop back down to zero
	 * several times while vm_refcnt is held non-zero.  vm_refcnt
	 * may also increment above 0 and drop back down to zero several
	 * times while vm_exitingcnt is held non-zero.
	 *
	 * The last wait on the exiting child's vmspace will clean up
	 * the remainder of the vmspace.
	 */
	if (--vm->vm_exitingcnt == 0 && vm->vm_refcnt == 0)
		vmspace_dofree(vm);
}

/*
 * vmspace_swap_count() - count the approximate swap usage in pages for a
 *			  vmspace.
 *
 * Swap usage is determined by taking the proportional swap used by
 * VM objects backing the VM map.  To make up for fractional losses,
 * if the VM object has any swap use at all the associated map entries
 * count for at least 1 swap page.
 */
int
vmspace_swap_count(struct vmspace *vmspace)
{
	vm_map_t map = &vmspace->vm_map;
	vm_map_entry_t cur;
	int count = 0;

	for (cur = map->header.next; cur != &map->header; cur = cur->next) {
		vm_object_t object;

		if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
		    (object = cur->object.vm_object) != NULL &&
		    object->type == OBJT_SWAP
		) {
			int n = (cur->end - cur->start) / PAGE_SIZE;

			if (object->un_pager.swp.swp_bcount) {
				count += object->un_pager.swp.swp_bcount *
				    SWAP_META_PAGES * n / object->size + 1;
			}
		}
	}
	return(count);
}


/*
 * vm_map_create:
 *
 * Creates and returns a new empty VM map with
 * the given physical map structure, and having
 * the given lower and upper address bounds.
 */
vm_map_t
vm_map_create(pmap, min, max)
	pmap_t pmap;
	vm_offset_t min, max;
{
	vm_map_t result;

	result = zalloc(mapzone);
	vm_map_init(result, min, max);
	result->pmap = pmap;
	return (result);
}

/*
 * Initialize an existing vm_map structure
 * such as that in the vmspace structure.
 * The pmap is set elsewhere.
 */
void
vm_map_init(map, min, max)
	struct vm_map *map;
	vm_offset_t min, max;
{
	map->header.next = map->header.prev = &map->header;
	map->nentries = 0;
	map->size = 0;
	map->system_map = 0;
	map->infork = 0;
	map->min_offset = min;
	map->max_offset = max;
	map->first_free = &map->header;
	map->hint = &map->header;
	map->timestamp = 0;
	lockinit(&map->lock, 0, "thrd_sleep", 0, LK_NOPAUSE);
}

/*
 * vm_map_entry_create:	[ internal use only ]
 *
 * Allocates a VM map entry for insertion.  No entry fields are filled
 * in.  This routine may be called from an interrupt.
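 *
 * Illustrative note (not in the original comment): kernel maps draw
 * their entries from kmapentzone, which vm_map_startup() bootstrapped
 * from a static pool and vm_init2() marked ZONE_INTERRUPT, so the
 * allocation never has to recurse into malloc().  A sketch of the
 * selection performed below, using only fields defined earlier in this
 * file (the "zone" temporary is only for illustration):
 *
 *	zone = (map->system_map || mapentzone == NULL) ?
 *	    kmapentzone : mapentzone;
 *	new_entry = zalloc(zone);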
341 */ 342 static vm_map_entry_t 343 vm_map_entry_create(map) 344 vm_map_t map; 345 { 346 vm_map_entry_t new_entry; 347 348 if (map->system_map || !mapentzone) 349 new_entry = zalloc(kmapentzone); 350 else 351 new_entry = zalloc(mapentzone); 352 if (new_entry == NULL) 353 panic("vm_map_entry_create: kernel resources exhausted"); 354 return(new_entry); 355 } 356 357 /* 358 * vm_map_entry_dispose: [ internal use only ] 359 * 360 * Dispose of a vm_map_entry that is no longer being referenced. This 361 * function may be called from an interrupt. 362 */ 363 static void 364 vm_map_entry_dispose(map, entry) 365 vm_map_t map; 366 vm_map_entry_t entry; 367 { 368 if (map->system_map || !mapentzone) 369 zfree(kmapentzone, entry); 370 else 371 zfree(mapentzone, entry); 372 } 373 374 375 /* 376 * vm_map_entry_{un,}link: 377 * 378 * Insert/remove entries from maps. 379 */ 380 static __inline void 381 vm_map_entry_link(vm_map_t map, 382 vm_map_entry_t after_where, 383 vm_map_entry_t entry) 384 { 385 map->nentries++; 386 entry->prev = after_where; 387 entry->next = after_where->next; 388 entry->next->prev = entry; 389 after_where->next = entry; 390 } 391 392 static __inline void 393 vm_map_entry_unlink(vm_map_t map, 394 vm_map_entry_t entry) 395 { 396 vm_map_entry_t prev; 397 vm_map_entry_t next; 398 399 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) 400 panic("vm_map_entry_unlink: attempt to mess with locked entry! %p", entry); 401 prev = entry->prev; 402 next = entry->next; 403 next->prev = prev; 404 prev->next = next; 405 map->nentries--; 406 } 407 408 /* 409 * SAVE_HINT: 410 * 411 * Saves the specified entry as the hint for 412 * future lookups. 413 */ 414 #define SAVE_HINT(map,value) \ 415 (map)->hint = (value); 416 417 /* 418 * vm_map_lookup_entry: [ internal use only ] 419 * 420 * Finds the map entry containing (or 421 * immediately preceding) the specified address 422 * in the given map; the entry is returned 423 * in the "entry" parameter. The boolean 424 * result indicates whether the address is 425 * actually contained in the map. 426 */ 427 boolean_t 428 vm_map_lookup_entry(map, address, entry) 429 vm_map_t map; 430 vm_offset_t address; 431 vm_map_entry_t *entry; /* OUT */ 432 { 433 vm_map_entry_t cur; 434 vm_map_entry_t last; 435 436 /* 437 * Start looking either from the head of the list, or from the hint. 438 */ 439 440 cur = map->hint; 441 442 if (cur == &map->header) 443 cur = cur->next; 444 445 if (address >= cur->start) { 446 /* 447 * Go from hint to end of list. 448 * 449 * But first, make a quick check to see if we are already looking 450 * at the entry we want (which is usually the case). Note also 451 * that we don't need to save the hint here... it is the same 452 * hint (unless we are at the header, in which case the hint 453 * didn't buy us anything anyway). 
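		 *
		 * Illustrative restatement (not in the original comment):
		 * if the hint entry satisfies hint->start <= address <
		 * hint->end, it is returned right here without touching
		 * the list; the linear scan below only runs on a hint miss.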
454 */ 455 last = &map->header; 456 if ((cur != last) && (cur->end > address)) { 457 *entry = cur; 458 return (TRUE); 459 } 460 } else { 461 /* 462 * Go from start to hint, *inclusively* 463 */ 464 last = cur->next; 465 cur = map->header.next; 466 } 467 468 /* 469 * Search linearly 470 */ 471 472 while (cur != last) { 473 if (cur->end > address) { 474 if (address >= cur->start) { 475 /* 476 * Save this lookup for future hints, and 477 * return 478 */ 479 480 *entry = cur; 481 SAVE_HINT(map, cur); 482 return (TRUE); 483 } 484 break; 485 } 486 cur = cur->next; 487 } 488 *entry = cur->prev; 489 SAVE_HINT(map, *entry); 490 return (FALSE); 491 } 492 493 /* 494 * vm_map_insert: 495 * 496 * Inserts the given whole VM object into the target 497 * map at the specified address range. The object's 498 * size should match that of the address range. 499 * 500 * Requires that the map be locked, and leaves it so. 501 * 502 * If object is non-NULL, ref count must be bumped by caller 503 * prior to making call to account for the new entry. 504 */ 505 int 506 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 507 vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max, 508 int cow) 509 { 510 vm_map_entry_t new_entry; 511 vm_map_entry_t prev_entry; 512 vm_map_entry_t temp_entry; 513 vm_eflags_t protoeflags; 514 515 /* 516 * Check that the start and end points are not bogus. 517 */ 518 519 if ((start < map->min_offset) || (end > map->max_offset) || 520 (start >= end)) 521 return (KERN_INVALID_ADDRESS); 522 523 /* 524 * Find the entry prior to the proposed starting address; if it's part 525 * of an existing entry, this range is bogus. 526 */ 527 528 if (vm_map_lookup_entry(map, start, &temp_entry)) 529 return (KERN_NO_SPACE); 530 531 prev_entry = temp_entry; 532 533 /* 534 * Assert that the next entry doesn't overlap the end point. 535 */ 536 537 if ((prev_entry->next != &map->header) && 538 (prev_entry->next->start < end)) 539 return (KERN_NO_SPACE); 540 541 protoeflags = 0; 542 543 if (cow & MAP_COPY_ON_WRITE) 544 protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY; 545 546 if (cow & MAP_NOFAULT) { 547 protoeflags |= MAP_ENTRY_NOFAULT; 548 549 KASSERT(object == NULL, 550 ("vm_map_insert: paradoxical MAP_NOFAULT request")); 551 } 552 if (cow & MAP_DISABLE_SYNCER) 553 protoeflags |= MAP_ENTRY_NOSYNC; 554 if (cow & MAP_DISABLE_COREDUMP) 555 protoeflags |= MAP_ENTRY_NOCOREDUMP; 556 557 if (object) { 558 /* 559 * When object is non-NULL, it could be shared with another 560 * process. We have to set or clear OBJ_ONEMAPPING 561 * appropriately. 562 */ 563 if ((object->ref_count > 1) || (object->shadow_count != 0)) { 564 vm_object_clear_flag(object, OBJ_ONEMAPPING); 565 } 566 } 567 else if ((prev_entry != &map->header) && 568 (prev_entry->eflags == protoeflags) && 569 (prev_entry->end == start) && 570 (prev_entry->wired_count == 0) && 571 ((prev_entry->object.vm_object == NULL) || 572 vm_object_coalesce(prev_entry->object.vm_object, 573 OFF_TO_IDX(prev_entry->offset), 574 (vm_size_t)(prev_entry->end - prev_entry->start), 575 (vm_size_t)(end - prev_entry->end)))) { 576 /* 577 * We were able to extend the object. Determine if we 578 * can extend the previous map entry to include the 579 * new range as well. 
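		 *
		 * Example (illustrative, not from the original source):
		 * two back-to-back anonymous mappings created by successive
		 * vm_map_insert() calls, with matching eflags, protection,
		 * max_protection and inheritance and with contiguous object
		 * offsets, collapse into a single entry here instead of
		 * accumulating one entry per call.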
580 */ 581 if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) && 582 (prev_entry->protection == prot) && 583 (prev_entry->max_protection == max)) { 584 map->size += (end - prev_entry->end); 585 prev_entry->end = end; 586 vm_map_simplify_entry(map, prev_entry); 587 return (KERN_SUCCESS); 588 } 589 590 /* 591 * If we can extend the object but cannot extend the 592 * map entry, we have to create a new map entry. We 593 * must bump the ref count on the extended object to 594 * account for it. object may be NULL. 595 */ 596 object = prev_entry->object.vm_object; 597 offset = prev_entry->offset + 598 (prev_entry->end - prev_entry->start); 599 vm_object_reference(object); 600 } 601 602 /* 603 * NOTE: if conditionals fail, object can be NULL here. This occurs 604 * in things like the buffer map where we manage kva but do not manage 605 * backing objects. 606 */ 607 608 /* 609 * Create a new entry 610 */ 611 612 new_entry = vm_map_entry_create(map); 613 new_entry->start = start; 614 new_entry->end = end; 615 616 new_entry->eflags = protoeflags; 617 new_entry->object.vm_object = object; 618 new_entry->offset = offset; 619 new_entry->avail_ssize = 0; 620 621 new_entry->inheritance = VM_INHERIT_DEFAULT; 622 new_entry->protection = prot; 623 new_entry->max_protection = max; 624 new_entry->wired_count = 0; 625 626 /* 627 * Insert the new entry into the list 628 */ 629 630 vm_map_entry_link(map, prev_entry, new_entry); 631 map->size += new_entry->end - new_entry->start; 632 633 /* 634 * Update the free space hint 635 */ 636 if ((map->first_free == prev_entry) && 637 (prev_entry->end >= new_entry->start)) { 638 map->first_free = new_entry; 639 } 640 641 #if 0 642 /* 643 * Temporarily removed to avoid MAP_STACK panic, due to 644 * MAP_STACK being a huge hack. Will be added back in 645 * when MAP_STACK (and the user stack mapping) is fixed. 646 */ 647 /* 648 * It may be possible to simplify the entry 649 */ 650 vm_map_simplify_entry(map, new_entry); 651 #endif 652 653 if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) { 654 pmap_object_init_pt(map->pmap, start, 655 object, OFF_TO_IDX(offset), end - start, 656 cow & MAP_PREFAULT_PARTIAL); 657 } 658 659 return (KERN_SUCCESS); 660 } 661 662 /* 663 * Find sufficient space for `length' bytes in the given map, starting at 664 * `start'. The map must be locked. Returns 0 on success, 1 on no space. 665 */ 666 int 667 vm_map_findspace(map, start, length, addr) 668 vm_map_t map; 669 vm_offset_t start; 670 vm_size_t length; 671 vm_offset_t *addr; 672 { 673 vm_map_entry_t entry, next; 674 vm_offset_t end; 675 676 if (start < map->min_offset) 677 start = map->min_offset; 678 if (start > map->max_offset) 679 return (1); 680 681 /* 682 * Look for the first possible address; if there's already something 683 * at this address, we have to start after it. 684 */ 685 if (start == map->min_offset) { 686 if ((entry = map->first_free) != &map->header) 687 start = entry->end; 688 } else { 689 vm_map_entry_t tmp; 690 691 if (vm_map_lookup_entry(map, start, &tmp)) 692 start = tmp->end; 693 entry = tmp; 694 } 695 696 /* 697 * Look through the rest of the map, trying to fit a new region in the 698 * gap between existing regions, or after the very last region. 699 */ 700 for (;; start = (entry = next)->end) { 701 /* 702 * Find the end of the proposed new region. Be sure we didn't 703 * go beyond the end of the map, or wrap around the address; 704 * if so, we lose. Otherwise, if this is the last entry, or 705 * if the proposed new region fits before the next entry, we 706 * win. 
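		 *
		 * Worked example (illustrative): with entries at
		 * [0x1000,0x3000) and [0x8000,0x9000) and a request for
		 * 0x2000 bytes starting at 0x1000, the first proposal is
		 * [0x3000,0x5000); it ends before the next entry at
		 * 0x8000, so 0x3000 is returned.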
		 */
		end = start + length;
		if (end > map->max_offset || end < start)
			return (1);
		next = entry->next;
		if (next == &map->header || next->start >= end)
			break;
	}
	SAVE_HINT(map, entry);
	*addr = start;
	if (map == kernel_map) {
		vm_offset_t ksize;
		if ((ksize = round_page(start + length)) > kernel_vm_end) {
			pmap_growkernel(ksize);
		}
	}
	return (0);
}

/*
 * vm_map_find finds an unallocated region in the target address
 * map with the given length.  The search is defined to be
 * first-fit from the specified address; the region found is
 * returned in the same parameter.
 *
 * If object is non-NULL, ref count must be bumped by caller
 * prior to making call to account for the new entry.
 */
int
vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
	    vm_offset_t *addr,	/* IN/OUT */
	    vm_size_t length, boolean_t find_space, vm_prot_t prot,
	    vm_prot_t max, int cow)
{
	vm_offset_t start;
	int result, s = 0;

	start = *addr;

	if (map == kmem_map || map == mb_map)
		s = splvm();

	vm_map_lock(map);
	if (find_space) {
		if (vm_map_findspace(map, start, length, addr)) {
			vm_map_unlock(map);
			if (map == kmem_map || map == mb_map)
				splx(s);
			return (KERN_NO_SPACE);
		}
		start = *addr;
	}
	result = vm_map_insert(map, object, offset,
		start, start + length, prot, max, cow);
	vm_map_unlock(map);

	if (map == kmem_map || map == mb_map)
		splx(s);

	return (result);
}

/*
 * vm_map_simplify_entry:
 *
 * Simplify the given map entry by merging with either neighbor.  This
 * routine also has the ability to merge with both neighbors.
 *
 * The map must be locked.
 *
 * This routine guarantees that the passed entry remains valid (though
 * possibly extended).  When merging, this routine may delete one or
 * both neighbors.  No action is taken on entries which have their
 * in-transition flag set.
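 *
 * Illustrative example (not part of the original comment): after
 * vm_map_protect() restores the original protection on a previously
 * clipped middle entry, this routine can merge it back with both of
 * its neighbors, provided the backing objects and offsets are
 * contiguous and the eflags, protection, inheritance and wiring all
 * match.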
781 */ 782 void 783 vm_map_simplify_entry(map, entry) 784 vm_map_t map; 785 vm_map_entry_t entry; 786 { 787 vm_map_entry_t next, prev; 788 vm_size_t prevsize, esize; 789 790 if (entry->eflags & (MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP)) { 791 ++mycpu->gd_cnt.v_intrans_coll; 792 return; 793 } 794 795 prev = entry->prev; 796 if (prev != &map->header) { 797 prevsize = prev->end - prev->start; 798 if ( (prev->end == entry->start) && 799 (prev->object.vm_object == entry->object.vm_object) && 800 (!prev->object.vm_object || 801 (prev->offset + prevsize == entry->offset)) && 802 (prev->eflags == entry->eflags) && 803 (prev->protection == entry->protection) && 804 (prev->max_protection == entry->max_protection) && 805 (prev->inheritance == entry->inheritance) && 806 (prev->wired_count == entry->wired_count)) { 807 if (map->first_free == prev) 808 map->first_free = entry; 809 if (map->hint == prev) 810 map->hint = entry; 811 vm_map_entry_unlink(map, prev); 812 entry->start = prev->start; 813 entry->offset = prev->offset; 814 if (prev->object.vm_object) 815 vm_object_deallocate(prev->object.vm_object); 816 vm_map_entry_dispose(map, prev); 817 } 818 } 819 820 next = entry->next; 821 if (next != &map->header) { 822 esize = entry->end - entry->start; 823 if ((entry->end == next->start) && 824 (next->object.vm_object == entry->object.vm_object) && 825 (!entry->object.vm_object || 826 (entry->offset + esize == next->offset)) && 827 (next->eflags == entry->eflags) && 828 (next->protection == entry->protection) && 829 (next->max_protection == entry->max_protection) && 830 (next->inheritance == entry->inheritance) && 831 (next->wired_count == entry->wired_count)) { 832 if (map->first_free == next) 833 map->first_free = entry; 834 if (map->hint == next) 835 map->hint = entry; 836 vm_map_entry_unlink(map, next); 837 entry->end = next->end; 838 if (next->object.vm_object) 839 vm_object_deallocate(next->object.vm_object); 840 vm_map_entry_dispose(map, next); 841 } 842 } 843 } 844 /* 845 * vm_map_clip_start: [ internal use only ] 846 * 847 * Asserts that the given entry begins at or after 848 * the specified address; if necessary, 849 * it splits the entry into two. 850 */ 851 #define vm_map_clip_start(map, entry, startaddr) \ 852 { \ 853 if (startaddr > entry->start) \ 854 _vm_map_clip_start(map, entry, startaddr); \ 855 } 856 857 /* 858 * This routine is called only when it is known that 859 * the entry must be split. 860 */ 861 static void 862 _vm_map_clip_start(map, entry, start) 863 vm_map_t map; 864 vm_map_entry_t entry; 865 vm_offset_t start; 866 { 867 vm_map_entry_t new_entry; 868 869 /* 870 * Split off the front portion -- note that we must insert the new 871 * entry BEFORE this one, so that this entry has the specified 872 * starting address. 873 */ 874 875 vm_map_simplify_entry(map, entry); 876 877 /* 878 * If there is no object backing this entry, we might as well create 879 * one now. If we defer it, an object can get created after the map 880 * is clipped, and individual objects will be created for the split-up 881 * map. This is a bit of a hack, but is also about the best place to 882 * put this improvement. 
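	 *
	 * Concretely (illustrative): if a large anonymous entry with no
	 * backing object were clipped first and only given objects later,
	 * each fragment would receive its own default object; allocating
	 * the object before the split keeps all fragments pointing into
	 * one object at different offsets.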
883 */ 884 885 if (entry->object.vm_object == NULL && !map->system_map) { 886 vm_object_t object; 887 object = vm_object_allocate(OBJT_DEFAULT, 888 atop(entry->end - entry->start)); 889 entry->object.vm_object = object; 890 entry->offset = 0; 891 } 892 893 new_entry = vm_map_entry_create(map); 894 *new_entry = *entry; 895 896 new_entry->end = start; 897 entry->offset += (start - entry->start); 898 entry->start = start; 899 900 vm_map_entry_link(map, entry->prev, new_entry); 901 902 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { 903 vm_object_reference(new_entry->object.vm_object); 904 } 905 } 906 907 /* 908 * vm_map_clip_end: [ internal use only ] 909 * 910 * Asserts that the given entry ends at or before 911 * the specified address; if necessary, 912 * it splits the entry into two. 913 */ 914 915 #define vm_map_clip_end(map, entry, endaddr) \ 916 { \ 917 if (endaddr < entry->end) \ 918 _vm_map_clip_end(map, entry, endaddr); \ 919 } 920 921 /* 922 * This routine is called only when it is known that 923 * the entry must be split. 924 */ 925 static void 926 _vm_map_clip_end(map, entry, end) 927 vm_map_t map; 928 vm_map_entry_t entry; 929 vm_offset_t end; 930 { 931 vm_map_entry_t new_entry; 932 933 /* 934 * If there is no object backing this entry, we might as well create 935 * one now. If we defer it, an object can get created after the map 936 * is clipped, and individual objects will be created for the split-up 937 * map. This is a bit of a hack, but is also about the best place to 938 * put this improvement. 939 */ 940 941 if (entry->object.vm_object == NULL && !map->system_map) { 942 vm_object_t object; 943 object = vm_object_allocate(OBJT_DEFAULT, 944 atop(entry->end - entry->start)); 945 entry->object.vm_object = object; 946 entry->offset = 0; 947 } 948 949 /* 950 * Create a new entry and insert it AFTER the specified entry 951 */ 952 953 new_entry = vm_map_entry_create(map); 954 *new_entry = *entry; 955 956 new_entry->start = entry->end = end; 957 new_entry->offset += (end - entry->start); 958 959 vm_map_entry_link(map, entry, new_entry); 960 961 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { 962 vm_object_reference(new_entry->object.vm_object); 963 } 964 } 965 966 /* 967 * VM_MAP_RANGE_CHECK: [ internal use only ] 968 * 969 * Asserts that the starting and ending region 970 * addresses fall within the valid range of the map. 971 */ 972 #define VM_MAP_RANGE_CHECK(map, start, end) \ 973 { \ 974 if (start < vm_map_min(map)) \ 975 start = vm_map_min(map); \ 976 if (end > vm_map_max(map)) \ 977 end = vm_map_max(map); \ 978 if (start > end) \ 979 start = end; \ 980 } 981 982 /* 983 * vm_map_transition_wait: [ kernel use only ] 984 * 985 * Used to block when an in-transition collison occurs. The map 986 * is unlocked for the sleep and relocked before the return. 987 */ 988 static 989 void 990 vm_map_transition_wait(vm_map_t map) 991 { 992 vm_map_unlock(map); 993 tsleep(map, 0, "vment", 0); 994 vm_map_lock(map); 995 } 996 997 /* 998 * CLIP_CHECK_BACK 999 * CLIP_CHECK_FWD 1000 * 1001 * When we do blocking operations with the map lock held it is 1002 * possible that a clip might have occured on our in-transit entry, 1003 * requiring an adjustment to the entry in our loop. These macros 1004 * help the pageable and clip_range code deal with the case. The 1005 * conditional costs virtually nothing if no clipping has occured. 
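 *
 * Sketch of the recovery (illustrative): if our entry spanned [A,C)
 * before we slept and another thread clipped it at B, CLIP_CHECK_FWD
 * walks entry->next until entry->end matches the saved end C, and
 * CLIP_CHECK_BACK walks entry->prev until entry->start matches the
 * saved start A.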
1006 */ 1007 1008 #define CLIP_CHECK_BACK(entry, save_start) \ 1009 do { \ 1010 while (entry->start != save_start) { \ 1011 entry = entry->prev; \ 1012 KASSERT(entry != &map->header, ("bad entry clip")); \ 1013 } \ 1014 } while(0) 1015 1016 #define CLIP_CHECK_FWD(entry, save_end) \ 1017 do { \ 1018 while (entry->end != save_end) { \ 1019 entry = entry->next; \ 1020 KASSERT(entry != &map->header, ("bad entry clip")); \ 1021 } \ 1022 } while(0) 1023 1024 1025 /* 1026 * vm_map_clip_range: [ kernel use only ] 1027 * 1028 * Clip the specified range and return the base entry. The 1029 * range may cover several entries starting at the returned base 1030 * and the first and last entry in the covering sequence will be 1031 * properly clipped to the requested start and end address. 1032 * 1033 * If no holes are allowed you should pass the MAP_CLIP_NO_HOLES 1034 * flag. 1035 * 1036 * The MAP_ENTRY_IN_TRANSITION flag will be set for the entries 1037 * covered by the requested range. 1038 * 1039 * The map must be exclusively locked on entry and will remain locked 1040 * on return. If no range exists or the range contains holes and you 1041 * specified that no holes were allowed, NULL will be returned. This 1042 * routine may temporarily unlock the map in order avoid a deadlock when 1043 * sleeping. 1044 */ 1045 static 1046 vm_map_entry_t 1047 vm_map_clip_range(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags) 1048 { 1049 vm_map_entry_t start_entry; 1050 vm_map_entry_t entry; 1051 1052 /* 1053 * Locate the entry and effect initial clipping. The in-transition 1054 * case does not occur very often so do not try to optimize it. 1055 */ 1056 again: 1057 if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) 1058 return (NULL); 1059 entry = start_entry; 1060 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) { 1061 entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; 1062 ++mycpu->gd_cnt.v_intrans_coll; 1063 ++mycpu->gd_cnt.v_intrans_wait; 1064 vm_map_transition_wait(map); 1065 /* 1066 * entry and/or start_entry may have been clipped while 1067 * we slept, or may have gone away entirely. We have 1068 * to restart from the lookup. 1069 */ 1070 goto again; 1071 } 1072 /* 1073 * Since we hold an exclusive map lock we do not have to restart 1074 * after clipping, even though clipping may block in zalloc. 1075 */ 1076 vm_map_clip_start(map, entry, start); 1077 vm_map_clip_end(map, entry, end); 1078 entry->eflags |= MAP_ENTRY_IN_TRANSITION; 1079 1080 /* 1081 * Scan entries covered by the range. When working on the next 1082 * entry a restart need only re-loop on the current entry which 1083 * we have already locked, since 'next' may have changed. Also, 1084 * even though entry is safe, it may have been clipped so we 1085 * have to iterate forwards through the clip after sleeping. 1086 */ 1087 while (entry->next != &map->header && entry->next->start < end) { 1088 vm_map_entry_t next = entry->next; 1089 1090 if (flags & MAP_CLIP_NO_HOLES) { 1091 if (next->start > entry->end) { 1092 vm_map_unclip_range(map, start_entry, 1093 start, entry->end, flags); 1094 return(NULL); 1095 } 1096 } 1097 1098 if (next->eflags & MAP_ENTRY_IN_TRANSITION) { 1099 vm_offset_t save_end = entry->end; 1100 next->eflags |= MAP_ENTRY_NEEDS_WAKEUP; 1101 ++mycpu->gd_cnt.v_intrans_coll; 1102 ++mycpu->gd_cnt.v_intrans_wait; 1103 vm_map_transition_wait(map); 1104 1105 /* 1106 * clips might have occured while we blocked. 
			 */
			CLIP_CHECK_FWD(entry, save_end);
			CLIP_CHECK_BACK(start_entry, start);
			continue;
		}
		/*
		 * No restart necessary even though clip_end may block; we
		 * are holding the map lock.
		 */
		vm_map_clip_end(map, next, end);
		next->eflags |= MAP_ENTRY_IN_TRANSITION;
		entry = next;
	}
	if (flags & MAP_CLIP_NO_HOLES) {
		if (entry->end != end) {
			vm_map_unclip_range(map, start_entry,
				start, entry->end, flags);
			return(NULL);
		}
	}
	return(start_entry);
}

/*
 * vm_map_unclip_range:	[ kernel use only ]
 *
 * Undo the effect of vm_map_clip_range().  You should pass the same
 * flags and the same range that you passed to vm_map_clip_range().
 * This code will clear the in-transition flag on the entries and
 * wake up anyone waiting.  This code will also simplify the sequence
 * and attempt to merge it with entries before and after the sequence.
 *
 * The map must be locked on entry and will remain locked on return.
 *
 * Note that you should also pass the start_entry returned by
 * vm_map_clip_range().  However, if you block between the two calls
 * with the map unlocked please be aware that the start_entry may
 * have been clipped and you may need to scan it backwards to find
 * the entry corresponding to the original start address.  You are
 * responsible for this; vm_map_unclip_range() expects the correct
 * start_entry to be passed to it and will KASSERT otherwise.
 */
static
void
vm_map_unclip_range(
	vm_map_t map,
	vm_map_entry_t start_entry,
	vm_offset_t start,
	vm_offset_t end,
	int flags)
{
	vm_map_entry_t entry;

	entry = start_entry;

	KASSERT(entry->start == start, ("unclip_range: illegal base entry"));
	while (entry != &map->header && entry->start < end) {
		KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION,
		    ("in-transition flag not set during unclip on: %p", entry));
		KASSERT(entry->end <= end, ("unclip_range: tail wasn't clipped"));
		entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
		if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
			entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
			wakeup(map);
		}
		entry = entry->next;
	}

	/*
	 * Simplification does not block so there is no restart case.
	 */
	entry = start_entry;
	while (entry != &map->header && entry->start < end) {
		vm_map_simplify_entry(map, entry);
		entry = entry->next;
	}
}

/*
 * vm_map_submap:	[ kernel use only ]
 *
 * Mark the given range as handled by a subordinate map.
 *
 * This range must have been created with vm_map_find,
 * and no other operations may have been performed on this
 * range prior to calling vm_map_submap.
 *
 * Only a limited number of operations can be performed
 * within this range after calling vm_map_submap:
 *	vm_fault
 * [Don't try vm_map_copy!]
 *
 * To remove a submapping, one must first remove the
 * range from the superior map, and then destroy the
 * submap (if desired).  [Better yet, don't try it.]
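 *
 * A hedged sketch of the expected setup, for illustration only
 * (compare kmem_suballoc(), which performs this sequence for the
 * kernel submaps):
 *
 *	vm_map_find(parent, NULL, 0, &base, size, TRUE,
 *	    VM_PROT_ALL, VM_PROT_ALL, 0);
 *	sub = vm_map_create(vm_map_pmap(parent), base, base + size);
 *	vm_map_submap(parent, base, base + size, sub);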
 */
int
vm_map_submap(map, start, end, submap)
	vm_map_t map;
	vm_offset_t start;
	vm_offset_t end;
	vm_map_t submap;
{
	vm_map_entry_t entry;
	int result = KERN_INVALID_ARGUMENT;

	vm_map_lock(map);

	VM_MAP_RANGE_CHECK(map, start, end);

	if (vm_map_lookup_entry(map, start, &entry)) {
		vm_map_clip_start(map, entry, start);
	} else {
		entry = entry->next;
	}

	vm_map_clip_end(map, entry, end);

	if ((entry->start == start) && (entry->end == end) &&
	    ((entry->eflags & MAP_ENTRY_COW) == 0) &&
	    (entry->object.vm_object == NULL)) {
		entry->object.sub_map = submap;
		entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
		result = KERN_SUCCESS;
	}
	vm_map_unlock(map);

	return (result);
}

/*
 * vm_map_protect:
 *
 * Sets the protection of the specified address
 * region in the target map.  If "set_max" is
 * specified, the maximum protection is to be set;
 * otherwise, only the current protection is affected.
 */
int
vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
	       vm_prot_t new_prot, boolean_t set_max)
{
	vm_map_entry_t current;
	vm_map_entry_t entry;

	vm_map_lock(map);

	VM_MAP_RANGE_CHECK(map, start, end);

	if (vm_map_lookup_entry(map, start, &entry)) {
		vm_map_clip_start(map, entry, start);
	} else {
		entry = entry->next;
	}

	/*
	 * Make a first pass to check for protection violations.
	 */

	current = entry;
	while ((current != &map->header) && (current->start < end)) {
		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
			vm_map_unlock(map);
			return (KERN_INVALID_ARGUMENT);
		}
		if ((new_prot & current->max_protection) != new_prot) {
			vm_map_unlock(map);
			return (KERN_PROTECTION_FAILURE);
		}
		current = current->next;
	}

	/*
	 * Go back and fix up protections.  [Note that clipping is not
	 * necessary the second time.]
	 */

	current = entry;

	while ((current != &map->header) && (current->start < end)) {
		vm_prot_t old_prot;

		vm_map_clip_end(map, current, end);

		old_prot = current->protection;
		if (set_max)
			current->protection =
			    (current->max_protection = new_prot) &
			    old_prot;
		else
			current->protection = new_prot;

		/*
		 * Update physical map if necessary.  Worry about copy-on-write
		 * here -- CHECK THIS XXX
		 */

		if (current->protection != old_prot) {
#define MASK(entry)	(((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
							VM_PROT_ALL)

			pmap_protect(map->pmap, current->start,
			    current->end,
			    current->protection & MASK(current));
#undef MASK
		}

		vm_map_simplify_entry(map, current);

		current = current->next;
	}

	vm_map_unlock(map);
	return (KERN_SUCCESS);
}

/*
 * vm_map_madvise:
 *
 * This routine traverses a process's map handling the madvise
 * system call.  Advisories are classified as either those affecting
 * the vm_map_entry structure, or those affecting the underlying
 * objects.
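 *
 * As the switch below spells out, MADV_NORMAL, MADV_SEQUENTIAL,
 * MADV_RANDOM, MADV_NOSYNC, MADV_AUTOSYNC, MADV_NOCORE and MADV_CORE
 * modify the map entries themselves (exclusive lock, with clipping),
 * while MADV_WILLNEED, MADV_DONTNEED and MADV_FREE are forwarded to
 * the backing objects under a read lock.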
1329 */ 1330 1331 int 1332 vm_map_madvise(map, start, end, behav) 1333 vm_map_t map; 1334 vm_offset_t start, end; 1335 int behav; 1336 { 1337 vm_map_entry_t current, entry; 1338 int modify_map = 0; 1339 1340 /* 1341 * Some madvise calls directly modify the vm_map_entry, in which case 1342 * we need to use an exclusive lock on the map and we need to perform 1343 * various clipping operations. Otherwise we only need a read-lock 1344 * on the map. 1345 */ 1346 1347 switch(behav) { 1348 case MADV_NORMAL: 1349 case MADV_SEQUENTIAL: 1350 case MADV_RANDOM: 1351 case MADV_NOSYNC: 1352 case MADV_AUTOSYNC: 1353 case MADV_NOCORE: 1354 case MADV_CORE: 1355 modify_map = 1; 1356 vm_map_lock(map); 1357 break; 1358 case MADV_WILLNEED: 1359 case MADV_DONTNEED: 1360 case MADV_FREE: 1361 vm_map_lock_read(map); 1362 break; 1363 default: 1364 return (KERN_INVALID_ARGUMENT); 1365 } 1366 1367 /* 1368 * Locate starting entry and clip if necessary. 1369 */ 1370 1371 VM_MAP_RANGE_CHECK(map, start, end); 1372 1373 if (vm_map_lookup_entry(map, start, &entry)) { 1374 if (modify_map) 1375 vm_map_clip_start(map, entry, start); 1376 } else { 1377 entry = entry->next; 1378 } 1379 1380 if (modify_map) { 1381 /* 1382 * madvise behaviors that are implemented in the vm_map_entry. 1383 * 1384 * We clip the vm_map_entry so that behavioral changes are 1385 * limited to the specified address range. 1386 */ 1387 for (current = entry; 1388 (current != &map->header) && (current->start < end); 1389 current = current->next 1390 ) { 1391 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) 1392 continue; 1393 1394 vm_map_clip_end(map, current, end); 1395 1396 switch (behav) { 1397 case MADV_NORMAL: 1398 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL); 1399 break; 1400 case MADV_SEQUENTIAL: 1401 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL); 1402 break; 1403 case MADV_RANDOM: 1404 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM); 1405 break; 1406 case MADV_NOSYNC: 1407 current->eflags |= MAP_ENTRY_NOSYNC; 1408 break; 1409 case MADV_AUTOSYNC: 1410 current->eflags &= ~MAP_ENTRY_NOSYNC; 1411 break; 1412 case MADV_NOCORE: 1413 current->eflags |= MAP_ENTRY_NOCOREDUMP; 1414 break; 1415 case MADV_CORE: 1416 current->eflags &= ~MAP_ENTRY_NOCOREDUMP; 1417 break; 1418 default: 1419 break; 1420 } 1421 vm_map_simplify_entry(map, current); 1422 } 1423 vm_map_unlock(map); 1424 } else { 1425 vm_pindex_t pindex; 1426 int count; 1427 1428 /* 1429 * madvise behaviors that are implemented in the underlying 1430 * vm_object. 1431 * 1432 * Since we don't clip the vm_map_entry, we have to clip 1433 * the vm_object pindex and count. 
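		 *
		 * Worked example (illustrative, 4K pages): for an entry
		 * mapping [0x10000,0x20000) at object offset 0, an madvise
		 * range beginning at 0x14000 advances pindex by
		 * atop(0x4000) == 4 pages and shrinks count by the same
		 * 4 pages, with useStart moved up to 0x14000.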
1434 */ 1435 for (current = entry; 1436 (current != &map->header) && (current->start < end); 1437 current = current->next 1438 ) { 1439 vm_offset_t useStart; 1440 1441 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) 1442 continue; 1443 1444 pindex = OFF_TO_IDX(current->offset); 1445 count = atop(current->end - current->start); 1446 useStart = current->start; 1447 1448 if (current->start < start) { 1449 pindex += atop(start - current->start); 1450 count -= atop(start - current->start); 1451 useStart = start; 1452 } 1453 if (current->end > end) 1454 count -= atop(current->end - end); 1455 1456 if (count <= 0) 1457 continue; 1458 1459 vm_object_madvise(current->object.vm_object, 1460 pindex, count, behav); 1461 if (behav == MADV_WILLNEED) { 1462 pmap_object_init_pt( 1463 map->pmap, 1464 useStart, 1465 current->object.vm_object, 1466 pindex, 1467 (count << PAGE_SHIFT), 1468 MAP_PREFAULT_MADVISE 1469 ); 1470 } 1471 } 1472 vm_map_unlock_read(map); 1473 } 1474 return(0); 1475 } 1476 1477 1478 /* 1479 * vm_map_inherit: 1480 * 1481 * Sets the inheritance of the specified address 1482 * range in the target map. Inheritance 1483 * affects how the map will be shared with 1484 * child maps at the time of vm_map_fork. 1485 */ 1486 int 1487 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end, 1488 vm_inherit_t new_inheritance) 1489 { 1490 vm_map_entry_t entry; 1491 vm_map_entry_t temp_entry; 1492 1493 switch (new_inheritance) { 1494 case VM_INHERIT_NONE: 1495 case VM_INHERIT_COPY: 1496 case VM_INHERIT_SHARE: 1497 break; 1498 default: 1499 return (KERN_INVALID_ARGUMENT); 1500 } 1501 1502 vm_map_lock(map); 1503 1504 VM_MAP_RANGE_CHECK(map, start, end); 1505 1506 if (vm_map_lookup_entry(map, start, &temp_entry)) { 1507 entry = temp_entry; 1508 vm_map_clip_start(map, entry, start); 1509 } else 1510 entry = temp_entry->next; 1511 1512 while ((entry != &map->header) && (entry->start < end)) { 1513 vm_map_clip_end(map, entry, end); 1514 1515 entry->inheritance = new_inheritance; 1516 1517 vm_map_simplify_entry(map, entry); 1518 1519 entry = entry->next; 1520 } 1521 1522 vm_map_unlock(map); 1523 return (KERN_SUCCESS); 1524 } 1525 1526 /* 1527 * Implement the semantics of mlock 1528 */ 1529 int 1530 vm_map_user_pageable(map, start, real_end, new_pageable) 1531 vm_map_t map; 1532 vm_offset_t start; 1533 vm_offset_t real_end; 1534 boolean_t new_pageable; 1535 { 1536 vm_map_entry_t entry; 1537 vm_map_entry_t start_entry; 1538 vm_offset_t end; 1539 int rv = KERN_SUCCESS; 1540 1541 vm_map_lock(map); 1542 VM_MAP_RANGE_CHECK(map, start, real_end); 1543 end = real_end; 1544 1545 start_entry = vm_map_clip_range(map, start, end, MAP_CLIP_NO_HOLES); 1546 if (start_entry == NULL) { 1547 vm_map_unlock(map); 1548 return (KERN_INVALID_ADDRESS); 1549 } 1550 1551 if (new_pageable == 0) { 1552 entry = start_entry; 1553 while ((entry != &map->header) && (entry->start < end)) { 1554 vm_offset_t save_start; 1555 vm_offset_t save_end; 1556 1557 /* 1558 * Already user wired or hard wired (trivial cases) 1559 */ 1560 if (entry->eflags & MAP_ENTRY_USER_WIRED) { 1561 entry = entry->next; 1562 continue; 1563 } 1564 if (entry->wired_count != 0) { 1565 entry->wired_count++; 1566 entry->eflags |= MAP_ENTRY_USER_WIRED; 1567 entry = entry->next; 1568 continue; 1569 } 1570 1571 /* 1572 * A new wiring requires instantiation of appropriate 1573 * management structures and the faulting in of the 1574 * page. 
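			 *
			 * Illustrative summary of the code below: a
			 * writable copy-on-write entry is given a shadow
			 * object via vm_object_shadow(), and an entry with
			 * no backing object gets a fresh OBJT_DEFAULT
			 * object, before wired_count is bumped and the
			 * pages are faulted in.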
1575 */ 1576 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { 1577 int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY; 1578 if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) { 1579 1580 vm_object_shadow(&entry->object.vm_object, 1581 &entry->offset, 1582 atop(entry->end - entry->start)); 1583 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; 1584 1585 } else if (entry->object.vm_object == NULL && 1586 !map->system_map) { 1587 1588 entry->object.vm_object = 1589 vm_object_allocate(OBJT_DEFAULT, 1590 atop(entry->end - entry->start)); 1591 entry->offset = (vm_offset_t) 0; 1592 1593 } 1594 } 1595 entry->wired_count++; 1596 entry->eflags |= MAP_ENTRY_USER_WIRED; 1597 1598 /* 1599 * Now fault in the area. The map lock needs to be 1600 * manipulated to avoid deadlocks. The in-transition 1601 * flag protects the entries. 1602 */ 1603 save_start = entry->start; 1604 save_end = entry->end; 1605 vm_map_unlock(map); 1606 map->timestamp++; 1607 rv = vm_fault_user_wire(map, save_start, save_end); 1608 vm_map_lock(map); 1609 if (rv) { 1610 CLIP_CHECK_BACK(entry, save_start); 1611 for (;;) { 1612 KASSERT(entry->wired_count == 1, ("bad wired_count on entry")); 1613 entry->eflags &= ~MAP_ENTRY_USER_WIRED; 1614 entry->wired_count = 0; 1615 if (entry->end == save_end) 1616 break; 1617 entry = entry->next; 1618 KASSERT(entry != &map->header, ("bad entry clip during backout")); 1619 } 1620 end = save_start; /* unwire the rest */ 1621 break; 1622 } 1623 /* 1624 * note that even though the entry might have been 1625 * clipped, the USER_WIRED flag we set prevents 1626 * duplication so we do not have to do a 1627 * clip check. 1628 */ 1629 entry = entry->next; 1630 } 1631 1632 /* 1633 * If we failed fall through to the unwiring section to 1634 * unwire what we had wired so far. 'end' has already 1635 * been adjusted. 1636 */ 1637 if (rv) 1638 new_pageable = 1; 1639 1640 /* 1641 * start_entry might have been clipped if we unlocked the 1642 * map and blocked. No matter how clipped it has gotten 1643 * there should be a fragment that is on our start boundary. 1644 */ 1645 CLIP_CHECK_BACK(start_entry, start); 1646 } 1647 1648 /* 1649 * Deal with the unwiring case. 1650 */ 1651 if (new_pageable) { 1652 /* 1653 * This is the unwiring case. We must first ensure that the 1654 * range to be unwired is really wired down. We know there 1655 * are no holes. 1656 */ 1657 entry = start_entry; 1658 while ((entry != &map->header) && (entry->start < end)) { 1659 if ((entry->eflags & MAP_ENTRY_USER_WIRED) == 0) { 1660 rv = KERN_INVALID_ARGUMENT; 1661 goto done; 1662 } 1663 KASSERT(entry->wired_count != 0, ("wired count was 0 with USER_WIRED set! %p", entry)); 1664 entry = entry->next; 1665 } 1666 1667 /* 1668 * Now decrement the wiring count for each region. If a region 1669 * becomes completely unwired, unwire its physical pages and 1670 * mappings. 1671 */ 1672 while ((entry != &map->header) && (entry->start < end)) { 1673 KASSERT(entry->eflags & MAP_ENTRY_USER_WIRED, ("expected USER_WIRED on entry %p", entry)); 1674 entry->eflags &= ~MAP_ENTRY_USER_WIRED; 1675 entry->wired_count--; 1676 if (entry->wired_count == 0) 1677 vm_fault_unwire(map, entry->start, entry->end); 1678 entry = entry->next; 1679 } 1680 } 1681 done: 1682 vm_map_unclip_range(map, start_entry, start, real_end, 1683 MAP_CLIP_NO_HOLES); 1684 map->timestamp++; 1685 vm_map_unlock(map); 1686 return (rv); 1687 } 1688 1689 /* 1690 * vm_map_pageable: 1691 * 1692 * Sets the pageability of the specified address 1693 * range in the target map. 
Regions specified 1694 * as not pageable require locked-down physical 1695 * memory and physical page maps. 1696 * 1697 * The map must not be locked, but a reference 1698 * must remain to the map throughout the call. 1699 */ 1700 int 1701 vm_map_pageable(map, start, real_end, new_pageable) 1702 vm_map_t map; 1703 vm_offset_t start; 1704 vm_offset_t real_end; 1705 boolean_t new_pageable; 1706 { 1707 vm_map_entry_t entry; 1708 vm_map_entry_t start_entry; 1709 vm_offset_t end; 1710 int rv = KERN_SUCCESS; 1711 int s; 1712 1713 vm_map_lock(map); 1714 VM_MAP_RANGE_CHECK(map, start, real_end); 1715 end = real_end; 1716 1717 start_entry = vm_map_clip_range(map, start, end, MAP_CLIP_NO_HOLES); 1718 if (start_entry == NULL) { 1719 vm_map_unlock(map); 1720 return (KERN_INVALID_ADDRESS); 1721 } 1722 if (new_pageable == 0) { 1723 /* 1724 * Wiring. 1725 * 1726 * 1. Holding the write lock, we create any shadow or zero-fill 1727 * objects that need to be created. Then we clip each map 1728 * entry to the region to be wired and increment its wiring 1729 * count. We create objects before clipping the map entries 1730 * to avoid object proliferation. 1731 * 1732 * 2. We downgrade to a read lock, and call vm_fault_wire to 1733 * fault in the pages for any newly wired area (wired_count is 1734 * 1). 1735 * 1736 * Downgrading to a read lock for vm_fault_wire avoids a 1737 * possible deadlock with another process that may have faulted 1738 * on one of the pages to be wired (it would mark the page busy, 1739 * blocking us, then in turn block on the map lock that we 1740 * hold). Because of problems in the recursive lock package, 1741 * we cannot upgrade to a write lock in vm_map_lookup. Thus, 1742 * any actions that require the write lock must be done 1743 * beforehand. Because we keep the read lock on the map, the 1744 * copy-on-write status of the entries we modify here cannot 1745 * change. 1746 */ 1747 1748 entry = start_entry; 1749 while ((entry != &map->header) && (entry->start < end)) { 1750 /* 1751 * Trivial case if the entry is already wired 1752 */ 1753 if (entry->wired_count) { 1754 entry->wired_count++; 1755 entry = entry->next; 1756 continue; 1757 } 1758 1759 /* 1760 * The entry is being newly wired, we have to setup 1761 * appropriate management structures. A shadow 1762 * object is required for a copy-on-write region, 1763 * or a normal object for a zero-fill region. We 1764 * do not have to do this for entries that point to sub 1765 * maps because we won't hold the lock on the sub map. 1766 */ 1767 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { 1768 int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY; 1769 if (copyflag && 1770 ((entry->protection & VM_PROT_WRITE) != 0)) { 1771 1772 vm_object_shadow(&entry->object.vm_object, 1773 &entry->offset, 1774 atop(entry->end - entry->start)); 1775 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; 1776 } else if (entry->object.vm_object == NULL && 1777 !map->system_map) { 1778 entry->object.vm_object = 1779 vm_object_allocate(OBJT_DEFAULT, 1780 atop(entry->end - entry->start)); 1781 entry->offset = (vm_offset_t) 0; 1782 } 1783 } 1784 1785 entry->wired_count++; 1786 entry = entry->next; 1787 } 1788 1789 /* 1790 * Pass 2. 1791 */ 1792 1793 /* 1794 * HACK HACK HACK HACK 1795 * 1796 * Unlock the map to avoid deadlocks. The in-transit flag 1797 * protects us from most changes but note that 1798 * clipping may still occur. To prevent clipping from 1799 * occuring after the unlock, except for when we are 1800 * blocking in vm_fault_wire, we must run at splvm(). 
1801 * Otherwise our accesses to entry->start and entry->end 1802 * could be corrupted. We have to set splvm() prior to 1803 * unlocking so start_entry does not change out from 1804 * under us at the very beginning of the loop. 1805 * 1806 * HACK HACK HACK HACK 1807 */ 1808 1809 s = splvm(); 1810 vm_map_unlock(map); 1811 1812 entry = start_entry; 1813 while (entry != &map->header && entry->start < end) { 1814 /* 1815 * If vm_fault_wire fails for any page we need to undo 1816 * what has been done. We decrement the wiring count 1817 * for those pages which have not yet been wired (now) 1818 * and unwire those that have (later). 1819 */ 1820 vm_offset_t save_start = entry->start; 1821 vm_offset_t save_end = entry->end; 1822 1823 if (entry->wired_count == 1) 1824 rv = vm_fault_wire(map, entry->start, entry->end); 1825 if (rv) { 1826 CLIP_CHECK_BACK(entry, save_start); 1827 for (;;) { 1828 KASSERT(entry->wired_count == 1, ("wired_count changed unexpectedly")); 1829 entry->wired_count = 0; 1830 if (entry->end == save_end) 1831 break; 1832 entry = entry->next; 1833 KASSERT(entry != &map->header, ("bad entry clip during backout")); 1834 } 1835 end = save_start; 1836 break; 1837 } 1838 CLIP_CHECK_FWD(entry, save_end); 1839 entry = entry->next; 1840 } 1841 splx(s); 1842 1843 /* 1844 * relock. start_entry is still IN_TRANSITION and must 1845 * still exist, but may have been clipped (handled just 1846 * below). 1847 */ 1848 vm_map_lock(map); 1849 1850 /* 1851 * If a failure occured undo everything by falling through 1852 * to the unwiring code. 'end' has already been adjusted 1853 * appropriately. 1854 */ 1855 if (rv) 1856 new_pageable = 1; 1857 1858 /* 1859 * start_entry might have been clipped if we unlocked the 1860 * map and blocked. No matter how clipped it has gotten 1861 * there should be a fragment that is on our start boundary. 1862 */ 1863 CLIP_CHECK_BACK(start_entry, start); 1864 } 1865 1866 if (new_pageable) { 1867 /* 1868 * This is the unwiring case. We must first ensure that the 1869 * range to be unwired is really wired down. We know there 1870 * are no holes. 1871 */ 1872 entry = start_entry; 1873 while ((entry != &map->header) && (entry->start < end)) { 1874 if (entry->wired_count == 0) { 1875 rv = KERN_INVALID_ARGUMENT; 1876 goto done; 1877 } 1878 entry = entry->next; 1879 } 1880 1881 /* 1882 * Now decrement the wiring count for each region. If a region 1883 * becomes completely unwired, unwire its physical pages and 1884 * mappings. 1885 */ 1886 entry = start_entry; 1887 while ((entry != &map->header) && (entry->start < end)) { 1888 entry->wired_count--; 1889 if (entry->wired_count == 0) 1890 vm_fault_unwire(map, entry->start, entry->end); 1891 entry = entry->next; 1892 } 1893 } 1894 done: 1895 vm_map_unclip_range(map, start_entry, start, real_end, 1896 MAP_CLIP_NO_HOLES); 1897 map->timestamp++; 1898 vm_map_unlock(map); 1899 return (rv); 1900 } 1901 1902 /* 1903 * vm_map_clean 1904 * 1905 * Push any dirty cached pages in the address range to their pager. 1906 * If syncio is TRUE, dirty pages are written synchronously. 1907 * If invalidate is TRUE, any cached pages are freed as well. 1908 * 1909 * Returns an error if any part of the specified range is not mapped. 
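 *
 * Hedged usage note (not from the original comment): this routine is
 * the sort of back end msync(2) is expected to use, with syncio and
 * invalidate corresponding roughly to MS_SYNC and MS_INVALIDATE
 * semantics over the range.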
1910 */ 1911 int 1912 vm_map_clean(map, start, end, syncio, invalidate) 1913 vm_map_t map; 1914 vm_offset_t start; 1915 vm_offset_t end; 1916 boolean_t syncio; 1917 boolean_t invalidate; 1918 { 1919 vm_map_entry_t current; 1920 vm_map_entry_t entry; 1921 vm_size_t size; 1922 vm_object_t object; 1923 vm_ooffset_t offset; 1924 1925 vm_map_lock_read(map); 1926 VM_MAP_RANGE_CHECK(map, start, end); 1927 if (!vm_map_lookup_entry(map, start, &entry)) { 1928 vm_map_unlock_read(map); 1929 return (KERN_INVALID_ADDRESS); 1930 } 1931 /* 1932 * Make a first pass to check for holes. 1933 */ 1934 for (current = entry; current->start < end; current = current->next) { 1935 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) { 1936 vm_map_unlock_read(map); 1937 return (KERN_INVALID_ARGUMENT); 1938 } 1939 if (end > current->end && 1940 (current->next == &map->header || 1941 current->end != current->next->start)) { 1942 vm_map_unlock_read(map); 1943 return (KERN_INVALID_ADDRESS); 1944 } 1945 } 1946 1947 if (invalidate) 1948 pmap_remove(vm_map_pmap(map), start, end); 1949 /* 1950 * Make a second pass, cleaning/uncaching pages from the indicated 1951 * objects as we go. 1952 */ 1953 for (current = entry; current->start < end; current = current->next) { 1954 offset = current->offset + (start - current->start); 1955 size = (end <= current->end ? end : current->end) - start; 1956 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) { 1957 vm_map_t smap; 1958 vm_map_entry_t tentry; 1959 vm_size_t tsize; 1960 1961 smap = current->object.sub_map; 1962 vm_map_lock_read(smap); 1963 (void) vm_map_lookup_entry(smap, offset, &tentry); 1964 tsize = tentry->end - offset; 1965 if (tsize < size) 1966 size = tsize; 1967 object = tentry->object.vm_object; 1968 offset = tentry->offset + (offset - tentry->start); 1969 vm_map_unlock_read(smap); 1970 } else { 1971 object = current->object.vm_object; 1972 } 1973 /* 1974 * Note that there is absolutely no sense in writing out 1975 * anonymous objects, so we track down the vnode object 1976 * to write out. 1977 * We invalidate (remove) all pages from the address space 1978 * anyway, for semantic correctness. 1979 * 1980 * note: certain anonymous maps, such as MAP_NOSYNC maps, 1981 * may start out with a NULL object. 1982 */ 1983 while (object && object->backing_object) { 1984 object = object->backing_object; 1985 offset += object->backing_object_offset; 1986 if (object->size < OFF_TO_IDX( offset + size)) 1987 size = IDX_TO_OFF(object->size) - offset; 1988 } 1989 if (object && (object->type == OBJT_VNODE) && 1990 (current->protection & VM_PROT_WRITE)) { 1991 /* 1992 * Flush pages if writing is allowed, invalidate them 1993 * if invalidation requested. Pages undergoing I/O 1994 * will be ignored by vm_object_page_remove(). 1995 * 1996 * We cannot lock the vnode and then wait for paging 1997 * to complete without deadlocking against vm_fault. 1998 * Instead we simply call vm_object_page_remove() and 1999 * allow it to block internally on a page-by-page 2000 * basis when it encounters pages undergoing async 2001 * I/O. 2002 */ 2003 int flags; 2004 2005 vm_object_reference(object); 2006 vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curthread); 2007 flags = (syncio || invalidate) ? OBJPC_SYNC : 0; 2008 flags |= invalidate ? 
OBJPC_INVAL : 0; 2009 vm_object_page_clean(object, 2010 OFF_TO_IDX(offset), 2011 OFF_TO_IDX(offset + size + PAGE_MASK), 2012 flags); 2013 VOP_UNLOCK(object->handle, 0, curthread); 2014 vm_object_deallocate(object); 2015 } 2016 if (object && invalidate && 2017 ((object->type == OBJT_VNODE) || 2018 (object->type == OBJT_DEVICE))) { 2019 vm_object_reference(object); 2020 vm_object_page_remove(object, 2021 OFF_TO_IDX(offset), 2022 OFF_TO_IDX(offset + size + PAGE_MASK), 2023 FALSE); 2024 vm_object_deallocate(object); 2025 } 2026 start += size; 2027 } 2028 2029 vm_map_unlock_read(map); 2030 return (KERN_SUCCESS); 2031 } 2032 2033 /* 2034 * vm_map_entry_unwire: [ internal use only ] 2035 * 2036 * Make the region specified by this entry pageable. 2037 * 2038 * The map in question should be locked. 2039 * [This is the reason for this routine's existence.] 2040 */ 2041 static void 2042 vm_map_entry_unwire(map, entry) 2043 vm_map_t map; 2044 vm_map_entry_t entry; 2045 { 2046 vm_fault_unwire(map, entry->start, entry->end); 2047 entry->wired_count = 0; 2048 } 2049 2050 /* 2051 * vm_map_entry_delete: [ internal use only ] 2052 * 2053 * Deallocate the given entry from the target map. 2054 */ 2055 static void 2056 vm_map_entry_delete(map, entry) 2057 vm_map_t map; 2058 vm_map_entry_t entry; 2059 { 2060 vm_map_entry_unlink(map, entry); 2061 map->size -= entry->end - entry->start; 2062 2063 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { 2064 vm_object_deallocate(entry->object.vm_object); 2065 } 2066 2067 vm_map_entry_dispose(map, entry); 2068 } 2069 2070 /* 2071 * vm_map_delete: [ internal use only ] 2072 * 2073 * Deallocates the given address range from the target 2074 * map. 2075 */ 2076 int 2077 vm_map_delete(map, start, end) 2078 vm_map_t map; 2079 vm_offset_t start; 2080 vm_offset_t end; 2081 { 2082 vm_object_t object; 2083 vm_map_entry_t entry; 2084 vm_map_entry_t first_entry; 2085 2086 /* 2087 * Find the start of the region, and clip it 2088 */ 2089 2090 again: 2091 if (!vm_map_lookup_entry(map, start, &first_entry)) 2092 entry = first_entry->next; 2093 else { 2094 entry = first_entry; 2095 vm_map_clip_start(map, entry, start); 2096 /* 2097 * Fix the lookup hint now, rather than each time through the 2098 * loop. 2099 */ 2100 SAVE_HINT(map, entry->prev); 2101 } 2102 2103 /* 2104 * Save the free space hint 2105 */ 2106 2107 if (entry == &map->header) { 2108 map->first_free = &map->header; 2109 } else if (map->first_free->start >= start) { 2110 map->first_free = entry->prev; 2111 } 2112 2113 /* 2114 * Step through all entries in this region 2115 */ 2116 2117 while ((entry != &map->header) && (entry->start < end)) { 2118 vm_map_entry_t next; 2119 vm_offset_t s, e; 2120 vm_pindex_t offidxstart, offidxend, count; 2121 2122 /* 2123 * If we hit an in-transition entry we have to sleep and 2124 * retry. It's easier (and not really slower) to just retry 2125 * since this case occurs so rarely and the hint is already 2126 * pointing at the right place. We have to reset the 2127 * start offset so as not to accidentally delete an entry 2128 * another process just created in vacated space.
2129 */ 2130 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) { 2131 entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; 2132 start = entry->start; 2133 ++mycpu->gd_cnt.v_intrans_coll; 2134 ++mycpu->gd_cnt.v_intrans_wait; 2135 vm_map_transition_wait(map); 2136 goto again; 2137 } 2138 vm_map_clip_end(map, entry, end); 2139 2140 s = entry->start; 2141 e = entry->end; 2142 next = entry->next; 2143 2144 offidxstart = OFF_TO_IDX(entry->offset); 2145 count = OFF_TO_IDX(e - s); 2146 object = entry->object.vm_object; 2147 2148 /* 2149 * Unwire before removing addresses from the pmap; otherwise, 2150 * unwiring will put the entries back in the pmap. 2151 */ 2152 if (entry->wired_count != 0) { 2153 vm_map_entry_unwire(map, entry); 2154 } 2155 2156 offidxend = offidxstart + count; 2157 2158 if ((object == kernel_object) || (object == kmem_object)) { 2159 vm_object_page_remove(object, offidxstart, offidxend, FALSE); 2160 } else { 2161 pmap_remove(map->pmap, s, e); 2162 if (object != NULL && 2163 object->ref_count != 1 && 2164 (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING && 2165 (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { 2166 vm_object_collapse(object); 2167 vm_object_page_remove(object, offidxstart, offidxend, FALSE); 2168 if (object->type == OBJT_SWAP) { 2169 swap_pager_freespace(object, offidxstart, count); 2170 } 2171 if (offidxend >= object->size && 2172 offidxstart < object->size) { 2173 object->size = offidxstart; 2174 } 2175 } 2176 } 2177 2178 /* 2179 * Delete the entry (which may delete the object) only after 2180 * removing all pmap entries pointing to its pages. 2181 * (Otherwise, its page frames may be reallocated, and any 2182 * modify bits will be set in the wrong object!) 2183 */ 2184 vm_map_entry_delete(map, entry); 2185 entry = next; 2186 } 2187 return (KERN_SUCCESS); 2188 } 2189 2190 /* 2191 * vm_map_remove: 2192 * 2193 * Remove the given address range from the target map. 2194 * This is the exported form of vm_map_delete. 2195 */ 2196 int 2197 vm_map_remove(map, start, end) 2198 vm_map_t map; 2199 vm_offset_t start; 2200 vm_offset_t end; 2201 { 2202 int result, s = 0; 2203 2204 if (map == kmem_map || map == mb_map) 2205 s = splvm(); 2206 2207 vm_map_lock(map); 2208 VM_MAP_RANGE_CHECK(map, start, end); 2209 result = vm_map_delete(map, start, end); 2210 vm_map_unlock(map); 2211 2212 if (map == kmem_map || map == mb_map) 2213 splx(s); 2214 2215 return (result); 2216 } 2217 2218 /* 2219 * vm_map_check_protection: 2220 * 2221 * Assert that the target map allows the specified 2222 * privilege on the entire address region given. 2223 * The entire region must be allocated. 2224 */ 2225 boolean_t 2226 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end, 2227 vm_prot_t protection) 2228 { 2229 vm_map_entry_t entry; 2230 vm_map_entry_t tmp_entry; 2231 2232 if (!vm_map_lookup_entry(map, start, &tmp_entry)) { 2233 return (FALSE); 2234 } 2235 entry = tmp_entry; 2236 2237 while (start < end) { 2238 if (entry == &map->header) { 2239 return (FALSE); 2240 } 2241 /* 2242 * No holes allowed! 2243 */ 2244 2245 if (start < entry->start) { 2246 return (FALSE); 2247 } 2248 /* 2249 * Check protection associated with entry. 2250 */ 2251 2252 if ((entry->protection & protection) != protection) { 2253 return (FALSE); 2254 } 2255 /* go to next entry */ 2256 2257 start = entry->end; 2258 entry = entry->next; 2259 } 2260 return (TRUE); 2261 } 2262 2263 /* 2264 * Split the pages in a map entry into a new object. 
This affords 2265 * easier removal of unused pages, and keeps object inheritance from 2266 * being a negative impact on memory usage. 2267 */ 2268 static void 2269 vm_map_split(entry) 2270 vm_map_entry_t entry; 2271 { 2272 vm_page_t m; 2273 vm_object_t orig_object, new_object, source; 2274 vm_offset_t s, e; 2275 vm_pindex_t offidxstart, offidxend, idx; 2276 vm_size_t size; 2277 vm_ooffset_t offset; 2278 2279 orig_object = entry->object.vm_object; 2280 if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP) 2281 return; 2282 if (orig_object->ref_count <= 1) 2283 return; 2284 2285 offset = entry->offset; 2286 s = entry->start; 2287 e = entry->end; 2288 2289 offidxstart = OFF_TO_IDX(offset); 2290 offidxend = offidxstart + OFF_TO_IDX(e - s); 2291 size = offidxend - offidxstart; 2292 2293 new_object = vm_pager_allocate(orig_object->type, 2294 NULL, IDX_TO_OFF(size), VM_PROT_ALL, 0LL); 2295 if (new_object == NULL) 2296 return; 2297 2298 source = orig_object->backing_object; 2299 if (source != NULL) { 2300 vm_object_reference(source); /* Referenced by new_object */ 2301 LIST_INSERT_HEAD(&source->shadow_head, 2302 new_object, shadow_list); 2303 vm_object_clear_flag(source, OBJ_ONEMAPPING); 2304 new_object->backing_object_offset = 2305 orig_object->backing_object_offset + IDX_TO_OFF(offidxstart); 2306 new_object->backing_object = source; 2307 source->shadow_count++; 2308 source->generation++; 2309 } 2310 2311 for (idx = 0; idx < size; idx++) { 2312 vm_page_t m; 2313 2314 retry: 2315 m = vm_page_lookup(orig_object, offidxstart + idx); 2316 if (m == NULL) 2317 continue; 2318 2319 /* 2320 * We must wait for pending I/O to complete before we can 2321 * rename the page. 2322 * 2323 * We do not have to VM_PROT_NONE the page as mappings should 2324 * not be changed by this operation. 2325 */ 2326 if (vm_page_sleep_busy(m, TRUE, "spltwt")) 2327 goto retry; 2328 2329 vm_page_busy(m); 2330 vm_page_rename(m, new_object, idx); 2331 /* page automatically made dirty by rename and cache handled */ 2332 vm_page_busy(m); 2333 } 2334 2335 if (orig_object->type == OBJT_SWAP) { 2336 vm_object_pip_add(orig_object, 1); 2337 /* 2338 * copy orig_object pages into new_object 2339 * and destroy unneeded pages in 2340 * shadow object. 2341 */ 2342 swap_pager_copy(orig_object, new_object, offidxstart, 0); 2343 vm_object_pip_wakeup(orig_object); 2344 } 2345 2346 for (idx = 0; idx < size; idx++) { 2347 m = vm_page_lookup(new_object, idx); 2348 if (m) { 2349 vm_page_wakeup(m); 2350 } 2351 } 2352 2353 entry->object.vm_object = new_object; 2354 entry->offset = 0LL; 2355 vm_object_deallocate(orig_object); 2356 } 2357 2358 /* 2359 * vm_map_copy_entry: 2360 * 2361 * Copies the contents of the source entry to the destination 2362 * entry. The entries *must* be aligned properly. 2363 */ 2364 static void 2365 vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) 2366 vm_map_t src_map, dst_map; 2367 vm_map_entry_t src_entry, dst_entry; 2368 { 2369 vm_object_t src_object; 2370 2371 if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP) 2372 return; 2373 2374 if (src_entry->wired_count == 0) { 2375 2376 /* 2377 * If the source entry is marked needs_copy, it is already 2378 * write-protected. 2379 */ 2380 if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) { 2381 pmap_protect(src_map->pmap, 2382 src_entry->start, 2383 src_entry->end, 2384 src_entry->protection & ~VM_PROT_WRITE); 2385 } 2386 2387 /* 2388 * Make a copy of the object. 
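 *
 * Both the source and destination entries end up referencing the same
 * object with MAP_ENTRY_NEEDS_COPY set, so the actual copy is deferred
 * until one of the mappings is written.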
2389 */ 2390 if ((src_object = src_entry->object.vm_object) != NULL) { 2391 2392 if ((src_object->handle == NULL) && 2393 (src_object->type == OBJT_DEFAULT || 2394 src_object->type == OBJT_SWAP)) { 2395 vm_object_collapse(src_object); 2396 if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) { 2397 vm_map_split(src_entry); 2398 src_object = src_entry->object.vm_object; 2399 } 2400 } 2401 2402 vm_object_reference(src_object); 2403 vm_object_clear_flag(src_object, OBJ_ONEMAPPING); 2404 dst_entry->object.vm_object = src_object; 2405 src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY); 2406 dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY); 2407 dst_entry->offset = src_entry->offset; 2408 } else { 2409 dst_entry->object.vm_object = NULL; 2410 dst_entry->offset = 0; 2411 } 2412 2413 pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start, 2414 dst_entry->end - dst_entry->start, src_entry->start); 2415 } else { 2416 /* 2417 * Of course, wired down pages can't be set copy-on-write. 2418 * Cause wired pages to be copied into the new map by 2419 * simulating faults (the new pages are pageable) 2420 */ 2421 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry); 2422 } 2423 } 2424 2425 /* 2426 * vmspace_fork: 2427 * Create a new process vmspace structure and vm_map 2428 * based on those of an existing process. The new map 2429 * is based on the old map, according to the inheritance 2430 * values on the regions in that map. 2431 * 2432 * The source map must not be locked. 2433 */ 2434 struct vmspace * 2435 vmspace_fork(vm1) 2436 struct vmspace *vm1; 2437 { 2438 struct vmspace *vm2; 2439 vm_map_t old_map = &vm1->vm_map; 2440 vm_map_t new_map; 2441 vm_map_entry_t old_entry; 2442 vm_map_entry_t new_entry; 2443 vm_object_t object; 2444 2445 vm_map_lock(old_map); 2446 old_map->infork = 1; 2447 2448 vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset); 2449 bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy, 2450 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); 2451 new_map = &vm2->vm_map; /* XXX */ 2452 new_map->timestamp = 1; 2453 2454 old_entry = old_map->header.next; 2455 2456 while (old_entry != &old_map->header) { 2457 if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP) 2458 panic("vm_map_fork: encountered a submap"); 2459 2460 switch (old_entry->inheritance) { 2461 case VM_INHERIT_NONE: 2462 break; 2463 2464 case VM_INHERIT_SHARE: 2465 /* 2466 * Clone the entry, creating the shared object if necessary. 2467 */ 2468 object = old_entry->object.vm_object; 2469 if (object == NULL) { 2470 object = vm_object_allocate(OBJT_DEFAULT, 2471 atop(old_entry->end - old_entry->start)); 2472 old_entry->object.vm_object = object; 2473 old_entry->offset = (vm_offset_t) 0; 2474 } 2475 2476 /* 2477 * Add the reference before calling vm_object_shadow 2478 * to insure that a shadow object is created. 2479 */ 2480 vm_object_reference(object); 2481 if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) { 2482 vm_object_shadow(&old_entry->object.vm_object, 2483 &old_entry->offset, 2484 atop(old_entry->end - old_entry->start)); 2485 old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; 2486 /* Transfer the second reference too. */ 2487 vm_object_reference( 2488 old_entry->object.vm_object); 2489 vm_object_deallocate(object); 2490 object = old_entry->object.vm_object; 2491 } 2492 vm_object_clear_flag(object, OBJ_ONEMAPPING); 2493 2494 /* 2495 * Clone the entry, referencing the shared object. 
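 *
 * The cloned entry does not inherit the parent's wiring; the user-wired
 * flag and the wired count are cleared on the new entry.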
2496 */ 2497 new_entry = vm_map_entry_create(new_map); 2498 *new_entry = *old_entry; 2499 new_entry->eflags &= ~MAP_ENTRY_USER_WIRED; 2500 new_entry->wired_count = 0; 2501 2502 /* 2503 * Insert the entry into the new map -- we know we're 2504 * inserting at the end of the new map. 2505 */ 2506 2507 vm_map_entry_link(new_map, new_map->header.prev, 2508 new_entry); 2509 2510 /* 2511 * Update the physical map 2512 */ 2513 2514 pmap_copy(new_map->pmap, old_map->pmap, 2515 new_entry->start, 2516 (old_entry->end - old_entry->start), 2517 old_entry->start); 2518 break; 2519 2520 case VM_INHERIT_COPY: 2521 /* 2522 * Clone the entry and link into the map. 2523 */ 2524 new_entry = vm_map_entry_create(new_map); 2525 *new_entry = *old_entry; 2526 new_entry->eflags &= ~MAP_ENTRY_USER_WIRED; 2527 new_entry->wired_count = 0; 2528 new_entry->object.vm_object = NULL; 2529 vm_map_entry_link(new_map, new_map->header.prev, 2530 new_entry); 2531 vm_map_copy_entry(old_map, new_map, old_entry, 2532 new_entry); 2533 break; 2534 } 2535 old_entry = old_entry->next; 2536 } 2537 2538 new_map->size = old_map->size; 2539 old_map->infork = 0; 2540 vm_map_unlock(old_map); 2541 2542 return (vm2); 2543 } 2544 2545 int 2546 vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, 2547 vm_prot_t prot, vm_prot_t max, int cow) 2548 { 2549 vm_map_entry_t prev_entry; 2550 vm_map_entry_t new_stack_entry; 2551 vm_size_t init_ssize; 2552 int rv; 2553 2554 if (VM_MIN_ADDRESS > 0 && addrbos < VM_MIN_ADDRESS) 2555 return (KERN_NO_SPACE); 2556 2557 if (max_ssize < sgrowsiz) 2558 init_ssize = max_ssize; 2559 else 2560 init_ssize = sgrowsiz; 2561 2562 vm_map_lock(map); 2563 2564 /* If addr is already mapped, no go */ 2565 if (vm_map_lookup_entry(map, addrbos, &prev_entry)) { 2566 vm_map_unlock(map); 2567 return (KERN_NO_SPACE); 2568 } 2569 2570 /* If we would blow our VMEM resource limit, no go */ 2571 if (map->size + init_ssize > 2572 curproc->p_rlimit[RLIMIT_VMEM].rlim_cur) { 2573 vm_map_unlock(map); 2574 return (KERN_NO_SPACE); 2575 } 2576 2577 /* If we can't accommodate max_ssize in the current mapping, 2578 * no go. However, we need to be aware that subsequent user 2579 * mappings might map into the space we have reserved for 2580 * stack, and currently this space is not protected. 2581 * 2582 * Hopefully we will at least detect this condition 2583 * when we try to grow the stack. 2584 */ 2585 if ((prev_entry->next != &map->header) && 2586 (prev_entry->next->start < addrbos + max_ssize)) { 2587 vm_map_unlock(map); 2588 return (KERN_NO_SPACE); 2589 } 2590 2591 /* We initially map a stack of only init_ssize. We will 2592 * grow as needed later. Since this is to be a grow 2593 * down stack, we map at the top of the range. 2594 * 2595 * Note: we would normally expect prot and max to be 2596 * VM_PROT_ALL, and cow to be 0. Possibly we should 2597 * eliminate these as input parameters, and just 2598 * pass these values here in the insert call.
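 *
 * The initial mapping covers only the top init_ssize bytes of the
 * reserved range; the remainder is recorded in avail_ssize and is
 * mapped in later by vm_map_growstack() as the stack grows down.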
2599 */ 2600 rv = vm_map_insert(map, NULL, 0, addrbos + max_ssize - init_ssize, 2601 addrbos + max_ssize, prot, max, cow); 2602 2603 /* Now set the avail_ssize amount */ 2604 if (rv == KERN_SUCCESS){ 2605 if (prev_entry != &map->header) 2606 vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize); 2607 new_stack_entry = prev_entry->next; 2608 if (new_stack_entry->end != addrbos + max_ssize || 2609 new_stack_entry->start != addrbos + max_ssize - init_ssize) 2610 panic ("Bad entry start/end for new stack entry"); 2611 else 2612 new_stack_entry->avail_ssize = max_ssize - init_ssize; 2613 } 2614 2615 vm_map_unlock(map); 2616 return (rv); 2617 } 2618 2619 /* Attempts to grow a vm stack entry. Returns KERN_SUCCESS if the 2620 * desired address is already mapped, or if we successfully grow 2621 * the stack. Also returns KERN_SUCCESS if addr is outside the 2622 * stack range (this is strange, but preserves compatibility with 2623 * the grow function in vm_machdep.c). 2624 */ 2625 int 2626 vm_map_growstack (struct proc *p, vm_offset_t addr) 2627 { 2628 vm_map_entry_t prev_entry; 2629 vm_map_entry_t stack_entry; 2630 vm_map_entry_t new_stack_entry; 2631 struct vmspace *vm = p->p_vmspace; 2632 vm_map_t map = &vm->vm_map; 2633 vm_offset_t end; 2634 int grow_amount; 2635 int rv = KERN_SUCCESS; 2636 int is_procstack; 2637 int use_read_lock = 1; 2638 2639 Retry: 2640 if (use_read_lock) 2641 vm_map_lock_read(map); 2642 else 2643 vm_map_lock(map); 2644 2645 /* If addr is already in the entry range, no need to grow.*/ 2646 if (vm_map_lookup_entry(map, addr, &prev_entry)) 2647 goto done; 2648 2649 if ((stack_entry = prev_entry->next) == &map->header) 2650 goto done; 2651 if (prev_entry == &map->header) 2652 end = stack_entry->start - stack_entry->avail_ssize; 2653 else 2654 end = prev_entry->end; 2655 2656 /* This next test mimics the old grow function in vm_machdep.c. 2657 * It really doesn't quite make sense, but we do it anyway 2658 * for compatibility. 2659 * 2660 * If not growable stack, return success. This signals the 2661 * caller to proceed as he would normally with normal vm. 2662 */ 2663 if (stack_entry->avail_ssize < 1 || 2664 addr >= stack_entry->start || 2665 addr < stack_entry->start - stack_entry->avail_ssize) { 2666 goto done; 2667 } 2668 2669 /* Find the minimum grow amount */ 2670 grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE); 2671 if (grow_amount > stack_entry->avail_ssize) { 2672 rv = KERN_NO_SPACE; 2673 goto done; 2674 } 2675 2676 /* If there is no longer enough space between the entries 2677 * nogo, and adjust the available space. Note: this 2678 * should only happen if the user has mapped into the 2679 * stack area after the stack was created, and is 2680 * probably an error. 2681 * 2682 * This also effectively destroys any guard page the user 2683 * might have intended by limiting the stack size. 2684 */ 2685 if (grow_amount > stack_entry->start - end) { 2686 if (use_read_lock && vm_map_lock_upgrade(map)) { 2687 use_read_lock = 0; 2688 goto Retry; 2689 } 2690 use_read_lock = 0; 2691 stack_entry->avail_ssize = stack_entry->start - end; 2692 rv = KERN_NO_SPACE; 2693 goto done; 2694 } 2695 2696 is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr; 2697 2698 /* If this is the main process stack, see if we're over the 2699 * stack limit. 
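 *
 * The rounded-up grow amount is clamped to the remaining RLIMIT_STACK
 * allowance below, so the stack may still grow part of the way.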
2700 */ 2701 if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > 2702 p->p_rlimit[RLIMIT_STACK].rlim_cur)) { 2703 rv = KERN_NO_SPACE; 2704 goto done; 2705 } 2706 2707 /* Round up the grow amount modulo SGROWSIZ */ 2708 grow_amount = roundup (grow_amount, sgrowsiz); 2709 if (grow_amount > stack_entry->avail_ssize) { 2710 grow_amount = stack_entry->avail_ssize; 2711 } 2712 if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > 2713 p->p_rlimit[RLIMIT_STACK].rlim_cur)) { 2714 grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur - 2715 ctob(vm->vm_ssize); 2716 } 2717 2718 /* If we would blow our VMEM resource limit, no go */ 2719 if (map->size + grow_amount > 2720 curproc->p_rlimit[RLIMIT_VMEM].rlim_cur) { 2721 rv = KERN_NO_SPACE; 2722 goto done; 2723 } 2724 2725 if (use_read_lock && vm_map_lock_upgrade(map)) { 2726 use_read_lock = 0; 2727 goto Retry; 2728 } 2729 use_read_lock = 0; 2730 2731 /* Get the preliminary new entry start value */ 2732 addr = stack_entry->start - grow_amount; 2733 2734 /* If this puts us into the previous entry, cut back our growth 2735 * to the available space. Also, see the note above. 2736 */ 2737 if (addr < end) { 2738 stack_entry->avail_ssize = stack_entry->start - end; 2739 addr = end; 2740 } 2741 2742 rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start, 2743 VM_PROT_ALL, 2744 VM_PROT_ALL, 2745 0); 2746 2747 /* Adjust the available stack space by the amount we grew. */ 2748 if (rv == KERN_SUCCESS) { 2749 if (prev_entry != &map->header) 2750 vm_map_clip_end(map, prev_entry, addr); 2751 new_stack_entry = prev_entry->next; 2752 if (new_stack_entry->end != stack_entry->start || 2753 new_stack_entry->start != addr) 2754 panic ("Bad stack grow start/end in new stack entry"); 2755 else { 2756 new_stack_entry->avail_ssize = stack_entry->avail_ssize - 2757 (new_stack_entry->end - 2758 new_stack_entry->start); 2759 if (is_procstack) 2760 vm->vm_ssize += btoc(new_stack_entry->end - 2761 new_stack_entry->start); 2762 } 2763 } 2764 2765 done: 2766 if (use_read_lock) 2767 vm_map_unlock_read(map); 2768 else 2769 vm_map_unlock(map); 2770 return (rv); 2771 } 2772 2773 /* 2774 * Unshare the specified VM space for exec. If other processes are 2775 * mapped to it, then create a new one. The new vmspace starts out empty. 2776 */ 2777 2778 void 2779 vmspace_exec(struct proc *p) { 2780 struct vmspace *oldvmspace = p->p_vmspace; 2781 struct vmspace *newvmspace; 2782 vm_map_t map = &p->p_vmspace->vm_map; 2783 2784 newvmspace = vmspace_alloc(map->min_offset, map->max_offset); 2785 bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy, 2786 (caddr_t) (newvmspace + 1) - (caddr_t) &newvmspace->vm_startcopy); 2787 /* 2788 * This code is written like this for prototype purposes. The 2789 * goal is to avoid running down the vmspace here, but to let the 2790 * other processes that are still using the vmspace run it down 2791 * once they are finished with it. Even though there is little or no chance of blocking 2792 * here, it is a good idea to keep this form for future mods. 2793 */ 2794 vmspace_free(oldvmspace); 2795 p->p_vmspace = newvmspace; 2796 pmap_pinit2(vmspace_pmap(newvmspace)); 2797 if (p == curproc) 2798 pmap_activate(p); 2799 } 2800 2801 /* 2802 * Unshare the specified VM space for forcing COW. This 2803 * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
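 *
 * If the vmspace is not actually shared (vm_refcnt == 1) this is a no-op.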
2804 */ 2805 2806 void 2807 vmspace_unshare(struct proc *p) { 2808 struct vmspace *oldvmspace = p->p_vmspace; 2809 struct vmspace *newvmspace; 2810 2811 if (oldvmspace->vm_refcnt == 1) 2812 return; 2813 newvmspace = vmspace_fork(oldvmspace); 2814 vmspace_free(oldvmspace); 2815 p->p_vmspace = newvmspace; 2816 pmap_pinit2(vmspace_pmap(newvmspace)); 2817 if (p == curproc) 2818 pmap_activate(p); 2819 } 2820 2821 2822 /* 2823 * vm_map_lookup: 2824 * 2825 * Finds the VM object, offset, and 2826 * protection for a given virtual address in the 2827 * specified map, assuming a page fault of the 2828 * type specified. 2829 * 2830 * Leaves the map in question locked for read; return 2831 * values are guaranteed until a vm_map_lookup_done 2832 * call is performed. Note that the map argument 2833 * is in/out; the returned map must be used in 2834 * the call to vm_map_lookup_done. 2835 * 2836 * A handle (out_entry) is returned for use in 2837 * vm_map_lookup_done, to make that fast. 2838 * 2839 * If a lookup is requested with "write protection" 2840 * specified, the map may be changed to perform virtual 2841 * copying operations, although the data referenced will 2842 * remain the same. 2843 */ 2844 int 2845 vm_map_lookup(vm_map_t *var_map, /* IN/OUT */ 2846 vm_offset_t vaddr, 2847 vm_prot_t fault_typea, 2848 vm_map_entry_t *out_entry, /* OUT */ 2849 vm_object_t *object, /* OUT */ 2850 vm_pindex_t *pindex, /* OUT */ 2851 vm_prot_t *out_prot, /* OUT */ 2852 boolean_t *wired) /* OUT */ 2853 { 2854 vm_map_entry_t entry; 2855 vm_map_t map = *var_map; 2856 vm_prot_t prot; 2857 vm_prot_t fault_type = fault_typea; 2858 int use_read_lock = 1; 2859 int rv = KERN_SUCCESS; 2860 2861 RetryLookup: 2862 if (use_read_lock) 2863 vm_map_lock_read(map); 2864 else 2865 vm_map_lock(map); 2866 2867 /* 2868 * If the map has an interesting hint, try it before calling full 2869 * blown lookup routine. 2870 */ 2871 entry = map->hint; 2872 *out_entry = entry; 2873 2874 if ((entry == &map->header) || 2875 (vaddr < entry->start) || (vaddr >= entry->end)) { 2876 vm_map_entry_t tmp_entry; 2877 2878 /* 2879 * Entry was either not a valid hint, or the vaddr was not 2880 * contained in the entry, so do a full lookup. 2881 */ 2882 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) { 2883 rv = KERN_INVALID_ADDRESS; 2884 goto done; 2885 } 2886 2887 entry = tmp_entry; 2888 *out_entry = entry; 2889 } 2890 2891 /* 2892 * Handle submaps. 2893 */ 2894 2895 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { 2896 vm_map_t old_map = map; 2897 2898 *var_map = map = entry->object.sub_map; 2899 if (use_read_lock) 2900 vm_map_unlock_read(old_map); 2901 else 2902 vm_map_unlock(old_map); 2903 use_read_lock = 1; 2904 goto RetryLookup; 2905 } 2906 2907 /* 2908 * Check whether this task is allowed to have this page. 2909 * Note the special case for MAP_ENTRY_COW 2910 * pages with an override. This is to implement a forced 2911 * COW for debuggers. 
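 *
 * With VM_PROT_OVERRIDE_WRITE the access is checked against the entry's
 * max_protection rather than its current protection.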
2912 */ 2913 2914 if (fault_type & VM_PROT_OVERRIDE_WRITE) 2915 prot = entry->max_protection; 2916 else 2917 prot = entry->protection; 2918 2919 fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE); 2920 if ((fault_type & prot) != fault_type) { 2921 rv = KERN_PROTECTION_FAILURE; 2922 goto done; 2923 } 2924 2925 if ((entry->eflags & MAP_ENTRY_USER_WIRED) && 2926 (entry->eflags & MAP_ENTRY_COW) && 2927 (fault_type & VM_PROT_WRITE) && 2928 (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) { 2929 rv = KERN_PROTECTION_FAILURE; 2930 goto done; 2931 } 2932 2933 /* 2934 * If this page is not pageable, we have to get it for all possible 2935 * accesses. 2936 */ 2937 2938 *wired = (entry->wired_count != 0); 2939 if (*wired) 2940 prot = fault_type = entry->protection; 2941 2942 /* 2943 * If the entry was copy-on-write, we either ... 2944 */ 2945 2946 if (entry->eflags & MAP_ENTRY_NEEDS_COPY) { 2947 /* 2948 * If we want to write the page, we may as well handle that 2949 * now since we've got the map locked. 2950 * 2951 * If we don't need to write the page, we just demote the 2952 * permissions allowed. 2953 */ 2954 2955 if (fault_type & VM_PROT_WRITE) { 2956 /* 2957 * Make a new object, and place it in the object 2958 * chain. Note that no new references have appeared 2959 * -- one just moved from the map to the new 2960 * object. 2961 */ 2962 2963 if (use_read_lock && vm_map_lock_upgrade(map)) { 2964 use_read_lock = 0; 2965 goto RetryLookup; 2966 } 2967 use_read_lock = 0; 2968 2969 vm_object_shadow( 2970 &entry->object.vm_object, 2971 &entry->offset, 2972 atop(entry->end - entry->start)); 2973 2974 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; 2975 } else { 2976 /* 2977 * We're attempting to read a copy-on-write page -- 2978 * don't allow writes. 2979 */ 2980 2981 prot &= ~VM_PROT_WRITE; 2982 } 2983 } 2984 2985 /* 2986 * Create an object if necessary. 2987 */ 2988 if (entry->object.vm_object == NULL && 2989 !map->system_map) { 2990 if (use_read_lock && vm_map_lock_upgrade(map)) { 2991 use_read_lock = 0; 2992 goto RetryLookup; 2993 } 2994 use_read_lock = 0; 2995 entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT, 2996 atop(entry->end - entry->start)); 2997 entry->offset = 0; 2998 } 2999 3000 /* 3001 * Return the object/offset from this entry. If the entry was 3002 * copy-on-write or empty, it has been fixed up. 3003 */ 3004 3005 *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset); 3006 *object = entry->object.vm_object; 3007 3008 /* 3009 * Return whether this is the only map sharing this data. On 3010 * success we return with a read lock held on the map. On failure 3011 * we return with the map unlocked. 3012 */ 3013 *out_prot = prot; 3014 done: 3015 if (rv == KERN_SUCCESS) { 3016 if (use_read_lock == 0) 3017 vm_map_lock_downgrade(map); 3018 } else if (use_read_lock) { 3019 vm_map_unlock_read(map); 3020 } else { 3021 vm_map_unlock(map); 3022 } 3023 return (rv); 3024 } 3025 3026 /* 3027 * vm_map_lookup_done: 3028 * 3029 * Releases locks acquired by a vm_map_lookup 3030 * (according to the handle returned by that lookup). 3031 */ 3032 3033 void 3034 vm_map_lookup_done(map, entry) 3035 vm_map_t map; 3036 vm_map_entry_t entry; 3037 { 3038 /* 3039 * Unlock the main-level map 3040 */ 3041 3042 vm_map_unlock_read(map); 3043 } 3044 3045 /* 3046 * Implement uiomove with VM operations. This handles (and collateral changes) 3047 * support every combination of source object modification, and COW type 3048 * operations. 
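 *
 * Returns EFAULT if the user address range cannot be looked up, 0 otherwise.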
3049 */ 3050 int 3051 vm_uiomove(mapa, srcobject, cp, cnta, uaddra, npages) 3052 vm_map_t mapa; 3053 vm_object_t srcobject; 3054 off_t cp; 3055 int cnta; 3056 vm_offset_t uaddra; 3057 int *npages; 3058 { 3059 vm_map_t map; 3060 vm_object_t first_object, oldobject, object; 3061 vm_map_entry_t entry; 3062 vm_prot_t prot; 3063 boolean_t wired; 3064 int tcnt, rv; 3065 vm_offset_t uaddr, start, end, tend; 3066 vm_pindex_t first_pindex, osize, oindex; 3067 off_t ooffset; 3068 int cnt; 3069 3070 if (npages) 3071 *npages = 0; 3072 3073 cnt = cnta; 3074 uaddr = uaddra; 3075 3076 while (cnt > 0) { 3077 map = mapa; 3078 3079 if ((vm_map_lookup(&map, uaddr, 3080 VM_PROT_READ, &entry, &first_object, 3081 &first_pindex, &prot, &wired)) != KERN_SUCCESS) { 3082 return EFAULT; 3083 } 3084 3085 vm_map_clip_start(map, entry, uaddr); 3086 3087 tcnt = cnt; 3088 tend = uaddr + tcnt; 3089 if (tend > entry->end) { 3090 tcnt = entry->end - uaddr; 3091 tend = entry->end; 3092 } 3093 3094 vm_map_clip_end(map, entry, tend); 3095 3096 start = entry->start; 3097 end = entry->end; 3098 3099 osize = atop(tcnt); 3100 3101 oindex = OFF_TO_IDX(cp); 3102 if (npages) { 3103 vm_pindex_t idx; 3104 for (idx = 0; idx < osize; idx++) { 3105 vm_page_t m; 3106 if ((m = vm_page_lookup(srcobject, oindex + idx)) == NULL) { 3107 vm_map_lookup_done(map, entry); 3108 return 0; 3109 } 3110 /* 3111 * disallow busy or invalid pages, but allow 3112 * m->busy pages if they are entirely valid. 3113 */ 3114 if ((m->flags & PG_BUSY) || 3115 ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) { 3116 vm_map_lookup_done(map, entry); 3117 return 0; 3118 } 3119 } 3120 } 3121 3122 /* 3123 * If we are changing an existing map entry, just redirect 3124 * the object, and change mappings. 3125 */ 3126 if ((first_object->type == OBJT_VNODE) && 3127 ((oldobject = entry->object.vm_object) == first_object)) { 3128 3129 if ((entry->offset != cp) || (oldobject != srcobject)) { 3130 /* 3131 * Remove old window into the file 3132 */ 3133 pmap_remove (map->pmap, uaddr, tend); 3134 3135 /* 3136 * Force copy on write for mmaped regions 3137 */ 3138 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize); 3139 3140 /* 3141 * Point the object appropriately 3142 */ 3143 if (oldobject != srcobject) { 3144 3145 /* 3146 * Set the object optimization hint flag 3147 */ 3148 vm_object_set_flag(srcobject, OBJ_OPT); 3149 vm_object_reference(srcobject); 3150 entry->object.vm_object = srcobject; 3151 3152 if (oldobject) { 3153 vm_object_deallocate(oldobject); 3154 } 3155 } 3156 3157 entry->offset = cp; 3158 map->timestamp++; 3159 } else { 3160 pmap_remove (map->pmap, uaddr, tend); 3161 } 3162 3163 } else if ((first_object->ref_count == 1) && 3164 (first_object->size == osize) && 3165 ((first_object->type == OBJT_DEFAULT) || 3166 (first_object->type == OBJT_SWAP)) ) { 3167 3168 oldobject = first_object->backing_object; 3169 3170 if ((first_object->backing_object_offset != cp) || 3171 (oldobject != srcobject)) { 3172 /* 3173 * Remove old window into the file 3174 */ 3175 pmap_remove (map->pmap, uaddr, tend); 3176 3177 /* 3178 * Remove unneeded old pages 3179 */ 3180 vm_object_page_remove(first_object, 0, 0, 0); 3181 3182 /* 3183 * Invalidate swap space 3184 */ 3185 if (first_object->type == OBJT_SWAP) { 3186 swap_pager_freespace(first_object, 3187 0, 3188 first_object->size); 3189 } 3190 3191 /* 3192 * Force copy on write for mmaped regions 3193 */ 3194 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize); 3195 3196 /* 3197 * Point the object appropriately 3198 */ 3199 if 
(oldobject != srcobject) { 3200 3201 /* 3202 * Set the object optimization hint flag 3203 */ 3204 vm_object_set_flag(srcobject, OBJ_OPT); 3205 vm_object_reference(srcobject); 3206 3207 if (oldobject) { 3208 LIST_REMOVE( 3209 first_object, shadow_list); 3210 oldobject->shadow_count--; 3211 /* XXX bump generation? */ 3212 vm_object_deallocate(oldobject); 3213 } 3214 3215 LIST_INSERT_HEAD(&srcobject->shadow_head, 3216 first_object, shadow_list); 3217 srcobject->shadow_count++; 3218 /* XXX bump generation? */ 3219 3220 first_object->backing_object = srcobject; 3221 } 3222 first_object->backing_object_offset = cp; 3223 map->timestamp++; 3224 } else { 3225 pmap_remove (map->pmap, uaddr, tend); 3226 } 3227 /* 3228 * Otherwise, we have to do a logical mmap. 3229 */ 3230 } else { 3231 3232 vm_object_set_flag(srcobject, OBJ_OPT); 3233 vm_object_reference(srcobject); 3234 3235 pmap_remove (map->pmap, uaddr, tend); 3236 3237 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize); 3238 vm_map_lock_upgrade(map); 3239 3240 if (entry == &map->header) { 3241 map->first_free = &map->header; 3242 } else if (map->first_free->start >= start) { 3243 map->first_free = entry->prev; 3244 } 3245 3246 SAVE_HINT(map, entry->prev); 3247 vm_map_entry_delete(map, entry); 3248 3249 object = srcobject; 3250 ooffset = cp; 3251 3252 rv = vm_map_insert(map, object, ooffset, start, tend, 3253 VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE); 3254 3255 if (rv != KERN_SUCCESS) 3256 panic("vm_uiomove: could not insert new entry: %d", rv); 3257 } 3258 3259 /* 3260 * Map the window directly, if it is already in memory 3261 */ 3262 pmap_object_init_pt(map->pmap, uaddr, 3263 srcobject, oindex, tcnt, 0); 3264 3265 map->timestamp++; 3266 vm_map_unlock(map); 3267 3268 cnt -= tcnt; 3269 uaddr += tcnt; 3270 cp += tcnt; 3271 if (npages) 3272 *npages += osize; 3273 } 3274 return 0; 3275 } 3276 3277 /* 3278 * Performs the copy_on_write operations necessary to allow the virtual copies 3279 * into user space to work. This has to be called for write(2) system calls 3280 * from other processes, file unlinking, and file size shrinkage. 
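 *
 * Objects that do not have OBJ_OPT set are ignored.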
3281 */ 3282 void 3283 vm_freeze_copyopts(object, froma, toa) 3284 vm_object_t object; 3285 vm_pindex_t froma, toa; 3286 { 3287 int rv; 3288 vm_object_t robject; 3289 vm_pindex_t idx; 3290 3291 if ((object == NULL) || 3292 ((object->flags & OBJ_OPT) == 0)) 3293 return; 3294 3295 if (object->shadow_count > object->ref_count) 3296 panic("vm_freeze_copyopts: sc > rc"); 3297 3298 while((robject = LIST_FIRST(&object->shadow_head)) != NULL) { 3299 vm_pindex_t bo_pindex; 3300 vm_page_t m_in, m_out; 3301 3302 bo_pindex = OFF_TO_IDX(robject->backing_object_offset); 3303 3304 vm_object_reference(robject); 3305 3306 vm_object_pip_wait(robject, "objfrz"); 3307 3308 if (robject->ref_count == 1) { 3309 vm_object_deallocate(robject); 3310 continue; 3311 } 3312 3313 vm_object_pip_add(robject, 1); 3314 3315 for (idx = 0; idx < robject->size; idx++) { 3316 3317 m_out = vm_page_grab(robject, idx, 3318 VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 3319 3320 if (m_out->valid == 0) { 3321 m_in = vm_page_grab(object, bo_pindex + idx, 3322 VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 3323 if (m_in->valid == 0) { 3324 rv = vm_pager_get_pages(object, &m_in, 1, 0); 3325 if (rv != VM_PAGER_OK) { 3326 printf("vm_freeze_copyopts: cannot read page from file: %lx\n", (long)m_in->pindex); 3327 continue; 3328 } 3329 vm_page_deactivate(m_in); 3330 } 3331 3332 vm_page_protect(m_in, VM_PROT_NONE); 3333 pmap_copy_page(VM_PAGE_TO_PHYS(m_in), VM_PAGE_TO_PHYS(m_out)); 3334 m_out->valid = m_in->valid; 3335 vm_page_dirty(m_out); 3336 vm_page_activate(m_out); 3337 vm_page_wakeup(m_in); 3338 } 3339 vm_page_wakeup(m_out); 3340 } 3341 3342 object->shadow_count--; 3343 object->ref_count--; 3344 LIST_REMOVE(robject, shadow_list); 3345 robject->backing_object = NULL; 3346 robject->backing_object_offset = 0; 3347 3348 vm_object_pip_wakeup(robject); 3349 vm_object_deallocate(robject); 3350 } 3351 3352 vm_object_clear_flag(object, OBJ_OPT); 3353 } 3354 3355 #include "opt_ddb.h" 3356 #ifdef DDB 3357 #include <sys/kernel.h> 3358 3359 #include <ddb/ddb.h> 3360 3361 /* 3362 * vm_map_print: [ debug ] 3363 */ 3364 DB_SHOW_COMMAND(map, vm_map_print) 3365 { 3366 static int nlines; 3367 /* XXX convert args. */ 3368 vm_map_t map = (vm_map_t)addr; 3369 boolean_t full = have_addr; 3370 3371 vm_map_entry_t entry; 3372 3373 db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n", 3374 (void *)map, 3375 (void *)map->pmap, map->nentries, map->timestamp); 3376 nlines++; 3377 3378 if (!full && db_indent) 3379 return; 3380 3381 db_indent += 2; 3382 for (entry = map->header.next; entry != &map->header; 3383 entry = entry->next) { 3384 db_iprintf("map entry %p: start=%p, end=%p\n", 3385 (void *)entry, (void *)entry->start, (void *)entry->end); 3386 nlines++; 3387 { 3388 static char *inheritance_name[4] = 3389 {"share", "copy", "none", "donate_copy"}; 3390 3391 db_iprintf(" prot=%x/%x/%s", 3392 entry->protection, 3393 entry->max_protection, 3394 inheritance_name[(int)(unsigned char)entry->inheritance]); 3395 if (entry->wired_count != 0) 3396 db_printf(", wired"); 3397 } 3398 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { 3399 /* XXX no %qd in kernel. Truncate entry->offset. 
*/ 3400 db_printf(", share=%p, offset=0x%lx\n", 3401 (void *)entry->object.sub_map, 3402 (long)entry->offset); 3403 nlines++; 3404 if ((entry->prev == &map->header) || 3405 (entry->prev->object.sub_map != 3406 entry->object.sub_map)) { 3407 db_indent += 2; 3408 vm_map_print((db_expr_t)(intptr_t) 3409 entry->object.sub_map, 3410 full, 0, (char *)0); 3411 db_indent -= 2; 3412 } 3413 } else { 3414 /* XXX no %qd in kernel. Truncate entry->offset. */ 3415 db_printf(", object=%p, offset=0x%lx", 3416 (void *)entry->object.vm_object, 3417 (long)entry->offset); 3418 if (entry->eflags & MAP_ENTRY_COW) 3419 db_printf(", copy (%s)", 3420 (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done"); 3421 db_printf("\n"); 3422 nlines++; 3423 3424 if ((entry->prev == &map->header) || 3425 (entry->prev->object.vm_object != 3426 entry->object.vm_object)) { 3427 db_indent += 2; 3428 vm_object_print((db_expr_t)(intptr_t) 3429 entry->object.vm_object, 3430 full, 0, (char *)0); 3431 nlines += 4; 3432 db_indent -= 2; 3433 } 3434 } 3435 } 3436 db_indent -= 2; 3437 if (db_indent == 0) 3438 nlines = 0; 3439 } 3440 3441 3442 DB_SHOW_COMMAND(procvm, procvm) 3443 { 3444 struct proc *p; 3445 3446 if (have_addr) { 3447 p = (struct proc *) addr; 3448 } else { 3449 p = curproc; 3450 } 3451 3452 db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n", 3453 (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map, 3454 (void *)vmspace_pmap(p->p_vmspace)); 3455 3456 vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL); 3457 } 3458 3459 #endif /* DDB */ 3460