1 /* 2 * Copyright (c) 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * The Mach Operating System project at Carnegie-Mellon University. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * from: @(#)vm_map.c 8.3 (Berkeley) 1/12/94 37 * 38 * 39 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 40 * All rights reserved. 41 * 42 * Authors: Avadis Tevanian, Jr., Michael Wayne Young 43 * 44 * Permission to use, copy, modify and distribute this software and 45 * its documentation is hereby granted, provided that both the copyright 46 * notice and this permission notice appear in all copies of the 47 * software, derivative works or modified versions, and any portions 48 * thereof, and that both notices appear in supporting documentation. 49 * 50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 53 * 54 * Carnegie Mellon requests users of this software to return to 55 * 56 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 57 * School of Computer Science 58 * Carnegie Mellon University 59 * Pittsburgh PA 15213-3890 60 * 61 * any improvements or extensions that they make and grant Carnegie the 62 * rights to redistribute these changes. 63 * 64 * $FreeBSD: src/sys/vm/vm_map.c,v 1.187.2.19 2003/05/27 00:47:02 alc Exp $ 65 * $DragonFly: src/sys/vm/vm_map.c,v 1.2 2003/06/17 04:29:00 dillon Exp $ 66 */ 67 68 /* 69 * Virtual memory mapping module. 
70 */ 71 72 #include <sys/param.h> 73 #include <sys/systm.h> 74 #include <sys/proc.h> 75 #include <sys/vmmeter.h> 76 #include <sys/mman.h> 77 #include <sys/vnode.h> 78 #include <sys/resourcevar.h> 79 80 #include <vm/vm.h> 81 #include <vm/vm_param.h> 82 #include <sys/lock.h> 83 #include <vm/pmap.h> 84 #include <vm/vm_map.h> 85 #include <vm/vm_page.h> 86 #include <vm/vm_object.h> 87 #include <vm/vm_pager.h> 88 #include <vm/vm_kern.h> 89 #include <vm/vm_extern.h> 90 #include <vm/swap_pager.h> 91 #include <vm/vm_zone.h> 92 93 /* 94 * Virtual memory maps provide for the mapping, protection, 95 * and sharing of virtual memory objects. In addition, 96 * this module provides for an efficient virtual copy of 97 * memory from one map to another. 98 * 99 * Synchronization is required prior to most operations. 100 * 101 * Maps consist of an ordered doubly-linked list of simple 102 * entries; a single hint is used to speed up lookups. 103 * 104 * Since portions of maps are specified by start/end addresses, 105 * which may not align with existing map entries, all 106 * routines merely "clip" entries to these start/end values. 107 * [That is, an entry is split into two, bordering at a 108 * start or end value.] Note that these clippings may not 109 * always be necessary (as the two resulting entries are then 110 * not changed); however, the clipping is done for convenience. 111 * 112 * As mentioned above, virtual copy operations are performed 113 * by copying VM object references from one map to 114 * another, and then marking both regions as copy-on-write. 115 */ 116 117 /* 118 * vm_map_startup: 119 * 120 * Initialize the vm_map module. Must be called before 121 * any other vm_map routines. 122 * 123 * Map and entry structures are allocated from the general 124 * purpose memory pool with some exceptions: 125 * 126 * - The kernel map and kmem submap are allocated statically. 127 * - Kernel map entries are allocated out of a static pool. 128 * 129 * These restrictions are necessary since malloc() uses the 130 * maps and requires map entries. 
131 */ 132 133 static struct vm_zone kmapentzone_store, mapentzone_store, mapzone_store; 134 static vm_zone_t mapentzone, kmapentzone, mapzone, vmspace_zone; 135 static struct vm_object kmapentobj, mapentobj, mapobj; 136 137 static struct vm_map_entry map_entry_init[MAX_MAPENT]; 138 static struct vm_map_entry kmap_entry_init[MAX_KMAPENT]; 139 static struct vm_map map_init[MAX_KMAP]; 140 141 static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t)); 142 static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t)); 143 static vm_map_entry_t vm_map_entry_create __P((vm_map_t)); 144 static void vm_map_entry_delete __P((vm_map_t, vm_map_entry_t)); 145 static void vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t)); 146 static void vm_map_entry_unwire __P((vm_map_t, vm_map_entry_t)); 147 static void vm_map_copy_entry __P((vm_map_t, vm_map_t, vm_map_entry_t, 148 vm_map_entry_t)); 149 static void vm_map_split __P((vm_map_entry_t)); 150 static void vm_map_unclip_range __P((vm_map_t map, vm_map_entry_t start_entry, vm_offset_t start, vm_offset_t end, int flags)); 151 152 void 153 vm_map_startup() 154 { 155 mapzone = &mapzone_store; 156 zbootinit(mapzone, "MAP", sizeof (struct vm_map), 157 map_init, MAX_KMAP); 158 kmapentzone = &kmapentzone_store; 159 zbootinit(kmapentzone, "KMAP ENTRY", sizeof (struct vm_map_entry), 160 kmap_entry_init, MAX_KMAPENT); 161 mapentzone = &mapentzone_store; 162 zbootinit(mapentzone, "MAP ENTRY", sizeof (struct vm_map_entry), 163 map_entry_init, MAX_MAPENT); 164 } 165 166 /* 167 * Allocate a vmspace structure, including a vm_map and pmap, 168 * and initialize those structures. The refcnt is set to 1. 169 * The remaining fields must be initialized by the caller. 170 */ 171 struct vmspace * 172 vmspace_alloc(min, max) 173 vm_offset_t min, max; 174 { 175 struct vmspace *vm; 176 177 vm = zalloc(vmspace_zone); 178 vm_map_init(&vm->vm_map, min, max); 179 pmap_pinit(vmspace_pmap(vm)); 180 vm->vm_map.pmap = vmspace_pmap(vm); /* XXX */ 181 vm->vm_refcnt = 1; 182 vm->vm_shm = NULL; 183 vm->vm_exitingcnt = 0; 184 return (vm); 185 } 186 187 void 188 vm_init2(void) { 189 zinitna(kmapentzone, &kmapentobj, 190 NULL, 0, lmin((VM_MAX_KERNEL_ADDRESS - KERNBASE) / PAGE_SIZE, 191 cnt.v_page_count) / 8, ZONE_INTERRUPT, 1); 192 zinitna(mapentzone, &mapentobj, 193 NULL, 0, 0, 0, 1); 194 zinitna(mapzone, &mapobj, 195 NULL, 0, 0, 0, 1); 196 vmspace_zone = zinit("VMSPACE", sizeof (struct vmspace), 0, 0, 3); 197 pmap_init2(); 198 vm_object_init2(); 199 } 200 201 static __inline void 202 vmspace_dofree(struct vmspace *vm) 203 { 204 /* 205 * Lock the map, to wait out all other references to it. 206 * Delete all of the mappings and pages they hold, then call 207 * the pmap module to reclaim anything left. 208 */ 209 vm_map_lock(&vm->vm_map); 210 (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset, 211 vm->vm_map.max_offset); 212 vm_map_unlock(&vm->vm_map); 213 214 pmap_release(vmspace_pmap(vm)); 215 zfree(vmspace_zone, vm); 216 } 217 218 void 219 vmspace_free(struct vmspace *vm) 220 { 221 if (vm->vm_refcnt == 0) 222 panic("vmspace_free: attempt to free already freed vmspace"); 223 224 if (--vm->vm_refcnt == 0 && vm->vm_exitingcnt == 0) 225 vmspace_dofree(vm); 226 } 227 228 void 229 vmspace_exitfree(struct proc *p) 230 { 231 struct vmspace *vm; 232 233 vm = p->p_vmspace; 234 p->p_vmspace = NULL; 235 236 /* 237 * cleanup by parent process wait()ing on exiting child. vm_refcnt 238 * may not be 0 (e.g. fork() and child exits without exec()ing). 
	 * exitingcnt may increment above 0 and drop back down to zero
	 * several times while vm_refcnt is held non-zero.  vm_refcnt
	 * may also increment above 0 and drop back down to zero several
	 * times while vm_exitingcnt is held non-zero.
	 *
	 * The last wait on the exiting child's vmspace will clean up
	 * the remainder of the vmspace.
	 */
	if (--vm->vm_exitingcnt == 0 && vm->vm_refcnt == 0)
		vmspace_dofree(vm);
}

/*
 * vmspace_swap_count() - count the approximate swap usage in pages for a
 *			  vmspace.
 *
 *	Swap usage is determined by taking the proportional swap used by
 *	VM objects backing the VM map.  To make up for fractional losses,
 *	if the VM object has any swap use at all the associated map entries
 *	count for at least 1 swap page.
 */
int
vmspace_swap_count(struct vmspace *vmspace)
{
	vm_map_t map = &vmspace->vm_map;
	vm_map_entry_t cur;
	int count = 0;

	for (cur = map->header.next; cur != &map->header; cur = cur->next) {
		vm_object_t object;

		if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
		    (object = cur->object.vm_object) != NULL &&
		    object->type == OBJT_SWAP
		) {
			int n = (cur->end - cur->start) / PAGE_SIZE;

			if (object->un_pager.swp.swp_bcount) {
				count += object->un_pager.swp.swp_bcount *
				    SWAP_META_PAGES * n / object->size + 1;
			}
		}
	}
	return(count);
}


/*
 *	vm_map_create:
 *
 *	Creates and returns a new empty VM map with
 *	the given physical map structure, and having
 *	the given lower and upper address bounds.
 */
vm_map_t
vm_map_create(pmap, min, max)
	pmap_t pmap;
	vm_offset_t min, max;
{
	vm_map_t result;

	result = zalloc(mapzone);
	vm_map_init(result, min, max);
	result->pmap = pmap;
	return (result);
}

/*
 * Initialize an existing vm_map structure
 * such as that in the vmspace structure.
 * The pmap is set elsewhere.
 */
void
vm_map_init(map, min, max)
	struct vm_map *map;
	vm_offset_t min, max;
{
	map->header.next = map->header.prev = &map->header;
	map->nentries = 0;
	map->size = 0;
	map->system_map = 0;
	map->infork = 0;
	map->min_offset = min;
	map->max_offset = max;
	map->first_free = &map->header;
	map->hint = &map->header;
	map->timestamp = 0;
	lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
}

/*
 *	vm_map_entry_dispose:	[ internal use only ]
 *
 *	Inverse of vm_map_entry_create.
 */
static void
vm_map_entry_dispose(map, entry)
	vm_map_t map;
	vm_map_entry_t entry;
{
	if (map->system_map || !mapentzone)
		zfreei(kmapentzone, entry);
	else
		zfree(mapentzone, entry);
}

/*
 *	vm_map_entry_create:	[ internal use only ]
 *
 *	Allocates a VM map entry for insertion.  No entry fields are
 *	filled in.  This routine panics if the backing zone cannot
 *	satisfy the allocation.
 */
static vm_map_entry_t
vm_map_entry_create(map)
	vm_map_t map;
{
	vm_map_entry_t new_entry;

	if (map->system_map || !mapentzone)
		new_entry = zalloci(kmapentzone);
	else
		new_entry = zalloc(mapentzone);
	if (new_entry == NULL)
		panic("vm_map_entry_create: kernel resources exhausted");
	return(new_entry);
}

/*
 *	vm_map_entry_{un,}link:
 *
 *	Insert/remove entries from maps.
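 *
 *	Illustrative fragment (not part of the original source; the names
 *	'template_entry' and 'prev_entry' are placeholders):  a typical
 *	insertion allocates an entry, fills it in, and links it after its
 *	predecessor while the map is held locked:
 *
 *		new_entry = vm_map_entry_create(map);
 *		*new_entry = *template_entry;
 *		vm_map_entry_link(map, prev_entry, new_entry);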
370 */ 371 static __inline void 372 vm_map_entry_link(vm_map_t map, 373 vm_map_entry_t after_where, 374 vm_map_entry_t entry) 375 { 376 map->nentries++; 377 entry->prev = after_where; 378 entry->next = after_where->next; 379 entry->next->prev = entry; 380 after_where->next = entry; 381 } 382 383 static __inline void 384 vm_map_entry_unlink(vm_map_t map, 385 vm_map_entry_t entry) 386 { 387 vm_map_entry_t prev; 388 vm_map_entry_t next; 389 390 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) 391 panic("vm_map_entry_unlink: attempt to mess with locked entry! %p", entry); 392 prev = entry->prev; 393 next = entry->next; 394 next->prev = prev; 395 prev->next = next; 396 map->nentries--; 397 } 398 399 /* 400 * SAVE_HINT: 401 * 402 * Saves the specified entry as the hint for 403 * future lookups. 404 */ 405 #define SAVE_HINT(map,value) \ 406 (map)->hint = (value); 407 408 /* 409 * vm_map_lookup_entry: [ internal use only ] 410 * 411 * Finds the map entry containing (or 412 * immediately preceding) the specified address 413 * in the given map; the entry is returned 414 * in the "entry" parameter. The boolean 415 * result indicates whether the address is 416 * actually contained in the map. 417 */ 418 boolean_t 419 vm_map_lookup_entry(map, address, entry) 420 vm_map_t map; 421 vm_offset_t address; 422 vm_map_entry_t *entry; /* OUT */ 423 { 424 vm_map_entry_t cur; 425 vm_map_entry_t last; 426 427 /* 428 * Start looking either from the head of the list, or from the hint. 429 */ 430 431 cur = map->hint; 432 433 if (cur == &map->header) 434 cur = cur->next; 435 436 if (address >= cur->start) { 437 /* 438 * Go from hint to end of list. 439 * 440 * But first, make a quick check to see if we are already looking 441 * at the entry we want (which is usually the case). Note also 442 * that we don't need to save the hint here... it is the same 443 * hint (unless we are at the header, in which case the hint 444 * didn't buy us anything anyway). 445 */ 446 last = &map->header; 447 if ((cur != last) && (cur->end > address)) { 448 *entry = cur; 449 return (TRUE); 450 } 451 } else { 452 /* 453 * Go from start to hint, *inclusively* 454 */ 455 last = cur->next; 456 cur = map->header.next; 457 } 458 459 /* 460 * Search linearly 461 */ 462 463 while (cur != last) { 464 if (cur->end > address) { 465 if (address >= cur->start) { 466 /* 467 * Save this lookup for future hints, and 468 * return 469 */ 470 471 *entry = cur; 472 SAVE_HINT(map, cur); 473 return (TRUE); 474 } 475 break; 476 } 477 cur = cur->next; 478 } 479 *entry = cur->prev; 480 SAVE_HINT(map, *entry); 481 return (FALSE); 482 } 483 484 /* 485 * vm_map_insert: 486 * 487 * Inserts the given whole VM object into the target 488 * map at the specified address range. The object's 489 * size should match that of the address range. 490 * 491 * Requires that the map be locked, and leaves it so. 492 * 493 * If object is non-NULL, ref count must be bumped by caller 494 * prior to making call to account for the new entry. 495 */ 496 int 497 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 498 vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max, 499 int cow) 500 { 501 vm_map_entry_t new_entry; 502 vm_map_entry_t prev_entry; 503 vm_map_entry_t temp_entry; 504 vm_eflags_t protoeflags; 505 506 /* 507 * Check that the start and end points are not bogus. 
508 */ 509 510 if ((start < map->min_offset) || (end > map->max_offset) || 511 (start >= end)) 512 return (KERN_INVALID_ADDRESS); 513 514 /* 515 * Find the entry prior to the proposed starting address; if it's part 516 * of an existing entry, this range is bogus. 517 */ 518 519 if (vm_map_lookup_entry(map, start, &temp_entry)) 520 return (KERN_NO_SPACE); 521 522 prev_entry = temp_entry; 523 524 /* 525 * Assert that the next entry doesn't overlap the end point. 526 */ 527 528 if ((prev_entry->next != &map->header) && 529 (prev_entry->next->start < end)) 530 return (KERN_NO_SPACE); 531 532 protoeflags = 0; 533 534 if (cow & MAP_COPY_ON_WRITE) 535 protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY; 536 537 if (cow & MAP_NOFAULT) { 538 protoeflags |= MAP_ENTRY_NOFAULT; 539 540 KASSERT(object == NULL, 541 ("vm_map_insert: paradoxical MAP_NOFAULT request")); 542 } 543 if (cow & MAP_DISABLE_SYNCER) 544 protoeflags |= MAP_ENTRY_NOSYNC; 545 if (cow & MAP_DISABLE_COREDUMP) 546 protoeflags |= MAP_ENTRY_NOCOREDUMP; 547 548 if (object) { 549 /* 550 * When object is non-NULL, it could be shared with another 551 * process. We have to set or clear OBJ_ONEMAPPING 552 * appropriately. 553 */ 554 if ((object->ref_count > 1) || (object->shadow_count != 0)) { 555 vm_object_clear_flag(object, OBJ_ONEMAPPING); 556 } 557 } 558 else if ((prev_entry != &map->header) && 559 (prev_entry->eflags == protoeflags) && 560 (prev_entry->end == start) && 561 (prev_entry->wired_count == 0) && 562 ((prev_entry->object.vm_object == NULL) || 563 vm_object_coalesce(prev_entry->object.vm_object, 564 OFF_TO_IDX(prev_entry->offset), 565 (vm_size_t)(prev_entry->end - prev_entry->start), 566 (vm_size_t)(end - prev_entry->end)))) { 567 /* 568 * We were able to extend the object. Determine if we 569 * can extend the previous map entry to include the 570 * new range as well. 571 */ 572 if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) && 573 (prev_entry->protection == prot) && 574 (prev_entry->max_protection == max)) { 575 map->size += (end - prev_entry->end); 576 prev_entry->end = end; 577 vm_map_simplify_entry(map, prev_entry); 578 return (KERN_SUCCESS); 579 } 580 581 /* 582 * If we can extend the object but cannot extend the 583 * map entry, we have to create a new map entry. We 584 * must bump the ref count on the extended object to 585 * account for it. object may be NULL. 586 */ 587 object = prev_entry->object.vm_object; 588 offset = prev_entry->offset + 589 (prev_entry->end - prev_entry->start); 590 vm_object_reference(object); 591 } 592 593 /* 594 * NOTE: if conditionals fail, object can be NULL here. This occurs 595 * in things like the buffer map where we manage kva but do not manage 596 * backing objects. 
597 */ 598 599 /* 600 * Create a new entry 601 */ 602 603 new_entry = vm_map_entry_create(map); 604 new_entry->start = start; 605 new_entry->end = end; 606 607 new_entry->eflags = protoeflags; 608 new_entry->object.vm_object = object; 609 new_entry->offset = offset; 610 new_entry->avail_ssize = 0; 611 612 new_entry->inheritance = VM_INHERIT_DEFAULT; 613 new_entry->protection = prot; 614 new_entry->max_protection = max; 615 new_entry->wired_count = 0; 616 617 /* 618 * Insert the new entry into the list 619 */ 620 621 vm_map_entry_link(map, prev_entry, new_entry); 622 map->size += new_entry->end - new_entry->start; 623 624 /* 625 * Update the free space hint 626 */ 627 if ((map->first_free == prev_entry) && 628 (prev_entry->end >= new_entry->start)) { 629 map->first_free = new_entry; 630 } 631 632 #if 0 633 /* 634 * Temporarily removed to avoid MAP_STACK panic, due to 635 * MAP_STACK being a huge hack. Will be added back in 636 * when MAP_STACK (and the user stack mapping) is fixed. 637 */ 638 /* 639 * It may be possible to simplify the entry 640 */ 641 vm_map_simplify_entry(map, new_entry); 642 #endif 643 644 if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) { 645 pmap_object_init_pt(map->pmap, start, 646 object, OFF_TO_IDX(offset), end - start, 647 cow & MAP_PREFAULT_PARTIAL); 648 } 649 650 return (KERN_SUCCESS); 651 } 652 653 /* 654 * Find sufficient space for `length' bytes in the given map, starting at 655 * `start'. The map must be locked. Returns 0 on success, 1 on no space. 656 */ 657 int 658 vm_map_findspace(map, start, length, addr) 659 vm_map_t map; 660 vm_offset_t start; 661 vm_size_t length; 662 vm_offset_t *addr; 663 { 664 vm_map_entry_t entry, next; 665 vm_offset_t end; 666 667 if (start < map->min_offset) 668 start = map->min_offset; 669 if (start > map->max_offset) 670 return (1); 671 672 /* 673 * Look for the first possible address; if there's already something 674 * at this address, we have to start after it. 675 */ 676 if (start == map->min_offset) { 677 if ((entry = map->first_free) != &map->header) 678 start = entry->end; 679 } else { 680 vm_map_entry_t tmp; 681 682 if (vm_map_lookup_entry(map, start, &tmp)) 683 start = tmp->end; 684 entry = tmp; 685 } 686 687 /* 688 * Look through the rest of the map, trying to fit a new region in the 689 * gap between existing regions, or after the very last region. 690 */ 691 for (;; start = (entry = next)->end) { 692 /* 693 * Find the end of the proposed new region. Be sure we didn't 694 * go beyond the end of the map, or wrap around the address; 695 * if so, we lose. Otherwise, if this is the last entry, or 696 * if the proposed new region fits before the next entry, we 697 * win. 698 */ 699 end = start + length; 700 if (end > map->max_offset || end < start) 701 return (1); 702 next = entry->next; 703 if (next == &map->header || next->start >= end) 704 break; 705 } 706 SAVE_HINT(map, entry); 707 *addr = start; 708 if (map == kernel_map) { 709 vm_offset_t ksize; 710 if ((ksize = round_page(start + length)) > kernel_vm_end) { 711 pmap_growkernel(ksize); 712 } 713 } 714 return (0); 715 } 716 717 /* 718 * vm_map_find finds an unallocated region in the target address 719 * map with the given length. The search is defined to be 720 * first-fit from the specified address; the region found is 721 * returned in the same parameter. 722 * 723 * If object is non-NULL, ref count must be bumped by caller 724 * prior to making call to account for the new entry. 
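 *
 *	Illustrative fragment (not part of the original source; 'hint',
 *	'size', and the error handling are placeholders):  a typical
 *	anonymous allocation that lets the map choose the address via the
 *	first-fit search:
 *
 *		addr = hint;
 *		rv = vm_map_find(map, NULL, 0, &addr, size, TRUE,
 *			VM_PROT_ALL, VM_PROT_ALL, 0);
 *		if (rv != KERN_SUCCESS)
 *			return (ENOMEM);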
725 */ 726 int 727 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 728 vm_offset_t *addr, /* IN/OUT */ 729 vm_size_t length, boolean_t find_space, vm_prot_t prot, 730 vm_prot_t max, int cow) 731 { 732 vm_offset_t start; 733 int result, s = 0; 734 735 start = *addr; 736 737 if (map == kmem_map || map == mb_map) 738 s = splvm(); 739 740 vm_map_lock(map); 741 if (find_space) { 742 if (vm_map_findspace(map, start, length, addr)) { 743 vm_map_unlock(map); 744 if (map == kmem_map || map == mb_map) 745 splx(s); 746 return (KERN_NO_SPACE); 747 } 748 start = *addr; 749 } 750 result = vm_map_insert(map, object, offset, 751 start, start + length, prot, max, cow); 752 vm_map_unlock(map); 753 754 if (map == kmem_map || map == mb_map) 755 splx(s); 756 757 return (result); 758 } 759 760 /* 761 * vm_map_simplify_entry: 762 * 763 * Simplify the given map entry by merging with either neighbor. This 764 * routine also has the ability to merge with both neighbors. 765 * 766 * The map must be locked. 767 * 768 * This routine guarentees that the passed entry remains valid (though 769 * possibly extended). When merging, this routine may delete one or 770 * both neighbors. No action is taken on entries which have their 771 * in-transition flag set. 772 */ 773 void 774 vm_map_simplify_entry(map, entry) 775 vm_map_t map; 776 vm_map_entry_t entry; 777 { 778 vm_map_entry_t next, prev; 779 vm_size_t prevsize, esize; 780 781 if (entry->eflags & (MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP)) { 782 ++cnt.v_intrans_coll; 783 return; 784 } 785 786 prev = entry->prev; 787 if (prev != &map->header) { 788 prevsize = prev->end - prev->start; 789 if ( (prev->end == entry->start) && 790 (prev->object.vm_object == entry->object.vm_object) && 791 (!prev->object.vm_object || 792 (prev->offset + prevsize == entry->offset)) && 793 (prev->eflags == entry->eflags) && 794 (prev->protection == entry->protection) && 795 (prev->max_protection == entry->max_protection) && 796 (prev->inheritance == entry->inheritance) && 797 (prev->wired_count == entry->wired_count)) { 798 if (map->first_free == prev) 799 map->first_free = entry; 800 if (map->hint == prev) 801 map->hint = entry; 802 vm_map_entry_unlink(map, prev); 803 entry->start = prev->start; 804 entry->offset = prev->offset; 805 if (prev->object.vm_object) 806 vm_object_deallocate(prev->object.vm_object); 807 vm_map_entry_dispose(map, prev); 808 } 809 } 810 811 next = entry->next; 812 if (next != &map->header) { 813 esize = entry->end - entry->start; 814 if ((entry->end == next->start) && 815 (next->object.vm_object == entry->object.vm_object) && 816 (!entry->object.vm_object || 817 (entry->offset + esize == next->offset)) && 818 (next->eflags == entry->eflags) && 819 (next->protection == entry->protection) && 820 (next->max_protection == entry->max_protection) && 821 (next->inheritance == entry->inheritance) && 822 (next->wired_count == entry->wired_count)) { 823 if (map->first_free == next) 824 map->first_free = entry; 825 if (map->hint == next) 826 map->hint = entry; 827 vm_map_entry_unlink(map, next); 828 entry->end = next->end; 829 if (next->object.vm_object) 830 vm_object_deallocate(next->object.vm_object); 831 vm_map_entry_dispose(map, next); 832 } 833 } 834 } 835 /* 836 * vm_map_clip_start: [ internal use only ] 837 * 838 * Asserts that the given entry begins at or after 839 * the specified address; if necessary, 840 * it splits the entry into two. 
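 *
 *	Illustrative example (not part of the original source):  clipping an
 *	entry that covers [0x1000, 0x5000) at startaddr 0x3000 leaves the
 *	original entry covering [0x3000, 0x5000) and inserts a new entry
 *	covering [0x1000, 0x3000) immediately before it.  An entry that
 *	already begins at or after startaddr is left untouched.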
841 */ 842 #define vm_map_clip_start(map, entry, startaddr) \ 843 { \ 844 if (startaddr > entry->start) \ 845 _vm_map_clip_start(map, entry, startaddr); \ 846 } 847 848 /* 849 * This routine is called only when it is known that 850 * the entry must be split. 851 */ 852 static void 853 _vm_map_clip_start(map, entry, start) 854 vm_map_t map; 855 vm_map_entry_t entry; 856 vm_offset_t start; 857 { 858 vm_map_entry_t new_entry; 859 860 /* 861 * Split off the front portion -- note that we must insert the new 862 * entry BEFORE this one, so that this entry has the specified 863 * starting address. 864 */ 865 866 vm_map_simplify_entry(map, entry); 867 868 /* 869 * If there is no object backing this entry, we might as well create 870 * one now. If we defer it, an object can get created after the map 871 * is clipped, and individual objects will be created for the split-up 872 * map. This is a bit of a hack, but is also about the best place to 873 * put this improvement. 874 */ 875 876 if (entry->object.vm_object == NULL && !map->system_map) { 877 vm_object_t object; 878 object = vm_object_allocate(OBJT_DEFAULT, 879 atop(entry->end - entry->start)); 880 entry->object.vm_object = object; 881 entry->offset = 0; 882 } 883 884 new_entry = vm_map_entry_create(map); 885 *new_entry = *entry; 886 887 new_entry->end = start; 888 entry->offset += (start - entry->start); 889 entry->start = start; 890 891 vm_map_entry_link(map, entry->prev, new_entry); 892 893 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { 894 vm_object_reference(new_entry->object.vm_object); 895 } 896 } 897 898 /* 899 * vm_map_clip_end: [ internal use only ] 900 * 901 * Asserts that the given entry ends at or before 902 * the specified address; if necessary, 903 * it splits the entry into two. 904 */ 905 906 #define vm_map_clip_end(map, entry, endaddr) \ 907 { \ 908 if (endaddr < entry->end) \ 909 _vm_map_clip_end(map, entry, endaddr); \ 910 } 911 912 /* 913 * This routine is called only when it is known that 914 * the entry must be split. 915 */ 916 static void 917 _vm_map_clip_end(map, entry, end) 918 vm_map_t map; 919 vm_map_entry_t entry; 920 vm_offset_t end; 921 { 922 vm_map_entry_t new_entry; 923 924 /* 925 * If there is no object backing this entry, we might as well create 926 * one now. If we defer it, an object can get created after the map 927 * is clipped, and individual objects will be created for the split-up 928 * map. This is a bit of a hack, but is also about the best place to 929 * put this improvement. 930 */ 931 932 if (entry->object.vm_object == NULL && !map->system_map) { 933 vm_object_t object; 934 object = vm_object_allocate(OBJT_DEFAULT, 935 atop(entry->end - entry->start)); 936 entry->object.vm_object = object; 937 entry->offset = 0; 938 } 939 940 /* 941 * Create a new entry and insert it AFTER the specified entry 942 */ 943 944 new_entry = vm_map_entry_create(map); 945 *new_entry = *entry; 946 947 new_entry->start = entry->end = end; 948 new_entry->offset += (end - entry->start); 949 950 vm_map_entry_link(map, entry, new_entry); 951 952 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { 953 vm_object_reference(new_entry->object.vm_object); 954 } 955 } 956 957 /* 958 * VM_MAP_RANGE_CHECK: [ internal use only ] 959 * 960 * Asserts that the starting and ending region 961 * addresses fall within the valid range of the map. 
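 *
 *	Illustrative example (not part of the original source):  for a map
 *	whose valid range is [min, max), a request of
 *
 *		start = min - PAGE_SIZE;  end = max + PAGE_SIZE;
 *		VM_MAP_RANGE_CHECK(map, start, end);
 *
 *	is clamped to start == min and end == max, while a request lying
 *	entirely outside the map collapses to an empty range (start == end).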
 */
#define	VM_MAP_RANGE_CHECK(map, start, end)		\
		{					\
		if (start < vm_map_min(map))		\
			start = vm_map_min(map);	\
		if (end > vm_map_max(map))		\
			end = vm_map_max(map);		\
		if (start > end)			\
			start = end;			\
		}

/*
 *	vm_map_transition_wait:	[ kernel use only ]
 *
 *	Used to block when an in-transition collision occurs.  The map
 *	is unlocked for the sleep and relocked before the return.
 */
static
void
vm_map_transition_wait(vm_map_t map)
{
	vm_map_unlock(map);
	tsleep(map, PVM, "vment", 0);
	vm_map_lock(map);
}

/*
 * CLIP_CHECK_BACK
 * CLIP_CHECK_FWD
 *
 *	When we do blocking operations with the map lock held it is
 *	possible that a clip might have occurred on our in-transit entry,
 *	requiring an adjustment to the entry in our loop.  These macros
 *	help the pageable and clip_range code deal with the case.  The
 *	conditional costs virtually nothing if no clipping has occurred.
 */

#define	CLIP_CHECK_BACK(entry, save_start)		\
    do {						\
	    while (entry->start != save_start) {	\
		    entry = entry->prev;		\
		    KASSERT(entry != &map->header, ("bad entry clip")); \
	    }						\
    } while(0)

#define	CLIP_CHECK_FWD(entry, save_end)			\
    do {						\
	    while (entry->end != save_end) {		\
		    entry = entry->next;		\
		    KASSERT(entry != &map->header, ("bad entry clip")); \
	    }						\
    } while(0)


/*
 *	vm_map_clip_range:	[ kernel use only ]
 *
 *	Clip the specified range and return the base entry.  The
 *	range may cover several entries starting at the returned base
 *	and the first and last entry in the covering sequence will be
 *	properly clipped to the requested start and end address.
 *
 *	If no holes are allowed you should pass the MAP_CLIP_NO_HOLES
 *	flag.
 *
 *	The MAP_ENTRY_IN_TRANSITION flag will be set for the entries
 *	covered by the requested range.
 *
 *	The map must be exclusively locked on entry and will remain locked
 *	on return.  If no range exists or the range contains holes and you
 *	specified that no holes were allowed, NULL will be returned.  This
 *	routine may temporarily unlock the map in order to avoid a deadlock
 *	when sleeping.
 */
static
vm_map_entry_t
vm_map_clip_range(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags)
{
	vm_map_entry_t start_entry;
	vm_map_entry_t entry;

	/*
	 * Locate the entry and effect initial clipping.  The in-transition
	 * case does not occur very often so do not try to optimize it.
	 */
again:
	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE)
		return (NULL);
	entry = start_entry;
	if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
		entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
		++cnt.v_intrans_coll;
		++cnt.v_intrans_wait;
		vm_map_transition_wait(map);
		/*
		 * entry and/or start_entry may have been clipped while
		 * we slept, or may have gone away entirely.  We have
		 * to restart from the lookup.
		 */
		goto again;
	}
	/*
	 * Since we hold an exclusive map lock we do not have to restart
	 * after clipping, even though clipping may block in zalloc.
	 */
	vm_map_clip_start(map, entry, start);
	vm_map_clip_end(map, entry, end);
	entry->eflags |= MAP_ENTRY_IN_TRANSITION;

	/*
	 * Scan entries covered by the range.
When working on the next 1073 * entry a restart need only re-loop on the current entry which 1074 * we have already locked, since 'next' may have changed. Also, 1075 * even though entry is safe, it may have been clipped so we 1076 * have to iterate forwards through the clip after sleeping. 1077 */ 1078 while (entry->next != &map->header && entry->next->start < end) { 1079 vm_map_entry_t next = entry->next; 1080 1081 if (flags & MAP_CLIP_NO_HOLES) { 1082 if (next->start > entry->end) { 1083 vm_map_unclip_range(map, start_entry, 1084 start, entry->end, flags); 1085 return(NULL); 1086 } 1087 } 1088 1089 if (next->eflags & MAP_ENTRY_IN_TRANSITION) { 1090 vm_offset_t save_end = entry->end; 1091 next->eflags |= MAP_ENTRY_NEEDS_WAKEUP; 1092 ++cnt.v_intrans_coll; 1093 ++cnt.v_intrans_wait; 1094 vm_map_transition_wait(map); 1095 1096 /* 1097 * clips might have occured while we blocked. 1098 */ 1099 CLIP_CHECK_FWD(entry, save_end); 1100 CLIP_CHECK_BACK(start_entry, start); 1101 continue; 1102 } 1103 /* 1104 * No restart necessary even though clip_end may block, we 1105 * are holding the map lock. 1106 */ 1107 vm_map_clip_end(map, next, end); 1108 next->eflags |= MAP_ENTRY_IN_TRANSITION; 1109 entry = next; 1110 } 1111 if (flags & MAP_CLIP_NO_HOLES) { 1112 if (entry->end != end) { 1113 vm_map_unclip_range(map, start_entry, 1114 start, entry->end, flags); 1115 return(NULL); 1116 } 1117 } 1118 return(start_entry); 1119 } 1120 1121 /* 1122 * vm_map_unclip_range: [ kernel use only ] 1123 * 1124 * Undo the effect of vm_map_clip_range(). You should pass the same 1125 * flags and the same range that you passed to vm_map_clip_range(). 1126 * This code will clear the in-transition flag on the entries and 1127 * wake up anyone waiting. This code will also simplify the sequence 1128 * and attempt to merge it with entries before and after the sequence. 1129 * 1130 * The map must be locked on entry and will remain locked on return. 1131 * 1132 * Note that you should also pass the start_entry returned by 1133 * vm_map_clip_range(). However, if you block between the two calls 1134 * with the map unlocked please be aware that the start_entry may 1135 * have been clipped and you may need to scan it backwards to find 1136 * the entry corresponding with the original start address. You are 1137 * responsible for this, vm_map_unclip_range() expects the correct 1138 * start_entry to be passed to it and will KASSERT otherwise. 1139 */ 1140 static 1141 void 1142 vm_map_unclip_range( 1143 vm_map_t map, 1144 vm_map_entry_t start_entry, 1145 vm_offset_t start, 1146 vm_offset_t end, 1147 int flags) 1148 { 1149 vm_map_entry_t entry; 1150 1151 entry = start_entry; 1152 1153 KASSERT(entry->start == start, ("unclip_range: illegal base entry")); 1154 while (entry != &map->header && entry->start < end) { 1155 KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION, ("in-transition flag not set during unclip on: %p", entry)); 1156 KASSERT(entry->end <= end, ("unclip_range: tail wasn't clipped")); 1157 entry->eflags &= ~MAP_ENTRY_IN_TRANSITION; 1158 if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) { 1159 entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP; 1160 wakeup(map); 1161 } 1162 entry = entry->next; 1163 } 1164 1165 /* 1166 * Simplification does not block so there is no restart case. 
1167 */ 1168 entry = start_entry; 1169 while (entry != &map->header && entry->start < end) { 1170 vm_map_simplify_entry(map, entry); 1171 entry = entry->next; 1172 } 1173 } 1174 1175 /* 1176 * vm_map_submap: [ kernel use only ] 1177 * 1178 * Mark the given range as handled by a subordinate map. 1179 * 1180 * This range must have been created with vm_map_find, 1181 * and no other operations may have been performed on this 1182 * range prior to calling vm_map_submap. 1183 * 1184 * Only a limited number of operations can be performed 1185 * within this rage after calling vm_map_submap: 1186 * vm_fault 1187 * [Don't try vm_map_copy!] 1188 * 1189 * To remove a submapping, one must first remove the 1190 * range from the superior map, and then destroy the 1191 * submap (if desired). [Better yet, don't try it.] 1192 */ 1193 int 1194 vm_map_submap(map, start, end, submap) 1195 vm_map_t map; 1196 vm_offset_t start; 1197 vm_offset_t end; 1198 vm_map_t submap; 1199 { 1200 vm_map_entry_t entry; 1201 int result = KERN_INVALID_ARGUMENT; 1202 1203 vm_map_lock(map); 1204 1205 VM_MAP_RANGE_CHECK(map, start, end); 1206 1207 if (vm_map_lookup_entry(map, start, &entry)) { 1208 vm_map_clip_start(map, entry, start); 1209 } else { 1210 entry = entry->next; 1211 } 1212 1213 vm_map_clip_end(map, entry, end); 1214 1215 if ((entry->start == start) && (entry->end == end) && 1216 ((entry->eflags & MAP_ENTRY_COW) == 0) && 1217 (entry->object.vm_object == NULL)) { 1218 entry->object.sub_map = submap; 1219 entry->eflags |= MAP_ENTRY_IS_SUB_MAP; 1220 result = KERN_SUCCESS; 1221 } 1222 vm_map_unlock(map); 1223 1224 return (result); 1225 } 1226 1227 /* 1228 * vm_map_protect: 1229 * 1230 * Sets the protection of the specified address 1231 * region in the target map. If "set_max" is 1232 * specified, the maximum protection is to be set; 1233 * otherwise, only the current protection is affected. 1234 */ 1235 int 1236 vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end, 1237 vm_prot_t new_prot, boolean_t set_max) 1238 { 1239 vm_map_entry_t current; 1240 vm_map_entry_t entry; 1241 1242 vm_map_lock(map); 1243 1244 VM_MAP_RANGE_CHECK(map, start, end); 1245 1246 if (vm_map_lookup_entry(map, start, &entry)) { 1247 vm_map_clip_start(map, entry, start); 1248 } else { 1249 entry = entry->next; 1250 } 1251 1252 /* 1253 * Make a first pass to check for protection violations. 1254 */ 1255 1256 current = entry; 1257 while ((current != &map->header) && (current->start < end)) { 1258 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) { 1259 vm_map_unlock(map); 1260 return (KERN_INVALID_ARGUMENT); 1261 } 1262 if ((new_prot & current->max_protection) != new_prot) { 1263 vm_map_unlock(map); 1264 return (KERN_PROTECTION_FAILURE); 1265 } 1266 current = current->next; 1267 } 1268 1269 /* 1270 * Go back and fix up protections. [Note that clipping is not 1271 * necessary the second time.] 1272 */ 1273 1274 current = entry; 1275 1276 while ((current != &map->header) && (current->start < end)) { 1277 vm_prot_t old_prot; 1278 1279 vm_map_clip_end(map, current, end); 1280 1281 old_prot = current->protection; 1282 if (set_max) 1283 current->protection = 1284 (current->max_protection = new_prot) & 1285 old_prot; 1286 else 1287 current->protection = new_prot; 1288 1289 /* 1290 * Update physical map if necessary. Worry about copy-on-write 1291 * here -- CHECK THIS XXX 1292 */ 1293 1294 if (current->protection != old_prot) { 1295 #define MASK(entry) (((entry)->eflags & MAP_ENTRY_COW) ? 
~VM_PROT_WRITE : \ 1296 VM_PROT_ALL) 1297 1298 pmap_protect(map->pmap, current->start, 1299 current->end, 1300 current->protection & MASK(current)); 1301 #undef MASK 1302 } 1303 1304 vm_map_simplify_entry(map, current); 1305 1306 current = current->next; 1307 } 1308 1309 vm_map_unlock(map); 1310 return (KERN_SUCCESS); 1311 } 1312 1313 /* 1314 * vm_map_madvise: 1315 * 1316 * This routine traverses a processes map handling the madvise 1317 * system call. Advisories are classified as either those effecting 1318 * the vm_map_entry structure, or those effecting the underlying 1319 * objects. 1320 */ 1321 1322 int 1323 vm_map_madvise(map, start, end, behav) 1324 vm_map_t map; 1325 vm_offset_t start, end; 1326 int behav; 1327 { 1328 vm_map_entry_t current, entry; 1329 int modify_map = 0; 1330 1331 /* 1332 * Some madvise calls directly modify the vm_map_entry, in which case 1333 * we need to use an exclusive lock on the map and we need to perform 1334 * various clipping operations. Otherwise we only need a read-lock 1335 * on the map. 1336 */ 1337 1338 switch(behav) { 1339 case MADV_NORMAL: 1340 case MADV_SEQUENTIAL: 1341 case MADV_RANDOM: 1342 case MADV_NOSYNC: 1343 case MADV_AUTOSYNC: 1344 case MADV_NOCORE: 1345 case MADV_CORE: 1346 modify_map = 1; 1347 vm_map_lock(map); 1348 break; 1349 case MADV_WILLNEED: 1350 case MADV_DONTNEED: 1351 case MADV_FREE: 1352 vm_map_lock_read(map); 1353 break; 1354 default: 1355 return (KERN_INVALID_ARGUMENT); 1356 } 1357 1358 /* 1359 * Locate starting entry and clip if necessary. 1360 */ 1361 1362 VM_MAP_RANGE_CHECK(map, start, end); 1363 1364 if (vm_map_lookup_entry(map, start, &entry)) { 1365 if (modify_map) 1366 vm_map_clip_start(map, entry, start); 1367 } else { 1368 entry = entry->next; 1369 } 1370 1371 if (modify_map) { 1372 /* 1373 * madvise behaviors that are implemented in the vm_map_entry. 1374 * 1375 * We clip the vm_map_entry so that behavioral changes are 1376 * limited to the specified address range. 1377 */ 1378 for (current = entry; 1379 (current != &map->header) && (current->start < end); 1380 current = current->next 1381 ) { 1382 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) 1383 continue; 1384 1385 vm_map_clip_end(map, current, end); 1386 1387 switch (behav) { 1388 case MADV_NORMAL: 1389 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL); 1390 break; 1391 case MADV_SEQUENTIAL: 1392 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL); 1393 break; 1394 case MADV_RANDOM: 1395 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM); 1396 break; 1397 case MADV_NOSYNC: 1398 current->eflags |= MAP_ENTRY_NOSYNC; 1399 break; 1400 case MADV_AUTOSYNC: 1401 current->eflags &= ~MAP_ENTRY_NOSYNC; 1402 break; 1403 case MADV_NOCORE: 1404 current->eflags |= MAP_ENTRY_NOCOREDUMP; 1405 break; 1406 case MADV_CORE: 1407 current->eflags &= ~MAP_ENTRY_NOCOREDUMP; 1408 break; 1409 default: 1410 break; 1411 } 1412 vm_map_simplify_entry(map, current); 1413 } 1414 vm_map_unlock(map); 1415 } else { 1416 vm_pindex_t pindex; 1417 int count; 1418 1419 /* 1420 * madvise behaviors that are implemented in the underlying 1421 * vm_object. 1422 * 1423 * Since we don't clip the vm_map_entry, we have to clip 1424 * the vm_object pindex and count. 
1425 */ 1426 for (current = entry; 1427 (current != &map->header) && (current->start < end); 1428 current = current->next 1429 ) { 1430 vm_offset_t useStart; 1431 1432 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) 1433 continue; 1434 1435 pindex = OFF_TO_IDX(current->offset); 1436 count = atop(current->end - current->start); 1437 useStart = current->start; 1438 1439 if (current->start < start) { 1440 pindex += atop(start - current->start); 1441 count -= atop(start - current->start); 1442 useStart = start; 1443 } 1444 if (current->end > end) 1445 count -= atop(current->end - end); 1446 1447 if (count <= 0) 1448 continue; 1449 1450 vm_object_madvise(current->object.vm_object, 1451 pindex, count, behav); 1452 if (behav == MADV_WILLNEED) { 1453 pmap_object_init_pt( 1454 map->pmap, 1455 useStart, 1456 current->object.vm_object, 1457 pindex, 1458 (count << PAGE_SHIFT), 1459 MAP_PREFAULT_MADVISE 1460 ); 1461 } 1462 } 1463 vm_map_unlock_read(map); 1464 } 1465 return(0); 1466 } 1467 1468 1469 /* 1470 * vm_map_inherit: 1471 * 1472 * Sets the inheritance of the specified address 1473 * range in the target map. Inheritance 1474 * affects how the map will be shared with 1475 * child maps at the time of vm_map_fork. 1476 */ 1477 int 1478 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end, 1479 vm_inherit_t new_inheritance) 1480 { 1481 vm_map_entry_t entry; 1482 vm_map_entry_t temp_entry; 1483 1484 switch (new_inheritance) { 1485 case VM_INHERIT_NONE: 1486 case VM_INHERIT_COPY: 1487 case VM_INHERIT_SHARE: 1488 break; 1489 default: 1490 return (KERN_INVALID_ARGUMENT); 1491 } 1492 1493 vm_map_lock(map); 1494 1495 VM_MAP_RANGE_CHECK(map, start, end); 1496 1497 if (vm_map_lookup_entry(map, start, &temp_entry)) { 1498 entry = temp_entry; 1499 vm_map_clip_start(map, entry, start); 1500 } else 1501 entry = temp_entry->next; 1502 1503 while ((entry != &map->header) && (entry->start < end)) { 1504 vm_map_clip_end(map, entry, end); 1505 1506 entry->inheritance = new_inheritance; 1507 1508 vm_map_simplify_entry(map, entry); 1509 1510 entry = entry->next; 1511 } 1512 1513 vm_map_unlock(map); 1514 return (KERN_SUCCESS); 1515 } 1516 1517 /* 1518 * Implement the semantics of mlock 1519 */ 1520 int 1521 vm_map_user_pageable(map, start, real_end, new_pageable) 1522 vm_map_t map; 1523 vm_offset_t start; 1524 vm_offset_t real_end; 1525 boolean_t new_pageable; 1526 { 1527 vm_map_entry_t entry; 1528 vm_map_entry_t start_entry; 1529 vm_offset_t end; 1530 int rv = KERN_SUCCESS; 1531 1532 vm_map_lock(map); 1533 VM_MAP_RANGE_CHECK(map, start, real_end); 1534 end = real_end; 1535 1536 start_entry = vm_map_clip_range(map, start, end, MAP_CLIP_NO_HOLES); 1537 if (start_entry == NULL) { 1538 vm_map_unlock(map); 1539 return (KERN_INVALID_ADDRESS); 1540 } 1541 1542 if (new_pageable == 0) { 1543 entry = start_entry; 1544 while ((entry != &map->header) && (entry->start < end)) { 1545 vm_offset_t save_start; 1546 vm_offset_t save_end; 1547 1548 /* 1549 * Already user wired or hard wired (trivial cases) 1550 */ 1551 if (entry->eflags & MAP_ENTRY_USER_WIRED) { 1552 entry = entry->next; 1553 continue; 1554 } 1555 if (entry->wired_count != 0) { 1556 entry->wired_count++; 1557 entry->eflags |= MAP_ENTRY_USER_WIRED; 1558 entry = entry->next; 1559 continue; 1560 } 1561 1562 /* 1563 * A new wiring requires instantiation of appropriate 1564 * management structures and the faulting in of the 1565 * page. 
			 */
			if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
				int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
				if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) {

					vm_object_shadow(&entry->object.vm_object,
					    &entry->offset,
					    atop(entry->end - entry->start));
					entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;

				} else if (entry->object.vm_object == NULL &&
				    !map->system_map) {

					entry->object.vm_object =
					    vm_object_allocate(OBJT_DEFAULT,
						atop(entry->end - entry->start));
					entry->offset = (vm_offset_t) 0;

				}
			}
			entry->wired_count++;
			entry->eflags |= MAP_ENTRY_USER_WIRED;

			/*
			 * Now fault in the area.  The map lock needs to be
			 * manipulated to avoid deadlocks.  The in-transition
			 * flag protects the entries.
			 */
			save_start = entry->start;
			save_end = entry->end;
			vm_map_unlock(map);
			map->timestamp++;
			rv = vm_fault_user_wire(map, save_start, save_end);
			vm_map_lock(map);
			if (rv) {
				CLIP_CHECK_BACK(entry, save_start);
				for (;;) {
					KASSERT(entry->wired_count == 1,
					    ("bad wired_count on entry"));
					entry->eflags &= ~MAP_ENTRY_USER_WIRED;
					entry->wired_count = 0;
					if (entry->end == save_end)
						break;
					entry = entry->next;
					KASSERT(entry != &map->header,
					    ("bad entry clip during backout"));
				}
				end = save_start;	/* unwire the rest */
				break;
			}
			/*
			 * Note that even though the entry might have been
			 * clipped, the USER_WIRED flag we set prevents
			 * duplication so we do not have to do a
			 * clip check.
			 */
			entry = entry->next;
		}

		/*
		 * If we failed fall through to the unwiring section to
		 * unwire what we had wired so far.  'end' has already
		 * been adjusted.
		 */
		if (rv)
			new_pageable = 1;

		/*
		 * start_entry might have been clipped if we unlocked the
		 * map and blocked.  No matter how clipped it has gotten
		 * there should be a fragment that is on our start boundary.
		 */
		CLIP_CHECK_BACK(start_entry, start);
	}

	/*
	 * Deal with the unwiring case.
	 */
	if (new_pageable) {
		/*
		 * This is the unwiring case.  We must first ensure that the
		 * range to be unwired is really wired down.  We know there
		 * are no holes.
		 */
		entry = start_entry;
		while ((entry != &map->header) && (entry->start < end)) {
			if ((entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
				rv = KERN_INVALID_ARGUMENT;
				goto done;
			}
			KASSERT(entry->wired_count != 0,
			    ("wired count was 0 with USER_WIRED set! %p", entry));
			entry = entry->next;
		}

		/*
		 * Now decrement the wiring count for each region.  If a
		 * region becomes completely unwired, unwire its physical
		 * pages and mappings.
		 *
		 * The scan must restart from start_entry; the validation
		 * loop above has already advanced 'entry' past the end
		 * of the range.
		 */
		entry = start_entry;
		while ((entry != &map->header) && (entry->start < end)) {
			KASSERT(entry->eflags & MAP_ENTRY_USER_WIRED,
			    ("expected USER_WIRED on entry %p", entry));
			entry->eflags &= ~MAP_ENTRY_USER_WIRED;
			entry->wired_count--;
			if (entry->wired_count == 0)
				vm_fault_unwire(map, entry->start, entry->end);
			entry = entry->next;
		}
	}
done:
	vm_map_unclip_range(map, start_entry, start, real_end,
	    MAP_CLIP_NO_HOLES);
	map->timestamp++;
	vm_map_unlock(map);
	return (rv);
}

/*
 * vm_map_pageable:
 *
 *	Sets the pageability of the specified address
 *	range in the target map.
Regions specified 1685 * as not pageable require locked-down physical 1686 * memory and physical page maps. 1687 * 1688 * The map must not be locked, but a reference 1689 * must remain to the map throughout the call. 1690 */ 1691 int 1692 vm_map_pageable(map, start, real_end, new_pageable) 1693 vm_map_t map; 1694 vm_offset_t start; 1695 vm_offset_t real_end; 1696 boolean_t new_pageable; 1697 { 1698 vm_map_entry_t entry; 1699 vm_map_entry_t start_entry; 1700 vm_offset_t end; 1701 int rv = KERN_SUCCESS; 1702 int s; 1703 1704 vm_map_lock(map); 1705 VM_MAP_RANGE_CHECK(map, start, real_end); 1706 end = real_end; 1707 1708 start_entry = vm_map_clip_range(map, start, end, MAP_CLIP_NO_HOLES); 1709 if (start_entry == NULL) { 1710 vm_map_unlock(map); 1711 return (KERN_INVALID_ADDRESS); 1712 } 1713 if (new_pageable == 0) { 1714 /* 1715 * Wiring. 1716 * 1717 * 1. Holding the write lock, we create any shadow or zero-fill 1718 * objects that need to be created. Then we clip each map 1719 * entry to the region to be wired and increment its wiring 1720 * count. We create objects before clipping the map entries 1721 * to avoid object proliferation. 1722 * 1723 * 2. We downgrade to a read lock, and call vm_fault_wire to 1724 * fault in the pages for any newly wired area (wired_count is 1725 * 1). 1726 * 1727 * Downgrading to a read lock for vm_fault_wire avoids a 1728 * possible deadlock with another process that may have faulted 1729 * on one of the pages to be wired (it would mark the page busy, 1730 * blocking us, then in turn block on the map lock that we 1731 * hold). Because of problems in the recursive lock package, 1732 * we cannot upgrade to a write lock in vm_map_lookup. Thus, 1733 * any actions that require the write lock must be done 1734 * beforehand. Because we keep the read lock on the map, the 1735 * copy-on-write status of the entries we modify here cannot 1736 * change. 1737 */ 1738 1739 entry = start_entry; 1740 while ((entry != &map->header) && (entry->start < end)) { 1741 /* 1742 * Trivial case if the entry is already wired 1743 */ 1744 if (entry->wired_count) { 1745 entry->wired_count++; 1746 entry = entry->next; 1747 continue; 1748 } 1749 1750 /* 1751 * The entry is being newly wired, we have to setup 1752 * appropriate management structures. A shadow 1753 * object is required for a copy-on-write region, 1754 * or a normal object for a zero-fill region. We 1755 * do not have to do this for entries that point to sub 1756 * maps because we won't hold the lock on the sub map. 1757 */ 1758 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { 1759 int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY; 1760 if (copyflag && 1761 ((entry->protection & VM_PROT_WRITE) != 0)) { 1762 1763 vm_object_shadow(&entry->object.vm_object, 1764 &entry->offset, 1765 atop(entry->end - entry->start)); 1766 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; 1767 } else if (entry->object.vm_object == NULL && 1768 !map->system_map) { 1769 entry->object.vm_object = 1770 vm_object_allocate(OBJT_DEFAULT, 1771 atop(entry->end - entry->start)); 1772 entry->offset = (vm_offset_t) 0; 1773 } 1774 } 1775 1776 entry->wired_count++; 1777 entry = entry->next; 1778 } 1779 1780 /* 1781 * Pass 2. 1782 */ 1783 1784 /* 1785 * HACK HACK HACK HACK 1786 * 1787 * Unlock the map to avoid deadlocks. The in-transit flag 1788 * protects us from most changes but note that 1789 * clipping may still occur. To prevent clipping from 1790 * occuring after the unlock, except for when we are 1791 * blocking in vm_fault_wire, we must run at splvm(). 
1792 * Otherwise our accesses to entry->start and entry->end 1793 * could be corrupted. We have to set splvm() prior to 1794 * unlocking so start_entry does not change out from 1795 * under us at the very beginning of the loop. 1796 * 1797 * HACK HACK HACK HACK 1798 */ 1799 1800 s = splvm(); 1801 vm_map_unlock(map); 1802 1803 entry = start_entry; 1804 while (entry != &map->header && entry->start < end) { 1805 /* 1806 * If vm_fault_wire fails for any page we need to undo 1807 * what has been done. We decrement the wiring count 1808 * for those pages which have not yet been wired (now) 1809 * and unwire those that have (later). 1810 */ 1811 vm_offset_t save_start = entry->start; 1812 vm_offset_t save_end = entry->end; 1813 1814 if (entry->wired_count == 1) 1815 rv = vm_fault_wire(map, entry->start, entry->end); 1816 if (rv) { 1817 CLIP_CHECK_BACK(entry, save_start); 1818 for (;;) { 1819 KASSERT(entry->wired_count == 1, ("wired_count changed unexpectedly")); 1820 entry->wired_count = 0; 1821 if (entry->end == save_end) 1822 break; 1823 entry = entry->next; 1824 KASSERT(entry != &map->header, ("bad entry clip during backout")); 1825 } 1826 end = save_start; 1827 break; 1828 } 1829 CLIP_CHECK_FWD(entry, save_end); 1830 entry = entry->next; 1831 } 1832 splx(s); 1833 1834 /* 1835 * relock. start_entry is still IN_TRANSITION and must 1836 * still exist, but may have been clipped (handled just 1837 * below). 1838 */ 1839 vm_map_lock(map); 1840 1841 /* 1842 * If a failure occured undo everything by falling through 1843 * to the unwiring code. 'end' has already been adjusted 1844 * appropriately. 1845 */ 1846 if (rv) 1847 new_pageable = 1; 1848 1849 /* 1850 * start_entry might have been clipped if we unlocked the 1851 * map and blocked. No matter how clipped it has gotten 1852 * there should be a fragment that is on our start boundary. 1853 */ 1854 CLIP_CHECK_BACK(start_entry, start); 1855 } 1856 1857 if (new_pageable) { 1858 /* 1859 * This is the unwiring case. We must first ensure that the 1860 * range to be unwired is really wired down. We know there 1861 * are no holes. 1862 */ 1863 entry = start_entry; 1864 while ((entry != &map->header) && (entry->start < end)) { 1865 if (entry->wired_count == 0) { 1866 rv = KERN_INVALID_ARGUMENT; 1867 goto done; 1868 } 1869 entry = entry->next; 1870 } 1871 1872 /* 1873 * Now decrement the wiring count for each region. If a region 1874 * becomes completely unwired, unwire its physical pages and 1875 * mappings. 1876 */ 1877 entry = start_entry; 1878 while ((entry != &map->header) && (entry->start < end)) { 1879 entry->wired_count--; 1880 if (entry->wired_count == 0) 1881 vm_fault_unwire(map, entry->start, entry->end); 1882 entry = entry->next; 1883 } 1884 } 1885 done: 1886 vm_map_unclip_range(map, start_entry, start, real_end, 1887 MAP_CLIP_NO_HOLES); 1888 map->timestamp++; 1889 vm_map_unlock(map); 1890 return (rv); 1891 } 1892 1893 /* 1894 * vm_map_clean 1895 * 1896 * Push any dirty cached pages in the address range to their pager. 1897 * If syncio is TRUE, dirty pages are written synchronously. 1898 * If invalidate is TRUE, any cached pages are freed as well. 1899 * 1900 * Returns an error if any part of the specified range is not mapped. 
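 *
 *	Illustrative fragment (not part of the original source; 'addr' and
 *	'size' are placeholders):  an msync(2)-style synchronous flush of a
 *	range, without invalidating the cached pages:
 *
 *		rv = vm_map_clean(map, addr, addr + size, TRUE, FALSE);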
1901 */ 1902 int 1903 vm_map_clean(map, start, end, syncio, invalidate) 1904 vm_map_t map; 1905 vm_offset_t start; 1906 vm_offset_t end; 1907 boolean_t syncio; 1908 boolean_t invalidate; 1909 { 1910 vm_map_entry_t current; 1911 vm_map_entry_t entry; 1912 vm_size_t size; 1913 vm_object_t object; 1914 vm_ooffset_t offset; 1915 1916 vm_map_lock_read(map); 1917 VM_MAP_RANGE_CHECK(map, start, end); 1918 if (!vm_map_lookup_entry(map, start, &entry)) { 1919 vm_map_unlock_read(map); 1920 return (KERN_INVALID_ADDRESS); 1921 } 1922 /* 1923 * Make a first pass to check for holes. 1924 */ 1925 for (current = entry; current->start < end; current = current->next) { 1926 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) { 1927 vm_map_unlock_read(map); 1928 return (KERN_INVALID_ARGUMENT); 1929 } 1930 if (end > current->end && 1931 (current->next == &map->header || 1932 current->end != current->next->start)) { 1933 vm_map_unlock_read(map); 1934 return (KERN_INVALID_ADDRESS); 1935 } 1936 } 1937 1938 if (invalidate) 1939 pmap_remove(vm_map_pmap(map), start, end); 1940 /* 1941 * Make a second pass, cleaning/uncaching pages from the indicated 1942 * objects as we go. 1943 */ 1944 for (current = entry; current->start < end; current = current->next) { 1945 offset = current->offset + (start - current->start); 1946 size = (end <= current->end ? end : current->end) - start; 1947 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) { 1948 vm_map_t smap; 1949 vm_map_entry_t tentry; 1950 vm_size_t tsize; 1951 1952 smap = current->object.sub_map; 1953 vm_map_lock_read(smap); 1954 (void) vm_map_lookup_entry(smap, offset, &tentry); 1955 tsize = tentry->end - offset; 1956 if (tsize < size) 1957 size = tsize; 1958 object = tentry->object.vm_object; 1959 offset = tentry->offset + (offset - tentry->start); 1960 vm_map_unlock_read(smap); 1961 } else { 1962 object = current->object.vm_object; 1963 } 1964 /* 1965 * Note that there is absolutely no sense in writing out 1966 * anonymous objects, so we track down the vnode object 1967 * to write out. 1968 * We invalidate (remove) all pages from the address space 1969 * anyway, for semantic correctness. 1970 * 1971 * note: certain anonymous maps, such as MAP_NOSYNC maps, 1972 * may start out with a NULL object. 1973 */ 1974 while (object && object->backing_object) { 1975 object = object->backing_object; 1976 offset += object->backing_object_offset; 1977 if (object->size < OFF_TO_IDX( offset + size)) 1978 size = IDX_TO_OFF(object->size) - offset; 1979 } 1980 if (object && (object->type == OBJT_VNODE) && 1981 (current->protection & VM_PROT_WRITE)) { 1982 /* 1983 * Flush pages if writing is allowed, invalidate them 1984 * if invalidation requested. Pages undergoing I/O 1985 * will be ignored by vm_object_page_remove(). 1986 * 1987 * We cannot lock the vnode and then wait for paging 1988 * to complete without deadlocking against vm_fault. 1989 * Instead we simply call vm_object_page_remove() and 1990 * allow it to block internally on a page-by-page 1991 * basis when it encounters pages undergoing async 1992 * I/O. 1993 */ 1994 int flags; 1995 1996 vm_object_reference(object); 1997 vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curproc); 1998 flags = (syncio || invalidate) ? OBJPC_SYNC : 0; 1999 flags |= invalidate ? 
OBJPC_INVAL : 0; 2000 vm_object_page_clean(object, 2001 OFF_TO_IDX(offset), 2002 OFF_TO_IDX(offset + size + PAGE_MASK), 2003 flags); 2004 VOP_UNLOCK(object->handle, 0, curproc); 2005 vm_object_deallocate(object); 2006 } 2007 if (object && invalidate && 2008 ((object->type == OBJT_VNODE) || 2009 (object->type == OBJT_DEVICE))) { 2010 vm_object_reference(object); 2011 vm_object_page_remove(object, 2012 OFF_TO_IDX(offset), 2013 OFF_TO_IDX(offset + size + PAGE_MASK), 2014 FALSE); 2015 vm_object_deallocate(object); 2016 } 2017 start += size; 2018 } 2019 2020 vm_map_unlock_read(map); 2021 return (KERN_SUCCESS); 2022 } 2023 2024 /* 2025 * vm_map_entry_unwire: [ internal use only ] 2026 * 2027 * Make the region specified by this entry pageable. 2028 * 2029 * The map in question should be locked. 2030 * [This is the reason for this routine's existence.] 2031 */ 2032 static void 2033 vm_map_entry_unwire(map, entry) 2034 vm_map_t map; 2035 vm_map_entry_t entry; 2036 { 2037 vm_fault_unwire(map, entry->start, entry->end); 2038 entry->wired_count = 0; 2039 } 2040 2041 /* 2042 * vm_map_entry_delete: [ internal use only ] 2043 * 2044 * Deallocate the given entry from the target map. 2045 */ 2046 static void 2047 vm_map_entry_delete(map, entry) 2048 vm_map_t map; 2049 vm_map_entry_t entry; 2050 { 2051 vm_map_entry_unlink(map, entry); 2052 map->size -= entry->end - entry->start; 2053 2054 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { 2055 vm_object_deallocate(entry->object.vm_object); 2056 } 2057 2058 vm_map_entry_dispose(map, entry); 2059 } 2060 2061 /* 2062 * vm_map_delete: [ internal use only ] 2063 * 2064 * Deallocates the given address range from the target 2065 * map. 2066 */ 2067 int 2068 vm_map_delete(map, start, end) 2069 vm_map_t map; 2070 vm_offset_t start; 2071 vm_offset_t end; 2072 { 2073 vm_object_t object; 2074 vm_map_entry_t entry; 2075 vm_map_entry_t first_entry; 2076 2077 /* 2078 * Find the start of the region, and clip it 2079 */ 2080 2081 again: 2082 if (!vm_map_lookup_entry(map, start, &first_entry)) 2083 entry = first_entry->next; 2084 else { 2085 entry = first_entry; 2086 vm_map_clip_start(map, entry, start); 2087 /* 2088 * Fix the lookup hint now, rather than each time though the 2089 * loop. 2090 */ 2091 SAVE_HINT(map, entry->prev); 2092 } 2093 2094 /* 2095 * Save the free space hint 2096 */ 2097 2098 if (entry == &map->header) { 2099 map->first_free = &map->header; 2100 } else if (map->first_free->start >= start) { 2101 map->first_free = entry->prev; 2102 } 2103 2104 /* 2105 * Step through all entries in this region 2106 */ 2107 2108 while ((entry != &map->header) && (entry->start < end)) { 2109 vm_map_entry_t next; 2110 vm_offset_t s, e; 2111 vm_pindex_t offidxstart, offidxend, count; 2112 2113 /* 2114 * If we hit an in-transition entry we have to sleep and 2115 * retry. It's easier (and not really slower) to just retry 2116 * since this case occurs so rarely and the hint is already 2117 * pointing at the right place. We have to reset the 2118 * start offset so as not to accidently delete an entry 2119 * another process just created in vacated space. 
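 *
 * In other words, once vm_map_transition_wait() returns the map may
 * have changed arbitrarily while we slept, so rather than trusting a
 * stale entry pointer we reload 'start' from the blocking entry and
 * restart the whole lookup (goto again).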
2120 */ 2121 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) { 2122 entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; 2123 start = entry->start; 2124 ++cnt.v_intrans_coll; 2125 ++cnt.v_intrans_wait; 2126 vm_map_transition_wait(map); 2127 goto again; 2128 } 2129 vm_map_clip_end(map, entry, end); 2130 2131 s = entry->start; 2132 e = entry->end; 2133 next = entry->next; 2134 2135 offidxstart = OFF_TO_IDX(entry->offset); 2136 count = OFF_TO_IDX(e - s); 2137 object = entry->object.vm_object; 2138 2139 /* 2140 * Unwire before removing addresses from the pmap; otherwise, 2141 * unwiring will put the entries back in the pmap. 2142 */ 2143 if (entry->wired_count != 0) { 2144 vm_map_entry_unwire(map, entry); 2145 } 2146 2147 offidxend = offidxstart + count; 2148 2149 if ((object == kernel_object) || (object == kmem_object)) { 2150 vm_object_page_remove(object, offidxstart, offidxend, FALSE); 2151 } else { 2152 pmap_remove(map->pmap, s, e); 2153 if (object != NULL && 2154 object->ref_count != 1 && 2155 (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING && 2156 (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { 2157 vm_object_collapse(object); 2158 vm_object_page_remove(object, offidxstart, offidxend, FALSE); 2159 if (object->type == OBJT_SWAP) { 2160 swap_pager_freespace(object, offidxstart, count); 2161 } 2162 if (offidxend >= object->size && 2163 offidxstart < object->size) { 2164 object->size = offidxstart; 2165 } 2166 } 2167 } 2168 2169 /* 2170 * Delete the entry (which may delete the object) only after 2171 * removing all pmap entries pointing to its pages. 2172 * (Otherwise, its page frames may be reallocated, and any 2173 * modify bits will be set in the wrong object!) 2174 */ 2175 vm_map_entry_delete(map, entry); 2176 entry = next; 2177 } 2178 return (KERN_SUCCESS); 2179 } 2180 2181 /* 2182 * vm_map_remove: 2183 * 2184 * Remove the given address range from the target map. 2185 * This is the exported form of vm_map_delete. 2186 */ 2187 int 2188 vm_map_remove(map, start, end) 2189 vm_map_t map; 2190 vm_offset_t start; 2191 vm_offset_t end; 2192 { 2193 int result, s = 0; 2194 2195 if (map == kmem_map || map == mb_map) 2196 s = splvm(); 2197 2198 vm_map_lock(map); 2199 VM_MAP_RANGE_CHECK(map, start, end); 2200 result = vm_map_delete(map, start, end); 2201 vm_map_unlock(map); 2202 2203 if (map == kmem_map || map == mb_map) 2204 splx(s); 2205 2206 return (result); 2207 } 2208 2209 /* 2210 * vm_map_check_protection: 2211 * 2212 * Assert that the target map allows the specified 2213 * privilege on the entire address region given. 2214 * The entire region must be allocated. 2215 */ 2216 boolean_t 2217 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end, 2218 vm_prot_t protection) 2219 { 2220 vm_map_entry_t entry; 2221 vm_map_entry_t tmp_entry; 2222 2223 if (!vm_map_lookup_entry(map, start, &tmp_entry)) { 2224 return (FALSE); 2225 } 2226 entry = tmp_entry; 2227 2228 while (start < end) { 2229 if (entry == &map->header) { 2230 return (FALSE); 2231 } 2232 /* 2233 * No holes allowed! 2234 */ 2235 2236 if (start < entry->start) { 2237 return (FALSE); 2238 } 2239 /* 2240 * Check protection associated with entry. 2241 */ 2242 2243 if ((entry->protection & protection) != protection) { 2244 return (FALSE); 2245 } 2246 /* go to next entry */ 2247 2248 start = entry->end; 2249 entry = entry->next; 2250 } 2251 return (TRUE); 2252 } 2253 2254 /* 2255 * Split the pages in a map entry into a new object. 
This affords 2256 * easier removal of unused pages, and keeps object inheritance from 2257 * being a negative impact on memory usage. 2258 */ 2259 static void 2260 vm_map_split(entry) 2261 vm_map_entry_t entry; 2262 { 2263 vm_page_t m; 2264 vm_object_t orig_object, new_object, source; 2265 vm_offset_t s, e; 2266 vm_pindex_t offidxstart, offidxend, idx; 2267 vm_size_t size; 2268 vm_ooffset_t offset; 2269 2270 orig_object = entry->object.vm_object; 2271 if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP) 2272 return; 2273 if (orig_object->ref_count <= 1) 2274 return; 2275 2276 offset = entry->offset; 2277 s = entry->start; 2278 e = entry->end; 2279 2280 offidxstart = OFF_TO_IDX(offset); 2281 offidxend = offidxstart + OFF_TO_IDX(e - s); 2282 size = offidxend - offidxstart; 2283 2284 new_object = vm_pager_allocate(orig_object->type, 2285 NULL, IDX_TO_OFF(size), VM_PROT_ALL, 0LL); 2286 if (new_object == NULL) 2287 return; 2288 2289 source = orig_object->backing_object; 2290 if (source != NULL) { 2291 vm_object_reference(source); /* Referenced by new_object */ 2292 LIST_INSERT_HEAD(&source->shadow_head, 2293 new_object, shadow_list); 2294 vm_object_clear_flag(source, OBJ_ONEMAPPING); 2295 new_object->backing_object_offset = 2296 orig_object->backing_object_offset + IDX_TO_OFF(offidxstart); 2297 new_object->backing_object = source; 2298 source->shadow_count++; 2299 source->generation++; 2300 } 2301 2302 for (idx = 0; idx < size; idx++) { 2303 vm_page_t m; 2304 2305 retry: 2306 m = vm_page_lookup(orig_object, offidxstart + idx); 2307 if (m == NULL) 2308 continue; 2309 2310 /* 2311 * We must wait for pending I/O to complete before we can 2312 * rename the page. 2313 * 2314 * We do not have to VM_PROT_NONE the page as mappings should 2315 * not be changed by this operation. 2316 */ 2317 if (vm_page_sleep_busy(m, TRUE, "spltwt")) 2318 goto retry; 2319 2320 vm_page_busy(m); 2321 vm_page_rename(m, new_object, idx); 2322 /* page automatically made dirty by rename and cache handled */ 2323 vm_page_busy(m); 2324 } 2325 2326 if (orig_object->type == OBJT_SWAP) { 2327 vm_object_pip_add(orig_object, 1); 2328 /* 2329 * copy orig_object pages into new_object 2330 * and destroy unneeded pages in 2331 * shadow object. 2332 */ 2333 swap_pager_copy(orig_object, new_object, offidxstart, 0); 2334 vm_object_pip_wakeup(orig_object); 2335 } 2336 2337 for (idx = 0; idx < size; idx++) { 2338 m = vm_page_lookup(new_object, idx); 2339 if (m) { 2340 vm_page_wakeup(m); 2341 } 2342 } 2343 2344 entry->object.vm_object = new_object; 2345 entry->offset = 0LL; 2346 vm_object_deallocate(orig_object); 2347 } 2348 2349 /* 2350 * vm_map_copy_entry: 2351 * 2352 * Copies the contents of the source entry to the destination 2353 * entry. The entries *must* be aligned properly. 2354 */ 2355 static void 2356 vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) 2357 vm_map_t src_map, dst_map; 2358 vm_map_entry_t src_entry, dst_entry; 2359 { 2360 vm_object_t src_object; 2361 2362 if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP) 2363 return; 2364 2365 if (src_entry->wired_count == 0) { 2366 2367 /* 2368 * If the source entry is marked needs_copy, it is already 2369 * write-protected. 2370 */ 2371 if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) { 2372 pmap_protect(src_map->pmap, 2373 src_entry->start, 2374 src_entry->end, 2375 src_entry->protection & ~VM_PROT_WRITE); 2376 } 2377 2378 /* 2379 * Make a copy of the object. 
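 *
 * For an anonymous (default or swap backed) object that is still
 * mapped only once and is not marked OBJ_NOSPLIT, the code below
 * first collapses it and then uses vm_map_split() to give this
 * entry's pages their own object, so only the affected range
 * becomes copy-on-write rather than the entire object.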
2380 */ 2381 if ((src_object = src_entry->object.vm_object) != NULL) { 2382 2383 if ((src_object->handle == NULL) && 2384 (src_object->type == OBJT_DEFAULT || 2385 src_object->type == OBJT_SWAP)) { 2386 vm_object_collapse(src_object); 2387 if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) { 2388 vm_map_split(src_entry); 2389 src_object = src_entry->object.vm_object; 2390 } 2391 } 2392 2393 vm_object_reference(src_object); 2394 vm_object_clear_flag(src_object, OBJ_ONEMAPPING); 2395 dst_entry->object.vm_object = src_object; 2396 src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY); 2397 dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY); 2398 dst_entry->offset = src_entry->offset; 2399 } else { 2400 dst_entry->object.vm_object = NULL; 2401 dst_entry->offset = 0; 2402 } 2403 2404 pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start, 2405 dst_entry->end - dst_entry->start, src_entry->start); 2406 } else { 2407 /* 2408 * Of course, wired down pages can't be set copy-on-write. 2409 * Cause wired pages to be copied into the new map by 2410 * simulating faults (the new pages are pageable) 2411 */ 2412 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry); 2413 } 2414 } 2415 2416 /* 2417 * vmspace_fork: 2418 * Create a new process vmspace structure and vm_map 2419 * based on those of an existing process. The new map 2420 * is based on the old map, according to the inheritance 2421 * values on the regions in that map. 2422 * 2423 * The source map must not be locked. 2424 */ 2425 struct vmspace * 2426 vmspace_fork(vm1) 2427 struct vmspace *vm1; 2428 { 2429 struct vmspace *vm2; 2430 vm_map_t old_map = &vm1->vm_map; 2431 vm_map_t new_map; 2432 vm_map_entry_t old_entry; 2433 vm_map_entry_t new_entry; 2434 vm_object_t object; 2435 2436 vm_map_lock(old_map); 2437 old_map->infork = 1; 2438 2439 vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset); 2440 bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy, 2441 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); 2442 new_map = &vm2->vm_map; /* XXX */ 2443 new_map->timestamp = 1; 2444 2445 old_entry = old_map->header.next; 2446 2447 while (old_entry != &old_map->header) { 2448 if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP) 2449 panic("vm_map_fork: encountered a submap"); 2450 2451 switch (old_entry->inheritance) { 2452 case VM_INHERIT_NONE: 2453 break; 2454 2455 case VM_INHERIT_SHARE: 2456 /* 2457 * Clone the entry, creating the shared object if necessary. 2458 */ 2459 object = old_entry->object.vm_object; 2460 if (object == NULL) { 2461 object = vm_object_allocate(OBJT_DEFAULT, 2462 atop(old_entry->end - old_entry->start)); 2463 old_entry->object.vm_object = object; 2464 old_entry->offset = (vm_offset_t) 0; 2465 } 2466 2467 /* 2468 * Add the reference before calling vm_object_shadow 2469 * to insure that a shadow object is created. 2470 */ 2471 vm_object_reference(object); 2472 if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) { 2473 vm_object_shadow(&old_entry->object.vm_object, 2474 &old_entry->offset, 2475 atop(old_entry->end - old_entry->start)); 2476 old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; 2477 /* Transfer the second reference too. */ 2478 vm_object_reference( 2479 old_entry->object.vm_object); 2480 vm_object_deallocate(object); 2481 object = old_entry->object.vm_object; 2482 } 2483 vm_object_clear_flag(object, OBJ_ONEMAPPING); 2484 2485 /* 2486 * Clone the entry, referencing the shared object. 
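 *
 * The child receives a byte-for-byte copy of the map entry but never
 * inherits user wiring; the wired state is cleared below and the
 * pmap_copy() that follows pre-populates the child's mappings.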
2487 */ 2488 new_entry = vm_map_entry_create(new_map); 2489 *new_entry = *old_entry; 2490 new_entry->eflags &= ~MAP_ENTRY_USER_WIRED; 2491 new_entry->wired_count = 0; 2492 2493 /* 2494 * Insert the entry into the new map -- we know we're 2495 * inserting at the end of the new map. 2496 */ 2497 2498 vm_map_entry_link(new_map, new_map->header.prev, 2499 new_entry); 2500 2501 /* 2502 * Update the physical map 2503 */ 2504 2505 pmap_copy(new_map->pmap, old_map->pmap, 2506 new_entry->start, 2507 (old_entry->end - old_entry->start), 2508 old_entry->start); 2509 break; 2510 2511 case VM_INHERIT_COPY: 2512 /* 2513 * Clone the entry and link into the map. 2514 */ 2515 new_entry = vm_map_entry_create(new_map); 2516 *new_entry = *old_entry; 2517 new_entry->eflags &= ~MAP_ENTRY_USER_WIRED; 2518 new_entry->wired_count = 0; 2519 new_entry->object.vm_object = NULL; 2520 vm_map_entry_link(new_map, new_map->header.prev, 2521 new_entry); 2522 vm_map_copy_entry(old_map, new_map, old_entry, 2523 new_entry); 2524 break; 2525 } 2526 old_entry = old_entry->next; 2527 } 2528 2529 new_map->size = old_map->size; 2530 old_map->infork = 0; 2531 vm_map_unlock(old_map); 2532 2533 return (vm2); 2534 } 2535 2536 int 2537 vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, 2538 vm_prot_t prot, vm_prot_t max, int cow) 2539 { 2540 vm_map_entry_t prev_entry; 2541 vm_map_entry_t new_stack_entry; 2542 vm_size_t init_ssize; 2543 int rv; 2544 2545 if (VM_MIN_ADDRESS > 0 && addrbos < VM_MIN_ADDRESS) 2546 return (KERN_NO_SPACE); 2547 2548 if (max_ssize < sgrowsiz) 2549 init_ssize = max_ssize; 2550 else 2551 init_ssize = sgrowsiz; 2552 2553 vm_map_lock(map); 2554 2555 /* If addr is already mapped, no go */ 2556 if (vm_map_lookup_entry(map, addrbos, &prev_entry)) { 2557 vm_map_unlock(map); 2558 return (KERN_NO_SPACE); 2559 } 2560 2561 /* If we would blow our VMEM resource limit, no go */ 2562 if (map->size + init_ssize > 2563 curproc->p_rlimit[RLIMIT_VMEM].rlim_cur) { 2564 vm_map_unlock(map); 2565 return (KERN_NO_SPACE); 2566 } 2567 2568 /* If we can't accomodate max_ssize in the current mapping, 2569 * no go. However, we need to be aware that subsequent user 2570 * mappings might map into the space we have reserved for 2571 * stack, and currently this space is not protected. 2572 * 2573 * Hopefully we will at least detect this condition 2574 * when we try to grow the stack. 2575 */ 2576 if ((prev_entry->next != &map->header) && 2577 (prev_entry->next->start < addrbos + max_ssize)) { 2578 vm_map_unlock(map); 2579 return (KERN_NO_SPACE); 2580 } 2581 2582 /* We initially map a stack of only init_ssize. We will 2583 * grow as needed later. Since this is to be a grow 2584 * down stack, we map at the top of the range. 2585 * 2586 * Note: we would normally expect prot and max to be 2587 * VM_PROT_ALL, and cow to be 0. Possibly we should 2588 * eliminate these as input parameters, and just 2589 * pass these values here in the insert call. 
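 *
 * Illustrative usage sketch only (not part of the original source):
 * an exec-style caller reserving the full soft stack limit below an
 * assumed user stack top might do
 *
 *	rv = vm_map_stack(&vmspace->vm_map, stack_top - maxssize,
 *	    maxssize, VM_PROT_ALL, VM_PROT_ALL, 0);
 *
 * where 'stack_top' and 'maxssize' are hypothetical caller-supplied
 * values; only the first sgrowsiz bytes are actually mapped up front.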
2590 */ 2591 rv = vm_map_insert(map, NULL, 0, addrbos + max_ssize - init_ssize, 2592 addrbos + max_ssize, prot, max, cow); 2593 2594 /* Now set the avail_ssize amount */ 2595 if (rv == KERN_SUCCESS){ 2596 if (prev_entry != &map->header) 2597 vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize); 2598 new_stack_entry = prev_entry->next; 2599 if (new_stack_entry->end != addrbos + max_ssize || 2600 new_stack_entry->start != addrbos + max_ssize - init_ssize) 2601 panic ("Bad entry start/end for new stack entry"); 2602 else 2603 new_stack_entry->avail_ssize = max_ssize - init_ssize; 2604 } 2605 2606 vm_map_unlock(map); 2607 return (rv); 2608 } 2609 2610 /* Attempts to grow a vm stack entry. Returns KERN_SUCCESS if the 2611 * desired address is already mapped, or if we successfully grow 2612 * the stack. Also returns KERN_SUCCESS if addr is outside the 2613 * stack range (this is strange, but preserves compatibility with 2614 * the grow function in vm_machdep.c). 2615 */ 2616 int 2617 vm_map_growstack (struct proc *p, vm_offset_t addr) 2618 { 2619 vm_map_entry_t prev_entry; 2620 vm_map_entry_t stack_entry; 2621 vm_map_entry_t new_stack_entry; 2622 struct vmspace *vm = p->p_vmspace; 2623 vm_map_t map = &vm->vm_map; 2624 vm_offset_t end; 2625 int grow_amount; 2626 int rv = KERN_SUCCESS; 2627 int is_procstack; 2628 int use_read_lock = 1; 2629 2630 Retry: 2631 if (use_read_lock) 2632 vm_map_lock_read(map); 2633 else 2634 vm_map_lock(map); 2635 2636 /* If addr is already in the entry range, no need to grow.*/ 2637 if (vm_map_lookup_entry(map, addr, &prev_entry)) 2638 goto done; 2639 2640 if ((stack_entry = prev_entry->next) == &map->header) 2641 goto done; 2642 if (prev_entry == &map->header) 2643 end = stack_entry->start - stack_entry->avail_ssize; 2644 else 2645 end = prev_entry->end; 2646 2647 /* This next test mimics the old grow function in vm_machdep.c. 2648 * It really doesn't quite make sense, but we do it anyway 2649 * for compatibility. 2650 * 2651 * If not growable stack, return success. This signals the 2652 * caller to proceed as he would normally with normal vm. 2653 */ 2654 if (stack_entry->avail_ssize < 1 || 2655 addr >= stack_entry->start || 2656 addr < stack_entry->start - stack_entry->avail_ssize) { 2657 goto done; 2658 } 2659 2660 /* Find the minimum grow amount */ 2661 grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE); 2662 if (grow_amount > stack_entry->avail_ssize) { 2663 rv = KERN_NO_SPACE; 2664 goto done; 2665 } 2666 2667 /* If there is no longer enough space between the entries 2668 * nogo, and adjust the available space. Note: this 2669 * should only happen if the user has mapped into the 2670 * stack area after the stack was created, and is 2671 * probably an error. 2672 * 2673 * This also effectively destroys any guard page the user 2674 * might have intended by limiting the stack size. 2675 */ 2676 if (grow_amount > stack_entry->start - end) { 2677 if (use_read_lock && vm_map_lock_upgrade(map)) { 2678 use_read_lock = 0; 2679 goto Retry; 2680 } 2681 use_read_lock = 0; 2682 stack_entry->avail_ssize = stack_entry->start - end; 2683 rv = KERN_NO_SPACE; 2684 goto done; 2685 } 2686 2687 is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr; 2688 2689 /* If this is the main process stack, see if we're over the 2690 * stack limit. 
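 *
 * ctob() converts the current stack size, kept in pages in
 * vm->vm_ssize, into bytes for comparison against the RLIMIT_STACK
 * soft limit.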
2691 */ 2692 if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > 2693 p->p_rlimit[RLIMIT_STACK].rlim_cur)) { 2694 rv = KERN_NO_SPACE; 2695 goto done; 2696 } 2697 2698 /* Round up the grow amount modulo SGROWSIZ */ 2699 grow_amount = roundup (grow_amount, sgrowsiz); 2700 if (grow_amount > stack_entry->avail_ssize) { 2701 grow_amount = stack_entry->avail_ssize; 2702 } 2703 if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > 2704 p->p_rlimit[RLIMIT_STACK].rlim_cur)) { 2705 grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur - 2706 ctob(vm->vm_ssize); 2707 } 2708 2709 /* If we would blow our VMEM resource limit, no go */ 2710 if (map->size + grow_amount > 2711 curproc->p_rlimit[RLIMIT_VMEM].rlim_cur) { 2712 rv = KERN_NO_SPACE; 2713 goto done; 2714 } 2715 2716 if (use_read_lock && vm_map_lock_upgrade(map)) { 2717 use_read_lock = 0; 2718 goto Retry; 2719 } 2720 use_read_lock = 0; 2721 2722 /* Get the preliminary new entry start value */ 2723 addr = stack_entry->start - grow_amount; 2724 2725 /* If this puts us into the previous entry, cut back our growth 2726 * to the available space. Also, see the note above. 2727 */ 2728 if (addr < end) { 2729 stack_entry->avail_ssize = stack_entry->start - end; 2730 addr = end; 2731 } 2732 2733 rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start, 2734 VM_PROT_ALL, 2735 VM_PROT_ALL, 2736 0); 2737 2738 /* Adjust the available stack space by the amount we grew. */ 2739 if (rv == KERN_SUCCESS) { 2740 if (prev_entry != &map->header) 2741 vm_map_clip_end(map, prev_entry, addr); 2742 new_stack_entry = prev_entry->next; 2743 if (new_stack_entry->end != stack_entry->start || 2744 new_stack_entry->start != addr) 2745 panic ("Bad stack grow start/end in new stack entry"); 2746 else { 2747 new_stack_entry->avail_ssize = stack_entry->avail_ssize - 2748 (new_stack_entry->end - 2749 new_stack_entry->start); 2750 if (is_procstack) 2751 vm->vm_ssize += btoc(new_stack_entry->end - 2752 new_stack_entry->start); 2753 } 2754 } 2755 2756 done: 2757 if (use_read_lock) 2758 vm_map_unlock_read(map); 2759 else 2760 vm_map_unlock(map); 2761 return (rv); 2762 } 2763 2764 /* 2765 * Unshare the specified VM space for exec. If other processes are 2766 * mapped to it, then create a new one. The new vmspace is null. 2767 */ 2768 2769 void 2770 vmspace_exec(struct proc *p) { 2771 struct vmspace *oldvmspace = p->p_vmspace; 2772 struct vmspace *newvmspace; 2773 vm_map_t map = &p->p_vmspace->vm_map; 2774 2775 newvmspace = vmspace_alloc(map->min_offset, map->max_offset); 2776 bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy, 2777 (caddr_t) (newvmspace + 1) - (caddr_t) &newvmspace->vm_startcopy); 2778 /* 2779 * This code is written like this for prototype purposes. The 2780 * goal is to avoid running down the vmspace here, but let the 2781 * other process's that are still using the vmspace to finally 2782 * run it down. Even though there is little or no chance of blocking 2783 * here, it is a good idea to keep this form for future mods. 2784 */ 2785 vmspace_free(oldvmspace); 2786 p->p_vmspace = newvmspace; 2787 pmap_pinit2(vmspace_pmap(newvmspace)); 2788 if (p == curproc) 2789 pmap_activate(p); 2790 } 2791 2792 /* 2793 * Unshare the specified VM space for forcing COW. This 2794 * is called by rfork, for the (RFMEM|RFPROC) == 0 case. 
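 *
 * If no other process shares the vmspace (vm_refcnt == 1) this is a
 * no-op; otherwise the address space is forked into a private copy
 * with vmspace_fork() and the old reference is dropped.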
2795 */ 2796 2797 void 2798 vmspace_unshare(struct proc *p) { 2799 struct vmspace *oldvmspace = p->p_vmspace; 2800 struct vmspace *newvmspace; 2801 2802 if (oldvmspace->vm_refcnt == 1) 2803 return; 2804 newvmspace = vmspace_fork(oldvmspace); 2805 vmspace_free(oldvmspace); 2806 p->p_vmspace = newvmspace; 2807 pmap_pinit2(vmspace_pmap(newvmspace)); 2808 if (p == curproc) 2809 pmap_activate(p); 2810 } 2811 2812 2813 /* 2814 * vm_map_lookup: 2815 * 2816 * Finds the VM object, offset, and 2817 * protection for a given virtual address in the 2818 * specified map, assuming a page fault of the 2819 * type specified. 2820 * 2821 * Leaves the map in question locked for read; return 2822 * values are guaranteed until a vm_map_lookup_done 2823 * call is performed. Note that the map argument 2824 * is in/out; the returned map must be used in 2825 * the call to vm_map_lookup_done. 2826 * 2827 * A handle (out_entry) is returned for use in 2828 * vm_map_lookup_done, to make that fast. 2829 * 2830 * If a lookup is requested with "write protection" 2831 * specified, the map may be changed to perform virtual 2832 * copying operations, although the data referenced will 2833 * remain the same. 2834 */ 2835 int 2836 vm_map_lookup(vm_map_t *var_map, /* IN/OUT */ 2837 vm_offset_t vaddr, 2838 vm_prot_t fault_typea, 2839 vm_map_entry_t *out_entry, /* OUT */ 2840 vm_object_t *object, /* OUT */ 2841 vm_pindex_t *pindex, /* OUT */ 2842 vm_prot_t *out_prot, /* OUT */ 2843 boolean_t *wired) /* OUT */ 2844 { 2845 vm_map_entry_t entry; 2846 vm_map_t map = *var_map; 2847 vm_prot_t prot; 2848 vm_prot_t fault_type = fault_typea; 2849 int use_read_lock = 1; 2850 int rv = KERN_SUCCESS; 2851 2852 RetryLookup: 2853 if (use_read_lock) 2854 vm_map_lock_read(map); 2855 else 2856 vm_map_lock(map); 2857 2858 /* 2859 * If the map has an interesting hint, try it before calling full 2860 * blown lookup routine. 2861 */ 2862 entry = map->hint; 2863 *out_entry = entry; 2864 2865 if ((entry == &map->header) || 2866 (vaddr < entry->start) || (vaddr >= entry->end)) { 2867 vm_map_entry_t tmp_entry; 2868 2869 /* 2870 * Entry was either not a valid hint, or the vaddr was not 2871 * contained in the entry, so do a full lookup. 2872 */ 2873 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) { 2874 rv = KERN_INVALID_ADDRESS; 2875 goto done; 2876 } 2877 2878 entry = tmp_entry; 2879 *out_entry = entry; 2880 } 2881 2882 /* 2883 * Handle submaps. 2884 */ 2885 2886 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { 2887 vm_map_t old_map = map; 2888 2889 *var_map = map = entry->object.sub_map; 2890 if (use_read_lock) 2891 vm_map_unlock_read(old_map); 2892 else 2893 vm_map_unlock(old_map); 2894 use_read_lock = 1; 2895 goto RetryLookup; 2896 } 2897 2898 /* 2899 * Check whether this task is allowed to have this page. 2900 * Note the special case for MAP_ENTRY_COW 2901 * pages with an override. This is to implement a forced 2902 * COW for debuggers. 
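 *
 * With VM_PROT_OVERRIDE_WRITE the access is checked against the
 * entry's max_protection instead of its current protection, which
 * lets a debugger write through a read-only mapping; user-wired COW
 * entries without the override are still rejected below.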
2903 */ 2904 2905 if (fault_type & VM_PROT_OVERRIDE_WRITE) 2906 prot = entry->max_protection; 2907 else 2908 prot = entry->protection; 2909 2910 fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE); 2911 if ((fault_type & prot) != fault_type) { 2912 rv = KERN_PROTECTION_FAILURE; 2913 goto done; 2914 } 2915 2916 if ((entry->eflags & MAP_ENTRY_USER_WIRED) && 2917 (entry->eflags & MAP_ENTRY_COW) && 2918 (fault_type & VM_PROT_WRITE) && 2919 (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) { 2920 rv = KERN_PROTECTION_FAILURE; 2921 goto done; 2922 } 2923 2924 /* 2925 * If this page is not pageable, we have to get it for all possible 2926 * accesses. 2927 */ 2928 2929 *wired = (entry->wired_count != 0); 2930 if (*wired) 2931 prot = fault_type = entry->protection; 2932 2933 /* 2934 * If the entry was copy-on-write, we either ... 2935 */ 2936 2937 if (entry->eflags & MAP_ENTRY_NEEDS_COPY) { 2938 /* 2939 * If we want to write the page, we may as well handle that 2940 * now since we've got the map locked. 2941 * 2942 * If we don't need to write the page, we just demote the 2943 * permissions allowed. 2944 */ 2945 2946 if (fault_type & VM_PROT_WRITE) { 2947 /* 2948 * Make a new object, and place it in the object 2949 * chain. Note that no new references have appeared 2950 * -- one just moved from the map to the new 2951 * object. 2952 */ 2953 2954 if (use_read_lock && vm_map_lock_upgrade(map)) { 2955 use_read_lock = 0; 2956 goto RetryLookup; 2957 } 2958 use_read_lock = 0; 2959 2960 vm_object_shadow( 2961 &entry->object.vm_object, 2962 &entry->offset, 2963 atop(entry->end - entry->start)); 2964 2965 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; 2966 } else { 2967 /* 2968 * We're attempting to read a copy-on-write page -- 2969 * don't allow writes. 2970 */ 2971 2972 prot &= ~VM_PROT_WRITE; 2973 } 2974 } 2975 2976 /* 2977 * Create an object if necessary. 2978 */ 2979 if (entry->object.vm_object == NULL && 2980 !map->system_map) { 2981 if (use_read_lock && vm_map_lock_upgrade(map)) { 2982 use_read_lock = 0; 2983 goto RetryLookup; 2984 } 2985 use_read_lock = 0; 2986 entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT, 2987 atop(entry->end - entry->start)); 2988 entry->offset = 0; 2989 } 2990 2991 /* 2992 * Return the object/offset from this entry. If the entry was 2993 * copy-on-write or empty, it has been fixed up. 2994 */ 2995 2996 *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset); 2997 *object = entry->object.vm_object; 2998 2999 /* 3000 * Return whether this is the only map sharing this data. On 3001 * success we return with a read lock held on the map. On failure 3002 * we return with the map unlocked. 3003 */ 3004 *out_prot = prot; 3005 done: 3006 if (rv == KERN_SUCCESS) { 3007 if (use_read_lock == 0) 3008 vm_map_lock_downgrade(map); 3009 } else if (use_read_lock) { 3010 vm_map_unlock_read(map); 3011 } else { 3012 vm_map_unlock(map); 3013 } 3014 return (rv); 3015 } 3016 3017 /* 3018 * vm_map_lookup_done: 3019 * 3020 * Releases locks acquired by a vm_map_lookup 3021 * (according to the handle returned by that lookup). 3022 */ 3023 3024 void 3025 vm_map_lookup_done(map, entry) 3026 vm_map_t map; 3027 vm_map_entry_t entry; 3028 { 3029 /* 3030 * Unlock the main-level map 3031 */ 3032 3033 vm_map_unlock_read(map); 3034 } 3035 3036 /* 3037 * Implement uiomove with VM operations. This handles (and collateral changes) 3038 * support every combination of source object modification, and COW type 3039 * operations. 
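 *
 * mapa/uaddra name the destination user map and address, srcobject
 * and cp the backing object and byte offset to map from, and cnta
 * the byte count.  If npages is non-NULL it accumulates the number
 * of pages spanned by each chunk that is remapped.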
3040 */ 3041 int 3042 vm_uiomove(mapa, srcobject, cp, cnta, uaddra, npages) 3043 vm_map_t mapa; 3044 vm_object_t srcobject; 3045 off_t cp; 3046 int cnta; 3047 vm_offset_t uaddra; 3048 int *npages; 3049 { 3050 vm_map_t map; 3051 vm_object_t first_object, oldobject, object; 3052 vm_map_entry_t entry; 3053 vm_prot_t prot; 3054 boolean_t wired; 3055 int tcnt, rv; 3056 vm_offset_t uaddr, start, end, tend; 3057 vm_pindex_t first_pindex, osize, oindex; 3058 off_t ooffset; 3059 int cnt; 3060 3061 if (npages) 3062 *npages = 0; 3063 3064 cnt = cnta; 3065 uaddr = uaddra; 3066 3067 while (cnt > 0) { 3068 map = mapa; 3069 3070 if ((vm_map_lookup(&map, uaddr, 3071 VM_PROT_READ, &entry, &first_object, 3072 &first_pindex, &prot, &wired)) != KERN_SUCCESS) { 3073 return EFAULT; 3074 } 3075 3076 vm_map_clip_start(map, entry, uaddr); 3077 3078 tcnt = cnt; 3079 tend = uaddr + tcnt; 3080 if (tend > entry->end) { 3081 tcnt = entry->end - uaddr; 3082 tend = entry->end; 3083 } 3084 3085 vm_map_clip_end(map, entry, tend); 3086 3087 start = entry->start; 3088 end = entry->end; 3089 3090 osize = atop(tcnt); 3091 3092 oindex = OFF_TO_IDX(cp); 3093 if (npages) { 3094 vm_pindex_t idx; 3095 for (idx = 0; idx < osize; idx++) { 3096 vm_page_t m; 3097 if ((m = vm_page_lookup(srcobject, oindex + idx)) == NULL) { 3098 vm_map_lookup_done(map, entry); 3099 return 0; 3100 } 3101 /* 3102 * disallow busy or invalid pages, but allow 3103 * m->busy pages if they are entirely valid. 3104 */ 3105 if ((m->flags & PG_BUSY) || 3106 ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) { 3107 vm_map_lookup_done(map, entry); 3108 return 0; 3109 } 3110 } 3111 } 3112 3113 /* 3114 * If we are changing an existing map entry, just redirect 3115 * the object, and change mappings. 3116 */ 3117 if ((first_object->type == OBJT_VNODE) && 3118 ((oldobject = entry->object.vm_object) == first_object)) { 3119 3120 if ((entry->offset != cp) || (oldobject != srcobject)) { 3121 /* 3122 * Remove old window into the file 3123 */ 3124 pmap_remove (map->pmap, uaddr, tend); 3125 3126 /* 3127 * Force copy on write for mmaped regions 3128 */ 3129 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize); 3130 3131 /* 3132 * Point the object appropriately 3133 */ 3134 if (oldobject != srcobject) { 3135 3136 /* 3137 * Set the object optimization hint flag 3138 */ 3139 vm_object_set_flag(srcobject, OBJ_OPT); 3140 vm_object_reference(srcobject); 3141 entry->object.vm_object = srcobject; 3142 3143 if (oldobject) { 3144 vm_object_deallocate(oldobject); 3145 } 3146 } 3147 3148 entry->offset = cp; 3149 map->timestamp++; 3150 } else { 3151 pmap_remove (map->pmap, uaddr, tend); 3152 } 3153 3154 } else if ((first_object->ref_count == 1) && 3155 (first_object->size == osize) && 3156 ((first_object->type == OBJT_DEFAULT) || 3157 (first_object->type == OBJT_SWAP)) ) { 3158 3159 oldobject = first_object->backing_object; 3160 3161 if ((first_object->backing_object_offset != cp) || 3162 (oldobject != srcobject)) { 3163 /* 3164 * Remove old window into the file 3165 */ 3166 pmap_remove (map->pmap, uaddr, tend); 3167 3168 /* 3169 * Remove unneeded old pages 3170 */ 3171 vm_object_page_remove(first_object, 0, 0, 0); 3172 3173 /* 3174 * Invalidate swap space 3175 */ 3176 if (first_object->type == OBJT_SWAP) { 3177 swap_pager_freespace(first_object, 3178 0, 3179 first_object->size); 3180 } 3181 3182 /* 3183 * Force copy on write for mmaped regions 3184 */ 3185 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize); 3186 3187 /* 3188 * Point the object appropriately 3189 */ 3190 if 
(oldobject != srcobject) { 3191 3192 /* 3193 * Set the object optimization hint flag 3194 */ 3195 vm_object_set_flag(srcobject, OBJ_OPT); 3196 vm_object_reference(srcobject); 3197 3198 if (oldobject) { 3199 LIST_REMOVE( 3200 first_object, shadow_list); 3201 oldobject->shadow_count--; 3202 /* XXX bump generation? */ 3203 vm_object_deallocate(oldobject); 3204 } 3205 3206 LIST_INSERT_HEAD(&srcobject->shadow_head, 3207 first_object, shadow_list); 3208 srcobject->shadow_count++; 3209 /* XXX bump generation? */ 3210 3211 first_object->backing_object = srcobject; 3212 } 3213 first_object->backing_object_offset = cp; 3214 map->timestamp++; 3215 } else { 3216 pmap_remove (map->pmap, uaddr, tend); 3217 } 3218 /* 3219 * Otherwise, we have to do a logical mmap. 3220 */ 3221 } else { 3222 3223 vm_object_set_flag(srcobject, OBJ_OPT); 3224 vm_object_reference(srcobject); 3225 3226 pmap_remove (map->pmap, uaddr, tend); 3227 3228 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize); 3229 vm_map_lock_upgrade(map); 3230 3231 if (entry == &map->header) { 3232 map->first_free = &map->header; 3233 } else if (map->first_free->start >= start) { 3234 map->first_free = entry->prev; 3235 } 3236 3237 SAVE_HINT(map, entry->prev); 3238 vm_map_entry_delete(map, entry); 3239 3240 object = srcobject; 3241 ooffset = cp; 3242 3243 rv = vm_map_insert(map, object, ooffset, start, tend, 3244 VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE); 3245 3246 if (rv != KERN_SUCCESS) 3247 panic("vm_uiomove: could not insert new entry: %d", rv); 3248 } 3249 3250 /* 3251 * Map the window directly, if it is already in memory 3252 */ 3253 pmap_object_init_pt(map->pmap, uaddr, 3254 srcobject, oindex, tcnt, 0); 3255 3256 map->timestamp++; 3257 vm_map_unlock(map); 3258 3259 cnt -= tcnt; 3260 uaddr += tcnt; 3261 cp += tcnt; 3262 if (npages) 3263 *npages += osize; 3264 } 3265 return 0; 3266 } 3267 3268 /* 3269 * Performs the copy_on_write operations necessary to allow the virtual copies 3270 * into user space to work. This has to be called for write(2) system calls 3271 * from other processes, file unlinking, and file size shrinkage. 
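 *
 * Illustrative usage sketch only (not part of the original source):
 * a vnode write path would typically break any outstanding OBJ_OPT
 * mappings covering the affected byte range before dirtying it, e.g.
 *
 *	vm_freeze_copyopts(vp->v_object, OFF_TO_IDX(offset),
 *	    OFF_TO_IDX(offset + len));
 *
 * where 'vp', 'offset', and 'len' are assumed caller state; note that
 * the implementation below walks every shadow object regardless of
 * the index range passed in.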
3272 */ 3273 void 3274 vm_freeze_copyopts(object, froma, toa) 3275 vm_object_t object; 3276 vm_pindex_t froma, toa; 3277 { 3278 int rv; 3279 vm_object_t robject; 3280 vm_pindex_t idx; 3281 3282 if ((object == NULL) || 3283 ((object->flags & OBJ_OPT) == 0)) 3284 return; 3285 3286 if (object->shadow_count > object->ref_count) 3287 panic("vm_freeze_copyopts: sc > rc"); 3288 3289 while((robject = LIST_FIRST(&object->shadow_head)) != NULL) { 3290 vm_pindex_t bo_pindex; 3291 vm_page_t m_in, m_out; 3292 3293 bo_pindex = OFF_TO_IDX(robject->backing_object_offset); 3294 3295 vm_object_reference(robject); 3296 3297 vm_object_pip_wait(robject, "objfrz"); 3298 3299 if (robject->ref_count == 1) { 3300 vm_object_deallocate(robject); 3301 continue; 3302 } 3303 3304 vm_object_pip_add(robject, 1); 3305 3306 for (idx = 0; idx < robject->size; idx++) { 3307 3308 m_out = vm_page_grab(robject, idx, 3309 VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 3310 3311 if (m_out->valid == 0) { 3312 m_in = vm_page_grab(object, bo_pindex + idx, 3313 VM_ALLOC_NORMAL | VM_ALLOC_RETRY); 3314 if (m_in->valid == 0) { 3315 rv = vm_pager_get_pages(object, &m_in, 1, 0); 3316 if (rv != VM_PAGER_OK) { 3317 printf("vm_freeze_copyopts: cannot read page from file: %lx\n", (long)m_in->pindex); 3318 continue; 3319 } 3320 vm_page_deactivate(m_in); 3321 } 3322 3323 vm_page_protect(m_in, VM_PROT_NONE); 3324 pmap_copy_page(VM_PAGE_TO_PHYS(m_in), VM_PAGE_TO_PHYS(m_out)); 3325 m_out->valid = m_in->valid; 3326 vm_page_dirty(m_out); 3327 vm_page_activate(m_out); 3328 vm_page_wakeup(m_in); 3329 } 3330 vm_page_wakeup(m_out); 3331 } 3332 3333 object->shadow_count--; 3334 object->ref_count--; 3335 LIST_REMOVE(robject, shadow_list); 3336 robject->backing_object = NULL; 3337 robject->backing_object_offset = 0; 3338 3339 vm_object_pip_wakeup(robject); 3340 vm_object_deallocate(robject); 3341 } 3342 3343 vm_object_clear_flag(object, OBJ_OPT); 3344 } 3345 3346 #include "opt_ddb.h" 3347 #ifdef DDB 3348 #include <sys/kernel.h> 3349 3350 #include <ddb/ddb.h> 3351 3352 /* 3353 * vm_map_print: [ debug ] 3354 */ 3355 DB_SHOW_COMMAND(map, vm_map_print) 3356 { 3357 static int nlines; 3358 /* XXX convert args. */ 3359 vm_map_t map = (vm_map_t)addr; 3360 boolean_t full = have_addr; 3361 3362 vm_map_entry_t entry; 3363 3364 db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n", 3365 (void *)map, 3366 (void *)map->pmap, map->nentries, map->timestamp); 3367 nlines++; 3368 3369 if (!full && db_indent) 3370 return; 3371 3372 db_indent += 2; 3373 for (entry = map->header.next; entry != &map->header; 3374 entry = entry->next) { 3375 db_iprintf("map entry %p: start=%p, end=%p\n", 3376 (void *)entry, (void *)entry->start, (void *)entry->end); 3377 nlines++; 3378 { 3379 static char *inheritance_name[4] = 3380 {"share", "copy", "none", "donate_copy"}; 3381 3382 db_iprintf(" prot=%x/%x/%s", 3383 entry->protection, 3384 entry->max_protection, 3385 inheritance_name[(int)(unsigned char)entry->inheritance]); 3386 if (entry->wired_count != 0) 3387 db_printf(", wired"); 3388 } 3389 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { 3390 /* XXX no %qd in kernel. Truncate entry->offset. 
*/ 3391 db_printf(", share=%p, offset=0x%lx\n", 3392 (void *)entry->object.sub_map, 3393 (long)entry->offset); 3394 nlines++; 3395 if ((entry->prev == &map->header) || 3396 (entry->prev->object.sub_map != 3397 entry->object.sub_map)) { 3398 db_indent += 2; 3399 vm_map_print((db_expr_t)(intptr_t) 3400 entry->object.sub_map, 3401 full, 0, (char *)0); 3402 db_indent -= 2; 3403 } 3404 } else { 3405 /* XXX no %qd in kernel. Truncate entry->offset. */ 3406 db_printf(", object=%p, offset=0x%lx", 3407 (void *)entry->object.vm_object, 3408 (long)entry->offset); 3409 if (entry->eflags & MAP_ENTRY_COW) 3410 db_printf(", copy (%s)", 3411 (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done"); 3412 db_printf("\n"); 3413 nlines++; 3414 3415 if ((entry->prev == &map->header) || 3416 (entry->prev->object.vm_object != 3417 entry->object.vm_object)) { 3418 db_indent += 2; 3419 vm_object_print((db_expr_t)(intptr_t) 3420 entry->object.vm_object, 3421 full, 0, (char *)0); 3422 nlines += 4; 3423 db_indent -= 2; 3424 } 3425 } 3426 } 3427 db_indent -= 2; 3428 if (db_indent == 0) 3429 nlines = 0; 3430 } 3431 3432 3433 DB_SHOW_COMMAND(procvm, procvm) 3434 { 3435 struct proc *p; 3436 3437 if (have_addr) { 3438 p = (struct proc *) addr; 3439 } else { 3440 p = curproc; 3441 } 3442 3443 db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n", 3444 (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map, 3445 (void *)vmspace_pmap(p->p_vmspace)); 3446 3447 vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL); 3448 } 3449 3450 #endif /* DDB */ 3451