1 /* $OpenBSD: uvm_map.c,v 1.328 2024/04/02 08:39:17 deraadt Exp $ */ 2 /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */ 3 4 /* 5 * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 * 19 * 20 * Copyright (c) 1997 Charles D. Cranor and Washington University. 21 * Copyright (c) 1991, 1993, The Regents of the University of California. 22 * 23 * All rights reserved. 24 * 25 * This code is derived from software contributed to Berkeley by 26 * The Mach Operating System project at Carnegie-Mellon University. 27 * 28 * Redistribution and use in source and binary forms, with or without 29 * modification, are permitted provided that the following conditions 30 * are met: 31 * 1. Redistributions of source code must retain the above copyright 32 * notice, this list of conditions and the following disclaimer. 33 * 2. Redistributions in binary form must reproduce the above copyright 34 * notice, this list of conditions and the following disclaimer in the 35 * documentation and/or other materials provided with the distribution. 36 * 3. Neither the name of the University nor the names of its contributors 37 * may be used to endorse or promote products derived from this software 38 * without specific prior written permission. 39 * 40 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 53 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp 54 * 55 * 56 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 57 * All rights reserved. 58 * 59 * Permission to use, copy, modify and distribute this software and 60 * its documentation is hereby granted, provided that both the copyright 61 * notice and this permission notice appear in all copies of the 62 * software, derivative works or modified versions, and any portions 63 * thereof, and that both notices appear in supporting documentation. 64 * 65 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 66 * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 67 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 68 * 69 * Carnegie Mellon requests users of this software to return to 70 * 71 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 72 * School of Computer Science 73 * Carnegie Mellon University 74 * Pittsburgh PA 15213-3890 75 * 76 * any improvements or extensions that they make and grant Carnegie the 77 * rights to redistribute these changes. 78 */ 79 80 /* 81 * uvm_map.c: uvm map operations 82 */ 83 84 /* #define DEBUG */ 85 /* #define VMMAP_DEBUG */ 86 87 #include <sys/param.h> 88 #include <sys/systm.h> 89 #include <sys/acct.h> 90 #include <sys/mman.h> 91 #include <sys/proc.h> 92 #include <sys/malloc.h> 93 #include <sys/pool.h> 94 #include <sys/sysctl.h> 95 #include <sys/signalvar.h> 96 #include <sys/syslog.h> 97 #include <sys/user.h> 98 #include <sys/tracepoint.h> 99 100 #ifdef SYSVSHM 101 #include <sys/shm.h> 102 #endif 103 104 #include <uvm/uvm.h> 105 106 #ifdef DDB 107 #include <uvm/uvm_ddb.h> 108 #endif 109 110 #include <uvm/uvm_addr.h> 111 112 113 vsize_t uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t); 114 int uvm_mapent_isjoinable(struct vm_map*, 115 struct vm_map_entry*, struct vm_map_entry*); 116 struct vm_map_entry *uvm_mapent_merge(struct vm_map*, struct vm_map_entry*, 117 struct vm_map_entry*, struct uvm_map_deadq*); 118 struct vm_map_entry *uvm_mapent_tryjoin(struct vm_map*, 119 struct vm_map_entry*, struct uvm_map_deadq*); 120 struct vm_map_entry *uvm_map_mkentry(struct vm_map*, struct vm_map_entry*, 121 struct vm_map_entry*, vaddr_t, vsize_t, int, 122 struct uvm_map_deadq*, struct vm_map_entry*); 123 struct vm_map_entry *uvm_mapent_alloc(struct vm_map*, int); 124 void uvm_mapent_free(struct vm_map_entry*); 125 void uvm_unmap_kill_entry(struct vm_map*, 126 struct vm_map_entry*); 127 void uvm_unmap_kill_entry_withlock(struct vm_map *, 128 struct vm_map_entry *, int); 129 void uvm_unmap_detach_intrsafe(struct uvm_map_deadq *); 130 void uvm_mapent_mkfree(struct vm_map*, 131 struct vm_map_entry*, struct vm_map_entry**, 132 struct uvm_map_deadq*, boolean_t); 133 void uvm_map_pageable_pgon(struct vm_map*, 134 struct vm_map_entry*, struct vm_map_entry*, 135 vaddr_t, vaddr_t); 136 int uvm_map_pageable_wire(struct vm_map*, 137 struct vm_map_entry*, struct vm_map_entry*, 138 vaddr_t, vaddr_t, int); 139 void uvm_map_setup_entries(struct vm_map*); 140 void uvm_map_setup_md(struct vm_map*); 141 void uvm_map_teardown(struct vm_map*); 142 void uvm_map_vmspace_update(struct vm_map*, 143 struct uvm_map_deadq*, int); 144 void uvm_map_kmem_grow(struct vm_map*, 145 struct uvm_map_deadq*, vsize_t, int); 146 void uvm_map_freelist_update_clear(struct vm_map*, 147 struct uvm_map_deadq*); 148 void uvm_map_freelist_update_refill(struct vm_map *, int); 149 void uvm_map_freelist_update(struct vm_map*, 150 struct uvm_map_deadq*, vaddr_t, vaddr_t, 151 vaddr_t, vaddr_t, int); 152 struct vm_map_entry *uvm_map_fix_space(struct vm_map*, struct vm_map_entry*, 153 vaddr_t, vaddr_t, int); 154 int uvm_map_findspace(struct vm_map*, 155 struct vm_map_entry**, struct vm_map_entry**, 156 vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t, 157 vaddr_t); 158 vsize_t uvm_map_addr_augment_get(struct vm_map_entry*); 159 void uvm_map_addr_augment(struct vm_map_entry*); 160 161 int uvm_map_inentry_recheck(u_long, vaddr_t, 162 struct p_inentry *); 163 boolean_t uvm_map_inentry_fix(struct proc *, struct p_inentry *, 164 vaddr_t, int (*)(vm_map_entry_t), u_long); 165 /* 166 * Tree 
management functions. 167 */ 168 169 static inline void uvm_mapent_copy(struct vm_map_entry*, 170 struct vm_map_entry*); 171 static inline int uvm_mapentry_addrcmp(const struct vm_map_entry*, 172 const struct vm_map_entry*); 173 void uvm_mapent_free_insert(struct vm_map*, 174 struct uvm_addr_state*, struct vm_map_entry*); 175 void uvm_mapent_free_remove(struct vm_map*, 176 struct uvm_addr_state*, struct vm_map_entry*); 177 void uvm_mapent_addr_insert(struct vm_map*, 178 struct vm_map_entry*); 179 void uvm_mapent_addr_remove(struct vm_map*, 180 struct vm_map_entry*); 181 void uvm_map_splitentry(struct vm_map*, 182 struct vm_map_entry*, struct vm_map_entry*, 183 vaddr_t); 184 vsize_t uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t); 185 186 /* 187 * uvm_vmspace_fork helper functions. 188 */ 189 struct vm_map_entry *uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t, 190 vsize_t, vm_prot_t, vm_prot_t, 191 struct vm_map_entry*, struct uvm_map_deadq*, int, 192 int); 193 struct vm_map_entry *uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t, 194 vsize_t, vm_prot_t, vm_prot_t, struct vm_map*, 195 struct vm_map_entry*, struct uvm_map_deadq*); 196 struct vm_map_entry *uvm_mapent_forkshared(struct vmspace*, struct vm_map*, 197 struct vm_map*, struct vm_map_entry*, 198 struct uvm_map_deadq*); 199 struct vm_map_entry *uvm_mapent_forkcopy(struct vmspace*, struct vm_map*, 200 struct vm_map*, struct vm_map_entry*, 201 struct uvm_map_deadq*); 202 struct vm_map_entry *uvm_mapent_forkzero(struct vmspace*, struct vm_map*, 203 struct vm_map*, struct vm_map_entry*, 204 struct uvm_map_deadq*); 205 206 /* 207 * Tree validation. 208 */ 209 #ifdef VMMAP_DEBUG 210 void uvm_tree_assert(struct vm_map*, int, char*, 211 char*, int); 212 #define UVM_ASSERT(map, cond, file, line) \ 213 uvm_tree_assert((map), (cond), #cond, (file), (line)) 214 void uvm_tree_sanity(struct vm_map*, char*, int); 215 void uvm_tree_size_chk(struct vm_map*, char*, int); 216 void vmspace_validate(struct vm_map*); 217 #else 218 #define uvm_tree_sanity(_map, _file, _line) do {} while (0) 219 #define uvm_tree_size_chk(_map, _file, _line) do {} while (0) 220 #define vmspace_validate(_map) do {} while (0) 221 #endif 222 223 /* 224 * The kernel map will initially be VM_MAP_KSIZE_INIT bytes. 225 * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes. 226 * 227 * We attempt to grow by UVM_MAP_KSIZE_ALLOCMUL times the allocation size 228 * each time. 229 */ 230 #define VM_MAP_KSIZE_INIT (512 * (vaddr_t)PAGE_SIZE) 231 #define VM_MAP_KSIZE_DELTA (256 * (vaddr_t)PAGE_SIZE) 232 #define VM_MAP_KSIZE_ALLOCMUL 4 233 234 /* auto-allocate address lower bound */ 235 #define VMMAP_MIN_ADDR PAGE_SIZE 236 237 238 #ifdef DEADBEEF0 239 #define UVMMAP_DEADBEEF ((unsigned long)DEADBEEF0) 240 #else 241 #define UVMMAP_DEADBEEF ((unsigned long)0xdeadd0d0) 242 #endif 243 244 #ifdef DEBUG 245 int uvm_map_printlocks = 0; 246 247 #define LPRINTF(_args) \ 248 do { \ 249 if (uvm_map_printlocks) \ 250 printf _args; \ 251 } while (0) 252 #else 253 #define LPRINTF(_args) do {} while (0) 254 #endif 255 256 static struct mutex uvm_kmapent_mtx; 257 static struct timeval uvm_kmapent_last_warn_time; 258 static struct timeval uvm_kmapent_warn_rate = { 10, 0 }; 259 260 const char vmmapbsy[] = "vmmapbsy"; 261 262 /* 263 * pool for vmspace structures. 264 */ 265 struct pool uvm_vmspace_pool; 266 267 /* 268 * pool for dynamically-allocated map entries. 
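 * uvm_map_entry_pool backs entries for process (vmspace) maps,
 * uvm_map_entry_kmem_pool backs entries for kernel_map, and
 * interrupt-safe maps draw from the static uvm.kentry_free list
 * instead; see uvm_mapent_alloc() below.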
269 */ 270 struct pool uvm_map_entry_pool; 271 struct pool uvm_map_entry_kmem_pool; 272 273 /* 274 * This global represents the end of the kernel virtual address 275 * space. If we want to exceed this, we must grow the kernel 276 * virtual address space dynamically. 277 * 278 * Note, this variable is locked by kernel_map's lock. 279 */ 280 vaddr_t uvm_maxkaddr; 281 282 /* 283 * Locking predicate. 284 */ 285 #define UVM_MAP_REQ_WRITE(_map) \ 286 do { \ 287 if ((_map)->ref_count > 0) { \ 288 if (((_map)->flags & VM_MAP_INTRSAFE) == 0) \ 289 rw_assert_wrlock(&(_map)->lock); \ 290 else \ 291 MUTEX_ASSERT_LOCKED(&(_map)->mtx); \ 292 } \ 293 } while (0) 294 295 #define vm_map_modflags(map, set, clear) \ 296 do { \ 297 mtx_enter(&(map)->flags_lock); \ 298 (map)->flags = ((map)->flags | (set)) & ~(clear); \ 299 mtx_leave(&(map)->flags_lock); \ 300 } while (0) 301 302 303 /* 304 * Tree describing entries by address. 305 * 306 * Addresses are unique. 307 * Entries with start == end may only exist if they are the first entry 308 * (sorted by address) within a free-memory tree. 309 */ 310 311 static inline int 312 uvm_mapentry_addrcmp(const struct vm_map_entry *e1, 313 const struct vm_map_entry *e2) 314 { 315 return e1->start < e2->start ? -1 : e1->start > e2->start; 316 } 317 318 /* 319 * Copy mapentry. 320 */ 321 static inline void 322 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst) 323 { 324 caddr_t csrc, cdst; 325 size_t sz; 326 327 csrc = (caddr_t)src; 328 cdst = (caddr_t)dst; 329 csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 330 cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 331 332 sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) - 333 offsetof(struct vm_map_entry, uvm_map_entry_start_copy); 334 memcpy(cdst, csrc, sz); 335 } 336 337 /* 338 * Handle free-list insertion. 339 */ 340 void 341 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr, 342 struct vm_map_entry *entry) 343 { 344 const struct uvm_addr_functions *fun; 345 #ifdef VMMAP_DEBUG 346 vaddr_t min, max, bound; 347 #endif 348 349 #ifdef VMMAP_DEBUG 350 /* 351 * Boundary check. 352 * Boundaries are folded if they go on the same free list. 353 */ 354 min = VMMAP_FREE_START(entry); 355 max = VMMAP_FREE_END(entry); 356 357 while (min < max) { 358 bound = uvm_map_boundary(map, min, max); 359 KASSERT(uvm_map_uaddr(map, min) == uaddr); 360 min = bound; 361 } 362 #endif 363 KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0); 364 KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0); 365 366 UVM_MAP_REQ_WRITE(map); 367 368 /* Actual insert: forward to uaddr pointer. */ 369 if (uaddr != NULL) { 370 fun = uaddr->uaddr_functions; 371 KDASSERT(fun != NULL); 372 if (fun->uaddr_free_insert != NULL) 373 (*fun->uaddr_free_insert)(map, uaddr, entry); 374 entry->etype |= UVM_ET_FREEMAPPED; 375 } 376 377 /* Update fspace augmentation. */ 378 uvm_map_addr_augment(entry); 379 } 380 381 /* 382 * Handle free-list removal. 
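 * This is the inverse of uvm_mapent_free_insert(): the entry is
 * handed to the uaddr's uaddr_free_remove hook (if any) and its
 * UVM_ET_FREEMAPPED flag is cleared.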
383 */ 384 void 385 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr, 386 struct vm_map_entry *entry) 387 { 388 const struct uvm_addr_functions *fun; 389 390 KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL); 391 KASSERT(uvm_map_uaddr_e(map, entry) == uaddr); 392 UVM_MAP_REQ_WRITE(map); 393 394 if (uaddr != NULL) { 395 fun = uaddr->uaddr_functions; 396 if (fun->uaddr_free_remove != NULL) 397 (*fun->uaddr_free_remove)(map, uaddr, entry); 398 entry->etype &= ~UVM_ET_FREEMAPPED; 399 } 400 } 401 402 /* 403 * Handle address tree insertion. 404 */ 405 void 406 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry) 407 { 408 struct vm_map_entry *res; 409 410 if (!RBT_CHECK(uvm_map_addr, entry, UVMMAP_DEADBEEF)) 411 panic("uvm_mapent_addr_insert: entry still in addr list"); 412 KDASSERT(entry->start <= entry->end); 413 KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 && 414 (entry->end & (vaddr_t)PAGE_MASK) == 0); 415 416 TRACEPOINT(uvm, map_insert, 417 entry->start, entry->end, entry->protection, NULL); 418 419 UVM_MAP_REQ_WRITE(map); 420 res = RBT_INSERT(uvm_map_addr, &map->addr, entry); 421 if (res != NULL) { 422 panic("uvm_mapent_addr_insert: map %p entry %p " 423 "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision " 424 "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)", 425 map, entry, 426 entry->start, entry->end, entry->guard, entry->fspace, 427 res, res->start, res->end, res->guard, res->fspace); 428 } 429 } 430 431 /* 432 * Handle address tree removal. 433 */ 434 void 435 uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry) 436 { 437 struct vm_map_entry *res; 438 439 TRACEPOINT(uvm, map_remove, 440 entry->start, entry->end, entry->protection, NULL); 441 442 UVM_MAP_REQ_WRITE(map); 443 res = RBT_REMOVE(uvm_map_addr, &map->addr, entry); 444 if (res != entry) 445 panic("uvm_mapent_addr_remove"); 446 RBT_POISON(uvm_map_addr, entry, UVMMAP_DEADBEEF); 447 } 448 449 /* 450 * uvm_map_reference: add reference to a map 451 * 452 * => map need not be locked 453 */ 454 void 455 uvm_map_reference(struct vm_map *map) 456 { 457 atomic_inc_int(&map->ref_count); 458 } 459 460 void 461 uvm_map_lock_entry(struct vm_map_entry *entry) 462 { 463 if (entry->aref.ar_amap != NULL) { 464 amap_lock(entry->aref.ar_amap); 465 } 466 if (UVM_ET_ISOBJ(entry)) { 467 rw_enter(entry->object.uvm_obj->vmobjlock, RW_WRITE); 468 } 469 } 470 471 void 472 uvm_map_unlock_entry(struct vm_map_entry *entry) 473 { 474 if (UVM_ET_ISOBJ(entry)) { 475 rw_exit(entry->object.uvm_obj->vmobjlock); 476 } 477 if (entry->aref.ar_amap != NULL) { 478 amap_unlock(entry->aref.ar_amap); 479 } 480 } 481 482 /* 483 * Calculate the dused delta. 484 */ 485 vsize_t 486 uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max) 487 { 488 struct vmspace *vm; 489 vsize_t sz; 490 vaddr_t lmax; 491 vaddr_t stack_begin, stack_end; /* Position of stack. 
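 * Pages inside [stack_begin, stack_end) are not charged as data
 * size: the loop below clips the range at the stack boundaries and
 * only sums the pieces outside the stack.  For example (illustrative
 * numbers), with the stack at [S, S + 1M), a range [S - 4K, S + 8K)
 * is charged for a single page, the one just below S.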
*/ 492 493 KASSERT(map->flags & VM_MAP_ISVMSPACE); 494 vm_map_assert_anylock(map); 495 496 vm = (struct vmspace *)map; 497 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 498 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 499 500 sz = 0; 501 while (min != max) { 502 lmax = max; 503 if (min < stack_begin && lmax > stack_begin) 504 lmax = stack_begin; 505 else if (min < stack_end && lmax > stack_end) 506 lmax = stack_end; 507 508 if (min >= stack_begin && min < stack_end) { 509 /* nothing */ 510 } else 511 sz += lmax - min; 512 min = lmax; 513 } 514 515 return sz >> PAGE_SHIFT; 516 } 517 518 /* 519 * Find the entry describing the given address. 520 */ 521 struct vm_map_entry* 522 uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr) 523 { 524 struct vm_map_entry *iter; 525 526 iter = RBT_ROOT(uvm_map_addr, atree); 527 while (iter != NULL) { 528 if (iter->start > addr) 529 iter = RBT_LEFT(uvm_map_addr, iter); 530 else if (VMMAP_FREE_END(iter) <= addr) 531 iter = RBT_RIGHT(uvm_map_addr, iter); 532 else 533 return iter; 534 } 535 return NULL; 536 } 537 538 /* 539 * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry) 540 * 541 * Push dead entries into a linked list. 542 * Since the linked list abuses the address tree for storage, the entry 543 * may not be linked in a map. 544 * 545 * *head must be initialized to NULL before the first call to this macro. 546 * uvm_unmap_detach(*head, 0) will remove dead entries. 547 */ 548 static inline void 549 dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry) 550 { 551 TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq); 552 } 553 #define DEAD_ENTRY_PUSH(_headptr, _entry) \ 554 dead_entry_push((_headptr), (_entry)) 555 556 /* 557 * Test if memory starting at addr with sz bytes is free. 558 * 559 * Fills in *start_ptr and *end_ptr to be the first and last entry describing 560 * the space. 561 * If called with prefilled *start_ptr and *end_ptr, they are to be correct. 562 */ 563 int 564 uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr, 565 struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr, 566 vaddr_t addr, vsize_t sz) 567 { 568 struct uvm_addr_state *free; 569 struct uvm_map_addr *atree; 570 struct vm_map_entry *i, *i_end; 571 572 if (addr + sz < addr) 573 return 0; 574 575 vm_map_assert_anylock(map); 576 577 /* 578 * Kernel memory above uvm_maxkaddr is considered unavailable. 579 */ 580 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 581 if (addr + sz > uvm_maxkaddr) 582 return 0; 583 } 584 585 atree = &map->addr; 586 587 /* 588 * Fill in first, last, so they point at the entries containing the 589 * first and last address of the range. 590 * Note that if they are not NULL, we don't perform the lookup. 591 */ 592 KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL); 593 if (*start_ptr == NULL) { 594 *start_ptr = uvm_map_entrybyaddr(atree, addr); 595 if (*start_ptr == NULL) 596 return 0; 597 } else 598 KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr)); 599 if (*end_ptr == NULL) { 600 if (VMMAP_FREE_END(*start_ptr) >= addr + sz) 601 *end_ptr = *start_ptr; 602 else { 603 *end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1); 604 if (*end_ptr == NULL) 605 return 0; 606 } 607 } else 608 KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1)); 609 610 /* Validation. 
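 * The entries found above must bracket the request: *start_ptr
 * (including its trailing free space) covers addr and *end_ptr
 * covers addr + sz - 1.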
*/ 611 KDASSERT(*start_ptr != NULL && *end_ptr != NULL); 612 KDASSERT((*start_ptr)->start <= addr && 613 VMMAP_FREE_END(*start_ptr) > addr && 614 (*end_ptr)->start < addr + sz && 615 VMMAP_FREE_END(*end_ptr) >= addr + sz); 616 617 /* 618 * Check the none of the entries intersects with <addr, addr+sz>. 619 * Also, if the entry belong to uaddr_exe or uaddr_brk_stack, it is 620 * considered unavailable unless called by those allocators. 621 */ 622 i = *start_ptr; 623 i_end = RBT_NEXT(uvm_map_addr, *end_ptr); 624 for (; i != i_end; 625 i = RBT_NEXT(uvm_map_addr, i)) { 626 if (i->start != i->end && i->end > addr) 627 return 0; 628 629 /* 630 * uaddr_exe and uaddr_brk_stack may only be used 631 * by these allocators and the NULL uaddr (i.e. no 632 * uaddr). 633 * Reject if this requirement is not met. 634 */ 635 if (uaddr != NULL) { 636 free = uvm_map_uaddr_e(map, i); 637 638 if (uaddr != free && free != NULL && 639 (free == map->uaddr_exe || 640 free == map->uaddr_brk_stack)) 641 return 0; 642 } 643 } 644 645 return -1; 646 } 647 648 /* 649 * Invoke each address selector until an address is found. 650 * Will not invoke uaddr_exe. 651 */ 652 int 653 uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first, 654 struct vm_map_entry**last, vaddr_t *addr, vsize_t sz, 655 vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint) 656 { 657 struct uvm_addr_state *uaddr; 658 int i; 659 660 /* 661 * Allocation for sz bytes at any address, 662 * using the addr selectors in order. 663 */ 664 for (i = 0; i < nitems(map->uaddr_any); i++) { 665 uaddr = map->uaddr_any[i]; 666 667 if (uvm_addr_invoke(map, uaddr, first, last, 668 addr, sz, pmap_align, pmap_offset, prot, hint) == 0) 669 return 0; 670 } 671 672 /* Fall back to brk() and stack() address selectors. */ 673 uaddr = map->uaddr_brk_stack; 674 if (uvm_addr_invoke(map, uaddr, first, last, 675 addr, sz, pmap_align, pmap_offset, prot, hint) == 0) 676 return 0; 677 678 return ENOMEM; 679 } 680 681 /* Calculate entry augmentation value. */ 682 vsize_t 683 uvm_map_addr_augment_get(struct vm_map_entry *entry) 684 { 685 vsize_t augment; 686 struct vm_map_entry *left, *right; 687 688 augment = entry->fspace; 689 if ((left = RBT_LEFT(uvm_map_addr, entry)) != NULL) 690 augment = MAX(augment, left->fspace_augment); 691 if ((right = RBT_RIGHT(uvm_map_addr, entry)) != NULL) 692 augment = MAX(augment, right->fspace_augment); 693 return augment; 694 } 695 696 /* 697 * Update augmentation data in entry. 698 */ 699 void 700 uvm_map_addr_augment(struct vm_map_entry *entry) 701 { 702 vsize_t augment; 703 704 while (entry != NULL) { 705 /* Calculate value for augmentation. */ 706 augment = uvm_map_addr_augment_get(entry); 707 708 /* 709 * Descend update. 710 * Once we find an entry that already has the correct value, 711 * stop, since it means all its parents will use the correct 712 * value too. 713 */ 714 if (entry->fspace_augment == augment) 715 return; 716 entry->fspace_augment = augment; 717 entry = RBT_PARENT(uvm_map_addr, entry); 718 } 719 } 720 721 /* 722 * uvm_mapanon: establish a valid mapping in map for an anon 723 * 724 * => *addr and sz must be a multiple of PAGE_SIZE. 725 * => *addr is ignored, except if flags contains UVM_FLAG_FIXED. 726 * => map must be unlocked. 727 * 728 * => align: align vaddr, must be a power-of-2. 729 * Align is only a hint and will be ignored if the alignment fails. 
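 *
 * A minimal sketch of a caller (illustrative only; the flag
 * combination is an example, not a fixed recipe):
 *
 *	vaddr_t va = 0;
 *	error = uvm_mapanon(map, &va, round_page(len), 0,
 *	    UVM_MAPFLAG(PROT_READ | PROT_WRITE,
 *	    PROT_READ | PROT_WRITE | PROT_EXEC,
 *	    MAP_INHERIT_COPY, MADV_NORMAL, UVM_FLAG_COPYONW));
 *
 * On success *addr holds the address that was chosen.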
730 */ 731 int 732 uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz, 733 vsize_t align, unsigned int flags) 734 { 735 struct vm_map_entry *first, *last, *entry, *new; 736 struct uvm_map_deadq dead; 737 vm_prot_t prot; 738 vm_prot_t maxprot; 739 vm_inherit_t inherit; 740 int advice; 741 int error; 742 vaddr_t pmap_align, pmap_offset; 743 vaddr_t hint; 744 745 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE); 746 KASSERT(map != kernel_map); 747 KASSERT((map->flags & UVM_FLAG_HOLE) == 0); 748 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 749 splassert(IPL_NONE); 750 KASSERT((flags & UVM_FLAG_TRYLOCK) == 0); 751 752 /* 753 * We use pmap_align and pmap_offset as alignment and offset variables. 754 * 755 * Because the align parameter takes precedence over pmap prefer, 756 * the pmap_align will need to be set to align, with pmap_offset = 0, 757 * if pmap_prefer will not align. 758 */ 759 pmap_align = MAX(align, PAGE_SIZE); 760 pmap_offset = 0; 761 762 /* Decode parameters. */ 763 prot = UVM_PROTECTION(flags); 764 maxprot = UVM_MAXPROTECTION(flags); 765 advice = UVM_ADVICE(flags); 766 inherit = UVM_INHERIT(flags); 767 error = 0; 768 hint = trunc_page(*addr); 769 TAILQ_INIT(&dead); 770 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 771 KASSERT((align & (align - 1)) == 0); 772 773 /* Check protection. */ 774 if ((prot & maxprot) != prot) 775 return EACCES; 776 777 /* 778 * Before grabbing the lock, allocate a map entry for later 779 * use to ensure we don't wait for memory while holding the 780 * vm_map_lock. 781 */ 782 new = uvm_mapent_alloc(map, flags); 783 if (new == NULL) 784 return ENOMEM; 785 786 vm_map_lock(map); 787 first = last = NULL; 788 if (flags & UVM_FLAG_FIXED) { 789 /* 790 * Fixed location. 791 * 792 * Note: we ignore align, pmap_prefer. 793 * Fill in first, last and *addr. 794 */ 795 KASSERT((*addr & PAGE_MASK) == 0); 796 797 /* Check that the space is available. */ 798 if (flags & UVM_FLAG_UNMAP) { 799 if ((flags & UVM_FLAG_STACK) && 800 !uvm_map_is_stack_remappable(map, *addr, sz, 801 (flags & UVM_FLAG_SIGALTSTACK))) { 802 error = EINVAL; 803 goto unlock; 804 } 805 if (uvm_unmap_remove(map, *addr, *addr + sz, &dead, 806 FALSE, TRUE, 807 (flags & UVM_FLAG_SIGALTSTACK) ? FALSE : TRUE) != 0) { 808 error = EPERM; /* immutable entries found */ 809 goto unlock; 810 } 811 } 812 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 813 error = ENOMEM; 814 goto unlock; 815 } 816 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 817 (align == 0 || (*addr & (align - 1)) == 0) && 818 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 819 /* 820 * Address used as hint. 821 * 822 * Note: we enforce the alignment restriction, 823 * but ignore pmap_prefer. 824 */ 825 } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) { 826 /* Run selection algorithm for executables. */ 827 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 828 addr, sz, pmap_align, pmap_offset, prot, hint); 829 830 if (error != 0) 831 goto unlock; 832 } else { 833 /* Update freelists from vmspace. */ 834 uvm_map_vmspace_update(map, &dead, flags); 835 836 error = uvm_map_findspace(map, &first, &last, addr, sz, 837 pmap_align, pmap_offset, prot, hint); 838 839 if (error != 0) 840 goto unlock; 841 } 842 843 /* Double-check if selected address doesn't cause overflow. */ 844 if (*addr + sz < *addr) { 845 error = ENOMEM; 846 goto unlock; 847 } 848 849 /* If we only want a query, return now. 
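 * A UVM_FLAG_QUERY caller only wants to know whether the range (or
 * one found by the selectors) is available; no entry is created and
 * *addr is left at the selected address.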
*/ 850 if (flags & UVM_FLAG_QUERY) { 851 error = 0; 852 goto unlock; 853 } 854 855 /* 856 * Create new entry. 857 * first and last may be invalidated after this call. 858 */ 859 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 860 new); 861 if (entry == NULL) { 862 error = ENOMEM; 863 goto unlock; 864 } 865 new = NULL; 866 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 867 entry->object.uvm_obj = NULL; 868 entry->offset = 0; 869 entry->protection = prot; 870 entry->max_protection = maxprot; 871 entry->inheritance = inherit; 872 entry->wired_count = 0; 873 entry->advice = advice; 874 if (flags & UVM_FLAG_STACK) { 875 entry->etype |= UVM_ET_STACK; 876 if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP)) 877 map->sserial++; 878 } 879 if (flags & UVM_FLAG_COPYONW) { 880 entry->etype |= UVM_ET_COPYONWRITE; 881 if ((flags & UVM_FLAG_OVERLAY) == 0) 882 entry->etype |= UVM_ET_NEEDSCOPY; 883 } 884 if (flags & UVM_FLAG_CONCEAL) 885 entry->etype |= UVM_ET_CONCEAL; 886 if (flags & UVM_FLAG_OVERLAY) { 887 entry->aref.ar_pageoff = 0; 888 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); 889 } 890 891 /* Update map and process statistics. */ 892 map->size += sz; 893 if (prot != PROT_NONE) { 894 ((struct vmspace *)map)->vm_dused += 895 uvmspace_dused(map, *addr, *addr + sz); 896 } 897 898 unlock: 899 vm_map_unlock(map); 900 901 /* 902 * Remove dead entries. 903 * 904 * Dead entries may be the result of merging. 905 * uvm_map_mkentry may also create dead entries, when it attempts to 906 * destroy free-space entries. 907 */ 908 uvm_unmap_detach(&dead, 0); 909 910 if (new) 911 uvm_mapent_free(new); 912 return error; 913 } 914 915 /* 916 * uvm_map: establish a valid mapping in map 917 * 918 * => *addr and sz must be a multiple of PAGE_SIZE. 919 * => map must be unlocked. 920 * => <uobj,uoffset> value meanings (4 cases): 921 * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER 922 * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER 923 * [3] <uobj,uoffset> == normal mapping 924 * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA 925 * 926 * case [4] is for kernel mappings where we don't know the offset until 927 * we've found a virtual address. note that kernel object offsets are 928 * always relative to vm_map_min(kernel_map). 929 * 930 * => align: align vaddr, must be a power-of-2. 931 * Align is only a hint and will be ignored if the alignment fails. 932 */ 933 int 934 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz, 935 struct uvm_object *uobj, voff_t uoffset, 936 vsize_t align, unsigned int flags) 937 { 938 struct vm_map_entry *first, *last, *entry, *new; 939 struct uvm_map_deadq dead; 940 vm_prot_t prot; 941 vm_prot_t maxprot; 942 vm_inherit_t inherit; 943 int advice; 944 int error; 945 vaddr_t pmap_align, pmap_offset; 946 vaddr_t hint; 947 948 if ((map->flags & VM_MAP_INTRSAFE) == 0) 949 splassert(IPL_NONE); 950 else 951 splassert(IPL_VM); 952 953 /* 954 * We use pmap_align and pmap_offset as alignment and offset variables. 955 * 956 * Because the align parameter takes precedence over pmap prefer, 957 * the pmap_align will need to be set to align, with pmap_offset = 0, 958 * if pmap_prefer will not align. 
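 * In other words: the pmap_prefer hint is kept only when it already
 * satisfies the caller, i.e. align <= pmap_align and pmap_offset is
 * a multiple of align; otherwise align wins and the prefer offset
 * is dropped.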
959 */ 960 if (uoffset == UVM_UNKNOWN_OFFSET) { 961 pmap_align = MAX(align, PAGE_SIZE); 962 pmap_offset = 0; 963 } else { 964 pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE); 965 pmap_offset = PMAP_PREFER_OFFSET(uoffset); 966 967 if (align == 0 || 968 (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) { 969 /* pmap_offset satisfies align, no change. */ 970 } else { 971 /* Align takes precedence over pmap prefer. */ 972 pmap_align = align; 973 pmap_offset = 0; 974 } 975 } 976 977 /* Decode parameters. */ 978 prot = UVM_PROTECTION(flags); 979 maxprot = UVM_MAXPROTECTION(flags); 980 advice = UVM_ADVICE(flags); 981 inherit = UVM_INHERIT(flags); 982 error = 0; 983 hint = trunc_page(*addr); 984 TAILQ_INIT(&dead); 985 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0); 986 KASSERT((align & (align - 1)) == 0); 987 988 /* Holes are incompatible with other types of mappings. */ 989 if (flags & UVM_FLAG_HOLE) { 990 KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) && 991 (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0); 992 } 993 994 /* Unset hint for kernel_map non-fixed allocations. */ 995 if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED)) 996 hint = 0; 997 998 /* Check protection. */ 999 if ((prot & maxprot) != prot) 1000 return EACCES; 1001 1002 if (map == kernel_map && 1003 (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 1004 panic("uvm_map: kernel map W^X violation requested"); 1005 1006 /* 1007 * Before grabbing the lock, allocate a map entry for later 1008 * use to ensure we don't wait for memory while holding the 1009 * vm_map_lock. 1010 */ 1011 new = uvm_mapent_alloc(map, flags); 1012 if (new == NULL) 1013 return ENOMEM; 1014 1015 if (flags & UVM_FLAG_TRYLOCK) { 1016 if (vm_map_lock_try(map) == FALSE) { 1017 error = EFAULT; 1018 goto out; 1019 } 1020 } else { 1021 vm_map_lock(map); 1022 } 1023 1024 first = last = NULL; 1025 if (flags & UVM_FLAG_FIXED) { 1026 /* 1027 * Fixed location. 1028 * 1029 * Note: we ignore align, pmap_prefer. 1030 * Fill in first, last and *addr. 1031 */ 1032 KASSERT((*addr & PAGE_MASK) == 0); 1033 1034 /* 1035 * Grow pmap to include allocated address. 1036 * If the growth fails, the allocation will fail too. 1037 */ 1038 if ((map->flags & VM_MAP_ISVMSPACE) == 0 && 1039 uvm_maxkaddr < (*addr + sz)) { 1040 uvm_map_kmem_grow(map, &dead, 1041 *addr + sz - uvm_maxkaddr, flags); 1042 } 1043 1044 /* Check that the space is available. */ 1045 if (flags & UVM_FLAG_UNMAP) { 1046 if (uvm_unmap_remove(map, *addr, *addr + sz, &dead, 1047 FALSE, TRUE, TRUE) != 0) { 1048 error = EPERM; /* immutable entries found */ 1049 goto unlock; 1050 } 1051 } 1052 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1053 error = ENOMEM; 1054 goto unlock; 1055 } 1056 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 && 1057 (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE && 1058 (align == 0 || (*addr & (align - 1)) == 0) && 1059 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) { 1060 /* 1061 * Address used as hint. 1062 * 1063 * Note: we enforce the alignment restriction, 1064 * but ignore pmap_prefer. 1065 */ 1066 } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) { 1067 /* Run selection algorithm for executables. */ 1068 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last, 1069 addr, sz, pmap_align, pmap_offset, prot, hint); 1070 1071 /* Grow kernel memory and try again. 
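 * For kernel maps a failed selection may simply mean uvm_maxkaddr
 * is too low; uvm_map_kmem_grow() raises it before the selector is
 * retried.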
*/ 1072 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1073 uvm_map_kmem_grow(map, &dead, sz, flags); 1074 1075 error = uvm_addr_invoke(map, map->uaddr_exe, 1076 &first, &last, addr, sz, 1077 pmap_align, pmap_offset, prot, hint); 1078 } 1079 1080 if (error != 0) 1081 goto unlock; 1082 } else { 1083 /* Update freelists from vmspace. */ 1084 if (map->flags & VM_MAP_ISVMSPACE) 1085 uvm_map_vmspace_update(map, &dead, flags); 1086 1087 error = uvm_map_findspace(map, &first, &last, addr, sz, 1088 pmap_align, pmap_offset, prot, hint); 1089 1090 /* Grow kernel memory and try again. */ 1091 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) { 1092 uvm_map_kmem_grow(map, &dead, sz, flags); 1093 1094 error = uvm_map_findspace(map, &first, &last, addr, sz, 1095 pmap_align, pmap_offset, prot, hint); 1096 } 1097 1098 if (error != 0) 1099 goto unlock; 1100 } 1101 1102 /* Double-check if selected address doesn't cause overflow. */ 1103 if (*addr + sz < *addr) { 1104 error = ENOMEM; 1105 goto unlock; 1106 } 1107 1108 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE || 1109 uvm_maxkaddr >= *addr + sz); 1110 1111 /* If we only want a query, return now. */ 1112 if (flags & UVM_FLAG_QUERY) { 1113 error = 0; 1114 goto unlock; 1115 } 1116 1117 if (uobj == NULL) 1118 uoffset = 0; 1119 else if (uoffset == UVM_UNKNOWN_OFFSET) { 1120 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj)); 1121 uoffset = *addr - vm_map_min(kernel_map); 1122 } 1123 1124 /* 1125 * Create new entry. 1126 * first and last may be invalidated after this call. 1127 */ 1128 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead, 1129 new); 1130 if (entry == NULL) { 1131 error = ENOMEM; 1132 goto unlock; 1133 } 1134 new = NULL; 1135 KDASSERT(entry->start == *addr && entry->end == *addr + sz); 1136 entry->object.uvm_obj = uobj; 1137 entry->offset = uoffset; 1138 entry->protection = prot; 1139 entry->max_protection = maxprot; 1140 entry->inheritance = inherit; 1141 entry->wired_count = 0; 1142 entry->advice = advice; 1143 if (flags & UVM_FLAG_STACK) { 1144 entry->etype |= UVM_ET_STACK; 1145 if (flags & UVM_FLAG_UNMAP) 1146 map->sserial++; 1147 } 1148 if (uobj) 1149 entry->etype |= UVM_ET_OBJ; 1150 else if (flags & UVM_FLAG_HOLE) 1151 entry->etype |= UVM_ET_HOLE; 1152 if (flags & UVM_FLAG_NOFAULT) 1153 entry->etype |= UVM_ET_NOFAULT; 1154 if (flags & UVM_FLAG_WC) 1155 entry->etype |= UVM_ET_WC; 1156 if (flags & UVM_FLAG_COPYONW) { 1157 entry->etype |= UVM_ET_COPYONWRITE; 1158 if ((flags & UVM_FLAG_OVERLAY) == 0) 1159 entry->etype |= UVM_ET_NEEDSCOPY; 1160 } 1161 if (flags & UVM_FLAG_CONCEAL) 1162 entry->etype |= UVM_ET_CONCEAL; 1163 if (flags & UVM_FLAG_OVERLAY) { 1164 entry->aref.ar_pageoff = 0; 1165 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0); 1166 } 1167 1168 /* Update map and process statistics. */ 1169 if (!(flags & UVM_FLAG_HOLE)) { 1170 map->size += sz; 1171 if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL && 1172 prot != PROT_NONE) { 1173 ((struct vmspace *)map)->vm_dused += 1174 uvmspace_dused(map, *addr, *addr + sz); 1175 } 1176 } 1177 1178 /* 1179 * Try to merge entry. 1180 * 1181 * Userland allocations are kept separated most of the time. 1182 * Forego the effort of merging what most of the time can't be merged 1183 * and only try the merge if it concerns a kernel entry. 1184 */ 1185 if ((flags & UVM_FLAG_NOMERGE) == 0 && 1186 (map->flags & VM_MAP_ISVMSPACE) == 0) 1187 uvm_mapent_tryjoin(map, entry, &dead); 1188 1189 unlock: 1190 vm_map_unlock(map); 1191 1192 /* 1193 * Remove dead entries. 
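 * Interrupt-safe maps must use the non-sleeping detach path below;
 * the regular path may take the kernel lock and sleep while
 * dropping object and amap references.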
1194 * 1195 * Dead entries may be the result of merging. 1196 * uvm_map_mkentry may also create dead entries, when it attempts to 1197 * destroy free-space entries. 1198 */ 1199 if (map->flags & VM_MAP_INTRSAFE) 1200 uvm_unmap_detach_intrsafe(&dead); 1201 else 1202 uvm_unmap_detach(&dead, 0); 1203 out: 1204 if (new) 1205 uvm_mapent_free(new); 1206 return error; 1207 } 1208 1209 /* 1210 * True iff e1 and e2 can be joined together. 1211 */ 1212 int 1213 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1, 1214 struct vm_map_entry *e2) 1215 { 1216 KDASSERT(e1 != NULL && e2 != NULL); 1217 1218 /* Must be the same entry type and not have free memory between. */ 1219 if (e1->etype != e2->etype || e1->end != e2->start) 1220 return 0; 1221 1222 /* Submaps are never joined. */ 1223 if (UVM_ET_ISSUBMAP(e1)) 1224 return 0; 1225 1226 /* Never merge wired memory. */ 1227 if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2)) 1228 return 0; 1229 1230 /* Protection, inheritance and advice must be equal. */ 1231 if (e1->protection != e2->protection || 1232 e1->max_protection != e2->max_protection || 1233 e1->inheritance != e2->inheritance || 1234 e1->advice != e2->advice) 1235 return 0; 1236 1237 /* If uvm_object: object itself and offsets within object must match. */ 1238 if (UVM_ET_ISOBJ(e1)) { 1239 if (e1->object.uvm_obj != e2->object.uvm_obj) 1240 return 0; 1241 if (e1->offset + (e1->end - e1->start) != e2->offset) 1242 return 0; 1243 } 1244 1245 /* 1246 * Cannot join shared amaps. 1247 * Note: no need to lock amap to look at refs, since we don't care 1248 * about its exact value. 1249 * If it is 1 (i.e. we have the only reference) it will stay there. 1250 */ 1251 if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1) 1252 return 0; 1253 if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1) 1254 return 0; 1255 1256 /* Apparently, e1 and e2 match. */ 1257 return 1; 1258 } 1259 1260 /* 1261 * Join support function. 1262 * 1263 * Returns the merged entry on success. 1264 * Returns NULL if the merge failed. 1265 */ 1266 struct vm_map_entry* 1267 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1, 1268 struct vm_map_entry *e2, struct uvm_map_deadq *dead) 1269 { 1270 struct uvm_addr_state *free; 1271 1272 /* 1273 * Merging is not supported for map entries that 1274 * contain an amap in e1. This should never happen 1275 * anyway, because only kernel entries are merged. 1276 * These do not contain amaps. 1277 * e2 contains no real information in its amap, 1278 * so it can be erased immediately. 1279 */ 1280 KASSERT(e1->aref.ar_amap == NULL); 1281 1282 /* 1283 * Don't drop obj reference: 1284 * uvm_unmap_detach will do this for us. 1285 */ 1286 free = uvm_map_uaddr_e(map, e1); 1287 uvm_mapent_free_remove(map, free, e1); 1288 1289 free = uvm_map_uaddr_e(map, e2); 1290 uvm_mapent_free_remove(map, free, e2); 1291 uvm_mapent_addr_remove(map, e2); 1292 e1->end = e2->end; 1293 e1->guard = e2->guard; 1294 e1->fspace = e2->fspace; 1295 uvm_mapent_free_insert(map, free, e1); 1296 1297 DEAD_ENTRY_PUSH(dead, e2); 1298 return e1; 1299 } 1300 1301 /* 1302 * Attempt forward and backward joining of entry. 1303 * 1304 * Returns entry after joins. 1305 * We are guaranteed that the amap of entry is either non-existent or 1306 * has never been used. 1307 */ 1308 struct vm_map_entry* 1309 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry, 1310 struct uvm_map_deadq *dead) 1311 { 1312 struct vm_map_entry *other; 1313 struct vm_map_entry *merged; 1314 1315 /* Merge with previous entry. 
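 * Only kernel entries are expected to be merged (see
 * uvm_mapent_merge() above) and those carry no amaps, so a
 * backward merge cannot lose amap contents.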
*/ 1316 other = RBT_PREV(uvm_map_addr, entry); 1317 if (other && uvm_mapent_isjoinable(map, other, entry)) { 1318 merged = uvm_mapent_merge(map, other, entry, dead); 1319 if (merged) 1320 entry = merged; 1321 } 1322 1323 /* 1324 * Merge with next entry. 1325 * 1326 * Because amap can only extend forward and the next entry 1327 * probably contains sensible info, only perform forward merging 1328 * in the absence of an amap. 1329 */ 1330 other = RBT_NEXT(uvm_map_addr, entry); 1331 if (other && entry->aref.ar_amap == NULL && 1332 other->aref.ar_amap == NULL && 1333 uvm_mapent_isjoinable(map, entry, other)) { 1334 merged = uvm_mapent_merge(map, entry, other, dead); 1335 if (merged) 1336 entry = merged; 1337 } 1338 1339 return entry; 1340 } 1341 1342 /* 1343 * Kill entries that are no longer in a map. 1344 */ 1345 void 1346 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags) 1347 { 1348 struct vm_map_entry *entry, *tmp; 1349 int waitok = flags & UVM_PLA_WAITOK; 1350 1351 TAILQ_FOREACH_SAFE(entry, deadq, dfree.deadq, tmp) { 1352 /* Drop reference to amap, if we've got one. */ 1353 if (entry->aref.ar_amap) 1354 amap_unref(entry->aref.ar_amap, 1355 entry->aref.ar_pageoff, 1356 atop(entry->end - entry->start), 1357 flags & AMAP_REFALL); 1358 1359 /* Skip entries for which we have to grab the kernel lock. */ 1360 if (UVM_ET_ISSUBMAP(entry) || UVM_ET_ISOBJ(entry)) 1361 continue; 1362 1363 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1364 uvm_mapent_free(entry); 1365 } 1366 1367 if (TAILQ_EMPTY(deadq)) 1368 return; 1369 1370 KERNEL_LOCK(); 1371 while ((entry = TAILQ_FIRST(deadq)) != NULL) { 1372 if (waitok) 1373 uvm_pause(); 1374 /* Drop reference to our backing object, if we've got one. */ 1375 if (UVM_ET_ISSUBMAP(entry)) { 1376 /* ... unlikely to happen, but play it safe */ 1377 uvm_map_deallocate(entry->object.sub_map); 1378 } else if (UVM_ET_ISOBJ(entry) && 1379 entry->object.uvm_obj->pgops->pgo_detach) { 1380 entry->object.uvm_obj->pgops->pgo_detach( 1381 entry->object.uvm_obj); 1382 } 1383 1384 /* Step to next. */ 1385 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1386 uvm_mapent_free(entry); 1387 } 1388 KERNEL_UNLOCK(); 1389 } 1390 1391 void 1392 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq) 1393 { 1394 struct vm_map_entry *entry; 1395 1396 while ((entry = TAILQ_FIRST(deadq)) != NULL) { 1397 KASSERT(entry->aref.ar_amap == NULL); 1398 KASSERT(!UVM_ET_ISSUBMAP(entry)); 1399 KASSERT(!UVM_ET_ISOBJ(entry)); 1400 TAILQ_REMOVE(deadq, entry, dfree.deadq); 1401 uvm_mapent_free(entry); 1402 } 1403 } 1404 1405 /* 1406 * Create and insert new entry. 1407 * 1408 * Returned entry contains new addresses and is inserted properly in the tree. 1409 * first and last are (probably) no longer valid. 
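 *
 * Callers locate first and last via uvm_map_isavail() or an address
 * selector and usually pass a preallocated entry in new, so nothing
 * sleeps while the map is write-locked.  Roughly (sketch only):
 *
 *	new = uvm_mapent_alloc(map, flags);
 *	vm_map_lock(map);
 *	first = last = NULL;
 *	if (uvm_map_isavail(map, NULL, &first, &last, addr, sz))
 *		entry = uvm_map_mkentry(map, first, last, addr, sz,
 *		    flags, &dead, new);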
1410 */ 1411 struct vm_map_entry* 1412 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first, 1413 struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags, 1414 struct uvm_map_deadq *dead, struct vm_map_entry *new) 1415 { 1416 struct vm_map_entry *entry, *prev; 1417 struct uvm_addr_state *free; 1418 vaddr_t min, max; /* free space boundaries for new entry */ 1419 1420 KDASSERT(map != NULL); 1421 KDASSERT(first != NULL); 1422 KDASSERT(last != NULL); 1423 KDASSERT(dead != NULL); 1424 KDASSERT(sz > 0); 1425 KDASSERT(addr + sz > addr); 1426 KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr); 1427 KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz); 1428 KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz)); 1429 uvm_tree_sanity(map, __FILE__, __LINE__); 1430 1431 min = addr + sz; 1432 max = VMMAP_FREE_END(last); 1433 1434 /* Initialize new entry. */ 1435 if (new == NULL) 1436 entry = uvm_mapent_alloc(map, flags); 1437 else 1438 entry = new; 1439 if (entry == NULL) 1440 return NULL; 1441 entry->offset = 0; 1442 entry->etype = 0; 1443 entry->wired_count = 0; 1444 entry->aref.ar_pageoff = 0; 1445 entry->aref.ar_amap = NULL; 1446 1447 entry->start = addr; 1448 entry->end = min; 1449 entry->guard = 0; 1450 entry->fspace = 0; 1451 1452 vm_map_assert_wrlock(map); 1453 1454 /* Reset free space in first. */ 1455 free = uvm_map_uaddr_e(map, first); 1456 uvm_mapent_free_remove(map, free, first); 1457 first->guard = 0; 1458 first->fspace = 0; 1459 1460 /* 1461 * Remove all entries that are fully replaced. 1462 * We are iterating using last in reverse order. 1463 */ 1464 for (; first != last; last = prev) { 1465 prev = RBT_PREV(uvm_map_addr, last); 1466 1467 KDASSERT(last->start == last->end); 1468 free = uvm_map_uaddr_e(map, last); 1469 uvm_mapent_free_remove(map, free, last); 1470 uvm_mapent_addr_remove(map, last); 1471 DEAD_ENTRY_PUSH(dead, last); 1472 } 1473 /* Remove first if it is entirely inside <addr, addr+sz>. */ 1474 if (first->start == addr) { 1475 uvm_mapent_addr_remove(map, first); 1476 DEAD_ENTRY_PUSH(dead, first); 1477 } else { 1478 uvm_map_fix_space(map, first, VMMAP_FREE_START(first), 1479 addr, flags); 1480 } 1481 1482 /* Finally, link in entry. 
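 * The entry goes into the address tree and uvm_map_fix_space()
 * re-attaches the leftover free space [min, max) behind it.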
*/ 1483 uvm_mapent_addr_insert(map, entry); 1484 uvm_map_fix_space(map, entry, min, max, flags); 1485 1486 uvm_tree_sanity(map, __FILE__, __LINE__); 1487 return entry; 1488 } 1489 1490 1491 /* 1492 * uvm_mapent_alloc: allocate a map entry 1493 */ 1494 struct vm_map_entry * 1495 uvm_mapent_alloc(struct vm_map *map, int flags) 1496 { 1497 struct vm_map_entry *me, *ne; 1498 int pool_flags; 1499 int i; 1500 1501 pool_flags = PR_WAITOK; 1502 if (flags & UVM_FLAG_TRYLOCK) 1503 pool_flags = PR_NOWAIT; 1504 1505 if (map->flags & VM_MAP_INTRSAFE || cold) { 1506 mtx_enter(&uvm_kmapent_mtx); 1507 if (SLIST_EMPTY(&uvm.kentry_free)) { 1508 ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty, 1509 &kd_nowait); 1510 if (ne == NULL) 1511 panic("uvm_mapent_alloc: cannot allocate map " 1512 "entry"); 1513 for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) { 1514 SLIST_INSERT_HEAD(&uvm.kentry_free, 1515 &ne[i], daddrs.addr_kentry); 1516 } 1517 if (ratecheck(&uvm_kmapent_last_warn_time, 1518 &uvm_kmapent_warn_rate)) 1519 printf("uvm_mapent_alloc: out of static " 1520 "map entries\n"); 1521 } 1522 me = SLIST_FIRST(&uvm.kentry_free); 1523 SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry); 1524 uvmexp.kmapent++; 1525 mtx_leave(&uvm_kmapent_mtx); 1526 me->flags = UVM_MAP_STATIC; 1527 } else if (map == kernel_map) { 1528 splassert(IPL_NONE); 1529 me = pool_get(&uvm_map_entry_kmem_pool, pool_flags); 1530 if (me == NULL) 1531 goto out; 1532 me->flags = UVM_MAP_KMEM; 1533 } else { 1534 splassert(IPL_NONE); 1535 me = pool_get(&uvm_map_entry_pool, pool_flags); 1536 if (me == NULL) 1537 goto out; 1538 me->flags = 0; 1539 } 1540 1541 RBT_POISON(uvm_map_addr, me, UVMMAP_DEADBEEF); 1542 out: 1543 return me; 1544 } 1545 1546 /* 1547 * uvm_mapent_free: free map entry 1548 * 1549 * => XXX: static pool for kernel map? 1550 */ 1551 void 1552 uvm_mapent_free(struct vm_map_entry *me) 1553 { 1554 if (me->flags & UVM_MAP_STATIC) { 1555 mtx_enter(&uvm_kmapent_mtx); 1556 SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry); 1557 uvmexp.kmapent--; 1558 mtx_leave(&uvm_kmapent_mtx); 1559 } else if (me->flags & UVM_MAP_KMEM) { 1560 splassert(IPL_NONE); 1561 pool_put(&uvm_map_entry_kmem_pool, me); 1562 } else { 1563 splassert(IPL_NONE); 1564 pool_put(&uvm_map_entry_pool, me); 1565 } 1566 } 1567 1568 /* 1569 * uvm_map_lookup_entry: find map entry at or before an address. 1570 * 1571 * => map must at least be read-locked by caller 1572 * => entry is returned in "entry" 1573 * => return value is true if address is in the returned entry 1574 * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is 1575 * returned for those mappings. 1576 */ 1577 boolean_t 1578 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address, 1579 struct vm_map_entry **entry) 1580 { 1581 vm_map_assert_anylock(map); 1582 1583 *entry = uvm_map_entrybyaddr(&map->addr, address); 1584 return *entry != NULL && !UVM_ET_ISHOLE(*entry) && 1585 (*entry)->start <= address && (*entry)->end > address; 1586 } 1587 1588 /* 1589 * Stack must be in a MAP_STACK entry. PROT_NONE indicates stack not yet 1590 * grown -- then uvm_map_check_region_range() should not cache the entry 1591 * because growth won't be seen. 
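 *
 * Return values, as consumed by uvm_map_inentry_fix():
 *	 1	MAP_STACK entry: pass the check and cache its range
 *	 0	not a stack entry: fail the check
 *	-1	not yet a stack entry but PROT_NONE: pass the check,
 *		do not cache the range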
1592 */ 1593 int 1594 uvm_map_inentry_sp(vm_map_entry_t entry) 1595 { 1596 if ((entry->etype & UVM_ET_STACK) == 0) { 1597 if (entry->protection == PROT_NONE) 1598 return (-1); /* don't update range */ 1599 return (0); 1600 } 1601 return (1); 1602 } 1603 1604 int 1605 uvm_map_inentry_recheck(u_long serial, vaddr_t addr, struct p_inentry *ie) 1606 { 1607 return (serial != ie->ie_serial || ie->ie_start == 0 || 1608 addr < ie->ie_start || addr >= ie->ie_end); 1609 } 1610 1611 /* 1612 * Inside a vm_map find the reg address and verify it via function. 1613 * Remember low and high addresses of region if valid and return TRUE, 1614 * else return FALSE. 1615 */ 1616 boolean_t 1617 uvm_map_inentry_fix(struct proc *p, struct p_inentry *ie, vaddr_t addr, 1618 int (*fn)(vm_map_entry_t), u_long serial) 1619 { 1620 vm_map_t map = &p->p_vmspace->vm_map; 1621 vm_map_entry_t entry; 1622 int ret; 1623 1624 if (addr < map->min_offset || addr >= map->max_offset) 1625 return (FALSE); 1626 1627 /* lock map */ 1628 vm_map_lock_read(map); 1629 1630 /* lookup */ 1631 if (!uvm_map_lookup_entry(map, trunc_page(addr), &entry)) { 1632 vm_map_unlock_read(map); 1633 return (FALSE); 1634 } 1635 1636 ret = (*fn)(entry); 1637 if (ret == 0) { 1638 vm_map_unlock_read(map); 1639 return (FALSE); 1640 } else if (ret == 1) { 1641 ie->ie_start = entry->start; 1642 ie->ie_end = entry->end; 1643 ie->ie_serial = serial; 1644 } else { 1645 /* do not update, re-check later */ 1646 } 1647 vm_map_unlock_read(map); 1648 return (TRUE); 1649 } 1650 1651 boolean_t 1652 uvm_map_inentry(struct proc *p, struct p_inentry *ie, vaddr_t addr, 1653 const char *fmt, int (*fn)(vm_map_entry_t), u_long serial) 1654 { 1655 union sigval sv; 1656 boolean_t ok = TRUE; 1657 1658 if (uvm_map_inentry_recheck(serial, addr, ie)) { 1659 ok = uvm_map_inentry_fix(p, ie, addr, fn, serial); 1660 if (!ok) { 1661 KERNEL_LOCK(); 1662 printf(fmt, p->p_p->ps_comm, p->p_p->ps_pid, p->p_tid, 1663 addr, ie->ie_start, ie->ie_end-1); 1664 p->p_p->ps_acflag |= AMAP; 1665 sv.sival_ptr = (void *)PROC_PC(p); 1666 trapsignal(p, SIGSEGV, 0, SEGV_ACCERR, sv); 1667 KERNEL_UNLOCK(); 1668 } 1669 } 1670 return (ok); 1671 } 1672 1673 /* 1674 * Check whether the given address range can be converted to a MAP_STACK 1675 * mapping. 1676 * 1677 * Must be called with map locked. 1678 */ 1679 boolean_t 1680 uvm_map_is_stack_remappable(struct vm_map *map, vaddr_t addr, vaddr_t sz, 1681 int sigaltstack_check) 1682 { 1683 vaddr_t end = addr + sz; 1684 struct vm_map_entry *first, *iter, *prev = NULL; 1685 1686 vm_map_assert_anylock(map); 1687 1688 if (!uvm_map_lookup_entry(map, addr, &first)) { 1689 printf("map stack 0x%lx-0x%lx of map %p failed: no mapping\n", 1690 addr, end, map); 1691 return FALSE; 1692 } 1693 1694 /* 1695 * Check that the address range exists and is contiguous. 1696 */ 1697 for (iter = first; iter != NULL && iter->start < end; 1698 prev = iter, iter = RBT_NEXT(uvm_map_addr, iter)) { 1699 /* 1700 * Make sure that we do not have holes in the range. 
1701 */ 1702 #if 0 1703 if (prev != NULL) { 1704 printf("prev->start 0x%lx, prev->end 0x%lx, " 1705 "iter->start 0x%lx, iter->end 0x%lx\n", 1706 prev->start, prev->end, iter->start, iter->end); 1707 } 1708 #endif 1709 1710 if (prev != NULL && prev->end != iter->start) { 1711 printf("map stack 0x%lx-0x%lx of map %p failed: " 1712 "hole in range\n", addr, end, map); 1713 return FALSE; 1714 } 1715 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) { 1716 printf("map stack 0x%lx-0x%lx of map %p failed: " 1717 "hole in range\n", addr, end, map); 1718 return FALSE; 1719 } 1720 if (sigaltstack_check) { 1721 if (iter->protection != (PROT_READ | PROT_WRITE)) 1722 return FALSE; 1723 } 1724 } 1725 1726 return TRUE; 1727 } 1728 1729 /* 1730 * Remap the middle-pages of an existing mapping as a stack range. 1731 * If there exists a previous contiguous mapping with the given range 1732 * [addr, addr + sz), with protection PROT_READ|PROT_WRITE, then the 1733 * mapping is dropped, and a new anon mapping is created and marked as 1734 * a stack. 1735 * 1736 * Must be called with map unlocked. 1737 */ 1738 int 1739 uvm_map_remap_as_stack(struct proc *p, vaddr_t addr, vaddr_t sz) 1740 { 1741 vm_map_t map = &p->p_vmspace->vm_map; 1742 vaddr_t start, end; 1743 int error; 1744 int flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE, 1745 PROT_READ | PROT_WRITE | PROT_EXEC, 1746 MAP_INHERIT_COPY, MADV_NORMAL, 1747 UVM_FLAG_STACK | UVM_FLAG_FIXED | UVM_FLAG_UNMAP | 1748 UVM_FLAG_COPYONW | UVM_FLAG_SIGALTSTACK); 1749 1750 start = round_page(addr); 1751 end = trunc_page(addr + sz); 1752 #ifdef MACHINE_STACK_GROWS_UP 1753 if (end == addr + sz) 1754 end -= PAGE_SIZE; 1755 #else 1756 if (start == addr) 1757 start += PAGE_SIZE; 1758 #endif 1759 1760 if (start < map->min_offset || end >= map->max_offset || end < start) 1761 return EINVAL; 1762 1763 /* 1764 * UVM_FLAG_SIGALTSTACK indicates that immutable may be bypassed, 1765 * but the range is checked that it is contiguous, is not a syscall 1766 * mapping, and protection RW. Then, a new mapping (all zero) is 1767 * placed upon the region, which prevents an attacker from pivoting 1768 * into pre-placed MAP_STACK space. 1769 */ 1770 error = uvm_mapanon(map, &start, end - start, 0, flags); 1771 if (error != 0) 1772 printf("map stack for pid %d failed\n", p->p_p->ps_pid); 1773 1774 return error; 1775 } 1776 1777 /* 1778 * uvm_map_pie: return a random load address for a PIE executable 1779 * properly aligned. 
1780 */ 1781 #ifndef VM_PIE_MAX_ADDR 1782 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4) 1783 #endif 1784 1785 #ifndef VM_PIE_MIN_ADDR 1786 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS 1787 #endif 1788 1789 #ifndef VM_PIE_MIN_ALIGN 1790 #define VM_PIE_MIN_ALIGN PAGE_SIZE 1791 #endif 1792 1793 vaddr_t 1794 uvm_map_pie(vaddr_t align) 1795 { 1796 vaddr_t addr, space, min; 1797 1798 align = MAX(align, VM_PIE_MIN_ALIGN); 1799 1800 /* round up to next alignment */ 1801 min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1); 1802 1803 if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR) 1804 return (align); 1805 1806 space = (VM_PIE_MAX_ADDR - min) / align; 1807 space = MIN(space, (u_int32_t)-1); 1808 1809 addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align; 1810 addr += min; 1811 1812 return (addr); 1813 } 1814 1815 void 1816 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end) 1817 { 1818 struct uvm_map_deadq dead; 1819 1820 KASSERT((start & (vaddr_t)PAGE_MASK) == 0 && 1821 (end & (vaddr_t)PAGE_MASK) == 0); 1822 TAILQ_INIT(&dead); 1823 vm_map_lock(map); 1824 uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE, FALSE); 1825 vm_map_unlock(map); 1826 1827 if (map->flags & VM_MAP_INTRSAFE) 1828 uvm_unmap_detach_intrsafe(&dead); 1829 else 1830 uvm_unmap_detach(&dead, 0); 1831 } 1832 1833 /* 1834 * Mark entry as free. 1835 * 1836 * entry will be put on the dead list. 1837 * The free space will be merged into the previous or a new entry, 1838 * unless markfree is false. 1839 */ 1840 void 1841 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry, 1842 struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead, 1843 boolean_t markfree) 1844 { 1845 struct uvm_addr_state *free; 1846 struct vm_map_entry *prev; 1847 vaddr_t addr; /* Start of freed range. */ 1848 vaddr_t end; /* End of freed range. */ 1849 1850 UVM_MAP_REQ_WRITE(map); 1851 1852 prev = *prev_ptr; 1853 if (prev == entry) 1854 *prev_ptr = prev = NULL; 1855 1856 if (prev == NULL || 1857 VMMAP_FREE_END(prev) != entry->start) 1858 prev = RBT_PREV(uvm_map_addr, entry); 1859 1860 /* Entry is describing only free memory and has nothing to drain into. */ 1861 if (prev == NULL && entry->start == entry->end && markfree) { 1862 *prev_ptr = entry; 1863 return; 1864 } 1865 1866 addr = entry->start; 1867 end = VMMAP_FREE_END(entry); 1868 free = uvm_map_uaddr_e(map, entry); 1869 uvm_mapent_free_remove(map, free, entry); 1870 uvm_mapent_addr_remove(map, entry); 1871 DEAD_ENTRY_PUSH(dead, entry); 1872 1873 if (markfree) { 1874 if (prev) { 1875 free = uvm_map_uaddr_e(map, prev); 1876 uvm_mapent_free_remove(map, free, prev); 1877 } 1878 *prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0); 1879 } 1880 } 1881 1882 /* 1883 * Unwire and release referenced amap and object from map entry. 1884 */ 1885 void 1886 uvm_unmap_kill_entry_withlock(struct vm_map *map, struct vm_map_entry *entry, 1887 int needlock) 1888 { 1889 /* Unwire removed map entry. */ 1890 if (VM_MAPENT_ISWIRED(entry)) { 1891 KERNEL_LOCK(); 1892 entry->wired_count = 0; 1893 uvm_fault_unwire_locked(map, entry->start, entry->end); 1894 KERNEL_UNLOCK(); 1895 } 1896 1897 if (needlock) 1898 uvm_map_lock_entry(entry); 1899 1900 /* Entry-type specific code. */ 1901 if (UVM_ET_ISHOLE(entry)) { 1902 /* Nothing to be done for holes. 
*/ 1903 } else if (map->flags & VM_MAP_INTRSAFE) { 1904 KASSERT(vm_map_pmap(map) == pmap_kernel()); 1905 1906 uvm_km_pgremove_intrsafe(entry->start, entry->end); 1907 } else if (UVM_ET_ISOBJ(entry) && 1908 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { 1909 KASSERT(vm_map_pmap(map) == pmap_kernel()); 1910 /* 1911 * Note: kernel object mappings are currently used in 1912 * two ways: 1913 * [1] "normal" mappings of pages in the kernel object 1914 * [2] uvm_km_valloc'd allocations in which we 1915 * pmap_enter in some non-kernel-object page 1916 * (e.g. vmapbuf). 1917 * 1918 * for case [1], we need to remove the mapping from 1919 * the pmap and then remove the page from the kernel 1920 * object (because, once pages in a kernel object are 1921 * unmapped they are no longer needed, unlike, say, 1922 * a vnode where you might want the data to persist 1923 * until flushed out of a queue). 1924 * 1925 * for case [2], we need to remove the mapping from 1926 * the pmap. there shouldn't be any pages at the 1927 * specified offset in the kernel object [but it 1928 * doesn't hurt to call uvm_km_pgremove just to be 1929 * safe?] 1930 * 1931 * uvm_km_pgremove currently does the following: 1932 * for pages in the kernel object range: 1933 * - drops the swap slot 1934 * - uvm_pagefree the page 1935 * 1936 * note there is version of uvm_km_pgremove() that 1937 * is used for "intrsafe" objects. 1938 */ 1939 /* 1940 * remove mappings from pmap and drop the pages 1941 * from the object. offsets are always relative 1942 * to vm_map_min(kernel_map). 1943 */ 1944 uvm_km_pgremove(entry->object.uvm_obj, entry->start, 1945 entry->end); 1946 } else { 1947 /* remove mappings the standard way. */ 1948 pmap_remove(map->pmap, entry->start, entry->end); 1949 } 1950 1951 if (needlock) 1952 uvm_map_unlock_entry(entry); 1953 } 1954 1955 void 1956 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry) 1957 { 1958 uvm_unmap_kill_entry_withlock(map, entry, 0); 1959 } 1960 1961 /* 1962 * Remove all entries from start to end. 1963 * 1964 * If remove_holes, then remove ET_HOLE entries as well. 1965 * If markfree, entry will be properly marked free, otherwise, no replacement 1966 * entry will be put in the tree (corrupting the tree). 1967 */ 1968 int 1969 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end, 1970 struct uvm_map_deadq *dead, boolean_t remove_holes, 1971 boolean_t markfree, boolean_t checkimmutable) 1972 { 1973 struct vm_map_entry *prev_hint, *next, *entry; 1974 1975 start = MAX(start, map->min_offset); 1976 end = MIN(end, map->max_offset); 1977 if (start >= end) 1978 return 0; 1979 1980 vm_map_assert_wrlock(map); 1981 1982 /* Find first affected entry. */ 1983 entry = uvm_map_entrybyaddr(&map->addr, start); 1984 KDASSERT(entry != NULL && entry->start <= start); 1985 1986 if (checkimmutable) { 1987 struct vm_map_entry *entry1 = entry; 1988 1989 /* Refuse to unmap if any entries are immutable */ 1990 if (entry1->end <= start) 1991 entry1 = RBT_NEXT(uvm_map_addr, entry1); 1992 for (; entry1 != NULL && entry1->start < end; entry1 = next) { 1993 KDASSERT(entry1->start >= start); 1994 next = RBT_NEXT(uvm_map_addr, entry1); 1995 /* Treat memory holes as free space. 
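 * A hole or zero-length entry covers no mapped pages, so it can
 * never veto the unmap; only real mappings are checked for
 * UVM_ET_IMMUTABLE below.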
*/ 1996 if (entry1->start == entry1->end || UVM_ET_ISHOLE(entry1)) 1997 continue; 1998 if (entry1->etype & UVM_ET_IMMUTABLE) 1999 return EPERM; 2000 } 2001 } 2002 2003 if (entry->end <= start && markfree) 2004 entry = RBT_NEXT(uvm_map_addr, entry); 2005 else 2006 UVM_MAP_CLIP_START(map, entry, start); 2007 2008 /* 2009 * Iterate entries until we reach end address. 2010 * prev_hint hints where the freed space can be appended to. 2011 */ 2012 prev_hint = NULL; 2013 for (; entry != NULL && entry->start < end; entry = next) { 2014 KDASSERT(entry->start >= start); 2015 if (entry->end > end || !markfree) 2016 UVM_MAP_CLIP_END(map, entry, end); 2017 KDASSERT(entry->start >= start && entry->end <= end); 2018 next = RBT_NEXT(uvm_map_addr, entry); 2019 2020 /* Don't remove holes unless asked to do so. */ 2021 if (UVM_ET_ISHOLE(entry)) { 2022 if (!remove_holes) { 2023 prev_hint = entry; 2024 continue; 2025 } 2026 } 2027 2028 /* A stack has been removed.. */ 2029 if (UVM_ET_ISSTACK(entry) && (map->flags & VM_MAP_ISVMSPACE)) 2030 map->sserial++; 2031 2032 /* Kill entry. */ 2033 uvm_unmap_kill_entry_withlock(map, entry, 1); 2034 2035 /* Update space usage. */ 2036 if ((map->flags & VM_MAP_ISVMSPACE) && 2037 entry->object.uvm_obj == NULL && 2038 entry->protection != PROT_NONE && 2039 !UVM_ET_ISHOLE(entry)) { 2040 ((struct vmspace *)map)->vm_dused -= 2041 uvmspace_dused(map, entry->start, entry->end); 2042 } 2043 if (!UVM_ET_ISHOLE(entry)) 2044 map->size -= entry->end - entry->start; 2045 2046 /* Actual removal of entry. */ 2047 uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree); 2048 } 2049 2050 pmap_update(vm_map_pmap(map)); 2051 2052 #ifdef VMMAP_DEBUG 2053 if (markfree) { 2054 for (entry = uvm_map_entrybyaddr(&map->addr, start); 2055 entry != NULL && entry->start < end; 2056 entry = RBT_NEXT(uvm_map_addr, entry)) { 2057 KDASSERT(entry->end <= start || 2058 entry->start == entry->end || 2059 UVM_ET_ISHOLE(entry)); 2060 } 2061 } else { 2062 vaddr_t a; 2063 for (a = start; a < end; a += PAGE_SIZE) 2064 KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL); 2065 } 2066 #endif 2067 return 0; 2068 } 2069 2070 /* 2071 * Mark all entries from first until end (exclusive) as pageable. 2072 * 2073 * Lock must be exclusive on entry and will not be touched. 2074 */ 2075 void 2076 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first, 2077 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr) 2078 { 2079 struct vm_map_entry *iter; 2080 2081 for (iter = first; iter != end; 2082 iter = RBT_NEXT(uvm_map_addr, iter)) { 2083 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 2084 if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2085 continue; 2086 2087 iter->wired_count = 0; 2088 uvm_fault_unwire_locked(map, iter->start, iter->end); 2089 } 2090 } 2091 2092 /* 2093 * Mark all entries from first until end (exclusive) as wired. 2094 * 2095 * Lockflags determines the lock state on return from this function. 2096 * Lock must be exclusive on entry. 2097 */ 2098 int 2099 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first, 2100 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr, 2101 int lockflags) 2102 { 2103 struct vm_map_entry *iter; 2104 #ifdef DIAGNOSTIC 2105 unsigned int timestamp_save; 2106 #endif 2107 int error; 2108 2109 /* 2110 * Wire pages in two passes: 2111 * 2112 * 1: holding the write lock, we create any anonymous maps that need 2113 * to be created. 
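 * (these are the vm_map_lookup() actions that need the write lock;
 * pass 2 runs with the map unlocked and marked busy, so the amaps
 * cannot be created there.)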
then we clip each map entry to the region to 2114 * be wired and increment its wiring count. 2115 * 2116 * 2: we mark the map busy, unlock it and call uvm_fault_wire to fault 2117 * in the pages for any newly wired area (wired_count == 1). 2118 */ 2119 for (iter = first; iter != end; 2120 iter = RBT_NEXT(uvm_map_addr, iter)) { 2121 KDASSERT(iter->start >= start_addr && iter->end <= end_addr); 2122 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2123 iter->protection == PROT_NONE) 2124 continue; 2125 2126 /* 2127 * Perform actions of vm_map_lookup that need the write lock. 2128 * - create an anonymous map for copy-on-write 2129 * - anonymous map for zero-fill 2130 * Skip submaps. 2131 */ 2132 if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) && 2133 UVM_ET_ISNEEDSCOPY(iter) && 2134 ((iter->protection & PROT_WRITE) || 2135 iter->object.uvm_obj == NULL)) { 2136 amap_copy(map, iter, M_WAITOK, 2137 UVM_ET_ISSTACK(iter) ? FALSE : TRUE, 2138 iter->start, iter->end); 2139 } 2140 iter->wired_count++; 2141 } 2142 2143 /* 2144 * Pass 2. 2145 */ 2146 #ifdef DIAGNOSTIC 2147 timestamp_save = map->timestamp; 2148 #endif 2149 vm_map_busy(map); 2150 vm_map_unlock(map); 2151 2152 error = 0; 2153 for (iter = first; error == 0 && iter != end; 2154 iter = RBT_NEXT(uvm_map_addr, iter)) { 2155 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2156 iter->protection == PROT_NONE) 2157 continue; 2158 2159 error = uvm_fault_wire(map, iter->start, iter->end, 2160 iter->protection); 2161 } 2162 2163 vm_map_lock(map); 2164 vm_map_unbusy(map); 2165 2166 if (error) { 2167 #ifdef DIAGNOSTIC 2168 if (timestamp_save != map->timestamp) 2169 panic("uvm_map_pageable_wire: stale map"); 2170 #endif 2171 2172 /* 2173 * first is no longer needed to restart loops. 2174 * Use it as iterator to unmap successful mappings. 2175 */ 2176 for (; first != iter; 2177 first = RBT_NEXT(uvm_map_addr, first)) { 2178 if (UVM_ET_ISHOLE(first) || 2179 first->start == first->end || 2180 first->protection == PROT_NONE) 2181 continue; 2182 2183 first->wired_count--; 2184 if (!VM_MAPENT_ISWIRED(first)) { 2185 uvm_fault_unwire_locked(map, 2186 first->start, first->end); 2187 } 2188 } 2189 2190 /* decrease counter in the rest of the entries */ 2191 for (; iter != end; 2192 iter = RBT_NEXT(uvm_map_addr, iter)) { 2193 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end || 2194 iter->protection == PROT_NONE) 2195 continue; 2196 2197 iter->wired_count--; 2198 } 2199 2200 if ((lockflags & UVM_LK_EXIT) == 0) 2201 vm_map_unlock(map); 2202 return error; 2203 } 2204 2205 2206 if ((lockflags & UVM_LK_EXIT) == 0) { 2207 vm_map_unlock(map); 2208 } else { 2209 #ifdef DIAGNOSTIC 2210 if (timestamp_save != map->timestamp) 2211 panic("uvm_map_pageable_wire: stale map"); 2212 #endif 2213 } 2214 return 0; 2215 } 2216 2217 /* 2218 * uvm_map_pageable: set pageability of a range in a map. 2219 * 2220 * Flags: 2221 * UVM_LK_ENTER: map is already locked by caller 2222 * UVM_LK_EXIT: don't unlock map on exit 2223 * 2224 * The full range must be in use (entries may not have fspace != 0). 2225 * UVM_ET_HOLE counts as unmapped. 2226 */ 2227 int 2228 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end, 2229 boolean_t new_pageable, int lockflags) 2230 { 2231 struct vm_map_entry *first, *last, *tmp; 2232 int error; 2233 2234 start = trunc_page(start); 2235 end = round_page(end); 2236 2237 if (start > end) 2238 return EINVAL; 2239 if (start == end) 2240 return 0; /* nothing to do */ 2241 if (start < map->min_offset) 2242 return EFAULT; /* why? 
see first XXX below */ 2243 if (end > map->max_offset) 2244 return EINVAL; /* why? see second XXX below */ 2245 2246 KASSERT(map->flags & VM_MAP_PAGEABLE); 2247 if ((lockflags & UVM_LK_ENTER) == 0) 2248 vm_map_lock(map); 2249 2250 /* 2251 * Find first entry. 2252 * 2253 * Initial test on start is different, because of the different 2254 * error returned. Rest is tested further down. 2255 */ 2256 first = uvm_map_entrybyaddr(&map->addr, start); 2257 if (first->end <= start || UVM_ET_ISHOLE(first)) { 2258 /* 2259 * XXX if the first address is not mapped, it is EFAULT? 2260 */ 2261 error = EFAULT; 2262 goto out; 2263 } 2264 2265 /* Check that the range has no holes. */ 2266 for (last = first; last != NULL && last->start < end; 2267 last = RBT_NEXT(uvm_map_addr, last)) { 2268 if (UVM_ET_ISHOLE(last) || 2269 (last->end < end && VMMAP_FREE_END(last) != last->end)) { 2270 /* 2271 * XXX unmapped memory in range, why is it EINVAL 2272 * instead of EFAULT? 2273 */ 2274 error = EINVAL; 2275 goto out; 2276 } 2277 } 2278 2279 /* 2280 * Last ended at the first entry after the range. 2281 * Move back one step. 2282 * 2283 * Note that last may be NULL. 2284 */ 2285 if (last == NULL) { 2286 last = RBT_MAX(uvm_map_addr, &map->addr); 2287 if (last->end < end) { 2288 error = EINVAL; 2289 goto out; 2290 } 2291 } else { 2292 KASSERT(last != first); 2293 last = RBT_PREV(uvm_map_addr, last); 2294 } 2295 2296 /* Wire/unwire pages here. */ 2297 if (new_pageable) { 2298 /* 2299 * Mark pageable. 2300 * entries that are not wired are untouched. 2301 */ 2302 if (VM_MAPENT_ISWIRED(first)) 2303 UVM_MAP_CLIP_START(map, first, start); 2304 /* 2305 * Split last at end. 2306 * Make tmp be the first entry after what is to be touched. 2307 * If last is not wired, don't touch it. 2308 */ 2309 if (VM_MAPENT_ISWIRED(last)) { 2310 UVM_MAP_CLIP_END(map, last, end); 2311 tmp = RBT_NEXT(uvm_map_addr, last); 2312 } else 2313 tmp = last; 2314 2315 uvm_map_pageable_pgon(map, first, tmp, start, end); 2316 error = 0; 2317 2318 out: 2319 if ((lockflags & UVM_LK_EXIT) == 0) 2320 vm_map_unlock(map); 2321 return error; 2322 } else { 2323 /* 2324 * Mark entries wired. 2325 * entries are always touched (because recovery needs this). 2326 */ 2327 if (!VM_MAPENT_ISWIRED(first)) 2328 UVM_MAP_CLIP_START(map, first, start); 2329 /* 2330 * Split last at end. 2331 * Make tmp be the first entry after what is to be touched. 2332 * If last is not wired, don't touch it. 2333 */ 2334 if (!VM_MAPENT_ISWIRED(last)) { 2335 UVM_MAP_CLIP_END(map, last, end); 2336 tmp = RBT_NEXT(uvm_map_addr, last); 2337 } else 2338 tmp = last; 2339 2340 return uvm_map_pageable_wire(map, first, tmp, start, end, 2341 lockflags); 2342 } 2343 } 2344 2345 /* 2346 * uvm_map_pageable_all: special case of uvm_map_pageable - affects 2347 * all mapped regions. 2348 * 2349 * Map must not be locked. 2350 * If no flags are specified, all regions are unwired. 
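 * In detail, as implemented below:
 *	no flags	unwire every wired entry and clear VM_MAP_WIREFUTURE
 *	MCL_FUTURE	set VM_MAP_WIREFUTURE so later mappings are wired
 *	MCL_CURRENT	wire all current entries, provided the wired page
 *			counts stay below uvmexp.wiredmax and the given limit
 * e.g. mlockall(MCL_CURRENT | MCL_FUTURE) requests both behaviours.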
2351 */ 2352 int 2353 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit) 2354 { 2355 vsize_t size; 2356 struct vm_map_entry *iter; 2357 2358 KASSERT(map->flags & VM_MAP_PAGEABLE); 2359 vm_map_lock(map); 2360 2361 if (flags == 0) { 2362 uvm_map_pageable_pgon(map, RBT_MIN(uvm_map_addr, &map->addr), 2363 NULL, map->min_offset, map->max_offset); 2364 2365 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE); 2366 vm_map_unlock(map); 2367 return 0; 2368 } 2369 2370 if (flags & MCL_FUTURE) 2371 vm_map_modflags(map, VM_MAP_WIREFUTURE, 0); 2372 if (!(flags & MCL_CURRENT)) { 2373 vm_map_unlock(map); 2374 return 0; 2375 } 2376 2377 /* 2378 * Count number of pages in all non-wired entries. 2379 * If the number exceeds the limit, abort. 2380 */ 2381 size = 0; 2382 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2383 if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter)) 2384 continue; 2385 2386 size += iter->end - iter->start; 2387 } 2388 2389 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) { 2390 vm_map_unlock(map); 2391 return ENOMEM; 2392 } 2393 2394 /* XXX non-pmap_wired_count case must be handled by caller */ 2395 #ifdef pmap_wired_count 2396 if (limit != 0 && 2397 size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) { 2398 vm_map_unlock(map); 2399 return ENOMEM; 2400 } 2401 #endif 2402 2403 /* 2404 * uvm_map_pageable_wire will release lock 2405 */ 2406 return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr), 2407 NULL, map->min_offset, map->max_offset, 0); 2408 } 2409 2410 /* 2411 * Initialize map. 2412 * 2413 * Allocates sufficient entries to describe the free memory in the map. 2414 */ 2415 void 2416 uvm_map_setup(struct vm_map *map, pmap_t pmap, vaddr_t min, vaddr_t max, 2417 int flags) 2418 { 2419 int i; 2420 2421 KASSERT((min & (vaddr_t)PAGE_MASK) == 0); 2422 KASSERT((max & (vaddr_t)PAGE_MASK) == 0 || 2423 (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 2424 2425 /* 2426 * Update parameters. 2427 * 2428 * This code handles (vaddr_t)-1 and other page mask ending addresses 2429 * properly. 2430 * We lose the top page if the full virtual address space is used. 2431 */ 2432 if (max & (vaddr_t)PAGE_MASK) { 2433 max += 1; 2434 if (max == 0) /* overflow */ 2435 max -= PAGE_SIZE; 2436 } 2437 2438 RBT_INIT(uvm_map_addr, &map->addr); 2439 map->uaddr_exe = NULL; 2440 for (i = 0; i < nitems(map->uaddr_any); ++i) 2441 map->uaddr_any[i] = NULL; 2442 map->uaddr_brk_stack = NULL; 2443 2444 map->pmap = pmap; 2445 map->size = 0; 2446 map->ref_count = 0; 2447 map->min_offset = min; 2448 map->max_offset = max; 2449 map->b_start = map->b_end = 0; /* Empty brk() area by default. */ 2450 map->s_start = map->s_end = 0; /* Empty stack area by default. */ 2451 map->flags = flags; 2452 map->timestamp = 0; 2453 map->busy = NULL; 2454 if (flags & VM_MAP_ISVMSPACE) 2455 rw_init_flags(&map->lock, "vmmaplk", RWL_DUPOK); 2456 else 2457 rw_init(&map->lock, "kmmaplk"); 2458 mtx_init(&map->mtx, IPL_VM); 2459 mtx_init(&map->flags_lock, IPL_VM); 2460 2461 /* Configure the allocators. */ 2462 if (flags & VM_MAP_ISVMSPACE) 2463 uvm_map_setup_md(map); 2464 else 2465 map->uaddr_any[3] = &uaddr_kbootstrap; 2466 2467 /* 2468 * Fill map entries. 2469 * We do not need to write-lock the map here because only the current 2470 * thread sees it right now. Initialize ref_count to 0 above to avoid 2471 * bogus triggering of lock-not-held assertions. 2472 */ 2473 uvm_map_setup_entries(map); 2474 uvm_tree_sanity(map, __FILE__, __LINE__); 2475 map->ref_count = 1; 2476 } 2477 2478 /* 2479 * Destroy the map. 
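 * (Called from uvmspace_free() once the final reference has been
 * dropped; the kernel lock is released around the bulk of the work
 * and re-acquired before the pmap is destroyed.)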
2480 * 2481 * This is the inverse operation to uvm_map_setup. 2482 */ 2483 void 2484 uvm_map_teardown(struct vm_map *map) 2485 { 2486 struct uvm_map_deadq dead_entries; 2487 struct vm_map_entry *entry, *tmp; 2488 #ifdef VMMAP_DEBUG 2489 size_t numq, numt; 2490 #endif 2491 int i; 2492 2493 KERNEL_ASSERT_LOCKED(); 2494 KERNEL_UNLOCK(); 2495 KERNEL_ASSERT_UNLOCKED(); 2496 2497 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 2498 2499 vm_map_lock(map); 2500 2501 /* Remove address selectors. */ 2502 uvm_addr_destroy(map->uaddr_exe); 2503 map->uaddr_exe = NULL; 2504 for (i = 0; i < nitems(map->uaddr_any); i++) { 2505 uvm_addr_destroy(map->uaddr_any[i]); 2506 map->uaddr_any[i] = NULL; 2507 } 2508 uvm_addr_destroy(map->uaddr_brk_stack); 2509 map->uaddr_brk_stack = NULL; 2510 2511 /* 2512 * Remove entries. 2513 * 2514 * The following is based on graph breadth-first search. 2515 * 2516 * In color terms: 2517 * - the dead_entries set contains all nodes that are reachable 2518 * (i.e. both the black and the grey nodes) 2519 * - any entry not in dead_entries is white 2520 * - any entry that appears in dead_entries before entry, 2521 * is black, the rest is grey. 2522 * The set [entry, end] is also referred to as the wavefront. 2523 * 2524 * Since the tree is always a fully connected graph, the breadth-first 2525 * search guarantees that each vmmap_entry is visited exactly once. 2526 * The vm_map is broken down in linear time. 2527 */ 2528 TAILQ_INIT(&dead_entries); 2529 if ((entry = RBT_ROOT(uvm_map_addr, &map->addr)) != NULL) 2530 DEAD_ENTRY_PUSH(&dead_entries, entry); 2531 while (entry != NULL) { 2532 sched_pause(yield); 2533 uvm_unmap_kill_entry(map, entry); 2534 if ((tmp = RBT_LEFT(uvm_map_addr, entry)) != NULL) 2535 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2536 if ((tmp = RBT_RIGHT(uvm_map_addr, entry)) != NULL) 2537 DEAD_ENTRY_PUSH(&dead_entries, tmp); 2538 /* Update wave-front. */ 2539 entry = TAILQ_NEXT(entry, dfree.deadq); 2540 } 2541 2542 vm_map_unlock(map); 2543 2544 #ifdef VMMAP_DEBUG 2545 numt = numq = 0; 2546 RBT_FOREACH(entry, uvm_map_addr, &map->addr) 2547 numt++; 2548 TAILQ_FOREACH(entry, &dead_entries, dfree.deadq) 2549 numq++; 2550 KASSERT(numt == numq); 2551 #endif 2552 uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK); 2553 2554 KERNEL_LOCK(); 2555 2556 pmap_destroy(map->pmap); 2557 map->pmap = NULL; 2558 } 2559 2560 /* 2561 * Populate map with free-memory entries. 2562 * 2563 * Map must be initialized and empty. 2564 */ 2565 void 2566 uvm_map_setup_entries(struct vm_map *map) 2567 { 2568 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 2569 2570 uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0); 2571 } 2572 2573 /* 2574 * Split entry at given address. 2575 * 2576 * orig: entry that is to be split. 2577 * next: a newly allocated map entry that is not linked. 2578 * split: address at which the split is done. 
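 * In the common case (split < orig->end) the result is:
 *
 *	before:	orig = [start ............. end) + free space
 *	after:	orig = [start ... split)
 *		next =           [split .. end) + free space
 *
 * Amap references are divided at offset (split - orig->start) via
 * amap_splitref(); object and submap offsets in next are advanced by
 * the same amount. If split lies beyond orig->end, next ends up
 * describing only the free space past split.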
2579 */ 2580 void 2581 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig, 2582 struct vm_map_entry *next, vaddr_t split) 2583 { 2584 struct uvm_addr_state *free, *free_before; 2585 vsize_t adj; 2586 2587 if ((split & PAGE_MASK) != 0) { 2588 panic("uvm_map_splitentry: split address 0x%lx " 2589 "not on page boundary!", split); 2590 } 2591 KDASSERT(map != NULL && orig != NULL && next != NULL); 2592 uvm_tree_sanity(map, __FILE__, __LINE__); 2593 KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split); 2594 2595 #ifdef VMMAP_DEBUG 2596 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, orig) == orig); 2597 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, next) != next); 2598 #endif /* VMMAP_DEBUG */ 2599 2600 /* 2601 * Free space will change, unlink from free space tree. 2602 */ 2603 free = uvm_map_uaddr_e(map, orig); 2604 uvm_mapent_free_remove(map, free, orig); 2605 2606 adj = split - orig->start; 2607 2608 uvm_mapent_copy(orig, next); 2609 if (split >= orig->end) { 2610 next->etype = 0; 2611 next->offset = 0; 2612 next->wired_count = 0; 2613 next->start = next->end = split; 2614 next->guard = 0; 2615 next->fspace = VMMAP_FREE_END(orig) - split; 2616 next->aref.ar_amap = NULL; 2617 next->aref.ar_pageoff = 0; 2618 orig->guard = MIN(orig->guard, split - orig->end); 2619 orig->fspace = split - VMMAP_FREE_START(orig); 2620 } else { 2621 orig->fspace = 0; 2622 orig->guard = 0; 2623 orig->end = next->start = split; 2624 2625 if (next->aref.ar_amap) { 2626 amap_splitref(&orig->aref, &next->aref, adj); 2627 } 2628 if (UVM_ET_ISSUBMAP(orig)) { 2629 uvm_map_reference(next->object.sub_map); 2630 next->offset += adj; 2631 } else if (UVM_ET_ISOBJ(orig)) { 2632 if (next->object.uvm_obj->pgops && 2633 next->object.uvm_obj->pgops->pgo_reference) { 2634 KERNEL_LOCK(); 2635 next->object.uvm_obj->pgops->pgo_reference( 2636 next->object.uvm_obj); 2637 KERNEL_UNLOCK(); 2638 } 2639 next->offset += adj; 2640 } 2641 } 2642 2643 /* 2644 * Link next into address tree. 2645 * Link orig and next into free-space tree. 2646 * 2647 * Don't insert 'next' into the addr tree until orig has been linked, 2648 * in case the free-list looks at adjacent entries in the addr tree 2649 * for its decisions. 2650 */ 2651 if (orig->fspace > 0) 2652 free_before = free; 2653 else 2654 free_before = uvm_map_uaddr_e(map, orig); 2655 uvm_mapent_free_insert(map, free_before, orig); 2656 uvm_mapent_addr_insert(map, next); 2657 uvm_mapent_free_insert(map, free, next); 2658 2659 uvm_tree_sanity(map, __FILE__, __LINE__); 2660 } 2661 2662 2663 #ifdef VMMAP_DEBUG 2664 2665 void 2666 uvm_tree_assert(struct vm_map *map, int test, char *test_str, 2667 char *file, int line) 2668 { 2669 char* map_special; 2670 2671 if (test) 2672 return; 2673 2674 if (map == kernel_map) 2675 map_special = " (kernel_map)"; 2676 else if (map == kmem_map) 2677 map_special = " (kmem_map)"; 2678 else 2679 map_special = ""; 2680 panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file, 2681 line, test_str); 2682 } 2683 2684 /* 2685 * Check that map is sane. 2686 */ 2687 void 2688 uvm_tree_sanity(struct vm_map *map, char *file, int line) 2689 { 2690 struct vm_map_entry *iter; 2691 vaddr_t addr; 2692 vaddr_t min, max, bound; /* Bounds checker. */ 2693 struct uvm_addr_state *free; 2694 2695 addr = vm_map_min(map); 2696 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2697 /* 2698 * Valid start, end. 2699 * Catch overflow for end+fspace. 
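 * (if the free space arithmetic wrapped around, VMMAP_FREE_END(iter)
 * would come out below iter->end, which the second assertion catches)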
2700 */ 2701 UVM_ASSERT(map, iter->end >= iter->start, file, line); 2702 UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line); 2703 2704 /* May not be empty. */ 2705 UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter), 2706 file, line); 2707 2708 /* Addresses for entry must lie within map boundaries. */ 2709 UVM_ASSERT(map, iter->start >= vm_map_min(map) && 2710 VMMAP_FREE_END(iter) <= vm_map_max(map), file, line); 2711 2712 /* Tree may not have gaps. */ 2713 UVM_ASSERT(map, iter->start == addr, file, line); 2714 addr = VMMAP_FREE_END(iter); 2715 2716 /* 2717 * Free space may not cross boundaries, unless the same 2718 * free list is used on both sides of the border. 2719 */ 2720 min = VMMAP_FREE_START(iter); 2721 max = VMMAP_FREE_END(iter); 2722 2723 while (min < max && 2724 (bound = uvm_map_boundary(map, min, max)) != max) { 2725 UVM_ASSERT(map, 2726 uvm_map_uaddr(map, bound - 1) == 2727 uvm_map_uaddr(map, bound), 2728 file, line); 2729 min = bound; 2730 } 2731 2732 free = uvm_map_uaddr_e(map, iter); 2733 if (free) { 2734 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0, 2735 file, line); 2736 } else { 2737 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0, 2738 file, line); 2739 } 2740 } 2741 UVM_ASSERT(map, addr == vm_map_max(map), file, line); 2742 } 2743 2744 void 2745 uvm_tree_size_chk(struct vm_map *map, char *file, int line) 2746 { 2747 struct vm_map_entry *iter; 2748 vsize_t size; 2749 2750 size = 0; 2751 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2752 if (!UVM_ET_ISHOLE(iter)) 2753 size += iter->end - iter->start; 2754 } 2755 2756 if (map->size != size) 2757 printf("map size = 0x%lx, should be 0x%lx\n", map->size, size); 2758 UVM_ASSERT(map, map->size == size, file, line); 2759 2760 vmspace_validate(map); 2761 } 2762 2763 /* 2764 * This function validates the statistics on vmspace. 2765 */ 2766 void 2767 vmspace_validate(struct vm_map *map) 2768 { 2769 struct vmspace *vm; 2770 struct vm_map_entry *iter; 2771 vaddr_t imin, imax; 2772 vaddr_t stack_begin, stack_end; /* Position of stack. */ 2773 vsize_t stack, heap; /* Measured sizes. */ 2774 2775 if (!(map->flags & VM_MAP_ISVMSPACE)) 2776 return; 2777 2778 vm = (struct vmspace *)map; 2779 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2780 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 2781 2782 stack = heap = 0; 2783 RBT_FOREACH(iter, uvm_map_addr, &map->addr) { 2784 imin = imax = iter->start; 2785 2786 if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL || 2787 iter->protection != PROT_NONE) 2788 continue; 2789 2790 /* 2791 * Update stack, heap. 2792 * Keep in mind that (theoretically) the entries of 2793 * userspace and stack may be joined. 2794 */ 2795 while (imin != iter->end) { 2796 /* 2797 * Set imax to the first boundary crossed between 2798 * imin and stack addresses. 
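 * Each sub-range is then credited to either the stack total or the
 * heap (vm_dused) total, so an entry spanning the stack boundary is
 * split correctly between the two.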
2799 */ 2800 imax = iter->end; 2801 if (imin < stack_begin && imax > stack_begin) 2802 imax = stack_begin; 2803 else if (imin < stack_end && imax > stack_end) 2804 imax = stack_end; 2805 2806 if (imin >= stack_begin && imin < stack_end) 2807 stack += imax - imin; 2808 else 2809 heap += imax - imin; 2810 imin = imax; 2811 } 2812 } 2813 2814 heap >>= PAGE_SHIFT; 2815 if (heap != vm->vm_dused) { 2816 printf("vmspace stack range: 0x%lx-0x%lx\n", 2817 stack_begin, stack_end); 2818 panic("vmspace_validate: vmspace.vm_dused invalid, " 2819 "expected %ld pgs, got %d pgs in map %p", 2820 heap, vm->vm_dused, 2821 map); 2822 } 2823 } 2824 2825 #endif /* VMMAP_DEBUG */ 2826 2827 /* 2828 * uvm_map_init: init mapping system at boot time. note that we allocate 2829 * and init the static pool of structs vm_map_entry for the kernel here. 2830 */ 2831 void 2832 uvm_map_init(void) 2833 { 2834 static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; 2835 int lcv; 2836 2837 /* now set up static pool of kernel map entries ... */ 2838 mtx_init(&uvm_kmapent_mtx, IPL_VM); 2839 SLIST_INIT(&uvm.kentry_free); 2840 for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) { 2841 SLIST_INSERT_HEAD(&uvm.kentry_free, 2842 &kernel_map_entry[lcv], daddrs.addr_kentry); 2843 } 2844 2845 /* initialize the map-related pools. */ 2846 pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0, 2847 IPL_NONE, PR_WAITOK, "vmsppl", NULL); 2848 pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0, 2849 IPL_VM, PR_WAITOK, "vmmpepl", NULL); 2850 pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0, 2851 IPL_VM, 0, "vmmpekpl", NULL); 2852 pool_sethiwat(&uvm_map_entry_pool, 8192); 2853 2854 uvm_addr_init(); 2855 } 2856 2857 #if defined(DDB) 2858 2859 /* 2860 * DDB hooks 2861 */ 2862 2863 /* 2864 * uvm_map_printit: actually prints the map 2865 */ 2866 void 2867 uvm_map_printit(struct vm_map *map, boolean_t full, 2868 int (*pr)(const char *, ...)) 2869 { 2870 struct vmspace *vm; 2871 struct vm_map_entry *entry; 2872 struct uvm_addr_state *free; 2873 int in_free, i; 2874 char buf[8]; 2875 2876 (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); 2877 (*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n", 2878 map->b_start, map->b_end); 2879 (*pr)("\tstack allocate range: 0x%lx-0x%lx\n", 2880 map->s_start, map->s_end); 2881 (*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n", 2882 map->size, map->ref_count, map->timestamp, 2883 map->flags); 2884 (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, 2885 pmap_resident_count(map->pmap)); 2886 2887 /* struct vmspace handling. */ 2888 if (map->flags & VM_MAP_ISVMSPACE) { 2889 vm = (struct vmspace *)map; 2890 2891 (*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n", 2892 vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss); 2893 (*pr)("\tvm_tsize=%u vm_dsize=%u\n", 2894 vm->vm_tsize, vm->vm_dsize); 2895 (*pr)("\tvm_taddr=%p vm_daddr=%p\n", 2896 vm->vm_taddr, vm->vm_daddr); 2897 (*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n", 2898 vm->vm_maxsaddr, vm->vm_minsaddr); 2899 } 2900 2901 if (!full) 2902 goto print_uaddr; 2903 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 2904 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n", 2905 entry, entry->start, entry->end, entry->object.uvm_obj, 2906 (long long)entry->offset, entry->aref.ar_amap, 2907 entry->aref.ar_pageoff); 2908 (*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, " 2909 "prot(max)=%d/%d, inh=%d, " 2910 "wc=%d, adv=%d\n", 2911 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', 2912 (entry->etype & UVM_ET_COPYONWRITE) ? 
'T' : 'F', 2913 (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F', 2914 (entry->etype & UVM_ET_STACK) ? 'T' : 'F', 2915 entry->protection, entry->max_protection, 2916 entry->inheritance, entry->wired_count, entry->advice); 2917 2918 free = uvm_map_uaddr_e(map, entry); 2919 in_free = (free != NULL); 2920 (*pr)("\thole=%c, free=%c, guard=0x%lx, " 2921 "free=0x%lx-0x%lx\n", 2922 (entry->etype & UVM_ET_HOLE) ? 'T' : 'F', 2923 in_free ? 'T' : 'F', 2924 entry->guard, 2925 VMMAP_FREE_START(entry), VMMAP_FREE_END(entry)); 2926 (*pr)("\tfspace_augment=%lu\n", entry->fspace_augment); 2927 (*pr)("\tfreemapped=%c, uaddr=%p\n", 2928 (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free); 2929 if (free) { 2930 (*pr)("\t\t(0x%lx-0x%lx %s)\n", 2931 free->uaddr_minaddr, free->uaddr_maxaddr, 2932 free->uaddr_functions->uaddr_name); 2933 } 2934 } 2935 2936 print_uaddr: 2937 uvm_addr_print(map->uaddr_exe, "exe", full, pr); 2938 for (i = 0; i < nitems(map->uaddr_any); i++) { 2939 snprintf(&buf[0], sizeof(buf), "any[%d]", i); 2940 uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr); 2941 } 2942 uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr); 2943 } 2944 2945 /* 2946 * uvm_object_printit: actually prints the object 2947 */ 2948 void 2949 uvm_object_printit(struct uvm_object *uobj, boolean_t full, 2950 int (*pr)(const char *, ...)) 2951 { 2952 struct vm_page *pg; 2953 int cnt = 0; 2954 2955 (*pr)("OBJECT %p: pgops=%p, npages=%d, ", 2956 uobj, uobj->pgops, uobj->uo_npages); 2957 if (UVM_OBJ_IS_KERN_OBJECT(uobj)) 2958 (*pr)("refs=<SYSTEM>\n"); 2959 else 2960 (*pr)("refs=%d\n", uobj->uo_refs); 2961 2962 if (!full) { 2963 return; 2964 } 2965 (*pr)(" PAGES <pg,offset>:\n "); 2966 RBT_FOREACH(pg, uvm_objtree, &uobj->memt) { 2967 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset); 2968 if ((cnt % 3) == 2) { 2969 (*pr)("\n "); 2970 } 2971 cnt++; 2972 } 2973 if ((cnt % 3) != 2) { 2974 (*pr)("\n"); 2975 } 2976 } 2977 2978 /* 2979 * uvm_page_printit: actually print the page 2980 */ 2981 static const char page_flagbits[] = 2982 "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY" 2983 "\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ" 2984 "\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5"; 2985 2986 void 2987 uvm_page_printit(struct vm_page *pg, boolean_t full, 2988 int (*pr)(const char *, ...)) 2989 { 2990 struct vm_page *tpg; 2991 struct uvm_object *uobj; 2992 struct pglist *pgl; 2993 2994 (*pr)("PAGE %p:\n", pg); 2995 (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n", 2996 pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count, 2997 (long long)pg->phys_addr); 2998 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx\n", 2999 pg->uobject, pg->uanon, (long long)pg->offset); 3000 #if defined(UVM_PAGE_TRKOWN) 3001 if (pg->pg_flags & PG_BUSY) 3002 (*pr)(" owning thread = %d, tag=%s", 3003 pg->owner, pg->owner_tag); 3004 else 3005 (*pr)(" page not busy, no owner"); 3006 #else 3007 (*pr)(" [page ownership tracking disabled]"); 3008 #endif 3009 (*pr)("\tvm_page_md %p\n", &pg->mdpage); 3010 3011 if (!full) 3012 return; 3013 3014 /* cross-verify object/anon */ 3015 if ((pg->pg_flags & PQ_FREE) == 0) { 3016 if (pg->pg_flags & PQ_ANON) { 3017 if (pg->uanon == NULL || pg->uanon->an_page != pg) 3018 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 3019 (pg->uanon) ? 
pg->uanon->an_page : NULL); 3020 else 3021 (*pr)(" anon backpointer is OK\n"); 3022 } else { 3023 uobj = pg->uobject; 3024 if (uobj) { 3025 (*pr)(" checking object list\n"); 3026 RBT_FOREACH(tpg, uvm_objtree, &uobj->memt) { 3027 if (tpg == pg) { 3028 break; 3029 } 3030 } 3031 if (tpg) 3032 (*pr)(" page found on object list\n"); 3033 else 3034 (*pr)(" >>> PAGE NOT FOUND " 3035 "ON OBJECT LIST! <<<\n"); 3036 } 3037 } 3038 } 3039 3040 /* cross-verify page queue */ 3041 if (pg->pg_flags & PQ_FREE) { 3042 if (uvm_pmr_isfree(pg)) 3043 (*pr)(" page found in uvm_pmemrange\n"); 3044 else 3045 (*pr)(" >>> page not found in uvm_pmemrange <<<\n"); 3046 pgl = NULL; 3047 } else if (pg->pg_flags & PQ_INACTIVE) { 3048 pgl = &uvm.page_inactive; 3049 } else if (pg->pg_flags & PQ_ACTIVE) { 3050 pgl = &uvm.page_active; 3051 } else { 3052 pgl = NULL; 3053 } 3054 3055 if (pgl) { 3056 (*pr)(" checking pageq list\n"); 3057 TAILQ_FOREACH(tpg, pgl, pageq) { 3058 if (tpg == pg) { 3059 break; 3060 } 3061 } 3062 if (tpg) 3063 (*pr)(" page found on pageq list\n"); 3064 else 3065 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); 3066 } 3067 } 3068 #endif 3069 3070 /* 3071 * uvm_map_protect: change map protection 3072 * 3073 * => set_max means set max_protection. 3074 * => map must be unlocked. 3075 */ 3076 int 3077 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end, 3078 vm_prot_t new_prot, int etype, boolean_t set_max, boolean_t checkimmutable) 3079 { 3080 struct vm_map_entry *first, *iter; 3081 vm_prot_t old_prot; 3082 vm_prot_t mask; 3083 vsize_t dused; 3084 int error; 3085 3086 KASSERT((etype & ~UVM_ET_STACK) == 0); /* only UVM_ET_STACK allowed */ 3087 3088 if (start > end) 3089 return EINVAL; 3090 start = MAX(start, map->min_offset); 3091 end = MIN(end, map->max_offset); 3092 if (start >= end) 3093 return 0; 3094 3095 dused = 0; 3096 error = 0; 3097 vm_map_lock(map); 3098 3099 /* 3100 * Set up first and last. 3101 * - first will contain first entry at or after start. 3102 */ 3103 first = uvm_map_entrybyaddr(&map->addr, start); 3104 KDASSERT(first != NULL); 3105 if (first->end <= start) 3106 first = RBT_NEXT(uvm_map_addr, first); 3107 3108 /* First, check for protection violations. */ 3109 for (iter = first; iter != NULL && iter->start < end; 3110 iter = RBT_NEXT(uvm_map_addr, iter)) { 3111 /* Treat memory holes as free space. */ 3112 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3113 continue; 3114 3115 if (checkimmutable && (iter->etype & UVM_ET_IMMUTABLE)) { 3116 error = EPERM; 3117 goto out; 3118 } 3119 old_prot = iter->protection; 3120 if (old_prot == PROT_NONE && new_prot != old_prot) { 3121 dused += uvmspace_dused( 3122 map, MAX(start, iter->start), MIN(end, iter->end)); 3123 } 3124 3125 if (UVM_ET_ISSUBMAP(iter)) { 3126 error = EINVAL; 3127 goto out; 3128 } 3129 if ((new_prot & iter->max_protection) != new_prot) { 3130 error = EACCES; 3131 goto out; 3132 } 3133 if (map == kernel_map && 3134 (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 3135 panic("uvm_map_protect: kernel map W^X violation requested"); 3136 } 3137 3138 /* Check limits. */ 3139 if (dused > 0 && (map->flags & VM_MAP_ISVMSPACE)) { 3140 vsize_t limit = lim_cur(RLIMIT_DATA); 3141 dused = ptoa(dused); 3142 if (limit < dused || 3143 limit - dused < ptoa(((struct vmspace *)map)->vm_dused)) { 3144 error = ENOMEM; 3145 goto out; 3146 } 3147 } 3148 3149 /* only apply UVM_ET_STACK on a mapping changing to RW */ 3150 if (etype && new_prot != (PROT_READ|PROT_WRITE)) 3151 etype = 0; 3152 3153 /* Fix protections. 
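 * The checks above already rejected submaps, immutable entries and
 * protection violations, so the only step that can still fail in
 * this loop is the optional VM_MAP_WIREFUTURE wiring.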
*/ 3154 for (iter = first; iter != NULL && iter->start < end; 3155 iter = RBT_NEXT(uvm_map_addr, iter)) { 3156 /* Treat memory holes as free space. */ 3157 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) 3158 continue; 3159 3160 old_prot = iter->protection; 3161 3162 /* 3163 * Skip adapting protection iff old and new protection 3164 * are equal. 3165 */ 3166 if (set_max) { 3167 if (old_prot == (new_prot & old_prot) && 3168 iter->max_protection == new_prot) 3169 continue; 3170 } else { 3171 if (old_prot == new_prot) 3172 continue; 3173 } 3174 3175 UVM_MAP_CLIP_START(map, iter, start); 3176 UVM_MAP_CLIP_END(map, iter, end); 3177 3178 if (set_max) { 3179 iter->max_protection = new_prot; 3180 iter->protection &= new_prot; 3181 } else 3182 iter->protection = new_prot; 3183 iter->etype |= etype; /* potentially add UVM_ET_STACK */ 3184 3185 /* 3186 * update physical map if necessary. worry about copy-on-write 3187 * here -- CHECK THIS XXX 3188 */ 3189 if (iter->protection != old_prot) { 3190 mask = UVM_ET_ISCOPYONWRITE(iter) ? 3191 ~PROT_WRITE : PROT_MASK; 3192 3193 if (map->flags & VM_MAP_ISVMSPACE) { 3194 if (old_prot == PROT_NONE) { 3195 ((struct vmspace *)map)->vm_dused += 3196 uvmspace_dused(map, iter->start, 3197 iter->end); 3198 } 3199 if (iter->protection == PROT_NONE) { 3200 ((struct vmspace *)map)->vm_dused -= 3201 uvmspace_dused(map, iter->start, 3202 iter->end); 3203 } 3204 } 3205 3206 /* update pmap */ 3207 if ((iter->protection & mask) == PROT_NONE && 3208 VM_MAPENT_ISWIRED(iter)) { 3209 /* 3210 * TODO(ariane) this is stupid. wired_count 3211 * is 0 if not wired, otherwise anything 3212 * larger than 0 (incremented once each time 3213 * wire is called). 3214 * Mostly to be able to undo the damage on 3215 * failure. Not the actually be a wired 3216 * refcounter... 3217 * Originally: iter->wired_count--; 3218 * (don't we have to unwire this in the pmap 3219 * as well?) 3220 */ 3221 iter->wired_count = 0; 3222 } 3223 uvm_map_lock_entry(iter); 3224 pmap_protect(map->pmap, iter->start, iter->end, 3225 iter->protection & mask); 3226 uvm_map_unlock_entry(iter); 3227 } 3228 3229 /* 3230 * If the map is configured to lock any future mappings, 3231 * wire this entry now if the old protection was PROT_NONE 3232 * and the new protection is not PROT_NONE. 3233 */ 3234 if ((map->flags & VM_MAP_WIREFUTURE) != 0 && 3235 VM_MAPENT_ISWIRED(iter) == 0 && 3236 old_prot == PROT_NONE && 3237 new_prot != PROT_NONE) { 3238 if (uvm_map_pageable(map, iter->start, iter->end, 3239 FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) { 3240 /* 3241 * If locking the entry fails, remember the 3242 * error if it's the first one. Note we 3243 * still continue setting the protection in 3244 * the map, but it will return the resource 3245 * storage condition regardless. 3246 * 3247 * XXX Ignore what the actual error is, 3248 * XXX just call it a resource shortage 3249 * XXX so that it doesn't get confused 3250 * XXX what uvm_map_protect() itself would 3251 * XXX normally return. 3252 */ 3253 error = ENOMEM; 3254 } 3255 } 3256 } 3257 pmap_update(map->pmap); 3258 3259 out: 3260 if (etype & UVM_ET_STACK) 3261 map->sserial++; 3262 vm_map_unlock(map); 3263 return error; 3264 } 3265 3266 /* 3267 * uvmspace_alloc: allocate a vmspace structure. 
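 * (the structure comes zeroed from uvm_vmspace_pool; e.g.
 * uvmspace_exec() allocates a fresh vmspace this way when the old
 * one is still shared with another process)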
3268 * 3269 * - structure includes vm_map and pmap 3270 * - XXX: no locking on this structure 3271 * - refcnt set to 1, rest must be init'd by caller 3272 */ 3273 struct vmspace * 3274 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable, 3275 boolean_t remove_holes) 3276 { 3277 struct vmspace *vm; 3278 3279 vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO); 3280 uvmspace_init(vm, NULL, min, max, pageable, remove_holes); 3281 return (vm); 3282 } 3283 3284 /* 3285 * uvmspace_init: initialize a vmspace structure. 3286 * 3287 * - XXX: no locking on this structure 3288 * - refcnt set to 1, rest must be init'd by caller 3289 */ 3290 void 3291 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max, 3292 boolean_t pageable, boolean_t remove_holes) 3293 { 3294 KASSERT(pmap == NULL || pmap == pmap_kernel()); 3295 3296 if (pmap) 3297 pmap_reference(pmap); 3298 else 3299 pmap = pmap_create(); 3300 3301 uvm_map_setup(&vm->vm_map, pmap, min, max, 3302 (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE); 3303 3304 vm->vm_refcnt = 1; 3305 3306 if (remove_holes) 3307 pmap_remove_holes(vm); 3308 } 3309 3310 /* 3311 * uvmspace_share: share a vmspace between two processes 3312 * 3313 * - used for vfork 3314 */ 3315 3316 struct vmspace * 3317 uvmspace_share(struct process *pr) 3318 { 3319 struct vmspace *vm = pr->ps_vmspace; 3320 3321 uvmspace_addref(vm); 3322 return vm; 3323 } 3324 3325 /* 3326 * uvmspace_exec: the process wants to exec a new program 3327 * 3328 * - XXX: no locking on vmspace 3329 */ 3330 3331 void 3332 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end) 3333 { 3334 struct process *pr = p->p_p; 3335 struct vmspace *nvm, *ovm = pr->ps_vmspace; 3336 struct vm_map *map = &ovm->vm_map; 3337 struct uvm_map_deadq dead_entries; 3338 3339 KASSERT((start & (vaddr_t)PAGE_MASK) == 0); 3340 KASSERT((end & (vaddr_t)PAGE_MASK) == 0 || 3341 (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK); 3342 3343 pmap_unuse_final(p); /* before stack addresses go away */ 3344 TAILQ_INIT(&dead_entries); 3345 3346 /* see if more than one process is using this vmspace... */ 3347 if (ovm->vm_refcnt == 1) { 3348 /* 3349 * If pr is the only process using its vmspace then 3350 * we can safely recycle that vmspace for the program 3351 * that is being exec'd. 3352 */ 3353 3354 #ifdef SYSVSHM 3355 /* 3356 * SYSV SHM semantics require us to kill all segments on an exec 3357 */ 3358 if (ovm->vm_shm) 3359 shmexit(ovm); 3360 #endif 3361 3362 /* 3363 * POSIX 1003.1b -- "lock future mappings" is revoked 3364 * when a process execs another program image. 3365 */ 3366 vm_map_lock(map); 3367 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE | 3368 VM_MAP_PINSYSCALL_ONCE); 3369 3370 /* 3371 * now unmap the old program 3372 * 3373 * Instead of attempting to keep the map valid, we simply 3374 * nuke all entries and ask uvm_map_setup to reinitialize 3375 * the map to the new boundaries. 3376 * 3377 * uvm_unmap_remove will actually nuke all entries for us 3378 * (as in, not replace them with free-memory entries). 3379 */ 3380 uvm_unmap_remove(map, map->min_offset, map->max_offset, 3381 &dead_entries, TRUE, FALSE, FALSE); 3382 3383 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 3384 3385 /* Nuke statistics and boundaries. */ 3386 memset(&ovm->vm_startcopy, 0, 3387 (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy); 3388 3389 3390 if (end & (vaddr_t)PAGE_MASK) { 3391 end += 1; 3392 if (end == 0) /* overflow */ 3393 end -= PAGE_SIZE; 3394 } 3395 3396 /* Setup new boundaries and populate map with entries. 
*/ 3397 map->min_offset = start; 3398 map->max_offset = end; 3399 uvm_map_setup_entries(map); 3400 vm_map_unlock(map); 3401 3402 /* but keep MMU holes unavailable */ 3403 pmap_remove_holes(ovm); 3404 } else { 3405 /* 3406 * pr's vmspace is being shared, so we can't reuse 3407 * it for pr since it is still being used for others. 3408 * allocate a new vmspace for pr 3409 */ 3410 nvm = uvmspace_alloc(start, end, 3411 (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE); 3412 3413 /* install new vmspace and drop our ref to the old one. */ 3414 pmap_deactivate(p); 3415 p->p_vmspace = pr->ps_vmspace = nvm; 3416 pmap_activate(p); 3417 3418 uvmspace_free(ovm); 3419 } 3420 #ifdef PMAP_CHECK_COPYIN 3421 p->p_vmspace->vm_map.check_copyin_count = 0; /* disable checks */ 3422 #endif 3423 3424 /* Release dead entries */ 3425 uvm_unmap_detach(&dead_entries, 0); 3426 } 3427 3428 /* 3429 * uvmspace_addref: add a reference to a vmspace. 3430 */ 3431 void 3432 uvmspace_addref(struct vmspace *vm) 3433 { 3434 KERNEL_ASSERT_LOCKED(); 3435 KASSERT(vm->vm_refcnt > 0); 3436 3437 vm->vm_refcnt++; 3438 } 3439 3440 /* 3441 * uvmspace_free: free a vmspace data structure 3442 */ 3443 void 3444 uvmspace_free(struct vmspace *vm) 3445 { 3446 KERNEL_ASSERT_LOCKED(); 3447 3448 if (--vm->vm_refcnt == 0) { 3449 /* 3450 * lock the map, to wait out all other references to it. delete 3451 * all of the mappings and pages they hold, then call the pmap 3452 * module to reclaim anything left. 3453 */ 3454 #ifdef SYSVSHM 3455 /* Get rid of any SYSV shared memory segments. */ 3456 if (vm->vm_shm != NULL) 3457 shmexit(vm); 3458 #endif 3459 3460 uvm_map_teardown(&vm->vm_map); 3461 pool_put(&uvm_vmspace_pool, vm); 3462 } 3463 } 3464 3465 /* 3466 * uvm_share: Map the address range [srcaddr, srcaddr + sz) in 3467 * srcmap to the address range [dstaddr, dstaddr + sz) in 3468 * dstmap. 3469 * 3470 * The whole address range in srcmap must be backed by an object 3471 * (no holes). 3472 * 3473 * If successful, the address ranges share memory and the destination 3474 * address range uses the protection flags in prot. 3475 * 3476 * This routine assumes that sz is a multiple of PAGE_SIZE and 3477 * that dstaddr and srcaddr are page-aligned. 
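 * Returns 0 on success. On failure an errno is returned and any part
 * of [dstaddr, dstaddr + sz) that was already shared is unmapped
 * again before returning.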
3478 */ 3479 int 3480 uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot, 3481 struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz) 3482 { 3483 int ret = 0; 3484 vaddr_t unmap_end; 3485 vaddr_t dstva; 3486 vsize_t s_off, len, n = sz, remain; 3487 struct vm_map_entry *first = NULL, *last = NULL; 3488 struct vm_map_entry *src_entry, *psrc_entry = NULL; 3489 struct uvm_map_deadq dead; 3490 3491 if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr) 3492 return EINVAL; 3493 3494 TAILQ_INIT(&dead); 3495 vm_map_lock(dstmap); 3496 vm_map_lock_read(srcmap); 3497 3498 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) { 3499 ret = ENOMEM; 3500 goto exit_unlock; 3501 } 3502 if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) { 3503 ret = EINVAL; 3504 goto exit_unlock; 3505 } 3506 3507 dstva = dstaddr; 3508 unmap_end = dstaddr; 3509 for (; src_entry != NULL; 3510 psrc_entry = src_entry, 3511 src_entry = RBT_NEXT(uvm_map_addr, src_entry)) { 3512 /* hole in address space, bail out */ 3513 if (psrc_entry != NULL && psrc_entry->end != src_entry->start) 3514 break; 3515 if (src_entry->start >= srcaddr + sz) 3516 break; 3517 3518 if (UVM_ET_ISSUBMAP(src_entry)) 3519 panic("uvm_share: encountered a submap (illegal)"); 3520 if (!UVM_ET_ISCOPYONWRITE(src_entry) && 3521 UVM_ET_ISNEEDSCOPY(src_entry)) 3522 panic("uvm_share: non-copy_on_write map entries " 3523 "marked needs_copy (illegal)"); 3524 3525 /* 3526 * srcaddr > map entry start? means we are in the middle of a 3527 * map, so we calculate the offset to use in the source map. 3528 */ 3529 if (srcaddr > src_entry->start) 3530 s_off = srcaddr - src_entry->start; 3531 else if (srcaddr == src_entry->start) 3532 s_off = 0; 3533 else 3534 panic("uvm_share: map entry start > srcaddr"); 3535 3536 remain = src_entry->end - src_entry->start - s_off; 3537 3538 /* Determine how many bytes to share in this pass */ 3539 if (n < remain) 3540 len = n; 3541 else 3542 len = remain; 3543 3544 if (uvm_mapent_share(dstmap, dstva, len, s_off, prot, prot, 3545 srcmap, src_entry, &dead) == NULL) 3546 break; 3547 3548 n -= len; 3549 dstva += len; 3550 srcaddr += len; 3551 unmap_end = dstva + len; 3552 if (n == 0) 3553 goto exit_unlock; 3554 } 3555 3556 ret = EINVAL; 3557 uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE, FALSE); 3558 3559 exit_unlock: 3560 vm_map_unlock_read(srcmap); 3561 vm_map_unlock(dstmap); 3562 uvm_unmap_detach(&dead, 0); 3563 3564 return ret; 3565 } 3566 3567 /* 3568 * Clone map entry into other map. 3569 * 3570 * Mapping will be placed at dstaddr, for the same length. 3571 * Space must be available. 3572 * Reference counters are incremented. 3573 */ 3574 struct vm_map_entry * 3575 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3576 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, 3577 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead, 3578 int mapent_flags, int amap_share_flags) 3579 { 3580 struct vm_map_entry *new_entry, *first, *last; 3581 3582 KDASSERT(!UVM_ET_ISSUBMAP(old_entry)); 3583 3584 /* Create new entry (linked in on creation). Fill in first, last. 
*/ 3585 first = last = NULL; 3586 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) { 3587 panic("uvm_mapent_clone: no space in map for " 3588 "entry in empty map"); 3589 } 3590 new_entry = uvm_map_mkentry(dstmap, first, last, 3591 dstaddr, dstlen, mapent_flags, dead, NULL); 3592 if (new_entry == NULL) 3593 return NULL; 3594 /* old_entry -> new_entry */ 3595 new_entry->object = old_entry->object; 3596 new_entry->offset = old_entry->offset; 3597 new_entry->aref = old_entry->aref; 3598 new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED; 3599 new_entry->protection = prot; 3600 new_entry->max_protection = maxprot; 3601 new_entry->inheritance = old_entry->inheritance; 3602 new_entry->advice = old_entry->advice; 3603 3604 /* gain reference to object backing the map (can't be a submap). */ 3605 if (new_entry->aref.ar_amap) { 3606 new_entry->aref.ar_pageoff += off >> PAGE_SHIFT; 3607 amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3608 (new_entry->end - new_entry->start) >> PAGE_SHIFT, 3609 amap_share_flags); 3610 } 3611 3612 if (UVM_ET_ISOBJ(new_entry) && 3613 new_entry->object.uvm_obj->pgops->pgo_reference) { 3614 new_entry->offset += off; 3615 new_entry->object.uvm_obj->pgops->pgo_reference 3616 (new_entry->object.uvm_obj); 3617 } 3618 3619 return new_entry; 3620 } 3621 3622 struct vm_map_entry * 3623 uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen, 3624 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map, 3625 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3626 { 3627 /* 3628 * If old_entry refers to a copy-on-write region that has not yet been 3629 * written to (needs_copy flag is set), then we need to allocate a new 3630 * amap for old_entry. 3631 * 3632 * If we do not do this, and the process owning old_entry does a copy-on 3633 * write later, old_entry and new_entry will refer to different memory 3634 * regions, and the memory between the processes is no longer shared. 3635 * 3636 * [in other words, we need to clear needs_copy] 3637 */ 3638 3639 if (UVM_ET_ISNEEDSCOPY(old_entry)) { 3640 /* get our own amap, clears needs_copy */ 3641 amap_copy(old_map, old_entry, M_WAITOK, FALSE, 0, 0); 3642 /* XXXCDC: WAITOK??? */ 3643 } 3644 3645 return uvm_mapent_clone(dstmap, dstaddr, dstlen, off, 3646 prot, maxprot, old_entry, dead, 0, AMAP_SHARED); 3647 } 3648 3649 /* 3650 * share the mapping: this means we want the old and 3651 * new entries to share amaps and backing objects. 3652 */ 3653 struct vm_map_entry * 3654 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map, 3655 struct vm_map *old_map, 3656 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3657 { 3658 struct vm_map_entry *new_entry; 3659 3660 new_entry = uvm_mapent_share(new_map, old_entry->start, 3661 old_entry->end - old_entry->start, 0, old_entry->protection, 3662 old_entry->max_protection, old_map, old_entry, dead); 3663 3664 return (new_entry); 3665 } 3666 3667 /* 3668 * copy-on-write the mapping (using mmap's 3669 * MAP_PRIVATE semantics) 3670 * 3671 * allocate new_entry, adjust reference counts. 3672 * (note that new references are read-only). 
3673 */ 3674 struct vm_map_entry * 3675 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map, 3676 struct vm_map *old_map, 3677 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3678 { 3679 struct vm_map_entry *new_entry; 3680 boolean_t protect_child; 3681 3682 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3683 old_entry->end - old_entry->start, 0, old_entry->protection, 3684 old_entry->max_protection, old_entry, dead, 0, 0); 3685 3686 new_entry->etype |= 3687 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3688 3689 /* 3690 * the new entry will need an amap. it will either 3691 * need to be copied from the old entry or created 3692 * from scratch (if the old entry does not have an 3693 * amap). can we defer this process until later 3694 * (by setting "needs_copy") or do we need to copy 3695 * the amap now? 3696 * 3697 * we must copy the amap now if any of the following 3698 * conditions hold: 3699 * 1. the old entry has an amap and that amap is 3700 * being shared. this means that the old (parent) 3701 * process is sharing the amap with another 3702 * process. if we do not clear needs_copy here 3703 * we will end up in a situation where both the 3704 * parent and child process are referring to the 3705 * same amap with "needs_copy" set. if the 3706 * parent write-faults, the fault routine will 3707 * clear "needs_copy" in the parent by allocating 3708 * a new amap. this is wrong because the 3709 * parent is supposed to be sharing the old amap 3710 * and the new amap will break that. 3711 * 3712 * 2. if the old entry has an amap and a non-zero 3713 * wire count then we are going to have to call 3714 * amap_cow_now to avoid page faults in the 3715 * parent process. since amap_cow_now requires 3716 * "needs_copy" to be clear we might as well 3717 * clear it here as well. 3718 * 3719 */ 3720 if (old_entry->aref.ar_amap != NULL && 3721 ((amap_flags(old_entry->aref.ar_amap) & 3722 AMAP_SHARED) != 0 || 3723 VM_MAPENT_ISWIRED(old_entry))) { 3724 amap_copy(new_map, new_entry, M_WAITOK, FALSE, 3725 0, 0); 3726 /* XXXCDC: M_WAITOK ... ok? */ 3727 } 3728 3729 /* 3730 * if the parent's entry is wired down, then the 3731 * parent process does not want page faults on 3732 * access to that memory. this means that we 3733 * cannot do copy-on-write because we can't write 3734 * protect the old entry. in this case we 3735 * resolve all copy-on-write faults now, using 3736 * amap_cow_now. note that we have already 3737 * allocated any needed amap (above). 3738 */ 3739 if (VM_MAPENT_ISWIRED(old_entry)) { 3740 /* 3741 * resolve all copy-on-write faults now 3742 * (note that there is nothing to do if 3743 * the old mapping does not have an amap). 3744 */ 3745 if (old_entry->aref.ar_amap) 3746 amap_cow_now(new_map, new_entry); 3747 } else { 3748 if (old_entry->aref.ar_amap) { 3749 /* 3750 * setup mappings to trigger copy-on-write faults 3751 * we must write-protect the parent if it has 3752 * an amap and it is not already "needs_copy"... 3753 * if it is already "needs_copy" then the parent 3754 * has already been write-protected by a previous 3755 * fork operation. 3756 * 3757 * if we do not write-protect the parent, then 3758 * we must be sure to write-protect the child. 
3759 */ 3760 if (!UVM_ET_ISNEEDSCOPY(old_entry)) { 3761 if (old_entry->max_protection & PROT_WRITE) { 3762 uvm_map_lock_entry(old_entry); 3763 pmap_protect(old_map->pmap, 3764 old_entry->start, 3765 old_entry->end, 3766 old_entry->protection & 3767 ~PROT_WRITE); 3768 uvm_map_unlock_entry(old_entry); 3769 pmap_update(old_map->pmap); 3770 } 3771 old_entry->etype |= UVM_ET_NEEDSCOPY; 3772 } 3773 3774 /* parent must now be write-protected */ 3775 protect_child = FALSE; 3776 } else { 3777 /* 3778 * we only need to protect the child if the 3779 * parent has write access. 3780 */ 3781 if (old_entry->max_protection & PROT_WRITE) 3782 protect_child = TRUE; 3783 else 3784 protect_child = FALSE; 3785 } 3786 3787 /* protect the child's mappings if necessary */ 3788 if (protect_child) { 3789 pmap_protect(new_map->pmap, new_entry->start, 3790 new_entry->end, 3791 new_entry->protection & 3792 ~PROT_WRITE); 3793 } 3794 } 3795 3796 return (new_entry); 3797 } 3798 3799 /* 3800 * zero the mapping: the new entry will be zero initialized 3801 */ 3802 struct vm_map_entry * 3803 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map, 3804 struct vm_map *old_map, 3805 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead) 3806 { 3807 struct vm_map_entry *new_entry; 3808 3809 new_entry = uvm_mapent_clone(new_map, old_entry->start, 3810 old_entry->end - old_entry->start, 0, old_entry->protection, 3811 old_entry->max_protection, old_entry, dead, 0, 0); 3812 3813 new_entry->etype |= 3814 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); 3815 3816 if (new_entry->aref.ar_amap) { 3817 amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff, 3818 atop(new_entry->end - new_entry->start), 0); 3819 new_entry->aref.ar_amap = NULL; 3820 new_entry->aref.ar_pageoff = 0; 3821 } 3822 3823 if (UVM_ET_ISOBJ(new_entry)) { 3824 if (new_entry->object.uvm_obj->pgops->pgo_detach) 3825 new_entry->object.uvm_obj->pgops->pgo_detach( 3826 new_entry->object.uvm_obj); 3827 new_entry->object.uvm_obj = NULL; 3828 new_entry->etype &= ~UVM_ET_OBJ; 3829 } 3830 3831 return (new_entry); 3832 } 3833 3834 /* 3835 * uvmspace_fork: fork a process' main map 3836 * 3837 * => create a new vmspace for child process from parent. 3838 * => parent's map must not be locked. 3839 */ 3840 struct vmspace * 3841 uvmspace_fork(struct process *pr) 3842 { 3843 struct vmspace *vm1 = pr->ps_vmspace; 3844 struct vmspace *vm2; 3845 struct vm_map *old_map = &vm1->vm_map; 3846 struct vm_map *new_map; 3847 struct vm_map_entry *old_entry, *new_entry; 3848 struct uvm_map_deadq dead; 3849 3850 vm_map_lock(old_map); 3851 3852 vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, 3853 (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE); 3854 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy, 3855 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); 3856 vm2->vm_dused = 0; /* Statistic managed by us. */ 3857 new_map = &vm2->vm_map; 3858 vm_map_lock(new_map); 3859 3860 /* go entry-by-entry */ 3861 TAILQ_INIT(&dead); 3862 RBT_FOREACH(old_entry, uvm_map_addr, &old_map->addr) { 3863 if (old_entry->start == old_entry->end) 3864 continue; 3865 3866 /* first, some sanity checks on the old entry */ 3867 if (UVM_ET_ISSUBMAP(old_entry)) { 3868 panic("fork: encountered a submap during fork " 3869 "(illegal)"); 3870 } 3871 3872 if (!UVM_ET_ISCOPYONWRITE(old_entry) && 3873 UVM_ET_ISNEEDSCOPY(old_entry)) { 3874 panic("fork: non-copy_on_write map entry marked " 3875 "needs_copy (illegal)"); 3876 } 3877 3878 /* Apply inheritance. 
*/ 3879 switch (old_entry->inheritance) { 3880 case MAP_INHERIT_SHARE: 3881 new_entry = uvm_mapent_forkshared(vm2, new_map, 3882 old_map, old_entry, &dead); 3883 break; 3884 case MAP_INHERIT_COPY: 3885 new_entry = uvm_mapent_forkcopy(vm2, new_map, 3886 old_map, old_entry, &dead); 3887 break; 3888 case MAP_INHERIT_ZERO: 3889 new_entry = uvm_mapent_forkzero(vm2, new_map, 3890 old_map, old_entry, &dead); 3891 break; 3892 default: 3893 continue; 3894 } 3895 3896 /* Update process statistics. */ 3897 if (!UVM_ET_ISHOLE(new_entry)) 3898 new_map->size += new_entry->end - new_entry->start; 3899 if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry) && 3900 new_entry->protection != PROT_NONE) { 3901 vm2->vm_dused += uvmspace_dused( 3902 new_map, new_entry->start, new_entry->end); 3903 } 3904 } 3905 new_map->flags |= old_map->flags & VM_MAP_PINSYSCALL_ONCE; 3906 #ifdef PMAP_CHECK_COPYIN 3907 if (PMAP_CHECK_COPYIN) { 3908 memcpy(&new_map->check_copyin, &old_map->check_copyin, 3909 sizeof(new_map->check_copyin)); 3910 membar_producer(); 3911 new_map->check_copyin_count = old_map->check_copyin_count; 3912 } 3913 #endif 3914 3915 vm_map_unlock(old_map); 3916 vm_map_unlock(new_map); 3917 3918 /* 3919 * This can actually happen, if multiple entries described a 3920 * space in which an entry was inherited. 3921 */ 3922 uvm_unmap_detach(&dead, 0); 3923 3924 #ifdef SYSVSHM 3925 if (vm1->vm_shm) 3926 shmfork(vm1, vm2); 3927 #endif 3928 3929 return vm2; 3930 } 3931 3932 /* 3933 * uvm_map_hint: return the beginning of the best area suitable for 3934 * creating a new mapping with "prot" protection. 3935 */ 3936 vaddr_t 3937 uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr, 3938 vaddr_t maxaddr) 3939 { 3940 vaddr_t addr; 3941 vaddr_t spacing; 3942 3943 #ifdef __i386__ 3944 /* 3945 * If executable skip first two pages, otherwise start 3946 * after data + heap region. 3947 */ 3948 if ((prot & PROT_EXEC) != 0 && 3949 (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) { 3950 addr = (PAGE_SIZE*2) + 3951 (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1)); 3952 return (round_page(addr)); 3953 } 3954 #endif 3955 3956 #if defined (__LP64__) 3957 spacing = MIN(4UL * 1024 * 1024 * 1024, MAXDSIZ) - 1; 3958 #else 3959 spacing = MIN(1 * 1024 * 1024 * 1024, MAXDSIZ) - 1; 3960 #endif 3961 3962 /* 3963 * Start malloc/mmap after the brk. 3964 */ 3965 addr = (vaddr_t)vm->vm_daddr + BRKSIZ; 3966 addr = MAX(addr, minaddr); 3967 3968 if (addr < maxaddr) { 3969 while (spacing > maxaddr - addr) 3970 spacing >>= 1; 3971 } 3972 addr += arc4random() & spacing; 3973 return (round_page(addr)); 3974 } 3975 3976 /* 3977 * uvm_map_submap: punch down part of a map into a submap 3978 * 3979 * => only the kernel_map is allowed to be submapped 3980 * => the purpose of submapping is to break up the locking granularity 3981 * of a larger map 3982 * => the range specified must have been mapped previously with a uvm_map() 3983 * call [with uobj==NULL] to create a blank map entry in the main map. 3984 * [And it had better still be blank!] 3985 * => maps which contain submaps should never be copied or forked. 3986 * => to remove a submap, use uvm_unmap() on the main map 3987 * and then uvm_map_deallocate() the submap. 3988 * => main map must be unlocked. 3989 * => submap must have been init'd and have a zero reference count. 
3990 * [need not be locked as we don't actually reference it] 3991 */ 3992 int 3993 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end, 3994 struct vm_map *submap) 3995 { 3996 struct vm_map_entry *entry; 3997 int result; 3998 3999 if (start > map->max_offset || end > map->max_offset || 4000 start < map->min_offset || end < map->min_offset) 4001 return EINVAL; 4002 4003 vm_map_lock(map); 4004 4005 if (uvm_map_lookup_entry(map, start, &entry)) { 4006 UVM_MAP_CLIP_START(map, entry, start); 4007 UVM_MAP_CLIP_END(map, entry, end); 4008 } else 4009 entry = NULL; 4010 4011 if (entry != NULL && 4012 entry->start == start && entry->end == end && 4013 entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL && 4014 !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) { 4015 entry->etype |= UVM_ET_SUBMAP; 4016 entry->object.sub_map = submap; 4017 entry->offset = 0; 4018 uvm_map_reference(submap); 4019 result = 0; 4020 } else 4021 result = EINVAL; 4022 4023 vm_map_unlock(map); 4024 return result; 4025 } 4026 4027 /* 4028 * uvm_map_checkprot: check protection in map 4029 * 4030 * => must allow specific protection in a fully allocated region. 4031 * => map must be read or write locked by caller. 4032 */ 4033 boolean_t 4034 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end, 4035 vm_prot_t protection) 4036 { 4037 struct vm_map_entry *entry; 4038 4039 vm_map_assert_anylock(map); 4040 4041 if (start < map->min_offset || end > map->max_offset || start > end) 4042 return FALSE; 4043 if (start == end) 4044 return TRUE; 4045 4046 /* 4047 * Iterate entries. 4048 */ 4049 for (entry = uvm_map_entrybyaddr(&map->addr, start); 4050 entry != NULL && entry->start < end; 4051 entry = RBT_NEXT(uvm_map_addr, entry)) { 4052 /* Fail if a hole is found. */ 4053 if (UVM_ET_ISHOLE(entry) || 4054 (entry->end < end && entry->end != VMMAP_FREE_END(entry))) 4055 return FALSE; 4056 4057 /* Check protection. */ 4058 if ((entry->protection & protection) != protection) 4059 return FALSE; 4060 } 4061 return TRUE; 4062 } 4063 4064 /* 4065 * uvm_map_create: create map 4066 */ 4067 vm_map_t 4068 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags) 4069 { 4070 vm_map_t map; 4071 4072 map = malloc(sizeof *map, M_VMMAP, M_WAITOK); 4073 uvm_map_setup(map, pmap, min, max, flags); 4074 return (map); 4075 } 4076 4077 /* 4078 * uvm_map_deallocate: drop reference to a map 4079 * 4080 * => caller must not lock map 4081 * => we will zap map if ref count goes to zero 4082 */ 4083 void 4084 uvm_map_deallocate(vm_map_t map) 4085 { 4086 int c; 4087 struct uvm_map_deadq dead; 4088 4089 c = atomic_dec_int_nv(&map->ref_count); 4090 if (c > 0) { 4091 return; 4092 } 4093 4094 /* 4095 * all references gone. unmap and free. 4096 * 4097 * No lock required: we are only one to access this map. 4098 */ 4099 TAILQ_INIT(&dead); 4100 uvm_tree_sanity(map, __FILE__, __LINE__); 4101 vm_map_lock(map); 4102 uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead, 4103 TRUE, FALSE, FALSE); 4104 vm_map_unlock(map); 4105 pmap_destroy(map->pmap); 4106 KASSERT(RBT_EMPTY(uvm_map_addr, &map->addr)); 4107 free(map, M_VMMAP, sizeof *map); 4108 4109 uvm_unmap_detach(&dead, 0); 4110 } 4111 4112 /* 4113 * uvm_map_inherit: set inheritance code for range of addrs in map. 4114 * 4115 * => map must be unlocked 4116 * => note that the inherit code is used during a "fork". see fork 4117 * code for details. 
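 * => minherit(2) is the usual way to get here.  for example, a
 *    range set to MAP_INHERIT_NONE is simply skipped by
 *    uvmspace_fork() and never appears in the child, while
 *    MAP_INHERIT_ZERO additionally requires the range to be
 *    writable (see the PROT_WRITE check below).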
4118 */ 4119 int 4120 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end, 4121 vm_inherit_t new_inheritance) 4122 { 4123 struct vm_map_entry *entry, *entry1; 4124 int error = EPERM; 4125 4126 switch (new_inheritance) { 4127 case MAP_INHERIT_NONE: 4128 case MAP_INHERIT_COPY: 4129 case MAP_INHERIT_SHARE: 4130 case MAP_INHERIT_ZERO: 4131 break; 4132 default: 4133 return (EINVAL); 4134 } 4135 4136 if (start > end) 4137 return EINVAL; 4138 start = MAX(start, map->min_offset); 4139 end = MIN(end, map->max_offset); 4140 if (start >= end) 4141 return 0; 4142 4143 vm_map_lock(map); 4144 4145 entry = uvm_map_entrybyaddr(&map->addr, start); 4146 if (entry->end > start) 4147 UVM_MAP_CLIP_START(map, entry, start); 4148 else 4149 entry = RBT_NEXT(uvm_map_addr, entry); 4150 4151 /* First check for illegal operations */ 4152 entry1 = entry; 4153 while (entry1 != NULL && entry1->start < end) { 4154 if (entry1->etype & UVM_ET_IMMUTABLE) 4155 goto out; 4156 if (new_inheritance == MAP_INHERIT_ZERO && 4157 (entry1->protection & PROT_WRITE) == 0) 4158 goto out; 4159 entry1 = RBT_NEXT(uvm_map_addr, entry1); 4160 } 4161 4162 while (entry != NULL && entry->start < end) { 4163 UVM_MAP_CLIP_END(map, entry, end); 4164 entry->inheritance = new_inheritance; 4165 entry = RBT_NEXT(uvm_map_addr, entry); 4166 } 4167 4168 error = 0; 4169 out: 4170 vm_map_unlock(map); 4171 return (error); 4172 } 4173 4174 #ifdef PMAP_CHECK_COPYIN 4175 static void inline 4176 check_copyin_add(struct vm_map *map, vaddr_t start, vaddr_t end) 4177 { 4178 if (PMAP_CHECK_COPYIN == 0 || 4179 map->check_copyin_count >= UVM_MAP_CHECK_COPYIN_MAX) 4180 return; 4181 vm_map_assert_wrlock(map); 4182 map->check_copyin[map->check_copyin_count].start = start; 4183 map->check_copyin[map->check_copyin_count].end = end; 4184 membar_producer(); 4185 map->check_copyin_count++; 4186 } 4187 4188 /* 4189 * uvm_map_check_copyin_add: remember regions which are X-only for copyin(), 4190 * copyinstr(), uiomove(), and others 4191 * 4192 * => map must be unlocked 4193 */ 4194 int 4195 uvm_map_check_copyin_add(struct vm_map *map, vaddr_t start, vaddr_t end) 4196 { 4197 if (start > end) 4198 return EINVAL; 4199 start = MAX(start, map->min_offset); 4200 end = MIN(end, map->max_offset); 4201 if (start >= end) 4202 return 0; 4203 vm_map_lock(map); 4204 check_copyin_add(map, start, end); 4205 vm_map_unlock(map); 4206 return (0); 4207 } 4208 #endif /* PMAP_CHECK_COPYIN */ 4209 4210 /* 4211 * uvm_map_immutable: block mapping/mprotect for range of addrs in map. 
4212 * 4213 * => map must be unlocked 4214 */ 4215 int 4216 uvm_map_immutable(struct vm_map *map, vaddr_t start, vaddr_t end, int imut) 4217 { 4218 struct vm_map_entry *entry, *entry1; 4219 int error = EPERM; 4220 4221 if (start > end) 4222 return EINVAL; 4223 start = MAX(start, map->min_offset); 4224 end = MIN(end, map->max_offset); 4225 if (start >= end) 4226 return 0; 4227 4228 vm_map_lock(map); 4229 4230 entry = uvm_map_entrybyaddr(&map->addr, start); 4231 if (entry->end > start) 4232 UVM_MAP_CLIP_START(map, entry, start); 4233 else 4234 entry = RBT_NEXT(uvm_map_addr, entry); 4235 4236 /* First check for illegal operations */ 4237 entry1 = entry; 4238 while (entry1 != NULL && entry1->start < end) { 4239 if (entry1->inheritance == MAP_INHERIT_ZERO) 4240 goto out; 4241 entry1 = RBT_NEXT(uvm_map_addr, entry1); 4242 } 4243 4244 while (entry != NULL && entry->start < end) { 4245 UVM_MAP_CLIP_END(map, entry, end); 4246 if (imut) 4247 entry->etype |= UVM_ET_IMMUTABLE; 4248 else 4249 entry->etype &= ~UVM_ET_IMMUTABLE; 4250 entry = RBT_NEXT(uvm_map_addr, entry); 4251 } 4252 error = 0; 4253 out: 4254 vm_map_unlock(map); 4255 return (0); 4256 } 4257 4258 /* 4259 * uvm_map_advice: set advice code for range of addrs in map. 4260 * 4261 * => map must be unlocked 4262 */ 4263 int 4264 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice) 4265 { 4266 struct vm_map_entry *entry; 4267 4268 switch (new_advice) { 4269 case MADV_NORMAL: 4270 case MADV_RANDOM: 4271 case MADV_SEQUENTIAL: 4272 break; 4273 default: 4274 return (EINVAL); 4275 } 4276 4277 if (start > end) 4278 return EINVAL; 4279 start = MAX(start, map->min_offset); 4280 end = MIN(end, map->max_offset); 4281 if (start >= end) 4282 return 0; 4283 4284 vm_map_lock(map); 4285 4286 entry = uvm_map_entrybyaddr(&map->addr, start); 4287 if (entry != NULL && entry->end > start) 4288 UVM_MAP_CLIP_START(map, entry, start); 4289 else if (entry!= NULL) 4290 entry = RBT_NEXT(uvm_map_addr, entry); 4291 4292 /* 4293 * XXXJRT: disallow holes? 4294 */ 4295 while (entry != NULL && entry->start < end) { 4296 UVM_MAP_CLIP_END(map, entry, end); 4297 entry->advice = new_advice; 4298 entry = RBT_NEXT(uvm_map_addr, entry); 4299 } 4300 4301 vm_map_unlock(map); 4302 return (0); 4303 } 4304 4305 /* 4306 * uvm_map_extract: extract a mapping from a map and put it somewhere 4307 * in the kernel_map, setting protection to max_prot. 4308 * 4309 * => map should be unlocked (we will write lock it and kernel_map) 4310 * => returns 0 on success, error code otherwise 4311 * => start must be page aligned 4312 * => len must be page sized 4313 * => flags: 4314 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go 4315 * Mappings are QREF's. 4316 */ 4317 int 4318 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len, 4319 vaddr_t *dstaddrp, int flags) 4320 { 4321 struct uvm_map_deadq dead; 4322 struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2; 4323 vaddr_t dstaddr; 4324 vaddr_t end; 4325 vaddr_t cp_start; 4326 vsize_t cp_len, cp_off; 4327 int error; 4328 4329 TAILQ_INIT(&dead); 4330 end = start + len; 4331 4332 /* 4333 * Sanity check on the parameters. 4334 * Also, since the mapping may not contain gaps, error out if the 4335 * mapped area is not in source map. 4336 */ 4337 if ((start & (vaddr_t)PAGE_MASK) != 0 || 4338 (end & (vaddr_t)PAGE_MASK) != 0 || end < start) 4339 return EINVAL; 4340 if (start < srcmap->min_offset || end > srcmap->max_offset) 4341 return EINVAL; 4342 4343 /* Initialize dead entries. Handle len == 0 case. 
*/ 4344 if (len == 0) 4345 return 0; 4346 4347 /* Acquire lock on srcmap. */ 4348 vm_map_lock(srcmap); 4349 4350 /* Lock srcmap, lookup first and last entry in <start,len>. */ 4351 first = uvm_map_entrybyaddr(&srcmap->addr, start); 4352 4353 /* Check that the range is contiguous. */ 4354 for (entry = first; entry != NULL && entry->end < end; 4355 entry = RBT_NEXT(uvm_map_addr, entry)) { 4356 if (VMMAP_FREE_END(entry) != entry->end || 4357 UVM_ET_ISHOLE(entry)) { 4358 error = EINVAL; 4359 goto fail; 4360 } 4361 } 4362 if (entry == NULL || UVM_ET_ISHOLE(entry)) { 4363 error = EINVAL; 4364 goto fail; 4365 } 4366 4367 /* 4368 * Handle need-copy flag. 4369 */ 4370 for (entry = first; entry != NULL && entry->start < end; 4371 entry = RBT_NEXT(uvm_map_addr, entry)) { 4372 if (UVM_ET_ISNEEDSCOPY(entry)) 4373 amap_copy(srcmap, entry, M_NOWAIT, 4374 UVM_ET_ISSTACK(entry) ? FALSE : TRUE, start, end); 4375 if (UVM_ET_ISNEEDSCOPY(entry)) { 4376 /* 4377 * amap_copy failure 4378 */ 4379 error = ENOMEM; 4380 goto fail; 4381 } 4382 } 4383 4384 /* Lock destination map (kernel_map). */ 4385 vm_map_lock(kernel_map); 4386 4387 if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len, 4388 MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start), 4389 PROT_NONE, 0) != 0) { 4390 error = ENOMEM; 4391 goto fail2; 4392 } 4393 *dstaddrp = dstaddr; 4394 4395 /* 4396 * We now have srcmap and kernel_map locked. 4397 * dstaddr contains the destination offset in dstmap. 4398 */ 4399 /* step 1: start looping through map entries, performing extraction. */ 4400 for (entry = first; entry != NULL && entry->start < end; 4401 entry = RBT_NEXT(uvm_map_addr, entry)) { 4402 KDASSERT(!UVM_ET_ISNEEDSCOPY(entry)); 4403 if (UVM_ET_ISHOLE(entry)) 4404 continue; 4405 4406 /* Calculate uvm_mapent_clone parameters. */ 4407 cp_start = entry->start; 4408 if (cp_start < start) { 4409 cp_off = start - cp_start; 4410 cp_start = start; 4411 } else 4412 cp_off = 0; 4413 cp_len = MIN(entry->end, end) - cp_start; 4414 4415 newentry = uvm_mapent_clone(kernel_map, 4416 cp_start - start + dstaddr, cp_len, cp_off, 4417 entry->protection, entry->max_protection, 4418 entry, &dead, flags, AMAP_SHARED | AMAP_REFALL); 4419 if (newentry == NULL) { 4420 error = ENOMEM; 4421 goto fail2_unmap; 4422 } 4423 kernel_map->size += cp_len; 4424 4425 /* Figure out the best protection */ 4426 if ((flags & UVM_EXTRACT_FIXPROT) && 4427 newentry->protection != PROT_NONE) 4428 newentry->protection = newentry->max_protection; 4429 newentry->protection &= ~PROT_EXEC; 4430 } 4431 pmap_update(kernel_map->pmap); 4432 4433 error = 0; 4434 4435 /* Unmap copied entries on failure. */ 4436 fail2_unmap: 4437 if (error) { 4438 uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead, 4439 FALSE, TRUE, FALSE); 4440 } 4441 4442 /* Release maps, release dead entries. 
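 * Note that &dead may hold entries (and the references they carry)
 * regardless of whether we got here through the success path or one
 * of the failure paths; they are only released by uvm_unmap_detach()
 * after both maps have been unlocked.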
*/ 4443 fail2: 4444 vm_map_unlock(kernel_map); 4445 4446 fail: 4447 vm_map_unlock(srcmap); 4448 4449 uvm_unmap_detach(&dead, 0); 4450 4451 return error; 4452 } 4453 4454 /* 4455 * uvm_map_clean: clean out a map range 4456 * 4457 * => valid flags: 4458 * if (flags & PGO_CLEANIT): dirty pages are cleaned first 4459 * if (flags & PGO_SYNCIO): dirty pages are written synchronously 4460 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean 4461 * if (flags & PGO_FREE): any cached pages are freed after clean 4462 * => returns an error if any part of the specified range isn't mapped 4463 * => never a need to flush amap layer since the anonymous memory has 4464 * no permanent home, but may deactivate pages there 4465 * => called from sys_msync() and sys_madvise() 4466 * => caller must not have map locked 4467 */ 4468 4469 int 4470 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) 4471 { 4472 struct vm_map_entry *first, *entry; 4473 struct vm_amap *amap; 4474 struct vm_anon *anon; 4475 struct vm_page *pg; 4476 struct uvm_object *uobj; 4477 vaddr_t cp_start, cp_end; 4478 int refs, imut = 0; 4479 int error; 4480 boolean_t rv; 4481 4482 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) != 4483 (PGO_FREE|PGO_DEACTIVATE)); 4484 4485 if (start > end || start < map->min_offset || end > map->max_offset) 4486 return EINVAL; 4487 4488 vm_map_lock(map); 4489 first = uvm_map_entrybyaddr(&map->addr, start); 4490 4491 /* Make a first pass to check for various conditions. */ 4492 for (entry = first; entry != NULL && entry->start < end; 4493 entry = RBT_NEXT(uvm_map_addr, entry)) { 4494 if (entry->etype & UVM_ET_IMMUTABLE) 4495 imut = 1; 4496 if (UVM_ET_ISSUBMAP(entry)) { 4497 vm_map_unlock(map); 4498 return EINVAL; 4499 } 4500 if (UVM_ET_ISSUBMAP(entry) || 4501 UVM_ET_ISHOLE(entry) || 4502 (entry->end < end && 4503 VMMAP_FREE_END(entry) != entry->end)) { 4504 vm_map_unlock(map); 4505 return EFAULT; 4506 } 4507 } 4508 4509 vm_map_busy(map); 4510 vm_map_unlock(map); 4511 error = 0; 4512 for (entry = first; entry != NULL && entry->start < end; 4513 entry = RBT_NEXT(uvm_map_addr, entry)) { 4514 amap = entry->aref.ar_amap; /* top layer */ 4515 if (UVM_ET_ISOBJ(entry)) 4516 uobj = entry->object.uvm_obj; 4517 else 4518 uobj = NULL; 4519 4520 /* 4521 * No amap cleaning necessary if: 4522 * - there's no amap 4523 * - we're not deactivating or freeing pages. 4524 */ 4525 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) 4526 goto flush_object; 4527 4528 if (imut) { 4529 vm_map_unbusy(map); 4530 return EPERM; 4531 } 4532 4533 cp_start = MAX(entry->start, start); 4534 cp_end = MIN(entry->end, end); 4535 4536 amap_lock(amap); 4537 for (; cp_start != cp_end; cp_start += PAGE_SIZE) { 4538 anon = amap_lookup(&entry->aref, 4539 cp_start - entry->start); 4540 if (anon == NULL) 4541 continue; 4542 4543 KASSERT(anon->an_lock == amap->am_lock); 4544 pg = anon->an_page; 4545 if (pg == NULL) { 4546 continue; 4547 } 4548 KASSERT(pg->pg_flags & PQ_ANON); 4549 4550 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) { 4551 /* 4552 * XXX In these first 3 cases, we always just 4553 * XXX deactivate the page. We may want to 4554 * XXX handle the different cases more 4555 * XXX specifically, in the future. 
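 *
 * (For reference, roughly how callers reach these cases:
 * madvise(MADV_DONTNEED) arrives with PGO_DEACTIVATE,
 * madvise(MADV_FREE) with PGO_FREE, and msync(2) with
 * PGO_CLEANIT, adding PGO_FREE for MS_INVALIDATE.)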
4556 */ 4557 case PGO_CLEANIT|PGO_FREE: 4558 case PGO_CLEANIT|PGO_DEACTIVATE: 4559 case PGO_DEACTIVATE: 4560 deactivate_it: 4561 /* skip the page if it's wired */ 4562 if (pg->wire_count != 0) 4563 break; 4564 4565 uvm_lock_pageq(); 4566 4567 KASSERT(pg->uanon == anon); 4568 4569 /* zap all mappings for the page. */ 4570 pmap_page_protect(pg, PROT_NONE); 4571 4572 /* ...and deactivate the page. */ 4573 uvm_pagedeactivate(pg); 4574 4575 uvm_unlock_pageq(); 4576 break; 4577 case PGO_FREE: 4578 /* 4579 * If there are multiple references to 4580 * the amap, just deactivate the page. 4581 */ 4582 if (amap_refs(amap) > 1) 4583 goto deactivate_it; 4584 4585 /* XXX skip the page if it's wired */ 4586 if (pg->wire_count != 0) { 4587 break; 4588 } 4589 amap_unadd(&entry->aref, 4590 cp_start - entry->start); 4591 refs = --anon->an_ref; 4592 if (refs == 0) 4593 uvm_anfree(anon); 4594 break; 4595 default: 4596 panic("uvm_map_clean: weird flags"); 4597 } 4598 } 4599 amap_unlock(amap); 4600 4601 flush_object: 4602 cp_start = MAX(entry->start, start); 4603 cp_end = MIN(entry->end, end); 4604 4605 /* 4606 * flush pages if we've got a valid backing object. 4607 * 4608 * Don't PGO_FREE if we don't have write permission 4609 * and don't flush if this is a copy-on-write object 4610 * since we can't know our permissions on it. 4611 */ 4612 if (uobj != NULL && 4613 ((flags & PGO_FREE) == 0 || 4614 ((entry->max_protection & PROT_WRITE) != 0 && 4615 (entry->etype & UVM_ET_COPYONWRITE) == 0))) { 4616 rw_enter(uobj->vmobjlock, RW_WRITE); 4617 rv = uobj->pgops->pgo_flush(uobj, 4618 cp_start - entry->start + entry->offset, 4619 cp_end - entry->start + entry->offset, flags); 4620 rw_exit(uobj->vmobjlock); 4621 4622 if (rv == FALSE) 4623 error = EFAULT; 4624 } 4625 } 4626 4627 vm_map_unbusy(map); 4628 return error; 4629 } 4630 4631 /* 4632 * UVM_MAP_CLIP_END implementation 4633 */ 4634 void 4635 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4636 { 4637 struct vm_map_entry *tmp; 4638 4639 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4640 tmp = uvm_mapent_alloc(map, 0); 4641 4642 /* Invoke splitentry. */ 4643 uvm_map_splitentry(map, entry, tmp, addr); 4644 } 4645 4646 /* 4647 * UVM_MAP_CLIP_START implementation 4648 * 4649 * Clippers are required to not change the pointers to the entry they are 4650 * clipping on. 4651 * Since uvm_map_splitentry turns the original entry into the lowest 4652 * entry (address wise) we do a swap between the new entry and the original 4653 * entry, prior to calling uvm_map_splitentry. 4654 */ 4655 void 4656 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr) 4657 { 4658 struct vm_map_entry *tmp; 4659 struct uvm_addr_state *free; 4660 4661 /* Unlink original. */ 4662 free = uvm_map_uaddr_e(map, entry); 4663 uvm_mapent_free_remove(map, free, entry); 4664 uvm_mapent_addr_remove(map, entry); 4665 4666 /* Copy entry. */ 4667 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr); 4668 tmp = uvm_mapent_alloc(map, 0); 4669 uvm_mapent_copy(entry, tmp); 4670 4671 /* Put new entry in place of original entry. */ 4672 uvm_mapent_addr_insert(map, tmp); 4673 uvm_mapent_free_insert(map, free, tmp); 4674 4675 /* Invoke splitentry. */ 4676 uvm_map_splitentry(map, tmp, entry, addr); 4677 } 4678 4679 /* 4680 * Boundary fixer. 
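 *
 * uvm_map_boundfix(min, max, bound) clips max down to bound when
 * the range [min, max) crosses it and leaves max untouched
 * otherwise.  For example (illustrative values only):
 *   uvm_map_boundfix(0x1000, 0x9000, 0x4000) == 0x4000
 *   uvm_map_boundfix(0x5000, 0x9000, 0x4000) == 0x9000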
4681 */ 4682 static inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t); 4683 static inline vaddr_t 4684 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound) 4685 { 4686 return (min < bound && max > bound) ? bound : max; 4687 } 4688 4689 /* 4690 * Choose free list based on address at start of free space. 4691 * 4692 * The uvm_addr_state returned contains addr and is the first of: 4693 * - uaddr_exe 4694 * - uaddr_brk_stack 4695 * - uaddr_any 4696 */ 4697 struct uvm_addr_state* 4698 uvm_map_uaddr(struct vm_map *map, vaddr_t addr) 4699 { 4700 struct uvm_addr_state *uaddr; 4701 int i; 4702 4703 /* Special case the first page, to prevent mmap from returning 0. */ 4704 if (addr < VMMAP_MIN_ADDR) 4705 return NULL; 4706 4707 /* Upper bound for kernel maps at uvm_maxkaddr. */ 4708 if ((map->flags & VM_MAP_ISVMSPACE) == 0) { 4709 if (addr >= uvm_maxkaddr) 4710 return NULL; 4711 } 4712 4713 /* Is the address inside the exe-only map? */ 4714 if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr && 4715 addr < map->uaddr_exe->uaddr_maxaddr) 4716 return map->uaddr_exe; 4717 4718 /* Check if the space falls inside brk/stack area. */ 4719 if ((addr >= map->b_start && addr < map->b_end) || 4720 (addr >= map->s_start && addr < map->s_end)) { 4721 if (map->uaddr_brk_stack != NULL && 4722 addr >= map->uaddr_brk_stack->uaddr_minaddr && 4723 addr < map->uaddr_brk_stack->uaddr_maxaddr) { 4724 return map->uaddr_brk_stack; 4725 } else 4726 return NULL; 4727 } 4728 4729 /* 4730 * Check the other selectors. 4731 * 4732 * These selectors are only marked as the owner, if they have insert 4733 * functions. 4734 */ 4735 for (i = 0; i < nitems(map->uaddr_any); i++) { 4736 uaddr = map->uaddr_any[i]; 4737 if (uaddr == NULL) 4738 continue; 4739 if (uaddr->uaddr_functions->uaddr_free_insert == NULL) 4740 continue; 4741 4742 if (addr >= uaddr->uaddr_minaddr && 4743 addr < uaddr->uaddr_maxaddr) 4744 return uaddr; 4745 } 4746 4747 return NULL; 4748 } 4749 4750 /* 4751 * Choose free list based on address at start of free space. 4752 * 4753 * The uvm_addr_state returned contains addr and is the first of: 4754 * - uaddr_exe 4755 * - uaddr_brk_stack 4756 * - uaddr_any 4757 */ 4758 struct uvm_addr_state* 4759 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry) 4760 { 4761 return uvm_map_uaddr(map, VMMAP_FREE_START(entry)); 4762 } 4763 4764 /* 4765 * Returns the first free-memory boundary that is crossed by [min-max]. 4766 */ 4767 vsize_t 4768 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max) 4769 { 4770 struct uvm_addr_state *uaddr; 4771 int i; 4772 4773 /* Never return first page. */ 4774 max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR); 4775 4776 /* Treat the maxkaddr special, if the map is a kernel_map. */ 4777 if ((map->flags & VM_MAP_ISVMSPACE) == 0) 4778 max = uvm_map_boundfix(min, max, uvm_maxkaddr); 4779 4780 /* Check for exe-only boundaries. */ 4781 if (map->uaddr_exe != NULL) { 4782 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr); 4783 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr); 4784 } 4785 4786 /* Check for exe-only boundaries. */ 4787 if (map->uaddr_brk_stack != NULL) { 4788 max = uvm_map_boundfix(min, max, 4789 map->uaddr_brk_stack->uaddr_minaddr); 4790 max = uvm_map_boundfix(min, max, 4791 map->uaddr_brk_stack->uaddr_maxaddr); 4792 } 4793 4794 /* Check other boundaries. 
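 *
 * (uvm_map_fix_space() uses the value computed here as "lmax", so
 * free space is never accounted across a selector, brk/stack or
 * uvm_maxkaddr boundary.)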
*/ 4795 for (i = 0; i < nitems(map->uaddr_any); i++) { 4796 uaddr = map->uaddr_any[i]; 4797 if (uaddr != NULL) { 4798 max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr); 4799 max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr); 4800 } 4801 } 4802 4803 /* Boundaries at stack and brk() area. */ 4804 max = uvm_map_boundfix(min, max, map->s_start); 4805 max = uvm_map_boundfix(min, max, map->s_end); 4806 max = uvm_map_boundfix(min, max, map->b_start); 4807 max = uvm_map_boundfix(min, max, map->b_end); 4808 4809 return max; 4810 } 4811 4812 /* 4813 * Update map allocation start and end addresses from proc vmspace. 4814 */ 4815 void 4816 uvm_map_vmspace_update(struct vm_map *map, 4817 struct uvm_map_deadq *dead, int flags) 4818 { 4819 struct vmspace *vm; 4820 vaddr_t b_start, b_end, s_start, s_end; 4821 4822 KASSERT(map->flags & VM_MAP_ISVMSPACE); 4823 KASSERT(offsetof(struct vmspace, vm_map) == 0); 4824 4825 /* 4826 * Derive actual allocation boundaries from vmspace. 4827 */ 4828 vm = (struct vmspace *)map; 4829 b_start = (vaddr_t)vm->vm_daddr; 4830 b_end = b_start + BRKSIZ; 4831 s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 4832 s_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr); 4833 #ifdef DIAGNOSTIC 4834 if ((b_start & (vaddr_t)PAGE_MASK) != 0 || 4835 (b_end & (vaddr_t)PAGE_MASK) != 0 || 4836 (s_start & (vaddr_t)PAGE_MASK) != 0 || 4837 (s_end & (vaddr_t)PAGE_MASK) != 0) { 4838 panic("uvm_map_vmspace_update: vmspace %p invalid bounds: " 4839 "b=0x%lx-0x%lx s=0x%lx-0x%lx", 4840 vm, b_start, b_end, s_start, s_end); 4841 } 4842 #endif 4843 4844 if (__predict_true(map->b_start == b_start && map->b_end == b_end && 4845 map->s_start == s_start && map->s_end == s_end)) 4846 return; 4847 4848 uvm_map_freelist_update(map, dead, b_start, b_end, 4849 s_start, s_end, flags); 4850 } 4851 4852 /* 4853 * Grow kernel memory. 4854 * 4855 * This function is only called for kernel maps when an allocation fails. 4856 * 4857 * If the map has a gap that is large enough to accommodate alloc_sz, this 4858 * function will make sure map->free will include it. 4859 */ 4860 void 4861 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead, 4862 vsize_t alloc_sz, int flags) 4863 { 4864 vsize_t sz; 4865 vaddr_t end; 4866 struct vm_map_entry *entry; 4867 4868 /* Kernel memory only. */ 4869 KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0); 4870 /* Destroy free list. */ 4871 uvm_map_freelist_update_clear(map, dead); 4872 4873 /* Include the guard page in the hard minimum requirement of alloc_sz. */ 4874 if (map->flags & VM_MAP_GUARDPAGES) 4875 alloc_sz += PAGE_SIZE; 4876 4877 /* 4878 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA. 4879 * 4880 * Don't handle the case where the multiplication overflows: 4881 * if that happens, the allocation is probably too big anyway. 4882 */ 4883 sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA); 4884 4885 /* 4886 * Walk forward until a gap large enough for alloc_sz shows up. 4887 * 4888 * We assume the kernel map has no boundaries. 4889 * uvm_maxkaddr may be zero. 4890 */ 4891 end = MAX(uvm_maxkaddr, map->min_offset); 4892 entry = uvm_map_entrybyaddr(&map->addr, end); 4893 while (entry && entry->fspace < alloc_sz) 4894 entry = RBT_NEXT(uvm_map_addr, entry); 4895 if (entry) { 4896 end = MAX(VMMAP_FREE_START(entry), end); 4897 end += MIN(sz, map->max_offset - end); 4898 } else 4899 end = map->max_offset; 4900 4901 /* Reserve pmap entries. 
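 * Depending on the platform this either asks pmap_growkernel() to
 * extend the kernel page tables up to "end", or simply advances
 * uvm_maxkaddr; either way uvm_maxkaddr is only ever moved forward.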
*/ 4902 #ifdef PMAP_GROWKERNEL 4903 uvm_maxkaddr = pmap_growkernel(end); 4904 #else 4905 uvm_maxkaddr = MAX(uvm_maxkaddr, end); 4906 #endif 4907 4908 /* Rebuild free list. */ 4909 uvm_map_freelist_update_refill(map, flags); 4910 } 4911 4912 /* 4913 * Freelist update subfunction: unlink all entries from freelists. 4914 */ 4915 void 4916 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead) 4917 { 4918 struct uvm_addr_state *free; 4919 struct vm_map_entry *entry, *prev, *next; 4920 4921 prev = NULL; 4922 for (entry = RBT_MIN(uvm_map_addr, &map->addr); entry != NULL; 4923 entry = next) { 4924 next = RBT_NEXT(uvm_map_addr, entry); 4925 4926 free = uvm_map_uaddr_e(map, entry); 4927 uvm_mapent_free_remove(map, free, entry); 4928 4929 if (prev != NULL && entry->start == entry->end) { 4930 prev->fspace += VMMAP_FREE_END(entry) - entry->end; 4931 uvm_mapent_addr_remove(map, entry); 4932 DEAD_ENTRY_PUSH(dead, entry); 4933 } else 4934 prev = entry; 4935 } 4936 } 4937 4938 /* 4939 * Freelist update subfunction: refill the freelists with entries. 4940 */ 4941 void 4942 uvm_map_freelist_update_refill(struct vm_map *map, int flags) 4943 { 4944 struct vm_map_entry *entry; 4945 vaddr_t min, max; 4946 4947 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 4948 min = VMMAP_FREE_START(entry); 4949 max = VMMAP_FREE_END(entry); 4950 entry->fspace = 0; 4951 4952 entry = uvm_map_fix_space(map, entry, min, max, flags); 4953 } 4954 4955 uvm_tree_sanity(map, __FILE__, __LINE__); 4956 } 4957 4958 /* 4959 * Change {a,b}_{start,end} allocation ranges and associated free lists. 4960 */ 4961 void 4962 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead, 4963 vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags) 4964 { 4965 KDASSERT(b_end >= b_start && s_end >= s_start); 4966 vm_map_assert_wrlock(map); 4967 4968 /* Clear all free lists. */ 4969 uvm_map_freelist_update_clear(map, dead); 4970 4971 /* Apply new bounds. */ 4972 map->b_start = b_start; 4973 map->b_end = b_end; 4974 map->s_start = s_start; 4975 map->s_end = s_end; 4976 4977 /* Refill free lists. */ 4978 uvm_map_freelist_update_refill(map, flags); 4979 } 4980 4981 /* 4982 * Assign a uvm_addr_state to the specified pointer in vm_map. 4983 * 4984 * May sleep. 4985 */ 4986 void 4987 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which, 4988 struct uvm_addr_state *newval) 4989 { 4990 struct uvm_map_deadq dead; 4991 4992 /* Pointer which must be in this map. */ 4993 KASSERT(which != NULL); 4994 KASSERT((void*)map <= (void*)(which) && 4995 (void*)(which) < (void*)(map + 1)); 4996 4997 vm_map_lock(map); 4998 TAILQ_INIT(&dead); 4999 uvm_map_freelist_update_clear(map, &dead); 5000 5001 uvm_addr_destroy(*which); 5002 *which = newval; 5003 5004 uvm_map_freelist_update_refill(map, 0); 5005 vm_map_unlock(map); 5006 uvm_unmap_detach(&dead, 0); 5007 } 5008 5009 /* 5010 * Correct space insert. 5011 * 5012 * Entry must not be on any freelist. 5013 */ 5014 struct vm_map_entry* 5015 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry, 5016 vaddr_t min, vaddr_t max, int flags) 5017 { 5018 struct uvm_addr_state *free, *entfree; 5019 vaddr_t lmax; 5020 5021 KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0); 5022 KDASSERT(min <= max); 5023 KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) || 5024 min == map->min_offset); 5025 5026 UVM_MAP_REQ_WRITE(map); 5027 5028 /* 5029 * During the function, entfree will always point at the uaddr state 5030 * for entry. 
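 *
 * Loop invariant, informally: "min" is the start of the free space
 * that still has to be accounted for.  Every iteration either turns
 * the leading page(s) into a guard for entry, or accounts the range
 * up to the next boundary (lmax) by growing entry->fspace or by
 * starting a fresh free entry.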
5031 */ 5032 entfree = (entry == NULL ? NULL : 5033 uvm_map_uaddr_e(map, entry)); 5034 5035 while (min != max) { 5036 /* Claim guard page for entry. */ 5037 if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL && 5038 VMMAP_FREE_END(entry) == entry->end && 5039 entry->start != entry->end) { 5040 if (max - min == 2 * PAGE_SIZE) { 5041 /* 5042 * If the free-space gap is exactly 2 pages, 5043 * we make the guard 2 pages instead of 1. 5044 * Because in a guarded map, an area needs 5045 * at least 2 pages to allocate from: 5046 * one page for the allocation and one for 5047 * the guard. 5048 */ 5049 entry->guard = 2 * PAGE_SIZE; 5050 min = max; 5051 } else { 5052 entry->guard = PAGE_SIZE; 5053 min += PAGE_SIZE; 5054 } 5055 continue; 5056 } 5057 5058 /* 5059 * Handle the case where entry has a 2-page guard, but the 5060 * space after entry is freed. 5061 */ 5062 if (entry != NULL && entry->fspace == 0 && 5063 entry->guard > PAGE_SIZE) { 5064 entry->guard = PAGE_SIZE; 5065 min = VMMAP_FREE_START(entry); 5066 } 5067 5068 lmax = uvm_map_boundary(map, min, max); 5069 free = uvm_map_uaddr(map, min); 5070 5071 /* 5072 * Entries are merged if they point at the same uvm_free(). 5073 * Exception to that rule: if min == uvm_maxkaddr, a new 5074 * entry is started regardless (otherwise the allocators 5075 * will get confused). 5076 */ 5077 if (entry != NULL && free == entfree && 5078 !((map->flags & VM_MAP_ISVMSPACE) == 0 && 5079 min == uvm_maxkaddr)) { 5080 KDASSERT(VMMAP_FREE_END(entry) == min); 5081 entry->fspace += lmax - min; 5082 } else { 5083 /* 5084 * Commit entry to free list: it'll not be added to 5085 * anymore. 5086 * We'll start a new entry and add to that entry 5087 * instead. 5088 */ 5089 if (entry != NULL) 5090 uvm_mapent_free_insert(map, entfree, entry); 5091 5092 /* New entry for new uaddr. */ 5093 entry = uvm_mapent_alloc(map, flags); 5094 KDASSERT(entry != NULL); 5095 entry->end = entry->start = min; 5096 entry->guard = 0; 5097 entry->fspace = lmax - min; 5098 entry->object.uvm_obj = NULL; 5099 entry->offset = 0; 5100 entry->etype = 0; 5101 entry->protection = entry->max_protection = 0; 5102 entry->inheritance = 0; 5103 entry->wired_count = 0; 5104 entry->advice = 0; 5105 entry->aref.ar_pageoff = 0; 5106 entry->aref.ar_amap = NULL; 5107 uvm_mapent_addr_insert(map, entry); 5108 5109 entfree = free; 5110 } 5111 5112 min = lmax; 5113 } 5114 /* Finally put entry on the uaddr state. */ 5115 if (entry != NULL) 5116 uvm_mapent_free_insert(map, entfree, entry); 5117 5118 return entry; 5119 } 5120 5121 /* 5122 * MQuery style of allocation. 5123 * 5124 * This allocator searches forward until sufficient space is found to map 5125 * the given size. 5126 * 5127 * XXX: factor in offset (via pmap_prefer) and protection? 5128 */ 5129 int 5130 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset, 5131 int flags) 5132 { 5133 struct vm_map_entry *entry, *last; 5134 vaddr_t addr; 5135 vaddr_t tmp, pmap_align, pmap_offset; 5136 int error; 5137 5138 addr = *addr_p; 5139 vm_map_lock_read(map); 5140 5141 /* Configure pmap prefer. */ 5142 if (offset != UVM_UNKNOWN_OFFSET) { 5143 pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()); 5144 pmap_offset = PMAP_PREFER_OFFSET(offset); 5145 } else { 5146 pmap_align = PAGE_SIZE; 5147 pmap_offset = 0; 5148 } 5149 5150 /* Align address to pmap_prefer unless FLAG_FIXED is set. 
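 * For example (illustrative numbers): with pmap_align 0x10000 and
 * pmap_offset 0x3000, a hint of 0x25000 is first masked down to
 * 0x23000; since that lies below the hint, it is bumped up by one
 * alignment step to 0x33000.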
*/ 5151 if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) { 5152 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 5153 if (tmp < addr) 5154 tmp += pmap_align; 5155 addr = tmp; 5156 } 5157 5158 /* First, check if the requested range is fully available. */ 5159 entry = uvm_map_entrybyaddr(&map->addr, addr); 5160 last = NULL; 5161 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 5162 error = 0; 5163 goto out; 5164 } 5165 if (flags & UVM_FLAG_FIXED) { 5166 error = EINVAL; 5167 goto out; 5168 } 5169 5170 error = ENOMEM; /* Default error from here. */ 5171 5172 /* 5173 * At this point, the memory at <addr, sz> is not available. 5174 * The reasons are: 5175 * [1] it's outside the map, 5176 * [2] it starts in used memory (and therefore needs to move 5177 * toward the first free page in entry), 5178 * [3] it starts in free memory but bumps into used memory. 5179 * 5180 * Note that for case [2], the forward moving is handled by the 5181 * for loop below. 5182 */ 5183 if (entry == NULL) { 5184 /* [1] Outside the map. */ 5185 if (addr >= map->max_offset) 5186 goto out; 5187 else 5188 entry = RBT_MIN(uvm_map_addr, &map->addr); 5189 } else if (VMMAP_FREE_START(entry) <= addr) { 5190 /* [3] Bumped into used memory. */ 5191 entry = RBT_NEXT(uvm_map_addr, entry); 5192 } 5193 5194 /* Test if the next entry is sufficient for the allocation. */ 5195 for (; entry != NULL; 5196 entry = RBT_NEXT(uvm_map_addr, entry)) { 5197 if (entry->fspace == 0) 5198 continue; 5199 addr = VMMAP_FREE_START(entry); 5200 5201 restart: /* Restart address checks on address change. */ 5202 tmp = (addr & ~(pmap_align - 1)) | pmap_offset; 5203 if (tmp < addr) 5204 tmp += pmap_align; 5205 addr = tmp; 5206 if (addr >= VMMAP_FREE_END(entry)) 5207 continue; 5208 5209 /* Skip brk() allocation addresses. */ 5210 if (addr + sz > map->b_start && addr < map->b_end) { 5211 if (VMMAP_FREE_END(entry) > map->b_end) { 5212 addr = map->b_end; 5213 goto restart; 5214 } else 5215 continue; 5216 } 5217 /* Skip stack allocation addresses. 
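 * (mirrors the brk() case above: if the free space extends past
 * s_end, restart the scan from s_end, otherwise move on to the
 * next entry)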
*/ 5218 if (addr + sz > map->s_start && addr < map->s_end) { 5219 if (VMMAP_FREE_END(entry) > map->s_end) { 5220 addr = map->s_end; 5221 goto restart; 5222 } else 5223 continue; 5224 } 5225 5226 last = NULL; 5227 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) { 5228 error = 0; 5229 goto out; 5230 } 5231 } 5232 5233 out: 5234 vm_map_unlock_read(map); 5235 if (error == 0) 5236 *addr_p = addr; 5237 return error; 5238 } 5239 5240 boolean_t 5241 vm_map_lock_try_ln(struct vm_map *map, char *file, int line) 5242 { 5243 boolean_t rv; 5244 5245 if (map->flags & VM_MAP_INTRSAFE) { 5246 rv = mtx_enter_try(&map->mtx); 5247 } else { 5248 mtx_enter(&map->flags_lock); 5249 if ((map->flags & VM_MAP_BUSY) && (map->busy != curproc)) { 5250 mtx_leave(&map->flags_lock); 5251 return (FALSE); 5252 } 5253 mtx_leave(&map->flags_lock); 5254 rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0); 5255 /* check if the lock is busy and back out if we won the race */ 5256 if (rv) { 5257 mtx_enter(&map->flags_lock); 5258 if ((map->flags & VM_MAP_BUSY) && 5259 (map->busy != curproc)) { 5260 rw_exit(&map->lock); 5261 rv = FALSE; 5262 } 5263 mtx_leave(&map->flags_lock); 5264 } 5265 } 5266 5267 if (rv) { 5268 map->timestamp++; 5269 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5270 uvm_tree_sanity(map, file, line); 5271 uvm_tree_size_chk(map, file, line); 5272 } 5273 5274 return (rv); 5275 } 5276 5277 void 5278 vm_map_lock_ln(struct vm_map *map, char *file, int line) 5279 { 5280 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5281 do { 5282 mtx_enter(&map->flags_lock); 5283 tryagain: 5284 while ((map->flags & VM_MAP_BUSY) && 5285 (map->busy != curproc)) { 5286 map->flags |= VM_MAP_WANTLOCK; 5287 msleep_nsec(&map->flags, &map->flags_lock, 5288 PVM, vmmapbsy, INFSLP); 5289 } 5290 mtx_leave(&map->flags_lock); 5291 } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0); 5292 /* check if the lock is busy and back out if we won the race */ 5293 mtx_enter(&map->flags_lock); 5294 if ((map->flags & VM_MAP_BUSY) && (map->busy != curproc)) { 5295 rw_exit(&map->lock); 5296 goto tryagain; 5297 } 5298 mtx_leave(&map->flags_lock); 5299 } else { 5300 mtx_enter(&map->mtx); 5301 } 5302 5303 if (map->busy != curproc) 5304 map->timestamp++; 5305 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5306 uvm_tree_sanity(map, file, line); 5307 uvm_tree_size_chk(map, file, line); 5308 } 5309 5310 void 5311 vm_map_lock_read_ln(struct vm_map *map, char *file, int line) 5312 { 5313 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5314 rw_enter_read(&map->lock); 5315 else 5316 mtx_enter(&map->mtx); 5317 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line)); 5318 uvm_tree_sanity(map, file, line); 5319 uvm_tree_size_chk(map, file, line); 5320 } 5321 5322 void 5323 vm_map_unlock_ln(struct vm_map *map, char *file, int line) 5324 { 5325 KASSERT(map->busy == NULL || map->busy == curproc); 5326 uvm_tree_sanity(map, file, line); 5327 uvm_tree_size_chk(map, file, line); 5328 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5329 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5330 rw_exit(&map->lock); 5331 else 5332 mtx_leave(&map->mtx); 5333 } 5334 5335 void 5336 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line) 5337 { 5338 /* XXX: RO */ uvm_tree_sanity(map, file, line); 5339 /* XXX: RO */ uvm_tree_size_chk(map, file, line); 5340 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line)); 5341 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5342 rw_exit_read(&map->lock); 5343 else 5344 mtx_leave(&map->mtx); 5345 } 5346 5347 void 5348 
vm_map_busy_ln(struct vm_map *map, char *file, int line) 5349 { 5350 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5351 KASSERT(rw_write_held(&map->lock)); 5352 KASSERT(map->busy == NULL); 5353 5354 mtx_enter(&map->flags_lock); 5355 map->busy = curproc; 5356 map->flags |= VM_MAP_BUSY; 5357 mtx_leave(&map->flags_lock); 5358 } 5359 5360 void 5361 vm_map_unbusy_ln(struct vm_map *map, char *file, int line) 5362 { 5363 int oflags; 5364 5365 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 5366 KASSERT(map->busy == curproc); 5367 5368 mtx_enter(&map->flags_lock); 5369 oflags = map->flags; 5370 map->busy = NULL; 5371 map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK); 5372 mtx_leave(&map->flags_lock); 5373 if (oflags & VM_MAP_WANTLOCK) 5374 wakeup(&map->flags); 5375 } 5376 5377 void 5378 vm_map_assert_anylock_ln(struct vm_map *map, char *file, int line) 5379 { 5380 LPRINTF(("map assert read or write locked: %p (at %s %d)\n", map, file, line)); 5381 if ((map->flags & VM_MAP_INTRSAFE) == 0) 5382 rw_assert_anylock(&map->lock); 5383 else 5384 MUTEX_ASSERT_LOCKED(&map->mtx); 5385 } 5386 5387 void 5388 vm_map_assert_wrlock_ln(struct vm_map *map, char *file, int line) 5389 { 5390 LPRINTF(("map assert write locked: %p (at %s %d)\n", map, file, line)); 5391 if ((map->flags & VM_MAP_INTRSAFE) == 0) { 5392 splassert(IPL_NONE); 5393 rw_assert_wrlock(&map->lock); 5394 } else 5395 MUTEX_ASSERT_LOCKED(&map->mtx); 5396 } 5397 5398 #ifndef SMALL_KERNEL 5399 int 5400 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve, 5401 size_t *lenp) 5402 { 5403 struct vm_map_entry *entry; 5404 vaddr_t start; 5405 int cnt, maxcnt, error = 0; 5406 5407 KASSERT(*lenp > 0); 5408 KASSERT((*lenp % sizeof(*kve)) == 0); 5409 cnt = 0; 5410 maxcnt = *lenp / sizeof(*kve); 5411 KASSERT(maxcnt > 0); 5412 5413 /* 5414 * Return only entries whose address is above the given base 5415 * address. This allows userland to iterate without knowing the 5416 * number of entries beforehand. 5417 */ 5418 start = (vaddr_t)kve[0].kve_start; 5419 5420 vm_map_lock(map); 5421 RBT_FOREACH(entry, uvm_map_addr, &map->addr) { 5422 if (cnt == maxcnt) { 5423 error = ENOMEM; 5424 break; 5425 } 5426 if (start != 0 && entry->start < start) 5427 continue; 5428 kve->kve_start = entry->start; 5429 kve->kve_end = entry->end; 5430 kve->kve_guard = entry->guard; 5431 kve->kve_fspace = entry->fspace; 5432 kve->kve_fspace_augment = entry->fspace_augment; 5433 kve->kve_offset = entry->offset; 5434 kve->kve_wired_count = entry->wired_count; 5435 kve->kve_etype = entry->etype; 5436 kve->kve_protection = entry->protection; 5437 kve->kve_max_protection = entry->max_protection; 5438 kve->kve_advice = entry->advice; 5439 kve->kve_inheritance = entry->inheritance; 5440 kve->kve_flags = entry->flags; 5441 kve++; 5442 cnt++; 5443 } 5444 vm_map_unlock(map); 5445 5446 KASSERT(cnt <= maxcnt); 5447 5448 *lenp = sizeof(*kve) * cnt; 5449 return error; 5450 } 5451 #endif 5452 5453 5454 RBT_GENERATE_AUGMENT(uvm_map_addr, vm_map_entry, daddrs.addr_entry, 5455 uvm_mapentry_addrcmp, uvm_map_addr_augment); 5456 5457 5458 /* 5459 * MD code: vmspace allocator setup. 5460 */ 5461 5462 #ifdef __i386__ 5463 void 5464 uvm_map_setup_md(struct vm_map *map) 5465 { 5466 vaddr_t min, max; 5467 5468 min = map->min_offset; 5469 max = map->max_offset; 5470 5471 /* 5472 * Ensure the selectors will not try to manage page 0; 5473 * it's too special. 5474 */ 5475 if (min < VMMAP_MIN_ADDR) 5476 min = VMMAP_MIN_ADDR; 5477 5478 #if 0 /* Cool stuff, not yet */ 5479 /* Executable code is special. 
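 * (historically, i386 kept executable mappings below
 * I386_MAX_EXE_ADDR so the code segment limit could serve as a
 * rough W^X substitute on CPUs without NX support)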
*/ 5480 map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR); 5481 /* Place normal allocations beyond executable mappings. */ 5482 map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max); 5483 #else /* Crappy stuff, for now */ 5484 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5485 #endif 5486 5487 #ifndef SMALL_KERNEL 5488 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5489 #endif /* !SMALL_KERNEL */ 5490 } 5491 #elif __LP64__ 5492 void 5493 uvm_map_setup_md(struct vm_map *map) 5494 { 5495 vaddr_t min, max; 5496 5497 min = map->min_offset; 5498 max = map->max_offset; 5499 5500 /* 5501 * Ensure the selectors will not try to manage page 0; 5502 * it's too special. 5503 */ 5504 if (min < VMMAP_MIN_ADDR) 5505 min = VMMAP_MIN_ADDR; 5506 5507 #if 0 /* Cool stuff, not yet */ 5508 map->uaddr_any[3] = uaddr_pivot_create(MAX(min, 0x100000000ULL), max); 5509 #else /* Crappy stuff, for now */ 5510 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5511 #endif 5512 5513 #ifndef SMALL_KERNEL 5514 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5515 #endif /* !SMALL_KERNEL */ 5516 } 5517 #else /* non-i386, 32 bit */ 5518 void 5519 uvm_map_setup_md(struct vm_map *map) 5520 { 5521 vaddr_t min, max; 5522 5523 min = map->min_offset; 5524 max = map->max_offset; 5525 5526 /* 5527 * Ensure the selectors will not try to manage page 0; 5528 * it's too special. 5529 */ 5530 if (min < VMMAP_MIN_ADDR) 5531 min = VMMAP_MIN_ADDR; 5532 5533 #if 0 /* Cool stuff, not yet */ 5534 map->uaddr_any[3] = uaddr_pivot_create(min, max); 5535 #else /* Crappy stuff, for now */ 5536 map->uaddr_any[0] = uaddr_rnd_create(min, max); 5537 #endif 5538 5539 #ifndef SMALL_KERNEL 5540 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max); 5541 #endif /* !SMALL_KERNEL */ 5542 } 5543 #endif 5544