/*	$OpenBSD: uvm_map.c,v 1.329 2024/06/02 15:31:57 deraadt Exp $	*/
/*	$NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $	*/

/*
 * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_map.c	8.3 (Berkeley) 1/12/94
 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_map.c: uvm map operations
 */

/* #define DEBUG */
/* #define VMMAP_DEBUG */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/acct.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/sysctl.h>
#include <sys/signalvar.h>
#include <sys/syslog.h>
#include <sys/user.h>
#include <sys/tracepoint.h>

#ifdef SYSVSHM
#include <sys/shm.h>
#endif

#include <uvm/uvm.h>

#ifdef DDB
#include <uvm/uvm_ddb.h>
#endif

#include <uvm/uvm_addr.h>


vsize_t			 uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t);
int			 uvm_mapent_isjoinable(struct vm_map*,
			    struct vm_map_entry*, struct vm_map_entry*);
struct vm_map_entry	*uvm_mapent_merge(struct vm_map*, struct vm_map_entry*,
			    struct vm_map_entry*, struct uvm_map_deadq*);
struct vm_map_entry	*uvm_mapent_tryjoin(struct vm_map*,
			    struct vm_map_entry*, struct uvm_map_deadq*);
struct vm_map_entry	*uvm_map_mkentry(struct vm_map*, struct vm_map_entry*,
			    struct vm_map_entry*, vaddr_t, vsize_t, int,
			    struct uvm_map_deadq*, struct vm_map_entry*);
struct vm_map_entry	*uvm_mapent_alloc(struct vm_map*, int);
void			 uvm_mapent_free(struct vm_map_entry*);
void			 uvm_unmap_kill_entry(struct vm_map*,
			    struct vm_map_entry*);
void			 uvm_unmap_kill_entry_withlock(struct vm_map *,
			    struct vm_map_entry *, int);
void			 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *);
void			 uvm_mapent_mkfree(struct vm_map*,
			    struct vm_map_entry*, struct vm_map_entry**,
			    struct uvm_map_deadq*, boolean_t);
void			 uvm_map_pageable_pgon(struct vm_map*,
			    struct vm_map_entry*, struct vm_map_entry*,
			    vaddr_t, vaddr_t);
int			 uvm_map_pageable_wire(struct vm_map*,
			    struct vm_map_entry*, struct vm_map_entry*,
			    vaddr_t, vaddr_t, int);
void			 uvm_map_setup_entries(struct vm_map*);
void			 uvm_map_setup_md(struct vm_map*);
void			 uvm_map_teardown(struct vm_map*);
void			 uvm_map_vmspace_update(struct vm_map*,
			    struct uvm_map_deadq*, int);
void			 uvm_map_kmem_grow(struct vm_map*,
			    struct uvm_map_deadq*, vsize_t, int);
void			 uvm_map_freelist_update_clear(struct vm_map*,
			    struct uvm_map_deadq*);
void			 uvm_map_freelist_update_refill(struct vm_map *, int);
void			 uvm_map_freelist_update(struct vm_map*,
			    struct uvm_map_deadq*, vaddr_t, vaddr_t,
			    vaddr_t, vaddr_t, int);
struct vm_map_entry	*uvm_map_fix_space(struct vm_map*, struct vm_map_entry*,
			    vaddr_t, vaddr_t, int);
int			 uvm_map_findspace(struct vm_map*,
			    struct vm_map_entry**, struct vm_map_entry**,
			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
vsize_t			 uvm_map_addr_augment_get(struct vm_map_entry*);
void			 uvm_map_addr_augment(struct vm_map_entry*);

int			 uvm_map_inentry_recheck(u_long, vaddr_t,
			    struct p_inentry *);
boolean_t		 uvm_map_inentry_fix(struct proc *, struct p_inentry *,
			    vaddr_t, int (*)(vm_map_entry_t), u_long);
/*
 * Tree management functions.
 */

static inline void	 uvm_mapent_copy(struct vm_map_entry*,
			    struct vm_map_entry*);
static inline int	 uvm_mapentry_addrcmp(const struct vm_map_entry*,
			    const struct vm_map_entry*);
void			 uvm_mapent_free_insert(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry*);
void			 uvm_mapent_free_remove(struct vm_map*,
			    struct uvm_addr_state*, struct vm_map_entry*);
void			 uvm_mapent_addr_insert(struct vm_map*,
			    struct vm_map_entry*);
void			 uvm_mapent_addr_remove(struct vm_map*,
			    struct vm_map_entry*);
void			 uvm_map_splitentry(struct vm_map*,
			    struct vm_map_entry*, struct vm_map_entry*,
			    vaddr_t);
vsize_t			 uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t);

/*
 * uvm_vmspace_fork helper functions.
 */
struct vm_map_entry	*uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t,
			    vsize_t, vm_prot_t, vm_prot_t,
			    struct vm_map_entry*, struct uvm_map_deadq*, int,
			    int);
struct vm_map_entry	*uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t,
			    vsize_t, vm_prot_t, vm_prot_t, struct vm_map*,
			    struct vm_map_entry*, struct uvm_map_deadq*);
struct vm_map_entry	*uvm_mapent_forkshared(struct vmspace*, struct vm_map*,
			    struct vm_map*, struct vm_map_entry*,
			    struct uvm_map_deadq*);
struct vm_map_entry	*uvm_mapent_forkcopy(struct vmspace*, struct vm_map*,
			    struct vm_map*, struct vm_map_entry*,
			    struct uvm_map_deadq*);
struct vm_map_entry	*uvm_mapent_forkzero(struct vmspace*, struct vm_map*,
			    struct vm_map*, struct vm_map_entry*,
			    struct uvm_map_deadq*);

/*
 * Tree validation.
 */
#ifdef VMMAP_DEBUG
void			 uvm_tree_assert(struct vm_map*, int, char*,
			    char*, int);
#define UVM_ASSERT(map, cond, file, line)				\
	uvm_tree_assert((map), (cond), #cond, (file), (line))
void			 uvm_tree_sanity(struct vm_map*, char*, int);
void			 uvm_tree_size_chk(struct vm_map*, char*, int);
void			 vmspace_validate(struct vm_map*);
#else
#define uvm_tree_sanity(_map, _file, _line)		do {} while (0)
#define uvm_tree_size_chk(_map, _file, _line)		do {} while (0)
#define vmspace_validate(_map)				do {} while (0)
#endif

/*
 * The kernel map will initially be VM_MAP_KSIZE_INIT bytes.
 * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes.
 *
 * We attempt to grow by VM_MAP_KSIZE_ALLOCMUL times the allocation size
 * each time.
 */
#define VM_MAP_KSIZE_INIT	(512 * (vaddr_t)PAGE_SIZE)
#define VM_MAP_KSIZE_DELTA	(256 * (vaddr_t)PAGE_SIZE)
#define VM_MAP_KSIZE_ALLOCMUL	4

/* auto-allocate address lower bound */
#define VMMAP_MIN_ADDR		PAGE_SIZE


#ifdef DEADBEEF0
#define UVMMAP_DEADBEEF		((unsigned long)DEADBEEF0)
#else
#define UVMMAP_DEADBEEF		((unsigned long)0xdeadd0d0)
#endif

#ifdef DEBUG
int uvm_map_printlocks = 0;

#define LPRINTF(_args)							\
	do {								\
		if (uvm_map_printlocks)					\
			printf _args;					\
	} while (0)
#else
#define LPRINTF(_args)	do {} while (0)
#endif

static struct mutex uvm_kmapent_mtx;
static struct timeval uvm_kmapent_last_warn_time;
static struct timeval uvm_kmapent_warn_rate = { 10, 0 };

const char vmmapbsy[] = "vmmapbsy";

/*
 * pool for vmspace structures.
 */
struct pool uvm_vmspace_pool;

/*
 * pool for dynamically-allocated map entries.
 */
struct pool uvm_map_entry_pool;
struct pool uvm_map_entry_kmem_pool;

/*
 * This global represents the end of the kernel virtual address
 * space. If we want to exceed this, we must grow the kernel
 * virtual address space dynamically.
 *
 * Note, this variable is locked by kernel_map's lock.
 */
vaddr_t uvm_maxkaddr;

/*
 * Locking predicate: a referenced map must be write-locked
 * (or, for an interrupt-safe map, hold its mutex).
 */
#define UVM_MAP_REQ_WRITE(_map)						\
	do {								\
		if ((_map)->ref_count > 0) {				\
			if (((_map)->flags & VM_MAP_INTRSAFE) == 0)	\
				rw_assert_wrlock(&(_map)->lock);	\
			else						\
				MUTEX_ASSERT_LOCKED(&(_map)->mtx);	\
		}							\
	} while (0)

#define	vm_map_modflags(map, set, clear)				\
	do {								\
		mtx_enter(&(map)->flags_lock);				\
		(map)->flags = ((map)->flags | (set)) & ~(clear);	\
		mtx_leave(&(map)->flags_lock);				\
	} while (0)


/*
 * Tree describing entries by address.
 *
 * Addresses are unique.
 * Entries with start == end may only exist if they are the first entry
 * (sorted by address) within a free-memory tree.
 */

static inline int
uvm_mapentry_addrcmp(const struct vm_map_entry *e1,
    const struct vm_map_entry *e2)
{
	return e1->start < e2->start ? -1 : e1->start > e2->start;
}

/*
 * Copy mapentry.
 */
static inline void
uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
{
	caddr_t csrc, cdst;
	size_t sz;

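	/* Only the bytes between the start_copy and stop_copy markers move. */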
	csrc = (caddr_t)src;
	cdst = (caddr_t)dst;
	csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
	cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);

	sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) -
	    offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
	memcpy(cdst, csrc, sz);
}

/*
 * Handle free-list insertion.
 */
void
uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry *entry)
{
	const struct uvm_addr_functions *fun;
#ifdef VMMAP_DEBUG
	vaddr_t min, max, bound;
#endif

#ifdef VMMAP_DEBUG
	/*
	 * Boundary check.
	 * Boundaries are folded if they go on the same free list.
	 */
	min = VMMAP_FREE_START(entry);
	max = VMMAP_FREE_END(entry);

	while (min < max) {
		bound = uvm_map_boundary(map, min, max);
		KASSERT(uvm_map_uaddr(map, min) == uaddr);
		min = bound;
	}
#endif
	KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0);
	KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0);

	UVM_MAP_REQ_WRITE(map);

	/* Actual insert: forward to uaddr pointer. */
	if (uaddr != NULL) {
		fun = uaddr->uaddr_functions;
		KDASSERT(fun != NULL);
		if (fun->uaddr_free_insert != NULL)
			(*fun->uaddr_free_insert)(map, uaddr, entry);
		entry->etype |= UVM_ET_FREEMAPPED;
	}

	/* Update fspace augmentation. */
	uvm_map_addr_augment(entry);
}

/*
 * Handle free-list removal.
 */
void
uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry *entry)
{
	const struct uvm_addr_functions *fun;

	KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL);
	KASSERT(uvm_map_uaddr_e(map, entry) == uaddr);
	UVM_MAP_REQ_WRITE(map);

	if (uaddr != NULL) {
		fun = uaddr->uaddr_functions;
		if (fun->uaddr_free_remove != NULL)
			(*fun->uaddr_free_remove)(map, uaddr, entry);
		entry->etype &= ~UVM_ET_FREEMAPPED;
	}
}

/*
 * Handle address tree insertion.
 */
void
uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_map_entry *res;

	if (!RBT_CHECK(uvm_map_addr, entry, UVMMAP_DEADBEEF))
		panic("uvm_mapent_addr_insert: entry still in addr list");
	KDASSERT(entry->start <= entry->end);
	KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 &&
	    (entry->end & (vaddr_t)PAGE_MASK) == 0);

	TRACEPOINT(uvm, map_insert,
	    entry->start, entry->end, entry->protection, NULL);

	UVM_MAP_REQ_WRITE(map);
	res = RBT_INSERT(uvm_map_addr, &map->addr, entry);
	if (res != NULL) {
		panic("uvm_mapent_addr_insert: map %p entry %p "
		    "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision "
		    "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)",
		    map, entry,
		    entry->start, entry->end, entry->guard, entry->fspace,
		    res, res->start, res->end, res->guard, res->fspace);
	}
}

/*
 * Handle address tree removal.
 */
void
uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_map_entry *res;

	TRACEPOINT(uvm, map_remove,
	    entry->start, entry->end, entry->protection, NULL);

	UVM_MAP_REQ_WRITE(map);
	res = RBT_REMOVE(uvm_map_addr, &map->addr, entry);
	if (res != entry)
		panic("uvm_mapent_addr_remove");
	RBT_POISON(uvm_map_addr, entry, UVMMAP_DEADBEEF);
}

/*
 * uvm_map_reference: add reference to a map
 *
 * => map need not be locked
 */
void
uvm_map_reference(struct vm_map *map)
{
	atomic_inc_int(&map->ref_count);
}

void
uvm_map_lock_entry(struct vm_map_entry *entry)
{
	if (entry->aref.ar_amap != NULL) {
		amap_lock(entry->aref.ar_amap);
	}
	if (UVM_ET_ISOBJ(entry)) {
		rw_enter(entry->object.uvm_obj->vmobjlock, RW_WRITE);
	}
}

void
uvm_map_unlock_entry(struct vm_map_entry *entry)
{
	if (UVM_ET_ISOBJ(entry)) {
		rw_exit(entry->object.uvm_obj->vmobjlock);
	}
	if (entry->aref.ar_amap != NULL) {
		amap_unlock(entry->aref.ar_amap);
	}
}

/*
 * Calculate the dused delta.
 */
vsize_t
uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max)
{
	struct vmspace *vm;
	vsize_t sz;
	vaddr_t lmax;
	vaddr_t stack_begin, stack_end; /* Position of stack. */

	KASSERT(map->flags & VM_MAP_ISVMSPACE);
	vm_map_assert_anylock(map);

	vm = (struct vmspace *)map;
	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);

	sz = 0;
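	/* Accumulate the size of [min, max), leaving out the stack region. */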
	while (min != max) {
		lmax = max;
		if (min < stack_begin && lmax > stack_begin)
			lmax = stack_begin;
		else if (min < stack_end && lmax > stack_end)
			lmax = stack_end;

		if (min >= stack_begin && min < stack_end) {
			/* nothing */
		} else
			sz += lmax - min;
		min = lmax;
	}

	return sz >> PAGE_SHIFT;
}

/*
 * Find the entry describing the given address.
 */
struct vm_map_entry*
uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr)
{
	struct vm_map_entry *iter;

	iter = RBT_ROOT(uvm_map_addr, atree);
	while (iter != NULL) {
		if (iter->start > addr)
			iter = RBT_LEFT(uvm_map_addr, iter);
		else if (VMMAP_FREE_END(iter) <= addr)
			iter = RBT_RIGHT(uvm_map_addr, iter);
		else
			return iter;
	}
	return NULL;
}

/*
 * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry)
 *
 * Push dead entries into a linked list.
 * Since the linked list abuses the address tree for storage, the entry
 * may not be linked in a map.
 *
 * The deadq must be initialized with TAILQ_INIT() before the first use.
 * uvm_unmap_detach(deadq, 0) will release the dead entries.
 */
static inline void
dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry)
{
	TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq);
}
#define DEAD_ENTRY_PUSH(_headptr, _entry)				\
	dead_entry_push((_headptr), (_entry))

/*
 * Test if memory starting at addr with sz bytes is free.
 *
 * Fills in *start_ptr and *end_ptr to be the first and last entry describing
 * the space.
 * If called with prefilled *start_ptr and *end_ptr, they must already be
 * correct.
 */
int
uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr,
    vaddr_t addr, vsize_t sz)
{
	struct uvm_addr_state *free;
	struct uvm_map_addr *atree;
	struct vm_map_entry *i, *i_end;

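	/* Reject if addr + sz wraps around the address space. */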
	if (addr + sz < addr)
		return 0;

	vm_map_assert_anylock(map);

	/*
	 * Kernel memory above uvm_maxkaddr is considered unavailable.
	 */
	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
		if (addr + sz > uvm_maxkaddr)
			return 0;
	}

	atree = &map->addr;

	/*
	 * Fill in first, last, so they point at the entries containing the
	 * first and last address of the range.
	 * Note that if they are not NULL, we don't perform the lookup.
	 */
	KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL);
	if (*start_ptr == NULL) {
		*start_ptr = uvm_map_entrybyaddr(atree, addr);
		if (*start_ptr == NULL)
			return 0;
	} else
		KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr));
	if (*end_ptr == NULL) {
		if (VMMAP_FREE_END(*start_ptr) >= addr + sz)
			*end_ptr = *start_ptr;
		else {
			*end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1);
			if (*end_ptr == NULL)
				return 0;
		}
	} else
		KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1));

	/* Validation. */
	KDASSERT(*start_ptr != NULL && *end_ptr != NULL);
	KDASSERT((*start_ptr)->start <= addr &&
	    VMMAP_FREE_END(*start_ptr) > addr &&
	    (*end_ptr)->start < addr + sz &&
	    VMMAP_FREE_END(*end_ptr) >= addr + sz);

	/*
	 * Check that none of the entries intersects with <addr, addr+sz>.
	 * Also, if the entry belongs to uaddr_exe or uaddr_brk_stack, it is
	 * considered unavailable unless called by those allocators.
	 */
	i = *start_ptr;
	i_end = RBT_NEXT(uvm_map_addr, *end_ptr);
	for (; i != i_end;
	    i = RBT_NEXT(uvm_map_addr, i)) {
		if (i->start != i->end && i->end > addr)
			return 0;

		/*
		 * uaddr_exe and uaddr_brk_stack may only be used
		 * by these allocators and the NULL uaddr (i.e. no
		 * uaddr).
		 * Reject if this requirement is not met.
		 */
		if (uaddr != NULL) {
			free = uvm_map_uaddr_e(map, i);

			if (uaddr != free && free != NULL &&
			    (free == map->uaddr_exe ||
			     free == map->uaddr_brk_stack))
				return 0;
		}
	}

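	/* Available; callers treat any nonzero return value as true. */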
	return -1;
}

/*
 * Invoke each address selector until an address is found.
 * Will not invoke uaddr_exe.
 */
int
uvm_map_findspace(struct vm_map *map, struct vm_map_entry **first,
    struct vm_map_entry **last, vaddr_t *addr, vsize_t sz,
    vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint)
{
	struct uvm_addr_state *uaddr;
	int i;

	/*
	 * Allocation for sz bytes at any address,
	 * using the addr selectors in order.
	 */
	for (i = 0; i < nitems(map->uaddr_any); i++) {
		uaddr = map->uaddr_any[i];

		if (uvm_addr_invoke(map, uaddr, first, last,
		    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
			return 0;
	}

	/* Fall back to brk() and stack() address selectors. */
	uaddr = map->uaddr_brk_stack;
	if (uvm_addr_invoke(map, uaddr, first, last,
	    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
		return 0;

	return ENOMEM;
}

/* Calculate entry augmentation value. */
vsize_t
uvm_map_addr_augment_get(struct vm_map_entry *entry)
{
	vsize_t augment;
	struct vm_map_entry *left, *right;

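	/* fspace_augment is the maximum fspace over this entry's subtree. */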
	augment = entry->fspace;
	if ((left = RBT_LEFT(uvm_map_addr, entry)) != NULL)
		augment = MAX(augment, left->fspace_augment);
	if ((right = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
		augment = MAX(augment, right->fspace_augment);
	return augment;
}

/*
 * Update augmentation data in entry.
 */
void
uvm_map_addr_augment(struct vm_map_entry *entry)
{
	vsize_t augment;

	while (entry != NULL) {
		/* Calculate value for augmentation. */
		augment = uvm_map_addr_augment_get(entry);

		/*
		 * Descend update.
		 * Once we find an entry that already has the correct value,
		 * stop, since it means all its parents will use the correct
		 * value too.
		 */
		if (entry->fspace_augment == augment)
			return;
		entry->fspace_augment = augment;
		entry = RBT_PARENT(uvm_map_addr, entry);
	}
}

/*
 * uvm_mapanon: establish a valid mapping in map for an anon
 *
 * => *addr and sz must be a multiple of PAGE_SIZE.
 * => *addr is ignored, except if flags contains UVM_FLAG_FIXED.
 * => map must be unlocked.
 *
 * => align: align vaddr, must be a power-of-2.
 *    Align is only a hint and will be ignored if the alignment fails.
 */
int
uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz,
    vsize_t align, unsigned int flags)
{
	struct vm_map_entry *first, *last, *entry, *new;
	struct uvm_map_deadq dead;
	vm_prot_t prot;
	vm_prot_t maxprot;
	vm_inherit_t inherit;
	int advice;
	int error;
	vaddr_t pmap_align, pmap_offset;
	vaddr_t hint;

	KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE);
	KASSERT(map != kernel_map);
	KASSERT((map->flags & UVM_FLAG_HOLE) == 0);
	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
	splassert(IPL_NONE);
	KASSERT((flags & UVM_FLAG_TRYLOCK) == 0);

	/*
	 * We use pmap_align and pmap_offset as alignment and offset variables.
	 *
	 * Because the align parameter takes precedence over pmap prefer,
	 * the pmap_align will need to be set to align, with pmap_offset = 0,
	 * if pmap_prefer will not align.
	 */
	pmap_align = MAX(align, PAGE_SIZE);
	pmap_offset = 0;

	/* Decode parameters. */
	prot = UVM_PROTECTION(flags);
	maxprot = UVM_MAXPROTECTION(flags);
	advice = UVM_ADVICE(flags);
	inherit = UVM_INHERIT(flags);
	error = 0;
	hint = trunc_page(*addr);
	TAILQ_INIT(&dead);
	KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
	KASSERT((align & (align - 1)) == 0);

	/* Check protection. */
	if ((prot & maxprot) != prot)
		return EACCES;

	/*
	 * Before grabbing the lock, allocate a map entry for later
	 * use to ensure we don't wait for memory while holding the
	 * vm_map_lock.
	 */
	new = uvm_mapent_alloc(map, flags);
	if (new == NULL)
		return ENOMEM;

	vm_map_lock(map);
	first = last = NULL;
	if (flags & UVM_FLAG_FIXED) {
		/*
		 * Fixed location.
		 *
		 * Note: we ignore align, pmap_prefer.
		 * Fill in first, last and *addr.
		 */
		KASSERT((*addr & PAGE_MASK) == 0);

		/* Check that the space is available. */
		if (flags & UVM_FLAG_UNMAP) {
			if ((flags & UVM_FLAG_STACK) &&
			    !uvm_map_is_stack_remappable(map, *addr, sz,
			    (flags & UVM_FLAG_SIGALTSTACK))) {
				error = EINVAL;
				goto unlock;
			}
			if (uvm_unmap_remove(map, *addr, *addr + sz, &dead,
			    FALSE, TRUE,
			    (flags & UVM_FLAG_SIGALTSTACK) ? FALSE : TRUE) != 0) {
				error = EPERM;	/* immutable entries found */
				goto unlock;
			}
		}
		if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
			error = ENOMEM;
			goto unlock;
		}
	} else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
	    (align == 0 || (*addr & (align - 1)) == 0) &&
	    uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
		/*
		 * Address used as hint.
		 *
		 * Note: we enforce the alignment restriction,
		 * but ignore pmap_prefer.
		 */
	} else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
		/* Run selection algorithm for executables. */
		error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
		    addr, sz, pmap_align, pmap_offset, prot, hint);

		if (error != 0)
			goto unlock;
	} else {
		/* Update freelists from vmspace. */
		uvm_map_vmspace_update(map, &dead, flags);

		error = uvm_map_findspace(map, &first, &last, addr, sz,
		    pmap_align, pmap_offset, prot, hint);

		if (error != 0)
			goto unlock;
	}

	/* Double-check that the selected address doesn't cause an overflow. */
	if (*addr + sz < *addr) {
		error = ENOMEM;
		goto unlock;
	}

	/* If we only want a query, return now. */
	if (flags & UVM_FLAG_QUERY) {
		error = 0;
		goto unlock;
	}

	/*
	 * Create new entry.
	 * first and last may be invalidated after this call.
	 */
	entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
	    new);
	if (entry == NULL) {
		error = ENOMEM;
		goto unlock;
	}
	new = NULL;
	KDASSERT(entry->start == *addr && entry->end == *addr + sz);
	entry->object.uvm_obj = NULL;
	entry->offset = 0;
	entry->protection = prot;
	entry->max_protection = maxprot;
	entry->inheritance = inherit;
	entry->wired_count = 0;
	entry->advice = advice;
	if (flags & UVM_FLAG_STACK) {
		entry->etype |= UVM_ET_STACK;
		if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP))
			map->sserial++;
	}
	if (flags & UVM_FLAG_COPYONW) {
		entry->etype |= UVM_ET_COPYONWRITE;
		if ((flags & UVM_FLAG_OVERLAY) == 0)
			entry->etype |= UVM_ET_NEEDSCOPY;
	}
	if (flags & UVM_FLAG_CONCEAL)
		entry->etype |= UVM_ET_CONCEAL;
	if (flags & UVM_FLAG_OVERLAY) {
		entry->aref.ar_pageoff = 0;
		entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
	}

	/* Update map and process statistics. */
	map->size += sz;
	if (prot != PROT_NONE) {
		((struct vmspace *)map)->vm_dused +=
		    uvmspace_dused(map, *addr, *addr + sz);
	}

unlock:
	vm_map_unlock(map);

	/*
	 * Remove dead entries.
	 *
	 * Dead entries may be the result of merging.
	 * uvm_map_mkentry may also create dead entries, when it attempts to
	 * destroy free-space entries.
	 */
	uvm_unmap_detach(&dead, 0);

	if (new)
		uvm_mapent_free(new);
	return error;
}

/*
 * uvm_map: establish a valid mapping in map
 *
 * => *addr and sz must be a multiple of PAGE_SIZE.
 * => map must be unlocked.
 * => <uobj,uoffset> value meanings (4 cases):
 *	[1] <NULL,uoffset>		== uoffset is a hint for PMAP_PREFER
 *	[2] <NULL,UVM_UNKNOWN_OFFSET>	== don't PMAP_PREFER
 *	[3] <uobj,uoffset>		== normal mapping
 *	[4] <uobj,UVM_UNKNOWN_OFFSET>	== uvm_map finds offset based on VA
 *
 * case [4] is for kernel mappings where we don't know the offset until
 * we've found a virtual address. note that kernel object offsets are
 * always relative to vm_map_min(kernel_map).
 *
 * => align: align vaddr, must be a power-of-2.
 *    Align is only a hint and will be ignored if the alignment fails.
 */
int
uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz,
    struct uvm_object *uobj, voff_t uoffset,
    vsize_t align, unsigned int flags)
{
	struct vm_map_entry *first, *last, *entry, *new;
	struct uvm_map_deadq dead;
	vm_prot_t prot;
	vm_prot_t maxprot;
	vm_inherit_t inherit;
	int advice;
	int error;
	vaddr_t pmap_align, pmap_offset;
	vaddr_t hint;

	if ((map->flags & VM_MAP_INTRSAFE) == 0)
		splassert(IPL_NONE);
	else
		splassert(IPL_VM);

	/*
	 * We use pmap_align and pmap_offset as alignment and offset variables.
	 *
	 * Because the align parameter takes precedence over pmap prefer,
	 * the pmap_align will need to be set to align, with pmap_offset = 0,
	 * if pmap_prefer will not align.
	 */
	if (uoffset == UVM_UNKNOWN_OFFSET) {
		pmap_align = MAX(align, PAGE_SIZE);
		pmap_offset = 0;
	} else {
		pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE);
		pmap_offset = PMAP_PREFER_OFFSET(uoffset);

		if (align == 0 ||
		    (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) {
			/* pmap_offset satisfies align, no change. */
		} else {
			/* Align takes precedence over pmap prefer. */
			pmap_align = align;
			pmap_offset = 0;
		}
	}

	/* Decode parameters. */
	prot = UVM_PROTECTION(flags);
	maxprot = UVM_MAXPROTECTION(flags);
	advice = UVM_ADVICE(flags);
	inherit = UVM_INHERIT(flags);
	error = 0;
	hint = trunc_page(*addr);
	TAILQ_INIT(&dead);
	KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
	KASSERT((align & (align - 1)) == 0);

	/* Holes are incompatible with other types of mappings. */
	if (flags & UVM_FLAG_HOLE) {
		KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) &&
		    (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0);
	}

	/* Unset hint for kernel_map non-fixed allocations. */
	if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED))
		hint = 0;

	/* Check protection. */
	if ((prot & maxprot) != prot)
		return EACCES;

	if (map == kernel_map &&
	    (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
		panic("uvm_map: kernel map W^X violation requested");

	/*
	 * Before grabbing the lock, allocate a map entry for later
	 * use to ensure we don't wait for memory while holding the
	 * vm_map_lock.
	 */
	new = uvm_mapent_alloc(map, flags);
	if (new == NULL)
		return ENOMEM;

	if (flags & UVM_FLAG_TRYLOCK) {
		if (vm_map_lock_try(map) == FALSE) {
			error = EFAULT;
			goto out;
		}
	} else {
		vm_map_lock(map);
	}

	first = last = NULL;
	if (flags & UVM_FLAG_FIXED) {
		/*
		 * Fixed location.
		 *
		 * Note: we ignore align, pmap_prefer.
		 * Fill in first, last and *addr.
		 */
		KASSERT((*addr & PAGE_MASK) == 0);

		/*
		 * Grow pmap to include allocated address.
		 * If the growth fails, the allocation will fail too.
		 */
		if ((map->flags & VM_MAP_ISVMSPACE) == 0 &&
		    uvm_maxkaddr < (*addr + sz)) {
			uvm_map_kmem_grow(map, &dead,
			    *addr + sz - uvm_maxkaddr, flags);
		}

		/* Check that the space is available. */
		if (flags & UVM_FLAG_UNMAP) {
			if (uvm_unmap_remove(map, *addr, *addr + sz, &dead,
			    FALSE, TRUE, TRUE) != 0) {
				error = EPERM;	/* immutable entries found */
				goto unlock;
			}
		}
		if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
			error = ENOMEM;
			goto unlock;
		}
	} else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
	    (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE &&
	    (align == 0 || (*addr & (align - 1)) == 0) &&
	    uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
		/*
		 * Address used as hint.
		 *
		 * Note: we enforce the alignment restriction,
		 * but ignore pmap_prefer.
		 */
	} else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
		/* Run selection algorithm for executables. */
		error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
		    addr, sz, pmap_align, pmap_offset, prot, hint);

		/* Grow kernel memory and try again. */
		if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
			uvm_map_kmem_grow(map, &dead, sz, flags);

			error = uvm_addr_invoke(map, map->uaddr_exe,
			    &first, &last, addr, sz,
			    pmap_align, pmap_offset, prot, hint);
		}

		if (error != 0)
			goto unlock;
	} else {
		/* Update freelists from vmspace. */
		if (map->flags & VM_MAP_ISVMSPACE)
			uvm_map_vmspace_update(map, &dead, flags);

		error = uvm_map_findspace(map, &first, &last, addr, sz,
		    pmap_align, pmap_offset, prot, hint);

		/* Grow kernel memory and try again. */
		if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
			uvm_map_kmem_grow(map, &dead, sz, flags);

			error = uvm_map_findspace(map, &first, &last, addr, sz,
			    pmap_align, pmap_offset, prot, hint);
		}

		if (error != 0)
			goto unlock;
	}

	/* Double-check that the selected address doesn't cause an overflow. */
	if (*addr + sz < *addr) {
		error = ENOMEM;
		goto unlock;
	}

	KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE ||
	    uvm_maxkaddr >= *addr + sz);

	/* If we only want a query, return now. */
	if (flags & UVM_FLAG_QUERY) {
		error = 0;
		goto unlock;
	}

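	/* Resolve uoffset; case [4] above takes its offset from the chosen VA. */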
	if (uobj == NULL)
		uoffset = 0;
	else if (uoffset == UVM_UNKNOWN_OFFSET) {
		KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
		uoffset = *addr - vm_map_min(kernel_map);
	}

	/*
	 * Create new entry.
	 * first and last may be invalidated after this call.
	 */
	entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
	    new);
	if (entry == NULL) {
		error = ENOMEM;
		goto unlock;
	}
	new = NULL;
	KDASSERT(entry->start == *addr && entry->end == *addr + sz);
	entry->object.uvm_obj = uobj;
	entry->offset = uoffset;
	entry->protection = prot;
	entry->max_protection = maxprot;
	entry->inheritance = inherit;
	entry->wired_count = 0;
	entry->advice = advice;
	if (flags & UVM_FLAG_STACK) {
		entry->etype |= UVM_ET_STACK;
		if (flags & UVM_FLAG_UNMAP)
			map->sserial++;
	}
	if (uobj)
		entry->etype |= UVM_ET_OBJ;
	else if (flags & UVM_FLAG_HOLE)
		entry->etype |= UVM_ET_HOLE;
	if (flags & UVM_FLAG_NOFAULT)
		entry->etype |= UVM_ET_NOFAULT;
	if (flags & UVM_FLAG_WC)
		entry->etype |= UVM_ET_WC;
	if (flags & UVM_FLAG_COPYONW) {
		entry->etype |= UVM_ET_COPYONWRITE;
		if ((flags & UVM_FLAG_OVERLAY) == 0)
			entry->etype |= UVM_ET_NEEDSCOPY;
	}
	if (flags & UVM_FLAG_CONCEAL)
		entry->etype |= UVM_ET_CONCEAL;
	if (flags & UVM_FLAG_OVERLAY) {
		entry->aref.ar_pageoff = 0;
		entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
	}

	/* Update map and process statistics. */
	if (!(flags & UVM_FLAG_HOLE)) {
		map->size += sz;
		if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL &&
		    prot != PROT_NONE) {
			((struct vmspace *)map)->vm_dused +=
			    uvmspace_dused(map, *addr, *addr + sz);
		}
	}

	/*
	 * Try to merge entry.
	 *
	 * Userland allocations are kept separated most of the time.
	 * Forego the effort of merging what most of the time can't be merged
	 * and only try the merge if it concerns a kernel entry.
	 */
	if ((flags & UVM_FLAG_NOMERGE) == 0 &&
	    (map->flags & VM_MAP_ISVMSPACE) == 0)
		uvm_mapent_tryjoin(map, entry, &dead);

unlock:
	vm_map_unlock(map);

	/*
	 * Remove dead entries.
	 *
	 * Dead entries may be the result of merging.
	 * uvm_map_mkentry may also create dead entries, when it attempts to
	 * destroy free-space entries.
	 */
	if (map->flags & VM_MAP_INTRSAFE)
		uvm_unmap_detach_intrsafe(&dead);
	else
		uvm_unmap_detach(&dead, 0);
out:
	if (new)
		uvm_mapent_free(new);
	return error;
}

/*
 * True iff e1 and e2 can be joined together.
 */
int
uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1,
    struct vm_map_entry *e2)
{
	KDASSERT(e1 != NULL && e2 != NULL);

	/* Must be the same entry type and not have free memory between. */
	if (e1->etype != e2->etype || e1->end != e2->start)
		return 0;

	/* Submaps are never joined. */
	if (UVM_ET_ISSUBMAP(e1))
		return 0;

	/* Never merge wired memory. */
	if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2))
		return 0;

	/* Protection, inheritance and advice must be equal. */
	if (e1->protection != e2->protection ||
	    e1->max_protection != e2->max_protection ||
	    e1->inheritance != e2->inheritance ||
	    e1->advice != e2->advice)
		return 0;

	/* If uvm_object: object itself and offsets within object must match. */
	if (UVM_ET_ISOBJ(e1)) {
		if (e1->object.uvm_obj != e2->object.uvm_obj)
			return 0;
		if (e1->offset + (e1->end - e1->start) != e2->offset)
			return 0;
	}

	/*
	 * Cannot join shared amaps.
	 * Note: no need to lock amap to look at refs, since we don't care
	 * about its exact value.
	 * If it is 1 (i.e. we have the only reference) it will stay there.
	 */
	if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1)
		return 0;
	if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1)
		return 0;

	/* Apparently, e1 and e2 match. */
	return 1;
}

/*
 * Join support function.
 *
 * Returns the merged entry on success.
 * Returns NULL if the merge failed.
 */
struct vm_map_entry*
uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1,
    struct vm_map_entry *e2, struct uvm_map_deadq *dead)
{
	struct uvm_addr_state *free;

	/*
	 * Merging is not supported for map entries that
	 * contain an amap in e1. This should never happen
	 * anyway, because only kernel entries are merged.
	 * These do not contain amaps.
	 * e2 contains no real information in its amap,
	 * so it can be erased immediately.
	 */
	KASSERT(e1->aref.ar_amap == NULL);

	/*
	 * Don't drop obj reference:
	 * uvm_unmap_detach will do this for us.
	 */
	free = uvm_map_uaddr_e(map, e1);
	uvm_mapent_free_remove(map, free, e1);

	free = uvm_map_uaddr_e(map, e2);
	uvm_mapent_free_remove(map, free, e2);
	uvm_mapent_addr_remove(map, e2);
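	/* Extend e1 over e2's range; e1 takes over e2's guard and free space. */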
	e1->end = e2->end;
	e1->guard = e2->guard;
	e1->fspace = e2->fspace;
	uvm_mapent_free_insert(map, free, e1);

	DEAD_ENTRY_PUSH(dead, e2);
	return e1;
}

/*
 * Attempt forward and backward joining of entry.
 *
 * Returns entry after joins.
 * We are guaranteed that the amap of entry is either non-existent or
 * has never been used.
 */
struct vm_map_entry*
uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry,
    struct uvm_map_deadq *dead)
{
	struct vm_map_entry *other;
	struct vm_map_entry *merged;

	/* Merge with previous entry. */
	other = RBT_PREV(uvm_map_addr, entry);
	if (other && uvm_mapent_isjoinable(map, other, entry)) {
		merged = uvm_mapent_merge(map, other, entry, dead);
		if (merged)
			entry = merged;
	}

	/*
	 * Merge with next entry.
	 *
	 * Because amap can only extend forward and the next entry
	 * probably contains sensible info, only perform forward merging
	 * in the absence of an amap.
	 */
	other = RBT_NEXT(uvm_map_addr, entry);
	if (other && entry->aref.ar_amap == NULL &&
	    other->aref.ar_amap == NULL &&
	    uvm_mapent_isjoinable(map, entry, other)) {
		merged = uvm_mapent_merge(map, entry, other, dead);
		if (merged)
			entry = merged;
	}

	return entry;
}

/*
 * Kill entries that are no longer in a map.
 */
void
uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags)
{
	struct vm_map_entry *entry, *tmp;
	int waitok = flags & UVM_PLA_WAITOK;

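	/* First pass: free the entries that do not require the kernel lock. */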
	TAILQ_FOREACH_SAFE(entry, deadq, dfree.deadq, tmp) {
		/* Drop reference to amap, if we've got one. */
		if (entry->aref.ar_amap)
			amap_unref(entry->aref.ar_amap,
			    entry->aref.ar_pageoff,
			    atop(entry->end - entry->start),
			    flags & AMAP_REFALL);

		/* Skip entries for which we have to grab the kernel lock. */
		if (UVM_ET_ISSUBMAP(entry) || UVM_ET_ISOBJ(entry))
			continue;

		TAILQ_REMOVE(deadq, entry, dfree.deadq);
		uvm_mapent_free(entry);
	}

	if (TAILQ_EMPTY(deadq))
		return;

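	/* Second pass: drop submap/object references under the kernel lock. */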
	KERNEL_LOCK();
	while ((entry = TAILQ_FIRST(deadq)) != NULL) {
		if (waitok)
			uvm_pause();
		/* Drop reference to our backing object, if we've got one. */
		if (UVM_ET_ISSUBMAP(entry)) {
			/* ... unlikely to happen, but play it safe */
			uvm_map_deallocate(entry->object.sub_map);
		} else if (UVM_ET_ISOBJ(entry) &&
		    entry->object.uvm_obj->pgops->pgo_detach) {
			entry->object.uvm_obj->pgops->pgo_detach(
			    entry->object.uvm_obj);
		}

		/* Step to next. */
		TAILQ_REMOVE(deadq, entry, dfree.deadq);
		uvm_mapent_free(entry);
	}
	KERNEL_UNLOCK();
}

void
uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq)
{
	struct vm_map_entry *entry;

	while ((entry = TAILQ_FIRST(deadq)) != NULL) {
		KASSERT(entry->aref.ar_amap == NULL);
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(!UVM_ET_ISOBJ(entry));
		TAILQ_REMOVE(deadq, entry, dfree.deadq);
		uvm_mapent_free(entry);
	}
}

/*
 * Create and insert new entry.
 *
 * Returned entry contains new addresses and is inserted properly in the tree.
 * first and last are (probably) no longer valid.
 */
struct vm_map_entry*
uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first,
    struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags,
    struct uvm_map_deadq *dead, struct vm_map_entry *new)
{
	struct vm_map_entry *entry, *prev;
	struct uvm_addr_state *free;
	vaddr_t min, max;	/* free space boundaries for new entry */

	KDASSERT(map != NULL);
	KDASSERT(first != NULL);
	KDASSERT(last != NULL);
	KDASSERT(dead != NULL);
	KDASSERT(sz > 0);
	KDASSERT(addr + sz > addr);
	KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr);
	KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz);
	KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz));
	uvm_tree_sanity(map, __FILE__, __LINE__);

	min = addr + sz;
	max = VMMAP_FREE_END(last);

	/* Initialize new entry. */
	if (new == NULL)
		entry = uvm_mapent_alloc(map, flags);
	else
		entry = new;
	if (entry == NULL)
		return NULL;
	entry->offset = 0;
	entry->etype = 0;
	entry->wired_count = 0;
	entry->aref.ar_pageoff = 0;
	entry->aref.ar_amap = NULL;

	entry->start = addr;
	entry->end = min;
	entry->guard = 0;
	entry->fspace = 0;

	vm_map_assert_wrlock(map);

	/* Reset free space in first. */
	free = uvm_map_uaddr_e(map, first);
	uvm_mapent_free_remove(map, free, first);
	first->guard = 0;
	first->fspace = 0;

	/*
	 * Remove all entries that are fully replaced.
	 * We are iterating using last in reverse order.
	 */
	for (; first != last; last = prev) {
		prev = RBT_PREV(uvm_map_addr, last);

		KDASSERT(last->start == last->end);
		free = uvm_map_uaddr_e(map, last);
		uvm_mapent_free_remove(map, free, last);
		uvm_mapent_addr_remove(map, last);
		DEAD_ENTRY_PUSH(dead, last);
	}
	/* Remove first if it is entirely inside <addr, addr+sz>. */
	if (first->start == addr) {
		uvm_mapent_addr_remove(map, first);
		DEAD_ENTRY_PUSH(dead, first);
	} else {
		uvm_map_fix_space(map, first, VMMAP_FREE_START(first),
		    addr, flags);
	}

	/* Finally, link in entry. */
	uvm_mapent_addr_insert(map, entry);
	uvm_map_fix_space(map, entry, min, max, flags);

	uvm_tree_sanity(map, __FILE__, __LINE__);
	return entry;
}


/*
 * uvm_mapent_alloc: allocate a map entry
 */
struct vm_map_entry *
uvm_mapent_alloc(struct vm_map *map, int flags)
{
	struct vm_map_entry *me, *ne;
	int pool_flags;
	int i;

	pool_flags = PR_WAITOK;
	if (flags & UVM_FLAG_TRYLOCK)
		pool_flags = PR_NOWAIT;

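	/* Interrupt-safe maps and early boot draw from the static kentry list. */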
	if (map->flags & VM_MAP_INTRSAFE || cold) {
		mtx_enter(&uvm_kmapent_mtx);
		if (SLIST_EMPTY(&uvm.kentry_free)) {
			ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty,
			    &kd_nowait);
			if (ne == NULL)
				panic("uvm_mapent_alloc: cannot allocate map "
				    "entry");
			for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) {
				SLIST_INSERT_HEAD(&uvm.kentry_free,
				    &ne[i], daddrs.addr_kentry);
			}
			if (ratecheck(&uvm_kmapent_last_warn_time,
			    &uvm_kmapent_warn_rate))
				printf("uvm_mapent_alloc: out of static "
				    "map entries\n");
		}
		me = SLIST_FIRST(&uvm.kentry_free);
		SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry);
		uvmexp.kmapent++;
		mtx_leave(&uvm_kmapent_mtx);
		me->flags = UVM_MAP_STATIC;
	} else if (map == kernel_map) {
		splassert(IPL_NONE);
		me = pool_get(&uvm_map_entry_kmem_pool, pool_flags);
		if (me == NULL)
			goto out;
		me->flags = UVM_MAP_KMEM;
	} else {
		splassert(IPL_NONE);
		me = pool_get(&uvm_map_entry_pool, pool_flags);
		if (me == NULL)
			goto out;
		me->flags = 0;
	}

	RBT_POISON(uvm_map_addr, me, UVMMAP_DEADBEEF);
out:
	return me;
}

/*
 * uvm_mapent_free: free map entry
 *
 * => XXX: static pool for kernel map?
 */
void
uvm_mapent_free(struct vm_map_entry *me)
{
	if (me->flags & UVM_MAP_STATIC) {
		mtx_enter(&uvm_kmapent_mtx);
		SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry);
		uvmexp.kmapent--;
		mtx_leave(&uvm_kmapent_mtx);
	} else if (me->flags & UVM_MAP_KMEM) {
		splassert(IPL_NONE);
		pool_put(&uvm_map_entry_kmem_pool, me);
	} else {
		splassert(IPL_NONE);
		pool_put(&uvm_map_entry_pool, me);
	}
}

/*
 * uvm_map_lookup_entry: find map entry at or before an address.
 *
 * => map must at least be read-locked by caller
 * => entry is returned in "entry"
 * => return value is true if address is in the returned entry
 * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is
 * returned for those mappings.
 */
boolean_t
uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
    struct vm_map_entry **entry)
{
	vm_map_assert_anylock(map);

	*entry = uvm_map_entrybyaddr(&map->addr, address);
	return *entry != NULL && !UVM_ET_ISHOLE(*entry) &&
	    (*entry)->start <= address && (*entry)->end > address;
}

/*
 * Stack must be in a MAP_STACK entry. PROT_NONE indicates stack not yet
 * grown -- then uvm_map_check_region_range() should not cache the entry
 * because growth won't be seen.
 */
int
uvm_map_inentry_sp(vm_map_entry_t entry)
{
	if ((entry->etype & UVM_ET_STACK) == 0) {
		if (entry->protection == PROT_NONE)
			return (-1); /* don't update range */
		return (0);
	}
	return (1);
}

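/*
 * True if the cached start/end range is stale and must be looked up again.
 */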
int
uvm_map_inentry_recheck(u_long serial, vaddr_t addr, struct p_inentry *ie)
{
	return (serial != ie->ie_serial || ie->ie_start == 0 ||
	    addr < ie->ie_start || addr >= ie->ie_end);
}

/*
 * Inside a vm_map, find the entry for an address and verify it via function.
 * Remember low and high addresses of region if valid and return TRUE,
 * else return FALSE.
 */
boolean_t
uvm_map_inentry_fix(struct proc *p, struct p_inentry *ie, vaddr_t addr,
    int (*fn)(vm_map_entry_t), u_long serial)
{
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_map_entry_t entry;
	int ret;

	if (addr < map->min_offset || addr >= map->max_offset)
		return (FALSE);

	/* lock map */
	vm_map_lock_read(map);

	/* lookup */
	if (!uvm_map_lookup_entry(map, trunc_page(addr), &entry)) {
		vm_map_unlock_read(map);
		return (FALSE);
	}

	ret = (*fn)(entry);
	if (ret == 0) {
		vm_map_unlock_read(map);
		return (FALSE);
	} else if (ret == 1) {
		ie->ie_start = entry->start;
		ie->ie_end = entry->end;
		ie->ie_serial = serial;
	} else {
		/* do not update, re-check later */
	}
	vm_map_unlock_read(map);
	return (TRUE);
}

boolean_t
uvm_map_inentry(struct proc *p, struct p_inentry *ie, vaddr_t addr,
    const char *fmt, int (*fn)(vm_map_entry_t), u_long serial)
{
	union sigval sv;
	boolean_t ok = TRUE;

	if (uvm_map_inentry_recheck(serial, addr, ie)) {
		ok = uvm_map_inentry_fix(p, ie, addr, fn, serial);
		if (!ok) {
			KERNEL_LOCK();
			uprintf(fmt, p->p_p->ps_comm, p->p_p->ps_pid, p->p_tid,
			    addr, ie->ie_start, ie->ie_end-1);
			p->p_p->ps_acflag |= AMAP;
			sv.sival_ptr = (void *)PROC_PC(p);
			trapsignal(p, SIGSEGV, 0, SEGV_ACCERR, sv);
			KERNEL_UNLOCK();
		}
	}
	return (ok);
}

/*
 * Check whether the given address range can be converted to a MAP_STACK
 * mapping.
 *
 * Must be called with map locked.
 */
boolean_t
uvm_map_is_stack_remappable(struct vm_map *map, vaddr_t addr, vaddr_t sz,
    int sigaltstack_check)
{
	vaddr_t end = addr + sz;
	struct vm_map_entry *first, *iter, *prev = NULL;

	vm_map_assert_anylock(map);

	if (!uvm_map_lookup_entry(map, addr, &first))
		return FALSE;

	/*
	 * Check that the address range exists and is contiguous.
	 */
	for (iter = first; iter != NULL && iter->start < end;
	    prev = iter, iter = RBT_NEXT(uvm_map_addr, iter)) {
		/*
		 * Make sure that we do not have holes in the range.
		 */
#if 0
		if (prev != NULL) {
			printf("prev->start 0x%lx, prev->end 0x%lx, "
			    "iter->start 0x%lx, iter->end 0x%lx\n",
			    prev->start, prev->end, iter->start, iter->end);
		}
#endif

		if (prev != NULL && prev->end != iter->start)
			return FALSE;
		if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
			return FALSE;
		if (sigaltstack_check) {
			if (iter->protection != (PROT_READ | PROT_WRITE))
				return FALSE;
		}
	}

	return TRUE;
}

/*
 * Remap the middle-pages of an existing mapping as a stack range.
 * If there exists a previous contiguous mapping with the given range
 * [addr, addr + sz), with protection PROT_READ|PROT_WRITE, then the
 * mapping is dropped, and a new anon mapping is created and marked as
 * a stack.
 *
 * Must be called with map unlocked.
 */
int
uvm_map_remap_as_stack(struct proc *p, vaddr_t addr, vaddr_t sz)
{
	vm_map_t map = &p->p_vmspace->vm_map;
	vaddr_t start, end;
	int flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE,
	    PROT_READ | PROT_WRITE | PROT_EXEC,
	    MAP_INHERIT_COPY, MADV_NORMAL,
	    UVM_FLAG_STACK | UVM_FLAG_FIXED | UVM_FLAG_UNMAP |
	    UVM_FLAG_COPYONW | UVM_FLAG_SIGALTSTACK);

	start = round_page(addr);
	end = trunc_page(addr + sz);
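	/* Only the middle pages are remapped: skip one page at the growth end. */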
#ifdef MACHINE_STACK_GROWS_UP
	if (end == addr + sz)
		end -= PAGE_SIZE;
#else
	if (start == addr)
		start += PAGE_SIZE;
#endif

	if (start < map->min_offset || end >= map->max_offset || end < start)
		return EINVAL;

	/*
	 * UVM_FLAG_SIGALTSTACK indicates that immutable may be bypassed,
	 * but the range is checked to be contiguous, not a syscall mapping,
	 * and protected RW. Then, a new mapping (all zero) is placed upon
	 * the region, which prevents an attacker from pivoting into
	 * pre-placed MAP_STACK space.
	 */
	return uvm_mapanon(map, &start, end - start, 0, flags);
}

/*
 * uvm_map_pie: return a random load address for a PIE executable
 * properly aligned.
 */
#ifndef VM_PIE_MAX_ADDR
#define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4)
#endif

#ifndef VM_PIE_MIN_ADDR
#define VM_PIE_MIN_ADDR VM_MIN_ADDRESS
#endif

#ifndef VM_PIE_MIN_ALIGN
#define VM_PIE_MIN_ALIGN PAGE_SIZE
#endif

vaddr_t
uvm_map_pie(vaddr_t align)
{
	vaddr_t addr, space, min;

	align = MAX(align, VM_PIE_MIN_ALIGN);

	/* round up to next alignment */
	min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1);

	if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR)
		return (align);

	space = (VM_PIE_MAX_ADDR - min) / align;
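	/* Clamp the slot count to arc4random_uniform()'s 32-bit argument. */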
1793 space = MIN(space, (u_int32_t)-1);
1794
1795 addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align;
1796 addr += min;
1797
1798 return (addr);
1799 }
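
/*
 * Worked example (illustrative, with made-up bounds): for
 * align = 0x10000, VM_PIE_MIN_ADDR = 0x1000 and
 * VM_PIE_MAX_ADDR = 0x100000, min rounds up to 0x10000 and
 * space = (0x100000 - 0x10000) / 0x10000 = 15 slots.
 * arc4random_uniform() then picks a slot in [0, 15), so the result
 * addr = slot * 0x10000 + 0x10000 always lies in [0x10000, 0x100000)
 * on an align boundary.
 */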
1800
1801 void
1802 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end)
1803 {
1804 struct uvm_map_deadq dead;
1805
1806 KASSERT((start & (vaddr_t)PAGE_MASK) == 0 &&
1807 (end & (vaddr_t)PAGE_MASK) == 0);
1808 TAILQ_INIT(&dead);
1809 vm_map_lock(map);
1810 uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE, FALSE);
1811 vm_map_unlock(map);
1812
1813 if (map->flags & VM_MAP_INTRSAFE)
1814 uvm_unmap_detach_intrsafe(&dead);
1815 else
1816 uvm_unmap_detach(&dead, 0);
1817 }
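
/*
 * Illustrative usage (not from the original source): uvm_unmap()
 * asserts a page-aligned range, so a hypothetical caller releasing a
 * kernel virtual allocation would round the bounds first.
 */
#if 0
	uvm_unmap(kernel_map, trunc_page(va), round_page(va + len));
#endif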
1818
1819 /*
1820 * Mark entry as free.
1821 *
1822 * entry will be put on the dead list.
1823 * The free space will be merged into the previous or a new entry,
1824 * unless markfree is false.
1825 */
1826 void
1827 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry,
1828 struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead,
1829 boolean_t markfree)
1830 {
1831 struct uvm_addr_state *free;
1832 struct vm_map_entry *prev;
1833 vaddr_t addr; /* Start of freed range. */
1834 vaddr_t end; /* End of freed range. */
1835
1836 UVM_MAP_REQ_WRITE(map);
1837
1838 prev = *prev_ptr;
1839 if (prev == entry)
1840 *prev_ptr = prev = NULL;
1841
1842 if (prev == NULL ||
1843 VMMAP_FREE_END(prev) != entry->start)
1844 prev = RBT_PREV(uvm_map_addr, entry);
1845
1846 	/* Entry describes only free memory and has nothing to drain into. */
1847 if (prev == NULL && entry->start == entry->end && markfree) {
1848 *prev_ptr = entry;
1849 return;
1850 }
1851
1852 addr = entry->start;
1853 end = VMMAP_FREE_END(entry);
1854 free = uvm_map_uaddr_e(map, entry);
1855 uvm_mapent_free_remove(map, free, entry);
1856 uvm_mapent_addr_remove(map, entry);
1857 DEAD_ENTRY_PUSH(dead, entry);
1858
1859 if (markfree) {
1860 if (prev) {
1861 free = uvm_map_uaddr_e(map, prev);
1862 uvm_mapent_free_remove(map, free, prev);
1863 }
1864 *prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0);
1865 }
1866 }
1867
1868 /*
1869 * Unwire and release referenced amap and object from map entry.
1870 */
1871 void
1872 uvm_unmap_kill_entry_withlock(struct vm_map *map, struct vm_map_entry *entry,
1873 int needlock)
1874 {
1875 /* Unwire removed map entry. */
1876 if (VM_MAPENT_ISWIRED(entry)) {
1877 KERNEL_LOCK();
1878 entry->wired_count = 0;
1879 uvm_fault_unwire_locked(map, entry->start, entry->end);
1880 KERNEL_UNLOCK();
1881 }
1882
1883 if (needlock)
1884 uvm_map_lock_entry(entry);
1885
1886 /* Entry-type specific code. */
1887 if (UVM_ET_ISHOLE(entry)) {
1888 /* Nothing to be done for holes. */
1889 } else if (map->flags & VM_MAP_INTRSAFE) {
1890 KASSERT(vm_map_pmap(map) == pmap_kernel());
1891
1892 uvm_km_pgremove_intrsafe(entry->start, entry->end);
1893 } else if (UVM_ET_ISOBJ(entry) &&
1894 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
1895 KASSERT(vm_map_pmap(map) == pmap_kernel());
1896 /*
1897 * Note: kernel object mappings are currently used in
1898 * two ways:
1899 * [1] "normal" mappings of pages in the kernel object
1900 * [2] uvm_km_valloc'd allocations in which we
1901 * pmap_enter in some non-kernel-object page
1902 * (e.g. vmapbuf).
1903 *
1904 * for case [1], we need to remove the mapping from
1905 * the pmap and then remove the page from the kernel
1906 * object (because, once pages in a kernel object are
1907 * unmapped they are no longer needed, unlike, say,
1908 * a vnode where you might want the data to persist
1909 * until flushed out of a queue).
1910 *
1911 * for case [2], we need to remove the mapping from
1912 * the pmap. there shouldn't be any pages at the
1913 * specified offset in the kernel object [but it
1914 * doesn't hurt to call uvm_km_pgremove just to be
1915 * safe?]
1916 *
1917 * uvm_km_pgremove currently does the following:
1918 * for pages in the kernel object range:
1919 * - drops the swap slot
1920 * - uvm_pagefree the page
1921 *
1922 	 * note there is a version of uvm_km_pgremove() that
1923 * is used for "intrsafe" objects.
1924 */
1925 /*
1926 * remove mappings from pmap and drop the pages
1927 * from the object. offsets are always relative
1928 * to vm_map_min(kernel_map).
1929 */
1930 uvm_km_pgremove(entry->object.uvm_obj, entry->start,
1931 entry->end);
1932 } else {
1933 /* remove mappings the standard way. */
1934 pmap_remove(map->pmap, entry->start, entry->end);
1935 }
1936
1937 if (needlock)
1938 uvm_map_unlock_entry(entry);
1939 }
1940
1941 void
1942 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry)
1943 {
1944 uvm_unmap_kill_entry_withlock(map, entry, 0);
1945 }
1946
1947 /*
1948 * Remove all entries from start to end.
1949 *
1950 * If remove_holes, then remove ET_HOLE entries as well.
1951  * If markfree, the entry will be properly marked free; otherwise no
1952  * replacement entry will be put in the tree (corrupting the tree).
1953 */
1954 int
1955 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
1956 struct uvm_map_deadq *dead, boolean_t remove_holes,
1957 boolean_t markfree, boolean_t checkimmutable)
1958 {
1959 struct vm_map_entry *prev_hint, *next, *entry;
1960
1961 start = MAX(start, map->min_offset);
1962 end = MIN(end, map->max_offset);
1963 if (start >= end)
1964 return 0;
1965
1966 vm_map_assert_wrlock(map);
1967
1968 /* Find first affected entry. */
1969 entry = uvm_map_entrybyaddr(&map->addr, start);
1970 KDASSERT(entry != NULL && entry->start <= start);
1971
1972 if (checkimmutable) {
1973 struct vm_map_entry *entry1 = entry;
1974
1975 /* Refuse to unmap if any entries are immutable */
1976 if (entry1->end <= start)
1977 entry1 = RBT_NEXT(uvm_map_addr, entry1);
1978 for (; entry1 != NULL && entry1->start < end; entry1 = next) {
1979 KDASSERT(entry1->start >= start);
1980 next = RBT_NEXT(uvm_map_addr, entry1);
1981 /* Treat memory holes as free space. */
1982 if (entry1->start == entry1->end || UVM_ET_ISHOLE(entry1))
1983 continue;
1984 if (entry1->etype & UVM_ET_IMMUTABLE)
1985 return EPERM;
1986 }
1987 }
1988
1989 if (entry->end <= start && markfree)
1990 entry = RBT_NEXT(uvm_map_addr, entry);
1991 else
1992 UVM_MAP_CLIP_START(map, entry, start);
1993
1994 /*
1995 * Iterate entries until we reach end address.
1996 * prev_hint hints where the freed space can be appended to.
1997 */
1998 prev_hint = NULL;
1999 for (; entry != NULL && entry->start < end; entry = next) {
2000 KDASSERT(entry->start >= start);
2001 if (entry->end > end || !markfree)
2002 UVM_MAP_CLIP_END(map, entry, end);
2003 KDASSERT(entry->start >= start && entry->end <= end);
2004 next = RBT_NEXT(uvm_map_addr, entry);
2005
2006 /* Don't remove holes unless asked to do so. */
2007 if (UVM_ET_ISHOLE(entry)) {
2008 if (!remove_holes) {
2009 prev_hint = entry;
2010 continue;
2011 }
2012 }
2013
2014 		/* A stack has been removed. */
2015 if (UVM_ET_ISSTACK(entry) && (map->flags & VM_MAP_ISVMSPACE))
2016 map->sserial++;
2017
2018 /* Kill entry. */
2019 uvm_unmap_kill_entry_withlock(map, entry, 1);
2020
2021 /* Update space usage. */
2022 if ((map->flags & VM_MAP_ISVMSPACE) &&
2023 entry->object.uvm_obj == NULL &&
2024 entry->protection != PROT_NONE &&
2025 !UVM_ET_ISHOLE(entry)) {
2026 ((struct vmspace *)map)->vm_dused -=
2027 uvmspace_dused(map, entry->start, entry->end);
2028 }
2029 if (!UVM_ET_ISHOLE(entry))
2030 map->size -= entry->end - entry->start;
2031
2032 /* Actual removal of entry. */
2033 uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree);
2034 }
2035
2036 pmap_update(vm_map_pmap(map));
2037
2038 #ifdef VMMAP_DEBUG
2039 if (markfree) {
2040 for (entry = uvm_map_entrybyaddr(&map->addr, start);
2041 entry != NULL && entry->start < end;
2042 entry = RBT_NEXT(uvm_map_addr, entry)) {
2043 KDASSERT(entry->end <= start ||
2044 entry->start == entry->end ||
2045 UVM_ET_ISHOLE(entry));
2046 }
2047 } else {
2048 vaddr_t a;
2049 for (a = start; a < end; a += PAGE_SIZE)
2050 KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL);
2051 }
2052 #endif
2053 return 0;
2054 }
2055
2056 /*
2057 * Mark all entries from first until end (exclusive) as pageable.
2058 *
2059 * Lock must be exclusive on entry and will not be touched.
2060 */
2061 void
2062 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first,
2063 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr)
2064 {
2065 struct vm_map_entry *iter;
2066
2067 for (iter = first; iter != end;
2068 iter = RBT_NEXT(uvm_map_addr, iter)) {
2069 KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
2070 if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
2071 continue;
2072
2073 iter->wired_count = 0;
2074 uvm_fault_unwire_locked(map, iter->start, iter->end);
2075 }
2076 }
2077
2078 /*
2079 * Mark all entries from first until end (exclusive) as wired.
2080 *
2081 * Lockflags determines the lock state on return from this function.
2082 * Lock must be exclusive on entry.
2083 */
2084 int
2085 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first,
2086 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr,
2087 int lockflags)
2088 {
2089 struct vm_map_entry *iter;
2090 #ifdef DIAGNOSTIC
2091 unsigned int timestamp_save;
2092 #endif
2093 int error;
2094
2095 /*
2096 * Wire pages in two passes:
2097 *
2098 * 1: holding the write lock, we create any anonymous maps that need
2099 * to be created. then we clip each map entry to the region to
2100 * be wired and increment its wiring count.
2101 *
2102 * 2: we mark the map busy, unlock it and call uvm_fault_wire to fault
2103 * in the pages for any newly wired area (wired_count == 1).
2104 */
2105 for (iter = first; iter != end;
2106 iter = RBT_NEXT(uvm_map_addr, iter)) {
2107 KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
2108 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2109 iter->protection == PROT_NONE)
2110 continue;
2111
2112 /*
2113 * Perform actions of vm_map_lookup that need the write lock.
2114 * - create an anonymous map for copy-on-write
2115 * - anonymous map for zero-fill
2116 * Skip submaps.
2117 */
2118 if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) &&
2119 UVM_ET_ISNEEDSCOPY(iter) &&
2120 ((iter->protection & PROT_WRITE) ||
2121 iter->object.uvm_obj == NULL)) {
2122 amap_copy(map, iter, M_WAITOK,
2123 UVM_ET_ISSTACK(iter) ? FALSE : TRUE,
2124 iter->start, iter->end);
2125 }
2126 iter->wired_count++;
2127 }
2128
2129 /*
2130 * Pass 2.
2131 */
2132 #ifdef DIAGNOSTIC
2133 timestamp_save = map->timestamp;
2134 #endif
2135 vm_map_busy(map);
2136 vm_map_unlock(map);
2137
2138 error = 0;
2139 for (iter = first; error == 0 && iter != end;
2140 iter = RBT_NEXT(uvm_map_addr, iter)) {
2141 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2142 iter->protection == PROT_NONE)
2143 continue;
2144
2145 error = uvm_fault_wire(map, iter->start, iter->end,
2146 iter->protection);
2147 }
2148
2149 vm_map_lock(map);
2150 vm_map_unbusy(map);
2151
2152 if (error) {
2153 #ifdef DIAGNOSTIC
2154 if (timestamp_save != map->timestamp)
2155 panic("uvm_map_pageable_wire: stale map");
2156 #endif
2157
2158 /*
2159 * first is no longer needed to restart loops.
2160 		 * Use it as iterator to unwire the successfully wired entries.
2161 */
2162 for (; first != iter;
2163 first = RBT_NEXT(uvm_map_addr, first)) {
2164 if (UVM_ET_ISHOLE(first) ||
2165 first->start == first->end ||
2166 first->protection == PROT_NONE)
2167 continue;
2168
2169 first->wired_count--;
2170 if (!VM_MAPENT_ISWIRED(first)) {
2171 uvm_fault_unwire_locked(map,
2172 first->start, first->end);
2173 }
2174 }
2175
2176 /* decrease counter in the rest of the entries */
2177 for (; iter != end;
2178 iter = RBT_NEXT(uvm_map_addr, iter)) {
2179 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2180 iter->protection == PROT_NONE)
2181 continue;
2182
2183 iter->wired_count--;
2184 }
2185
2186 if ((lockflags & UVM_LK_EXIT) == 0)
2187 vm_map_unlock(map);
2188 return error;
2189 }
2190
2191
2192 if ((lockflags & UVM_LK_EXIT) == 0) {
2193 vm_map_unlock(map);
2194 } else {
2195 #ifdef DIAGNOSTIC
2196 if (timestamp_save != map->timestamp)
2197 panic("uvm_map_pageable_wire: stale map");
2198 #endif
2199 }
2200 return 0;
2201 }
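
/*
 * Condensed sketch of the two-pass protocol above (illustrative
 * only; the real loops also skip holes and handle amap creation):
 *
 *	vm_map_lock(map);
 *	for each entry:  wired_count++;			(pass 1)
 *	vm_map_busy(map); vm_map_unlock(map);
 *	for each entry:  uvm_fault_wire(...);		(pass 2)
 *	vm_map_lock(map); vm_map_unbusy(map);
 *	on error: decrement wired_count everywhere and unwire the
 *	entries that had been faulted in successfully.
 */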
2202
2203 /*
2204 * uvm_map_pageable: set pageability of a range in a map.
2205 *
2206 * Flags:
2207 * UVM_LK_ENTER: map is already locked by caller
2208 * UVM_LK_EXIT: don't unlock map on exit
2209 *
2210 * The full range must be in use (entries may not have fspace != 0).
2211 * UVM_ET_HOLE counts as unmapped.
2212 */
2213 int
2214 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
2215 boolean_t new_pageable, int lockflags)
2216 {
2217 struct vm_map_entry *first, *last, *tmp;
2218 int error;
2219
2220 start = trunc_page(start);
2221 end = round_page(end);
2222
2223 if (start > end)
2224 return EINVAL;
2225 if (start == end)
2226 return 0; /* nothing to do */
2227 if (start < map->min_offset)
2228 return EFAULT; /* why? see first XXX below */
2229 if (end > map->max_offset)
2230 return EINVAL; /* why? see second XXX below */
2231
2232 KASSERT(map->flags & VM_MAP_PAGEABLE);
2233 if ((lockflags & UVM_LK_ENTER) == 0)
2234 vm_map_lock(map);
2235
2236 /*
2237 * Find first entry.
2238 *
2239 * Initial test on start is different, because of the different
2240 * error returned. Rest is tested further down.
2241 */
2242 first = uvm_map_entrybyaddr(&map->addr, start);
2243 if (first->end <= start || UVM_ET_ISHOLE(first)) {
2244 /*
2245 * XXX if the first address is not mapped, it is EFAULT?
2246 */
2247 error = EFAULT;
2248 goto out;
2249 }
2250
2251 /* Check that the range has no holes. */
2252 for (last = first; last != NULL && last->start < end;
2253 last = RBT_NEXT(uvm_map_addr, last)) {
2254 if (UVM_ET_ISHOLE(last) ||
2255 (last->end < end && VMMAP_FREE_END(last) != last->end)) {
2256 /*
2257 * XXX unmapped memory in range, why is it EINVAL
2258 * instead of EFAULT?
2259 */
2260 error = EINVAL;
2261 goto out;
2262 }
2263 }
2264
2265 /*
2266 * Last ended at the first entry after the range.
2267 * Move back one step.
2268 *
2269 * Note that last may be NULL.
2270 */
2271 if (last == NULL) {
2272 last = RBT_MAX(uvm_map_addr, &map->addr);
2273 if (last->end < end) {
2274 error = EINVAL;
2275 goto out;
2276 }
2277 } else {
2278 KASSERT(last != first);
2279 last = RBT_PREV(uvm_map_addr, last);
2280 }
2281
2282 /* Wire/unwire pages here. */
2283 if (new_pageable) {
2284 /*
2285 * Mark pageable.
2286 * entries that are not wired are untouched.
2287 */
2288 if (VM_MAPENT_ISWIRED(first))
2289 UVM_MAP_CLIP_START(map, first, start);
2290 /*
2291 * Split last at end.
2292 * Make tmp be the first entry after what is to be touched.
2293 * If last is not wired, don't touch it.
2294 */
2295 if (VM_MAPENT_ISWIRED(last)) {
2296 UVM_MAP_CLIP_END(map, last, end);
2297 tmp = RBT_NEXT(uvm_map_addr, last);
2298 } else
2299 tmp = last;
2300
2301 uvm_map_pageable_pgon(map, first, tmp, start, end);
2302 error = 0;
2303
2304 out:
2305 if ((lockflags & UVM_LK_EXIT) == 0)
2306 vm_map_unlock(map);
2307 return error;
2308 } else {
2309 /*
2310 * Mark entries wired.
2311 * entries are always touched (because recovery needs this).
2312 */
2313 if (!VM_MAPENT_ISWIRED(first))
2314 UVM_MAP_CLIP_START(map, first, start);
2315 /*
2316 * Split last at end.
2317 * Make tmp be the first entry after what is to be touched.
2318 * If last is not wired, don't touch it.
2319 */
2320 if (!VM_MAPENT_ISWIRED(last)) {
2321 UVM_MAP_CLIP_END(map, last, end);
2322 tmp = RBT_NEXT(uvm_map_addr, last);
2323 } else
2324 tmp = last;
2325
2326 return uvm_map_pageable_wire(map, first, tmp, start, end,
2327 lockflags);
2328 }
2329 }
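
/*
 * Illustrative usage (not from the original source): an mlock(2)-style
 * wiring of [start, end) and the matching munlock(2)-style unwiring.
 * With neither UVM_LK_ENTER nor UVM_LK_EXIT given, the map is locked
 * and unlocked internally.
 */
#if 0
	error = uvm_map_pageable(map, start, end, FALSE, 0);	/* wire */
	error = uvm_map_pageable(map, start, end, TRUE, 0);	/* unwire */
#endif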
2330
2331 /*
2332 * uvm_map_pageable_all: special case of uvm_map_pageable - affects
2333 * all mapped regions.
2334 *
2335 * Map must not be locked.
2336 * If no flags are specified, all regions are unwired.
2337 */
2338 int
2339 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit)
2340 {
2341 vsize_t size;
2342 struct vm_map_entry *iter;
2343
2344 KASSERT(map->flags & VM_MAP_PAGEABLE);
2345 vm_map_lock(map);
2346
2347 if (flags == 0) {
2348 uvm_map_pageable_pgon(map, RBT_MIN(uvm_map_addr, &map->addr),
2349 NULL, map->min_offset, map->max_offset);
2350
2351 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
2352 vm_map_unlock(map);
2353 return 0;
2354 }
2355
2356 if (flags & MCL_FUTURE)
2357 vm_map_modflags(map, VM_MAP_WIREFUTURE, 0);
2358 if (!(flags & MCL_CURRENT)) {
2359 vm_map_unlock(map);
2360 return 0;
2361 }
2362
2363 /*
2364 * Count number of pages in all non-wired entries.
2365 * If the number exceeds the limit, abort.
2366 */
2367 size = 0;
2368 RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2369 if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
2370 continue;
2371
2372 size += iter->end - iter->start;
2373 }
2374
2375 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
2376 vm_map_unlock(map);
2377 return ENOMEM;
2378 }
2379
2380 /* XXX non-pmap_wired_count case must be handled by caller */
2381 #ifdef pmap_wired_count
2382 if (limit != 0 &&
2383 size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) {
2384 vm_map_unlock(map);
2385 return ENOMEM;
2386 }
2387 #endif
2388
2389 /*
2390 	 * uvm_map_pageable_wire will release the lock.
2391 */
2392 return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr),
2393 NULL, map->min_offset, map->max_offset, 0);
2394 }
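
/*
 * Illustrative usage (not from the original source): how
 * mlockall(2)/munlockall(2)-style requests map onto the flags.
 */
#if 0
	/* Wire all current mappings and all future ones. */
	error = uvm_map_pageable_all(map, MCL_CURRENT | MCL_FUTURE, limit);
	/* Unwire everything and clear VM_MAP_WIREFUTURE. */
	error = uvm_map_pageable_all(map, 0, 0);
#endif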
2395
2396 /*
2397 * Initialize map.
2398 *
2399 * Allocates sufficient entries to describe the free memory in the map.
2400 */
2401 void
2402 uvm_map_setup(struct vm_map *map, pmap_t pmap, vaddr_t min, vaddr_t max,
2403 int flags)
2404 {
2405 int i;
2406
2407 KASSERT((min & (vaddr_t)PAGE_MASK) == 0);
2408 KASSERT((max & (vaddr_t)PAGE_MASK) == 0 ||
2409 (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
2410
2411 /*
2412 * Update parameters.
2413 *
2414 * This code handles (vaddr_t)-1 and other page mask ending addresses
2415 * properly.
2416 * We lose the top page if the full virtual address space is used.
2417 */
2418 if (max & (vaddr_t)PAGE_MASK) {
2419 max += 1;
2420 if (max == 0) /* overflow */
2421 max -= PAGE_SIZE;
2422 }
2423
2424 RBT_INIT(uvm_map_addr, &map->addr);
2425 map->uaddr_exe = NULL;
2426 for (i = 0; i < nitems(map->uaddr_any); ++i)
2427 map->uaddr_any[i] = NULL;
2428 map->uaddr_brk_stack = NULL;
2429
2430 map->pmap = pmap;
2431 map->size = 0;
2432 map->ref_count = 0;
2433 map->min_offset = min;
2434 map->max_offset = max;
2435 map->b_start = map->b_end = 0; /* Empty brk() area by default. */
2436 map->s_start = map->s_end = 0; /* Empty stack area by default. */
2437 map->flags = flags;
2438 map->timestamp = 0;
2439 map->busy = NULL;
2440 if (flags & VM_MAP_ISVMSPACE)
2441 rw_init_flags(&map->lock, "vmmaplk", RWL_DUPOK);
2442 else
2443 rw_init(&map->lock, "kmmaplk");
2444 mtx_init(&map->mtx, IPL_VM);
2445 mtx_init(&map->flags_lock, IPL_VM);
2446
2447 /* Configure the allocators. */
2448 if (flags & VM_MAP_ISVMSPACE)
2449 uvm_map_setup_md(map);
2450 else
2451 map->uaddr_any[3] = &uaddr_kbootstrap;
2452
2453 /*
2454 * Fill map entries.
2455 * We do not need to write-lock the map here because only the current
2456 	 * thread sees it right now.  ref_count was set to 0 above to avoid
2457 * bogus triggering of lock-not-held assertions.
2458 */
2459 uvm_map_setup_entries(map);
2460 uvm_tree_sanity(map, __FILE__, __LINE__);
2461 map->ref_count = 1;
2462 }
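
/*
 * Worked example (illustrative): when a map extends to the top of
 * the address space, max arrives as (vaddr_t)-1, i.e. ending in
 * PAGE_MASK.  Adding 1 wraps it to 0, so the overflow branch pulls
 * it back by one page and the map ends at 0 - PAGE_SIZE: the top
 * page is sacrificed to keep max a proper exclusive bound.
 */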
2463
2464 /*
2465 * Destroy the map.
2466 *
2467 * This is the inverse operation to uvm_map_setup.
2468 */
2469 void
2470 uvm_map_teardown(struct vm_map *map)
2471 {
2472 struct uvm_map_deadq dead_entries;
2473 struct vm_map_entry *entry, *tmp;
2474 #ifdef VMMAP_DEBUG
2475 size_t numq, numt;
2476 #endif
2477 int i;
2478
2479 KERNEL_ASSERT_LOCKED();
2480 KERNEL_UNLOCK();
2481 KERNEL_ASSERT_UNLOCKED();
2482
2483 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
2484
2485 vm_map_lock(map);
2486
2487 /* Remove address selectors. */
2488 uvm_addr_destroy(map->uaddr_exe);
2489 map->uaddr_exe = NULL;
2490 for (i = 0; i < nitems(map->uaddr_any); i++) {
2491 uvm_addr_destroy(map->uaddr_any[i]);
2492 map->uaddr_any[i] = NULL;
2493 }
2494 uvm_addr_destroy(map->uaddr_brk_stack);
2495 map->uaddr_brk_stack = NULL;
2496
2497 /*
2498 * Remove entries.
2499 *
2500 * The following is based on graph breadth-first search.
2501 *
2502 * In color terms:
2503 * - the dead_entries set contains all nodes that are reachable
2504 * (i.e. both the black and the grey nodes)
2505 * - any entry not in dead_entries is white
2506 	 * - any entry that appears in dead_entries before entry
2507 	 *   is black; the rest are grey.
2508 * The set [entry, end] is also referred to as the wavefront.
2509 *
2510 * Since the tree is always a fully connected graph, the breadth-first
2511 * search guarantees that each vmmap_entry is visited exactly once.
2512 * The vm_map is broken down in linear time.
2513 */
2514 TAILQ_INIT(&dead_entries);
2515 if ((entry = RBT_ROOT(uvm_map_addr, &map->addr)) != NULL)
2516 DEAD_ENTRY_PUSH(&dead_entries, entry);
2517 while (entry != NULL) {
2518 sched_pause(yield);
2519 uvm_unmap_kill_entry(map, entry);
2520 if ((tmp = RBT_LEFT(uvm_map_addr, entry)) != NULL)
2521 DEAD_ENTRY_PUSH(&dead_entries, tmp);
2522 if ((tmp = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
2523 DEAD_ENTRY_PUSH(&dead_entries, tmp);
2524 /* Update wave-front. */
2525 entry = TAILQ_NEXT(entry, dfree.deadq);
2526 }
2527
2528 vm_map_unlock(map);
2529
2530 #ifdef VMMAP_DEBUG
2531 numt = numq = 0;
2532 RBT_FOREACH(entry, uvm_map_addr, &map->addr)
2533 numt++;
2534 TAILQ_FOREACH(entry, &dead_entries, dfree.deadq)
2535 numq++;
2536 KASSERT(numt == numq);
2537 #endif
2538 uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK);
2539
2540 KERNEL_LOCK();
2541
2542 pmap_destroy(map->pmap);
2543 map->pmap = NULL;
2544 }
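
/*
 * Sketch of the teardown traversal above (illustrative): the
 * dead-entry queue doubles as the BFS queue, so no extra memory is
 * needed while the map is destroyed.
 *
 *	push(root);
 *	for (entry = queue head; entry != NULL; entry = queue next) {
 *		kill(entry);
 *		push(left child);
 *		push(right child);
 *	}
 */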
2545
2546 /*
2547 * Populate map with free-memory entries.
2548 *
2549 * Map must be initialized and empty.
2550 */
2551 void
2552 uvm_map_setup_entries(struct vm_map *map)
2553 {
2554 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
2555
2556 uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0);
2557 }
2558
2559 /*
2560 * Split entry at given address.
2561 *
2562 * orig: entry that is to be split.
2563 * next: a newly allocated map entry that is not linked.
2564 * split: address at which the split is done.
2565 */
2566 void
2567 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig,
2568 struct vm_map_entry *next, vaddr_t split)
2569 {
2570 struct uvm_addr_state *free, *free_before;
2571 vsize_t adj;
2572
2573 if ((split & PAGE_MASK) != 0) {
2574 panic("uvm_map_splitentry: split address 0x%lx "
2575 "not on page boundary!", split);
2576 }
2577 KDASSERT(map != NULL && orig != NULL && next != NULL);
2578 uvm_tree_sanity(map, __FILE__, __LINE__);
2579 KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split);
2580
2581 #ifdef VMMAP_DEBUG
2582 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, orig) == orig);
2583 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, next) != next);
2584 #endif /* VMMAP_DEBUG */
2585
2586 /*
2587 * Free space will change, unlink from free space tree.
2588 */
2589 free = uvm_map_uaddr_e(map, orig);
2590 uvm_mapent_free_remove(map, free, orig);
2591
2592 adj = split - orig->start;
2593
2594 uvm_mapent_copy(orig, next);
2595 if (split >= orig->end) {
2596 next->etype = 0;
2597 next->offset = 0;
2598 next->wired_count = 0;
2599 next->start = next->end = split;
2600 next->guard = 0;
2601 next->fspace = VMMAP_FREE_END(orig) - split;
2602 next->aref.ar_amap = NULL;
2603 next->aref.ar_pageoff = 0;
2604 orig->guard = MIN(orig->guard, split - orig->end);
2605 orig->fspace = split - VMMAP_FREE_START(orig);
2606 } else {
2607 orig->fspace = 0;
2608 orig->guard = 0;
2609 orig->end = next->start = split;
2610
2611 if (next->aref.ar_amap) {
2612 amap_splitref(&orig->aref, &next->aref, adj);
2613 }
2614 if (UVM_ET_ISSUBMAP(orig)) {
2615 uvm_map_reference(next->object.sub_map);
2616 next->offset += adj;
2617 } else if (UVM_ET_ISOBJ(orig)) {
2618 if (next->object.uvm_obj->pgops &&
2619 next->object.uvm_obj->pgops->pgo_reference) {
2620 KERNEL_LOCK();
2621 next->object.uvm_obj->pgops->pgo_reference(
2622 next->object.uvm_obj);
2623 KERNEL_UNLOCK();
2624 }
2625 next->offset += adj;
2626 }
2627 }
2628
2629 /*
2630 * Link next into address tree.
2631 * Link orig and next into free-space tree.
2632 *
2633 * Don't insert 'next' into the addr tree until orig has been linked,
2634 * in case the free-list looks at adjacent entries in the addr tree
2635 * for its decisions.
2636 */
2637 if (orig->fspace > 0)
2638 free_before = free;
2639 else
2640 free_before = uvm_map_uaddr_e(map, orig);
2641 uvm_mapent_free_insert(map, free_before, orig);
2642 uvm_mapent_addr_insert(map, next);
2643 uvm_mapent_free_insert(map, free, next);
2644
2645 uvm_tree_sanity(map, __FILE__, __LINE__);
2646 }
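
/*
 * Illustrative example (not from the original source): splitting an
 * entry mapping [0x1000, 0x3000) that owns free space up to 0x5000
 * at split = 0x2000 yields orig = [0x1000, 0x2000) with no free
 * space and next = [0x2000, 0x3000) carrying the free space up to
 * 0x5000; amap and object references shift by adj = 0x1000.
 * A split at 0x4000 (at or past orig->end) instead carves a pure
 * free-space entry out of orig's fspace.
 */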
2647
2648
2649 #ifdef VMMAP_DEBUG
2650
2651 void
2652 uvm_tree_assert(struct vm_map *map, int test, char *test_str,
2653 char *file, int line)
2654 {
2655 char* map_special;
2656
2657 if (test)
2658 return;
2659
2660 if (map == kernel_map)
2661 map_special = " (kernel_map)";
2662 else if (map == kmem_map)
2663 map_special = " (kmem_map)";
2664 else
2665 map_special = "";
2666 panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file,
2667 line, test_str);
2668 }
2669
2670 /*
2671 * Check that map is sane.
2672 */
2673 void
2674 uvm_tree_sanity(struct vm_map *map, char *file, int line)
2675 {
2676 struct vm_map_entry *iter;
2677 vaddr_t addr;
2678 vaddr_t min, max, bound; /* Bounds checker. */
2679 struct uvm_addr_state *free;
2680
2681 addr = vm_map_min(map);
2682 RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2683 /*
2684 * Valid start, end.
2685 * Catch overflow for end+fspace.
2686 */
2687 UVM_ASSERT(map, iter->end >= iter->start, file, line);
2688 UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line);
2689
2690 /* May not be empty. */
2691 UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter),
2692 file, line);
2693
2694 /* Addresses for entry must lie within map boundaries. */
2695 UVM_ASSERT(map, iter->start >= vm_map_min(map) &&
2696 VMMAP_FREE_END(iter) <= vm_map_max(map), file, line);
2697
2698 /* Tree may not have gaps. */
2699 UVM_ASSERT(map, iter->start == addr, file, line);
2700 addr = VMMAP_FREE_END(iter);
2701
2702 /*
2703 * Free space may not cross boundaries, unless the same
2704 * free list is used on both sides of the border.
2705 */
2706 min = VMMAP_FREE_START(iter);
2707 max = VMMAP_FREE_END(iter);
2708
2709 while (min < max &&
2710 (bound = uvm_map_boundary(map, min, max)) != max) {
2711 UVM_ASSERT(map,
2712 uvm_map_uaddr(map, bound - 1) ==
2713 uvm_map_uaddr(map, bound),
2714 file, line);
2715 min = bound;
2716 }
2717
2718 free = uvm_map_uaddr_e(map, iter);
2719 if (free) {
2720 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0,
2721 file, line);
2722 } else {
2723 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0,
2724 file, line);
2725 }
2726 }
2727 UVM_ASSERT(map, addr == vm_map_max(map), file, line);
2728 }
2729
2730 void
2731 uvm_tree_size_chk(struct vm_map *map, char *file, int line)
2732 {
2733 struct vm_map_entry *iter;
2734 vsize_t size;
2735
2736 size = 0;
2737 RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2738 if (!UVM_ET_ISHOLE(iter))
2739 size += iter->end - iter->start;
2740 }
2741
2742 if (map->size != size)
2743 printf("map size = 0x%lx, should be 0x%lx\n", map->size, size);
2744 UVM_ASSERT(map, map->size == size, file, line);
2745
2746 vmspace_validate(map);
2747 }
2748
2749 /*
2750 * This function validates the statistics on vmspace.
2751 */
2752 void
2753 vmspace_validate(struct vm_map *map)
2754 {
2755 struct vmspace *vm;
2756 struct vm_map_entry *iter;
2757 vaddr_t imin, imax;
2758 vaddr_t stack_begin, stack_end; /* Position of stack. */
2759 vsize_t stack, heap; /* Measured sizes. */
2760
2761 if (!(map->flags & VM_MAP_ISVMSPACE))
2762 return;
2763
2764 vm = (struct vmspace *)map;
2765 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
2766 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
2767
2768 stack = heap = 0;
2769 RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2770 imin = imax = iter->start;
2771
2772 if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL ||
2773 iter->protection != PROT_NONE)
2774 continue;
2775
2776 /*
2777 * Update stack, heap.
2778 * Keep in mind that (theoretically) the entries of
2779 * userspace and stack may be joined.
2780 */
2781 while (imin != iter->end) {
2782 /*
2783 * Set imax to the first boundary crossed between
2784 * imin and stack addresses.
2785 */
2786 imax = iter->end;
2787 if (imin < stack_begin && imax > stack_begin)
2788 imax = stack_begin;
2789 else if (imin < stack_end && imax > stack_end)
2790 imax = stack_end;
2791
2792 if (imin >= stack_begin && imin < stack_end)
2793 stack += imax - imin;
2794 else
2795 heap += imax - imin;
2796 imin = imax;
2797 }
2798 }
2799
2800 heap >>= PAGE_SHIFT;
2801 if (heap != vm->vm_dused) {
2802 printf("vmspace stack range: 0x%lx-0x%lx\n",
2803 stack_begin, stack_end);
2804 panic("vmspace_validate: vmspace.vm_dused invalid, "
2805 "expected %ld pgs, got %d pgs in map %p",
2806 heap, vm->vm_dused,
2807 map);
2808 }
2809 }
2810
2811 #endif /* VMMAP_DEBUG */
2812
2813 /*
2814 * uvm_map_init: init mapping system at boot time. note that we allocate
2815 * and init the static pool of structs vm_map_entry for the kernel here.
2816 */
2817 void
2818 uvm_map_init(void)
2819 {
2820 static struct vm_map_entry kernel_map_entry[MAX_KMAPENT];
2821 int lcv;
2822
2823 /* now set up static pool of kernel map entries ... */
2824 mtx_init(&uvm_kmapent_mtx, IPL_VM);
2825 SLIST_INIT(&uvm.kentry_free);
2826 for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
2827 SLIST_INSERT_HEAD(&uvm.kentry_free,
2828 &kernel_map_entry[lcv], daddrs.addr_kentry);
2829 }
2830
2831 /* initialize the map-related pools. */
2832 pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0,
2833 IPL_NONE, PR_WAITOK, "vmsppl", NULL);
2834 pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0,
2835 IPL_VM, PR_WAITOK, "vmmpepl", NULL);
2836 pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0,
2837 IPL_VM, 0, "vmmpekpl", NULL);
2838 pool_sethiwat(&uvm_map_entry_pool, 8192);
2839
2840 uvm_addr_init();
2841 }
2842
2843 #if defined(DDB)
2844
2845 /*
2846 * DDB hooks
2847 */
2848
2849 /*
2850 * uvm_map_printit: actually prints the map
2851 */
2852 void
2853 uvm_map_printit(struct vm_map *map, boolean_t full,
2854 int (*pr)(const char *, ...))
2855 {
2856 struct vmspace *vm;
2857 struct vm_map_entry *entry;
2858 struct uvm_addr_state *free;
2859 int in_free, i;
2860 char buf[8];
2861
2862 (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset);
2863 (*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n",
2864 map->b_start, map->b_end);
2865 (*pr)("\tstack allocate range: 0x%lx-0x%lx\n",
2866 map->s_start, map->s_end);
2867 (*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n",
2868 map->size, map->ref_count, map->timestamp,
2869 map->flags);
2870 (*pr)("\tpmap=%p(resident=%d)\n", map->pmap,
2871 pmap_resident_count(map->pmap));
2872
2873 /* struct vmspace handling. */
2874 if (map->flags & VM_MAP_ISVMSPACE) {
2875 vm = (struct vmspace *)map;
2876
2877 (*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n",
2878 vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss);
2879 (*pr)("\tvm_tsize=%u vm_dsize=%u\n",
2880 vm->vm_tsize, vm->vm_dsize);
2881 (*pr)("\tvm_taddr=%p vm_daddr=%p\n",
2882 vm->vm_taddr, vm->vm_daddr);
2883 (*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n",
2884 vm->vm_maxsaddr, vm->vm_minsaddr);
2885 }
2886
2887 if (!full)
2888 goto print_uaddr;
2889 RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
2890 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
2891 entry, entry->start, entry->end, entry->object.uvm_obj,
2892 (long long)entry->offset, entry->aref.ar_amap,
2893 entry->aref.ar_pageoff);
2894 (*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, "
2895 "prot(max)=%d/%d, inh=%d, "
2896 "wc=%d, adv=%d\n",
2897 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
2898 (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
2899 (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
2900 (entry->etype & UVM_ET_STACK) ? 'T' : 'F',
2901 entry->protection, entry->max_protection,
2902 entry->inheritance, entry->wired_count, entry->advice);
2903
2904 free = uvm_map_uaddr_e(map, entry);
2905 in_free = (free != NULL);
2906 (*pr)("\thole=%c, free=%c, guard=0x%lx, "
2907 "free=0x%lx-0x%lx\n",
2908 (entry->etype & UVM_ET_HOLE) ? 'T' : 'F',
2909 in_free ? 'T' : 'F',
2910 entry->guard,
2911 VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
2912 (*pr)("\tfspace_augment=%lu\n", entry->fspace_augment);
2913 (*pr)("\tfreemapped=%c, uaddr=%p\n",
2914 (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free);
2915 if (free) {
2916 (*pr)("\t\t(0x%lx-0x%lx %s)\n",
2917 free->uaddr_minaddr, free->uaddr_maxaddr,
2918 free->uaddr_functions->uaddr_name);
2919 }
2920 }
2921
2922 print_uaddr:
2923 uvm_addr_print(map->uaddr_exe, "exe", full, pr);
2924 for (i = 0; i < nitems(map->uaddr_any); i++) {
2925 snprintf(&buf[0], sizeof(buf), "any[%d]", i);
2926 uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr);
2927 }
2928 uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr);
2929 }
2930
2931 /*
2932 * uvm_object_printit: actually prints the object
2933 */
2934 void
2935 uvm_object_printit(struct uvm_object *uobj, boolean_t full,
2936 int (*pr)(const char *, ...))
2937 {
2938 struct vm_page *pg;
2939 int cnt = 0;
2940
2941 (*pr)("OBJECT %p: pgops=%p, npages=%d, ",
2942 uobj, uobj->pgops, uobj->uo_npages);
2943 if (UVM_OBJ_IS_KERN_OBJECT(uobj))
2944 (*pr)("refs=<SYSTEM>\n");
2945 else
2946 (*pr)("refs=%d\n", uobj->uo_refs);
2947
2948 if (!full) {
2949 return;
2950 }
2951 (*pr)(" PAGES <pg,offset>:\n ");
2952 RBT_FOREACH(pg, uvm_objtree, &uobj->memt) {
2953 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
2954 if ((cnt % 3) == 2) {
2955 (*pr)("\n ");
2956 }
2957 cnt++;
2958 }
2959 if ((cnt % 3) != 2) {
2960 (*pr)("\n");
2961 }
2962 }
2963
2964 /*
2965 * uvm_page_printit: actually print the page
2966 */
2967 static const char page_flagbits[] =
2968 "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY"
2969 "\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ"
2970 "\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5";
2971
2972 void
2973 uvm_page_printit(struct vm_page *pg, boolean_t full,
2974 int (*pr)(const char *, ...))
2975 {
2976 struct vm_page *tpg;
2977 struct uvm_object *uobj;
2978 struct pglist *pgl;
2979
2980 (*pr)("PAGE %p:\n", pg);
2981 (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n",
2982 pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count,
2983 (long long)pg->phys_addr);
2984 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx\n",
2985 pg->uobject, pg->uanon, (long long)pg->offset);
2986 #if defined(UVM_PAGE_TRKOWN)
2987 if (pg->pg_flags & PG_BUSY)
2988 (*pr)(" owning thread = %d, tag=%s",
2989 pg->owner, pg->owner_tag);
2990 else
2991 (*pr)(" page not busy, no owner");
2992 #else
2993 (*pr)(" [page ownership tracking disabled]");
2994 #endif
2995 (*pr)("\tvm_page_md %p\n", &pg->mdpage);
2996
2997 if (!full)
2998 return;
2999
3000 /* cross-verify object/anon */
3001 if ((pg->pg_flags & PQ_FREE) == 0) {
3002 if (pg->pg_flags & PQ_ANON) {
3003 if (pg->uanon == NULL || pg->uanon->an_page != pg)
3004 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n",
3005 (pg->uanon) ? pg->uanon->an_page : NULL);
3006 else
3007 (*pr)(" anon backpointer is OK\n");
3008 } else {
3009 uobj = pg->uobject;
3010 if (uobj) {
3011 (*pr)(" checking object list\n");
3012 RBT_FOREACH(tpg, uvm_objtree, &uobj->memt) {
3013 if (tpg == pg) {
3014 break;
3015 }
3016 }
3017 if (tpg)
3018 (*pr)(" page found on object list\n");
3019 else
3020 (*pr)(" >>> PAGE NOT FOUND "
3021 "ON OBJECT LIST! <<<\n");
3022 }
3023 }
3024 }
3025
3026 /* cross-verify page queue */
3027 if (pg->pg_flags & PQ_FREE) {
3028 if (uvm_pmr_isfree(pg))
3029 (*pr)(" page found in uvm_pmemrange\n");
3030 else
3031 (*pr)(" >>> page not found in uvm_pmemrange <<<\n");
3032 pgl = NULL;
3033 } else if (pg->pg_flags & PQ_INACTIVE) {
3034 pgl = &uvm.page_inactive;
3035 } else if (pg->pg_flags & PQ_ACTIVE) {
3036 pgl = &uvm.page_active;
3037 } else {
3038 pgl = NULL;
3039 }
3040
3041 if (pgl) {
3042 (*pr)(" checking pageq list\n");
3043 TAILQ_FOREACH(tpg, pgl, pageq) {
3044 if (tpg == pg) {
3045 break;
3046 }
3047 }
3048 if (tpg)
3049 (*pr)(" page found on pageq list\n");
3050 else
3051 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
3052 }
3053 }
3054 #endif
3055
3056 /*
3057 * uvm_map_protect: change map protection
3058 *
3059 * => set_max means set max_protection.
3060 * => map must be unlocked.
3061 */
3062 int
3063 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
3064 vm_prot_t new_prot, int etype, boolean_t set_max, boolean_t checkimmutable)
3065 {
3066 struct vm_map_entry *first, *iter;
3067 vm_prot_t old_prot;
3068 vm_prot_t mask;
3069 vsize_t dused;
3070 int error;
3071
3072 KASSERT((etype & ~UVM_ET_STACK) == 0); /* only UVM_ET_STACK allowed */
3073
3074 if (start > end)
3075 return EINVAL;
3076 start = MAX(start, map->min_offset);
3077 end = MIN(end, map->max_offset);
3078 if (start >= end)
3079 return 0;
3080
3081 dused = 0;
3082 error = 0;
3083 vm_map_lock(map);
3084
3085 /*
3086 * Set up first and last.
3087 * - first will contain first entry at or after start.
3088 */
3089 first = uvm_map_entrybyaddr(&map->addr, start);
3090 KDASSERT(first != NULL);
3091 if (first->end <= start)
3092 first = RBT_NEXT(uvm_map_addr, first);
3093
3094 /* First, check for protection violations. */
3095 for (iter = first; iter != NULL && iter->start < end;
3096 iter = RBT_NEXT(uvm_map_addr, iter)) {
3097 /* Treat memory holes as free space. */
3098 if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
3099 continue;
3100
3101 if (checkimmutable && (iter->etype & UVM_ET_IMMUTABLE)) {
3102 error = EPERM;
3103 goto out;
3104 }
3105 old_prot = iter->protection;
3106 if (old_prot == PROT_NONE && new_prot != old_prot) {
3107 dused += uvmspace_dused(
3108 map, MAX(start, iter->start), MIN(end, iter->end));
3109 }
3110
3111 if (UVM_ET_ISSUBMAP(iter)) {
3112 error = EINVAL;
3113 goto out;
3114 }
3115 if ((new_prot & iter->max_protection) != new_prot) {
3116 error = EACCES;
3117 goto out;
3118 }
3119 if (map == kernel_map &&
3120 (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
3121 panic("uvm_map_protect: kernel map W^X violation requested");
3122 }
3123
3124 /* Check limits. */
3125 if (dused > 0 && (map->flags & VM_MAP_ISVMSPACE)) {
3126 vsize_t limit = lim_cur(RLIMIT_DATA);
3127 dused = ptoa(dused);
3128 if (limit < dused ||
3129 limit - dused < ptoa(((struct vmspace *)map)->vm_dused)) {
3130 error = ENOMEM;
3131 goto out;
3132 }
3133 }
3134
3135 /* only apply UVM_ET_STACK on a mapping changing to RW */
3136 if (etype && new_prot != (PROT_READ|PROT_WRITE))
3137 etype = 0;
3138
3139 /* Fix protections. */
3140 for (iter = first; iter != NULL && iter->start < end;
3141 iter = RBT_NEXT(uvm_map_addr, iter)) {
3142 /* Treat memory holes as free space. */
3143 if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
3144 continue;
3145
3146 old_prot = iter->protection;
3147
3148 /*
3149 * Skip adapting protection iff old and new protection
3150 * are equal.
3151 */
3152 if (set_max) {
3153 if (old_prot == (new_prot & old_prot) &&
3154 iter->max_protection == new_prot)
3155 continue;
3156 } else {
3157 if (old_prot == new_prot)
3158 continue;
3159 }
3160
3161 UVM_MAP_CLIP_START(map, iter, start);
3162 UVM_MAP_CLIP_END(map, iter, end);
3163
3164 if (set_max) {
3165 iter->max_protection = new_prot;
3166 iter->protection &= new_prot;
3167 } else
3168 iter->protection = new_prot;
3169 iter->etype |= etype; /* potentially add UVM_ET_STACK */
3170
3171 /*
3172 * update physical map if necessary. worry about copy-on-write
3173 * here -- CHECK THIS XXX
3174 */
3175 if (iter->protection != old_prot) {
3176 mask = UVM_ET_ISCOPYONWRITE(iter) ?
3177 ~PROT_WRITE : PROT_MASK;
3178
3179 if (map->flags & VM_MAP_ISVMSPACE) {
3180 if (old_prot == PROT_NONE) {
3181 ((struct vmspace *)map)->vm_dused +=
3182 uvmspace_dused(map, iter->start,
3183 iter->end);
3184 }
3185 if (iter->protection == PROT_NONE) {
3186 ((struct vmspace *)map)->vm_dused -=
3187 uvmspace_dused(map, iter->start,
3188 iter->end);
3189 }
3190 }
3191
3192 /* update pmap */
3193 if ((iter->protection & mask) == PROT_NONE &&
3194 VM_MAPENT_ISWIRED(iter)) {
3195 /*
3196 * TODO(ariane) this is stupid. wired_count
3197 * is 0 if not wired, otherwise anything
3198 * larger than 0 (incremented once each time
3199 * wire is called).
3200 * Mostly to be able to undo the damage on
3201 			 * failure, not to actually be a wired
3202 * refcounter...
3203 * Originally: iter->wired_count--;
3204 * (don't we have to unwire this in the pmap
3205 * as well?)
3206 */
3207 iter->wired_count = 0;
3208 }
3209 uvm_map_lock_entry(iter);
3210 pmap_protect(map->pmap, iter->start, iter->end,
3211 iter->protection & mask);
3212 uvm_map_unlock_entry(iter);
3213 }
3214
3215 /*
3216 * If the map is configured to lock any future mappings,
3217 * wire this entry now if the old protection was PROT_NONE
3218 * and the new protection is not PROT_NONE.
3219 */
3220 if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
3221 VM_MAPENT_ISWIRED(iter) == 0 &&
3222 old_prot == PROT_NONE &&
3223 new_prot != PROT_NONE) {
3224 if (uvm_map_pageable(map, iter->start, iter->end,
3225 FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) {
3226 /*
3227 * If locking the entry fails, remember the
3228 * error if it's the first one. Note we
3229 * still continue setting the protection in
3230 * the map, but it will return the resource
3231 * storage condition regardless.
3232 *
3233 * XXX Ignore what the actual error is,
3234 * XXX just call it a resource shortage
3235 * XXX so that it doesn't get confused
3236 * XXX what uvm_map_protect() itself would
3237 * XXX normally return.
3238 */
3239 error = ENOMEM;
3240 }
3241 }
3242 }
3243 pmap_update(map->pmap);
3244
3245 out:
3246 if (etype & UVM_ET_STACK)
3247 map->sserial++;
3248 vm_map_unlock(map);
3249 return error;
3250 }
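
/*
 * Illustrative usage (not from the original source): an
 * mprotect(2)-style call changing the effective protection of a
 * page-aligned range, leaving max_protection and the entry type
 * untouched, while honouring immutable entries.
 */
#if 0
	error = uvm_map_protect(map, trunc_page(addr),
	    round_page(addr + len), PROT_READ, 0, FALSE, TRUE);
#endif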
3251
3252 /*
3253 * uvmspace_alloc: allocate a vmspace structure.
3254 *
3255 * - structure includes vm_map and pmap
3256 * - XXX: no locking on this structure
3257 * - refcnt set to 1, rest must be init'd by caller
3258 */
3259 struct vmspace *
3260 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable,
3261 boolean_t remove_holes)
3262 {
3263 struct vmspace *vm;
3264
3265 vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO);
3266 uvmspace_init(vm, NULL, min, max, pageable, remove_holes);
3267 return (vm);
3268 }
3269
3270 /*
3271 * uvmspace_init: initialize a vmspace structure.
3272 *
3273 * - XXX: no locking on this structure
3274 * - refcnt set to 1, rest must be init'd by caller
3275 */
3276 void
3277 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max,
3278 boolean_t pageable, boolean_t remove_holes)
3279 {
3280 KASSERT(pmap == NULL || pmap == pmap_kernel());
3281
3282 if (pmap)
3283 pmap_reference(pmap);
3284 else
3285 pmap = pmap_create();
3286
3287 uvm_map_setup(&vm->vm_map, pmap, min, max,
3288 (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE);
3289
3290 vm->vm_refcnt = 1;
3291
3292 if (remove_holes)
3293 pmap_remove_holes(vm);
3294 }
3295
3296 /*
3297 * uvmspace_share: share a vmspace between two processes
3298 *
3299 * - used for vfork
3300 */
3301
3302 struct vmspace *
3303 uvmspace_share(struct process *pr)
3304 {
3305 struct vmspace *vm = pr->ps_vmspace;
3306
3307 uvmspace_addref(vm);
3308 return vm;
3309 }
3310
3311 /*
3312 * uvmspace_exec: the process wants to exec a new program
3313 *
3314 * - XXX: no locking on vmspace
3315 */
3316
3317 void
3318 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end)
3319 {
3320 struct process *pr = p->p_p;
3321 struct vmspace *nvm, *ovm = pr->ps_vmspace;
3322 struct vm_map *map = &ovm->vm_map;
3323 struct uvm_map_deadq dead_entries;
3324
3325 KASSERT((start & (vaddr_t)PAGE_MASK) == 0);
3326 KASSERT((end & (vaddr_t)PAGE_MASK) == 0 ||
3327 (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
3328
3329 pmap_unuse_final(p); /* before stack addresses go away */
3330 TAILQ_INIT(&dead_entries);
3331
3332 /* see if more than one process is using this vmspace... */
3333 if (ovm->vm_refcnt == 1) {
3334 /*
3335 * If pr is the only process using its vmspace then
3336 * we can safely recycle that vmspace for the program
3337 * that is being exec'd.
3338 */
3339
3340 #ifdef SYSVSHM
3341 /*
3342 * SYSV SHM semantics require us to kill all segments on an exec
3343 */
3344 if (ovm->vm_shm)
3345 shmexit(ovm);
3346 #endif
3347
3348 /*
3349 * POSIX 1003.1b -- "lock future mappings" is revoked
3350 * when a process execs another program image.
3351 */
3352 vm_map_lock(map);
3353 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE |
3354 VM_MAP_PINSYSCALL_ONCE);
3355
3356 /*
3357 * now unmap the old program
3358 *
3359 * Instead of attempting to keep the map valid, we simply
3360 * nuke all entries and ask uvm_map_setup to reinitialize
3361 * the map to the new boundaries.
3362 *
3363 * uvm_unmap_remove will actually nuke all entries for us
3364 * (as in, not replace them with free-memory entries).
3365 */
3366 uvm_unmap_remove(map, map->min_offset, map->max_offset,
3367 &dead_entries, TRUE, FALSE, FALSE);
3368
3369 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
3370
3371 /* Nuke statistics and boundaries. */
3372 memset(&ovm->vm_startcopy, 0,
3373 (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy);
3374
3375
3376 if (end & (vaddr_t)PAGE_MASK) {
3377 end += 1;
3378 if (end == 0) /* overflow */
3379 end -= PAGE_SIZE;
3380 }
3381
3382 /* Setup new boundaries and populate map with entries. */
3383 map->min_offset = start;
3384 map->max_offset = end;
3385 uvm_map_setup_entries(map);
3386 vm_map_unlock(map);
3387
3388 /* but keep MMU holes unavailable */
3389 pmap_remove_holes(ovm);
3390 } else {
3391 /*
3392 * pr's vmspace is being shared, so we can't reuse
3393 * it for pr since it is still being used for others.
3394 * allocate a new vmspace for pr
3395 */
3396 nvm = uvmspace_alloc(start, end,
3397 (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE);
3398
3399 /* install new vmspace and drop our ref to the old one. */
3400 pmap_deactivate(p);
3401 p->p_vmspace = pr->ps_vmspace = nvm;
3402 pmap_activate(p);
3403
3404 uvmspace_free(ovm);
3405 }
3406 #ifdef PMAP_CHECK_COPYIN
3407 p->p_vmspace->vm_map.check_copyin_count = 0; /* disable checks */
3408 #endif
3409
3410 /* Release dead entries */
3411 uvm_unmap_detach(&dead_entries, 0);
3412 }
3413
3414 /*
3415 * uvmspace_addref: add a reference to a vmspace.
3416 */
3417 void
3418 uvmspace_addref(struct vmspace *vm)
3419 {
3420 KERNEL_ASSERT_LOCKED();
3421 KASSERT(vm->vm_refcnt > 0);
3422
3423 vm->vm_refcnt++;
3424 }
3425
3426 /*
3427 * uvmspace_free: free a vmspace data structure
3428 */
3429 void
3430 uvmspace_free(struct vmspace *vm)
3431 {
3432 KERNEL_ASSERT_LOCKED();
3433
3434 if (--vm->vm_refcnt == 0) {
3435 /*
3436 * lock the map, to wait out all other references to it. delete
3437 * all of the mappings and pages they hold, then call the pmap
3438 * module to reclaim anything left.
3439 */
3440 #ifdef SYSVSHM
3441 /* Get rid of any SYSV shared memory segments. */
3442 if (vm->vm_shm != NULL)
3443 shmexit(vm);
3444 #endif
3445
3446 uvm_map_teardown(&vm->vm_map);
3447 pool_put(&uvm_vmspace_pool, vm);
3448 }
3449 }
3450
3451 /*
3452 * uvm_share: Map the address range [srcaddr, srcaddr + sz) in
3453 * srcmap to the address range [dstaddr, dstaddr + sz) in
3454 * dstmap.
3455 *
3456 * The whole address range in srcmap must be backed by an object
3457 * (no holes).
3458 *
3459 * If successful, the address ranges share memory and the destination
3460 * address range uses the protection flags in prot.
3461 *
3462 * This routine assumes that sz is a multiple of PAGE_SIZE and
3463 * that dstaddr and srcaddr are page-aligned.
3464 */
3465 int
3466 uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot,
3467 struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz)
3468 {
3469 int ret = 0;
3470 vaddr_t unmap_end;
3471 vaddr_t dstva;
3472 vsize_t s_off, len, n = sz, remain;
3473 struct vm_map_entry *first = NULL, *last = NULL;
3474 struct vm_map_entry *src_entry, *psrc_entry = NULL;
3475 struct uvm_map_deadq dead;
3476
3477 if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr)
3478 return EINVAL;
3479
3480 TAILQ_INIT(&dead);
3481 vm_map_lock(dstmap);
3482 vm_map_lock_read(srcmap);
3483
3484 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) {
3485 ret = ENOMEM;
3486 goto exit_unlock;
3487 }
3488 if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) {
3489 ret = EINVAL;
3490 goto exit_unlock;
3491 }
3492
3493 dstva = dstaddr;
3494 unmap_end = dstaddr;
3495 for (; src_entry != NULL;
3496 psrc_entry = src_entry,
3497 src_entry = RBT_NEXT(uvm_map_addr, src_entry)) {
3498 /* hole in address space, bail out */
3499 if (psrc_entry != NULL && psrc_entry->end != src_entry->start)
3500 break;
3501 if (src_entry->start >= srcaddr + sz)
3502 break;
3503
3504 if (UVM_ET_ISSUBMAP(src_entry))
3505 panic("uvm_share: encountered a submap (illegal)");
3506 if (!UVM_ET_ISCOPYONWRITE(src_entry) &&
3507 UVM_ET_ISNEEDSCOPY(src_entry))
3508 panic("uvm_share: non-copy_on_write map entries "
3509 "marked needs_copy (illegal)");
3510
3511 /*
3512 		 * srcaddr > map entry start?  Then we are in the middle of a
3513 		 * map entry, so calculate the offset to use in the source map.
3514 */
3515 if (srcaddr > src_entry->start)
3516 s_off = srcaddr - src_entry->start;
3517 else if (srcaddr == src_entry->start)
3518 s_off = 0;
3519 else
3520 panic("uvm_share: map entry start > srcaddr");
3521
3522 remain = src_entry->end - src_entry->start - s_off;
3523
3524 /* Determine how many bytes to share in this pass */
3525 if (n < remain)
3526 len = n;
3527 else
3528 len = remain;
3529
3530 if (uvm_mapent_share(dstmap, dstva, len, s_off, prot, prot,
3531 srcmap, src_entry, &dead) == NULL)
3532 break;
3533
3534 n -= len;
3535 dstva += len;
3536 srcaddr += len;
3537 unmap_end = dstva + len;
3538 if (n == 0)
3539 goto exit_unlock;
3540 }
3541
3542 ret = EINVAL;
3543 uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE, FALSE);
3544
3545 exit_unlock:
3546 vm_map_unlock_read(srcmap);
3547 vm_map_unlock(dstmap);
3548 uvm_unmap_detach(&dead, 0);
3549
3550 return ret;
3551 }
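
/*
 * Illustrative usage (not from the original source): giving dstmap
 * shared access to a page-aligned, fully object-backed range that
 * already exists in srcmap.
 */
#if 0
	error = uvm_share(dstmap, dstaddr, PROT_READ | PROT_WRITE,
	    srcmap, srcaddr, sz);
#endif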
3552
3553 /*
3554 * Clone map entry into other map.
3555 *
3556 * Mapping will be placed at dstaddr, for the same length.
3557 * Space must be available.
3558 * Reference counters are incremented.
3559 */
3560 struct vm_map_entry *
3561 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
3562 vsize_t off, vm_prot_t prot, vm_prot_t maxprot,
3563 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead,
3564 int mapent_flags, int amap_share_flags)
3565 {
3566 struct vm_map_entry *new_entry, *first, *last;
3567
3568 KDASSERT(!UVM_ET_ISSUBMAP(old_entry));
3569
3570 /* Create new entry (linked in on creation). Fill in first, last. */
3571 first = last = NULL;
3572 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) {
3573 panic("uvm_mapent_clone: no space in map for "
3574 "entry in empty map");
3575 }
3576 new_entry = uvm_map_mkentry(dstmap, first, last,
3577 dstaddr, dstlen, mapent_flags, dead, NULL);
3578 if (new_entry == NULL)
3579 return NULL;
3580 /* old_entry -> new_entry */
3581 new_entry->object = old_entry->object;
3582 new_entry->offset = old_entry->offset;
3583 new_entry->aref = old_entry->aref;
3584 new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED;
3585 new_entry->protection = prot;
3586 new_entry->max_protection = maxprot;
3587 new_entry->inheritance = old_entry->inheritance;
3588 new_entry->advice = old_entry->advice;
3589
3590 /* gain reference to object backing the map (can't be a submap). */
3591 if (new_entry->aref.ar_amap) {
3592 new_entry->aref.ar_pageoff += off >> PAGE_SHIFT;
3593 amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
3594 (new_entry->end - new_entry->start) >> PAGE_SHIFT,
3595 amap_share_flags);
3596 }
3597
3598 if (UVM_ET_ISOBJ(new_entry) &&
3599 new_entry->object.uvm_obj->pgops->pgo_reference) {
3600 new_entry->offset += off;
3601 new_entry->object.uvm_obj->pgops->pgo_reference
3602 (new_entry->object.uvm_obj);
3603 }
3604
3605 return new_entry;
3606 }
3607
3608 struct vm_map_entry *
3609 uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
3610 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map,
3611 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3612 {
3613 /*
3614 * If old_entry refers to a copy-on-write region that has not yet been
3615 * written to (needs_copy flag is set), then we need to allocate a new
3616 * amap for old_entry.
3617 *
3618 * If we do not do this, and the process owning old_entry does a copy-on
3619 * write later, old_entry and new_entry will refer to different memory
3620 * regions, and the memory between the processes is no longer shared.
3621 *
3622 * [in other words, we need to clear needs_copy]
3623 */
3624
3625 if (UVM_ET_ISNEEDSCOPY(old_entry)) {
3626 /* get our own amap, clears needs_copy */
3627 amap_copy(old_map, old_entry, M_WAITOK, FALSE, 0, 0);
3628 /* XXXCDC: WAITOK??? */
3629 }
3630
3631 return uvm_mapent_clone(dstmap, dstaddr, dstlen, off,
3632 prot, maxprot, old_entry, dead, 0, AMAP_SHARED);
3633 }
3634
3635 /*
3636 * share the mapping: this means we want the old and
3637 * new entries to share amaps and backing objects.
3638 */
3639 struct vm_map_entry *
3640 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map,
3641 struct vm_map *old_map,
3642 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3643 {
3644 struct vm_map_entry *new_entry;
3645
3646 new_entry = uvm_mapent_share(new_map, old_entry->start,
3647 old_entry->end - old_entry->start, 0, old_entry->protection,
3648 old_entry->max_protection, old_map, old_entry, dead);
3649
3650 return (new_entry);
3651 }
3652
3653 /*
3654 * copy-on-write the mapping (using mmap's
3655 * MAP_PRIVATE semantics)
3656 *
3657 * allocate new_entry, adjust reference counts.
3658 * (note that new references are read-only).
3659 */
3660 struct vm_map_entry *
3661 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map,
3662 struct vm_map *old_map,
3663 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3664 {
3665 struct vm_map_entry *new_entry;
3666 boolean_t protect_child;
3667
3668 new_entry = uvm_mapent_clone(new_map, old_entry->start,
3669 old_entry->end - old_entry->start, 0, old_entry->protection,
3670 old_entry->max_protection, old_entry, dead, 0, 0);
3671
3672 new_entry->etype |=
3673 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
3674
3675 /*
3676 * the new entry will need an amap. it will either
3677 * need to be copied from the old entry or created
3678 * from scratch (if the old entry does not have an
3679 * amap). can we defer this process until later
3680 * (by setting "needs_copy") or do we need to copy
3681 * the amap now?
3682 *
3683 * we must copy the amap now if any of the following
3684 * conditions hold:
3685 * 1. the old entry has an amap and that amap is
3686 * being shared. this means that the old (parent)
3687 * process is sharing the amap with another
3688 * process. if we do not clear needs_copy here
3689 * we will end up in a situation where both the
3690 * parent and child process are referring to the
3691 * same amap with "needs_copy" set. if the
3692 * parent write-faults, the fault routine will
3693 * clear "needs_copy" in the parent by allocating
3694 * a new amap. this is wrong because the
3695 * parent is supposed to be sharing the old amap
3696 * and the new amap will break that.
3697 *
3698 * 2. if the old entry has an amap and a non-zero
3699 * wire count then we are going to have to call
3700 * amap_cow_now to avoid page faults in the
3701 * parent process. since amap_cow_now requires
3702 * "needs_copy" to be clear we might as well
3703 * clear it here as well.
3704 *
3705 */
3706 if (old_entry->aref.ar_amap != NULL &&
3707 ((amap_flags(old_entry->aref.ar_amap) &
3708 AMAP_SHARED) != 0 ||
3709 VM_MAPENT_ISWIRED(old_entry))) {
3710 amap_copy(new_map, new_entry, M_WAITOK, FALSE,
3711 0, 0);
3712 /* XXXCDC: M_WAITOK ... ok? */
3713 }
3714
3715 /*
3716 * if the parent's entry is wired down, then the
3717 * parent process does not want page faults on
3718 * access to that memory. this means that we
3719 * cannot do copy-on-write because we can't write
3720 * protect the old entry. in this case we
3721 * resolve all copy-on-write faults now, using
3722 * amap_cow_now. note that we have already
3723 * allocated any needed amap (above).
3724 */
3725 if (VM_MAPENT_ISWIRED(old_entry)) {
3726 /*
3727 * resolve all copy-on-write faults now
3728 * (note that there is nothing to do if
3729 * the old mapping does not have an amap).
3730 */
3731 if (old_entry->aref.ar_amap)
3732 amap_cow_now(new_map, new_entry);
3733 } else {
3734 if (old_entry->aref.ar_amap) {
3735 /*
3736 * setup mappings to trigger copy-on-write faults
3737 * we must write-protect the parent if it has
3738 * an amap and it is not already "needs_copy"...
3739 * if it is already "needs_copy" then the parent
3740 * has already been write-protected by a previous
3741 * fork operation.
3742 *
3743 * if we do not write-protect the parent, then
3744 * we must be sure to write-protect the child.
3745 */
3746 if (!UVM_ET_ISNEEDSCOPY(old_entry)) {
3747 if (old_entry->max_protection & PROT_WRITE) {
3748 uvm_map_lock_entry(old_entry);
3749 pmap_protect(old_map->pmap,
3750 old_entry->start,
3751 old_entry->end,
3752 old_entry->protection &
3753 ~PROT_WRITE);
3754 uvm_map_unlock_entry(old_entry);
3755 pmap_update(old_map->pmap);
3756 }
3757 old_entry->etype |= UVM_ET_NEEDSCOPY;
3758 }
3759
3760 /* parent must now be write-protected */
3761 protect_child = FALSE;
3762 } else {
3763 /*
3764 * we only need to protect the child if the
3765 * parent has write access.
3766 */
3767 if (old_entry->max_protection & PROT_WRITE)
3768 protect_child = TRUE;
3769 else
3770 protect_child = FALSE;
3771 }
3772
3773 /* protect the child's mappings if necessary */
3774 if (protect_child) {
3775 pmap_protect(new_map->pmap, new_entry->start,
3776 new_entry->end,
3777 new_entry->protection &
3778 ~PROT_WRITE);
3779 }
3780 }
3781
3782 return (new_entry);
3783 }
3784
3785 /*
3786 * zero the mapping: the new entry will be zero initialized
3787 */
3788 struct vm_map_entry *
3789 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map,
3790 struct vm_map *old_map,
3791 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3792 {
3793 struct vm_map_entry *new_entry;
3794
3795 new_entry = uvm_mapent_clone(new_map, old_entry->start,
3796 old_entry->end - old_entry->start, 0, old_entry->protection,
3797 old_entry->max_protection, old_entry, dead, 0, 0);
3798
3799 new_entry->etype |=
3800 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
3801
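/*
 * Drop the amap and object references cloned above: the child's copy
 * becomes anonymous zero-fill memory, populated on first fault.
 */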
3802 if (new_entry->aref.ar_amap) {
3803 amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
3804 atop(new_entry->end - new_entry->start), 0);
3805 new_entry->aref.ar_amap = NULL;
3806 new_entry->aref.ar_pageoff = 0;
3807 }
3808
3809 if (UVM_ET_ISOBJ(new_entry)) {
3810 if (new_entry->object.uvm_obj->pgops->pgo_detach)
3811 new_entry->object.uvm_obj->pgops->pgo_detach(
3812 new_entry->object.uvm_obj);
3813 new_entry->object.uvm_obj = NULL;
3814 new_entry->etype &= ~UVM_ET_OBJ;
3815 }
3816
3817 return (new_entry);
3818 }
3819
3820 /*
3821 * uvmspace_fork: fork a process' main map
3822 *
3823 * => create a new vmspace for child process from parent.
3824 * => parent's map must not be locked.
3825 */
3826 struct vmspace *
3827 uvmspace_fork(struct process *pr)
3828 {
3829 struct vmspace *vm1 = pr->ps_vmspace;
3830 struct vmspace *vm2;
3831 struct vm_map *old_map = &vm1->vm_map;
3832 struct vm_map *new_map;
3833 struct vm_map_entry *old_entry, *new_entry;
3834 struct uvm_map_deadq dead;
3835
3836 vm_map_lock(old_map);
3837
3838 vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset,
3839 (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE);
3840 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
3841 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
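/*
 * This copies everything from vm_startcopy to the end of the vmspace
 * (the per-process statistics and segment address fields) in one go.
 */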
3842 vm2->vm_dused = 0; /* Statistic managed by us. */
3843 new_map = &vm2->vm_map;
3844 vm_map_lock(new_map);
3845
3846 /* go entry-by-entry */
3847 TAILQ_INIT(&dead);
3848 RBT_FOREACH(old_entry, uvm_map_addr, &old_map->addr) {
3849 if (old_entry->start == old_entry->end)
3850 continue;
3851
3852 /* first, some sanity checks on the old entry */
3853 if (UVM_ET_ISSUBMAP(old_entry)) {
3854 panic("fork: encountered a submap during fork "
3855 "(illegal)");
3856 }
3857
3858 if (!UVM_ET_ISCOPYONWRITE(old_entry) &&
3859 UVM_ET_ISNEEDSCOPY(old_entry)) {
3860 panic("fork: non-copy_on_write map entry marked "
3861 "needs_copy (illegal)");
3862 }
3863
3864 /* Apply inheritance. */
3865 switch (old_entry->inheritance) {
3866 case MAP_INHERIT_SHARE:
3867 new_entry = uvm_mapent_forkshared(vm2, new_map,
3868 old_map, old_entry, &dead);
3869 break;
3870 case MAP_INHERIT_COPY:
3871 new_entry = uvm_mapent_forkcopy(vm2, new_map,
3872 old_map, old_entry, &dead);
3873 break;
3874 case MAP_INHERIT_ZERO:
3875 new_entry = uvm_mapent_forkzero(vm2, new_map,
3876 old_map, old_entry, &dead);
3877 break;
3878 default:
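/* MAP_INHERIT_NONE: the child gets no mapping here. */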
3879 continue;
3880 }
3881
3882 /* Update process statistics. */
3883 if (!UVM_ET_ISHOLE(new_entry))
3884 new_map->size += new_entry->end - new_entry->start;
3885 if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry) &&
3886 new_entry->protection != PROT_NONE) {
3887 vm2->vm_dused += uvmspace_dused(
3888 new_map, new_entry->start, new_entry->end);
3889 }
3890 }
3891 new_map->flags |= old_map->flags & VM_MAP_PINSYSCALL_ONCE;
3892 #ifdef PMAP_CHECK_COPYIN
3893 if (PMAP_CHECK_COPYIN) {
3894 memcpy(&new_map->check_copyin, &old_map->check_copyin,
3895 sizeof(new_map->check_copyin));
3896 membar_producer();
3897 new_map->check_copyin_count = old_map->check_copyin_count;
3898 }
3899 #endif
3900
3901 vm_map_unlock(old_map);
3902 vm_map_unlock(new_map);
3903
3904 /*
3905 * This can actually happen, if multiple entries described a
3906 * space in which an entry was inherited.
3907 */
3908 uvm_unmap_detach(&dead, 0);
3909
3910 #ifdef SYSVSHM
3911 if (vm1->vm_shm)
3912 shmfork(vm1, vm2);
3913 #endif
3914
3915 return vm2;
3916 }
3917
3918 /*
3919 * uvm_map_hint: return the beginning of the best area suitable for
3920 * creating a new mapping with "prot" protection.
3921 */
3922 vaddr_t
3923 uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr,
3924 vaddr_t maxaddr)
3925 {
3926 vaddr_t addr;
3927 vaddr_t spacing;
3928
3929 #ifdef __i386__
3930 /*
3931 * If executable skip first two pages, otherwise start
3932 * after data + heap region.
3933 */
3934 if ((prot & PROT_EXEC) != 0 &&
3935 (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) {
3936 addr = (PAGE_SIZE*2) +
3937 (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1));
3938 return (round_page(addr));
3939 }
3940 #endif
3941
3942 #if defined (__LP64__)
3943 spacing = MIN(4UL * 1024 * 1024 * 1024, MAXDSIZ) - 1;
3944 #else
3945 spacing = MIN(1 * 1024 * 1024 * 1024, MAXDSIZ) - 1;
3946 #endif
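/*
 * spacing is used as a mask for arc4random() below.  Assuming MAXDSIZ
 * is a power of two, it has the 2^n - 1 form, and the halving loop
 * further down keeps that form while shrinking it to fit below maxaddr.
 */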
3947
3948 /*
3949 * Start malloc/mmap after the brk.
3950 */
3951 addr = (vaddr_t)vm->vm_daddr + BRKSIZ;
3952 addr = MAX(addr, minaddr);
3953
3954 if (addr < maxaddr) {
3955 while (spacing > maxaddr - addr)
3956 spacing >>= 1;
3957 }
3958 addr += arc4random() & spacing;
3959 return (round_page(addr));
3960 }
3961
3962 /*
3963 * uvm_map_submap: punch down part of a map into a submap
3964 *
3965 * => only the kernel_map is allowed to be submapped
3966 * => the purpose of submapping is to break up the locking granularity
3967 * of a larger map
3968 * => the range specified must have been mapped previously with a uvm_map()
3969 * call [with uobj==NULL] to create a blank map entry in the main map.
3970 * [And it had better still be blank!]
3971 * => maps which contain submaps should never be copied or forked.
3972 * => to remove a submap, use uvm_unmap() on the main map
3973 * and then uvm_map_deallocate() the submap.
3974 * => main map must be unlocked.
3975 * => submap must have been init'd and have a zero reference count.
3976 * [need not be locked as we don't actually reference it]
3977 */
3978 int
3979 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
3980 struct vm_map *submap)
3981 {
3982 struct vm_map_entry *entry;
3983 int result;
3984
3985 if (start > map->max_offset || end > map->max_offset ||
3986 start < map->min_offset || end < map->min_offset)
3987 return EINVAL;
3988
3989 vm_map_lock(map);
3990
3991 if (uvm_map_lookup_entry(map, start, &entry)) {
3992 UVM_MAP_CLIP_START(map, entry, start);
3993 UVM_MAP_CLIP_END(map, entry, end);
3994 } else
3995 entry = NULL;
3996
3997 if (entry != NULL &&
3998 entry->start == start && entry->end == end &&
3999 entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
4000 !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
4001 entry->etype |= UVM_ET_SUBMAP;
4002 entry->object.sub_map = submap;
4003 entry->offset = 0;
4004 uvm_map_reference(submap);
4005 result = 0;
4006 } else
4007 result = EINVAL;
4008
4009 vm_map_unlock(map);
4010 return result;
4011 }
4012
4013 /*
4014 * uvm_map_checkprot: check protection in map
4015 *
4016 * => must allow specific protection in a fully allocated region.
4017 * => map must be read or write locked by caller.
4018 */
4019 boolean_t
4020 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
4021 vm_prot_t protection)
4022 {
4023 struct vm_map_entry *entry;
4024
4025 vm_map_assert_anylock(map);
4026
4027 if (start < map->min_offset || end > map->max_offset || start > end)
4028 return FALSE;
4029 if (start == end)
4030 return TRUE;
4031
4032 /*
4033 * Iterate entries.
4034 */
4035 for (entry = uvm_map_entrybyaddr(&map->addr, start);
4036 entry != NULL && entry->start < end;
4037 entry = RBT_NEXT(uvm_map_addr, entry)) {
4038 /* Fail if a hole is found. */
4039 if (UVM_ET_ISHOLE(entry) ||
4040 (entry->end < end && entry->end != VMMAP_FREE_END(entry)))
4041 return FALSE;
4042
4043 /* Check protection. */
4044 if ((entry->protection & protection) != protection)
4045 return FALSE;
4046 }
4047 return TRUE;
4048 }
4049
4050 /*
4051 * uvm_map_create: create map
4052 */
4053 vm_map_t
4054 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags)
4055 {
4056 vm_map_t map;
4057
4058 map = malloc(sizeof *map, M_VMMAP, M_WAITOK);
4059 uvm_map_setup(map, pmap, min, max, flags);
4060 return (map);
4061 }
4062
4063 /*
4064 * uvm_map_deallocate: drop reference to a map
4065 *
4066 * => caller must not lock map
4067 * => we will zap map if ref count goes to zero
4068 */
4069 void
4070 uvm_map_deallocate(vm_map_t map)
4071 {
4072 int c;
4073 struct uvm_map_deadq dead;
4074
4075 c = atomic_dec_int_nv(&map->ref_count);
4076 if (c > 0) {
4077 return;
4078 }
4079
4080 /*
4081 * all references gone. unmap and free.
4082 *
4083 * No lock required: we are only one to access this map.
4084 */
4085 TAILQ_INIT(&dead);
4086 uvm_tree_sanity(map, __FILE__, __LINE__);
4087 vm_map_lock(map);
4088 uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead,
4089 TRUE, FALSE, FALSE);
4090 vm_map_unlock(map);
4091 pmap_destroy(map->pmap);
4092 KASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
4093 free(map, M_VMMAP, sizeof *map);
4094
4095 uvm_unmap_detach(&dead, 0);
4096 }
4097
4098 /*
4099 * uvm_map_inherit: set inheritance code for range of addrs in map.
4100 *
4101 * => map must be unlocked
4102 * => note that the inherit code is used during a "fork". see fork
4103 * code for details.
4104 */
4105 int
4106 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
4107 vm_inherit_t new_inheritance)
4108 {
4109 struct vm_map_entry *entry, *entry1;
4110 int error = EPERM;
4111
4112 switch (new_inheritance) {
4113 case MAP_INHERIT_NONE:
4114 case MAP_INHERIT_COPY:
4115 case MAP_INHERIT_SHARE:
4116 case MAP_INHERIT_ZERO:
4117 break;
4118 default:
4119 return (EINVAL);
4120 }
4121
4122 if (start > end)
4123 return EINVAL;
4124 start = MAX(start, map->min_offset);
4125 end = MIN(end, map->max_offset);
4126 if (start >= end)
4127 return 0;
4128
4129 vm_map_lock(map);
4130
4131 entry = uvm_map_entrybyaddr(&map->addr, start);
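/*
 * The entry found may overlap start with its free space only; clip it
 * at start if its mapped part covers start, otherwise move on to the
 * next entry.
 */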
4132 if (entry->end > start)
4133 UVM_MAP_CLIP_START(map, entry, start);
4134 else
4135 entry = RBT_NEXT(uvm_map_addr, entry);
4136
4137 /* First check for illegal operations */
4138 entry1 = entry;
4139 while (entry1 != NULL && entry1->start < end) {
4140 if (entry1->etype & UVM_ET_IMMUTABLE)
4141 goto out;
4142 if (new_inheritance == MAP_INHERIT_ZERO &&
4143 (entry1->protection & PROT_WRITE) == 0)
4144 goto out;
4145 entry1 = RBT_NEXT(uvm_map_addr, entry1);
4146 }
4147
4148 while (entry != NULL && entry->start < end) {
4149 UVM_MAP_CLIP_END(map, entry, end);
4150 entry->inheritance = new_inheritance;
4151 entry = RBT_NEXT(uvm_map_addr, entry);
4152 }
4153
4154 error = 0;
4155 out:
4156 vm_map_unlock(map);
4157 return (error);
4158 }
4159
4160 #ifdef PMAP_CHECK_COPYIN
4161 static inline void
4162 check_copyin_add(struct vm_map *map, vaddr_t start, vaddr_t end)
4163 {
4164 if (PMAP_CHECK_COPYIN == 0 ||
4165 map->check_copyin_count >= UVM_MAP_CHECK_COPYIN_MAX)
4166 return;
4167 vm_map_assert_wrlock(map);
4168 map->check_copyin[map->check_copyin_count].start = start;
4169 map->check_copyin[map->check_copyin_count].end = end;
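/*
 * Publish the new range before bumping the count, so lockless readers
 * scanning check_copyin[0..count) never see a half-initialized slot.
 */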
4170 membar_producer();
4171 map->check_copyin_count++;
4172 }
4173
4174 /*
4175 * uvm_map_check_copyin_add: remember regions which are X-only for copyin(),
4176 * copyinstr(), uiomove(), and others
4177 *
4178 * => map must be unlocked
4179 */
4180 int
4181 uvm_map_check_copyin_add(struct vm_map *map, vaddr_t start, vaddr_t end)
4182 {
4183 if (start > end)
4184 return EINVAL;
4185 start = MAX(start, map->min_offset);
4186 end = MIN(end, map->max_offset);
4187 if (start >= end)
4188 return 0;
4189 vm_map_lock(map);
4190 check_copyin_add(map, start, end);
4191 vm_map_unlock(map);
4192 return (0);
4193 }
4194 #endif /* PMAP_CHECK_COPYIN */
4195
4196 /*
4197 * uvm_map_immutable: block mapping/mprotect for range of addrs in map.
4198 *
4199 * => map must be unlocked
4200 */
4201 int
4202 uvm_map_immutable(struct vm_map *map, vaddr_t start, vaddr_t end, int imut)
4203 {
4204 struct vm_map_entry *entry, *entry1;
4205 int error = EPERM;
4206
4207 if (start > end)
4208 return EINVAL;
4209 start = MAX(start, map->min_offset);
4210 end = MIN(end, map->max_offset);
4211 if (start >= end)
4212 return 0;
4213
4214 vm_map_lock(map);
4215
4216 entry = uvm_map_entrybyaddr(&map->addr, start);
4217 if (entry->end > start)
4218 UVM_MAP_CLIP_START(map, entry, start);
4219 else
4220 entry = RBT_NEXT(uvm_map_addr, entry);
4221
4222 /* First check for illegal operations */
4223 entry1 = entry;
4224 while (entry1 != NULL && entry1->start < end) {
4225 if (entry1->inheritance == MAP_INHERIT_ZERO)
4226 goto out;
4227 entry1 = RBT_NEXT(uvm_map_addr, entry1);
4228 }
4229
4230 while (entry != NULL && entry->start < end) {
4231 UVM_MAP_CLIP_END(map, entry, end);
4232 if (imut)
4233 entry->etype |= UVM_ET_IMMUTABLE;
4234 else
4235 entry->etype &= ~UVM_ET_IMMUTABLE;
4236 entry = RBT_NEXT(uvm_map_addr, entry);
4237 }
4238 error = 0;
4239 out:
4240 vm_map_unlock(map);
4241 return (error);
4242 }
4243
4244 /*
4245 * uvm_map_advice: set advice code for range of addrs in map.
4246 *
4247 * => map must be unlocked
4248 */
4249 int
4250 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
4251 {
4252 struct vm_map_entry *entry;
4253
4254 switch (new_advice) {
4255 case MADV_NORMAL:
4256 case MADV_RANDOM:
4257 case MADV_SEQUENTIAL:
4258 break;
4259 default:
4260 return (EINVAL);
4261 }
4262
4263 if (start > end)
4264 return EINVAL;
4265 start = MAX(start, map->min_offset);
4266 end = MIN(end, map->max_offset);
4267 if (start >= end)
4268 return 0;
4269
4270 vm_map_lock(map);
4271
4272 entry = uvm_map_entrybyaddr(&map->addr, start);
4273 if (entry != NULL && entry->end > start)
4274 UVM_MAP_CLIP_START(map, entry, start);
4275 else if (entry != NULL)
4276 entry = RBT_NEXT(uvm_map_addr, entry);
4277
4278 /*
4279 * XXXJRT: disallow holes?
4280 */
4281 while (entry != NULL && entry->start < end) {
4282 UVM_MAP_CLIP_END(map, entry, end);
4283 entry->advice = new_advice;
4284 entry = RBT_NEXT(uvm_map_addr, entry);
4285 }
4286
4287 vm_map_unlock(map);
4288 return (0);
4289 }
4290
4291 /*
4292 * uvm_map_extract: extract a mapping from a map and put it somewhere
4293 * in the kernel_map, setting protection to max_prot.
4294 *
4295 * => map should be unlocked (we will write lock it and kernel_map)
4296 * => returns 0 on success, error code otherwise
4297 * => start must be page aligned
4298 * => len must be page sized
4299 * => flags:
4300 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
4301 * Mappings are QREF's.
4302 */
4303 int
4304 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
4305 vaddr_t *dstaddrp, int flags)
4306 {
4307 struct uvm_map_deadq dead;
4308 struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2;
4309 vaddr_t dstaddr;
4310 vaddr_t end;
4311 vaddr_t cp_start;
4312 vsize_t cp_len, cp_off;
4313 int error;
4314
4315 TAILQ_INIT(&dead);
4316 end = start + len;
4317
4318 /*
4319 * Sanity check on the parameters.
4320 * Also, since the mapping may not contain gaps, error out if the
4321 * mapped area is not in source map.
4322 */
4323 if ((start & (vaddr_t)PAGE_MASK) != 0 ||
4324 (end & (vaddr_t)PAGE_MASK) != 0 || end < start)
4325 return EINVAL;
4326 if (start < srcmap->min_offset || end > srcmap->max_offset)
4327 return EINVAL;
4328
4329 /* Handle the len == 0 case; the dead queue was initialized above. */
4330 if (len == 0)
4331 return 0;
4332
4333 /* Acquire lock on srcmap. */
4334 vm_map_lock(srcmap);
4335
4336 /* Look up the first entry in <start, len>. */
4337 first = uvm_map_entrybyaddr(&srcmap->addr, start);
4338
4339 /* Check that the range is contiguous. */
4340 for (entry = first; entry != NULL && entry->end < end;
4341 entry = RBT_NEXT(uvm_map_addr, entry)) {
4342 if (VMMAP_FREE_END(entry) != entry->end ||
4343 UVM_ET_ISHOLE(entry)) {
4344 error = EINVAL;
4345 goto fail;
4346 }
4347 }
4348 if (entry == NULL || UVM_ET_ISHOLE(entry)) {
4349 error = EINVAL;
4350 goto fail;
4351 }
4352
4353 /*
4354 * Handle need-copy flag.
4355 */
4356 for (entry = first; entry != NULL && entry->start < end;
4357 entry = RBT_NEXT(uvm_map_addr, entry)) {
4358 if (UVM_ET_ISNEEDSCOPY(entry))
4359 amap_copy(srcmap, entry, M_NOWAIT,
4360 UVM_ET_ISSTACK(entry) ? FALSE : TRUE, start, end);
4361 if (UVM_ET_ISNEEDSCOPY(entry)) {
4362 /*
4363 * amap_copy failure
4364 */
4365 error = ENOMEM;
4366 goto fail;
4367 }
4368 }
4369
4370 /* Lock destination map (kernel_map). */
4371 vm_map_lock(kernel_map);
4372
4373 if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len,
4374 MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start),
4375 PROT_NONE, 0) != 0) {
4376 error = ENOMEM;
4377 goto fail2;
4378 }
4379 *dstaddrp = dstaddr;
4380
4381 /*
4382 * We now have srcmap and kernel_map locked.
4383 * dstaddr contains the destination offset in dstmap.
4384 */
4385 /* step 1: start looping through map entries, performing extraction. */
4386 for (entry = first; entry != NULL && entry->start < end;
4387 entry = RBT_NEXT(uvm_map_addr, entry)) {
4388 KDASSERT(!UVM_ET_ISNEEDSCOPY(entry));
4389 if (UVM_ET_ISHOLE(entry))
4390 continue;
4391
4392 /* Calculate uvm_mapent_clone parameters. */
4393 cp_start = entry->start;
4394 if (cp_start < start) {
4395 cp_off = start - cp_start;
4396 cp_start = start;
4397 } else
4398 cp_off = 0;
4399 cp_len = MIN(entry->end, end) - cp_start;
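/*
 * Example: for an entry spanning [A, B) with A < start, only
 * [start, MIN(B, end)) is cloned and the skipped prefix is carried
 * as cp_off into the amap/object offsets of the new entry.
 */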
4400
4401 newentry = uvm_mapent_clone(kernel_map,
4402 cp_start - start + dstaddr, cp_len, cp_off,
4403 entry->protection, entry->max_protection,
4404 entry, &dead, flags, AMAP_SHARED | AMAP_REFALL);
4405 if (newentry == NULL) {
4406 error = ENOMEM;
4407 goto fail2_unmap;
4408 }
4409 kernel_map->size += cp_len;
4410
4411 /* Figure out the best protection */
4412 if ((flags & UVM_EXTRACT_FIXPROT) &&
4413 newentry->protection != PROT_NONE)
4414 newentry->protection = newentry->max_protection;
4415 newentry->protection &= ~PROT_EXEC;
4416 }
4417 pmap_update(kernel_map->pmap);
4418
4419 error = 0;
4420
4421 /* Unmap copied entries on failure. */
4422 fail2_unmap:
4423 if (error) {
4424 uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead,
4425 FALSE, TRUE, FALSE);
4426 }
4427
4428 /* Release maps, release dead entries. */
4429 fail2:
4430 vm_map_unlock(kernel_map);
4431
4432 fail:
4433 vm_map_unlock(srcmap);
4434
4435 uvm_unmap_detach(&dead, 0);
4436
4437 return error;
4438 }
4439
4440 /*
4441 * uvm_map_clean: clean out a map range
4442 *
4443 * => valid flags:
4444 * if (flags & PGO_CLEANIT): dirty pages are cleaned first
4445 * if (flags & PGO_SYNCIO): dirty pages are written synchronously
4446 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean
4447 * if (flags & PGO_FREE): any cached pages are freed after clean
4448 * => returns an error if any part of the specified range isn't mapped
4449 * => never a need to flush amap layer since the anonymous memory has
4450 * no permanent home, but may deactivate pages there
4451 * => called from sys_msync() and sys_madvise()
4452 * => caller must not have map locked
4453 */
4454
4455 int
4456 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
4457 {
4458 struct vm_map_entry *first, *entry;
4459 struct vm_amap *amap;
4460 struct vm_anon *anon;
4461 struct vm_page *pg;
4462 struct uvm_object *uobj;
4463 vaddr_t cp_start, cp_end;
4464 int refs, imut = 0;
4465 int error;
4466 boolean_t rv;
4467
4468 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) !=
4469 (PGO_FREE|PGO_DEACTIVATE));
4470
4471 if (start > end || start < map->min_offset || end > map->max_offset)
4472 return EINVAL;
4473
4474 vm_map_lock(map);
4475 first = uvm_map_entrybyaddr(&map->addr, start);
4476
4477 /* Make a first pass to check for various conditions. */
4478 for (entry = first; entry != NULL && entry->start < end;
4479 entry = RBT_NEXT(uvm_map_addr, entry)) {
4480 if (entry->etype & UVM_ET_IMMUTABLE)
4481 imut = 1;
4482 if (UVM_ET_ISSUBMAP(entry)) {
4483 vm_map_unlock(map);
4484 return EINVAL;
4485 }
4486 if (UVM_ET_ISSUBMAP(entry) ||
4487 UVM_ET_ISHOLE(entry) ||
4488 (entry->end < end &&
4489 VMMAP_FREE_END(entry) != entry->end)) {
4490 vm_map_unlock(map);
4491 return EFAULT;
4492 }
4493 }
4494
4495 vm_map_busy(map);
4496 vm_map_unlock(map);
4497 error = 0;
4498 for (entry = first; entry != NULL && entry->start < end;
4499 entry = RBT_NEXT(uvm_map_addr, entry)) {
4500 amap = entry->aref.ar_amap; /* top layer */
4501 if (UVM_ET_ISOBJ(entry))
4502 uobj = entry->object.uvm_obj;
4503 else
4504 uobj = NULL;
4505
4506 /*
4507 * No amap cleaning necessary if:
4508 * - there's no amap
4509 * - we're not deactivating or freeing pages.
4510 */
4511 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
4512 goto flush_object;
4513
4514 if (imut) {
4515 vm_map_unbusy(map);
4516 return EPERM;
4517 }
4518
4519 cp_start = MAX(entry->start, start);
4520 cp_end = MIN(entry->end, end);
4521
4522 amap_lock(amap);
4523 for (; cp_start != cp_end; cp_start += PAGE_SIZE) {
4524 anon = amap_lookup(&entry->aref,
4525 cp_start - entry->start);
4526 if (anon == NULL)
4527 continue;
4528
4529 KASSERT(anon->an_lock == amap->am_lock);
4530 pg = anon->an_page;
4531 if (pg == NULL) {
4532 continue;
4533 }
4534 KASSERT(pg->pg_flags & PQ_ANON);
4535
4536 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
4537 /*
4538 * XXX In these first 3 cases, we always just
4539 * XXX deactivate the page. We may want to
4540 * XXX handle the different cases more
4541 * XXX specifically, in the future.
4542 */
4543 case PGO_CLEANIT|PGO_FREE:
4544 case PGO_CLEANIT|PGO_DEACTIVATE:
4545 case PGO_DEACTIVATE:
4546 deactivate_it:
4547 /* skip the page if it's wired */
4548 if (pg->wire_count != 0)
4549 break;
4550
4551 uvm_lock_pageq();
4552
4553 KASSERT(pg->uanon == anon);
4554
4555 /* zap all mappings for the page. */
4556 pmap_page_protect(pg, PROT_NONE);
4557
4558 /* ...and deactivate the page. */
4559 uvm_pagedeactivate(pg);
4560
4561 uvm_unlock_pageq();
4562 break;
4563 case PGO_FREE:
4564 /*
4565 * If there are multiple references to
4566 * the amap, just deactivate the page.
4567 */
4568 if (amap_refs(amap) > 1)
4569 goto deactivate_it;
4570
4571 /* XXX skip the page if it's wired */
4572 if (pg->wire_count != 0) {
4573 break;
4574 }
4575 amap_unadd(&entry->aref,
4576 cp_start - entry->start);
4577 refs = --anon->an_ref;
4578 if (refs == 0)
4579 uvm_anfree(anon);
4580 break;
4581 default:
4582 panic("uvm_map_clean: weird flags");
4583 }
4584 }
4585 amap_unlock(amap);
4586
4587 flush_object:
4588 cp_start = MAX(entry->start, start);
4589 cp_end = MIN(entry->end, end);
4590
4591 /*
4592 * flush pages if we've got a valid backing object.
4593 *
4594 * Don't PGO_FREE if we don't have write permission
4595 * and don't flush if this is a copy-on-write object
4596 * since we can't know our permissions on it.
4597 */
4598 if (uobj != NULL &&
4599 ((flags & PGO_FREE) == 0 ||
4600 ((entry->max_protection & PROT_WRITE) != 0 &&
4601 (entry->etype & UVM_ET_COPYONWRITE) == 0))) {
4602 rw_enter(uobj->vmobjlock, RW_WRITE);
4603 rv = uobj->pgops->pgo_flush(uobj,
4604 cp_start - entry->start + entry->offset,
4605 cp_end - entry->start + entry->offset, flags);
4606 rw_exit(uobj->vmobjlock);
4607
4608 if (rv == FALSE)
4609 error = EFAULT;
4610 }
4611 }
4612
4613 vm_map_unbusy(map);
4614 return error;
4615 }
4616
4617 /*
4618 * UVM_MAP_CLIP_END implementation
4619 */
4620 void
4621 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4622 {
4623 struct vm_map_entry *tmp;
4624
4625 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4626 tmp = uvm_mapent_alloc(map, 0);
4627
4628 /* Invoke splitentry. */
4629 uvm_map_splitentry(map, entry, tmp, addr);
4630 }
4631
4632 /*
4633 * UVM_MAP_CLIP_START implementation
4634 *
4635 * Clippers are required to not change the pointers to the entry they are
4636 * clipping on.
4637 * Since uvm_map_splitentry turns the original entry into the lowest
4638 * entry (address wise) we do a swap between the new entry and the original
4639 * entry, prior to calling uvm_map_splitentry.
4640 */
4641 void
4642 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4643 {
4644 struct vm_map_entry *tmp;
4645 struct uvm_addr_state *free;
4646
4647 /* Unlink original. */
4648 free = uvm_map_uaddr_e(map, entry);
4649 uvm_mapent_free_remove(map, free, entry);
4650 uvm_mapent_addr_remove(map, entry);
4651
4652 /* Copy entry. */
4653 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4654 tmp = uvm_mapent_alloc(map, 0);
4655 uvm_mapent_copy(entry, tmp);
4656
4657 /* Put new entry in place of original entry. */
4658 uvm_mapent_addr_insert(map, tmp);
4659 uvm_mapent_free_insert(map, free, tmp);
4660
4661 /* Invoke splitentry. */
4662 uvm_map_splitentry(map, tmp, entry, addr);
4663 }
4664
4665 /*
4666 * Boundary fixer: return bound if it lies strictly inside (min, max), else max.
4667 */
4668 static inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t);
4669 static inline vaddr_t
4670 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound)
4671 {
4672 return (min < bound && max > bound) ? bound : max;
4673 }
4674
4675 /*
4676 * Choose free list based on address at start of free space.
4677 *
4678 * The uvm_addr_state returned contains addr and is the first of:
4679 * - uaddr_exe
4680 * - uaddr_brk_stack
4681 * - uaddr_any
4682 */
4683 struct uvm_addr_state*
4684 uvm_map_uaddr(struct vm_map *map, vaddr_t addr)
4685 {
4686 struct uvm_addr_state *uaddr;
4687 int i;
4688
4689 /* Special case the first page, to prevent mmap from returning 0. */
4690 if (addr < VMMAP_MIN_ADDR)
4691 return NULL;
4692
4693 /* Upper bound for kernel maps at uvm_maxkaddr. */
4694 if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
4695 if (addr >= uvm_maxkaddr)
4696 return NULL;
4697 }
4698
4699 /* Is the address inside the exe-only map? */
4700 if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr &&
4701 addr < map->uaddr_exe->uaddr_maxaddr)
4702 return map->uaddr_exe;
4703
4704 /* Check if the space falls inside brk/stack area. */
4705 if ((addr >= map->b_start && addr < map->b_end) ||
4706 (addr >= map->s_start && addr < map->s_end)) {
4707 if (map->uaddr_brk_stack != NULL &&
4708 addr >= map->uaddr_brk_stack->uaddr_minaddr &&
4709 addr < map->uaddr_brk_stack->uaddr_maxaddr) {
4710 return map->uaddr_brk_stack;
4711 } else
4712 return NULL;
4713 }
4714
4715 /*
4716 * Check the other selectors.
4717 *
4718 * These selectors are only marked as the owner, if they have insert
4719 * functions.
4720 */
4721 for (i = 0; i < nitems(map->uaddr_any); i++) {
4722 uaddr = map->uaddr_any[i];
4723 if (uaddr == NULL)
4724 continue;
4725 if (uaddr->uaddr_functions->uaddr_free_insert == NULL)
4726 continue;
4727
4728 if (addr >= uaddr->uaddr_minaddr &&
4729 addr < uaddr->uaddr_maxaddr)
4730 return uaddr;
4731 }
4732
4733 return NULL;
4734 }
4735
4736 /*
4737 * Choose free list based on address at start of free space.
4738 *
4739 * The uvm_addr_state returned contains addr and is the first of:
4740 * - uaddr_exe
4741 * - uaddr_brk_stack
4742 * - uaddr_any
4743 */
4744 struct uvm_addr_state*
4745 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry)
4746 {
4747 return uvm_map_uaddr(map, VMMAP_FREE_START(entry));
4748 }
4749
4750 /*
4751 * Returns the first free-memory boundary that is crossed by [min-max].
4752 */
4753 vsize_t
4754 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max)
4755 {
4756 struct uvm_addr_state *uaddr;
4757 int i;
4758
4759 /* Never return first page. */
4760 max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR);
4761
4762 /* Treat the maxkaddr special, if the map is a kernel_map. */
4763 if ((map->flags & VM_MAP_ISVMSPACE) == 0)
4764 max = uvm_map_boundfix(min, max, uvm_maxkaddr);
4765
4766 /* Check for exe-only boundaries. */
4767 if (map->uaddr_exe != NULL) {
4768 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr);
4769 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr);
4770 }
4771
4772 /* Check for brk/stack area boundaries. */
4773 if (map->uaddr_brk_stack != NULL) {
4774 max = uvm_map_boundfix(min, max,
4775 map->uaddr_brk_stack->uaddr_minaddr);
4776 max = uvm_map_boundfix(min, max,
4777 map->uaddr_brk_stack->uaddr_maxaddr);
4778 }
4779
4780 /* Check other boundaries. */
4781 for (i = 0; i < nitems(map->uaddr_any); i++) {
4782 uaddr = map->uaddr_any[i];
4783 if (uaddr != NULL) {
4784 max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr);
4785 max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr);
4786 }
4787 }
4788
4789 /* Boundaries at stack and brk() area. */
4790 max = uvm_map_boundfix(min, max, map->s_start);
4791 max = uvm_map_boundfix(min, max, map->s_end);
4792 max = uvm_map_boundfix(min, max, map->b_start);
4793 max = uvm_map_boundfix(min, max, map->b_end);
4794
4795 return max;
4796 }
4797
4798 /*
4799 * Update map allocation start and end addresses from proc vmspace.
4800 */
4801 void
4802 uvm_map_vmspace_update(struct vm_map *map,
4803 struct uvm_map_deadq *dead, int flags)
4804 {
4805 struct vmspace *vm;
4806 vaddr_t b_start, b_end, s_start, s_end;
4807
4808 KASSERT(map->flags & VM_MAP_ISVMSPACE);
4809 KASSERT(offsetof(struct vmspace, vm_map) == 0);
4810
4811 /*
4812 * Derive actual allocation boundaries from vmspace.
4813 */
4814 vm = (struct vmspace *)map;
4815 b_start = (vaddr_t)vm->vm_daddr;
4816 b_end = b_start + BRKSIZ;
4817 s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4818 s_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4819 #ifdef DIAGNOSTIC
4820 if ((b_start & (vaddr_t)PAGE_MASK) != 0 ||
4821 (b_end & (vaddr_t)PAGE_MASK) != 0 ||
4822 (s_start & (vaddr_t)PAGE_MASK) != 0 ||
4823 (s_end & (vaddr_t)PAGE_MASK) != 0) {
4824 panic("uvm_map_vmspace_update: vmspace %p invalid bounds: "
4825 "b=0x%lx-0x%lx s=0x%lx-0x%lx",
4826 vm, b_start, b_end, s_start, s_end);
4827 }
4828 #endif
4829
4830 if (__predict_true(map->b_start == b_start && map->b_end == b_end &&
4831 map->s_start == s_start && map->s_end == s_end))
4832 return;
4833
4834 uvm_map_freelist_update(map, dead, b_start, b_end,
4835 s_start, s_end, flags);
4836 }
4837
4838 /*
4839 * Grow kernel memory.
4840 *
4841 * This function is only called for kernel maps when an allocation fails.
4842 *
4843 * If the map has a gap that is large enough to accommodate alloc_sz, this
4844 * function will make sure map->free will include it.
4845 */
4846 void
4847 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead,
4848 vsize_t alloc_sz, int flags)
4849 {
4850 vsize_t sz;
4851 vaddr_t end;
4852 struct vm_map_entry *entry;
4853
4854 /* Kernel memory only. */
4855 KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0);
4856 /* Destroy free list. */
4857 uvm_map_freelist_update_clear(map, dead);
4858
4859 /* Include the guard page in the hard minimum requirement of alloc_sz. */
4860 if (map->flags & VM_MAP_GUARDPAGES)
4861 alloc_sz += PAGE_SIZE;
4862
4863 /*
4864 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA.
4865 *
4866 * Don't handle the case where the multiplication overflows:
4867 * if that happens, the allocation is probably too big anyway.
4868 */
4869 sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA);
4870
4871 /*
4872 * Walk forward until a gap large enough for alloc_sz shows up.
4873 *
4874 * We assume the kernel map has no boundaries.
4875 * uvm_maxkaddr may be zero.
4876 */
4877 end = MAX(uvm_maxkaddr, map->min_offset);
4878 entry = uvm_map_entrybyaddr(&map->addr, end);
4879 while (entry && entry->fspace < alloc_sz)
4880 entry = RBT_NEXT(uvm_map_addr, entry);
4881 if (entry) {
4882 end = MAX(VMMAP_FREE_START(entry), end);
4883 end += MIN(sz, map->max_offset - end);
4884 } else
4885 end = map->max_offset;
4886
4887 /* Reserve pmap entries. */
4888 #ifdef PMAP_GROWKERNEL
4889 uvm_maxkaddr = pmap_growkernel(end);
4890 #else
4891 uvm_maxkaddr = MAX(uvm_maxkaddr, end);
4892 #endif
4893
4894 /* Rebuild free list. */
4895 uvm_map_freelist_update_refill(map, flags);
4896 }
4897
4898 /*
4899 * Freelist update subfunction: unlink all entries from freelists.
4900 */
4901 void
4902 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead)
4903 {
4904 struct uvm_addr_state *free;
4905 struct vm_map_entry *entry, *prev, *next;
4906
4907 prev = NULL;
4908 for (entry = RBT_MIN(uvm_map_addr, &map->addr); entry != NULL;
4909 entry = next) {
4910 next = RBT_NEXT(uvm_map_addr, entry);
4911
4912 free = uvm_map_uaddr_e(map, entry);
4913 uvm_mapent_free_remove(map, free, entry);
4914
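/*
 * Zero-length entries carry only free space; fold that space into
 * the previous entry and recycle them.
 */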
4915 if (prev != NULL && entry->start == entry->end) {
4916 prev->fspace += VMMAP_FREE_END(entry) - entry->end;
4917 uvm_mapent_addr_remove(map, entry);
4918 DEAD_ENTRY_PUSH(dead, entry);
4919 } else
4920 prev = entry;
4921 }
4922 }
4923
4924 /*
4925 * Freelist update subfunction: refill the freelists with entries.
4926 */
4927 void
4928 uvm_map_freelist_update_refill(struct vm_map *map, int flags)
4929 {
4930 struct vm_map_entry *entry;
4931 vaddr_t min, max;
4932
4933 RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
4934 min = VMMAP_FREE_START(entry);
4935 max = VMMAP_FREE_END(entry);
4936 entry->fspace = 0;
4937
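/*
 * uvm_map_fix_space may create additional entries to describe
 * [min, max); iteration resumes after the last one it returns.
 */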
4938 entry = uvm_map_fix_space(map, entry, min, max, flags);
4939 }
4940
4941 uvm_tree_sanity(map, __FILE__, __LINE__);
4942 }
4943
4944 /*
4945 * Change {a,b}_{start,end} allocation ranges and associated free lists.
4946 */
4947 void
4948 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead,
4949 vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags)
4950 {
4951 KDASSERT(b_end >= b_start && s_end >= s_start);
4952 vm_map_assert_wrlock(map);
4953
4954 /* Clear all free lists. */
4955 uvm_map_freelist_update_clear(map, dead);
4956
4957 /* Apply new bounds. */
4958 map->b_start = b_start;
4959 map->b_end = b_end;
4960 map->s_start = s_start;
4961 map->s_end = s_end;
4962
4963 /* Refill free lists. */
4964 uvm_map_freelist_update_refill(map, flags);
4965 }
4966
4967 /*
4968 * Assign a uvm_addr_state to the specified pointer in vm_map.
4969 *
4970 * May sleep.
4971 */
4972 void
4973 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which,
4974 struct uvm_addr_state *newval)
4975 {
4976 struct uvm_map_deadq dead;
4977
4978 /* Pointer which must be in this map. */
4979 KASSERT(which != NULL);
4980 KASSERT((void*)map <= (void*)(which) &&
4981 (void*)(which) < (void*)(map + 1));
4982
4983 vm_map_lock(map);
4984 TAILQ_INIT(&dead);
4985 uvm_map_freelist_update_clear(map, &dead);
4986
4987 uvm_addr_destroy(*which);
4988 *which = newval;
4989
4990 uvm_map_freelist_update_refill(map, 0);
4991 vm_map_unlock(map);
4992 uvm_unmap_detach(&dead, 0);
4993 }
4994
4995 /*
4996 * Correct space insert.
4997 *
4998 * Entry must not be on any freelist.
4999 */
5000 struct vm_map_entry*
5001 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry,
5002 vaddr_t min, vaddr_t max, int flags)
5003 {
5004 struct uvm_addr_state *free, *entfree;
5005 vaddr_t lmax;
5006
5007 KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0);
5008 KDASSERT(min <= max);
5009 KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) ||
5010 min == map->min_offset);
5011
5012 UVM_MAP_REQ_WRITE(map);
5013
5014 /*
5015 * During the function, entfree will always point at the uaddr state
5016 * for entry.
5017 */
5018 entfree = (entry == NULL ? NULL :
5019 uvm_map_uaddr_e(map, entry));
5020
5021 while (min != max) {
5022 /* Claim guard page for entry. */
5023 if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL &&
5024 VMMAP_FREE_END(entry) == entry->end &&
5025 entry->start != entry->end) {
5026 if (max - min == 2 * PAGE_SIZE) {
5027 /*
5028 * If the free-space gap is exactly 2 pages,
5029 * we make the guard 2 pages instead of 1.
5030 * Because in a guarded map, an area needs
5031 * at least 2 pages to allocate from:
5032 * one page for the allocation and one for
5033 * the guard.
5034 */
5035 entry->guard = 2 * PAGE_SIZE;
5036 min = max;
5037 } else {
5038 entry->guard = PAGE_SIZE;
5039 min += PAGE_SIZE;
5040 }
5041 continue;
5042 }
5043
5044 /*
5045 * Handle the case where entry has a 2-page guard, but the
5046 * space after entry is freed.
5047 */
5048 if (entry != NULL && entry->fspace == 0 &&
5049 entry->guard > PAGE_SIZE) {
5050 entry->guard = PAGE_SIZE;
5051 min = VMMAP_FREE_START(entry);
5052 }
5053
5054 lmax = uvm_map_boundary(map, min, max);
5055 free = uvm_map_uaddr(map, min);
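/*
 * [min, lmax) is the largest prefix of [min, max) that stays within
 * a single free-list region; process one such chunk per iteration.
 */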
5056
5057 /*
5058 * Entries are merged if they point at the same uaddr free list.
5059 * Exception to that rule: if min == uvm_maxkaddr, a new
5060 * entry is started regardless (otherwise the allocators
5061 * will get confused).
5062 */
5063 if (entry != NULL && free == entfree &&
5064 !((map->flags & VM_MAP_ISVMSPACE) == 0 &&
5065 min == uvm_maxkaddr)) {
5066 KDASSERT(VMMAP_FREE_END(entry) == min);
5067 entry->fspace += lmax - min;
5068 } else {
5069 /*
5070 * Commit entry to the free list: nothing more will be
5071 * added to it.
5072 * We'll start a new entry and accumulate into that
5073 * one instead.
5074 */
5075 if (entry != NULL)
5076 uvm_mapent_free_insert(map, entfree, entry);
5077
5078 /* New entry for new uaddr. */
5079 entry = uvm_mapent_alloc(map, flags);
5080 KDASSERT(entry != NULL);
5081 entry->end = entry->start = min;
5082 entry->guard = 0;
5083 entry->fspace = lmax - min;
5084 entry->object.uvm_obj = NULL;
5085 entry->offset = 0;
5086 entry->etype = 0;
5087 entry->protection = entry->max_protection = 0;
5088 entry->inheritance = 0;
5089 entry->wired_count = 0;
5090 entry->advice = 0;
5091 entry->aref.ar_pageoff = 0;
5092 entry->aref.ar_amap = NULL;
5093 uvm_mapent_addr_insert(map, entry);
5094
5095 entfree = free;
5096 }
5097
5098 min = lmax;
5099 }
5100 /* Finally put entry on the uaddr state. */
5101 if (entry != NULL)
5102 uvm_mapent_free_insert(map, entfree, entry);
5103
5104 return entry;
5105 }
5106
5107 /*
5108 * MQuery style of allocation.
5109 *
5110 * This allocator searches forward until sufficient space is found to map
5111 * the given size.
5112 *
5113 * XXX: factor in offset (via pmap_prefer) and protection?
5114 */
5115 int
5116 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset,
5117 int flags)
5118 {
5119 struct vm_map_entry *entry, *last;
5120 vaddr_t addr;
5121 vaddr_t tmp, pmap_align, pmap_offset;
5122 int error;
5123
5124 addr = *addr_p;
5125 vm_map_lock_read(map);
5126
5127 /* Configure pmap prefer. */
5128 if (offset != UVM_UNKNOWN_OFFSET) {
5129 pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN());
5130 pmap_offset = PMAP_PREFER_OFFSET(offset);
5131 } else {
5132 pmap_align = PAGE_SIZE;
5133 pmap_offset = 0;
5134 }
5135
5136 /* Align address to pmap_prefer unless FLAG_FIXED is set. */
5137 if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) {
5138 tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
5139 if (tmp < addr)
5140 tmp += pmap_align;
5141 addr = tmp;
5142 }
5143
5144 /* First, check if the requested range is fully available. */
5145 entry = uvm_map_entrybyaddr(&map->addr, addr);
5146 last = NULL;
5147 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
5148 error = 0;
5149 goto out;
5150 }
5151 if (flags & UVM_FLAG_FIXED) {
5152 error = EINVAL;
5153 goto out;
5154 }
5155
5156 error = ENOMEM; /* Default error from here. */
5157
5158 /*
5159 * At this point, the memory at <addr, sz> is not available.
5160 * The reasons are:
5161 * [1] it's outside the map,
5162 * [2] it starts in used memory (and therefore needs to move
5163 * toward the first free page in entry),
5164 * [3] it starts in free memory but bumps into used memory.
5165 *
5166 * Note that for case [2], the forward moving is handled by the
5167 * for loop below.
5168 */
5169 if (entry == NULL) {
5170 /* [1] Outside the map. */
5171 if (addr >= map->max_offset)
5172 goto out;
5173 else
5174 entry = RBT_MIN(uvm_map_addr, &map->addr);
5175 } else if (VMMAP_FREE_START(entry) <= addr) {
5176 /* [3] Bumped into used memory. */
5177 entry = RBT_NEXT(uvm_map_addr, entry);
5178 }
5179
5180 /* Test if the next entry is sufficient for the allocation. */
5181 for (; entry != NULL;
5182 entry = RBT_NEXT(uvm_map_addr, entry)) {
5183 if (entry->fspace == 0)
5184 continue;
5185 addr = VMMAP_FREE_START(entry);
5186
5187 restart: /* Restart address checks on address change. */
5188 tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
5189 if (tmp < addr)
5190 tmp += pmap_align;
5191 addr = tmp;
5192 if (addr >= VMMAP_FREE_END(entry))
5193 continue;
5194
5195 /* Skip brk() allocation addresses. */
5196 if (addr + sz > map->b_start && addr < map->b_end) {
5197 if (VMMAP_FREE_END(entry) > map->b_end) {
5198 addr = map->b_end;
5199 goto restart;
5200 } else
5201 continue;
5202 }
5203 /* Skip stack allocation addresses. */
5204 if (addr + sz > map->s_start && addr < map->s_end) {
5205 if (VMMAP_FREE_END(entry) > map->s_end) {
5206 addr = map->s_end;
5207 goto restart;
5208 } else
5209 continue;
5210 }
5211
5212 last = NULL;
5213 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
5214 error = 0;
5215 goto out;
5216 }
5217 }
5218
5219 out:
5220 vm_map_unlock_read(map);
5221 if (error == 0)
5222 *addr_p = addr;
5223 return error;
5224 }
5225
5226 boolean_t
5227 vm_map_lock_try_ln(struct vm_map *map, char *file, int line)
5228 {
5229 boolean_t rv;
5230
5231 if (map->flags & VM_MAP_INTRSAFE) {
5232 rv = mtx_enter_try(&map->mtx);
5233 } else {
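/* Fail fast if another proc has marked the map busy. */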
5234 mtx_enter(&map->flags_lock);
5235 if ((map->flags & VM_MAP_BUSY) && (map->busy != curproc)) {
5236 mtx_leave(&map->flags_lock);
5237 return (FALSE);
5238 }
5239 mtx_leave(&map->flags_lock);
5240 rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0);
5241 /* check if the lock is busy and back out if we won the race */
5242 if (rv) {
5243 mtx_enter(&map->flags_lock);
5244 if ((map->flags & VM_MAP_BUSY) &&
5245 (map->busy != curproc)) {
5246 rw_exit(&map->lock);
5247 rv = FALSE;
5248 }
5249 mtx_leave(&map->flags_lock);
5250 }
5251 }
5252
5253 if (rv) {
5254 map->timestamp++;
5255 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line));
5256 uvm_tree_sanity(map, file, line);
5257 uvm_tree_size_chk(map, file, line);
5258 }
5259
5260 return (rv);
5261 }
5262
5263 void
5264 vm_map_lock_ln(struct vm_map *map, char *file, int line)
5265 {
5266 if ((map->flags & VM_MAP_INTRSAFE) == 0) {
5267 do {
5268 mtx_enter(&map->flags_lock);
5269 tryagain:
5270 while ((map->flags & VM_MAP_BUSY) &&
5271 (map->busy != curproc)) {
5272 map->flags |= VM_MAP_WANTLOCK;
5273 msleep_nsec(&map->flags, &map->flags_lock,
5274 PVM, vmmapbsy, INFSLP);
5275 }
5276 mtx_leave(&map->flags_lock);
5277 } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0);
5278 /* check if the lock is busy and back out if we won the race */
5279 mtx_enter(&map->flags_lock);
5280 if ((map->flags & VM_MAP_BUSY) && (map->busy != curproc)) {
5281 rw_exit(&map->lock);
5282 goto tryagain;
5283 }
5284 mtx_leave(&map->flags_lock);
5285 } else {
5286 mtx_enter(&map->mtx);
5287 }
5288
5289 if (map->busy != curproc)
5290 map->timestamp++;
5291 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line));
5292 uvm_tree_sanity(map, file, line);
5293 uvm_tree_size_chk(map, file, line);
5294 }
5295
5296 void
5297 vm_map_lock_read_ln(struct vm_map *map, char *file, int line)
5298 {
5299 if ((map->flags & VM_MAP_INTRSAFE) == 0)
5300 rw_enter_read(&map->lock);
5301 else
5302 mtx_enter(&map->mtx);
5303 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line));
5304 uvm_tree_sanity(map, file, line);
5305 uvm_tree_size_chk(map, file, line);
5306 }
5307
5308 void
5309 vm_map_unlock_ln(struct vm_map *map, char *file, int line)
5310 {
5311 KASSERT(map->busy == NULL || map->busy == curproc);
5312 uvm_tree_sanity(map, file, line);
5313 uvm_tree_size_chk(map, file, line);
5314 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5315 if ((map->flags & VM_MAP_INTRSAFE) == 0)
5316 rw_exit(&map->lock);
5317 else
5318 mtx_leave(&map->mtx);
5319 }
5320
5321 void
5322 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line)
5323 {
5324 /* XXX: RO */ uvm_tree_sanity(map, file, line);
5325 /* XXX: RO */ uvm_tree_size_chk(map, file, line);
5326 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5327 if ((map->flags & VM_MAP_INTRSAFE) == 0)
5328 rw_exit_read(&map->lock);
5329 else
5330 mtx_leave(&map->mtx);
5331 }
5332
5333 void
5334 vm_map_busy_ln(struct vm_map *map, char *file, int line)
5335 {
5336 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5337 KASSERT(rw_write_held(&map->lock));
5338 KASSERT(map->busy == NULL);
5339
5340 mtx_enter(&map->flags_lock);
5341 map->busy = curproc;
5342 map->flags |= VM_MAP_BUSY;
5343 mtx_leave(&map->flags_lock);
5344 }
5345
5346 void
5347 vm_map_unbusy_ln(struct vm_map *map, char *file, int line)
5348 {
5349 int oflags;
5350
5351 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5352 KASSERT(map->busy == curproc);
5353
5354 mtx_enter(&map->flags_lock);
5355 oflags = map->flags;
5356 map->busy = NULL;
5357 map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK);
5358 mtx_leave(&map->flags_lock);
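/* Wake up anyone sleeping in vm_map_lock waiting for busy to clear. */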
5359 if (oflags & VM_MAP_WANTLOCK)
5360 wakeup(&map->flags);
5361 }
5362
5363 void
5364 vm_map_assert_anylock_ln(struct vm_map *map, char *file, int line)
5365 {
5366 LPRINTF(("map assert read or write locked: %p (at %s %d)\n", map, file, line));
5367 if ((map->flags & VM_MAP_INTRSAFE) == 0)
5368 rw_assert_anylock(&map->lock);
5369 else
5370 MUTEX_ASSERT_LOCKED(&map->mtx);
5371 }
5372
5373 void
5374 vm_map_assert_wrlock_ln(struct vm_map *map, char *file, int line)
5375 {
5376 LPRINTF(("map assert write locked: %p (at %s %d)\n", map, file, line));
5377 if ((map->flags & VM_MAP_INTRSAFE) == 0) {
5378 splassert(IPL_NONE);
5379 rw_assert_wrlock(&map->lock);
5380 } else
5381 MUTEX_ASSERT_LOCKED(&map->mtx);
5382 }
5383
5384 #ifndef SMALL_KERNEL
5385 int
5386 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve,
5387 size_t *lenp)
5388 {
5389 struct vm_map_entry *entry;
5390 vaddr_t start;
5391 int cnt, maxcnt, error = 0;
5392
5393 KASSERT(*lenp > 0);
5394 KASSERT((*lenp % sizeof(*kve)) == 0);
5395 cnt = 0;
5396 maxcnt = *lenp / sizeof(*kve);
5397 KASSERT(maxcnt > 0);
5398
5399 /*
5400 * Return only entries whose address is above the given base
5401 * address. This allows userland to iterate without knowing the
5402 * number of entries beforehand.
5403 */
5404 start = (vaddr_t)kve[0].kve_start;
5405
5406 vm_map_lock(map);
5407 RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
5408 if (cnt == maxcnt) {
5409 error = ENOMEM;
5410 break;
5411 }
5412 if (start != 0 && entry->start < start)
5413 continue;
5414 kve->kve_start = entry->start;
5415 kve->kve_end = entry->end;
5416 kve->kve_guard = entry->guard;
5417 kve->kve_fspace = entry->fspace;
5418 kve->kve_fspace_augment = entry->fspace_augment;
5419 kve->kve_offset = entry->offset;
5420 kve->kve_wired_count = entry->wired_count;
5421 kve->kve_etype = entry->etype;
5422 kve->kve_protection = entry->protection;
5423 kve->kve_max_protection = entry->max_protection;
5424 kve->kve_advice = entry->advice;
5425 kve->kve_inheritance = entry->inheritance;
5426 kve->kve_flags = entry->flags;
5427 kve++;
5428 cnt++;
5429 }
5430 vm_map_unlock(map);
5431
5432 KASSERT(cnt <= maxcnt);
5433
5434 *lenp = sizeof(*kve) * cnt;
5435 return error;
5436 }
5437 #endif
5438
5439
5440 RBT_GENERATE_AUGMENT(uvm_map_addr, vm_map_entry, daddrs.addr_entry,
5441 uvm_mapentry_addrcmp, uvm_map_addr_augment);
5442
5443
5444 /*
5445 * MD code: vmspace allocator setup.
5446 */
5447
5448 #ifdef __i386__
5449 void
5450 uvm_map_setup_md(struct vm_map *map)
5451 {
5452 vaddr_t min, max;
5453
5454 min = map->min_offset;
5455 max = map->max_offset;
5456
5457 /*
5458 * Ensure the selectors will not try to manage page 0;
5459 * it's too special.
5460 */
5461 if (min < VMMAP_MIN_ADDR)
5462 min = VMMAP_MIN_ADDR;
5463
5464 #if 0 /* Cool stuff, not yet */
5465 /* Executable code is special. */
5466 map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR);
5467 /* Place normal allocations beyond executable mappings. */
5468 map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max);
5469 #else /* Crappy stuff, for now */
5470 map->uaddr_any[0] = uaddr_rnd_create(min, max);
5471 #endif
5472
5473 #ifndef SMALL_KERNEL
5474 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5475 #endif /* !SMALL_KERNEL */
5476 }
5477 #elif __LP64__
5478 void
5479 uvm_map_setup_md(struct vm_map *map)
5480 {
5481 vaddr_t min, max;
5482
5483 min = map->min_offset;
5484 max = map->max_offset;
5485
5486 /*
5487 * Ensure the selectors will not try to manage page 0;
5488 * it's too special.
5489 */
5490 if (min < VMMAP_MIN_ADDR)
5491 min = VMMAP_MIN_ADDR;
5492
5493 #if 0 /* Cool stuff, not yet */
5494 map->uaddr_any[3] = uaddr_pivot_create(MAX(min, 0x100000000ULL), max);
5495 #else /* Crappy stuff, for now */
5496 map->uaddr_any[0] = uaddr_rnd_create(min, max);
5497 #endif
5498
5499 #ifndef SMALL_KERNEL
5500 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5501 #endif /* !SMALL_KERNEL */
5502 }
5503 #else /* non-i386, 32 bit */
5504 void
5505 uvm_map_setup_md(struct vm_map *map)
5506 {
5507 vaddr_t min, max;
5508
5509 min = map->min_offset;
5510 max = map->max_offset;
5511
5512 /*
5513 * Ensure the selectors will not try to manage page 0;
5514 * it's too special.
5515 */
5516 if (min < VMMAP_MIN_ADDR)
5517 min = VMMAP_MIN_ADDR;
5518
5519 #if 0 /* Cool stuff, not yet */
5520 map->uaddr_any[3] = uaddr_pivot_create(min, max);
5521 #else /* Crappy stuff, for now */
5522 map->uaddr_any[0] = uaddr_rnd_create(min, max);
5523 #endif
5524
5525 #ifndef SMALL_KERNEL
5526 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5527 #endif /* !SMALL_KERNEL */
5528 }
5529 #endif
5530