1 /*	$OpenBSD: uvm_map.c,v 1.334 2024/11/05 08:18:44 mpi Exp $	*/
2 /*	$NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $	*/
3 
4 /*
5  * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  *
19  *
20  * Copyright (c) 1997 Charles D. Cranor and Washington University.
21  * Copyright (c) 1991, 1993, The Regents of the University of California.
22  *
23  * All rights reserved.
24  *
25  * This code is derived from software contributed to Berkeley by
26  * The Mach Operating System project at Carnegie-Mellon University.
27  *
28  * Redistribution and use in source and binary forms, with or without
29  * modification, are permitted provided that the following conditions
30  * are met:
31  * 1. Redistributions of source code must retain the above copyright
32  *    notice, this list of conditions and the following disclaimer.
33  * 2. Redistributions in binary form must reproduce the above copyright
34  *    notice, this list of conditions and the following disclaimer in the
35  *    documentation and/or other materials provided with the distribution.
36  * 3. Neither the name of the University nor the names of its contributors
37  *    may be used to endorse or promote products derived from this software
38  *    without specific prior written permission.
39  *
40  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50  * SUCH DAMAGE.
51  *
52  *	@(#)vm_map.c    8.3 (Berkeley) 1/12/94
53  * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
54  *
55  *
56  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
57  * All rights reserved.
58  *
59  * Permission to use, copy, modify and distribute this software and
60  * its documentation is hereby granted, provided that both the copyright
61  * notice and this permission notice appear in all copies of the
62  * software, derivative works or modified versions, and any portions
63  * thereof, and that both notices appear in supporting documentation.
64  *
65  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
66  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
67  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
68  *
69  * Carnegie Mellon requests users of this software to return to
70  *
71  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
72  *  School of Computer Science
73  *  Carnegie Mellon University
74  *  Pittsburgh PA 15213-3890
75  *
76  * any improvements or extensions that they make and grant Carnegie the
77  * rights to redistribute these changes.
78  */
79 
80 /*
81  * uvm_map.c: uvm map operations
82  */
83 
84 /* #define DEBUG */
85 /* #define VMMAP_DEBUG */
86 
87 #include <sys/param.h>
88 #include <sys/systm.h>
89 #include <sys/acct.h>
90 #include <sys/mman.h>
91 #include <sys/proc.h>
92 #include <sys/malloc.h>
93 #include <sys/pool.h>
94 #include <sys/sysctl.h>
95 #include <sys/signalvar.h>
96 #include <sys/syslog.h>
97 #include <sys/user.h>
98 #include <sys/tracepoint.h>
99 
100 #ifdef SYSVSHM
101 #include <sys/shm.h>
102 #endif
103 
104 #include <uvm/uvm.h>
105 
106 #ifdef DDB
107 #include <uvm/uvm_ddb.h>
108 #endif
109 
110 #include <uvm/uvm_addr.h>
111 
112 
113 vsize_t			 uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t);
114 int			 uvm_mapent_isjoinable(struct vm_map*,
115 			    struct vm_map_entry*, struct vm_map_entry*);
116 struct vm_map_entry	*uvm_mapent_merge(struct vm_map*, struct vm_map_entry*,
117 			    struct vm_map_entry*, struct uvm_map_deadq*);
118 struct vm_map_entry	*uvm_mapent_tryjoin(struct vm_map*,
119 			    struct vm_map_entry*, struct uvm_map_deadq*);
120 struct vm_map_entry	*uvm_map_mkentry(struct vm_map*, struct vm_map_entry*,
121 			    struct vm_map_entry*, vaddr_t, vsize_t, int,
122 			    struct uvm_map_deadq*, struct vm_map_entry*);
123 struct vm_map_entry	*uvm_mapent_alloc(struct vm_map*, int);
124 void			 uvm_mapent_free(struct vm_map_entry*);
125 void			 uvm_unmap_kill_entry(struct vm_map*,
126 			    struct vm_map_entry*);
127 void			 uvm_unmap_kill_entry_withlock(struct vm_map *,
128 			    struct vm_map_entry *, int);
129 void			 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *);
130 void			 uvm_mapent_mkfree(struct vm_map*,
131 			    struct vm_map_entry*, struct vm_map_entry**,
132 			    struct uvm_map_deadq*, boolean_t);
133 void			 uvm_map_pageable_pgon(struct vm_map*,
134 			    struct vm_map_entry*, struct vm_map_entry*,
135 			    vaddr_t, vaddr_t);
136 int			 uvm_map_pageable_wire(struct vm_map*,
137 			    struct vm_map_entry*, struct vm_map_entry*,
138 			    vaddr_t, vaddr_t, int);
139 void			 uvm_map_setup_entries(struct vm_map*);
140 void			 uvm_map_setup_md(struct vm_map*);
141 void			 uvm_map_teardown(struct vm_map*);
142 void			 uvm_map_vmspace_update(struct vm_map*,
143 			    struct uvm_map_deadq*, int);
144 void			 uvm_map_kmem_grow(struct vm_map*,
145 			    struct uvm_map_deadq*, vsize_t, int);
146 void			 uvm_map_freelist_update_clear(struct vm_map*,
147 			    struct uvm_map_deadq*);
148 void			 uvm_map_freelist_update_refill(struct vm_map *, int);
149 void			 uvm_map_freelist_update(struct vm_map*,
150 			    struct uvm_map_deadq*, vaddr_t, vaddr_t,
151 			    vaddr_t, vaddr_t, int);
152 struct vm_map_entry	*uvm_map_fix_space(struct vm_map*, struct vm_map_entry*,
153 			    vaddr_t, vaddr_t, int);
154 int			 uvm_map_findspace(struct vm_map*,
155 			    struct vm_map_entry**, struct vm_map_entry**,
156 			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
157 			    vaddr_t);
158 vsize_t			 uvm_map_addr_augment_get(struct vm_map_entry*);
159 void			 uvm_map_addr_augment(struct vm_map_entry*);
160 
161 int			 uvm_map_inentry_recheck(u_long, vaddr_t,
162 			     struct p_inentry *);
163 boolean_t		 uvm_map_inentry_fix(struct proc *, struct p_inentry *,
164 			     vaddr_t, int (*)(vm_map_entry_t), u_long);
165 /*
166  * Tree management functions.
167  */
168 
169 static inline void	 uvm_mapent_copy(struct vm_map_entry*,
170 			    struct vm_map_entry*);
171 static inline int	 uvm_mapentry_addrcmp(const struct vm_map_entry*,
172 			    const struct vm_map_entry*);
173 void			 uvm_mapent_free_insert(struct vm_map*,
174 			    struct uvm_addr_state*, struct vm_map_entry*);
175 void			 uvm_mapent_free_remove(struct vm_map*,
176 			    struct uvm_addr_state*, struct vm_map_entry*);
177 void			 uvm_mapent_addr_insert(struct vm_map*,
178 			    struct vm_map_entry*);
179 void			 uvm_mapent_addr_remove(struct vm_map*,
180 			    struct vm_map_entry*);
181 void			 uvm_map_splitentry(struct vm_map*,
182 			    struct vm_map_entry*, struct vm_map_entry*,
183 			    vaddr_t);
184 vsize_t			 uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t);
185 
186 /*
187  * uvm_vmspace_fork helper functions.
188  */
189 struct vm_map_entry	*uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t,
190 			    vsize_t, vm_prot_t, vm_prot_t,
191 			    struct vm_map_entry*, struct uvm_map_deadq*, int,
192 			    int);
193 struct vm_map_entry	*uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t,
194 			    vsize_t, vm_prot_t, vm_prot_t, struct vm_map*,
195 			    struct vm_map_entry*, struct uvm_map_deadq*);
196 struct vm_map_entry	*uvm_mapent_forkshared(struct vmspace*, struct vm_map*,
197 			    struct vm_map*, struct vm_map_entry*,
198 			    struct uvm_map_deadq*);
199 struct vm_map_entry	*uvm_mapent_forkcopy(struct vmspace*, struct vm_map*,
200 			    struct vm_map*, struct vm_map_entry*,
201 			    struct uvm_map_deadq*);
202 struct vm_map_entry	*uvm_mapent_forkzero(struct vmspace*, struct vm_map*,
203 			    struct vm_map*, struct vm_map_entry*,
204 			    struct uvm_map_deadq*);
205 
206 /*
207  * Tree validation.
208  */
209 #ifdef VMMAP_DEBUG
210 void			 uvm_tree_assert(struct vm_map*, int, char*,
211 			    char*, int);
212 #define UVM_ASSERT(map, cond, file, line)				\
213 	uvm_tree_assert((map), (cond), #cond, (file), (line))
214 void			 uvm_tree_sanity(struct vm_map*, char*, int);
215 void			 uvm_tree_size_chk(struct vm_map*, char*, int);
216 void			 vmspace_validate(struct vm_map*);
217 #else
218 #define uvm_tree_sanity(_map, _file, _line)		do {} while (0)
219 #define uvm_tree_size_chk(_map, _file, _line)		do {} while (0)
220 #define vmspace_validate(_map)				do {} while (0)
221 #endif
222 
223 /*
224  * The kernel map will initially be VM_MAP_KSIZE_INIT bytes.
225  * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes.
226  *
227  * We attempt to grow by VM_MAP_KSIZE_ALLOCMUL times the allocation size
228  * each time.
229  */
230 #define VM_MAP_KSIZE_INIT	(512 * (vaddr_t)PAGE_SIZE)
231 #define VM_MAP_KSIZE_DELTA	(256 * (vaddr_t)PAGE_SIZE)
232 #define VM_MAP_KSIZE_ALLOCMUL	4
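
/*
 * Illustrative sketch only (the authoritative computation lives in
 * uvm_map_kmem_grow(), which is declared above but not shown in this
 * section; `alloc_sz' is a hypothetical name): per the comment above, a
 * kernel allocation of alloc_sz bytes grows the map by roughly
 *
 *	grow = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA);
 *
 * e.g. with 4 KB pages, a 256 KB allocation grows the map by
 * MAX(4 * 256 KB, 1 MB) = 1 MB.
 */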
233 
234 /* auto-allocate address lower bound */
235 #define VMMAP_MIN_ADDR		PAGE_SIZE
236 
237 
238 #ifdef DEADBEEF0
239 #define UVMMAP_DEADBEEF		((unsigned long)DEADBEEF0)
240 #else
241 #define UVMMAP_DEADBEEF		((unsigned long)0xdeadd0d0)
242 #endif
243 
244 #ifdef DEBUG
245 int uvm_map_printlocks = 0;
246 
247 #define LPRINTF(_args)							\
248 	do {								\
249 		if (uvm_map_printlocks)					\
250 			printf _args;					\
251 	} while (0)
252 #else
253 #define LPRINTF(_args)	do {} while (0)
254 #endif
255 
256 static struct mutex uvm_kmapent_mtx;
257 static struct timeval uvm_kmapent_last_warn_time;
258 static struct timeval uvm_kmapent_warn_rate = { 10, 0 };
259 
260 const char vmmapbsy[] = "vmmapbsy";
261 
262 /*
263  * pool for vmspace structures.
264  */
265 struct pool uvm_vmspace_pool;
266 
267 /*
268  * pool for dynamically-allocated map entries.
269  */
270 struct pool uvm_map_entry_pool;
271 struct pool uvm_map_entry_kmem_pool;
272 
273 /*
274  * This global represents the end of the kernel virtual address
275  * space. If we want to exceed this, we must grow the kernel
276  * virtual address space dynamically.
277  *
278  * Note, this variable is locked by kernel_map's lock.
279  */
280 vaddr_t uvm_maxkaddr;
281 
282 /*
283  * Locking predicate.
284  */
285 #define UVM_MAP_REQ_WRITE(_map)						\
286 	do {								\
287 		if ((_map)->ref_count > 0) {				\
288 			if (((_map)->flags & VM_MAP_INTRSAFE) == 0)	\
289 				rw_assert_wrlock(&(_map)->lock);	\
290 			else						\
291 				MUTEX_ASSERT_LOCKED(&(_map)->mtx);	\
292 		}							\
293 	} while (0)
294 
295 #define	vm_map_modflags(map, set, clear)				\
296 	do {								\
297 		mtx_enter(&(map)->flags_lock);				\
298 		(map)->flags = ((map)->flags | (set)) & ~(clear);	\
299 		mtx_leave(&(map)->flags_lock);				\
300 	} while (0)
301 
302 
303 /*
304  * Tree describing entries by address.
305  *
306  * Addresses are unique.
307  * Entries with start == end may only exist if they are the first entry
308  * (sorted by address) within a free-memory tree.
309  */
310 
311 static inline int
312 uvm_mapentry_addrcmp(const struct vm_map_entry *e1,
313     const struct vm_map_entry *e2)
314 {
315 	return e1->start < e2->start ? -1 : e1->start > e2->start;
316 }
317 
318 /*
319  * Copy mapentry.
320  */
321 static inline void
322 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
323 {
324 	caddr_t csrc, cdst;
325 	size_t sz;
326 
327 	csrc = (caddr_t)src;
328 	cdst = (caddr_t)dst;
329 	csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
330 	cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
331 
332 	sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) -
333 	    offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
334 	memcpy(cdst, csrc, sz);
335 }
336 
337 /*
338  * Handle free-list insertion.
339  */
340 void
341 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr,
342     struct vm_map_entry *entry)
343 {
344 	const struct uvm_addr_functions *fun;
345 #ifdef VMMAP_DEBUG
346 	vaddr_t min, max, bound;
347 #endif
348 
349 #ifdef VMMAP_DEBUG
350 	/*
351 	 * Boundary check.
352 	 * Boundaries are folded if they go on the same free list.
353 	 */
354 	min = VMMAP_FREE_START(entry);
355 	max = VMMAP_FREE_END(entry);
356 
357 	while (min < max) {
358 		bound = uvm_map_boundary(map, min, max);
359 		KASSERT(uvm_map_uaddr(map, min) == uaddr);
360 		min = bound;
361 	}
362 #endif
363 	KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0);
364 	KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0);
365 
366 	UVM_MAP_REQ_WRITE(map);
367 
368 	/* Actual insert: forward to uaddr pointer. */
369 	if (uaddr != NULL) {
370 		fun = uaddr->uaddr_functions;
371 		KDASSERT(fun != NULL);
372 		if (fun->uaddr_free_insert != NULL)
373 			(*fun->uaddr_free_insert)(map, uaddr, entry);
374 		entry->etype |= UVM_ET_FREEMAPPED;
375 	}
376 
377 	/* Update fspace augmentation. */
378 	uvm_map_addr_augment(entry);
379 }
380 
381 /*
382  * Handle free-list removal.
383  */
384 void
385 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr,
386     struct vm_map_entry *entry)
387 {
388 	const struct uvm_addr_functions *fun;
389 
390 	KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL);
391 	KASSERT(uvm_map_uaddr_e(map, entry) == uaddr);
392 	UVM_MAP_REQ_WRITE(map);
393 
394 	if (uaddr != NULL) {
395 		fun = uaddr->uaddr_functions;
396 		if (fun->uaddr_free_remove != NULL)
397 			(*fun->uaddr_free_remove)(map, uaddr, entry);
398 		entry->etype &= ~UVM_ET_FREEMAPPED;
399 	}
400 }
401 
402 /*
403  * Handle address tree insertion.
404  */
405 void
406 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry)
407 {
408 	struct vm_map_entry *res;
409 
410 	if (!RBT_CHECK(uvm_map_addr, entry, UVMMAP_DEADBEEF))
411 		panic("uvm_mapent_addr_insert: entry still in addr list");
412 	KDASSERT(entry->start <= entry->end);
413 	KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 &&
414 	    (entry->end & (vaddr_t)PAGE_MASK) == 0);
415 
416 	TRACEPOINT(uvm, map_insert,
417 	    entry->start, entry->end, entry->protection, NULL);
418 
419 	UVM_MAP_REQ_WRITE(map);
420 	res = RBT_INSERT(uvm_map_addr, &map->addr, entry);
421 	if (res != NULL) {
422 		panic("uvm_mapent_addr_insert: map %p entry %p "
423 		    "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision "
424 		    "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)",
425 		    map, entry,
426 		    entry->start, entry->end, entry->guard, entry->fspace,
427 		    res, res->start, res->end, res->guard, res->fspace);
428 	}
429 }
430 
431 /*
432  * Handle address tree removal.
433  */
434 void
435 uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry)
436 {
437 	struct vm_map_entry *res;
438 
439 	TRACEPOINT(uvm, map_remove,
440 	    entry->start, entry->end, entry->protection, NULL);
441 
442 	UVM_MAP_REQ_WRITE(map);
443 	res = RBT_REMOVE(uvm_map_addr, &map->addr, entry);
444 	if (res != entry)
445 		panic("uvm_mapent_addr_remove");
446 	RBT_POISON(uvm_map_addr, entry, UVMMAP_DEADBEEF);
447 }
448 
449 /*
450  * uvm_map_reference: add reference to a map
451  *
452  * => map need not be locked
453  */
454 void
455 uvm_map_reference(struct vm_map *map)
456 {
457 	atomic_inc_int(&map->ref_count);
458 }
459 
460 void
461 uvm_map_lock_entry(struct vm_map_entry *entry)
462 {
463 	if (entry->aref.ar_amap != NULL) {
464 		amap_lock(entry->aref.ar_amap);
465 	}
466 	if (UVM_ET_ISOBJ(entry)) {
467 		rw_enter(entry->object.uvm_obj->vmobjlock, RW_WRITE);
468 	}
469 }
470 
471 void
472 uvm_map_unlock_entry(struct vm_map_entry *entry)
473 {
474 	if (UVM_ET_ISOBJ(entry)) {
475 		rw_exit(entry->object.uvm_obj->vmobjlock);
476 	}
477 	if (entry->aref.ar_amap != NULL) {
478 		amap_unlock(entry->aref.ar_amap);
479 	}
480 }
481 
482 /*
483  * Calculate the dused delta: pages in [min, max) outside the stack region.
484  */
485 vsize_t
486 uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max)
487 {
488 	struct vmspace *vm;
489 	vsize_t sz;
490 	vaddr_t lmax;
491 	vaddr_t stack_begin, stack_end; /* Position of stack. */
492 
493 	KASSERT(map->flags & VM_MAP_ISVMSPACE);
494 	vm_map_assert_anylock(map);
495 
496 	vm = (struct vmspace *)map;
497 	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
498 	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
499 
500 	sz = 0;
501 	while (min != max) {
502 		lmax = max;
503 		if (min < stack_begin && lmax > stack_begin)
504 			lmax = stack_begin;
505 		else if (min < stack_end && lmax > stack_end)
506 			lmax = stack_end;
507 
508 		if (min >= stack_begin && min < stack_end) {
509 			/* nothing */
510 		} else
511 			sz += lmax - min;
512 		min = lmax;
513 	}
514 
515 	return sz >> PAGE_SHIFT;
516 }
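
/*
 * Worked example (illustrative values): with 4 KB pages, min = 0x1000,
 * max = 0x5000 and the stack occupying [0x3000, 0x4000), the loop above
 * counts 0x2000 bytes below the stack and 0x1000 bytes above it,
 * returning 0x3000 >> PAGE_SHIFT = 3 pages.
 */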
517 
518 /*
519  * Find the entry describing the given address.
520  */
521 struct vm_map_entry*
522 uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr)
523 {
524 	struct vm_map_entry *iter;
525 
526 	iter = RBT_ROOT(uvm_map_addr, atree);
527 	while (iter != NULL) {
528 		if (iter->start > addr)
529 			iter = RBT_LEFT(uvm_map_addr, iter);
530 		else if (VMMAP_FREE_END(iter) <= addr)
531 			iter = RBT_RIGHT(uvm_map_addr, iter);
532 		else
533 			return iter;
534 	}
535 	return NULL;
536 }
537 
538 /*
539  * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry)
540  *
541  * Push dead entries into a linked list.
542  * Since the linked list abuses the address tree for storage, the entry
543  * must not be linked into a map.
544  *
545  * The deadq must be initialized with TAILQ_INIT() before the first push.
546  * uvm_unmap_detach(deadq, 0) will free the dead entries.
547  */
548 static inline void
549 dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry)
550 {
551 	TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq);
552 }
553 #define DEAD_ENTRY_PUSH(_headptr, _entry)				\
554 	dead_entry_push((_headptr), (_entry))
555 
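/*
 * Typical deadq usage, mirroring uvm_unmap() further below (`map',
 * `start' and `end' stand for caller-supplied values):
 *
 *	struct uvm_map_deadq dead;
 *
 *	TAILQ_INIT(&dead);
 *	vm_map_lock(map);
 *	uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE, FALSE);
 *	vm_map_unlock(map);
 *	uvm_unmap_detach(&dead, 0);
 *
 * uvm_unmap_detach() frees the entries collected on the deadq.
 */
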
556 /*
557  * Test if memory starting at addr with sz bytes is free.
558  *
559  * Fills in *start_ptr and *end_ptr to be the first and last entry describing
560  * the space.
561  * If called with prefilled *start_ptr and *end_ptr, they must be correct.
562  */
563 int
564 uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr,
565     struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr,
566     vaddr_t addr, vsize_t sz)
567 {
568 	struct uvm_addr_state *free;
569 	struct uvm_map_addr *atree;
570 	struct vm_map_entry *i, *i_end;
571 
572 	if (addr + sz < addr)
573 		return 0;
574 
575 	vm_map_assert_anylock(map);
576 
577 	/*
578 	 * Kernel memory above uvm_maxkaddr is considered unavailable.
579 	 */
580 	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
581 		if (addr + sz > uvm_maxkaddr)
582 			return 0;
583 	}
584 
585 	atree = &map->addr;
586 
587 	/*
588 	 * Fill in first, last, so they point at the entries containing the
589 	 * first and last address of the range.
590 	 * Note that if they are not NULL, we don't perform the lookup.
591 	 */
592 	KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL);
593 	if (*start_ptr == NULL) {
594 		*start_ptr = uvm_map_entrybyaddr(atree, addr);
595 		if (*start_ptr == NULL)
596 			return 0;
597 	} else
598 		KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr));
599 	if (*end_ptr == NULL) {
600 		if (VMMAP_FREE_END(*start_ptr) >= addr + sz)
601 			*end_ptr = *start_ptr;
602 		else {
603 			*end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1);
604 			if (*end_ptr == NULL)
605 				return 0;
606 		}
607 	} else
608 		KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1));
609 
610 	/* Validation. */
611 	KDASSERT(*start_ptr != NULL && *end_ptr != NULL);
612 	KDASSERT((*start_ptr)->start <= addr &&
613 	    VMMAP_FREE_END(*start_ptr) > addr &&
614 	    (*end_ptr)->start < addr + sz &&
615 	    VMMAP_FREE_END(*end_ptr) >= addr + sz);
616 
617 	/*
618 	 * Check that none of the entries intersect with <addr, addr+sz>.
619 	 * Also, if the entry belongs to uaddr_exe or uaddr_brk_stack, it is
620 	 * considered unavailable unless called by those allocators.
621 	 */
622 	i = *start_ptr;
623 	i_end = RBT_NEXT(uvm_map_addr, *end_ptr);
624 	for (; i != i_end;
625 	    i = RBT_NEXT(uvm_map_addr, i)) {
626 		if (i->start != i->end && i->end > addr)
627 			return 0;
628 
629 		/*
630 		 * uaddr_exe and uaddr_brk_stack may only be used
631 		 * by these allocators and the NULL uaddr (i.e. no
632 		 * uaddr).
633 		 * Reject if this requirement is not met.
634 		 */
635 		if (uaddr != NULL) {
636 			free = uvm_map_uaddr_e(map, i);
637 
638 			if (uaddr != free && free != NULL &&
639 			    (free == map->uaddr_exe ||
640 			     free == map->uaddr_brk_stack))
641 				return 0;
642 		}
643 	}
644 
645 	return -1;
646 }
647 
648 /*
649  * Invoke each address selector until an address is found.
650  * Will not invoke uaddr_exe.
651  */
652 int
653 uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first,
654     struct vm_map_entry**last, vaddr_t *addr, vsize_t sz,
655     vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint)
656 {
657 	struct uvm_addr_state *uaddr;
658 	int i;
659 
660 	/*
661 	 * Allocation for sz bytes at any address,
662 	 * using the addr selectors in order.
663 	 */
664 	for (i = 0; i < nitems(map->uaddr_any); i++) {
665 		uaddr = map->uaddr_any[i];
666 
667 		if (uvm_addr_invoke(map, uaddr, first, last,
668 		    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
669 			return 0;
670 	}
671 
672 	/* Fall back to brk() and stack() address selectors. */
673 	uaddr = map->uaddr_brk_stack;
674 	if (uvm_addr_invoke(map, uaddr, first, last,
675 	    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
676 		return 0;
677 
678 	return ENOMEM;
679 }
680 
681 /* Calculate entry augmentation value. */
682 vsize_t
683 uvm_map_addr_augment_get(struct vm_map_entry *entry)
684 {
685 	vsize_t			 augment;
686 	struct vm_map_entry	*left, *right;
687 
688 	augment = entry->fspace;
689 	if ((left = RBT_LEFT(uvm_map_addr, entry)) != NULL)
690 		augment = MAX(augment, left->fspace_augment);
691 	if ((right = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
692 		augment = MAX(augment, right->fspace_augment);
693 	return augment;
694 }
695 
696 /*
697  * Update augmentation data in entry.
698  */
699 void
700 uvm_map_addr_augment(struct vm_map_entry *entry)
701 {
702 	vsize_t			 augment;
703 
704 	while (entry != NULL) {
705 		/* Calculate value for augmentation. */
706 		augment = uvm_map_addr_augment_get(entry);
707 
708 		/*
709 		 * Descend update.
710 		 * Once we find an entry that already has the correct value,
711 		 * stop, since it means all its parents will use the correct
712 		 * value too.
713 		 */
714 		if (entry->fspace_augment == augment)
715 			return;
716 		entry->fspace_augment = augment;
717 		entry = RBT_PARENT(uvm_map_addr, entry);
718 	}
719 }
720 
721 /*
722  * uvm_mapanon: establish a valid mapping in map for an anon
723  *
724  * => *addr and sz must be a multiple of PAGE_SIZE.
725  * => *addr is ignored, except if flags contains UVM_FLAG_FIXED.
726  * => map must be unlocked.
727  *
728  * => align: align vaddr, must be a power-of-2.
729  *    Align is only a hint and will be ignored if the alignment fails.
730  */
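/*
 * Illustrative caller (a sketch, not taken from this file; `p' and
 * `size' are hypothetical): an anonymous, copy-on-write userland
 * mapping at an address of the kernel's choosing might look like
 *
 *	vaddr_t va = 0;
 *	error = uvm_mapanon(&p->p_vmspace->vm_map, &va, round_page(size),
 *	    0, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
 *	    MAP_INHERIT_COPY, MADV_NORMAL, UVM_FLAG_COPYONW));
 */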
731 int
732 uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz,
733     vsize_t align, unsigned int flags)
734 {
735 	struct vm_map_entry	*first, *last, *entry, *new;
736 	struct uvm_map_deadq	 dead;
737 	vm_prot_t		 prot;
738 	vm_prot_t		 maxprot;
739 	vm_inherit_t		 inherit;
740 	int			 advice;
741 	int			 error;
742 	vaddr_t			 pmap_align, pmap_offset;
743 	vaddr_t			 hint;
744 
745 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE);
746 	KASSERT(map != kernel_map);
747 	KASSERT((map->flags & UVM_FLAG_HOLE) == 0);
748 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
749 	splassert(IPL_NONE);
750 	KASSERT((flags & UVM_FLAG_TRYLOCK) == 0);
751 
752 	/*
753 	 * We use pmap_align and pmap_offset as alignment and offset variables.
754 	 *
755 	 * Because the align parameter takes precedence over pmap prefer,
756 	 * pmap_align must be set to align, with pmap_offset = 0, whenever
757 	 * pmap_prefer cannot provide the requested alignment.
758 	 */
759 	pmap_align = MAX(align, PAGE_SIZE);
760 	pmap_offset = 0;
761 
762 	/* Decode parameters. */
763 	prot = UVM_PROTECTION(flags);
764 	maxprot = UVM_MAXPROTECTION(flags);
765 	advice = UVM_ADVICE(flags);
766 	inherit = UVM_INHERIT(flags);
767 	error = 0;
768 	hint = trunc_page(*addr);
769 	TAILQ_INIT(&dead);
770 	KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
771 	KASSERT((align & (align - 1)) == 0);
772 
773 	/* Check protection. */
774 	if ((prot & maxprot) != prot)
775 		return EACCES;
776 
777 	/*
778 	 * Before grabbing the lock, allocate a map entry for later
779 	 * use to ensure we don't wait for memory while holding the
780 	 * vm_map_lock.
781 	 */
782 	new = uvm_mapent_alloc(map, flags);
783 	if (new == NULL)
784 		return ENOMEM;
785 
786 	vm_map_lock(map);
787 	first = last = NULL;
788 	if (flags & UVM_FLAG_FIXED) {
789 		/*
790 		 * Fixed location.
791 		 *
792 		 * Note: we ignore align, pmap_prefer.
793 		 * Fill in first, last and *addr.
794 		 */
795 		KASSERT((*addr & PAGE_MASK) == 0);
796 
797 		/* Check that the space is available. */
798 		if (flags & UVM_FLAG_UNMAP) {
799 			if ((flags & UVM_FLAG_STACK) &&
800 			    !uvm_map_is_stack_remappable(map, *addr, sz,
801 				(flags & UVM_FLAG_SIGALTSTACK))) {
802 				error = EINVAL;
803 				goto unlock;
804 			}
805 			if (uvm_unmap_remove(map, *addr, *addr + sz, &dead,
806 			    FALSE, TRUE,
807 			    (flags & UVM_FLAG_SIGALTSTACK) ? FALSE : TRUE) != 0) {
808 				error = EPERM;	/* immutable entries found */
809 				goto unlock;
810 			}
811 		}
812 		if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
813 			error = ENOMEM;
814 			goto unlock;
815 		}
816 	} else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
817 	    (align == 0 || (*addr & (align - 1)) == 0) &&
818 	    uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
819 		/*
820 		 * Address used as hint.
821 		 *
822 		 * Note: we enforce the alignment restriction,
823 		 * but ignore pmap_prefer.
824 		 */
825 	} else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
826 		/* Run selection algorithm for executables. */
827 		error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
828 		    addr, sz, pmap_align, pmap_offset, prot, hint);
829 
830 		if (error != 0)
831 			goto unlock;
832 	} else {
833 		/* Update freelists from vmspace. */
834 		uvm_map_vmspace_update(map, &dead, flags);
835 
836 		error = uvm_map_findspace(map, &first, &last, addr, sz,
837 		    pmap_align, pmap_offset, prot, hint);
838 
839 		if (error != 0)
840 			goto unlock;
841 	}
842 
843 	/* Double-check if selected address doesn't cause overflow. */
844 	if (*addr + sz < *addr) {
845 		error = ENOMEM;
846 		goto unlock;
847 	}
848 
849 	/* If we only want a query, return now. */
850 	if (flags & UVM_FLAG_QUERY) {
851 		error = 0;
852 		goto unlock;
853 	}
854 
855 	/*
856 	 * Create new entry.
857 	 * first and last may be invalidated after this call.
858 	 */
859 	entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
860 	    new);
861 	if (entry == NULL) {
862 		error = ENOMEM;
863 		goto unlock;
864 	}
865 	new = NULL;
866 	KDASSERT(entry->start == *addr && entry->end == *addr + sz);
867 	entry->object.uvm_obj = NULL;
868 	entry->offset = 0;
869 	entry->protection = prot;
870 	entry->max_protection = maxprot;
871 	entry->inheritance = inherit;
872 	entry->wired_count = 0;
873 	entry->advice = advice;
874 	if (flags & UVM_FLAG_STACK) {
875 		entry->etype |= UVM_ET_STACK;
876 		if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP))
877 			map->sserial++;
878 	}
879 	if (flags & UVM_FLAG_COPYONW) {
880 		entry->etype |= UVM_ET_COPYONWRITE;
881 		if ((flags & UVM_FLAG_OVERLAY) == 0)
882 			entry->etype |= UVM_ET_NEEDSCOPY;
883 	}
884 	if (flags & UVM_FLAG_CONCEAL)
885 		entry->etype |= UVM_ET_CONCEAL;
886 	if (flags & UVM_FLAG_OVERLAY) {
887 		entry->aref.ar_pageoff = 0;
888 		entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
889 	}
890 
891 	/* Update map and process statistics. */
892 	map->size += sz;
893 	if (prot != PROT_NONE) {
894 		((struct vmspace *)map)->vm_dused +=
895 		    uvmspace_dused(map, *addr, *addr + sz);
896 	}
897 
898 unlock:
899 	vm_map_unlock(map);
900 
901 	/*
902 	 * Remove dead entries.
903 	 *
904 	 * Dead entries may be the result of merging.
905 	 * uvm_map_mkentry may also create dead entries, when it attempts to
906 	 * destroy free-space entries.
907 	 */
908 	uvm_unmap_detach(&dead, 0);
909 
910 	if (new)
911 		uvm_mapent_free(new);
912 	return error;
913 }
914 
915 /*
916  * uvm_map: establish a valid mapping in map
917  *
918  * => *addr and sz must be a multiple of PAGE_SIZE.
919  * => map must be unlocked.
920  * => <uobj,uoffset> value meanings (4 cases):
921  *	[1] <NULL,uoffset>		== uoffset is a hint for PMAP_PREFER
922  *	[2] <NULL,UVM_UNKNOWN_OFFSET>	== don't PMAP_PREFER
923  *	[3] <uobj,uoffset>		== normal mapping
924  *	[4] <uobj,UVM_UNKNOWN_OFFSET>	== uvm_map finds offset based on VA
925  *
926  *   case [4] is for kernel mappings where we don't know the offset until
927  *   we've found a virtual address.   note that kernel object offsets are
928  *   always relative to vm_map_min(kernel_map).
929  *
930  * => align: align vaddr, must be a power-of-2.
931  *    Align is only a hint and will be ignored if the alignment fails.
932  */
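/*
 * Illustrative caller (a sketch, not taken from this file; `size' is
 * hypothetical): an anonymous kernel mapping with no PMAP_PREFER hint
 * corresponds to case [2] above:
 *
 *	vaddr_t va = 0;
 *	error = uvm_map(kernel_map, &va, round_page(size), NULL,
 *	    UVM_UNKNOWN_OFFSET, 0,
 *	    UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
 *	    MAP_INHERIT_NONE, MADV_NORMAL, 0));
 */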
933 int
934 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz,
935     struct uvm_object *uobj, voff_t uoffset,
936     vsize_t align, unsigned int flags)
937 {
938 	struct vm_map_entry	*first, *last, *entry, *new;
939 	struct uvm_map_deadq	 dead;
940 	vm_prot_t		 prot;
941 	vm_prot_t		 maxprot;
942 	vm_inherit_t		 inherit;
943 	int			 advice;
944 	int			 error;
945 	vaddr_t			 pmap_align, pmap_offset;
946 	vaddr_t			 hint;
947 
948 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
949 		splassert(IPL_NONE);
950 	else
951 		splassert(IPL_VM);
952 
953 	/*
954 	 * We use pmap_align and pmap_offset as alignment and offset variables.
955 	 *
956 	 * Because the align parameter takes precedence over pmap prefer,
957 	 * pmap_align must be set to align, with pmap_offset = 0, whenever
958 	 * pmap_prefer cannot provide the requested alignment.
959 	 */
960 	if (uoffset == UVM_UNKNOWN_OFFSET) {
961 		pmap_align = MAX(align, PAGE_SIZE);
962 		pmap_offset = 0;
963 	} else {
964 		pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE);
965 		pmap_offset = PMAP_PREFER_OFFSET(uoffset);
966 
967 		if (align == 0 ||
968 		    (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) {
969 			/* pmap_offset satisfies align, no change. */
970 		} else {
971 			/* Align takes precedence over pmap prefer. */
972 			pmap_align = align;
973 			pmap_offset = 0;
974 		}
975 	}
976 
977 	/* Decode parameters. */
978 	prot = UVM_PROTECTION(flags);
979 	maxprot = UVM_MAXPROTECTION(flags);
980 	advice = UVM_ADVICE(flags);
981 	inherit = UVM_INHERIT(flags);
982 	error = 0;
983 	hint = trunc_page(*addr);
984 	TAILQ_INIT(&dead);
985 	KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
986 	KASSERT((align & (align - 1)) == 0);
987 
988 	/* Holes are incompatible with other types of mappings. */
989 	if (flags & UVM_FLAG_HOLE) {
990 		KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) &&
991 		    (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0);
992 	}
993 
994 	/* Unset hint for kernel_map non-fixed allocations. */
995 	if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED))
996 		hint = 0;
997 
998 	/* Check protection. */
999 	if ((prot & maxprot) != prot)
1000 		return EACCES;
1001 
1002 	if (map == kernel_map &&
1003 	    (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
1004 		panic("uvm_map: kernel map W^X violation requested");
1005 
1006 	/*
1007 	 * Before grabbing the lock, allocate a map entry for later
1008 	 * use to ensure we don't wait for memory while holding the
1009 	 * vm_map_lock.
1010 	 */
1011 	new = uvm_mapent_alloc(map, flags);
1012 	if (new == NULL)
1013 		return ENOMEM;
1014 
1015 	if (flags & UVM_FLAG_TRYLOCK) {
1016 		if (vm_map_lock_try(map) == FALSE) {
1017 			error = EFAULT;
1018 			goto out;
1019 		}
1020 	} else {
1021 		vm_map_lock(map);
1022 	}
1023 
1024 	first = last = NULL;
1025 	if (flags & UVM_FLAG_FIXED) {
1026 		/*
1027 		 * Fixed location.
1028 		 *
1029 		 * Note: we ignore align, pmap_prefer.
1030 		 * Fill in first, last and *addr.
1031 		 */
1032 		KASSERT((*addr & PAGE_MASK) == 0);
1033 
1034 		/*
1035 		 * Grow pmap to include allocated address.
1036 		 * If the growth fails, the allocation will fail too.
1037 		 */
1038 		if ((map->flags & VM_MAP_ISVMSPACE) == 0 &&
1039 		    uvm_maxkaddr < (*addr + sz)) {
1040 			uvm_map_kmem_grow(map, &dead,
1041 			    *addr + sz - uvm_maxkaddr, flags);
1042 		}
1043 
1044 		/* Check that the space is available. */
1045 		if (flags & UVM_FLAG_UNMAP) {
1046 			if (uvm_unmap_remove(map, *addr, *addr + sz, &dead,
1047 			    FALSE, TRUE, TRUE) != 0) {
1048 				error = EPERM;	/* immutable entries found */
1049 				goto unlock;
1050 			}
1051 		}
1052 		if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1053 			error = ENOMEM;
1054 			goto unlock;
1055 		}
1056 	} else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
1057 	    (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE &&
1058 	    (align == 0 || (*addr & (align - 1)) == 0) &&
1059 	    uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1060 		/*
1061 		 * Address used as hint.
1062 		 *
1063 		 * Note: we enforce the alignment restriction,
1064 		 * but ignore pmap_prefer.
1065 		 */
1066 	} else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
1067 		/* Run selection algorithm for executables. */
1068 		error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
1069 		    addr, sz, pmap_align, pmap_offset, prot, hint);
1070 
1071 		/* Grow kernel memory and try again. */
1072 		if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
1073 			uvm_map_kmem_grow(map, &dead, sz, flags);
1074 
1075 			error = uvm_addr_invoke(map, map->uaddr_exe,
1076 			    &first, &last, addr, sz,
1077 			    pmap_align, pmap_offset, prot, hint);
1078 		}
1079 
1080 		if (error != 0)
1081 			goto unlock;
1082 	} else {
1083 		/* Update freelists from vmspace. */
1084 		if (map->flags & VM_MAP_ISVMSPACE)
1085 			uvm_map_vmspace_update(map, &dead, flags);
1086 
1087 		error = uvm_map_findspace(map, &first, &last, addr, sz,
1088 		    pmap_align, pmap_offset, prot, hint);
1089 
1090 		/* Grow kernel memory and try again. */
1091 		if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
1092 			uvm_map_kmem_grow(map, &dead, sz, flags);
1093 
1094 			error = uvm_map_findspace(map, &first, &last, addr, sz,
1095 			    pmap_align, pmap_offset, prot, hint);
1096 		}
1097 
1098 		if (error != 0)
1099 			goto unlock;
1100 	}
1101 
1102 	/* Double-check if selected address doesn't cause overflow. */
1103 	if (*addr + sz < *addr) {
1104 		error = ENOMEM;
1105 		goto unlock;
1106 	}
1107 
1108 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE ||
1109 	    uvm_maxkaddr >= *addr + sz);
1110 
1111 	/* If we only want a query, return now. */
1112 	if (flags & UVM_FLAG_QUERY) {
1113 		error = 0;
1114 		goto unlock;
1115 	}
1116 
1117 	if (uobj == NULL)
1118 		uoffset = 0;
1119 	else if (uoffset == UVM_UNKNOWN_OFFSET) {
1120 		KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
1121 		uoffset = *addr - vm_map_min(kernel_map);
1122 	}
1123 
1124 	/*
1125 	 * Create new entry.
1126 	 * first and last may be invalidated after this call.
1127 	 */
1128 	entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
1129 	    new);
1130 	if (entry == NULL) {
1131 		error = ENOMEM;
1132 		goto unlock;
1133 	}
1134 	new = NULL;
1135 	KDASSERT(entry->start == *addr && entry->end == *addr + sz);
1136 	entry->object.uvm_obj = uobj;
1137 	entry->offset = uoffset;
1138 	entry->protection = prot;
1139 	entry->max_protection = maxprot;
1140 	entry->inheritance = inherit;
1141 	entry->wired_count = 0;
1142 	entry->advice = advice;
1143 	if (flags & UVM_FLAG_STACK) {
1144 		entry->etype |= UVM_ET_STACK;
1145 		if (flags & UVM_FLAG_UNMAP)
1146 			map->sserial++;
1147 	}
1148 	if (uobj)
1149 		entry->etype |= UVM_ET_OBJ;
1150 	else if (flags & UVM_FLAG_HOLE)
1151 		entry->etype |= UVM_ET_HOLE;
1152 	if (flags & UVM_FLAG_NOFAULT)
1153 		entry->etype |= UVM_ET_NOFAULT;
1154 	if (flags & UVM_FLAG_WC)
1155 		entry->etype |= UVM_ET_WC;
1156 	if (flags & UVM_FLAG_COPYONW) {
1157 		entry->etype |= UVM_ET_COPYONWRITE;
1158 		if ((flags & UVM_FLAG_OVERLAY) == 0)
1159 			entry->etype |= UVM_ET_NEEDSCOPY;
1160 	}
1161 	if (flags & UVM_FLAG_CONCEAL)
1162 		entry->etype |= UVM_ET_CONCEAL;
1163 	if (flags & UVM_FLAG_OVERLAY) {
1164 		entry->aref.ar_pageoff = 0;
1165 		entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
1166 	}
1167 
1168 	/* Update map and process statistics. */
1169 	if (!(flags & UVM_FLAG_HOLE)) {
1170 		map->size += sz;
1171 		if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL &&
1172 		    prot != PROT_NONE) {
1173 			((struct vmspace *)map)->vm_dused +=
1174 			    uvmspace_dused(map, *addr, *addr + sz);
1175 		}
1176 	}
1177 
1178 	/*
1179 	 * Try to merge entry.
1180 	 *
1181 	 * Userland allocations are kept separate most of the time.
1182 	 * Forego the effort of merging what usually can't be merged
1183 	 * and only attempt the merge when it concerns a kernel entry.
1184 	 */
1185 	if ((flags & UVM_FLAG_NOMERGE) == 0 &&
1186 	    (map->flags & VM_MAP_ISVMSPACE) == 0)
1187 		uvm_mapent_tryjoin(map, entry, &dead);
1188 
1189 unlock:
1190 	vm_map_unlock(map);
1191 
1192 	/*
1193 	 * Remove dead entries.
1194 	 *
1195 	 * Dead entries may be the result of merging.
1196 	 * uvm_map_mkentry may also create dead entries, when it attempts to
1197 	 * destroy free-space entries.
1198 	 */
1199 	if (map->flags & VM_MAP_INTRSAFE)
1200 		uvm_unmap_detach_intrsafe(&dead);
1201 	else
1202 		uvm_unmap_detach(&dead, 0);
1203 out:
1204 	if (new)
1205 		uvm_mapent_free(new);
1206 	return error;
1207 }
1208 
1209 /*
1210  * True iff e1 and e2 can be joined together.
1211  */
1212 int
1213 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1,
1214     struct vm_map_entry *e2)
1215 {
1216 	KDASSERT(e1 != NULL && e2 != NULL);
1217 
1218 	/* Must be the same entry type and not have free memory between. */
1219 	if (e1->etype != e2->etype || e1->end != e2->start)
1220 		return 0;
1221 
1222 	/* Submaps are never joined. */
1223 	if (UVM_ET_ISSUBMAP(e1))
1224 		return 0;
1225 
1226 	/* Never merge wired memory. */
1227 	if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2))
1228 		return 0;
1229 
1230 	/* Protection, inheritance and advice must be equal. */
1231 	if (e1->protection != e2->protection ||
1232 	    e1->max_protection != e2->max_protection ||
1233 	    e1->inheritance != e2->inheritance ||
1234 	    e1->advice != e2->advice)
1235 		return 0;
1236 
1237 	/* If uvm_object: object itself and offsets within object must match. */
1238 	if (UVM_ET_ISOBJ(e1)) {
1239 		if (e1->object.uvm_obj != e2->object.uvm_obj)
1240 			return 0;
1241 		if (e1->offset + (e1->end - e1->start) != e2->offset)
1242 			return 0;
1243 	}
1244 
1245 	/*
1246 	 * Cannot join shared amaps.
1247 	 * Note: no need to lock amap to look at refs, since we don't care
1248 	 * about its exact value.
1249 	 * If it is 1 (i.e. we have the only reference) it will stay there.
1250 	 */
1251 	if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1)
1252 		return 0;
1253 	if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1)
1254 		return 0;
1255 
1256 	/* Apparently, e1 and e2 match. */
1257 	return 1;
1258 }
1259 
1260 /*
1261  * Join support function.
1262  *
1263  * Returns the merged entry on success.
1264  * Returns NULL if the merge failed.
1265  */
1266 struct vm_map_entry*
1267 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1,
1268     struct vm_map_entry *e2, struct uvm_map_deadq *dead)
1269 {
1270 	struct uvm_addr_state *free;
1271 
1272 	/*
1273 	 * Merging is not supported for map entries that
1274 	 * contain an amap in e1. This should never happen
1275 	 * anyway, because only kernel entries are merged.
1276 	 * These do not contain amaps.
1277 	 * e2 contains no real information in its amap,
1278 	 * so it can be erased immediately.
1279 	 */
1280 	KASSERT(e1->aref.ar_amap == NULL);
1281 
1282 	/*
1283 	 * Don't drop obj reference:
1284 	 * uvm_unmap_detach will do this for us.
1285 	 */
1286 	free = uvm_map_uaddr_e(map, e1);
1287 	uvm_mapent_free_remove(map, free, e1);
1288 
1289 	free = uvm_map_uaddr_e(map, e2);
1290 	uvm_mapent_free_remove(map, free, e2);
1291 	uvm_mapent_addr_remove(map, e2);
1292 	e1->end = e2->end;
1293 	e1->guard = e2->guard;
1294 	e1->fspace = e2->fspace;
1295 	uvm_mapent_free_insert(map, free, e1);
1296 
1297 	DEAD_ENTRY_PUSH(dead, e2);
1298 	return e1;
1299 }
1300 
1301 /*
1302  * Attempt forward and backward joining of entry.
1303  *
1304  * Returns entry after joins.
1305  * We are guaranteed that the amap of entry is either non-existent or
1306  * has never been used.
1307  */
1308 struct vm_map_entry*
1309 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry,
1310     struct uvm_map_deadq *dead)
1311 {
1312 	struct vm_map_entry *other;
1313 	struct vm_map_entry *merged;
1314 
1315 	/* Merge with previous entry. */
1316 	other = RBT_PREV(uvm_map_addr, entry);
1317 	if (other && uvm_mapent_isjoinable(map, other, entry)) {
1318 		merged = uvm_mapent_merge(map, other, entry, dead);
1319 		if (merged)
1320 			entry = merged;
1321 	}
1322 
1323 	/*
1324 	 * Merge with next entry.
1325 	 *
1326 	 * Because amap can only extend forward and the next entry
1327 	 * probably contains sensible info, only perform forward merging
1328 	 * in the absence of an amap.
1329 	 */
1330 	other = RBT_NEXT(uvm_map_addr, entry);
1331 	if (other && entry->aref.ar_amap == NULL &&
1332 	    other->aref.ar_amap == NULL &&
1333 	    uvm_mapent_isjoinable(map, entry, other)) {
1334 		merged = uvm_mapent_merge(map, entry, other, dead);
1335 		if (merged)
1336 			entry = merged;
1337 	}
1338 
1339 	return entry;
1340 }
1341 
1342 /*
1343  * Kill entries that are no longer in a map.
1344  */
1345 void
1346 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags)
1347 {
1348 	struct vm_map_entry *entry, *tmp;
1349 
1350 	TAILQ_FOREACH_SAFE(entry, deadq, dfree.deadq, tmp) {
1351 		/* Drop reference to amap, if we've got one. */
1352 		if (entry->aref.ar_amap)
1353 			amap_unref(entry->aref.ar_amap,
1354 			    entry->aref.ar_pageoff,
1355 			    atop(entry->end - entry->start),
1356 			    flags & AMAP_REFALL);
1357 
1358 		/* Drop reference to our backing object, if we've got one. */
1359 		if (UVM_ET_ISSUBMAP(entry)) {
1360 			/* ... unlikely to happen, but play it safe */
1361 			uvm_map_deallocate(entry->object.sub_map);
1362 		} else if (UVM_ET_ISOBJ(entry) &&
1363 		    entry->object.uvm_obj->pgops->pgo_detach) {
1364 			entry->object.uvm_obj->pgops->pgo_detach(
1365 			    entry->object.uvm_obj);
1366 		}
1367 
1368 		TAILQ_REMOVE(deadq, entry, dfree.deadq);
1369 		uvm_mapent_free(entry);
1370 	}
1371 }
1372 
1373 void
1374 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq)
1375 {
1376 	struct vm_map_entry *entry;
1377 
1378 	while ((entry = TAILQ_FIRST(deadq)) != NULL) {
1379 		KASSERT(entry->aref.ar_amap == NULL);
1380 		KASSERT(!UVM_ET_ISSUBMAP(entry));
1381 		KASSERT(!UVM_ET_ISOBJ(entry));
1382 		TAILQ_REMOVE(deadq, entry, dfree.deadq);
1383 		uvm_mapent_free(entry);
1384 	}
1385 }
1386 
1387 /*
1388  * Create and insert new entry.
1389  *
1390  * Returned entry contains new addresses and is inserted properly in the tree.
1391  * first and last are (probably) no longer valid.
1392  */
1393 struct vm_map_entry*
1394 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first,
1395     struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags,
1396     struct uvm_map_deadq *dead, struct vm_map_entry *new)
1397 {
1398 	struct vm_map_entry *entry, *prev;
1399 	struct uvm_addr_state *free;
1400 	vaddr_t min, max;	/* free space boundaries for new entry */
1401 
1402 	KDASSERT(map != NULL);
1403 	KDASSERT(first != NULL);
1404 	KDASSERT(last != NULL);
1405 	KDASSERT(dead != NULL);
1406 	KDASSERT(sz > 0);
1407 	KDASSERT(addr + sz > addr);
1408 	KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr);
1409 	KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz);
1410 	KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz));
1411 	uvm_tree_sanity(map, __FILE__, __LINE__);
1412 
1413 	min = addr + sz;
1414 	max = VMMAP_FREE_END(last);
1415 
1416 	/* Initialize new entry. */
1417 	if (new == NULL)
1418 		entry = uvm_mapent_alloc(map, flags);
1419 	else
1420 		entry = new;
1421 	if (entry == NULL)
1422 		return NULL;
1423 	entry->offset = 0;
1424 	entry->etype = 0;
1425 	entry->wired_count = 0;
1426 	entry->aref.ar_pageoff = 0;
1427 	entry->aref.ar_amap = NULL;
1428 
1429 	entry->start = addr;
1430 	entry->end = min;
1431 	entry->guard = 0;
1432 	entry->fspace = 0;
1433 
1434 	vm_map_assert_wrlock(map);
1435 
1436 	/* Reset free space in first. */
1437 	free = uvm_map_uaddr_e(map, first);
1438 	uvm_mapent_free_remove(map, free, first);
1439 	first->guard = 0;
1440 	first->fspace = 0;
1441 
1442 	/*
1443 	 * Remove all entries that are fully replaced.
1444 	 * We are iterating using last in reverse order.
1445 	 */
1446 	for (; first != last; last = prev) {
1447 		prev = RBT_PREV(uvm_map_addr, last);
1448 
1449 		KDASSERT(last->start == last->end);
1450 		free = uvm_map_uaddr_e(map, last);
1451 		uvm_mapent_free_remove(map, free, last);
1452 		uvm_mapent_addr_remove(map, last);
1453 		DEAD_ENTRY_PUSH(dead, last);
1454 	}
1455 	/* Remove first if it is entirely inside <addr, addr+sz>.  */
1456 	if (first->start == addr) {
1457 		uvm_mapent_addr_remove(map, first);
1458 		DEAD_ENTRY_PUSH(dead, first);
1459 	} else {
1460 		uvm_map_fix_space(map, first, VMMAP_FREE_START(first),
1461 		    addr, flags);
1462 	}
1463 
1464 	/* Finally, link in entry. */
1465 	uvm_mapent_addr_insert(map, entry);
1466 	uvm_map_fix_space(map, entry, min, max, flags);
1467 
1468 	uvm_tree_sanity(map, __FILE__, __LINE__);
1469 	return entry;
1470 }
1471 
1472 
1473 /*
1474  * uvm_mapent_alloc: allocate a map entry
1475  */
1476 struct vm_map_entry *
1477 uvm_mapent_alloc(struct vm_map *map, int flags)
1478 {
1479 	struct vm_map_entry *me, *ne;
1480 	int pool_flags;
1481 	int i;
1482 
1483 	pool_flags = PR_WAITOK;
1484 	if (flags & UVM_FLAG_TRYLOCK)
1485 		pool_flags = PR_NOWAIT;
1486 
1487 	if (map->flags & VM_MAP_INTRSAFE || cold) {
1488 		mtx_enter(&uvm_kmapent_mtx);
1489 		if (SLIST_EMPTY(&uvm.kentry_free)) {
1490 			ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty,
1491 			    &kd_nowait);
1492 			if (ne == NULL)
1493 				panic("uvm_mapent_alloc: cannot allocate map "
1494 				    "entry");
1495 			for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) {
1496 				SLIST_INSERT_HEAD(&uvm.kentry_free,
1497 				    &ne[i], daddrs.addr_kentry);
1498 			}
1499 			if (ratecheck(&uvm_kmapent_last_warn_time,
1500 			    &uvm_kmapent_warn_rate))
1501 				printf("uvm_mapent_alloc: out of static "
1502 				    "map entries\n");
1503 		}
1504 		me = SLIST_FIRST(&uvm.kentry_free);
1505 		SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry);
1506 		uvmexp.kmapent++;
1507 		mtx_leave(&uvm_kmapent_mtx);
1508 		me->flags = UVM_MAP_STATIC;
1509 	} else if (map == kernel_map) {
1510 		splassert(IPL_NONE);
1511 		me = pool_get(&uvm_map_entry_kmem_pool, pool_flags);
1512 		if (me == NULL)
1513 			goto out;
1514 		me->flags = UVM_MAP_KMEM;
1515 	} else {
1516 		splassert(IPL_NONE);
1517 		me = pool_get(&uvm_map_entry_pool, pool_flags);
1518 		if (me == NULL)
1519 			goto out;
1520 		me->flags = 0;
1521 	}
1522 
1523 	RBT_POISON(uvm_map_addr, me, UVMMAP_DEADBEEF);
1524 out:
1525 	return me;
1526 }
1527 
1528 /*
1529  * uvm_mapent_free: free map entry
1530  *
1531  * => XXX: static pool for kernel map?
1532  */
1533 void
1534 uvm_mapent_free(struct vm_map_entry *me)
1535 {
1536 	if (me->flags & UVM_MAP_STATIC) {
1537 		mtx_enter(&uvm_kmapent_mtx);
1538 		SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry);
1539 		uvmexp.kmapent--;
1540 		mtx_leave(&uvm_kmapent_mtx);
1541 	} else if (me->flags & UVM_MAP_KMEM) {
1542 		splassert(IPL_NONE);
1543 		pool_put(&uvm_map_entry_kmem_pool, me);
1544 	} else {
1545 		splassert(IPL_NONE);
1546 		pool_put(&uvm_map_entry_pool, me);
1547 	}
1548 }
1549 
1550 /*
1551  * uvm_map_lookup_entry: find map entry at or before an address.
1552  *
1553  * => map must at least be read-locked by caller
1554  * => entry is returned in "entry"
1555  * => return value is true if address is in the returned entry
1556  * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is
1557  * returned for those mappings.
1558  */
1559 boolean_t
1560 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
1561     struct vm_map_entry **entry)
1562 {
1563 	vm_map_assert_anylock(map);
1564 
1565 	*entry = uvm_map_entrybyaddr(&map->addr, address);
1566 	return *entry != NULL && !UVM_ET_ISHOLE(*entry) &&
1567 	    (*entry)->start <= address && (*entry)->end > address;
1568 }
1569 
1570 /*
1571  * Stack must be in a MAP_STACK entry. PROT_NONE indicates stack not yet
1572  * grown -- then uvm_map_check_region_range() should not cache the entry
1573  * because growth won't be seen.
1574  */
1575 int
1576 uvm_map_inentry_sp(vm_map_entry_t entry)
1577 {
1578 	if ((entry->etype & UVM_ET_STACK) == 0) {
1579 		if (entry->protection == PROT_NONE)
1580 			return (-1);	/* don't update range */
1581 		return (0);
1582 	}
1583 	return (1);
1584 }
1585 
1586 int
1587 uvm_map_inentry_recheck(u_long serial, vaddr_t addr, struct p_inentry *ie)
1588 {
1589 	return (serial != ie->ie_serial || ie->ie_start == 0 ||
1590 	    addr < ie->ie_start || addr >= ie->ie_end);
1591 }
1592 
1593 /*
1594  * Inside a vm_map, find the entry containing the given address and
1595  * verify it via the supplied function.  Remember the low and high
1596  * addresses of the region if valid and return TRUE, else return FALSE.
1597  */
1598 boolean_t
1599 uvm_map_inentry_fix(struct proc *p, struct p_inentry *ie, vaddr_t addr,
1600     int (*fn)(vm_map_entry_t), u_long serial)
1601 {
1602 	vm_map_t map = &p->p_vmspace->vm_map;
1603 	vm_map_entry_t entry;
1604 	int ret;
1605 
1606 	if (addr < map->min_offset || addr >= map->max_offset)
1607 		return (FALSE);
1608 
1609 	/* lock map */
1610 	vm_map_lock_read(map);
1611 
1612 	/* lookup */
1613 	if (!uvm_map_lookup_entry(map, trunc_page(addr), &entry)) {
1614 		vm_map_unlock_read(map);
1615 		return (FALSE);
1616 	}
1617 
1618 	ret = (*fn)(entry);
1619 	if (ret == 0) {
1620 		vm_map_unlock_read(map);
1621 		return (FALSE);
1622 	} else if (ret == 1) {
1623 		ie->ie_start = entry->start;
1624 		ie->ie_end = entry->end;
1625 		ie->ie_serial = serial;
1626 	} else {
1627 		/* do not update, re-check later */
1628 	}
1629 	vm_map_unlock_read(map);
1630 	return (TRUE);
1631 }
1632 
1633 boolean_t
1634 uvm_map_inentry(struct proc *p, struct p_inentry *ie, vaddr_t addr,
1635     const char *fmt, int (*fn)(vm_map_entry_t), u_long serial)
1636 {
1637 	union sigval sv;
1638 	boolean_t ok = TRUE;
1639 
1640 	if (uvm_map_inentry_recheck(serial, addr, ie)) {
1641 		ok = uvm_map_inentry_fix(p, ie, addr, fn, serial);
1642 		if (!ok) {
1643 			KERNEL_LOCK();
1644 			uprintf(fmt, p->p_p->ps_comm, p->p_p->ps_pid, p->p_tid,
1645 			    addr, ie->ie_start, ie->ie_end-1);
1646 			p->p_p->ps_acflag |= AMAP;
1647 			sv.sival_ptr = (void *)PROC_PC(p);
1648 			trapsignal(p, SIGSEGV, 0, SEGV_ACCERR, sv);
1649 			KERNEL_UNLOCK();
1650 		}
1651 	}
1652 	return (ok);
1653 }
1654 
1655 /*
1656  * Check whether the given address range can be converted to a MAP_STACK
1657  * mapping.
1658  *
1659  * Must be called with map locked.
1660  */
1661 boolean_t
1662 uvm_map_is_stack_remappable(struct vm_map *map, vaddr_t addr, vaddr_t sz,
1663     int sigaltstack_check)
1664 {
1665 	vaddr_t end = addr + sz;
1666 	struct vm_map_entry *first, *iter, *prev = NULL;
1667 
1668 	vm_map_assert_anylock(map);
1669 
1670 	if (!uvm_map_lookup_entry(map, addr, &first))
1671 		return FALSE;
1672 
1673 	/*
1674 	 * Check that the address range exists and is contiguous.
1675 	 */
1676 	for (iter = first; iter != NULL && iter->start < end;
1677 	    prev = iter, iter = RBT_NEXT(uvm_map_addr, iter)) {
1678 		/*
1679 		 * Make sure that we do not have holes in the range.
1680 		 */
1681 #if 0
1682 		if (prev != NULL) {
1683 			printf("prev->start 0x%lx, prev->end 0x%lx, "
1684 			    "iter->start 0x%lx, iter->end 0x%lx\n",
1685 			    prev->start, prev->end, iter->start, iter->end);
1686 		}
1687 #endif
1688 
1689 		if (prev != NULL && prev->end != iter->start)
1690 			return FALSE;
1691 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
1692 			return FALSE;
1693 		if (sigaltstack_check) {
1694 			if (iter->protection != (PROT_READ | PROT_WRITE))
1695 				return FALSE;
1696 		}
1697 	}
1698 
1699 	return TRUE;
1700 }
1701 
1702 /*
1703  * Remap the middle-pages of an existing mapping as a stack range.
1704  * If there exists a previous contiguous mapping with the given range
1705  * [addr, addr + sz), with protection PROT_READ|PROT_WRITE, then the
1706  * mapping is dropped, and a new anon mapping is created and marked as
1707  * a stack.
1708  *
1709  * Must be called with map unlocked.
1710  */
1711 int
1712 uvm_map_remap_as_stack(struct proc *p, vaddr_t addr, vaddr_t sz)
1713 {
1714 	vm_map_t map = &p->p_vmspace->vm_map;
1715 	vaddr_t start, end;
1716 	int flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE,
1717 	    PROT_READ | PROT_WRITE | PROT_EXEC,
1718 	    MAP_INHERIT_COPY, MADV_NORMAL,
1719 	    UVM_FLAG_STACK | UVM_FLAG_FIXED | UVM_FLAG_UNMAP |
1720 	    UVM_FLAG_COPYONW | UVM_FLAG_SIGALTSTACK);
1721 
1722 	start = round_page(addr);
1723 	end = trunc_page(addr + sz);
1724 #ifdef MACHINE_STACK_GROWS_UP
1725 	if (end == addr + sz)
1726 		end -= PAGE_SIZE;
1727 #else
1728 	if (start == addr)
1729 		start += PAGE_SIZE;
1730 #endif
1731 
1732 	if (start < map->min_offset || end >= map->max_offset || end < start)
1733 		return EINVAL;
1734 
1735 	/*
1736 	 * UVM_FLAG_SIGALTSTACK indicates that the immutable flag may be
1737 	 * bypassed, but the range is checked to be contiguous, not a syscall
1738 	 * mapping, and protected RW.  Then, a new mapping (all zero) is
1739 	 * placed upon the region, which prevents an attacker from pivoting
1740 	 * into pre-placed MAP_STACK space.
1741 	 */
1742 	return uvm_mapanon(map, &start, end - start, 0, flags);
1743 }
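
/*
 * Illustrative sketch (assumption, not a call site in this file): the
 * sigaltstack(2) path could hand the user-supplied alternate stack to
 * this helper, which then re-creates the interior pages as MAP_STACK:
 *
 *	error = uvm_map_remap_as_stack(p, (vaddr_t)ss.ss_sp, ss.ss_size);
 *	if (error)
 *		return error;
 *
 * Note that one boundary page of the range (first or last, depending
 * on stack growth direction) is deliberately left untouched above.
 */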
1744 
1745 /*
1746  * uvm_map_pie: return a properly aligned random load address for a
1747  * PIE executable.
1748  */
1749 #ifndef VM_PIE_MAX_ADDR
1750 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4)
1751 #endif
1752 
1753 #ifndef VM_PIE_MIN_ADDR
1754 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS
1755 #endif
1756 
1757 #ifndef VM_PIE_MIN_ALIGN
1758 #define VM_PIE_MIN_ALIGN PAGE_SIZE
1759 #endif
1760 
1761 vaddr_t
1762 uvm_map_pie(vaddr_t align)
1763 {
1764 	vaddr_t addr, space, min;
1765 
1766 	align = MAX(align, VM_PIE_MIN_ALIGN);
1767 
1768 	/* round up to next alignment */
1769 	min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1);
1770 
1771 	if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR)
1772 		return (align);
1773 
1774 	space = (VM_PIE_MAX_ADDR - min) / align;
1775 	space = MIN(space, (u_int32_t)-1);
1776 
1777 	addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align;
1778 	addr += min;
1779 
1780 	return (addr);
1781 }
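
/*
 * Worked example (illustrative only): with align = 1MB, min is
 * VM_PIE_MIN_ADDR rounded up to a 1MB boundary and space is the number
 * of 1MB slots between min and VM_PIE_MAX_ADDR, so
 *
 *	addr = min + arc4random_uniform(space) * 0x100000
 *
 * is a uniformly chosen, 1MB-aligned address in [min, VM_PIE_MAX_ADDR).
 * If the alignment or the rounded-up minimum reaches VM_PIE_MAX_ADDR,
 * the alignment value itself is returned instead.
 */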
1782 
1783 void
1784 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end)
1785 {
1786 	struct uvm_map_deadq dead;
1787 
1788 	KASSERT((start & (vaddr_t)PAGE_MASK) == 0 &&
1789 	    (end & (vaddr_t)PAGE_MASK) == 0);
1790 	TAILQ_INIT(&dead);
1791 	vm_map_lock(map);
1792 	uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE, FALSE);
1793 	vm_map_unlock(map);
1794 
1795 	if (map->flags & VM_MAP_INTRSAFE)
1796 		uvm_unmap_detach_intrsafe(&dead);
1797 	else
1798 		uvm_unmap_detach(&dead, 0);
1799 }
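
/*
 * Illustrative sketch (assumption): a kernel caller that obtained a
 * page-aligned allocation of "size" bytes at "va" tears it down with:
 *
 *	uvm_unmap(kernel_map, va, va + size);
 *
 * Both addresses must be page aligned (see the KASSERT above); the
 * dead entries are detached outside the map lock.
 */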
1800 
1801 /*
1802  * Mark entry as free.
1803  *
1804  * entry will be put on the dead list.
1805  * The free space will be merged into the previous or a new entry,
1806  * unless markfree is false.
1807  */
1808 void
1809 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry,
1810     struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead,
1811     boolean_t markfree)
1812 {
1813 	struct uvm_addr_state	*free;
1814 	struct vm_map_entry	*prev;
1815 	vaddr_t			 addr;	/* Start of freed range. */
1816 	vaddr_t			 end;	/* End of freed range. */
1817 
1818 	UVM_MAP_REQ_WRITE(map);
1819 
1820 	prev = *prev_ptr;
1821 	if (prev == entry)
1822 		*prev_ptr = prev = NULL;
1823 
1824 	if (prev == NULL ||
1825 	    VMMAP_FREE_END(prev) != entry->start)
1826 		prev = RBT_PREV(uvm_map_addr, entry);
1827 
1828 	/* Entry is describing only free memory and has nothing to drain into. */
1829 	if (prev == NULL && entry->start == entry->end && markfree) {
1830 		*prev_ptr = entry;
1831 		return;
1832 	}
1833 
1834 	addr = entry->start;
1835 	end = VMMAP_FREE_END(entry);
1836 	free = uvm_map_uaddr_e(map, entry);
1837 	uvm_mapent_free_remove(map, free, entry);
1838 	uvm_mapent_addr_remove(map, entry);
1839 	DEAD_ENTRY_PUSH(dead, entry);
1840 
1841 	if (markfree) {
1842 		if (prev) {
1843 			free = uvm_map_uaddr_e(map, prev);
1844 			uvm_mapent_free_remove(map, free, prev);
1845 		}
1846 		*prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0);
1847 	}
1848 }
1849 
1850 /*
1851  * Unwire and release referenced amap and object from map entry.
1852  */
1853 void
1854 uvm_unmap_kill_entry_withlock(struct vm_map *map, struct vm_map_entry *entry,
1855     int needlock)
1856 {
1857 	/* Unwire removed map entry. */
1858 	if (VM_MAPENT_ISWIRED(entry)) {
1859 		entry->wired_count = 0;
1860 		uvm_fault_unwire_locked(map, entry->start, entry->end);
1861 	}
1862 
1863 	if (needlock)
1864 		uvm_map_lock_entry(entry);
1865 
1866 	/* Entry-type specific code. */
1867 	if (UVM_ET_ISHOLE(entry)) {
1868 		/* Nothing to be done for holes. */
1869 	} else if (map->flags & VM_MAP_INTRSAFE) {
1870 		KASSERT(vm_map_pmap(map) == pmap_kernel());
1871 
1872 		uvm_km_pgremove_intrsafe(entry->start, entry->end);
1873 	} else if (UVM_ET_ISOBJ(entry) &&
1874 	    UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
1875 		KASSERT(vm_map_pmap(map) == pmap_kernel());
1876 		/*
1877 		 * Note: kernel object mappings are currently used in
1878 		 * two ways:
1879 		 *  [1] "normal" mappings of pages in the kernel object
1880 		 *  [2] uvm_km_valloc'd allocations in which we
1881 		 *      pmap_enter in some non-kernel-object page
1882 		 *      (e.g. vmapbuf).
1883 		 *
1884 		 * for case [1], we need to remove the mapping from
1885 		 * the pmap and then remove the page from the kernel
1886 		 * object (because, once pages in a kernel object are
1887 		 * unmapped they are no longer needed, unlike, say,
1888 		 * a vnode where you might want the data to persist
1889 		 * until flushed out of a queue).
1890 		 *
1891 		 * for case [2], we need to remove the mapping from
1892 		 * the pmap.  there shouldn't be any pages at the
1893 		 * specified offset in the kernel object [but it
1894 		 * doesn't hurt to call uvm_km_pgremove just to be
1895 		 * safe?]
1896 		 *
1897 		 * uvm_km_pgremove currently does the following:
1898 		 *   for pages in the kernel object range:
1899 		 *     - drops the swap slot
1900 		 *     - uvm_pagefree the page
1901 		 *
1902 		 * note there is a version of uvm_km_pgremove() that
1903 		 * is used for "intrsafe" objects.
1904 		 */
1905 		/*
1906 		 * remove mappings from pmap and drop the pages
1907 		 * from the object.  offsets are always relative
1908 		 * to vm_map_min(kernel_map).
1909 		 */
1910 		uvm_km_pgremove(entry->object.uvm_obj, entry->start,
1911 		    entry->end);
1912 	} else {
1913 		/* remove mappings the standard way. */
1914 		pmap_remove(map->pmap, entry->start, entry->end);
1915 	}
1916 
1917 	if (needlock)
1918 		uvm_map_unlock_entry(entry);
1919 }
1920 
1921 void
1922 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry)
1923 {
1924 	uvm_unmap_kill_entry_withlock(map, entry, 0);
1925 }
1926 
1927 /*
1928  * Remove all entries from start to end.
1929  *
1930  * If remove_holes, then remove ET_HOLE entries as well.
1931  * If markfree, entry will be properly marked free, otherwise, no replacement
1932  * entry will be put in the tree (corrupting the tree).
1933  */
1934 int
1935 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
1936     struct uvm_map_deadq *dead, boolean_t remove_holes,
1937     boolean_t markfree, boolean_t checkimmutable)
1938 {
1939 	struct vm_map_entry *prev_hint, *next, *entry;
1940 
1941 	start = MAX(start, map->min_offset);
1942 	end = MIN(end, map->max_offset);
1943 	if (start >= end)
1944 		return 0;
1945 
1946 	vm_map_assert_wrlock(map);
1947 
1948 	/* Find first affected entry. */
1949 	entry = uvm_map_entrybyaddr(&map->addr, start);
1950 	KDASSERT(entry != NULL && entry->start <= start);
1951 
1952 	if (checkimmutable) {
1953 		struct vm_map_entry *entry1 = entry;
1954 
1955 		/* Refuse to unmap if any entries are immutable */
1956 		if (entry1->end <= start)
1957 			entry1 = RBT_NEXT(uvm_map_addr, entry1);
1958 		for (; entry1 != NULL && entry1->start < end; entry1 = next) {
1959 			KDASSERT(entry1->start >= start);
1960 			next = RBT_NEXT(uvm_map_addr, entry1);
1961 			/* Treat memory holes as free space. */
1962 			if (entry1->start == entry1->end || UVM_ET_ISHOLE(entry1))
1963 				continue;
1964 			if (entry1->etype & UVM_ET_IMMUTABLE)
1965 				return EPERM;
1966 		}
1967 	}
1968 
1969 	if (entry->end <= start && markfree)
1970 		entry = RBT_NEXT(uvm_map_addr, entry);
1971 	else
1972 		UVM_MAP_CLIP_START(map, entry, start);
1973 
1974 	/*
1975 	 * Iterate entries until we reach end address.
1976 	 * prev_hint hints where the freed space can be appended to.
1977 	 */
1978 	prev_hint = NULL;
1979 	for (; entry != NULL && entry->start < end; entry = next) {
1980 		KDASSERT(entry->start >= start);
1981 		if (entry->end > end || !markfree)
1982 			UVM_MAP_CLIP_END(map, entry, end);
1983 		KDASSERT(entry->start >= start && entry->end <= end);
1984 		next = RBT_NEXT(uvm_map_addr, entry);
1985 
1986 		/* Don't remove holes unless asked to do so. */
1987 		if (UVM_ET_ISHOLE(entry)) {
1988 			if (!remove_holes) {
1989 				prev_hint = entry;
1990 				continue;
1991 			}
1992 		}
1993 
1994 		/* A stack has been removed. */
1995 		if (UVM_ET_ISSTACK(entry) && (map->flags & VM_MAP_ISVMSPACE))
1996 			map->sserial++;
1997 
1998 		/* Kill entry. */
1999 		uvm_unmap_kill_entry_withlock(map, entry, 1);
2000 
2001 		/* Update space usage. */
2002 		if ((map->flags & VM_MAP_ISVMSPACE) &&
2003 		    entry->object.uvm_obj == NULL &&
2004 		    entry->protection != PROT_NONE &&
2005 		    !UVM_ET_ISHOLE(entry)) {
2006 			((struct vmspace *)map)->vm_dused -=
2007 			    uvmspace_dused(map, entry->start, entry->end);
2008 		}
2009 		if (!UVM_ET_ISHOLE(entry))
2010 			map->size -= entry->end - entry->start;
2011 
2012 		/* Actual removal of entry. */
2013 		uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree);
2014 	}
2015 
2016 	pmap_update(vm_map_pmap(map));
2017 
2018 #ifdef VMMAP_DEBUG
2019 	if (markfree) {
2020 		for (entry = uvm_map_entrybyaddr(&map->addr, start);
2021 		    entry != NULL && entry->start < end;
2022 		    entry = RBT_NEXT(uvm_map_addr, entry)) {
2023 			KDASSERT(entry->end <= start ||
2024 			    entry->start == entry->end ||
2025 			    UVM_ET_ISHOLE(entry));
2026 		}
2027 	} else {
2028 		vaddr_t a;
2029 		for (a = start; a < end; a += PAGE_SIZE)
2030 			KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL);
2031 	}
2032 #endif
2033 	return 0;
2034 }
2035 
2036 /*
2037  * Mark all entries from first until end (exclusive) as pageable.
2038  *
2039  * Lock must be exclusive on entry and will not be touched.
2040  */
2041 void
2042 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first,
2043     struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr)
2044 {
2045 	struct vm_map_entry *iter;
2046 
2047 	for (iter = first; iter != end;
2048 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2049 		KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
2050 		if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
2051 			continue;
2052 
2053 		iter->wired_count = 0;
2054 		uvm_fault_unwire_locked(map, iter->start, iter->end);
2055 	}
2056 }
2057 
2058 /*
2059  * Mark all entries from first until end (exclusive) as wired.
2060  *
2061  * Lockflags determines the lock state on return from this function.
2062  * Lock must be exclusive on entry.
2063  */
2064 int
2065 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first,
2066     struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr,
2067     int lockflags)
2068 {
2069 	struct vm_map_entry *iter;
2070 #ifdef DIAGNOSTIC
2071 	unsigned int timestamp_save;
2072 #endif
2073 	int error;
2074 
2075 	/*
2076 	 * Wire pages in two passes:
2077 	 *
2078 	 * 1: holding the write lock, we create any anonymous maps that need
2079 	 *    to be created.  then we clip each map entry to the region to
2080 	 *    be wired and increment its wiring count.
2081 	 *
2082 	 * 2: we mark the map busy, unlock it and call uvm_fault_wire to fault
2083 	 *    in the pages for any newly wired area (wired_count == 1).
2084 	 */
2085 	for (iter = first; iter != end;
2086 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2087 		KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
2088 		if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2089 		    iter->protection == PROT_NONE)
2090 			continue;
2091 
2092 		/*
2093 		 * Perform actions of vm_map_lookup that need the write lock.
2094 		 * - create an anonymous map for copy-on-write
2095 		 * - anonymous map for zero-fill
2096 		 * Skip submaps.
2097 		 */
2098 		if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) &&
2099 		    UVM_ET_ISNEEDSCOPY(iter) &&
2100 		    ((iter->protection & PROT_WRITE) ||
2101 		    iter->object.uvm_obj == NULL)) {
2102 			amap_copy(map, iter, M_WAITOK,
2103 			    UVM_ET_ISSTACK(iter) ? FALSE : TRUE,
2104 			    iter->start, iter->end);
2105 		}
2106 		iter->wired_count++;
2107 	}
2108 
2109 	/*
2110 	 * Pass 2.
2111 	 */
2112 #ifdef DIAGNOSTIC
2113 	timestamp_save = map->timestamp;
2114 #endif
2115 	vm_map_busy(map);
2116 	vm_map_unlock(map);
2117 
2118 	error = 0;
2119 	for (iter = first; error == 0 && iter != end;
2120 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2121 		if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2122 		    iter->protection == PROT_NONE)
2123 			continue;
2124 
2125 		error = uvm_fault_wire(map, iter->start, iter->end,
2126 		    iter->protection);
2127 	}
2128 
2129 	vm_map_lock(map);
2130 	vm_map_unbusy(map);
2131 
2132 	if (error) {
2133 #ifdef DIAGNOSTIC
2134 		if (timestamp_save != map->timestamp)
2135 			panic("uvm_map_pageable_wire: stale map");
2136 #endif
2137 
2138 		/*
2139 		 * first is no longer needed to restart loops.
2140 		 * Use it as iterator to unmap successful mappings.
2141 		 */
2142 		for (; first != iter;
2143 		    first = RBT_NEXT(uvm_map_addr, first)) {
2144 			if (UVM_ET_ISHOLE(first) ||
2145 			    first->start == first->end ||
2146 			    first->protection == PROT_NONE)
2147 				continue;
2148 
2149 			first->wired_count--;
2150 			if (!VM_MAPENT_ISWIRED(first)) {
2151 				uvm_fault_unwire_locked(map,
2152 				    first->start, first->end);
2153 			}
2154 		}
2155 
2156 		/* decrease counter in the rest of the entries */
2157 		for (; iter != end;
2158 		    iter = RBT_NEXT(uvm_map_addr, iter)) {
2159 			if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2160 			    iter->protection == PROT_NONE)
2161 				continue;
2162 
2163 			iter->wired_count--;
2164 		}
2165 
2166 		if ((lockflags & UVM_LK_EXIT) == 0)
2167 			vm_map_unlock(map);
2168 		return error;
2169 	}
2170 
2171 
2172 	if ((lockflags & UVM_LK_EXIT) == 0) {
2173 		vm_map_unlock(map);
2174 	} else {
2175 #ifdef DIAGNOSTIC
2176 		if (timestamp_save != map->timestamp)
2177 			panic("uvm_map_pageable_wire: stale map");
2178 #endif
2179 	}
2180 	return 0;
2181 }
2182 
2183 /*
2184  * uvm_map_pageable: set pageability of a range in a map.
2185  *
2186  * Flags:
2187  * UVM_LK_ENTER: map is already locked by caller
2188  * UVM_LK_EXIT:  don't unlock map on exit
2189  *
2190  * The full range must be in use (entries may not have fspace != 0).
2191  * UVM_ET_HOLE counts as unmapped.
2192  */
2193 int
2194 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
2195     boolean_t new_pageable, int lockflags)
2196 {
2197 	struct vm_map_entry *first, *last, *tmp;
2198 	int error;
2199 
2200 	start = trunc_page(start);
2201 	end = round_page(end);
2202 
2203 	if (start > end)
2204 		return EINVAL;
2205 	if (start == end)
2206 		return 0;	/* nothing to do */
2207 	if (start < map->min_offset)
2208 		return EFAULT; /* why? see first XXX below */
2209 	if (end > map->max_offset)
2210 		return EINVAL; /* why? see second XXX below */
2211 
2212 	KASSERT(map->flags & VM_MAP_PAGEABLE);
2213 	if ((lockflags & UVM_LK_ENTER) == 0)
2214 		vm_map_lock(map);
2215 
2216 	/*
2217 	 * Find first entry.
2218 	 *
2219 	 * Initial test on start is different, because of the different
2220 	 * error returned. Rest is tested further down.
2221 	 */
2222 	first = uvm_map_entrybyaddr(&map->addr, start);
2223 	if (first->end <= start || UVM_ET_ISHOLE(first)) {
2224 		/*
2225 		 * XXX if the first address is not mapped, it is EFAULT?
2226 		 */
2227 		error = EFAULT;
2228 		goto out;
2229 	}
2230 
2231 	/* Check that the range has no holes. */
2232 	for (last = first; last != NULL && last->start < end;
2233 	    last = RBT_NEXT(uvm_map_addr, last)) {
2234 		if (UVM_ET_ISHOLE(last) ||
2235 		    (last->end < end && VMMAP_FREE_END(last) != last->end)) {
2236 			/*
2237 			 * XXX unmapped memory in range, why is it EINVAL
2238 			 * instead of EFAULT?
2239 			 */
2240 			error = EINVAL;
2241 			goto out;
2242 		}
2243 	}
2244 
2245 	/*
2246 	 * Last ended at the first entry after the range.
2247 	 * Move back one step.
2248 	 *
2249 	 * Note that last may be NULL.
2250 	 */
2251 	if (last == NULL) {
2252 		last = RBT_MAX(uvm_map_addr, &map->addr);
2253 		if (last->end < end) {
2254 			error = EINVAL;
2255 			goto out;
2256 		}
2257 	} else {
2258 		KASSERT(last != first);
2259 		last = RBT_PREV(uvm_map_addr, last);
2260 	}
2261 
2262 	/* Wire/unwire pages here. */
2263 	if (new_pageable) {
2264 		/*
2265 		 * Mark pageable.
2266 		 * entries that are not wired are untouched.
2267 		 */
2268 		if (VM_MAPENT_ISWIRED(first))
2269 			UVM_MAP_CLIP_START(map, first, start);
2270 		/*
2271 		 * Split last at end.
2272 		 * Make tmp be the first entry after what is to be touched.
2273 		 * If last is not wired, don't touch it.
2274 		 */
2275 		if (VM_MAPENT_ISWIRED(last)) {
2276 			UVM_MAP_CLIP_END(map, last, end);
2277 			tmp = RBT_NEXT(uvm_map_addr, last);
2278 		} else
2279 			tmp = last;
2280 
2281 		uvm_map_pageable_pgon(map, first, tmp, start, end);
2282 		error = 0;
2283 
2284 out:
2285 		if ((lockflags & UVM_LK_EXIT) == 0)
2286 			vm_map_unlock(map);
2287 		return error;
2288 	} else {
2289 		/*
2290 		 * Mark entries wired.
2291 		 * entries are always touched (because recovery needs this).
2292 		 */
2293 		if (!VM_MAPENT_ISWIRED(first))
2294 			UVM_MAP_CLIP_START(map, first, start);
2295 		/*
2296 		 * Split last at end.
2297 		 * Make tmp be the first entry after what is to be touched.
2298 		 * If last is not wired, don't touch it.
2299 		 */
2300 		if (!VM_MAPENT_ISWIRED(last)) {
2301 			UVM_MAP_CLIP_END(map, last, end);
2302 			tmp = RBT_NEXT(uvm_map_addr, last);
2303 		} else
2304 			tmp = last;
2305 
2306 		return uvm_map_pageable_wire(map, first, tmp, start, end,
2307 		    lockflags);
2308 	}
2309 }
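
/*
 * Illustrative sketch (assumption): the mlock(2)/munlock(2) paths map
 * onto this function roughly as
 *
 *	error = uvm_map_pageable(map, start, start + len, FALSE, 0); (mlock)
 *	error = uvm_map_pageable(map, start, start + len, TRUE, 0);  (munlock)
 *
 * i.e. new_pageable == FALSE wires the range and new_pageable == TRUE
 * unwires it; with lockflags == 0 the map is locked and unlocked here.
 */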
2310 
2311 /*
2312  * uvm_map_pageable_all: special case of uvm_map_pageable - affects
2313  * all mapped regions.
2314  *
2315  * Map must not be locked.
2316  * If no flags are specified, all regions are unwired.
2317  */
2318 int
2319 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit)
2320 {
2321 	vsize_t size;
2322 	struct vm_map_entry *iter;
2323 
2324 	KASSERT(map->flags & VM_MAP_PAGEABLE);
2325 	vm_map_lock(map);
2326 
2327 	if (flags == 0) {
2328 		uvm_map_pageable_pgon(map, RBT_MIN(uvm_map_addr, &map->addr),
2329 		    NULL, map->min_offset, map->max_offset);
2330 
2331 		vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
2332 		vm_map_unlock(map);
2333 		return 0;
2334 	}
2335 
2336 	if (flags & MCL_FUTURE)
2337 		vm_map_modflags(map, VM_MAP_WIREFUTURE, 0);
2338 	if (!(flags & MCL_CURRENT)) {
2339 		vm_map_unlock(map);
2340 		return 0;
2341 	}
2342 
2343 	/*
2344 	 * Count number of pages in all non-wired entries.
2345 	 * If the number exceeds the limit, abort.
2346 	 */
2347 	size = 0;
2348 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2349 		if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
2350 			continue;
2351 
2352 		size += iter->end - iter->start;
2353 	}
2354 
2355 	if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
2356 		vm_map_unlock(map);
2357 		return ENOMEM;
2358 	}
2359 
2360 	/* XXX non-pmap_wired_count case must be handled by caller */
2361 #ifdef pmap_wired_count
2362 	if (limit != 0 &&
2363 	    size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) {
2364 		vm_map_unlock(map);
2365 		return ENOMEM;
2366 	}
2367 #endif
2368 
2369 	/*
2370 	 * uvm_map_pageable_wire will release lock
2371 	 */
2372 	return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr),
2373 	    NULL, map->min_offset, map->max_offset, 0);
2374 }
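
/*
 * Illustrative sketch (assumption): mlockall(2)/munlockall(2) translate
 * roughly to
 *
 *	uvm_map_pageable_all(map, MCL_CURRENT | MCL_FUTURE, lim);  (mlockall)
 *	uvm_map_pageable_all(map, 0, 0);                           (munlockall)
 *
 * where "lim" would be the RLIMIT_MEMLOCK-derived byte limit that is
 * checked against pmap_wired_count() above.
 */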
2375 
2376 /*
2377  * Initialize map.
2378  *
2379  * Allocates sufficient entries to describe the free memory in the map.
2380  */
2381 void
2382 uvm_map_setup(struct vm_map *map, pmap_t pmap, vaddr_t min, vaddr_t max,
2383     int flags)
2384 {
2385 	int i;
2386 
2387 	KASSERT((min & (vaddr_t)PAGE_MASK) == 0);
2388 	KASSERT((max & (vaddr_t)PAGE_MASK) == 0 ||
2389 	    (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
2390 
2391 	/*
2392 	 * Update parameters.
2393 	 *
2394 	 * This code handles (vaddr_t)-1 and other page mask ending addresses
2395 	 * properly.
2396 	 * We lose the top page if the full virtual address space is used.
2397 	 */
2398 	if (max & (vaddr_t)PAGE_MASK) {
2399 		max += 1;
2400 		if (max == 0) /* overflow */
2401 			max -= PAGE_SIZE;
2402 	}
2403 
2404 	RBT_INIT(uvm_map_addr, &map->addr);
2405 	map->uaddr_exe = NULL;
2406 	for (i = 0; i < nitems(map->uaddr_any); ++i)
2407 		map->uaddr_any[i] = NULL;
2408 	map->uaddr_brk_stack = NULL;
2409 
2410 	map->pmap = pmap;
2411 	map->size = 0;
2412 	map->ref_count = 0;
2413 	map->min_offset = min;
2414 	map->max_offset = max;
2415 	map->b_start = map->b_end = 0; /* Empty brk() area by default. */
2416 	map->s_start = map->s_end = 0; /* Empty stack area by default. */
2417 	map->flags = flags;
2418 	map->timestamp = 0;
2419 	map->busy = NULL;
2420 	if (flags & VM_MAP_ISVMSPACE)
2421 		rw_init_flags(&map->lock, "vmmaplk", RWL_DUPOK);
2422 	else
2423 		rw_init(&map->lock, "kmmaplk");
2424 	mtx_init(&map->mtx, IPL_VM);
2425 	mtx_init(&map->flags_lock, IPL_VM);
2426 
2427 	/* Configure the allocators. */
2428 	if (flags & VM_MAP_ISVMSPACE)
2429 		uvm_map_setup_md(map);
2430 	else
2431 		map->uaddr_any[3] = &uaddr_kbootstrap;
2432 
2433 	/*
2434 	 * Fill map entries.
2435 	 * We do not need to write-lock the map here because only the current
2436 	 * thread sees it right now. Initialize ref_count to 0 above to avoid
2437 	 * bogus triggering of lock-not-held assertions.
2438 	 */
2439 	uvm_map_setup_entries(map);
2440 	uvm_tree_sanity(map, __FILE__, __LINE__);
2441 	map->ref_count = 1;
2442 }
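
/*
 * Illustrative sketch (assumption, simplified): at boot the kernel map
 * is prepared along these lines before any kernel allocations happen:
 *
 *	uvm_map_setup(kernel_map, pmap_kernel(),
 *	    VM_MIN_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS, VM_MAP_PAGEABLE);
 *
 * The exact boundaries and flags are machine dependent; the point is
 * that min/max are page aligned and the map starts out as one big
 * free-space entry created by uvm_map_setup_entries().
 */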
2443 
2444 /*
2445  * Destroy the map.
2446  *
2447  * This is the inverse operation to uvm_map_setup.
2448  */
2449 void
2450 uvm_map_teardown(struct vm_map *map)
2451 {
2452 	struct uvm_map_deadq	 dead_entries;
2453 	struct vm_map_entry	*entry, *tmp;
2454 #ifdef VMMAP_DEBUG
2455 	size_t			 numq, numt;
2456 #endif
2457 	int			 i;
2458 
2459 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
2460 
2461 	vm_map_lock(map);
2462 
2463 	/* Remove address selectors. */
2464 	uvm_addr_destroy(map->uaddr_exe);
2465 	map->uaddr_exe = NULL;
2466 	for (i = 0; i < nitems(map->uaddr_any); i++) {
2467 		uvm_addr_destroy(map->uaddr_any[i]);
2468 		map->uaddr_any[i] = NULL;
2469 	}
2470 	uvm_addr_destroy(map->uaddr_brk_stack);
2471 	map->uaddr_brk_stack = NULL;
2472 
2473 	/*
2474 	 * Remove entries.
2475 	 *
2476 	 * The following is based on graph breadth-first search.
2477 	 *
2478 	 * In color terms:
2479 	 * - the dead_entries set contains all nodes that are reachable
2480 	 *   (i.e. both the black and the grey nodes)
2481 	 * - any entry not in dead_entries is white
2482 	 * - any entry that appears in dead_entries before entry,
2483 	 *   is black, the rest is grey.
2484 	 * The set [entry, end] is also referred to as the wavefront.
2485 	 *
2486 	 * Since the tree is always a fully connected graph, the breadth-first
2487 	 * search guarantees that each vmmap_entry is visited exactly once.
2488 	 * The vm_map is broken down in linear time.
2489 	 */
2490 	TAILQ_INIT(&dead_entries);
2491 	if ((entry = RBT_ROOT(uvm_map_addr, &map->addr)) != NULL)
2492 		DEAD_ENTRY_PUSH(&dead_entries, entry);
2493 	while (entry != NULL) {
2494 		sched_pause(yield);
2495 		uvm_unmap_kill_entry(map, entry);
2496 		if ((tmp = RBT_LEFT(uvm_map_addr, entry)) != NULL)
2497 			DEAD_ENTRY_PUSH(&dead_entries, tmp);
2498 		if ((tmp = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
2499 			DEAD_ENTRY_PUSH(&dead_entries, tmp);
2500 		/* Update wave-front. */
2501 		entry = TAILQ_NEXT(entry, dfree.deadq);
2502 	}
2503 
2504 	vm_map_unlock(map);
2505 
2506 #ifdef VMMAP_DEBUG
2507 	numt = numq = 0;
2508 	RBT_FOREACH(entry, uvm_map_addr, &map->addr)
2509 		numt++;
2510 	TAILQ_FOREACH(entry, &dead_entries, dfree.deadq)
2511 		numq++;
2512 	KASSERT(numt == numq);
2513 #endif
2514 	uvm_unmap_detach(&dead_entries, 0);
2515 
2516 	pmap_destroy(map->pmap);
2517 	map->pmap = NULL;
2518 }
2519 
2520 /*
2521  * Populate map with free-memory entries.
2522  *
2523  * Map must be initialized and empty.
2524  */
2525 void
2526 uvm_map_setup_entries(struct vm_map *map)
2527 {
2528 	KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
2529 
2530 	uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0);
2531 }
2532 
2533 /*
2534  * Split entry at given address.
2535  *
2536  * orig:  entry that is to be split.
2537  * next:  a newly allocated map entry that is not linked.
2538  * split: address at which the split is done.
2539  */
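/*
 * Illustrative example (not from the source): splitting an entry that
 * maps [0x1000, 0x5000) and carries 0x2000 bytes of trailing free
 * space at split = 0x3000 leaves
 *
 *	orig: [0x1000, 0x3000), fspace 0
 *	next: [0x3000, 0x5000) plus the trailing free space, with its
 *	      offset/amap reference advanced by adj = 0x2000
 *
 * whereas a split address at or beyond orig->end only carves the free
 * space off into an empty "next" entry.
 */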
2540 void
2541 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig,
2542     struct vm_map_entry *next, vaddr_t split)
2543 {
2544 	struct uvm_addr_state *free, *free_before;
2545 	vsize_t adj;
2546 
2547 	if ((split & PAGE_MASK) != 0) {
2548 		panic("uvm_map_splitentry: split address 0x%lx "
2549 		    "not on page boundary!", split);
2550 	}
2551 	KDASSERT(map != NULL && orig != NULL && next != NULL);
2552 	uvm_tree_sanity(map, __FILE__, __LINE__);
2553 	KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split);
2554 
2555 #ifdef VMMAP_DEBUG
2556 	KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, orig) == orig);
2557 	KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, next) != next);
2558 #endif /* VMMAP_DEBUG */
2559 
2560 	/*
2561 	 * Free space will change, unlink from free space tree.
2562 	 */
2563 	free = uvm_map_uaddr_e(map, orig);
2564 	uvm_mapent_free_remove(map, free, orig);
2565 
2566 	adj = split - orig->start;
2567 
2568 	uvm_mapent_copy(orig, next);
2569 	if (split >= orig->end) {
2570 		next->etype = 0;
2571 		next->offset = 0;
2572 		next->wired_count = 0;
2573 		next->start = next->end = split;
2574 		next->guard = 0;
2575 		next->fspace = VMMAP_FREE_END(orig) - split;
2576 		next->aref.ar_amap = NULL;
2577 		next->aref.ar_pageoff = 0;
2578 		orig->guard = MIN(orig->guard, split - orig->end);
2579 		orig->fspace = split - VMMAP_FREE_START(orig);
2580 	} else {
2581 		orig->fspace = 0;
2582 		orig->guard = 0;
2583 		orig->end = next->start = split;
2584 
2585 		if (next->aref.ar_amap) {
2586 			amap_splitref(&orig->aref, &next->aref, adj);
2587 		}
2588 		if (UVM_ET_ISSUBMAP(orig)) {
2589 			uvm_map_reference(next->object.sub_map);
2590 			next->offset += adj;
2591 		} else if (UVM_ET_ISOBJ(orig)) {
2592 			if (next->object.uvm_obj->pgops &&
2593 			    next->object.uvm_obj->pgops->pgo_reference) {
2594 				KERNEL_LOCK();
2595 				next->object.uvm_obj->pgops->pgo_reference(
2596 				    next->object.uvm_obj);
2597 				KERNEL_UNLOCK();
2598 			}
2599 			next->offset += adj;
2600 		}
2601 	}
2602 
2603 	/*
2604 	 * Link next into address tree.
2605 	 * Link orig and next into free-space tree.
2606 	 *
2607 	 * Don't insert 'next' into the addr tree until orig has been linked,
2608 	 * in case the free-list looks at adjacent entries in the addr tree
2609 	 * for its decisions.
2610 	 */
2611 	if (orig->fspace > 0)
2612 		free_before = free;
2613 	else
2614 		free_before = uvm_map_uaddr_e(map, orig);
2615 	uvm_mapent_free_insert(map, free_before, orig);
2616 	uvm_mapent_addr_insert(map, next);
2617 	uvm_mapent_free_insert(map, free, next);
2618 
2619 	uvm_tree_sanity(map, __FILE__, __LINE__);
2620 }
2621 
2622 
2623 #ifdef VMMAP_DEBUG
2624 
2625 void
2626 uvm_tree_assert(struct vm_map *map, int test, char *test_str,
2627     char *file, int line)
2628 {
2629 	char* map_special;
2630 
2631 	if (test)
2632 		return;
2633 
2634 	if (map == kernel_map)
2635 		map_special = " (kernel_map)";
2636 	else if (map == kmem_map)
2637 		map_special = " (kmem_map)";
2638 	else
2639 		map_special = "";
2640 	panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file,
2641 	    line, test_str);
2642 }
2643 
2644 /*
2645  * Check that map is sane.
2646  */
2647 void
2648 uvm_tree_sanity(struct vm_map *map, char *file, int line)
2649 {
2650 	struct vm_map_entry	*iter;
2651 	vaddr_t			 addr;
2652 	vaddr_t			 min, max, bound; /* Bounds checker. */
2653 	struct uvm_addr_state	*free;
2654 
2655 	addr = vm_map_min(map);
2656 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2657 		/*
2658 		 * Valid start, end.
2659 		 * Catch overflow for end+fspace.
2660 		 */
2661 		UVM_ASSERT(map, iter->end >= iter->start, file, line);
2662 		UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line);
2663 
2664 		/* May not be empty. */
2665 		UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter),
2666 		    file, line);
2667 
2668 		/* Addresses for entry must lie within map boundaries. */
2669 		UVM_ASSERT(map, iter->start >= vm_map_min(map) &&
2670 		    VMMAP_FREE_END(iter) <= vm_map_max(map), file, line);
2671 
2672 		/* Tree may not have gaps. */
2673 		UVM_ASSERT(map, iter->start == addr, file, line);
2674 		addr = VMMAP_FREE_END(iter);
2675 
2676 		/*
2677 		 * Free space may not cross boundaries, unless the same
2678 		 * free list is used on both sides of the border.
2679 		 */
2680 		min = VMMAP_FREE_START(iter);
2681 		max = VMMAP_FREE_END(iter);
2682 
2683 		while (min < max &&
2684 		    (bound = uvm_map_boundary(map, min, max)) != max) {
2685 			UVM_ASSERT(map,
2686 			    uvm_map_uaddr(map, bound - 1) ==
2687 			    uvm_map_uaddr(map, bound),
2688 			    file, line);
2689 			min = bound;
2690 		}
2691 
2692 		free = uvm_map_uaddr_e(map, iter);
2693 		if (free) {
2694 			UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0,
2695 			    file, line);
2696 		} else {
2697 			UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0,
2698 			    file, line);
2699 		}
2700 	}
2701 	UVM_ASSERT(map, addr == vm_map_max(map), file, line);
2702 }
2703 
2704 void
2705 uvm_tree_size_chk(struct vm_map *map, char *file, int line)
2706 {
2707 	struct vm_map_entry *iter;
2708 	vsize_t size;
2709 
2710 	size = 0;
2711 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2712 		if (!UVM_ET_ISHOLE(iter))
2713 			size += iter->end - iter->start;
2714 	}
2715 
2716 	if (map->size != size)
2717 		printf("map size = 0x%lx, should be 0x%lx\n", map->size, size);
2718 	UVM_ASSERT(map, map->size == size, file, line);
2719 
2720 	vmspace_validate(map);
2721 }
2722 
2723 /*
2724  * This function validates the statistics on vmspace.
2725  */
2726 void
2727 vmspace_validate(struct vm_map *map)
2728 {
2729 	struct vmspace *vm;
2730 	struct vm_map_entry *iter;
2731 	vaddr_t imin, imax;
2732 	vaddr_t stack_begin, stack_end; /* Position of stack. */
2733 	vsize_t stack, heap; /* Measured sizes. */
2734 
2735 	if (!(map->flags & VM_MAP_ISVMSPACE))
2736 		return;
2737 
2738 	vm = (struct vmspace *)map;
2739 	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
2740 	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
2741 
2742 	stack = heap = 0;
2743 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2744 		imin = imax = iter->start;
2745 
2746 		if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL ||
2747 		    iter->protection != PROT_NONE)
2748 			continue;
2749 
2750 		/*
2751 		 * Update stack, heap.
2752 		 * Keep in mind that (theoretically) the entries of
2753 		 * userspace and stack may be joined.
2754 		 */
2755 		while (imin != iter->end) {
2756 			/*
2757 			 * Set imax to the first boundary crossed between
2758 			 * imin and stack addresses.
2759 			 */
2760 			imax = iter->end;
2761 			if (imin < stack_begin && imax > stack_begin)
2762 				imax = stack_begin;
2763 			else if (imin < stack_end && imax > stack_end)
2764 				imax = stack_end;
2765 
2766 			if (imin >= stack_begin && imin < stack_end)
2767 				stack += imax - imin;
2768 			else
2769 				heap += imax - imin;
2770 			imin = imax;
2771 		}
2772 	}
2773 
2774 	heap >>= PAGE_SHIFT;
2775 	if (heap != vm->vm_dused) {
2776 		printf("vmspace stack range: 0x%lx-0x%lx\n",
2777 		    stack_begin, stack_end);
2778 		panic("vmspace_validate: vmspace.vm_dused invalid, "
2779 		    "expected %ld pgs, got %d pgs in map %p",
2780 		    heap, vm->vm_dused,
2781 		    map);
2782 	}
2783 }
2784 
2785 #endif /* VMMAP_DEBUG */
2786 
2787 /*
2788  * uvm_map_init: init mapping system at boot time.   note that we allocate
2789  * and init the static pool of structs vm_map_entry for the kernel here.
2790  */
2791 void
2792 uvm_map_init(void)
2793 {
2794 	static struct vm_map_entry kernel_map_entry[MAX_KMAPENT];
2795 	int lcv;
2796 
2797 	/* now set up static pool of kernel map entries ... */
2798 	mtx_init(&uvm_kmapent_mtx, IPL_VM);
2799 	SLIST_INIT(&uvm.kentry_free);
2800 	for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
2801 		SLIST_INSERT_HEAD(&uvm.kentry_free,
2802 		    &kernel_map_entry[lcv], daddrs.addr_kentry);
2803 	}
2804 
2805 	/* initialize the map-related pools. */
2806 	pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0,
2807 	    IPL_NONE, PR_WAITOK, "vmsppl", NULL);
2808 	pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0,
2809 	    IPL_VM, PR_WAITOK, "vmmpepl", NULL);
2810 	pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0,
2811 	    IPL_VM, 0, "vmmpekpl", NULL);
2812 	pool_sethiwat(&uvm_map_entry_pool, 8192);
2813 
2814 	uvm_addr_init();
2815 }
2816 
2817 #if defined(DDB)
2818 
2819 /*
2820  * DDB hooks
2821  */
2822 
2823 /*
2824  * uvm_map_printit: actually prints the map
2825  */
2826 void
2827 uvm_map_printit(struct vm_map *map, boolean_t full,
2828     int (*pr)(const char *, ...))
2829 {
2830 	struct vmspace			*vm;
2831 	struct vm_map_entry		*entry;
2832 	struct uvm_addr_state		*free;
2833 	int				 in_free, i;
2834 	char				 buf[8];
2835 
2836 	(*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset);
2837 	(*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n",
2838 	    map->b_start, map->b_end);
2839 	(*pr)("\tstack allocate range: 0x%lx-0x%lx\n",
2840 	    map->s_start, map->s_end);
2841 	(*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n",
2842 	    map->size, map->ref_count, map->timestamp,
2843 	    map->flags);
2844 	(*pr)("\tpmap=%p(resident=%d)\n", map->pmap,
2845 	    pmap_resident_count(map->pmap));
2846 
2847 	/* struct vmspace handling. */
2848 	if (map->flags & VM_MAP_ISVMSPACE) {
2849 		vm = (struct vmspace *)map;
2850 
2851 		(*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n",
2852 		    vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss);
2853 		(*pr)("\tvm_tsize=%u vm_dsize=%u\n",
2854 		    vm->vm_tsize, vm->vm_dsize);
2855 		(*pr)("\tvm_taddr=%p vm_daddr=%p\n",
2856 		    vm->vm_taddr, vm->vm_daddr);
2857 		(*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n",
2858 		    vm->vm_maxsaddr, vm->vm_minsaddr);
2859 	}
2860 
2861 	if (!full)
2862 		goto print_uaddr;
2863 	RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
2864 		(*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
2865 		    entry, entry->start, entry->end, entry->object.uvm_obj,
2866 		    (long long)entry->offset, entry->aref.ar_amap,
2867 		    entry->aref.ar_pageoff);
2868 		(*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, "
2869 		    "prot(max)=%d/%d, inh=%d, "
2870 		    "wc=%d, adv=%d\n",
2871 		    (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
2872 		    (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
2873 		    (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
2874 		    (entry->etype & UVM_ET_STACK) ? 'T' : 'F',
2875 		    entry->protection, entry->max_protection,
2876 		    entry->inheritance, entry->wired_count, entry->advice);
2877 
2878 		free = uvm_map_uaddr_e(map, entry);
2879 		in_free = (free != NULL);
2880 		(*pr)("\thole=%c, free=%c, guard=0x%lx, "
2881 		    "free=0x%lx-0x%lx\n",
2882 		    (entry->etype & UVM_ET_HOLE) ? 'T' : 'F',
2883 		    in_free ? 'T' : 'F',
2884 		    entry->guard,
2885 		    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
2886 		(*pr)("\tfspace_augment=%lu\n", entry->fspace_augment);
2887 		(*pr)("\tfreemapped=%c, uaddr=%p\n",
2888 		    (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free);
2889 		if (free) {
2890 			(*pr)("\t\t(0x%lx-0x%lx %s)\n",
2891 			    free->uaddr_minaddr, free->uaddr_maxaddr,
2892 			    free->uaddr_functions->uaddr_name);
2893 		}
2894 	}
2895 
2896 print_uaddr:
2897 	uvm_addr_print(map->uaddr_exe, "exe", full, pr);
2898 	for (i = 0; i < nitems(map->uaddr_any); i++) {
2899 		snprintf(&buf[0], sizeof(buf), "any[%d]", i);
2900 		uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr);
2901 	}
2902 	uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr);
2903 }
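
/*
 * Illustrative note (assumption about the ddb front end): this printer
 * backs the kernel debugger's map inspection, invoked along the lines
 * of
 *
 *	ddb> show map/f 0xffffffff81234567
 *
 * where the /f modifier selects "full" output and walks every entry as
 * in the RBT_FOREACH loop above.
 */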
2904 
2905 /*
2906  * uvm_object_printit: actually prints the object
2907  */
2908 void
2909 uvm_object_printit(struct uvm_object *uobj, boolean_t full,
2910     int (*pr)(const char *, ...))
2911 {
2912 	struct vm_page *pg;
2913 	int cnt = 0;
2914 
2915 	(*pr)("OBJECT %p: pgops=%p, npages=%d, ",
2916 	    uobj, uobj->pgops, uobj->uo_npages);
2917 	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
2918 		(*pr)("refs=<SYSTEM>\n");
2919 	else
2920 		(*pr)("refs=%d\n", uobj->uo_refs);
2921 
2922 	if (!full) {
2923 		return;
2924 	}
2925 	(*pr)("  PAGES <pg,offset>:\n  ");
2926 	RBT_FOREACH(pg, uvm_objtree, &uobj->memt) {
2927 		(*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
2928 		if ((cnt % 3) == 2) {
2929 			(*pr)("\n  ");
2930 		}
2931 		cnt++;
2932 	}
2933 	if ((cnt % 3) != 2) {
2934 		(*pr)("\n");
2935 	}
2936 }
2937 
2938 /*
2939  * uvm_page_printit: actually print the page
2940  */
2941 static const char page_flagbits[] =
2942 	"\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY"
2943 	"\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ"
2944 	"\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5";
2945 
2946 void
2947 uvm_page_printit(struct vm_page *pg, boolean_t full,
2948     int (*pr)(const char *, ...))
2949 {
2950 	struct vm_page *tpg;
2951 	struct uvm_object *uobj;
2952 	struct pglist *pgl;
2953 
2954 	(*pr)("PAGE %p:\n", pg);
2955 	(*pr)("  flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n",
2956 	    pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count,
2957 	    (long long)pg->phys_addr);
2958 	(*pr)("  uobject=%p, uanon=%p, offset=0x%llx\n",
2959 	    pg->uobject, pg->uanon, (long long)pg->offset);
2960 #if defined(UVM_PAGE_TRKOWN)
2961 	if (pg->pg_flags & PG_BUSY)
2962 		(*pr)("  owning thread = %d, tag=%s",
2963 		    pg->owner, pg->owner_tag);
2964 	else
2965 		(*pr)("  page not busy, no owner");
2966 #else
2967 	(*pr)("  [page ownership tracking disabled]");
2968 #endif
2969 	(*pr)("\tvm_page_md %p\n", &pg->mdpage);
2970 
2971 	if (!full)
2972 		return;
2973 
2974 	/* cross-verify object/anon */
2975 	if ((pg->pg_flags & PQ_FREE) == 0) {
2976 		if (pg->pg_flags & PQ_ANON) {
2977 			if (pg->uanon == NULL || pg->uanon->an_page != pg)
2978 			    (*pr)("  >>> ANON DOES NOT POINT HERE <<< (%p)\n",
2979 				(pg->uanon) ? pg->uanon->an_page : NULL);
2980 			else
2981 				(*pr)("  anon backpointer is OK\n");
2982 		} else {
2983 			uobj = pg->uobject;
2984 			if (uobj) {
2985 				(*pr)("  checking object list\n");
2986 				RBT_FOREACH(tpg, uvm_objtree, &uobj->memt) {
2987 					if (tpg == pg) {
2988 						break;
2989 					}
2990 				}
2991 				if (tpg)
2992 					(*pr)("  page found on object list\n");
2993 				else
2994 					(*pr)("  >>> PAGE NOT FOUND "
2995 					    "ON OBJECT LIST! <<<\n");
2996 			}
2997 		}
2998 	}
2999 
3000 	/* cross-verify page queue */
3001 	if (pg->pg_flags & PQ_FREE) {
3002 		if (uvm_pmr_isfree(pg))
3003 			(*pr)("  page found in uvm_pmemrange\n");
3004 		else
3005 			(*pr)("  >>> page not found in uvm_pmemrange <<<\n");
3006 		pgl = NULL;
3007 	} else if (pg->pg_flags & PQ_INACTIVE) {
3008 		pgl = &uvm.page_inactive;
3009 	} else if (pg->pg_flags & PQ_ACTIVE) {
3010 		pgl = &uvm.page_active;
3011  	} else {
3012 		pgl = NULL;
3013 	}
3014 
3015 	if (pgl) {
3016 		(*pr)("  checking pageq list\n");
3017 		TAILQ_FOREACH(tpg, pgl, pageq) {
3018 			if (tpg == pg) {
3019 				break;
3020 			}
3021 		}
3022 		if (tpg)
3023 			(*pr)("  page found on pageq list\n");
3024 		else
3025 			(*pr)("  >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
3026 	}
3027 }
3028 #endif
3029 
3030 /*
3031  * uvm_map_protect: change map protection
3032  *
3033  * => set_max means set max_protection.
3034  * => map must be unlocked.
3035  */
3036 int
3037 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
3038     vm_prot_t new_prot, int etype, boolean_t set_max, boolean_t checkimmutable)
3039 {
3040 	struct vm_map_entry *first, *iter;
3041 	vm_prot_t old_prot;
3042 	vm_prot_t mask;
3043 	vsize_t dused;
3044 	int error;
3045 
3046 	KASSERT((etype & ~UVM_ET_STACK) == 0);	/* only UVM_ET_STACK allowed */
3047 
3048 	if (start > end)
3049 		return EINVAL;
3050 	start = MAX(start, map->min_offset);
3051 	end = MIN(end, map->max_offset);
3052 	if (start >= end)
3053 		return 0;
3054 
3055 	dused = 0;
3056 	error = 0;
3057 	vm_map_lock(map);
3058 
3059 	/*
3060 	 * Set up first and last.
3061 	 * - first will contain first entry at or after start.
3062 	 */
3063 	first = uvm_map_entrybyaddr(&map->addr, start);
3064 	KDASSERT(first != NULL);
3065 	if (first->end <= start)
3066 		first = RBT_NEXT(uvm_map_addr, first);
3067 
3068 	/* First, check for protection violations. */
3069 	for (iter = first; iter != NULL && iter->start < end;
3070 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
3071 		/* Treat memory holes as free space. */
3072 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
3073 			continue;
3074 
3075 		if (checkimmutable && (iter->etype & UVM_ET_IMMUTABLE)) {
3076 			error = EPERM;
3077 			goto out;
3078 		}
3079 		old_prot = iter->protection;
3080 		if (old_prot == PROT_NONE && new_prot != old_prot) {
3081 			dused += uvmspace_dused(
3082 			    map, MAX(start, iter->start), MIN(end, iter->end));
3083 		}
3084 
3085 		if (UVM_ET_ISSUBMAP(iter)) {
3086 			error = EINVAL;
3087 			goto out;
3088 		}
3089 		if ((new_prot & iter->max_protection) != new_prot) {
3090 			error = EACCES;
3091 			goto out;
3092 		}
3093 		if (map == kernel_map &&
3094 		    (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
3095 			panic("uvm_map_protect: kernel map W^X violation requested");
3096 	}
3097 
3098 	/* Check limits. */
3099 	if (dused > 0 && (map->flags & VM_MAP_ISVMSPACE)) {
3100 		vsize_t limit = lim_cur(RLIMIT_DATA);
3101 		dused = ptoa(dused);
3102 		if (limit < dused ||
3103 		    limit - dused < ptoa(((struct vmspace *)map)->vm_dused)) {
3104 			error = ENOMEM;
3105 			goto out;
3106 		}
3107 	}
3108 
3109 	/* only apply UVM_ET_STACK on a mapping changing to RW */
3110 	if (etype && new_prot != (PROT_READ|PROT_WRITE))
3111 		etype = 0;
3112 
3113 	/* Fix protections.  */
3114 	for (iter = first; iter != NULL && iter->start < end;
3115 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
3116 		/* Treat memory holes as free space. */
3117 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
3118 			continue;
3119 
3120 		old_prot = iter->protection;
3121 
3122 		/*
3123 		 * Skip adapting protection iff old and new protection
3124 		 * are equal.
3125 		 */
3126 		if (set_max) {
3127 			if (old_prot == (new_prot & old_prot) &&
3128 			    iter->max_protection == new_prot)
3129 				continue;
3130 		} else {
3131 			if (old_prot == new_prot)
3132 				continue;
3133 		}
3134 
3135 		UVM_MAP_CLIP_START(map, iter, start);
3136 		UVM_MAP_CLIP_END(map, iter, end);
3137 
3138 		if (set_max) {
3139 			iter->max_protection = new_prot;
3140 			iter->protection &= new_prot;
3141 		} else
3142 			iter->protection = new_prot;
3143 		iter->etype |= etype;	/* potentially add UVM_ET_STACK */
3144 
3145 		/*
3146 		 * update physical map if necessary.  worry about copy-on-write
3147 		 * here -- CHECK THIS XXX
3148 		 */
3149 		if (iter->protection != old_prot) {
3150 			mask = UVM_ET_ISCOPYONWRITE(iter) ?
3151 			    ~PROT_WRITE : PROT_MASK;
3152 
3153 			if (map->flags & VM_MAP_ISVMSPACE) {
3154 				if (old_prot == PROT_NONE) {
3155 					((struct vmspace *)map)->vm_dused +=
3156 					    uvmspace_dused(map, iter->start,
3157 					        iter->end);
3158 				}
3159 				if (iter->protection == PROT_NONE) {
3160 					((struct vmspace *)map)->vm_dused -=
3161 					    uvmspace_dused(map, iter->start,
3162 					        iter->end);
3163 				}
3164 			}
3165 
3166 			/* update pmap */
3167 			if ((iter->protection & mask) == PROT_NONE &&
3168 			    VM_MAPENT_ISWIRED(iter)) {
3169 				/*
3170 				 * TODO(ariane) this is stupid. wired_count
3171 				 * is 0 if not wired, otherwise anything
3172 				 * larger than 0 (incremented once each time
3173 				 * wire is called).
3174 				 * Mostly to be able to undo the damage on
3175 				 * failure. Not to actually be a wired
3176 				 * refcounter...
3177 				 * Originally: iter->wired_count--;
3178 				 * (don't we have to unwire this in the pmap
3179 				 * as well?)
3180 				 */
3181 				iter->wired_count = 0;
3182 			}
3183 			uvm_map_lock_entry(iter);
3184 			pmap_protect(map->pmap, iter->start, iter->end,
3185 			    iter->protection & mask);
3186 			uvm_map_unlock_entry(iter);
3187 		}
3188 
3189 		/*
3190 		 * If the map is configured to lock any future mappings,
3191 		 * wire this entry now if the old protection was PROT_NONE
3192 		 * and the new protection is not PROT_NONE.
3193 		 */
3194 		if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
3195 		    VM_MAPENT_ISWIRED(iter) == 0 &&
3196 		    old_prot == PROT_NONE &&
3197 		    new_prot != PROT_NONE) {
3198 			if (uvm_map_pageable(map, iter->start, iter->end,
3199 			    FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) {
3200 				/*
3201 				 * If locking the entry fails, remember the
3202 				 * error if it's the first one.  Note we
3203 				 * still continue setting the protection in
3204 				 * the map, but it will return the resource
3205 				 * storage condition regardless.
3206 				 *
3207 				 * XXX Ignore what the actual error is,
3208 				 * XXX just call it a resource shortage
3209 				 * XXX so that it doesn't get confused
3210 				 * XXX what uvm_map_protect() itself would
3211 				 * XXX normally return.
3212 				 */
3213 				error = ENOMEM;
3214 			}
3215 		}
3216 	}
3217 	pmap_update(map->pmap);
3218 
3219 out:
3220 	if (etype & UVM_ET_STACK)
3221 		map->sserial++;
3222 	vm_map_unlock(map);
3223 	return error;
3224 }
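
/*
 * Illustrative sketch (assumption): sys_mprotect() funnels into this
 * function roughly as
 *
 *	error = uvm_map_protect(&p->p_vmspace->vm_map,
 *	    addr, addr + len, prot, 0, FALSE, TRUE);
 *
 * i.e. set_max is FALSE for a plain protection change and
 * checkimmutable is TRUE so mimmutable(2)-protected ranges refuse the
 * change with EPERM.
 */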
3225 
3226 /*
3227  * uvmspace_alloc: allocate a vmspace structure.
3228  *
3229  * - structure includes vm_map and pmap
3230  * - XXX: no locking on this structure
3231  * - refcnt set to 1, rest must be init'd by caller
3232  */
3233 struct vmspace *
3234 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable,
3235     boolean_t remove_holes)
3236 {
3237 	struct vmspace *vm;
3238 
3239 	vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO);
3240 	uvmspace_init(vm, NULL, min, max, pageable, remove_holes);
3241 	return (vm);
3242 }
3243 
3244 /*
3245  * uvmspace_init: initialize a vmspace structure.
3246  *
3247  * - XXX: no locking on this structure
3248  * - refcnt set to 1, rest must be init'd by caller
3249  */
3250 void
3251 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max,
3252     boolean_t pageable, boolean_t remove_holes)
3253 {
3254 	KASSERT(pmap == NULL || pmap == pmap_kernel());
3255 
3256 	if (pmap)
3257 		pmap_reference(pmap);
3258 	else
3259 		pmap = pmap_create();
3260 
3261 	uvm_map_setup(&vm->vm_map, pmap, min, max,
3262 	    (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE);
3263 
3264 	vm->vm_refcnt = 1;
3265 
3266 	if (remove_holes)
3267 		pmap_remove_holes(vm);
3268 }
3269 
3270 /*
3271  * uvmspace_share: share a vmspace between two processes
3272  *
3273  * - used for vfork
3274  */
3275 
3276 struct vmspace *
3277 uvmspace_share(struct process *pr)
3278 {
3279 	struct vmspace *vm = pr->ps_vmspace;
3280 
3281 	uvmspace_addref(vm);
3282 	return vm;
3283 }
3284 
3285 /*
3286  * uvmspace_exec: the process wants to exec a new program
3287  *
3288  * - XXX: no locking on vmspace
3289  */
3290 
3291 void
3292 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end)
3293 {
3294 	struct process *pr = p->p_p;
3295 	struct vmspace *nvm, *ovm = pr->ps_vmspace;
3296 	struct vm_map *map = &ovm->vm_map;
3297 	struct uvm_map_deadq dead_entries;
3298 
3299 	KASSERT((start & (vaddr_t)PAGE_MASK) == 0);
3300 	KASSERT((end & (vaddr_t)PAGE_MASK) == 0 ||
3301 	    (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
3302 
3303 	pmap_unuse_final(p);   /* before stack addresses go away */
3304 	TAILQ_INIT(&dead_entries);
3305 
3306 	/* see if more than one process is using this vmspace...  */
3307 	if (ovm->vm_refcnt == 1) {
3308 		/*
3309 		 * If pr is the only process using its vmspace then
3310 		 * we can safely recycle that vmspace for the program
3311 		 * that is being exec'd.
3312 		 */
3313 
3314 #ifdef SYSVSHM
3315 		/*
3316 		 * SYSV SHM semantics require us to kill all segments on an exec
3317 		 */
3318 		if (ovm->vm_shm)
3319 			shmexit(ovm);
3320 #endif
3321 
3322 		/*
3323 		 * POSIX 1003.1b -- "lock future mappings" is revoked
3324 		 * when a process execs another program image.
3325 		 */
3326 		vm_map_lock(map);
3327 		vm_map_modflags(map, 0, VM_MAP_WIREFUTURE |
3328 		    VM_MAP_PINSYSCALL_ONCE);
3329 
3330 		/*
3331 		 * now unmap the old program
3332 		 *
3333 		 * Instead of attempting to keep the map valid, we simply
3334 		 * nuke all entries and ask uvm_map_setup to reinitialize
3335 		 * the map to the new boundaries.
3336 		 *
3337 		 * uvm_unmap_remove will actually nuke all entries for us
3338 		 * (as in, not replace them with free-memory entries).
3339 		 */
3340 		uvm_unmap_remove(map, map->min_offset, map->max_offset,
3341 		    &dead_entries, TRUE, FALSE, FALSE);
3342 
3343 		KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
3344 
3345 		/* Nuke statistics and boundaries. */
3346 		memset(&ovm->vm_startcopy, 0,
3347 		    (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy);
3348 
3349 
3350 		if (end & (vaddr_t)PAGE_MASK) {
3351 			end += 1;
3352 			if (end == 0) /* overflow */
3353 				end -= PAGE_SIZE;
3354 		}
3355 
3356 		/* Setup new boundaries and populate map with entries. */
3357 		map->min_offset = start;
3358 		map->max_offset = end;
3359 		uvm_map_setup_entries(map);
3360 		vm_map_unlock(map);
3361 
3362 		/* but keep MMU holes unavailable */
3363 		pmap_remove_holes(ovm);
3364 	} else {
3365 		/*
3366 		 * pr's vmspace is being shared, so we can't reuse
3367 		 * it for pr since it is still being used for others.
3368 		 * allocate a new vmspace for pr
3369 		 */
3370 		nvm = uvmspace_alloc(start, end,
3371 		    (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE);
3372 
3373 		/* install new vmspace and drop our ref to the old one. */
3374 		pmap_deactivate(p);
3375 		p->p_vmspace = pr->ps_vmspace = nvm;
3376 		pmap_activate(p);
3377 
3378 		uvmspace_free(ovm);
3379 	}
3380 #ifdef PMAP_CHECK_COPYIN
3381 	p->p_vmspace->vm_map.check_copyin_count = 0;	/* disable checks */
3382 #endif
3383 
3384 	/* Release dead entries */
3385 	uvm_unmap_detach(&dead_entries, 0);
3386 }
3387 
3388 /*
3389  * uvmspace_addref: add a reference to a vmspace.
3390  */
3391 void
3392 uvmspace_addref(struct vmspace *vm)
3393 {
3394 	KASSERT(vm->vm_refcnt > 0);
3395 	atomic_inc_int(&vm->vm_refcnt);
3396 }
3397 
3398 /*
3399  * uvmspace_free: free a vmspace data structure
3400  */
3401 void
3402 uvmspace_free(struct vmspace *vm)
3403 {
3404 	if (atomic_dec_int_nv(&vm->vm_refcnt) == 0) {
3405 		/*
3406 		 * lock the map, to wait out all other references to it.  delete
3407 		 * all of the mappings and pages they hold, then call the pmap
3408 		 * module to reclaim anything left.
3409 		 */
3410 #ifdef SYSVSHM
3411 		/* Get rid of any SYSV shared memory segments. */
3412 		if (vm->vm_shm != NULL) {
3413 			KERNEL_LOCK();
3414 			shmexit(vm);
3415 			KERNEL_UNLOCK();
3416 		}
3417 #endif
3418 
3419 		uvm_map_teardown(&vm->vm_map);
3420 		pool_put(&uvm_vmspace_pool, vm);
3421 	}
3422 }
3423 
3424 /*
3425  * uvm_share: Map the address range [srcaddr, srcaddr + sz) in
3426  * srcmap to the address range [dstaddr, dstaddr + sz) in
3427  * dstmap.
3428  *
3429  * The whole address range in srcmap must be backed by an object
3430  * (no holes).
3431  *
3432  * If successful, the address ranges share memory and the destination
3433  * address range uses the protection flags in prot.
3434  *
3435  * This routine assumes that sz is a multiple of PAGE_SIZE and
3436  * that dstaddr and srcaddr are page-aligned.
3437  */
3438 int
3439 uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot,
3440     struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz)
3441 {
3442 	int ret = 0;
3443 	vaddr_t unmap_end;
3444 	vaddr_t dstva;
3445 	vsize_t s_off, len, n = sz, remain;
3446 	struct vm_map_entry *first = NULL, *last = NULL;
3447 	struct vm_map_entry *src_entry, *psrc_entry = NULL;
3448 	struct uvm_map_deadq dead;
3449 
3450 	if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr)
3451 		return EINVAL;
3452 
3453 	TAILQ_INIT(&dead);
3454 	vm_map_lock(dstmap);
3455 	vm_map_lock_read(srcmap);
3456 
3457 	if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) {
3458 		ret = ENOMEM;
3459 		goto exit_unlock;
3460 	}
3461 	if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) {
3462 		ret = EINVAL;
3463 		goto exit_unlock;
3464 	}
3465 
3466 	dstva = dstaddr;
3467 	unmap_end = dstaddr;
3468 	for (; src_entry != NULL;
3469 	    psrc_entry = src_entry,
3470 	    src_entry = RBT_NEXT(uvm_map_addr, src_entry)) {
3471 		/* hole in address space, bail out */
3472 		if (psrc_entry != NULL && psrc_entry->end != src_entry->start)
3473 			break;
3474 		if (src_entry->start >= srcaddr + sz)
3475 			break;
3476 
3477 		if (UVM_ET_ISSUBMAP(src_entry))
3478 			panic("uvm_share: encountered a submap (illegal)");
3479 		if (!UVM_ET_ISCOPYONWRITE(src_entry) &&
3480 		    UVM_ET_ISNEEDSCOPY(src_entry))
3481 			panic("uvm_share: non-copy_on_write map entries "
3482 			    "marked needs_copy (illegal)");
3483 
3484 		/*
3485 		 * srcaddr > map entry start means we are in the middle of a
3486 		 * map entry, so calculate the offset to use in the source map.
3487 		 */
3488 		if (srcaddr > src_entry->start)
3489 			s_off = srcaddr - src_entry->start;
3490 		else if (srcaddr == src_entry->start)
3491 			s_off = 0;
3492 		else
3493 			panic("uvm_share: map entry start > srcaddr");
3494 
3495 		remain = src_entry->end - src_entry->start - s_off;
3496 
3497 		/* Determine how many bytes to share in this pass */
3498 		if (n < remain)
3499 			len = n;
3500 		else
3501 			len = remain;
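		/*
		 * Worked example (hypothetical numbers): if src_entry spans
		 * [0x1000, 0x5000), srcaddr is 0x3000 and n is 0x3000, then
		 * s_off = 0x2000, remain = 0x2000 and len = 0x2000; the
		 * remaining 0x1000 bytes are shared from the next entry.
		 */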
3502 
3503 		if (uvm_mapent_share(dstmap, dstva, len, s_off, prot, prot,
3504 		    srcmap, src_entry, &dead) == NULL)
3505 			break;
3506 
3507 		n -= len;
3508 		dstva += len;
3509 		srcaddr += len;
3510 		unmap_end = dstva + len;
3511 		if (n == 0)
3512 			goto exit_unlock;
3513 	}
3514 
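	/*
	 * Reaching this point means the source range was not fully
	 * covered (hole, short map or share failure): undo the partial
	 * mappings created so far and fail.
	 */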
3515 	ret = EINVAL;
3516 	uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE, FALSE);
3517 
3518 exit_unlock:
3519 	vm_map_unlock_read(srcmap);
3520 	vm_map_unlock(dstmap);
3521 	uvm_unmap_detach(&dead, 0);
3522 
3523 	return ret;
3524 }
3525 
3526 /*
3527  * Clone map entry into other map.
3528  *
3529  * Mapping will be placed at dstaddr, for the same length.
3530  * Space must be available.
3531  * Reference counters are incremented.
3532  */
3533 struct vm_map_entry *
3534 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
3535     vsize_t off, vm_prot_t prot, vm_prot_t maxprot,
3536     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead,
3537     int mapent_flags, int amap_share_flags)
3538 {
3539 	struct vm_map_entry *new_entry, *first, *last;
3540 
3541 	KDASSERT(!UVM_ET_ISSUBMAP(old_entry));
3542 
3543 	/* Create new entry (linked in on creation). Fill in first, last. */
3544 	first = last = NULL;
3545 	if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) {
3546 		panic("uvm_mapent_clone: no space in map for "
3547 		    "entry in empty map");
3548 	}
3549 	new_entry = uvm_map_mkentry(dstmap, first, last,
3550 	    dstaddr, dstlen, mapent_flags, dead, NULL);
3551 	if (new_entry == NULL)
3552 		return NULL;
3553 	/* old_entry -> new_entry */
3554 	new_entry->object = old_entry->object;
3555 	new_entry->offset = old_entry->offset;
3556 	new_entry->aref = old_entry->aref;
3557 	new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED;
3558 	new_entry->protection = prot;
3559 	new_entry->max_protection = maxprot;
3560 	new_entry->inheritance = old_entry->inheritance;
3561 	new_entry->advice = old_entry->advice;
3562 
3563 	/* gain reference to object backing the map (can't be a submap). */
3564 	if (new_entry->aref.ar_amap) {
3565 		new_entry->aref.ar_pageoff += off >> PAGE_SHIFT;
3566 		amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
3567 		    (new_entry->end - new_entry->start) >> PAGE_SHIFT,
3568 		    amap_share_flags);
3569 	}
3570 
3571 	if (UVM_ET_ISOBJ(new_entry) &&
3572 	    new_entry->object.uvm_obj->pgops->pgo_reference) {
3573 		new_entry->offset += off;
3574 		new_entry->object.uvm_obj->pgops->pgo_reference
3575 		    (new_entry->object.uvm_obj);
3576 	}
3577 
3578 	return new_entry;
3579 }
3580 
3581 struct vm_map_entry *
3582 uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
3583     vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map,
3584     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3585 {
3586 	/*
3587 	 * If old_entry refers to a copy-on-write region that has not yet been
3588 	 * written to (needs_copy flag is set), then we need to allocate a new
3589 	 * amap for old_entry.
3590 	 *
3591 	 * If we do not do this, and the process owning old_entry later does a
3592 	 * copy-on-write, old_entry and new_entry will refer to different memory
3593 	 * regions, and the memory is no longer shared between the processes.
3594 	 *
3595 	 * [in other words, we need to clear needs_copy]
3596 	 */
3597 
3598 	if (UVM_ET_ISNEEDSCOPY(old_entry)) {
3599 		/* get our own amap, clears needs_copy */
3600 		amap_copy(old_map, old_entry, M_WAITOK, FALSE, 0, 0);
3601 		/* XXXCDC: WAITOK??? */
3602 	}
3603 
3604 	return uvm_mapent_clone(dstmap, dstaddr, dstlen, off,
3605 	    prot, maxprot, old_entry, dead, 0, AMAP_SHARED);
3606 }
3607 
3608 /*
3609  * share the mapping: this means we want the old and
3610  * new entries to share amaps and backing objects.
3611  */
3612 struct vm_map_entry *
3613 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map,
3614     struct vm_map *old_map,
3615     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3616 {
3617 	struct vm_map_entry *new_entry;
3618 
3619 	new_entry = uvm_mapent_share(new_map, old_entry->start,
3620 	    old_entry->end - old_entry->start, 0, old_entry->protection,
3621 	    old_entry->max_protection, old_map, old_entry, dead);
3622 
3623 	return (new_entry);
3624 }
3625 
3626 /*
3627  * copy-on-write the mapping (using mmap's
3628  * MAP_PRIVATE semantics)
3629  *
3630  * allocate new_entry, adjust reference counts.
3631  * (note that new references are read-only).
3632  */
3633 struct vm_map_entry *
3634 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map,
3635     struct vm_map *old_map,
3636     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3637 {
3638 	struct vm_map_entry	*new_entry;
3639 	boolean_t		 protect_child;
3640 
3641 	new_entry = uvm_mapent_clone(new_map, old_entry->start,
3642 	    old_entry->end - old_entry->start, 0, old_entry->protection,
3643 	    old_entry->max_protection, old_entry, dead, 0, 0);
3644 
3645 	new_entry->etype |=
3646 	    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
3647 
3648 	/*
3649 	 * the new entry will need an amap.  it will either
3650 	 * need to be copied from the old entry or created
3651 	 * from scratch (if the old entry does not have an
3652 	 * amap).  can we defer this process until later
3653 	 * (by setting "needs_copy") or do we need to copy
3654 	 * the amap now?
3655 	 *
3656 	 * we must copy the amap now if any of the following
3657 	 * conditions hold:
3658 	 * 1. the old entry has an amap and that amap is
3659 	 *    being shared.  this means that the old (parent)
3660 	 *    process is sharing the amap with another
3661 	 *    process.  if we do not clear needs_copy here
3662 	 *    we will end up in a situation where both the
3663 	 *    parent and child process are referring to the
3664 	 *    same amap with "needs_copy" set.  if the
3665 	 *    parent write-faults, the fault routine will
3666 	 *    clear "needs_copy" in the parent by allocating
3667 	 *    a new amap.   this is wrong because the
3668 	 *    parent is supposed to be sharing the old amap
3669 	 *    and the new amap will break that.
3670 	 *
3671 	 * 2. if the old entry has an amap and a non-zero
3672 	 *    wire count then we are going to have to call
3673 	 *    amap_cow_now to avoid page faults in the
3674 	 *    parent process.   since amap_cow_now requires
3675 	 *    "needs_copy" to be clear we might as well
3676 	 *    clear it here as well.
3677 	 *
3678 	 */
3679 	if (old_entry->aref.ar_amap != NULL &&
3680 	    ((amap_flags(old_entry->aref.ar_amap) &
3681 	    AMAP_SHARED) != 0 ||
3682 	    VM_MAPENT_ISWIRED(old_entry))) {
3683 		amap_copy(new_map, new_entry, M_WAITOK, FALSE,
3684 		    0, 0);
3685 		/* XXXCDC: M_WAITOK ... ok? */
3686 	}
3687 
3688 	/*
3689 	 * if the parent's entry is wired down, then the
3690 	 * parent process does not want page faults on
3691 	 * access to that memory.  this means that we
3692 	 * cannot do copy-on-write because we can't write
3693 	 * protect the old entry.   in this case we
3694 	 * resolve all copy-on-write faults now, using
3695 	 * amap_cow_now.   note that we have already
3696 	 * allocated any needed amap (above).
3697 	 */
3698 	if (VM_MAPENT_ISWIRED(old_entry)) {
3699 		/*
3700 		 * resolve all copy-on-write faults now
3701 		 * (note that there is nothing to do if
3702 		 * the old mapping does not have an amap).
3703 		 */
3704 		if (old_entry->aref.ar_amap)
3705 			amap_cow_now(new_map, new_entry);
3706 	} else {
3707 		if (old_entry->aref.ar_amap) {
3708 			/*
3709 			 * setup mappings to trigger copy-on-write faults
3710 			 * we must write-protect the parent if it has
3711 			 * an amap and it is not already "needs_copy"...
3712 			 * if it is already "needs_copy" then the parent
3713 			 * has already been write-protected by a previous
3714 			 * fork operation.
3715 			 *
3716 			 * if we do not write-protect the parent, then
3717 			 * we must be sure to write-protect the child.
3718 			 */
3719 			if (!UVM_ET_ISNEEDSCOPY(old_entry)) {
3720 				if (old_entry->max_protection & PROT_WRITE) {
3721 					uvm_map_lock_entry(old_entry);
3722 					pmap_protect(old_map->pmap,
3723 					    old_entry->start,
3724 					    old_entry->end,
3725 					    old_entry->protection &
3726 					    ~PROT_WRITE);
3727 					uvm_map_unlock_entry(old_entry);
3728 					pmap_update(old_map->pmap);
3729 				}
3730 				old_entry->etype |= UVM_ET_NEEDSCOPY;
3731 			}
3732 
3733 	  		/* parent must now be write-protected */
3734 	  		protect_child = FALSE;
3735 		} else {
3736 			/*
3737 			 * we only need to protect the child if the
3738 			 * parent has write access.
3739 			 */
3740 			if (old_entry->max_protection & PROT_WRITE)
3741 				protect_child = TRUE;
3742 			else
3743 				protect_child = FALSE;
3744 		}
3745 
3746 		/* protect the child's mappings if necessary */
3747 		if (protect_child) {
3748 			pmap_protect(new_map->pmap, new_entry->start,
3749 			    new_entry->end,
3750 			    new_entry->protection &
3751 			    ~PROT_WRITE);
3752 		}
3753 	}
3754 
3755 	return (new_entry);
3756 }
3757 
3758 /*
3759  * zero the mapping: the new entry will be zero initialized
3760  */
3761 struct vm_map_entry *
3762 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map,
3763     struct vm_map *old_map,
3764     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3765 {
3766 	struct vm_map_entry *new_entry;
3767 
3768 	new_entry = uvm_mapent_clone(new_map, old_entry->start,
3769 	    old_entry->end - old_entry->start, 0, old_entry->protection,
3770 	    old_entry->max_protection, old_entry, dead, 0, 0);
3771 
3772 	new_entry->etype |=
3773 	    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
3774 
3775 	if (new_entry->aref.ar_amap) {
3776 		amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
3777 		    atop(new_entry->end - new_entry->start), 0);
3778 		new_entry->aref.ar_amap = NULL;
3779 		new_entry->aref.ar_pageoff = 0;
3780 	}
3781 
3782 	if (UVM_ET_ISOBJ(new_entry)) {
3783 		if (new_entry->object.uvm_obj->pgops->pgo_detach)
3784 			new_entry->object.uvm_obj->pgops->pgo_detach(
3785 			    new_entry->object.uvm_obj);
3786 		new_entry->object.uvm_obj = NULL;
3787 		new_entry->etype &= ~UVM_ET_OBJ;
3788 	}
3789 
3790 	return (new_entry);
3791 }
3792 
3793 /*
3794  * uvmspace_fork: fork a process' main map
3795  *
3796  * => create a new vmspace for child process from parent.
3797  * => parent's map must not be locked.
3798  */
3799 struct vmspace *
3800 uvmspace_fork(struct process *pr)
3801 {
3802 	struct vmspace *vm1 = pr->ps_vmspace;
3803 	struct vmspace *vm2;
3804 	struct vm_map *old_map = &vm1->vm_map;
3805 	struct vm_map *new_map;
3806 	struct vm_map_entry *old_entry, *new_entry;
3807 	struct uvm_map_deadq dead;
3808 
3809 	vm_map_lock(old_map);
3810 
3811 	vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset,
3812 	    (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE);
3813 	memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
3814 	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
3815 	vm2->vm_dused = 0; /* Statistic managed by us. */
3816 	new_map = &vm2->vm_map;
3817 	vm_map_lock(new_map);
3818 
3819 	/* go entry-by-entry */
3820 	TAILQ_INIT(&dead);
3821 	RBT_FOREACH(old_entry, uvm_map_addr, &old_map->addr) {
3822 		if (old_entry->start == old_entry->end)
3823 			continue;
3824 
3825 		/* first, some sanity checks on the old entry */
3826 		if (UVM_ET_ISSUBMAP(old_entry)) {
3827 			panic("fork: encountered a submap during fork "
3828 			    "(illegal)");
3829 		}
3830 
3831 		if (!UVM_ET_ISCOPYONWRITE(old_entry) &&
3832 		    UVM_ET_ISNEEDSCOPY(old_entry)) {
3833 			panic("fork: non-copy_on_write map entry marked "
3834 			    "needs_copy (illegal)");
3835 		}
3836 
3837 		/* Apply inheritance. */
3838 		switch (old_entry->inheritance) {
3839 		case MAP_INHERIT_SHARE:
3840 			new_entry = uvm_mapent_forkshared(vm2, new_map,
3841 			    old_map, old_entry, &dead);
3842 			break;
3843 		case MAP_INHERIT_COPY:
3844 			new_entry = uvm_mapent_forkcopy(vm2, new_map,
3845 			    old_map, old_entry, &dead);
3846 			break;
3847 		case MAP_INHERIT_ZERO:
3848 			new_entry = uvm_mapent_forkzero(vm2, new_map,
3849 			    old_map, old_entry, &dead);
3850 			break;
3851 		default:
3852 			continue;
3853 		}
3854 
3855 	 	/* Update process statistics. */
3856 		if (!UVM_ET_ISHOLE(new_entry))
3857 			new_map->size += new_entry->end - new_entry->start;
3858 		if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry) &&
3859 		    new_entry->protection != PROT_NONE) {
3860 			vm2->vm_dused += uvmspace_dused(
3861 			    new_map, new_entry->start, new_entry->end);
3862 		}
3863 	}
3864 	new_map->flags |= old_map->flags & VM_MAP_PINSYSCALL_ONCE;
3865 #ifdef PMAP_CHECK_COPYIN
3866 	if (PMAP_CHECK_COPYIN) {
3867 		memcpy(&new_map->check_copyin, &old_map->check_copyin,
3868 		    sizeof(new_map->check_copyin));
3869 		membar_producer();
3870 		new_map->check_copyin_count = old_map->check_copyin_count;
3871 	}
3872 #endif
3873 
3874 	vm_map_unlock(old_map);
3875 	vm_map_unlock(new_map);
3876 
3877 	/*
3878 	 * This can actually happen, if multiple entries described a
3879 	 * space in which an entry was inherited.
3880 	 */
3881 	uvm_unmap_detach(&dead, 0);
3882 
3883 #ifdef SYSVSHM
3884 	if (vm1->vm_shm)
3885 		shmfork(vm1, vm2);
3886 #endif
3887 
3888 	return vm2;
3889 }
3890 
3891 /*
3892  * uvm_map_hint: return the beginning of the best area suitable for
3893  * creating a new mapping with "prot" protection.
3894  */
3895 vaddr_t
3896 uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr,
3897     vaddr_t maxaddr)
3898 {
3899 	vaddr_t addr;
3900 	vaddr_t spacing;
3901 
3902 #ifdef __i386__
3903 	/*
3904 	 * If executable, skip the first two pages; otherwise start
3905 	 * after the data + heap region.
3906 	 */
3907 	if ((prot & PROT_EXEC) != 0 &&
3908 	    (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) {
3909 		addr = (PAGE_SIZE*2) +
3910 		    (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1));
3911 		return (round_page(addr));
3912 	}
3913 #endif
3914 
3915 #if defined (__LP64__)
3916 	spacing = MIN(4UL * 1024 * 1024 * 1024, MAXDSIZ) - 1;
3917 #else
3918 	spacing = MIN(1 * 1024 * 1024 * 1024, MAXDSIZ) - 1;
3919 #endif
3920 
3921 	/*
3922 	 * Start malloc/mmap after the brk.
3923 	 */
3924 	addr = (vaddr_t)vm->vm_daddr + BRKSIZ;
3925 	addr = MAX(addr, minaddr);
3926 
3927 	if (addr < maxaddr) {
3928 		while (spacing > maxaddr - addr)
3929 			spacing >>= 1;
3930 	}
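	/*
	 * Randomize the hint within the spacing window; spacing was halved
	 * above as needed so the result stays below maxaddr when possible.
	 */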
3931 	addr += arc4random() & spacing;
3932 	return (round_page(addr));
3933 }
3934 
3935 /*
3936  * uvm_map_submap: punch down part of a map into a submap
3937  *
3938  * => only the kernel_map is allowed to be submapped
3939  * => the purpose of submapping is to break up the locking granularity
3940  *	of a larger map
3941  * => the range specified must have been mapped previously with a uvm_map()
3942  *	call [with uobj==NULL] to create a blank map entry in the main map.
3943  *	[And it had better still be blank!]
3944  * => maps which contain submaps should never be copied or forked.
3945  * => to remove a submap, use uvm_unmap() on the main map
3946  *	and then uvm_map_deallocate() the submap.
3947  * => main map must be unlocked.
3948  * => submap must have been init'd and have a zero reference count.
3949  *	[need not be locked as we don't actually reference it]
3950  */
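/*
 * A minimal sketch of the documented sequence (error handling and exact
 * arguments omitted; names as used elsewhere in this file):
 *
 *	(1) uvm_map() the range in kernel_map with uobj == NULL;
 *	(2) set up the submap covering [start, end), e.g. via
 *	    uvm_map_create() or uvm_map_setup();
 *	(3) uvm_map_submap(kernel_map, start, end, submap);
 */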
3951 int
3952 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
3953     struct vm_map *submap)
3954 {
3955 	struct vm_map_entry *entry;
3956 	int result;
3957 
3958 	if (start > map->max_offset || end > map->max_offset ||
3959 	    start < map->min_offset || end < map->min_offset)
3960 		return EINVAL;
3961 
3962 	vm_map_lock(map);
3963 
3964 	if (uvm_map_lookup_entry(map, start, &entry)) {
3965 		UVM_MAP_CLIP_START(map, entry, start);
3966 		UVM_MAP_CLIP_END(map, entry, end);
3967 	} else
3968 		entry = NULL;
3969 
3970 	if (entry != NULL &&
3971 	    entry->start == start && entry->end == end &&
3972 	    entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
3973 	    !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
3974 		entry->etype |= UVM_ET_SUBMAP;
3975 		entry->object.sub_map = submap;
3976 		entry->offset = 0;
3977 		uvm_map_reference(submap);
3978 		result = 0;
3979 	} else
3980 		result = EINVAL;
3981 
3982 	vm_map_unlock(map);
3983 	return result;
3984 }
3985 
3986 /*
3987  * uvm_map_checkprot: check protection in map
3988  *
3989  * => must allow specific protection in a fully allocated region.
3990  * => map must be read or write locked by caller.
3991  */
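/*
 * Illustrative use (hypothetical caller): checking that a range is
 * readable while already holding the map lock, as required above:
 *
 *	vm_map_lock_read(map);
 *	ok = uvm_map_checkprot(map, addr, addr + len, PROT_READ);
 *	vm_map_unlock_read(map);
 */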
3992 boolean_t
3993 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
3994     vm_prot_t protection)
3995 {
3996 	struct vm_map_entry *entry;
3997 
3998 	vm_map_assert_anylock(map);
3999 
4000 	if (start < map->min_offset || end > map->max_offset || start > end)
4001 		return FALSE;
4002 	if (start == end)
4003 		return TRUE;
4004 
4005 	/*
4006 	 * Iterate entries.
4007 	 */
4008 	for (entry = uvm_map_entrybyaddr(&map->addr, start);
4009 	    entry != NULL && entry->start < end;
4010 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4011 		/* Fail if a hole is found. */
4012 		if (UVM_ET_ISHOLE(entry) ||
4013 		    (entry->end < end && entry->end != VMMAP_FREE_END(entry)))
4014 			return FALSE;
4015 
4016 		/* Check protection. */
4017 		if ((entry->protection & protection) != protection)
4018 			return FALSE;
4019 	}
4020 	return TRUE;
4021 }
4022 
4023 /*
4024  * uvm_map_create: create map
4025  */
4026 vm_map_t
4027 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags)
4028 {
4029 	vm_map_t map;
4030 
4031 	map = malloc(sizeof *map, M_VMMAP, M_WAITOK);
4032 	uvm_map_setup(map, pmap, min, max, flags);
4033 	return (map);
4034 }
4035 
4036 /*
4037  * uvm_map_deallocate: drop reference to a map
4038  *
4039  * => caller must not lock map
4040  * => we will zap map if ref count goes to zero
4041  */
4042 void
4043 uvm_map_deallocate(vm_map_t map)
4044 {
4045 	int c;
4046 	struct uvm_map_deadq dead;
4047 
4048 	c = atomic_dec_int_nv(&map->ref_count);
4049 	if (c > 0) {
4050 		return;
4051 	}
4052 
4053 	/*
4054 	 * all references gone.   unmap and free.
4055 	 *
4056 	 * No lock required: we are only one to access this map.
4057 	 */
4058 	TAILQ_INIT(&dead);
4059 	uvm_tree_sanity(map, __FILE__, __LINE__);
4060 	vm_map_lock(map);
4061 	uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead,
4062 	    TRUE, FALSE, FALSE);
4063 	vm_map_unlock(map);
4064 	pmap_destroy(map->pmap);
4065 	KASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
4066 	free(map, M_VMMAP, sizeof *map);
4067 
4068 	uvm_unmap_detach(&dead, 0);
4069 }
4070 
4071 /*
4072  * uvm_map_inherit: set inheritance code for range of addrs in map.
4073  *
4074  * => map must be unlocked
4075  * => note that the inherit code is used during a "fork".  see fork
4076  *	code for details.
4077  */
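/*
 * For example (hypothetical addresses), marking a writable page so that
 * fork() gives the child a zero-filled copy instead of sharing or
 * copying it:
 *
 *	uvm_map_inherit(&p->p_vmspace->vm_map, va, va + PAGE_SIZE,
 *	    MAP_INHERIT_ZERO);
 */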
4078 int
4079 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
4080     vm_inherit_t new_inheritance)
4081 {
4082 	struct vm_map_entry *entry, *entry1;
4083 	int error = EPERM;
4084 
4085 	switch (new_inheritance) {
4086 	case MAP_INHERIT_NONE:
4087 	case MAP_INHERIT_COPY:
4088 	case MAP_INHERIT_SHARE:
4089 	case MAP_INHERIT_ZERO:
4090 		break;
4091 	default:
4092 		return (EINVAL);
4093 	}
4094 
4095 	if (start > end)
4096 		return EINVAL;
4097 	start = MAX(start, map->min_offset);
4098 	end = MIN(end, map->max_offset);
4099 	if (start >= end)
4100 		return 0;
4101 
4102 	vm_map_lock(map);
4103 
4104 	entry = uvm_map_entrybyaddr(&map->addr, start);
4105 	if (entry->end > start)
4106 		UVM_MAP_CLIP_START(map, entry, start);
4107 	else
4108 		entry = RBT_NEXT(uvm_map_addr, entry);
4109 
4110 	/* First check for illegal operations */
4111 	entry1 = entry;
4112 	while (entry1 != NULL && entry1->start < end) {
4113 		if (entry1->etype & UVM_ET_IMMUTABLE)
4114 			goto out;
4115 		if (new_inheritance == MAP_INHERIT_ZERO &&
4116 		    (entry1->protection & PROT_WRITE) == 0)
4117 			goto out;
4118 		entry1 = RBT_NEXT(uvm_map_addr, entry1);
4119 	}
4120 
4121 	while (entry != NULL && entry->start < end) {
4122 		UVM_MAP_CLIP_END(map, entry, end);
4123 		entry->inheritance = new_inheritance;
4124 		entry = RBT_NEXT(uvm_map_addr, entry);
4125 	}
4126 
4127 	error = 0;
4128 out:
4129 	vm_map_unlock(map);
4130 	return (error);
4131 }
4132 
4133 #ifdef PMAP_CHECK_COPYIN
4134 static void inline
4135 check_copyin_add(struct vm_map *map, vaddr_t start, vaddr_t end)
4136 {
4137 	if (PMAP_CHECK_COPYIN == 0 ||
4138 	    map->check_copyin_count >= UVM_MAP_CHECK_COPYIN_MAX)
4139 		return;
4140 	vm_map_assert_wrlock(map);
4141 	map->check_copyin[map->check_copyin_count].start = start;
4142 	map->check_copyin[map->check_copyin_count].end = end;
4143 	membar_producer();
4144 	map->check_copyin_count++;
4145 }
4146 
4147 /*
4148  * uvm_map_check_copyin_add: remember regions which are X-only for copyin(),
4149  * copyinstr(), uiomove(), and others
4150  *
4151  * => map must be unlocked
4152  */
4153 int
4154 uvm_map_check_copyin_add(struct vm_map *map, vaddr_t start, vaddr_t end)
4155 {
4156 	if (start > end)
4157 		return EINVAL;
4158 	start = MAX(start, map->min_offset);
4159 	end = MIN(end, map->max_offset);
4160 	if (start >= end)
4161 		return 0;
4162 	vm_map_lock(map);
4163 	check_copyin_add(map, start, end);
4164 	vm_map_unlock(map);
4165 	return (0);
4166 }
4167 #endif /* PMAP_CHECK_COPYIN */
4168 
4169 /*
4170  * uvm_map_immutable: block mapping/mprotect for range of addrs in map.
4171  *
4172  * => map must be unlocked
4173  */
4174 int
4175 uvm_map_immutable(struct vm_map *map, vaddr_t start, vaddr_t end, int imut)
4176 {
4177 	struct vm_map_entry *entry, *entry1;
4178 	int error = EPERM;
4179 
4180 	if (start > end)
4181 		return EINVAL;
4182 	start = MAX(start, map->min_offset);
4183 	end = MIN(end, map->max_offset);
4184 	if (start >= end)
4185 		return 0;
4186 
4187 	vm_map_lock(map);
4188 
4189 	entry = uvm_map_entrybyaddr(&map->addr, start);
4190 	if (entry->end > start)
4191 		UVM_MAP_CLIP_START(map, entry, start);
4192 	else
4193 		entry = RBT_NEXT(uvm_map_addr, entry);
4194 
4195 	/* First check for illegal operations */
4196 	entry1 = entry;
4197 	while (entry1 != NULL && entry1->start < end) {
4198 		if (entry1->inheritance == MAP_INHERIT_ZERO)
4199 			goto out;
4200 		entry1 = RBT_NEXT(uvm_map_addr, entry1);
4201 	}
4202 
4203 	while (entry != NULL && entry->start < end) {
4204 		UVM_MAP_CLIP_END(map, entry, end);
4205 		if (imut)
4206 			entry->etype |= UVM_ET_IMMUTABLE;
4207 		else
4208 			entry->etype &= ~UVM_ET_IMMUTABLE;
4209 		entry = RBT_NEXT(uvm_map_addr, entry);
4210 	}
4211 	error = 0;
4212 out:
4213 	vm_map_unlock(map);
4214 	return (error);
4215 }
4216 
4217 /*
4218  * uvm_map_advice: set advice code for range of addrs in map.
4219  *
4220  * => map must be unlocked
4221  */
4222 int
4223 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
4224 {
4225 	struct vm_map_entry *entry;
4226 
4227 	switch (new_advice) {
4228 	case MADV_NORMAL:
4229 	case MADV_RANDOM:
4230 	case MADV_SEQUENTIAL:
4231 		break;
4232 	default:
4233 		return (EINVAL);
4234 	}
4235 
4236 	if (start > end)
4237 		return EINVAL;
4238 	start = MAX(start, map->min_offset);
4239 	end = MIN(end, map->max_offset);
4240 	if (start >= end)
4241 		return 0;
4242 
4243 	vm_map_lock(map);
4244 
4245 	entry = uvm_map_entrybyaddr(&map->addr, start);
4246 	if (entry != NULL && entry->end > start)
4247 		UVM_MAP_CLIP_START(map, entry, start);
4248 	else if (entry != NULL)
4249 		entry = RBT_NEXT(uvm_map_addr, entry);
4250 
4251 	/*
4252 	 * XXXJRT: disallow holes?
4253 	 */
4254 	while (entry != NULL && entry->start < end) {
4255 		UVM_MAP_CLIP_END(map, entry, end);
4256 		entry->advice = new_advice;
4257 		entry = RBT_NEXT(uvm_map_addr, entry);
4258 	}
4259 
4260 	vm_map_unlock(map);
4261 	return (0);
4262 }
4263 
4264 /*
4265  * uvm_map_extract: extract a mapping from a map and put it somewhere
4266  * in the kernel_map, setting protection to max_prot.
4267  *
4268  * => map should be unlocked (we will write lock it and kernel_map)
4269  * => returns 0 on success, error code otherwise
4270  * => start must be page aligned
4271  * => len must be page sized
4272  * => flags:
4273  *      UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
4274  * Mappings are QREF's.
4275  */
4276 int
4277 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
4278     vaddr_t *dstaddrp, int flags)
4279 {
4280 	struct uvm_map_deadq dead;
4281 	struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2;
4282 	vaddr_t dstaddr;
4283 	vaddr_t end;
4284 	vaddr_t cp_start;
4285 	vsize_t cp_len, cp_off;
4286 	int error;
4287 
4288 	TAILQ_INIT(&dead);
4289 	end = start + len;
4290 
4291 	/*
4292 	 * Sanity check on the parameters.
4293 	 * Also, since the mapping may not contain gaps, error out if the
4294 	 * mapped area is not in source map.
4295 	 */
4296 	if ((start & (vaddr_t)PAGE_MASK) != 0 ||
4297 	    (end & (vaddr_t)PAGE_MASK) != 0 || end < start)
4298 		return EINVAL;
4299 	if (start < srcmap->min_offset || end > srcmap->max_offset)
4300 		return EINVAL;
4301 
4302 	/* Initialize dead entries. Handle len == 0 case. */
4303 	if (len == 0)
4304 		return 0;
4305 
4306 	/* Acquire lock on srcmap. */
4307 	vm_map_lock(srcmap);
4308 
4309 	/* Look up the first entry covering <start, len>. */
4310 	first = uvm_map_entrybyaddr(&srcmap->addr, start);
4311 
4312 	/* Check that the range is contiguous. */
4313 	for (entry = first; entry != NULL && entry->end < end;
4314 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4315 		if (VMMAP_FREE_END(entry) != entry->end ||
4316 		    UVM_ET_ISHOLE(entry)) {
4317 			error = EINVAL;
4318 			goto fail;
4319 		}
4320 	}
4321 	if (entry == NULL || UVM_ET_ISHOLE(entry)) {
4322 		error = EINVAL;
4323 		goto fail;
4324 	}
4325 
4326 	/*
4327 	 * Handle need-copy flag.
4328 	 */
4329 	for (entry = first; entry != NULL && entry->start < end;
4330 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4331 		if (UVM_ET_ISNEEDSCOPY(entry))
4332 			amap_copy(srcmap, entry, M_NOWAIT,
4333 			    UVM_ET_ISSTACK(entry) ? FALSE : TRUE, start, end);
4334 		if (UVM_ET_ISNEEDSCOPY(entry)) {
4335 			/*
4336 			 * amap_copy failure
4337 			 */
4338 			error = ENOMEM;
4339 			goto fail;
4340 		}
4341 	}
4342 
4343 	/* Lock destination map (kernel_map). */
4344 	vm_map_lock(kernel_map);
4345 
4346 	if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len,
4347 	    MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start),
4348 	    PROT_NONE, 0) != 0) {
4349 		error = ENOMEM;
4350 		goto fail2;
4351 	}
4352 	*dstaddrp = dstaddr;
4353 
4354 	/*
4355 	 * We now have srcmap and kernel_map locked.
4356 	 * dstaddr contains the destination offset in dstmap.
4357 	 */
4358 	/* step 1: start looping through map entries, performing extraction. */
4359 	for (entry = first; entry != NULL && entry->start < end;
4360 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4361 		KDASSERT(!UVM_ET_ISNEEDSCOPY(entry));
4362 		if (UVM_ET_ISHOLE(entry))
4363 			continue;
4364 
4365 		/* Calculate uvm_mapent_clone parameters. */
4366 		cp_start = entry->start;
4367 		if (cp_start < start) {
4368 			cp_off = start - cp_start;
4369 			cp_start = start;
4370 		} else
4371 			cp_off = 0;
4372 		cp_len = MIN(entry->end, end) - cp_start;
4373 
4374 		newentry = uvm_mapent_clone(kernel_map,
4375 		    cp_start - start + dstaddr, cp_len, cp_off,
4376 		    entry->protection, entry->max_protection,
4377 		    entry, &dead, flags, AMAP_SHARED | AMAP_REFALL);
4378 		if (newentry == NULL) {
4379 			error = ENOMEM;
4380 			goto fail2_unmap;
4381 		}
4382 		kernel_map->size += cp_len;
4383 
4384 		/* Figure out the best protection */
4385 		if ((flags & UVM_EXTRACT_FIXPROT) &&
4386 		    newentry->protection != PROT_NONE)
4387 			newentry->protection = newentry->max_protection;
4388 		newentry->protection &= ~PROT_EXEC;
4389 	}
4390 	pmap_update(kernel_map->pmap);
4391 
4392 	error = 0;
4393 
4394 	/* Unmap copied entries on failure. */
4395 fail2_unmap:
4396 	if (error) {
4397 		uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead,
4398 		    FALSE, TRUE, FALSE);
4399 	}
4400 
4401 	/* Release maps, release dead entries. */
4402 fail2:
4403 	vm_map_unlock(kernel_map);
4404 
4405 fail:
4406 	vm_map_unlock(srcmap);
4407 
4408 	uvm_unmap_detach(&dead, 0);
4409 
4410 	return error;
4411 }
4412 
4413 /*
4414  * uvm_map_clean: clean out a map range
4415  *
4416  * => valid flags:
4417  *   if (flags & PGO_CLEANIT): dirty pages are cleaned first
4418  *   if (flags & PGO_SYNCIO): dirty pages are written synchronously
4419  *   if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean
4420  *   if (flags & PGO_FREE): any cached pages are freed after clean
4421  * => returns an error if any part of the specified range isn't mapped
4422  * => never a need to flush amap layer since the anonymous memory has
4423  *	no permanent home, but may deactivate pages there
4424  * => called from sys_msync() and sys_madvise()
4425  * => caller must not have map locked
4426  */
4427 
4428 int
4429 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
4430 {
4431 	struct vm_map_entry *first, *entry;
4432 	struct vm_amap *amap;
4433 	struct vm_anon *anon;
4434 	struct vm_page *pg;
4435 	struct uvm_object *uobj;
4436 	vaddr_t cp_start, cp_end;
4437 	int refs, imut = 0;
4438 	int error;
4439 	boolean_t rv;
4440 
4441 	KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) !=
4442 	    (PGO_FREE|PGO_DEACTIVATE));
4443 
4444 	if (start > end || start < map->min_offset || end > map->max_offset)
4445 		return EINVAL;
4446 
4447 	vm_map_lock(map);
4448 	first = uvm_map_entrybyaddr(&map->addr, start);
4449 
4450 	/* Make a first pass to check for various conditions. */
4451 	for (entry = first; entry != NULL && entry->start < end;
4452 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4453 		if (entry->etype & UVM_ET_IMMUTABLE)
4454 			imut = 1;
4455 		if (UVM_ET_ISSUBMAP(entry)) {
4456 			vm_map_unlock(map);
4457 			return EINVAL;
4458 		}
4459 		if (UVM_ET_ISSUBMAP(entry) ||
4460 		    UVM_ET_ISHOLE(entry) ||
4461 		    (entry->end < end &&
4462 		    VMMAP_FREE_END(entry) != entry->end)) {
4463 			vm_map_unlock(map);
4464 			return EFAULT;
4465 		}
4466 	}
4467 
4468 	vm_map_busy(map);
4469 	vm_map_unlock(map);
4470 	error = 0;
4471 	for (entry = first; entry != NULL && entry->start < end;
4472 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4473 		amap = entry->aref.ar_amap;	/* top layer */
4474 		if (UVM_ET_ISOBJ(entry))
4475 			uobj = entry->object.uvm_obj;
4476 		else
4477 			uobj = NULL;
4478 
4479 		/*
4480 		 * No amap cleaning necessary if:
4481 		 *  - there's no amap
4482 		 *  - we're not deactivating or freeing pages.
4483 		 */
4484 		if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
4485 			goto flush_object;
4486 
4487 		if (imut) {
4488 			vm_map_unbusy(map);
4489 			return EPERM;
4490 		}
4491 
4492 		cp_start = MAX(entry->start, start);
4493 		cp_end = MIN(entry->end, end);
4494 
4495 		amap_lock(amap);
4496 		for (; cp_start != cp_end; cp_start += PAGE_SIZE) {
4497 			anon = amap_lookup(&entry->aref,
4498 			    cp_start - entry->start);
4499 			if (anon == NULL)
4500 				continue;
4501 
4502 			KASSERT(anon->an_lock == amap->am_lock);
4503 			pg = anon->an_page;
4504 			if (pg == NULL) {
4505 				continue;
4506 			}
4507 			KASSERT(pg->pg_flags & PQ_ANON);
4508 
4509 			switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
4510 			/*
4511 			 * XXX In these first 3 cases, we always just
4512 			 * XXX deactivate the page.  We may want to
4513 			 * XXX handle the different cases more
4514 			 * XXX specifically, in the future.
4515 			 */
4516 			case PGO_CLEANIT|PGO_FREE:
4517 			case PGO_CLEANIT|PGO_DEACTIVATE:
4518 			case PGO_DEACTIVATE:
4519 deactivate_it:
4520 				/* skip the page if it's wired */
4521 				if (pg->wire_count != 0)
4522 					break;
4523 
4524 				uvm_lock_pageq();
4525 
4526 				KASSERT(pg->uanon == anon);
4527 
4528 				/* zap all mappings for the page. */
4529 				pmap_page_protect(pg, PROT_NONE);
4530 
4531 				/* ...and deactivate the page. */
4532 				uvm_pagedeactivate(pg);
4533 
4534 				uvm_unlock_pageq();
4535 				break;
4536 			case PGO_FREE:
4537 				/*
4538 				 * If there are multiple references to
4539 				 * the amap, just deactivate the page.
4540 				 */
4541 				if (amap_refs(amap) > 1)
4542 					goto deactivate_it;
4543 
4544 				/* XXX skip the page if it's wired */
4545 				if (pg->wire_count != 0) {
4546 					break;
4547 				}
4548 				amap_unadd(&entry->aref,
4549 				    cp_start - entry->start);
4550 				refs = --anon->an_ref;
4551 				if (refs == 0)
4552 					uvm_anfree(anon);
4553 				break;
4554 			default:
4555 				panic("uvm_map_clean: weird flags");
4556 			}
4557 		}
4558 		amap_unlock(amap);
4559 
4560 flush_object:
4561 		cp_start = MAX(entry->start, start);
4562 		cp_end = MIN(entry->end, end);
4563 
4564 		/*
4565 		 * flush pages if we've got a valid backing object.
4566 		 *
4567 		 * Don't PGO_FREE if we don't have write permission
4568 		 * and don't flush if this is a copy-on-write object
4569 		 * since we can't know our permissions on it.
4570 		 */
4571 		if (uobj != NULL &&
4572 		    ((flags & PGO_FREE) == 0 ||
4573 		     ((entry->max_protection & PROT_WRITE) != 0 &&
4574 		      (entry->etype & UVM_ET_COPYONWRITE) == 0))) {
4575 			rw_enter(uobj->vmobjlock, RW_WRITE);
4576 			rv = uobj->pgops->pgo_flush(uobj,
4577 			    cp_start - entry->start + entry->offset,
4578 			    cp_end - entry->start + entry->offset, flags);
4579 			rw_exit(uobj->vmobjlock);
4580 
4581 			if (rv == FALSE)
4582 				error = EFAULT;
4583 		}
4584 	}
4585 
4586 	vm_map_unbusy(map);
4587 	return error;
4588 }
4589 
4590 /*
4591  * UVM_MAP_CLIP_END implementation
4592  */
4593 void
4594 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4595 {
4596 	struct vm_map_entry *tmp;
4597 
4598 	KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4599 	tmp = uvm_mapent_alloc(map, 0);
4600 
4601 	/* Invoke splitentry. */
4602 	uvm_map_splitentry(map, entry, tmp, addr);
4603 }
4604 
4605 /*
4606  * UVM_MAP_CLIP_START implementation
4607  *
4608  * Clippers are required to not change the pointers to the entry they are
4609  * clipping on.
4610  * Since uvm_map_splitentry turns the original entry into the lowest
4611  * entry (address wise) we do a swap between the new entry and the original
4612  * entry, prior to calling uvm_map_splitentry.
4613  */
4614 void
4615 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4616 {
4617 	struct vm_map_entry *tmp;
4618 	struct uvm_addr_state *free;
4619 
4620 	/* Unlink original. */
4621 	free = uvm_map_uaddr_e(map, entry);
4622 	uvm_mapent_free_remove(map, free, entry);
4623 	uvm_mapent_addr_remove(map, entry);
4624 
4625 	/* Copy entry. */
4626 	KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4627 	tmp = uvm_mapent_alloc(map, 0);
4628 	uvm_mapent_copy(entry, tmp);
4629 
4630 	/* Put new entry in place of original entry. */
4631 	uvm_mapent_addr_insert(map, tmp);
4632 	uvm_mapent_free_insert(map, free, tmp);
4633 
4634 	/* Invoke splitentry. */
4635 	uvm_map_splitentry(map, tmp, entry, addr);
4636 }
4637 
4638 /*
4639  * Boundary fixer.
4640  */
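/*
 * For example, uvm_map_boundfix(0x1000, 0x9000, 0x4000) returns 0x4000
 * (the boundary is crossed), while uvm_map_boundfix(0x5000, 0x9000,
 * 0x4000) returns 0x9000 (the boundary lies outside the range).
 */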
4641 static inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t);
4642 static inline vaddr_t
4643 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound)
4644 {
4645 	return (min < bound && max > bound) ? bound : max;
4646 }
4647 
4648 /*
4649  * Choose free list based on address at start of free space.
4650  *
4651  * The uvm_addr_state returned contains addr and is the first of:
4652  * - uaddr_exe
4653  * - uaddr_brk_stack
4654  * - uaddr_any
4655  */
4656 struct uvm_addr_state*
4657 uvm_map_uaddr(struct vm_map *map, vaddr_t addr)
4658 {
4659 	struct uvm_addr_state *uaddr;
4660 	int i;
4661 
4662 	/* Special case the first page, to prevent mmap from returning 0. */
4663 	if (addr < VMMAP_MIN_ADDR)
4664 		return NULL;
4665 
4666 	/* Upper bound for kernel maps at uvm_maxkaddr. */
4667 	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
4668 		if (addr >= uvm_maxkaddr)
4669 			return NULL;
4670 	}
4671 
4672 	/* Is the address inside the exe-only map? */
4673 	if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr &&
4674 	    addr < map->uaddr_exe->uaddr_maxaddr)
4675 		return map->uaddr_exe;
4676 
4677 	/* Check if the space falls inside brk/stack area. */
4678 	if ((addr >= map->b_start && addr < map->b_end) ||
4679 	    (addr >= map->s_start && addr < map->s_end)) {
4680 		if (map->uaddr_brk_stack != NULL &&
4681 		    addr >= map->uaddr_brk_stack->uaddr_minaddr &&
4682 		    addr < map->uaddr_brk_stack->uaddr_maxaddr) {
4683 			return map->uaddr_brk_stack;
4684 		} else
4685 			return NULL;
4686 	}
4687 
4688 	/*
4689 	 * Check the other selectors.
4690 	 *
4691 	 * These selectors are only marked as the owner, if they have insert
4692 	 * functions.
4693 	 */
4694 	for (i = 0; i < nitems(map->uaddr_any); i++) {
4695 		uaddr = map->uaddr_any[i];
4696 		if (uaddr == NULL)
4697 			continue;
4698 		if (uaddr->uaddr_functions->uaddr_free_insert == NULL)
4699 			continue;
4700 
4701 		if (addr >= uaddr->uaddr_minaddr &&
4702 		    addr < uaddr->uaddr_maxaddr)
4703 			return uaddr;
4704 	}
4705 
4706 	return NULL;
4707 }
4708 
4709 /*
4710  * Choose free list based on address at start of free space.
4711  *
4712  * The uvm_addr_state returned contains addr and is the first of:
4713  * - uaddr_exe
4714  * - uaddr_brk_stack
4715  * - uaddr_any
4716  */
4717 struct uvm_addr_state*
4718 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry)
4719 {
4720 	return uvm_map_uaddr(map, VMMAP_FREE_START(entry));
4721 }
4722 
4723 /*
4724  * Returns the first free-memory boundary that is crossed by [min-max].
4725  */
4726 vsize_t
4727 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max)
4728 {
4729 	struct uvm_addr_state	*uaddr;
4730 	int			 i;
4731 
4732 	/* Never return first page. */
4733 	max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR);
4734 
4735 	/* Treat the maxkaddr special, if the map is a kernel_map. */
4736 	if ((map->flags & VM_MAP_ISVMSPACE) == 0)
4737 		max = uvm_map_boundfix(min, max, uvm_maxkaddr);
4738 
4739 	/* Check for exe-only boundaries. */
4740 	if (map->uaddr_exe != NULL) {
4741 		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr);
4742 		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr);
4743 	}
4744 
4745 	/* Check for exe-only boundaries. */
4746 	if (map->uaddr_brk_stack != NULL) {
4747 		max = uvm_map_boundfix(min, max,
4748 		    map->uaddr_brk_stack->uaddr_minaddr);
4749 		max = uvm_map_boundfix(min, max,
4750 		    map->uaddr_brk_stack->uaddr_maxaddr);
4751 	}
4752 
4753 	/* Check other boundaries. */
4754 	for (i = 0; i < nitems(map->uaddr_any); i++) {
4755 		uaddr = map->uaddr_any[i];
4756 		if (uaddr != NULL) {
4757 			max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr);
4758 			max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr);
4759 		}
4760 	}
4761 
4762 	/* Boundaries at stack and brk() area. */
4763 	max = uvm_map_boundfix(min, max, map->s_start);
4764 	max = uvm_map_boundfix(min, max, map->s_end);
4765 	max = uvm_map_boundfix(min, max, map->b_start);
4766 	max = uvm_map_boundfix(min, max, map->b_end);
4767 
4768 	return max;
4769 }
4770 
4771 /*
4772  * Update map allocation start and end addresses from proc vmspace.
4773  */
4774 void
4775 uvm_map_vmspace_update(struct vm_map *map,
4776     struct uvm_map_deadq *dead, int flags)
4777 {
4778 	struct vmspace *vm;
4779 	vaddr_t b_start, b_end, s_start, s_end;
4780 
4781 	KASSERT(map->flags & VM_MAP_ISVMSPACE);
4782 	KASSERT(offsetof(struct vmspace, vm_map) == 0);
4783 
4784 	/*
4785 	 * Derive actual allocation boundaries from vmspace.
4786 	 */
4787 	vm = (struct vmspace *)map;
4788 	b_start = (vaddr_t)vm->vm_daddr;
4789 	b_end   = b_start + BRKSIZ;
4790 	s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4791 	s_end   = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4792 #ifdef DIAGNOSTIC
4793 	if ((b_start & (vaddr_t)PAGE_MASK) != 0 ||
4794 	    (b_end & (vaddr_t)PAGE_MASK) != 0 ||
4795 	    (s_start & (vaddr_t)PAGE_MASK) != 0 ||
4796 	    (s_end & (vaddr_t)PAGE_MASK) != 0) {
4797 		panic("uvm_map_vmspace_update: vmspace %p invalid bounds: "
4798 		    "b=0x%lx-0x%lx s=0x%lx-0x%lx",
4799 		    vm, b_start, b_end, s_start, s_end);
4800 	}
4801 #endif
4802 
4803 	if (__predict_true(map->b_start == b_start && map->b_end == b_end &&
4804 	    map->s_start == s_start && map->s_end == s_end))
4805 		return;
4806 
4807 	uvm_map_freelist_update(map, dead, b_start, b_end,
4808 	    s_start, s_end, flags);
4809 }
4810 
4811 /*
4812  * Grow kernel memory.
4813  *
4814  * This function is only called for kernel maps when an allocation fails.
4815  *
4816  * If the map has a gap that is large enough to accommodate alloc_sz, this
4817  * function will make sure map->free will include it.
4818  */
4819 void
4820 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead,
4821     vsize_t alloc_sz, int flags)
4822 {
4823 	vsize_t sz;
4824 	vaddr_t end;
4825 	struct vm_map_entry *entry;
4826 
4827 	/* Kernel memory only. */
4828 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0);
4829 	/* Destroy free list. */
4830 	uvm_map_freelist_update_clear(map, dead);
4831 
4832 	/* Include the guard page in the hard minimum requirement of alloc_sz. */
4833 	if (map->flags & VM_MAP_GUARDPAGES)
4834 		alloc_sz += PAGE_SIZE;
4835 
4836 	/*
4837 	 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA.
4838 	 *
4839 	 * Don't handle the case where the multiplication overflows:
4840 	 * if that happens, the allocation is probably too big anyway.
4841 	 */
4842 	sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA);
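	/*
	 * E.g. (hypothetical macro values): with VM_MAP_KSIZE_ALLOCMUL == 4
	 * and VM_MAP_KSIZE_DELTA == 8 pages, a 1-page allocation grows the
	 * map by 8 pages, while a 16-page allocation grows it by 64 pages.
	 */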
4843 
4844 	/*
4845 	 * Walk forward until a gap large enough for alloc_sz shows up.
4846 	 *
4847 	 * We assume the kernel map has no boundaries.
4848 	 * uvm_maxkaddr may be zero.
4849 	 */
4850 	end = MAX(uvm_maxkaddr, map->min_offset);
4851 	entry = uvm_map_entrybyaddr(&map->addr, end);
4852 	while (entry && entry->fspace < alloc_sz)
4853 		entry = RBT_NEXT(uvm_map_addr, entry);
4854 	if (entry) {
4855 		end = MAX(VMMAP_FREE_START(entry), end);
4856 		end += MIN(sz, map->max_offset - end);
4857 	} else
4858 		end = map->max_offset;
4859 
4860 	/* Reserve pmap entries. */
4861 #ifdef PMAP_GROWKERNEL
4862 	uvm_maxkaddr = pmap_growkernel(end);
4863 #else
4864 	uvm_maxkaddr = MAX(uvm_maxkaddr, end);
4865 #endif
4866 
4867 	/* Rebuild free list. */
4868 	uvm_map_freelist_update_refill(map, flags);
4869 }
4870 
4871 /*
4872  * Freelist update subfunction: unlink all entries from freelists.
4873  */
4874 void
4875 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead)
4876 {
4877 	struct uvm_addr_state *free;
4878 	struct vm_map_entry *entry, *prev, *next;
4879 
4880 	prev = NULL;
4881 	for (entry = RBT_MIN(uvm_map_addr, &map->addr); entry != NULL;
4882 	    entry = next) {
4883 		next = RBT_NEXT(uvm_map_addr, entry);
4884 
4885 		free = uvm_map_uaddr_e(map, entry);
4886 		uvm_mapent_free_remove(map, free, entry);
4887 
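		/*
		 * Entries with start == end only carry free space; fold
		 * their free range into the previous entry and recycle them.
		 */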
4888 		if (prev != NULL && entry->start == entry->end) {
4889 			prev->fspace += VMMAP_FREE_END(entry) - entry->end;
4890 			uvm_mapent_addr_remove(map, entry);
4891 			DEAD_ENTRY_PUSH(dead, entry);
4892 		} else
4893 			prev = entry;
4894 	}
4895 }
4896 
4897 /*
4898  * Freelist update subfunction: refill the freelists with entries.
4899  */
4900 void
4901 uvm_map_freelist_update_refill(struct vm_map *map, int flags)
4902 {
4903 	struct vm_map_entry *entry;
4904 	vaddr_t min, max;
4905 
4906 	RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
4907 		min = VMMAP_FREE_START(entry);
4908 		max = VMMAP_FREE_END(entry);
4909 		entry->fspace = 0;
4910 
4911 		entry = uvm_map_fix_space(map, entry, min, max, flags);
4912 	}
4913 
4914 	uvm_tree_sanity(map, __FILE__, __LINE__);
4915 }
4916 
4917 /*
4918  * Change {a,b}_{start,end} allocation ranges and associated free lists.
4919  */
4920 void
4921 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead,
4922     vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags)
4923 {
4924 	KDASSERT(b_end >= b_start && s_end >= s_start);
4925 	vm_map_assert_wrlock(map);
4926 
4927 	/* Clear all free lists. */
4928 	uvm_map_freelist_update_clear(map, dead);
4929 
4930 	/* Apply new bounds. */
4931 	map->b_start = b_start;
4932 	map->b_end   = b_end;
4933 	map->s_start = s_start;
4934 	map->s_end   = s_end;
4935 
4936 	/* Refill free lists. */
4937 	uvm_map_freelist_update_refill(map, flags);
4938 }
4939 
4940 /*
4941  * Assign a uvm_addr_state to the specified pointer in vm_map.
4942  *
4943  * May sleep.
4944  */
4945 void
4946 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which,
4947     struct uvm_addr_state *newval)
4948 {
4949 	struct uvm_map_deadq dead;
4950 
4951 	/* Pointer which must be in this map. */
4952 	KASSERT(which != NULL);
4953 	KASSERT((void*)map <= (void*)(which) &&
4954 	    (void*)(which) < (void*)(map + 1));
4955 
4956 	vm_map_lock(map);
4957 	TAILQ_INIT(&dead);
4958 	uvm_map_freelist_update_clear(map, &dead);
4959 
4960 	uvm_addr_destroy(*which);
4961 	*which = newval;
4962 
4963 	uvm_map_freelist_update_refill(map, 0);
4964 	vm_map_unlock(map);
4965 	uvm_unmap_detach(&dead, 0);
4966 }
4967 
4968 /*
4969  * Correct space insert.
4970  *
4971  * Entry must not be on any freelist.
4972  */
4973 struct vm_map_entry*
4974 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry,
4975     vaddr_t min, vaddr_t max, int flags)
4976 {
4977 	struct uvm_addr_state	*free, *entfree;
4978 	vaddr_t			 lmax;
4979 
4980 	KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0);
4981 	KDASSERT(min <= max);
4982 	KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) ||
4983 	    min == map->min_offset);
4984 
4985 	UVM_MAP_REQ_WRITE(map);
4986 
4987 	/*
4988 	 * During the function, entfree will always point at the uaddr state
4989 	 * for entry.
4990 	 */
4991 	entfree = (entry == NULL ? NULL :
4992 	    uvm_map_uaddr_e(map, entry));
4993 
4994 	while (min != max) {
4995 		/* Claim guard page for entry. */
4996 		if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL &&
4997 		    VMMAP_FREE_END(entry) == entry->end &&
4998 		    entry->start != entry->end) {
4999 			if (max - min == 2 * PAGE_SIZE) {
5000 				/*
5001 				 * If the free-space gap is exactly 2 pages,
5002 				 * we make the guard 2 pages instead of 1.
5003 				 * Because in a guarded map, an area needs
5004 				 * at least 2 pages to allocate from:
5005 				 * one page for the allocation and one for
5006 				 * the guard.
5007 				 */
5008 				entry->guard = 2 * PAGE_SIZE;
5009 				min = max;
5010 			} else {
5011 				entry->guard = PAGE_SIZE;
5012 				min += PAGE_SIZE;
5013 			}
5014 			continue;
5015 		}
5016 
5017 		/*
5018 		 * Handle the case where entry has a 2-page guard, but the
5019 		 * space after entry is freed.
5020 		 */
5021 		if (entry != NULL && entry->fspace == 0 &&
5022 		    entry->guard > PAGE_SIZE) {
5023 			entry->guard = PAGE_SIZE;
5024 			min = VMMAP_FREE_START(entry);
5025 		}
5026 
5027 		lmax = uvm_map_boundary(map, min, max);
5028 		free = uvm_map_uaddr(map, min);
5029 
5030 		/*
5031 		 * Entries are merged if they point at the same uvm_addr_state.
5032 		 * Exception to that rule: if min == uvm_maxkaddr, a new
5033 		 * entry is started regardless (otherwise the allocators
5034 		 * will get confused).
5035 		 */
5036 		if (entry != NULL && free == entfree &&
5037 		    !((map->flags & VM_MAP_ISVMSPACE) == 0 &&
5038 		    min == uvm_maxkaddr)) {
5039 			KDASSERT(VMMAP_FREE_END(entry) == min);
5040 			entry->fspace += lmax - min;
5041 		} else {
5042 			/*
5043 			 * Commit entry to the free list: no more free space
5044 			 * will be added to it.
5045 			 * We'll start a new entry and add to that entry
5046 			 * instead.
5047 			 */
5048 			if (entry != NULL)
5049 				uvm_mapent_free_insert(map, entfree, entry);
5050 
5051 			/* New entry for new uaddr. */
5052 			entry = uvm_mapent_alloc(map, flags);
5053 			KDASSERT(entry != NULL);
5054 			entry->end = entry->start = min;
5055 			entry->guard = 0;
5056 			entry->fspace = lmax - min;
5057 			entry->object.uvm_obj = NULL;
5058 			entry->offset = 0;
5059 			entry->etype = 0;
5060 			entry->protection = entry->max_protection = 0;
5061 			entry->inheritance = 0;
5062 			entry->wired_count = 0;
5063 			entry->advice = 0;
5064 			entry->aref.ar_pageoff = 0;
5065 			entry->aref.ar_amap = NULL;
5066 			uvm_mapent_addr_insert(map, entry);
5067 
5068 			entfree = free;
5069 		}
5070 
5071 		min = lmax;
5072 	}
5073 	/* Finally put entry on the uaddr state. */
5074 	if (entry != NULL)
5075 		uvm_mapent_free_insert(map, entfree, entry);
5076 
5077 	return entry;
5078 }
5079 
5080 /*
5081  * MQuery style of allocation.
5082  *
5083  * This allocator searches forward until sufficient space is found to map
5084  * the given size.
5085  *
5086  * XXX: factor in offset (via pmap_prefer) and protection?
5087  */
5088 int
5089 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset,
5090     int flags)
5091 {
5092 	struct vm_map_entry *entry, *last;
5093 	vaddr_t addr;
5094 	vaddr_t tmp, pmap_align, pmap_offset;
5095 	int error;
5096 
5097 	addr = *addr_p;
5098 	vm_map_lock_read(map);
5099 
5100 	/* Configure pmap prefer. */
5101 	if (offset != UVM_UNKNOWN_OFFSET) {
5102 		pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN());
5103 		pmap_offset = PMAP_PREFER_OFFSET(offset);
5104 	} else {
5105 		pmap_align = PAGE_SIZE;
5106 		pmap_offset = 0;
5107 	}
5108 
5109 	/* Align address to pmap_prefer unless FLAG_FIXED is set. */
5110 	if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) {
5111 	  	tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
5112 		if (tmp < addr)
5113 			tmp += pmap_align;
5114 		addr = tmp;
5115 	}
5116 
5117 	/* First, check if the requested range is fully available. */
5118 	entry = uvm_map_entrybyaddr(&map->addr, addr);
5119 	last = NULL;
5120 	if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
5121 		error = 0;
5122 		goto out;
5123 	}
5124 	if (flags & UVM_FLAG_FIXED) {
5125 		error = EINVAL;
5126 		goto out;
5127 	}
5128 
5129 	error = ENOMEM; /* Default error from here. */
5130 
5131 	/*
5132 	 * At this point, the memory at <addr, sz> is not available.
5133 	 * The reasons are:
5134 	 * [1] it's outside the map,
5135 	 * [2] it starts in used memory (and therefore needs to move
5136 	 *     toward the first free page in entry),
5137 	 * [3] it starts in free memory but bumps into used memory.
5138 	 *
5139 	 * Note that for case [2], the forward moving is handled by the
5140 	 * for loop below.
5141 	 */
5142 	if (entry == NULL) {
5143 		/* [1] Outside the map. */
5144 		if (addr >= map->max_offset)
5145 			goto out;
5146 		else
5147 			entry = RBT_MIN(uvm_map_addr, &map->addr);
5148 	} else if (VMMAP_FREE_START(entry) <= addr) {
5149 		/* [3] Bumped into used memory. */
5150 		entry = RBT_NEXT(uvm_map_addr, entry);
5151 	}
5152 
5153 	/* Test if the next entry is sufficient for the allocation. */
5154 	for (; entry != NULL;
5155 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
5156 		if (entry->fspace == 0)
5157 			continue;
5158 		addr = VMMAP_FREE_START(entry);
5159 
5160 restart:	/* Restart address checks on address change. */
5161 		tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
5162 		if (tmp < addr)
5163 			tmp += pmap_align;
5164 		addr = tmp;
5165 		if (addr >= VMMAP_FREE_END(entry))
5166 			continue;
5167 
5168 		/* Skip brk() allocation addresses. */
5169 		if (addr + sz > map->b_start && addr < map->b_end) {
5170 			if (VMMAP_FREE_END(entry) > map->b_end) {
5171 				addr = map->b_end;
5172 				goto restart;
5173 			} else
5174 				continue;
5175 		}
5176 		/* Skip stack allocation addresses. */
5177 		if (addr + sz > map->s_start && addr < map->s_end) {
5178 			if (VMMAP_FREE_END(entry) > map->s_end) {
5179 				addr = map->s_end;
5180 				goto restart;
5181 			} else
5182 				continue;
5183 		}
5184 
5185 		last = NULL;
5186 		if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
5187 			error = 0;
5188 			goto out;
5189 		}
5190 	}
5191 
5192 out:
5193 	vm_map_unlock_read(map);
5194 	if (error == 0)
5195 		*addr_p = addr;
5196 	return error;
5197 }
5198 
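/*
 * vm_map_lock_try_ln: try to write-lock the map without sleeping.
 *
 * Returns FALSE if another thread holds the map busy or the lock cannot
 * be taken immediately.  The busy pointer is re-checked after the lock
 * is acquired because it may have changed in the meantime.
 */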
5199 boolean_t
5200 vm_map_lock_try_ln(struct vm_map *map, char *file, int line)
5201 {
5202 	int rv;
5203 
5204 	if (map->flags & VM_MAP_INTRSAFE) {
5205 		if (!mtx_enter_try(&map->mtx))
5206 			return FALSE;
5207 	} else {
5208 		struct proc *busy;
5209 
5210 		mtx_enter(&map->flags_lock);
5211 		busy = map->busy;
5212 		mtx_leave(&map->flags_lock);
5213 		if (busy != NULL && busy != curproc)
5214 			return FALSE;
5215 
5216 		rv = rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP);
5217 		if (rv != 0)
5218 			return FALSE;
5219 
5220 		/* to be sure, to be sure */
5221 		mtx_enter(&map->flags_lock);
5222 		busy = map->busy;
5223 		mtx_leave(&map->flags_lock);
5224 		if (busy != NULL && busy != curproc) {
5225 			rw_exit(&map->lock);
5226 			return FALSE;
5227 		}
5228 	}
5229 
5230 	map->timestamp++;
5231 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5232 	uvm_tree_sanity(map, file, line);
5233 	uvm_tree_size_chk(map, file, line);
5234 
5235 	return TRUE;
5236 }
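/*
 * Hedged caller sketch: this is assumed to be reached through a
 * vm_map_lock_try() wrapper macro that supplies __FILE__/__LINE__.  A
 * context that must not sleep probes the lock and backs off instead of
 * waiting for the busy owner or the rwlock:
 *
 *	if (!vm_map_lock_try(map))
 *		return EAGAIN;		(cannot sleep here; retry later)
 *	... modify the map ...
 *	vm_map_unlock(map);
 */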
5237 
5238 void
5239 vm_map_lock_ln(struct vm_map *map, char *file, int line)
5240 {
5241 	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
5242 		mtx_enter(&map->flags_lock);
5243 		for (;;) {
5244 			while (map->busy != NULL && map->busy != curproc) {
5245 				map->nbusy++;
5246 				msleep_nsec(&map->busy, &map->flags_lock,
5247 				    PVM, vmmapbsy, INFSLP);
5248 				map->nbusy--;
5249 			}
5250 			mtx_leave(&map->flags_lock);
5251 
5252 			rw_enter_write(&map->lock);
5253 
5254 			/* to be sure, to be sure */
5255 			mtx_enter(&map->flags_lock);
5256 			if (map->busy != NULL && map->busy != curproc) {
5257 				/* go around again */
5258 				rw_exit_write(&map->lock);
5259 			} else {
5260 				/* we won */
5261 				break;
5262 			}
5263 		}
5264 		mtx_leave(&map->flags_lock);
5265 	} else {
5266 		mtx_enter(&map->mtx);
5267 	}
5268 
5269 	if (map->busy != curproc) {
5270 		KASSERT(map->busy == NULL);
5271 		map->timestamp++;
5272 	}
5273 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5274 	uvm_tree_sanity(map, file, line);
5275 	uvm_tree_size_chk(map, file, line);
5276 }
5277 
5278 void
5279 vm_map_lock_read_ln(struct vm_map *map, char *file, int line)
5280 {
5281 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5282 		rw_enter_read(&map->lock);
5283 	else
5284 		mtx_enter(&map->mtx);
5285 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5286 	uvm_tree_sanity(map, file, line);
5287 	uvm_tree_size_chk(map, file, line);
5288 }
5289 
5290 void
5291 vm_map_unlock_ln(struct vm_map *map, char *file, int line)
5292 {
5293 	KASSERT(map->busy == NULL || map->busy == curproc);
5294 	uvm_tree_sanity(map, file, line);
5295 	uvm_tree_size_chk(map, file, line);
5296 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5297 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5298 		rw_exit(&map->lock);
5299 	else
5300 		mtx_leave(&map->mtx);
5301 }
5302 
5303 void
5304 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line)
5305 {
5306 	/* XXX: RO */ uvm_tree_sanity(map, file, line);
5307 	/* XXX: RO */ uvm_tree_size_chk(map, file, line);
5308 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5309 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5310 		rw_exit_read(&map->lock);
5311 	else
5312 		mtx_leave(&map->mtx);
5313 }
5314 
5315 void
5316 vm_map_busy_ln(struct vm_map *map, char *file, int line)
5317 {
5318 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5319 	KASSERT(rw_write_held(&map->lock));
5320 	KASSERT(map->busy == NULL);
5321 
5322 	mtx_enter(&map->flags_lock);
5323 	map->busy = curproc;
5324 	mtx_leave(&map->flags_lock);
5325 }
5326 
5327 void
5328 vm_map_unbusy_ln(struct vm_map *map, char *file, int line)
5329 {
5330 	unsigned int nbusy;
5331 
5332 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5333 	KASSERT(map->busy == curproc);
5334 
5335 	mtx_enter(&map->flags_lock);
5336 	nbusy = map->nbusy;
5337 	map->busy = NULL;
5338 	mtx_leave(&map->flags_lock);
5339 
5340 	if (nbusy > 0)
5341 		wakeup(&map->busy);
5342 }
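/*
 * Hedged usage sketch, based on the locking code above: marking a map
 * busy lets the owner drop the rwlock for work that may sleep, while
 * other writers keep blocking in vm_map_lock_ln() on map->busy.
 * Assuming vm_map_busy()/vm_map_unbusy() wrapper macros:
 *
 *	vm_map_lock(map);
 *	vm_map_busy(map);	(record curproc as the busy owner)
 *	vm_map_unlock(map);	(release the rwlock; other writers now
 *				 sleep on &map->busy in vm_map_lock_ln())
 *	... sleepable work: I/O, page faults ...
 *	vm_map_unbusy(map);	(clear the owner and wake the waiters)
 */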
5343 
5344 void
5345 vm_map_assert_anylock_ln(struct vm_map *map, char *file, int line)
5346 {
5347 	LPRINTF(("map assert read or write locked: %p (at %s %d)\n", map, file, line));
5348 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5349 		rw_assert_anylock(&map->lock);
5350 	else
5351 		MUTEX_ASSERT_LOCKED(&map->mtx);
5352 }
5353 
5354 void
5355 vm_map_assert_wrlock_ln(struct vm_map *map, char *file, int line)
5356 {
5357 	LPRINTF(("map assert write locked: %p (at %s %d)\n", map, file, line));
5358 	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
5359 		splassert(IPL_NONE);
5360 		rw_assert_wrlock(&map->lock);
5361 	} else
5362 		MUTEX_ASSERT_LOCKED(&map->mtx);
5363 }
5364 
5365 #ifndef SMALL_KERNEL
5366 int
5367 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve,
5368     size_t *lenp)
5369 {
5370 	struct vm_map_entry *entry;
5371 	vaddr_t start;
5372 	int cnt, maxcnt, error = 0;
5373 
5374 	KASSERT(*lenp > 0);
5375 	KASSERT((*lenp % sizeof(*kve)) == 0);
5376 	cnt = 0;
5377 	maxcnt = *lenp / sizeof(*kve);
5378 	KASSERT(maxcnt > 0);
5379 
5380 	/*
5381 	 * Return only entries whose address is above the given base
5382 	 * address.  This allows userland to iterate without knowing the
5383 	 * number of entries beforehand.
5384 	 */
5385 	start = (vaddr_t)kve[0].kve_start;
5386 
5387 	vm_map_lock(map);
5388 	RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
5389 		if (cnt == maxcnt) {
5390 			error = ENOMEM;
5391 			break;
5392 		}
5393 		if (start != 0 && entry->start < start)
5394 			continue;
5395 		kve->kve_start = entry->start;
5396 		kve->kve_end = entry->end;
5397 		kve->kve_guard = entry->guard;
5398 		kve->kve_fspace = entry->fspace;
5399 		kve->kve_fspace_augment = entry->fspace_augment;
5400 		kve->kve_offset = entry->offset;
5401 		kve->kve_wired_count = entry->wired_count;
5402 		kve->kve_etype = entry->etype;
5403 		kve->kve_protection = entry->protection;
5404 		kve->kve_max_protection = entry->max_protection;
5405 		kve->kve_advice = entry->advice;
5406 		kve->kve_inheritance = entry->inheritance;
5407 		kve->kve_flags = entry->flags;
5408 		kve++;
5409 		cnt++;
5410 	}
5411 	vm_map_unlock(map);
5412 
5413 	KASSERT(cnt <= maxcnt);
5414 
5415 	*lenp = sizeof(*kve) * cnt;
5416 	return error;
5417 }
5418 #endif
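/*
 * Hedged iteration sketch for uvm_map_fill_vmmap(): a caller (e.g. the
 * sysctl code exporting kinfo_vmentry to userland) can walk an
 * arbitrarily large map through a small fixed-size buffer by feeding
 * the last end address back in as kve[0].kve_start.  `buf' and
 * `resume' are hypothetical names:
 *
 *	struct kinfo_vmentry buf[16];
 *	size_t len;
 *	vaddr_t resume = 0;
 *	int error;
 *
 *	do {
 *		len = sizeof(buf);
 *		buf[0].kve_start = resume;
 *		error = uvm_map_fill_vmmap(map, buf, &len);
 *		if (len == 0)
 *			break;		(no entries at or after resume)
 *		... consume len / sizeof(buf[0]) entries ...
 *		resume = buf[len / sizeof(buf[0]) - 1].kve_end;
 *	} while (error == ENOMEM);	(buffer filled; more entries remain)
 */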
5419 
5420 
5421 RBT_GENERATE_AUGMENT(uvm_map_addr, vm_map_entry, daddrs.addr_entry,
5422     uvm_mapentry_addrcmp, uvm_map_addr_augment);
5423 
5424 
5425 /*
5426  * MD code: vmspace allocator setup.
5427  */
5428 
5429 #ifdef __i386__
5430 void
5431 uvm_map_setup_md(struct vm_map *map)
5432 {
5433 	vaddr_t		min, max;
5434 
5435 	min = map->min_offset;
5436 	max = map->max_offset;
5437 
5438 	/*
5439 	 * Ensure the selectors will not try to manage page 0;
5440 	 * it's too special.
5441 	 */
5442 	if (min < VMMAP_MIN_ADDR)
5443 		min = VMMAP_MIN_ADDR;
5444 
5445 #if 0	/* Cool stuff, not yet */
5446 	/* Executable code is special. */
5447 	map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR);
5448 	/* Place normal allocations beyond executable mappings. */
5449 	map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max);
5450 #else	/* Crappy stuff, for now */
5451 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5452 #endif
5453 
5454 #ifndef SMALL_KERNEL
5455 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5456 #endif /* !SMALL_KERNEL */
5457 }
5458 #elif __LP64__
5459 void
5460 uvm_map_setup_md(struct vm_map *map)
5461 {
5462 	vaddr_t		min, max;
5463 
5464 	min = map->min_offset;
5465 	max = map->max_offset;
5466 
5467 	/*
5468 	 * Ensure the selectors will not try to manage page 0;
5469 	 * it's too special.
5470 	 */
5471 	if (min < VMMAP_MIN_ADDR)
5472 		min = VMMAP_MIN_ADDR;
5473 
5474 #if 0	/* Cool stuff, not yet */
5475 	map->uaddr_any[3] = uaddr_pivot_create(MAX(min, 0x100000000ULL), max);
5476 #else	/* Crappy stuff, for now */
5477 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5478 #endif
5479 
5480 #ifndef SMALL_KERNEL
5481 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5482 #endif /* !SMALL_KERNEL */
5483 }
5484 #else	/* non-i386, 32 bit */
5485 void
5486 uvm_map_setup_md(struct vm_map *map)
5487 {
5488 	vaddr_t		min, max;
5489 
5490 	min = map->min_offset;
5491 	max = map->max_offset;
5492 
5493 	/*
5494 	 * Ensure the selectors will not try to manage page 0;
5495 	 * it's too special.
5496 	 */
5497 	if (min < VMMAP_MIN_ADDR)
5498 		min = VMMAP_MIN_ADDR;
5499 
5500 #if 0	/* Cool stuff, not yet */
5501 	map->uaddr_any[3] = uaddr_pivot_create(min, max);
5502 #else	/* Crappy stuff, for now */
5503 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5504 #endif
5505 
5506 #ifndef SMALL_KERNEL
5507 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5508 #endif /* !SMALL_KERNEL */
5509 }
5510 #endif
5511