1 /*	$OpenBSD: uvm_addr.c,v 1.35 2024/06/07 06:04:43 jsg Exp $	*/
2 
3 /*
4  * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 /* #define DEBUG */
20 
21 #include <sys/param.h>
22 #include <sys/systm.h>
23 #include <uvm/uvm.h>
24 #include <uvm/uvm_addr.h>
25 #include <sys/pool.h>
26 
27 /* Number of pivots in pivot allocator. */
28 #define NUM_PIVOTS		16
29 /*
30  * Max number (inclusive) of pages the pivot allocator
31  * will place between allocations.
32  *
33  * The uaddr_pivot_random() function attempts to bias towards
34  * small space between allocations, so putting a large number here is fine.
35  */
36 #define PIVOT_RND		8
37 /*
38  * Number of allocations that a pivot can supply before expiring.
39  * When a pivot expires, a new pivot has to be found.
40  *
41  * Must be at least 1.
42  */
43 #define PIVOT_EXPIRE		1024
44 
45 
46 /* Pools for the uvm_addr_state structures. */
47 struct pool uaddr_pool;
48 struct pool uaddr_bestfit_pool;
49 struct pool uaddr_pivot_pool;
50 struct pool uaddr_rnd_pool;
51 
52 /* uvm_addr state for bestfit selector. */
53 struct uaddr_bestfit_state {
54 	struct uvm_addr_state		 ubf_uaddr;
55 	struct uaddr_free_rbtree	 ubf_free;
56 };
57 
58 /* uvm_addr state for rnd selector. */
59 struct uaddr_rnd_state {
60 	struct uvm_addr_state		 ur_uaddr;
61 #if 0
62 	TAILQ_HEAD(, vm_map_entry)	 ur_free;
63 #endif
64 };
65 
66 /*
67  * Definition of a pivot in pivot selector.
68  */
69 struct uaddr_pivot {
70 	vaddr_t				 addr;	/* End of prev. allocation. */
71 	int				 expire;/* Best before date. */
72 	int				 dir;	/* Direction. */
73 	struct vm_map_entry		*entry; /* Will contain next alloc. */
74 };
75 /* uvm_addr state for pivot selector. */
76 struct uaddr_pivot_state {
77 	struct uvm_addr_state		 up_uaddr;
78 
79 	/* Free space tree, for fast pivot selection. */
80 	struct uaddr_free_rbtree	 up_free;
81 
82 	/* List of pivots. The pointers point to after the last allocation. */
83 	struct uaddr_pivot		 up_pivots[NUM_PIVOTS];
84 };
85 
86 /* Forward declaration (see below). */
87 extern const struct uvm_addr_functions uaddr_kernel_functions;
88 struct uvm_addr_state uaddr_kbootstrap;
89 
90 
91 /*
92  * Support functions.
93  */
94 
95 #ifndef SMALL_KERNEL
96 struct vm_map_entry	*uvm_addr_entrybyspace(struct uaddr_free_rbtree*,
97 			    vsize_t);
98 #endif /* !SMALL_KERNEL */
99 void			 uaddr_destroy(struct uvm_addr_state *);
100 void			 uaddr_kbootstrap_destroy(struct uvm_addr_state *);
101 void			 uaddr_rnd_destroy(struct uvm_addr_state *);
102 void			 uaddr_bestfit_destroy(struct uvm_addr_state *);
103 void			 uaddr_pivot_destroy(struct uvm_addr_state *);
104 
105 #if 0
106 int			 uaddr_lin_select(struct vm_map *,
107 			    struct uvm_addr_state *, struct vm_map_entry **,
108 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
109 			    vaddr_t);
110 #endif
111 int			 uaddr_kbootstrap_select(struct vm_map *,
112 			    struct uvm_addr_state *, struct vm_map_entry **,
113 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
114 			    vaddr_t);
115 int			 uaddr_rnd_select(struct vm_map *,
116 			    struct uvm_addr_state *, struct vm_map_entry **,
117 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
118 			    vaddr_t);
119 int			 uaddr_bestfit_select(struct vm_map *,
120 			    struct uvm_addr_state *, struct vm_map_entry **,
121 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
122 			    vaddr_t);
123 #ifndef SMALL_KERNEL
124 int			 uaddr_pivot_select(struct vm_map *,
125 			    struct uvm_addr_state *, struct vm_map_entry **,
126 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
127 			    vaddr_t);
128 int			 uaddr_stack_brk_select(struct vm_map *,
129 			    struct uvm_addr_state *, struct vm_map_entry **,
130 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
131 			    vaddr_t);
132 #endif /* !SMALL_KERNEL */
133 
134 void			 uaddr_rnd_insert(struct vm_map *,
135 			    struct uvm_addr_state *, struct vm_map_entry *);
136 void			 uaddr_rnd_remove(struct vm_map *,
137 			    struct uvm_addr_state *, struct vm_map_entry *);
138 void			 uaddr_bestfit_insert(struct vm_map *,
139 			    struct uvm_addr_state *, struct vm_map_entry *);
140 void			 uaddr_bestfit_remove(struct vm_map *,
141 			    struct uvm_addr_state *, struct vm_map_entry *);
142 void			 uaddr_pivot_insert(struct vm_map *,
143 			    struct uvm_addr_state *, struct vm_map_entry *);
144 void			 uaddr_pivot_remove(struct vm_map *,
145 			    struct uvm_addr_state *, struct vm_map_entry *);
146 
147 #ifndef SMALL_KERNEL
148 vsize_t			 uaddr_pivot_random(void);
149 int			 uaddr_pivot_newpivot(struct vm_map *,
150 			    struct uaddr_pivot_state *, struct uaddr_pivot *,
151 			    struct vm_map_entry **, vaddr_t *,
152 			    vsize_t, vaddr_t, vaddr_t, vsize_t, vsize_t);
153 #endif /* !SMALL_KERNEL */
154 
155 #if defined(DEBUG) || defined(DDB)
156 void			 uaddr_pivot_print(struct uvm_addr_state *, boolean_t,
157 			    int (*)(const char *, ...));
158 #if 0
159 void			 uaddr_rnd_print(struct uvm_addr_state *, boolean_t,
160 			    int (*)(const char *, ...));
161 #endif
162 #endif /* DEBUG || DDB */
163 
164 
165 #ifndef SMALL_KERNEL
166 /*
167  * Find smallest entry in tree that will fit sz bytes.
168  */
169 struct vm_map_entry *
170 uvm_addr_entrybyspace(struct uaddr_free_rbtree *free, vsize_t sz)
171 {
172 	struct vm_map_entry	*tmp, *res;
173 
174 	tmp = RBT_ROOT(uaddr_free_rbtree, free);
175 	res = NULL;
176 	while (tmp) {
177 		if (tmp->fspace >= sz) {
178 			res = tmp;
179 			tmp = RBT_LEFT(uaddr_free_rbtree, tmp);
180 		} else if (tmp->fspace < sz)
181 			tmp = RBT_RIGHT(uaddr_free_rbtree, tmp);
182 	}
183 	return res;
184 }
185 #endif /* !SMALL_KERNEL */
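
/*
 * Illustrative note (hypothetical sizes): if the free tree holds entries
 * with fspace of 2, 4, 8 and 16 pages and sz asks for 5 pages, the walk
 * above descends left whenever an entry is big enough and right otherwise,
 * ending on the 8-page entry: the smallest free range that still fits the
 * request.
 */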
186 
187 static inline vaddr_t
188 uvm_addr_align_forward(vaddr_t addr, vaddr_t align, vaddr_t offset)
189 {
190 	vaddr_t adjusted;
191 
192 	KASSERT(offset < align || (align == 0 && offset == 0));
193 	KASSERT((align & (align - 1)) == 0);
194 	KASSERT((offset & PAGE_MASK) == 0);
195 
196 	align = MAX(align, PAGE_SIZE);
197 	adjusted = addr & ~(align - 1);
198 	adjusted += offset;
199 	return (adjusted < addr ? adjusted + align : adjusted);
200 }
201 
202 static inline vaddr_t
203 uvm_addr_align_backward(vaddr_t addr, vaddr_t align, vaddr_t offset)
204 {
205 	vaddr_t adjusted;
206 
207 	KASSERT(offset < align || (align == 0 && offset == 0));
208 	KASSERT((align & (align - 1)) == 0);
209 	KASSERT((offset & PAGE_MASK) == 0);
210 
211 	align = MAX(align, PAGE_SIZE);
212 	adjusted = addr & ~(align - 1);
213 	adjusted += offset;
214 	return (adjusted > addr ? adjusted - align : adjusted);
215 }
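
/*
 * A minimal userland sketch of the align/offset arithmetic above, kept in
 * an #if 0 block so it stays out of any build.  The ex_ names and the
 * fixed 4K page size are assumptions of this sketch only, not part of UVM.
 */
#if 0
#include <assert.h>
#include <stdint.h>

#define EX_PAGE_SIZE	0x1000UL

static uintptr_t
ex_align_forward(uintptr_t addr, uintptr_t align, uintptr_t offset)
{
	uintptr_t adjusted;

	if (align < EX_PAGE_SIZE)
		align = EX_PAGE_SIZE;	/* same clamp as above */
	adjusted = (addr & ~(align - 1)) + offset;
	/* Rounding down may land below addr; step up one alignment unit. */
	return (adjusted < addr ? adjusted + align : adjusted);
}

int
main(void)
{
	/* 0x12345 moved up to a 64K boundary plus offset 0x2000. */
	assert(ex_align_forward(0x12345, 0x10000, 0x2000) == 0x22000);
	/* An address already satisfying align/offset stays put. */
	assert(ex_align_forward(0x22000, 0x10000, 0x2000) == 0x22000);
	return 0;
}
#endif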
216 
217 /*
218  * Try to fit the requested space into the entry.
219  */
220 int
221 uvm_addr_fitspace(vaddr_t *min_result, vaddr_t *max_result,
222     vaddr_t low_addr, vaddr_t high_addr, vsize_t sz,
223     vaddr_t align, vaddr_t offset,
224     vsize_t before_gap, vsize_t after_gap)
225 {
226 	vaddr_t	tmp;
227 	vsize_t	fspace;
228 
229 	if (low_addr > high_addr)
230 		return ENOMEM;
231 	fspace = high_addr - low_addr;
232 	if (fspace < before_gap + after_gap)
233 		return ENOMEM;
234 	if (fspace - before_gap - after_gap < sz)
235 		return ENOMEM;
236 
237 	/*
238 	 * Calculate lowest address.
239 	 */
240 	low_addr += before_gap;
241 	low_addr = uvm_addr_align_forward(tmp = low_addr, align, offset);
242 	if (low_addr < tmp)	/* Overflow during alignment. */
243 		return ENOMEM;
244 	if (high_addr - after_gap - sz < low_addr)
245 		return ENOMEM;
246 
247 	/*
248 	 * Calculate highest address.
249 	 */
250 	high_addr -= after_gap + sz;
251 	high_addr = uvm_addr_align_backward(tmp = high_addr, align, offset);
252 	if (high_addr > tmp)	/* Overflow during alignment. */
253 		return ENOMEM;
254 	if (low_addr > high_addr)
255 		return ENOMEM;
256 
257 	*min_result = low_addr;
258 	*max_result = high_addr;
259 	return 0;
260 }
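
/*
 * Worked example (hypothetical numbers, 4K pages): for low_addr = 0x1000,
 * high_addr = 0x20000, sz = 0x3000, align = 0x4000, offset = 0 and
 * before_gap = after_gap = 0x1000, the code above yields
 * *min_result = 0x4000 (0x1000 + 0x1000, rounded up to a 0x4000 boundary)
 * and *max_result = 0x1c000 (0x20000 - 0x1000 - 0x3000, already aligned),
 * so any 0x4000-aligned start in [0x4000, 0x1c000] satisfies the request.
 */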
261 
262 
263 /*
264  * Initialize uvm_addr.
265  */
266 void
267 uvm_addr_init(void)
268 {
269 	pool_init(&uaddr_pool, sizeof(struct uvm_addr_state), 0,
270 	    IPL_VM, PR_WAITOK, "uaddr", NULL);
271 	pool_init(&uaddr_bestfit_pool, sizeof(struct uaddr_bestfit_state), 0,
272 	    IPL_VM, PR_WAITOK, "uaddrbest", NULL);
273 	pool_init(&uaddr_pivot_pool, sizeof(struct uaddr_pivot_state), 0,
274 	    IPL_VM, PR_WAITOK, "uaddrpivot", NULL);
275 	pool_init(&uaddr_rnd_pool, sizeof(struct uaddr_rnd_state), 0,
276 	    IPL_VM, PR_WAITOK, "uaddrrnd", NULL);
277 
278 	uaddr_kbootstrap.uaddr_minaddr = PAGE_SIZE;
279 	uaddr_kbootstrap.uaddr_maxaddr = -(vaddr_t)PAGE_SIZE;
280 	uaddr_kbootstrap.uaddr_functions = &uaddr_kernel_functions;
281 }
282 
283 /*
284  * Invoke destructor function of uaddr.
285  */
286 void
287 uvm_addr_destroy(struct uvm_addr_state *uaddr)
288 {
289 	if (uaddr)
290 		(*uaddr->uaddr_functions->uaddr_destroy)(uaddr);
291 }
292 
293 /*
294  * Move address forward to satisfy align, offset.
295  */
296 vaddr_t
297 uvm_addr_align(vaddr_t addr, vaddr_t align, vaddr_t offset)
298 {
299 	vaddr_t result = (addr & ~(align - 1)) + offset;
300 	if (result < addr)
301 		result += align;
302 	return result;
303 }
304 
305 /*
306  * Move address backwards to satisfy align, offset.
307  */
308 vaddr_t
309 uvm_addr_align_back(vaddr_t addr, vaddr_t align, vaddr_t offset)
310 {
311 	vaddr_t result = (addr & ~(align - 1)) + offset;
312 	if (result > addr)
313 		result -= align;
314 	return result;
315 }
316 
317 /*
318  * Directional first fit.
319  *
320  * Do a linear search for free space, starting at addr in entry.
321  * direction ==  1: search forward
322  * direction == -1: search backward
323  *
324  * Output: low <= addr <= high and entry will contain addr.
325  * ENOMEM will be returned if no space is available.
326  *
327  * The gap arguments describe space that must stay free around the allocation.
328  */
329 int
330 uvm_addr_linsearch(struct vm_map *map, struct uvm_addr_state *uaddr,
331     struct vm_map_entry **entry_out, vaddr_t *addr_out,
332     vaddr_t hint, vsize_t sz, vaddr_t align, vaddr_t offset,
333     int direction, vaddr_t low, vaddr_t high,
334     vsize_t before_gap, vsize_t after_gap)
335 {
336 	struct vm_map_entry	*entry;
337 	vaddr_t			 low_addr, high_addr;
338 
339 	KASSERT(entry_out != NULL && addr_out != NULL);
340 	KASSERT(direction == -1 || direction == 1);
341 	KASSERT((hint & PAGE_MASK) == 0 && (high & PAGE_MASK) == 0 &&
342 	    (low & PAGE_MASK) == 0 &&
343 	    (before_gap & PAGE_MASK) == 0 && (after_gap & PAGE_MASK) == 0);
344 	KASSERT(high + sz > high); /* Check for overflow. */
345 
346 	/*
347 	 * Hint magic.
348 	 */
349 	if (hint == 0)
350 		hint = (direction == 1 ? low : high);
351 	else if (hint > high) {
352 		if (direction != -1)
353 			return ENOMEM;
354 		hint = high;
355 	} else if (hint < low) {
356 		if (direction != 1)
357 			return ENOMEM;
358 		hint = low;
359 	}
360 
361 	for (entry = uvm_map_entrybyaddr(&map->addr,
362 	    hint - (direction == -1 ? 1 : 0)); entry != NULL;
363 	    entry = (direction == 1 ?
364 	    RBT_NEXT(uvm_map_addr, entry) :
365 	    RBT_PREV(uvm_map_addr, entry))) {
366 		if ((direction == 1 && VMMAP_FREE_START(entry) > high) ||
367 		    (direction == -1 && VMMAP_FREE_END(entry) < low)) {
368 			break;
369 		}
370 
371 		if (uvm_addr_fitspace(&low_addr, &high_addr,
372 		    MAX(low, VMMAP_FREE_START(entry)),
373 		    MIN(high, VMMAP_FREE_END(entry)),
374 		    sz, align, offset, before_gap, after_gap) == 0) {
375 			*entry_out = entry;
376 			if (hint >= low_addr && hint <= high_addr) {
377 				*addr_out = hint;
378 			} else {
379 				*addr_out = (direction == 1 ?
380 				    low_addr : high_addr);
381 			}
382 			return 0;
383 		}
384 	}
385 
386 	return ENOMEM;
387 }
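
/*
 * Example of the hint handling above (hypothetical numbers): with
 * direction == 1, low = 0x1000, high = 0xf000 and hint == 0, the hint
 * becomes low and the scan walks forward from the entry containing
 * 0x1000.  With direction == -1 the lookup uses hint - 1, so the backward
 * scan starts in the entry that owns the byte just below the hint.
 */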
388 
389 /*
390  * Invoke address selector of uaddr.
391  * uaddr may be NULL, in which case the algorithm will fail with ENOMEM.
392  *
393  * Will invoke uvm_addr_isavail to fill in last_out.
394  */
395 int
396 uvm_addr_invoke(struct vm_map *map, struct uvm_addr_state *uaddr,
397     struct vm_map_entry **entry_out, struct vm_map_entry **last_out,
398     vaddr_t *addr_out,
399     vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
400 {
401 	int error;
402 
403 	if (uaddr == NULL)
404 		return ENOMEM;
405 
406 	hint &= ~((vaddr_t)PAGE_MASK);
407 	if (hint != 0 &&
408 	    !(hint >= uaddr->uaddr_minaddr && hint < uaddr->uaddr_maxaddr))
409 		return ENOMEM;
410 
411 	vm_map_assert_anylock(map);
412 
413 	error = (*uaddr->uaddr_functions->uaddr_select)(map, uaddr,
414 	    entry_out, addr_out, sz, align, offset, prot, hint);
415 
416 	if (error == 0) {
417 		KASSERT(*entry_out != NULL);
418 		*last_out = NULL;
419 		if (!uvm_map_isavail(map, uaddr, entry_out, last_out,
420 		    *addr_out, sz)) {
421 			panic("uvm_addr_invoke: address selector %p "
422 			    "(%s 0x%lx-0x%lx) "
423 			    "returned unavailable address 0x%lx sz 0x%lx",
424 			    uaddr, uaddr->uaddr_functions->uaddr_name,
425 			    uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr,
426 			    *addr_out, sz);
427 		}
428 	}
429 
430 	return error;
431 }
432 
433 #if defined(DEBUG) || defined(DDB)
434 void
435 uvm_addr_print(struct uvm_addr_state *uaddr, const char *slot, boolean_t full,
436     int (*pr)(const char *, ...))
437 {
438 	if (uaddr == NULL) {
439 		(*pr)("- uvm_addr %s: NULL\n", slot);
440 		return;
441 	}
442 
443 	(*pr)("- uvm_addr %s: %p (%s 0x%lx-0x%lx)\n", slot, uaddr,
444 	    uaddr->uaddr_functions->uaddr_name,
445 	    uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr);
446 	if (uaddr->uaddr_functions->uaddr_print == NULL)
447 		return;
448 
449 	(*uaddr->uaddr_functions->uaddr_print)(uaddr, full, pr);
450 }
451 #endif /* DEBUG || DDB */
452 
453 /*
454  * Destroy a uvm_addr_state structure.
455  * The uaddr must have been previously allocated from uaddr_state_pool.
456  */
457 void
458 uaddr_destroy(struct uvm_addr_state *uaddr)
459 {
460 	pool_put(&uaddr_pool, uaddr);
461 }
462 
463 
464 #if 0
465 /*
466  * Linear allocator.
467  * This allocator uses a first-fit algorithm.
468  *
469  * If hint is set, search will start at the hint position.
470  * Only searches forward.
471  */
472 
473 const struct uvm_addr_functions uaddr_lin_functions = {
474 	.uaddr_select = &uaddr_lin_select,
475 	.uaddr_destroy = &uaddr_destroy,
476 	.uaddr_name = "uaddr_lin"
477 };
478 
479 struct uvm_addr_state *
480 uaddr_lin_create(vaddr_t minaddr, vaddr_t maxaddr)
481 {
482 	struct uvm_addr_state *uaddr;
483 
484 	uaddr = pool_get(&uaddr_pool, PR_WAITOK);
485 	uaddr->uaddr_minaddr = minaddr;
486 	uaddr->uaddr_maxaddr = maxaddr;
487 	uaddr->uaddr_functions = &uaddr_lin_functions;
488 	return uaddr;
489 }
490 
491 int
492 uaddr_lin_select(struct vm_map *map, struct uvm_addr_state *uaddr,
493     struct vm_map_entry **entry_out, vaddr_t *addr_out,
494     vsize_t sz, vaddr_t align, vaddr_t offset,
495     vm_prot_t prot, vaddr_t hint)
496 {
497 	vaddr_t guard_sz;
498 
499 	/*
500 	 * Deal with guardpages: search for space with one extra page.
501 	 */
502 	guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);
503 
504 	if (uaddr->uaddr_maxaddr - uaddr->uaddr_minaddr - guard_sz < sz)
505 		return ENOMEM;
506 	return uvm_addr_linsearch(map, uaddr, entry_out, addr_out, 0, sz,
507 	    align, offset, 1, uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr - sz,
508 	    0, guard_sz);
509 }
510 #endif
511 
512 /*
513  * Randomized allocator.
514  * This allocator uses uvm_map_hint to acquire a random address and searches
515  * from there.
516  */
517 
518 const struct uvm_addr_functions uaddr_rnd_functions = {
519 	.uaddr_select = &uaddr_rnd_select,
520 	.uaddr_free_insert = &uaddr_rnd_insert,
521 	.uaddr_free_remove = &uaddr_rnd_remove,
522 	.uaddr_destroy = &uaddr_rnd_destroy,
523 #if defined(DEBUG) || defined(DDB)
524 #if 0
525 	.uaddr_print = &uaddr_rnd_print,
526 #endif
527 #endif /* DEBUG || DDB */
528 	.uaddr_name = "uaddr_rnd"
529 };
530 
531 struct uvm_addr_state *
532 uaddr_rnd_create(vaddr_t minaddr, vaddr_t maxaddr)
533 {
534 	struct uaddr_rnd_state *uaddr;
535 
536 	uaddr = pool_get(&uaddr_rnd_pool, PR_WAITOK);
537 	uaddr->ur_uaddr.uaddr_minaddr = minaddr;
538 	uaddr->ur_uaddr.uaddr_maxaddr = maxaddr;
539 	uaddr->ur_uaddr.uaddr_functions = &uaddr_rnd_functions;
540 #if 0
541 	TAILQ_INIT(&uaddr->ur_free);
542 #endif
543 	return &uaddr->ur_uaddr;
544 }
545 
546 int
547 uaddr_rnd_select(struct vm_map *map, struct uvm_addr_state *uaddr,
548     struct vm_map_entry **entry_out, vaddr_t *addr_out,
549     vsize_t sz, vaddr_t align, vaddr_t offset,
550     vm_prot_t prot, vaddr_t hint)
551 {
552 	struct vmspace		*vm;
553 	vaddr_t			 minaddr, maxaddr;
554 	vaddr_t			 guard_sz;
555 	vaddr_t			 low_addr, high_addr;
556 	struct vm_map_entry	*entry, *next;
557 	vsize_t			 before_gap, after_gap;
558 	vaddr_t			 tmp;
559 
560 	KASSERT((map->flags & VM_MAP_ISVMSPACE) != 0);
561 	vm = (struct vmspace *)map;
562 
563 	/* Deal with guardpages: search for space with one extra page. */
564 	guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);
565 
566 	if (uaddr->uaddr_maxaddr - guard_sz < sz)
567 		return ENOMEM;
568 	minaddr = uvm_addr_align_forward(uaddr->uaddr_minaddr, align, offset);
569 	maxaddr = uvm_addr_align_backward(uaddr->uaddr_maxaddr - sz - guard_sz,
570 	    align, offset);
571 
572 	/* Quick fail if the allocation won't fit. */
573 	if (minaddr >= maxaddr)
574 		return ENOMEM;
575 
576 	/* Select a hint. */
577 	if (hint == 0)
578 		hint = uvm_map_hint(vm, prot, minaddr, maxaddr);
579 	/* Clamp hint to uaddr range. */
580 	hint = MIN(MAX(hint, minaddr), maxaddr);
581 
582 	/* Align hint to align,offset parameters. */
583 	tmp = hint;
584 	hint = uvm_addr_align_forward(tmp, align, offset);
585 	/* Check for overflow during alignment. */
586 	if (hint < tmp || hint > maxaddr)
587 		return ENOMEM; /* Compatibility mode: never look backwards. */
588 
589 	before_gap = 0;
590 	after_gap = guard_sz;
591 	hint -= MIN(hint, before_gap);
592 
593 	/*
594 	 * Use the augmented address tree to look up the first entry
595 	 * at or after hint with sufficient space.
596 	 *
597 	 * This code is the original optimized code, but it can fail when the
598 	 * subtree it looks at has sufficient space overall, yet none of that
599 	 * space satisfies the align constraint.
600 	 *
601 	 * Guard: subtree is not exhausted and max(fspace) >= required.
602 	 */
603 	entry = uvm_map_entrybyaddr(&map->addr, hint);
604 
605 	/* Walk up the tree, until there is at least sufficient space. */
606 	while (entry != NULL &&
607 	    entry->fspace_augment < before_gap + after_gap + sz)
608 		entry = RBT_PARENT(uvm_map_addr, entry);
609 
610 	while (entry != NULL) {
611 		/* Test if this fits. */
612 		if (VMMAP_FREE_END(entry) > hint &&
613 		    uvm_map_uaddr_e(map, entry) == uaddr &&
614 		    uvm_addr_fitspace(&low_addr, &high_addr,
615 		    MAX(uaddr->uaddr_minaddr, VMMAP_FREE_START(entry)),
616 		    MIN(uaddr->uaddr_maxaddr, VMMAP_FREE_END(entry)),
617 		    sz, align, offset, before_gap, after_gap) == 0) {
618 			*entry_out = entry;
619 			if (hint >= low_addr && hint <= high_addr)
620 				*addr_out = hint;
621 			else
622 				*addr_out = low_addr;
623 			return 0;
624 		}
625 
626 		/* RBT_NEXT, but skip subtrees that cannot possibly fit. */
627 		next = RBT_RIGHT(uvm_map_addr, entry);
628 		if (next != NULL &&
629 		    next->fspace_augment >= before_gap + after_gap + sz) {
630 			entry = next;
631 			while ((next = RBT_LEFT(uvm_map_addr, entry)) !=
632 			    NULL)
633 				entry = next;
634 		} else {
635 do_parent:
636 			next = RBT_PARENT(uvm_map_addr, entry);
637 			if (next == NULL)
638 				entry = NULL;
639 			else if (RBT_LEFT(uvm_map_addr, next) == entry)
640 				entry = next;
641 			else {
642 				entry = next;
643 				goto do_parent;
644 			}
645 		}
646 	}
647 
648 	/* Lookup failed. */
649 	return ENOMEM;
650 }
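
/*
 * Note on the walk above: fspace_augment tracks the largest free range in
 * an entry's subtree, so climbing while the augment is too small, and
 * pruning right subtrees whose augment is too small, never skips a viable
 * candidate.  E.g. (hypothetical) if sz plus gaps needs 3 pages and a
 * right subtree's fspace_augment is only 2 pages, that subtree is ignored.
 */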
651 
652 /*
653  * Destroy a uaddr_rnd_state structure.
654  */
655 void
656 uaddr_rnd_destroy(struct uvm_addr_state *uaddr)
657 {
658 	pool_put(&uaddr_rnd_pool, uaddr);
659 }
660 
661 /*
662  * Add entry to tailq (no-op; the rnd free tailq is disabled).
663  */
664 void
665 uaddr_rnd_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
666     struct vm_map_entry *entry)
667 {
668 	return;
669 }
670 
671 /*
672  * Remove entry from tailq (no-op; the rnd free tailq is disabled).
673  */
674 void
675 uaddr_rnd_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
676     struct vm_map_entry *entry)
677 {
678 	return;
679 }
680 
681 #if 0
682 #if defined(DEBUG) || defined(DDB)
683 void
684 uaddr_rnd_print(struct uvm_addr_state *uaddr_p, boolean_t full,
685     int (*pr)(const char*, ...))
686 {
687 	struct vm_map_entry	*entry;
688 	struct uaddr_rnd_state	*uaddr;
689 	vaddr_t			 addr;
690 	size_t			 count;
691 	vsize_t			 space;
692 
693 	uaddr = (struct uaddr_rnd_state *)uaddr_p;
694 	addr = 0;
695 	count = 0;
696 	space = 0;
697 	TAILQ_FOREACH(entry, &uaddr->ur_free, dfree.tailq) {
698 		count++;
699 		space += entry->fspace;
700 
701 		if (full) {
702 			(*pr)("\tentry %p: 0x%lx-0x%lx G=0x%lx F=0x%lx\n",
703 			    entry, entry->start, entry->end,
704 			    entry->guard, entry->fspace);
705 			(*pr)("\t\tfree: 0x%lx-0x%lx\n",
706 			    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
707 		}
708 		if (entry->start < addr) {
709 			if (!full)
710 				(*pr)("\tentry %p: 0x%lx-0x%lx "
711 				    "G=0x%lx F=0x%lx\n",
712 				    entry, entry->start, entry->end,
713 				    entry->guard, entry->fspace);
714 			(*pr)("\t\tstart=0x%lx, expected at least 0x%lx\n",
715 			    entry->start, addr);
716 		}
717 
718 		addr = VMMAP_FREE_END(entry);
719 	}
720 	(*pr)("\t0x%lu entries, 0x%lx free bytes\n", count, space);
721 }
722 #endif /* DEBUG || DDB */
723 #endif
724 
725 /*
726  * Kernel allocation bootstrap logic.
727  */
728 
729 const struct uvm_addr_functions uaddr_kernel_functions = {
730 	.uaddr_select = &uaddr_kbootstrap_select,
731 	.uaddr_destroy = &uaddr_kbootstrap_destroy,
732 	.uaddr_name = "uaddr_kbootstrap"
733 };
734 
735 /*
736  * Select an address from the map.
737  *
738  * This function ignores the uaddr spec and instead uses the map directly.
739  * Because of that property, the uaddr algorithm can be shared across all
740  * kernel maps.
741  */
742 int
743 uaddr_kbootstrap_select(struct vm_map *map, struct uvm_addr_state *uaddr,
744     struct vm_map_entry **entry_out, vaddr_t *addr_out,
745     vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
746 {
747 	vaddr_t tmp;
748 
749 	RBT_FOREACH(*entry_out, uvm_map_addr, &map->addr) {
750 		if (VMMAP_FREE_END(*entry_out) <= uvm_maxkaddr &&
751 		    uvm_addr_fitspace(addr_out, &tmp,
752 		    VMMAP_FREE_START(*entry_out), VMMAP_FREE_END(*entry_out),
753 		    sz, align, offset, 0, 0) == 0)
754 			return 0;
755 	}
756 
757 	return ENOMEM;
758 }
759 
760 /*
761  * Don't destroy the kernel bootstrap allocator.
762  */
763 void
764 uaddr_kbootstrap_destroy(struct uvm_addr_state *uaddr)
765 {
766 	KASSERT(uaddr == (struct uvm_addr_state *)&uaddr_kbootstrap);
767 }
768 
769 #ifndef SMALL_KERNEL
770 /*
771  * Best fit algorithm.
772  */
773 
774 const struct uvm_addr_functions uaddr_bestfit_functions = {
775 	.uaddr_select = &uaddr_bestfit_select,
776 	.uaddr_free_insert = &uaddr_bestfit_insert,
777 	.uaddr_free_remove = &uaddr_bestfit_remove,
778 	.uaddr_destroy = &uaddr_bestfit_destroy,
779 	.uaddr_name = "uaddr_bestfit"
780 };
781 
782 struct uvm_addr_state *
783 uaddr_bestfit_create(vaddr_t minaddr, vaddr_t maxaddr)
784 {
785 	struct uaddr_bestfit_state *uaddr;
786 
787 	uaddr = pool_get(&uaddr_bestfit_pool, PR_WAITOK);
788 	uaddr->ubf_uaddr.uaddr_minaddr = minaddr;
789 	uaddr->ubf_uaddr.uaddr_maxaddr = maxaddr;
790 	uaddr->ubf_uaddr.uaddr_functions = &uaddr_bestfit_functions;
791 	RBT_INIT(uaddr_free_rbtree, &uaddr->ubf_free);
792 	return &uaddr->ubf_uaddr;
793 }
794 
795 void
796 uaddr_bestfit_destroy(struct uvm_addr_state *uaddr)
797 {
798 	pool_put(&uaddr_bestfit_pool, uaddr);
799 }
800 
801 void
802 uaddr_bestfit_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
803     struct vm_map_entry *entry)
804 {
805 	struct uaddr_bestfit_state	*uaddr;
806 	struct vm_map_entry		*rb_rv;
807 
808 	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
809 	if ((rb_rv = RBT_INSERT(uaddr_free_rbtree, &uaddr->ubf_free, entry)) !=
810 	    NULL) {
811 		panic("%s: duplicate insertion: state %p "
812 		    "inserting %p, colliding with %p", __func__,
813 		    uaddr, entry, rb_rv);
814 	}
815 }
816 
817 void
818 uaddr_bestfit_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
819     struct vm_map_entry *entry)
820 {
821 	struct uaddr_bestfit_state	*uaddr;
822 
823 	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
824 	if (RBT_REMOVE(uaddr_free_rbtree, &uaddr->ubf_free, entry) != entry)
825 		panic("%s: entry was not in tree", __func__);
826 }
827 
828 int
829 uaddr_bestfit_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
830     struct vm_map_entry **entry_out, vaddr_t *addr_out,
831     vsize_t sz, vaddr_t align, vaddr_t offset,
832     vm_prot_t prot, vaddr_t hint)
833 {
834 	vaddr_t				 min, max;
835 	struct uaddr_bestfit_state	*uaddr;
836 	struct vm_map_entry		*entry;
837 	vsize_t				 guardsz;
838 
839 	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
840 	guardsz = ((map->flags & VM_MAP_GUARDPAGES) ? PAGE_SIZE : 0);
841 	if (sz + guardsz < sz)
842 		return ENOMEM;
843 
844 	/*
845 	 * Find smallest item on freelist capable of holding item.
846 	 * Deal with guardpages: search for space with one extra page.
847 	 */
848 	entry = uvm_addr_entrybyspace(&uaddr->ubf_free, sz + guardsz);
849 	if (entry == NULL)
850 		return ENOMEM;
851 
852 	/*
853 	 * Walk the tree until we find an entry that fits.
854 	 */
855 	while (uvm_addr_fitspace(&min, &max,
856 	    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
857 	    sz, align, offset, 0, guardsz) != 0) {
858 		entry = RBT_NEXT(uaddr_free_rbtree, entry);
859 		if (entry == NULL)
860 			return ENOMEM;
861 	}
862 
863 	/*
864 	 * Return the address that generates the least fragmentation.
865 	 */
866 	*entry_out = entry;
867 	*addr_out = (min - VMMAP_FREE_START(entry) <=
868 	    VMMAP_FREE_END(entry) - guardsz - sz - max ?
869 	    min : max);
870 	return 0;
871 }
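
/*
 * Worked example of the final choice above (hypothetical numbers): for a
 * free range 0x11000-0x20000, sz = 0x2000, align = 0x4000 and no guard
 * page, uvm_addr_fitspace() yields min = 0x14000 and max = 0x1c000.
 * Placing at min would strand 0x3000 below the allocation while placing
 * at max strands only 0x2000 above it, so max is chosen.
 */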
872 #endif /* !SMALL_KERNEL */
873 
874 
875 #ifndef SMALL_KERNEL
876 /*
877  * A userspace allocator based on pivots.
878  */
879 
880 const struct uvm_addr_functions uaddr_pivot_functions = {
881 	.uaddr_select = &uaddr_pivot_select,
882 	.uaddr_free_insert = &uaddr_pivot_insert,
883 	.uaddr_free_remove = &uaddr_pivot_remove,
884 	.uaddr_destroy = &uaddr_pivot_destroy,
885 #if defined(DEBUG) || defined(DDB)
886 	.uaddr_print = &uaddr_pivot_print,
887 #endif /* DEBUG || DDB */
888 	.uaddr_name = "uaddr_pivot"
889 };
890 
891 /*
892  * A special random function for pivots.
893  *
894  * This function will return:
895  * - a random number
896  * - a multiple of PAGE_SIZE
897  * - at least PAGE_SIZE
898  *
899  * The random function has a slightly higher chance to return a small number.
900  */
901 vsize_t
902 uaddr_pivot_random(void)
903 {
904 	int			r;
905 
906 	/*
907 	 * The sum of two dice (two arc4random_uniform(PIVOT_RND) draws) has
908 	 * a triangular, roughly bell-shaped distribution.  We bias towards
909 	 * small numbers by folding that curve at its peak, PIVOT_RND - 1,
910 	 * i.e. by taking the absolute distance from the peak
911 	 * (think of a graph on a piece of paper, that you fold).
912 	 * After the fold, the smallest values are the most likely outcomes.
913 	 */
914 	r = arc4random_uniform(PIVOT_RND) + arc4random_uniform(PIVOT_RND) -
915 	    (PIVOT_RND - 1);
916 	if (r < 0)
917 		r = -r;
918 
919 	/*
920 	 * Make the returned value at least PAGE_SIZE and a multiple of
921 	 * PAGE_SIZE.
922 	 */
923 	return (vaddr_t)(1 + r) << PAGE_SHIFT;
924 }
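
/*
 * Worked distribution for the code above (PIVOT_RND == 8): the folded
 * value v = |d1 + d2 - 7| occurs with probability P(0) = 8/64,
 * P(1) = 14/64, P(2) = 12/64, P(3) = 10/64, P(4) = 8/64, P(5) = 6/64,
 * P(6) = 4/64 and P(7) = 2/64, so the function returns between 1 and 8
 * pages and picks a gap of at most 3 pages a little over half of the
 * time (34/64).
 */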
925 
926 /*
927  * Select a new pivot.
928  *
929  * A pivot must:
930  * - be chosen randomly
931  * - have a randomly chosen gap before it, where the uaddr_state starts
932  * - have a randomly chosen gap after it, before the uaddr_state ends
933  *
934  * Furthermore, the pivot must provide sufficient space for the allocation.
935  * The addr will be set to the selected address.
936  *
937  * Returns ENOMEM on failure.
938  */
939 int
940 uaddr_pivot_newpivot(struct vm_map *map, struct uaddr_pivot_state *uaddr,
941     struct uaddr_pivot *pivot,
942     struct vm_map_entry **entry_out, vaddr_t *addr_out,
943     vsize_t sz, vaddr_t align, vaddr_t offset,
944     vsize_t before_gap, vsize_t after_gap)
945 {
946 	struct vm_map_entry		*entry, *found;
947 	vaddr_t				 minaddr, maxaddr;
948 	vsize_t				 dist;
949 	vaddr_t				 found_minaddr, found_maxaddr;
950 	vaddr_t				 min, max;
951 	vsize_t				 arc4_arg;
952 	int				 fit_error;
953 	u_int32_t			 path;
954 
955 	minaddr = uaddr->up_uaddr.uaddr_minaddr;
956 	maxaddr = uaddr->up_uaddr.uaddr_maxaddr;
957 	KASSERT(minaddr < maxaddr);
958 #ifdef DIAGNOSTIC
959 	if (minaddr + 2 * PAGE_SIZE > maxaddr) {
960 		panic("uaddr_pivot_newpivot: cannot grant random pivot "
961 		    "in area less than 2 pages (size = 0x%lx)",
962 		    maxaddr - minaddr);
963 	}
964 #endif /* DIAGNOSTIC */
965 
966 	/*
967 	 * Gap calculation: 1/32 of the size of the managed area.
968 	 *
969 	 * At most: sufficient to not get truncated at arc4random.
970 	 * At least: 2 PAGE_SIZE
971 	 *
972 	 * minaddr and maxaddr will be changed according to arc4random.
973 	 */
974 	dist = MAX((maxaddr - minaddr) / 32, 2 * (vaddr_t)PAGE_SIZE);
975 	if (dist >> PAGE_SHIFT > 0xffffffff) {
976 		minaddr += (vsize_t)arc4random() << PAGE_SHIFT;
977 		maxaddr -= (vsize_t)arc4random() << PAGE_SHIFT;
978 	} else {
979 		minaddr += (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
980 		    PAGE_SHIFT;
981 		maxaddr -= (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
982 		    PAGE_SHIFT;
983 	}
984 
985 	/*
986 	 * A very fast way to find an entry that will be large enough
987 	 * to hold the allocation, but still is found more or less
988 	 * randomly: the tree path selector has a 50% chance to go for
989 	 * a bigger or smaller entry.
990 	 *
991 	 * Note that the memory may actually be available,
992 	 * but the fragmentation may be so bad and the gaps chosen
993 	 * so unfortunately, that the allocation will not succeed.
994 	 * Or the alignment can only be satisfied by an entry that
995 	 * is not visited in the randomly selected path.
996 	 *
997 	 * This code finds an entry with sufficient space in O(log n) time.
998 	 */
999 	path = arc4random();
1000 	found = NULL;
1001 	entry = RBT_ROOT(uaddr_free_rbtree, &uaddr->up_free);
1002 	while (entry != NULL) {
1003 		fit_error = uvm_addr_fitspace(&min, &max,
1004 		    MAX(VMMAP_FREE_START(entry), minaddr),
1005 		    MIN(VMMAP_FREE_END(entry), maxaddr),
1006 		    sz, align, offset, before_gap, after_gap);
1007 
1008 		/* It fits, save this entry. */
1009 		if (fit_error == 0) {
1010 			found = entry;
1011 			found_minaddr = min;
1012 			found_maxaddr = max;
1013 		}
1014 
1015 		/* Next. */
1016 		if (fit_error != 0)
1017 			entry = RBT_RIGHT(uaddr_free_rbtree, entry);
1018 		else if ((path & 0x1) == 0) {
1019 			path >>= 1;
1020 			entry = RBT_RIGHT(uaddr_free_rbtree, entry);
1021 		} else {
1022 			path >>= 1;
1023 			entry = RBT_LEFT(uaddr_free_rbtree, entry);
1024 		}
1025 	}
1026 	if (found == NULL)
1027 		return ENOMEM;	/* No large enough region found. */
1028 
1029 	/*
1030 	 * Calculate a random address within found.
1031 	 *
1032 	 * found_minaddr and found_maxaddr are already aligned, so be sure
1033 	 * to select a multiple of align as the offset in the entry.
1034 	 * Preferably, arc4random_uniform is used to provide no bias within
1035 	 * the entry.
1036 	 * However, if the size of the entry exceeds arc4random_uniform's
1037 	 * argument limit, we simply use arc4random (thus limiting ourselves
1038 	 * to 4G * PAGE_SIZE bytes offset).
1039 	 */
1040 	if (found_maxaddr == found_minaddr)
1041 		*addr_out = found_minaddr;
1042 	else {
1043 		KASSERT(align >= PAGE_SIZE && (align & (align - 1)) == 0);
1044 		arc4_arg = found_maxaddr - found_minaddr;
1045 		if (arc4_arg > 0xffffffff) {
1046 			*addr_out = found_minaddr +
1047 			    (arc4random() & ~(align - 1));
1048 		} else {
1049 			*addr_out = found_minaddr +
1050 			    (arc4random_uniform(arc4_arg) & ~(align - 1));
1051 		}
1052 	}
1053 	/* Address was found in this entry. */
1054 	*entry_out = found;
1055 
1056 	/*
1057 	 * Set up new pivot and return selected address.
1058 	 *
1059 	 * Depending on the direction of the pivot, the pivot must be placed
1060 	 * at the bottom or the top of the allocation:
1061 	 * - if the pivot moves upwards, place the pivot at the top of the
1062 	 *   allocation,
1063 	 * - if the pivot moves downwards, place the pivot at the bottom
1064 	 *   of the allocation.
1065 	 */
1066 	pivot->entry = found;
1067 	pivot->dir = (arc4random() & 0x1 ? 1 : -1);
1068 	if (pivot->dir > 0)
1069 		pivot->addr = *addr_out + sz;
1070 	else
1071 		pivot->addr = *addr_out;
1072 	pivot->expire = PIVOT_EXPIRE - 1; /* First use is right now. */
1073 	return 0;
1074 }
1075 
1076 /*
1077  * Pivot selector.
1078  *
1079  * Each time the selector is invoked, it will select a random pivot, which
1080  * it will use to select memory with. The memory will be placed at the pivot,
1081  * with a randomly sized gap between the allocation and the pivot.
1082  * The pivot will then move so it will never revisit this address.
1083  *
1084  * Each allocation, the pivot expiry timer ticks. Once the pivot becomes
1085  * expired, it will be replaced with a newly created pivot. Pivots also
1086  * automatically expire if they fail to provide memory for an allocation.
1087  *
1088  * Expired pivots are replaced using the uaddr_pivot_newpivot() function,
1089  * which will ensure the pivot points at memory in such a way that the
1090  * allocation will succeed.
1091  * As an added bonus, the uaddr_pivot_newpivot() function will perform the
1092  * allocation immediately and move the pivot as appropriate.
1093  *
1094  * If uaddr_pivot_newpivot() fails to find a new pivot that will allow the
1095  * allocation to succeed, it will not create a new pivot and the allocation
1096  * will fail.
1097  *
1098  * A pivot running into used memory will automatically expire (because it will
1099  * fail to allocate).
1100  *
1101  * Characteristics of the allocator:
1102  * - best case, an allocation is O(log N)
1103  *   (it would be O(1), if it weren't for the need to check if the memory is
1104  *   free; although that can be avoided...)
1105  * - worst case, an allocation is O(log N)
1106  *   (the uaddr_pivot_newpivot() function has that complexity)
1107  * - failed allocations always take O(log N)
1108  *   (the uaddr_pivot_newpivot() function will walk that deep into the tree).
1109  */
1110 int
1111 uaddr_pivot_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
1112     struct vm_map_entry **entry_out, vaddr_t *addr_out,
1113     vsize_t sz, vaddr_t align, vaddr_t offset,
1114     vm_prot_t prot, vaddr_t hint)
1115 {
1116 	struct uaddr_pivot_state	*uaddr;
1117 	struct vm_map_entry		*entry;
1118 	struct uaddr_pivot		*pivot;
1119 	vaddr_t				 min, max;
1120 	vsize_t				 before_gap, after_gap;
1121 	int				 err;
1122 
1123 	/*
1124 	 * When we have a hint, use the rnd allocator that finds the
1125 	 * area that is closest to the hint, if there is such an area.
1126 	 */
1127 	if (hint != 0) {
1128 		if (uaddr_rnd_select(map, uaddr_p, entry_out, addr_out,
1129 		    sz, align, offset, prot, hint) == 0)
1130 			return 0;
1131 		return ENOMEM;
1132 	}
1133 
1134 	/*
1135 	 * Select a random pivot and random gap sizes around the allocation.
1136 	 */
1137 	uaddr = (struct uaddr_pivot_state *)uaddr_p;
1138 	pivot = &uaddr->up_pivots[
1139 	    arc4random_uniform(nitems(uaddr->up_pivots))];
1140 	before_gap = uaddr_pivot_random();
1141 	after_gap = uaddr_pivot_random();
1142 	if (pivot->addr == 0 || pivot->entry == NULL || pivot->expire == 0)
1143 		goto expired;	/* Pivot is invalid (null or expired). */
1144 
1145 	/*
1146 	 * Attempt to use the pivot to map the entry.
1147 	 */
1148 	entry = pivot->entry;
1149 	if (pivot->dir > 0) {
1150 		if (uvm_addr_fitspace(&min, &max,
1151 		    MAX(VMMAP_FREE_START(entry), pivot->addr),
1152 		    VMMAP_FREE_END(entry), sz, align, offset,
1153 		    before_gap, after_gap) == 0) {
1154 			*addr_out = min;
1155 			*entry_out = entry;
1156 			pivot->addr = min + sz;
1157 			pivot->expire--;
1158 			return 0;
1159 		}
1160 	} else {
1161 		if (uvm_addr_fitspace(&min, &max,
1162 		    VMMAP_FREE_START(entry),
1163 		    MIN(VMMAP_FREE_END(entry), pivot->addr),
1164 		    sz, align, offset, before_gap, after_gap) == 0) {
1165 			*addr_out = max;
1166 			*entry_out = entry;
1167 			pivot->addr = max;
1168 			pivot->expire--;
1169 			return 0;
1170 		}
1171 	}
1172 
1173 expired:
1174 	/*
1175 	 * Pivot expired or allocation failed.
1176 	 * Use pivot selector to do the allocation and find a new pivot.
1177 	 */
1178 	err = uaddr_pivot_newpivot(map, uaddr, pivot, entry_out, addr_out,
1179 	    sz, align, offset, before_gap, after_gap);
1180 	return err;
1181 }
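
/*
 * A minimal userland model of the pivot behaviour described above, kept in
 * an #if 0 block so it stays out of any build.  The ex_ names, the fixed
 * gap range and the upward-only direction are assumptions of this sketch,
 * not kernel code: keep a cursor, add a random gap, hand out the next
 * chunk, and retire the pivot after a fixed number of uses.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define EX_PAGE		0x1000UL
#define EX_EXPIRE	4

struct ex_pivot {
	uintptr_t	addr;	/* end of the previous allocation */
	int		expire;	/* allocations left before renewal */
};

static uintptr_t
ex_pivot_alloc(struct ex_pivot *p, size_t sz)
{
	uintptr_t res;

	if (p->expire == 0)
		return 0;	/* expired: caller must pick a new pivot */
	/* Random 1-8 page gap between the previous allocation and this one. */
	res = p->addr + (1 + arc4random_uniform(8)) * EX_PAGE;
	p->addr = res + sz;
	p->expire--;
	return res;
}

int
main(void)
{
	struct ex_pivot p = { 0x100000, EX_EXPIRE };
	int i;

	for (i = 0; i < 5; i++)
		printf("alloc %d -> %#lx\n", i,
		    (unsigned long)ex_pivot_alloc(&p, 2 * EX_PAGE));
	return 0;
}
#endif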
1182 
1183 /*
1184  * Free the pivot.
1185  */
1186 void
1187 uaddr_pivot_destroy(struct uvm_addr_state *uaddr)
1188 {
1189 	pool_put(&uaddr_pivot_pool, uaddr);
1190 }
1191 
1192 /*
1193  * Insert an entry with free space in the space tree.
1194  */
1195 void
1196 uaddr_pivot_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
1197     struct vm_map_entry *entry)
1198 {
1199 	struct uaddr_pivot_state	*uaddr;
1200 	struct vm_map_entry		*rb_rv;
1201 	struct uaddr_pivot		*p;
1202 	vaddr_t				 check_addr;
1203 	vaddr_t				 start, end;
1204 
1205 	uaddr = (struct uaddr_pivot_state *)uaddr_p;
1206 	if ((rb_rv = RBT_INSERT(uaddr_free_rbtree, &uaddr->up_free, entry)) !=
1207 	    NULL) {
1208 		panic("%s: duplicate insertion: state %p "
1209 		    "inserting entry %p which collides with %p", __func__,
1210 		    uaddr, entry, rb_rv);
1211 	}
1212 
1213 	start = VMMAP_FREE_START(entry);
1214 	end = VMMAP_FREE_END(entry);
1215 
1216 	/*
1217 	 * Update all pivots that are contained in this entry.
1218 	 */
1219 	for (p = &uaddr->up_pivots[0];
1220 	    p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
1221 		check_addr = p->addr;
1222 		if (check_addr == 0)
1223 			continue;
1224 		if (p->dir < 0)
1225 			check_addr--;
1226 
1227 		if (start <= check_addr &&
1228 		    check_addr < end) {
1229 			KASSERT(p->entry == NULL);
1230 			p->entry = entry;
1231 		}
1232 	}
1233 }
1234 
1235 /*
1236  * Remove an entry with free space from the space tree.
1237  */
1238 void
1239 uaddr_pivot_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
1240     struct vm_map_entry *entry)
1241 {
1242 	struct uaddr_pivot_state	*uaddr;
1243 	struct uaddr_pivot		*p;
1244 
1245 	uaddr = (struct uaddr_pivot_state *)uaddr_p;
1246 	if (RBT_REMOVE(uaddr_free_rbtree, &uaddr->up_free, entry) != entry)
1247 		panic("%s: entry was not in tree", __func__);
1248 
1249 	/*
1250 	 * Inform any pivot with this entry that the entry is gone.
1251 	 * Note that this does not automatically invalidate the pivot.
1252 	 */
1253 	for (p = &uaddr->up_pivots[0];
1254 	    p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
1255 		if (p->entry == entry)
1256 			p->entry = NULL;
1257 	}
1258 }
1259 
1260 /*
1261  * Create a new pivot selector.
1262  *
1263  * Initially, all pivots are in the expired state.
1264  * Two reasons for this:
1265  * - it means this allocator will not take a huge amount of time
1266  * - pivots select better on demand, because the pivot selection will be
1267  *   affected by preceding allocations:
1268  *   the next pivots will likely end up in different segments of free memory,
1269  *   that was segmented by an earlier allocation; better spread.
1270  */
1271 struct uvm_addr_state *
1272 uaddr_pivot_create(vaddr_t minaddr, vaddr_t maxaddr)
1273 {
1274 	struct uaddr_pivot_state *uaddr;
1275 
1276 	uaddr = pool_get(&uaddr_pivot_pool, PR_WAITOK);
1277 	uaddr->up_uaddr.uaddr_minaddr = minaddr;
1278 	uaddr->up_uaddr.uaddr_maxaddr = maxaddr;
1279 	uaddr->up_uaddr.uaddr_functions = &uaddr_pivot_functions;
1280 	RBT_INIT(uaddr_free_rbtree, &uaddr->up_free);
1281 	memset(uaddr->up_pivots, 0, sizeof(uaddr->up_pivots));
1282 
1283 	return &uaddr->up_uaddr;
1284 }
1285 
1286 #if defined(DEBUG) || defined(DDB)
1287 /*
1288  * Print the uaddr_pivot_state.
1289  *
1290  * If full, a listing of all entries in the state will be provided.
1291  */
1292 void
1293 uaddr_pivot_print(struct uvm_addr_state *uaddr_p, boolean_t full,
1294     int (*pr)(const char *, ...))
1295 {
1296 	struct uaddr_pivot_state	*uaddr;
1297 	struct uaddr_pivot		*pivot;
1298 	struct vm_map_entry		*entry;
1299 	int				 i;
1300 	vaddr_t				 check_addr;
1301 
1302 	uaddr = (struct uaddr_pivot_state *)uaddr_p;
1303 
1304 	for (i = 0; i < NUM_PIVOTS; i++) {
1305 		pivot = &uaddr->up_pivots[i];
1306 
1307 		(*pr)("\tpivot 0x%lx, expires in %d, direction %d\n",
1308 		    pivot->addr, pivot->expire, pivot->dir);
1309 	}
1310 	if (!full)
1311 		return;
1312 
1313 	if (RBT_EMPTY(uaddr_free_rbtree, &uaddr->up_free))
1314 		(*pr)("\tempty\n");
1315 	/* Print list of free space. */
1316 	RBT_FOREACH(entry, uaddr_free_rbtree, &uaddr->up_free) {
1317 		(*pr)("\t0x%lx - 0x%lx free (0x%lx bytes)\n",
1318 		    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
1319 		    VMMAP_FREE_END(entry) - VMMAP_FREE_START(entry));
1320 
1321 		for (i = 0; i < NUM_PIVOTS; i++) {
1322 			pivot = &uaddr->up_pivots[i];
1323 			check_addr = pivot->addr;
1324 			if (check_addr == 0)
1325 				continue;
1326 			if (pivot->dir < 0)
1327 				check_addr--;
1328 
1329 			if (VMMAP_FREE_START(entry) <= check_addr &&
1330 			    check_addr < VMMAP_FREE_END(entry)) {
1331 				(*pr)("\t\tcontains pivot %d (0x%lx)\n",
1332 				    i, pivot->addr);
1333 			}
1334 		}
1335 	}
1336 }
1337 #endif /* DEBUG || DDB */
1338 #endif /* !SMALL_KERNEL */
1339 
1340 #ifndef SMALL_KERNEL
1341 /*
1342  * Stack/break allocator.
1343  *
1344  * The stack area is filled in the direction opposite to stack growth;
1345  * the brk area is filled downward (because sbrk() grows upward).
1346  *
1347  * Both areas are filled proportionally: the brk area is searched
1348  * first; if no space is found there,
1349  * the stack area is tried instead.
1350  */
1351 const struct uvm_addr_functions uaddr_stack_brk_functions = {
1352 	.uaddr_select = &uaddr_stack_brk_select,
1353 	.uaddr_destroy = &uaddr_destroy,
1354 	.uaddr_name = "uaddr_stckbrk"
1355 };
1356 
1357 /*
1358  * Stack/brk address selector.
1359  */
1360 int
1361 uaddr_stack_brk_select(struct vm_map *map, struct uvm_addr_state *uaddr,
1362     struct vm_map_entry **entry_out, vaddr_t *addr_out,
1363     vsize_t sz, vaddr_t align, vaddr_t offset,
1364     vm_prot_t prot, vaddr_t hint)
1365 {
1366 	vaddr_t			start;
1367 	vaddr_t			end;
1368 	vsize_t			before_gap;
1369 	vsize_t			after_gap;
1370 	int			dir;
1371 
1372 	/* Set up brk search strategy. */
1373 	start = MAX(map->b_start, uaddr->uaddr_minaddr);
1374 	end = MIN(map->b_end, uaddr->uaddr_maxaddr);
1375 	before_gap = 0;
1376 	after_gap = 0;
1377 	dir = -1;	/* Opposite of brk() growth. */
1378 
1379 	if (end - start >= sz) {
1380 		if (uvm_addr_linsearch(map, uaddr, entry_out, addr_out,
1381 		    0, sz, align, offset, dir, start, end - sz,
1382 		    before_gap, after_gap) == 0)
1383 			return 0;
1384 	}
1385 
1386 	/* Set up stack search strategy. */
1387 	start = MAX(map->s_start, uaddr->uaddr_minaddr);
1388 	end = MIN(map->s_end, uaddr->uaddr_maxaddr);
1389 	before_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
1390 	after_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
1391 #ifdef MACHINE_STACK_GROWS_UP
1392 	dir = -1;
1393 #else
1394 	dir =  1;
1395 #endif
1396 	if (end - start >= before_gap + after_gap &&
1397 	    end - start - before_gap - after_gap >= sz) {
1398 		if (uvm_addr_linsearch(map, uaddr, entry_out, addr_out,
1399 		    0, sz, align, offset, dir, start, end - sz,
1400 		    before_gap, after_gap) == 0)
1401 			return 0;
1402 	}
1403 
1404 	return ENOMEM;
1405 }
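
/*
 * Note on the gap arithmetic above: ((arc4random() & 0x3) + 1) << PAGE_SHIFT
 * is 1, 2, 3 or 4 pages, so every stack-area search leaves a randomly
 * sized 1-4 page guard region on each side of the allocation.
 */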
1406 
1407 struct uvm_addr_state *
1408 uaddr_stack_brk_create(vaddr_t minaddr, vaddr_t maxaddr)
1409 {
1410 	struct uvm_addr_state *uaddr;
1411 
1412 	uaddr = pool_get(&uaddr_pool, PR_WAITOK);
1413 	uaddr->uaddr_minaddr = minaddr;
1414 	uaddr->uaddr_maxaddr = maxaddr;
1415 	uaddr->uaddr_functions = &uaddr_stack_brk_functions;
1416 	return uaddr;
1417 }
1418 #endif /* !SMALL_KERNEL */
1419 
1420 
1421 #ifndef SMALL_KERNEL
1422 /*
1423  * Free space comparison.
1424  * Compares smaller free-space before larger free-space.
1425  */
1426 static inline int
1427 uvm_mapent_fspace_cmp(const struct vm_map_entry *e1,
1428     const struct vm_map_entry *e2)
1429 {
1430 	if (e1->fspace != e2->fspace)
1431 		return (e1->fspace < e2->fspace ? -1 : 1);
1432 	return (e1->start < e2->start ? -1 : e1->start > e2->start);
1433 }
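
/*
 * Example of the resulting order (hypothetical entries, written as
 * fspace@start): 0x2000@0x40000 sorts before 0x2000@0x90000, which sorts
 * before 0x5000@0x10000; ties on free-space size fall back to the start
 * address, so RBT_NEXT() always moves towards equal or larger free ranges.
 */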
1434 
1435 RBT_GENERATE(uaddr_free_rbtree, vm_map_entry, dfree.rbtree,
1436     uvm_mapent_fspace_cmp);
1437 #endif /* !SMALL_KERNEL */
1438