1 /* $OpenBSD: uvm_addr.c,v 1.37 2024/09/04 07:54:53 mglocker Exp $ */
2
3 /*
4 * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 /* #define DEBUG */
20
21 #include <sys/param.h>
22 #include <sys/systm.h>
23 #include <uvm/uvm.h>
24 #include <uvm/uvm_addr.h>
25 #include <sys/pool.h>
26
27 /* Number of pivots in pivot allocator. */
28 #define NUM_PIVOTS 16
29 /*
30 * Max number (inclusive) of pages the pivot allocator
31 * will place between allocations.
32 *
33 * The uaddr_pivot_random() function attempts to bias towards
34 * small space between allocations, so putting a large number here is fine.
35 */
36 #define PIVOT_RND 8
37 /*
38 * Number of allocations that a pivot can supply before expiring.
39 * When a pivot expires, a new pivot has to be found.
40 *
41 * Must be at least 1.
42 */
43 #define PIVOT_EXPIRE 1024
44
45
46 /* Pool with uvm_addr_state structures. */
47 struct pool uaddr_pool;
48 struct pool uaddr_bestfit_pool;
49 struct pool uaddr_pivot_pool;
50 struct pool uaddr_rnd_pool;
51
52 /* uvm_addr state for bestfit selector. */
53 struct uaddr_bestfit_state {
54 struct uvm_addr_state ubf_uaddr;
55 struct uaddr_free_rbtree ubf_free;
56 };
57
58 /* uvm_addr state for rnd selector. */
59 struct uaddr_rnd_state {
60 struct uvm_addr_state ur_uaddr;
61 #if 0
62 TAILQ_HEAD(, vm_map_entry) ur_free;
63 #endif
64 };
65
66 /*
67 * Definition of a pivot in pivot selector.
68 */
69 struct uaddr_pivot {
70 vaddr_t addr; /* End of prev. allocation. */
71 int expire;/* Best before date. */
72 int dir; /* Direction. */
73 struct vm_map_entry *entry; /* Will contain next alloc. */
74 };
75 /* uvm_addr state for pivot selector. */
76 struct uaddr_pivot_state {
77 struct uvm_addr_state up_uaddr;
78
79 /* Free space tree, for fast pivot selection. */
80 struct uaddr_free_rbtree up_free;
81
82 /* List of pivots. The pointers point to after the last allocation. */
83 struct uaddr_pivot up_pivots[NUM_PIVOTS];
84 };
85
86 /* Forward declaration (see below). */
87 extern const struct uvm_addr_functions uaddr_kernel_functions;
88 struct uvm_addr_state uaddr_kbootstrap;
89
90
91 /*
92 * Support functions.
93 */
94
95 #ifndef SMALL_KERNEL
96 struct vm_map_entry *uvm_addr_entrybyspace(struct uaddr_free_rbtree*,
97 vsize_t);
98 #endif /* !SMALL_KERNEL */
99 void uaddr_destroy(struct uvm_addr_state *);
100 void uaddr_kbootstrap_destroy(struct uvm_addr_state *);
101 void uaddr_rnd_destroy(struct uvm_addr_state *);
102 void uaddr_bestfit_destroy(struct uvm_addr_state *);
103 void uaddr_pivot_destroy(struct uvm_addr_state *);
104
105 #if 0
106 int uaddr_lin_select(struct vm_map *,
107 struct uvm_addr_state *, struct vm_map_entry **,
108 vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
109 vaddr_t);
110 #endif
111 int uaddr_kbootstrap_select(struct vm_map *,
112 struct uvm_addr_state *, struct vm_map_entry **,
113 vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
114 vaddr_t);
115 int uaddr_rnd_select(struct vm_map *,
116 struct uvm_addr_state *, struct vm_map_entry **,
117 vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
118 vaddr_t);
119 int uaddr_bestfit_select(struct vm_map *,
120 struct uvm_addr_state*, struct vm_map_entry **,
121 vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
122 vaddr_t);
123 #ifndef SMALL_KERNEL
124 int uaddr_pivot_select(struct vm_map *,
125 struct uvm_addr_state *, struct vm_map_entry **,
126 vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
127 vaddr_t);
128 int uaddr_stack_brk_select(struct vm_map *,
129 struct uvm_addr_state *, struct vm_map_entry **,
130 vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
131 vaddr_t);
132 #endif /* !SMALL_KERNEL */
133
134 void uaddr_rnd_insert(struct vm_map *,
135 struct uvm_addr_state *, struct vm_map_entry *);
136 void uaddr_rnd_remove(struct vm_map *,
137 struct uvm_addr_state *, struct vm_map_entry *);
138 void uaddr_bestfit_insert(struct vm_map *,
139 struct uvm_addr_state *, struct vm_map_entry *);
140 void uaddr_bestfit_remove(struct vm_map *,
141 struct uvm_addr_state *, struct vm_map_entry *);
142 void uaddr_pivot_insert(struct vm_map *,
143 struct uvm_addr_state *, struct vm_map_entry *);
144 void uaddr_pivot_remove(struct vm_map *,
145 struct uvm_addr_state *, struct vm_map_entry *);
146
147 #ifndef SMALL_KERNEL
148 vsize_t uaddr_pivot_random(void);
149 int uaddr_pivot_newpivot(struct vm_map *,
150 struct uaddr_pivot_state *, struct uaddr_pivot *,
151 struct vm_map_entry **, vaddr_t *,
152 vsize_t, vaddr_t, vaddr_t, vsize_t, vsize_t);
153 #endif /* !SMALL_KERNEL */
154
155 #if defined(DEBUG) || defined(DDB)
156 void uaddr_pivot_print(struct uvm_addr_state *, boolean_t,
157 int (*)(const char *, ...));
158 #if 0
159 void uaddr_rnd_print(struct uvm_addr_state *, boolean_t,
160 int (*)(const char *, ...));
161 #endif
162 #endif /* DEBUG || DDB */
163
164
165 #ifndef SMALL_KERNEL
166 /*
167 * Find smallest entry in tree that will fit sz bytes.
168 */
169 struct vm_map_entry *
uvm_addr_entrybyspace(struct uaddr_free_rbtree * free,vsize_t sz)170 uvm_addr_entrybyspace(struct uaddr_free_rbtree *free, vsize_t sz)
171 {
172 struct vm_map_entry *tmp, *res;
173
174 tmp = RBT_ROOT(uaddr_free_rbtree, free);
175 res = NULL;
176 while (tmp) {
177 if (tmp->fspace >= sz) {
178 res = tmp;
179 tmp = RBT_LEFT(uaddr_free_rbtree, tmp);
180 } else if (tmp->fspace < sz)
181 tmp = RBT_RIGHT(uaddr_free_rbtree, tmp);
182 }
183 return res;
184 }
185 #endif /* !SMALL_KERNEL */
186
187 static inline vaddr_t
uvm_addr_align_forward(vaddr_t addr,vaddr_t align,vaddr_t offset)188 uvm_addr_align_forward(vaddr_t addr, vaddr_t align, vaddr_t offset)
189 {
190 vaddr_t adjusted;
191
192 KASSERT(offset < align || (align == 0 && offset == 0));
193 KASSERT((align & (align - 1)) == 0);
194 KASSERT((offset & PAGE_MASK) == 0);
195
196 align = MAX(align, PAGE_SIZE);
197 adjusted = addr & ~(align - 1);
198 adjusted += offset;
199 return (adjusted < addr ? adjusted + align : adjusted);
200 }
201
202 static inline vaddr_t
uvm_addr_align_backward(vaddr_t addr,vaddr_t align,vaddr_t offset)203 uvm_addr_align_backward(vaddr_t addr, vaddr_t align, vaddr_t offset)
204 {
205 vaddr_t adjusted;
206
207 KASSERT(offset < align || (align == 0 && offset == 0));
208 KASSERT((align & (align - 1)) == 0);
209 KASSERT((offset & PAGE_MASK) == 0);
210
211 align = MAX(align, PAGE_SIZE);
212 adjusted = addr & ~(align - 1);
213 adjusted += offset;
214 return (adjusted > addr ? adjusted - align : adjusted);
215 }
216
217 /*
218 * Try to fit the requested space into the entry.
219 */
220 int
uvm_addr_fitspace(vaddr_t * min_result,vaddr_t * max_result,vaddr_t low_addr,vaddr_t high_addr,vsize_t sz,vaddr_t align,vaddr_t offset,vsize_t before_gap,vsize_t after_gap)221 uvm_addr_fitspace(vaddr_t *min_result, vaddr_t *max_result,
222 vaddr_t low_addr, vaddr_t high_addr, vsize_t sz,
223 vaddr_t align, vaddr_t offset,
224 vsize_t before_gap, vsize_t after_gap)
225 {
226 vaddr_t tmp;
227 vsize_t fspace;
228
229 if (low_addr > high_addr)
230 return ENOMEM;
231 fspace = high_addr - low_addr;
232 if (fspace < before_gap + after_gap)
233 return ENOMEM;
234 if (fspace - before_gap - after_gap < sz)
235 return ENOMEM;
236
237 /*
238 * Calculate lowest address.
239 */
240 low_addr += before_gap;
241 low_addr = uvm_addr_align_forward(tmp = low_addr, align, offset);
242 if (low_addr < tmp) /* Overflow during alignment. */
243 return ENOMEM;
244 if (high_addr - after_gap - sz < low_addr)
245 return ENOMEM;
246
247 /*
248 * Calculate highest address.
249 */
250 high_addr -= after_gap + sz;
251 high_addr = uvm_addr_align_backward(tmp = high_addr, align, offset);
252 if (high_addr > tmp) /* Overflow during alignment. */
253 return ENOMEM;
254 if (low_addr > high_addr)
255 return ENOMEM;
256
257 *min_result = low_addr;
258 *max_result = high_addr;
259 return 0;
260 }
261
262
263 /*
264 * Initialize uvm_addr.
265 */
266 void
uvm_addr_init(void)267 uvm_addr_init(void)
268 {
269 pool_init(&uaddr_pool, sizeof(struct uvm_addr_state), 0,
270 IPL_VM, PR_WAITOK, "uaddr", NULL);
271 pool_init(&uaddr_bestfit_pool, sizeof(struct uaddr_bestfit_state), 0,
272 IPL_VM, PR_WAITOK, "uaddrbest", NULL);
273 pool_init(&uaddr_pivot_pool, sizeof(struct uaddr_pivot_state), 0,
274 IPL_VM, PR_WAITOK, "uaddrpivot", NULL);
275 pool_init(&uaddr_rnd_pool, sizeof(struct uaddr_rnd_state), 0,
276 IPL_VM, PR_WAITOK, "uaddrrnd", NULL);
277
278 uaddr_kbootstrap.uaddr_minaddr = PAGE_SIZE;
279 uaddr_kbootstrap.uaddr_maxaddr = -(vaddr_t)PAGE_SIZE;
280 uaddr_kbootstrap.uaddr_functions = &uaddr_kernel_functions;
281 }
282
283 /*
284 * Invoke destructor function of uaddr.
285 */
286 void
uvm_addr_destroy(struct uvm_addr_state * uaddr)287 uvm_addr_destroy(struct uvm_addr_state *uaddr)
288 {
289 if (uaddr)
290 (*uaddr->uaddr_functions->uaddr_destroy)(uaddr);
291 }
292
/*
 * Directional first fit.
 *
 * Do a linear search for free space, starting at addr in entry.
 * direction ==  1: search forward
 * direction == -1: search backward
 *
 * Output: low <= addr <= high and entry will contain addr.
 * ENOMEM will be returned if no space is available.
 *
 * gap describes the space that must appear between the preceding entry.
 */
int
uvm_addr_linsearch(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vaddr_t hint, vsize_t sz, vaddr_t align, vaddr_t offset,
    int direction, vaddr_t low, vaddr_t high,
    vsize_t before_gap, vsize_t after_gap)
{
	struct vm_map_entry *entry;
	vaddr_t low_addr, high_addr;

	KASSERT(entry_out != NULL && addr_out != NULL);
	KASSERT(direction == -1 || direction == 1);
	KASSERT((hint & PAGE_MASK) == 0 && (high & PAGE_MASK) == 0 &&
	    (low & PAGE_MASK) == 0 &&
	    (before_gap & PAGE_MASK) == 0 && (after_gap & PAGE_MASK) == 0);
	KASSERT(high + sz > high); /* Check for overflow. */

	/*
	 * Hint magic: a zero hint means "start at the edge matching the
	 * search direction"; an out-of-range hint is clamped, but only
	 * when the search direction can still reach the valid window.
	 */
	if (hint == 0)
		hint = (direction == 1 ? low : high);
	else if (hint > high) {
		if (direction != -1)
			return ENOMEM;
		hint = high;
	} else if (hint < low) {
		if (direction != 1)
			return ENOMEM;
		hint = low;
	}

	/*
	 * Walk the address tree entry by entry in the requested
	 * direction.  When searching backward, the lookup uses
	 * hint - 1 so the entry preceding the hint is found first.
	 */
	for (entry = uvm_map_entrybyaddr(&map->addr,
	    hint - (direction == -1 ? 1 : 0)); entry != NULL;
	    entry = (direction == 1 ?
	    RBT_NEXT(uvm_map_addr, entry) :
	    RBT_PREV(uvm_map_addr, entry))) {
		/* Stop once the walk leaves the [low, high] window. */
		if ((direction == 1 && VMMAP_FREE_START(entry) > high) ||
		    (direction == -1 && VMMAP_FREE_END(entry) < low)) {
			break;
		}

		if (uvm_addr_fitspace(&low_addr, &high_addr,
		    MAX(low, VMMAP_FREE_START(entry)),
		    MIN(high, VMMAP_FREE_END(entry)),
		    sz, align, offset, before_gap, after_gap) == 0) {
			*entry_out = entry;
			/* Prefer the hint itself when it is a valid fit. */
			if (hint >= low_addr && hint <= high_addr) {
				*addr_out = hint;
			} else {
				*addr_out = (direction == 1 ?
				    low_addr : high_addr);
			}
			return 0;
		}
	}

	return ENOMEM;
}
364
/*
 * Invoke address selector of uaddr.
 * uaddr may be NULL, in which case the algorithm will fail with ENOMEM.
 *
 * Will invoke uvm_map_isavail to fill in last_out.
 */
int
uvm_addr_invoke(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, struct vm_map_entry **last_out,
    vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
{
	int error;

	if (uaddr == NULL)
		return ENOMEM;

	/* Page-align the hint; reject hints outside the managed range. */
	hint &= ~((vaddr_t)PAGE_MASK);
	if (hint != 0 &&
	    !(hint >= uaddr->uaddr_minaddr && hint < uaddr->uaddr_maxaddr))
		return ENOMEM;

	vm_map_assert_anylock(map);

	error = (*uaddr->uaddr_functions->uaddr_select)(map, uaddr,
	    entry_out, addr_out, sz, align, offset, prot, hint);

	if (error == 0) {
		KASSERT(*entry_out != NULL);
		*last_out = NULL;
		/*
		 * Cross-check the selector's result.  A selector that
		 * hands out an unavailable address is a bug, not a
		 * recoverable condition — panic with full context.
		 */
		if (!uvm_map_isavail(map, uaddr, entry_out, last_out,
		    *addr_out, sz)) {
			panic("uvm_addr_invoke: address selector %p "
			    "(%s 0x%lx-0x%lx) "
			    "returned unavailable address 0x%lx sz 0x%lx",
			    uaddr, uaddr->uaddr_functions->uaddr_name,
			    uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr,
			    *addr_out, sz);
		}
	}

	return error;
}
408
409 #if defined(DEBUG) || defined(DDB)
410 void
uvm_addr_print(struct uvm_addr_state * uaddr,const char * slot,boolean_t full,int (* pr)(const char *,...))411 uvm_addr_print(struct uvm_addr_state *uaddr, const char *slot, boolean_t full,
412 int (*pr)(const char *, ...))
413 {
414 if (uaddr == NULL) {
415 (*pr)("- uvm_addr %s: NULL\n", slot);
416 return;
417 }
418
419 (*pr)("- uvm_addr %s: %p (%s 0x%lx-0x%lx)\n", slot, uaddr,
420 uaddr->uaddr_functions->uaddr_name,
421 uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr);
422 if (uaddr->uaddr_functions->uaddr_print == NULL)
423 return;
424
425 (*uaddr->uaddr_functions->uaddr_print)(uaddr, full, pr);
426 }
427 #endif /* DEBUG || DDB */
428
/*
 * Destroy a uvm_addr_state structure.
 * The uaddr must have been previously allocated from uaddr_pool.
 */
void
uaddr_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_pool, uaddr);
}
438
439
#if 0
/*
 * Linear allocator (currently compiled out).
 * This allocator uses a first-fit algorithm.
 *
 * If hint is set, search will start at the hint position.
 * Only searches forward.
 */

const struct uvm_addr_functions uaddr_lin_functions = {
	.uaddr_select = &uaddr_lin_select,
	.uaddr_destroy = &uaddr_destroy,
	.uaddr_name = "uaddr_lin"
};

/* Allocate and initialize a linear-allocator state. */
struct uvm_addr_state *
uaddr_lin_create(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct uvm_addr_state *uaddr;

	uaddr = pool_get(&uaddr_pool, PR_WAITOK);
	uaddr->uaddr_minaddr = minaddr;
	uaddr->uaddr_maxaddr = maxaddr;
	uaddr->uaddr_functions = &uaddr_lin_functions;
	return uaddr;
}

/* First-fit forward search over the whole managed range. */
int
uaddr_lin_select(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	vaddr_t guard_sz;

	/*
	 * Deal with guardpages: search for space with one extra page.
	 */
	guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);

	if (uaddr->uaddr_maxaddr - uaddr->uaddr_minaddr - guard_sz < sz)
		return ENOMEM;
	return uvm_addr_linsearch(map, uaddr, entry_out, addr_out, 0, sz,
	    align, offset, 1, uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr - sz,
	    0, guard_sz);
}
#endif
487
/*
 * Randomized allocator.
 * This allocator uses uvm_map_hint to acquire a random address and
 * searches forward from there.
 */

/* Function table for the randomized selector. */
const struct uvm_addr_functions uaddr_rnd_functions = {
	.uaddr_select = &uaddr_rnd_select,
	.uaddr_free_insert = &uaddr_rnd_insert,
	.uaddr_free_remove = &uaddr_rnd_remove,
	.uaddr_destroy = &uaddr_rnd_destroy,
#if defined(DEBUG) || defined(DDB)
#if 0
	.uaddr_print = &uaddr_rnd_print,
#endif
#endif /* DEBUG || DDB */
	.uaddr_name = "uaddr_rnd"
};
506
507 struct uvm_addr_state *
uaddr_rnd_create(vaddr_t minaddr,vaddr_t maxaddr)508 uaddr_rnd_create(vaddr_t minaddr, vaddr_t maxaddr)
509 {
510 struct uaddr_rnd_state *uaddr;
511
512 uaddr = pool_get(&uaddr_rnd_pool, PR_WAITOK);
513 uaddr->ur_uaddr.uaddr_minaddr = minaddr;
514 uaddr->ur_uaddr.uaddr_maxaddr = maxaddr;
515 uaddr->ur_uaddr.uaddr_functions = &uaddr_rnd_functions;
516 #if 0
517 TAILQ_INIT(&uaddr->ur_free);
518 #endif
519 return &uaddr->ur_uaddr;
520 }
521
/*
 * Select an address for the randomized allocator: pick a random (or
 * caller-supplied) hint and search forward from it through the
 * fspace-augmented address tree.
 */
int
uaddr_rnd_select(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	struct vmspace *vm;
	vaddr_t minaddr, maxaddr;
	vaddr_t guard_sz;
	vaddr_t low_addr, high_addr;
	struct vm_map_entry *entry, *next;
	vsize_t before_gap, after_gap;
	vaddr_t tmp;

	KASSERT((map->flags & VM_MAP_ISVMSPACE) != 0);
	vm = (struct vmspace *)map;

	/* Deal with guardpages: search for space with one extra page. */
	guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);

	if (uaddr->uaddr_maxaddr - guard_sz < sz)
		return ENOMEM;
	minaddr = uvm_addr_align_forward(uaddr->uaddr_minaddr, align, offset);
	maxaddr = uvm_addr_align_backward(uaddr->uaddr_maxaddr - sz - guard_sz,
	    align, offset);

	/* Quick fail if the allocation won't fit. */
	if (minaddr >= maxaddr)
		return ENOMEM;

	/* Select a hint. */
	if (hint == 0)
		hint = uvm_map_hint(vm, prot, minaddr, maxaddr);
	/* Clamp hint to uaddr range. */
	hint = MIN(MAX(hint, minaddr), maxaddr);

	/* Align hint to align,offset parameters. */
	tmp = hint;
	hint = uvm_addr_align_forward(tmp, align, offset);
	/* Check for overflow during alignment. */
	if (hint < tmp || hint > maxaddr)
		return ENOMEM; /* Compatibility mode: never look backwards. */

	before_gap = 0;
	after_gap = guard_sz;
	hint -= MIN(hint, before_gap);

	/*
	 * Use the augmented address tree to look up the first entry
	 * at or after hint with sufficient space.
	 *
	 * This code is the original optimized code, but will fail if the
	 * subtree it looks at does have sufficient space, but fails to meet
	 * the align constraint.
	 *
	 * Guard: subtree is not exhausted and max(fspace) >= required.
	 */
	entry = uvm_map_entrybyaddr(&map->addr, hint);

	/* Walk up the tree, until there is at least sufficient space. */
	while (entry != NULL &&
	    entry->fspace_augment < before_gap + after_gap + sz)
		entry = RBT_PARENT(uvm_map_addr, entry);

	while (entry != NULL) {
		/* Test if this fits. */
		if (VMMAP_FREE_END(entry) > hint &&
		    uvm_map_uaddr_e(map, entry) == uaddr &&
		    uvm_addr_fitspace(&low_addr, &high_addr,
		    MAX(uaddr->uaddr_minaddr, VMMAP_FREE_START(entry)),
		    MIN(uaddr->uaddr_maxaddr, VMMAP_FREE_END(entry)),
		    sz, align, offset, before_gap, after_gap) == 0) {
			*entry_out = entry;
			/* Prefer the hint itself when it is a valid fit. */
			if (hint >= low_addr && hint <= high_addr)
				*addr_out = hint;
			else
				*addr_out = low_addr;
			return 0;
		}

		/* RBT_NEXT, but skip subtrees that cannot possibly fit. */
		next = RBT_RIGHT(uvm_map_addr, entry);
		if (next != NULL &&
		    next->fspace_augment >= before_gap + after_gap + sz) {
			/* Descend to leftmost node of the right subtree. */
			entry = next;
			while ((next = RBT_LEFT(uvm_map_addr, entry)) !=
			    NULL)
				entry = next;
		} else {
do_parent:
			/*
			 * Climb until we arrive from a left child; that
			 * parent is the in-order successor.  Reaching the
			 * root from the right ends the walk.
			 */
			next = RBT_PARENT(uvm_map_addr, entry);
			if (next == NULL)
				entry = NULL;
			else if (RBT_LEFT(uvm_map_addr, next) == entry)
				entry = next;
			else {
				entry = next;
				goto do_parent;
			}
		}
	}

	/* Lookup failed. */
	return ENOMEM;
}
627
/*
 * Destroy a uaddr_rnd_state structure.
 * Returns the state to uaddr_rnd_pool.
 */
void
uaddr_rnd_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_rnd_pool, uaddr);
}
636
/*
 * Free-entry insertion hook for the rnd selector.
 * The tailq bookkeeping is compiled out, so this is a no-op.
 */
void
uaddr_rnd_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	/* Nothing to do. */
}
646
/*
 * Free-entry removal hook for the rnd selector.
 * The tailq bookkeeping is compiled out, so this is a no-op.
 */
void
uaddr_rnd_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	/* Nothing to do. */
}
656
#if 0
#if defined(DEBUG) || defined(DDB)
/*
 * Dump the rnd selector's free list (compiled out together with the
 * tailq bookkeeping above).  Also verifies the list is sorted by
 * address, reporting any entry that starts before the previous
 * entry's free-space end.
 */
void
uaddr_rnd_print(struct uvm_addr_state *uaddr_p, boolean_t full,
    int (*pr)(const char*, ...))
{
	struct vm_map_entry *entry;
	struct uaddr_rnd_state *uaddr;
	vaddr_t addr;
	size_t count;
	vsize_t space;

	uaddr = (struct uaddr_rnd_state *)uaddr_p;
	addr = 0;
	count = 0;
	space = 0;
	TAILQ_FOREACH(entry, &uaddr->ur_free, dfree.tailq) {
		count++;
		space += entry->fspace;

		if (full) {
			(*pr)("\tentry %p: 0x%lx-0x%lx G=0x%lx F=0x%lx\n",
			    entry, entry->start, entry->end,
			    entry->guard, entry->fspace);
			(*pr)("\t\tfree: 0x%lx-0x%lx\n",
			    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
		}
		/* Ordering violation: print the entry even in terse mode. */
		if (entry->start < addr) {
			if (!full)
				(*pr)("\tentry %p: 0x%lx-0x%lx "
				    "G=0x%lx F=0x%lx\n",
				    entry, entry->start, entry->end,
				    entry->guard, entry->fspace);
			(*pr)("\t\tstart=0x%lx, expected at least 0x%lx\n",
			    entry->start, addr);
		}

		addr = VMMAP_FREE_END(entry);
	}
	(*pr)("\t0x%lu entries, 0x%lx free bytes\n", count, space);
}
#endif /* DEBUG || DDB */
#endif
700
/*
 * Kernel allocation bootstrap logic.
 */

/*
 * Function table for the bootstrap selector; no free-list hooks are
 * needed because selection scans the map directly.
 */
const struct uvm_addr_functions uaddr_kernel_functions = {
	.uaddr_select = &uaddr_kbootstrap_select,
	.uaddr_destroy = &uaddr_kbootstrap_destroy,
	.uaddr_name = "uaddr_kbootstrap"
};
710
/*
 * Select an address from the map.
 *
 * This function ignores the uaddr spec and instead uses the map directly.
 * Because of that property, the uaddr algorithm can be shared across all
 * kernel maps.
 */
int
uaddr_kbootstrap_select(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
{
	vaddr_t tmp;

	/*
	 * First-fit scan of the whole address tree; only free space
	 * below the current uvm_maxkaddr watermark is eligible.
	 */
	RBT_FOREACH(*entry_out, uvm_map_addr, &map->addr) {
		if (VMMAP_FREE_END(*entry_out) <= uvm_maxkaddr &&
		    uvm_addr_fitspace(addr_out, &tmp,
		    VMMAP_FREE_START(*entry_out), VMMAP_FREE_END(*entry_out),
		    sz, align, offset, 0, 0) == 0)
			return 0;
	}

	return ENOMEM;
}
735
/*
 * Don't destroy the kernel bootstrap allocator: it lives in static
 * storage (uaddr_kbootstrap), not in a pool.
 */
void
uaddr_kbootstrap_destroy(struct uvm_addr_state *uaddr)
{
	KASSERT(uaddr == (struct uvm_addr_state *)&uaddr_kbootstrap);
}
744
745 #ifndef SMALL_KERNEL
/*
 * Best fit algorithm.
 */

/* Function table for the best-fit selector. */
const struct uvm_addr_functions uaddr_bestfit_functions = {
	.uaddr_select = &uaddr_bestfit_select,
	.uaddr_free_insert = &uaddr_bestfit_insert,
	.uaddr_free_remove = &uaddr_bestfit_remove,
	.uaddr_destroy = &uaddr_bestfit_destroy,
	.uaddr_name = "uaddr_bestfit"
};
757
758 struct uvm_addr_state *
uaddr_bestfit_create(vaddr_t minaddr,vaddr_t maxaddr)759 uaddr_bestfit_create(vaddr_t minaddr, vaddr_t maxaddr)
760 {
761 struct uaddr_bestfit_state *uaddr;
762
763 uaddr = pool_get(&uaddr_bestfit_pool, PR_WAITOK);
764 uaddr->ubf_uaddr.uaddr_minaddr = minaddr;
765 uaddr->ubf_uaddr.uaddr_maxaddr = maxaddr;
766 uaddr->ubf_uaddr.uaddr_functions = &uaddr_bestfit_functions;
767 RBT_INIT(uaddr_free_rbtree, &uaddr->ubf_free);
768 return &uaddr->ubf_uaddr;
769 }
770
/* Destroy a best-fit selector state; returns it to uaddr_bestfit_pool. */
void
uaddr_bestfit_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_bestfit_pool, uaddr);
}
776
777 void
uaddr_bestfit_insert(struct vm_map * map,struct uvm_addr_state * uaddr_p,struct vm_map_entry * entry)778 uaddr_bestfit_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
779 struct vm_map_entry *entry)
780 {
781 struct uaddr_bestfit_state *uaddr;
782 struct vm_map_entry *rb_rv;
783
784 uaddr = (struct uaddr_bestfit_state *)uaddr_p;
785 if ((rb_rv = RBT_INSERT(uaddr_free_rbtree, &uaddr->ubf_free, entry)) !=
786 NULL) {
787 panic("%s: duplicate insertion: state %p "
788 "inserting %p, colliding with %p", __func__,
789 uaddr, entry, rb_rv);
790 }
791 }
792
793 void
uaddr_bestfit_remove(struct vm_map * map,struct uvm_addr_state * uaddr_p,struct vm_map_entry * entry)794 uaddr_bestfit_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
795 struct vm_map_entry *entry)
796 {
797 struct uaddr_bestfit_state *uaddr;
798
799 uaddr = (struct uaddr_bestfit_state *)uaddr_p;
800 if (RBT_REMOVE(uaddr_free_rbtree, &uaddr->ubf_free, entry) != entry)
801 panic("%s: entry was not in tree", __func__);
802 }
803
/*
 * Best-fit address selection: find the smallest free-space entry
 * capable of holding the request, then place the allocation at
 * whichever end of that entry wastes the least space.
 */
int
uaddr_bestfit_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	vaddr_t min, max;
	struct uaddr_bestfit_state *uaddr;
	struct vm_map_entry *entry;
	vsize_t guardsz;

	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
	guardsz = ((map->flags & VM_MAP_GUARDPAGES) ? PAGE_SIZE : 0);
	if (sz + guardsz < sz) /* Overflow. */
		return ENOMEM;

	/*
	 * Find smallest item on freelist capable of holding item.
	 * Deal with guardpages: search for space with one extra page.
	 */
	entry = uvm_addr_entrybyspace(&uaddr->ubf_free, sz + guardsz);
	if (entry == NULL)
		return ENOMEM;

	/*
	 * Walk the tree until we find an entry that fits.
	 * Successors in the size tree are at least as large, so only
	 * the align/offset constraint can reject them.
	 */
	while (uvm_addr_fitspace(&min, &max,
	    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
	    sz, align, offset, 0, guardsz) != 0) {
		entry = RBT_NEXT(uaddr_free_rbtree, entry);
		if (entry == NULL)
			return ENOMEM;
	}

	/*
	 * Return the address that generates the least fragmentation:
	 * pick min or max depending on which end leaves the smaller
	 * leftover gap in the entry.
	 */
	*entry_out = entry;
	*addr_out = (min - VMMAP_FREE_START(entry) <=
	    VMMAP_FREE_END(entry) - guardsz - sz - max ?
	    min : max);
	return 0;
}
848 #endif /* !SMALL_KERNEL */
849
850
851 #ifndef SMALL_KERNEL
/*
 * A userspace allocator based on pivots.
 */

/* Function table for the pivot selector. */
const struct uvm_addr_functions uaddr_pivot_functions = {
	.uaddr_select = &uaddr_pivot_select,
	.uaddr_free_insert = &uaddr_pivot_insert,
	.uaddr_free_remove = &uaddr_pivot_remove,
	.uaddr_destroy = &uaddr_pivot_destroy,
#if defined(DEBUG) || defined(DDB)
	.uaddr_print = &uaddr_pivot_print,
#endif /* DEBUG || DDB */
	.uaddr_name = "uaddr_pivot"
};
866
867 /*
868 * A special random function for pivots.
869 *
870 * This function will return:
871 * - a random number
872 * - a multiple of PAGE_SIZE
873 * - at least PAGE_SIZE
874 *
875 * The random function has a slightly higher change to return a small number.
876 */
877 vsize_t
uaddr_pivot_random(void)878 uaddr_pivot_random(void)
879 {
880 int r;
881
882 /*
883 * The sum of two six-sided dice will have a normal distribution.
884 * We map the highest probable number to 1, by folding the curve
885 * (think of a graph on a piece of paper, that you fold).
886 *
887 * Because the fold happens at PIVOT_RND - 1, the numbers 0 and 1
888 * have the same and highest probability of happening.
889 */
890 r = arc4random_uniform(PIVOT_RND) + arc4random_uniform(PIVOT_RND) -
891 (PIVOT_RND - 1);
892 if (r < 0)
893 r = -r;
894
895 /*
896 * Make the returned value at least PAGE_SIZE and a multiple of
897 * PAGE_SIZE.
898 */
899 return (vaddr_t)(1 + r) << PAGE_SHIFT;
900 }
901
/*
 * Select a new pivot.
 *
 * A pivot must:
 * - be chosen random
 * - have a randomly chosen gap before it, where the uaddr_state starts
 * - have a randomly chosen gap after it, before the uaddr_state ends
 *
 * Furthermore, the pivot must provide sufficient space for the allocation.
 * The addr will be set to the selected address.
 *
 * Returns ENOMEM on failure.
 */
int
uaddr_pivot_newpivot(struct vm_map *map, struct uaddr_pivot_state *uaddr,
    struct uaddr_pivot *pivot,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vsize_t before_gap, vsize_t after_gap)
{
	struct vm_map_entry *entry, *found;
	vaddr_t minaddr, maxaddr;
	vsize_t dist;
	vaddr_t found_minaddr, found_maxaddr;
	vaddr_t min, max;
	vsize_t arc4_arg;
	int fit_error;
	u_int32_t path;

	minaddr = uaddr->up_uaddr.uaddr_minaddr;
	maxaddr = uaddr->up_uaddr.uaddr_maxaddr;
	KASSERT(minaddr < maxaddr);
#ifdef DIAGNOSTIC
	if (minaddr + 2 * PAGE_SIZE > maxaddr) {
		panic("uaddr_pivot_newpivot: cannot grant random pivot "
		    "in area less than 2 pages (size = 0x%lx)",
		    maxaddr - minaddr);
	}
#endif /* DIAGNOSTIC */

	/*
	 * Gap calculation: 1/32 of the size of the managed area.
	 *
	 * At most: sufficient to not get truncated at arc4random.
	 * At least: 2 PAGE_SIZE
	 *
	 * minaddr and maxaddr will be changed according to arc4random.
	 */
	dist = MAX((maxaddr - minaddr) / 32, 2 * (vaddr_t)PAGE_SIZE);
	if (dist >> PAGE_SHIFT > 0xffffffff) {
		/* Gap too large for arc4random_uniform; use full 32 bits. */
		minaddr += (vsize_t)arc4random() << PAGE_SHIFT;
		maxaddr -= (vsize_t)arc4random() << PAGE_SHIFT;
	} else {
		minaddr += (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
		    PAGE_SHIFT;
		maxaddr -= (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
		    PAGE_SHIFT;
	}

	/*
	 * A very fast way to find an entry that will be large enough
	 * to hold the allocation, but still is found more or less
	 * randomly: the tree path selector has a 50% chance to go for
	 * a bigger or smaller entry.
	 *
	 * Note that the memory may actually be available,
	 * but the fragmentation may be so bad and the gaps chosen
	 * so unfortunately, that the allocation will not succeed.
	 * Or the alignment can only be satisfied by an entry that
	 * is not visited in the randomly selected path.
	 *
	 * This code finds an entry with sufficient space in O(log n) time.
	 */
	path = arc4random();
	found = NULL;
	entry = RBT_ROOT(uaddr_free_rbtree, &uaddr->up_free);
	while (entry != NULL) {
		fit_error = uvm_addr_fitspace(&min, &max,
		    MAX(VMMAP_FREE_START(entry), minaddr),
		    MIN(VMMAP_FREE_END(entry), maxaddr),
		    sz, align, offset, before_gap, after_gap);

		/* It fits, save this entry. */
		if (fit_error == 0) {
			found = entry;
			found_minaddr = min;
			found_maxaddr = max;
		}

		/*
		 * Next: on a misfit, only larger entries (to the right)
		 * can help; on a fit, consume one random path bit to
		 * pick a direction.
		 */
		if (fit_error != 0)
			entry = RBT_RIGHT(uaddr_free_rbtree, entry);
		else if ((path & 0x1) == 0) {
			path >>= 1;
			entry = RBT_RIGHT(uaddr_free_rbtree, entry);
		} else {
			path >>= 1;
			entry = RBT_LEFT(uaddr_free_rbtree, entry);
		}
	}
	if (found == NULL)
		return ENOMEM; /* Not found a large enough region. */

	/*
	 * Calculate a random address within found.
	 *
	 * found_minaddr and found_maxaddr are already aligned, so be sure
	 * to select a multiple of align as the offset in the entry.
	 * Preferably, arc4random_uniform is used to provide no bias within
	 * the entry.
	 * However if the size of the entry exceeds arc4random_uniforms
	 * argument limit, we simply use arc4random (thus limiting ourselves
	 * to 4G * PAGE_SIZE bytes offset).
	 */
	if (found_maxaddr == found_minaddr)
		*addr_out = found_minaddr;
	else {
		KASSERT(align >= PAGE_SIZE && (align & (align - 1)) == 0);
		arc4_arg = found_maxaddr - found_minaddr;
		if (arc4_arg > 0xffffffff) {
			*addr_out = found_minaddr +
			    (arc4random() & ~(align - 1));
		} else {
			*addr_out = found_minaddr +
			    (arc4random_uniform(arc4_arg) & ~(align - 1));
		}
	}
	/* Address was found in this entry. */
	*entry_out = found;

	/*
	 * Set up new pivot and return selected address.
	 *
	 * Depending on the direction of the pivot, the pivot must be placed
	 * at the bottom or the top of the allocation:
	 * - if the pivot moves upwards, place the pivot at the top of the
	 *   allocation,
	 * - if the pivot moves downwards, place the pivot at the bottom
	 *   of the allocation.
	 */
	pivot->entry = found;
	pivot->dir = (arc4random() & 0x1 ? 1 : -1);
	if (pivot->dir > 0)
		pivot->addr = *addr_out + sz;
	else
		pivot->addr = *addr_out;
	pivot->expire = PIVOT_EXPIRE - 1; /* First use is right now. */
	return 0;
}
1051
1052 /*
1053 * Pivot selector.
1054 *
1055 * Each time the selector is invoked, it will select a random pivot, which
1056 * it will use to select memory with. The memory will be placed at the pivot,
1057 * with a randomly sized gap between the allocation and the pivot.
1058 * The pivot will then move so it will never revisit this address.
1059 *
1060 * Each allocation, the pivot expiry timer ticks. Once the pivot becomes
1061 * expired, it will be replaced with a newly created pivot. Pivots also
1062 * automatically expire if they fail to provide memory for an allocation.
1063 *
1064 * Expired pivots are replaced using the uaddr_pivot_newpivot() function,
1065 * which will ensure the pivot points at memory in such a way that the
1066 * allocation will succeed.
1067 * As an added bonus, the uaddr_pivot_newpivot() function will perform the
1068 * allocation immediately and move the pivot as appropriate.
1069 *
1070 * If uaddr_pivot_newpivot() fails to find a new pivot that will allow the
1071 * allocation to succeed, it will not create a new pivot and the allocation
1072 * will fail.
1073 *
1074 * A pivot running into used memory will automatically expire (because it will
1075 * fail to allocate).
1076 *
1077 * Characteristics of the allocator:
1078 * - best case, an allocation is O(log N)
1079 * (it would be O(1), if it weren't for the need to check if the memory is
1080 * free; although that can be avoided...)
1081 * - worst case, an allocation is O(log N)
1082 * (the uaddr_pivot_newpivot() function has that complexity)
1083 * - failed allocations always take O(log N)
1084 * (the uaddr_pivot_newpivot() function will walk that deep into the tree).
1085 */
int
uaddr_pivot_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	struct uaddr_pivot_state *uaddr;
	struct vm_map_entry *entry;
	struct uaddr_pivot *pivot;
	vaddr_t min, max;
	vsize_t before_gap, after_gap;
	int err;

	/*
	 * When we have a hint, use the rnd allocator that finds the
	 * area that is closest to the hint, if there is such an area.
	 */
	if (hint != 0) {
		if (uaddr_rnd_select(map, uaddr_p, entry_out, addr_out,
		    sz, align, offset, prot, hint) == 0)
			return 0;
		return ENOMEM;
	}

	/*
	 * Select a random pivot and a random gap sizes around the allocation.
	 *
	 * A pivot with addr == 0 or entry == NULL has never been set up
	 * (or lost its entry); expire == 0 means it has handed out its
	 * full PIVOT_EXPIRE quota of allocations.
	 */
	uaddr = (struct uaddr_pivot_state *)uaddr_p;
	pivot = &uaddr->up_pivots[
	    arc4random_uniform(nitems(uaddr->up_pivots))];
	before_gap = uaddr_pivot_random();
	after_gap = uaddr_pivot_random();
	if (pivot->addr == 0 || pivot->entry == NULL || pivot->expire == 0)
		goto expired;	/* Pivot is invalid (null or expired). */

	/*
	 * Attempt to use the pivot to map the entry.
	 *
	 * An upward pivot (dir > 0) allocates at or above pivot->addr and
	 * then advances past the allocation; a downward pivot allocates
	 * just below pivot->addr and moves down onto the new allocation.
	 * Either way the pivot never revisits an address it handed out.
	 */
	entry = pivot->entry;
	if (pivot->dir > 0) {
		if (uvm_addr_fitspace(&min, &max,
		    MAX(VMMAP_FREE_START(entry), pivot->addr),
		    VMMAP_FREE_END(entry), sz, align, offset,
		    before_gap, after_gap) == 0) {
			*addr_out = min;
			*entry_out = entry;
			pivot->addr = min + sz;	/* Move above allocation. */
			pivot->expire--;	/* One use consumed. */
			return 0;
		}
	} else {
		if (uvm_addr_fitspace(&min, &max,
		    VMMAP_FREE_START(entry),
		    MIN(VMMAP_FREE_END(entry), pivot->addr),
		    sz, align, offset, before_gap, after_gap) == 0) {
			*addr_out = max;
			*entry_out = entry;
			pivot->addr = max;	/* Move below allocation. */
			pivot->expire--;	/* One use consumed. */
			return 0;
		}
	}

expired:
	/*
	 * Pivot expired or allocation failed.
	 * Use pivot selector to do the allocation and find a new pivot.
	 */
	err = uaddr_pivot_newpivot(map, uaddr, pivot, entry_out, addr_out,
	    sz, align, offset, before_gap, after_gap);
	return err;
}
1158
1159 /*
1160 * Free the pivot.
1161 */
void
uaddr_pivot_destroy(struct uvm_addr_state *uaddr)
{
	/* Return the pivot allocator state to its backing pool. */
	pool_put(&uaddr_pivot_pool, uaddr);
}
1167
1168 /*
1169 * Insert an entry with free space in the space tree.
1170 */
1171 void
uaddr_pivot_insert(struct vm_map * map,struct uvm_addr_state * uaddr_p,struct vm_map_entry * entry)1172 uaddr_pivot_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
1173 struct vm_map_entry *entry)
1174 {
1175 struct uaddr_pivot_state *uaddr;
1176 struct vm_map_entry *rb_rv;
1177 struct uaddr_pivot *p;
1178 vaddr_t check_addr;
1179 vaddr_t start, end;
1180
1181 uaddr = (struct uaddr_pivot_state *)uaddr_p;
1182 if ((rb_rv = RBT_INSERT(uaddr_free_rbtree, &uaddr->up_free, entry)) !=
1183 NULL) {
1184 panic("%s: duplicate insertion: state %p "
1185 "inserting entry %p which collides with %p", __func__,
1186 uaddr, entry, rb_rv);
1187 }
1188
1189 start = VMMAP_FREE_START(entry);
1190 end = VMMAP_FREE_END(entry);
1191
1192 /*
1193 * Update all pivots that are contained in this entry.
1194 */
1195 for (p = &uaddr->up_pivots[0];
1196 p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
1197 check_addr = p->addr;
1198 if (check_addr == 0)
1199 continue;
1200 if (p->dir < 0)
1201 check_addr--;
1202
1203 if (start <= check_addr &&
1204 check_addr < end) {
1205 KASSERT(p->entry == NULL);
1206 p->entry = entry;
1207 }
1208 }
1209 }
1210
1211 /*
1212 * Remove an entry with free space from the space tree.
1213 */
1214 void
uaddr_pivot_remove(struct vm_map * map,struct uvm_addr_state * uaddr_p,struct vm_map_entry * entry)1215 uaddr_pivot_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
1216 struct vm_map_entry *entry)
1217 {
1218 struct uaddr_pivot_state *uaddr;
1219 struct uaddr_pivot *p;
1220
1221 uaddr = (struct uaddr_pivot_state *)uaddr_p;
1222 if (RBT_REMOVE(uaddr_free_rbtree, &uaddr->up_free, entry) != entry)
1223 panic("%s: entry was not in tree", __func__);
1224
1225 /*
1226 * Inform any pivot with this entry that the entry is gone.
1227 * Note that this does not automatically invalidate the pivot.
1228 */
1229 for (p = &uaddr->up_pivots[0];
1230 p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
1231 if (p->entry == entry)
1232 p->entry = NULL;
1233 }
1234 }
1235
1236 /*
1237 * Create a new pivot selector.
1238 *
1239 * Initially, all pivots are in the expired state.
1240 * Two reasons for this:
1241 * - it means this allocator will not take a huge amount of time
1242 * - pivots select better on demand, because the pivot selection will be
1243 * affected by preceding allocations:
1244 * the next pivots will likely end up in different segments of free memory,
1245 * that was segmented by an earlier allocation; better spread.
1246 */
1247 struct uvm_addr_state *
uaddr_pivot_create(vaddr_t minaddr,vaddr_t maxaddr)1248 uaddr_pivot_create(vaddr_t minaddr, vaddr_t maxaddr)
1249 {
1250 struct uaddr_pivot_state *uaddr;
1251
1252 uaddr = pool_get(&uaddr_pivot_pool, PR_WAITOK);
1253 uaddr->up_uaddr.uaddr_minaddr = minaddr;
1254 uaddr->up_uaddr.uaddr_maxaddr = maxaddr;
1255 uaddr->up_uaddr.uaddr_functions = &uaddr_pivot_functions;
1256 RBT_INIT(uaddr_free_rbtree, &uaddr->up_free);
1257 memset(uaddr->up_pivots, 0, sizeof(uaddr->up_pivots));
1258
1259 return &uaddr->up_uaddr;
1260 }
1261
1262 #if defined(DEBUG) || defined(DDB)
1263 /*
1264 * Print the uaddr_pivot_state.
1265 *
1266 * If full, a listing of all entries in the state will be provided.
1267 */
1268 void
uaddr_pivot_print(struct uvm_addr_state * uaddr_p,boolean_t full,int (* pr)(const char *,...))1269 uaddr_pivot_print(struct uvm_addr_state *uaddr_p, boolean_t full,
1270 int (*pr)(const char *, ...))
1271 {
1272 struct uaddr_pivot_state *uaddr;
1273 struct uaddr_pivot *pivot;
1274 struct vm_map_entry *entry;
1275 int i;
1276 vaddr_t check_addr;
1277
1278 uaddr = (struct uaddr_pivot_state *)uaddr_p;
1279
1280 for (i = 0; i < NUM_PIVOTS; i++) {
1281 pivot = &uaddr->up_pivots[i];
1282
1283 (*pr)("\tpivot 0x%lx, epires in %d, direction %d\n",
1284 pivot->addr, pivot->expire, pivot->dir);
1285 }
1286 if (!full)
1287 return;
1288
1289 if (RBT_EMPTY(uaddr_free_rbtree, &uaddr->up_free))
1290 (*pr)("\tempty\n");
1291 /* Print list of free space. */
1292 RBT_FOREACH(entry, uaddr_free_rbtree, &uaddr->up_free) {
1293 (*pr)("\t0x%lx - 0x%lx free (0x%lx bytes)\n",
1294 VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
1295 VMMAP_FREE_END(entry) - VMMAP_FREE_START(entry));
1296
1297 for (i = 0; i < NUM_PIVOTS; i++) {
1298 pivot = &uaddr->up_pivots[i];
1299 check_addr = pivot->addr;
1300 if (check_addr == 0)
1301 continue;
1302 if (pivot->dir < 0)
1303 check_addr--;
1304
1305 if (VMMAP_FREE_START(entry) <= check_addr &&
1306 check_addr < VMMAP_FREE_END(entry)) {
1307 (*pr)("\t\tcontains pivot %d (0x%lx)\n",
1308 i, pivot->addr);
1309 }
1310 }
1311 }
1312 }
1313 #endif /* DEBUG || DDB */
1314 #endif /* !SMALL_KERNEL */
1315
1316 #ifndef SMALL_KERNEL
1317 /*
1318 * Stack/break allocator.
1319 *
1320 * Stack area is grown into in the opposite direction of the stack growth,
1321 * brk area is grown downward (because sbrk() grows upward).
1322 *
1323 * Both areas are grown into proportially: a weighted chance is used to
1324 * select which one (stack or brk area) to try. If the allocation fails,
1325 * the other one is tested.
1326 */
/*
 * Operations vector hooking the stack/brk selector into uvm_addr.
 * The generic uaddr_destroy suffices here: this allocator carries no
 * state beyond the plain struct uvm_addr_state.
 */
const struct uvm_addr_functions uaddr_stack_brk_functions = {
	.uaddr_select = &uaddr_stack_brk_select,
	.uaddr_destroy = &uaddr_destroy,
	.uaddr_name = "uaddr_stckbrk"
};
1332
1333 /*
1334 * Stack/brk address selector.
1335 */
1336 int
uaddr_stack_brk_select(struct vm_map * map,struct uvm_addr_state * uaddr,struct vm_map_entry ** entry_out,vaddr_t * addr_out,vsize_t sz,vaddr_t align,vaddr_t offset,vm_prot_t prot,vaddr_t hint)1337 uaddr_stack_brk_select(struct vm_map *map, struct uvm_addr_state *uaddr,
1338 struct vm_map_entry **entry_out, vaddr_t *addr_out,
1339 vsize_t sz, vaddr_t align, vaddr_t offset,
1340 vm_prot_t prot, vaddr_t hint)
1341 {
1342 vaddr_t start;
1343 vaddr_t end;
1344 vsize_t before_gap;
1345 vsize_t after_gap;
1346 int dir;
1347
1348 /* Set up brk search strategy. */
1349 start = MAX(map->b_start, uaddr->uaddr_minaddr);
1350 end = MIN(map->b_end, uaddr->uaddr_maxaddr);
1351 before_gap = 0;
1352 after_gap = 0;
1353 dir = -1; /* Opposite of brk() growth. */
1354
1355 if (end - start >= sz) {
1356 if (uvm_addr_linsearch(map, uaddr, entry_out, addr_out,
1357 0, sz, align, offset, dir, start, end - sz,
1358 before_gap, after_gap) == 0)
1359 return 0;
1360 }
1361
1362 /* Set up stack search strategy. */
1363 start = MAX(map->s_start, uaddr->uaddr_minaddr);
1364 end = MIN(map->s_end, uaddr->uaddr_maxaddr);
1365 before_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
1366 after_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
1367 #ifdef MACHINE_STACK_GROWS_UP
1368 dir = -1;
1369 #else
1370 dir = 1;
1371 #endif
1372 if (end - start >= before_gap + after_gap &&
1373 end - start - before_gap - after_gap >= sz) {
1374 if (uvm_addr_linsearch(map, uaddr, entry_out, addr_out,
1375 0, sz, align, offset, dir, start, end - sz,
1376 before_gap, after_gap) == 0)
1377 return 0;
1378 }
1379
1380 return ENOMEM;
1381 }
1382
1383 struct uvm_addr_state *
uaddr_stack_brk_create(vaddr_t minaddr,vaddr_t maxaddr)1384 uaddr_stack_brk_create(vaddr_t minaddr, vaddr_t maxaddr)
1385 {
1386 struct uvm_addr_state* uaddr;
1387
1388 uaddr = pool_get(&uaddr_pool, PR_WAITOK);
1389 uaddr->uaddr_minaddr = minaddr;
1390 uaddr->uaddr_maxaddr = maxaddr;
1391 uaddr->uaddr_functions = &uaddr_stack_brk_functions;
1392 return uaddr;
1393 }
1394 #endif /* !SMALL_KERNEL */
1395
1396
1397 #ifndef SMALL_KERNEL
1398 /*
1399 * Free space comparison.
1400 * Compares smaller free-space before larger free-space.
1401 */
1402 static inline int
uvm_mapent_fspace_cmp(const struct vm_map_entry * e1,const struct vm_map_entry * e2)1403 uvm_mapent_fspace_cmp(const struct vm_map_entry *e1,
1404 const struct vm_map_entry *e2)
1405 {
1406 if (e1->fspace != e2->fspace)
1407 return (e1->fspace < e2->fspace ? -1 : 1);
1408 return (e1->start < e2->start ? -1 : e1->start > e2->start);
1409 }
1410
/* Instantiate the red-black tree functions for the free-space tree. */
RBT_GENERATE(uaddr_free_rbtree, vm_map_entry, dfree.rbtree,
    uvm_mapent_fspace_cmp);
1413 #endif /* !SMALL_KERNEL */
1414