1 /* $OpenBSD: uvm_km.c,v 1.152 2024/03/27 15:41:40 kurt Exp $ */
2 /* $NetBSD: uvm_km.c,v 1.42 2001/01/14 02:10:01 thorpej Exp $ */
3
4 /*
5 * Copyright (c) 1997 Charles D. Cranor and Washington University.
6 * Copyright (c) 1991, 1993, The Regents of the University of California.
7 *
8 * All rights reserved.
9 *
10 * This code is derived from software contributed to Berkeley by
11 * The Mach Operating System project at Carnegie-Mellon University.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * @(#)vm_kern.c 8.3 (Berkeley) 1/12/94
38 * from: Id: uvm_km.c,v 1.1.2.14 1998/02/06 05:19:27 chs Exp
39 *
40 *
41 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
42 * All rights reserved.
43 *
44 * Permission to use, copy, modify and distribute this software and
45 * its documentation is hereby granted, provided that both the copyright
46 * notice and this permission notice appear in all copies of the
47 * software, derivative works or modified versions, and any portions
48 * thereof, and that both notices appear in supporting documentation.
49 *
50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
53 *
54 * Carnegie Mellon requests users of this software to return to
55 *
56 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
57 * School of Computer Science
58 * Carnegie Mellon University
59 * Pittsburgh PA 15213-3890
60 *
61 * any improvements or extensions that they make and grant Carnegie the
62 * rights to redistribute these changes.
63 */
64
65 /*
66 * uvm_km.c: handle kernel memory allocation and management
67 */
68
69 /*
70 * overview of kernel memory management:
71 *
72 * the kernel virtual address space is mapped by "kernel_map." kernel_map
73 * starts at a machine-dependent address and is VM_KERNEL_SPACE_SIZE bytes
74 * large.
75 *
76 * the kernel_map has several "submaps." submaps can only appear in
77 * the kernel_map (user processes can't use them). submaps "take over"
78 * the management of a sub-range of the kernel's address space. submaps
79 * are typically allocated at boot time and are never released. kernel
80 * virtual address space that is mapped by a submap is locked by the
81 * submap's lock -- not the kernel_map's lock.
82 *
83 * thus, the useful feature of submaps is that they allow us to break
84 * up the locking and protection of the kernel address space into smaller
85 * chunks.
86 *
87 * The VM system has several standard kernel submaps:
88 * kmem_map: Contains only wired kernel memory for malloc(9).
89 * Note: All access to this map must be protected by splvm as
90 * calls to malloc(9) are allowed in interrupt handlers.
91 * exec_map: Memory to hold arguments to system calls is allocated from
92 * this map.
93 * XXX: This is primarily used to artificially limit the number
94 * of concurrent processes doing an exec.
95 * phys_map: Buffers for vmapbuf (physio) are allocated from this map.
96 *
97 * the kernel allocates its private memory out of special uvm_objects whose
98 * reference count is set to UVM_OBJ_KERN (thus indicating that the objects
99 * are "special" and never die). all kernel objects should be thought of
100 * as large, fixed-sized, sparsely populated uvm_objects. each kernel
101 * object is the size of the kernel virtual address space (i.e.
102 * VM_KERNEL_SPACE_SIZE).
103 *
104 * most kernel private memory lives in kernel_object. the only exception
105 * to this is for memory that belongs to submaps that must be protected
106 * by splvm(). each of these submaps manages its own pages.
107 *
108 * note that just because a kernel object spans the entire kernel virtual
109 * address space doesn't mean that it has to be mapped into the entire space.
110 * large chunks of a kernel object's space go unused either because
111 * that area of kernel VM is unmapped, or there is some other type of
112 * object mapped into that range (e.g. a vnode). for a submap's kernel
113 * object, the only parts of the object that can ever be populated are the
114 * offsets that are managed by the submap.
115 *
116 * note that the "offset" in a kernel object is always the kernel virtual
117 * address minus the vm_map_min(kernel_map).
118 * example:
119 * suppose kernel_map starts at 0xf8000000 and the kernel does a
120 * uvm_km_alloc(kernel_map, PAGE_SIZE) [allocate 1 wired down page in the
121 * kernel map]. if uvm_km_alloc returns virtual address 0xf8235000,
122 * then that means that the page at offset 0x235000 in kernel_object is
123 * mapped at 0xf8235000.
124 *
125 * kernel objects have one other special property: when the kernel virtual
126 * memory mapping them is unmapped, the backing memory in the object is
127 * freed right away. this is done with the uvm_km_pgremove() function.
128 * this has to be done because there is no backing store for kernel pages
129 * and no need to save them after they are no longer referenced.
130 */
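
/*
 * Illustrative sketch (not part of the build): the offset rule above
 * means that, for a kernel virtual address "va" backed by kernel_object,
 * the corresponding page can be looked up (with the object locked)
 * roughly as
 *
 *    pg = uvm_pagelookup(uvm.kernel_object,
 *        (voff_t)(va - vm_map_min(kernel_map)));
 *
 * which is the same conversion uvm_km_pgremove() below performs for each
 * page in the range it is asked to toss.
 */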
131
132 #include <sys/param.h>
133 #include <sys/systm.h>
134 #include <sys/proc.h>
135 #include <sys/kthread.h>
136 #include <uvm/uvm.h>
137
138 /*
139 * global data structures
140 */
141
142 struct vm_map *kernel_map = NULL;
143
144 /* Unconstrained range. */
145 struct uvm_constraint_range no_constraint = { 0x0, (paddr_t)-1 };
146
147 /*
148 * local data structures
149 */
150 static struct vm_map kernel_map_store;
151
152 /*
153 * uvm_km_init: init kernel maps and objects to reflect reality (i.e.
154 * KVM already allocated for text, data, bss, and static data structures).
155 *
156 * => KVM is defined by [base.. base + VM_KERNEL_SPACE_SIZE].
157 * we assume that [base -> start] has already been allocated and that
158 * "end" is the end of the kernel virtual address range.
159 */
160 void
161 uvm_km_init(vaddr_t base, vaddr_t start, vaddr_t end)
162 {
163 /* kernel_object: for pageable anonymous kernel memory */
164 uao_init();
165 uvm.kernel_object = uao_create(VM_KERNEL_SPACE_SIZE, UAO_FLAG_KERNOBJ);
166
167 /*
168 * init the map and reserve already allocated kernel space
169 * before installing.
170 */
171
172 uvm_map_setup(&kernel_map_store, pmap_kernel(), base, end,
173 #ifdef KVA_GUARDPAGES
174 VM_MAP_PAGEABLE | VM_MAP_GUARDPAGES
175 #else
176 VM_MAP_PAGEABLE
177 #endif
178 );
179 if (base != start && uvm_map(&kernel_map_store, &base, start - base,
180 NULL, UVM_UNKNOWN_OFFSET, 0,
181 UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
182 MAP_INHERIT_NONE, MADV_RANDOM, UVM_FLAG_FIXED)) != 0)
183 panic("uvm_km_init: could not reserve space for kernel");
184
185 kernel_map = &kernel_map_store;
186
187 #ifndef __HAVE_PMAP_DIRECT
188 /* allow km_alloc calls before uvm_km_thread starts */
189 mtx_init(&uvm_km_pages.mtx, IPL_VM);
190 #endif
191 }
192
193 /*
194 * uvm_km_suballoc: allocate a submap in the kernel map. once a submap
195 * is allocated all references to that area of VM must go through it. this
196 * allows the locking of VAs in kernel_map to be broken up into regions.
197 *
198 * => if `fixed' is true, *min specifies where the region described
199 * by the submap must start
200 * => if submap is non NULL we use that as the submap, otherwise we
201 * alloc a new map
202 */
203 struct vm_map *
204 uvm_km_suballoc(struct vm_map *map, vaddr_t *min, vaddr_t *max, vsize_t size,
205 int flags, boolean_t fixed, struct vm_map *submap)
206 {
207 int mapflags = UVM_FLAG_NOMERGE | (fixed ? UVM_FLAG_FIXED : 0);
208
209 size = round_page(size); /* round up to pagesize */
210
211 /* first allocate a blank spot in the parent map */
212 if (uvm_map(map, min, size, NULL, UVM_UNKNOWN_OFFSET, 0,
213 UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
214 MAP_INHERIT_NONE, MADV_RANDOM, mapflags)) != 0) {
215 panic("uvm_km_suballoc: unable to allocate space in parent map");
216 }
217
218 /* set VM bounds (min is filled in by uvm_map) */
219 *max = *min + size;
220
221 /* add references to pmap and create or init the submap */
222 pmap_reference(vm_map_pmap(map));
223 if (submap == NULL) {
224 submap = uvm_map_create(vm_map_pmap(map), *min, *max, flags);
225 if (submap == NULL)
226 panic("uvm_km_suballoc: unable to create submap");
227 } else {
228 uvm_map_setup(submap, vm_map_pmap(map), *min, *max, flags);
229 }
230
231 /*
232 * now let uvm_map_submap plug it in...
233 */
234 if (uvm_map_submap(map, *min, *max, submap) != 0)
235 panic("uvm_km_suballoc: submap allocation failed");
236
237 return(submap);
238 }
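
/*
 * Illustrative sketch (not part of the build): a boot-time submap for a
 * hypothetical "foo_map" of FOO_MAP_SIZE bytes would typically be carved
 * out of kernel_map along these lines, with `fixed' FALSE so the kernel
 * chooses the virtual addresses:
 *
 *    vaddr_t minva, maxva;
 *
 *    foo_map = uvm_km_suballoc(kernel_map, &minva, &maxva,
 *        FOO_MAP_SIZE, 0, FALSE, NULL);
 */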
239
240 /*
241 * uvm_km_pgremove: remove pages from a kernel uvm_object.
242 *
243 * => when you unmap a part of anonymous kernel memory you want to toss
244 * the pages right away. (this gets called from uvm_unmap_...).
245 */
246 void
247 uvm_km_pgremove(struct uvm_object *uobj, vaddr_t startva, vaddr_t endva)
248 {
249 const voff_t start = startva - vm_map_min(kernel_map);
250 const voff_t end = endva - vm_map_min(kernel_map);
251 struct vm_page *pp;
252 voff_t curoff;
253 int slot;
254 int swpgonlydelta = 0;
255
256 KASSERT(UVM_OBJ_IS_AOBJ(uobj));
257 KASSERT(rw_write_held(uobj->vmobjlock));
258
259 pmap_remove(pmap_kernel(), startva, endva);
260 for (curoff = start ; curoff < end ; curoff += PAGE_SIZE) {
261 pp = uvm_pagelookup(uobj, curoff);
262 if (pp && pp->pg_flags & PG_BUSY) {
263 uvm_pagewait(pp, uobj->vmobjlock, "km_pgrm");
264 rw_enter(uobj->vmobjlock, RW_WRITE);
265 curoff -= PAGE_SIZE; /* loop back to us */
266 continue;
267 }
268
269 /* free the swap slot, then the page */
270 slot = uao_dropswap(uobj, curoff >> PAGE_SHIFT);
271
272 if (pp != NULL) {
273 uvm_lock_pageq();
274 uvm_pagefree(pp);
275 uvm_unlock_pageq();
276 } else if (slot != 0) {
277 swpgonlydelta++;
278 }
279 }
280
281 if (swpgonlydelta > 0) {
282 KASSERT(uvmexp.swpgonly >= swpgonlydelta);
283 atomic_add_int(&uvmexp.swpgonly, -swpgonlydelta);
284 }
285 }
286
287
288 /*
289 * uvm_km_pgremove_intrsafe: like uvm_km_pgremove(), but for "intrsafe"
290 * objects
291 *
292 * => when you unmap a part of anonymous kernel memory you want to toss
293 * the pages right away. (this gets called from uvm_unmap_...).
294 * => none of the pages will ever be busy, and none of them will ever
295 * be on the active or inactive queues (because these objects are
296 * never allowed to "page").
297 */
298 void
299 uvm_km_pgremove_intrsafe(vaddr_t start, vaddr_t end)
300 {
301 struct vm_page *pg;
302 vaddr_t va;
303 paddr_t pa;
304
305 for (va = start; va < end; va += PAGE_SIZE) {
306 if (!pmap_extract(pmap_kernel(), va, &pa))
307 continue;
308 pg = PHYS_TO_VM_PAGE(pa);
309 if (pg == NULL)
310 panic("uvm_km_pgremove_intrsafe: no page");
311 uvm_pagefree(pg);
312 }
313 pmap_kremove(start, end - start);
314 }
315
316 /*
317 * uvm_km_kmemalloc: lower level kernel memory allocator for malloc()
318 *
319 * => we map wired memory into the specified map using the obj passed in
320 * => NOTE: we can return 0 even if we can wait if there is not enough
321 * free VM space in the map... caller should be prepared to handle
322 * this case.
323 * => we return KVA of memory allocated
324 * => flags: NOWAIT, VALLOC - just allocate VA, TRYLOCK - fail if we can't
325 * lock the map
326 * => low, high, alignment, boundary, nsegs are the corresponding parameters
327 * to uvm_pglistalloc
328 * => flags: ZERO - correspond to uvm_pglistalloc flags
329 */
330 vaddr_t
331 uvm_km_kmemalloc_pla(struct vm_map *map, struct uvm_object *obj, vsize_t size,
332 vsize_t valign, int flags, paddr_t low, paddr_t high, paddr_t alignment,
333 paddr_t boundary, int nsegs)
334 {
335 vaddr_t kva, loopva;
336 voff_t offset;
337 struct vm_page *pg;
338 struct pglist pgl;
339 int pla_flags;
340
341 KASSERT(vm_map_pmap(map) == pmap_kernel());
342 /* UVM_KMF_VALLOC => !UVM_KMF_ZERO */
343 KASSERT(!(flags & UVM_KMF_VALLOC) ||
344 !(flags & UVM_KMF_ZERO));
345
346 /* setup for call */
347 size = round_page(size);
348 kva = vm_map_min(map); /* hint */
349 if (nsegs == 0)
350 nsegs = atop(size);
351
352 /* allocate some virtual space */
353 if (__predict_false(uvm_map(map, &kva, size, obj, UVM_UNKNOWN_OFFSET,
354 valign, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
355 MAP_INHERIT_NONE, MADV_RANDOM, (flags & UVM_KMF_TRYLOCK))) != 0)) {
356 return 0;
357 }
358
359 /* if all we wanted was VA, return now */
360 if (flags & UVM_KMF_VALLOC) {
361 return kva;
362 }
363
364 /* recover object offset from virtual address */
365 if (obj != NULL)
366 offset = kva - vm_map_min(kernel_map);
367 else
368 offset = 0;
369
370 /*
371 * now allocate and map in the memory... note that we are the only ones
372 * who should ever get a handle on this area of VM.
373 */
374 TAILQ_INIT(&pgl);
375 pla_flags = 0;
376 KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
377 if ((flags & UVM_KMF_NOWAIT) ||
378 ((flags & UVM_KMF_CANFAIL) &&
379 uvmexp.swpages - uvmexp.swpgonly <= atop(size)))
380 pla_flags |= UVM_PLA_NOWAIT;
381 else
382 pla_flags |= UVM_PLA_WAITOK;
383 if (flags & UVM_KMF_ZERO)
384 pla_flags |= UVM_PLA_ZERO;
385 if (uvm_pglistalloc(size, low, high, alignment, boundary, &pgl, nsegs,
386 pla_flags) != 0) {
387 /* Failed. */
388 uvm_unmap(map, kva, kva + size);
389 return (0);
390 }
391
392 if (obj != NULL)
393 rw_enter(obj->vmobjlock, RW_WRITE);
394
395 loopva = kva;
396 while (loopva != kva + size) {
397 pg = TAILQ_FIRST(&pgl);
398 TAILQ_REMOVE(&pgl, pg, pageq);
399 uvm_pagealloc_pg(pg, obj, offset, NULL);
400 atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
401 UVM_PAGE_OWN(pg, NULL);
402
403 /*
404 * map it in: note that we call pmap_enter with the map and
405 * object unlocked in case we are kmem_map.
406 */
407 if (obj == NULL) {
408 pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg),
409 PROT_READ | PROT_WRITE);
410 } else {
411 pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
412 PROT_READ | PROT_WRITE,
413 PROT_READ | PROT_WRITE | PMAP_WIRED);
414 }
415 loopva += PAGE_SIZE;
416 offset += PAGE_SIZE;
417 }
418 KASSERT(TAILQ_EMPTY(&pgl));
419 pmap_update(pmap_kernel());
420
421 if (obj != NULL)
422 rw_exit(obj->vmobjlock);
423
424 return kva;
425 }
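
/*
 * Illustrative sketch (not part of the build): a wired, zero-filled,
 * physically unconstrained page allocated out of kmem_map would look
 * roughly like this; as noted above, the caller must be prepared for
 * failure:
 *
 *    vaddr_t va;
 *
 *    va = uvm_km_kmemalloc_pla(kmem_map, NULL, PAGE_SIZE, 0,
 *        UVM_KMF_NOWAIT | UVM_KMF_ZERO, 0, (paddr_t)-1, 0, 0, 0);
 *    if (va == 0)
 *        ... the map or physical memory was exhausted ...
 */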
426
427 /*
428 * uvm_km_free: free an area of kernel memory
429 */
430 void
431 uvm_km_free(struct vm_map *map, vaddr_t addr, vsize_t size)
432 {
433 uvm_unmap(map, trunc_page(addr), round_page(addr+size));
434 }
435
436 /*
437 * uvm_km_alloc1: allocate wired down memory in the kernel map.
438 *
439 * => we can sleep if needed
440 */
441 vaddr_t
442 uvm_km_alloc1(struct vm_map *map, vsize_t size, vsize_t align, boolean_t zeroit)
443 {
444 vaddr_t kva, loopva;
445 voff_t offset;
446 struct vm_page *pg;
447
448 KASSERT(vm_map_pmap(map) == pmap_kernel());
449
450 size = round_page(size);
451 kva = vm_map_min(map); /* hint */
452
453 /* allocate some virtual space */
454 if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object,
455 UVM_UNKNOWN_OFFSET, align,
456 UVM_MAPFLAG(PROT_READ | PROT_WRITE,
457 PROT_READ | PROT_WRITE | PROT_EXEC,
458 MAP_INHERIT_NONE, MADV_RANDOM, 0)) != 0)) {
459 return 0;
460 }
461
462 /* recover object offset from virtual address */
463 offset = kva - vm_map_min(kernel_map);
464
465 /* now allocate the memory. we must be careful about released pages. */
466 loopva = kva;
467 while (size) {
468 rw_enter(uvm.kernel_object->vmobjlock, RW_WRITE);
469 /* allocate ram */
470 pg = uvm_pagealloc(uvm.kernel_object, offset, NULL, 0);
471 if (pg) {
472 atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
473 UVM_PAGE_OWN(pg, NULL);
474 }
475 rw_exit(uvm.kernel_object->vmobjlock);
476 if (__predict_false(pg == NULL)) {
477 if (curproc == uvm.pagedaemon_proc) {
478 /*
479 * It is unfeasible for the page daemon to
480 * sleep for memory, so free what we have
481 * allocated and fail.
482 */
483 uvm_unmap(map, kva, loopva - kva);
484 return (0);
485 } else {
486 uvm_wait("km_alloc1w"); /* wait for memory */
487 continue;
488 }
489 }
490
491 /*
492 * map it in; note we're never called with an intrsafe
493 * object, so we always use regular old pmap_enter().
494 */
495 pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
496 PROT_READ | PROT_WRITE,
497 PROT_READ | PROT_WRITE | PMAP_WIRED);
498
499 loopva += PAGE_SIZE;
500 offset += PAGE_SIZE;
501 size -= PAGE_SIZE;
502 }
503 pmap_update(map->pmap);
504
505 /*
506 * zero on request (note that "size" is now zero due to the above loop
507 * so we need to subtract kva from loopva to reconstruct the size).
508 */
509 if (zeroit)
510 memset((caddr_t)kva, 0, loopva - kva);
511
512 return kva;
513 }
514
515 #if defined(__HAVE_PMAP_DIRECT)
516 /*
517 * uvm_km_page allocator, __HAVE_PMAP_DIRECT arch
518 * On architectures with machine memory direct mapped into a portion
519 * of KVM, we have very little work to do. Just get a physical page,
520 * and find and return its VA.
521 */
522 void
523 uvm_km_page_init(void)
524 {
525 /* nothing */
526 }
527
528 void
529 uvm_km_page_lateinit(void)
530 {
531 /* nothing */
532 }
533
534 #else
535 /*
536 * uvm_km_page allocator, non __HAVE_PMAP_DIRECT archs
537 * This is a special allocator that uses a reserve of free pages
538 * to fulfill requests. It is fast and interrupt safe, but can only
539 * return page sized regions. Its primary use is as a backend for pool.
540 *
541 * The memory returned is allocated from the larger kernel_map, sparing
542 * pressure on the small interrupt-safe kmem_map. It is wired, but
543 * not zero filled.
544 */
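
/*
 * Illustrative sketch (not part of the build): pool backends reach this
 * reserve through km_alloc(9) with the single-page VA mode, roughly:
 *
 *    void *v = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty, &kd_waitok);
 *    ...
 *    km_free(v, PAGE_SIZE, &kv_page, &kp_dirty);
 */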
545
546 struct uvm_km_pages uvm_km_pages;
547
548 void uvm_km_createthread(void *);
549 void uvm_km_thread(void *);
550 struct uvm_km_free_page *uvm_km_doputpage(struct uvm_km_free_page *);
551
552 /*
553 * Allocate the initial reserve, and create the thread which will
554 * keep the reserve full. For bootstrapping, we allocate more than
555 * the lowat amount, because it may be a while before the thread is
556 * running.
557 */
558 void
559 uvm_km_page_init(void)
560 {
561 int lowat_min;
562 int i;
563 int len, bulk;
564 vaddr_t addr;
565
566 if (!uvm_km_pages.lowat) {
567 /* based on physmem, calculate a good value here */
568 uvm_km_pages.lowat = physmem / 256;
569 lowat_min = physmem < atop(16 * 1024 * 1024) ? 32 : 128;
570 if (uvm_km_pages.lowat < lowat_min)
571 uvm_km_pages.lowat = lowat_min;
572 }
573 if (uvm_km_pages.lowat > UVM_KM_PAGES_LOWAT_MAX)
574 uvm_km_pages.lowat = UVM_KM_PAGES_LOWAT_MAX;
575 uvm_km_pages.hiwat = 4 * uvm_km_pages.lowat;
576 if (uvm_km_pages.hiwat > UVM_KM_PAGES_HIWAT_MAX)
577 uvm_km_pages.hiwat = UVM_KM_PAGES_HIWAT_MAX;
578
579 /* Allocate all pages in as few allocations as possible. */
580 len = 0;
581 bulk = uvm_km_pages.hiwat;
582 while (len < uvm_km_pages.hiwat && bulk > 0) {
583 bulk = MIN(bulk, uvm_km_pages.hiwat - len);
584 addr = vm_map_min(kernel_map);
585 if (uvm_map(kernel_map, &addr, (vsize_t)bulk << PAGE_SHIFT,
586 NULL, UVM_UNKNOWN_OFFSET, 0,
587 UVM_MAPFLAG(PROT_READ | PROT_WRITE,
588 PROT_READ | PROT_WRITE, MAP_INHERIT_NONE,
589 MADV_RANDOM, UVM_KMF_TRYLOCK)) != 0) {
590 bulk /= 2;
591 continue;
592 }
593
594 for (i = len; i < len + bulk; i++, addr += PAGE_SIZE)
595 uvm_km_pages.page[i] = addr;
596 len += bulk;
597 }
598
599 uvm_km_pages.free = len;
600 for (i = len; i < UVM_KM_PAGES_HIWAT_MAX; i++)
601 uvm_km_pages.page[i] = 0;
602
603 /* tone down if really high */
604 if (uvm_km_pages.lowat > 512)
605 uvm_km_pages.lowat = 512;
606 }
607
608 void
609 uvm_km_page_lateinit(void)
610 {
611 kthread_create_deferred(uvm_km_createthread, NULL);
612 }
613
614 void
615 uvm_km_createthread(void *arg)
616 {
617 kthread_create(uvm_km_thread, NULL, &uvm_km_pages.km_proc, "kmthread");
618 }
619
620 /*
621 * Endless loop. We grab pages in increments of 16 pages, then
622 * quickly swap them into the list.
623 */
624 void
625 uvm_km_thread(void *arg)
626 {
627 vaddr_t pg[16];
628 int i;
629 int allocmore = 0;
630 int flags;
631 struct uvm_km_free_page *fp = NULL;
632
633 KERNEL_UNLOCK();
634
635 for (;;) {
636 mtx_enter(&uvm_km_pages.mtx);
637 if (uvm_km_pages.free >= uvm_km_pages.lowat &&
638 uvm_km_pages.freelist == NULL) {
639 msleep_nsec(&uvm_km_pages.km_proc, &uvm_km_pages.mtx,
640 PVM, "kmalloc", INFSLP);
641 }
642 allocmore = uvm_km_pages.free < uvm_km_pages.lowat;
643 fp = uvm_km_pages.freelist;
644 uvm_km_pages.freelist = NULL;
645 uvm_km_pages.freelistlen = 0;
646 mtx_leave(&uvm_km_pages.mtx);
647
648 if (allocmore) {
649 /*
650 * If there was nothing on the freelist, then we
651 * must obtain at least one page to make progress.
652 * So, only use UVM_KMF_TRYLOCK for the first page
653 * if fp != NULL
654 */
655 flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE,
656 PROT_READ | PROT_WRITE, MAP_INHERIT_NONE,
657 MADV_RANDOM, fp != NULL ? UVM_KMF_TRYLOCK : 0);
658 memset(pg, 0, sizeof(pg));
659 for (i = 0; i < nitems(pg); i++) {
660 pg[i] = vm_map_min(kernel_map);
661 if (uvm_map(kernel_map, &pg[i], PAGE_SIZE,
662 NULL, UVM_UNKNOWN_OFFSET, 0, flags) != 0) {
663 pg[i] = 0;
664 break;
665 }
666
667 /* made progress, so don't sleep for more */
668 flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE,
669 PROT_READ | PROT_WRITE, MAP_INHERIT_NONE,
670 MADV_RANDOM, UVM_KMF_TRYLOCK);
671 }
672
673 mtx_enter(&uvm_km_pages.mtx);
674 for (i = 0; i < nitems(pg); i++) {
675 if (uvm_km_pages.free ==
676 nitems(uvm_km_pages.page))
677 break;
678 else if (pg[i] != 0)
679 uvm_km_pages.page[uvm_km_pages.free++]
680 = pg[i];
681 }
682 wakeup(&uvm_km_pages.free);
683 mtx_leave(&uvm_km_pages.mtx);
684
685 /* Cleanup left-over pages (if any). */
686 for (; i < nitems(pg); i++) {
687 if (pg[i] != 0) {
688 uvm_unmap(kernel_map,
689 pg[i], pg[i] + PAGE_SIZE);
690 }
691 }
692 }
693 while (fp) {
694 fp = uvm_km_doputpage(fp);
695 }
696 }
697 }
698
699 struct uvm_km_free_page *
700 uvm_km_doputpage(struct uvm_km_free_page *fp)
701 {
702 vaddr_t va = (vaddr_t)fp;
703 struct vm_page *pg;
704 int freeva = 1;
705 struct uvm_km_free_page *nextfp = fp->next;
706
707 pg = uvm_atopg(va);
708
709 pmap_kremove(va, PAGE_SIZE);
710 pmap_update(kernel_map->pmap);
711
712 mtx_enter(&uvm_km_pages.mtx);
713 if (uvm_km_pages.free < uvm_km_pages.hiwat) {
714 uvm_km_pages.page[uvm_km_pages.free++] = va;
715 freeva = 0;
716 }
717 mtx_leave(&uvm_km_pages.mtx);
718
719 if (freeva)
720 uvm_unmap(kernel_map, va, va + PAGE_SIZE);
721
722 uvm_pagefree(pg);
723 return (nextfp);
724 }
725 #endif /* !__HAVE_PMAP_DIRECT */
726
727 void *
728 km_alloc(size_t sz, const struct kmem_va_mode *kv,
729 const struct kmem_pa_mode *kp, const struct kmem_dyn_mode *kd)
730 {
731 struct vm_map *map;
732 struct vm_page *pg;
733 struct pglist pgl;
734 int mapflags = 0;
735 vm_prot_t prot;
736 paddr_t pla_align;
737 int pla_flags;
738 int pla_maxseg;
739 vaddr_t va, sva = 0;
740
741 KASSERT(sz == round_page(sz));
742
743 TAILQ_INIT(&pgl);
744
745 if (kp->kp_nomem || kp->kp_pageable)
746 goto alloc_va;
747
748 pla_flags = kd->kd_waitok ? UVM_PLA_WAITOK : UVM_PLA_NOWAIT;
749 pla_flags |= UVM_PLA_TRYCONTIG;
750 if (kp->kp_zero)
751 pla_flags |= UVM_PLA_ZERO;
752
753 pla_align = kp->kp_align;
754 #ifdef __HAVE_PMAP_DIRECT
755 if (pla_align < kv->kv_align)
756 pla_align = kv->kv_align;
757 #endif
758 pla_maxseg = kp->kp_maxseg;
759 if (pla_maxseg == 0)
760 pla_maxseg = sz / PAGE_SIZE;
761
762 if (uvm_pglistalloc(sz, kp->kp_constraint->ucr_low,
763 kp->kp_constraint->ucr_high, pla_align, kp->kp_boundary,
764 &pgl, pla_maxseg, pla_flags)) {
765 return (NULL);
766 }
767
768 #ifdef __HAVE_PMAP_DIRECT
769 /*
770 * Only use direct mappings for single page or single segment
771 * allocations.
772 */
773 if (kv->kv_singlepage || kp->kp_maxseg == 1) {
774 TAILQ_FOREACH(pg, &pgl, pageq) {
775 va = pmap_map_direct(pg);
776 if (pg == TAILQ_FIRST(&pgl))
777 sva = va;
778 }
779 return ((void *)sva);
780 }
781 #endif
782 alloc_va:
783 prot = PROT_READ | PROT_WRITE;
784
785 if (kp->kp_pageable) {
786 KASSERT(kp->kp_object);
787 KASSERT(!kv->kv_singlepage);
788 } else {
789 KASSERT(kp->kp_object == NULL);
790 }
791
792 if (kv->kv_singlepage) {
793 KASSERT(sz == PAGE_SIZE);
794 #ifdef __HAVE_PMAP_DIRECT
795 panic("km_alloc: DIRECT single page");
796 #else
797 mtx_enter(&uvm_km_pages.mtx);
798 while (uvm_km_pages.free == 0) {
799 if (kd->kd_waitok == 0) {
800 mtx_leave(&uvm_km_pages.mtx);
801 uvm_pglistfree(&pgl);
802 return NULL;
803 }
804 msleep_nsec(&uvm_km_pages.free, &uvm_km_pages.mtx,
805 PVM, "getpage", INFSLP);
806 }
807 va = uvm_km_pages.page[--uvm_km_pages.free];
808 if (uvm_km_pages.free < uvm_km_pages.lowat &&
809 curproc != uvm_km_pages.km_proc) {
810 if (kd->kd_slowdown)
811 *kd->kd_slowdown = 1;
812 wakeup(&uvm_km_pages.km_proc);
813 }
814 mtx_leave(&uvm_km_pages.mtx);
815 #endif
816 } else {
817 struct uvm_object *uobj = NULL;
818
819 if (kd->kd_trylock)
820 mapflags |= UVM_KMF_TRYLOCK;
821
822 if (kp->kp_object)
823 uobj = *kp->kp_object;
824 try_map:
825 map = *kv->kv_map;
826 va = vm_map_min(map);
827 if (uvm_map(map, &va, sz, uobj, kd->kd_prefer,
828 kv->kv_align, UVM_MAPFLAG(prot, prot, MAP_INHERIT_NONE,
829 MADV_RANDOM, mapflags))) {
830 if (kv->kv_wait && kd->kd_waitok) {
831 tsleep_nsec(map, PVM, "km_allocva", INFSLP);
832 goto try_map;
833 }
834 uvm_pglistfree(&pgl);
835 return (NULL);
836 }
837 }
838 sva = va;
839 TAILQ_FOREACH(pg, &pgl, pageq) {
840 if (kp->kp_pageable)
841 pmap_enter(pmap_kernel(), va, VM_PAGE_TO_PHYS(pg),
842 prot, prot | PMAP_WIRED);
843 else
844 pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), prot);
845 va += PAGE_SIZE;
846 }
847 pmap_update(pmap_kernel());
848 return ((void *)sva);
849 }
850
851 void
852 km_free(void *v, size_t sz, const struct kmem_va_mode *kv,
853 const struct kmem_pa_mode *kp)
854 {
855 vaddr_t sva, eva, va;
856 struct vm_page *pg;
857 struct pglist pgl;
858
859 sva = (vaddr_t)v;
860 eva = sva + sz;
861
862 if (kp->kp_nomem)
863 goto free_va;
864
865 #ifdef __HAVE_PMAP_DIRECT
866 if (kv->kv_singlepage || kp->kp_maxseg == 1) {
867 TAILQ_INIT(&pgl);
868 for (va = sva; va < eva; va += PAGE_SIZE) {
869 pg = pmap_unmap_direct(va);
870 TAILQ_INSERT_TAIL(&pgl, pg, pageq);
871 }
872 uvm_pglistfree(&pgl);
873 return;
874 }
875 #else
876 if (kv->kv_singlepage) {
877 struct uvm_km_free_page *fp = v;
878
879 mtx_enter(&uvm_km_pages.mtx);
880 fp->next = uvm_km_pages.freelist;
881 uvm_km_pages.freelist = fp;
882 if (uvm_km_pages.freelistlen++ > 16)
883 wakeup(&uvm_km_pages.km_proc);
884 mtx_leave(&uvm_km_pages.mtx);
885 return;
886 }
887 #endif
888
889 if (kp->kp_pageable) {
890 pmap_remove(pmap_kernel(), sva, eva);
891 pmap_update(pmap_kernel());
892 } else {
893 TAILQ_INIT(&pgl);
894 for (va = sva; va < eva; va += PAGE_SIZE) {
895 paddr_t pa;
896
897 if (!pmap_extract(pmap_kernel(), va, &pa))
898 continue;
899
900 pg = PHYS_TO_VM_PAGE(pa);
901 if (pg == NULL) {
902 panic("km_free: unmanaged page 0x%lx", pa);
903 }
904 TAILQ_INSERT_TAIL(&pgl, pg, pageq);
905 }
906 pmap_kremove(sva, sz);
907 pmap_update(pmap_kernel());
908 uvm_pglistfree(&pgl);
909 }
910 free_va:
911 uvm_unmap(*kv->kv_map, sva, eva);
912 if (kv->kv_wait)
913 wakeup(*kv->kv_map);
914 }
915
916 const struct kmem_va_mode kv_any = {
917 .kv_map = &kernel_map,
918 };
919
920 const struct kmem_va_mode kv_intrsafe = {
921 .kv_map = &kmem_map,
922 };
923
924 const struct kmem_va_mode kv_page = {
925 .kv_singlepage = 1
926 };
927
928 const struct kmem_pa_mode kp_dirty = {
929 .kp_constraint = &no_constraint
930 };
931
932 const struct kmem_pa_mode kp_dma = {
933 .kp_constraint = &dma_constraint
934 };
935
936 const struct kmem_pa_mode kp_dma_contig = {
937 .kp_constraint = &dma_constraint,
938 .kp_maxseg = 1
939 };
940
941 const struct kmem_pa_mode kp_dma_zero = {
942 .kp_constraint = &dma_constraint,
943 .kp_zero = 1
944 };
945
946 const struct kmem_pa_mode kp_zero = {
947 .kp_constraint = &no_constraint,
948 .kp_zero = 1
949 };
950
951 const struct kmem_pa_mode kp_pageable = {
952 .kp_object = &uvm.kernel_object,
953 .kp_pageable = 1
954 /* XXX - kp_nomem, maybe, but we'll need to fix km_free. */
955 };
956
957 const struct kmem_pa_mode kp_none = {
958 .kp_nomem = 1
959 };
960
961 const struct kmem_dyn_mode kd_waitok = {
962 .kd_waitok = 1,
963 .kd_prefer = UVM_UNKNOWN_OFFSET
964 };
965
966 const struct kmem_dyn_mode kd_nowait = {
967 .kd_prefer = UVM_UNKNOWN_OFFSET
968 };
969
970 const struct kmem_dyn_mode kd_trylock = {
971 .kd_trylock = 1,
972 .kd_prefer = UVM_UNKNOWN_OFFSET
973 };
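
/*
 * Illustrative sketch (not part of the build): a typical consumer of the
 * modes above allocates a zeroed, DMA-reachable buffer and releases it
 * again roughly as follows, where "len" is assumed to already be a
 * multiple of PAGE_SIZE (km_alloc() asserts this):
 *
 *    void *buf;
 *
 *    buf = km_alloc(len, &kv_any, &kp_dma_zero, &kd_waitok);
 *    if (buf == NULL)
 *        ... allocation failed ...
 *    ...
 *    km_free(buf, len, &kv_any, &kp_dma_zero);
 */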
974