1 /* $OpenBSD: uvm_km.c,v 1.137 2020/05/23 06:15:09 jan Exp $ */ 2 /* $NetBSD: uvm_km.c,v 1.42 2001/01/14 02:10:01 thorpej Exp $ */ 3 4 /* 5 * Copyright (c) 1997 Charles D. Cranor and Washington University. 6 * Copyright (c) 1991, 1993, The Regents of the University of California. 7 * 8 * All rights reserved. 9 * 10 * This code is derived from software contributed to Berkeley by 11 * The Mach Operating System project at Carnegie-Mellon University. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 
36 * 37 * @(#)vm_kern.c 8.3 (Berkeley) 1/12/94 38 * from: Id: uvm_km.c,v 1.1.2.14 1998/02/06 05:19:27 chs Exp 39 * 40 * 41 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 42 * All rights reserved. 43 * 44 * Permission to use, copy, modify and distribute this software and 45 * its documentation is hereby granted, provided that both the copyright 46 * notice and this permission notice appear in all copies of the 47 * software, derivative works or modified versions, and any portions 48 * thereof, and that both notices appear in supporting documentation. 49 * 50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 53 * 54 * Carnegie Mellon requests users of this software to return to 55 * 56 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 57 * School of Computer Science 58 * Carnegie Mellon University 59 * Pittsburgh PA 15213-3890 60 * 61 * any improvements or extensions that they make and grant Carnegie the 62 * rights to redistribute these changes. 63 */ 64 65 /* 66 * uvm_km.c: handle kernel memory allocation and management 67 */ 68 69 /* 70 * overview of kernel memory management: 71 * 72 * the kernel virtual address space is mapped by "kernel_map." kernel_map 73 * starts at a machine-dependent address and is VM_KERNEL_SPACE_SIZE bytes 74 * large. 75 * 76 * the kernel_map has several "submaps." submaps can only appear in 77 * the kernel_map (user processes can't use them). submaps "take over" 78 * the management of a sub-range of the kernel's address space. submaps 79 * are typically allocated at boot time and are never released. kernel 80 * virtual address space that is mapped by a submap is locked by the 81 * submap's lock -- not the kernel_map's lock. 
 *
 * thus, the useful feature of submaps is that they allow us to break
 * up the locking and protection of the kernel address space into smaller
 * chunks.
 *
 * The VM system has several standard kernel submaps:
 *   kmem_map: Contains only wired kernel memory for malloc(9).
 *	       Note: All access to this map must be protected by splvm as
 *	       calls to malloc(9) are allowed in interrupt handlers.
 *   exec_map: Memory to hold arguments to system calls is allocated from
 *	       this map.
 *	       XXX: This is primarily used to artificially limit the number
 *	       of concurrent processes doing an exec.
 *   phys_map: Buffers for vmapbuf (physio) are allocated from this map.
 *
 * the kernel allocates its private memory out of special uvm_objects whose
 * reference count is set to UVM_OBJ_KERN (thus indicating that the objects
 * are "special" and never die).   all kernel objects should be thought of
 * as large, fixed-sized, sparsely populated uvm_objects.   each kernel
 * object is equal to the size of kernel virtual address space (i.e.
 * VM_KERNEL_SPACE_SIZE).
 *
 * most kernel private memory lives in kernel_object.   the only exception
 * to this is for memory that belongs to submaps that must be protected
 * by splvm(). each of these submaps manages its own pages.
 *
 * note that just because a kernel object spans the entire kernel virtual
 * address space doesn't mean that it has to be mapped into the entire space.
 * large chunks of a kernel object's space go unused either because
 * that area of kernel VM is unmapped, or there is some other type of
 * object mapped into that range (e.g. a vnode).    for submap's kernel
 * objects, the only part of the object that can ever be populated is the
 * offsets that are managed by the submap.
 *
 * note that the "offset" in a kernel object is always the kernel virtual
 * address minus the vm_map_min(kernel_map).
118 * example: 119 * suppose kernel_map starts at 0xf8000000 and the kernel does a 120 * uvm_km_alloc(kernel_map, PAGE_SIZE) [allocate 1 wired down page in the 121 * kernel map]. if uvm_km_alloc returns virtual address 0xf8235000, 122 * then that means that the page at offset 0x235000 in kernel_object is 123 * mapped at 0xf8235000. 124 * 125 * kernel objects have one other special property: when the kernel virtual 126 * memory mapping them is unmapped, the backing memory in the object is 127 * freed right away. this is done with the uvm_km_pgremove() function. 128 * this has to be done because there is no backing store for kernel pages 129 * and no need to save them after they are no longer referenced. 130 */ 131 132 #include <sys/param.h> 133 #include <sys/systm.h> 134 #include <sys/proc.h> 135 #include <sys/kthread.h> 136 #include <uvm/uvm.h> 137 138 /* 139 * global data structures 140 */ 141 142 struct vm_map *kernel_map = NULL; 143 144 /* Unconstraint range. */ 145 struct uvm_constraint_range no_constraint = { 0x0, (paddr_t)-1 }; 146 147 /* 148 * local data structues 149 */ 150 static struct vm_map kernel_map_store; 151 152 /* 153 * uvm_km_init: init kernel maps and objects to reflect reality (i.e. 154 * KVM already allocated for text, data, bss, and static data structures). 155 * 156 * => KVM is defined by [base.. base + VM_KERNEL_SPACE_SIZE]. 157 * we assume that [base -> start] has already been allocated and that 158 * "end" is the end of the kernel image span. 159 */ 160 void 161 uvm_km_init(vaddr_t base, vaddr_t start, vaddr_t end) 162 { 163 /* kernel_object: for pageable anonymous kernel memory */ 164 uao_init(); 165 uvm.kernel_object = uao_create(VM_KERNEL_SPACE_SIZE, UAO_FLAG_KERNOBJ); 166 167 /* 168 * init the map and reserve already allocated kernel space 169 * before installing. 
170 */ 171 172 uvm_map_setup(&kernel_map_store, pmap_kernel(), base, end, 173 #ifdef KVA_GUARDPAGES 174 VM_MAP_PAGEABLE | VM_MAP_GUARDPAGES 175 #else 176 VM_MAP_PAGEABLE 177 #endif 178 ); 179 if (base != start && uvm_map(&kernel_map_store, &base, start - base, 180 NULL, UVM_UNKNOWN_OFFSET, 0, 181 UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE, 182 MAP_INHERIT_NONE, MADV_RANDOM, UVM_FLAG_FIXED)) != 0) 183 panic("uvm_km_init: could not reserve space for kernel"); 184 185 kernel_map = &kernel_map_store; 186 } 187 188 /* 189 * uvm_km_suballoc: allocate a submap in the kernel map. once a submap 190 * is allocated all references to that area of VM must go through it. this 191 * allows the locking of VAs in kernel_map to be broken up into regions. 192 * 193 * => if `fixed' is true, *min specifies where the region described 194 * by the submap must start 195 * => if submap is non NULL we use that as the submap, otherwise we 196 * alloc a new map 197 */ 198 struct vm_map * 199 uvm_km_suballoc(struct vm_map *map, vaddr_t *min, vaddr_t *max, vsize_t size, 200 int flags, boolean_t fixed, struct vm_map *submap) 201 { 202 int mapflags = UVM_FLAG_NOMERGE | (fixed ? 
UVM_FLAG_FIXED : 0); 203 204 size = round_page(size); /* round up to pagesize */ 205 206 /* first allocate a blank spot in the parent map */ 207 if (uvm_map(map, min, size, NULL, UVM_UNKNOWN_OFFSET, 0, 208 UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE, 209 MAP_INHERIT_NONE, MADV_RANDOM, mapflags)) != 0) { 210 panic("uvm_km_suballoc: unable to allocate space in parent map"); 211 } 212 213 /* set VM bounds (min is filled in by uvm_map) */ 214 *max = *min + size; 215 216 /* add references to pmap and create or init the submap */ 217 pmap_reference(vm_map_pmap(map)); 218 if (submap == NULL) { 219 submap = uvm_map_create(vm_map_pmap(map), *min, *max, flags); 220 if (submap == NULL) 221 panic("uvm_km_suballoc: unable to create submap"); 222 } else { 223 uvm_map_setup(submap, vm_map_pmap(map), *min, *max, flags); 224 } 225 226 /* now let uvm_map_submap plug in it... */ 227 if (uvm_map_submap(map, *min, *max, submap) != 0) 228 panic("uvm_km_suballoc: submap allocation failed"); 229 230 return(submap); 231 } 232 233 /* 234 * uvm_km_pgremove: remove pages from a kernel uvm_object. 235 * 236 * => when you unmap a part of anonymous kernel memory you want to toss 237 * the pages right away. (this gets called from uvm_unmap_...). 
238 */ 239 void 240 uvm_km_pgremove(struct uvm_object *uobj, vaddr_t start, vaddr_t end) 241 { 242 struct vm_page *pp; 243 voff_t curoff; 244 int slot; 245 246 KASSERT(uobj->pgops == &aobj_pager); 247 248 for (curoff = start ; curoff < end ; curoff += PAGE_SIZE) { 249 pp = uvm_pagelookup(uobj, curoff); 250 if (pp && pp->pg_flags & PG_BUSY) { 251 atomic_setbits_int(&pp->pg_flags, PG_WANTED); 252 tsleep_nsec(pp, PVM, "km_pgrm", INFSLP); 253 curoff -= PAGE_SIZE; /* loop back to us */ 254 continue; 255 } 256 257 /* free the swap slot, then the page */ 258 slot = uao_dropswap(uobj, curoff >> PAGE_SHIFT); 259 260 if (pp != NULL) { 261 uvm_lock_pageq(); 262 uvm_pagefree(pp); 263 uvm_unlock_pageq(); 264 } else if (slot != 0) { 265 uvmexp.swpgonly--; 266 } 267 } 268 } 269 270 271 /* 272 * uvm_km_pgremove_intrsafe: like uvm_km_pgremove(), but for "intrsafe" 273 * objects 274 * 275 * => when you unmap a part of anonymous kernel memory you want to toss 276 * the pages right away. (this gets called from uvm_unmap_...). 277 * => none of the pages will ever be busy, and none of them will ever 278 * be on the active or inactive queues (because these objects are 279 * never allowed to "page"). 280 */ 281 void 282 uvm_km_pgremove_intrsafe(vaddr_t start, vaddr_t end) 283 { 284 struct vm_page *pg; 285 vaddr_t va; 286 paddr_t pa; 287 288 for (va = start; va < end; va += PAGE_SIZE) { 289 if (!pmap_extract(pmap_kernel(), va, &pa)) 290 continue; 291 pg = PHYS_TO_VM_PAGE(pa); 292 if (pg == NULL) 293 panic("uvm_km_pgremove_intrsafe: no page"); 294 uvm_pagefree(pg); 295 } 296 } 297 298 /* 299 * uvm_km_kmemalloc: lower level kernel memory allocator for malloc() 300 * 301 * => we map wired memory into the specified map using the obj passed in 302 * => NOTE: we can return NULL even if we can wait if there is not enough 303 * free VM space in the map... caller should be prepared to handle 304 * this case. 
305 * => we return KVA of memory allocated 306 * => flags: NOWAIT, VALLOC - just allocate VA, TRYLOCK - fail if we can't 307 * lock the map 308 * => low, high, alignment, boundary, nsegs are the corresponding parameters 309 * to uvm_pglistalloc 310 * => flags: ZERO - correspond to uvm_pglistalloc flags 311 */ 312 vaddr_t 313 uvm_km_kmemalloc_pla(struct vm_map *map, struct uvm_object *obj, vsize_t size, 314 vsize_t valign, int flags, paddr_t low, paddr_t high, paddr_t alignment, 315 paddr_t boundary, int nsegs) 316 { 317 vaddr_t kva, loopva; 318 voff_t offset; 319 struct vm_page *pg; 320 struct pglist pgl; 321 int pla_flags; 322 323 KASSERT(vm_map_pmap(map) == pmap_kernel()); 324 /* UVM_KMF_VALLOC => !UVM_KMF_ZERO */ 325 KASSERT(!(flags & UVM_KMF_VALLOC) || 326 !(flags & UVM_KMF_ZERO)); 327 328 /* setup for call */ 329 size = round_page(size); 330 kva = vm_map_min(map); /* hint */ 331 if (nsegs == 0) 332 nsegs = atop(size); 333 334 /* allocate some virtual space */ 335 if (__predict_false(uvm_map(map, &kva, size, obj, UVM_UNKNOWN_OFFSET, 336 valign, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE, 337 MAP_INHERIT_NONE, MADV_RANDOM, (flags & UVM_KMF_TRYLOCK))) != 0)) { 338 return(0); 339 } 340 341 /* if all we wanted was VA, return now */ 342 if (flags & UVM_KMF_VALLOC) { 343 return(kva); 344 } 345 346 /* recover object offset from virtual address */ 347 if (obj != NULL) 348 offset = kva - vm_map_min(kernel_map); 349 else 350 offset = 0; 351 352 /* 353 * now allocate and map in the memory... note that we are the only ones 354 * whom should ever get a handle on this area of VM. 
355 */ 356 TAILQ_INIT(&pgl); 357 pla_flags = 0; 358 KASSERT(uvmexp.swpgonly <= uvmexp.swpages); 359 if ((flags & UVM_KMF_NOWAIT) || 360 ((flags & UVM_KMF_CANFAIL) && 361 uvmexp.swpages - uvmexp.swpgonly <= atop(size))) 362 pla_flags |= UVM_PLA_NOWAIT; 363 else 364 pla_flags |= UVM_PLA_WAITOK; 365 if (flags & UVM_KMF_ZERO) 366 pla_flags |= UVM_PLA_ZERO; 367 if (uvm_pglistalloc(size, low, high, alignment, boundary, &pgl, nsegs, 368 pla_flags) != 0) { 369 /* Failed. */ 370 uvm_unmap(map, kva, kva + size); 371 return (0); 372 } 373 374 loopva = kva; 375 while (loopva != kva + size) { 376 pg = TAILQ_FIRST(&pgl); 377 TAILQ_REMOVE(&pgl, pg, pageq); 378 uvm_pagealloc_pg(pg, obj, offset, NULL); 379 atomic_clearbits_int(&pg->pg_flags, PG_BUSY); 380 UVM_PAGE_OWN(pg, NULL); 381 382 /* 383 * map it in: note that we call pmap_enter with the map and 384 * object unlocked in case we are kmem_map. 385 */ 386 if (obj == NULL) { 387 pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg), 388 PROT_READ | PROT_WRITE); 389 } else { 390 pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg), 391 PROT_READ | PROT_WRITE, 392 PROT_READ | PROT_WRITE | PMAP_WIRED); 393 } 394 loopva += PAGE_SIZE; 395 offset += PAGE_SIZE; 396 } 397 KASSERT(TAILQ_EMPTY(&pgl)); 398 pmap_update(pmap_kernel()); 399 400 return(kva); 401 } 402 403 /* 404 * uvm_km_free: free an area of kernel memory 405 */ 406 void 407 uvm_km_free(struct vm_map *map, vaddr_t addr, vsize_t size) 408 { 409 uvm_unmap(map, trunc_page(addr), round_page(addr+size)); 410 } 411 412 /* 413 * uvm_km_free_wakeup: free an area of kernel memory and wake up 414 * anyone waiting for vm space. 415 * 416 * => XXX: "wanted" bit + unlock&wait on other end? 
417 */ 418 void 419 uvm_km_free_wakeup(struct vm_map *map, vaddr_t addr, vsize_t size) 420 { 421 struct uvm_map_deadq dead_entries; 422 423 vm_map_lock(map); 424 TAILQ_INIT(&dead_entries); 425 uvm_unmap_remove(map, trunc_page(addr), round_page(addr+size), 426 &dead_entries, FALSE, TRUE); 427 wakeup(map); 428 vm_map_unlock(map); 429 430 uvm_unmap_detach(&dead_entries, 0); 431 } 432 433 /* 434 * uvm_km_alloc1: allocate wired down memory in the kernel map. 435 * 436 * => we can sleep if needed 437 */ 438 vaddr_t 439 uvm_km_alloc1(struct vm_map *map, vsize_t size, vsize_t align, boolean_t zeroit) 440 { 441 vaddr_t kva, loopva; 442 voff_t offset; 443 struct vm_page *pg; 444 445 KASSERT(vm_map_pmap(map) == pmap_kernel()); 446 447 size = round_page(size); 448 kva = vm_map_min(map); /* hint */ 449 450 /* allocate some virtual space */ 451 if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object, 452 UVM_UNKNOWN_OFFSET, align, 453 UVM_MAPFLAG(PROT_READ | PROT_WRITE, 454 PROT_READ | PROT_WRITE | PROT_EXEC, 455 MAP_INHERIT_NONE, MADV_RANDOM, 0)) != 0)) { 456 return(0); 457 } 458 459 /* recover object offset from virtual address */ 460 offset = kva - vm_map_min(kernel_map); 461 462 /* now allocate the memory. we must be careful about released pages. */ 463 loopva = kva; 464 while (size) { 465 /* allocate ram */ 466 pg = uvm_pagealloc(uvm.kernel_object, offset, NULL, 0); 467 if (pg) { 468 atomic_clearbits_int(&pg->pg_flags, PG_BUSY); 469 UVM_PAGE_OWN(pg, NULL); 470 } 471 if (__predict_false(pg == NULL)) { 472 if (curproc == uvm.pagedaemon_proc) { 473 /* 474 * It is unfeasible for the page daemon to 475 * sleep for memory, so free what we have 476 * allocated and fail. 477 */ 478 uvm_unmap(map, kva, loopva - kva); 479 return (0); 480 } else { 481 uvm_wait("km_alloc1w"); /* wait for memory */ 482 continue; 483 } 484 } 485 486 /* 487 * map it in; note we're never called with an intrsafe 488 * object, so we always use regular old pmap_enter(). 
489 */ 490 pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg), 491 PROT_READ | PROT_WRITE, 492 PROT_READ | PROT_WRITE | PMAP_WIRED); 493 494 loopva += PAGE_SIZE; 495 offset += PAGE_SIZE; 496 size -= PAGE_SIZE; 497 } 498 pmap_update(map->pmap); 499 500 /* 501 * zero on request (note that "size" is now zero due to the above loop 502 * so we need to subtract kva from loopva to reconstruct the size). 503 */ 504 if (zeroit) 505 memset((caddr_t)kva, 0, loopva - kva); 506 507 return(kva); 508 } 509 510 /* 511 * uvm_km_valloc: allocate zero-fill memory in the kernel's address space 512 * 513 * => memory is not allocated until fault time 514 */ 515 516 vaddr_t 517 uvm_km_valloc(struct vm_map *map, vsize_t size) 518 { 519 return(uvm_km_valloc_align(map, size, 0, 0)); 520 } 521 522 vaddr_t 523 uvm_km_valloc_try(struct vm_map *map, vsize_t size) 524 { 525 return(uvm_km_valloc_align(map, size, 0, UVM_FLAG_TRYLOCK)); 526 } 527 528 vaddr_t 529 uvm_km_valloc_align(struct vm_map *map, vsize_t size, vsize_t align, int flags) 530 { 531 vaddr_t kva; 532 533 KASSERT(vm_map_pmap(map) == pmap_kernel()); 534 535 size = round_page(size); 536 kva = vm_map_min(map); /* hint */ 537 538 /* allocate some virtual space, demand filled by kernel_object. 
*/ 539 540 if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object, 541 UVM_UNKNOWN_OFFSET, align, 542 UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE, 543 MAP_INHERIT_NONE, MADV_RANDOM, flags)) != 0)) { 544 return(0); 545 } 546 547 return(kva); 548 } 549 550 /* 551 * uvm_km_valloc_wait: allocate zero-fill memory in the kernel's address space 552 * 553 * => memory is not allocated until fault time 554 * => if no room in map, wait for space to free, unless requested size 555 * is larger than map (in which case we return 0) 556 */ 557 vaddr_t 558 uvm_km_valloc_prefer_wait(struct vm_map *map, vsize_t size, voff_t prefer) 559 { 560 vaddr_t kva; 561 562 KASSERT(vm_map_pmap(map) == pmap_kernel()); 563 564 size = round_page(size); 565 if (size > vm_map_max(map) - vm_map_min(map)) 566 return(0); 567 568 while (1) { 569 kva = vm_map_min(map); /* hint */ 570 571 /* 572 * allocate some virtual space. will be demand filled 573 * by kernel_object. 574 */ 575 if (__predict_true(uvm_map(map, &kva, size, uvm.kernel_object, 576 prefer, 0, 577 UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE, 578 MAP_INHERIT_NONE, MADV_RANDOM, 0)) == 0)) { 579 return(kva); 580 } 581 582 /* failed. sleep for a while (on map) */ 583 tsleep_nsec(map, PVM, "vallocwait", INFSLP); 584 } 585 /*NOTREACHED*/ 586 } 587 588 vaddr_t 589 uvm_km_valloc_wait(struct vm_map *map, vsize_t size) 590 { 591 return uvm_km_valloc_prefer_wait(map, size, UVM_UNKNOWN_OFFSET); 592 } 593 594 #if defined(__HAVE_PMAP_DIRECT) 595 /* 596 * uvm_km_page allocator, __HAVE_PMAP_DIRECT arch 597 * On architectures with machine memory direct mapped into a portion 598 * of KVM, we have very little work to do. Just get a physical page, 599 * and find and return its VA. 
600 */ 601 void 602 uvm_km_page_init(void) 603 { 604 /* nothing */ 605 } 606 607 void 608 uvm_km_page_lateinit(void) 609 { 610 /* nothing */ 611 } 612 613 #else 614 /* 615 * uvm_km_page allocator, non __HAVE_PMAP_DIRECT archs 616 * This is a special allocator that uses a reserve of free pages 617 * to fulfill requests. It is fast and interrupt safe, but can only 618 * return page sized regions. Its primary use is as a backend for pool. 619 * 620 * The memory returned is allocated from the larger kernel_map, sparing 621 * pressure on the small interrupt-safe kmem_map. It is wired, but 622 * not zero filled. 623 */ 624 625 struct uvm_km_pages uvm_km_pages; 626 627 void uvm_km_createthread(void *); 628 void uvm_km_thread(void *); 629 struct uvm_km_free_page *uvm_km_doputpage(struct uvm_km_free_page *); 630 631 /* 632 * Allocate the initial reserve, and create the thread which will 633 * keep the reserve full. For bootstrapping, we allocate more than 634 * the lowat amount, because it may be a while before the thread is 635 * running. 636 */ 637 void 638 uvm_km_page_init(void) 639 { 640 int lowat_min; 641 int i; 642 int len, bulk; 643 vaddr_t addr; 644 645 mtx_init(&uvm_km_pages.mtx, IPL_VM); 646 if (!uvm_km_pages.lowat) { 647 /* based on physmem, calculate a good value here */ 648 uvm_km_pages.lowat = physmem / 256; 649 lowat_min = physmem < atop(16 * 1024 * 1024) ? 32 : 128; 650 if (uvm_km_pages.lowat < lowat_min) 651 uvm_km_pages.lowat = lowat_min; 652 } 653 if (uvm_km_pages.lowat > UVM_KM_PAGES_LOWAT_MAX) 654 uvm_km_pages.lowat = UVM_KM_PAGES_LOWAT_MAX; 655 uvm_km_pages.hiwat = 4 * uvm_km_pages.lowat; 656 if (uvm_km_pages.hiwat > UVM_KM_PAGES_HIWAT_MAX) 657 uvm_km_pages.hiwat = UVM_KM_PAGES_HIWAT_MAX; 658 659 /* Allocate all pages in as few allocations as possible. 
*/ 660 len = 0; 661 bulk = uvm_km_pages.hiwat; 662 while (len < uvm_km_pages.hiwat && bulk > 0) { 663 bulk = MIN(bulk, uvm_km_pages.hiwat - len); 664 addr = vm_map_min(kernel_map); 665 if (uvm_map(kernel_map, &addr, (vsize_t)bulk << PAGE_SHIFT, 666 NULL, UVM_UNKNOWN_OFFSET, 0, 667 UVM_MAPFLAG(PROT_READ | PROT_WRITE, 668 PROT_READ | PROT_WRITE, MAP_INHERIT_NONE, 669 MADV_RANDOM, UVM_KMF_TRYLOCK)) != 0) { 670 bulk /= 2; 671 continue; 672 } 673 674 for (i = len; i < len + bulk; i++, addr += PAGE_SIZE) 675 uvm_km_pages.page[i] = addr; 676 len += bulk; 677 } 678 679 uvm_km_pages.free = len; 680 for (i = len; i < UVM_KM_PAGES_HIWAT_MAX; i++) 681 uvm_km_pages.page[i] = 0; 682 683 /* tone down if really high */ 684 if (uvm_km_pages.lowat > 512) 685 uvm_km_pages.lowat = 512; 686 } 687 688 void 689 uvm_km_page_lateinit(void) 690 { 691 kthread_create_deferred(uvm_km_createthread, NULL); 692 } 693 694 void 695 uvm_km_createthread(void *arg) 696 { 697 kthread_create(uvm_km_thread, NULL, &uvm_km_pages.km_proc, "kmthread"); 698 } 699 700 /* 701 * Endless loop. We grab pages in increments of 16 pages, then 702 * quickly swap them into the list. 703 */ 704 void 705 uvm_km_thread(void *arg) 706 { 707 vaddr_t pg[16]; 708 int i; 709 int allocmore = 0; 710 int flags; 711 struct uvm_km_free_page *fp = NULL; 712 713 KERNEL_UNLOCK(); 714 715 for (;;) { 716 mtx_enter(&uvm_km_pages.mtx); 717 if (uvm_km_pages.free >= uvm_km_pages.lowat && 718 uvm_km_pages.freelist == NULL) { 719 msleep_nsec(&uvm_km_pages.km_proc, &uvm_km_pages.mtx, 720 PVM, "kmalloc", INFSLP); 721 } 722 allocmore = uvm_km_pages.free < uvm_km_pages.lowat; 723 fp = uvm_km_pages.freelist; 724 uvm_km_pages.freelist = NULL; 725 uvm_km_pages.freelistlen = 0; 726 mtx_leave(&uvm_km_pages.mtx); 727 728 if (allocmore) { 729 /* 730 * If there was nothing on the freelist, then we 731 * must obtain at least one page to make progress. 
732 * So, only use UVM_KMF_TRYLOCK for the first page 733 * if fp != NULL 734 */ 735 flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE, 736 PROT_READ | PROT_WRITE, MAP_INHERIT_NONE, 737 MADV_RANDOM, fp != NULL ? UVM_KMF_TRYLOCK : 0); 738 memset(pg, 0, sizeof(pg)); 739 for (i = 0; i < nitems(pg); i++) { 740 pg[i] = vm_map_min(kernel_map); 741 if (uvm_map(kernel_map, &pg[i], PAGE_SIZE, 742 NULL, UVM_UNKNOWN_OFFSET, 0, flags) != 0) { 743 pg[i] = 0; 744 break; 745 } 746 747 /* made progress, so don't sleep for more */ 748 flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE, 749 PROT_READ | PROT_WRITE, MAP_INHERIT_NONE, 750 MADV_RANDOM, UVM_KMF_TRYLOCK); 751 } 752 753 mtx_enter(&uvm_km_pages.mtx); 754 for (i = 0; i < nitems(pg); i++) { 755 if (uvm_km_pages.free == 756 nitems(uvm_km_pages.page)) 757 break; 758 else if (pg[i] != 0) 759 uvm_km_pages.page[uvm_km_pages.free++] 760 = pg[i]; 761 } 762 wakeup(&uvm_km_pages.free); 763 mtx_leave(&uvm_km_pages.mtx); 764 765 /* Cleanup left-over pages (if any). */ 766 for (; i < nitems(pg); i++) { 767 if (pg[i] != 0) { 768 uvm_unmap(kernel_map, 769 pg[i], pg[i] + PAGE_SIZE); 770 } 771 } 772 } 773 while (fp) { 774 fp = uvm_km_doputpage(fp); 775 } 776 } 777 } 778 779 struct uvm_km_free_page * 780 uvm_km_doputpage(struct uvm_km_free_page *fp) 781 { 782 vaddr_t va = (vaddr_t)fp; 783 struct vm_page *pg; 784 int freeva = 1; 785 struct uvm_km_free_page *nextfp = fp->next; 786 787 pg = uvm_atopg(va); 788 789 pmap_kremove(va, PAGE_SIZE); 790 pmap_update(kernel_map->pmap); 791 792 mtx_enter(&uvm_km_pages.mtx); 793 if (uvm_km_pages.free < uvm_km_pages.hiwat) { 794 uvm_km_pages.page[uvm_km_pages.free++] = va; 795 freeva = 0; 796 } 797 mtx_leave(&uvm_km_pages.mtx); 798 799 if (freeva) 800 uvm_unmap(kernel_map, va, va + PAGE_SIZE); 801 802 uvm_pagefree(pg); 803 return (nextfp); 804 } 805 #endif /* !__HAVE_PMAP_DIRECT */ 806 807 void * 808 km_alloc(size_t sz, const struct kmem_va_mode *kv, 809 const struct kmem_pa_mode *kp, const struct kmem_dyn_mode *kd) 810 { 
811 struct vm_map *map; 812 struct vm_page *pg; 813 struct pglist pgl; 814 int mapflags = 0; 815 vm_prot_t prot; 816 paddr_t pla_align; 817 int pla_flags; 818 int pla_maxseg; 819 vaddr_t va, sva = 0; 820 821 KASSERT(sz == round_page(sz)); 822 823 TAILQ_INIT(&pgl); 824 825 if (kp->kp_nomem || kp->kp_pageable) 826 goto alloc_va; 827 828 pla_flags = kd->kd_waitok ? UVM_PLA_WAITOK : UVM_PLA_NOWAIT; 829 pla_flags |= UVM_PLA_TRYCONTIG; 830 if (kp->kp_zero) 831 pla_flags |= UVM_PLA_ZERO; 832 833 pla_align = kp->kp_align; 834 #ifdef __HAVE_PMAP_DIRECT 835 if (pla_align < kv->kv_align) 836 pla_align = kv->kv_align; 837 #endif 838 pla_maxseg = kp->kp_maxseg; 839 if (pla_maxseg == 0) 840 pla_maxseg = sz / PAGE_SIZE; 841 842 if (uvm_pglistalloc(sz, kp->kp_constraint->ucr_low, 843 kp->kp_constraint->ucr_high, pla_align, kp->kp_boundary, 844 &pgl, pla_maxseg, pla_flags)) { 845 return (NULL); 846 } 847 848 #ifdef __HAVE_PMAP_DIRECT 849 /* 850 * Only use direct mappings for single page or single segment 851 * allocations. 
852 */ 853 if (kv->kv_singlepage || kp->kp_maxseg == 1) { 854 TAILQ_FOREACH(pg, &pgl, pageq) { 855 va = pmap_map_direct(pg); 856 if (pg == TAILQ_FIRST(&pgl)) 857 sva = va; 858 } 859 return ((void *)sva); 860 } 861 #endif 862 alloc_va: 863 prot = PROT_READ | PROT_WRITE; 864 865 if (kp->kp_pageable) { 866 KASSERT(kp->kp_object); 867 KASSERT(!kv->kv_singlepage); 868 } else { 869 KASSERT(kp->kp_object == NULL); 870 } 871 872 if (kv->kv_singlepage) { 873 KASSERT(sz == PAGE_SIZE); 874 #ifdef __HAVE_PMAP_DIRECT 875 panic("km_alloc: DIRECT single page"); 876 #else 877 mtx_enter(&uvm_km_pages.mtx); 878 while (uvm_km_pages.free == 0) { 879 if (kd->kd_waitok == 0) { 880 mtx_leave(&uvm_km_pages.mtx); 881 uvm_pglistfree(&pgl); 882 return NULL; 883 } 884 msleep_nsec(&uvm_km_pages.free, &uvm_km_pages.mtx, 885 PVM, "getpage", INFSLP); 886 } 887 va = uvm_km_pages.page[--uvm_km_pages.free]; 888 if (uvm_km_pages.free < uvm_km_pages.lowat && 889 curproc != uvm_km_pages.km_proc) { 890 if (kd->kd_slowdown) 891 *kd->kd_slowdown = 1; 892 wakeup(&uvm_km_pages.km_proc); 893 } 894 mtx_leave(&uvm_km_pages.mtx); 895 #endif 896 } else { 897 struct uvm_object *uobj = NULL; 898 899 if (kd->kd_trylock) 900 mapflags |= UVM_KMF_TRYLOCK; 901 902 if (kp->kp_object) 903 uobj = *kp->kp_object; 904 try_map: 905 map = *kv->kv_map; 906 va = vm_map_min(map); 907 if (uvm_map(map, &va, sz, uobj, kd->kd_prefer, 908 kv->kv_align, UVM_MAPFLAG(prot, prot, MAP_INHERIT_NONE, 909 MADV_RANDOM, mapflags))) { 910 if (kv->kv_wait && kd->kd_waitok) { 911 tsleep_nsec(map, PVM, "km_allocva", INFSLP); 912 goto try_map; 913 } 914 uvm_pglistfree(&pgl); 915 return (NULL); 916 } 917 } 918 sva = va; 919 TAILQ_FOREACH(pg, &pgl, pageq) { 920 if (kp->kp_pageable) 921 pmap_enter(pmap_kernel(), va, VM_PAGE_TO_PHYS(pg), 922 prot, prot | PMAP_WIRED); 923 else 924 pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), prot); 925 va += PAGE_SIZE; 926 } 927 pmap_update(pmap_kernel()); 928 return ((void *)sva); 929 } 930 931 void 932 km_free(void *v, 
size_t sz, const struct kmem_va_mode *kv, 933 const struct kmem_pa_mode *kp) 934 { 935 vaddr_t sva, eva, va; 936 struct vm_page *pg; 937 struct pglist pgl; 938 939 sva = (vaddr_t)v; 940 eva = sva + sz; 941 942 if (kp->kp_nomem) 943 goto free_va; 944 945 #ifdef __HAVE_PMAP_DIRECT 946 if (kv->kv_singlepage || kp->kp_maxseg == 1) { 947 TAILQ_INIT(&pgl); 948 for (va = sva; va < eva; va += PAGE_SIZE) { 949 pg = pmap_unmap_direct(va); 950 TAILQ_INSERT_TAIL(&pgl, pg, pageq); 951 } 952 uvm_pglistfree(&pgl); 953 return; 954 } 955 #else 956 if (kv->kv_singlepage) { 957 struct uvm_km_free_page *fp = v; 958 959 mtx_enter(&uvm_km_pages.mtx); 960 fp->next = uvm_km_pages.freelist; 961 uvm_km_pages.freelist = fp; 962 if (uvm_km_pages.freelistlen++ > 16) 963 wakeup(&uvm_km_pages.km_proc); 964 mtx_leave(&uvm_km_pages.mtx); 965 return; 966 } 967 #endif 968 969 if (kp->kp_pageable) { 970 pmap_remove(pmap_kernel(), sva, eva); 971 pmap_update(pmap_kernel()); 972 } else { 973 TAILQ_INIT(&pgl); 974 for (va = sva; va < eva; va += PAGE_SIZE) { 975 paddr_t pa; 976 977 if (!pmap_extract(pmap_kernel(), va, &pa)) 978 continue; 979 980 pg = PHYS_TO_VM_PAGE(pa); 981 if (pg == NULL) { 982 panic("km_free: unmanaged page 0x%lx\n", pa); 983 } 984 TAILQ_INSERT_TAIL(&pgl, pg, pageq); 985 } 986 pmap_kremove(sva, sz); 987 pmap_update(pmap_kernel()); 988 uvm_pglistfree(&pgl); 989 } 990 free_va: 991 uvm_unmap(*kv->kv_map, sva, eva); 992 if (kv->kv_wait) 993 wakeup(*kv->kv_map); 994 } 995 996 const struct kmem_va_mode kv_any = { 997 .kv_map = &kernel_map, 998 }; 999 1000 const struct kmem_va_mode kv_intrsafe = { 1001 .kv_map = &kmem_map, 1002 }; 1003 1004 const struct kmem_va_mode kv_page = { 1005 .kv_singlepage = 1 1006 }; 1007 1008 const struct kmem_pa_mode kp_dirty = { 1009 .kp_constraint = &no_constraint 1010 }; 1011 1012 const struct kmem_pa_mode kp_dma = { 1013 .kp_constraint = &dma_constraint 1014 }; 1015 1016 const struct kmem_pa_mode kp_dma_contig = { 1017 .kp_constraint = &dma_constraint, 1018 
.kp_maxseg = 1 1019 }; 1020 1021 const struct kmem_pa_mode kp_dma_zero = { 1022 .kp_constraint = &dma_constraint, 1023 .kp_zero = 1 1024 }; 1025 1026 const struct kmem_pa_mode kp_zero = { 1027 .kp_constraint = &no_constraint, 1028 .kp_zero = 1 1029 }; 1030 1031 const struct kmem_pa_mode kp_pageable = { 1032 .kp_object = &uvm.kernel_object, 1033 .kp_pageable = 1 1034 /* XXX - kp_nomem, maybe, but we'll need to fix km_free. */ 1035 }; 1036 1037 const struct kmem_pa_mode kp_none = { 1038 .kp_nomem = 1 1039 }; 1040 1041 const struct kmem_dyn_mode kd_waitok = { 1042 .kd_waitok = 1, 1043 .kd_prefer = UVM_UNKNOWN_OFFSET 1044 }; 1045 1046 const struct kmem_dyn_mode kd_nowait = { 1047 .kd_prefer = UVM_UNKNOWN_OFFSET 1048 }; 1049 1050 const struct kmem_dyn_mode kd_trylock = { 1051 .kd_trylock = 1, 1052 .kd_prefer = UVM_UNKNOWN_OFFSET 1053 }; 1054