/*
 * (MPSAFE)
 *
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_kern.c	8.3 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $FreeBSD: src/sys/vm/vm_kern.c,v 1.61.2.2 2002/03/12 18:25:26 tegge Exp $
 */

/*
 * Kernel memory management.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

struct vm_map kernel_map;
struct vm_map clean_map;
struct vm_map buffer_map;

/*
 * Allocate pageable swap-backed anonymous memory
 */
void *
kmem_alloc_swapbacked(kmem_anon_desc_t *kp, vm_size_t size)
{
	int error;
	vm_pindex_t npages;

	size = round_page(size);
	npages = size / PAGE_SIZE;

	if (kp->map == NULL)
		kp->map = &kernel_map;
	kp->data = vm_map_min(&kernel_map);
	kp->size = size;
	kp->object = vm_object_allocate(OBJT_DEFAULT, npages);

	error = vm_map_find(kp->map, kp->object, NULL, 0,
			    &kp->data, size,
			    PAGE_SIZE,
			    1, VM_MAPTYPE_NORMAL,
			    VM_PROT_ALL, VM_PROT_ALL, 0);
	if (error) {
		kprintf("kmem_alloc_swapbacked: %zd bytes failed %d\n",
			size, error);
		kp->data = (vm_offset_t)0;
		kmem_free_swapbacked(kp);
		return NULL;
	}
	return ((void *)(intptr_t)kp->data);
}

void
kmem_free_swapbacked(kmem_anon_desc_t *kp)
{
	if (kp->data) {
		/*
		 * The object will be deallocated by kmem_free().
		 */
		kmem_free(kp->map, kp->data, kp->size);
		kp->data = (vm_offset_t)0;
	} else {
		/*
		 * Failure during allocation, object must be deallocated
		 * manually.
		 */
		vm_object_deallocate(kp->object);
	}
	kp->object = NULL;
}
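/*
 * Usage sketch for kmem_alloc_swapbacked()/kmem_free_swapbacked() above
 * (illustrative only, not part of this module).  A zeroed descriptor
 * leaves ->map NULL, which selects the kernel_map:
 *
 *	kmem_anon_desc_t kp;
 *	void *buf;
 *
 *	bzero(&kp, sizeof(kp));
 *	buf = kmem_alloc_swapbacked(&kp, 128 * 1024);
 *	if (buf == NULL)
 *		return (ENOMEM);
 *	... use the pageable, swap-backed buffer ...
 *	kmem_free_swapbacked(&kp);
 */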
/*
 * Allocate pageable memory in the kernel's address map.  "map" must
 * be kernel_map or a submap of kernel_map.  The caller must adjust the
 * map or enter VM pages itself.
 *
 * No requirements.
 */
vm_offset_t
kmem_alloc_pageable(vm_map_t map, vm_size_t size)
{
	vm_offset_t addr;
	int result;

	size = round_page(size);
	addr = vm_map_min(map);
	result = vm_map_find(map, NULL, NULL,
			     (vm_offset_t) 0, &addr, size,
			     PAGE_SIZE,
			     TRUE, VM_MAPTYPE_NORMAL,
			     VM_PROT_ALL, VM_PROT_ALL, 0);
	if (result != KERN_SUCCESS)
		return (0);
	return (addr);
}

/*
 * Same as kmem_alloc_pageable(), except that it creates a nofault entry.
 *
 * No requirements.
 */
vm_offset_t
kmem_alloc_nofault(vm_map_t map, vm_size_t size, vm_size_t align)
{
	vm_offset_t addr;
	int result;

	size = round_page(size);
	addr = vm_map_min(map);
	result = vm_map_find(map, NULL, NULL,
			     (vm_offset_t) 0, &addr, size,
			     align,
			     TRUE, VM_MAPTYPE_NORMAL,
			     VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
	if (result != KERN_SUCCESS)
		return (0);
	return (addr);
}

/*
 * Allocate wired-down memory in the kernel's address map or a submap.
 *
 * No requirements.
 */
vm_offset_t
kmem_alloc3(vm_map_t map, vm_size_t size, int kmflags)
{
	vm_offset_t addr;
	vm_offset_t gstart;
	vm_offset_t i;
	int count;
	int cow;

	size = round_page(size);

	if (kmflags & KM_KRESERVE)
		count = vm_map_entry_kreserve(MAP_RESERVE_COUNT);
	else
		count = vm_map_entry_reserve(MAP_RESERVE_COUNT);

	if (kmflags & KM_STACK) {
		cow = MAP_IS_KSTACK;
		gstart = PAGE_SIZE;
	} else {
		cow = 0;
		gstart = 0;
	}

	/*
	 * Use the kernel object for wired-down kernel pages.  Assume that no
	 * region of the kernel object is referenced more than once.
	 *
	 * Locate sufficient space in the map.  This will give us the final
	 * virtual address for the new memory, and thus will tell us the
	 * offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, vm_map_min(map), size, PAGE_SIZE,
			     0, &addr)) {
		vm_map_unlock(map);
		if (kmflags & KM_KRESERVE)
			vm_map_entry_krelease(count);
		else
			vm_map_entry_release(count);
		return (0);
	}
	vm_object_hold(&kernel_object);
	vm_object_reference_locked(&kernel_object);
	vm_map_insert(map, &count,
		      &kernel_object, NULL,
		      addr, addr, addr + size,
		      VM_MAPTYPE_NORMAL,
		      VM_PROT_ALL, VM_PROT_ALL, cow);
	vm_object_drop(&kernel_object);

	vm_map_unlock(map);
	if (kmflags & KM_KRESERVE)
		vm_map_entry_krelease(count);
	else
		vm_map_entry_release(count);

	/*
	 * Guarantee that there are pages already in this object before
	 * calling vm_map_wire().  This is to prevent the following
	 * scenario:
	 *
	 * 1) Threads have swapped out, so that there is a pager for the
	 *    kernel_object.
	 * 2) The kmsg zone is empty, and so we are kmem_alloc()ing a new
	 *    page for it.
	 * 3) vm_map_wire() calls vm_fault(); there is no page, but there
	 *    is a pager, so we call pager_data_request().  But the kmsg
	 *    zone is empty, so we must kmem_alloc().
	 * 4) goto 1
	 * 5) Even if the kmsg zone is not empty: when we get the data
	 *    back from the pager, it will be (very stale) non-zero data.
	 *    kmem_alloc() is defined to return zero-filled memory.
	 *
	 * We're intentionally not activating the pages we allocate, to
	 * prevent a race with page-out.  vm_map_wire() will wire the pages.
	 */
	vm_object_hold(&kernel_object);
	for (i = gstart; i < size; i += PAGE_SIZE) {
		vm_page_t mem;

		mem = vm_page_grab(&kernel_object, OFF_TO_IDX(addr + i),
				   VM_ALLOC_FORCE_ZERO | VM_ALLOC_NORMAL |
				   VM_ALLOC_RETRY);
		vm_page_unqueue_nowakeup(mem);
		vm_page_wakeup(mem);
	}
	vm_object_drop(&kernel_object);

	/*
	 * And finally, mark the data as non-pageable.
	 *
	 * NOTE: vm_map_wire() handles any kstack guard.
	 */
	vm_map_wire(map, addr, addr + size, kmflags);

	return (addr);
}
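/*
 * Usage sketch for kmem_alloc3() above (illustrative only; callers are
 * assumed to normally reach this through thin kmem_alloc()-style
 * wrappers declared in <vm/vm_kern.h>):
 *
 *	vm_offset_t va;
 *
 *	va = kmem_alloc3(&kernel_map, 4 * PAGE_SIZE, 0);
 *	if (va == 0)
 *		return (ENOMEM);
 *	... use the four wired, zero-filled pages at va ...
 *	kmem_free(&kernel_map, va, 4 * PAGE_SIZE);
 *
 * Passing KM_STACK instead of 0 additionally reserves a guard page at
 * the base of the range and marks the entry as a kernel stack.
 */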
/*
 * Release a region of kernel virtual memory allocated with kmem_alloc,
 * and return the physical pages associated with that region.
 *
 * WARNING!  If the caller entered pages into the region using
 * pmap_kenter() it must remove the pages using pmap_kremove[_quick]()
 * before freeing the underlying kmem, otherwise resident_count will be
 * mistabulated.
 *
 * No requirements.
 */
void
kmem_free(vm_map_t map, vm_offset_t addr, vm_size_t size)
{
	vm_map_remove(map, trunc_page(addr), round_page(addr + size));
}

/*
 * Used to break a system map into smaller maps, usually to reduce
 * contention and to provide large KVA spaces for subsystems like the
 * buffer cache.  See the usage sketch following kmem_alloc_attr() below.
 *
 *	parent		Map to take range from
 *	result		Map to initialize as the submap
 *	size		Size of range to find
 *	min, max	Returned endpoints of map
 *
 * No requirements.
 */
void
kmem_suballoc(vm_map_t parent, vm_map_t result,
	      vm_offset_t *min, vm_offset_t *max, vm_size_t size)
{
	int ret;

	size = round_page(size);

	*min = (vm_offset_t) vm_map_min(parent);
	ret = vm_map_find(parent, NULL, NULL,
			  (vm_offset_t) 0, min, size,
			  PAGE_SIZE,
			  TRUE, VM_MAPTYPE_UNSPECIFIED,
			  VM_PROT_ALL, VM_PROT_ALL, 0);
	if (ret != KERN_SUCCESS) {
		kprintf("kmem_suballoc: bad status return of %d.\n", ret);
		panic("kmem_suballoc");
	}
	*max = *min + size;
	pmap_reference(vm_map_pmap(parent));
	vm_map_init(result, *min, *max, vm_map_pmap(parent));
	if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS)
		panic("kmem_suballoc: unable to change range to submap");
}

/*
 * Allocates pageable memory from a sub-map of the kernel.  If the submap
 * has no room, the caller sleeps waiting for more memory in the submap.
 *
 * No requirements.
 */
vm_offset_t
kmem_alloc_wait(vm_map_t map, vm_size_t size)
{
	vm_offset_t addr;
	int count;

	size = round_page(size);

	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);

	for (;;) {
		/*
		 * To make this work for more than one map, use the map's
		 * lock to lock out sleepers/wakers.
		 */
		vm_map_lock(map);
		if (vm_map_findspace(map, vm_map_min(map),
				     size, PAGE_SIZE, 0, &addr) == 0) {
			break;
		}
		/* no space now; see if we can ever get space */
		if (vm_map_max(map) - vm_map_min(map) < size) {
			vm_map_entry_release(count);
			vm_map_unlock(map);
			return (0);
		}
		vm_map_unlock(map);
		tsleep(map, 0, "kmaw", 0);
	}
	vm_map_insert(map, &count,
		      NULL, NULL,
		      (vm_offset_t) 0, addr, addr + size,
		      VM_MAPTYPE_NORMAL,
		      VM_PROT_ALL, VM_PROT_ALL,
		      0);
	vm_map_unlock(map);
	vm_map_entry_release(count);

	return (addr);
}

/*
 * Allocates a region of the kernel address map, and physical pages
 * within the specified address range, to the kernel object.  Creates a
 * wired mapping from this region to these pages, and returns the
 * region's starting virtual address.  The allocated pages are not
 * necessarily physically contiguous.  If M_ZERO is specified through
 * the given flags, then the pages are zeroed before they are mapped.
 */
vm_offset_t
kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
		vm_paddr_t high, vm_memattr_t memattr)
{
	vm_offset_t addr, i, offset;
	vm_page_t m;
	int count;

	size = round_page(size);
	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
	vm_map_lock(map);
	if (vm_map_findspace(map, vm_map_min(map), size, PAGE_SIZE,
			     flags, &addr)) {
		vm_map_unlock(map);
		vm_map_entry_release(count);
		return (0);
	}
	offset = addr - vm_map_min(&kernel_map);
	vm_object_hold(&kernel_object);
	vm_object_reference_locked(&kernel_object);
	vm_map_insert(map, &count,
		      &kernel_object, NULL,
		      offset, addr, addr + size,
		      VM_MAPTYPE_NORMAL,
		      VM_PROT_ALL, VM_PROT_ALL, 0);
	vm_map_unlock(map);
	vm_map_entry_release(count);
	vm_object_drop(&kernel_object);
	for (i = 0; i < size; i += PAGE_SIZE) {
		m = vm_page_alloc_contig(low, high, PAGE_SIZE, 0,
					 PAGE_SIZE, memattr);
		if (m == NULL)
			return (0);
		vm_object_hold(&kernel_object);
		vm_page_insert(m, &kernel_object, OFF_TO_IDX(offset + i));
		vm_object_drop(&kernel_object);
		if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
			pmap_zero_page(VM_PAGE_TO_PHYS(m));
		m->valid = VM_PAGE_BITS_ALL;
	}
	vm_map_wire(map, addr, addr + size, 0);
	return (addr);
}
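/*
 * Usage sketch for kmem_alloc_attr() above (illustrative only;
 * VM_MEMATTR_DEFAULT is assumed to be available): wiring down zeroed
 * pages that must lie below 4GB, e.g. for a DMA-limited device:
 *
 *	vm_offset_t va;
 *
 *	va = kmem_alloc_attr(&kernel_map, 64 * 1024, M_ZERO,
 *			     0, 0xffffffffUL, VM_MEMATTR_DEFAULT);
 *	if (va == 0)
 *		return (ENOMEM);
 *	... the pages are wired but not necessarily contiguous ...
 *	kmem_free(&kernel_map, va, 64 * 1024);
 */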
/*
 * Returns memory to a submap of the kernel, and wakes up any processes
 * waiting for memory in that map.
 *
 * No requirements.
 */
void
kmem_free_wakeup(vm_map_t map, vm_offset_t addr, vm_size_t size)
{
	int count;

	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
	vm_map_lock(map);
	vm_map_delete(map, trunc_page(addr), round_page(addr + size), &count);
	wakeup(map);
	vm_map_unlock(map);
	vm_map_entry_release(count);
}
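/*
 * Usage sketch tying kmem_suballoc(), kmem_alloc_wait() and
 * kmem_free_wakeup() together (illustrative only; "args_map" and the
 * sizes are hypothetical):
 *
 *	static struct vm_map args_map;
 *	vm_offset_t amin, amax, va;
 *
 *	kmem_suballoc(&kernel_map, &args_map, &amin, &amax,
 *		      16 * 1024 * 1024);
 *	...
 *	va = kmem_alloc_wait(&args_map, 64 * 1024);  // sleeps until space
 *	... stage data in [va, va + 64 * 1024) ...
 *	kmem_free_wakeup(&args_map, va, 64 * 1024);  // wakes waiters
 */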
/*
 * Create the kernel_map for (KvaStart,KvaEnd) and insert mappings to
 * cover areas already allocated or reserved thus far.
 *
 * The areas (virtual_start, virtual_end) and (virtual2_start,
 * virtual2_end) are available, so the cutouts are the areas around
 * these ranges between KvaStart and KvaEnd.
 *
 * Depend on the zalloc bootstrap cache to get our vm_map_entry_t.
 * Called from the low level boot code only.
 */
void
kmem_init(void)
{
	vm_offset_t addr;
	vm_map_t m;
	int count;

	m = vm_map_create(&kernel_map, &kernel_pmap, KvaStart, KvaEnd);
	vm_map_lock(m);
	/* N.B.: cannot use kgdb to debug, starting with this assignment ... */
	m->system_map = 1;
	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
	addr = KvaStart;
	if (virtual2_start) {
		if (addr < virtual2_start) {
			vm_map_insert(m, &count,
				      NULL, NULL,
				      (vm_offset_t) 0, addr, virtual2_start,
				      VM_MAPTYPE_NORMAL,
				      VM_PROT_ALL, VM_PROT_ALL, 0);
		}
		addr = virtual2_end;
	}
	if (addr < virtual_start) {
		vm_map_insert(m, &count,
			      NULL, NULL,
			      (vm_offset_t) 0, addr, virtual_start,
			      VM_MAPTYPE_NORMAL,
			      VM_PROT_ALL, VM_PROT_ALL, 0);
	}
	addr = virtual_end;
	if (addr < KvaEnd) {
		vm_map_insert(m, &count,
			      NULL, NULL,
			      (vm_offset_t) 0, addr, KvaEnd,
			      VM_MAPTYPE_NORMAL,
			      VM_PROT_ALL, VM_PROT_ALL, 0);
	}
	/* ... and ending with the completion of the above `insert' */
	vm_map_unlock(m);
	vm_map_entry_release(count);
}

/*
 * No requirements.
 */
static int
kvm_size(SYSCTL_HANDLER_ARGS)
{
	unsigned long ksize = KvaSize;

	return sysctl_handle_long(oidp, &ksize, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_ULONG|CTLFLAG_RD,
	    0, 0, kvm_size, "LU", "Size of KVM");

/*
 * No requirements.
 */
static int
kvm_free(SYSCTL_HANDLER_ARGS)
{
	unsigned long kfree = virtual_end - kernel_vm_end;

	return sysctl_handle_long(oidp, &kfree, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_ULONG|CTLFLAG_RD,
	    0, 0, kvm_free, "LU", "Amount of KVM free");
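/*
 * The two read-only sysctls above can be queried from userland with,
 * e.g. (illustrative only):
 *
 *	$ sysctl vm.kvm_size vm.kvm_free
 *
 * or programmatically via sysctlbyname("vm.kvm_size", ...).
 */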