/*
 * (MPSAFE)
 *
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_kern.c	8.3 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $FreeBSD: src/sys/vm/vm_kern.c,v 1.61.2.2 2002/03/12 18:25:26 tegge Exp $
 */

/*
 * Kernel memory management.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

struct vm_map kernel_map;
struct vm_map clean_map;
struct vm_map buffer_map;

/*
 * Allocate pageable memory in the kernel's address map.  "map" must
 * be kernel_map or a submap of kernel_map.
 *
 * No requirements.
 */
vm_offset_t
kmem_alloc_pageable(vm_map_t map, vm_size_t size)
{
        vm_offset_t addr;
        int result;

        size = round_page(size);
        addr = vm_map_min(map);
        result = vm_map_find(map, NULL, NULL,
                             (vm_offset_t) 0, &addr, size,
                             PAGE_SIZE,
                             TRUE, VM_MAPTYPE_NORMAL,
                             VM_PROT_ALL, VM_PROT_ALL, 0);
        if (result != KERN_SUCCESS)
                return (0);
        return (addr);
}

/*
 * Same as kmem_alloc_pageable, except that it creates a nofault entry.
 *
 * No requirements.
 */
vm_offset_t
kmem_alloc_nofault(vm_map_t map, vm_size_t size, vm_size_t align)
{
        vm_offset_t addr;
        int result;

        size = round_page(size);
        addr = vm_map_min(map);
        result = vm_map_find(map, NULL, NULL,
                             (vm_offset_t) 0, &addr, size,
                             align,
                             TRUE, VM_MAPTYPE_NORMAL,
                             VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
        if (result != KERN_SUCCESS)
                return (0);
        return (addr);
}
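/*
 * Example (hypothetical sketch, not part of this file): a caller
 * allocating a pageable scratch region from kernel_map and releasing it
 * later with kmem_free(), defined below.  Pages are faulted in on first
 * touch rather than wired up front:
 *
 *	vm_offset_t va;
 *	vm_size_t len = 4 * PAGE_SIZE;
 *
 *	va = kmem_alloc_pageable(&kernel_map, len);
 *	if (va == 0)
 *		return (ENOMEM);	(no space in the map)
 *	...
 *	kmem_free(&kernel_map, va, len);
 */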
/*
 * Allocate wired-down memory in the kernel's address map or a submap.
 *
 * No requirements.
 */
vm_offset_t
kmem_alloc3(vm_map_t map, vm_size_t size, int kmflags)
{
        vm_offset_t addr;
        vm_offset_t gstart;
        vm_offset_t i;
        int count;
        int cow;

        size = round_page(size);

        if (kmflags & KM_KRESERVE)
                count = vm_map_entry_kreserve(MAP_RESERVE_COUNT);
        else
                count = vm_map_entry_reserve(MAP_RESERVE_COUNT);

        if (kmflags & KM_STACK) {
                cow = MAP_IS_KSTACK;
                gstart = PAGE_SIZE;
        } else {
                cow = 0;
                gstart = 0;
        }

        /*
         * Use the kernel object for wired-down kernel pages.  Assume that
         * no region of the kernel object is referenced more than once.
         *
         * Locate sufficient space in the map.  This will give us the final
         * virtual address for the new memory, and thus will tell us the
         * offset within the kernel map.
         */
        vm_map_lock(map);
        if (vm_map_findspace(map, vm_map_min(map), size, PAGE_SIZE,
                             0, &addr)) {
                vm_map_unlock(map);
                if (kmflags & KM_KRESERVE)
                        vm_map_entry_krelease(count);
                else
                        vm_map_entry_release(count);
                return (0);
        }
        vm_object_hold(&kernel_object);
        vm_object_reference_locked(&kernel_object);
        vm_map_insert(map, &count,
                      &kernel_object, NULL,
                      addr, addr, addr + size,
                      VM_MAPTYPE_NORMAL,
                      VM_PROT_ALL, VM_PROT_ALL, cow);
        vm_object_drop(&kernel_object);

        vm_map_unlock(map);
        if (kmflags & KM_KRESERVE)
                vm_map_entry_krelease(count);
        else
                vm_map_entry_release(count);

        /*
         * Guarantee that there are pages already in this object before
         * calling vm_map_wire.  This is to prevent the following
         * scenario:
         *
         * 1) Threads have swapped out, so that there is a pager for the
         *    kernel_object.
         * 2) The kmsg zone is empty, and so we are kmem_allocing a new
         *    page for it.
         * 3) vm_map_wire calls vm_fault; there is no page, but there is
         *    a pager, so we call pager_data_request.  But the kmsg zone
         *    is empty, so we must kmem_alloc.
         * 4) goto 1
         * 5) Even if the kmsg zone is not empty: when we get the data
         *    back from the pager, it will be (very stale) non-zero data.
         *    kmem_alloc is defined to return zero-filled memory.
         *
         * We're intentionally not activating the pages we allocate to
         * prevent a race with page-out.  vm_map_wire will wire the pages.
         */
        vm_object_hold(&kernel_object);
        for (i = gstart; i < size; i += PAGE_SIZE) {
                vm_page_t mem;

                mem = vm_page_grab(&kernel_object, OFF_TO_IDX(addr + i),
                                   VM_ALLOC_FORCE_ZERO | VM_ALLOC_NORMAL |
                                   VM_ALLOC_RETRY);
                vm_page_unqueue_nowakeup(mem);
                vm_page_wakeup(mem);
        }
        vm_object_drop(&kernel_object);

        /*
         * And finally, mark the data as non-pageable.
         *
         * NOTE: vm_map_wire() handles any kstack guard.
         */
        vm_map_wire(map, addr, addr + size, kmflags);

        return (addr);
}

/*
 * Release a region of kernel virtual memory allocated with kmem_alloc,
 * and return the physical pages associated with that region.
 *
 * WARNING!  If the caller entered pages into the region using pmap_kenter()
 * it must remove the pages using pmap_kremove[_quick]() before freeing the
 * underlying kmem, otherwise resident_count will be mistabulated.
 *
 * No requirements.
 */
void
kmem_free(vm_map_t map, vm_offset_t addr, vm_size_t size)
{
        vm_map_remove(map, trunc_page(addr), round_page(addr + size));
}

/*
 * Used to break a system map into smaller maps, usually to reduce
 * contention and to provide large KVA spaces for subsystems like the
 * buffer cache.
 *
 *	parent		Map to take range from
 *	result		Map to populate as the new submap
 *	size		Size of range to find
 *	min, max	Returned endpoints of map
 *
 * No requirements.
 */
void
kmem_suballoc(vm_map_t parent, vm_map_t result,
              vm_offset_t *min, vm_offset_t *max, vm_size_t size)
{
        int ret;

        size = round_page(size);

        *min = (vm_offset_t) vm_map_min(parent);
        ret = vm_map_find(parent, NULL, NULL,
                          (vm_offset_t) 0, min, size,
                          PAGE_SIZE,
                          TRUE, VM_MAPTYPE_UNSPECIFIED,
                          VM_PROT_ALL, VM_PROT_ALL, 0);
        if (ret != KERN_SUCCESS) {
                kprintf("kmem_suballoc: bad status return of %d.\n", ret);
                panic("kmem_suballoc");
        }
        *max = *min + size;
        pmap_reference(vm_map_pmap(parent));
        vm_map_init(result, *min, *max, vm_map_pmap(parent));
        if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS)
                panic("kmem_suballoc: unable to change range to submap");
}
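/*
 * Example (hypothetical sketch): carving a submap out of kernel_map at
 * boot for buffer_map, declared above; the 16MB size is purely
 * illustrative.  On return the submap covers [minaddr, maxaddr) and
 * shares the parent's pmap:
 *
 *	vm_offset_t minaddr, maxaddr;
 *
 *	kmem_suballoc(&kernel_map, &buffer_map, &minaddr, &maxaddr,
 *		      16 * 1024 * 1024);
 */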
/*
 * Allocates pageable memory from a sub-map of the kernel.  If the submap
 * has no room, the caller sleeps waiting for more memory in the submap.
 *
 * No requirements.
 */
vm_offset_t
kmem_alloc_wait(vm_map_t map, vm_size_t size)
{
        vm_offset_t addr;
        int count;

        size = round_page(size);

        count = vm_map_entry_reserve(MAP_RESERVE_COUNT);

        for (;;) {
                /*
                 * To make this work for more than one map, use the map's
                 * lock to lock out sleepers/wakers.
                 */
                vm_map_lock(map);
                if (vm_map_findspace(map, vm_map_min(map),
                                     size, PAGE_SIZE, 0, &addr) == 0) {
                        break;
                }
                /* no space now; see if we can ever get space */
                if (vm_map_max(map) - vm_map_min(map) < size) {
                        vm_map_entry_release(count);
                        vm_map_unlock(map);
                        return (0);
                }
                vm_map_unlock(map);
                tsleep(map, 0, "kmaw", 0);
        }
        vm_map_insert(map, &count,
                      NULL, NULL,
                      (vm_offset_t) 0, addr, addr + size,
                      VM_MAPTYPE_NORMAL,
                      VM_PROT_ALL, VM_PROT_ALL,
                      0);
        vm_map_unlock(map);
        vm_map_entry_release(count);

        return (addr);
}

/*
 * Allocates a region from the kernel address map and physical pages
 * within the specified address range to the kernel object.  Creates a
 * wired mapping from this region to these pages, and returns the
 * region's starting virtual address.  The allocated pages are not
 * necessarily physically contiguous.  If M_ZERO is specified through the
 * given flags, then the pages are zeroed before they are mapped.
 */
vm_offset_t
kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
                vm_paddr_t high, vm_memattr_t memattr)
{
        vm_offset_t addr, i, offset;
        vm_page_t m;
        int count;

        size = round_page(size);
        count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
        vm_map_lock(map);
        if (vm_map_findspace(map, vm_map_min(map), size, PAGE_SIZE,
                             flags, &addr)) {
                vm_map_unlock(map);
                vm_map_entry_release(count);
                return (0);
        }
        offset = addr - vm_map_min(&kernel_map);
        vm_object_hold(&kernel_object);
        vm_object_reference_locked(&kernel_object);
        vm_map_insert(map, &count,
                      &kernel_object, NULL,
                      offset, addr, addr + size,
                      VM_MAPTYPE_NORMAL,
                      VM_PROT_ALL, VM_PROT_ALL, 0);
        vm_map_unlock(map);
        vm_map_entry_release(count);
        vm_object_drop(&kernel_object);
        for (i = 0; i < size; i += PAGE_SIZE) {
                m = vm_page_alloc_contig(low, high, PAGE_SIZE, 0,
                                         PAGE_SIZE, memattr);
                if (m == NULL)
                        return (0);
                vm_object_hold(&kernel_object);
                vm_page_insert(m, &kernel_object, OFF_TO_IDX(offset + i));
                vm_object_drop(&kernel_object);
                if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
                        pmap_zero_page(VM_PAGE_TO_PHYS(m));
                m->valid = VM_PAGE_BITS_ALL;
        }
        vm_map_wire(map, addr, addr + size, 0);
        return (addr);
}

/*
 * Returns memory to a submap of the kernel, and wakes up any processes
 * waiting for memory in that map.
 *
 * No requirements.
 */
void
kmem_free_wakeup(vm_map_t map, vm_offset_t addr, vm_size_t size)
{
        int count;

        count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
        vm_map_lock(map);
        vm_map_delete(map, trunc_page(addr), round_page(addr + size), &count);
        wakeup(map);
        vm_map_unlock(map);
        vm_map_entry_release(count);
}
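/*
 * Example (hypothetical sketch): kmem_alloc_wait() and kmem_free_wakeup()
 * are intended to be used as a pair on a submap.  A consumer blocks in
 * kmem_alloc_wait() until space opens up, and kmem_free_wakeup() wakes
 * any such sleepers when space is returned:
 *
 *	vm_offset_t va;
 *
 *	va = kmem_alloc_wait(&buffer_map, size);
 *	if (va == 0)
 *		return (ENOMEM);	(can never fit in this submap)
 *	...
 *	kmem_free_wakeup(&buffer_map, va, size);
 */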
/*
 * Create the kernel_map for (KvaStart,KvaEnd) and insert mappings to
 * cover areas already allocated or reserved thus far.
 *
 * The areas (virtual_start, virtual_end) and (virtual2_start, virtual2_end)
 * are available so the cutouts are the areas around these ranges between
 * KvaStart and KvaEnd.
 *
 * Depend on the zalloc bootstrap cache to get our vm_map_entry_t.
 * Called from the low level boot code only.
 */
void
kmem_init(void)
{
        vm_offset_t addr;
        vm_map_t m;
        int count;

        m = vm_map_create(&kernel_map, &kernel_pmap, KvaStart, KvaEnd);
        vm_map_lock(m);
        /* N.B.: cannot use kgdb to debug, starting with this assignment ... */
        m->system_map = 1;
        count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
        addr = KvaStart;
        if (virtual2_start) {
                if (addr < virtual2_start) {
                        vm_map_insert(m, &count,
                                      NULL, NULL,
                                      (vm_offset_t) 0, addr, virtual2_start,
                                      VM_MAPTYPE_NORMAL,
                                      VM_PROT_ALL, VM_PROT_ALL, 0);
                }
                addr = virtual2_end;
        }
        if (addr < virtual_start) {
                vm_map_insert(m, &count,
                              NULL, NULL,
                              (vm_offset_t) 0, addr, virtual_start,
                              VM_MAPTYPE_NORMAL,
                              VM_PROT_ALL, VM_PROT_ALL, 0);
        }
        addr = virtual_end;
        if (addr < KvaEnd) {
                vm_map_insert(m, &count,
                              NULL, NULL,
                              (vm_offset_t) 0, addr, KvaEnd,
                              VM_MAPTYPE_NORMAL,
                              VM_PROT_ALL, VM_PROT_ALL, 0);
        }
        /* ... and ending with the completion of the above `insert' */
        vm_map_unlock(m);
        vm_map_entry_release(count);
}

/*
 * No requirements.
 */
static int
kvm_size(SYSCTL_HANDLER_ARGS)
{
        unsigned long ksize = KvaSize;

        return sysctl_handle_long(oidp, &ksize, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_ULONG|CTLFLAG_RD,
            0, 0, kvm_size, "LU", "Size of KVM");

/*
 * No requirements.
 */
static int
kvm_free(SYSCTL_HANDLER_ARGS)
{
        unsigned long kfree = virtual_end - kernel_vm_end;

        return sysctl_handle_long(oidp, &kfree, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_ULONG|CTLFLAG_RD,
            0, 0, kvm_free, "LU", "Amount of KVM free");
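/*
 * Example (hypothetical, userland-side sketch): the two read-only sysctls
 * above can be queried with sysctlbyname(3); the node names follow from
 * the SYSCTL_PROC() declarations (vm.kvm_size and vm.kvm_free):
 *
 *	unsigned long val;
 *	size_t len = sizeof(val);
 *
 *	if (sysctlbyname("vm.kvm_size", &val, &len, NULL, 0) == 0)
 *		printf("KVM size: %lu\n", val);
 */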