1 /* 2 * (MPSAFE) 3 * 4 * Copyright (c) 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * The Mach Operating System project at Carnegie-Mellon University. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * from: @(#)vm_kern.c 8.3 (Berkeley) 1/12/94 35 * 36 * 37 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 38 * All rights reserved. 39 * 40 * Authors: Avadis Tevanian, Jr., Michael Wayne Young 41 * 42 * Permission to use, copy, modify and distribute this software and 43 * its documentation is hereby granted, provided that both the copyright 44 * notice and this permission notice appear in all copies of the 45 * software, derivative works or modified versions, and any portions 46 * thereof, and that both notices appear in supporting documentation. 47 * 48 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 49 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 50 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 51 * 52 * Carnegie Mellon requests users of this software to return to 53 * 54 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 55 * School of Computer Science 56 * Carnegie Mellon University 57 * Pittsburgh PA 15213-3890 58 * 59 * any improvements or extensions that they make and grant Carnegie the 60 * rights to redistribute these changes. 61 * 62 * $FreeBSD: src/sys/vm/vm_kern.c,v 1.61.2.2 2002/03/12 18:25:26 tegge Exp $ 63 */ 64 65 /* 66 * Kernel memory management. 67 */ 68 69 #include <sys/param.h> 70 #include <sys/systm.h> 71 #include <sys/proc.h> 72 #include <sys/malloc.h> 73 #include <sys/kernel.h> 74 #include <sys/sysctl.h> 75 76 #include <vm/vm.h> 77 #include <vm/vm_param.h> 78 #include <sys/lock.h> 79 #include <vm/pmap.h> 80 #include <vm/vm_map.h> 81 #include <vm/vm_object.h> 82 #include <vm/vm_page.h> 83 #include <vm/vm_pageout.h> 84 #include <vm/vm_kern.h> 85 #include <vm/vm_extern.h> 86 87 struct vm_map kernel_map; 88 struct vm_map clean_map; 89 struct vm_map buffer_map; 90 91 /* 92 * Allocate pageable memory to the kernel's address map. "map" must 93 * be kernel_map or a submap of kernel_map. 94 * 95 * No requirements. 96 */ 97 vm_offset_t 98 kmem_alloc_pageable(vm_map_t map, vm_size_t size) 99 { 100 vm_offset_t addr; 101 int result; 102 103 size = round_page(size); 104 addr = vm_map_min(map); 105 result = vm_map_find(map, NULL, (vm_offset_t) 0, 106 &addr, size, PAGE_SIZE, 107 TRUE, VM_MAPTYPE_NORMAL, 108 VM_PROT_ALL, VM_PROT_ALL, 109 0); 110 if (result != KERN_SUCCESS) 111 return (0); 112 return (addr); 113 } 114 115 /* 116 * Same as kmem_alloc_pageable, except that it create a nofault entry. 117 * 118 * No requirements. 119 */ 120 vm_offset_t 121 kmem_alloc_nofault(vm_map_t map, vm_size_t size, vm_size_t align) 122 { 123 vm_offset_t addr; 124 int result; 125 126 size = round_page(size); 127 addr = vm_map_min(map); 128 result = vm_map_find(map, NULL, (vm_offset_t) 0, 129 &addr, size, align, 130 TRUE, VM_MAPTYPE_NORMAL, 131 VM_PROT_ALL, VM_PROT_ALL, 132 MAP_NOFAULT); 133 if (result != KERN_SUCCESS) 134 return (0); 135 return (addr); 136 } 137 138 /* 139 * Allocate wired-down memory in the kernel's address map or a submap. 140 * 141 * No requirements. 142 */ 143 vm_offset_t 144 kmem_alloc3(vm_map_t map, vm_size_t size, int kmflags) 145 { 146 vm_offset_t addr; 147 vm_offset_t gstart; 148 vm_offset_t i; 149 int count; 150 int cow; 151 152 size = round_page(size); 153 154 if (kmflags & KM_KRESERVE) 155 count = vm_map_entry_kreserve(MAP_RESERVE_COUNT); 156 else 157 count = vm_map_entry_reserve(MAP_RESERVE_COUNT); 158 159 if (kmflags & KM_STACK) { 160 cow = MAP_IS_KSTACK; 161 gstart = PAGE_SIZE; 162 } else { 163 cow = 0; 164 gstart = 0; 165 } 166 167 /* 168 * Use the kernel object for wired-down kernel pages. Assume that no 169 * region of the kernel object is referenced more than once. 170 * 171 * Locate sufficient space in the map. This will give us the final 172 * virtual address for the new memory, and thus will tell us the 173 * offset within the kernel map. 174 */ 175 vm_map_lock(map); 176 if (vm_map_findspace(map, vm_map_min(map), size, PAGE_SIZE, 0, &addr)) { 177 vm_map_unlock(map); 178 if (kmflags & KM_KRESERVE) 179 vm_map_entry_krelease(count); 180 else 181 vm_map_entry_release(count); 182 return (0); 183 } 184 vm_object_hold(&kernel_object); 185 vm_object_reference_locked(&kernel_object); 186 vm_map_insert(map, &count, 187 &kernel_object, addr, addr, addr + size, 188 VM_MAPTYPE_NORMAL, 189 VM_PROT_ALL, VM_PROT_ALL, 190 cow); 191 vm_object_drop(&kernel_object); 192 193 vm_map_unlock(map); 194 if (kmflags & KM_KRESERVE) 195 vm_map_entry_krelease(count); 196 else 197 vm_map_entry_release(count); 198 199 /* 200 * Guarantee that there are pages already in this object before 201 * calling vm_map_wire. This is to prevent the following 202 * scenario: 203 * 204 * 1) Threads have swapped out, so that there is a pager for the 205 * kernel_object. 2) The kmsg zone is empty, and so we are 206 * kmem_allocing a new page for it. 3) vm_map_wire calls vm_fault; 207 * there is no page, but there is a pager, so we call 208 * pager_data_request. But the kmsg zone is empty, so we must 209 * kmem_alloc. 4) goto 1 5) Even if the kmsg zone is not empty: when 210 * we get the data back from the pager, it will be (very stale) 211 * non-zero data. kmem_alloc is defined to return zero-filled memory. 212 * 213 * We're intentionally not activating the pages we allocate to prevent a 214 * race with page-out. vm_map_wire will wire the pages. 215 */ 216 vm_object_hold(&kernel_object); 217 for (i = gstart; i < size; i += PAGE_SIZE) { 218 vm_page_t mem; 219 220 mem = vm_page_grab(&kernel_object, OFF_TO_IDX(addr + i), 221 VM_ALLOC_FORCE_ZERO | VM_ALLOC_NORMAL | 222 VM_ALLOC_RETRY); 223 vm_page_unqueue_nowakeup(mem); 224 vm_page_wakeup(mem); 225 } 226 vm_object_drop(&kernel_object); 227 228 /* 229 * And finally, mark the data as non-pageable. 230 * 231 * NOTE: vm_map_wire() handles any kstack guard. 232 */ 233 vm_map_wire(map, addr, addr + size, kmflags); 234 235 return (addr); 236 } 237 238 /* 239 * Release a region of kernel virtual memory allocated with kmem_alloc, 240 * and return the physical pages associated with that region. 241 * 242 * WARNING! If the caller entered pages into the region using pmap_kenter() 243 * it must remove the pages using pmap_kremove[_quick]() before freeing the 244 * underlying kmem, otherwise resident_count will be mistabulated. 245 * 246 * No requirements. 247 */ 248 void 249 kmem_free(vm_map_t map, vm_offset_t addr, vm_size_t size) 250 { 251 vm_map_remove(map, trunc_page(addr), round_page(addr + size)); 252 } 253 254 /* 255 * Used to break a system map into smaller maps, usually to reduce 256 * contention and to provide large KVA spaces for subsystems like the 257 * buffer cache. 258 * 259 * parent Map to take range from 260 * result 261 * size Size of range to find 262 * min, max Returned endpoints of map 263 * pageable Can the region be paged 264 * 265 * No requirements. 266 */ 267 void 268 kmem_suballoc(vm_map_t parent, vm_map_t result, 269 vm_offset_t *min, vm_offset_t *max, vm_size_t size) 270 { 271 int ret; 272 273 size = round_page(size); 274 275 *min = (vm_offset_t) vm_map_min(parent); 276 ret = vm_map_find(parent, NULL, (vm_offset_t) 0, 277 min, size, PAGE_SIZE, 278 TRUE, VM_MAPTYPE_UNSPECIFIED, 279 VM_PROT_ALL, VM_PROT_ALL, 280 0); 281 if (ret != KERN_SUCCESS) { 282 kprintf("kmem_suballoc: bad status return of %d.\n", ret); 283 panic("kmem_suballoc"); 284 } 285 *max = *min + size; 286 pmap_reference(vm_map_pmap(parent)); 287 vm_map_init(result, *min, *max, vm_map_pmap(parent)); 288 if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS) 289 panic("kmem_suballoc: unable to change range to submap"); 290 } 291 292 /* 293 * Allocates pageable memory from a sub-map of the kernel. If the submap 294 * has no room, the caller sleeps waiting for more memory in the submap. 295 * 296 * No requirements. 297 */ 298 vm_offset_t 299 kmem_alloc_wait(vm_map_t map, vm_size_t size) 300 { 301 vm_offset_t addr; 302 int count; 303 304 size = round_page(size); 305 306 count = vm_map_entry_reserve(MAP_RESERVE_COUNT); 307 308 for (;;) { 309 /* 310 * To make this work for more than one map, use the map's lock 311 * to lock out sleepers/wakers. 312 */ 313 vm_map_lock(map); 314 if (vm_map_findspace(map, vm_map_min(map), 315 size, PAGE_SIZE, 0, &addr) == 0) { 316 break; 317 } 318 /* no space now; see if we can ever get space */ 319 if (vm_map_max(map) - vm_map_min(map) < size) { 320 vm_map_entry_release(count); 321 vm_map_unlock(map); 322 return (0); 323 } 324 vm_map_unlock(map); 325 tsleep(map, 0, "kmaw", 0); 326 } 327 vm_map_insert(map, &count, 328 NULL, (vm_offset_t) 0, 329 addr, addr + size, 330 VM_MAPTYPE_NORMAL, 331 VM_PROT_ALL, VM_PROT_ALL, 332 0); 333 vm_map_unlock(map); 334 vm_map_entry_release(count); 335 336 return (addr); 337 } 338 339 /* 340 * Allocates a region from the kernel address map and physical pages 341 * within the specified address range to the kernel object. Creates a 342 * wired mapping from this region to these pages, and returns the 343 * region's starting virtual address. The allocated pages are not 344 * necessarily physically contiguous. If M_ZERO is specified through the 345 * given flags, then the pages are zeroed before they are mapped. 346 */ 347 vm_offset_t 348 kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low, 349 vm_paddr_t high, vm_memattr_t memattr) 350 { 351 vm_offset_t addr, i, offset; 352 vm_page_t m; 353 int count; 354 355 size = round_page(size); 356 count = vm_map_entry_reserve(MAP_RESERVE_COUNT); 357 vm_map_lock(map); 358 if (vm_map_findspace(map, vm_map_min(map), size, PAGE_SIZE, 359 flags, &addr)) { 360 vm_map_unlock(map); 361 vm_map_entry_release(count); 362 return (0); 363 } 364 offset = addr - vm_map_min(&kernel_map); 365 vm_object_hold(&kernel_object); 366 vm_object_reference_locked(&kernel_object); 367 vm_map_insert(map, &count, &kernel_object, offset, addr, addr + size, 368 VM_MAPTYPE_NORMAL, VM_PROT_ALL, VM_PROT_ALL, 0); 369 vm_map_unlock(map); 370 vm_map_entry_release(count); 371 vm_object_drop(&kernel_object); 372 for (i = 0; i < size; i += PAGE_SIZE) { 373 m = vm_page_alloc_contig(low, high, PAGE_SIZE, 0, PAGE_SIZE, memattr); 374 if (!m) { 375 return (0); 376 } 377 vm_object_hold(&kernel_object); 378 vm_page_insert(m, &kernel_object, OFF_TO_IDX(offset + i)); 379 vm_object_drop(&kernel_object); 380 if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0) 381 pmap_zero_page(VM_PAGE_TO_PHYS(m)); 382 m->valid = VM_PAGE_BITS_ALL; 383 } 384 vm_map_wire(map, addr, addr + size, 0); 385 return (addr); 386 } 387 388 389 /* 390 * Returns memory to a submap of the kernel, and wakes up any processes 391 * waiting for memory in that map. 392 * 393 * No requirements. 394 */ 395 void 396 kmem_free_wakeup(vm_map_t map, vm_offset_t addr, vm_size_t size) 397 { 398 int count; 399 400 count = vm_map_entry_reserve(MAP_RESERVE_COUNT); 401 vm_map_lock(map); 402 vm_map_delete(map, trunc_page(addr), round_page(addr + size), &count); 403 wakeup(map); 404 vm_map_unlock(map); 405 vm_map_entry_release(count); 406 } 407 408 /* 409 * Create the kernel_ma for (KvaStart,KvaEnd) and insert mappings to 410 * cover areas already allocated or reserved thus far. 411 * 412 * The areas (virtual_start, virtual_end) and (virtual2_start, virtual2_end) 413 * are available so the cutouts are the areas around these ranges between 414 * KvaStart and KvaEnd. 415 * 416 * Depend on the zalloc bootstrap cache to get our vm_map_entry_t. 417 * Called from the low level boot code only. 418 */ 419 void 420 kmem_init(void) 421 { 422 vm_offset_t addr; 423 vm_map_t m; 424 int count; 425 426 m = vm_map_create(&kernel_map, &kernel_pmap, KvaStart, KvaEnd); 427 vm_map_lock(m); 428 /* N.B.: cannot use kgdb to debug, starting with this assignment ... */ 429 m->system_map = 1; 430 count = vm_map_entry_reserve(MAP_RESERVE_COUNT); 431 addr = KvaStart; 432 if (virtual2_start) { 433 if (addr < virtual2_start) { 434 vm_map_insert(m, &count, NULL, (vm_offset_t) 0, 435 addr, virtual2_start, 436 VM_MAPTYPE_NORMAL, 437 VM_PROT_ALL, VM_PROT_ALL, 438 0); 439 } 440 addr = virtual2_end; 441 } 442 if (addr < virtual_start) { 443 vm_map_insert(m, &count, NULL, (vm_offset_t) 0, 444 addr, virtual_start, 445 VM_MAPTYPE_NORMAL, 446 VM_PROT_ALL, VM_PROT_ALL, 447 0); 448 } 449 addr = virtual_end; 450 if (addr < KvaEnd) { 451 vm_map_insert(m, &count, NULL, (vm_offset_t) 0, 452 addr, KvaEnd, 453 VM_MAPTYPE_NORMAL, 454 VM_PROT_ALL, VM_PROT_ALL, 455 0); 456 } 457 /* ... and ending with the completion of the above `insert' */ 458 vm_map_unlock(m); 459 vm_map_entry_release(count); 460 } 461 462 /* 463 * No requirements. 464 */ 465 static int 466 kvm_size(SYSCTL_HANDLER_ARGS) 467 { 468 unsigned long ksize = KvaSize; 469 470 return sysctl_handle_long(oidp, &ksize, 0, req); 471 } 472 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_ULONG|CTLFLAG_RD, 473 0, 0, kvm_size, "LU", "Size of KVM"); 474 475 /* 476 * No requirements. 477 */ 478 static int 479 kvm_free(SYSCTL_HANDLER_ARGS) 480 { 481 unsigned long kfree = virtual_end - kernel_vm_end; 482 483 return sysctl_handle_long(oidp, &kfree, 0, req); 484 } 485 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_ULONG|CTLFLAG_RD, 486 0, 0, kvm_free, "LU", "Amount of KVM free"); 487 488