/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vm_kern.c	8.1 (Berkeley) 06/11/93
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	Kernel memory management.
 */

#include <sys/param.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>

/*
 *	kmem_alloc_pageable:
 *
 *	Allocate pageable memory to the kernel's address map.
 *	map must be "kernel_map" below.
 */
vm_offset_t kmem_alloc_pageable(map, size)
	vm_map_t		map;
	register vm_size_t	size;
{
	vm_offset_t		addr;
	register int		result;

#if	0
	if (map != kernel_map)
		panic("kmem_alloc_pageable: not called with kernel_map");
#endif

	size = round_page(size);

	addr = vm_map_min(map);
	result = vm_map_find(map, NULL, (vm_offset_t) 0,
	    &addr, size, TRUE);
	if (result != KERN_SUCCESS) {
		return(0);
	}

	return(addr);
}
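/*
 * Illustrative sketch (hypothetical caller, disabled): one way a
 * subsystem might obtain and later release a pageable kernel buffer.
 * The 8K size is arbitrary; kmem_free() is defined further below.
 */
#if 0
void
example_pageable_buffer()
{
	vm_offset_t	buf;

	/* The allocator rounds the size up to a page multiple. */
	buf = kmem_alloc_pageable(kernel_map, (vm_size_t)8192);
	if (buf == 0)
		panic("example_pageable_buffer: kernel_map full");
	/* ... pages are faulted in on first touch ... */
	kmem_free(kernel_map, buf, (vm_size_t)8192);
}
#endif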
/*
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.
 */
vm_offset_t kmem_alloc(map, size)
	register vm_map_t	map;
	register vm_size_t	size;
{
	vm_offset_t		addr;
	register int		result;
	register vm_offset_t	offset;
	extern vm_object_t	kernel_object;
	vm_offset_t		i;

	size = round_page(size);

	/*
	 *	Use the kernel object for wired-down kernel pages.
	 *	Assume that no region of the kernel object is
	 *	referenced more than once.
	 */

	/*
	 * Locate sufficient space in the map.  This will give us the
	 * final virtual address for the new memory, and thus will tell
	 * us the offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, 0, size, &addr)) {
		vm_map_unlock(map);
		return (0);
	}
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	vm_object_reference(kernel_object);
	vm_map_insert(map, kernel_object, offset, addr, addr + size);
	vm_map_unlock(map);

	/*
	 *	Guarantee that there are pages already in this object
	 *	before calling vm_map_pageable.  This is to prevent the
	 *	following scenario:
	 *
	 *		1) Threads have swapped out, so that there is a
	 *		   pager for the kernel_object.
	 *		2) The kmsg zone is empty, and so we are kmem_allocing
	 *		   a new page for it.
	 *		3) vm_map_pageable calls vm_fault; there is no page,
	 *		   but there is a pager, so we call
	 *		   pager_data_request.  But the kmsg zone is empty,
	 *		   so we must kmem_alloc.
	 *		4) goto 1
	 *		5) Even if the kmsg zone is not empty: when we get
	 *		   the data back from the pager, it will be (very
	 *		   stale) non-zero data.  kmem_alloc is defined to
	 *		   return zero-filled memory.
	 *
	 *	We're intentionally not activating the pages we allocate
	 *	to prevent a race with page-out.  vm_map_pageable will wire
	 *	the pages.
	 */
	vm_object_lock(kernel_object);
	for (i = 0; i < size; i += PAGE_SIZE) {
		vm_page_t	mem;

		while ((mem = vm_page_alloc(kernel_object, offset + i))
		    == NULL) {
			vm_object_unlock(kernel_object);
			VM_WAIT;
			vm_object_lock(kernel_object);
		}
		vm_page_zero_fill(mem);
		mem->flags &= ~PG_BUSY;
	}
	vm_object_unlock(kernel_object);

	/*
	 *	And finally, mark the data as non-pageable.
	 */
	(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE);

	/*
	 *	Try to coalesce the map
	 */
	vm_map_simplify(map, addr);

	return(addr);
}

/*
 *	kmem_free:
 *
 *	Release a region of kernel virtual memory allocated
 *	with kmem_alloc, and return the physical pages
 *	associated with that region.
 */
void kmem_free(map, addr, size)
	vm_map_t		map;
	register vm_offset_t	addr;
	vm_size_t		size;
{
	(void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
}

/*
 *	kmem_suballoc:
 *
 *	Allocates a map to manage a subrange
 *	of the kernel virtual address space.
 *
 *	Arguments are as follows:
 *
 *	parent		Map to take range from
 *	min, max	Returned endpoints of map
 *	size		Size of range to find
 *	pageable	Can the region be paged
 */
vm_map_t kmem_suballoc(parent, min, max, size, pageable)
	register vm_map_t	parent;
	vm_offset_t		*min, *max;
	register vm_size_t	size;
	boolean_t		pageable;
{
	register int	ret;
	vm_map_t	result;

	size = round_page(size);

	*min = (vm_offset_t) vm_map_min(parent);
	ret = vm_map_find(parent, NULL, (vm_offset_t) 0,
	    min, size, TRUE);
	if (ret != KERN_SUCCESS) {
		printf("kmem_suballoc: bad status return of %d.\n", ret);
		panic("kmem_suballoc");
	}
	*max = *min + size;
	pmap_reference(vm_map_pmap(parent));
	result = vm_map_create(vm_map_pmap(parent), *min, *max, pageable);
	if (result == NULL)
		panic("kmem_suballoc: cannot create submap");
	if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS)
		panic("kmem_suballoc: unable to change range to submap");
	return(result);
}
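/*
 * Illustrative sketch (hypothetical startup code, disabled): carving a
 * private wired submap out of kernel_map with kmem_suballoc(), in the
 * style of the kmem_map/mb_map setup done elsewhere in the kernel.
 */
#if 0
vm_map_t	example_map;
vm_offset_t	example_min, example_max;

void
example_submap_setup()
{
	/* Reserve one megabyte of kernel VA, managed by its own map. */
	example_map = kmem_suballoc(kernel_map, &example_min, &example_max,
	    (vm_size_t)(1024 * 1024), FALSE);
}
#endif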
/*
 *	vm_move:
 *
 *	Move memory from source to destination map, possibly deallocating
 *	the source map reference to the memory.
 *
 *	Parameters are as follows:
 *
 *	src_map		Source address map
 *	src_addr	Address within source map
 *	dst_map		Destination address map
 *	num_bytes	Amount of data (in bytes) to copy/move
 *	src_dealloc	Should source be removed after copy?
 *
 *	Assumes the src and dst maps are not already locked.
 *
 *	Returns new destination address or 0 (if a failure occurs).
 */
vm_offset_t vm_move(src_map, src_addr, dst_map, num_bytes, src_dealloc)
	vm_map_t		src_map;
	register vm_offset_t	src_addr;
	register vm_map_t	dst_map;
	vm_offset_t		num_bytes;
	boolean_t		src_dealloc;
{
	register vm_offset_t	src_start;	/* Beginning of region */
	register vm_size_t	src_size;	/* Size of rounded region */
	vm_offset_t		dst_start;	/* destination address */
	register int		result;

	/*
	 *	Page-align the source region
	 */
	src_start = trunc_page(src_addr);
	src_size = round_page(src_addr + num_bytes) - src_start;

	/*
	 *	If there's no destination, we can be at most deallocating
	 *	the source range.
	 */
	if (dst_map == NULL) {
		if (src_dealloc)
			if (vm_deallocate(src_map, src_start, src_size)
			    != KERN_SUCCESS) {
				printf("vm_move: deallocate of source");
				printf(" failed, dealloc_only clause\n");
			}
		return(0);
	}

	/*
	 *	Allocate a place to put the copy
	 */
	dst_start = (vm_offset_t) 0;
	if ((result = vm_allocate(dst_map, &dst_start, src_size, TRUE))
	    == KERN_SUCCESS) {
		/*
		 *	Perform the copy, asking for deallocation if desired
		 */
		result = vm_map_copy(dst_map, src_map, dst_start, src_size,
		    src_start, FALSE, src_dealloc);
	}

	/*
	 *	Return the destination address corresponding to
	 *	the source address given (rather than the front
	 *	of the newly-allocated page).
	 */
	if (result == KERN_SUCCESS)
		return(dst_start + (src_addr - src_start));
	return(0);
}

/*
 * Allocate wired-down memory in the kernel's address map for the higher
 * level kernel memory allocator (kern/kern_malloc.c).  We cannot use
 * kmem_alloc() because we may need to allocate memory at interrupt
 * level where we cannot block (canwait == FALSE).
 *
 * This routine has its own private kernel submap (kmem_map) and object
 * (kmem_object).  This, combined with the fact that only malloc uses
 * this routine, ensures that we will never block in map or object waits.
 *
 * Note that this still only works in a uni-processor environment and
 * when called at splhigh().
 *
 * We don't worry about expanding the map (adding entries) since entries
 * for wired maps are statically allocated.
 */
vm_offset_t
kmem_malloc(map, size, canwait)
	register vm_map_t	map;
	register vm_size_t	size;
	boolean_t		canwait;
{
	register vm_offset_t	offset, i;
	vm_map_entry_t		entry;
	vm_offset_t		addr;
	vm_page_t		m;
	extern vm_object_t	kmem_object;

	if (map != kmem_map && map != mb_map)
		panic("kmem_malloc: map != {kmem,mb}_map");

	size = round_page(size);
	addr = vm_map_min(map);

	/*
	 * Locate sufficient space in the map.  This will give us the
	 * final virtual address for the new memory, and thus will tell
	 * us the offset within the kernel map.
	 */
	vm_map_lock(map);
	if (vm_map_findspace(map, 0, size, &addr)) {
		vm_map_unlock(map);
		if (canwait)		/* XXX should wait */
			panic("kmem_malloc: %s too small",
			    map == kmem_map ? "kmem_map" : "mb_map");
		return (0);
	}
	offset = addr - vm_map_min(kmem_map);
	vm_object_reference(kmem_object);
	vm_map_insert(map, kmem_object, offset, addr, addr + size);

	/*
	 * If we can wait, just mark the range as wired
	 * (will fault pages as necessary).
	 */
	if (canwait) {
		vm_map_unlock(map);
		(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size,
		    FALSE);
		vm_map_simplify(map, addr);
		return(addr);
	}

	/*
	 * If we cannot wait then we must allocate all memory up front,
	 * pulling it off the active queue to prevent pageout.
	 */
	vm_object_lock(kmem_object);
	for (i = 0; i < size; i += PAGE_SIZE) {
		m = vm_page_alloc(kmem_object, offset + i);

		/*
		 * Ran out of space, free everything up and return.
		 * Don't need to lock page queues here as we know
		 * that the pages we got aren't on any queues.
		 */
		if (m == NULL) {
			while (i != 0) {
				i -= PAGE_SIZE;
				m = vm_page_lookup(kmem_object, offset + i);
				vm_page_free(m);
			}
			vm_object_unlock(kmem_object);
			vm_map_delete(map, addr, addr + size);
			vm_map_unlock(map);
			return(0);
		}
#if 0
		vm_page_zero_fill(m);
#endif
		m->flags &= ~PG_BUSY;
	}
	vm_object_unlock(kmem_object);

	/*
	 * Mark map entry as non-pageable.
	 * Assert: vm_map_insert() will never be able to extend the previous
	 * entry so there will be a new entry exactly corresponding to this
	 * address range and it will have wired_count == 0.
	 */
	if (!vm_map_lookup_entry(map, addr, &entry) ||
	    entry->start != addr || entry->end != addr + size ||
	    entry->wired_count)
		panic("kmem_malloc: entry not found or misaligned");
	entry->wired_count++;

	/*
	 * Loop through pages, entering them in the pmap.
	 * (We cannot add them to the wired count without
	 * wrapping the vm_page_queue_lock in splimp...)
	 */
	for (i = 0; i < size; i += PAGE_SIZE) {
		vm_object_lock(kmem_object);
		m = vm_page_lookup(kmem_object, offset + i);
		vm_object_unlock(kmem_object);
		pmap_enter(map->pmap, addr + i, VM_PAGE_TO_PHYS(m),
		    VM_PROT_DEFAULT, TRUE);
	}
	vm_map_unlock(map);

	vm_map_simplify(map, addr);
	return(addr);
}
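/*
 * Illustrative sketch (hypothetical caller, disabled): the two ways to
 * call kmem_malloc().  At interrupt level canwait must be FALSE and the
 * caller must tolerate a 0 return; a caller that may sleep passes TRUE.
 */
#if 0
vm_offset_t
example_kmem_malloc(at_interrupt_level)
	boolean_t at_interrupt_level;
{
	vm_offset_t	va;

	va = kmem_malloc(kmem_map, (vm_size_t)PAGE_SIZE,
	    !at_interrupt_level);
	if (va == 0)
		return(0);	/* only possible when canwait is FALSE */
	return(va);
}
#endif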
/*
 *	kmem_alloc_wait
 *
 *	Allocates pageable memory from a submap of the kernel.  If the
 *	submap has no room, the caller sleeps waiting for more memory
 *	in the submap.
 */
vm_offset_t kmem_alloc_wait(map, size)
	vm_map_t	map;
	vm_size_t	size;
{
	vm_offset_t	addr;

	size = round_page(size);

	for (;;) {
		/*
		 * To make this work for more than one map,
		 * use the map's lock to lock out sleepers/wakers.
		 */
		vm_map_lock(map);
		if (vm_map_findspace(map, 0, size, &addr) == 0)
			break;
		/* no space now; see if we can ever get space */
		if (vm_map_max(map) - vm_map_min(map) < size) {
			vm_map_unlock(map);
			return (0);
		}
		assert_wait((int)map, TRUE);
		vm_map_unlock(map);
		thread_block();
	}
	vm_map_insert(map, NULL, (vm_offset_t)0, addr, addr + size);
	vm_map_unlock(map);
	return (addr);
}

/*
 *	kmem_free_wakeup
 *
 *	Returns memory to a submap of the kernel, and wakes up any threads
 *	waiting for memory in that map.
 */
void kmem_free_wakeup(map, addr, size)
	vm_map_t	map;
	vm_offset_t	addr;
	vm_size_t	size;
{
	vm_map_lock(map);
	(void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
	thread_wakeup((int)map);
	vm_map_unlock(map);
}
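/*
 * Illustrative sketch (hypothetical caller, disabled): kmem_alloc_wait()
 * and kmem_free_wakeup() are meant to be used as a pair on the same
 * submap, so that a thread sleeping in the allocator is awakened when
 * space is released.
 */
#if 0
void
example_submap_cycle(map)
	vm_map_t map;
{
	vm_offset_t	va;

	/* Sleeps until the submap has room; 0 means it can never fit. */
	va = kmem_alloc_wait(map, (vm_size_t)PAGE_SIZE);
	if (va == 0)
		return;
	/* ... */
	kmem_free_wakeup(map, va, (vm_size_t)PAGE_SIZE);
}
#endif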
/*
 * Create the kernel map; insert a mapping covering kernel text, data, bss,
 * and all space allocated thus far (`bootstrap' data).  The new map will
 * thus map the range between VM_MIN_KERNEL_ADDRESS and `start' as
 * allocated, and the range between `start' and `end' as free.
 */
void kmem_init(start, end)
	vm_offset_t	start, end;
{
	register vm_map_t	m;

	m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end, FALSE);
	vm_map_lock(m);
	/* N.B.: cannot use kgdb to debug, starting with this assignment ... */
	kernel_map = m;
	(void) vm_map_insert(m, NULL, (vm_offset_t)0,
	    VM_MIN_KERNEL_ADDRESS, start);
	/* ... and ending with the completion of the above `insert' */
	vm_map_unlock(m);
}
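/*
 * Illustrative sketch (hypothetical machine-dependent startup, disabled):
 * kmem_init() is called once, after pmap bootstrap, with the first and
 * last available kernel virtual addresses.
 */
#if 0
void
example_vm_startup(virtual_avail, virtual_end)
	vm_offset_t virtual_avail, virtual_end;
{
	/*
	 * Everything below virtual_avail (text, data, bss, and bootstrap
	 * allocations) is recorded as allocated; the rest is left free.
	 */
	kmem_init(virtual_avail, virtual_end);
}
#endif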