/*
 * (MPSAFE)
 *
 * Copyright (c) 2003, 2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Hiten Pandya <hmp@backplane.com>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
 * $DragonFly: src/sys/vm/vm_contig.c,v 1.21 2006/12/28 21:24:02 dillon Exp $
 */

/*
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * Contiguous memory allocation API.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>

#include <sys/thread2.h>
#include <vm/vm_page2.h>

/*
 * vm_contig_pg_clean:
 *
 * Do a thorough cleanup of the specified 'queue', which can be either
 * PQ_ACTIVE or PQ_INACTIVE, by doing a walkthrough.  If the page is not
 * marked dirty, it is shoved into the page cache, provided no one has
 * currently acquired it; otherwise localized action per object type
 * is taken for cleanup:
 *
 *	In the OBJT_VNODE case, the whole page range is cleaned up
 *	using the vm_object_page_clean() routine, by specifying a
 *	start and end of '0'.
 *
 *	Otherwise, if the object is of any other type, the generic
 *	pageout (daemon) flush routine is invoked.
 *
 * The caller must hold vm_token.
 */
static int
vm_contig_pg_clean(int queue)
{
	vm_object_t object;
	vm_page_t m, m_tmp, next;

	ASSERT_LWKT_TOKEN_HELD(&vm_token);

	for (m = TAILQ_FIRST(&vm_page_queues[queue].pl); m != NULL; m = next) {
		KASSERT(m->queue == queue,
			("vm_contig_clean: page %p's queue is not %d",
			m, queue));
		next = TAILQ_NEXT(m, pageq);

		if (m->flags & PG_MARKER)
			continue;

		if (vm_page_sleep_busy(m, TRUE, "vpctw0"))
			return (TRUE);

		vm_page_test_dirty(m);
		if (m->dirty) {
			object = m->object;
			if (object->type == OBJT_VNODE) {
				vn_lock(object->handle, LK_EXCLUSIVE|LK_RETRY);
				vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
				vn_unlock(((struct vnode *)object->handle));
				return (TRUE);
			} else if (object->type == OBJT_SWAP ||
				   object->type == OBJT_DEFAULT) {
				m_tmp = m;
				vm_pageout_flush(&m_tmp, 1, 0);
				return (TRUE);
			}
		}
		if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
			vm_page_cache(m);
	}
	return (FALSE);
}

/*
 * vm_contig_pg_flush:
 *
 * Attempt to flush (count) pages from the given page queue.  This may
 * or may not succeed.  Take up to <count> passes, stopping early once
 * a pass makes no further progress.
 *
 * The caller must hold vm_token.
 */
static void
vm_contig_pg_flush(int queue, int count)
{
	while (count > 0) {
		if (!vm_contig_pg_clean(queue))
			break;
		--count;
	}
}
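
/*
 * Illustrative sketch (not compiled into the kernel): the
 * (x & (x - 1)) == 0 idiom that vm_contig_pg_alloc() below uses to
 * validate its 'alignment' and 'boundary' arguments.  A power of two
 * has exactly one bit set; subtracting 1 clears that bit and sets
 * every bit below it, so ANDing the two values yields zero.  Note the
 * idiom also accepts 0, which the boundary check below treats as
 * "no restriction".  A minimal userland demo follows.
 */
#if 0
#include <assert.h>

static int
is_pow2_or_zero(unsigned long x)
{
	return ((x & (x - 1)) == 0);
}

int
main(void)
{
	assert(is_pow2_or_zero(0));	/* 0 passes: means "don't care" */
	assert(is_pow2_or_zero(4096));	/* one bit set, a power of two */
	assert(!is_pow2_or_zero(4097));	/* 0x1001: two bits set, rejected */
	return (0);
}
#endif
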
/*
 * vm_contig_pg_alloc:
 *
 * Allocate contiguous pages from the VM.  This function does not
 * map the allocated pages into the kernel map; if it did, large
 * allocations (i.e. >2G) would be impossible.
 *
 * Malloc()'s data structures have been used only for the collection
 * of statistics and for allocations of less than a page.
 *
 * The caller must hold vm_token.
 */
static int
vm_contig_pg_alloc(unsigned long size, vm_paddr_t low, vm_paddr_t high,
		   unsigned long alignment, unsigned long boundary, int mflags)
{
	int i, start, pass;
	vm_offset_t phys;
	vm_page_t pga = vm_page_array;
	vm_page_t m;
	int pqtype;

	size = round_page(size);
	if (size == 0)
		panic("vm_contig_pg_alloc: size must not be 0");
	if ((alignment & (alignment - 1)) != 0)
		panic("vm_contig_pg_alloc: alignment must be a power of 2");
	if ((boundary & (boundary - 1)) != 0)
		panic("vm_contig_pg_alloc: boundary must be a power of 2");

	start = 0;
	crit_enter();

	/*
	 * Three passes (0, 1, 2).  Each pass scans the VM page list for
	 * free or cached pages.  After each pass, if the entire scan failed,
	 * we attempt to flush inactive pages and reset the start index back
	 * to 0.  For passes 1 and 2 we also attempt to flush active pages.
	 */
	for (pass = 0; pass < 3; pass++) {
		/*
		 * Find the first page in the array that is free, within
		 * range, aligned, and such that the boundary won't be
		 * crossed.  The boundary test passes iff the first and
		 * last byte of the run agree in all address bits above
		 * the boundary mask, i.e. their XOR clears under
		 * ~(boundary - 1).
		 */
again:
		for (i = start; i < vmstats.v_page_count; i++) {
			m = &pga[i];
			phys = VM_PAGE_TO_PHYS(m);
			pqtype = m->queue - m->pc;
			if (((pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
			    (phys >= low) && (phys < high) &&
			    ((phys & (alignment - 1)) == 0) &&
			    (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0) &&
			    m->busy == 0 && m->wire_count == 0 &&
			    m->hold_count == 0 && (m->flags & PG_BUSY) == 0
			) {
				break;
			}
		}

		/*
		 * If we cannot find the page in the given range, or we have
		 * crossed the boundary, call the vm_contig_pg_clean()
		 * function to flush out the queues, returning them to a
		 * normal state.
		 */
		if ((i == vmstats.v_page_count) ||
		    ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
			/*
			 * Best effort flush of all inactive pages.
			 * This is quite quick, so for now stall all
			 * callers, even if they've specified M_NOWAIT.
			 */
			vm_contig_pg_flush(PQ_INACTIVE,
					   vmstats.v_inactive_count);

			crit_exit();	/* give interrupts a chance */
			crit_enter();

			/*
			 * Best effort flush of active pages.
			 *
			 * This is very, very slow.
			 * Only do this if the caller has agreed to M_WAITOK.
			 *
			 * If enough pages are flushed, we may succeed on
			 * the next (final) pass; if not, the caller,
			 * contigmalloc(), will fail in the index < 0 case.
			 */
			if (pass > 0 && (mflags & M_WAITOK)) {
				vm_contig_pg_flush(PQ_ACTIVE,
						   vmstats.v_active_count);
			}

			/*
			 * We're already too high in the address space
			 * to succeed, reset to 0 for the next iteration.
			 */
			start = 0;
			crit_exit();	/* give interrupts a chance */
			crit_enter();
			continue;	/* next pass */
		}
		start = i;

		/*
		 * Check successive pages for contiguous and free.
		 *
		 * (still in critical section)
		 */
		for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
			m = &pga[i];
			pqtype = m->queue - m->pc;
			if ((VM_PAGE_TO_PHYS(&m[0]) !=
			     (VM_PAGE_TO_PHYS(&m[-1]) + PAGE_SIZE)) ||
			    ((pqtype != PQ_FREE) && (pqtype != PQ_CACHE)) ||
			    m->busy || m->wire_count ||
			    m->hold_count || (m->flags & PG_BUSY)
			) {
				start++;
				goto again;
			}
		}

		/*
		 * (still in critical section)
		 */
		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			m = &pga[i];
			pqtype = m->queue - m->pc;
			if (pqtype == PQ_CACHE) {
				vm_page_busy(m);
				vm_page_free(m);
			}
			KKASSERT(m->object == NULL);
			vm_page_unqueue_nowakeup(m);
			m->valid = VM_PAGE_BITS_ALL;
			if (m->flags & PG_ZERO)
				vm_page_zero_count--;
			/* Don't clear the PG_ZERO flag, we'll need it later. */
			m->flags &= PG_ZERO;
			KASSERT(m->dirty == 0,
				("vm_contig_pg_alloc: page %p was dirty", m));
			m->wire_count = 0;
			m->busy = 0;
		}

		/*
		 * Our job is done, return the index into vm_page_array.
		 */
		crit_exit();
		return (start);	/* aka &pga[start] */
	}

	/*
	 * Failed.
	 */
	crit_exit();
	return (-1);
}
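
/*
 * Illustrative sketch (not compiled into the kernel): the
 * boundary-crossing predicate used in the scan above.  A run of
 * 'size' bytes starting at 'phys' stays within one boundary-sized
 * block exactly when the first and last byte addresses agree in
 * every bit above the boundary mask, i.e. when their XOR has no
 * bits set outside (boundary - 1).  A minimal userland demo, with
 * a stand-in type for vm_paddr_t:
 */
#if 0
#include <assert.h>

typedef unsigned long long demo_paddr_t;	/* stand-in for vm_paddr_t */

static int
crosses_boundary(demo_paddr_t phys, demo_paddr_t size, demo_paddr_t boundary)
{
	return (((phys ^ (phys + size - 1)) & ~(boundary - 1)) != 0);
}

int
main(void)
{
	/* 16K at offset 0 stays inside the first 64K block. */
	assert(!crosses_boundary(0x0000, 0x4000, 0x10000));

	/* 16K starting at 56K straddles the 64K line. */
	assert(crosses_boundary(0xE000, 0x4000, 0x10000));

	/* A boundary of 0 disables the check: ~(0 - 1) == 0. */
	assert(!crosses_boundary(0xE000, 0x4000, 0));
	return (0);
}
#endif
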
/*
 * vm_contig_pg_free:
 *
 * Remove pages previously allocated by vm_contig_pg_alloc, and
 * assume all references to the pages have been removed, and that
 * it is OK to add them back to the free list.
 *
 * Caller must ensure no races on the page range in question.
 * No other requirements.
 */
void
vm_contig_pg_free(int start, u_long size)
{
	vm_page_t pga = vm_page_array;
	vm_page_t m;
	int i;

	size = round_page(size);
	if (size == 0)
		panic("vm_contig_pg_free: size must not be 0");

	lwkt_gettoken(&vm_token);
	for (i = start; i < (start + size / PAGE_SIZE); i++) {
		m = &pga[i];
		vm_page_busy(m);
		vm_page_free(m);
	}
	lwkt_reltoken(&vm_token);
}

/*
 * vm_contig_pg_kmap:
 *
 * Map a previously allocated (vm_contig_pg_alloc) range of pages from
 * vm_page_array[] into the KVA.  Once mapped, the pages are part of
 * the kernel, and are to be freed with kmem_free(&kernel_map, addr, size).
 *
 * No requirements.
 */
vm_offset_t
vm_contig_pg_kmap(int start, u_long size, vm_map_t map, int flags)
{
	vm_offset_t addr, tmp_addr;
	vm_page_t pga = vm_page_array;
	int i, count;

	size = round_page(size);
	if (size == 0)
		panic("vm_contig_pg_kmap: size must not be 0");

	crit_enter();
	lwkt_gettoken(&vm_token);

	/*
	 * We've found a contiguous chunk that meets our requirements.
	 * Allocate KVM, assign phys pages, and return a kernel VM
	 * pointer.
	 */
	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
	vm_map_lock(map);
	if (vm_map_findspace(map, vm_map_min(map), size, PAGE_SIZE, 0, &addr) !=
	    KERN_SUCCESS) {
		/*
		 * XXX We almost never run out of kernel virtual
		 * space, so we don't make the allocated memory
		 * above available.
		 */
		vm_map_unlock(map);
		vm_map_entry_release(count);
		lwkt_reltoken(&vm_token);
		crit_exit();
		return (0);
	}

	/*
	 * kernel_object maps 1:1 to kernel_map.
	 */
	vm_object_reference(&kernel_object);
	vm_map_insert(map, &count,
		      &kernel_object, addr,
		      addr, addr + size,
		      VM_MAPTYPE_NORMAL,
		      VM_PROT_ALL, VM_PROT_ALL,
		      0);
	vm_map_unlock(map);
	vm_map_entry_release(count);

	tmp_addr = addr;
	for (i = start; i < (start + size / PAGE_SIZE); i++) {
		vm_page_t m = &pga[i];
		vm_page_insert(m, &kernel_object, OFF_TO_IDX(tmp_addr));
		if ((flags & M_ZERO) && !(m->flags & PG_ZERO))
			pmap_zero_page(VM_PAGE_TO_PHYS(m));
		m->flags = 0;
		tmp_addr += PAGE_SIZE;
	}
	vm_map_wire(map, addr, addr + size, 0);

	lwkt_reltoken(&vm_token);
	crit_exit();
	return (addr);
}

/*
 * No requirements.
 */
void *
contigmalloc(
	unsigned long size,	/* should be size_t here and for malloc() */
	struct malloc_type *type,
	int flags,
	vm_paddr_t low,
	vm_paddr_t high,
	unsigned long alignment,
	unsigned long boundary)
{
	return contigmalloc_map(size, type, flags, low, high, alignment,
				boundary, &kernel_map);
}
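
/*
 * Illustrative sketch (not compiled into the kernel): how a driver
 * might use contigmalloc()/contigfree() to obtain and release a
 * physically contiguous, page-aligned DMA buffer below 4GB.  The
 * 'example_softc' structure and the function names are hypothetical.
 */
#if 0
static int
example_dma_alloc(struct example_softc *sc, unsigned long dma_size)
{
	sc->sc_dmabuf = contigmalloc(dma_size, M_DEVBUF, M_WAITOK,
				     0,			/* low: no floor */
				     0xFFFFFFFFUL,	/* high: below 4GB */
				     PAGE_SIZE,		/* alignment */
				     0);		/* boundary: none */
	if (sc->sc_dmabuf == NULL)	/* can fail even with M_WAITOK */
		return (ENOMEM);
	return (0);
}

static void
example_dma_free(struct example_softc *sc, unsigned long dma_size)
{
	contigfree(sc->sc_dmabuf, dma_size, M_DEVBUF);
	sc->sc_dmabuf = NULL;
}
#endif
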
/*
 * No requirements.
 */
void *
contigmalloc_map(
	unsigned long size,	/* should be size_t here and for malloc() */
	struct malloc_type *type,
	int flags,
	vm_paddr_t low,
	vm_paddr_t high,
	unsigned long alignment,
	unsigned long boundary,
	vm_map_t map)
{
	int index;
	void *rv;

	lwkt_gettoken(&vm_token);
	index = vm_contig_pg_alloc(size, low, high, alignment, boundary, flags);
	if (index < 0) {
		kprintf("contigmalloc_map: failed size %lu low=%llx "
			"high=%llx align=%lu boundary=%lu flags=%08x\n",
			size, (long long)low, (long long)high,
			alignment, boundary, flags);
		lwkt_reltoken(&vm_token);
		return NULL;
	}

	rv = (void *)vm_contig_pg_kmap(index, size, map, flags);
	if (rv == NULL)
		vm_contig_pg_free(index, size);
	lwkt_reltoken(&vm_token);

	return rv;
}

/*
 * No requirements.
 */
void
contigfree(void *addr, unsigned long size, struct malloc_type *type)
{
	kmem_free(&kernel_map, (vm_offset_t)addr, size);
}

/*
 * No requirements.
 */
vm_offset_t
vm_page_alloc_contig(
	vm_offset_t size,
	vm_paddr_t low,
	vm_paddr_t high,
	vm_offset_t alignment)
{
	return ((vm_offset_t)contigmalloc_map(size, M_DEVBUF, M_NOWAIT, low,
			high, alignment, 0ul, &kernel_map));
}
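
/*
 * Illustrative sketch (not compiled into the kernel): using
 * vm_page_alloc_contig() to grab a 64K buffer from the low 16MB,
 * e.g. for a hypothetical ISA-style device.  The wrapper uses
 * M_DEVBUF/M_NOWAIT and no boundary restriction, and returns 0
 * (not NULL) on failure since it yields a vm_offset_t.  The
 * function name is made up for the example.
 */
#if 0
static int
example_isa_buf(vm_offset_t *kvap)
{
	vm_offset_t kva;

	kva = vm_page_alloc_contig(64 * 1024, 0, 16 * 1024 * 1024,
				   PAGE_SIZE);
	if (kva == 0)
		return (ENOMEM);	/* no contiguous run available */
	*kvap = kva;
	return (0);
}
#endif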