/*
 * (MPSAFE)
 *
 * Copyright (c) 2003, 2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Hiten Pandya <hmp@backplane.com>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
 * $DragonFly: src/sys/vm/vm_contig.c,v 1.21 2006/12/28 21:24:02 dillon Exp $
 */

/*
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * Contiguous memory allocation API.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>

#include <sys/thread2.h>
#include <vm/vm_page2.h>
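
/*
 * Editor's note, an illustrative sketch rather than part of the
 * original file: the low-level primitives below (vm_contig_pg_clean(),
 * vm_contig_pg_flush(), vm_contig_pg_alloc()) require the caller to
 * hold vm_token, while vm_contig_pg_free() and vm_contig_pg_kmap()
 * take it themselves.  The canonical calling pattern, as used by
 * contigmalloc_map() later in this file, is:
 *
 *	lwkt_gettoken(&vm_token);
 *	index = vm_contig_pg_alloc(size, low, high, align, bndry, flags);
 *	...
 *	lwkt_reltoken(&vm_token);
 */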

/*
 * vm_contig_pg_clean:
 *
 * Do a thorough cleanup of the specified 'queue', which can be either
 * PQ_ACTIVE or PQ_INACTIVE, by doing a walkthrough.  If the page is not
 * marked dirty it is shoved into the page cache, provided no one has
 * currently acquired it; otherwise localized action per object type
 * is taken for cleanup:
 *
 *	In the OBJT_VNODE case, the whole page range is cleaned up
 *	using the vm_object_page_clean() routine, by specifying a
 *	start and end of '0'.
 *
 *	Otherwise, if the object is of any other type, the generic
 *	pageout (daemon) flush routine is invoked.
 *
 * The caller must hold vm_token.
 */
static int
vm_contig_pg_clean(int queue)
{
	vm_object_t object;
	vm_page_t m, m_tmp, next;

	ASSERT_LWKT_TOKEN_HELD(&vm_token);

	for (m = TAILQ_FIRST(&vm_page_queues[queue].pl); m != NULL; m = next) {
		KASSERT(m->queue == queue,
			("vm_contig_pg_clean: page %p's queue is not %d",
			m, queue));
		next = TAILQ_NEXT(m, pageq);

		if (m->flags & PG_MARKER)
			continue;

		if (vm_page_sleep_busy(m, TRUE, "vpctw0"))
			return (TRUE);

		vm_page_test_dirty(m);
		if (m->dirty) {
			object = m->object;
			if (object->type == OBJT_VNODE) {
				vn_lock(object->handle, LK_EXCLUSIVE|LK_RETRY);
				vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
				vn_unlock(((struct vnode *)object->handle));
				return (TRUE);
			} else if (object->type == OBJT_SWAP ||
				   object->type == OBJT_DEFAULT) {
				m_tmp = m;
				vm_pageout_flush(&m_tmp, 1, 0);
				return (TRUE);
			}
		}
		KKASSERT(m->busy == 0);
		if (m->dirty == 0 && m->hold_count == 0) {
			vm_page_busy(m);
			vm_page_cache(m);
		}
	}
	return (FALSE);
}

/*
 * vm_contig_pg_flush:
 *
 * Attempt to flush (count) pages from the given page queue.  This may
 * or may not succeed.  Take up to <count> passes; any blocking happens
 * inside vm_contig_pg_clean().
 *
 * The caller must hold vm_token.
 */
static void
vm_contig_pg_flush(int queue, int count)
{
	while (count > 0) {
		if (!vm_contig_pg_clean(queue))
			break;
		--count;
	}
}
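
/*
 * Editor's note, illustrative only: the boundary test used by
 * vm_contig_pg_alloc() below,
 *
 *	((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0
 *
 * checks that the first and last byte of a candidate range fall within
 * the same boundary-sized, boundary-aligned window.  XORing the two
 * addresses leaves bits set only where they differ; masking off the
 * low-order bits below 'boundary' then yields zero iff no boundary is
 * crossed.  For example, with boundary = 0x10000, phys = 0x8000 and
 * size = 0x4000: 0x8000 ^ 0xbfff = 0x3fff, and 0x3fff & ~0xffff == 0,
 * so no crossing.  With phys = 0xc000 and size = 0x8000:
 * 0xc000 ^ 0x13fff = 0x1ffff, which survives the mask, so the range
 * crosses a boundary and is rejected.
 */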

/*
 * vm_contig_pg_alloc:
 *
 * Allocate contiguous pages from the VM.  This function does not
 * map the allocated pages into the kernel map, otherwise it is
 * impossible to make large allocations (i.e. >2G).
 *
 * Malloc()'s data structures have been used for collection of
 * statistics and for allocations of less than a page.
 *
 * The caller must hold vm_token.
 */
static int
vm_contig_pg_alloc(unsigned long size, vm_paddr_t low, vm_paddr_t high,
		   unsigned long alignment, unsigned long boundary, int mflags)
{
	int i, start, pass;
	vm_offset_t phys;
	vm_page_t pga = vm_page_array;
	vm_page_t m;
	int pqtype;

	size = round_page(size);
	if (size == 0)
		panic("vm_contig_pg_alloc: size must not be 0");
	if ((alignment & (alignment - 1)) != 0)
		panic("vm_contig_pg_alloc: alignment must be a power of 2");
	if ((boundary & (boundary - 1)) != 0)
		panic("vm_contig_pg_alloc: boundary must be a power of 2");

	start = 0;
	crit_enter();

	/*
	 * Three passes (0, 1, 2).  Each pass scans the VM page list for
	 * free or cached pages.  After each pass, if the entire scan failed,
	 * we attempt to flush inactive pages and reset the start index back
	 * to 0.  For passes 1 and 2 we also attempt to flush active pages.
	 */
	for (pass = 0; pass < 3; pass++) {
		/*
		 * Find first page in array that is free, within range,
		 * aligned, and such that the boundary won't be crossed.
		 */
again:
		for (i = start; i < vmstats.v_page_count; i++) {
			m = &pga[i];
			phys = VM_PAGE_TO_PHYS(m);
			pqtype = m->queue - m->pc;
			if (((pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
			    (phys >= low) && (phys < high) &&
			    ((phys & (alignment - 1)) == 0) &&
			    (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0) &&
			    m->busy == 0 && m->wire_count == 0 &&
			    m->hold_count == 0 && (m->flags & PG_BUSY) == 0) {
				break;
			}
		}

		/*
		 * If we cannot find a page in the given range, or we have
		 * crossed the boundary, flush the queues via
		 * vm_contig_pg_flush() and restart the scan from index 0.
		 */
		if ((i == vmstats.v_page_count) ||
		    ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {

			/*
			 * Best effort flush of all inactive pages.
			 * This is quite quick, for now stall all
			 * callers, even if they've specified M_NOWAIT.
			 */
			vm_contig_pg_flush(PQ_INACTIVE,
					   vmstats.v_inactive_count);

			crit_exit();	/* give interrupts a chance */
			crit_enter();

			/*
			 * Best effort flush of active pages.
			 *
			 * This is very, very slow.
			 * Only do this if the caller has agreed to M_WAITOK.
			 *
			 * If enough pages are flushed, we may succeed on
			 * the next (final) pass; if not, the caller,
			 * contigmalloc(), will fail in the index < 0 case.
			 */
			if (pass > 0 && (mflags & M_WAITOK)) {
				vm_contig_pg_flush(PQ_ACTIVE,
						   vmstats.v_active_count);
			}

			/*
			 * We're already too high in the address space
			 * to succeed, reset to 0 for the next iteration.
			 */
			start = 0;
			crit_exit();	/* give interrupts a chance */
			crit_enter();
			continue;	/* next pass */
		}
		start = i;

		/*
		 * Check that the successive pages are contiguous and free.
		 *
		 * (still in critical section)
		 */
		for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
			m = &pga[i];
			pqtype = m->queue - m->pc;
			if ((VM_PAGE_TO_PHYS(&m[0]) !=
			     (VM_PAGE_TO_PHYS(&m[-1]) + PAGE_SIZE)) ||
			    ((pqtype != PQ_FREE) && (pqtype != PQ_CACHE)) ||
			    m->busy || m->wire_count ||
			    m->hold_count || (m->flags & PG_BUSY)) {
				start++;
				goto again;
			}
		}

		/*
		 * The run is contiguous and free; claim the pages.
		 *
		 * (still in critical section)
		 */
		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			m = &pga[i];
			pqtype = m->queue - m->pc;
			if (pqtype == PQ_CACHE) {
				vm_page_busy(m);
				vm_page_free(m);
			}
			KKASSERT(m->object == NULL);
			vm_page_unqueue_nowakeup(m);
			m->valid = VM_PAGE_BITS_ALL;
			if (m->flags & PG_ZERO)
				vm_page_zero_count--;
			/* Don't clear the PG_ZERO flag; we'll need it later. */
			m->flags &= PG_ZERO;
			KASSERT(m->dirty == 0,
				("vm_contig_pg_alloc: page %p was dirty", m));
			m->wire_count = 0;
			m->busy = 0;
		}

		/*
		 * Our job is done, return the index into vm_page_array.
		 */
		crit_exit();
		return (start);	/* aka &pga[start] */
	}

	/*
	 * Failed.
	 */
	crit_exit();
	return (-1);
}
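
/*
 * Editor's note, an illustrative sketch: the value returned by
 * vm_contig_pg_alloc() is an index into vm_page_array[], not an
 * address.  A caller holding vm_token might use it as follows:
 *
 *	int ix;
 *	vm_paddr_t pa;
 *
 *	ix = vm_contig_pg_alloc(size, low, high, PAGE_SIZE, 0, M_WAITOK);
 *	if (ix >= 0) {
 *		pa = VM_PAGE_TO_PHYS(&vm_page_array[ix]);
 *		...
 *		vm_contig_pg_free(ix, size);
 *	}
 */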

/*
 * vm_contig_pg_free:
 *
 * Remove pages previously allocated by vm_contig_pg_alloc, and
 * assume all references to the pages have been removed, and that
 * it is OK to add them back to the free list.
 *
 * Caller must ensure no races on the page range in question.
 * No other requirements.
 */
void
vm_contig_pg_free(int start, u_long size)
{
	vm_page_t pga = vm_page_array;
	vm_page_t m;
	int i;

	size = round_page(size);
	if (size == 0)
		panic("vm_contig_pg_free: size must not be 0");

	lwkt_gettoken(&vm_token);
	for (i = start; i < (start + size / PAGE_SIZE); i++) {
		m = &pga[i];
		vm_page_busy(m);
		vm_page_free(m);
	}
	lwkt_reltoken(&vm_token);
}

/*
 * vm_contig_pg_kmap:
 *
 * Map a range of pages previously allocated with vm_contig_pg_alloc()
 * from vm_page_array[] into the KVA.  Once mapped, the pages are part
 * of the kernel and are to be freed with
 * kmem_free(&kernel_map, addr, size).
 *
 * No requirements.
 */
vm_offset_t
vm_contig_pg_kmap(int start, u_long size, vm_map_t map, int flags)
{
	vm_offset_t addr, tmp_addr;
	vm_page_t pga = vm_page_array;
	int i, count;

	size = round_page(size);
	if (size == 0)
		panic("vm_contig_pg_kmap: size must not be 0");

	crit_enter();
	lwkt_gettoken(&vm_token);

	/*
	 * We've found a contiguous chunk that meets our requirements.
	 * Allocate KVM, assign the phys pages and return a kernel VM
	 * pointer.
	 */
	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
	vm_map_lock(map);
	if (vm_map_findspace(map, vm_map_min(map), size, PAGE_SIZE, 0, &addr) !=
	    KERN_SUCCESS) {
		/*
		 * XXX We almost never run out of kernel virtual
		 * space, so we don't make the allocated memory
		 * above available.
		 */
		vm_map_unlock(map);
		vm_map_entry_release(count);
		lwkt_reltoken(&vm_token);
		crit_exit();
		return (0);
	}

	/*
	 * kernel_object maps 1:1 to kernel_map.
	 */
	vm_object_reference(&kernel_object);
	vm_map_insert(map, &count,
		      &kernel_object, addr,
		      addr, addr + size,
		      VM_MAPTYPE_NORMAL,
		      VM_PROT_ALL, VM_PROT_ALL,
		      0);
	vm_map_unlock(map);
	vm_map_entry_release(count);

	tmp_addr = addr;
	for (i = start; i < (start + size / PAGE_SIZE); i++) {
		vm_page_t m = &pga[i];
		vm_page_insert(m, &kernel_object, OFF_TO_IDX(tmp_addr));
		if ((flags & M_ZERO) && !(m->flags & PG_ZERO))
			pmap_zero_page(VM_PAGE_TO_PHYS(m));
		m->flags = 0;
		tmp_addr += PAGE_SIZE;
	}
	vm_map_wire(map, addr, addr + size, 0);

	lwkt_reltoken(&vm_token);
	crit_exit();
	return (addr);
}

/*
 * No requirements.
 */
void *
contigmalloc(
	unsigned long size,	/* should be size_t here and for malloc() */
	struct malloc_type *type,
	int flags,
	vm_paddr_t low,
	vm_paddr_t high,
	unsigned long alignment,
	unsigned long boundary)
{
	return contigmalloc_map(size, type, flags, low, high, alignment,
				boundary, &kernel_map);
}
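
/*
 * Example usage (editor's illustrative sketch, not part of the
 * original file): allocating a 64KB buffer out of the low 16MB of
 * physical memory, page-aligned and not crossing a 64KB boundary,
 * then releasing it:
 *
 *	void *buf;
 *
 *	buf = contigmalloc(65536, M_DEVBUF, M_WAITOK,
 *			   0, 16 * 1024 * 1024, PAGE_SIZE, 65536);
 *	if (buf != NULL) {
 *		...
 *		contigfree(buf, 65536, M_DEVBUF);
 *	}
 */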

/*
 * No requirements.
 */
void *
contigmalloc_map(
	unsigned long size,	/* should be size_t here and for malloc() */
	struct malloc_type *type,
	int flags,
	vm_paddr_t low,
	vm_paddr_t high,
	unsigned long alignment,
	unsigned long boundary,
	vm_map_t map)
{
	int index;
	void *rv;

	lwkt_gettoken(&vm_token);
	index = vm_contig_pg_alloc(size, low, high, alignment, boundary, flags);
	if (index < 0) {
		kprintf("contigmalloc_map: failed size %lu low=%llx "
			"high=%llx align=%lu boundary=%lu flags=%08x\n",
			size, (long long)low, (long long)high,
			alignment, boundary, flags);
		lwkt_reltoken(&vm_token);
		return NULL;
	}

	rv = (void *)vm_contig_pg_kmap(index, size, map, flags);
	if (rv == NULL)
		vm_contig_pg_free(index, size);
	lwkt_reltoken(&vm_token);

	return rv;
}

/*
 * No requirements.
 */
void
contigfree(void *addr, unsigned long size, struct malloc_type *type)
{
	kmem_free(&kernel_map, (vm_offset_t)addr, size);
}

/*
 * No requirements.
 */
vm_offset_t
vm_page_alloc_contig(
	vm_offset_t size,
	vm_paddr_t low,
	vm_paddr_t high,
	vm_offset_t alignment)
{
	return ((vm_offset_t)contigmalloc_map(size, M_DEVBUF, M_NOWAIT, low,
				high, alignment, 0ul, &kernel_map));
}
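
/*
 * Editor's note, illustrative only: vm_page_alloc_contig() is a thin
 * non-blocking wrapper around contigmalloc_map().  For example, a
 * request for one page anywhere in the low 4GB, page-aligned, with no
 * boundary restriction, would look like:
 *
 *	vm_offset_t va;
 *
 *	va = vm_page_alloc_contig(PAGE_SIZE, 0, 0xffffffffUL, PAGE_SIZE);
 *	if (va == 0)
 *		... allocation failed (M_NOWAIT semantics) ...
 */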