1 /* 2 * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Hiten Pandya <hmp@backplane.com>. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 */ 35 /* 36 * Copyright (c) 1991 Regents of the University of California. 37 * All rights reserved. 
38 * 39 * This code is derived from software contributed to Berkeley by 40 * The Mach Operating System project at Carnegie-Mellon University. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91 67 * $DragonFly: src/sys/vm/vm_contig.c,v 1.21 2006/12/28 21:24:02 dillon Exp $ 68 */ 69 70 /* 71 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 72 * All rights reserved. 
73 * 74 * Authors: Avadis Tevanian, Jr., Michael Wayne Young 75 * 76 * Permission to use, copy, modify and distribute this software and 77 * its documentation is hereby granted, provided that both the copyright 78 * notice and this permission notice appear in all copies of the 79 * software, derivative works or modified versions, and any portions 80 * thereof, and that both notices appear in supporting documentation. 81 * 82 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 83 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 84 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 85 * 86 * Carnegie Mellon requests users of this software to return to 87 * 88 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 89 * School of Computer Science 90 * Carnegie Mellon University 91 * Pittsburgh PA 15213-3890 92 * 93 * any improvements or extensions that they make and grant Carnegie the 94 * rights to redistribute these changes. 95 */ 96 97 /* 98 * Contiguous memory allocation API. 99 */ 100 101 #include <sys/param.h> 102 #include <sys/systm.h> 103 #include <sys/malloc.h> 104 #include <sys/proc.h> 105 #include <sys/lock.h> 106 #include <sys/vmmeter.h> 107 #include <sys/vnode.h> 108 109 #include <vm/vm.h> 110 #include <vm/vm_param.h> 111 #include <vm/vm_kern.h> 112 #include <vm/pmap.h> 113 #include <vm/vm_map.h> 114 #include <vm/vm_object.h> 115 #include <vm/vm_page.h> 116 #include <vm/vm_pageout.h> 117 #include <vm/vm_pager.h> 118 #include <vm/vm_extern.h> 119 120 #include <sys/thread2.h> 121 #include <sys/spinlock2.h> 122 #include <vm/vm_page2.h> 123 124 #include <machine/bus_dma.h> 125 126 static void vm_contig_pg_free(vm_pindex_t start, u_long size); 127 128 /* 129 * vm_contig_pg_clean: 130 * 131 * Do a thorough cleanup of the specified 'queue', which can be either 132 * PQ_ACTIVE or PQ_INACTIVE by doing a walkthrough. 
If the page is not marked dirty, it is shoved into the page cache,
 *	provided no one has currently acquired it, otherwise localized action
 *	per object type is taken for cleanup:
 *
 *	In the OBJT_VNODE case, the whole page range is cleaned up
 *	using the vm_object_page_clean() routine, by specifying a
 *	start and end of '0'.
 *
 *	For OBJT_SWAP and OBJT_DEFAULT objects the generic pageout
 *	(daemon) flush routine is invoked on the single page.  Dirty
 *	pages belonging to any other object type are left alone.
 *
 *	queue	Index into vm_page_queues[] of the queue to scan.
 *	count	Upper bound on the number of queue entries examined.
 */
static void
vm_contig_pg_clean(int queue, vm_pindex_t count)
{
	vm_object_t object;
	vm_page_t m, m_tmp;
	struct vm_page marker;
	struct vpgqueues *pq = &vm_page_queues[queue];

	/*
	 * Setup a local marker.  It is threaded through the queue so the
	 * scan position is remembered while no spinlock is held.
	 */
	bzero(&marker, sizeof(marker));
	marker.flags = PG_FICTITIOUS | PG_MARKER;
	marker.busy_count = PBUSY_LOCKED;
	marker.queue = queue;
	marker.wire_count = 1;

	vm_page_queues_spin_lock(queue);
	TAILQ_INSERT_HEAD(&pq->pl, &marker, pageq);
	vm_page_queues_spin_unlock(queue);

	/*
	 * Iterate the queue.  Note that the vm_page spinlock must be
	 * acquired before the pageq spinlock so it's easiest to simply
	 * not hold it in the loop iteration.
	 */
	while ((long)count-- > 0 &&
	       (m = TAILQ_NEXT(&marker, pageq)) != NULL) {
		vm_page_and_queue_spin_lock(m);
		if (m != TAILQ_NEXT(&marker, pageq)) {
			/*
			 * The queue changed between the unlocked peek and
			 * taking the locks.  Retry, and give back the
			 * iteration we just consumed.
			 */
			vm_page_and_queue_spin_unlock(m);
			++count;
			continue;
		}
		KKASSERT(m->queue == queue);

		/*
		 * Step the marker past (m) before doing anything that
		 * might drop the locks.
		 */
		TAILQ_REMOVE(&pq->pl, &marker, pageq);
		TAILQ_INSERT_AFTER(&pq->pl, m, &marker, pageq);

		/*
		 * Skip other scanners' markers and pages we cannot busy.
		 */
		if (m->flags & PG_MARKER) {
			vm_page_and_queue_spin_unlock(m);
			continue;
		}
		if (vm_page_busy_try(m, TRUE)) {
			vm_page_and_queue_spin_unlock(m);
			continue;
		}
		vm_page_and_queue_spin_unlock(m);

		/*
		 * We've successfully busied the page.  Recheck that it is
		 * still on the queue being scanned.
		 *
		 * 'queue' is a full vm_page_queues[] index, i.e. it already
		 * includes the page color, so it must be compared against
		 * m->queue directly.  Comparing against (m->queue - m->pc)
		 * would reject every page whose color is non-zero.
		 */
		if (m->queue != queue) {
			vm_page_wakeup(m);
			continue;
		}
		if (m->wire_count || m->hold_count) {
			vm_page_wakeup(m);
			continue;
		}
		if ((object = m->object) == NULL) {
			vm_page_wakeup(m);
			continue;
		}
		vm_page_test_dirty(m);
		if (m->dirty || (m->flags & PG_NEED_COMMIT)) {
			vm_object_hold(object);
			KKASSERT(m->object == object);

			if (object->type == OBJT_VNODE) {
				/*
				 * The page is released before the vnode is
				 * locked and the whole object is cleaned
				 * synchronously.
				 */
				vm_page_wakeup(m);
				vn_lock(object->handle, LK_EXCLUSIVE|LK_RETRY);
				vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
				vn_unlock(((struct vnode *)object->handle));
			} else if (object->type == OBJT_SWAP ||
				   object->type == OBJT_DEFAULT) {
				/*
				 * The still-busied page is handed to the
				 * pageout flush routine.
				 */
				m_tmp = m;
				vm_pageout_flush(&m_tmp, 1, 0);
			} else {
				vm_page_wakeup(m);
			}
			vm_object_drop(object);
		} else if (m->hold_count == 0) {
			vm_page_cache(m);
		} else {
			vm_page_wakeup(m);
		}
	}

	/*
	 * Scrap our local marker
	 */
	vm_page_queues_spin_lock(queue);
	TAILQ_REMOVE(&pq->pl, &marker, pageq);
	vm_page_queues_spin_unlock(queue);
}

/*
 * vm_contig_pg_alloc:
 *
 *	Allocate contiguous pages from the VM.  This function does not
 *	map the allocated pages into the kernel map, otherwise it is
 *	impossible to make large allocations (i.e. >2G).
247 * 248 * Malloc()'s data structures have been used for collection of 249 * statistics and for allocations of less than a page. 250 */ 251 static vm_pindex_t 252 vm_contig_pg_alloc(unsigned long size, vm_paddr_t low, vm_paddr_t high, 253 unsigned long alignment, unsigned long boundary, int mflags) 254 { 255 vm_pindex_t i, q, start; 256 vm_offset_t phys; 257 vm_page_t pga = vm_page_array; 258 vm_page_t m; 259 int pass; 260 int pqtype; 261 262 size = round_page(size); 263 if (size == 0) 264 panic("vm_contig_pg_alloc: size must not be 0"); 265 if ((alignment & (alignment - 1)) != 0) 266 panic("vm_contig_pg_alloc: alignment must be a power of 2"); 267 if ((boundary & (boundary - 1)) != 0) 268 panic("vm_contig_pg_alloc: boundary must be a power of 2"); 269 270 /* 271 * See if we can get the pages from the contiguous page reserve 272 * alist. The returned pages will be allocated and wired but not 273 * busied. 274 * 275 * If high is not set to BUS_SPACE_MAXADDR we try using our 276 * free memory reserve first, otherwise we try it last. 277 * 278 * XXX Always use the dma reserve first for performance, until 279 * we find a better way to differentiate the DRM API. 280 */ 281 #if 0 282 if (high != BUS_SPACE_MAXADDR) 283 #endif 284 { 285 m = vm_page_alloc_contig( 286 low, high, alignment, boundary, 287 size, VM_MEMATTR_DEFAULT); 288 if (m) 289 return (m - &pga[0]); 290 } 291 292 /* 293 * Three passes (0, 1, 2). Each pass scans the VM page list for 294 * free or cached pages. After each pass if the entire scan failed 295 * we attempt to flush inactive pages and reset the start index back 296 * to 0. For passes 1 and 2 we also attempt to flush active pages. 297 */ 298 start = 0; 299 for (pass = 0; pass < 3; pass++) { 300 /* 301 * Find first page in array that is free, within range, 302 * aligned, and such that the boundary won't be crossed. 
303 */ 304 again: 305 for (i = start; i < vmstats.v_page_count; i++) { 306 m = &pga[i]; 307 phys = VM_PAGE_TO_PHYS(m); 308 pqtype = m->queue - m->pc; 309 if (((pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) && 310 (phys >= low) && (phys < high) && 311 ((phys & (alignment - 1)) == 0) && 312 (((phys ^ (phys + size - 1)) & /* bitwise and */ 313 ~(boundary - 1)) == 0) && 314 m->wire_count == 0 && m->hold_count == 0 && 315 (m->busy_count & 316 (PBUSY_LOCKED | PBUSY_MASK)) == 0 && 317 (m->flags & PG_NEED_COMMIT) == 0) 318 { 319 break; 320 } 321 } 322 323 /* 324 * If we cannot find the page in the given range, or we have 325 * crossed the boundary, call the vm_contig_pg_clean() function 326 * for flushing out the queues, and returning it back to 327 * normal state. 328 */ 329 if ((i == vmstats.v_page_count) || 330 ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) { 331 332 /* 333 * Best effort flush of all inactive pages. 334 * This is quite quick, for now stall all 335 * callers, even if they've specified M_NOWAIT. 336 */ 337 for (q = 0; q < PQ_L2_SIZE; ++q) { 338 vm_contig_pg_clean(PQ_INACTIVE + q, 339 vmstats.v_inactive_count); 340 lwkt_yield(); 341 } 342 343 /* 344 * Best effort flush of active pages. 345 * 346 * This is very, very slow. 347 * Only do this if the caller has agreed to M_WAITOK. 348 * 349 * If enough pages are flushed, we may succeed on 350 * next (final) pass, if not the caller, contigmalloc(), 351 * will fail in the index < 0 case. 352 */ 353 if (pass > 0 && (mflags & M_WAITOK)) { 354 for (q = 0; q < PQ_L2_SIZE; ++q) { 355 vm_contig_pg_clean(PQ_ACTIVE + q, 356 vmstats.v_active_count); 357 } 358 lwkt_yield(); 359 } 360 361 /* 362 * We're already too high in the address space 363 * to succeed, reset to 0 for the next iteration. 364 */ 365 start = 0; 366 continue; /* next pass */ 367 } 368 start = i; 369 370 /* 371 * Check successive pages for contiguous and free. 
372 * 373 * (still in critical section) 374 */ 375 for (i = start + 1; i < (start + size / PAGE_SIZE); i++) { 376 m = &pga[i]; 377 pqtype = m->queue - m->pc; 378 if ((VM_PAGE_TO_PHYS(&m[0]) != 379 (VM_PAGE_TO_PHYS(&m[-1]) + PAGE_SIZE)) || 380 ((pqtype != PQ_FREE) && (pqtype != PQ_CACHE)) || 381 m->wire_count || 382 m->hold_count || 383 (m->busy_count & (PBUSY_LOCKED | PBUSY_MASK)) || 384 (m->flags & PG_NEED_COMMIT)) 385 { 386 start++; 387 goto again; 388 } 389 } 390 391 /* 392 * Try to allocate the pages, wiring them as we go. 393 * 394 * (still in critical section) 395 */ 396 for (i = start; i < (start + size / PAGE_SIZE); i++) { 397 m = &pga[i]; 398 399 if (vm_page_busy_try(m, TRUE)) { 400 vm_contig_pg_free(start, 401 (i - start) * PAGE_SIZE); 402 start++; 403 goto again; 404 } 405 pqtype = m->queue - m->pc; 406 if (pqtype == PQ_CACHE && 407 m->hold_count == 0 && 408 m->wire_count == 0 && 409 (m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) == 0) { 410 vm_page_protect(m, VM_PROT_NONE); 411 KKASSERT((m->flags & PG_MAPPED) == 0); 412 KKASSERT(m->dirty == 0); 413 vm_page_free(m); 414 --i; 415 continue; /* retry the page */ 416 } 417 if (pqtype != PQ_FREE || m->hold_count) { 418 vm_page_wakeup(m); 419 vm_contig_pg_free(start, 420 (i - start) * PAGE_SIZE); 421 start++; 422 goto again; 423 } 424 KKASSERT((m->valid & m->dirty) == 0); 425 KKASSERT(m->wire_count == 0); 426 KKASSERT(m->object == NULL); 427 vm_page_unqueue_nowakeup(m); 428 m->valid = VM_PAGE_BITS_ALL; 429 KASSERT(m->dirty == 0, 430 ("vm_contig_pg_alloc: page %p was dirty", m)); 431 KKASSERT(m->wire_count == 0); 432 KKASSERT((m->busy_count & PBUSY_MASK) == 0); 433 434 /* 435 * Clear all flags. Then unbusy the now allocated 436 * page. 437 */ 438 vm_page_flag_clear(m, ~PG_KEEP_NEWPAGE_MASK); 439 vm_page_wire(m); 440 vm_page_wakeup(m); 441 } 442 443 /* 444 * Our job is done, return the index page of vm_page_array. 
445 */ 446 return (start); /* aka &pga[start] */ 447 } 448 449 #if 0 450 /* 451 * Failed, if we haven't already tried, allocate from our reserved 452 * dma memory. 453 * 454 * XXX (see conditionalized code above) 455 */ 456 if (high == BUS_SPACE_MAXADDR) { 457 m = vm_page_alloc_contig( 458 low, high, alignment, boundary, 459 size, VM_MEMATTR_DEFAULT); 460 if (m) 461 return (m - &pga[0]); 462 } 463 #endif 464 465 /* 466 * Failed. 467 */ 468 return ((vm_pindex_t)-1); 469 } 470 471 /* 472 * vm_contig_pg_free: 473 * 474 * Remove pages previously allocated by vm_contig_pg_alloc, and 475 * assume all references to the pages have been removed, and that 476 * it is OK to add them back to the free list. 477 * 478 * Caller must ensure no races on the page range in question. 479 * No other requirements. 480 */ 481 static void 482 vm_contig_pg_free(vm_pindex_t start, u_long size) 483 { 484 vm_page_t pga = vm_page_array; 485 486 size = round_page(size); 487 if (size == 0) 488 panic("vm_contig_pg_free: size must not be 0"); 489 490 /* 491 * The pages are wired, vm_page_free_contig() determines whether they 492 * belong to the contig space or not and either frees them to that 493 * space (leaving them wired), or unwires the page and frees it to the 494 * normal PQ_FREE queue. 495 */ 496 vm_page_free_contig(&pga[start], size); 497 } 498 499 /* 500 * vm_contig_pg_kmap: 501 * 502 * Map previously allocated (vm_contig_pg_alloc) range of pages from 503 * vm_page_array[] into the KVA. Once mapped, the pages are part of 504 * the Kernel, and are to free'ed with kmem_free(&kernel_map, addr, size). 505 * 506 * No requirements. 
507 */ 508 static vm_offset_t 509 vm_contig_pg_kmap(vm_pindex_t start, u_long size, vm_map_t map, int flags) 510 { 511 vm_offset_t addr; 512 vm_paddr_t pa; 513 vm_page_t pga = vm_page_array; 514 u_long offset; 515 516 if (size == 0) 517 panic("vm_contig_pg_kmap: size must not be 0"); 518 size = round_page(size); 519 addr = kmem_alloc_pageable(&kernel_map, size, VM_SUBSYS_CONTIG); 520 if (addr) { 521 pa = VM_PAGE_TO_PHYS(&pga[start]); 522 for (offset = 0; offset < size; offset += PAGE_SIZE) 523 pmap_kenter_noinval(addr + offset, pa + offset); 524 pmap_invalidate_range(&kernel_pmap, addr, addr + size); 525 if (flags & M_ZERO) 526 bzero((void *)addr, size); 527 } 528 return(addr); 529 } 530 531 /* 532 * No requirements. 533 */ 534 void * 535 contigmalloc( 536 unsigned long size, /* should be size_t here and for malloc() */ 537 struct malloc_type *type, 538 int flags, 539 vm_paddr_t low, 540 vm_paddr_t high, 541 unsigned long alignment, 542 unsigned long boundary) 543 { 544 return contigmalloc_map(size, type, flags, low, high, alignment, 545 boundary, &kernel_map); 546 } 547 548 /* 549 * No requirements. 550 */ 551 void * 552 contigmalloc_map(unsigned long size, struct malloc_type *type, 553 int flags, vm_paddr_t low, vm_paddr_t high, 554 unsigned long alignment, unsigned long boundary, 555 vm_map_t map) 556 { 557 vm_pindex_t index; 558 void *rv; 559 560 index = vm_contig_pg_alloc(size, low, high, alignment, boundary, flags); 561 if (index == (vm_pindex_t)-1) { 562 kprintf("contigmalloc_map: failed size %lu low=%llx " 563 "high=%llx align=%lu boundary=%lu flags=%08x\n", 564 size, (long long)low, (long long)high, 565 alignment, boundary, flags); 566 return NULL; 567 } 568 569 rv = (void *)vm_contig_pg_kmap(index, size, map, flags); 570 if (rv == NULL) 571 vm_contig_pg_free(index, size); 572 573 return rv; 574 } 575 576 /* 577 * No requirements. 
578 */ 579 void 580 contigfree(void *addr, unsigned long size, struct malloc_type *type) 581 { 582 vm_paddr_t pa; 583 vm_page_t m; 584 585 if (size == 0) 586 panic("vm_contig_pg_kmap: size must not be 0"); 587 size = round_page(size); 588 589 pa = pmap_kextract((vm_offset_t)addr); 590 pmap_qremove((vm_offset_t)addr, size / PAGE_SIZE); 591 kmem_free(&kernel_map, (vm_offset_t)addr, size); 592 593 m = PHYS_TO_VM_PAGE(pa); 594 vm_page_free_contig(m, size); 595 } 596 597 /* 598 * No requirements. 599 */ 600 vm_offset_t 601 kmem_alloc_contig(vm_offset_t size, vm_paddr_t low, vm_paddr_t high, 602 vm_offset_t alignment) 603 { 604 return ((vm_offset_t)contigmalloc_map(size, M_DEVBUF, M_NOWAIT, low, 605 high, alignment, 0ul, &kernel_map)); 606 } 607