1 /* 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * The Mach Operating System project at Carnegie-Mellon University. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91 33 * $FreeBSD: src/sys/vm/vm_page.c,v 1.147.2.18 2002/03/10 05:03:19 alc Exp $ 34 */ 35 36 /* 37 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 38 * All rights reserved. 39 * 40 * Authors: Avadis Tevanian, Jr., Michael Wayne Young 41 * 42 * Permission to use, copy, modify and distribute this software and 43 * its documentation is hereby granted, provided that both the copyright 44 * notice and this permission notice appear in all copies of the 45 * software, derivative works or modified versions, and any portions 46 * thereof, and that both notices appear in supporting documentation. 47 * 48 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 49 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 50 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 51 * 52 * Carnegie Mellon requests users of this software to return to 53 * 54 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 55 * School of Computer Science 56 * Carnegie Mellon University 57 * Pittsburgh PA 15213-3890 58 * 59 * any improvements or extensions that they make and grant Carnegie the 60 * rights to redistribute these changes. 61 */ 62 /* 63 * Resident memory management module. The module manipulates 'VM pages'. 64 * A VM page is the core building block for memory management. 
65 */ 66 67 #include <sys/param.h> 68 #include <sys/systm.h> 69 #include <sys/malloc.h> 70 #include <sys/proc.h> 71 #include <sys/vmmeter.h> 72 #include <sys/vnode.h> 73 #include <sys/kernel.h> 74 #include <sys/alist.h> 75 #include <sys/sysctl.h> 76 #include <sys/cpu_topology.h> 77 78 #include <vm/vm.h> 79 #include <vm/vm_param.h> 80 #include <sys/lock.h> 81 #include <vm/vm_kern.h> 82 #include <vm/pmap.h> 83 #include <vm/vm_map.h> 84 #include <vm/vm_object.h> 85 #include <vm/vm_page.h> 86 #include <vm/vm_pageout.h> 87 #include <vm/vm_pager.h> 88 #include <vm/vm_extern.h> 89 #include <vm/swap_pager.h> 90 91 #include <machine/inttypes.h> 92 #include <machine/md_var.h> 93 #include <machine/specialreg.h> 94 95 #include <vm/vm_page2.h> 96 #include <sys/spinlock2.h> 97 98 /* 99 * SET - Minimum required set associative size, must be a power of 2. We 100 * want this to match or exceed the set-associativeness of the cpu. 101 * 102 * GRP - A larger set that allows bleed-over into the domains of other 103 * nearby cpus. Also must be a power of 2. Used by the page zeroing 104 * code to smooth things out a bit. 105 */ 106 #define PQ_SET_ASSOC 16 107 #define PQ_SET_ASSOC_MASK (PQ_SET_ASSOC - 1) 108 109 #define PQ_GRP_ASSOC (PQ_SET_ASSOC * 2) 110 #define PQ_GRP_ASSOC_MASK (PQ_GRP_ASSOC - 1) 111 112 static void vm_page_queue_init(void); 113 static void vm_page_free_wakeup(void); 114 static vm_page_t vm_page_select_cache(u_short pg_color); 115 static vm_page_t _vm_page_list_find2(int basequeue, int index); 116 static void _vm_page_deactivate_locked(vm_page_t m, int athead); 117 118 MALLOC_DEFINE(M_ACTIONHASH, "acthash", "vmpage action hash"); 119 120 /* 121 * Array of tailq lists 122 */ 123 __cachealign struct vpgqueues vm_page_queues[PQ_COUNT]; 124 125 LIST_HEAD(vm_page_action_list, vm_page_action); 126 127 /* 128 * Action hash for user umtx support. Contention is governed by both 129 * tsleep/wakeup handling (kern/kern_synch.c) and action_hash[] below. 130 * Because action_hash[] represents active table locks, a modest fixed 131 * value well in excess of MAXCPU works here. 132 * 133 * There is also scan overhead depending on the number of threads in 134 * umtx*() calls, so we also size the hash table based on maxproc. 
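 *
 * A minimal sketch of the sizing policy (the authoritative code lives in
 * vm_page_startup_finish() below): the table is scaled to the larger of a
 * fixed minimum, ncpus, and maxproc, then rounded up to a power of 2 so a
 * simple mask can be used when indexing:
 *
 *	hsize = VMACTION_MINHSIZE;
 *	if (hsize < ncpus * 2)
 *		hsize = ncpus * 2;
 *	if (hsize < maxproc / 16)
 *		hsize = maxproc / 16;
 *	hmask = 1;
 *	while (hmask < hsize)
 *		hmask = (hmask << 1) | 1;
 *	hsize = hmask + 1;
 *
 * A lookup then selects a bucket with 'key & hmask', where 'key' stands in
 * for whatever value the umtx code hashes on.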
135 */ 136 struct vm_page_action_hash { 137 struct vm_page_action_list list; 138 struct lock lk; 139 } __cachealign; 140 141 #define VMACTION_MINHSIZE 256 142 143 struct vm_page_action_hash *action_hash; 144 static int vmaction_hsize; 145 static int vmaction_hmask; 146 147 static volatile int vm_pages_waiting; 148 static struct alist vm_contig_alist; 149 static struct almeta vm_contig_ameta[ALIST_RECORDS_65536]; 150 static struct spinlock vm_contig_spin = SPINLOCK_INITIALIZER(&vm_contig_spin, "vm_contig_spin"); 151 152 static u_long vm_dma_reserved = 0; 153 TUNABLE_ULONG("vm.dma_reserved", &vm_dma_reserved); 154 SYSCTL_ULONG(_vm, OID_AUTO, dma_reserved, CTLFLAG_RD, &vm_dma_reserved, 0, 155 "Memory reserved for DMA"); 156 SYSCTL_UINT(_vm, OID_AUTO, dma_free_pages, CTLFLAG_RD, 157 &vm_contig_alist.bl_free, 0, "Memory reserved for DMA"); 158 159 static int vm_contig_verbose = 0; 160 TUNABLE_INT("vm.contig_verbose", &vm_contig_verbose); 161 162 RB_GENERATE2(vm_page_rb_tree, vm_page, rb_entry, rb_vm_page_compare, 163 vm_pindex_t, pindex); 164 165 static void 166 vm_page_queue_init(void) 167 { 168 int i; 169 170 for (i = 0; i < PQ_L2_SIZE; i++) 171 vm_page_queues[PQ_FREE+i].cnt_offset = 172 offsetof(struct vmstats, v_free_count); 173 for (i = 0; i < PQ_L2_SIZE; i++) 174 vm_page_queues[PQ_CACHE+i].cnt_offset = 175 offsetof(struct vmstats, v_cache_count); 176 for (i = 0; i < PQ_L2_SIZE; i++) 177 vm_page_queues[PQ_INACTIVE+i].cnt_offset = 178 offsetof(struct vmstats, v_inactive_count); 179 for (i = 0; i < PQ_L2_SIZE; i++) 180 vm_page_queues[PQ_ACTIVE+i].cnt_offset = 181 offsetof(struct vmstats, v_active_count); 182 for (i = 0; i < PQ_L2_SIZE; i++) 183 vm_page_queues[PQ_HOLD+i].cnt_offset = 184 offsetof(struct vmstats, v_active_count); 185 /* PQ_NONE has no queue */ 186 187 for (i = 0; i < PQ_COUNT; i++) { 188 TAILQ_INIT(&vm_page_queues[i].pl); 189 spin_init(&vm_page_queues[i].spin, "vm_page_queue_init"); 190 } 191 } 192 193 /* 194 * note: place in initialized data section? Is this necessary? 195 */ 196 long first_page = 0; 197 int vm_page_array_size = 0; 198 vm_page_t vm_page_array = NULL; 199 vm_paddr_t vm_low_phys_reserved; 200 201 /* 202 * (low level boot) 203 * 204 * Sets the page size, perhaps based upon the memory size. 205 * Must be called before any use of page-size dependent functions. 206 */ 207 void 208 vm_set_page_size(void) 209 { 210 if (vmstats.v_page_size == 0) 211 vmstats.v_page_size = PAGE_SIZE; 212 if (((vmstats.v_page_size - 1) & vmstats.v_page_size) != 0) 213 panic("vm_set_page_size: page size not a power of two"); 214 } 215 216 /* 217 * (low level boot) 218 * 219 * Add a new page to the freelist for use by the system. New pages 220 * are added to both the head and tail of the associated free page 221 * queue in a bottom-up fashion, so both zero'd and non-zero'd page 222 * requests pull 'recent' adds (higher physical addresses) first. 223 * 224 * Beware that the page zeroing daemon will also be running soon after 225 * boot, moving pages from the head to the tail of the PQ_FREE queues. 226 * 227 * Must be called in a critical section. 228 */ 229 static void 230 vm_add_new_page(vm_paddr_t pa) 231 { 232 struct vpgqueues *vpq; 233 vm_page_t m; 234 235 m = PHYS_TO_VM_PAGE(pa); 236 m->phys_addr = pa; 237 m->flags = 0; 238 m->pat_mode = PAT_WRITE_BACK; 239 m->pc = (pa >> PAGE_SHIFT); 240 241 /* 242 * Twist for cpu localization in addition to page coloring, so 243 * different cpus selecting by m->queue get different page colors. 
244 */ 245 m->pc ^= ((pa >> PAGE_SHIFT) / PQ_L2_SIZE); 246 m->pc ^= ((pa >> PAGE_SHIFT) / (PQ_L2_SIZE * PQ_L2_SIZE)); 247 m->pc &= PQ_L2_MASK; 248 249 /* 250 * Reserve a certain number of contiguous low memory pages for 251 * contigmalloc() to use. 252 */ 253 if (pa < vm_low_phys_reserved) { 254 atomic_add_int(&vmstats.v_page_count, 1); 255 atomic_add_int(&vmstats.v_dma_pages, 1); 256 m->queue = PQ_NONE; 257 m->wire_count = 1; 258 atomic_add_int(&vmstats.v_wire_count, 1); 259 alist_free(&vm_contig_alist, pa >> PAGE_SHIFT, 1); 260 return; 261 } 262 263 /* 264 * General page 265 */ 266 m->queue = m->pc + PQ_FREE; 267 KKASSERT(m->dirty == 0); 268 269 atomic_add_int(&vmstats.v_page_count, 1); 270 atomic_add_int(&vmstats.v_free_count, 1); 271 vpq = &vm_page_queues[m->queue]; 272 TAILQ_INSERT_HEAD(&vpq->pl, m, pageq); 273 ++vpq->lcnt; 274 } 275 276 /* 277 * (low level boot) 278 * 279 * Initializes the resident memory module. 280 * 281 * Preallocates memory for critical VM structures and arrays prior to 282 * kernel_map becoming available. 283 * 284 * Memory is allocated from (virtual2_start, virtual2_end) if available, 285 * otherwise memory is allocated from (virtual_start, virtual_end). 286 * 287 * On x86-64 (virtual_start, virtual_end) is only 2GB and may not be 288 * large enough to hold vm_page_array & other structures for machines with 289 * large amounts of ram, so we want to use virtual2* when available. 290 */ 291 void 292 vm_page_startup(void) 293 { 294 vm_offset_t vaddr = virtual2_start ? virtual2_start : virtual_start; 295 vm_offset_t mapped; 296 vm_size_t npages; 297 vm_paddr_t page_range; 298 vm_paddr_t new_end; 299 int i; 300 vm_paddr_t pa; 301 vm_paddr_t last_pa; 302 vm_paddr_t end; 303 vm_paddr_t biggestone, biggestsize; 304 vm_paddr_t total; 305 vm_page_t m; 306 307 total = 0; 308 biggestsize = 0; 309 biggestone = 0; 310 vaddr = round_page(vaddr); 311 312 /* 313 * Make sure ranges are page-aligned. 314 */ 315 for (i = 0; phys_avail[i].phys_end; ++i) { 316 phys_avail[i].phys_beg = round_page64(phys_avail[i].phys_beg); 317 phys_avail[i].phys_end = trunc_page64(phys_avail[i].phys_end); 318 if (phys_avail[i].phys_end < phys_avail[i].phys_beg) 319 phys_avail[i].phys_end = phys_avail[i].phys_beg; 320 } 321 322 /* 323 * Locate largest block 324 */ 325 for (i = 0; phys_avail[i].phys_end; ++i) { 326 vm_paddr_t size = phys_avail[i].phys_end - 327 phys_avail[i].phys_beg; 328 329 if (size > biggestsize) { 330 biggestone = i; 331 biggestsize = size; 332 } 333 total += size; 334 } 335 --i; /* adjust to last entry for use down below */ 336 337 end = phys_avail[biggestone].phys_end; 338 end = trunc_page(end); 339 340 /* 341 * Initialize the queue headers for the free queue, the active queue 342 * and the inactive queue. 343 */ 344 vm_page_queue_init(); 345 346 #if !defined(_KERNEL_VIRTUAL) 347 /* 348 * VKERNELs don't support minidumps and as such don't need 349 * vm_page_dump 350 * 351 * Allocate a bitmap to indicate that a random physical page 352 * needs to be included in a minidump. 353 * 354 * The amd64 port needs this to indicate which direct map pages 355 * need to be dumped, via calls to dump_add_page()/dump_drop_page(). 356 * 357 * However, i386 still needs this workspace internally within the 358 * minidump code. In theory, they are not needed on i386, but are 359 * included should the sf_buf code decide to use them. 
360 */ 361 page_range = phys_avail[i].phys_end / PAGE_SIZE; 362 vm_page_dump_size = round_page(roundup2(page_range, NBBY) / NBBY); 363 end -= vm_page_dump_size; 364 vm_page_dump = (void *)pmap_map(&vaddr, end, end + vm_page_dump_size, 365 VM_PROT_READ | VM_PROT_WRITE); 366 bzero((void *)vm_page_dump, vm_page_dump_size); 367 #endif 368 /* 369 * Compute the number of pages of memory that will be available for 370 * use (taking into account the overhead of a page structure per 371 * page). 372 */ 373 first_page = phys_avail[0].phys_beg / PAGE_SIZE; 374 page_range = phys_avail[i].phys_end / PAGE_SIZE - first_page; 375 npages = (total - (page_range * sizeof(struct vm_page))) / PAGE_SIZE; 376 377 #ifndef _KERNEL_VIRTUAL 378 /* 379 * (only applies to real kernels) 380 * 381 * Reserve a large amount of low memory for potential 32-bit DMA 382 * space allocations. Once device initialization is complete we 383 * release most of it, but keep (vm_dma_reserved) memory reserved 384 * for later use. Typically for X / graphics. Through trial and 385 * error we find that GPUs usually requires ~60-100MB or so. 386 * 387 * By default, 128M is left in reserve on machines with 2G+ of ram. 388 */ 389 vm_low_phys_reserved = (vm_paddr_t)65536 << PAGE_SHIFT; 390 if (vm_low_phys_reserved > total / 4) 391 vm_low_phys_reserved = total / 4; 392 if (vm_dma_reserved == 0) { 393 vm_dma_reserved = 128 * 1024 * 1024; /* 128MB */ 394 if (vm_dma_reserved > total / 16) 395 vm_dma_reserved = total / 16; 396 } 397 #endif 398 alist_init(&vm_contig_alist, 65536, vm_contig_ameta, 399 ALIST_RECORDS_65536); 400 401 /* 402 * Initialize the mem entry structures now, and put them in the free 403 * queue. 404 */ 405 new_end = trunc_page(end - page_range * sizeof(struct vm_page)); 406 mapped = pmap_map(&vaddr, new_end, end, VM_PROT_READ | VM_PROT_WRITE); 407 vm_page_array = (vm_page_t)mapped; 408 409 #if defined(__x86_64__) && !defined(_KERNEL_VIRTUAL) 410 /* 411 * since pmap_map on amd64 returns stuff out of a direct-map region, 412 * we have to manually add these pages to the minidump tracking so 413 * that they can be dumped, including the vm_page_array. 414 */ 415 for (pa = new_end; 416 pa < phys_avail[biggestone].phys_end; 417 pa += PAGE_SIZE) { 418 dump_add_page(pa); 419 } 420 #endif 421 422 /* 423 * Clear all of the page structures, run basic initialization so 424 * PHYS_TO_VM_PAGE() operates properly even on pages not in the 425 * map. 426 */ 427 bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page)); 428 vm_page_array_size = page_range; 429 430 m = &vm_page_array[0]; 431 pa = ptoa(first_page); 432 for (i = 0; i < page_range; ++i) { 433 spin_init(&m->spin, "vm_page"); 434 m->phys_addr = pa; 435 pa += PAGE_SIZE; 436 ++m; 437 } 438 439 /* 440 * Construct the free queue(s) in ascending order (by physical 441 * address) so that the first 16MB of physical memory is allocated 442 * last rather than first. On large-memory machines, this avoids 443 * the exhaustion of low physical memory before isa_dmainit has run. 
444 */ 445 vmstats.v_page_count = 0; 446 vmstats.v_free_count = 0; 447 for (i = 0; phys_avail[i].phys_end && npages > 0; ++i) { 448 pa = phys_avail[i].phys_beg; 449 if (i == biggestone) 450 last_pa = new_end; 451 else 452 last_pa = phys_avail[i].phys_end; 453 while (pa < last_pa && npages-- > 0) { 454 vm_add_new_page(pa); 455 pa += PAGE_SIZE; 456 } 457 } 458 if (virtual2_start) 459 virtual2_start = vaddr; 460 else 461 virtual_start = vaddr; 462 mycpu->gd_vmstats = vmstats; 463 } 464 465 /* 466 * Reorganize VM pages based on numa data. May be called as many times as 467 * necessary. Will reorganize the vm_page_t page color and related queue(s) 468 * to allow vm_page_alloc() to choose pages based on socket affinity. 469 * 470 * NOTE: This function is only called while we are still in UP mode, so 471 * we only need a critical section to protect the queues (which 472 * saves a lot of time, there are likely a ton of pages). 473 */ 474 void 475 vm_numa_organize(vm_paddr_t ran_beg, vm_paddr_t bytes, int physid) 476 { 477 vm_paddr_t scan_beg; 478 vm_paddr_t scan_end; 479 vm_paddr_t ran_end; 480 struct vpgqueues *vpq; 481 vm_page_t m; 482 vm_page_t mend; 483 int i; 484 int socket_mod; 485 int socket_value; 486 487 /* 488 * Check if no physical information, or there was only one socket 489 * (so don't waste time doing nothing!). 490 */ 491 if (cpu_topology_phys_ids <= 1 || 492 cpu_topology_core_ids == 0) { 493 return; 494 } 495 496 /* 497 * Setup for our iteration. Note that ACPI may iterate CPU 498 * sockets starting at 0 or 1 or some other number. The 499 * cpu_topology code mod's it against the socket count. 500 */ 501 ran_end = ran_beg + bytes; 502 physid %= cpu_topology_phys_ids; 503 504 socket_mod = PQ_L2_SIZE / cpu_topology_phys_ids; 505 socket_value = physid * socket_mod; 506 mend = &vm_page_array[vm_page_array_size]; 507 508 crit_enter(); 509 510 /* 511 * Adjust vm_page->pc and requeue all affected pages. The 512 * allocator will then be able to localize memory allocations 513 * to some degree. 514 */ 515 for (i = 0; phys_avail[i].phys_end; ++i) { 516 scan_beg = phys_avail[i].phys_beg; 517 scan_end = phys_avail[i].phys_end; 518 if (scan_end <= ran_beg) 519 continue; 520 if (scan_beg >= ran_end) 521 continue; 522 if (scan_beg < ran_beg) 523 scan_beg = ran_beg; 524 if (scan_end > ran_end) 525 scan_end = ran_end; 526 if (atop(scan_end) > first_page + vm_page_array_size) 527 scan_end = ptoa(first_page + vm_page_array_size); 528 529 m = PHYS_TO_VM_PAGE(scan_beg); 530 while (scan_beg < scan_end) { 531 KKASSERT(m < mend); 532 if (m->queue != PQ_NONE) { 533 vpq = &vm_page_queues[m->queue]; 534 TAILQ_REMOVE(&vpq->pl, m, pageq); 535 --vpq->lcnt; 536 /* queue doesn't change, no need to adj cnt */ 537 m->queue -= m->pc; 538 m->pc %= socket_mod; 539 m->pc += socket_value; 540 m->pc &= PQ_L2_MASK; 541 m->queue += m->pc; 542 vpq = &vm_page_queues[m->queue]; 543 TAILQ_INSERT_HEAD(&vpq->pl, m, pageq); 544 ++vpq->lcnt; 545 /* queue doesn't change, no need to adj cnt */ 546 } else { 547 m->pc %= socket_mod; 548 m->pc += socket_value; 549 m->pc &= PQ_L2_MASK; 550 } 551 scan_beg += PAGE_SIZE; 552 ++m; 553 } 554 } 555 crit_exit(); 556 } 557 558 /* 559 * We tended to reserve a ton of memory for contigmalloc(). Now that most 560 * drivers have initialized we want to return most the remaining free 561 * reserve back to the VM page queues so they can be used for normal 562 * allocations. 563 * 564 * We leave vm_dma_reserved bytes worth of free pages in the reserve pool. 
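 *
 * The amount kept back is tunable.  As a sketch (the value shown is only an
 * example), a /boot/loader.conf entry of
 *
 *	vm.dma_reserved="64m"
 *
 * keeps 64MB in the reserve instead of the default computed during
 * vm_page_startup() (128MB, capped at 1/16 of physical memory).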
565 * 566 * Also setup the action_hash[] table here (which is only used by userland) 567 */ 568 static void 569 vm_page_startup_finish(void *dummy __unused) 570 { 571 alist_blk_t blk; 572 alist_blk_t rblk; 573 alist_blk_t count; 574 alist_blk_t xcount; 575 alist_blk_t bfree; 576 vm_page_t m; 577 int i; 578 579 spin_lock(&vm_contig_spin); 580 for (;;) { 581 bfree = alist_free_info(&vm_contig_alist, &blk, &count); 582 if (bfree <= vm_dma_reserved / PAGE_SIZE) 583 break; 584 if (count == 0) 585 break; 586 587 /* 588 * Figure out how much of the initial reserve we have to 589 * free in order to reach our target. 590 */ 591 bfree -= vm_dma_reserved / PAGE_SIZE; 592 if (count > bfree) { 593 blk += count - bfree; 594 count = bfree; 595 } 596 597 /* 598 * Calculate the nearest power of 2 <= count. 599 */ 600 for (xcount = 1; xcount <= count; xcount <<= 1) 601 ; 602 xcount >>= 1; 603 blk += count - xcount; 604 count = xcount; 605 606 /* 607 * Allocate the pages from the alist, then free them to 608 * the normal VM page queues. 609 * 610 * Pages allocated from the alist are wired. We have to 611 * busy, unwire, and free them. We must also adjust 612 * vm_low_phys_reserved before freeing any pages to prevent 613 * confusion. 614 */ 615 rblk = alist_alloc(&vm_contig_alist, blk, count); 616 if (rblk != blk) { 617 kprintf("vm_page_startup_finish: Unable to return " 618 "dma space @0x%08x/%d -> 0x%08x\n", 619 blk, count, rblk); 620 break; 621 } 622 atomic_add_int(&vmstats.v_dma_pages, -count); 623 spin_unlock(&vm_contig_spin); 624 625 m = PHYS_TO_VM_PAGE((vm_paddr_t)blk << PAGE_SHIFT); 626 vm_low_phys_reserved = VM_PAGE_TO_PHYS(m); 627 while (count) { 628 vm_page_busy_wait(m, FALSE, "cpgfr"); 629 vm_page_unwire(m, 0); 630 vm_page_free(m); 631 --count; 632 ++m; 633 } 634 spin_lock(&vm_contig_spin); 635 } 636 spin_unlock(&vm_contig_spin); 637 638 /* 639 * Print out how much DMA space drivers have already allocated and 640 * how much is left over. 641 */ 642 kprintf("DMA space used: %jdk, remaining available: %jdk\n", 643 (intmax_t)(vmstats.v_dma_pages - vm_contig_alist.bl_free) * 644 (PAGE_SIZE / 1024), 645 (intmax_t)vm_contig_alist.bl_free * (PAGE_SIZE / 1024)); 646 647 /* 648 * Scale the action_hash[] array. Primary contention occurs due 649 * to cpu locks, scaled to ncpus, and scan overhead may be incurred 650 * depending on the number of threads, which we scale to maxproc. 651 * 652 * NOTE: Action lock might recurse due to callback, so allow 653 * recursion. 654 */ 655 vmaction_hsize = VMACTION_MINHSIZE; 656 if (vmaction_hsize < ncpus * 2) 657 vmaction_hsize = ncpus * 2; 658 if (vmaction_hsize < maxproc / 16) 659 vmaction_hsize = maxproc / 16; 660 vmaction_hmask = 1; 661 while (vmaction_hmask < vmaction_hsize) 662 vmaction_hmask = (vmaction_hmask << 1) | 1; 663 vmaction_hsize = vmaction_hmask + 1; 664 665 action_hash = kmalloc(sizeof(action_hash[0]) * vmaction_hsize, 666 M_ACTIONHASH, 667 M_WAITOK | M_ZERO); 668 669 for (i = 0; i < vmaction_hsize; i++) { 670 LIST_INIT(&action_hash[i].list); 671 lockinit(&action_hash[i].lk, "actlk", 0, LK_CANRECURSE); 672 } 673 } 674 SYSINIT(vm_pgend, SI_SUB_PROC0_POST, SI_ORDER_ANY, 675 vm_page_startup_finish, NULL); 676 677 678 /* 679 * Scan comparison function for Red-Black tree scans. An inclusive 680 * (start,end) is expected. Other fields are not used. 
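 *
 * Sketch of the intended caller pattern (assuming the usual RB_SCAN style
 * interface; 'the_callback' is a placeholder, and only start_pindex and
 * end_pindex in the info structure matter to this comparison function):
 *
 *	struct rb_vm_page_scan_info info;
 *
 *	info.start_pindex = start;
 *	info.end_pindex = end;
 *	vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp,
 *				the_callback, &info);
 *
 * Pages whose pindex falls below the range compare -1, above it +1, and
 * inside it 0 (the callback runs on those).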
681 */ 682 int 683 rb_vm_page_scancmp(struct vm_page *p, void *data) 684 { 685 struct rb_vm_page_scan_info *info = data; 686 687 if (p->pindex < info->start_pindex) 688 return(-1); 689 if (p->pindex > info->end_pindex) 690 return(1); 691 return(0); 692 } 693 694 int 695 rb_vm_page_compare(struct vm_page *p1, struct vm_page *p2) 696 { 697 if (p1->pindex < p2->pindex) 698 return(-1); 699 if (p1->pindex > p2->pindex) 700 return(1); 701 return(0); 702 } 703 704 void 705 vm_page_init(vm_page_t m) 706 { 707 /* do nothing for now. Called from pmap_page_init() */ 708 } 709 710 /* 711 * Each page queue has its own spin lock, which is fairly optimal for 712 * allocating and freeing pages at least. 713 * 714 * The caller must hold the vm_page_spin_lock() before locking a vm_page's 715 * queue spinlock via this function. Also note that m->queue cannot change 716 * unless both the page and queue are locked. 717 */ 718 static __inline 719 void 720 _vm_page_queue_spin_lock(vm_page_t m) 721 { 722 u_short queue; 723 724 queue = m->queue; 725 if (queue != PQ_NONE) { 726 spin_lock(&vm_page_queues[queue].spin); 727 KKASSERT(queue == m->queue); 728 } 729 } 730 731 static __inline 732 void 733 _vm_page_queue_spin_unlock(vm_page_t m) 734 { 735 u_short queue; 736 737 queue = m->queue; 738 cpu_ccfence(); 739 if (queue != PQ_NONE) 740 spin_unlock(&vm_page_queues[queue].spin); 741 } 742 743 static __inline 744 void 745 _vm_page_queues_spin_lock(u_short queue) 746 { 747 cpu_ccfence(); 748 if (queue != PQ_NONE) 749 spin_lock(&vm_page_queues[queue].spin); 750 } 751 752 753 static __inline 754 void 755 _vm_page_queues_spin_unlock(u_short queue) 756 { 757 cpu_ccfence(); 758 if (queue != PQ_NONE) 759 spin_unlock(&vm_page_queues[queue].spin); 760 } 761 762 void 763 vm_page_queue_spin_lock(vm_page_t m) 764 { 765 _vm_page_queue_spin_lock(m); 766 } 767 768 void 769 vm_page_queues_spin_lock(u_short queue) 770 { 771 _vm_page_queues_spin_lock(queue); 772 } 773 774 void 775 vm_page_queue_spin_unlock(vm_page_t m) 776 { 777 _vm_page_queue_spin_unlock(m); 778 } 779 780 void 781 vm_page_queues_spin_unlock(u_short queue) 782 { 783 _vm_page_queues_spin_unlock(queue); 784 } 785 786 /* 787 * This locks the specified vm_page and its queue in the proper order 788 * (page first, then queue). The queue may change so the caller must 789 * recheck on return. 790 */ 791 static __inline 792 void 793 _vm_page_and_queue_spin_lock(vm_page_t m) 794 { 795 vm_page_spin_lock(m); 796 _vm_page_queue_spin_lock(m); 797 } 798 799 static __inline 800 void 801 _vm_page_and_queue_spin_unlock(vm_page_t m) 802 { 803 _vm_page_queues_spin_unlock(m->queue); 804 vm_page_spin_unlock(m); 805 } 806 807 void 808 vm_page_and_queue_spin_unlock(vm_page_t m) 809 { 810 _vm_page_and_queue_spin_unlock(m); 811 } 812 813 void 814 vm_page_and_queue_spin_lock(vm_page_t m) 815 { 816 _vm_page_and_queue_spin_lock(m); 817 } 818 819 /* 820 * Helper function removes vm_page from its current queue. 821 * Returns the base queue the page used to be on. 822 * 823 * The vm_page and the queue must be spinlocked. 824 * This function will unlock the queue but leave the page spinlocked. 825 */ 826 static __inline u_short 827 _vm_page_rem_queue_spinlocked(vm_page_t m) 828 { 829 struct vpgqueues *pq; 830 u_short queue; 831 u_short oqueue; 832 int *cnt; 833 834 queue = m->queue; 835 if (queue != PQ_NONE) { 836 pq = &vm_page_queues[queue]; 837 TAILQ_REMOVE(&pq->pl, m, pageq); 838 839 /* 840 * Adjust our pcpu stats. 
In order for the nominal low-memory 841 * algorithms to work properly we don't let any pcpu stat get 842 * too negative before we force it to be rolled-up into the 843 * global stats. Otherwise our pageout and vm_wait tests 844 * will fail badly. 845 * 846 * The idea here is to reduce unnecessary SMP cache 847 * mastership changes in the global vmstats, which can be 848 * particularly bad in multi-socket systems. 849 */ 850 cnt = (int *)((char *)&mycpu->gd_vmstats_adj + pq->cnt_offset); 851 atomic_add_int(cnt, -1); 852 if (*cnt < -VMMETER_SLOP_COUNT) { 853 u_int copy = atomic_swap_int(cnt, 0); 854 cnt = (int *)((char *)&vmstats + pq->cnt_offset); 855 atomic_add_int(cnt, copy); 856 cnt = (int *)((char *)&mycpu->gd_vmstats + 857 pq->cnt_offset); 858 atomic_add_int(cnt, copy); 859 } 860 pq->lcnt--; 861 m->queue = PQ_NONE; 862 oqueue = queue; 863 queue -= m->pc; 864 vm_page_queues_spin_unlock(oqueue); /* intended */ 865 } 866 return queue; 867 } 868 869 /* 870 * Helper function places the vm_page on the specified queue. Generally 871 * speaking only PQ_FREE pages are placed at the head, to allow them to 872 * be allocated sooner rather than later on the assumption that they 873 * are cache-hot. 874 * 875 * The vm_page must be spinlocked. 876 * This function will return with both the page and the queue locked. 877 */ 878 static __inline void 879 _vm_page_add_queue_spinlocked(vm_page_t m, u_short queue, int athead) 880 { 881 struct vpgqueues *pq; 882 u_int *cnt; 883 884 KKASSERT(m->queue == PQ_NONE); 885 886 if (queue != PQ_NONE) { 887 vm_page_queues_spin_lock(queue); 888 pq = &vm_page_queues[queue]; 889 ++pq->lcnt; 890 891 /* 892 * Adjust our pcpu stats. If a system entity really needs 893 * to incorporate the count it will call vmstats_rollup() 894 * to roll it all up into the global vmstats strufture. 895 */ 896 cnt = (int *)((char *)&mycpu->gd_vmstats_adj + pq->cnt_offset); 897 atomic_add_int(cnt, 1); 898 899 /* 900 * PQ_FREE is always handled LIFO style to try to provide 901 * cache-hot pages to programs. 902 */ 903 m->queue = queue; 904 if (queue - m->pc == PQ_FREE) { 905 TAILQ_INSERT_HEAD(&pq->pl, m, pageq); 906 } else if (athead) { 907 TAILQ_INSERT_HEAD(&pq->pl, m, pageq); 908 } else { 909 TAILQ_INSERT_TAIL(&pq->pl, m, pageq); 910 } 911 /* leave the queue spinlocked */ 912 } 913 } 914 915 /* 916 * Wait until page is no longer PG_BUSY or (if also_m_busy is TRUE) 917 * m->busy is zero. Returns TRUE if it had to sleep, FALSE if we 918 * did not. Only one sleep call will be made before returning. 919 * 920 * This function does NOT busy the page and on return the page is not 921 * guaranteed to be available. 922 */ 923 void 924 vm_page_sleep_busy(vm_page_t m, int also_m_busy, const char *msg) 925 { 926 u_int32_t flags; 927 928 for (;;) { 929 flags = m->flags; 930 cpu_ccfence(); 931 932 if ((flags & PG_BUSY) == 0 && 933 (also_m_busy == 0 || (flags & PG_SBUSY) == 0)) { 934 break; 935 } 936 tsleep_interlock(m, 0); 937 if (atomic_cmpset_int(&m->flags, flags, 938 flags | PG_WANTED | PG_REFERENCED)) { 939 tsleep(m, PINTERLOCKED, msg, 0); 940 break; 941 } 942 } 943 } 944 945 /* 946 * This calculates and returns a page color given an optional VM object and 947 * either a pindex or an iterator. We attempt to return a cpu-localized 948 * pg_color that is still roughly 16-way set-associative. The CPU topology 949 * is used if it was probed. 950 * 951 * The caller may use the returned value to index into e.g. 
PQ_FREE when 952 * allocating a page in order to nominally obtain pages that are hopefully 953 * already localized to the requesting cpu. This function is not able to 954 * provide any sort of guarantee of this, but does its best to improve 955 * hardware cache management performance. 956 * 957 * WARNING! The caller must mask the returned value with PQ_L2_MASK. 958 */ 959 u_short 960 vm_get_pg_color(int cpuid, vm_object_t object, vm_pindex_t pindex) 961 { 962 u_short pg_color; 963 int phys_id; 964 int core_id; 965 int object_pg_color; 966 967 phys_id = get_cpu_phys_id(cpuid); 968 core_id = get_cpu_core_id(cpuid); 969 object_pg_color = object ? object->pg_color : 0; 970 971 if (cpu_topology_phys_ids && cpu_topology_core_ids) { 972 int grpsize; 973 974 /* 975 * Break us down by socket and cpu 976 */ 977 pg_color = phys_id * PQ_L2_SIZE / cpu_topology_phys_ids; 978 pg_color += core_id * PQ_L2_SIZE / 979 (cpu_topology_core_ids * cpu_topology_phys_ids); 980 981 /* 982 * Calculate remaining component for object/queue color 983 */ 984 grpsize = PQ_L2_SIZE / (cpu_topology_core_ids * 985 cpu_topology_phys_ids); 986 if (grpsize >= 8) { 987 pg_color += (pindex + object_pg_color) % grpsize; 988 } else { 989 if (grpsize <= 2) { 990 grpsize = 8; 991 } else { 992 /* 3->9, 4->8, 5->10, 6->12, 7->14 */ 993 grpsize += grpsize; 994 if (grpsize < 8) 995 grpsize += grpsize; 996 } 997 pg_color += (pindex + object_pg_color) % grpsize; 998 } 999 } else { 1000 /* 1001 * Unknown topology, distribute things evenly. 1002 */ 1003 pg_color = cpuid * PQ_L2_SIZE / ncpus; 1004 pg_color += pindex + object_pg_color; 1005 } 1006 return (pg_color & PQ_L2_MASK); 1007 } 1008 1009 /* 1010 * Wait until PG_BUSY can be set, then set it. If also_m_busy is TRUE we 1011 * also wait for m->busy to become 0 before setting PG_BUSY. 1012 */ 1013 void 1014 VM_PAGE_DEBUG_EXT(vm_page_busy_wait)(vm_page_t m, 1015 int also_m_busy, const char *msg 1016 VM_PAGE_DEBUG_ARGS) 1017 { 1018 u_int32_t flags; 1019 1020 for (;;) { 1021 flags = m->flags; 1022 cpu_ccfence(); 1023 if (flags & PG_BUSY) { 1024 tsleep_interlock(m, 0); 1025 if (atomic_cmpset_int(&m->flags, flags, 1026 flags | PG_WANTED | PG_REFERENCED)) { 1027 tsleep(m, PINTERLOCKED, msg, 0); 1028 } 1029 } else if (also_m_busy && (flags & PG_SBUSY)) { 1030 tsleep_interlock(m, 0); 1031 if (atomic_cmpset_int(&m->flags, flags, 1032 flags | PG_WANTED | PG_REFERENCED)) { 1033 tsleep(m, PINTERLOCKED, msg, 0); 1034 } 1035 } else { 1036 if (atomic_cmpset_int(&m->flags, flags, 1037 flags | PG_BUSY)) { 1038 #ifdef VM_PAGE_DEBUG 1039 m->busy_func = func; 1040 m->busy_line = lineno; 1041 #endif 1042 break; 1043 } 1044 } 1045 } 1046 } 1047 1048 /* 1049 * Attempt to set PG_BUSY. If also_m_busy is TRUE we only succeed if m->busy 1050 * is also 0. 1051 * 1052 * Returns non-zero on failure. 1053 */ 1054 int 1055 VM_PAGE_DEBUG_EXT(vm_page_busy_try)(vm_page_t m, int also_m_busy 1056 VM_PAGE_DEBUG_ARGS) 1057 { 1058 u_int32_t flags; 1059 1060 for (;;) { 1061 flags = m->flags; 1062 cpu_ccfence(); 1063 if (flags & PG_BUSY) 1064 return TRUE; 1065 if (also_m_busy && (flags & PG_SBUSY)) 1066 return TRUE; 1067 if (atomic_cmpset_int(&m->flags, flags, flags | PG_BUSY)) { 1068 #ifdef VM_PAGE_DEBUG 1069 m->busy_func = func; 1070 m->busy_line = lineno; 1071 #endif 1072 return FALSE; 1073 } 1074 } 1075 } 1076 1077 /* 1078 * Clear the PG_BUSY flag and return non-zero to indicate to the caller 1079 * that a wakeup() should be performed. 1080 * 1081 * The vm_page must be spinlocked and will remain spinlocked on return. 
1082 * The related queue must NOT be spinlocked (which could deadlock us). 1083 * 1084 * (inline version) 1085 */ 1086 static __inline 1087 int 1088 _vm_page_wakeup(vm_page_t m) 1089 { 1090 u_int32_t flags; 1091 1092 for (;;) { 1093 flags = m->flags; 1094 cpu_ccfence(); 1095 if (atomic_cmpset_int(&m->flags, flags, 1096 flags & ~(PG_BUSY | PG_WANTED))) { 1097 break; 1098 } 1099 } 1100 return(flags & PG_WANTED); 1101 } 1102 1103 /* 1104 * Clear the PG_BUSY flag and wakeup anyone waiting for the page. This 1105 * is typically the last call you make on a page before moving onto 1106 * other things. 1107 */ 1108 void 1109 vm_page_wakeup(vm_page_t m) 1110 { 1111 KASSERT(m->flags & PG_BUSY, ("vm_page_wakeup: page not busy!!!")); 1112 vm_page_spin_lock(m); 1113 if (_vm_page_wakeup(m)) { 1114 vm_page_spin_unlock(m); 1115 wakeup(m); 1116 } else { 1117 vm_page_spin_unlock(m); 1118 } 1119 } 1120 1121 /* 1122 * Holding a page keeps it from being reused. Other parts of the system 1123 * can still disassociate the page from its current object and free it, or 1124 * perform read or write I/O on it and/or otherwise manipulate the page, 1125 * but if the page is held the VM system will leave the page and its data 1126 * intact and not reuse the page for other purposes until the last hold 1127 * reference is released. (see vm_page_wire() if you want to prevent the 1128 * page from being disassociated from its object too). 1129 * 1130 * The caller must still validate the contents of the page and, if necessary, 1131 * wait for any pending I/O (e.g. vm_page_sleep_busy() loop) to complete 1132 * before manipulating the page. 1133 * 1134 * XXX get vm_page_spin_lock() here and move FREE->HOLD if necessary 1135 */ 1136 void 1137 vm_page_hold(vm_page_t m) 1138 { 1139 vm_page_spin_lock(m); 1140 atomic_add_int(&m->hold_count, 1); 1141 if (m->queue - m->pc == PQ_FREE) { 1142 _vm_page_queue_spin_lock(m); 1143 _vm_page_rem_queue_spinlocked(m); 1144 _vm_page_add_queue_spinlocked(m, PQ_HOLD + m->pc, 0); 1145 _vm_page_queue_spin_unlock(m); 1146 } 1147 vm_page_spin_unlock(m); 1148 } 1149 1150 /* 1151 * The opposite of vm_page_hold(). If the page is on the HOLD queue 1152 * it was freed while held and must be moved back to the FREE queue. 1153 */ 1154 void 1155 vm_page_unhold(vm_page_t m) 1156 { 1157 KASSERT(m->hold_count > 0 && m->queue - m->pc != PQ_FREE, 1158 ("vm_page_unhold: pg %p illegal hold_count (%d) or on FREE queue (%d)", 1159 m, m->hold_count, m->queue - m->pc)); 1160 vm_page_spin_lock(m); 1161 atomic_add_int(&m->hold_count, -1); 1162 if (m->hold_count == 0 && m->queue - m->pc == PQ_HOLD) { 1163 _vm_page_queue_spin_lock(m); 1164 _vm_page_rem_queue_spinlocked(m); 1165 _vm_page_add_queue_spinlocked(m, PQ_FREE + m->pc, 1); 1166 _vm_page_queue_spin_unlock(m); 1167 } 1168 vm_page_spin_unlock(m); 1169 } 1170 1171 /* 1172 * vm_page_getfake: 1173 * 1174 * Create a fictitious page with the specified physical address and 1175 * memory attribute. The memory attribute is the only the machine- 1176 * dependent aspect of a fictitious page that must be initialized. 1177 */ 1178 1179 void 1180 vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr) 1181 { 1182 1183 if ((m->flags & PG_FICTITIOUS) != 0) { 1184 /* 1185 * The page's memattr might have changed since the 1186 * previous initialization. Update the pmap to the 1187 * new memattr. 1188 */ 1189 goto memattr; 1190 } 1191 m->phys_addr = paddr; 1192 m->queue = PQ_NONE; 1193 /* Fictitious pages don't use "segind". 
*/ 1194 /* Fictitious pages don't use "order" or "pool". */ 1195 m->flags = PG_FICTITIOUS | PG_UNMANAGED | PG_BUSY; 1196 m->wire_count = 1; 1197 spin_init(&m->spin, "fake_page"); 1198 pmap_page_init(m); 1199 memattr: 1200 pmap_page_set_memattr(m, memattr); 1201 } 1202 1203 /* 1204 * Inserts the given vm_page into the object and object list. 1205 * 1206 * The pagetables are not updated but will presumably fault the page 1207 * in if necessary, or if a kernel page the caller will at some point 1208 * enter the page into the kernel's pmap. We are not allowed to block 1209 * here so we *can't* do this anyway. 1210 * 1211 * This routine may not block. 1212 * This routine must be called with the vm_object held. 1213 * This routine must be called with a critical section held. 1214 * 1215 * This routine returns TRUE if the page was inserted into the object 1216 * successfully, and FALSE if the page already exists in the object. 1217 */ 1218 int 1219 vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex) 1220 { 1221 ASSERT_LWKT_TOKEN_HELD_EXCL(vm_object_token(object)); 1222 if (m->object != NULL) 1223 panic("vm_page_insert: already inserted"); 1224 1225 atomic_add_int(&object->generation, 1); 1226 1227 /* 1228 * Record the object/offset pair in this page and add the 1229 * pv_list_count of the page to the object. 1230 * 1231 * The vm_page spin lock is required for interactions with the pmap. 1232 */ 1233 vm_page_spin_lock(m); 1234 m->object = object; 1235 m->pindex = pindex; 1236 if (vm_page_rb_tree_RB_INSERT(&object->rb_memq, m)) { 1237 m->object = NULL; 1238 m->pindex = 0; 1239 vm_page_spin_unlock(m); 1240 return FALSE; 1241 } 1242 ++object->resident_page_count; 1243 ++mycpu->gd_vmtotal.t_rm; 1244 vm_page_spin_unlock(m); 1245 1246 /* 1247 * Since we are inserting a new and possibly dirty page, 1248 * update the object's OBJ_WRITEABLE and OBJ_MIGHTBEDIRTY flags. 1249 */ 1250 if ((m->valid & m->dirty) || 1251 (m->flags & (PG_WRITEABLE | PG_NEED_COMMIT))) 1252 vm_object_set_writeable_dirty(object); 1253 1254 /* 1255 * Checks for a swap assignment and sets PG_SWAPPED if appropriate. 1256 */ 1257 swap_pager_page_inserted(m); 1258 return TRUE; 1259 } 1260 1261 /* 1262 * Removes the given vm_page_t from the (object,index) table 1263 * 1264 * The underlying pmap entry (if any) is NOT removed here. 1265 * This routine may not block. 1266 * 1267 * The page must be BUSY and will remain BUSY on return. 1268 * No other requirements. 1269 * 1270 * NOTE: FreeBSD side effect was to unbusy the page on return. We leave 1271 * it busy. 1272 */ 1273 void 1274 vm_page_remove(vm_page_t m) 1275 { 1276 vm_object_t object; 1277 1278 if (m->object == NULL) { 1279 return; 1280 } 1281 1282 if ((m->flags & PG_BUSY) == 0) 1283 panic("vm_page_remove: page not busy"); 1284 1285 object = m->object; 1286 1287 vm_object_hold(object); 1288 1289 /* 1290 * Remove the page from the object and update the object. 1291 * 1292 * The vm_page spin lock is required for interactions with the pmap. 1293 */ 1294 vm_page_spin_lock(m); 1295 vm_page_rb_tree_RB_REMOVE(&object->rb_memq, m); 1296 --object->resident_page_count; 1297 --mycpu->gd_vmtotal.t_rm; 1298 m->object = NULL; 1299 atomic_add_int(&object->generation, 1); 1300 vm_page_spin_unlock(m); 1301 1302 vm_object_drop(object); 1303 } 1304 1305 /* 1306 * Locate and return the page at (object, pindex), or NULL if the 1307 * page could not be found. 1308 * 1309 * The caller must hold the vm_object token. 
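 *
 * Typical usage, sketched with hypothetical locals (vm_object_hold()
 * acquires the object token):
 *
 *	vm_object_hold(object);
 *	m = vm_page_lookup(object, pindex);
 *	if (m) {
 *		... the page is not busied by this call ...
 *	}
 *	vm_object_drop(object);
 *
 * Use vm_page_lookup_busy_wait() or vm_page_lookup_busy_try() below when
 * the page must also be busied as part of the lookup.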
1310 */ 1311 vm_page_t 1312 vm_page_lookup(vm_object_t object, vm_pindex_t pindex) 1313 { 1314 vm_page_t m; 1315 1316 /* 1317 * Search the hash table for this object/offset pair 1318 */ 1319 ASSERT_LWKT_TOKEN_HELD(vm_object_token(object)); 1320 m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq, pindex); 1321 KKASSERT(m == NULL || (m->object == object && m->pindex == pindex)); 1322 return(m); 1323 } 1324 1325 vm_page_t 1326 VM_PAGE_DEBUG_EXT(vm_page_lookup_busy_wait)(struct vm_object *object, 1327 vm_pindex_t pindex, 1328 int also_m_busy, const char *msg 1329 VM_PAGE_DEBUG_ARGS) 1330 { 1331 u_int32_t flags; 1332 vm_page_t m; 1333 1334 ASSERT_LWKT_TOKEN_HELD(vm_object_token(object)); 1335 m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq, pindex); 1336 while (m) { 1337 KKASSERT(m->object == object && m->pindex == pindex); 1338 flags = m->flags; 1339 cpu_ccfence(); 1340 if (flags & PG_BUSY) { 1341 tsleep_interlock(m, 0); 1342 if (atomic_cmpset_int(&m->flags, flags, 1343 flags | PG_WANTED | PG_REFERENCED)) { 1344 tsleep(m, PINTERLOCKED, msg, 0); 1345 m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq, 1346 pindex); 1347 } 1348 } else if (also_m_busy && (flags & PG_SBUSY)) { 1349 tsleep_interlock(m, 0); 1350 if (atomic_cmpset_int(&m->flags, flags, 1351 flags | PG_WANTED | PG_REFERENCED)) { 1352 tsleep(m, PINTERLOCKED, msg, 0); 1353 m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq, 1354 pindex); 1355 } 1356 } else if (atomic_cmpset_int(&m->flags, flags, 1357 flags | PG_BUSY)) { 1358 #ifdef VM_PAGE_DEBUG 1359 m->busy_func = func; 1360 m->busy_line = lineno; 1361 #endif 1362 break; 1363 } 1364 } 1365 return m; 1366 } 1367 1368 /* 1369 * Attempt to lookup and busy a page. 1370 * 1371 * Returns NULL if the page could not be found 1372 * 1373 * Returns a vm_page and error == TRUE if the page exists but could not 1374 * be busied. 1375 * 1376 * Returns a vm_page and error == FALSE on success. 1377 */ 1378 vm_page_t 1379 VM_PAGE_DEBUG_EXT(vm_page_lookup_busy_try)(struct vm_object *object, 1380 vm_pindex_t pindex, 1381 int also_m_busy, int *errorp 1382 VM_PAGE_DEBUG_ARGS) 1383 { 1384 u_int32_t flags; 1385 vm_page_t m; 1386 1387 ASSERT_LWKT_TOKEN_HELD(vm_object_token(object)); 1388 m = vm_page_rb_tree_RB_LOOKUP(&object->rb_memq, pindex); 1389 *errorp = FALSE; 1390 while (m) { 1391 KKASSERT(m->object == object && m->pindex == pindex); 1392 flags = m->flags; 1393 cpu_ccfence(); 1394 if (flags & PG_BUSY) { 1395 *errorp = TRUE; 1396 break; 1397 } 1398 if (also_m_busy && (flags & PG_SBUSY)) { 1399 *errorp = TRUE; 1400 break; 1401 } 1402 if (atomic_cmpset_int(&m->flags, flags, flags | PG_BUSY)) { 1403 #ifdef VM_PAGE_DEBUG 1404 m->busy_func = func; 1405 m->busy_line = lineno; 1406 #endif 1407 break; 1408 } 1409 } 1410 return m; 1411 } 1412 1413 /* 1414 * Attempt to repurpose the passed-in page. If the passed-in page cannot 1415 * be repurposed it will be released, *must_reenter will be set to 1, and 1416 * this function will fall-through to vm_page_lookup_busy_try(). 1417 * 1418 * The passed-in page must be wired and not busy. The returned page will 1419 * be busied and not wired. 1420 * 1421 * A different page may be returned. The returned page will be busied and 1422 * not wired. 1423 * 1424 * NULL can be returned. If so, the required page could not be busied. 1425 * The passed-in page will be unwired. 
1426 */ 1427 vm_page_t 1428 vm_page_repurpose(struct vm_object *object, vm_pindex_t pindex, 1429 int also_m_busy, int *errorp, vm_page_t m, 1430 int *must_reenter, int *iswired) 1431 { 1432 if (m) { 1433 /* 1434 * Do not mess with pages in a complex state, such as pages 1435 * which are mapped, as repurposing such pages can be more 1436 * expensive than simply allocatin a new one. 1437 * 1438 * NOTE: Soft-busying can deadlock against putpages or I/O 1439 * so we only allow hard-busying here. 1440 */ 1441 KKASSERT(also_m_busy == FALSE); 1442 vm_page_busy_wait(m, also_m_busy, "biodep"); 1443 1444 if ((m->flags & (PG_UNMANAGED | PG_MAPPED | 1445 PG_FICTITIOUS | PG_SBUSY)) || 1446 m->busy || m->wire_count != 1 || m->hold_count) { 1447 vm_page_unwire(m, 0); 1448 vm_page_wakeup(m); 1449 /* fall through to normal lookup */ 1450 } else if (m->dirty || (m->flags & PG_NEED_COMMIT)) { 1451 vm_page_unwire(m, 0); 1452 vm_page_deactivate(m); 1453 vm_page_wakeup(m); 1454 /* fall through to normal lookup */ 1455 } else { 1456 /* 1457 * We can safely repurpose the page. It should 1458 * already be unqueued. 1459 */ 1460 KKASSERT(m->queue == PQ_NONE && m->dirty == 0); 1461 vm_page_remove(m); 1462 m->valid = 0; 1463 m->act_count = 0; 1464 if (vm_page_insert(m, object, pindex)) { 1465 *errorp = 0; 1466 *iswired = 1; 1467 1468 return m; 1469 } 1470 vm_page_unwire(m, 0); 1471 vm_page_free(m); 1472 /* fall through to normal lookup */ 1473 } 1474 } 1475 1476 /* 1477 * Cannot repurpose page, attempt to locate the desired page. May 1478 * return NULL. 1479 */ 1480 *must_reenter = 1; 1481 *iswired = 0; 1482 m = vm_page_lookup_busy_try(object, pindex, also_m_busy, errorp); 1483 1484 return m; 1485 } 1486 1487 /* 1488 * Caller must hold the related vm_object 1489 */ 1490 vm_page_t 1491 vm_page_next(vm_page_t m) 1492 { 1493 vm_page_t next; 1494 1495 next = vm_page_rb_tree_RB_NEXT(m); 1496 if (next && next->pindex != m->pindex + 1) 1497 next = NULL; 1498 return (next); 1499 } 1500 1501 /* 1502 * vm_page_rename() 1503 * 1504 * Move the given vm_page from its current object to the specified 1505 * target object/offset. The page must be busy and will remain so 1506 * on return. 1507 * 1508 * new_object must be held. 1509 * This routine might block. XXX ? 1510 * 1511 * NOTE: Swap associated with the page must be invalidated by the move. We 1512 * have to do this for several reasons: (1) we aren't freeing the 1513 * page, (2) we are dirtying the page, (3) the VM system is probably 1514 * moving the page from object A to B, and will then later move 1515 * the backing store from A to B and we can't have a conflict. 1516 * 1517 * NOTE: We *always* dirty the page. It is necessary both for the 1518 * fact that we moved it, and because we may be invalidating 1519 * swap. If the page is on the cache, we have to deactivate it 1520 * or vm_page_dirty() will panic. Dirty pages are not allowed 1521 * on the cache. 1522 */ 1523 void 1524 vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex) 1525 { 1526 KKASSERT(m->flags & PG_BUSY); 1527 ASSERT_LWKT_TOKEN_HELD_EXCL(vm_object_token(new_object)); 1528 if (m->object) { 1529 ASSERT_LWKT_TOKEN_HELD_EXCL(vm_object_token(m->object)); 1530 vm_page_remove(m); 1531 } 1532 if (vm_page_insert(m, new_object, new_pindex) == FALSE) { 1533 panic("vm_page_rename: target exists (%p,%"PRIu64")", 1534 new_object, new_pindex); 1535 } 1536 if (m->queue - m->pc == PQ_CACHE) 1537 vm_page_deactivate(m); 1538 vm_page_dirty(m); 1539 } 1540 1541 /* 1542 * vm_page_unqueue() without any wakeup. 
This routine is used when a page 1543 * is to remain BUSYied by the caller. 1544 * 1545 * This routine may not block. 1546 */ 1547 void 1548 vm_page_unqueue_nowakeup(vm_page_t m) 1549 { 1550 vm_page_and_queue_spin_lock(m); 1551 (void)_vm_page_rem_queue_spinlocked(m); 1552 vm_page_spin_unlock(m); 1553 } 1554 1555 /* 1556 * vm_page_unqueue() - Remove a page from its queue, wakeup the pagedemon 1557 * if necessary. 1558 * 1559 * This routine may not block. 1560 */ 1561 void 1562 vm_page_unqueue(vm_page_t m) 1563 { 1564 u_short queue; 1565 1566 vm_page_and_queue_spin_lock(m); 1567 queue = _vm_page_rem_queue_spinlocked(m); 1568 if (queue == PQ_FREE || queue == PQ_CACHE) { 1569 vm_page_spin_unlock(m); 1570 pagedaemon_wakeup(); 1571 } else { 1572 vm_page_spin_unlock(m); 1573 } 1574 } 1575 1576 /* 1577 * vm_page_list_find() 1578 * 1579 * Find a page on the specified queue with color optimization. 1580 * 1581 * The page coloring optimization attempts to locate a page that does 1582 * not overload other nearby pages in the object in the cpu's L1 or L2 1583 * caches. We need this optimization because cpu caches tend to be 1584 * physical caches, while object spaces tend to be virtual. 1585 * 1586 * The page coloring optimization also, very importantly, tries to localize 1587 * memory to cpus and physical sockets. 1588 * 1589 * On MP systems each PQ_FREE and PQ_CACHE color queue has its own spinlock 1590 * and the algorithm is adjusted to localize allocations on a per-core basis. 1591 * This is done by 'twisting' the colors. 1592 * 1593 * The page is returned spinlocked and removed from its queue (it will 1594 * be on PQ_NONE), or NULL. The page is not PG_BUSY'd. The caller 1595 * is responsible for dealing with the busy-page case (usually by 1596 * deactivating the page and looping). 1597 * 1598 * NOTE: This routine is carefully inlined. A non-inlined version 1599 * is available for outside callers but the only critical path is 1600 * from within this source file. 1601 * 1602 * NOTE: This routine assumes that the vm_pages found in PQ_CACHE and PQ_FREE 1603 * represent stable storage, allowing us to order our locks vm_page 1604 * first, then queue. 1605 */ 1606 static __inline 1607 vm_page_t 1608 _vm_page_list_find(int basequeue, int index) 1609 { 1610 vm_page_t m; 1611 1612 for (;;) { 1613 m = TAILQ_FIRST(&vm_page_queues[basequeue+index].pl); 1614 if (m == NULL) { 1615 m = _vm_page_list_find2(basequeue, index); 1616 return(m); 1617 } 1618 vm_page_and_queue_spin_lock(m); 1619 if (m->queue == basequeue + index) { 1620 _vm_page_rem_queue_spinlocked(m); 1621 /* vm_page_t spin held, no queue spin */ 1622 break; 1623 } 1624 vm_page_and_queue_spin_unlock(m); 1625 } 1626 return(m); 1627 } 1628 1629 /* 1630 * If we could not find the page in the desired queue try to find it in 1631 * a nearby queue. 1632 */ 1633 static vm_page_t 1634 _vm_page_list_find2(int basequeue, int index) 1635 { 1636 struct vpgqueues *pq; 1637 vm_page_t m = NULL; 1638 int pqmask = PQ_SET_ASSOC_MASK >> 1; 1639 int pqi; 1640 int i; 1641 1642 index &= PQ_L2_MASK; 1643 pq = &vm_page_queues[basequeue]; 1644 1645 /* 1646 * Run local sets of 16, 32, 64, 128, and the whole queue if all 1647 * else fails (PQ_L2_MASK which is 255). 
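 *
 * Worked example: with index 37 the first pass uses pqmask 15, so
 * pqi = (37 & ~15) | ((37 + i) & 15) walks the aligned group of 16 queues
 * 32..47 starting at 37 and wrapping (37, 38, ... 47, 32, ... 36).  Each
 * later pass doubles the group size (32, 64, 128) until the final pass
 * covers the entire 256-queue set.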
1648 */ 1649 do { 1650 pqmask = (pqmask << 1) | 1; 1651 for (i = 0; i <= pqmask; ++i) { 1652 pqi = (index & ~pqmask) | ((index + i) & pqmask); 1653 m = TAILQ_FIRST(&pq[pqi].pl); 1654 if (m) { 1655 _vm_page_and_queue_spin_lock(m); 1656 if (m->queue == basequeue + pqi) { 1657 _vm_page_rem_queue_spinlocked(m); 1658 return(m); 1659 } 1660 _vm_page_and_queue_spin_unlock(m); 1661 --i; 1662 continue; 1663 } 1664 } 1665 } while (pqmask != PQ_L2_MASK); 1666 1667 return(m); 1668 } 1669 1670 /* 1671 * Returns a vm_page candidate for allocation. The page is not busied so 1672 * it can move around. The caller must busy the page (and typically 1673 * deactivate it if it cannot be busied!) 1674 * 1675 * Returns a spinlocked vm_page that has been removed from its queue. 1676 */ 1677 vm_page_t 1678 vm_page_list_find(int basequeue, int index) 1679 { 1680 return(_vm_page_list_find(basequeue, index)); 1681 } 1682 1683 /* 1684 * Find a page on the cache queue with color optimization, remove it 1685 * from the queue, and busy it. The returned page will not be spinlocked. 1686 * 1687 * A candidate failure will be deactivated. Candidates can fail due to 1688 * being busied by someone else, in which case they will be deactivated. 1689 * 1690 * This routine may not block. 1691 * 1692 */ 1693 static vm_page_t 1694 vm_page_select_cache(u_short pg_color) 1695 { 1696 vm_page_t m; 1697 1698 for (;;) { 1699 m = _vm_page_list_find(PQ_CACHE, pg_color & PQ_L2_MASK); 1700 if (m == NULL) 1701 break; 1702 /* 1703 * (m) has been removed from its queue and spinlocked 1704 */ 1705 if (vm_page_busy_try(m, TRUE)) { 1706 _vm_page_deactivate_locked(m, 0); 1707 vm_page_spin_unlock(m); 1708 } else { 1709 /* 1710 * We successfully busied the page 1711 */ 1712 if ((m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) == 0 && 1713 m->hold_count == 0 && 1714 m->wire_count == 0 && 1715 (m->dirty & m->valid) == 0) { 1716 vm_page_spin_unlock(m); 1717 pagedaemon_wakeup(); 1718 return(m); 1719 } 1720 1721 /* 1722 * The page cannot be recycled, deactivate it. 1723 */ 1724 _vm_page_deactivate_locked(m, 0); 1725 if (_vm_page_wakeup(m)) { 1726 vm_page_spin_unlock(m); 1727 wakeup(m); 1728 } else { 1729 vm_page_spin_unlock(m); 1730 } 1731 } 1732 } 1733 return (m); 1734 } 1735 1736 /* 1737 * Find a free page. We attempt to inline the nominal case and fall back 1738 * to _vm_page_select_free() otherwise. A busied page is removed from 1739 * the queue and returned. 1740 * 1741 * This routine may not block. 1742 */ 1743 static __inline vm_page_t 1744 vm_page_select_free(u_short pg_color) 1745 { 1746 vm_page_t m; 1747 1748 for (;;) { 1749 m = _vm_page_list_find(PQ_FREE, pg_color & PQ_L2_MASK); 1750 if (m == NULL) 1751 break; 1752 if (vm_page_busy_try(m, TRUE)) { 1753 /* 1754 * Various mechanisms such as a pmap_collect can 1755 * result in a busy page on the free queue. We 1756 * have to move the page out of the way so we can 1757 * retry the allocation. If the other thread is not 1758 * allocating the page then m->valid will remain 0 and 1759 * the pageout daemon will free the page later on. 1760 * 1761 * Since we could not busy the page, however, we 1762 * cannot make assumptions as to whether the page 1763 * will be allocated by the other thread or not, 1764 * so all we can do is deactivate it to move it out 1765 * of the way. In particular, if the other thread 1766 * wires the page it may wind up on the inactive 1767 * queue and the pageout daemon will have to deal 1768 * with that case too. 
1769 */ 1770 _vm_page_deactivate_locked(m, 0); 1771 vm_page_spin_unlock(m); 1772 } else { 1773 /* 1774 * Theoretically if we are able to busy the page 1775 * atomic with the queue removal (using the vm_page 1776 * lock) nobody else should be able to mess with the 1777 * page before us. 1778 */ 1779 KKASSERT((m->flags & (PG_UNMANAGED | 1780 PG_NEED_COMMIT)) == 0); 1781 KASSERT(m->hold_count == 0, ("m->hold_count is not zero " 1782 "pg %p q=%d flags=%08x hold=%d wire=%d", 1783 m, m->queue, m->flags, m->hold_count, m->wire_count)); 1784 KKASSERT(m->wire_count == 0); 1785 vm_page_spin_unlock(m); 1786 pagedaemon_wakeup(); 1787 1788 /* return busied and removed page */ 1789 return(m); 1790 } 1791 } 1792 return(m); 1793 } 1794 1795 /* 1796 * vm_page_alloc() 1797 * 1798 * Allocate and return a memory cell associated with this VM object/offset 1799 * pair. If object is NULL an unassociated page will be allocated. 1800 * 1801 * The returned page will be busied and removed from its queues. This 1802 * routine can block and may return NULL if a race occurs and the page 1803 * is found to already exist at the specified (object, pindex). 1804 * 1805 * VM_ALLOC_NORMAL allow use of cache pages, nominal free drain 1806 * VM_ALLOC_QUICK like normal but cannot use cache 1807 * VM_ALLOC_SYSTEM greater free drain 1808 * VM_ALLOC_INTERRUPT allow free list to be completely drained 1809 * VM_ALLOC_ZERO advisory request for pre-zero'd page only 1810 * VM_ALLOC_FORCE_ZERO advisory request for pre-zero'd page only 1811 * VM_ALLOC_NULL_OK ok to return NULL on insertion collision 1812 * (see vm_page_grab()) 1813 * VM_ALLOC_USE_GD ok to use per-gd cache 1814 * 1815 * VM_ALLOC_CPU(n) allocate using specified cpu localization 1816 * 1817 * The object must be held if not NULL 1818 * This routine may not block 1819 * 1820 * Additional special handling is required when called from an interrupt 1821 * (VM_ALLOC_INTERRUPT). We are not allowed to mess with the page cache 1822 * in this case. 1823 */ 1824 vm_page_t 1825 vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int page_req) 1826 { 1827 globaldata_t gd; 1828 vm_object_t obj; 1829 vm_page_t m; 1830 u_short pg_color; 1831 int cpuid_local; 1832 1833 #if 0 1834 /* 1835 * Special per-cpu free VM page cache. The pages are pre-busied 1836 * and pre-zerod for us. 1837 */ 1838 if (gd->gd_vmpg_count && (page_req & VM_ALLOC_USE_GD)) { 1839 crit_enter_gd(gd); 1840 if (gd->gd_vmpg_count) { 1841 m = gd->gd_vmpg_array[--gd->gd_vmpg_count]; 1842 crit_exit_gd(gd); 1843 goto done; 1844 } 1845 crit_exit_gd(gd); 1846 } 1847 #endif 1848 m = NULL; 1849 1850 /* 1851 * CPU LOCALIZATION 1852 * 1853 * CPU localization algorithm. Break the page queues up by physical 1854 * id and core id (note that two cpu threads will have the same core 1855 * id, and core_id != gd_cpuid). 1856 * 1857 * This is nowhere near perfect, for example the last pindex in a 1858 * subgroup will overflow into the next cpu or package. But this 1859 * should get us good page reuse locality in heavy mixed loads. 1860 * 1861 * (may be executed before the APs are started, so other GDs might 1862 * not exist!) 
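 *
 * Callers that already know which cpu will consume the page can pass the
 * localization hint explicitly; otherwise the current cpu is used.  A
 * sketch (hypothetical caller and 'target_cpu'):
 *
 *	m = vm_page_alloc(object, pindex,
 *			  VM_ALLOC_NORMAL | VM_ALLOC_NULL_OK |
 *			  VM_ALLOC_CPU(target_cpu));
 *
 * The VM_ALLOC_CPU(n) request (see the flag summary in the function header
 * above) is detected via VM_ALLOC_CPU_SPEC and decoded with
 * VM_ALLOC_GETCPU() just below.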
1863 */ 1864 if (page_req & VM_ALLOC_CPU_SPEC) 1865 cpuid_local = VM_ALLOC_GETCPU(page_req); 1866 else 1867 cpuid_local = mycpu->gd_cpuid; 1868 1869 pg_color = vm_get_pg_color(cpuid_local, object, pindex); 1870 1871 KKASSERT(page_req & 1872 (VM_ALLOC_NORMAL|VM_ALLOC_QUICK| 1873 VM_ALLOC_INTERRUPT|VM_ALLOC_SYSTEM)); 1874 1875 /* 1876 * Certain system threads (pageout daemon, buf_daemon's) are 1877 * allowed to eat deeper into the free page list. 1878 */ 1879 if (curthread->td_flags & TDF_SYSTHREAD) 1880 page_req |= VM_ALLOC_SYSTEM; 1881 1882 /* 1883 * Impose various limitations. Note that the v_free_reserved test 1884 * must match the opposite of vm_page_count_target() to avoid 1885 * livelocks, be careful. 1886 */ 1887 loop: 1888 gd = mycpu; 1889 if (gd->gd_vmstats.v_free_count >= gd->gd_vmstats.v_free_reserved || 1890 ((page_req & VM_ALLOC_INTERRUPT) && 1891 gd->gd_vmstats.v_free_count > 0) || 1892 ((page_req & VM_ALLOC_SYSTEM) && 1893 gd->gd_vmstats.v_cache_count == 0 && 1894 gd->gd_vmstats.v_free_count > 1895 gd->gd_vmstats.v_interrupt_free_min) 1896 ) { 1897 /* 1898 * The free queue has sufficient free pages to take one out. 1899 */ 1900 m = vm_page_select_free(pg_color); 1901 } else if (page_req & VM_ALLOC_NORMAL) { 1902 /* 1903 * Allocatable from the cache (non-interrupt only). On 1904 * success, we must free the page and try again, thus 1905 * ensuring that vmstats.v_*_free_min counters are replenished. 1906 */ 1907 #ifdef INVARIANTS 1908 if (curthread->td_preempted) { 1909 kprintf("vm_page_alloc(): warning, attempt to allocate" 1910 " cache page from preempting interrupt\n"); 1911 m = NULL; 1912 } else { 1913 m = vm_page_select_cache(pg_color); 1914 } 1915 #else 1916 m = vm_page_select_cache(pg_color); 1917 #endif 1918 /* 1919 * On success move the page into the free queue and loop. 1920 * 1921 * Only do this if we can safely acquire the vm_object lock, 1922 * because this is effectively a random page and the caller 1923 * might be holding the lock shared, we don't want to 1924 * deadlock. 1925 */ 1926 if (m != NULL) { 1927 KASSERT(m->dirty == 0, 1928 ("Found dirty cache page %p", m)); 1929 if ((obj = m->object) != NULL) { 1930 if (vm_object_hold_try(obj)) { 1931 vm_page_protect(m, VM_PROT_NONE); 1932 vm_page_free(m); 1933 /* m->object NULL here */ 1934 vm_object_drop(obj); 1935 } else { 1936 vm_page_deactivate(m); 1937 vm_page_wakeup(m); 1938 } 1939 } else { 1940 vm_page_protect(m, VM_PROT_NONE); 1941 vm_page_free(m); 1942 } 1943 goto loop; 1944 } 1945 1946 /* 1947 * On failure return NULL 1948 */ 1949 atomic_add_int(&vm_pageout_deficit, 1); 1950 pagedaemon_wakeup(); 1951 return (NULL); 1952 } else { 1953 /* 1954 * No pages available, wakeup the pageout daemon and give up. 1955 */ 1956 atomic_add_int(&vm_pageout_deficit, 1); 1957 pagedaemon_wakeup(); 1958 return (NULL); 1959 } 1960 1961 /* 1962 * v_free_count can race so loop if we don't find the expected 1963 * page. 1964 */ 1965 if (m == NULL) { 1966 vmstats_rollup(); 1967 goto loop; 1968 } 1969 1970 /* 1971 * Good page found. The page has already been busied for us and 1972 * removed from its queues. 1973 */ 1974 KASSERT(m->dirty == 0, 1975 ("vm_page_alloc: free/cache page %p was dirty", m)); 1976 KKASSERT(m->queue == PQ_NONE); 1977 1978 #if 0 1979 done: 1980 #endif 1981 /* 1982 * Initialize the structure, inheriting some flags but clearing 1983 * all the rest. The page has already been busied for us. 
1984 */ 1985 vm_page_flag_clear(m, ~PG_KEEP_NEWPAGE_MASK); 1986 1987 KKASSERT(m->wire_count == 0); 1988 KKASSERT(m->busy == 0); 1989 m->act_count = 0; 1990 m->valid = 0; 1991 1992 /* 1993 * Caller must be holding the object lock (asserted by 1994 * vm_page_insert()). 1995 * 1996 * NOTE: Inserting a page here does not insert it into any pmaps 1997 * (which could cause us to block allocating memory). 1998 * 1999 * NOTE: If no object an unassociated page is allocated, m->pindex 2000 * can be used by the caller for any purpose. 2001 */ 2002 if (object) { 2003 if (vm_page_insert(m, object, pindex) == FALSE) { 2004 vm_page_free(m); 2005 if ((page_req & VM_ALLOC_NULL_OK) == 0) 2006 panic("PAGE RACE %p[%ld]/%p", 2007 object, (long)pindex, m); 2008 m = NULL; 2009 } 2010 } else { 2011 m->pindex = pindex; 2012 } 2013 2014 /* 2015 * Don't wakeup too often - wakeup the pageout daemon when 2016 * we would be nearly out of memory. 2017 */ 2018 pagedaemon_wakeup(); 2019 2020 /* 2021 * A PG_BUSY page is returned. 2022 */ 2023 return (m); 2024 } 2025 2026 /* 2027 * Returns number of pages available in our DMA memory reserve 2028 * (adjusted with vm.dma_reserved=<value>m in /boot/loader.conf) 2029 */ 2030 vm_size_t 2031 vm_contig_avail_pages(void) 2032 { 2033 alist_blk_t blk; 2034 alist_blk_t count; 2035 alist_blk_t bfree; 2036 spin_lock(&vm_contig_spin); 2037 bfree = alist_free_info(&vm_contig_alist, &blk, &count); 2038 spin_unlock(&vm_contig_spin); 2039 2040 return bfree; 2041 } 2042 2043 /* 2044 * Attempt to allocate contiguous physical memory with the specified 2045 * requirements. 2046 */ 2047 vm_page_t 2048 vm_page_alloc_contig(vm_paddr_t low, vm_paddr_t high, 2049 unsigned long alignment, unsigned long boundary, 2050 unsigned long size, vm_memattr_t memattr) 2051 { 2052 alist_blk_t blk; 2053 vm_page_t m; 2054 int i; 2055 2056 alignment >>= PAGE_SHIFT; 2057 if (alignment == 0) 2058 alignment = 1; 2059 boundary >>= PAGE_SHIFT; 2060 if (boundary == 0) 2061 boundary = 1; 2062 size = (size + PAGE_MASK) >> PAGE_SHIFT; 2063 2064 spin_lock(&vm_contig_spin); 2065 blk = alist_alloc(&vm_contig_alist, 0, size); 2066 if (blk == ALIST_BLOCK_NONE) { 2067 spin_unlock(&vm_contig_spin); 2068 if (bootverbose) { 2069 kprintf("vm_page_alloc_contig: %ldk nospace\n", 2070 (size + PAGE_MASK) * (PAGE_SIZE / 1024)); 2071 } 2072 return(NULL); 2073 } 2074 if (high && ((vm_paddr_t)(blk + size) << PAGE_SHIFT) > high) { 2075 alist_free(&vm_contig_alist, blk, size); 2076 spin_unlock(&vm_contig_spin); 2077 if (bootverbose) { 2078 kprintf("vm_page_alloc_contig: %ldk high " 2079 "%016jx failed\n", 2080 (size + PAGE_MASK) * (PAGE_SIZE / 1024), 2081 (intmax_t)high); 2082 } 2083 return(NULL); 2084 } 2085 spin_unlock(&vm_contig_spin); 2086 if (vm_contig_verbose) { 2087 kprintf("vm_page_alloc_contig: %016jx/%ldk\n", 2088 (intmax_t)(vm_paddr_t)blk << PAGE_SHIFT, 2089 (size + PAGE_MASK) * (PAGE_SIZE / 1024)); 2090 } 2091 2092 m = PHYS_TO_VM_PAGE((vm_paddr_t)blk << PAGE_SHIFT); 2093 if (memattr != VM_MEMATTR_DEFAULT) 2094 for (i = 0;i < size;i++) 2095 pmap_page_set_memattr(&m[i], memattr); 2096 return m; 2097 } 2098 2099 /* 2100 * Free contiguously allocated pages. The pages will be wired but not busy. 2101 * When freeing to the alist we leave them wired and not busy. 
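 *
 * Illustrative pairing with vm_page_alloc_contig() above (hypothetical
 * caller, not code from this file).  Sizes are in bytes and the pages come
 * back wired and not busy, matching what this routine expects:
 *
 *	m = vm_page_alloc_contig(0, ~(vm_paddr_t)0, PAGE_SIZE, 0,
 *				 32 * PAGE_SIZE, VM_MEMATTR_DEFAULT);
 *	if (m) {
 *		... use the 32 contiguous pages starting at m ...
 *		vm_page_free_contig(m, 32 * PAGE_SIZE);
 *	}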
2102 */ 2103 void 2104 vm_page_free_contig(vm_page_t m, unsigned long size) 2105 { 2106 vm_paddr_t pa = VM_PAGE_TO_PHYS(m); 2107 vm_pindex_t start = pa >> PAGE_SHIFT; 2108 vm_pindex_t pages = (size + PAGE_MASK) >> PAGE_SHIFT; 2109 2110 if (vm_contig_verbose) { 2111 kprintf("vm_page_free_contig: %016jx/%ldk\n", 2112 (intmax_t)pa, size / 1024); 2113 } 2114 if (pa < vm_low_phys_reserved) { 2115 KKASSERT(pa + size <= vm_low_phys_reserved); 2116 spin_lock(&vm_contig_spin); 2117 alist_free(&vm_contig_alist, start, pages); 2118 spin_unlock(&vm_contig_spin); 2119 } else { 2120 while (pages) { 2121 vm_page_busy_wait(m, FALSE, "cpgfr"); 2122 vm_page_unwire(m, 0); 2123 vm_page_free(m); 2124 --pages; 2125 ++m; 2126 } 2127 2128 } 2129 } 2130 2131 2132 /* 2133 * Wait for sufficient free memory for nominal heavy memory use kernel 2134 * operations. 2135 * 2136 * WARNING! Be sure never to call this in any vm_pageout code path, which 2137 * will trivially deadlock the system. 2138 */ 2139 void 2140 vm_wait_nominal(void) 2141 { 2142 while (vm_page_count_min(0)) 2143 vm_wait(0); 2144 } 2145 2146 /* 2147 * Test if vm_wait_nominal() would block. 2148 */ 2149 int 2150 vm_test_nominal(void) 2151 { 2152 if (vm_page_count_min(0)) 2153 return(1); 2154 return(0); 2155 } 2156 2157 /* 2158 * Block until free pages are available for allocation, called in various 2159 * places before memory allocations. 2160 * 2161 * The caller may loop if vm_page_count_min() == FALSE so we cannot be 2162 * more generous then that. 2163 */ 2164 void 2165 vm_wait(int timo) 2166 { 2167 /* 2168 * never wait forever 2169 */ 2170 if (timo == 0) 2171 timo = hz; 2172 lwkt_gettoken(&vm_token); 2173 2174 if (curthread == pagethread) { 2175 /* 2176 * The pageout daemon itself needs pages, this is bad. 2177 */ 2178 if (vm_page_count_min(0)) { 2179 vm_pageout_pages_needed = 1; 2180 tsleep(&vm_pageout_pages_needed, 0, "VMWait", timo); 2181 } 2182 } else { 2183 /* 2184 * Wakeup the pageout daemon if necessary and wait. 2185 * 2186 * Do not wait indefinitely for the target to be reached, 2187 * as load might prevent it from being reached any time soon. 2188 * But wait a little to try to slow down page allocations 2189 * and to give more important threads (the pagedaemon) 2190 * allocation priority. 2191 */ 2192 if (vm_page_count_target()) { 2193 if (vm_pages_needed == 0) { 2194 vm_pages_needed = 1; 2195 wakeup(&vm_pages_needed); 2196 } 2197 ++vm_pages_waiting; /* SMP race ok */ 2198 tsleep(&vmstats.v_free_count, 0, "vmwait", timo); 2199 } 2200 } 2201 lwkt_reltoken(&vm_token); 2202 } 2203 2204 /* 2205 * Block until free pages are available for allocation 2206 * 2207 * Called only from vm_fault so that processes page faulting can be 2208 * easily tracked. 2209 */ 2210 void 2211 vm_wait_pfault(void) 2212 { 2213 /* 2214 * Wakeup the pageout daemon if necessary and wait. 2215 * 2216 * Do not wait indefinitely for the target to be reached, 2217 * as load might prevent it from being reached any time soon. 2218 * But wait a little to try to slow down page allocations 2219 * and to give more important threads (the pagedaemon) 2220 * allocation priority. 
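 *
 * For reference, the usual consumer pattern for vm_wait() elsewhere in
 * the kernel (illustrative sketch only, mirroring vm_page_grab() below)
 * is to retry the allocation after each wait:
 *
 *	while ((m = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL)) == NULL)
 *		vm_wait(0);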
2221 */ 2222 if (vm_page_count_min(0)) { 2223 lwkt_gettoken(&vm_token); 2224 while (vm_page_count_severe()) { 2225 if (vm_page_count_target()) { 2226 thread_t td; 2227 2228 if (vm_pages_needed == 0) { 2229 vm_pages_needed = 1; 2230 wakeup(&vm_pages_needed); 2231 } 2232 ++vm_pages_waiting; /* SMP race ok */ 2233 tsleep(&vmstats.v_free_count, 0, "pfault", hz); 2234 2235 /* 2236 * Do not stay stuck in the loop if the system is trying 2237 * to kill the process. 2238 */ 2239 td = curthread; 2240 if (td->td_proc && (td->td_proc->p_flags & P_LOWMEMKILL)) 2241 break; 2242 } 2243 } 2244 lwkt_reltoken(&vm_token); 2245 } 2246 } 2247 2248 /* 2249 * Put the specified page on the active list (if appropriate). Ensure 2250 * that act_count is at least ACT_INIT but do not otherwise mess with it. 2251 * 2252 * The caller should be holding the page busied ? XXX 2253 * This routine may not block. 2254 */ 2255 void 2256 vm_page_activate(vm_page_t m) 2257 { 2258 u_short oqueue; 2259 2260 vm_page_spin_lock(m); 2261 if (m->queue - m->pc != PQ_ACTIVE) { 2262 _vm_page_queue_spin_lock(m); 2263 oqueue = _vm_page_rem_queue_spinlocked(m); 2264 /* page is left spinlocked, queue is unlocked */ 2265 2266 if (oqueue == PQ_CACHE) 2267 mycpu->gd_cnt.v_reactivated++; 2268 if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) { 2269 if (m->act_count < ACT_INIT) 2270 m->act_count = ACT_INIT; 2271 _vm_page_add_queue_spinlocked(m, PQ_ACTIVE + m->pc, 0); 2272 } 2273 _vm_page_and_queue_spin_unlock(m); 2274 if (oqueue == PQ_CACHE || oqueue == PQ_FREE) 2275 pagedaemon_wakeup(); 2276 } else { 2277 if (m->act_count < ACT_INIT) 2278 m->act_count = ACT_INIT; 2279 vm_page_spin_unlock(m); 2280 } 2281 } 2282 2283 /* 2284 * Helper routine for vm_page_free_toq() and vm_page_cache(). This 2285 * routine is called when a page has been added to the cache or free 2286 * queues. 2287 * 2288 * This routine may not block. 2289 */ 2290 static __inline void 2291 vm_page_free_wakeup(void) 2292 { 2293 globaldata_t gd = mycpu; 2294 2295 /* 2296 * If the pageout daemon itself needs pages, then tell it that 2297 * there are some free. 2298 */ 2299 if (vm_pageout_pages_needed && 2300 gd->gd_vmstats.v_cache_count + gd->gd_vmstats.v_free_count >= 2301 gd->gd_vmstats.v_pageout_free_min 2302 ) { 2303 vm_pageout_pages_needed = 0; 2304 wakeup(&vm_pageout_pages_needed); 2305 } 2306 2307 /* 2308 * Wakeup processes that are waiting on memory. 2309 * 2310 * Generally speaking we want to wakeup stuck processes as soon as 2311 * possible. !vm_page_count_min(0) is the absolute minimum point 2312 * where we can do this. Wait a bit longer to reduce degenerate 2313 * re-blocking (vm_page_free_hysteresis). The target check is just 2314 * to make sure the min-check w/hysteresis does not exceed the 2315 * normal target. 2316 */ 2317 if (vm_pages_waiting) { 2318 if (!vm_page_count_min(vm_page_free_hysteresis) || 2319 !vm_page_count_target()) { 2320 vm_pages_waiting = 0; 2321 wakeup(&vmstats.v_free_count); 2322 ++mycpu->gd_cnt.v_ppwakeups; 2323 } 2324 #if 0 2325 if (!vm_page_count_target()) { 2326 /* 2327 * Plenty of pages are free, wakeup everyone. 2328 */ 2329 vm_pages_waiting = 0; 2330 wakeup(&vmstats.v_free_count); 2331 ++mycpu->gd_cnt.v_ppwakeups; 2332 } else if (!vm_page_count_min(0)) { 2333 /* 2334 * Some pages are free, wakeup someone. 
2335 */ 2336 int wcount = vm_pages_waiting; 2337 if (wcount > 0) 2338 --wcount; 2339 vm_pages_waiting = wcount; 2340 wakeup_one(&vmstats.v_free_count); 2341 ++mycpu->gd_cnt.v_ppwakeups; 2342 } 2343 #endif 2344 } 2345 } 2346 2347 /* 2348 * Returns the given page to the PQ_FREE or PQ_HOLD list and disassociates 2349 * it from its VM object. 2350 * 2351 * The vm_page must be PG_BUSY on entry. PG_BUSY will be released on 2352 * return (the page will have been freed). 2353 */ 2354 void 2355 vm_page_free_toq(vm_page_t m) 2356 { 2357 mycpu->gd_cnt.v_tfree++; 2358 KKASSERT((m->flags & PG_MAPPED) == 0); 2359 KKASSERT(m->flags & PG_BUSY); 2360 2361 if (m->busy || ((m->queue - m->pc) == PQ_FREE)) { 2362 kprintf("vm_page_free: pindex(%lu), busy(%d), " 2363 "PG_BUSY(%d), hold(%d)\n", 2364 (u_long)m->pindex, m->busy, 2365 ((m->flags & PG_BUSY) ? 1 : 0), m->hold_count); 2366 if ((m->queue - m->pc) == PQ_FREE) 2367 panic("vm_page_free: freeing free page"); 2368 else 2369 panic("vm_page_free: freeing busy page"); 2370 } 2371 2372 /* 2373 * Remove from object, spinlock the page and its queues and 2374 * remove from any queue. No queue spinlock will be held 2375 * after this section (because the page was removed from any 2376 * queue). 2377 */ 2378 vm_page_remove(m); 2379 vm_page_and_queue_spin_lock(m); 2380 _vm_page_rem_queue_spinlocked(m); 2381 2382 /* 2383 * No further management of fictitious pages occurs beyond object 2384 * and queue removal. 2385 */ 2386 if ((m->flags & PG_FICTITIOUS) != 0) { 2387 vm_page_spin_unlock(m); 2388 vm_page_wakeup(m); 2389 return; 2390 } 2391 2392 m->valid = 0; 2393 vm_page_undirty(m); 2394 2395 if (m->wire_count != 0) { 2396 if (m->wire_count > 1) { 2397 panic( 2398 "vm_page_free: invalid wire count (%d), pindex: 0x%lx", 2399 m->wire_count, (long)m->pindex); 2400 } 2401 panic("vm_page_free: freeing wired page"); 2402 } 2403 2404 /* 2405 * Clear the UNMANAGED flag when freeing an unmanaged page. 2406 * Clear the NEED_COMMIT flag 2407 */ 2408 if (m->flags & PG_UNMANAGED) 2409 vm_page_flag_clear(m, PG_UNMANAGED); 2410 if (m->flags & PG_NEED_COMMIT) 2411 vm_page_flag_clear(m, PG_NEED_COMMIT); 2412 2413 if (m->hold_count != 0) { 2414 _vm_page_add_queue_spinlocked(m, PQ_HOLD + m->pc, 0); 2415 } else { 2416 _vm_page_add_queue_spinlocked(m, PQ_FREE + m->pc, 1); 2417 } 2418 2419 /* 2420 * This sequence allows us to clear PG_BUSY while still holding 2421 * its spin lock, which reduces contention vs allocators. We 2422 * must not leave the queue locked or _vm_page_wakeup() may 2423 * deadlock. 2424 */ 2425 _vm_page_queue_spin_unlock(m); 2426 if (_vm_page_wakeup(m)) { 2427 vm_page_spin_unlock(m); 2428 wakeup(m); 2429 } else { 2430 vm_page_spin_unlock(m); 2431 } 2432 vm_page_free_wakeup(); 2433 } 2434 2435 /* 2436 * vm_page_unmanage() 2437 * 2438 * Prevent PV management from being done on the page. The page is 2439 * removed from the paging queues as if it were wired, and as a 2440 * consequence of no longer being managed the pageout daemon will not 2441 * touch it (since there is no way to locate the pte mappings for the 2442 * page). madvise() calls that mess with the pmap will also no longer 2443 * operate on the page. 2444 * 2445 * Beyond that the page is still reasonably 'normal'. Freeing the page 2446 * will clear the flag. 2447 * 2448 * This routine is used by OBJT_PHYS objects - objects using unswappable 2449 * physical memory as backing store rather then swap-backed memory and 2450 * will eventually be extended to support 4MB unmanaged physical 2451 * mappings. 
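 *
 * Hypothetical usage sketch (not code from this file): an OBJT_PHYS style
 * consumer busies the page, takes it out of PV management, and releases
 * the busy again:
 *
 *	vm_page_busy_wait(m, FALSE, "unmng");
 *	vm_page_unmanage(m);
 *	vm_page_wakeup(m);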
2452 * 2453 * Caller must be holding the page busy. 2454 */ 2455 void 2456 vm_page_unmanage(vm_page_t m) 2457 { 2458 KKASSERT(m->flags & PG_BUSY); 2459 if ((m->flags & PG_UNMANAGED) == 0) { 2460 if (m->wire_count == 0) 2461 vm_page_unqueue(m); 2462 } 2463 vm_page_flag_set(m, PG_UNMANAGED); 2464 } 2465 2466 /* 2467 * Mark this page as wired down by yet another map, removing it from 2468 * paging queues as necessary. 2469 * 2470 * Caller must be holding the page busy. 2471 */ 2472 void 2473 vm_page_wire(vm_page_t m) 2474 { 2475 /* 2476 * Only bump the wire statistics if the page is not already wired, 2477 * and only unqueue the page if it is on some queue (if it is unmanaged 2478 * it is already off the queues). Don't do anything with fictitious 2479 * pages because they are always wired. 2480 */ 2481 KKASSERT(m->flags & PG_BUSY); 2482 if ((m->flags & PG_FICTITIOUS) == 0) { 2483 if (atomic_fetchadd_int(&m->wire_count, 1) == 0) { 2484 if ((m->flags & PG_UNMANAGED) == 0) 2485 vm_page_unqueue(m); 2486 atomic_add_int(&mycpu->gd_vmstats_adj.v_wire_count, 1); 2487 } 2488 KASSERT(m->wire_count != 0, 2489 ("vm_page_wire: wire_count overflow m=%p", m)); 2490 } 2491 } 2492 2493 /* 2494 * Release one wiring of this page, potentially enabling it to be paged again. 2495 * 2496 * Many pages placed on the inactive queue should actually go 2497 * into the cache, but it is difficult to figure out which. What 2498 * we do instead, if the inactive target is well met, is to put 2499 * clean pages at the head of the inactive queue instead of the tail. 2500 * This will cause them to be moved to the cache more quickly and 2501 * if not actively re-referenced, freed more quickly. If we just 2502 * stick these pages at the end of the inactive queue, heavy filesystem 2503 * meta-data accesses can cause an unnecessary paging load on memory bound 2504 * processes. This optimization causes one-time-use metadata to be 2505 * reused more quickly. 2506 * 2507 * Pages marked PG_NEED_COMMIT are always activated and never placed on 2508 * the inactive queue. This helps the pageout daemon determine memory 2509 * pressure and act on out-of-memory situations more quickly. 2510 * 2511 * BUT, if we are in a low-memory situation we have no choice but to 2512 * put clean pages on the cache queue. 2513 * 2514 * A number of routines use vm_page_unwire() to guarantee that the page 2515 * will go into either the inactive or active queues, and will NEVER 2516 * be placed in the cache - for example, just after dirtying a page. 2517 * dirty pages in the cache are not allowed. 2518 * 2519 * This routine may not block. 
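 *
 * Typical usage sketch (illustrative only): a caller that temporarily
 * needs the page to stay resident wires it while holding it busy and
 * later drops the wiring, requesting reactivation:
 *
 *	vm_page_wire(m);	-- page leaves the paging queues
 *	...
 *	vm_page_unwire(m, 1);	-- last unwire requeues on PQ_ACTIVE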
2520 */ 2521 void 2522 vm_page_unwire(vm_page_t m, int activate) 2523 { 2524 KKASSERT(m->flags & PG_BUSY); 2525 if (m->flags & PG_FICTITIOUS) { 2526 /* do nothing */ 2527 } else if (m->wire_count <= 0) { 2528 panic("vm_page_unwire: invalid wire count: %d", m->wire_count); 2529 } else { 2530 if (atomic_fetchadd_int(&m->wire_count, -1) == 1) { 2531 atomic_add_int(&mycpu->gd_vmstats_adj.v_wire_count, -1); 2532 if (m->flags & PG_UNMANAGED) { 2533 ; 2534 } else if (activate || (m->flags & PG_NEED_COMMIT)) { 2535 vm_page_spin_lock(m); 2536 _vm_page_add_queue_spinlocked(m, 2537 PQ_ACTIVE + m->pc, 0); 2538 _vm_page_and_queue_spin_unlock(m); 2539 } else { 2540 vm_page_spin_lock(m); 2541 vm_page_flag_clear(m, PG_WINATCFLS); 2542 _vm_page_add_queue_spinlocked(m, 2543 PQ_INACTIVE + m->pc, 0); 2544 ++vm_swapcache_inactive_heuristic; 2545 _vm_page_and_queue_spin_unlock(m); 2546 } 2547 } 2548 } 2549 } 2550 2551 /* 2552 * Move the specified page to the inactive queue. If the page has 2553 * any associated swap, the swap is deallocated. 2554 * 2555 * Normally athead is 0 resulting in LRU operation. athead is set 2556 * to 1 if we want this page to be 'as if it were placed in the cache', 2557 * except without unmapping it from the process address space. 2558 * 2559 * vm_page's spinlock must be held on entry and will remain held on return. 2560 * This routine may not block. 2561 */ 2562 static void 2563 _vm_page_deactivate_locked(vm_page_t m, int athead) 2564 { 2565 u_short oqueue; 2566 2567 /* 2568 * Ignore if already inactive. 2569 */ 2570 if (m->queue - m->pc == PQ_INACTIVE) 2571 return; 2572 _vm_page_queue_spin_lock(m); 2573 oqueue = _vm_page_rem_queue_spinlocked(m); 2574 2575 if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) { 2576 if (oqueue == PQ_CACHE) 2577 mycpu->gd_cnt.v_reactivated++; 2578 vm_page_flag_clear(m, PG_WINATCFLS); 2579 _vm_page_add_queue_spinlocked(m, PQ_INACTIVE + m->pc, athead); 2580 if (athead == 0) 2581 ++vm_swapcache_inactive_heuristic; 2582 } 2583 /* NOTE: PQ_NONE if condition not taken */ 2584 _vm_page_queue_spin_unlock(m); 2585 /* leaves vm_page spinlocked */ 2586 } 2587 2588 /* 2589 * Attempt to deactivate a page. 2590 * 2591 * No requirements. 2592 */ 2593 void 2594 vm_page_deactivate(vm_page_t m) 2595 { 2596 vm_page_spin_lock(m); 2597 _vm_page_deactivate_locked(m, 0); 2598 vm_page_spin_unlock(m); 2599 } 2600 2601 void 2602 vm_page_deactivate_locked(vm_page_t m) 2603 { 2604 _vm_page_deactivate_locked(m, 0); 2605 } 2606 2607 /* 2608 * Attempt to move a busied page to PQ_CACHE, then unconditionally unbusy it. 2609 * 2610 * This function returns non-zero if it successfully moved the page to 2611 * PQ_CACHE. 2612 * 2613 * This function unconditionally unbusies the page on return. 2614 */ 2615 int 2616 vm_page_try_to_cache(vm_page_t m) 2617 { 2618 vm_page_spin_lock(m); 2619 if (m->dirty || m->hold_count || m->wire_count || 2620 (m->flags & (PG_UNMANAGED | PG_NEED_COMMIT))) { 2621 if (_vm_page_wakeup(m)) { 2622 vm_page_spin_unlock(m); 2623 wakeup(m); 2624 } else { 2625 vm_page_spin_unlock(m); 2626 } 2627 return(0); 2628 } 2629 vm_page_spin_unlock(m); 2630 2631 /* 2632 * Page busied by us and no longer spinlocked. Dirty pages cannot 2633 * be moved to the cache. 2634 */ 2635 vm_page_test_dirty(m); 2636 if (m->dirty || (m->flags & PG_NEED_COMMIT)) { 2637 vm_page_wakeup(m); 2638 return(0); 2639 } 2640 vm_page_cache(m); 2641 return(1); 2642 } 2643 2644 /* 2645 * Attempt to free the page. If we cannot free it, we do nothing. 2646 * 1 is returned on success, 0 on failure. 
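 *
 * Illustrative sketch (hypothetical scan loop): the routine busies the
 * page itself, so an opportunistic reclaim can simply attempt each
 * candidate and move on when the attempt fails:
 *
 *	if (vm_page_try_to_free(m) == 0) {
 *		... page was dirty, held, wired, etc; leave it alone ...
 *	}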
2647 * 2648 * No requirements. 2649 */ 2650 int 2651 vm_page_try_to_free(vm_page_t m) 2652 { 2653 vm_page_spin_lock(m); 2654 if (vm_page_busy_try(m, TRUE)) { 2655 vm_page_spin_unlock(m); 2656 return(0); 2657 } 2658 2659 /* 2660 * The page can be in any state, including already being on the free 2661 * queue. Check to see if it really can be freed. 2662 */ 2663 if (m->dirty || /* can't free if it is dirty */ 2664 m->hold_count || /* or held (XXX may be wrong) */ 2665 m->wire_count || /* or wired */ 2666 (m->flags & (PG_UNMANAGED | /* or unmanaged */ 2667 PG_NEED_COMMIT)) || /* or needs a commit */ 2668 m->queue - m->pc == PQ_FREE || /* already on PQ_FREE */ 2669 m->queue - m->pc == PQ_HOLD) { /* already on PQ_HOLD */ 2670 if (_vm_page_wakeup(m)) { 2671 vm_page_spin_unlock(m); 2672 wakeup(m); 2673 } else { 2674 vm_page_spin_unlock(m); 2675 } 2676 return(0); 2677 } 2678 vm_page_spin_unlock(m); 2679 2680 /* 2681 * We can probably free the page. 2682 * 2683 * Page busied by us and no longer spinlocked. Dirty pages will 2684 * not be freed by this function. We have to re-test the 2685 * dirty bit after cleaning out the pmaps. 2686 */ 2687 vm_page_test_dirty(m); 2688 if (m->dirty || (m->flags & PG_NEED_COMMIT)) { 2689 vm_page_wakeup(m); 2690 return(0); 2691 } 2692 vm_page_protect(m, VM_PROT_NONE); 2693 if (m->dirty || (m->flags & PG_NEED_COMMIT)) { 2694 vm_page_wakeup(m); 2695 return(0); 2696 } 2697 vm_page_free(m); 2698 return(1); 2699 } 2700 2701 /* 2702 * vm_page_cache 2703 * 2704 * Put the specified page onto the page cache queue (if appropriate). 2705 * 2706 * The page must be busy, and this routine will release the busy and 2707 * possibly even free the page. 2708 */ 2709 void 2710 vm_page_cache(vm_page_t m) 2711 { 2712 /* 2713 * Not suitable for the cache 2714 */ 2715 if ((m->flags & (PG_UNMANAGED | PG_NEED_COMMIT)) || 2716 m->busy || m->wire_count || m->hold_count) { 2717 vm_page_wakeup(m); 2718 return; 2719 } 2720 2721 /* 2722 * Already in the cache (and thus not mapped) 2723 */ 2724 if ((m->queue - m->pc) == PQ_CACHE) { 2725 KKASSERT((m->flags & PG_MAPPED) == 0); 2726 vm_page_wakeup(m); 2727 return; 2728 } 2729 2730 /* 2731 * Caller is required to test m->dirty, but note that the act of 2732 * removing the page from its maps can cause it to become dirty 2733 * on an SMP system due to another cpu running in usermode. 2734 */ 2735 if (m->dirty) { 2736 panic("vm_page_cache: caching a dirty page, pindex: %ld", 2737 (long)m->pindex); 2738 } 2739 2740 /* 2741 * Remove all pmaps and indicate that the page is not 2742 * writeable or mapped. Our vm_page_protect() call may 2743 * have blocked (especially w/ VM_PROT_NONE), so recheck 2744 * everything. 2745 */ 2746 vm_page_protect(m, VM_PROT_NONE); 2747 if ((m->flags & (PG_UNMANAGED | PG_MAPPED)) || 2748 m->busy || m->wire_count || m->hold_count) { 2749 vm_page_wakeup(m); 2750 } else if (m->dirty || (m->flags & PG_NEED_COMMIT)) { 2751 vm_page_deactivate(m); 2752 vm_page_wakeup(m); 2753 } else { 2754 _vm_page_and_queue_spin_lock(m); 2755 _vm_page_rem_queue_spinlocked(m); 2756 _vm_page_add_queue_spinlocked(m, PQ_CACHE + m->pc, 0); 2757 _vm_page_queue_spin_unlock(m); 2758 if (_vm_page_wakeup(m)) { 2759 vm_page_spin_unlock(m); 2760 wakeup(m); 2761 } else { 2762 vm_page_spin_unlock(m); 2763 } 2764 vm_page_free_wakeup(); 2765 } 2766 } 2767 2768 /* 2769 * vm_page_dontneed() 2770 * 2771 * Cache, deactivate, or do nothing as appropriate. This routine 2772 * is typically used by madvise() MADV_DONTNEED. 
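 *
 * In that path the caller holds the page busy around the call, roughly
 * (illustrative sketch only, wmesg tag is arbitrary):
 *
 *	vm_page_busy_wait(m, FALSE, "madvd");
 *	vm_page_dontneed(m);
 *	vm_page_wakeup(m);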
2773 * 2774 * Generally speaking we want to move the page into the cache so 2775 * it gets reused quickly. However, this can result in a silly syndrome 2776 * due to the page recycling too quickly. Small objects will not be 2777 * fully cached. On the other hand, if we move the page to the inactive 2778 * queue we wind up with a problem whereby very large objects 2779 * unnecessarily blow away our inactive and cache queues. 2780 * 2781 * The solution is to move the pages based on a fixed weighting. We 2782 * either leave them alone, deactivate them, or move them to the cache, 2783 * where moving them to the cache has the highest weighting. 2784 * By forcing some pages into other queues we eventually force the 2785 * system to balance the queues, potentially recovering other unrelated 2786 * space from active. The idea is to not force this to happen too 2787 * often. 2788 * 2789 * The page must be busied. 2790 */ 2791 void 2792 vm_page_dontneed(vm_page_t m) 2793 { 2794 static int dnweight; 2795 int dnw; 2796 int head; 2797 2798 dnw = ++dnweight; 2799 2800 /* 2801 * occasionally leave the page alone 2802 */ 2803 if ((dnw & 0x01F0) == 0 || 2804 m->queue - m->pc == PQ_INACTIVE || 2805 m->queue - m->pc == PQ_CACHE 2806 ) { 2807 if (m->act_count >= ACT_INIT) 2808 --m->act_count; 2809 return; 2810 } 2811 2812 /* 2813 * If vm_page_dontneed() is inactivating a page, it must clear 2814 * the referenced flag; otherwise the pagedaemon will see references 2815 * on the page in the inactive queue and reactivate it. Until the 2816 * page can move to the cache queue, madvise's job is not done. 2817 */ 2818 vm_page_flag_clear(m, PG_REFERENCED); 2819 pmap_clear_reference(m); 2820 2821 if (m->dirty == 0) 2822 vm_page_test_dirty(m); 2823 2824 if (m->dirty || (dnw & 0x0070) == 0) { 2825 /* 2826 * Deactivate the page 3 times out of 32. 2827 */ 2828 head = 0; 2829 } else { 2830 /* 2831 * Cache the page 28 times out of every 32. Note that 2832 * the page is deactivated instead of cached, but placed 2833 * at the head of the queue instead of the tail. 2834 */ 2835 head = 1; 2836 } 2837 vm_page_spin_lock(m); 2838 _vm_page_deactivate_locked(m, head); 2839 vm_page_spin_unlock(m); 2840 } 2841 2842 /* 2843 * These routines manipulate the 'soft busy' count for a page. A soft busy 2844 * is almost like PG_BUSY except that it allows certain compatible operations 2845 * to occur on the page while it is busy. For example, a page undergoing a 2846 * write can still be mapped read-only. 2847 * 2848 * Because vm_pages can overlap buffers m->busy can be > 1. m->busy is only 2849 * adjusted while the vm_page is PG_BUSY so the flash will occur when the 2850 * busy bit is cleared. 2851 * 2852 * The caller must hold the page BUSY when making these two calls. 2853 */ 2854 void 2855 vm_page_io_start(vm_page_t m) 2856 { 2857 KASSERT(m->flags & PG_BUSY, ("vm_page_io_start: page not busy!!!")); 2858 atomic_add_char(&m->busy, 1); 2859 vm_page_flag_set(m, PG_SBUSY); 2860 } 2861 2862 void 2863 vm_page_io_finish(vm_page_t m) 2864 { 2865 KASSERT(m->flags & PG_BUSY, ("vm_page_io_finish: page not busy!!!")); 2866 atomic_subtract_char(&m->busy, 1); 2867 if (m->busy == 0) 2868 vm_page_flag_clear(m, PG_SBUSY); 2869 } 2870 2871 /* 2872 * Indicate that a clean VM page requires a filesystem commit and cannot 2873 * be reused. Used by tmpfs.
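 *
 * Illustrative lifecycle sketch (hypothetical filesystem code, not taken
 * from this file):
 *
 *	vm_page_need_commit(m);		-- page cannot be cached/freed yet
 *	... push the data to backing store ...
 *	vm_page_clear_commit(m);	-- page may be reclaimed normally again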
2874 */ 2875 void 2876 vm_page_need_commit(vm_page_t m) 2877 { 2878 vm_page_flag_set(m, PG_NEED_COMMIT); 2879 vm_object_set_writeable_dirty(m->object); 2880 } 2881 2882 void 2883 vm_page_clear_commit(vm_page_t m) 2884 { 2885 vm_page_flag_clear(m, PG_NEED_COMMIT); 2886 } 2887 2888 /* 2889 * Grab a page, blocking if it is busy and allocating a page if necessary. 2890 * A busy page is returned or NULL. The page may or may not be valid and 2891 * might not be on a queue (the caller is responsible for the disposition of 2892 * the page). 2893 * 2894 * If VM_ALLOC_ZERO is specified and the grab must allocate a new page, the 2895 * page will be zero'd and marked valid. 2896 * 2897 * If VM_ALLOC_FORCE_ZERO is specified the page will be zero'd and marked 2898 * valid even if it already exists. 2899 * 2900 * If VM_ALLOC_RETRY is specified this routine will never return NULL. Also 2901 * note that VM_ALLOC_NORMAL must be specified if VM_ALLOC_RETRY is specified. 2902 * VM_ALLOC_NULL_OK is implied when VM_ALLOC_RETRY is specified. 2903 * 2904 * This routine may block, but if VM_ALLOC_RETRY is not set then NULL is 2905 * always returned if we had blocked. 2906 * 2907 * This routine may not be called from an interrupt. 2908 * 2909 * No other requirements. 2910 */ 2911 vm_page_t 2912 vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags) 2913 { 2914 vm_page_t m; 2915 int error; 2916 int shared = 1; 2917 2918 KKASSERT(allocflags & 2919 (VM_ALLOC_NORMAL|VM_ALLOC_INTERRUPT|VM_ALLOC_SYSTEM)); 2920 vm_object_hold_shared(object); 2921 for (;;) { 2922 m = vm_page_lookup_busy_try(object, pindex, TRUE, &error); 2923 if (error) { 2924 vm_page_sleep_busy(m, TRUE, "pgrbwt"); 2925 if ((allocflags & VM_ALLOC_RETRY) == 0) { 2926 m = NULL; 2927 break; 2928 } 2929 /* retry */ 2930 } else if (m == NULL) { 2931 if (shared) { 2932 vm_object_upgrade(object); 2933 shared = 0; 2934 } 2935 if (allocflags & VM_ALLOC_RETRY) 2936 allocflags |= VM_ALLOC_NULL_OK; 2937 m = vm_page_alloc(object, pindex, 2938 allocflags & ~VM_ALLOC_RETRY); 2939 if (m) 2940 break; 2941 vm_wait(0); 2942 if ((allocflags & VM_ALLOC_RETRY) == 0) 2943 goto failed; 2944 } else { 2945 /* m found */ 2946 break; 2947 } 2948 } 2949 2950 /* 2951 * If VM_ALLOC_ZERO an invalid page will be zero'd and set valid. 2952 * 2953 * If VM_ALLOC_FORCE_ZERO the page is unconditionally zero'd and set 2954 * valid even if already valid. 2955 * 2956 * NOTE! We have removed all of the PG_ZERO optimizations and also 2957 * removed the idle zeroing code. These optimizations actually 2958 * slow things down on modern cpus because the zeroed area is 2959 * likely uncached, placing a memory-access burden on the 2960 * accessors taking the fault. 2961 * 2962 * By always zeroing the page in-line with the fault, no 2963 * dynamic ram reads are needed and the caches are hot, ready 2964 * for userland to access the memory. 2965 */ 2966 if (m->valid == 0) { 2967 if (allocflags & (VM_ALLOC_ZERO | VM_ALLOC_FORCE_ZERO)) { 2968 pmap_zero_page(VM_PAGE_TO_PHYS(m)); 2969 m->valid = VM_PAGE_BITS_ALL; 2970 } 2971 } else if (allocflags & VM_ALLOC_FORCE_ZERO) { 2972 pmap_zero_page(VM_PAGE_TO_PHYS(m)); 2973 m->valid = VM_PAGE_BITS_ALL; 2974 } 2975 failed: 2976 vm_object_drop(object); 2977 return(m); 2978 } 2979 2980 /* 2981 * Mapping function for valid bits or for dirty bits in 2982 * a page. May not block. 2983 * 2984 * Inputs are required to range within a page. 2985 * 2986 * No requirements. 2987 * Non blocking.
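 *
 * Worked example (assuming DEV_BSIZE is 512, i.e. DEV_BSHIFT is 9):
 * vm_page_bits(0, 1024) computes first_bit = 0 and last_bit = 1 and
 * returns (2 << 1) - (1 << 0) = 0x3, i.e. the two 512-byte chunks
 * covering bytes 0..1023 of the page.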
2988 */ 2989 int 2990 vm_page_bits(int base, int size) 2991 { 2992 int first_bit; 2993 int last_bit; 2994 2995 KASSERT( 2996 base + size <= PAGE_SIZE, 2997 ("vm_page_bits: illegal base/size %d/%d", base, size) 2998 ); 2999 3000 if (size == 0) /* handle degenerate case */ 3001 return(0); 3002 3003 first_bit = base >> DEV_BSHIFT; 3004 last_bit = (base + size - 1) >> DEV_BSHIFT; 3005 3006 return ((2 << last_bit) - (1 << first_bit)); 3007 } 3008 3009 /* 3010 * Sets portions of a page valid and clean. The arguments are expected 3011 * to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive 3012 * of any partial chunks touched by the range. The invalid portion of 3013 * such chunks will be zero'd. 3014 * 3015 * NOTE: When truncating a buffer vnode_pager_setsize() will automatically 3016 * align base to DEV_BSIZE so as not to mark clean a partially 3017 * truncated device block. Otherwise the dirty page status might be 3018 * lost. 3019 * 3020 * This routine may not block. 3021 * 3022 * (base + size) must be less then or equal to PAGE_SIZE. 3023 */ 3024 static void 3025 _vm_page_zero_valid(vm_page_t m, int base, int size) 3026 { 3027 int frag; 3028 int endoff; 3029 3030 if (size == 0) /* handle degenerate case */ 3031 return; 3032 3033 /* 3034 * If the base is not DEV_BSIZE aligned and the valid 3035 * bit is clear, we have to zero out a portion of the 3036 * first block. 3037 */ 3038 3039 if ((frag = base & ~(DEV_BSIZE - 1)) != base && 3040 (m->valid & (1 << (base >> DEV_BSHIFT))) == 0 3041 ) { 3042 pmap_zero_page_area( 3043 VM_PAGE_TO_PHYS(m), 3044 frag, 3045 base - frag 3046 ); 3047 } 3048 3049 /* 3050 * If the ending offset is not DEV_BSIZE aligned and the 3051 * valid bit is clear, we have to zero out a portion of 3052 * the last block. 3053 */ 3054 3055 endoff = base + size; 3056 3057 if ((frag = endoff & ~(DEV_BSIZE - 1)) != endoff && 3058 (m->valid & (1 << (endoff >> DEV_BSHIFT))) == 0 3059 ) { 3060 pmap_zero_page_area( 3061 VM_PAGE_TO_PHYS(m), 3062 endoff, 3063 DEV_BSIZE - (endoff & (DEV_BSIZE - 1)) 3064 ); 3065 } 3066 } 3067 3068 /* 3069 * Set valid, clear dirty bits. If validating the entire 3070 * page we can safely clear the pmap modify bit. We also 3071 * use this opportunity to clear the PG_NOSYNC flag. If a process 3072 * takes a write fault on a MAP_NOSYNC memory area the flag will 3073 * be set again. 3074 * 3075 * We set valid bits inclusive of any overlap, but we can only 3076 * clear dirty bits for DEV_BSIZE chunks that are fully within 3077 * the range. 3078 * 3079 * Page must be busied? 3080 * No other requirements. 3081 */ 3082 void 3083 vm_page_set_valid(vm_page_t m, int base, int size) 3084 { 3085 _vm_page_zero_valid(m, base, size); 3086 m->valid |= vm_page_bits(base, size); 3087 } 3088 3089 3090 /* 3091 * Set valid bits and clear dirty bits. 3092 * 3093 * Page must be busied by caller. 3094 * 3095 * NOTE: This function does not clear the pmap modified bit. 3096 * Also note that e.g. NFS may use a byte-granular base 3097 * and size. 3098 * 3099 * No other requirements. 3100 */ 3101 void 3102 vm_page_set_validclean(vm_page_t m, int base, int size) 3103 { 3104 int pagebits; 3105 3106 _vm_page_zero_valid(m, base, size); 3107 pagebits = vm_page_bits(base, size); 3108 m->valid |= pagebits; 3109 m->dirty &= ~pagebits; 3110 if (base == 0 && size == PAGE_SIZE) { 3111 /*pmap_clear_modify(m);*/ 3112 vm_page_flag_clear(m, PG_NOSYNC); 3113 } 3114 } 3115 3116 /* 3117 * Set valid & dirty. Used by buwrite() 3118 * 3119 * Page must be busied by caller. 
3120 */ 3121 void 3122 vm_page_set_validdirty(vm_page_t m, int base, int size) 3123 { 3124 int pagebits; 3125 3126 pagebits = vm_page_bits(base, size); 3127 m->valid |= pagebits; 3128 m->dirty |= pagebits; 3129 if (m->object) 3130 vm_object_set_writeable_dirty(m->object); 3131 } 3132 3133 /* 3134 * Clear dirty bits. 3135 * 3136 * NOTE: This function does not clear the pmap modified bit. 3137 * Also note that e.g. NFS may use a byte-granular base 3138 * and size. 3139 * 3140 * Page must be busied? 3141 * No other requirements. 3142 */ 3143 void 3144 vm_page_clear_dirty(vm_page_t m, int base, int size) 3145 { 3146 m->dirty &= ~vm_page_bits(base, size); 3147 if (base == 0 && size == PAGE_SIZE) { 3148 /*pmap_clear_modify(m);*/ 3149 vm_page_flag_clear(m, PG_NOSYNC); 3150 } 3151 } 3152 3153 /* 3154 * Make the page all-dirty. 3155 * 3156 * Also make sure the related object and vnode reflect the fact that the 3157 * object may now contain a dirty page. 3158 * 3159 * Page must be busied? 3160 * No other requirements. 3161 */ 3162 void 3163 vm_page_dirty(vm_page_t m) 3164 { 3165 #ifdef INVARIANTS 3166 int pqtype = m->queue - m->pc; 3167 #endif 3168 KASSERT(pqtype != PQ_CACHE && pqtype != PQ_FREE, 3169 ("vm_page_dirty: page in free/cache queue!")); 3170 if (m->dirty != VM_PAGE_BITS_ALL) { 3171 m->dirty = VM_PAGE_BITS_ALL; 3172 if (m->object) 3173 vm_object_set_writeable_dirty(m->object); 3174 } 3175 } 3176 3177 /* 3178 * Invalidates DEV_BSIZE'd chunks within a page. Both the 3179 * valid and dirty bits for the effected areas are cleared. 3180 * 3181 * Page must be busied? 3182 * Does not block. 3183 * No other requirements. 3184 */ 3185 void 3186 vm_page_set_invalid(vm_page_t m, int base, int size) 3187 { 3188 int bits; 3189 3190 bits = vm_page_bits(base, size); 3191 m->valid &= ~bits; 3192 m->dirty &= ~bits; 3193 atomic_add_int(&m->object->generation, 1); 3194 } 3195 3196 /* 3197 * The kernel assumes that the invalid portions of a page contain 3198 * garbage, but such pages can be mapped into memory by user code. 3199 * When this occurs, we must zero out the non-valid portions of the 3200 * page so user code sees what it expects. 3201 * 3202 * Pages are most often semi-valid when the end of a file is mapped 3203 * into memory and the file's size is not page aligned. 3204 * 3205 * Page must be busied? 3206 * No other requirements. 3207 */ 3208 void 3209 vm_page_zero_invalid(vm_page_t m, boolean_t setvalid) 3210 { 3211 int b; 3212 int i; 3213 3214 /* 3215 * Scan the valid bits looking for invalid sections that 3216 * must be zerod. Invalid sub-DEV_BSIZE'd areas ( where the 3217 * valid bit may be set ) have already been zerod by 3218 * vm_page_set_validclean(). 3219 */ 3220 for (b = i = 0; i <= PAGE_SIZE / DEV_BSIZE; ++i) { 3221 if (i == (PAGE_SIZE / DEV_BSIZE) || 3222 (m->valid & (1 << i)) 3223 ) { 3224 if (i > b) { 3225 pmap_zero_page_area( 3226 VM_PAGE_TO_PHYS(m), 3227 b << DEV_BSHIFT, 3228 (i - b) << DEV_BSHIFT 3229 ); 3230 } 3231 b = i + 1; 3232 } 3233 } 3234 3235 /* 3236 * setvalid is TRUE when we can safely set the zero'd areas 3237 * as being valid. We can do this if there are no cache consistency 3238 * issues. e.g. it is ok to do with UFS, but not ok to do with NFS. 3239 */ 3240 if (setvalid) 3241 m->valid = VM_PAGE_BITS_ALL; 3242 } 3243 3244 /* 3245 * Is a (partial) page valid? Note that the case where size == 0 3246 * will return FALSE in the degenerate case where the page is entirely 3247 * invalid, and TRUE otherwise. 3248 * 3249 * Does not block. 3250 * No other requirements. 
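 *
 * Illustrative sketch: a pager deciding whether the first device block
 * of the page needs to be read in might test
 *
 *	if (vm_page_is_valid(m, 0, DEV_BSIZE) == 0)
 *		... issue the read ...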
3251 */ 3252 int 3253 vm_page_is_valid(vm_page_t m, int base, int size) 3254 { 3255 int bits = vm_page_bits(base, size); 3256 3257 if (m->valid && ((m->valid & bits) == bits)) 3258 return 1; 3259 else 3260 return 0; 3261 } 3262 3263 /* 3264 * update dirty bits from pmap/mmu. May not block. 3265 * 3266 * Caller must hold the page busy 3267 */ 3268 void 3269 vm_page_test_dirty(vm_page_t m) 3270 { 3271 if ((m->dirty != VM_PAGE_BITS_ALL) && pmap_is_modified(m)) { 3272 vm_page_dirty(m); 3273 } 3274 } 3275 3276 /* 3277 * Register an action, associating it with its vm_page 3278 */ 3279 void 3280 vm_page_register_action(vm_page_action_t action, vm_page_event_t event) 3281 { 3282 struct vm_page_action_hash *hash; 3283 int hv; 3284 3285 hv = (int)((intptr_t)action->m >> 8) & vmaction_hmask; 3286 hash = &action_hash[hv]; 3287 3288 lockmgr(&hash->lk, LK_EXCLUSIVE); 3289 vm_page_flag_set(action->m, PG_ACTIONLIST); 3290 action->event = event; 3291 LIST_INSERT_HEAD(&hash->list, action, entry); 3292 lockmgr(&hash->lk, LK_RELEASE); 3293 } 3294 3295 /* 3296 * Unregister an action, disassociating it from its related vm_page 3297 */ 3298 void 3299 vm_page_unregister_action(vm_page_action_t action) 3300 { 3301 struct vm_page_action_hash *hash; 3302 int hv; 3303 3304 hv = (int)((intptr_t)action->m >> 8) & vmaction_hmask; 3305 hash = &action_hash[hv]; 3306 lockmgr(&hash->lk, LK_EXCLUSIVE); 3307 if (action->event != VMEVENT_NONE) { 3308 action->event = VMEVENT_NONE; 3309 LIST_REMOVE(action, entry); 3310 3311 if (LIST_EMPTY(&hash->list)) 3312 vm_page_flag_clear(action->m, PG_ACTIONLIST); 3313 } 3314 lockmgr(&hash->lk, LK_RELEASE); 3315 } 3316 3317 /* 3318 * Issue an event on a VM page. Corresponding action structures are 3319 * removed from the page's list and called. 3320 * 3321 * If the vm_page has no more pending action events we clear its 3322 * PG_ACTIONLIST flag. 
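 *
 * Hypothetical registration sketch (the callback and its wiring are
 * illustrative assumptions, not taken from this file; the field names
 * follow the code below):
 *
 *	action->m = m;
 *	action->func = my_event_callback;
 *	vm_page_register_action(action, event);
 *	...
 *	vm_page_unregister_action(action);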
3323 */ 3324 void 3325 vm_page_event_internal(vm_page_t m, vm_page_event_t event) 3326 { 3327 struct vm_page_action_hash *hash; 3328 struct vm_page_action *scan; 3329 struct vm_page_action *next; 3330 int hv; 3331 int all; 3332 3333 hv = (int)((intptr_t)m >> 8) & vmaction_hmask; 3334 hash = &action_hash[hv]; 3335 all = 1; 3336 3337 lockmgr(&hash->lk, LK_EXCLUSIVE); 3338 LIST_FOREACH_MUTABLE(scan, &hash->list, entry, next) { 3339 if (scan->m == m) { 3340 if (scan->event == event) { 3341 scan->event = VMEVENT_NONE; 3342 LIST_REMOVE(scan, entry); 3343 scan->func(m, scan); 3344 /* XXX */ 3345 } else { 3346 all = 0; 3347 } 3348 } 3349 } 3350 if (all) 3351 vm_page_flag_clear(m, PG_ACTIONLIST); 3352 lockmgr(&hash->lk, LK_RELEASE); 3353 } 3354 3355 #include "opt_ddb.h" 3356 #ifdef DDB 3357 #include <ddb/ddb.h> 3358 3359 DB_SHOW_COMMAND(page, vm_page_print_page_info) 3360 { 3361 db_printf("vmstats.v_free_count: %d\n", vmstats.v_free_count); 3362 db_printf("vmstats.v_cache_count: %d\n", vmstats.v_cache_count); 3363 db_printf("vmstats.v_inactive_count: %d\n", vmstats.v_inactive_count); 3364 db_printf("vmstats.v_active_count: %d\n", vmstats.v_active_count); 3365 db_printf("vmstats.v_wire_count: %d\n", vmstats.v_wire_count); 3366 db_printf("vmstats.v_free_reserved: %d\n", vmstats.v_free_reserved); 3367 db_printf("vmstats.v_free_min: %d\n", vmstats.v_free_min); 3368 db_printf("vmstats.v_free_target: %d\n", vmstats.v_free_target); 3369 db_printf("vmstats.v_cache_min: %d\n", vmstats.v_cache_min); 3370 db_printf("vmstats.v_inactive_target: %d\n", vmstats.v_inactive_target); 3371 } 3372 3373 DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info) 3374 { 3375 int i; 3376 db_printf("PQ_FREE:"); 3377 for (i = 0; i < PQ_L2_SIZE; i++) { 3378 db_printf(" %d", vm_page_queues[PQ_FREE + i].lcnt); 3379 } 3380 db_printf("\n"); 3381 3382 db_printf("PQ_CACHE:"); 3383 for(i = 0; i < PQ_L2_SIZE; i++) { 3384 db_printf(" %d", vm_page_queues[PQ_CACHE + i].lcnt); 3385 } 3386 db_printf("\n"); 3387 3388 db_printf("PQ_ACTIVE:"); 3389 for(i = 0; i < PQ_L2_SIZE; i++) { 3390 db_printf(" %d", vm_page_queues[PQ_ACTIVE + i].lcnt); 3391 } 3392 db_printf("\n"); 3393 3394 db_printf("PQ_INACTIVE:"); 3395 for(i = 0; i < PQ_L2_SIZE; i++) { 3396 db_printf(" %d", vm_page_queues[PQ_INACTIVE + i].lcnt); 3397 } 3398 db_printf("\n"); 3399 } 3400 #endif /* DDB */ 3401