/*
 * (MPSAFE)
 *
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)vm_pager.c	8.6 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $FreeBSD: src/sys/vm/vm_pager.c,v 1.54.2.2 2001/11/18 07:11:00 dillon Exp $
 */

/*
 * Paging space routine stubs.  Emulates a matchmaker-like interface
 * for builtin pagers.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/ucred.h>
#include <sys/dsched.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/thread2.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>

#include <sys/buf2.h>
#include <vm/vm_page2.h>

extern struct pagerops defaultpagerops;
extern struct pagerops swappagerops;
extern struct pagerops vnodepagerops;
extern struct pagerops devicepagerops;
extern struct pagerops physpagerops;

int cluster_pbuf_freecnt = -1;	/* unlimited to begin with */

static int dead_pager_getpage (vm_object_t, vm_page_t *, int);
static void dead_pager_putpages (vm_object_t, vm_page_t *, int, int, int *);
static boolean_t dead_pager_haspage (vm_object_t, vm_pindex_t);
static void dead_pager_dealloc (vm_object_t);

/*
 * No requirements.
 */
static int
dead_pager_getpage(vm_object_t obj, vm_page_t *mpp, int seqaccess)
{
	return VM_PAGER_FAIL;
}

/*
 * No requirements.
 */
static void
dead_pager_putpages(vm_object_t object, vm_page_t *m, int count, int flags,
		    int *rtvals)
{
	int i;

	for (i = 0; i < count; i++) {
		rtvals[i] = VM_PAGER_AGAIN;
	}
}

/*
 * No requirements.
 */
static boolean_t
dead_pager_haspage(vm_object_t object, vm_pindex_t pindex)
{
	return FALSE;
}

/*
 * No requirements.
 */
static void
dead_pager_dealloc(vm_object_t object)
{
	KKASSERT(object->swblock_count == 0);
	return;
}

static struct pagerops deadpagerops = {
	dead_pager_dealloc,
	dead_pager_getpage,
	dead_pager_putpages,
	dead_pager_haspage
};

struct pagerops *pagertab[] = {
	&defaultpagerops,	/* OBJT_DEFAULT */
	&swappagerops,		/* OBJT_SWAP */
	&vnodepagerops,		/* OBJT_VNODE */
	&devicepagerops,	/* OBJT_DEVICE */
	&devicepagerops,	/* OBJT_MGTDEVICE */
	&physpagerops,		/* OBJT_PHYS */
	&deadpagerops		/* OBJT_DEAD */
};

int npagers = NELEM(pagertab);

/*
 * Kernel address space for mapping pages.
 * Used by pagers where KVAs are needed for IO.
 *
 * XXX needs to be large enough to support the number of pending async
 * cleaning requests (NPENDINGIO == 64) * the maximum swap cluster size
 * (MAXPHYS == 64k) if you want to get the most efficiency.
 */
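/*
 * Worked figure for the note above, using the numbers it quotes:
 * 64 pending requests * 64KB per swap cluster = 4MB, so the 8MB
 * reservation below leaves roughly 2x headroom.
 */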
#define PAGER_MAP_SIZE	(8 * 1024 * 1024)

#define BSWHSIZE	16
#define BSWHMASK	(BSWHSIZE - 1)

TAILQ_HEAD(swqueue, buf);

int pager_map_size = PAGER_MAP_SIZE;
struct vm_map pager_map;

static vm_offset_t swapbkva_mem;	/* swap buffers kva */
static vm_offset_t swapbkva_kva;	/* swap buffers kva */
static struct swqueue bswlist_mem[BSWHSIZE];	/* with preallocated memory */
static struct swqueue bswlist_kva[BSWHSIZE];	/* with kva */
static struct swqueue bswlist_raw[BSWHSIZE];	/* without kva */
static struct spinlock bswspin_mem[BSWHSIZE];
static struct spinlock bswspin_kva[BSWHSIZE];
static struct spinlock bswspin_raw[BSWHSIZE];
static int pbuf_raw_count;
static int pbuf_kva_count;
static int pbuf_mem_count;

SYSCTL_INT(_vfs, OID_AUTO, pbuf_raw_count, CTLFLAG_RD, &pbuf_raw_count, 0,
	"Kernel pbuf raw reservations");
SYSCTL_INT(_vfs, OID_AUTO, pbuf_kva_count, CTLFLAG_RD, &pbuf_kva_count, 0,
	"Kernel pbuf kva reservations");
SYSCTL_INT(_vfs, OID_AUTO, pbuf_mem_count, CTLFLAG_RD, &pbuf_mem_count, 0,
	"Kernel pbuf mem reservations");

/*
 * Initialize the swap buffer list.
 *
 * Called from the low level boot code only.
 */
static void
vm_pager_init(void *arg __unused)
{
	int i;

	for (i = 0; i < BSWHSIZE; ++i) {
		TAILQ_INIT(&bswlist_mem[i]);
		TAILQ_INIT(&bswlist_kva[i]);
		TAILQ_INIT(&bswlist_raw[i]);
		spin_init(&bswspin_mem[i], "bswmem");
		spin_init(&bswspin_kva[i], "bswkva");
		spin_init(&bswspin_raw[i], "bswraw");
	}
}
SYSINIT(vm_mem, SI_BOOT1_VM, SI_ORDER_SECOND, vm_pager_init, NULL);

/*
 * Called from the low level boot code only.
 */
static
void
vm_pager_bufferinit(void *dummy __unused)
{
	struct buf *bp;
	long i;

	/*
	 * Reserve KVM space for pbuf data.
	 */
	swapbkva_mem = kmem_alloc_pageable(&pager_map, nswbuf_mem * MAXPHYS);
	if (!swapbkva_mem)
		panic("Not enough pager_map VM space for physical buffers");
	swapbkva_kva = kmem_alloc_pageable(&pager_map, nswbuf_kva * MAXPHYS);
	if (!swapbkva_kva)
		panic("Not enough pager_map VM space for physical buffers");

	/*
	 * Initial pbuf setup.
	 *
	 * mem - These pbufs have permanently allocated memory
	 * kva - These pbufs have unallocated kva reservations
	 * raw - These pbufs have no kva reservations
	 */

	/*
	 * Buffers with pre-allocated kernel memory can be convenient for
	 * copyin/copyout because no SMP page invalidation or other pmap
	 * operations are needed.
	 */
#if 1
	bp = swbuf_mem;
	for (i = 0; i < nswbuf_mem; ++i, ++bp) {
		vm_page_t m;
		vm_pindex_t pg;
		int j;

		bp->b_kvabase = (caddr_t)((intptr_t)i * MAXPHYS) + swapbkva_mem;
		bp->b_kvasize = MAXPHYS;
		bp->b_swindex = i & BSWHMASK;
		BUF_LOCKINIT(bp);
		buf_dep_init(bp);
		TAILQ_INSERT_HEAD(&bswlist_mem[i & BSWHMASK], bp, b_freelist);
		atomic_add_int(&pbuf_mem_count, 1);
		bp->b_data = bp->b_kvabase;
		bp->b_bcount = MAXPHYS;
		bp->b_xio.xio_pages = bp->b_xio.xio_internal_pages;

		pg = (vm_offset_t)bp->b_kvabase >> PAGE_SHIFT;
		vm_object_hold(&kernel_object);
		for (j = 0; j < MAXPHYS / PAGE_SIZE; ++j) {
			m = vm_page_alloc(&kernel_object, pg, VM_ALLOC_NORMAL |
							      VM_ALLOC_SYSTEM);
			KKASSERT(m != NULL);
			bp->b_xio.xio_internal_pages[j] = m;
			vm_page_wire(m);
			/* early boot, no other cpus running yet */
			pmap_kenter_noinval(pg * PAGE_SIZE, VM_PAGE_TO_PHYS(m));
			cpu_invlpg((void *)(pg * PAGE_SIZE));
			vm_page_wakeup(m);
			++pg;
		}
		vm_object_drop(&kernel_object);
		bp->b_xio.xio_npages = j;
	}
#endif

	/*
	 * Buffers with pre-assigned KVA bases.  The KVA has no memory pages
	 * assigned to it.  Saves the caller from having to reserve KVA for
	 * the page map.
	 */
	bp = swbuf_kva;
	for (i = 0; i < nswbuf_kva; ++i, ++bp) {
		bp->b_kvabase = (caddr_t)((intptr_t)i * MAXPHYS) + swapbkva_kva;
		bp->b_kvasize = MAXPHYS;
		bp->b_swindex = i & BSWHMASK;
		BUF_LOCKINIT(bp);
		buf_dep_init(bp);
		TAILQ_INSERT_HEAD(&bswlist_kva[i & BSWHMASK], bp, b_freelist);
		atomic_add_int(&pbuf_kva_count, 1);
	}

	/*
	 * RAW buffers with no KVA mappings.
	 *
	 * NOTE: We use KM_NOTLBSYNC here to reduce unnecessary IPIs
	 *	 during startup, which can really slow down emulated
	 *	 systems.
	 */
	nswbuf_raw = nbuf * 2;
	swbuf_raw = (void *)kmem_alloc3(&kernel_map,
				round_page(nswbuf_raw * sizeof(struct buf)),
				KM_NOTLBSYNC);
	smp_invltlb();
	bp = swbuf_raw;
	for (i = 0; i < nswbuf_raw; ++i, ++bp) {
		bp->b_swindex = i & BSWHMASK;
		BUF_LOCKINIT(bp);
		buf_dep_init(bp);
		TAILQ_INSERT_HEAD(&bswlist_raw[i & BSWHMASK], bp, b_freelist);
		atomic_add_int(&pbuf_raw_count, 1);
	}

	/*
	 * Allow the clustering code to use half of our pbufs.
	 */
	cluster_pbuf_freecnt = nswbuf_kva / 2;
}

SYSINIT(do_vmpg, SI_BOOT2_MACHDEP, SI_ORDER_FIRST, vm_pager_bufferinit, NULL);

/*
 * No requirements.
 */
void
vm_pager_deallocate(vm_object_t object)
{
	(*pagertab[object->type]->pgo_dealloc) (object);
}

/*
 * vm_pager_get_pages() - inline, see vm/vm_pager.h
 * vm_pager_put_pages() - inline, see vm/vm_pager.h
 * vm_pager_has_page() - inline, see vm/vm_pager.h
 * vm_pager_page_inserted() - inline, see vm/vm_pager.h
 * vm_pager_page_removed() - inline, see vm/vm_pager.h
 */

/*
 * Search the specified pager object list for an object with the
 * specified handle.  If an object with the specified handle is found,
 * increase its reference count and return it.  Otherwise, return NULL.
 *
 * The pager object list must be locked.
 */
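/*
 * Illustrative sketch only (not compiled): how a pager's allocation
 * path might use vm_pager_object_lookup() to share an existing object
 * for a handle, falling back to creating a fresh one.  The list head,
 * its locking arrangement, and the function itself are hypothetical.
 */
#if 0
static struct pagerlst example_pager_object_list;

static vm_object_t
example_pager_alloc(void *handle, vm_pindex_t size)
{
	vm_object_t object;

	/* the pager object list must be locked across this sequence */
	object = vm_pager_object_lookup(&example_pager_object_list, handle);
	if (object == NULL) {
		object = vm_object_allocate(OBJT_DEFAULT, size);
		object->handle = handle;
		TAILQ_INSERT_TAIL(&example_pager_object_list, object,
				  pager_object_list);
	}
	return (object);
}
#endif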
vm_object_t
vm_pager_object_lookup(struct pagerlst *pg_list, void *handle)
{
	vm_object_t object;

	TAILQ_FOREACH(object, pg_list, pager_object_list) {
		if (object->handle == handle) {
			VM_OBJECT_LOCK(object);
			if ((object->flags & OBJ_DEAD) == 0) {
				vm_object_reference_locked(object);
				VM_OBJECT_UNLOCK(object);
				break;
			}
			VM_OBJECT_UNLOCK(object);
		}
	}
	return (object);
}

/*
 * Initialize a physical buffer.
 *
 * No requirements.
 */
static void
initpbuf(struct buf *bp)
{
	bp->b_qindex = 0;		/* BQUEUE_NONE */
	bp->b_data = bp->b_kvabase;	/* NULL if pbuf sans kva */
	bp->b_flags = B_PAGING;
	bp->b_cmd = BUF_CMD_DONE;
	bp->b_error = 0;
	bp->b_bcount = 0;
	bp->b_bufsize = MAXPHYS;
	initbufbio(bp);
	xio_init(&bp->b_xio);
	BUF_LOCK(bp, LK_EXCLUSIVE);
}

/*
 * Allocate a physical buffer.
 *
 * There are a limited number of physical buffers.  We need to make
 * sure that no single subsystem is able to hog all of them, so each
 * subsystem implements a counter which is typically initialized to
 * 1/2 nswbuf.  getpbuf() decrements this counter on allocation and
 * increments it on release, and blocks if the counter hits zero.  A
 * subsystem may initialize the counter to -1 to disable the feature,
 * but it must still be sure to match up all uses of getpbuf() with
 * relpbuf() using the same variable.
 *
 * NOTE: pfreecnt can be NULL, but this 'feature' will be removed
 * relatively soon when the rest of the subsystems get smart about it. XXX
 *
 * Physical buffers can be with or without KVA space reserved.  There
 * are severe limitations on the ones with KVA reserved, and fewer
 * limitations on the ones without.  getpbuf() gets one without,
 * getpbuf_kva() gets one with.
 *
 * No requirements.
 */
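/*
 * Illustrative sketch only (not compiled): the pfreecnt protocol
 * described above from a subsystem's point of view.  The counter and
 * function names are hypothetical; a real subsystem would size the
 * counter once at boot (e.g. to half of nswbuf_raw).
 */
#if 0
static int example_pbuf_freecnt = -1;	/* -1 disables the limit */

static void
example_paging_io(void)
{
	struct buf *bp;

	bp = getpbuf(&example_pbuf_freecnt);	/* may block */
	/* ... set up bp and issue the I/O ... */
	relpbuf(bp, &example_pbuf_freecnt);	/* must pass same counter */
}
#endif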
struct buf *
getpbuf(int *pfreecnt)	/* raw */
{
	struct buf *bp;
	int iter;
	int loops;

	for (;;) {
		while (pfreecnt && *pfreecnt <= 0) {
			tsleep_interlock(pfreecnt, 0);
			if ((int)atomic_fetchadd_int(pfreecnt, 0) <= 0)
				tsleep(pfreecnt, PINTERLOCKED, "wswbuf0", 0);
		}
		if (pbuf_raw_count <= 0) {
			tsleep_interlock(&pbuf_raw_count, 0);
			if ((int)atomic_fetchadd_int(&pbuf_raw_count, 0) <= 0)
				tsleep(&pbuf_raw_count, PINTERLOCKED,
				       "wswbuf0", 0);
			continue;
		}
		iter = mycpuid & BSWHMASK;
		for (loops = BSWHSIZE; loops; --loops) {
			if (TAILQ_FIRST(&bswlist_raw[iter]) == NULL) {
				iter = (iter + 1) & BSWHMASK;
				continue;
			}
			spin_lock(&bswspin_raw[iter]);
			if ((bp = TAILQ_FIRST(&bswlist_raw[iter])) == NULL) {
				spin_unlock(&bswspin_raw[iter]);
				iter = (iter + 1) & BSWHMASK;
				continue;
			}
			TAILQ_REMOVE(&bswlist_raw[iter], bp, b_freelist);
			atomic_add_int(&pbuf_raw_count, -1);
			if (pfreecnt)
				atomic_add_int(pfreecnt, -1);
			spin_unlock(&bswspin_raw[iter]);
			initpbuf(bp);

			return bp;
		}
	}
	/* not reached */
}

struct buf *
getpbuf_kva(int *pfreecnt)
{
	struct buf *bp;
	int iter;
	int loops;

	for (;;) {
		while (pfreecnt && *pfreecnt <= 0) {
			tsleep_interlock(pfreecnt, 0);
			if ((int)atomic_fetchadd_int(pfreecnt, 0) <= 0)
				tsleep(pfreecnt, PINTERLOCKED, "wswbuf0", 0);
		}
		if (pbuf_kva_count <= 0) {
			tsleep_interlock(&pbuf_kva_count, 0);
			if ((int)atomic_fetchadd_int(&pbuf_kva_count, 0) <= 0)
				tsleep(&pbuf_kva_count, PINTERLOCKED,
				       "wswbuf0", 0);
			continue;
		}
		iter = mycpuid & BSWHMASK;
		for (loops = BSWHSIZE; loops; --loops) {
			if (TAILQ_FIRST(&bswlist_kva[iter]) == NULL) {
				iter = (iter + 1) & BSWHMASK;
				continue;
			}
			spin_lock(&bswspin_kva[iter]);
			if ((bp = TAILQ_FIRST(&bswlist_kva[iter])) == NULL) {
				spin_unlock(&bswspin_kva[iter]);
				iter = (iter + 1) & BSWHMASK;
				continue;
			}
			TAILQ_REMOVE(&bswlist_kva[iter], bp, b_freelist);
			atomic_add_int(&pbuf_kva_count, -1);
			if (pfreecnt)
				atomic_add_int(pfreecnt, -1);
			spin_unlock(&bswspin_kva[iter]);
			initpbuf(bp);

			return bp;
		}
	}
	/* not reached */
}

/*
 * Allocate a pbuf with kernel memory already preallocated.  Caller must
 * not change the mapping.
 */
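/*
 * Illustrative sketch only (not compiled): because a "mem" pbuf's
 * pages are permanently mapped at bp->b_data, data can be staged
 * through it with a plain copy and no pmap operations.  The source
 * pointer and length are hypothetical; len must not exceed MAXPHYS.
 */
#if 0
static void
example_stage_write(const void *src, size_t len)
{
	struct buf *bp;

	bp = getpbuf_mem(NULL);
	bcopy(src, bp->b_data, len);
	/* ... issue the I/O from bp ... */
	relpbuf(bp, NULL);
}
#endif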
struct buf *
getpbuf_mem(int *pfreecnt)
{
	struct buf *bp;
	int iter;
	int loops;

	for (;;) {
		while (pfreecnt && *pfreecnt <= 0) {
			tsleep_interlock(pfreecnt, 0);
			if ((int)atomic_fetchadd_int(pfreecnt, 0) <= 0)
				tsleep(pfreecnt, PINTERLOCKED, "wswbuf0", 0);
		}
		if (pbuf_mem_count <= 0) {
			tsleep_interlock(&pbuf_mem_count, 0);
			if ((int)atomic_fetchadd_int(&pbuf_mem_count, 0) <= 0)
				tsleep(&pbuf_mem_count, PINTERLOCKED,
				       "wswbuf0", 0);
			continue;
		}
		iter = mycpuid & BSWHMASK;
		for (loops = BSWHSIZE; loops; --loops) {
			if (TAILQ_FIRST(&bswlist_mem[iter]) == NULL) {
				iter = (iter + 1) & BSWHMASK;
				continue;
			}
			spin_lock(&bswspin_mem[iter]);
			if ((bp = TAILQ_FIRST(&bswlist_mem[iter])) == NULL) {
				spin_unlock(&bswspin_mem[iter]);
				iter = (iter + 1) & BSWHMASK;
				continue;
			}
			TAILQ_REMOVE(&bswlist_mem[iter], bp, b_freelist);
			atomic_add_int(&pbuf_mem_count, -1);
			if (pfreecnt)
				atomic_add_int(pfreecnt, -1);
			spin_unlock(&bswspin_mem[iter]);
			initpbuf(bp);

			return bp;
		}
	}
	/* not reached */
}

/*
 * Allocate a physical buffer, if one is available.
 *
 * Note that there is no NULL hack here - all subsystems using this
 * call understand how to use pfreecnt.
 *
 * No requirements.
 */
struct buf *
trypbuf(int *pfreecnt)	/* raw */
{
	struct buf *bp;
	int iter = mycpuid & BSWHMASK;
	int loops;

	for (loops = BSWHSIZE; loops; --loops) {
		if (*pfreecnt <= 0 || TAILQ_FIRST(&bswlist_raw[iter]) == NULL) {
			iter = (iter + 1) & BSWHMASK;
			continue;
		}
		spin_lock(&bswspin_raw[iter]);
		if (*pfreecnt <= 0 ||
		    (bp = TAILQ_FIRST(&bswlist_raw[iter])) == NULL) {
			spin_unlock(&bswspin_raw[iter]);
			iter = (iter + 1) & BSWHMASK;
			continue;
		}
		TAILQ_REMOVE(&bswlist_raw[iter], bp, b_freelist);
		atomic_add_int(&pbuf_raw_count, -1);
		atomic_add_int(pfreecnt, -1);

		spin_unlock(&bswspin_raw[iter]);

		initpbuf(bp);

		return bp;
	}
	return NULL;
}

struct buf *
trypbuf_kva(int *pfreecnt)
{
	struct buf *bp;
	int iter = mycpuid & BSWHMASK;
	int loops;

	for (loops = BSWHSIZE; loops; --loops) {
		if (*pfreecnt <= 0 || TAILQ_FIRST(&bswlist_kva[iter]) == NULL) {
			iter = (iter + 1) & BSWHMASK;
			continue;
		}
		spin_lock(&bswspin_kva[iter]);
		if (*pfreecnt <= 0 ||
		    (bp = TAILQ_FIRST(&bswlist_kva[iter])) == NULL) {
			spin_unlock(&bswspin_kva[iter]);
			iter = (iter + 1) & BSWHMASK;
			continue;
		}
		TAILQ_REMOVE(&bswlist_kva[iter], bp, b_freelist);
		atomic_add_int(&pbuf_kva_count, -1);
		atomic_add_int(pfreecnt, -1);

		spin_unlock(&bswspin_kva[iter]);

		initpbuf(bp);

		return bp;
	}
	return NULL;
}

/*
 * Release a physical buffer
 *
 * NOTE: pfreecnt can be NULL, but this 'feature' will be removed
 * relatively soon when the rest of the subsystems get smart about it. XXX
 *
 * No requirements.
 */
void
relpbuf(struct buf *bp, int *pfreecnt)
{
	int wake = 0;
	int wake_free = 0;
	int iter = bp->b_swindex;

	KKASSERT(bp->b_flags & B_PAGING);
	dsched_buf_exit(bp);

	BUF_UNLOCK(bp);

	if (bp >= swbuf_mem && bp < &swbuf_mem[nswbuf_mem]) {
		KKASSERT(bp->b_kvabase);
		spin_lock(&bswspin_mem[iter]);
		TAILQ_INSERT_HEAD(&bswlist_mem[iter], bp, b_freelist);
		if (atomic_fetchadd_int(&pbuf_mem_count, 1) == nswbuf_mem / 4)
			wake = 1;
		if (pfreecnt) {
			if (atomic_fetchadd_int(pfreecnt, 1) == 1)
				wake_free = 1;
		}
		spin_unlock(&bswspin_mem[iter]);
		if (wake)
			wakeup(&pbuf_mem_count);
	} else if (bp >= swbuf_kva && bp < &swbuf_kva[nswbuf_kva]) {
		KKASSERT(bp->b_kvabase);
		spin_lock(&bswspin_kva[iter]);
		TAILQ_INSERT_HEAD(&bswlist_kva[iter], bp, b_freelist);
		if (atomic_fetchadd_int(&pbuf_kva_count, 1) == nswbuf_kva / 4)
			wake = 1;
		if (pfreecnt) {
			if (atomic_fetchadd_int(pfreecnt, 1) == 1)
				wake_free = 1;
		}
		spin_unlock(&bswspin_kva[iter]);
		if (wake)
			wakeup(&pbuf_kva_count);
	} else {
		KKASSERT(bp->b_kvabase == NULL);
		KKASSERT(bp >= swbuf_raw && bp < &swbuf_raw[nswbuf_raw]);
		spin_lock(&bswspin_raw[iter]);
		TAILQ_INSERT_HEAD(&bswlist_raw[iter], bp, b_freelist);
		if (atomic_fetchadd_int(&pbuf_raw_count, 1) == nswbuf_raw / 4)
			wake = 1;
		if (pfreecnt) {
			if (atomic_fetchadd_int(pfreecnt, 1) == 1)
				wake_free = 1;
		}
		spin_unlock(&bswspin_raw[iter]);
		if (wake)
			wakeup(&pbuf_raw_count);
	}
	if (wake_free)
		wakeup(pfreecnt);
}

/*
 * Adjust a subsystem's pbuf counter by n and wake up any threads
 * sleeping on the counter.
 *
 * No requirements.
 */
void
pbuf_adjcount(int *pfreecnt, int n)
{
	if (n) {
		atomic_add_int(pfreecnt, n);
		wakeup(pfreecnt);
	}
}
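/*
 * Illustrative sketch only: a subsystem would typically establish its
 * private pbuf quota once at boot through pbuf_adjcount(), which also
 * wakes up anyone already sleeping on the counter, e.g.:
 *
 *	pbuf_adjcount(&example_pbuf_freecnt, nswbuf_kva / 2);
 *
 * (example_pbuf_freecnt being the hypothetical counter sketched
 * earlier.)
 */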