1 /* $OpenBSD: subr_pool.c,v 1.233 2021/03/10 10:21:47 jsg Exp $ */ 2 /* $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $ */ 3 4 /*- 5 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace 10 * Simulation Facility, NASA Ames Research Center. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/errno.h> 37 #include <sys/kernel.h> 38 #include <sys/malloc.h> 39 #include <sys/pool.h> 40 #include <sys/proc.h> 41 #include <sys/syslog.h> 42 #include <sys/sysctl.h> 43 #include <sys/task.h> 44 #include <sys/time.h> 45 #include <sys/timeout.h> 46 #include <sys/percpu.h> 47 #include <sys/tracepoint.h> 48 49 #include <uvm/uvm_extern.h> 50 51 /* 52 * Pool resource management utility. 53 * 54 * Memory is allocated in pages which are split into pieces according to 55 * the pool item size. Each page is kept on one of three lists in the 56 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages', 57 * for empty, full and partially-full pages respectively. The individual 58 * pool items are on a linked list headed by `ph_items' in each page 59 * header. The memory for building the page list is either taken from 60 * the allocated pages themselves (for small pool items) or taken from 61 * an internal pool of page headers (`phpool'). 62 */ 63 64 /* List of all pools */ 65 SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head); 66 67 /* 68 * Every pool gets a unique serial number assigned to it. If this counter 69 * wraps, we're screwed, but we shouldn't create so many pools anyway. 
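 * The serial number is what the kern.pool.* sysctls use to look a pool
 * up; see sysctl_dopool() below.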
70 */ 71 unsigned int pool_serial; 72 unsigned int pool_count; 73 74 /* Lock the previous variables making up the global pool state */ 75 struct rwlock pool_lock = RWLOCK_INITIALIZER("pools"); 76 77 /* Private pool for page header structures */ 78 struct pool phpool; 79 80 struct pool_lock_ops { 81 void (*pl_init)(struct pool *, union pool_lock *, 82 const struct lock_type *); 83 void (*pl_enter)(union pool_lock *); 84 int (*pl_enter_try)(union pool_lock *); 85 void (*pl_leave)(union pool_lock *); 86 void (*pl_assert_locked)(union pool_lock *); 87 void (*pl_assert_unlocked)(union pool_lock *); 88 int (*pl_sleep)(void *, union pool_lock *, int, const char *); 89 }; 90 91 static const struct pool_lock_ops pool_lock_ops_mtx; 92 static const struct pool_lock_ops pool_lock_ops_rw; 93 94 #ifdef WITNESS 95 #define pl_init(pp, pl) do { \ 96 static const struct lock_type __lock_type = { .lt_name = #pl }; \ 97 (pp)->pr_lock_ops->pl_init(pp, pl, &__lock_type); \ 98 } while (0) 99 #else /* WITNESS */ 100 #define pl_init(pp, pl) (pp)->pr_lock_ops->pl_init(pp, pl, NULL) 101 #endif /* WITNESS */ 102 103 static inline void 104 pl_enter(struct pool *pp, union pool_lock *pl) 105 { 106 pp->pr_lock_ops->pl_enter(pl); 107 } 108 static inline int 109 pl_enter_try(struct pool *pp, union pool_lock *pl) 110 { 111 return pp->pr_lock_ops->pl_enter_try(pl); 112 } 113 static inline void 114 pl_leave(struct pool *pp, union pool_lock *pl) 115 { 116 pp->pr_lock_ops->pl_leave(pl); 117 } 118 static inline void 119 pl_assert_locked(struct pool *pp, union pool_lock *pl) 120 { 121 pp->pr_lock_ops->pl_assert_locked(pl); 122 } 123 static inline void 124 pl_assert_unlocked(struct pool *pp, union pool_lock *pl) 125 { 126 pp->pr_lock_ops->pl_assert_unlocked(pl); 127 } 128 static inline int 129 pl_sleep(struct pool *pp, void *ident, union pool_lock *lock, int priority, 130 const char *wmesg) 131 { 132 return pp->pr_lock_ops->pl_sleep(ident, lock, priority, wmesg); 133 } 134 135 struct pool_item { 136 u_long pi_magic; 137 XSIMPLEQ_ENTRY(pool_item) pi_list; 138 }; 139 #define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic) 140 141 struct pool_page_header { 142 /* Page headers */ 143 TAILQ_ENTRY(pool_page_header) 144 ph_entry; /* pool page list */ 145 XSIMPLEQ_HEAD(, pool_item) 146 ph_items; /* free items on the page */ 147 RBT_ENTRY(pool_page_header) 148 ph_node; /* off-page page headers */ 149 unsigned int ph_nmissing; /* # of chunks in use */ 150 caddr_t ph_page; /* this page's address */ 151 caddr_t ph_colored; /* page's colored address */ 152 unsigned long ph_magic; 153 uint64_t ph_timestamp; 154 }; 155 #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */ 156 #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT) 157 158 #ifdef MULTIPROCESSOR 159 struct pool_cache_item { 160 struct pool_cache_item *ci_next; /* next item in list */ 161 unsigned long ci_nitems; /* number of items in list */ 162 TAILQ_ENTRY(pool_cache_item) 163 ci_nextl; /* entry in list of lists */ 164 }; 165 166 /* we store whether the cached item is poisoned in the high bit of nitems */ 167 #define POOL_CACHE_ITEM_NITEMS_MASK 0x7ffffffUL 168 #define POOL_CACHE_ITEM_NITEMS_POISON 0x8000000UL 169 170 #define POOL_CACHE_ITEM_NITEMS(_ci) \ 171 ((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK) 172 173 #define POOL_CACHE_ITEM_POISONED(_ci) \ 174 ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON) 175 176 struct pool_cache { 177 struct pool_cache_item *pc_actv; /* active list of items */ 178 unsigned long pc_nactv; /* actv head nitems 
cache */ 179 struct pool_cache_item *pc_prev; /* previous list of items */ 180 181 uint64_t pc_gen; /* generation number */ 182 uint64_t pc_nget; /* # of successful requests */ 183 uint64_t pc_nfail; /* # of unsuccessful reqs */ 184 uint64_t pc_nput; /* # of releases */ 185 uint64_t pc_nlget; /* # of list requests */ 186 uint64_t pc_nlfail; /* # of fails getting a list */ 187 uint64_t pc_nlput; /* # of list releases */ 188 189 int pc_nout; 190 }; 191 192 void *pool_cache_get(struct pool *); 193 void pool_cache_put(struct pool *, void *); 194 void pool_cache_destroy(struct pool *); 195 void pool_cache_gc(struct pool *); 196 #endif 197 void pool_cache_pool_info(struct pool *, struct kinfo_pool *); 198 int pool_cache_info(struct pool *, void *, size_t *); 199 int pool_cache_cpus_info(struct pool *, void *, size_t *); 200 201 #ifdef POOL_DEBUG 202 int pool_debug = 1; 203 #else 204 int pool_debug = 0; 205 #endif 206 207 #define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0) 208 209 struct pool_page_header * 210 pool_p_alloc(struct pool *, int, int *); 211 void pool_p_insert(struct pool *, struct pool_page_header *); 212 void pool_p_remove(struct pool *, struct pool_page_header *); 213 void pool_p_free(struct pool *, struct pool_page_header *); 214 215 void pool_update_curpage(struct pool *); 216 void *pool_do_get(struct pool *, int, int *); 217 void pool_do_put(struct pool *, void *); 218 int pool_chk_page(struct pool *, struct pool_page_header *, int); 219 int pool_chk(struct pool *); 220 void pool_get_done(struct pool *, void *, void *); 221 void pool_runqueue(struct pool *, int); 222 223 void *pool_allocator_alloc(struct pool *, int, int *); 224 void pool_allocator_free(struct pool *, void *); 225 226 /* 227 * The default pool allocator. 228 */ 229 void *pool_page_alloc(struct pool *, int, int *); 230 void pool_page_free(struct pool *, void *); 231 232 /* 233 * safe for interrupts; this is the default allocator 234 */ 235 struct pool_allocator pool_allocator_single = { 236 pool_page_alloc, 237 pool_page_free, 238 POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED) 239 }; 240 241 void *pool_multi_alloc(struct pool *, int, int *); 242 void pool_multi_free(struct pool *, void *); 243 244 struct pool_allocator pool_allocator_multi = { 245 pool_multi_alloc, 246 pool_multi_free, 247 POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED) 248 }; 249 250 void *pool_multi_alloc_ni(struct pool *, int, int *); 251 void pool_multi_free_ni(struct pool *, void *); 252 253 struct pool_allocator pool_allocator_multi_ni = { 254 pool_multi_alloc_ni, 255 pool_multi_free_ni, 256 POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED) 257 }; 258 259 #ifdef DDB 260 void pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...) 261 __attribute__((__format__(__kprintf__,1,2)))); 262 void pool_print1(struct pool *, const char *, int (*)(const char *, ...) 263 __attribute__((__format__(__kprintf__,1,2)))); 264 #endif 265 266 /* stale page garbage collectors */ 267 void pool_gc_sched(void *); 268 struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL); 269 void pool_gc_pages(void *); 270 struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL); 271 272 #define POOL_WAIT_FREE SEC_TO_NSEC(1) 273 #define POOL_WAIT_GC SEC_TO_NSEC(8) 274 275 /* 276 * TODO Move getnsecuptime() to kern_tc.c and document it when we 277 * have callers in other modules. 
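 * getnsecuptime() converts getnanouptime(9) to nanoseconds; that coarse
 * resolution is sufficient for the POOL_WAIT_FREE/POOL_WAIT_GC page-age
 * checks below.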
278 */ 279 static uint64_t 280 getnsecuptime(void) 281 { 282 struct timespec now; 283 284 getnanouptime(&now); 285 return TIMESPEC_TO_NSEC(&now); 286 } 287 288 RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare); 289 290 static inline int 291 phtree_compare(const struct pool_page_header *a, 292 const struct pool_page_header *b) 293 { 294 vaddr_t va = (vaddr_t)a->ph_page; 295 vaddr_t vb = (vaddr_t)b->ph_page; 296 297 /* the compares in this order are important for the NFIND to work */ 298 if (vb < va) 299 return (-1); 300 if (vb > va) 301 return (1); 302 303 return (0); 304 } 305 306 RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare); 307 308 /* 309 * Return the pool page header based on page address. 310 */ 311 static inline struct pool_page_header * 312 pr_find_pagehead(struct pool *pp, void *v) 313 { 314 struct pool_page_header *ph, key; 315 316 if (POOL_INPGHDR(pp)) { 317 caddr_t page; 318 319 page = (caddr_t)((vaddr_t)v & pp->pr_pgmask); 320 321 return ((struct pool_page_header *)(page + pp->pr_phoffset)); 322 } 323 324 key.ph_page = v; 325 ph = RBT_NFIND(phtree, &pp->pr_phtree, &key); 326 if (ph == NULL) 327 panic("%s: %s: page header missing", __func__, pp->pr_wchan); 328 329 KASSERT(ph->ph_page <= (caddr_t)v); 330 if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v) 331 panic("%s: %s: incorrect page", __func__, pp->pr_wchan); 332 333 return (ph); 334 } 335 336 /* 337 * Initialize the given pool resource structure. 338 * 339 * We export this routine to allow other kernel parts to declare 340 * static pools that must be initialized before malloc() is available. 341 */ 342 void 343 pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags, 344 const char *wchan, struct pool_allocator *palloc) 345 { 346 int off = 0, space; 347 unsigned int pgsize = PAGE_SIZE, items; 348 size_t pa_pagesz; 349 #ifdef DIAGNOSTIC 350 struct pool *iter; 351 #endif 352 353 if (align == 0) 354 align = ALIGN(1); 355 356 if (size < sizeof(struct pool_item)) 357 size = sizeof(struct pool_item); 358 359 size = roundup(size, align); 360 361 while (size * 8 > pgsize) 362 pgsize <<= 1; 363 364 if (palloc == NULL) { 365 if (pgsize > PAGE_SIZE) { 366 palloc = ISSET(flags, PR_WAITOK) ? 367 &pool_allocator_multi_ni : &pool_allocator_multi; 368 } else 369 palloc = &pool_allocator_single; 370 371 pa_pagesz = palloc->pa_pagesz; 372 } else { 373 size_t pgsizes; 374 375 pa_pagesz = palloc->pa_pagesz; 376 if (pa_pagesz == 0) 377 pa_pagesz = POOL_ALLOC_DEFAULT; 378 379 pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED; 380 381 /* make sure the allocator can fit at least one item */ 382 if (size > pgsizes) { 383 panic("%s: pool %s item size 0x%zx > " 384 "allocator %p sizes 0x%zx", __func__, wchan, 385 size, palloc, pgsizes); 386 } 387 388 /* shrink pgsize until it fits into the range */ 389 while (!ISSET(pgsizes, pgsize)) 390 pgsize >>= 1; 391 } 392 KASSERT(ISSET(pa_pagesz, pgsize)); 393 394 items = pgsize / size; 395 396 /* 397 * Decide whether to put the page header off page to avoid 398 * wasting too large a part of the page. Off-page page headers 399 * go into an RB tree, so we can match a returned item with 400 * its header based on the page address. 
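 * In-page headers live at pr_phoffset inside the page itself and are
 * found by masking the item address with pr_pgmask.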
401 */ 402 if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) { 403 if (pgsize - (size * items) > 404 sizeof(struct pool_page_header)) { 405 off = pgsize - sizeof(struct pool_page_header); 406 } else if (sizeof(struct pool_page_header) * 2 >= size) { 407 off = pgsize - sizeof(struct pool_page_header); 408 items = off / size; 409 } 410 } 411 412 KASSERT(items > 0); 413 414 /* 415 * Initialize the pool structure. 416 */ 417 memset(pp, 0, sizeof(*pp)); 418 if (ISSET(flags, PR_RWLOCK)) { 419 KASSERT(flags & PR_WAITOK); 420 pp->pr_lock_ops = &pool_lock_ops_rw; 421 } else 422 pp->pr_lock_ops = &pool_lock_ops_mtx; 423 TAILQ_INIT(&pp->pr_emptypages); 424 TAILQ_INIT(&pp->pr_fullpages); 425 TAILQ_INIT(&pp->pr_partpages); 426 pp->pr_curpage = NULL; 427 pp->pr_npages = 0; 428 pp->pr_minitems = 0; 429 pp->pr_minpages = 0; 430 pp->pr_maxpages = 8; 431 pp->pr_size = size; 432 pp->pr_pgsize = pgsize; 433 pp->pr_pgmask = ~0UL ^ (pgsize - 1); 434 pp->pr_phoffset = off; 435 pp->pr_itemsperpage = items; 436 pp->pr_wchan = wchan; 437 pp->pr_alloc = palloc; 438 pp->pr_nitems = 0; 439 pp->pr_nout = 0; 440 pp->pr_hardlimit = UINT_MAX; 441 pp->pr_hardlimit_warning = NULL; 442 pp->pr_hardlimit_ratecap.tv_sec = 0; 443 pp->pr_hardlimit_ratecap.tv_usec = 0; 444 pp->pr_hardlimit_warning_last.tv_sec = 0; 445 pp->pr_hardlimit_warning_last.tv_usec = 0; 446 RBT_INIT(phtree, &pp->pr_phtree); 447 448 /* 449 * Use the space between the chunks and the page header 450 * for cache coloring. 451 */ 452 space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize; 453 space -= pp->pr_itemsperpage * pp->pr_size; 454 pp->pr_align = align; 455 pp->pr_maxcolors = (space / align) + 1; 456 457 pp->pr_nget = 0; 458 pp->pr_nfail = 0; 459 pp->pr_nput = 0; 460 pp->pr_npagealloc = 0; 461 pp->pr_npagefree = 0; 462 pp->pr_hiwat = 0; 463 pp->pr_nidle = 0; 464 465 pp->pr_ipl = ipl; 466 pp->pr_flags = flags; 467 468 pl_init(pp, &pp->pr_lock); 469 pl_init(pp, &pp->pr_requests_lock); 470 TAILQ_INIT(&pp->pr_requests); 471 472 if (phpool.pr_size == 0) { 473 pool_init(&phpool, sizeof(struct pool_page_header), 0, 474 IPL_HIGH, 0, "phpool", NULL); 475 476 /* make sure phpool won't "recurse" */ 477 KASSERT(POOL_INPGHDR(&phpool)); 478 } 479 480 /* pglistalloc/constraint parameters */ 481 pp->pr_crange = &kp_dirty; 482 483 /* Insert this into the list of all pools. */ 484 rw_enter_write(&pool_lock); 485 #ifdef DIAGNOSTIC 486 SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) { 487 if (iter == pp) 488 panic("%s: pool %s already on list", __func__, wchan); 489 } 490 #endif 491 492 pp->pr_serial = ++pool_serial; 493 if (pool_serial == 0) 494 panic("%s: too much uptime", __func__); 495 496 SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist); 497 pool_count++; 498 rw_exit_write(&pool_lock); 499 } 500 501 /* 502 * Decommission a pool resource. 
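 * The pool must have no items outstanding.  It is unlinked from the
 * global pool list and its remaining (empty) pages are freed.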
503 */ 504 void 505 pool_destroy(struct pool *pp) 506 { 507 struct pool_page_header *ph; 508 struct pool *prev, *iter; 509 510 #ifdef MULTIPROCESSOR 511 if (pp->pr_cache != NULL) 512 pool_cache_destroy(pp); 513 #endif 514 515 #ifdef DIAGNOSTIC 516 if (pp->pr_nout != 0) 517 panic("%s: pool busy: still out: %u", __func__, pp->pr_nout); 518 #endif 519 520 /* Remove from global pool list */ 521 rw_enter_write(&pool_lock); 522 pool_count--; 523 if (pp == SIMPLEQ_FIRST(&pool_head)) 524 SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist); 525 else { 526 prev = SIMPLEQ_FIRST(&pool_head); 527 SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) { 528 if (iter == pp) { 529 SIMPLEQ_REMOVE_AFTER(&pool_head, prev, 530 pr_poollist); 531 break; 532 } 533 prev = iter; 534 } 535 } 536 rw_exit_write(&pool_lock); 537 538 /* Remove all pages */ 539 while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) { 540 pl_enter(pp, &pp->pr_lock); 541 pool_p_remove(pp, ph); 542 pl_leave(pp, &pp->pr_lock); 543 pool_p_free(pp, ph); 544 } 545 KASSERT(TAILQ_EMPTY(&pp->pr_fullpages)); 546 KASSERT(TAILQ_EMPTY(&pp->pr_partpages)); 547 } 548 549 void 550 pool_request_init(struct pool_request *pr, 551 void (*handler)(struct pool *, void *, void *), void *cookie) 552 { 553 pr->pr_handler = handler; 554 pr->pr_cookie = cookie; 555 pr->pr_item = NULL; 556 } 557 558 void 559 pool_request(struct pool *pp, struct pool_request *pr) 560 { 561 pl_enter(pp, &pp->pr_requests_lock); 562 TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry); 563 pool_runqueue(pp, PR_NOWAIT); 564 pl_leave(pp, &pp->pr_requests_lock); 565 } 566 567 struct pool_get_memory { 568 union pool_lock lock; 569 void * volatile v; 570 }; 571 572 /* 573 * Grab an item from the pool. 574 */ 575 void * 576 pool_get(struct pool *pp, int flags) 577 { 578 void *v = NULL; 579 int slowdown = 0; 580 581 KASSERT(flags & (PR_WAITOK | PR_NOWAIT)); 582 if (pp->pr_flags & PR_RWLOCK) 583 KASSERT(flags & PR_WAITOK); 584 585 #ifdef MULTIPROCESSOR 586 if (pp->pr_cache != NULL) { 587 v = pool_cache_get(pp); 588 if (v != NULL) 589 goto good; 590 } 591 #endif 592 593 pl_enter(pp, &pp->pr_lock); 594 if (pp->pr_nout >= pp->pr_hardlimit) { 595 if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL)) 596 goto fail; 597 } else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) { 598 if (ISSET(flags, PR_NOWAIT)) 599 goto fail; 600 } 601 pl_leave(pp, &pp->pr_lock); 602 603 if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK)) 604 yield(); 605 606 if (v == NULL) { 607 struct pool_get_memory mem = { .v = NULL }; 608 struct pool_request pr; 609 610 #ifdef DIAGNOSTIC 611 if (ISSET(flags, PR_WAITOK) && curproc == &proc0) 612 panic("%s: cannot sleep for memory during boot", 613 __func__); 614 #endif 615 pl_init(pp, &mem.lock); 616 pool_request_init(&pr, pool_get_done, &mem); 617 pool_request(pp, &pr); 618 619 pl_enter(pp, &mem.lock); 620 while (mem.v == NULL) 621 pl_sleep(pp, &mem, &mem.lock, PSWP, pp->pr_wchan); 622 pl_leave(pp, &mem.lock); 623 624 v = mem.v; 625 } 626 627 #ifdef MULTIPROCESSOR 628 good: 629 #endif 630 if (ISSET(flags, PR_ZERO)) 631 memset(v, 0, pp->pr_size); 632 633 TRACEPOINT(uvm, pool_get, pp, v, flags); 634 635 return (v); 636 637 fail: 638 pp->pr_nfail++; 639 pl_leave(pp, &pp->pr_lock); 640 return (NULL); 641 } 642 643 void 644 pool_get_done(struct pool *pp, void *xmem, void *v) 645 { 646 struct pool_get_memory *mem = xmem; 647 648 pl_enter(pp, &mem->lock); 649 mem->v = v; 650 pl_leave(pp, &mem->lock); 651 652 wakeup_one(mem); 653 } 654 655 void 656 pool_runqueue(struct pool *pp, int flags) 657 { 658 
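	/*
	 * Only one context drains pr_requests at a time: pr_requesting
	 * counts callers, and the loop below repeats while other contexts
	 * queued more requests behind the one currently draining.
	 */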
struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl); 659 struct pool_request *pr; 660 661 pl_assert_unlocked(pp, &pp->pr_lock); 662 pl_assert_locked(pp, &pp->pr_requests_lock); 663 664 if (pp->pr_requesting++) 665 return; 666 667 do { 668 pp->pr_requesting = 1; 669 670 TAILQ_CONCAT(&prl, &pp->pr_requests, pr_entry); 671 if (TAILQ_EMPTY(&prl)) 672 continue; 673 674 pl_leave(pp, &pp->pr_requests_lock); 675 676 pl_enter(pp, &pp->pr_lock); 677 pr = TAILQ_FIRST(&prl); 678 while (pr != NULL) { 679 int slowdown = 0; 680 681 if (pp->pr_nout >= pp->pr_hardlimit) 682 break; 683 684 pr->pr_item = pool_do_get(pp, flags, &slowdown); 685 if (pr->pr_item == NULL) /* || slowdown ? */ 686 break; 687 688 pr = TAILQ_NEXT(pr, pr_entry); 689 } 690 pl_leave(pp, &pp->pr_lock); 691 692 while ((pr = TAILQ_FIRST(&prl)) != NULL && 693 pr->pr_item != NULL) { 694 TAILQ_REMOVE(&prl, pr, pr_entry); 695 (*pr->pr_handler)(pp, pr->pr_cookie, pr->pr_item); 696 } 697 698 pl_enter(pp, &pp->pr_requests_lock); 699 } while (--pp->pr_requesting); 700 701 TAILQ_CONCAT(&pp->pr_requests, &prl, pr_entry); 702 } 703 704 void * 705 pool_do_get(struct pool *pp, int flags, int *slowdown) 706 { 707 struct pool_item *pi; 708 struct pool_page_header *ph; 709 710 pl_assert_locked(pp, &pp->pr_lock); 711 712 splassert(pp->pr_ipl); 713 714 /* 715 * Account for this item now to avoid races if we need to give up 716 * pr_lock to allocate a page. 717 */ 718 pp->pr_nout++; 719 720 if (pp->pr_curpage == NULL) { 721 pl_leave(pp, &pp->pr_lock); 722 ph = pool_p_alloc(pp, flags, slowdown); 723 pl_enter(pp, &pp->pr_lock); 724 725 if (ph == NULL) { 726 pp->pr_nout--; 727 return (NULL); 728 } 729 730 pool_p_insert(pp, ph); 731 } 732 733 ph = pp->pr_curpage; 734 pi = XSIMPLEQ_FIRST(&ph->ph_items); 735 if (__predict_false(pi == NULL)) 736 panic("%s: %s: page empty", __func__, pp->pr_wchan); 737 738 if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) { 739 panic("%s: %s free list modified: " 740 "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx", 741 __func__, pp->pr_wchan, ph->ph_page, pi, 742 0, pi->pi_magic, POOL_IMAGIC(ph, pi)); 743 } 744 745 XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list); 746 747 #ifdef DIAGNOSTIC 748 if (pool_debug && POOL_PHPOISON(ph)) { 749 size_t pidx; 750 uint32_t pval; 751 if (poison_check(pi + 1, pp->pr_size - sizeof(*pi), 752 &pidx, &pval)) { 753 int *ip = (int *)(pi + 1); 754 panic("%s: %s free list modified: " 755 "page %p; item addr %p; offset 0x%zx=0x%x", 756 __func__, pp->pr_wchan, ph->ph_page, pi, 757 (pidx * sizeof(int)) + sizeof(*pi), ip[pidx]); 758 } 759 } 760 #endif /* DIAGNOSTIC */ 761 762 if (ph->ph_nmissing++ == 0) { 763 /* 764 * This page was previously empty. Move it to the list of 765 * partially-full pages. This page is already curpage. 766 */ 767 TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry); 768 TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry); 769 770 pp->pr_nidle--; 771 } 772 773 if (ph->ph_nmissing == pp->pr_itemsperpage) { 774 /* 775 * This page is now full. Move it to the full list 776 * and select a new current page. 777 */ 778 TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry); 779 TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry); 780 pool_update_curpage(pp); 781 } 782 783 pp->pr_nget++; 784 785 return (pi); 786 } 787 788 /* 789 * Return resource to the pool. 
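 * If the pool has a per-cpu cache and no requests are queued, the item
 * is released to the cache instead of back onto its page.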
790 */ 791 void 792 pool_put(struct pool *pp, void *v) 793 { 794 struct pool_page_header *ph, *freeph = NULL; 795 796 #ifdef DIAGNOSTIC 797 if (v == NULL) 798 panic("%s: NULL item", __func__); 799 #endif 800 801 TRACEPOINT(uvm, pool_put, pp, v); 802 803 #ifdef MULTIPROCESSOR 804 if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) { 805 pool_cache_put(pp, v); 806 return; 807 } 808 #endif 809 810 pl_enter(pp, &pp->pr_lock); 811 812 pool_do_put(pp, v); 813 814 pp->pr_nout--; 815 pp->pr_nput++; 816 817 /* is it time to free a page? */ 818 if (pp->pr_nidle > pp->pr_maxpages && 819 (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL && 820 getnsecuptime() - ph->ph_timestamp > POOL_WAIT_FREE) { 821 freeph = ph; 822 pool_p_remove(pp, freeph); 823 } 824 825 pl_leave(pp, &pp->pr_lock); 826 827 if (freeph != NULL) 828 pool_p_free(pp, freeph); 829 830 pool_wakeup(pp); 831 } 832 833 void 834 pool_wakeup(struct pool *pp) 835 { 836 if (!TAILQ_EMPTY(&pp->pr_requests)) { 837 pl_enter(pp, &pp->pr_requests_lock); 838 pool_runqueue(pp, PR_NOWAIT); 839 pl_leave(pp, &pp->pr_requests_lock); 840 } 841 } 842 843 void 844 pool_do_put(struct pool *pp, void *v) 845 { 846 struct pool_item *pi = v; 847 struct pool_page_header *ph; 848 849 splassert(pp->pr_ipl); 850 851 ph = pr_find_pagehead(pp, v); 852 853 #ifdef DIAGNOSTIC 854 if (pool_debug) { 855 struct pool_item *qi; 856 XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) { 857 if (pi == qi) { 858 panic("%s: %s: double pool_put: %p", __func__, 859 pp->pr_wchan, pi); 860 } 861 } 862 } 863 #endif /* DIAGNOSTIC */ 864 865 pi->pi_magic = POOL_IMAGIC(ph, pi); 866 XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list); 867 #ifdef DIAGNOSTIC 868 if (POOL_PHPOISON(ph)) 869 poison_mem(pi + 1, pp->pr_size - sizeof(*pi)); 870 #endif /* DIAGNOSTIC */ 871 872 if (ph->ph_nmissing-- == pp->pr_itemsperpage) { 873 /* 874 * The page was previously completely full, move it to the 875 * partially-full list. 876 */ 877 TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry); 878 TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry); 879 } 880 881 if (ph->ph_nmissing == 0) { 882 /* 883 * The page is now empty, so move it to the empty page list. 884 */ 885 pp->pr_nidle++; 886 887 ph->ph_timestamp = getnsecuptime(); 888 TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry); 889 TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry); 890 pool_update_curpage(pp); 891 } 892 } 893 894 /* 895 * Add N items to the pool. 896 */ 897 int 898 pool_prime(struct pool *pp, int n) 899 { 900 struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl); 901 struct pool_page_header *ph; 902 int newpages; 903 904 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 905 906 while (newpages-- > 0) { 907 int slowdown = 0; 908 909 ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown); 910 if (ph == NULL) /* or slowdown? 
*/ 911 break; 912 913 TAILQ_INSERT_TAIL(&pl, ph, ph_entry); 914 } 915 916 pl_enter(pp, &pp->pr_lock); 917 while ((ph = TAILQ_FIRST(&pl)) != NULL) { 918 TAILQ_REMOVE(&pl, ph, ph_entry); 919 pool_p_insert(pp, ph); 920 } 921 pl_leave(pp, &pp->pr_lock); 922 923 return (0); 924 } 925 926 struct pool_page_header * 927 pool_p_alloc(struct pool *pp, int flags, int *slowdown) 928 { 929 struct pool_page_header *ph; 930 struct pool_item *pi; 931 caddr_t addr; 932 unsigned int order; 933 int o; 934 int n; 935 936 pl_assert_unlocked(pp, &pp->pr_lock); 937 KASSERT(pp->pr_size >= sizeof(*pi)); 938 939 addr = pool_allocator_alloc(pp, flags, slowdown); 940 if (addr == NULL) 941 return (NULL); 942 943 if (POOL_INPGHDR(pp)) 944 ph = (struct pool_page_header *)(addr + pp->pr_phoffset); 945 else { 946 ph = pool_get(&phpool, flags); 947 if (ph == NULL) { 948 pool_allocator_free(pp, addr); 949 return (NULL); 950 } 951 } 952 953 XSIMPLEQ_INIT(&ph->ph_items); 954 ph->ph_page = addr; 955 addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors); 956 ph->ph_colored = addr; 957 ph->ph_nmissing = 0; 958 arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic)); 959 #ifdef DIAGNOSTIC 960 /* use a bit in ph_magic to record if we poison page items */ 961 if (pool_debug) 962 SET(ph->ph_magic, POOL_MAGICBIT); 963 else 964 CLR(ph->ph_magic, POOL_MAGICBIT); 965 #endif /* DIAGNOSTIC */ 966 967 n = pp->pr_itemsperpage; 968 o = 32; 969 while (n--) { 970 pi = (struct pool_item *)addr; 971 pi->pi_magic = POOL_IMAGIC(ph, pi); 972 973 if (o == 32) { 974 order = arc4random(); 975 o = 0; 976 } 977 if (ISSET(order, 1 << o++)) 978 XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list); 979 else 980 XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list); 981 982 #ifdef DIAGNOSTIC 983 if (POOL_PHPOISON(ph)) 984 poison_mem(pi + 1, pp->pr_size - sizeof(*pi)); 985 #endif /* DIAGNOSTIC */ 986 987 addr += pp->pr_size; 988 } 989 990 return (ph); 991 } 992 993 void 994 pool_p_free(struct pool *pp, struct pool_page_header *ph) 995 { 996 struct pool_item *pi; 997 998 pl_assert_unlocked(pp, &pp->pr_lock); 999 KASSERT(ph->ph_nmissing == 0); 1000 1001 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) { 1002 if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) { 1003 panic("%s: %s free list modified: " 1004 "page %p; item addr %p; offset 0x%x=0x%lx", 1005 __func__, pp->pr_wchan, ph->ph_page, pi, 1006 0, pi->pi_magic); 1007 } 1008 1009 #ifdef DIAGNOSTIC 1010 if (POOL_PHPOISON(ph)) { 1011 size_t pidx; 1012 uint32_t pval; 1013 if (poison_check(pi + 1, pp->pr_size - sizeof(*pi), 1014 &pidx, &pval)) { 1015 int *ip = (int *)(pi + 1); 1016 panic("%s: %s free list modified: " 1017 "page %p; item addr %p; offset 0x%zx=0x%x", 1018 __func__, pp->pr_wchan, ph->ph_page, pi, 1019 pidx * sizeof(int), ip[pidx]); 1020 } 1021 } 1022 #endif 1023 } 1024 1025 pool_allocator_free(pp, ph->ph_page); 1026 1027 if (!POOL_INPGHDR(pp)) 1028 pool_put(&phpool, ph); 1029 } 1030 1031 void 1032 pool_p_insert(struct pool *pp, struct pool_page_header *ph) 1033 { 1034 pl_assert_locked(pp, &pp->pr_lock); 1035 1036 /* If the pool was depleted, point at the new page */ 1037 if (pp->pr_curpage == NULL) 1038 pp->pr_curpage = ph; 1039 1040 TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry); 1041 if (!POOL_INPGHDR(pp)) 1042 RBT_INSERT(phtree, &pp->pr_phtree, ph); 1043 1044 pp->pr_nitems += pp->pr_itemsperpage; 1045 pp->pr_nidle++; 1046 1047 pp->pr_npagealloc++; 1048 if (++pp->pr_npages > pp->pr_hiwat) 1049 pp->pr_hiwat = pp->pr_npages; 1050 } 1051 1052 void 1053 pool_p_remove(struct pool *pp, struct 
pool_page_header *ph) 1054 { 1055 pl_assert_locked(pp, &pp->pr_lock); 1056 1057 pp->pr_npagefree++; 1058 pp->pr_npages--; 1059 pp->pr_nidle--; 1060 pp->pr_nitems -= pp->pr_itemsperpage; 1061 1062 if (!POOL_INPGHDR(pp)) 1063 RBT_REMOVE(phtree, &pp->pr_phtree, ph); 1064 TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry); 1065 1066 pool_update_curpage(pp); 1067 } 1068 1069 void 1070 pool_update_curpage(struct pool *pp) 1071 { 1072 pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist); 1073 if (pp->pr_curpage == NULL) { 1074 pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist); 1075 } 1076 } 1077 1078 void 1079 pool_setlowat(struct pool *pp, int n) 1080 { 1081 int prime = 0; 1082 1083 pl_enter(pp, &pp->pr_lock); 1084 pp->pr_minitems = n; 1085 pp->pr_minpages = (n == 0) 1086 ? 0 1087 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1088 1089 if (pp->pr_nitems < n) 1090 prime = n - pp->pr_nitems; 1091 pl_leave(pp, &pp->pr_lock); 1092 1093 if (prime > 0) 1094 pool_prime(pp, prime); 1095 } 1096 1097 void 1098 pool_sethiwat(struct pool *pp, int n) 1099 { 1100 pp->pr_maxpages = (n == 0) 1101 ? 0 1102 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1103 } 1104 1105 int 1106 pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap) 1107 { 1108 int error = 0; 1109 1110 if (n < pp->pr_nout) { 1111 error = EINVAL; 1112 goto done; 1113 } 1114 1115 pp->pr_hardlimit = n; 1116 pp->pr_hardlimit_warning = warnmsg; 1117 pp->pr_hardlimit_ratecap.tv_sec = ratecap; 1118 pp->pr_hardlimit_warning_last.tv_sec = 0; 1119 pp->pr_hardlimit_warning_last.tv_usec = 0; 1120 1121 done: 1122 return (error); 1123 } 1124 1125 void 1126 pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode) 1127 { 1128 pp->pr_crange = mode; 1129 } 1130 1131 /* 1132 * Release all complete pages that have not been used recently. 1133 * 1134 * Returns non-zero if any pages have been reclaimed. 1135 */ 1136 int 1137 pool_reclaim(struct pool *pp) 1138 { 1139 struct pool_page_header *ph, *phnext; 1140 struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl); 1141 1142 pl_enter(pp, &pp->pr_lock); 1143 for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) { 1144 phnext = TAILQ_NEXT(ph, ph_entry); 1145 1146 /* Check our minimum page claim */ 1147 if (pp->pr_npages <= pp->pr_minpages) 1148 break; 1149 1150 /* 1151 * If freeing this page would put us below 1152 * the low water mark, stop now. 1153 */ 1154 if ((pp->pr_nitems - pp->pr_itemsperpage) < 1155 pp->pr_minitems) 1156 break; 1157 1158 pool_p_remove(pp, ph); 1159 TAILQ_INSERT_TAIL(&pl, ph, ph_entry); 1160 } 1161 pl_leave(pp, &pp->pr_lock); 1162 1163 if (TAILQ_EMPTY(&pl)) 1164 return (0); 1165 1166 while ((ph = TAILQ_FIRST(&pl)) != NULL) { 1167 TAILQ_REMOVE(&pl, ph, ph_entry); 1168 pool_p_free(pp, ph); 1169 } 1170 1171 return (1); 1172 } 1173 1174 /* 1175 * Release all complete pages that have not been used recently 1176 * from all pools. 1177 */ 1178 void 1179 pool_reclaim_all(void) 1180 { 1181 struct pool *pp; 1182 1183 rw_enter_read(&pool_lock); 1184 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) 1185 pool_reclaim(pp); 1186 rw_exit_read(&pool_lock); 1187 } 1188 1189 #ifdef DDB 1190 #include <machine/db_machdep.h> 1191 #include <ddb/db_output.h> 1192 1193 /* 1194 * Diagnostic helpers. 1195 */ 1196 void 1197 pool_printit(struct pool *pp, const char *modif, 1198 int (*pr)(const char *, ...) 
__attribute__((__format__(__kprintf__,1,2)))) 1199 { 1200 pool_print1(pp, modif, pr); 1201 } 1202 1203 void 1204 pool_print_pagelist(struct pool_pagelist *pl, 1205 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2)))) 1206 { 1207 struct pool_page_header *ph; 1208 struct pool_item *pi; 1209 1210 TAILQ_FOREACH(ph, pl, ph_entry) { 1211 (*pr)("\t\tpage %p, color %p, nmissing %d\n", 1212 ph->ph_page, ph->ph_colored, ph->ph_nmissing); 1213 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) { 1214 if (pi->pi_magic != POOL_IMAGIC(ph, pi)) { 1215 (*pr)("\t\t\titem %p, magic 0x%lx\n", 1216 pi, pi->pi_magic); 1217 } 1218 } 1219 } 1220 } 1221 1222 void 1223 pool_print1(struct pool *pp, const char *modif, 1224 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2)))) 1225 { 1226 struct pool_page_header *ph; 1227 int print_pagelist = 0; 1228 char c; 1229 1230 while ((c = *modif++) != '\0') { 1231 if (c == 'p') 1232 print_pagelist = 1; 1233 modif++; 1234 } 1235 1236 (*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size, 1237 pp->pr_maxcolors); 1238 (*pr)("\talloc %p\n", pp->pr_alloc); 1239 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", 1240 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); 1241 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n", 1242 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit); 1243 1244 (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n", 1245 pp->pr_nget, pp->pr_nfail, pp->pr_nput); 1246 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n", 1247 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle); 1248 1249 if (print_pagelist == 0) 1250 return; 1251 1252 if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) 1253 (*pr)("\n\tempty page list:\n"); 1254 pool_print_pagelist(&pp->pr_emptypages, pr); 1255 if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL) 1256 (*pr)("\n\tfull page list:\n"); 1257 pool_print_pagelist(&pp->pr_fullpages, pr); 1258 if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL) 1259 (*pr)("\n\tpartial-page list:\n"); 1260 pool_print_pagelist(&pp->pr_partpages, pr); 1261 1262 if (pp->pr_curpage == NULL) 1263 (*pr)("\tno current page\n"); 1264 else 1265 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page); 1266 } 1267 1268 void 1269 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif) 1270 { 1271 struct pool *pp; 1272 char maxp[16]; 1273 int ovflw; 1274 char mode; 1275 1276 mode = modif[0]; 1277 if (mode != '\0' && mode != 'a') { 1278 db_printf("usage: show all pools [/a]\n"); 1279 return; 1280 } 1281 1282 if (mode == '\0') 1283 db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n", 1284 "Name", 1285 "Size", 1286 "Requests", 1287 "Fail", 1288 "Releases", 1289 "Pgreq", 1290 "Pgrel", 1291 "Npage", 1292 "Hiwat", 1293 "Minpg", 1294 "Maxpg", 1295 "Idle"); 1296 else 1297 db_printf("%-12s %18s %18s\n", 1298 "Name", "Address", "Allocator"); 1299 1300 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) { 1301 if (mode == 'a') { 1302 db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp, 1303 pp->pr_alloc); 1304 continue; 1305 } 1306 1307 if (!pp->pr_nget) 1308 continue; 1309 1310 if (pp->pr_maxpages == UINT_MAX) 1311 snprintf(maxp, sizeof maxp, "inf"); 1312 else 1313 snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages); 1314 1315 #define PRWORD(ovflw, fmt, width, fixed, val) do { \ 1316 (ovflw) += db_printf((fmt), \ 1317 (width) - (fixed) - (ovflw) > 0 ? 
\ 1318 (width) - (fixed) - (ovflw) : 0, \ 1319 (val)) - (width); \ 1320 if ((ovflw) < 0) \ 1321 (ovflw) = 0; \ 1322 } while (/* CONSTCOND */0) 1323 1324 ovflw = 0; 1325 PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan); 1326 PRWORD(ovflw, " %*u", 4, 1, pp->pr_size); 1327 PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget); 1328 PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail); 1329 PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput); 1330 PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc); 1331 PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree); 1332 PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages); 1333 PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat); 1334 PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages); 1335 PRWORD(ovflw, " %*s", 6, 1, maxp); 1336 PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle); 1337 1338 pool_chk(pp); 1339 } 1340 } 1341 #endif /* DDB */ 1342 1343 #if defined(POOL_DEBUG) || defined(DDB) 1344 int 1345 pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected) 1346 { 1347 struct pool_item *pi; 1348 caddr_t page; 1349 int n; 1350 const char *label = pp->pr_wchan; 1351 1352 page = (caddr_t)((u_long)ph & pp->pr_pgmask); 1353 if (page != ph->ph_page && POOL_INPGHDR(pp)) { 1354 printf("%s: ", label); 1355 printf("pool(%p:%s): page inconsistency: page %p; " 1356 "at page head addr %p (p %p)\n", 1357 pp, pp->pr_wchan, ph->ph_page, ph, page); 1358 return 1; 1359 } 1360 1361 for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0; 1362 pi != NULL; 1363 pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) { 1364 if ((caddr_t)pi < ph->ph_page || 1365 (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) { 1366 printf("%s: ", label); 1367 printf("pool(%p:%s): page inconsistency: page %p;" 1368 " item ordinal %d; addr %p\n", pp, 1369 pp->pr_wchan, ph->ph_page, n, pi); 1370 return (1); 1371 } 1372 1373 if (pi->pi_magic != POOL_IMAGIC(ph, pi)) { 1374 printf("%s: ", label); 1375 printf("pool(%p:%s): free list modified: " 1376 "page %p; item ordinal %d; addr %p " 1377 "(p %p); offset 0x%x=0x%lx\n", 1378 pp, pp->pr_wchan, ph->ph_page, n, pi, page, 1379 0, pi->pi_magic); 1380 } 1381 1382 #ifdef DIAGNOSTIC 1383 if (POOL_PHPOISON(ph)) { 1384 size_t pidx; 1385 uint32_t pval; 1386 if (poison_check(pi + 1, pp->pr_size - sizeof(*pi), 1387 &pidx, &pval)) { 1388 int *ip = (int *)(pi + 1); 1389 printf("pool(%s): free list modified: " 1390 "page %p; item ordinal %d; addr %p " 1391 "(p %p); offset 0x%zx=0x%x\n", 1392 pp->pr_wchan, ph->ph_page, n, pi, 1393 page, pidx * sizeof(int), ip[pidx]); 1394 } 1395 } 1396 #endif /* DIAGNOSTIC */ 1397 } 1398 if (n + ph->ph_nmissing != pp->pr_itemsperpage) { 1399 printf("pool(%p:%s): page inconsistency: page %p;" 1400 " %d on list, %d missing, %d items per page\n", pp, 1401 pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing, 1402 pp->pr_itemsperpage); 1403 return 1; 1404 } 1405 if (expected >= 0 && n != expected) { 1406 printf("pool(%p:%s): page inconsistency: page %p;" 1407 " %d on list, %d missing, %d expected\n", pp, 1408 pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing, 1409 expected); 1410 return 1; 1411 } 1412 return 0; 1413 } 1414 1415 int 1416 pool_chk(struct pool *pp) 1417 { 1418 struct pool_page_header *ph; 1419 int r = 0; 1420 1421 TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry) 1422 r += pool_chk_page(pp, ph, pp->pr_itemsperpage); 1423 TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) 1424 r += pool_chk_page(pp, ph, 0); 1425 TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) 1426 r += pool_chk_page(pp, ph, -1); 1427 1428 return (r); 1429 } 1430 #endif /* defined(POOL_DEBUG) || defined(DDB) */ 1431 1432 #ifdef DDB 1433 void 1434 
pool_walk(struct pool *pp, int full, 1435 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))), 1436 void (*func)(void *, int, int (*)(const char *, ...) 1437 __attribute__((__format__(__kprintf__,1,2))))) 1438 { 1439 struct pool_page_header *ph; 1440 struct pool_item *pi; 1441 caddr_t cp; 1442 int n; 1443 1444 TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) { 1445 cp = ph->ph_colored; 1446 n = ph->ph_nmissing; 1447 1448 while (n--) { 1449 func(cp, full, pr); 1450 cp += pp->pr_size; 1451 } 1452 } 1453 1454 TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) { 1455 cp = ph->ph_colored; 1456 n = ph->ph_nmissing; 1457 1458 do { 1459 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) { 1460 if (cp == (caddr_t)pi) 1461 break; 1462 } 1463 if (cp != (caddr_t)pi) { 1464 func(cp, full, pr); 1465 n--; 1466 } 1467 1468 cp += pp->pr_size; 1469 } while (n > 0); 1470 } 1471 } 1472 #endif 1473 1474 /* 1475 * We have three different sysctls. 1476 * kern.pool.npools - the number of pools. 1477 * kern.pool.pool.<pool#> - the pool struct for the pool#. 1478 * kern.pool.name.<pool#> - the name for pool#. 1479 */ 1480 int 1481 sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp) 1482 { 1483 struct kinfo_pool pi; 1484 struct pool *pp; 1485 int rv = ENOENT; 1486 1487 switch (name[0]) { 1488 case KERN_POOL_NPOOLS: 1489 if (namelen != 1) 1490 return (ENOTDIR); 1491 return (sysctl_rdint(oldp, oldlenp, NULL, pool_count)); 1492 1493 case KERN_POOL_NAME: 1494 case KERN_POOL_POOL: 1495 case KERN_POOL_CACHE: 1496 case KERN_POOL_CACHE_CPUS: 1497 break; 1498 default: 1499 return (EOPNOTSUPP); 1500 } 1501 1502 if (namelen != 2) 1503 return (ENOTDIR); 1504 1505 rw_enter_read(&pool_lock); 1506 1507 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) { 1508 if (name[1] == pp->pr_serial) 1509 break; 1510 } 1511 1512 if (pp == NULL) 1513 goto done; 1514 1515 switch (name[0]) { 1516 case KERN_POOL_NAME: 1517 rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan); 1518 break; 1519 case KERN_POOL_POOL: 1520 memset(&pi, 0, sizeof(pi)); 1521 1522 pl_enter(pp, &pp->pr_lock); 1523 pi.pr_size = pp->pr_size; 1524 pi.pr_pgsize = pp->pr_pgsize; 1525 pi.pr_itemsperpage = pp->pr_itemsperpage; 1526 pi.pr_npages = pp->pr_npages; 1527 pi.pr_minpages = pp->pr_minpages; 1528 pi.pr_maxpages = pp->pr_maxpages; 1529 pi.pr_hardlimit = pp->pr_hardlimit; 1530 pi.pr_nout = pp->pr_nout; 1531 pi.pr_nitems = pp->pr_nitems; 1532 pi.pr_nget = pp->pr_nget; 1533 pi.pr_nput = pp->pr_nput; 1534 pi.pr_nfail = pp->pr_nfail; 1535 pi.pr_npagealloc = pp->pr_npagealloc; 1536 pi.pr_npagefree = pp->pr_npagefree; 1537 pi.pr_hiwat = pp->pr_hiwat; 1538 pi.pr_nidle = pp->pr_nidle; 1539 pl_leave(pp, &pp->pr_lock); 1540 1541 pool_cache_pool_info(pp, &pi); 1542 1543 rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi)); 1544 break; 1545 1546 case KERN_POOL_CACHE: 1547 rv = pool_cache_info(pp, oldp, oldlenp); 1548 break; 1549 1550 case KERN_POOL_CACHE_CPUS: 1551 rv = pool_cache_cpus_info(pp, oldp, oldlenp); 1552 break; 1553 } 1554 1555 done: 1556 rw_exit_read(&pool_lock); 1557 1558 return (rv); 1559 } 1560 1561 void 1562 pool_gc_sched(void *null) 1563 { 1564 task_add(systqmp, &pool_gc_task); 1565 } 1566 1567 void 1568 pool_gc_pages(void *null) 1569 { 1570 struct pool *pp; 1571 struct pool_page_header *ph, *freeph; 1572 int s; 1573 1574 rw_enter_read(&pool_lock); 1575 s = splvm(); /* XXX go to splvm until all pools _setipl properly */ 1576 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) { 1577 #ifdef MULTIPROCESSOR 1578 if (pp->pr_cache != NULL) 1579 
pool_cache_gc(pp); 1580 #endif 1581 1582 if (pp->pr_nidle <= pp->pr_minpages || /* guess */ 1583 !pl_enter_try(pp, &pp->pr_lock)) /* try */ 1584 continue; 1585 1586 /* is it time to free a page? */ 1587 if (pp->pr_nidle > pp->pr_minpages && 1588 (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL && 1589 getnsecuptime() - ph->ph_timestamp > POOL_WAIT_GC) { 1590 freeph = ph; 1591 pool_p_remove(pp, freeph); 1592 } else 1593 freeph = NULL; 1594 1595 pl_leave(pp, &pp->pr_lock); 1596 1597 if (freeph != NULL) 1598 pool_p_free(pp, freeph); 1599 } 1600 splx(s); 1601 rw_exit_read(&pool_lock); 1602 1603 timeout_add_sec(&pool_gc_tick, 1); 1604 } 1605 1606 /* 1607 * Pool backend allocators. 1608 */ 1609 1610 void * 1611 pool_allocator_alloc(struct pool *pp, int flags, int *slowdown) 1612 { 1613 void *v; 1614 1615 v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown); 1616 1617 #ifdef DIAGNOSTIC 1618 if (v != NULL && POOL_INPGHDR(pp)) { 1619 vaddr_t addr = (vaddr_t)v; 1620 if ((addr & pp->pr_pgmask) != addr) { 1621 panic("%s: %s page address %p isn't aligned to %u", 1622 __func__, pp->pr_wchan, v, pp->pr_pgsize); 1623 } 1624 } 1625 #endif 1626 1627 return (v); 1628 } 1629 1630 void 1631 pool_allocator_free(struct pool *pp, void *v) 1632 { 1633 struct pool_allocator *pa = pp->pr_alloc; 1634 1635 (*pa->pa_free)(pp, v); 1636 } 1637 1638 void * 1639 pool_page_alloc(struct pool *pp, int flags, int *slowdown) 1640 { 1641 struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; 1642 1643 kd.kd_waitok = ISSET(flags, PR_WAITOK); 1644 kd.kd_slowdown = slowdown; 1645 1646 return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd)); 1647 } 1648 1649 void 1650 pool_page_free(struct pool *pp, void *v) 1651 { 1652 km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange); 1653 } 1654 1655 void * 1656 pool_multi_alloc(struct pool *pp, int flags, int *slowdown) 1657 { 1658 struct kmem_va_mode kv = kv_intrsafe; 1659 struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; 1660 void *v; 1661 int s; 1662 1663 if (POOL_INPGHDR(pp)) 1664 kv.kv_align = pp->pr_pgsize; 1665 1666 kd.kd_waitok = ISSET(flags, PR_WAITOK); 1667 kd.kd_slowdown = slowdown; 1668 1669 s = splvm(); 1670 v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd); 1671 splx(s); 1672 1673 return (v); 1674 } 1675 1676 void 1677 pool_multi_free(struct pool *pp, void *v) 1678 { 1679 struct kmem_va_mode kv = kv_intrsafe; 1680 int s; 1681 1682 if (POOL_INPGHDR(pp)) 1683 kv.kv_align = pp->pr_pgsize; 1684 1685 s = splvm(); 1686 km_free(v, pp->pr_pgsize, &kv, pp->pr_crange); 1687 splx(s); 1688 } 1689 1690 void * 1691 pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown) 1692 { 1693 struct kmem_va_mode kv = kv_any; 1694 struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER; 1695 void *v; 1696 1697 if (POOL_INPGHDR(pp)) 1698 kv.kv_align = pp->pr_pgsize; 1699 1700 kd.kd_waitok = ISSET(flags, PR_WAITOK); 1701 kd.kd_slowdown = slowdown; 1702 1703 KERNEL_LOCK(); 1704 v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd); 1705 KERNEL_UNLOCK(); 1706 1707 return (v); 1708 } 1709 1710 void 1711 pool_multi_free_ni(struct pool *pp, void *v) 1712 { 1713 struct kmem_va_mode kv = kv_any; 1714 1715 if (POOL_INPGHDR(pp)) 1716 kv.kv_align = pp->pr_pgsize; 1717 1718 KERNEL_LOCK(); 1719 km_free(v, pp->pr_pgsize, &kv, pp->pr_crange); 1720 KERNEL_UNLOCK(); 1721 } 1722 1723 #ifdef MULTIPROCESSOR 1724 1725 struct pool pool_caches; /* per cpu cache entries */ 1726 1727 void 1728 pool_cache_init(struct pool *pp) 1729 { 1730 struct cpumem *cm; 1731 struct pool_cache *pc; 1732 struct cpumem_iter i; 1733 1734 if 
(pool_caches.pr_size == 0) { 1735 pool_init(&pool_caches, sizeof(struct pool_cache), 1736 CACHELINESIZE, IPL_NONE, PR_WAITOK | PR_RWLOCK, 1737 "plcache", NULL); 1738 } 1739 1740 /* must be able to use the pool items as cache list items */ 1741 KASSERT(pp->pr_size >= sizeof(struct pool_cache_item)); 1742 1743 cm = cpumem_get(&pool_caches); 1744 1745 pl_init(pp, &pp->pr_cache_lock); 1746 arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic)); 1747 TAILQ_INIT(&pp->pr_cache_lists); 1748 pp->pr_cache_nitems = 0; 1749 pp->pr_cache_timestamp = getnsecuptime(); 1750 pp->pr_cache_items = 8; 1751 pp->pr_cache_contention = 0; 1752 pp->pr_cache_ngc = 0; 1753 1754 CPUMEM_FOREACH(pc, &i, cm) { 1755 pc->pc_actv = NULL; 1756 pc->pc_nactv = 0; 1757 pc->pc_prev = NULL; 1758 1759 pc->pc_nget = 0; 1760 pc->pc_nfail = 0; 1761 pc->pc_nput = 0; 1762 pc->pc_nlget = 0; 1763 pc->pc_nlfail = 0; 1764 pc->pc_nlput = 0; 1765 pc->pc_nout = 0; 1766 } 1767 1768 membar_producer(); 1769 1770 pp->pr_cache = cm; 1771 } 1772 1773 static inline void 1774 pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci) 1775 { 1776 unsigned long *entry = (unsigned long *)&ci->ci_nextl; 1777 1778 entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci; 1779 entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next; 1780 } 1781 1782 static inline void 1783 pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci) 1784 { 1785 unsigned long *entry; 1786 unsigned long val; 1787 1788 entry = (unsigned long *)&ci->ci_nextl; 1789 val = pp->pr_cache_magic[0] ^ (u_long)ci; 1790 if (*entry != val) 1791 goto fail; 1792 1793 entry++; 1794 val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next; 1795 if (*entry != val) 1796 goto fail; 1797 1798 return; 1799 1800 fail: 1801 panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx", 1802 __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci, 1803 *entry, val); 1804 } 1805 1806 static inline void 1807 pool_list_enter(struct pool *pp) 1808 { 1809 if (pl_enter_try(pp, &pp->pr_cache_lock) == 0) { 1810 pl_enter(pp, &pp->pr_cache_lock); 1811 pp->pr_cache_contention++; 1812 } 1813 } 1814 1815 static inline void 1816 pool_list_leave(struct pool *pp) 1817 { 1818 pl_leave(pp, &pp->pr_cache_lock); 1819 } 1820 1821 static inline struct pool_cache_item * 1822 pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc) 1823 { 1824 struct pool_cache_item *pl; 1825 1826 pool_list_enter(pp); 1827 pl = TAILQ_FIRST(&pp->pr_cache_lists); 1828 if (pl != NULL) { 1829 TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl); 1830 pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl); 1831 1832 pool_cache_item_magic(pp, pl); 1833 1834 pc->pc_nlget++; 1835 } else 1836 pc->pc_nlfail++; 1837 1838 /* fold this cpus nout into the global while we have the lock */ 1839 pp->pr_cache_nout += pc->pc_nout; 1840 pc->pc_nout = 0; 1841 pool_list_leave(pp); 1842 1843 return (pl); 1844 } 1845 1846 static inline void 1847 pool_cache_list_free(struct pool *pp, struct pool_cache *pc, 1848 struct pool_cache_item *ci) 1849 { 1850 pool_list_enter(pp); 1851 if (TAILQ_EMPTY(&pp->pr_cache_lists)) 1852 pp->pr_cache_timestamp = getnsecuptime(); 1853 1854 pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci); 1855 TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl); 1856 1857 pc->pc_nlput++; 1858 1859 /* fold this cpus nout into the global while we have the lock */ 1860 pp->pr_cache_nout += pc->pc_nout; 1861 pc->pc_nout = 0; 1862 pool_list_leave(pp); 1863 } 1864 1865 static inline struct pool_cache * 1866 pool_cache_enter(struct pool *pp, 
int *s) 1867 { 1868 struct pool_cache *pc; 1869 1870 pc = cpumem_enter(pp->pr_cache); 1871 *s = splraise(pp->pr_ipl); 1872 pc->pc_gen++; 1873 1874 return (pc); 1875 } 1876 1877 static inline void 1878 pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s) 1879 { 1880 pc->pc_gen++; 1881 splx(s); 1882 cpumem_leave(pp->pr_cache, pc); 1883 } 1884 1885 void * 1886 pool_cache_get(struct pool *pp) 1887 { 1888 struct pool_cache *pc; 1889 struct pool_cache_item *ci; 1890 int s; 1891 1892 pc = pool_cache_enter(pp, &s); 1893 1894 if (pc->pc_actv != NULL) { 1895 ci = pc->pc_actv; 1896 } else if (pc->pc_prev != NULL) { 1897 ci = pc->pc_prev; 1898 pc->pc_prev = NULL; 1899 } else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) { 1900 pc->pc_nfail++; 1901 goto done; 1902 } 1903 1904 pool_cache_item_magic_check(pp, ci); 1905 #ifdef DIAGNOSTIC 1906 if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) { 1907 size_t pidx; 1908 uint32_t pval; 1909 1910 if (poison_check(ci + 1, pp->pr_size - sizeof(*ci), 1911 &pidx, &pval)) { 1912 int *ip = (int *)(ci + 1); 1913 ip += pidx; 1914 1915 panic("%s: %s cpu free list modified: " 1916 "item addr %p+%zu 0x%x!=0x%x", 1917 __func__, pp->pr_wchan, ci, 1918 (caddr_t)ip - (caddr_t)ci, *ip, pval); 1919 } 1920 } 1921 #endif 1922 1923 pc->pc_actv = ci->ci_next; 1924 pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1; 1925 pc->pc_nget++; 1926 pc->pc_nout++; 1927 1928 done: 1929 pool_cache_leave(pp, pc, s); 1930 1931 return (ci); 1932 } 1933 1934 void 1935 pool_cache_put(struct pool *pp, void *v) 1936 { 1937 struct pool_cache *pc; 1938 struct pool_cache_item *ci = v; 1939 unsigned long nitems; 1940 int s; 1941 #ifdef DIAGNOSTIC 1942 int poison = pool_debug && pp->pr_size > sizeof(*ci); 1943 1944 if (poison) 1945 poison_mem(ci + 1, pp->pr_size - sizeof(*ci)); 1946 #endif 1947 1948 pc = pool_cache_enter(pp, &s); 1949 1950 nitems = pc->pc_nactv; 1951 if (nitems >= pp->pr_cache_items) { 1952 if (pc->pc_prev != NULL) 1953 pool_cache_list_free(pp, pc, pc->pc_prev); 1954 1955 pc->pc_prev = pc->pc_actv; 1956 1957 pc->pc_actv = NULL; 1958 pc->pc_nactv = 0; 1959 nitems = 0; 1960 } 1961 1962 ci->ci_next = pc->pc_actv; 1963 ci->ci_nitems = ++nitems; 1964 #ifdef DIAGNOSTIC 1965 ci->ci_nitems |= poison ? 
POOL_CACHE_ITEM_NITEMS_POISON : 0; 1966 #endif 1967 pool_cache_item_magic(pp, ci); 1968 1969 pc->pc_actv = ci; 1970 pc->pc_nactv = nitems; 1971 1972 pc->pc_nput++; 1973 pc->pc_nout--; 1974 1975 pool_cache_leave(pp, pc, s); 1976 } 1977 1978 struct pool_cache_item * 1979 pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl) 1980 { 1981 struct pool_cache_item *rpl, *next; 1982 1983 if (pl == NULL) 1984 return (NULL); 1985 1986 rpl = TAILQ_NEXT(pl, ci_nextl); 1987 1988 pl_enter(pp, &pp->pr_lock); 1989 do { 1990 next = pl->ci_next; 1991 pool_do_put(pp, pl); 1992 pl = next; 1993 } while (pl != NULL); 1994 pl_leave(pp, &pp->pr_lock); 1995 1996 return (rpl); 1997 } 1998 1999 void 2000 pool_cache_destroy(struct pool *pp) 2001 { 2002 struct pool_cache *pc; 2003 struct pool_cache_item *pl; 2004 struct cpumem_iter i; 2005 struct cpumem *cm; 2006 2007 rw_enter_write(&pool_lock); /* serialise with the gc */ 2008 cm = pp->pr_cache; 2009 pp->pr_cache = NULL; /* make pool_put avoid the cache */ 2010 rw_exit_write(&pool_lock); 2011 2012 CPUMEM_FOREACH(pc, &i, cm) { 2013 pool_cache_list_put(pp, pc->pc_actv); 2014 pool_cache_list_put(pp, pc->pc_prev); 2015 } 2016 2017 cpumem_put(&pool_caches, cm); 2018 2019 pl = TAILQ_FIRST(&pp->pr_cache_lists); 2020 while (pl != NULL) 2021 pl = pool_cache_list_put(pp, pl); 2022 } 2023 2024 void 2025 pool_cache_gc(struct pool *pp) 2026 { 2027 unsigned int contention, delta; 2028 2029 if (getnsecuptime() - pp->pr_cache_timestamp > POOL_WAIT_GC && 2030 !TAILQ_EMPTY(&pp->pr_cache_lists) && 2031 pl_enter_try(pp, &pp->pr_cache_lock)) { 2032 struct pool_cache_item *pl = NULL; 2033 2034 pl = TAILQ_FIRST(&pp->pr_cache_lists); 2035 if (pl != NULL) { 2036 TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl); 2037 pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl); 2038 pp->pr_cache_timestamp = getnsecuptime(); 2039 2040 pp->pr_cache_ngc++; 2041 } 2042 2043 pl_leave(pp, &pp->pr_cache_lock); 2044 2045 pool_cache_list_put(pp, pl); 2046 } 2047 2048 /* 2049 * if there's a lot of contention on the pr_cache_mtx then consider 2050 * growing the length of the list to reduce the need to access the 2051 * global pool. 
2052 */ 2053 2054 contention = pp->pr_cache_contention; 2055 delta = contention - pp->pr_cache_contention_prev; 2056 if (delta > 8 /* magic */) { 2057 if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems) 2058 pp->pr_cache_items += 8; 2059 } else if (delta == 0) { 2060 if (pp->pr_cache_items > 8) 2061 pp->pr_cache_items--; 2062 } 2063 pp->pr_cache_contention_prev = contention; 2064 } 2065 2066 void 2067 pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi) 2068 { 2069 struct pool_cache *pc; 2070 struct cpumem_iter i; 2071 2072 if (pp->pr_cache == NULL) 2073 return; 2074 2075 /* loop through the caches twice to collect stats */ 2076 2077 /* once without the lock so we can yield while reading nget/nput */ 2078 CPUMEM_FOREACH(pc, &i, pp->pr_cache) { 2079 uint64_t gen, nget, nput; 2080 2081 do { 2082 while ((gen = pc->pc_gen) & 1) 2083 yield(); 2084 2085 nget = pc->pc_nget; 2086 nput = pc->pc_nput; 2087 } while (gen != pc->pc_gen); 2088 2089 pi->pr_nget += nget; 2090 pi->pr_nput += nput; 2091 } 2092 2093 /* and once with the mtx so we can get consistent nout values */ 2094 pl_enter(pp, &pp->pr_cache_lock); 2095 CPUMEM_FOREACH(pc, &i, pp->pr_cache) 2096 pi->pr_nout += pc->pc_nout; 2097 2098 pi->pr_nout += pp->pr_cache_nout; 2099 pl_leave(pp, &pp->pr_cache_lock); 2100 } 2101 2102 int 2103 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp) 2104 { 2105 struct kinfo_pool_cache kpc; 2106 2107 if (pp->pr_cache == NULL) 2108 return (EOPNOTSUPP); 2109 2110 memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */ 2111 2112 pl_enter(pp, &pp->pr_cache_lock); 2113 kpc.pr_ngc = pp->pr_cache_ngc; 2114 kpc.pr_len = pp->pr_cache_items; 2115 kpc.pr_nitems = pp->pr_cache_nitems; 2116 kpc.pr_contention = pp->pr_cache_contention; 2117 pl_leave(pp, &pp->pr_cache_lock); 2118 2119 return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc))); 2120 } 2121 2122 int 2123 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp) 2124 { 2125 struct pool_cache *pc; 2126 struct kinfo_pool_cache_cpu *kpcc, *info; 2127 unsigned int cpu = 0; 2128 struct cpumem_iter i; 2129 int error = 0; 2130 size_t len; 2131 2132 if (pp->pr_cache == NULL) 2133 return (EOPNOTSUPP); 2134 if (*oldlenp % sizeof(*kpcc)) 2135 return (EINVAL); 2136 2137 kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP, 2138 M_WAITOK|M_CANFAIL|M_ZERO); 2139 if (kpcc == NULL) 2140 return (EIO); 2141 2142 len = ncpusfound * sizeof(*kpcc); 2143 2144 CPUMEM_FOREACH(pc, &i, pp->pr_cache) { 2145 uint64_t gen; 2146 2147 if (cpu >= ncpusfound) { 2148 error = EIO; 2149 goto err; 2150 } 2151 2152 info = &kpcc[cpu]; 2153 info->pr_cpu = cpu; 2154 2155 do { 2156 while ((gen = pc->pc_gen) & 1) 2157 yield(); 2158 2159 info->pr_nget = pc->pc_nget; 2160 info->pr_nfail = pc->pc_nfail; 2161 info->pr_nput = pc->pc_nput; 2162 info->pr_nlget = pc->pc_nlget; 2163 info->pr_nlfail = pc->pc_nlfail; 2164 info->pr_nlput = pc->pc_nlput; 2165 } while (gen != pc->pc_gen); 2166 2167 cpu++; 2168 } 2169 2170 error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len); 2171 err: 2172 free(kpcc, M_TEMP, len); 2173 2174 return (error); 2175 } 2176 #else /* MULTIPROCESSOR */ 2177 void 2178 pool_cache_init(struct pool *pp) 2179 { 2180 /* nop */ 2181 } 2182 2183 void 2184 pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi) 2185 { 2186 /* nop */ 2187 } 2188 2189 int 2190 pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp) 2191 { 2192 return (EOPNOTSUPP); 2193 } 2194 2195 int 2196 pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp) 2197 { 2198 return 
(EOPNOTSUPP); 2199 } 2200 #endif /* MULTIPROCESSOR */ 2201 2202 2203 void 2204 pool_lock_mtx_init(struct pool *pp, union pool_lock *lock, 2205 const struct lock_type *type) 2206 { 2207 _mtx_init_flags(&lock->prl_mtx, pp->pr_ipl, pp->pr_wchan, 0, type); 2208 } 2209 2210 void 2211 pool_lock_mtx_enter(union pool_lock *lock) 2212 { 2213 mtx_enter(&lock->prl_mtx); 2214 } 2215 2216 int 2217 pool_lock_mtx_enter_try(union pool_lock *lock) 2218 { 2219 return (mtx_enter_try(&lock->prl_mtx)); 2220 } 2221 2222 void 2223 pool_lock_mtx_leave(union pool_lock *lock) 2224 { 2225 mtx_leave(&lock->prl_mtx); 2226 } 2227 2228 void 2229 pool_lock_mtx_assert_locked(union pool_lock *lock) 2230 { 2231 MUTEX_ASSERT_LOCKED(&lock->prl_mtx); 2232 } 2233 2234 void 2235 pool_lock_mtx_assert_unlocked(union pool_lock *lock) 2236 { 2237 MUTEX_ASSERT_UNLOCKED(&lock->prl_mtx); 2238 } 2239 2240 int 2241 pool_lock_mtx_sleep(void *ident, union pool_lock *lock, int priority, 2242 const char *wmesg) 2243 { 2244 return msleep_nsec(ident, &lock->prl_mtx, priority, wmesg, INFSLP); 2245 } 2246 2247 static const struct pool_lock_ops pool_lock_ops_mtx = { 2248 pool_lock_mtx_init, 2249 pool_lock_mtx_enter, 2250 pool_lock_mtx_enter_try, 2251 pool_lock_mtx_leave, 2252 pool_lock_mtx_assert_locked, 2253 pool_lock_mtx_assert_unlocked, 2254 pool_lock_mtx_sleep, 2255 }; 2256 2257 void 2258 pool_lock_rw_init(struct pool *pp, union pool_lock *lock, 2259 const struct lock_type *type) 2260 { 2261 _rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type); 2262 } 2263 2264 void 2265 pool_lock_rw_enter(union pool_lock *lock) 2266 { 2267 rw_enter_write(&lock->prl_rwlock); 2268 } 2269 2270 int 2271 pool_lock_rw_enter_try(union pool_lock *lock) 2272 { 2273 return (rw_enter(&lock->prl_rwlock, RW_WRITE | RW_NOSLEEP) == 0); 2274 } 2275 2276 void 2277 pool_lock_rw_leave(union pool_lock *lock) 2278 { 2279 rw_exit_write(&lock->prl_rwlock); 2280 } 2281 2282 void 2283 pool_lock_rw_assert_locked(union pool_lock *lock) 2284 { 2285 rw_assert_wrlock(&lock->prl_rwlock); 2286 } 2287 2288 void 2289 pool_lock_rw_assert_unlocked(union pool_lock *lock) 2290 { 2291 KASSERT(rw_status(&lock->prl_rwlock) != RW_WRITE); 2292 } 2293 2294 int 2295 pool_lock_rw_sleep(void *ident, union pool_lock *lock, int priority, 2296 const char *wmesg) 2297 { 2298 return rwsleep_nsec(ident, &lock->prl_rwlock, priority, wmesg, INFSLP); 2299 } 2300 2301 static const struct pool_lock_ops pool_lock_ops_rw = { 2302 pool_lock_rw_init, 2303 pool_lock_rw_enter, 2304 pool_lock_rw_enter_try, 2305 pool_lock_rw_leave, 2306 pool_lock_rw_assert_locked, 2307 pool_lock_rw_assert_unlocked, 2308 pool_lock_rw_sleep, 2309 }; 2310
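/*
 * Illustrative usage sketch (not part of this file's implementation):
 * a subsystem typically declares a pool, initializes it once during
 * attach, and then gets and puts fixed-size items.  "struct foo",
 * foo_pool and the foo_* functions below are hypothetical names.
 *
 *	struct pool foo_pool;
 *
 *	void
 *	foo_init(void)
 *	{
 *		pool_init(&foo_pool, sizeof(struct foo), 0, IPL_NONE,
 *		    PR_WAITOK, "foopl", NULL);
 *	}
 *
 *	struct foo *
 *	foo_alloc(void)
 *	{
 *		return (pool_get(&foo_pool, PR_WAITOK | PR_ZERO));
 *	}
 *
 *	void
 *	foo_free(struct foo *f)
 *	{
 *		pool_put(&foo_pool, f);
 *	}
 *
 * An interrupt-safe pool would pass the appropriate IPL_* level to
 * pool_init() and use PR_NOWAIT from interrupt context, checking for a
 * NULL return from pool_get().
 */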