/*	$NetBSD: subr_pool.c,v 1.83 2002/11/24 11:37:57 scw Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.83 2002/11/24 11:37:57 scw Exp $");

#include "opt_pool.h"
#include "opt_poollog.h"
#include "opt_lockdebug.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/pool.h>
#include <sys/syslog.h>

#include <uvm/uvm.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according
 * to the pool item size. Each page is kept on a list headed by `pr_pagelist'
 * in the pool structure and the individual pool items are on a linked list
 * headed by `ph_itemlist' in each page header. The memory for building
 * the page list is either taken from the allocated pages themselves (for
 * small pool items) or taken from an internal pool of page headers (`phpool').
 */
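/*
 * Illustrative usage sketch (not part of this file): a subsystem would
 * typically declare a static pool, initialize it once, and then get and
 * put items at the appropriate spl level.  The names `struct foo' and
 * `foo_pool' below are hypothetical.
 *
 *	static struct pool foo_pool;
 *	struct foo *f;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl", NULL);
 *
 *	f = pool_get(&foo_pool, PR_WAITOK);
 *	...use f...
 *	pool_put(&foo_pool, f);
 */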
/* List of all pools */
TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);

/* Private pool for page header structures */
static struct pool phpool;

#ifdef POOL_SUBPAGE
/* Pool of subpages for use by normal pools. */
static struct pool psppool;
#endif

/* # of seconds to retain page after last use */
int pool_inactive_time = 10;

/* Next candidate for drainage (see pool_drain()) */
static struct pool	*drainpp;

/* This spin lock protects both pool_head and drainpp. */
struct simplelock pool_head_slock = SIMPLELOCK_INITIALIZER;

struct pool_item_header {
	/* Page headers */
	TAILQ_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	TAILQ_HEAD(,pool_item)	ph_itemlist;	/* chunk list for this page */
	LIST_ENTRY(pool_item_header)
				ph_hashlist;	/* Off-page page headers */
	unsigned int		ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	struct timeval		ph_time;	/* last referenced */
};
TAILQ_HEAD(pool_pagelist,pool_item_header);

struct pool_item {
#ifdef DIAGNOSTIC
	u_int pi_magic;
#endif
#define	PI_MAGIC 0xdeadbeefU
	/* Other entries use only this list entry */
	TAILQ_ENTRY(pool_item)	pi_list;
};

#define	PR_HASH_INDEX(pp,addr) \
	(((u_long)(addr) >> (pp)->pr_alloc->pa_pageshift) & \
	 (PR_HASHTABSIZE - 1))

#define	POOL_NEEDS_CATCHUP(pp)	\
	((pp)->pr_nitems < (pp)->pr_minitems)

/*
 * Pool cache management.
 *
 * Pool caches provide a way for constructed objects to be cached by the
 * pool subsystem.  This can lead to performance improvements by avoiding
 * needless object construction/destruction; it is deferred until absolutely
 * necessary.
 *
 * Caches are grouped into cache groups.  Each cache group references
 * up to 16 constructed objects.  When a cache allocates an object
 * from the pool, it calls the object's constructor and places it into
 * a cache group.  When a cache group frees an object back to the pool,
 * it first calls the object's destructor.  This allows the object to
 * persist in constructed form while freed to the cache.
 *
 * Multiple caches may exist for each pool.  This allows a single
 * object type to have multiple constructed forms.  The pool references
 * each cache, so that when a pool is drained by the pagedaemon, it can
 * drain each individual cache as well.  Each time a cache is drained,
 * the most idle cache group is freed to the pool in its entirety.
 *
 * Pool caches are laid on top of pools.  By layering them, we can avoid
 * the complexity of cache management for pools which would not benefit
 * from it.
 */
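/*
 * Illustrative cache usage sketch (hypothetical names): a cache is
 * layered on an existing pool and given a constructor/destructor pair.
 * Objects obtained through the cache come back already constructed
 * when a cached one is available.
 *
 *	static struct pool foo_pool;
 *	static struct pool_cache foo_cache;
 *
 *	pool_cache_init(&foo_cache, &foo_pool, foo_ctor, foo_dtor, NULL);
 *
 *	f = pool_cache_get(&foo_cache, PR_WAITOK);
 *	...
 *	pool_cache_put(&foo_cache, f);
 */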
/* The cache group pool. */
static struct pool pcgpool;

static void	pool_cache_reclaim(struct pool_cache *);

static int	pool_catchup(struct pool *);
static void	pool_prime_page(struct pool *, caddr_t,
		    struct pool_item_header *);

void		*pool_allocator_alloc(struct pool *, int);
void		pool_allocator_free(struct pool *, void *);

static void pool_print1(struct pool *, const char *,
	void (*)(const char *, ...));

/*
 * Pool log entry. An array of these is allocated in pool_init().
 */
struct pool_log {
	const char	*pl_file;
	long		pl_line;
	int		pl_action;
#define	PRLOG_GET	1
#define	PRLOG_PUT	2
	void		*pl_addr;
};

/* Number of entries in pool log buffers */
#ifndef POOL_LOGSIZE
#define	POOL_LOGSIZE	10
#endif

int pool_logsize = POOL_LOGSIZE;

#ifdef POOL_DIAGNOSTIC
static __inline void
pr_log(struct pool *pp, void *v, int action, const char *file, long line)
{
	int n = pp->pr_curlogentry;
	struct pool_log *pl;

	if ((pp->pr_roflags & PR_LOGGING) == 0)
		return;

	/*
	 * Fill in the current entry. Wrap around and overwrite
	 * the oldest entry if necessary.
	 */
	pl = &pp->pr_log[n];
	pl->pl_file = file;
	pl->pl_line = line;
	pl->pl_action = action;
	pl->pl_addr = v;
	if (++n >= pp->pr_logsize)
		n = 0;
	pp->pr_curlogentry = n;
}

static void
pr_printlog(struct pool *pp, struct pool_item *pi,
    void (*pr)(const char *, ...))
{
	int i = pp->pr_logsize;
	int n = pp->pr_curlogentry;

	if ((pp->pr_roflags & PR_LOGGING) == 0)
		return;

	/*
	 * Print all entries in this pool's log.
	 */
	while (i-- > 0) {
		struct pool_log *pl = &pp->pr_log[n];
		if (pl->pl_action != 0) {
			if (pi == NULL || pi == pl->pl_addr) {
				(*pr)("\tlog entry %d:\n", i);
				(*pr)("\t\taction = %s, addr = %p\n",
				    pl->pl_action == PRLOG_GET ? "get" : "put",
				    pl->pl_addr);
				(*pr)("\t\tfile: %s at line %lu\n",
				    pl->pl_file, pl->pl_line);
			}
		}
		if (++n >= pp->pr_logsize)
			n = 0;
	}
}

static __inline void
pr_enter(struct pool *pp, const char *file, long line)
{

	if (__predict_false(pp->pr_entered_file != NULL)) {
		printf("pool %s: reentrancy at file %s line %ld\n",
		    pp->pr_wchan, file, line);
		printf("         previous entry at file %s line %ld\n",
		    pp->pr_entered_file, pp->pr_entered_line);
		panic("pr_enter");
	}

	pp->pr_entered_file = file;
	pp->pr_entered_line = line;
}

static __inline void
pr_leave(struct pool *pp)
{

	if (__predict_false(pp->pr_entered_file == NULL)) {
		printf("pool %s not entered?\n", pp->pr_wchan);
		panic("pr_leave");
	}

	pp->pr_entered_file = NULL;
	pp->pr_entered_line = 0;
}

static __inline void
pr_enter_check(struct pool *pp, void (*pr)(const char *, ...))
{

	if (pp->pr_entered_file != NULL)
		(*pr)("\n\tcurrently entered from file %s line %ld\n",
		    pp->pr_entered_file, pp->pr_entered_line);
}
#else
#define	pr_log(pp, v, action, file, line)
#define	pr_printlog(pp, pi, pr)
#define	pr_enter(pp, file, line)
#define	pr_leave(pp)
#define	pr_enter_check(pp, pr)
#endif /* POOL_DIAGNOSTIC */

/*
 * Return the pool page header based on page address.
 */
static __inline struct pool_item_header *
pr_find_pagehead(struct pool *pp, caddr_t page)
{
	struct pool_item_header *ph;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		return ((struct pool_item_header *)(page + pp->pr_phoffset));

	for (ph = LIST_FIRST(&pp->pr_hashtab[PR_HASH_INDEX(pp, page)]);
	     ph != NULL;
	     ph = LIST_NEXT(ph, ph_hashlist)) {
		if (ph->ph_page == page)
			return (ph);
	}
	return (NULL);
}
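/*
 * Worked example for PR_HASH_INDEX (illustrative; assumes a 4KB-page
 * allocator, i.e. pa_pageshift == 12, and that PR_HASHTABSIZE is 8):
 * a returned item at address 0xc05f3040 lies on the page 0xc05f3000, so
 *
 *	PR_HASH_INDEX(pp, 0xc05f3000) == (0xc05f3000 >> 12) & 7 == 3
 *
 * and pr_find_pagehead() walks only the hash chain pr_hashtab[3],
 * matching the off-page header whose ph_page equals the page address.
 */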
/*
 * Remove a page from the pool.
 */
static __inline void
pr_rmpage(struct pool *pp, struct pool_item_header *ph,
     struct pool_pagelist *pq)
{
	int s;

	/*
	 * If the page was idle, decrement the idle page count.
	 */
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (pp->pr_nidle == 0)
			panic("pr_rmpage: nidle inconsistent");
		if (pp->pr_nitems < pp->pr_itemsperpage)
			panic("pr_rmpage: nitems inconsistent");
#endif
		pp->pr_nidle--;
	}

	pp->pr_nitems -= pp->pr_itemsperpage;

	/*
	 * Unlink a page from the pool and release it (or queue it
	 * for release).
	 */
	TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
	if (pq) {
		TAILQ_INSERT_HEAD(pq, ph, ph_pagelist);
	} else {
		pool_allocator_free(pp, ph->ph_page);
		if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
			LIST_REMOVE(ph, ph_hashlist);
			s = splhigh();
			pool_put(&phpool, ph);
			splx(s);
		}
	}
	pp->pr_npages--;
	pp->pr_npagefree++;

	if (pp->pr_curpage == ph) {
		/*
		 * Find a new non-empty page header, if any.
		 * Start search from the page head, to increase the
		 * chance for "high water" pages to be freed.
		 */
		TAILQ_FOREACH(ph, &pp->pr_pagelist, ph_pagelist)
			if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
				break;

		pp->pr_curpage = ph;
	}
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off, slack, i;

#ifdef POOL_DIAGNOSTIC
	/*
	 * Always log if POOL_DIAGNOSTIC is defined.
	 */
	if (pool_logsize != 0)
		flags |= PR_LOGGING;
#endif

#ifdef POOL_SUBPAGE
	/*
	 * XXX We don't provide a real `nointr' back-end
	 * yet; all sub-pages come from a kmem back-end.
	 * maybe some day...
	 */
	if (palloc == NULL) {
		extern struct pool_allocator pool_allocator_kmem_subpage;
		palloc = &pool_allocator_kmem_subpage;
	}
	/*
	 * We'll assume any user-specified back-end allocator
	 * will deal with sub-pages, or simply don't care.
	 */
#else
	if (palloc == NULL)
		palloc = &pool_allocator_kmem;
#endif /* POOL_SUBPAGE */
	if ((palloc->pa_flags & PA_INITIALIZED) == 0) {
		if (palloc->pa_pagesz == 0) {
#ifdef POOL_SUBPAGE
			if (palloc == &pool_allocator_kmem)
				palloc->pa_pagesz = PAGE_SIZE;
			else
				palloc->pa_pagesz = POOL_SUBPAGE;
#else
			palloc->pa_pagesz = PAGE_SIZE;
#endif /* POOL_SUBPAGE */
		}

		TAILQ_INIT(&palloc->pa_list);

		simple_lock_init(&palloc->pa_slock);
		palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
		palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
		palloc->pa_flags |= PA_INITIALIZED;
	}

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);
#ifdef DIAGNOSTIC
	if (size > palloc->pa_pagesz)
		panic("pool_init: pool item size (%lu) too large",
		    (u_long)size);
#endif
	/*
	 * Initialize the pool structure.
	 */
	TAILQ_INIT(&pp->pr_pagelist);
	TAILQ_INIT(&pp->pr_cachelist);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = UINT_MAX;
	pp->pr_roflags = flags;
	pp->pr_flags = 0;
	pp->pr_size = size;
	pp->pr_align = align;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	pp->pr_drain_hook = NULL;
	pp->pr_drain_hook_arg = NULL;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go on a hash table, so we can match a returned item
	 * with its header based on the page address.
	 * We use 1/16 of the page size as the threshold (XXX: tune)
	 */
	if (pp->pr_size < palloc->pa_pagesz/16) {
		/* Use the end of the page for the page header */
		pp->pr_roflags |= PR_PHINPAGE;
		pp->pr_phoffset = off = palloc->pa_pagesz -
		    ALIGN(sizeof(struct pool_item_header));
	} else {
		/* The page header will be taken from our page header pool */
		pp->pr_phoffset = 0;
		off = palloc->pa_pagesz;
		for (i = 0; i < PR_HASHTABSIZE; i++) {
			LIST_INIT(&pp->pr_hashtab[i]);
		}
	}

	/*
	 * Alignment is to take place at `ioff' within the item. This means
	 * we must reserve up to `align - 1' bytes on the page to allow
	 * appropriate positioning of each item.
	 *
	 * Silently enforce `0 <= ioff < align'.
	 */
	pp->pr_itemoffset = ioff = ioff % align;
	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
	KASSERT(pp->pr_itemsperpage != 0);

	/*
	 * Use the slack between the chunks and the page header
	 * for "cache coloring".
	 */
	slack = off - pp->pr_itemsperpage * pp->pr_size;
	pp->pr_maxcolor = (slack / align) * align;
	pp->pr_curcolor = 0;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

#ifdef POOL_DIAGNOSTIC
	if (flags & PR_LOGGING) {
		if (kmem_map == NULL ||
		    (pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log),
		     M_TEMP, M_NOWAIT)) == NULL)
			pp->pr_roflags &= ~PR_LOGGING;
		pp->pr_curlogentry = 0;
		pp->pr_logsize = pool_logsize;
	}
#endif

	pp->pr_entered_file = NULL;
	pp->pr_entered_line = 0;

	simple_lock_init(&pp->pr_slock);

	/*
	 * Initialize private page header pool and cache magazine pool if we
	 * haven't done so yet.
	 * XXX LOCKING.
	 */
	if (phpool.pr_size == 0) {
#ifdef POOL_SUBPAGE
		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0, 0,
		    "phpool", &pool_allocator_kmem);
		pool_init(&psppool, POOL_SUBPAGE, POOL_SUBPAGE, 0,
		    PR_RECURSIVE, "psppool", &pool_allocator_kmem);
#else
		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
		    0, "phpool", NULL);
#endif
		pool_init(&pcgpool, sizeof(struct pool_cache_group), 0, 0,
		    0, "pcgpool", NULL);
	}

	/* Insert into the list of all pools. */
	simple_lock(&pool_head_slock);
	TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist);
	simple_unlock(&pool_head_slock);

	/* Insert this into the list of pools using this allocator. */
	simple_lock(&palloc->pa_slock);
	TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list);
	simple_unlock(&palloc->pa_slock);
}
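/*
 * Worked example of the layout arithmetic above (illustrative numbers;
 * the in-page header size is assumed to round to 32 bytes): with
 * pa_pagesz = 4096, size = 128 and align = 32 (ioff = 0), the item size
 * is below the 4096/16 = 256 threshold, so the header lives in-page and
 *
 *	off          = 4096 - 32 = 4064
 *	itemsperpage = 4064 / 128 = 31
 *	slack        = 4064 - 31 * 128 = 96
 *	pr_maxcolor  = (96 / 32) * 32 = 96
 *
 * so successive pages start their items at offsets 0, 32, 64, 96, 0, ...
 * spreading items across cache lines ("cache coloring").
 */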
/*
 * De-commission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_item_header *ph;
	struct pool_cache *pc;

	/* Locking order: pool_allocator -> pool */
	simple_lock(&pp->pr_alloc->pa_slock);
	TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list);
	simple_unlock(&pp->pr_alloc->pa_slock);

	/* Destroy all caches for this pool. */
	while ((pc = TAILQ_FIRST(&pp->pr_cachelist)) != NULL)
		pool_cache_destroy(pc);

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0) {
		pr_printlog(pp, NULL, printf);
		panic("pool_destroy: pool busy: still out: %u",
		    pp->pr_nout);
	}
#endif

	/* Remove all pages */
	while ((ph = TAILQ_FIRST(&pp->pr_pagelist)) != NULL)
		pr_rmpage(pp, ph, NULL);

	/* Remove from global pool list */
	simple_lock(&pool_head_slock);
	TAILQ_REMOVE(&pool_head, pp, pr_poollist);
	if (drainpp == pp) {
		drainpp = NULL;
	}
	simple_unlock(&pool_head_slock);

#ifdef POOL_DIAGNOSTIC
	if ((pp->pr_roflags & PR_LOGGING) != 0)
		free(pp->pr_log, M_TEMP);
#endif
}

void
pool_set_drain_hook(struct pool *pp, void (*fn)(void *, int), void *arg)
{

	/* XXX no locking -- must be used just after pool_init() */
#ifdef DIAGNOSTIC
	if (pp->pr_drain_hook != NULL)
		panic("pool_set_drain_hook(%s): already set", pp->pr_wchan);
#endif
	pp->pr_drain_hook = fn;
	pp->pr_drain_hook_arg = arg;
}
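/*
 * Illustrative drain hook sketch (hypothetical names): a subsystem that
 * keeps its own secondary stash of pool items can register a hook that
 * releases them when the pool is under pressure.  The hook runs with the
 * pool unlocked; `flags' is PR_WAITOK or PR_NOWAIT, as for pool_get().
 *
 *	static void
 *	foo_drain(void *arg, int flags)
 *	{
 *		...return stashed struct foo items with pool_put()...
 *	}
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl", NULL);
 *	pool_set_drain_hook(&foo_pool, foo_drain, NULL);
 */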
static __inline struct pool_item_header *
pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
{
	struct pool_item_header *ph;
	int s;

	LOCK_ASSERT(simple_lock_held(&pp->pr_slock) == 0);

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		ph = (struct pool_item_header *) (storage + pp->pr_phoffset);
	else {
		s = splhigh();
		ph = pool_get(&phpool, flags);
		splx(s);
	}

	return (ph);
}

/*
 * Grab an item from the pool; must be called at appropriate spl level
 */
void *
#ifdef POOL_DIAGNOSTIC
_pool_get(struct pool *pp, int flags, const char *file, long line)
#else
pool_get(struct pool *pp, int flags)
#endif
{
	struct pool_item *pi;
	struct pool_item_header *ph;
	void *v;

#ifdef DIAGNOSTIC
	if (__predict_false(curproc == NULL && doing_shutdown == 0 &&
			    (flags & PR_WAITOK) != 0))
		panic("pool_get: %s: must have NOWAIT", pp->pr_wchan);

#ifdef LOCKDEBUG
	if (flags & PR_WAITOK)
		simple_lock_only_held(NULL, "pool_get(PR_WAITOK)");
#endif
#endif /* DIAGNOSTIC */

	simple_lock(&pp->pr_slock);
	pr_enter(pp, file, line);

 startover:
	/*
	 * Check to see if we've reached the hard limit.  If we have,
	 * and we can wait, then wait until an item has been returned to
	 * the pool.
	 */
#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) {
		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		panic("pool_get: %s: crossed hard limit", pp->pr_wchan);
	}
#endif
	if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) {
		if (pp->pr_drain_hook != NULL) {
			/*
			 * Since the drain hook is going to free things
			 * back to the pool, unlock, call the hook, re-lock,
			 * and check the hardlimit condition again.
			 */
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			(*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags);
			simple_lock(&pp->pr_slock);
			pr_enter(pp, file, line);
			if (pp->pr_nout < pp->pr_hardlimit)
				goto startover;
		}

		if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
			/*
			 * XXX: A warning isn't logged in this case.  Should
			 * it be?
			 */
			pp->pr_flags |= PR_WANTED;
			pr_leave(pp);
			ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock);
			pr_enter(pp, file, line);
			goto startover;
		}

		/*
		 * Log a message that the hard limit has been hit.
		 */
		if (pp->pr_hardlimit_warning != NULL &&
		    ratecheck(&pp->pr_hardlimit_warning_last,
			      &pp->pr_hardlimit_ratecap))
			log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);

		pp->pr_nfail++;

		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		return (NULL);
	}

	/*
	 * The convention we use is that if `curpage' is not NULL, then
	 * it points at a non-empty bucket. In particular, `curpage'
	 * never points at a page header which has PR_PHINPAGE set and
	 * has no items in its bucket.
	 */
	if ((ph = pp->pr_curpage) == NULL) {
#ifdef DIAGNOSTIC
		if (pp->pr_nitems != 0) {
			simple_unlock(&pp->pr_slock);
			printf("pool_get: %s: curpage NULL, nitems %u\n",
			    pp->pr_wchan, pp->pr_nitems);
			panic("pool_get: nitems inconsistent");
		}
#endif

		/*
		 * Call the back-end page allocator for more memory.
		 * Release the pool lock, as the back-end page allocator
		 * may block.
		 */
		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		v = pool_allocator_alloc(pp, flags);
		if (__predict_true(v != NULL))
			ph = pool_alloc_item_header(pp, v, flags);
		simple_lock(&pp->pr_slock);
		pr_enter(pp, file, line);

		if (__predict_false(v == NULL || ph == NULL)) {
			if (v != NULL)
				pool_allocator_free(pp, v);

			/*
			 * We were unable to allocate a page or item
			 * header, but we released the lock during
			 * allocation, so perhaps items were freed
			 * back to the pool.  Check for this case.
			 */
			if (pp->pr_curpage != NULL)
				goto startover;

			if ((flags & PR_WAITOK) == 0) {
				pp->pr_nfail++;
				pr_leave(pp);
				simple_unlock(&pp->pr_slock);
				return (NULL);
			}

			/*
			 * Wait for items to be returned to this pool.
			 *
			 * XXX: maybe we should wake up once a second and
			 * try again?
			 */
			pp->pr_flags |= PR_WANTED;
			/* PA_WANTED is already set on the allocator. */
			pr_leave(pp);
			ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock);
			pr_enter(pp, file, line);
			goto startover;
		}

		/* We have more memory; add it to the pool */
		pool_prime_page(pp, v, ph);
		pp->pr_npagealloc++;

		/* Start the allocation process over. */
		goto startover;
	}

	if (__predict_false((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL)) {
		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		panic("pool_get: %s: page empty", pp->pr_wchan);
	}
#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nitems == 0)) {
		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		printf("pool_get: %s: items on itemlist, nitems %u\n",
		    pp->pr_wchan, pp->pr_nitems);
		panic("pool_get: nitems inconsistent");
	}
#endif

#ifdef POOL_DIAGNOSTIC
	pr_log(pp, v, PRLOG_GET, file, line);
#endif

#ifdef DIAGNOSTIC
	if (__predict_false(pi->pi_magic != PI_MAGIC)) {
		pr_printlog(pp, pi, printf);
		panic("pool_get(%s): free list modified: magic=%x; page %p;"
		       " item addr %p\n",
			pp->pr_wchan, pi->pi_magic, ph->ph_page, pi);
	}
#endif

	/*
	 * Remove from item list.
	 */
	TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
	pp->pr_nitems--;
	pp->pr_nout++;
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (__predict_false(pp->pr_nidle == 0))
			panic("pool_get: nidle inconsistent");
#endif
		pp->pr_nidle--;
	}
	ph->ph_nmissing++;
	if (TAILQ_FIRST(&ph->ph_itemlist) == NULL) {
#ifdef DIAGNOSTIC
		if (__predict_false(ph->ph_nmissing != pp->pr_itemsperpage)) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			panic("pool_get: %s: nmissing inconsistent",
			    pp->pr_wchan);
		}
#endif
		/*
		 * Find a new non-empty page header, if any.
		 * Start search from the page head, to increase
		 * the chance for "high water" pages to be freed.
		 *
		 * Migrate empty pages to the end of the list.  This
		 * will speed the update of curpage as pages become
		 * idle.  Empty pages intermingled with idle pages
		 * are no big deal.  As soon as a page becomes un-empty,
		 * it will move back to the head of the list.
		 */
		TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist);
		TAILQ_FOREACH(ph, &pp->pr_pagelist, ph_pagelist)
			if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
				break;

		pp->pr_curpage = ph;
	}

	pp->pr_nget++;

	/*
	 * If we have a low water mark and we are now below that low
	 * water mark, add more items to the pool.
	 */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning?  Should we set up a timeout
		 * to try again in a second or so?  The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}

	pr_leave(pp);
	simple_unlock(&pp->pr_slock);
	return (v);
}

/*
 * Internal version of pool_put(). Pool is already locked/entered.
 */
static void
pool_do_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_item_header *ph;
	caddr_t page;
	int s;

	LOCK_ASSERT(simple_lock_held(&pp->pr_slock));

	page = (caddr_t)((u_long)v & pp->pr_alloc->pa_pagemask);

#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nout == 0)) {
		printf("pool %s: putting with none out\n",
		    pp->pr_wchan);
		panic("pool_put");
	}
#endif

	if (__predict_false((ph = pr_find_pagehead(pp, page)) == NULL)) {
		pr_printlog(pp, NULL, printf);
		panic("pool_put: %s: page header missing", pp->pr_wchan);
	}

#ifdef LOCKDEBUG
	/*
	 * Check if we're freeing a locked simple lock.
	 */
	simple_lock_freecheck((caddr_t)pi, ((caddr_t)pi) + pp->pr_size);
#endif

	/*
	 * Return to item list.
	 */
#ifdef DIAGNOSTIC
	pi->pi_magic = PI_MAGIC;
#endif
#ifdef DEBUG
	{
		int i, *ip = v;

		for (i = 0; i < pp->pr_size / sizeof(int); i++) {
			*ip++ = PI_MAGIC;
		}
	}
#endif

	TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
	KDASSERT(ph->ph_nmissing != 0);
	ph->ph_nmissing--;
	pp->pr_nput++;
	pp->pr_nitems++;
	pp->pr_nout--;

	/* Cancel "pool empty" condition if it exists */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (pp->pr_flags & PR_WANTED) {
		pp->pr_flags &= ~PR_WANTED;
		if (ph->ph_nmissing == 0)
			pp->pr_nidle++;
		wakeup((caddr_t)pp);
		return;
	}

	/*
	 * If this page is now complete, do one of two things:
	 *
	 *	(1) If we have more pages than the page high water
	 *	    mark, free the page back to the system.
	 *
	 *	(2) Move it to the end of the page list, so that
	 *	    we minimize our chances of fragmenting the
	 *	    pool.  Idle pages migrate to the end (along with
	 *	    completely empty pages, so that we find un-empty
	 *	    pages more quickly when we update curpage) of the
	 *	    list so they can be more easily swept up by
	 *	    the pagedaemon when pages are scarce.
	 */
	if (ph->ph_nmissing == 0) {
		pp->pr_nidle++;
		if (pp->pr_npages > pp->pr_maxpages ||
		    (pp->pr_alloc->pa_flags & PA_WANT) != 0) {
			pr_rmpage(pp, ph, NULL);
		} else {
			TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
			TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist);

			/*
			 * Update the timestamp on the page.  A page must
			 * be idle for some period of time before it can
			 * be reclaimed by the pagedaemon.  This minimizes
			 * ping-pong'ing for memory.
			 */
			s = splclock();
			ph->ph_time = mono_time;
			splx(s);

			/*
			 * Update the current page pointer.  Just look for
			 * the first page with any free items.
			 *
			 * XXX: Maybe we want an option to look for the
			 * page with the fewest available items, to minimize
			 * fragmentation?
			 */
			TAILQ_FOREACH(ph, &pp->pr_pagelist, ph_pagelist)
				if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
					break;

			pp->pr_curpage = ph;
		}
	}
	/*
	 * If the page has just become un-empty, move it to the head of
	 * the list, and make it the current page.  The next allocation
	 * will get the item from this page, instead of further fragmenting
	 * the pool.
	 */
	else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
		TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
		TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, ph_pagelist);
		pp->pr_curpage = ph;
	}
}

/*
 * Return resource to the pool; must be called at appropriate spl level
 */
#ifdef POOL_DIAGNOSTIC
void
_pool_put(struct pool *pp, void *v, const char *file, long line)
{

	simple_lock(&pp->pr_slock);
	pr_enter(pp, file, line);

	pr_log(pp, v, PRLOG_PUT, file, line);

	pool_do_put(pp, v);

	pr_leave(pp);
	simple_unlock(&pp->pr_slock);
}
#undef pool_put
#endif /* POOL_DIAGNOSTIC */

void
pool_put(struct pool *pp, void *v)
{

	simple_lock(&pp->pr_slock);

	pool_do_put(pp, v);

	simple_unlock(&pp->pr_slock);
}

#ifdef POOL_DIAGNOSTIC
#define	pool_put(h, v)	_pool_put((h), (v), __FILE__, __LINE__)
#endif

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_item_header *ph = NULL;
	caddr_t cp;
	int newpages;

	simple_lock(&pp->pr_slock);

	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		simple_unlock(&pp->pr_slock);
		cp = pool_allocator_alloc(pp, PR_NOWAIT);
		if (__predict_true(cp != NULL))
			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
		simple_lock(&pp->pr_slock);

		if (__predict_false(cp == NULL || ph == NULL)) {
			if (cp != NULL)
				pool_allocator_free(pp, cp);
			break;
		}

		pool_prime_page(pp, cp, ph);
		pp->pr_npagealloc++;
		pp->pr_minpages++;
	}

	if (pp->pr_minpages >= pp->pr_maxpages)
		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */

	simple_unlock(&pp->pr_slock);
	return (0);
}
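/*
 * Illustrative use of pool_prime() (hypothetical numbers): a driver that
 * must satisfy allocations from interrupt context can pre-allocate pages
 * for, say, 64 items right after initializing the pool, so that a later
 * pool_get() with PR_NOWAIT is unlikely to find the pool empty.
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl", NULL);
 *	pool_prime(&foo_pool, 64);
 */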
/*
 * Add a page worth of items to the pool.
 *
 * Note, we must be called with the pool descriptor LOCKED.
 */
static void
pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
{
	struct pool_item *pi;
	caddr_t cp = storage;
	unsigned int align = pp->pr_align;
	unsigned int ioff = pp->pr_itemoffset;
	int n;

#ifdef DIAGNOSTIC
	if (((u_long)cp & (pp->pr_alloc->pa_pagesz - 1)) != 0)
		panic("pool_prime_page: %s: unaligned page", pp->pr_wchan);
#endif

	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		LIST_INSERT_HEAD(&pp->pr_hashtab[PR_HASH_INDEX(pp, cp)],
		    ph, ph_hashlist);

	/*
	 * Insert page header.
	 */
	TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, ph_pagelist);
	TAILQ_INIT(&ph->ph_itemlist);
	ph->ph_page = storage;
	ph->ph_nmissing = 0;
	memset(&ph->ph_time, 0, sizeof(ph->ph_time));

	pp->pr_nidle++;

	/*
	 * Color this page.
	 */
	cp = (caddr_t)(cp + pp->pr_curcolor);
	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
		pp->pr_curcolor = 0;

	/*
	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
	 */
	if (ioff != 0)
		cp = (caddr_t)(cp + (align - ioff));

	/*
	 * Insert remaining chunks on the bucket list.
	 */
	n = pp->pr_itemsperpage;
	pp->pr_nitems += n;

	while (n--) {
		pi = (struct pool_item *)cp;

		KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0);

		/* Insert on page list */
		TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
#ifdef DIAGNOSTIC
		pi->pi_magic = PI_MAGIC;
#endif
		cp = (caddr_t)(cp + pp->pr_size);
	}

	/*
	 * If the pool was depleted, point at the new page.
	 */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

/*
 * Used by pool_get() when nitems drops below the low water mark.  This
 * is used to catch up nitems with the low water mark.
 *
 * Note 1, we never wait for memory here, we let the caller decide what to do.
 *
 * Note 2, we must be called with the pool already locked, and we return
 * with it locked.
 */
static int
pool_catchup(struct pool *pp)
{
	struct pool_item_header *ph = NULL;
	caddr_t cp;
	int error = 0;

	while (POOL_NEEDS_CATCHUP(pp)) {
		/*
		 * Call the page back-end allocator for more memory.
		 *
		 * XXX: We never wait, so should we bother unlocking
		 * the pool descriptor?
		 */
		simple_unlock(&pp->pr_slock);
		cp = pool_allocator_alloc(pp, PR_NOWAIT);
		if (__predict_true(cp != NULL))
			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
		simple_lock(&pp->pr_slock);
		if (__predict_false(cp == NULL || ph == NULL)) {
			if (cp != NULL)
				pool_allocator_free(pp, cp);
			error = ENOMEM;
			break;
		}
		pool_prime_page(pp, cp, ph);
		pp->pr_npagealloc++;
	}

	return (error);
}

void
pool_setlowat(struct pool *pp, int n)
{

	simple_lock(&pp->pr_slock);

	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	/* Make sure we're caught up with the newly-set low water mark. */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning?  Should we set up a timeout
		 * to try again in a second or so?  The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}

	simple_unlock(&pp->pr_slock);
}

void
pool_sethiwat(struct pool *pp, int n)
{

	simple_lock(&pp->pr_slock);

	pp->pr_maxpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	simple_unlock(&pp->pr_slock);
}

void
pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap)
{

	simple_lock(&pp->pr_slock);

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmess;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

	/*
	 * In-line version of pool_sethiwat(), because we don't want to
	 * release the lock.
	 */
	pp->pr_maxpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	simple_unlock(&pp->pr_slock);
}
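/*
 * Illustrative tuning sketch (hypothetical numbers): keep pages for at
 * least 16 items ready, start freeing idle pages back once the pool
 * holds more than 128 items' worth, and refuse to hand out more than
 * 256 items at once, logging the given warning at most once a minute
 * when the hard limit is hit.
 *
 *	pool_setlowat(&foo_pool, 16);
 *	pool_sethiwat(&foo_pool, 128);
 *	pool_sethardlimit(&foo_pool, 256, "WARNING: out of foo", 60);
 */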
/*
 * Release all complete pages that have not been used recently.
 */
int
#ifdef POOL_DIAGNOSTIC
_pool_reclaim(struct pool *pp, const char *file, long line)
#else
pool_reclaim(struct pool *pp)
#endif
{
	struct pool_item_header *ph, *phnext;
	struct pool_cache *pc;
	struct timeval curtime;
	struct pool_pagelist pq;
	int s;

	if (pp->pr_drain_hook != NULL) {
		/*
		 * The drain hook must be called with the pool unlocked.
		 */
		(*pp->pr_drain_hook)(pp->pr_drain_hook_arg, PR_NOWAIT);
	}

	if (simple_lock_try(&pp->pr_slock) == 0)
		return (0);
	pr_enter(pp, file, line);

	TAILQ_INIT(&pq);

	/*
	 * Reclaim items from the pool's caches.
	 */
	TAILQ_FOREACH(pc, &pp->pr_cachelist, pc_poollist)
		pool_cache_reclaim(pc);

	s = splclock();
	curtime = mono_time;
	splx(s);

	for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL; ph = phnext) {
		phnext = TAILQ_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		if (ph->ph_nmissing == 0) {
			struct timeval diff;
			timersub(&curtime, &ph->ph_time, &diff);
			if (diff.tv_sec < pool_inactive_time)
				continue;

			/*
			 * If freeing this page would put us below
			 * the low water mark, stop now.
			 */
			if ((pp->pr_nitems - pp->pr_itemsperpage) <
			    pp->pr_minitems)
				break;

			pr_rmpage(pp, ph, &pq);
		}
	}

	pr_leave(pp);
	simple_unlock(&pp->pr_slock);
	if (TAILQ_EMPTY(&pq))
		return (0);

	while ((ph = TAILQ_FIRST(&pq)) != NULL) {
		TAILQ_REMOVE(&pq, ph, ph_pagelist);
		pool_allocator_free(pp, ph->ph_page);
		if (pp->pr_roflags & PR_PHINPAGE) {
			continue;
		}
		LIST_REMOVE(ph, ph_hashlist);
		s = splhigh();
		pool_put(&phpool, ph);
		splx(s);
	}

	return (1);
}

/*
 * Drain pools, one at a time.
 *
 * Note, we must never be called from an interrupt context.
 */
void
pool_drain(void *arg)
{
	struct pool *pp;
	int s;

	pp = NULL;
	s = splvm();
	simple_lock(&pool_head_slock);
	if (drainpp == NULL) {
		drainpp = TAILQ_FIRST(&pool_head);
	}
	if (drainpp) {
		pp = drainpp;
		drainpp = TAILQ_NEXT(pp, pr_poollist);
	}
	simple_unlock(&pool_head_slock);
	/* pp is NULL only if no pools exist yet; don't dereference it. */
	if (pp != NULL)
		pool_reclaim(pp);
	splx(s);
}
/*
 * Diagnostic helpers.
 */
void
pool_print(struct pool *pp, const char *modif)
{
	int s;

	s = splvm();
	if (simple_lock_try(&pp->pr_slock) == 0) {
		printf("pool %s is locked; try again later\n",
		    pp->pr_wchan);
		splx(s);
		return;
	}
	pool_print1(pp, modif, printf);
	simple_unlock(&pp->pr_slock);
	splx(s);
}

void
pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...))
{
	int didlock = 0;

	if (pp == NULL) {
		(*pr)("Must specify a pool to print.\n");
		return;
	}

	/*
	 * Called from DDB; interrupts should be blocked, and all
	 * other processors should be paused.  We can skip locking
	 * the pool in this case.
	 *
	 * We do a simple_lock_try() just to print the lock
	 * status, however.
	 */

	if (simple_lock_try(&pp->pr_slock) == 0)
		(*pr)("WARNING: pool %s is locked\n", pp->pr_wchan);
	else
		didlock = 1;

	pool_print1(pp, modif, pr);

	if (didlock)
		simple_unlock(&pp->pr_slock);
}

static void
pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...))
{
	struct pool_item_header *ph;
	struct pool_cache *pc;
	struct pool_cache_group *pcg;
#ifdef DIAGNOSTIC
	struct pool_item *pi;
#endif
	int i, print_log = 0, print_pagelist = 0, print_cache = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'l')
			print_log = 1;
		if (c == 'p')
			print_pagelist = 1;
		if (c == 'c')
			print_cache = 1;
	}

	(*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
	    pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
	    pp->pr_roflags);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		goto skip_pagelist;

	if ((ph = TAILQ_FIRST(&pp->pr_pagelist)) != NULL)
		(*pr)("\n\tpage list:\n");
	for (; ph != NULL; ph = TAILQ_NEXT(ph, ph_pagelist)) {
		(*pr)("\t\tpage %p, nmissing %d, time %lu,%lu\n",
		    ph->ph_page, ph->ph_nmissing,
		    (u_long)ph->ph_time.tv_sec,
		    (u_long)ph->ph_time.tv_usec);
#ifdef DIAGNOSTIC
		TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
			if (pi->pi_magic != PI_MAGIC) {
				(*pr)("\t\t\titem %p, magic 0x%x\n",
				    pi, pi->pi_magic);
			}
		}
#endif
	}
	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);

 skip_pagelist:

	if (print_log == 0)
		goto skip_log;

	(*pr)("\n");
	if ((pp->pr_roflags & PR_LOGGING) == 0)
		(*pr)("\tno log\n");
	else
		pr_printlog(pp, NULL, pr);

 skip_log:

	if (print_cache == 0)
		goto skip_cache;

	TAILQ_FOREACH(pc, &pp->pr_cachelist, pc_poollist) {
		(*pr)("\tcache %p: allocfrom %p freeto %p\n", pc,
		    pc->pc_allocfrom, pc->pc_freeto);
		(*pr)("\t    hits %lu misses %lu ngroups %lu nitems %lu\n",
		    pc->pc_hits, pc->pc_misses, pc->pc_ngroups, pc->pc_nitems);
		TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) {
			(*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail);
			for (i = 0; i < PCG_NOBJECTS; i++)
				(*pr)("\t\t\t%p\n", pcg->pcg_objects[i]);
		}
	}

 skip_cache:

	pr_enter_check(pp, pr);
}

int
pool_chk(struct pool *pp, const char *label)
{
	struct pool_item_header *ph;
	int r = 0;

	simple_lock(&pp->pr_slock);

	TAILQ_FOREACH(ph, &pp->pr_pagelist, ph_pagelist) {
		struct pool_item *pi;
		int n;
		caddr_t page;

		page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask);
		if (page != ph->ph_page &&
		    (pp->pr_roflags & PR_PHINPAGE) != 0) {
			if (label != NULL)
				printf("%s: ", label);
printf("pool(%p:%s): page inconsistency: page %p;" 1520 " at page head addr %p (p %p)\n", pp, 1521 pp->pr_wchan, ph->ph_page, 1522 ph, page); 1523 r++; 1524 goto out; 1525 } 1526 1527 for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0; 1528 pi != NULL; 1529 pi = TAILQ_NEXT(pi,pi_list), n++) { 1530 1531 #ifdef DIAGNOSTIC 1532 if (pi->pi_magic != PI_MAGIC) { 1533 if (label != NULL) 1534 printf("%s: ", label); 1535 printf("pool(%s): free list modified: magic=%x;" 1536 " page %p; item ordinal %d;" 1537 " addr %p (p %p)\n", 1538 pp->pr_wchan, pi->pi_magic, ph->ph_page, 1539 n, pi, page); 1540 panic("pool"); 1541 } 1542 #endif 1543 page = 1544 (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask); 1545 if (page == ph->ph_page) 1546 continue; 1547 1548 if (label != NULL) 1549 printf("%s: ", label); 1550 printf("pool(%p:%s): page inconsistency: page %p;" 1551 " item ordinal %d; addr %p (p %p)\n", pp, 1552 pp->pr_wchan, ph->ph_page, 1553 n, pi, page); 1554 r++; 1555 goto out; 1556 } 1557 } 1558 out: 1559 simple_unlock(&pp->pr_slock); 1560 return (r); 1561 } 1562 1563 /* 1564 * pool_cache_init: 1565 * 1566 * Initialize a pool cache. 1567 * 1568 * NOTE: If the pool must be protected from interrupts, we expect 1569 * to be called at the appropriate interrupt priority level. 1570 */ 1571 void 1572 pool_cache_init(struct pool_cache *pc, struct pool *pp, 1573 int (*ctor)(void *, void *, int), 1574 void (*dtor)(void *, void *), 1575 void *arg) 1576 { 1577 1578 TAILQ_INIT(&pc->pc_grouplist); 1579 simple_lock_init(&pc->pc_slock); 1580 1581 pc->pc_allocfrom = NULL; 1582 pc->pc_freeto = NULL; 1583 pc->pc_pool = pp; 1584 1585 pc->pc_ctor = ctor; 1586 pc->pc_dtor = dtor; 1587 pc->pc_arg = arg; 1588 1589 pc->pc_hits = 0; 1590 pc->pc_misses = 0; 1591 1592 pc->pc_ngroups = 0; 1593 1594 pc->pc_nitems = 0; 1595 1596 simple_lock(&pp->pr_slock); 1597 TAILQ_INSERT_TAIL(&pp->pr_cachelist, pc, pc_poollist); 1598 simple_unlock(&pp->pr_slock); 1599 } 1600 1601 /* 1602 * pool_cache_destroy: 1603 * 1604 * Destroy a pool cache. 1605 */ 1606 void 1607 pool_cache_destroy(struct pool_cache *pc) 1608 { 1609 struct pool *pp = pc->pc_pool; 1610 1611 /* First, invalidate the entire cache. */ 1612 pool_cache_invalidate(pc); 1613 1614 /* ...and remove it from the pool's cache list. */ 1615 simple_lock(&pp->pr_slock); 1616 TAILQ_REMOVE(&pp->pr_cachelist, pc, pc_poollist); 1617 simple_unlock(&pp->pr_slock); 1618 } 1619 1620 static __inline void * 1621 pcg_get(struct pool_cache_group *pcg) 1622 { 1623 void *object; 1624 u_int idx; 1625 1626 KASSERT(pcg->pcg_avail <= PCG_NOBJECTS); 1627 KASSERT(pcg->pcg_avail != 0); 1628 idx = --pcg->pcg_avail; 1629 1630 KASSERT(pcg->pcg_objects[idx] != NULL); 1631 object = pcg->pcg_objects[idx]; 1632 pcg->pcg_objects[idx] = NULL; 1633 1634 return (object); 1635 } 1636 1637 static __inline void 1638 pcg_put(struct pool_cache_group *pcg, void *object) 1639 { 1640 u_int idx; 1641 1642 KASSERT(pcg->pcg_avail < PCG_NOBJECTS); 1643 idx = pcg->pcg_avail++; 1644 1645 KASSERT(pcg->pcg_objects[idx] == NULL); 1646 pcg->pcg_objects[idx] = object; 1647 } 1648 1649 /* 1650 * pool_cache_get: 1651 * 1652 * Get an object from a pool cache. 
/*
 * pool_cache_get:
 *
 *	Get an object from a pool cache.
 */
void *
pool_cache_get(struct pool_cache *pc, int flags)
{
	struct pool_cache_group *pcg;
	void *object;

#ifdef LOCKDEBUG
	if (flags & PR_WAITOK)
		simple_lock_only_held(NULL, "pool_cache_get(PR_WAITOK)");
#endif

	simple_lock(&pc->pc_slock);

	if ((pcg = pc->pc_allocfrom) == NULL) {
		TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) {
			if (pcg->pcg_avail != 0) {
				pc->pc_allocfrom = pcg;
				goto have_group;
			}
		}

		/*
		 * No groups with any available objects.  Allocate
		 * a new object, construct it, and return it to
		 * the caller.  We will allocate a group, if necessary,
		 * when the object is freed back to the cache.
		 */
		pc->pc_misses++;
		simple_unlock(&pc->pc_slock);
		object = pool_get(pc->pc_pool, flags);
		if (object != NULL && pc->pc_ctor != NULL) {
			if ((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0) {
				pool_put(pc->pc_pool, object);
				return (NULL);
			}
		}
		return (object);
	}

 have_group:
	pc->pc_hits++;
	pc->pc_nitems--;
	object = pcg_get(pcg);

	if (pcg->pcg_avail == 0)
		pc->pc_allocfrom = NULL;

	simple_unlock(&pc->pc_slock);

	return (object);
}

/*
 * pool_cache_put:
 *
 *	Put an object back to the pool cache.
 */
void
pool_cache_put(struct pool_cache *pc, void *object)
{
	struct pool_cache_group *pcg;
	int s;

	simple_lock(&pc->pc_slock);

	if ((pcg = pc->pc_freeto) == NULL) {
		TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) {
			if (pcg->pcg_avail != PCG_NOBJECTS) {
				pc->pc_freeto = pcg;
				goto have_group;
			}
		}

		/*
		 * No empty groups to free the object to.  Attempt to
		 * allocate one.
		 */
		simple_unlock(&pc->pc_slock);
		s = splvm();
		pcg = pool_get(&pcgpool, PR_NOWAIT);
		splx(s);
		if (pcg != NULL) {
			memset(pcg, 0, sizeof(*pcg));
			simple_lock(&pc->pc_slock);
			pc->pc_ngroups++;
			TAILQ_INSERT_TAIL(&pc->pc_grouplist, pcg, pcg_list);
			if (pc->pc_freeto == NULL)
				pc->pc_freeto = pcg;
			goto have_group;
		}

		/*
		 * Unable to allocate a cache group; destruct the object
		 * and free it back to the pool.
		 */
		pool_cache_destruct_object(pc, object);
		return;
	}

 have_group:
	pc->pc_nitems++;
	pcg_put(pcg, object);

	if (pcg->pcg_avail == PCG_NOBJECTS)
		pc->pc_freeto = NULL;

	simple_unlock(&pc->pc_slock);
}

/*
 * pool_cache_destruct_object:
 *
 *	Force destruction of an object and its release back into
 *	the pool.
 */
void
pool_cache_destruct_object(struct pool_cache *pc, void *object)
{

	if (pc->pc_dtor != NULL)
		(*pc->pc_dtor)(pc->pc_arg, object);
	pool_put(pc->pc_pool, object);
}

/*
 * pool_cache_do_invalidate:
 *
 *	This internal function implements pool_cache_invalidate() and
 *	pool_cache_reclaim().
 */
static void
pool_cache_do_invalidate(struct pool_cache *pc, int free_groups,
    void (*putit)(struct pool *, void *))
{
	struct pool_cache_group *pcg, *npcg;
	void *object;
	int s;

	for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
	     pcg = npcg) {
		npcg = TAILQ_NEXT(pcg, pcg_list);
		while (pcg->pcg_avail != 0) {
			pc->pc_nitems--;
			object = pcg_get(pcg);
			if (pcg->pcg_avail == 0 && pc->pc_allocfrom == pcg)
				pc->pc_allocfrom = NULL;
			if (pc->pc_dtor != NULL)
				(*pc->pc_dtor)(pc->pc_arg, object);
			(*putit)(pc->pc_pool, object);
		}
		if (free_groups) {
			pc->pc_ngroups--;
			TAILQ_REMOVE(&pc->pc_grouplist, pcg, pcg_list);
			if (pc->pc_freeto == pcg)
				pc->pc_freeto = NULL;
			s = splvm();
			pool_put(&pcgpool, pcg);
			splx(s);
		}
	}
}

/*
 * pool_cache_invalidate:
 *
 *	Invalidate a pool cache (destruct and release all of the
 *	cached objects).
 */
void
pool_cache_invalidate(struct pool_cache *pc)
{

	simple_lock(&pc->pc_slock);
	pool_cache_do_invalidate(pc, 0, pool_put);
	simple_unlock(&pc->pc_slock);
}

/*
 * pool_cache_reclaim:
 *
 *	Reclaim a pool cache for pool_reclaim().
 */
static void
pool_cache_reclaim(struct pool_cache *pc)
{

	simple_lock(&pc->pc_slock);
	pool_cache_do_invalidate(pc, 1, pool_do_put);
	simple_unlock(&pc->pc_slock);
}

/*
 * Pool backend allocators.
 *
 * Each pool has a backend allocator that handles allocation, deallocation,
 * and any additional draining that might be needed.
 *
 * We provide two standard allocators:
 *
 *	pool_allocator_kmem - the default when no allocator is specified
 *
 *	pool_allocator_nointr - used for pools that will not be accessed
 *	in interrupt context.
 */
void	*pool_page_alloc(struct pool *, int);
void	pool_page_free(struct pool *, void *);

struct pool_allocator pool_allocator_kmem = {
	pool_page_alloc, pool_page_free, 0,
};

void	*pool_page_alloc_nointr(struct pool *, int);
void	pool_page_free_nointr(struct pool *, void *);

struct pool_allocator pool_allocator_nointr = {
	pool_page_alloc_nointr, pool_page_free_nointr, 0,
};

#ifdef POOL_SUBPAGE
void	*pool_subpage_alloc(struct pool *, int);
void	pool_subpage_free(struct pool *, void *);

struct pool_allocator pool_allocator_kmem_subpage = {
	pool_subpage_alloc, pool_subpage_free, 0,
};
#endif /* POOL_SUBPAGE */
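/*
 * Illustrative custom allocator sketch (hypothetical names): a subsystem
 * with its own backing resource can plug in a pool_allocator of its own.
 * The third initializer is pa_pagesz; leaving it 0 selects the default
 * page size in pool_init().
 *
 *	void *foo_page_alloc(struct pool *, int);
 *	void foo_page_free(struct pool *, void *);
 *
 *	struct pool_allocator foo_allocator = {
 *		foo_page_alloc, foo_page_free, 0,
 *	};
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl",
 *	    &foo_allocator);
 */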
/*
 * We have at least three different resources for the same allocation and
 * each resource can be depleted.  First, we have the ready elements in the
 * pool.  Then we have the resource (typically a vm_map) for this allocator.
 * Finally, we have physical memory.  Waiting for any of these can be
 * unnecessary when any other is freed, but the kernel doesn't support
 * sleeping on multiple wait channels, so we have to employ another strategy.
 *
 * The caller sleeps on the pool (so that it can be awakened when an item
 * is returned to the pool), but we set PA_WANT on the allocator.  When a
 * page is returned to the allocator and PA_WANT is set, pool_allocator_free
 * will wake up all sleeping pools belonging to this allocator.
 *
 * XXX Thundering herd.
 */
void *
pool_allocator_alloc(struct pool *org, int flags)
{
	struct pool_allocator *pa = org->pr_alloc;
	struct pool *pp, *start;
	int s, freed;
	void *res;

	do {
		if ((res = (*pa->pa_alloc)(org, flags)) != NULL)
			return (res);
		if ((flags & PR_WAITOK) == 0) {
			/*
			 * We only run the drain hook here if PR_NOWAIT.
			 * In other cases, the hook will be run in
			 * pool_reclaim().
			 */
			if (org->pr_drain_hook != NULL) {
				(*org->pr_drain_hook)(org->pr_drain_hook_arg,
				    flags);
				if ((res = (*pa->pa_alloc)(org, flags)) != NULL)
					return (res);
			}
			break;
		}

		/*
		 * Drain all pools, except "org", that use this
		 * allocator.  We do this to reclaim VA space.
		 * pa_alloc is responsible for waiting for
		 * physical memory.
		 *
		 * XXX We risk looping forever if someone calls
		 * pool_destroy on "start".  But there is no other
		 * way to have potentially sleeping pool_reclaim,
		 * non-sleeping locks on pool_allocator, and some
		 * stirring of drained pools in the allocator.
		 *
		 * XXX Maybe we should use pool_head_slock for locking
		 * the allocators?
		 */
		freed = 0;

		s = splvm();
		simple_lock(&pa->pa_slock);
		pp = start = TAILQ_FIRST(&pa->pa_list);
		do {
			TAILQ_REMOVE(&pa->pa_list, pp, pr_alloc_list);
			TAILQ_INSERT_TAIL(&pa->pa_list, pp, pr_alloc_list);
			if (pp == org)
				continue;
			simple_unlock(&pa->pa_slock);
			freed = pool_reclaim(pp);
			simple_lock(&pa->pa_slock);
		} while ((pp = TAILQ_FIRST(&pa->pa_list)) != start &&
			 freed == 0);

		if (freed == 0) {
			/*
			 * We set PA_WANT here, the caller will most likely
			 * sleep waiting for pages (if not, this won't hurt
			 * that much), and there is no way to set this in
			 * the caller without violating locking order.
			 */
			pa->pa_flags |= PA_WANT;
		}
		simple_unlock(&pa->pa_slock);
		splx(s);
	} while (freed);
	return (NULL);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;
	int s;

	(*pa->pa_free)(pp, v);

	s = splvm();
	simple_lock(&pa->pa_slock);
	if ((pa->pa_flags & PA_WANT) == 0) {
		simple_unlock(&pa->pa_slock);
		splx(s);
		return;
	}

	TAILQ_FOREACH(pp, &pa->pa_list, pr_alloc_list) {
		simple_lock(&pp->pr_slock);
		if ((pp->pr_flags & PR_WANTED) != 0) {
			pp->pr_flags &= ~PR_WANTED;
			wakeup(pp);
		}
		simple_unlock(&pp->pr_slock);
	}
	pa->pa_flags &= ~PA_WANT;
	simple_unlock(&pa->pa_slock);
	splx(s);
}

void *
pool_page_alloc(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *) uvm_km_alloc_poolpage(waitok));
}

void
pool_page_free(struct pool *pp, void *v)
{

	uvm_km_free_poolpage((vaddr_t) v);
}

#ifdef POOL_SUBPAGE
/* Sub-page allocator, for machines with large hardware pages. */
void *
pool_subpage_alloc(struct pool *pp, int flags)
{

	return (pool_get(&psppool, flags));
}

void
pool_subpage_free(struct pool *pp, void *v)
{

	pool_put(&psppool, v);
}

/* We don't provide a real nointr allocator.  Maybe later. */
void *
pool_page_alloc_nointr(struct pool *pp, int flags)
{

	return (pool_subpage_alloc(pp, flags));
}

void
pool_page_free_nointr(struct pool *pp, void *v)
{

	pool_subpage_free(pp, v);
}
#else
void *
pool_page_alloc_nointr(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *) uvm_km_alloc_poolpage1(kernel_map,
	    uvm.kernel_object, waitok));
}

void
pool_page_free_nointr(struct pool *pp, void *v)
{

	uvm_km_free_poolpage1(kernel_map, (vaddr_t) v);
}
#endif /* POOL_SUBPAGE */