1 /* $NetBSD: subr_pool.c,v 1.76 2002/03/13 10:57:18 simonb Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace 9 * Simulation Facility, NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.76 2002/03/13 10:57:18 simonb Exp $");

#include "opt_pool.h"
#include "opt_poollog.h"
#include "opt_lockdebug.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/pool.h>
#include <sys/syslog.h>

#include <uvm/uvm.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according
 * to the pool item size. Each page is kept on a list headed by `pr_pagelist'
 * in the pool structure and the individual pool items are on a linked list
 * headed by `ph_itemlist' in each page header. The memory for building
 * the page list is either taken from the allocated pages themselves (for
 * small pool items) or taken from an internal pool of page headers (`phpool').
 */

/* List of all pools; protected by pool_head_slock. */
TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);

/* Private pool for page header structures (used when headers are off-page) */
static struct pool phpool;

#ifdef POOL_SUBPAGE
/* Pool of subpages for use by normal pools. */
static struct pool psppool;
#endif

/* # of seconds a page must stay idle after last use before it may be freed */
int pool_inactive_time = 10;

/* Next candidate for drainage (see pool_drain()) */
static struct pool	*drainpp;

/* This spin lock protects both pool_head and drainpp. */
struct simplelock pool_head_slock = SIMPLELOCK_INITIALIZER;

/*
 * Per-page bookkeeping.  Lives either at the end of the pool page itself
 * (PR_PHINPAGE) or in `phpool', hashed by page address via pr_hashtab.
 */
struct pool_item_header {
	/* Page headers */
	TAILQ_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	TAILQ_HEAD(,pool_item)	ph_itemlist;	/* chunk list for this page */
	LIST_ENTRY(pool_item_header)
				ph_hashlist;	/* Off-page page headers */
	int			ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	struct timeval		ph_time;	/* last referenced */
};
TAILQ_HEAD(pool_pagelist,pool_item_header);

/*
 * A free item.  The linkage (and the DIAGNOSTIC magic) is stored inside
 * the free item itself, which is why pool_init() rounds pr_size up to at
 * least sizeof(struct pool_item).
 */
struct pool_item {
#ifdef DIAGNOSTIC
	int pi_magic;		/* PI_MAGIC while on the free list */
#endif
#define	PI_MAGIC 0xdeadbeef
	/* Other entries use only this list entry */
	TAILQ_ENTRY(pool_item)	pi_list;
};

/* Hash bucket index for an off-page page header, keyed by page address. */
#define	PR_HASH_INDEX(pp,addr) \
	(((u_long)(addr) >> (pp)->pr_alloc->pa_pageshift) & \
	 (PR_HASHTABSIZE - 1))

/* True when the pool has dropped below its low water mark. */
#define	POOL_NEEDS_CATCHUP(pp)						\
	((pp)->pr_nitems < (pp)->pr_minitems)

/*
 * Pool cache management.
 *
 * Pool caches provide a way for constructed objects to be cached by the
 * pool subsystem.  This can lead to performance improvements by avoiding
 * needless object construction/destruction; it is deferred until absolutely
 * necessary.
 *
 * Caches are grouped into cache groups.  Each cache group references
 * up to 16 constructed objects.  When a cache allocates an object
 * from the pool, it calls the object's constructor and places it into
 * a cache group.  When a cache group frees an object back to the pool,
 * it first calls the object's destructor.  This allows the object to
 * persist in constructed form while freed to the cache.
 *
 * Multiple caches may exist for each pool.
This allows a single
 * object type to have multiple constructed forms.  The pool references
 * each cache, so that when a pool is drained by the pagedaemon, it can
 * drain each individual cache as well.  Each time a cache is drained,
 * the most idle cache group is freed to the pool in its entirety.
 *
 * Pool caches are layed on top of pools.  By layering them, we can avoid
 * the complexity of cache management for pools which would not benefit
 * from it.
 */

/* The cache group pool. */
static struct pool pcgpool;

static void	pool_cache_reclaim(struct pool_cache *);

static int	pool_catchup(struct pool *);
static void	pool_prime_page(struct pool *, caddr_t,
		    struct pool_item_header *);

void	*pool_allocator_alloc(struct pool *, int);
void	pool_allocator_free(struct pool *, void *);

static void pool_print1(struct pool *, const char *,
	void (*)(const char *, ...));

/*
 * Pool log entry.  An array of these is allocated in pool_init().
 * Records get/put operations for post-mortem debugging (POOL_DIAGNOSTIC).
 */
struct pool_log {
	const char	*pl_file;	/* source file of the operation */
	long		pl_line;	/* source line of the operation */
	int		pl_action;	/* PRLOG_GET or PRLOG_PUT */
#define	PRLOG_GET	1
#define	PRLOG_PUT	2
	void		*pl_addr;	/* item the operation acted on */
};

/* Number of entries in pool log buffers */
#ifndef POOL_LOGSIZE
#define	POOL_LOGSIZE	10
#endif

int pool_logsize = POOL_LOGSIZE;

#ifdef POOL_DIAGNOSTIC
/*
 * Append one get/put record to the pool's circular log.
 * No-op if logging was disabled for this pool (PR_LOGGING clear).
 */
static __inline void
pr_log(struct pool *pp, void *v, int action, const char *file, long line)
{
	int n = pp->pr_curlogentry;
	struct pool_log *pl;

	if ((pp->pr_roflags & PR_LOGGING) == 0)
		return;

	/*
	 * Fill in the current entry.  Wrap around and overwrite
	 * the oldest entry if necessary.
	 */
	pl = &pp->pr_log[n];
	pl->pl_file = file;
	pl->pl_line = line;
	pl->pl_action = action;
	pl->pl_addr = v;
	if (++n >= pp->pr_logsize)
		n = 0;
	pp->pr_curlogentry = n;
}

/*
 * Dump the pool's log via (*pr)().  If `pi' is non-NULL, only entries
 * for that particular item are printed; otherwise the whole log is.
 */
static void
pr_printlog(struct pool *pp, struct pool_item *pi,
    void (*pr)(const char *, ...))
{
	int i = pp->pr_logsize;
	int n = pp->pr_curlogentry;

	if ((pp->pr_roflags & PR_LOGGING) == 0)
		return;

	/*
	 * Print all entries in this pool's log.
	 */
	while (i-- > 0) {
		struct pool_log *pl = &pp->pr_log[n];
		if (pl->pl_action != 0) {
			if (pi == NULL || pi == pl->pl_addr) {
				(*pr)("\tlog entry %d:\n", i);
				(*pr)("\t\taction = %s, addr = %p\n",
				    pl->pl_action == PRLOG_GET ? "get" : "put",
				    pl->pl_addr);
				(*pr)("\t\tfile: %s at line %lu\n",
				    pl->pl_file, pl->pl_line);
			}
		}
		if (++n >= pp->pr_logsize)
			n = 0;
	}
}

/*
 * Mark the pool as entered from (file, line); panics if the pool is
 * already entered, i.e. on diagnostic detection of reentrancy.
 */
static __inline void
pr_enter(struct pool *pp, const char *file, long line)
{

	if (__predict_false(pp->pr_entered_file != NULL)) {
		printf("pool %s: reentrancy at file %s line %ld\n",
		    pp->pr_wchan, file, line);
		printf("         previous entry at file %s line %ld\n",
		    pp->pr_entered_file, pp->pr_entered_line);
		panic("pr_enter");
	}

	pp->pr_entered_file = file;
	pp->pr_entered_line = line;
}

/*
 * Clear the entered marker set by pr_enter(); panics if the pool was
 * not marked entered (unbalanced enter/leave).
 */
static __inline void
pr_leave(struct pool *pp)
{

	if (__predict_false(pp->pr_entered_file == NULL)) {
		printf("pool %s not entered?\n", pp->pr_wchan);
		panic("pr_leave");
	}

	pp->pr_entered_file = NULL;
	pp->pr_entered_line = 0;
}

/* Report, via (*pr)(), where the pool is currently entered from, if anywhere. */
static __inline void
pr_enter_check(struct pool *pp, void (*pr)(const char *, ...))
{

	if (pp->pr_entered_file != NULL)
		(*pr)("\n\tcurrently entered from file %s line %ld\n",
		    pp->pr_entered_file, pp->pr_entered_line);
}
#else
/* Diagnostics compiled out: all hooks collapse to nothing. */
#define	pr_log(pp, v, action, file, line)
#define	pr_printlog(pp, pi, pr)
#define	pr_enter(pp, file, line)
#define	pr_leave(pp)
#define	pr_enter_check(pp, pr)
#endif /* POOL_DIAGNOSTIC */

/*
 * Return the pool page header based on page address.
 * In-page headers (PR_PHINPAGE) are found by offset arithmetic;
 * off-page headers are looked up in the pool's hash table.
 */
static __inline struct pool_item_header *
pr_find_pagehead(struct pool *pp, caddr_t page)
{
	struct pool_item_header *ph;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		return ((struct pool_item_header *)(page + pp->pr_phoffset));

	for (ph = LIST_FIRST(&pp->pr_hashtab[PR_HASH_INDEX(pp, page)]);
	     ph != NULL;
	     ph = LIST_NEXT(ph, ph_hashlist)) {
		if (ph->ph_page == page)
			return (ph);
	}
	return (NULL);
}

/*
 * Remove a page from the pool.  If `pq' is non-NULL the page is queued
 * there for later release; otherwise it is freed to the back-end
 * allocator immediately.  Called with the pool locked.
 */
static __inline void
pr_rmpage(struct pool *pp, struct pool_item_header *ph,
     struct pool_pagelist *pq)
{
	int s;

	/*
	 * If the page was idle, decrement the idle page count.
	 */
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (pp->pr_nidle == 0)
			panic("pr_rmpage: nidle inconsistent");
		if (pp->pr_nitems < pp->pr_itemsperpage)
			panic("pr_rmpage: nitems inconsistent");
#endif
		pp->pr_nidle--;
	}

	pp->pr_nitems -= pp->pr_itemsperpage;

	/*
	 * Unlink a page from the pool and release it (or queue it for release).
	 */
	TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
	if (pq) {
		TAILQ_INSERT_HEAD(pq, ph, ph_pagelist);
	} else {
		pool_allocator_free(pp, ph->ph_page);
		if ((pp->pr_roflags & PR_PHINPAGE) == 0) {
			LIST_REMOVE(ph, ph_hashlist);
			/* phpool is used from interrupt context: block at splhigh */
			s = splhigh();
			pool_put(&phpool, ph);
			splx(s);
		}
	}
	pp->pr_npages--;
	pp->pr_npagefree++;

	if (pp->pr_curpage == ph) {
		/*
		 * Find a new non-empty page header, if any.
		 * Start search from the page head, to increase the
		 * chance for "high water" pages to be freed.
		 */
		TAILQ_FOREACH(ph, &pp->pr_pagelist, ph_pagelist)
			if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
				break;

		pp->pr_curpage = ph;
	}
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off, slack, i;

#ifdef POOL_DIAGNOSTIC
	/*
	 * Always log if POOL_DIAGNOSTIC is defined.
	 */
	if (pool_logsize != 0)
		flags |= PR_LOGGING;
#endif

#ifdef POOL_SUBPAGE
	/*
	 * XXX We don't provide a real `nointr' back-end
	 * yet; all sub-pages come from a kmem back-end.
	 * maybe some day...
	 */
	if (palloc == NULL) {
		extern struct pool_allocator pool_allocator_kmem_subpage;
		palloc = &pool_allocator_kmem_subpage;
	}
	/*
	 * We'll assume any user-specified back-end allocator
	 * will deal with sub-pages, or simply don't care.
	 */
#else
	if (palloc == NULL)
		palloc = &pool_allocator_kmem;
#endif /* POOL_SUBPAGE */
	/* One-time initialization of the shared back-end allocator. */
	if ((palloc->pa_flags & PA_INITIALIZED) == 0) {
		if (palloc->pa_pagesz == 0) {
#ifdef POOL_SUBPAGE
			if (palloc == &pool_allocator_kmem)
				palloc->pa_pagesz = PAGE_SIZE;
			else
				palloc->pa_pagesz = POOL_SUBPAGE;
#else
			palloc->pa_pagesz = PAGE_SIZE;
#endif /* POOL_SUBPAGE */
		}

		TAILQ_INIT(&palloc->pa_list);

		simple_lock_init(&palloc->pa_slock);
		/* pa_pagesz is assumed to be a power of two here. */
		palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
		palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
		palloc->pa_flags |= PA_INITIALIZED;
	}

	if (align == 0)
		align = ALIGN(1);

	/* Items must be large enough to hold the free-list linkage. */
	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = ALIGN(size);
#ifdef DIAGNOSTIC
	if (size > palloc->pa_pagesz)
		panic("pool_init: pool item size (%lu) too large",
		    (u_long)size);
#endif

	/*
	 * Initialize the pool structure.
	 */
	TAILQ_INIT(&pp->pr_pagelist);
	TAILQ_INIT(&pp->pr_cachelist);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = UINT_MAX;
	pp->pr_roflags = flags;
	pp->pr_flags = 0;
	pp->pr_size = size;
	pp->pr_align = align;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	pp->pr_drain_hook = NULL;
	pp->pr_drain_hook_arg = NULL;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page.  Off-page page headers
	 * go on a hash table, so we can match a returned item
	 * with its header based on the page address.
	 * We use 1/16 of the page size as the threshold (XXX: tune)
	 */
	if (pp->pr_size < palloc->pa_pagesz/16) {
		/* Use the end of the page for the page header */
		pp->pr_roflags |= PR_PHINPAGE;
		pp->pr_phoffset = off = palloc->pa_pagesz -
		    ALIGN(sizeof(struct pool_item_header));
	} else {
		/* The page header will be taken from our page header pool */
		pp->pr_phoffset = 0;
		off = palloc->pa_pagesz;
		for (i = 0; i < PR_HASHTABSIZE; i++) {
			LIST_INIT(&pp->pr_hashtab[i]);
		}
	}

	/*
	 * Alignment is to take place at `ioff' within the item.  This means
	 * we must reserve up to `align - 1' bytes on the page to allow
	 * appropriate positioning of each item.
	 *
	 * Silently enforce `0 <= ioff < align'.
	 */
	pp->pr_itemoffset = ioff = ioff % align;
	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
	KASSERT(pp->pr_itemsperpage != 0);

	/*
	 * Use the slack between the chunks and the page header
	 * for "cache coloring".
	 */
	slack = off - pp->pr_itemsperpage * pp->pr_size;
	pp->pr_maxcolor = (slack / align) * align;
	pp->pr_curcolor = 0;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

#ifdef POOL_DIAGNOSTIC
	if (flags & PR_LOGGING) {
		/*
		 * Too early in boot (no kmem_map yet) or out of memory:
		 * silently run without a log.
		 */
		if (kmem_map == NULL ||
		    (pp->pr_log = malloc(pool_logsize * sizeof(struct pool_log),
		     M_TEMP, M_NOWAIT)) == NULL)
			pp->pr_roflags &= ~PR_LOGGING;
		pp->pr_curlogentry = 0;
		pp->pr_logsize = pool_logsize;
	}
#endif

	pp->pr_entered_file = NULL;
	pp->pr_entered_line = 0;

	simple_lock_init(&pp->pr_slock);

	/*
	 * Initialize private page header pool and cache magazine pool if we
	 * haven't done so yet.
	 * XXX LOCKING.
	 */
	if (phpool.pr_size == 0) {
#ifdef POOL_SUBPAGE
		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0, 0,
		    "phpool", &pool_allocator_kmem);
		pool_init(&psppool, POOL_SUBPAGE, POOL_SUBPAGE, 0,
		    PR_RECURSIVE, "psppool", &pool_allocator_kmem);
#else
		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
		    0, "phpool", NULL);
#endif
		pool_init(&pcgpool, sizeof(struct pool_cache_group), 0, 0,
		    0, "pcgpool", NULL);
	}

	/* Insert into the list of all pools. */
	simple_lock(&pool_head_slock);
	TAILQ_INSERT_TAIL(&pool_head, pp, pr_poollist);
	simple_unlock(&pool_head_slock);

	/* Insert this into the list of pools using this allocator. */
	simple_lock(&palloc->pa_slock);
	TAILQ_INSERT_TAIL(&palloc->pa_list, pp, pr_alloc_list);
	simple_unlock(&palloc->pa_slock);
}

/*
 * De-commision a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_item_header *ph;
	struct pool_cache *pc;

	/* Locking order: pool_allocator -> pool */
	simple_lock(&pp->pr_alloc->pa_slock);
	TAILQ_REMOVE(&pp->pr_alloc->pa_list, pp, pr_alloc_list);
	simple_unlock(&pp->pr_alloc->pa_slock);

	/* Destroy all caches for this pool. */
	while ((pc = TAILQ_FIRST(&pp->pr_cachelist)) != NULL)
		pool_cache_destroy(pc);

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0) {
		pr_printlog(pp, NULL, printf);
		panic("pool_destroy: pool busy: still out: %u\n",
		    pp->pr_nout);
	}
#endif

	/* Remove all pages */
	while ((ph = TAILQ_FIRST(&pp->pr_pagelist)) != NULL)
		pr_rmpage(pp, ph, NULL);

	/* Remove from global pool list */
	simple_lock(&pool_head_slock);
	TAILQ_REMOVE(&pool_head, pp, pr_poollist);
	if (drainpp == pp) {
		drainpp = NULL;
	}
	simple_unlock(&pool_head_slock);

#ifdef POOL_DIAGNOSTIC
	if ((pp->pr_roflags & PR_LOGGING) != 0)
		free(pp->pr_log, M_TEMP);
#endif
}

/*
 * Install a drain hook, called (unlocked) when the pool is starved for
 * pages; the hook should release items back to the pool.
 */
void
pool_set_drain_hook(struct pool *pp, void (*fn)(void *, int), void *arg)
{

	/* XXX no locking -- must be used just after pool_init() */
#ifdef DIAGNOSTIC
	if (pp->pr_drain_hook != NULL)
		panic("pool_set_drain_hook(%s): already set", pp->pr_wchan);
#endif
	pp->pr_drain_hook = fn;
	pp->pr_drain_hook_arg = arg;
}

/*
 * Obtain a page header for page `storage': carved from the page itself
 * when PR_PHINPAGE, otherwise allocated from phpool at splhigh.
 * Must be called with the pool unlocked (pool_get on phpool may block).
 */
static __inline struct pool_item_header *
pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
{
	struct pool_item_header *ph;
	int s;

	LOCK_ASSERT(simple_lock_held(&pp->pr_slock) == 0);

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		ph = (struct pool_item_header *) (storage + pp->pr_phoffset);
	else {
		s = splhigh();
		ph = pool_get(&phpool, flags);
		splx(s);
	}

	return (ph);
}

/*
 * Grab an item from the pool; must be called at appropriate spl level
 */
void *
#ifdef POOL_DIAGNOSTIC
_pool_get(struct pool *pp, int flags, const char *file, long line)
#else
pool_get(struct pool *pp, int flags)
#endif
{
	struct pool_item *pi;
	struct pool_item_header *ph;
	void *v;

#ifdef DIAGNOSTIC
	/* PR_WAITOK is only legal with a process context (or during shutdown). */
	if (__predict_false(curproc == NULL && doing_shutdown == 0 &&
			    (flags & PR_WAITOK) !=
			    0))
		panic("pool_get: must have NOWAIT");

#ifdef LOCKDEBUG
	if (flags & PR_WAITOK)
		simple_lock_only_held(NULL, "pool_get(PR_WAITOK)");
#endif
#endif /* DIAGNOSTIC */

	simple_lock(&pp->pr_slock);
	pr_enter(pp, file, line);

 startover:
	/*
	 * Check to see if we've reached the hard limit.  If we have,
	 * and we can wait, then wait until an item has been returned to
	 * the pool.
	 */
#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nout > pp->pr_hardlimit)) {
		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		panic("pool_get: %s: crossed hard limit", pp->pr_wchan);
	}
#endif
	if (__predict_false(pp->pr_nout == pp->pr_hardlimit)) {
		if (pp->pr_drain_hook != NULL) {
			/*
			 * Since the drain hook is going to free things
			 * back to the pool, unlock, call the hook, re-lock,
			 * and check the hardlimit condition again.
			 */
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			(*pp->pr_drain_hook)(pp->pr_drain_hook_arg, flags);
			simple_lock(&pp->pr_slock);
			pr_enter(pp, file, line);
			if (pp->pr_nout < pp->pr_hardlimit)
				goto startover;
		}

		if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
			/*
			 * XXX: A warning isn't logged in this case.  Should
			 * it be?
			 */
			pp->pr_flags |= PR_WANTED;
			pr_leave(pp);
			/* ltsleep() releases and reacquires pr_slock. */
			ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock);
			pr_enter(pp, file, line);
			goto startover;
		}

		/*
		 * Log a message that the hard limit has been hit.
		 */
		if (pp->pr_hardlimit_warning != NULL &&
		    ratecheck(&pp->pr_hardlimit_warning_last,
			      &pp->pr_hardlimit_ratecap))
			log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);

		pp->pr_nfail++;

		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		return (NULL);
	}

	/*
	 * The convention we use is that if `curpage' is not NULL, then
	 * it points at a non-empty bucket.  In particular, `curpage'
	 * never points at a page header which has PR_PHINPAGE set and
	 * has no items in its bucket.
	 */
	if ((ph = pp->pr_curpage) == NULL) {
#ifdef DIAGNOSTIC
		if (pp->pr_nitems != 0) {
			simple_unlock(&pp->pr_slock);
			printf("pool_get: %s: curpage NULL, nitems %u\n",
			    pp->pr_wchan, pp->pr_nitems);
			panic("pool_get: nitems inconsistent\n");
		}
#endif

		/*
		 * Call the back-end page allocator for more memory.
		 * Release the pool lock, as the back-end page allocator
		 * may block.
		 */
		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		v = pool_allocator_alloc(pp, flags);
		if (__predict_true(v != NULL))
			ph = pool_alloc_item_header(pp, v, flags);
		simple_lock(&pp->pr_slock);
		pr_enter(pp, file, line);

		if (__predict_false(v == NULL || ph == NULL)) {
			if (v != NULL)
				pool_allocator_free(pp, v);

			/*
			 * We were unable to allocate a page or item
			 * header, but we released the lock during
			 * allocation, so perhaps items were freed
			 * back to the pool.  Check for this case.
			 */
			if (pp->pr_curpage != NULL)
				goto startover;

			if ((flags & PR_WAITOK) == 0) {
				pp->pr_nfail++;
				pr_leave(pp);
				simple_unlock(&pp->pr_slock);
				return (NULL);
			}

			/*
			 * Wait for items to be returned to this pool.
			 *
			 * XXX: maybe we should wake up once a second and
			 * try again?
			 */
			pp->pr_flags |= PR_WANTED;
			/* PA_WANTED is already set on the allocator. */
			pr_leave(pp);
			ltsleep(pp, PSWP, pp->pr_wchan, 0, &pp->pr_slock);
			pr_enter(pp, file, line);
			goto startover;
		}

		/* We have more memory; add it to the pool */
		pool_prime_page(pp, v, ph);
		pp->pr_npagealloc++;

		/* Start the allocation process over. */
		goto startover;
	}

	if (__predict_false((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL)) {
		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		panic("pool_get: %s: page empty", pp->pr_wchan);
	}
#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nitems == 0)) {
		pr_leave(pp);
		simple_unlock(&pp->pr_slock);
		printf("pool_get: %s: items on itemlist, nitems %u\n",
		    pp->pr_wchan, pp->pr_nitems);
		panic("pool_get: nitems inconsistent\n");
	}
#endif

#ifdef POOL_DIAGNOSTIC
	pr_log(pp, v, PRLOG_GET, file, line);
#endif

#ifdef DIAGNOSTIC
	/* A free item must still carry PI_MAGIC; otherwise it was corrupted. */
	if (__predict_false(pi->pi_magic != PI_MAGIC)) {
		pr_printlog(pp, pi, printf);
		panic("pool_get(%s): free list modified: magic=%x; page %p;"
		       " item addr %p\n",
			pp->pr_wchan, pi->pi_magic, ph->ph_page, pi);
	}
#endif

	/*
	 * Remove from item list.
	 */
	TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
	pp->pr_nitems--;
	pp->pr_nout++;
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (__predict_false(pp->pr_nidle == 0))
			panic("pool_get: nidle inconsistent");
#endif
		/* First item taken from this page: it is no longer idle. */
		pp->pr_nidle--;
	}
	ph->ph_nmissing++;
	if (TAILQ_FIRST(&ph->ph_itemlist) == NULL) {
#ifdef DIAGNOSTIC
		if (__predict_false(ph->ph_nmissing != pp->pr_itemsperpage)) {
			pr_leave(pp);
			simple_unlock(&pp->pr_slock);
			panic("pool_get: %s: nmissing inconsistent",
			    pp->pr_wchan);
		}
#endif
		/*
		 * Find a new non-empty page header, if any.
		 * Start search from the page head, to increase
		 * the chance for "high water" pages to be freed.
		 *
		 * Migrate empty pages to the end of the list.  This
		 * will speed the update of curpage as pages become
		 * idle.  Empty pages intermingled with idle pages
		 * is no big deal.  As soon as a page becomes un-empty,
		 * it will move back to the head of the list.
		 */
		TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
		TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist);
		TAILQ_FOREACH(ph, &pp->pr_pagelist, ph_pagelist)
			if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
				break;

		pp->pr_curpage = ph;
	}

	pp->pr_nget++;

	/*
	 * If we have a low water mark and we are now below that low
	 * water mark, add more items to the pool.
	 */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning?  Should we set up a timeout
		 * to try again in a second or so?  The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}

	pr_leave(pp);
	simple_unlock(&pp->pr_slock);
	return (v);
}

/*
 * Internal version of pool_put().  Pool is already locked/entered.
 */
static void
pool_do_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_item_header *ph;
	caddr_t page;
	int s;

	LOCK_ASSERT(simple_lock_held(&pp->pr_slock));

	/* Mask the item address down to its containing page. */
	page = (caddr_t)((u_long)v & pp->pr_alloc->pa_pagemask);

#ifdef DIAGNOSTIC
	if (__predict_false(pp->pr_nout == 0)) {
		printf("pool %s: putting with none out\n",
		    pp->pr_wchan);
		panic("pool_put");
	}
#endif

	if (__predict_false((ph = pr_find_pagehead(pp, page)) == NULL)) {
		pr_printlog(pp, NULL, printf);
		panic("pool_put: %s: page header missing", pp->pr_wchan);
	}

#ifdef LOCKDEBUG
	/*
	 * Check if we're freeing a locked simple lock.
	 */
	simple_lock_freecheck((caddr_t)pi, ((caddr_t)pi) + pp->pr_size);
#endif

	/*
	 * Return to item list.
	 */
#ifdef DIAGNOSTIC
	pi->pi_magic = PI_MAGIC;
#endif
#ifdef DEBUG
	{
		/* Scribble over the whole item to catch use-after-free. */
		int i, *ip = v;

		for (i = 0; i < pp->pr_size / sizeof(int); i++) {
			*ip++ = PI_MAGIC;
		}
	}
#endif

	TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
	ph->ph_nmissing--;
	pp->pr_nput++;
	pp->pr_nitems++;
	pp->pr_nout--;

	/* Cancel "pool empty" condition if it exists */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (pp->pr_flags & PR_WANTED) {
		pp->pr_flags &= ~PR_WANTED;
		if (ph->ph_nmissing == 0)
			pp->pr_nidle++;
		/* Wake sleepers in pool_get(); skip page housekeeping. */
		wakeup((caddr_t)pp);
		return;
	}

	/*
	 * If this page is now complete, do one of two things:
	 *
	 *	(1) If we have more pages than the page high water
	 *	    mark, free the page back to the system.
	 *
	 *	(2) Move it to the end of the page list, so that
	 *	    we minimize our chances of fragmenting the
	 *	    pool.  Idle pages migrate to the end (along with
	 *	    completely empty pages, so that we find un-empty
	 *	    pages more quickly when we update curpage) of the
	 *	    list so they can be more easily swept up by
	 *	    the pagedaemon when pages are scarce.
	 */
	if (ph->ph_nmissing == 0) {
		pp->pr_nidle++;
		if (pp->pr_npages > pp->pr_maxpages ||
		    (pp->pr_alloc->pa_flags & PA_WANT) != 0) {
			pr_rmpage(pp, ph, NULL);
		} else {
			TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
			TAILQ_INSERT_TAIL(&pp->pr_pagelist, ph, ph_pagelist);

			/*
			 * Update the timestamp on the page.  A page must
			 * be idle for some period of time before it can
			 * be reclaimed by the pagedaemon.  This minimizes
			 * ping-pong'ing for memory.
			 */
			s = splclock();
			ph->ph_time = mono_time;
			splx(s);

			/*
			 * Update the current page pointer.  Just look for
			 * the first page with any free items.
			 *
			 * XXX: Maybe we want an option to look for the
			 * page with the fewest available items, to minimize
			 * fragmentation?
			 */
			TAILQ_FOREACH(ph, &pp->pr_pagelist, ph_pagelist)
				if (TAILQ_FIRST(&ph->ph_itemlist) != NULL)
					break;

			pp->pr_curpage = ph;
		}
	}
	/*
	 * If the page has just become un-empty, move it to the head of
	 * the list, and make it the current page.  The next allocation
	 * will get the item from this page, instead of further fragmenting
	 * the pool.
	 */
	else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
		TAILQ_REMOVE(&pp->pr_pagelist, ph, ph_pagelist);
		TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, ph_pagelist);
		pp->pr_curpage = ph;
	}
}

/*
 * Return resource to the pool; must be called at appropriate spl level
 */
#ifdef POOL_DIAGNOSTIC
void
_pool_put(struct pool *pp, void *v, const char *file, long line)
{

	simple_lock(&pp->pr_slock);
	pr_enter(pp, file, line);

	pr_log(pp, v, PRLOG_PUT, file, line);

	pool_do_put(pp, v);

	pr_leave(pp);
	simple_unlock(&pp->pr_slock);
}
#undef pool_put
#endif /* POOL_DIAGNOSTIC */

void
pool_put(struct pool *pp, void *v)
{

	simple_lock(&pp->pr_slock);

	pool_do_put(pp, v);

	simple_unlock(&pp->pr_slock);
}

#ifdef POOL_DIAGNOSTIC
#define		pool_put(h, v)	_pool_put((h), (v), __FILE__, __LINE__)
#endif

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_item_header *ph;
	caddr_t cp;
	int newpages;

	simple_lock(&pp->pr_slock);

	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		/* Drop the lock; the back-end allocator may block. */
		simple_unlock(&pp->pr_slock);
		cp = pool_allocator_alloc(pp, PR_NOWAIT);
		if (__predict_true(cp != NULL))
			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
		simple_lock(&pp->pr_slock);

		if (__predict_false(cp == NULL || ph == NULL)) {
			if (cp != NULL)
				pool_allocator_free(pp, cp);
			break;
		}

		pool_prime_page(pp, cp, ph);
		pp->pr_npagealloc++;
		pp->pr_minpages++;
	}

	if (pp->pr_minpages >= pp->pr_maxpages)
		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */

	simple_unlock(&pp->pr_slock);
	return (0);
}

/*
 * Add a page worth of items to the pool.
 *
 * Note, we must be called with the pool descriptor LOCKED.
 */
static void
pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
{
	struct pool_item *pi;
	caddr_t cp = storage;
	unsigned int align = pp->pr_align;
	unsigned int ioff = pp->pr_itemoffset;
	int n;

#ifdef DIAGNOSTIC
	if (((u_long)cp & (pp->pr_alloc->pa_pagesz - 1)) != 0)
		panic("pool_prime_page: %s: unaligned page", pp->pr_wchan);
#endif

	/* Off-page headers must be findable by page address later. */
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		LIST_INSERT_HEAD(&pp->pr_hashtab[PR_HASH_INDEX(pp, cp)],
		    ph, ph_hashlist);

	/*
	 * Insert page header.
	 */
	TAILQ_INSERT_HEAD(&pp->pr_pagelist, ph, ph_pagelist);
	TAILQ_INIT(&ph->ph_itemlist);
	ph->ph_page = storage;
	ph->ph_nmissing = 0;
	memset(&ph->ph_time, 0, sizeof(ph->ph_time));

	pp->pr_nidle++;

	/*
	 * Color this page.
	 */
	cp = (caddr_t)(cp + pp->pr_curcolor);
	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
		pp->pr_curcolor = 0;

	/*
	 * Adjust storage to apply aligment to `pr_itemoffset' in each item.
	 */
	if (ioff != 0)
		cp = (caddr_t)(cp + (align - ioff));

	/*
	 * Insert remaining chunks on the bucket list.
	 */
	n = pp->pr_itemsperpage;
	pp->pr_nitems += n;

	while (n--) {
		pi = (struct pool_item *)cp;

		/* Insert on page list */
		TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);
#ifdef DIAGNOSTIC
		pi->pi_magic = PI_MAGIC;
#endif
		cp = (caddr_t)(cp + pp->pr_size);
	}

	/*
	 * If the pool was depleted, point at the new page.
	 */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

/*
 * Used by pool_get() when nitems drops below the low water mark.  This
 * is used to catch up nitmes with the low water mark.
 *
 * Note 1, we never wait for memory here, we let the caller decide what to do.
 *
 * Note 2, we must be called with the pool already locked, and we return
 * with it locked.
 */
static int
pool_catchup(struct pool *pp)
{
	struct pool_item_header *ph;
	caddr_t cp;
	int error = 0;

	while (POOL_NEEDS_CATCHUP(pp)) {
		/*
		 * Call the page back-end allocator for more memory.
		 *
		 * XXX: We never wait, so should we bother unlocking
		 * the pool descriptor?
1168 */ 1169 simple_unlock(&pp->pr_slock); 1170 cp = pool_allocator_alloc(pp, PR_NOWAIT); 1171 if (__predict_true(cp != NULL)) 1172 ph = pool_alloc_item_header(pp, cp, PR_NOWAIT); 1173 simple_lock(&pp->pr_slock); 1174 if (__predict_false(cp == NULL || ph == NULL)) { 1175 if (cp != NULL) 1176 pool_allocator_free(pp, cp); 1177 error = ENOMEM; 1178 break; 1179 } 1180 pool_prime_page(pp, cp, ph); 1181 pp->pr_npagealloc++; 1182 } 1183 1184 return (error); 1185 } 1186 1187 void 1188 pool_setlowat(struct pool *pp, int n) 1189 { 1190 1191 simple_lock(&pp->pr_slock); 1192 1193 pp->pr_minitems = n; 1194 pp->pr_minpages = (n == 0) 1195 ? 0 1196 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1197 1198 /* Make sure we're caught up with the newly-set low water mark. */ 1199 if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) { 1200 /* 1201 * XXX: Should we log a warning? Should we set up a timeout 1202 * to try again in a second or so? The latter could break 1203 * a caller's assumptions about interrupt protection, etc. 1204 */ 1205 } 1206 1207 simple_unlock(&pp->pr_slock); 1208 } 1209 1210 void 1211 pool_sethiwat(struct pool *pp, int n) 1212 { 1213 1214 simple_lock(&pp->pr_slock); 1215 1216 pp->pr_maxpages = (n == 0) 1217 ? 0 1218 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1219 1220 simple_unlock(&pp->pr_slock); 1221 } 1222 1223 void 1224 pool_sethardlimit(struct pool *pp, int n, const char *warnmess, int ratecap) 1225 { 1226 1227 simple_lock(&pp->pr_slock); 1228 1229 pp->pr_hardlimit = n; 1230 pp->pr_hardlimit_warning = warnmess; 1231 pp->pr_hardlimit_ratecap.tv_sec = ratecap; 1232 pp->pr_hardlimit_warning_last.tv_sec = 0; 1233 pp->pr_hardlimit_warning_last.tv_usec = 0; 1234 1235 /* 1236 * In-line version of pool_sethiwat(), because we don't want to 1237 * release the lock. 1238 */ 1239 pp->pr_maxpages = (n == 0) 1240 ? 
0 1241 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage; 1242 1243 simple_unlock(&pp->pr_slock); 1244 } 1245 1246 /* 1247 * Release all complete pages that have not been used recently. 1248 */ 1249 int 1250 #ifdef POOL_DIAGNOSTIC 1251 _pool_reclaim(struct pool *pp, const char *file, long line) 1252 #else 1253 pool_reclaim(struct pool *pp) 1254 #endif 1255 { 1256 struct pool_item_header *ph, *phnext; 1257 struct pool_cache *pc; 1258 struct timeval curtime; 1259 struct pool_pagelist pq; 1260 int s; 1261 1262 if (pp->pr_drain_hook != NULL) { 1263 /* 1264 * The drain hook must be called with the pool unlocked. 1265 */ 1266 (*pp->pr_drain_hook)(pp->pr_drain_hook_arg, PR_NOWAIT); 1267 } 1268 1269 if (simple_lock_try(&pp->pr_slock) == 0) 1270 return (0); 1271 pr_enter(pp, file, line); 1272 1273 TAILQ_INIT(&pq); 1274 1275 /* 1276 * Reclaim items from the pool's caches. 1277 */ 1278 TAILQ_FOREACH(pc, &pp->pr_cachelist, pc_poollist) 1279 pool_cache_reclaim(pc); 1280 1281 s = splclock(); 1282 curtime = mono_time; 1283 splx(s); 1284 1285 for (ph = TAILQ_FIRST(&pp->pr_pagelist); ph != NULL; ph = phnext) { 1286 phnext = TAILQ_NEXT(ph, ph_pagelist); 1287 1288 /* Check our minimum page claim */ 1289 if (pp->pr_npages <= pp->pr_minpages) 1290 break; 1291 1292 if (ph->ph_nmissing == 0) { 1293 struct timeval diff; 1294 timersub(&curtime, &ph->ph_time, &diff); 1295 if (diff.tv_sec < pool_inactive_time) 1296 continue; 1297 1298 /* 1299 * If freeing this page would put us below 1300 * the low water mark, stop now. 
1301 */ 1302 if ((pp->pr_nitems - pp->pr_itemsperpage) < 1303 pp->pr_minitems) 1304 break; 1305 1306 pr_rmpage(pp, ph, &pq); 1307 } 1308 } 1309 1310 pr_leave(pp); 1311 simple_unlock(&pp->pr_slock); 1312 if (TAILQ_EMPTY(&pq)) 1313 return (0); 1314 1315 while ((ph = TAILQ_FIRST(&pq)) != NULL) { 1316 TAILQ_REMOVE(&pq, ph, ph_pagelist); 1317 pool_allocator_free(pp, ph->ph_page); 1318 if (pp->pr_roflags & PR_PHINPAGE) { 1319 continue; 1320 } 1321 LIST_REMOVE(ph, ph_hashlist); 1322 s = splhigh(); 1323 pool_put(&phpool, ph); 1324 splx(s); 1325 } 1326 1327 return (1); 1328 } 1329 1330 /* 1331 * Drain pools, one at a time. 1332 * 1333 * Note, we must never be called from an interrupt context. 1334 */ 1335 void 1336 pool_drain(void *arg) 1337 { 1338 struct pool *pp; 1339 int s; 1340 1341 pp = NULL; 1342 s = splvm(); 1343 simple_lock(&pool_head_slock); 1344 if (drainpp == NULL) { 1345 drainpp = TAILQ_FIRST(&pool_head); 1346 } 1347 if (drainpp) { 1348 pp = drainpp; 1349 drainpp = TAILQ_NEXT(pp, pr_poollist); 1350 } 1351 simple_unlock(&pool_head_slock); 1352 pool_reclaim(pp); 1353 splx(s); 1354 } 1355 1356 /* 1357 * Diagnostic helpers. 1358 */ 1359 void 1360 pool_print(struct pool *pp, const char *modif) 1361 { 1362 int s; 1363 1364 s = splvm(); 1365 if (simple_lock_try(&pp->pr_slock) == 0) { 1366 printf("pool %s is locked; try again later\n", 1367 pp->pr_wchan); 1368 splx(s); 1369 return; 1370 } 1371 pool_print1(pp, modif, printf); 1372 simple_unlock(&pp->pr_slock); 1373 splx(s); 1374 } 1375 1376 void 1377 pool_printit(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1378 { 1379 int didlock = 0; 1380 1381 if (pp == NULL) { 1382 (*pr)("Must specify a pool to print.\n"); 1383 return; 1384 } 1385 1386 /* 1387 * Called from DDB; interrupts should be blocked, and all 1388 * other processors should be paused. We can skip locking 1389 * the pool in this case. 1390 * 1391 * We do a simple_lock_try() just to print the lock 1392 * status, however. 
1393 */ 1394 1395 if (simple_lock_try(&pp->pr_slock) == 0) 1396 (*pr)("WARNING: pool %s is locked\n", pp->pr_wchan); 1397 else 1398 didlock = 1; 1399 1400 pool_print1(pp, modif, pr); 1401 1402 if (didlock) 1403 simple_unlock(&pp->pr_slock); 1404 } 1405 1406 static void 1407 pool_print1(struct pool *pp, const char *modif, void (*pr)(const char *, ...)) 1408 { 1409 struct pool_item_header *ph; 1410 struct pool_cache *pc; 1411 struct pool_cache_group *pcg; 1412 #ifdef DIAGNOSTIC 1413 struct pool_item *pi; 1414 #endif 1415 int i, print_log = 0, print_pagelist = 0, print_cache = 0; 1416 char c; 1417 1418 while ((c = *modif++) != '\0') { 1419 if (c == 'l') 1420 print_log = 1; 1421 if (c == 'p') 1422 print_pagelist = 1; 1423 if (c == 'c') 1424 print_cache = 1; 1425 modif++; 1426 } 1427 1428 (*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n", 1429 pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset, 1430 pp->pr_roflags); 1431 (*pr)("\talloc %p\n", pp->pr_alloc); 1432 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n", 1433 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages); 1434 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n", 1435 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit); 1436 1437 (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n", 1438 pp->pr_nget, pp->pr_nfail, pp->pr_nput); 1439 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n", 1440 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle); 1441 1442 if (print_pagelist == 0) 1443 goto skip_pagelist; 1444 1445 if ((ph = TAILQ_FIRST(&pp->pr_pagelist)) != NULL) 1446 (*pr)("\n\tpage list:\n"); 1447 for (; ph != NULL; ph = TAILQ_NEXT(ph, ph_pagelist)) { 1448 (*pr)("\t\tpage %p, nmissing %d, time %lu,%lu\n", 1449 ph->ph_page, ph->ph_nmissing, 1450 (u_long)ph->ph_time.tv_sec, 1451 (u_long)ph->ph_time.tv_usec); 1452 #ifdef DIAGNOSTIC 1453 TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) { 1454 if (pi->pi_magic != PI_MAGIC) { 1455 
(*pr)("\t\t\titem %p, magic 0x%x\n", 1456 pi, pi->pi_magic); 1457 } 1458 } 1459 #endif 1460 } 1461 if (pp->pr_curpage == NULL) 1462 (*pr)("\tno current page\n"); 1463 else 1464 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page); 1465 1466 skip_pagelist: 1467 1468 if (print_log == 0) 1469 goto skip_log; 1470 1471 (*pr)("\n"); 1472 if ((pp->pr_roflags & PR_LOGGING) == 0) 1473 (*pr)("\tno log\n"); 1474 else 1475 pr_printlog(pp, NULL, pr); 1476 1477 skip_log: 1478 1479 if (print_cache == 0) 1480 goto skip_cache; 1481 1482 TAILQ_FOREACH(pc, &pp->pr_cachelist, pc_poollist) { 1483 (*pr)("\tcache %p: allocfrom %p freeto %p\n", pc, 1484 pc->pc_allocfrom, pc->pc_freeto); 1485 (*pr)("\t hits %lu misses %lu ngroups %lu nitems %lu\n", 1486 pc->pc_hits, pc->pc_misses, pc->pc_ngroups, pc->pc_nitems); 1487 TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) { 1488 (*pr)("\t\tgroup %p: avail %d\n", pcg, pcg->pcg_avail); 1489 for (i = 0; i < PCG_NOBJECTS; i++) 1490 (*pr)("\t\t\t%p\n", pcg->pcg_objects[i]); 1491 } 1492 } 1493 1494 skip_cache: 1495 1496 pr_enter_check(pp, pr); 1497 } 1498 1499 int 1500 pool_chk(struct pool *pp, const char *label) 1501 { 1502 struct pool_item_header *ph; 1503 int r = 0; 1504 1505 simple_lock(&pp->pr_slock); 1506 1507 TAILQ_FOREACH(ph, &pp->pr_pagelist, ph_pagelist) { 1508 struct pool_item *pi; 1509 int n; 1510 caddr_t page; 1511 1512 page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask); 1513 if (page != ph->ph_page && 1514 (pp->pr_roflags & PR_PHINPAGE) != 0) { 1515 if (label != NULL) 1516 printf("%s: ", label); 1517 printf("pool(%p:%s): page inconsistency: page %p;" 1518 " at page head addr %p (p %p)\n", pp, 1519 pp->pr_wchan, ph->ph_page, 1520 ph, page); 1521 r++; 1522 goto out; 1523 } 1524 1525 for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0; 1526 pi != NULL; 1527 pi = TAILQ_NEXT(pi,pi_list), n++) { 1528 1529 #ifdef DIAGNOSTIC 1530 if (pi->pi_magic != PI_MAGIC) { 1531 if (label != NULL) 1532 printf("%s: ", label); 1533 printf("pool(%s): free list 
modified: magic=%x;" 1534 " page %p; item ordinal %d;" 1535 " addr %p (p %p)\n", 1536 pp->pr_wchan, pi->pi_magic, ph->ph_page, 1537 n, pi, page); 1538 panic("pool"); 1539 } 1540 #endif 1541 page = 1542 (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask); 1543 if (page == ph->ph_page) 1544 continue; 1545 1546 if (label != NULL) 1547 printf("%s: ", label); 1548 printf("pool(%p:%s): page inconsistency: page %p;" 1549 " item ordinal %d; addr %p (p %p)\n", pp, 1550 pp->pr_wchan, ph->ph_page, 1551 n, pi, page); 1552 r++; 1553 goto out; 1554 } 1555 } 1556 out: 1557 simple_unlock(&pp->pr_slock); 1558 return (r); 1559 } 1560 1561 /* 1562 * pool_cache_init: 1563 * 1564 * Initialize a pool cache. 1565 * 1566 * NOTE: If the pool must be protected from interrupts, we expect 1567 * to be called at the appropriate interrupt priority level. 1568 */ 1569 void 1570 pool_cache_init(struct pool_cache *pc, struct pool *pp, 1571 int (*ctor)(void *, void *, int), 1572 void (*dtor)(void *, void *), 1573 void *arg) 1574 { 1575 1576 TAILQ_INIT(&pc->pc_grouplist); 1577 simple_lock_init(&pc->pc_slock); 1578 1579 pc->pc_allocfrom = NULL; 1580 pc->pc_freeto = NULL; 1581 pc->pc_pool = pp; 1582 1583 pc->pc_ctor = ctor; 1584 pc->pc_dtor = dtor; 1585 pc->pc_arg = arg; 1586 1587 pc->pc_hits = 0; 1588 pc->pc_misses = 0; 1589 1590 pc->pc_ngroups = 0; 1591 1592 pc->pc_nitems = 0; 1593 1594 simple_lock(&pp->pr_slock); 1595 TAILQ_INSERT_TAIL(&pp->pr_cachelist, pc, pc_poollist); 1596 simple_unlock(&pp->pr_slock); 1597 } 1598 1599 /* 1600 * pool_cache_destroy: 1601 * 1602 * Destroy a pool cache. 1603 */ 1604 void 1605 pool_cache_destroy(struct pool_cache *pc) 1606 { 1607 struct pool *pp = pc->pc_pool; 1608 1609 /* First, invalidate the entire cache. */ 1610 pool_cache_invalidate(pc); 1611 1612 /* ...and remove it from the pool's cache list. 
	 */
	simple_lock(&pp->pr_slock);
	TAILQ_REMOVE(&pp->pr_cachelist, pc, pc_poollist);
	simple_unlock(&pp->pr_slock);
}

/*
 * Pop one object off a cache group's stack of cached objects.
 * Caller must guarantee the group is non-empty (asserted below).
 */
static __inline void *
pcg_get(struct pool_cache_group *pcg)
{
	void *object;
	u_int idx;

	KASSERT(pcg->pcg_avail <= PCG_NOBJECTS);
	KASSERT(pcg->pcg_avail != 0);
	idx = --pcg->pcg_avail;

	KASSERT(pcg->pcg_objects[idx] != NULL);
	object = pcg->pcg_objects[idx];
	pcg->pcg_objects[idx] = NULL;

	return (object);
}

/*
 * Push an object onto a cache group's stack.  Caller must guarantee
 * the group is not full (asserted below).
 */
static __inline void
pcg_put(struct pool_cache_group *pcg, void *object)
{
	u_int idx;

	KASSERT(pcg->pcg_avail < PCG_NOBJECTS);
	idx = pcg->pcg_avail++;

	KASSERT(pcg->pcg_objects[idx] == NULL);
	pcg->pcg_objects[idx] = object;
}

/*
 * pool_cache_get:
 *
 *	Get an object from a pool cache.
 */
void *
pool_cache_get(struct pool_cache *pc, int flags)
{
	struct pool_cache_group *pcg;
	void *object;

#ifdef LOCKDEBUG
	if (flags & PR_WAITOK)
		simple_lock_only_held(NULL, "pool_cache_get(PR_WAITOK)");
#endif

	simple_lock(&pc->pc_slock);

	if ((pcg = pc->pc_allocfrom) == NULL) {
		/* No hinted group; search for one with objects available. */
		TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) {
			if (pcg->pcg_avail != 0) {
				pc->pc_allocfrom = pcg;
				goto have_group;
			}
		}

		/*
		 * No groups with any available objects.  Allocate
		 * a new object, construct it, and return it to
		 * the caller.  We will allocate a group, if necessary,
		 * when the object is freed back to the cache.
		 */
		pc->pc_misses++;
		/* pool_get() may sleep; drop the cache lock first. */
		simple_unlock(&pc->pc_slock);
		object = pool_get(pc->pc_pool, flags);
		if (object != NULL && pc->pc_ctor != NULL) {
			/* Constructor failure: undo the raw allocation. */
			if ((*pc->pc_ctor)(pc->pc_arg, object, flags) != 0) {
				pool_put(pc->pc_pool, object);
				return (NULL);
			}
		}
		return (object);
	}

 have_group:
	pc->pc_hits++;
	pc->pc_nitems--;
	object = pcg_get(pcg);

	/* Group drained; invalidate the allocation hint. */
	if (pcg->pcg_avail == 0)
		pc->pc_allocfrom = NULL;

	simple_unlock(&pc->pc_slock);

	return (object);
}

/*
 * pool_cache_put:
 *
 *	Put an object back to the pool cache.
 */
void
pool_cache_put(struct pool_cache *pc, void *object)
{
	struct pool_cache_group *pcg;
	int s;

	simple_lock(&pc->pc_slock);

	if ((pcg = pc->pc_freeto) == NULL) {
		/* No hinted group; search for one with space left. */
		TAILQ_FOREACH(pcg, &pc->pc_grouplist, pcg_list) {
			if (pcg->pcg_avail != PCG_NOBJECTS) {
				pc->pc_freeto = pcg;
				goto have_group;
			}
		}

		/*
		 * No empty groups to free the object to.  Attempt to
		 * allocate one.
		 */
		simple_unlock(&pc->pc_slock);
		s = splvm();
		pcg = pool_get(&pcgpool, PR_NOWAIT);
		splx(s);
		if (pcg != NULL) {
			memset(pcg, 0, sizeof(*pcg));
			simple_lock(&pc->pc_slock);
			pc->pc_ngroups++;
			TAILQ_INSERT_TAIL(&pc->pc_grouplist, pcg, pcg_list);
			/*
			 * Another thread may have set pc_freeto while the
			 * lock was dropped; only install our group if not.
			 */
			if (pc->pc_freeto == NULL)
				pc->pc_freeto = pcg;
			goto have_group;
		}

		/*
		 * Unable to allocate a cache group; destruct the object
		 * and free it back to the pool.
		 */
		pool_cache_destruct_object(pc, object);
		return;
	}

 have_group:
	pc->pc_nitems++;
	pcg_put(pcg, object);

	/* Group now full; invalidate the free-to hint. */
	if (pcg->pcg_avail == PCG_NOBJECTS)
		pc->pc_freeto = NULL;

	simple_unlock(&pc->pc_slock);
}

/*
 * pool_cache_destruct_object:
 *
 *	Force destruction of an object and its release back into
 *	the pool.
 */
void
pool_cache_destruct_object(struct pool_cache *pc, void *object)
{

	if (pc->pc_dtor != NULL)
		(*pc->pc_dtor)(pc->pc_arg, object);
	pool_put(pc->pc_pool, object);
}

/*
 * pool_cache_do_invalidate:
 *
 *	This internal function implements pool_cache_invalidate() and
 *	pool_cache_reclaim().
 *
 *	Releases every cached object back to the pool via `putit'
 *	(running the destructor first, if any); if `free_groups' is
 *	set, the emptied group structures themselves are freed too.
 *	Called with the cache lock held.
 */
static void
pool_cache_do_invalidate(struct pool_cache *pc, int free_groups,
    void (*putit)(struct pool *, void *))
{
	struct pool_cache_group *pcg, *npcg;
	void *object;
	int s;

	/* Grab the next pointer first: the group may be freed below. */
	for (pcg = TAILQ_FIRST(&pc->pc_grouplist); pcg != NULL;
	     pcg = npcg) {
		npcg = TAILQ_NEXT(pcg, pcg_list);
		while (pcg->pcg_avail != 0) {
			pc->pc_nitems--;
			object = pcg_get(pcg);
			if (pcg->pcg_avail == 0 && pc->pc_allocfrom == pcg)
				pc->pc_allocfrom = NULL;
			if (pc->pc_dtor != NULL)
				(*pc->pc_dtor)(pc->pc_arg, object);
			(*putit)(pc->pc_pool, object);
		}
		if (free_groups) {
			pc->pc_ngroups--;
			TAILQ_REMOVE(&pc->pc_grouplist, pcg, pcg_list);
			if (pc->pc_freeto == pcg)
				pc->pc_freeto = NULL;
			s = splvm();
			pool_put(&pcgpool, pcg);
			splx(s);
		}
	}
}

/*
 * pool_cache_invalidate:
 *
 *	Invalidate a pool cache (destruct and release all of the
 *	cached objects).
 */
void
pool_cache_invalidate(struct pool_cache *pc)
{

	simple_lock(&pc->pc_slock);
	pool_cache_do_invalidate(pc, 0, pool_put);
	simple_unlock(&pc->pc_slock);
}

/*
 * pool_cache_reclaim:
 *
 *	Reclaim a pool cache for pool_reclaim().
 *
 *	Note: uses pool_do_put (not pool_put) as the release function;
 *	presumably because pool_reclaim() already holds locks that the
 *	public pool_put() path would re-take -- confirm against the
 *	definitions earlier in this file.
 */
static void
pool_cache_reclaim(struct pool_cache *pc)
{

	simple_lock(&pc->pc_slock);
	pool_cache_do_invalidate(pc, 1, pool_do_put);
	simple_unlock(&pc->pc_slock);
}

/*
 * Pool backend allocators.
 *
 * Each pool has a backend allocator that handles allocation, deallocation,
 * and any additional draining that might be needed.
 *
 * We provide two standard allocators:
 *
 *	pool_allocator_kmem - the default when no allocator is specified
 *
 *	pool_allocator_nointr - used for pools that will not be accessed
 *	in interrupt context.
 */
void	*pool_page_alloc(struct pool *, int);
void	pool_page_free(struct pool *, void *);

/* Default allocator: whole kernel-memory pages. */
struct pool_allocator pool_allocator_kmem = {
	pool_page_alloc, pool_page_free, 0,
};

void	*pool_page_alloc_nointr(struct pool *, int);
void	pool_page_free_nointr(struct pool *, void *);

/* Allocator for pools never touched from interrupt context. */
struct pool_allocator pool_allocator_nointr = {
	pool_page_alloc_nointr, pool_page_free_nointr, 0,
};

#ifdef POOL_SUBPAGE
void	*pool_subpage_alloc(struct pool *, int);
void	pool_subpage_free(struct pool *, void *);

/* Sub-page allocator, for machines with large hardware pages. */
struct pool_allocator pool_allocator_kmem_subpage = {
	pool_subpage_alloc, pool_subpage_free, 0,
};
#endif /* POOL_SUBPAGE */

/*
 * We have at least three different resources for the same allocation and
 * each resource can be depleted.  First, we have the ready elements in the
 * pool.  Then we have the resource (typically a vm_map) for this allocator.
 * Finally, we have physical memory.  Waiting for any of these can be
 * unnecessary when any other is freed, but the kernel doesn't support
 * sleeping on multiple wait channels, so we have to employ another strategy.
 *
 * The caller sleeps on the pool (so that it can be awakened when an item
 * is returned to the pool), but we set PA_WANT on the allocator.  When a
 * page is returned to the allocator and PA_WANT is set, pool_allocator_free
 * will wake up all sleeping pools belonging to this allocator.
 *
 * XXX Thundering herd.
 */
void *
pool_allocator_alloc(struct pool *org, int flags)
{
	struct pool_allocator *pa = org->pr_alloc;
	struct pool *pp, *start;
	int s, freed;
	void *res;

	do {
		if ((res = (*pa->pa_alloc)(org, flags)) != NULL)
			return (res);
		if ((flags & PR_WAITOK) == 0) {
			/*
			 * We only run the drain hook here if PR_NOWAIT.
			 * In other cases, the hook will be run in
			 * pool_reclaim().
			 */
			if (org->pr_drain_hook != NULL) {
				(*org->pr_drain_hook)(org->pr_drain_hook_arg,
				    flags);
				if ((res = (*pa->pa_alloc)(org, flags)) != NULL)
					return (res);
			}
			break;
		}

		/*
		 * Drain all pools, except "org", that use this
		 * allocator.  We do this to reclaim VA space.
		 * pa_alloc is responsible for waiting for
		 * physical memory.
		 *
		 * XXX We risk looping forever if someone calls
		 * pool_destroy() on "start".  But there is no
		 * other way to have potentially sleeping pool_reclaim,
		 * non-sleeping locks on pool_allocator, and some
		 * stirring of drained pools in the allocator.
		 *
		 * XXX Maybe we should use pool_head_slock for locking
		 * the allocators?
		 */
		freed = 0;

		s = splvm();
		simple_lock(&pa->pa_slock);
		/*
		 * Rotate each pool to the tail as it is visited, so the
		 * walk terminates when the starting pool comes around
		 * again (or as soon as one reclaim frees something).
		 */
		pp = start = TAILQ_FIRST(&pa->pa_list);
		do {
			TAILQ_REMOVE(&pa->pa_list, pp, pr_alloc_list);
			TAILQ_INSERT_TAIL(&pa->pa_list, pp, pr_alloc_list);
			if (pp == org)
				continue;
			/* pool_reclaim() takes the pool's own lock. */
			simple_unlock(&pa->pa_slock);
			freed = pool_reclaim(pp);
			simple_lock(&pa->pa_slock);
		} while ((pp = TAILQ_FIRST(&pa->pa_list)) != start &&
		    freed == 0);

		if (freed == 0) {
			/*
			 * We set PA_WANT here, the caller will most likely
			 * sleep waiting for pages (if not, this won't hurt
			 * that much), and there is no way to set this in
			 * the caller without violating locking order.
			 */
			pa->pa_flags |= PA_WANT;
		}
		simple_unlock(&pa->pa_slock);
		splx(s);
	} while (freed);
	return (NULL);
}

/*
 * Release a page back to the allocator and, if PA_WANT is set, wake
 * every pool sleeping on this allocator for memory.
 */
void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;
	int s;

	(*pa->pa_free)(pp, v);

	s = splvm();
	simple_lock(&pa->pa_slock);
	if ((pa->pa_flags & PA_WANT) == 0) {
		simple_unlock(&pa->pa_slock);
		splx(s);
		return;
	}

	/* Wake all pools of this allocator that are waiting for pages. */
	TAILQ_FOREACH(pp, &pa->pa_list, pr_alloc_list) {
		simple_lock(&pp->pr_slock);
		if ((pp->pr_flags & PR_WANTED) != 0) {
			pp->pr_flags &= ~PR_WANTED;
			wakeup(pp);
		}
		simple_unlock(&pp->pr_slock);
	}
	pa->pa_flags &= ~PA_WANT;
	simple_unlock(&pa->pa_slock);
	splx(s);
}

void *
pool_page_alloc(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *) uvm_km_alloc_poolpage(waitok));
}

void
pool_page_free(struct pool *pp, void *v)
{

	uvm_km_free_poolpage((vaddr_t) v);
}

#ifdef POOL_SUBPAGE
/* Sub-page allocator, for machines with large hardware pages. */
void *
pool_subpage_alloc(struct pool *pp, int flags)
{

	return (pool_get(&psppool, flags));
}

void
pool_subpage_free(struct pool *pp, void *v)
{

	pool_put(&psppool, v);
}

/* We don't provide a real nointr allocator.  Maybe later. */
void *
pool_page_alloc_nointr(struct pool *pp, int flags)
{

	return (pool_subpage_alloc(pp, flags));
}

void
pool_page_free_nointr(struct pool *pp, void *v)
{

	pool_subpage_free(pp, v);
}
#else
void *
pool_page_alloc_nointr(struct pool *pp, int flags)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;

	return ((void *) uvm_km_alloc_poolpage1(kernel_map,
	    uvm.kernel_object, waitok));
}

void
pool_page_free_nointr(struct pool *pp, void *v)
{

	uvm_km_free_poolpage1(kernel_map, (vaddr_t) v);
}
#endif /* POOL_SUBPAGE */