/*	$OpenBSD: subr_pool.c,v 1.114 2013/02/17 17:39:29 miod Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>

#include <uvm/uvm.h>
#include <dev/rndvar.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_itemlist' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */
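
/*
 * Illustrative usage sketch (not part of the original file): a typical
 * consumer initializes a pool once and then allocates and frees fixed-size
 * items from it. The pool name "examplepl" and the struct "example_item"
 * are hypothetical; the interfaces (pool_init(), pool_setipl(), pool_get(),
 * pool_put(), pool_destroy()) are the ones defined below.
 *
 *	struct example_item { int val; };
 *	struct pool example_item_pool;
 *
 *	pool_init(&example_item_pool, sizeof(struct example_item), 0, 0, 0,
 *	    "examplepl", NULL);
 *	pool_setipl(&example_item_pool, IPL_HIGH);
 *
 *	struct example_item *it = pool_get(&example_item_pool,
 *	    PR_WAITOK | PR_ZERO);
 *	...
 *	pool_put(&example_item_pool, it);
 *	pool_destroy(&example_item_pool);
 */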

/* List of all pools */
TAILQ_HEAD(,pool) pool_head = TAILQ_HEAD_INITIALIZER(pool_head);

/* Private pool for page header structures */
struct pool phpool;

struct pool_item_header {
	/* Page headers */
	LIST_ENTRY(pool_item_header)
				ph_pagelist;	/* pool page list */
	TAILQ_HEAD(,pool_item)	ph_itemlist;	/* chunk list for this page */
	RB_ENTRY(pool_item_header)
				ph_node;	/* Off-page page headers */
	int			ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	caddr_t			ph_colored;	/* page's colored address */
	int			ph_pagesize;
	int			ph_magic;
};

struct pool_item {
#ifdef DIAGNOSTIC
	u_int32_t pi_magic;
#endif
	/* Other entries use only this list entry */
	TAILQ_ENTRY(pool_item)	pi_list;
};

#ifdef DEADBEEF1
#define	PI_MAGIC DEADBEEF1
#else
#define	PI_MAGIC 0xdeadbeef
#endif

#ifdef POOL_DEBUG
int	pool_debug = 1;
#else
int	pool_debug = 0;
#endif

#define	POOL_NEEDS_CATCHUP(pp)						\
	((pp)->pr_nitems < (pp)->pr_minitems)

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;

int	 pool_catchup(struct pool *);
void	 pool_prime_page(struct pool *, caddr_t, struct pool_item_header *);
void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int);
void	 pool_do_put(struct pool *, void *);
void	 pr_rmpage(struct pool *, struct pool_item_header *,
	    struct pool_pagelist *);
int	 pool_chk_page(struct pool *, struct pool_item_header *, int);
int	 pool_chk(struct pool *);
struct pool_item_header *pool_alloc_item_header(struct pool *, caddr_t , int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * XXX - quick hack. For pools with large items we want to use a special
 * allocator. For now, instead of having the allocator figure out
 * the allocation size from the pool (which can be done trivially
 * with round_page(pr_itemsperpage * pr_size)) which would require
 * lots of changes everywhere, we just create allocators for each
 * size. We limit those to 128 pages.
 */
#define	POOL_LARGE_MAXPAGES 128
struct pool_allocator pool_allocator_large[POOL_LARGE_MAXPAGES];
struct pool_allocator pool_allocator_large_ni[POOL_LARGE_MAXPAGES];
void	*pool_large_alloc(struct pool *, int, int *);
void	 pool_large_free(struct pool *, void *);
void	*pool_large_alloc_ni(struct pool *, int, int *);
void	 pool_large_free_ni(struct pool *, void *);


#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
	     /* __attribute__((__format__(__kprintf__,1,2))) */);
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
	     /* __attribute__((__format__(__kprintf__,1,2))) */);
#endif

#define pool_sleep(pl) msleep(pl, &pl->pr_mtx, PSWP, pl->pr_wchan, 0)

static __inline int
phtree_compare(struct pool_item_header *a, struct pool_item_header *b)
{
	long diff = (vaddr_t)a->ph_page - (vaddr_t)b->ph_page;
	if (diff < 0)
		return -(-diff >= a->ph_pagesize);
	else if (diff > 0)
		return (diff >= b->ph_pagesize);
	else
		return (0);
}

RB_PROTOTYPE(phtree, pool_item_header, ph_node, phtree_compare);
RB_GENERATE(phtree, pool_item_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
 */
static __inline struct pool_item_header *
pr_find_pagehead(struct pool *pp, void *v)
{
	struct pool_item_header *ph, tmp;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0) {
		caddr_t page;

		page = (caddr_t)((vaddr_t)v & pp->pr_alloc->pa_pagemask);

		return ((struct pool_item_header *)(page + pp->pr_phoffset));
	}

	/*
	 * The trick we're using in the tree compare function is to compare
	 * two elements equal when they overlap. We want to return the
	 * page header that belongs to the element just before this address.
	 * We don't want this element to compare equal to the next element,
	 * so the compare function takes the pagesize from the lower element.
	 * If this header is the lower, its pagesize is zero, so it can't
	 * overlap with the next header. But if the header we're looking for
	 * is lower, we'll use its pagesize and it will overlap and return
	 * equal.
	 */
	tmp.ph_page = v;
	tmp.ph_pagesize = 0;
	ph = RB_FIND(phtree, &pp->pr_phtree, &tmp);

	if (ph) {
		KASSERT(ph->ph_page <= (caddr_t)v);
		KASSERT(ph->ph_page + ph->ph_pagesize > (caddr_t)v);
	}
	return ph;
}

/*
 * Remove a page from the pool.
 */
void
pr_rmpage(struct pool *pp, struct pool_item_header *ph,
    struct pool_pagelist *pq)
{

	/*
	 * If the page was idle, decrement the idle page count.
	 */
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (pp->pr_nidle == 0)
			panic("pr_rmpage: nidle inconsistent");
		if (pp->pr_nitems < pp->pr_itemsperpage)
			panic("pr_rmpage: nitems inconsistent");
#endif
		pp->pr_nidle--;
	}

	pp->pr_nitems -= pp->pr_itemsperpage;

	/*
	 * Unlink a page from the pool and release it (or queue it for release).
	 */
	LIST_REMOVE(ph, ph_pagelist);
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		RB_REMOVE(phtree, &pp->pr_phtree, ph);
	pp->pr_npages--;
	pp->pr_npagefree++;
	pool_update_curpage(pp);

	if (pq) {
		LIST_INSERT_HEAD(pq, ph, ph_pagelist);
	} else {
		pool_allocator_free(pp, ph->ph_page);
		if ((pp->pr_roflags & PR_PHINPAGE) == 0)
			pool_put(&phpool, ph);
	}
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off, slack;

#ifdef MALLOC_DEBUG
	if ((flags & PR_DEBUG) && (ioff != 0 || align != 0))
		flags &= ~PR_DEBUG;
#endif
	/*
	 * Check arguments and construct default values.
	 */
	if (palloc == NULL) {
		if (size > PAGE_SIZE) {
			int psize;

			/*
			 * XXX - should take align into account as well.
			 */
			if (size == round_page(size))
				psize = size / PAGE_SIZE;
			else
				psize = PAGE_SIZE / roundup(size % PAGE_SIZE,
				    1024);
			if (psize > POOL_LARGE_MAXPAGES)
				psize = POOL_LARGE_MAXPAGES;
			if (flags & PR_WAITOK)
				palloc = &pool_allocator_large_ni[psize-1];
			else
				palloc = &pool_allocator_large[psize-1];
			if (palloc->pa_pagesz == 0) {
				palloc->pa_pagesz = psize * PAGE_SIZE;
				if (flags & PR_WAITOK) {
					palloc->pa_alloc = pool_large_alloc_ni;
					palloc->pa_free = pool_large_free_ni;
				} else {
					palloc->pa_alloc = pool_large_alloc;
					palloc->pa_free = pool_large_free;
				}
			}
		} else {
			palloc = &pool_allocator_nointr;
		}
	}
	if (palloc->pa_pagesz == 0) {
		palloc->pa_pagesz = PAGE_SIZE;
	}
	if (palloc->pa_pagemask == 0) {
		palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
		palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
	}

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);
#ifdef DIAGNOSTIC
	if (size > palloc->pa_pagesz)
		panic("pool_init: pool item size (%lu) too large",
		    (u_long)size);
#endif

	/*
	 * Initialize the pool structure.
	 */
	LIST_INIT(&pp->pr_emptypages);
	LIST_INIT(&pp->pr_fullpages);
	LIST_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_roflags = flags;
	pp->pr_flags = 0;
	pp->pr_size = size;
	pp->pr_align = align;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("pool_init: too much uptime");

	/* constructor, destructor, and arg */
	pp->pr_ctor = NULL;
	pp->pr_dtor = NULL;
	pp->pr_arg = NULL;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 * We use 1/16 of the page size as the threshold (XXX: tune)
	 */
	if (pp->pr_size < palloc->pa_pagesz/16 && pp->pr_size < PAGE_SIZE) {
		/* Use the end of the page for the page header */
		pp->pr_roflags |= PR_PHINPAGE;
		pp->pr_phoffset = off = palloc->pa_pagesz -
		    ALIGN(sizeof(struct pool_item_header));
	} else {
		/* The page header will be taken from our page header pool */
		pp->pr_phoffset = 0;
		off = palloc->pa_pagesz;
		RB_INIT(&pp->pr_phtree);
	}

	/*
	 * Alignment is to take place at `ioff' within the item. This means
	 * we must reserve up to `align - 1' bytes on the page to allow
	 * appropriate positioning of each item.
	 *
	 * Silently enforce `0 <= ioff < align'.
	 */
	pp->pr_itemoffset = ioff = ioff % align;
	pp->pr_itemsperpage = (off - ((align - ioff) % align)) / pp->pr_size;
	KASSERT(pp->pr_itemsperpage != 0);

	/*
	 * Use the slack between the chunks and the page header
	 * for "cache coloring".
	 */
	slack = off - pp->pr_itemsperpage * pp->pr_size;
	pp->pr_maxcolor = (slack / align) * align;
	pp->pr_curcolor = 0;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = -1;
	mtx_init(&pp->pr_mtx, IPL_NONE);

	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_item_header), 0, 0,
		    0, "phpool", NULL);
		pool_setipl(&phpool, IPL_HIGH);
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &kp_dirty;

	/* Insert this into the list of all pools. */
	TAILQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
}

void
pool_setipl(struct pool *pp, int ipl)
{
	pp->pr_ipl = ipl;
	mtx_init(&pp->pr_mtx, ipl);
}

/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_item_header *ph;

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("pool_destroy: pool busy: still out: %u", pp->pr_nout);
#endif

	/* Remove all pages */
	while ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		pr_rmpage(pp, ph, NULL);
	KASSERT(LIST_EMPTY(&pp->pr_fullpages));
	KASSERT(LIST_EMPTY(&pp->pr_partpages));

	/* Remove from global pool list */
	TAILQ_REMOVE(&pool_head, pp, pr_poollist);
}

struct pool_item_header *
pool_alloc_item_header(struct pool *pp, caddr_t storage, int flags)
{
	struct pool_item_header *ph;

	if ((pp->pr_roflags & PR_PHINPAGE) != 0)
		ph = (struct pool_item_header *)(storage + pp->pr_phoffset);
	else
		ph = pool_get(&phpool, (flags & ~(PR_WAITOK | PR_ZERO)) |
		    PR_NOWAIT);
	if (pool_debug && ph != NULL)
		ph->ph_magic = PI_MAGIC;
	return (ph);
}

/*
 * Grab an item from the pool; must be called at appropriate spl level
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v;

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));

#ifdef DIAGNOSTIC
	if ((flags & PR_WAITOK) != 0)
		assertwaitok();
#endif /* DIAGNOSTIC */

	mtx_enter(&pp->pr_mtx);
#ifdef POOL_DEBUG
	if (pp->pr_roflags & PR_DEBUGCHK) {
		if (pool_chk(pp))
			panic("before pool_get");
	}
#endif
	v = pool_do_get(pp, flags);
#ifdef POOL_DEBUG
	if (pp->pr_roflags & PR_DEBUGCHK) {
		if (pool_chk(pp))
			panic("after pool_get");
	}
#endif
	if (v != NULL)
		pp->pr_nget++;
	mtx_leave(&pp->pr_mtx);
	if (v == NULL)
		return (v);

	if (pp->pr_ctor) {
		if (flags & PR_ZERO)
			panic("pool_get: PR_ZERO when ctor set");
		if (pp->pr_ctor(pp->pr_arg, v, flags)) {
			mtx_enter(&pp->pr_mtx);
			pp->pr_nget--;
			pool_do_put(pp, v);
			mtx_leave(&pp->pr_mtx);
			v = NULL;
		}
	} else {
		if (flags & PR_ZERO)
			memset(v, 0, pp->pr_size);
	}
	return (v);
}

void *
pool_do_get(struct pool *pp, int flags)
{
	struct pool_item *pi;
	struct pool_item_header *ph;
	void *v;
	int slowdown = 0;
#if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
	int i, *ip;
#endif

#ifdef MALLOC_DEBUG
	if (pp->pr_roflags & PR_DEBUG) {
		void *addr;

		addr = NULL;
		debug_malloc(pp->pr_size, M_DEBUG,
		    (flags & PR_WAITOK) ? M_WAITOK : M_NOWAIT, &addr);
		return (addr);
	}
#endif

startover:
	/*
	 * Check to see if we've reached the hard limit. If we have,
	 * and we can wait, then wait until an item has been returned to
	 * the pool.
	 */
#ifdef DIAGNOSTIC
	if (pp->pr_nout > pp->pr_hardlimit)
		panic("pool_do_get: %s: crossed hard limit", pp->pr_wchan);
#endif
	if (pp->pr_nout == pp->pr_hardlimit) {
		if ((flags & PR_WAITOK) && !(flags & PR_LIMITFAIL)) {
			/*
			 * XXX: A warning isn't logged in this case. Should
			 * it be?
			 */
			pp->pr_flags |= PR_WANTED;
			pool_sleep(pp);
			goto startover;
		}

		/*
		 * Log a message that the hard limit has been hit.
		 */
		if (pp->pr_hardlimit_warning != NULL &&
		    ratecheck(&pp->pr_hardlimit_warning_last,
		    &pp->pr_hardlimit_ratecap))
			log(LOG_ERR, "%s\n", pp->pr_hardlimit_warning);

		pp->pr_nfail++;
		return (NULL);
	}

	/*
	 * The convention we use is that if `curpage' is not NULL, then
	 * it points at a non-empty bucket. In particular, `curpage'
	 * never points at a page header which has PR_PHINPAGE set and
	 * has no items in its bucket.
	 */
	if ((ph = pp->pr_curpage) == NULL) {
#ifdef DIAGNOSTIC
		if (pp->pr_nitems != 0) {
			printf("pool_do_get: %s: curpage NULL, nitems %u\n",
			    pp->pr_wchan, pp->pr_nitems);
			panic("pool_do_get: nitems inconsistent");
		}
#endif

		/*
		 * Call the back-end page allocator for more memory.
		 */
		v = pool_allocator_alloc(pp, flags, &slowdown);
		if (v != NULL)
			ph = pool_alloc_item_header(pp, v, flags);

		if (v == NULL || ph == NULL) {
			if (v != NULL)
				pool_allocator_free(pp, v);

			if ((flags & PR_WAITOK) == 0) {
				pp->pr_nfail++;
				return (NULL);
			}

			/*
			 * Wait for items to be returned to this pool.
			 *
			 * XXX: maybe we should wake up once a second and
			 * try again?
			 */
			pp->pr_flags |= PR_WANTED;
			pool_sleep(pp);
			goto startover;
		}

		/* We have more memory; add it to the pool */
		pool_prime_page(pp, v, ph);
		pp->pr_npagealloc++;

		if (slowdown && (flags & PR_WAITOK)) {
			mtx_leave(&pp->pr_mtx);
			yield();
			mtx_enter(&pp->pr_mtx);
		}

		/* Start the allocation process over. */
		goto startover;
	}
	if ((v = pi = TAILQ_FIRST(&ph->ph_itemlist)) == NULL) {
		panic("pool_do_get: %s: page empty", pp->pr_wchan);
	}
#ifdef DIAGNOSTIC
	if (pp->pr_nitems == 0) {
		printf("pool_do_get: %s: items on itemlist, nitems %u\n",
		    pp->pr_wchan, pp->pr_nitems);
		panic("pool_do_get: nitems inconsistent");
	}
#endif

#ifdef DIAGNOSTIC
	if (pi->pi_magic != PI_MAGIC)
		panic("pool_do_get(%s): free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%x",
		    pp->pr_wchan, ph->ph_page, pi, 0, pi->pi_magic);
#ifdef POOL_DEBUG
	if (pool_debug && ph->ph_magic) {
		for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
		    i < pp->pr_size / sizeof(int); i++) {
			if (ip[i] != ph->ph_magic) {
				panic("pool_do_get(%s): free list modified: "
				    "page %p; item addr %p; offset 0x%zx=0x%x",
				    pp->pr_wchan, ph->ph_page, pi,
				    i * sizeof(int), ip[i]);
			}
		}
	}
#endif /* POOL_DEBUG */
#endif /* DIAGNOSTIC */

	/*
	 * Remove from item list.
	 */
	TAILQ_REMOVE(&ph->ph_itemlist, pi, pi_list);
	pp->pr_nitems--;
	pp->pr_nout++;
	if (ph->ph_nmissing == 0) {
#ifdef DIAGNOSTIC
		if (pp->pr_nidle == 0)
			panic("pool_do_get: nidle inconsistent");
#endif
		pp->pr_nidle--;

		/*
		 * This page was previously empty. Move it to the list of
		 * partially-full pages. This page is already curpage.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
	}
	ph->ph_nmissing++;
	if (TAILQ_EMPTY(&ph->ph_itemlist)) {
#ifdef DIAGNOSTIC
		if (ph->ph_nmissing != pp->pr_itemsperpage) {
			panic("pool_do_get: %s: nmissing inconsistent",
			    pp->pr_wchan);
		}
#endif
		/*
		 * This page is now full. Move it to the full list
		 * and select a new current page.
		 */
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_fullpages, ph, ph_pagelist);
		pool_update_curpage(pp);
	}

	/*
	 * If we have a low water mark and we are now below that low
	 * water mark, add more items to the pool.
	 */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning? Should we set up a timeout
		 * to try again in a second or so? The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}
	return (v);
}

/*
 * Return resource to the pool; must be called at appropriate spl level
 */
void
pool_put(struct pool *pp, void *v)
{
	if (pp->pr_dtor)
		pp->pr_dtor(pp->pr_arg, v);
	mtx_enter(&pp->pr_mtx);
#ifdef POOL_DEBUG
	if (pp->pr_roflags & PR_DEBUGCHK) {
		if (pool_chk(pp))
			panic("before pool_put");
	}
#endif
	pool_do_put(pp, v);
#ifdef POOL_DEBUG
	if (pp->pr_roflags & PR_DEBUGCHK) {
		if (pool_chk(pp))
			panic("after pool_put");
	}
#endif
	pp->pr_nput++;
	mtx_leave(&pp->pr_mtx);
}

/*
 * Internal version of pool_put().
 */
void
pool_do_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_item_header *ph;
#if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
	int i, *ip;
#endif

	if (v == NULL)
		panic("pool_put of NULL");

#ifdef MALLOC_DEBUG
	if (pp->pr_roflags & PR_DEBUG) {
		debug_free(v, M_DEBUG);
		return;
	}
#endif

#ifdef DIAGNOSTIC
	if (pp->pr_ipl != -1)
		splassert(pp->pr_ipl);

	if (pp->pr_nout == 0) {
		printf("pool %s: putting with none out\n",
		    pp->pr_wchan);
		panic("pool_do_put");
	}
#endif

	if ((ph = pr_find_pagehead(pp, v)) == NULL) {
		panic("pool_do_put: %s: page header missing", pp->pr_wchan);
	}

	/*
	 * Return to item list.
	 */
#ifdef DIAGNOSTIC
	pi->pi_magic = PI_MAGIC;
#ifdef POOL_DEBUG
	if (ph->ph_magic) {
		for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
		    i < pp->pr_size / sizeof(int); i++)
			ip[i] = ph->ph_magic;
	}
#endif /* POOL_DEBUG */
#endif /* DIAGNOSTIC */

	TAILQ_INSERT_HEAD(&ph->ph_itemlist, pi, pi_list);
	ph->ph_nmissing--;
	pp->pr_nitems++;
	pp->pr_nout--;

	/* Cancel "pool empty" condition if it exists */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (pp->pr_flags & PR_WANTED) {
		pp->pr_flags &= ~PR_WANTED;
		wakeup(pp);
	}

	/*
	 * If this page is now empty, do one of two things:
	 *
	 * (1) If we have more pages than the page high water mark,
	 *     free the page back to the system.
	 *
	 * (2) Otherwise, move the page to the empty page list.
	 *
	 * Either way, select a new current page (so we use a partially-full
	 * page if one is available).
	 */
	if (ph->ph_nmissing == 0) {
		pp->pr_nidle++;
		if (pp->pr_nidle > pp->pr_maxpages) {
			pr_rmpage(pp, ph, NULL);
		} else {
			LIST_REMOVE(ph, ph_pagelist);
			LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
			pool_update_curpage(pp);
		}
	}

	/*
	 * If the page was previously completely full, move it to the
	 * partially-full list and make it the current page. The next
	 * allocation will get the item from this page, instead of
	 * further fragmenting the pool.
	 */
	else if (ph->ph_nmissing == (pp->pr_itemsperpage - 1)) {
		LIST_REMOVE(ph, ph_pagelist);
		LIST_INSERT_HEAD(&pp->pr_partpages, ph, ph_pagelist);
		pp->pr_curpage = ph;
	}
}

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_item_header *ph;
	caddr_t cp;
	int newpages;
	int slowdown;

	mtx_enter(&pp->pr_mtx);
	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown);
		if (cp != NULL)
			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
		if (cp == NULL || ph == NULL) {
			if (cp != NULL)
				pool_allocator_free(pp, cp);
			break;
		}

		pool_prime_page(pp, cp, ph);
		pp->pr_npagealloc++;
		pp->pr_minpages++;
	}

	if (pp->pr_minpages >= pp->pr_maxpages)
		pp->pr_maxpages = pp->pr_minpages + 1;	/* XXX */

	mtx_leave(&pp->pr_mtx);
	return (0);
}

/*
 * Add a page worth of items to the pool.
 *
 * Note, we must be called with the pool descriptor LOCKED.
 */
void
pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
{
	struct pool_item *pi;
	caddr_t cp = storage;
	unsigned int align = pp->pr_align;
	unsigned int ioff = pp->pr_itemoffset;
	int n;
#if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
	int i, *ip;
#endif

	/*
	 * Insert page header.
	 */
	LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
	TAILQ_INIT(&ph->ph_itemlist);
	ph->ph_page = storage;
	ph->ph_pagesize = pp->pr_alloc->pa_pagesz;
	ph->ph_nmissing = 0;
	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
		RB_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nidle++;

	/*
	 * Color this page.
	 */
	cp = (caddr_t)(cp + pp->pr_curcolor);
	if ((pp->pr_curcolor += align) > pp->pr_maxcolor)
		pp->pr_curcolor = 0;

	/*
	 * Adjust storage to apply alignment to `pr_itemoffset' in each item.
	 */
	if (ioff != 0)
		cp = (caddr_t)(cp + (align - ioff));
	ph->ph_colored = cp;

	/*
	 * Insert remaining chunks on the bucket list.
	 */
	n = pp->pr_itemsperpage;
	pp->pr_nitems += n;

	while (n--) {
		pi = (struct pool_item *)cp;

		KASSERT(((((vaddr_t)pi) + ioff) & (align - 1)) == 0);

		/* Insert on page list */
		TAILQ_INSERT_TAIL(&ph->ph_itemlist, pi, pi_list);

#ifdef DIAGNOSTIC
		pi->pi_magic = PI_MAGIC;
#ifdef POOL_DEBUG
		if (ph->ph_magic) {
			for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
			    i < pp->pr_size / sizeof(int); i++)
				ip[i] = ph->ph_magic;
		}
#endif /* POOL_DEBUG */
#endif /* DIAGNOSTIC */
		cp = (caddr_t)(cp + pp->pr_size);
	}

	/*
	 * If the pool was depleted, point at the new page.
	 */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

/*
 * Used by pool_get() when nitems drops below the low water mark. This
 * is used to catch up pr_nitems with the low water mark.
 *
 * Note we never wait for memory here, we let the caller decide what to do.
 */
int
pool_catchup(struct pool *pp)
{
	struct pool_item_header *ph;
	caddr_t cp;
	int error = 0;
	int slowdown;

	while (POOL_NEEDS_CATCHUP(pp)) {
		/*
		 * Call the page back-end allocator for more memory.
		 */
		cp = pool_allocator_alloc(pp, PR_NOWAIT, &slowdown);
		if (cp != NULL)
			ph = pool_alloc_item_header(pp, cp, PR_NOWAIT);
		if (cp == NULL || ph == NULL) {
			if (cp != NULL)
				pool_allocator_free(pp, cp);
			error = ENOMEM;
			break;
		}
		pool_prime_page(pp, cp, ph);
		pp->pr_npagealloc++;
	}

	return (error);
}

void
pool_update_curpage(struct pool *pp)
{

	pp->pr_curpage = LIST_FIRST(&pp->pr_partpages);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = LIST_FIRST(&pp->pr_emptypages);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{

	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	mtx_enter(&pp->pr_mtx);
	/* Make sure we're caught up with the newly-set low water mark. */
	if (POOL_NEEDS_CATCHUP(pp) && pool_catchup(pp) != 0) {
		/*
		 * XXX: Should we log a warning? Should we set up a timeout
		 * to try again in a second or so? The latter could break
		 * a caller's assumptions about interrupt protection, etc.
		 */
	}
	mtx_leave(&pp->pr_mtx);
}

void
pool_sethiwat(struct pool *pp, int n)
{

	pp->pr_maxpages = (n == 0)
		? 0
		: roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
	int error = 0;

	if (n < pp->pr_nout) {
		error = EINVAL;
		goto done;
	}

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmsg;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

done:
	return (error);
}

void
pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
{
	pp->pr_crange = mode;
}

void
pool_set_ctordtor(struct pool *pp, int (*ctor)(void *, void *, int),
    void (*dtor)(void *, void *), void *arg)
{
	pp->pr_ctor = ctor;
	pp->pr_dtor = dtor;
	pp->pr_arg = arg;
}
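
/*
 * Illustrative sketch (not part of the original file): how a subsystem
 * might attach a constructor/destructor pair with pool_set_ctordtor().
 * The names "example_pool", "example_obj", "example_ctor" and "example_dtor"
 * are hypothetical. pool_get() calls the ctor after allocating an item and
 * treats a non-zero return as failure (the item is put back and NULL is
 * returned); pool_put() calls the dtor before the item goes back on the
 * free list. PR_ZERO must not be combined with a ctor, since pool_get()
 * panics in that case.
 *
 *	struct example_obj { int refs; };
 *	struct pool example_pool;
 *
 *	int
 *	example_ctor(void *arg, void *v, int flags)
 *	{
 *		struct example_obj *o = v;
 *
 *		o->refs = 0;
 *		return (0);	// non-zero would fail the pool_get()
 *	}
 *
 *	void
 *	example_dtor(void *arg, void *v)
 *	{
 *		// undo whatever the ctor set up
 *	}
 *
 *	pool_init(&example_pool, sizeof(struct example_obj), 0, 0, 0,
 *	    "examplpl", NULL);
 *	pool_set_ctordtor(&example_pool, example_ctor, example_dtor, NULL);
 *
 *	o = pool_get(&example_pool, PR_WAITOK);	// ctor runs here
 *	...
 *	pool_put(&example_pool, o);			// dtor runs here
 */
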
/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
	struct pool_item_header *ph, *phnext;
	struct pool_pagelist pq;

	LIST_INIT(&pq);

	mtx_enter(&pp->pr_mtx);
	for (ph = LIST_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = LIST_NEXT(ph, ph_pagelist);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		KASSERT(ph->ph_nmissing == 0);

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pr_rmpage(pp, ph, &pq);
	}
	mtx_leave(&pp->pr_mtx);

	if (LIST_EMPTY(&pq))
		return (0);
	while ((ph = LIST_FIRST(&pq)) != NULL) {
		LIST_REMOVE(ph, ph_pagelist);
		pool_allocator_free(pp, ph->ph_page);
		if (pp->pr_roflags & PR_PHINPAGE)
			continue;
		pool_put(&phpool, ph);
	}

	return (1);
}

/*
 * Release all complete pages that have not been used recently
 * from all pools.
 */
void
pool_reclaim_all(void)
{
	struct pool *pp;
	int s;

	s = splhigh();
	TAILQ_FOREACH(pp, &pool_head, pr_poollist)
		pool_reclaim(pp);
	splx(s);
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_output.h>

/*
 * Diagnostic helpers.
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) /* __attribute__((__format__(__kprintf__,1,2))) */)
{
	pool_print1(pp, modif, pr);
}

void
pool_print_pagelist(struct pool_pagelist *pl,
    int (*pr)(const char *, ...) /* __attribute__((__format__(__kprintf__,1,2))) */)
{
	struct pool_item_header *ph;
#ifdef DIAGNOSTIC
	struct pool_item *pi;
#endif

	LIST_FOREACH(ph, pl, ph_pagelist) {
		(*pr)("\t\tpage %p, nmissing %d\n",
		    ph->ph_page, ph->ph_nmissing);
#ifdef DIAGNOSTIC
		TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
			if (pi->pi_magic != PI_MAGIC) {
				(*pr)("\t\t\titem %p, magic 0x%x\n",
				    pi, pi->pi_magic);
			}
		}
#endif
	}
}

void
pool_print1(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) /* __attribute__((__format__(__kprintf__,1,2))) */)
{
	struct pool_item_header *ph;
	int print_pagelist = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'p')
			print_pagelist = 1;
		modif++;
	}

	(*pr)("POOL %s: size %u, align %u, ioff %u, roflags 0x%08x\n",
	    pp->pr_wchan, pp->pr_size, pp->pr_align, pp->pr_itemoffset,
	    pp->pr_roflags);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		return;

	if ((ph = LIST_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(&pp->pr_emptypages, pr);
	if ((ph = LIST_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(&pp->pr_fullpages, pr);
	if ((ph = LIST_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(&pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
}

void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-12s %18s %18s\n",
		    "Name", "Address", "Allocator");

	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		if (!pp->pr_nget)
			continue;

		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

#define PRWORD(ovflw, fmt, width, fixed, val) do {	\
	(ovflw) += db_printf((fmt),			\
	    (width) - (fixed) - (ovflw) > 0 ?		\
	    (width) - (fixed) - (ovflw) : 0,		\
	    (val)) - (width);				\
	if ((ovflw) < 0)				\
		(ovflw) = 0;				\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		pool_chk(pp);
	}
}
#endif /* DDB */

#if defined(POOL_DEBUG) || defined(DDB)
int
pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
#if defined(DIAGNOSTIC) && defined(POOL_DEBUG)
	int i, *ip;
#endif
	const char *label = pp->pr_wchan;

	page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask);
	if (page != ph->ph_page &&
	    (pp->pr_roflags & PR_PHINPAGE) != 0) {
		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	for (pi = TAILQ_FIRST(&ph->ph_itemlist), n = 0;
	     pi != NULL;
	     pi = TAILQ_NEXT(pi,pi_list), n++) {

#ifdef DIAGNOSTIC
		if (pi->pi_magic != PI_MAGIC) {
			printf("%s: ", label);
			printf("pool(%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%x\n",
			    pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}
#ifdef POOL_DEBUG
		if (pool_debug && ph->ph_magic) {
			for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
			    i < pp->pr_size / sizeof(int); i++) {
				if (ip[i] != ph->ph_magic) {
					printf("pool(%s): free list modified: "
					    "page %p; item ordinal %d; addr %p "
					    "(p %p); offset 0x%zx=0x%x\n",
					    pp->pr_wchan, ph->ph_page, n, pi,
					    page, i * sizeof(int), ip[i]);
				}
			}
		}

#endif /* POOL_DEBUG */
#endif /* DIAGNOSTIC */
		page =
		    (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask);
		if (page == ph->ph_page)
			continue;

		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " item ordinal %d; addr %p (p %p)\n", pp,
		    pp->pr_wchan, ph->ph_page, n, pi, page);
		return 1;
	}
	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d items per page\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    pp->pr_itemsperpage);
		return 1;
	}
	if (expected >= 0 && n != expected) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d expected\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    expected);
		return 1;
	}
	return 0;
}

int
pool_chk(struct pool *pp)
{
	struct pool_item_header *ph;
	int r = 0;

	LIST_FOREACH(ph, &pp->pr_emptypages, ph_pagelist)
		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist)
		r += pool_chk_page(pp, ph, 0);
	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist)
		r += pool_chk_page(pp, ph, -1);

	return (r);
}
#endif /* defined(POOL_DEBUG) || defined(DDB) */

#ifdef DDB
void
pool_walk(struct pool *pp, int full,
    int (*pr)(const char *, ...) /* __attribute__((__format__(__kprintf__,1,2))) */,
    void (*func)(void *, int, int (*)(const char *, ...) /* __attribute__((__format__(__kprintf__,1,2))) */))
{
	struct pool_item_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	LIST_FOREACH(ph, &pp->pr_fullpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	LIST_FOREACH(ph, &pp->pr_partpages, ph_pagelist) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			TAILQ_FOREACH(pi, &ph->ph_itemlist, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
#endif

/*
 * We have three different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 */
int
sysctl_dopool(int *name, u_int namelen, char *where, size_t *sizep)
{
	struct pool *pp, *foundpool = NULL;
	size_t buflen = where != NULL ? *sizep : 0;
	int npools = 0, s;
	unsigned int lookfor;
	size_t len;

	switch (*name) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1 || buflen != sizeof(int))
			return (EINVAL);
		lookfor = 0;
		break;
	case KERN_POOL_NAME:
		if (namelen != 2 || buflen < 1)
			return (EINVAL);
		lookfor = name[1];
		break;
	case KERN_POOL_POOL:
		if (namelen != 2 || buflen != sizeof(struct pool))
			return (EINVAL);
		lookfor = name[1];
		break;
	default:
		return (EINVAL);
	}

	s = splvm();

	TAILQ_FOREACH(pp, &pool_head, pr_poollist) {
		npools++;
		if (lookfor == pp->pr_serial) {
			foundpool = pp;
			break;
		}
	}

	splx(s);

	if (*name != KERN_POOL_NPOOLS && foundpool == NULL)
		return (ENOENT);

	switch (*name) {
	case KERN_POOL_NPOOLS:
		return copyout(&npools, where, buflen);
	case KERN_POOL_NAME:
		len = strlen(foundpool->pr_wchan) + 1;
		if (*sizep < len)
			return (ENOMEM);
		*sizep = len;
		return copyout(foundpool->pr_wchan, where, len);
	case KERN_POOL_POOL:
		return copyout(foundpool, where, buflen);
	}
	/* NOTREACHED */
	return (0);	/* XXX - Stupid gcc */
}

/*
 * Pool backend allocators.
 *
 * Each pool has a backend allocator that handles allocation and deallocation.
 */
void	*pool_page_alloc(struct pool *, int, int *);
void	 pool_page_free(struct pool *, void *);

/*
 * Safe for interrupts; name preserved for compat. This is the default
 * allocator.
 */
struct pool_allocator pool_allocator_nointr = {
	pool_page_alloc, pool_page_free, 0,
};

/*
 * XXX - we have at least three different resources for the same allocation
 * and each resource can be depleted. First we have the ready elements in
 * the pool. Then we have the resource (typically a vm_map) for this
 * allocator, then we have physical memory. Waiting for any of these can
 * be unnecessary when any other is freed, but the kernel doesn't support
 * sleeping on multiple addresses, so we have to fake. The caller sleeps on
 * the pool (so that we can be awakened when an item is returned to the pool),
 * but we set PA_WANT on the allocator. When a page is returned to
 * the allocator and PA_WANT is set pool_allocator_free will wakeup all
 * sleeping pools belonging to this allocator. (XXX - thundering herd).
 * We also wake up the allocator in case someone without a pool (malloc)
 * is sleeping waiting for this allocator.
 */

void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	boolean_t waitok = (flags & PR_WAITOK) ? TRUE : FALSE;
	void *v;

	if (waitok)
		mtx_leave(&pp->pr_mtx);
	v = pp->pr_alloc->pa_alloc(pp, flags, slowdown);
	if (waitok)
		mtx_enter(&pp->pr_mtx);

	return (v);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	(*pa->pa_free)(pp, v);
}

void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = (flags & PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(PAGE_SIZE, &kv_page, pp->pr_crange, &kd));
}

void
pool_page_free(struct pool *pp, void *v)
{
	km_free(v, PAGE_SIZE, &kv_page, pp->pr_crange);
}

void *
pool_large_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;
	int s;

	kd.kd_waitok = (flags & PR_WAITOK);
	kd.kd_slowdown = slowdown;

	s = splvm();
	v = km_alloc(pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange,
	    &kd);
	splx(s);

	return (v);
}

void
pool_large_free(struct pool *pp, void *v)
{
	int s;

	s = splvm();
	km_free(v, pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange);
	splx(s);
}

void *
pool_large_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = (flags & PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange, &kd));
}

void
pool_large_free_ni(struct pool *pp, void *v)
{
	km_free(v, pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange);
}