1 /* $OpenBSD: subr_pool.c,v 1.237 2025/01/04 09:26:01 mvs Exp $ */
2 /* $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $ */
3
4 /*-
5 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
10 * Simulation Facility, NASA Ames Research Center.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/errno.h>
37 #include <sys/malloc.h>
38 #include <sys/pool.h>
39 #include <sys/proc.h>
40 #include <sys/sysctl.h>
41 #include <sys/task.h>
42 #include <sys/time.h>
43 #include <sys/timeout.h>
44 #include <sys/percpu.h>
45 #include <sys/tracepoint.h>
46
47 #include <uvm/uvm_extern.h>
48
49 /*
50 * Pool resource management utility.
51 *
52 * Memory is allocated in pages which are split into pieces according to
53 * the pool item size. Each page is kept on one of three lists in the
54 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
55 * for empty, full and partially-full pages respectively. The individual
56 * pool items are on a linked list headed by `ph_items' in each page
57 * header. The memory for building the page list is either taken from
58 * the allocated pages themselves (for small pool items) or taken from
59 * an internal pool of page headers (`phpool').
60 */
61
62 /* List of all pools */
63 SIMPLEQ_HEAD(,pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);
64
65 /*
66 * Every pool gets a unique serial number assigned to it. If this counter
67 * wraps, we're screwed, but we shouldn't create so many pools anyway.
68 */
69 unsigned int pool_serial;
70 unsigned int pool_count;
71
72 /* Lock the previous variables making up the global pool state */
73 struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");
74
75 /* Private pool for page header structures */
76 struct pool phpool;
77
78 struct pool_lock_ops {
79 void (*pl_init)(struct pool *, union pool_lock *,
80 const struct lock_type *);
81 void (*pl_enter)(union pool_lock *);
82 int (*pl_enter_try)(union pool_lock *);
83 void (*pl_leave)(union pool_lock *);
84 void (*pl_assert_locked)(union pool_lock *);
85 void (*pl_assert_unlocked)(union pool_lock *);
86 int (*pl_sleep)(void *, union pool_lock *, int, const char *);
87 };
88
89 static const struct pool_lock_ops pool_lock_ops_mtx;
90 static const struct pool_lock_ops pool_lock_ops_rw;
91
92 #ifdef WITNESS
93 #define pl_init(pp, pl) do { \
94 static const struct lock_type __lock_type = { .lt_name = #pl }; \
95 (pp)->pr_lock_ops->pl_init(pp, pl, &__lock_type); \
96 } while (0)
97 #else /* WITNESS */
98 #define pl_init(pp, pl) (pp)->pr_lock_ops->pl_init(pp, pl, NULL)
99 #endif /* WITNESS */
100
101 static inline void
102 pl_enter(struct pool *pp, union pool_lock *pl)
103 {
104 pp->pr_lock_ops->pl_enter(pl);
105 }
106 static inline int
107 pl_enter_try(struct pool *pp, union pool_lock *pl)
108 {
109 return pp->pr_lock_ops->pl_enter_try(pl);
110 }
111 static inline void
112 pl_leave(struct pool *pp, union pool_lock *pl)
113 {
114 pp->pr_lock_ops->pl_leave(pl);
115 }
116 static inline void
117 pl_assert_locked(struct pool *pp, union pool_lock *pl)
118 {
119 pp->pr_lock_ops->pl_assert_locked(pl);
120 }
121 static inline void
122 pl_assert_unlocked(struct pool *pp, union pool_lock *pl)
123 {
124 pp->pr_lock_ops->pl_assert_unlocked(pl);
125 }
126 static inline int
127 pl_sleep(struct pool *pp, void *ident, union pool_lock *lock, int priority,
128 const char *wmesg)
129 {
130 return pp->pr_lock_ops->pl_sleep(ident, lock, priority, wmesg);
131 }
132
133 struct pool_item {
134 u_long pi_magic;
135 XSIMPLEQ_ENTRY(pool_item) pi_list;
136 };
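/*
 * A free item's pi_magic holds its own address XORed with the
 * per-page random ph_magic; pool_do_get() and pool_p_free()
 * recompute it to detect writes to freed items.
 */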
137 #define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)
138
139 struct pool_page_header {
140 /* Page headers */
141 TAILQ_ENTRY(pool_page_header)
142 ph_entry; /* pool page list */
143 XSIMPLEQ_HEAD(, pool_item)
144 ph_items; /* free items on the page */
145 RBT_ENTRY(pool_page_header)
146 ph_node; /* off-page page headers */
147 unsigned int ph_nmissing; /* # of chunks in use */
148 caddr_t ph_page; /* this page's address */
149 caddr_t ph_colored; /* page's colored address */
150 unsigned long ph_magic;
151 uint64_t ph_timestamp;
152 };
153 #define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
154 #define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)
155
156 #ifdef MULTIPROCESSOR
157 struct pool_cache_item {
158 struct pool_cache_item *ci_next; /* next item in list */
159 unsigned long ci_nitems; /* number of items in list */
160 TAILQ_ENTRY(pool_cache_item)
161 ci_nextl; /* entry in list of lists */
162 };
163
164 /* we store whether the cached item is poisoned in the high bit of nitems */
165 #define POOL_CACHE_ITEM_NITEMS_MASK 0x7ffffffUL
166 #define POOL_CACHE_ITEM_NITEMS_POISON 0x8000000UL
167
168 #define POOL_CACHE_ITEM_NITEMS(_ci) \
169 ((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK)
170
171 #define POOL_CACHE_ITEM_POISONED(_ci) \
172 ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON)
173
174 struct pool_cache {
175 struct pool_cache_item *pc_actv; /* active list of items */
176 unsigned long pc_nactv; /* actv head nitems cache */
177 struct pool_cache_item *pc_prev; /* previous list of items */
178
179 uint64_t pc_gen; /* generation number */
180 uint64_t pc_nget; /* # of successful requests */
181 uint64_t pc_nfail; /* # of unsuccessful reqs */
182 uint64_t pc_nput; /* # of releases */
183 uint64_t pc_nlget; /* # of list requests */
184 uint64_t pc_nlfail; /* # of fails getting a list */
185 uint64_t pc_nlput; /* # of list releases */
186
187 int pc_nout;
188 };
189
190 void *pool_cache_get(struct pool *);
191 void pool_cache_put(struct pool *, void *);
192 void pool_cache_destroy(struct pool *);
193 void pool_cache_gc(struct pool *);
194 #endif
195 void pool_cache_pool_info(struct pool *, struct kinfo_pool *);
196 int pool_cache_info(struct pool *, void *, size_t *);
197 int pool_cache_cpus_info(struct pool *, void *, size_t *);
198
199 #ifdef POOL_DEBUG
200 int pool_debug = 1;
201 #else
202 int pool_debug = 0;
203 #endif
204
205 #define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)
206
207 struct pool_page_header *
208 pool_p_alloc(struct pool *, int, int *);
209 void pool_p_insert(struct pool *, struct pool_page_header *);
210 void pool_p_remove(struct pool *, struct pool_page_header *);
211 void pool_p_free(struct pool *, struct pool_page_header *);
212
213 void pool_update_curpage(struct pool *);
214 void *pool_do_get(struct pool *, int, int *);
215 void pool_do_put(struct pool *, void *);
216 int pool_chk_page(struct pool *, struct pool_page_header *, int);
217 int pool_chk(struct pool *);
218 void pool_get_done(struct pool *, void *, void *);
219 void pool_runqueue(struct pool *, int);
220
221 void *pool_allocator_alloc(struct pool *, int, int *);
222 void pool_allocator_free(struct pool *, void *);
223
224 /*
225 * The default pool allocator.
226 */
227 void *pool_page_alloc(struct pool *, int, int *);
228 void pool_page_free(struct pool *, void *);
229
230 /*
231 * safe for interrupts; this is the default allocator
232 */
233 struct pool_allocator pool_allocator_single = {
234 pool_page_alloc,
235 pool_page_free,
236 POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
237 };
238
239 void *pool_multi_alloc(struct pool *, int, int *);
240 void pool_multi_free(struct pool *, void *);
241
242 struct pool_allocator pool_allocator_multi = {
243 pool_multi_alloc,
244 pool_multi_free,
245 POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
246 };
247
248 void *pool_multi_alloc_ni(struct pool *, int, int *);
249 void pool_multi_free_ni(struct pool *, void *);
250
251 struct pool_allocator pool_allocator_multi_ni = {
252 pool_multi_alloc_ni,
253 pool_multi_free_ni,
254 POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
255 };
256
257 #ifdef DDB
258 void pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
259 __attribute__((__format__(__kprintf__,1,2))));
260 void pool_print1(struct pool *, const char *, int (*)(const char *, ...)
261 __attribute__((__format__(__kprintf__,1,2))));
262 #endif
263
264 /* stale page garbage collectors */
265 void pool_gc_sched(void *);
266 struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
267 void pool_gc_pages(void *);
268 struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);
269
270 #define POOL_WAIT_FREE SEC_TO_NSEC(1)
271 #define POOL_WAIT_GC SEC_TO_NSEC(8)
272
273 RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);
274
275 static inline int
276 phtree_compare(const struct pool_page_header *a,
277 const struct pool_page_header *b)
278 {
279 vaddr_t va = (vaddr_t)a->ph_page;
280 vaddr_t vb = (vaddr_t)b->ph_page;
281
282 /* the compares in this order are important for the NFIND to work */
283 if (vb < va)
284 return (-1);
285 if (vb > va)
286 return (1);
287
288 return (0);
289 }
290
291 RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare);
292
293 /*
294 * Return the pool page header based on page address.
295 */
296 static inline struct pool_page_header *
297 pr_find_pagehead(struct pool *pp, void *v)
298 {
299 struct pool_page_header *ph, key;
300
301 if (POOL_INPGHDR(pp)) {
302 caddr_t page;
303
304 page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);
305
306 return ((struct pool_page_header *)(page + pp->pr_phoffset));
307 }
308
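/*
 * The tree is sorted in descending page address order, so
 * RBT_NFIND returns the header with the greatest ph_page <= v,
 * i.e. the page the item lives on.
 */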
309 key.ph_page = v;
310 ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
311 if (ph == NULL)
312 panic("%s: %s: page header missing", __func__, pp->pr_wchan);
313
314 KASSERT(ph->ph_page <= (caddr_t)v);
315 if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
316 panic("%s: %s: incorrect page", __func__, pp->pr_wchan);
317
318 return (ph);
319 }
320
321 /*
322 * Initialize the given pool resource structure.
323 *
324 * We export this routine to allow other kernel parts to declare
325 * static pools that must be initialized before malloc() is available.
326 */
327 void
328 pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
329 const char *wchan, struct pool_allocator *palloc)
330 {
331 int off = 0, space;
332 unsigned int pgsize = PAGE_SIZE, items;
333 size_t pa_pagesz;
334 #ifdef DIAGNOSTIC
335 struct pool *iter;
336 #endif
337
338 if (align == 0)
339 align = ALIGN(1);
340
341 if (size < sizeof(struct pool_item))
342 size = sizeof(struct pool_item);
343
344 size = roundup(size, align);
345
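/* grow the page size until at least 8 items fit on a page */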
346 while (size * 8 > pgsize)
347 pgsize <<= 1;
348
349 if (palloc == NULL) {
350 if (pgsize > PAGE_SIZE) {
351 palloc = ISSET(flags, PR_WAITOK) ?
352 &pool_allocator_multi_ni : &pool_allocator_multi;
353 } else
354 palloc = &pool_allocator_single;
355
356 pa_pagesz = palloc->pa_pagesz;
357 } else {
358 size_t pgsizes;
359
360 pa_pagesz = palloc->pa_pagesz;
361 if (pa_pagesz == 0)
362 pa_pagesz = POOL_ALLOC_DEFAULT;
363
364 pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;
365
366 /* make sure the allocator can fit at least one item */
367 if (size > pgsizes) {
368 panic("%s: pool %s item size 0x%zx > "
369 "allocator %p sizes 0x%zx", __func__, wchan,
370 size, palloc, pgsizes);
371 }
372
373 /* shrink pgsize until it fits into the range */
374 while (!ISSET(pgsizes, pgsize))
375 pgsize >>= 1;
376 }
377 KASSERT(ISSET(pa_pagesz, pgsize));
378
379 items = pgsize / size;
380
381 /*
382 * Decide whether to put the page header off page to avoid
383 * wasting too large a part of the page. Off-page page headers
384 * go into an RB tree, so we can match a returned item with
385 * its header based on the page address.
386 */
387 if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
388 if (pgsize - (size * items) >
389 sizeof(struct pool_page_header)) {
390 off = pgsize - sizeof(struct pool_page_header);
391 } else if (sizeof(struct pool_page_header) * 2 >= size) {
392 off = pgsize - sizeof(struct pool_page_header);
393 items = off / size;
394 }
395 }
396
397 KASSERT(items > 0);
398
399 /*
400 * Initialize the pool structure.
401 */
402 memset(pp, 0, sizeof(*pp));
403 refcnt_init(&pp->pr_refcnt);
404 if (ISSET(flags, PR_RWLOCK)) {
405 KASSERT(flags & PR_WAITOK);
406 pp->pr_lock_ops = &pool_lock_ops_rw;
407 } else
408 pp->pr_lock_ops = &pool_lock_ops_mtx;
409 TAILQ_INIT(&pp->pr_emptypages);
410 TAILQ_INIT(&pp->pr_fullpages);
411 TAILQ_INIT(&pp->pr_partpages);
412 pp->pr_curpage = NULL;
413 pp->pr_npages = 0;
414 pp->pr_minitems = 0;
415 pp->pr_minpages = 0;
416 pp->pr_maxpages = 8;
417 pp->pr_size = size;
418 pp->pr_pgsize = pgsize;
419 pp->pr_pgmask = ~0UL ^ (pgsize - 1);
420 pp->pr_phoffset = off;
421 pp->pr_itemsperpage = items;
422 pp->pr_wchan = wchan;
423 pp->pr_alloc = palloc;
424 pp->pr_nitems = 0;
425 pp->pr_nout = 0;
426 pp->pr_hardlimit = UINT_MAX;
427 pp->pr_hardlimit_warning = NULL;
428 pp->pr_hardlimit_ratecap.tv_sec = 0;
429 pp->pr_hardlimit_ratecap.tv_usec = 0;
430 pp->pr_hardlimit_warning_last.tv_sec = 0;
431 pp->pr_hardlimit_warning_last.tv_usec = 0;
432 RBT_INIT(phtree, &pp->pr_phtree);
433
434 /*
435 * Use the space between the chunks and the page header
436 * for cache coloring.
437 */
438 space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
439 space -= pp->pr_itemsperpage * pp->pr_size;
440 pp->pr_align = align;
441 pp->pr_maxcolors = (space / align) + 1;
442
443 pp->pr_nget = 0;
444 pp->pr_nfail = 0;
445 pp->pr_nput = 0;
446 pp->pr_npagealloc = 0;
447 pp->pr_npagefree = 0;
448 pp->pr_hiwat = 0;
449 pp->pr_nidle = 0;
450
451 pp->pr_ipl = ipl;
452 pp->pr_flags = flags;
453
454 pl_init(pp, &pp->pr_lock);
455 pl_init(pp, &pp->pr_requests_lock);
456 TAILQ_INIT(&pp->pr_requests);
457
458 if (phpool.pr_size == 0) {
459 pool_init(&phpool, sizeof(struct pool_page_header), 0,
460 IPL_HIGH, 0, "phpool", NULL);
461
462 /* make sure phpool won't "recurse" */
463 KASSERT(POOL_INPGHDR(&phpool));
464 }
465
466 /* pglistalloc/constraint parameters */
467 pp->pr_crange = &kp_dirty;
468
469 /* Insert this into the list of all pools. */
470 rw_enter_write(&pool_lock);
471 #ifdef DIAGNOSTIC
472 SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
473 if (iter == pp)
474 panic("%s: pool %s already on list", __func__, wchan);
475 }
476 #endif
477
478 pp->pr_serial = ++pool_serial;
479 if (pool_serial == 0)
480 panic("%s: too much uptime", __func__);
481
482 SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
483 pool_count++;
484 rw_exit_write(&pool_lock);
485 }
486
487 /*
488 * Decommission a pool resource.
489 */
490 void
491 pool_destroy(struct pool *pp)
492 {
493 struct pool_page_header *ph;
494 struct pool *prev, *iter;
495
496 #ifdef DIAGNOSTIC
497 if (pp->pr_nout != 0)
498 panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
499 #endif
500
501 /* Remove from global pool list */
502 rw_enter_write(&pool_lock);
503 pool_count--;
504 if (pp == SIMPLEQ_FIRST(&pool_head))
505 SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
506 else {
507 prev = SIMPLEQ_FIRST(&pool_head);
508 SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
509 if (iter == pp) {
510 SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
511 pr_poollist);
512 break;
513 }
514 prev = iter;
515 }
516 }
517 rw_exit_write(&pool_lock);
518
519 /* Wait for concurrent sysctl_dopool() */
520 refcnt_finalize(&pp->pr_refcnt, "pooldtor");
521
522 #ifdef MULTIPROCESSOR
523 if (pp->pr_cache != NULL)
524 pool_cache_destroy(pp);
525 #endif
526
527 /* Remove all pages */
528 while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
529 pl_enter(pp, &pp->pr_lock);
530 pool_p_remove(pp, ph);
531 pl_leave(pp, &pp->pr_lock);
532 pool_p_free(pp, ph);
533 }
534 KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
535 KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
536 }
537
538 void
539 pool_request_init(struct pool_request *pr,
540 void (*handler)(struct pool *, void *, void *), void *cookie)
541 {
542 pr->pr_handler = handler;
543 pr->pr_cookie = cookie;
544 pr->pr_item = NULL;
545 }
546
547 void
548 pool_request(struct pool *pp, struct pool_request *pr)
549 {
550 pl_enter(pp, &pp->pr_requests_lock);
551 TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
552 pool_runqueue(pp, PR_NOWAIT);
553 pl_leave(pp, &pp->pr_requests_lock);
554 }
555
556 struct pool_get_memory {
557 union pool_lock lock;
558 void * volatile v;
559 };
560
561 /*
562 * Grab an item from the pool.
563 */
564 void *
565 pool_get(struct pool *pp, int flags)
566 {
567 void *v = NULL;
568 int slowdown = 0;
569
570 KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
571 if (pp->pr_flags & PR_RWLOCK)
572 KASSERT(flags & PR_WAITOK);
573
574 #ifdef MULTIPROCESSOR
575 if (pp->pr_cache != NULL) {
576 v = pool_cache_get(pp);
577 if (v != NULL)
578 goto good;
579 }
580 #endif
581
582 pl_enter(pp, &pp->pr_lock);
583 if (pp->pr_nout >= pp->pr_hardlimit) {
584 if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
585 goto fail;
586 } else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
587 if (ISSET(flags, PR_NOWAIT))
588 goto fail;
589 }
590 pl_leave(pp, &pp->pr_lock);
591
592 if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK))
593 yield();
594
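/*
 * No memory was immediately available, but the caller can wait:
 * queue a pool_request and sleep until pool_runqueue() hands us
 * an item via pool_get_done().
 */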
595 if (v == NULL) {
596 struct pool_get_memory mem = { .v = NULL };
597 struct pool_request pr;
598
599 #ifdef DIAGNOSTIC
600 if (ISSET(flags, PR_WAITOK) && curproc == &proc0)
601 panic("%s: cannot sleep for memory during boot",
602 __func__);
603 #endif
604 pl_init(pp, &mem.lock);
605 pool_request_init(&pr, pool_get_done, &mem);
606 pool_request(pp, &pr);
607
608 pl_enter(pp, &mem.lock);
609 while (mem.v == NULL)
610 pl_sleep(pp, &mem, &mem.lock, PSWP, pp->pr_wchan);
611 pl_leave(pp, &mem.lock);
612
613 v = mem.v;
614 }
615
616 #ifdef MULTIPROCESSOR
617 good:
618 #endif
619 if (ISSET(flags, PR_ZERO))
620 memset(v, 0, pp->pr_size);
621
622 TRACEPOINT(uvm, pool_get, pp, v, flags);
623
624 return (v);
625
626 fail:
627 pp->pr_nfail++;
628 pl_leave(pp, &pp->pr_lock);
629 return (NULL);
630 }
631
632 void
633 pool_get_done(struct pool *pp, void *xmem, void *v)
634 {
635 struct pool_get_memory *mem = xmem;
636
637 pl_enter(pp, &mem->lock);
638 mem->v = v;
639 pl_leave(pp, &mem->lock);
640
641 wakeup_one(mem);
642 }
643
644 void
645 pool_runqueue(struct pool *pp, int flags)
646 {
647 struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
648 struct pool_request *pr;
649
650 pl_assert_unlocked(pp, &pp->pr_lock);
651 pl_assert_locked(pp, &pp->pr_requests_lock);
652
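/*
 * pr_requesting serializes servicing of the request queue: only
 * the thread that raises it from zero runs the loop below, and it
 * loops again if other threads queued work in the meantime.
 */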
653 if (pp->pr_requesting++)
654 return;
655
656 do {
657 pp->pr_requesting = 1;
658
659 TAILQ_CONCAT(&prl, &pp->pr_requests, pr_entry);
660 if (TAILQ_EMPTY(&prl))
661 continue;
662
663 pl_leave(pp, &pp->pr_requests_lock);
664
665 pl_enter(pp, &pp->pr_lock);
666 pr = TAILQ_FIRST(&prl);
667 while (pr != NULL) {
668 int slowdown = 0;
669
670 if (pp->pr_nout >= pp->pr_hardlimit)
671 break;
672
673 pr->pr_item = pool_do_get(pp, flags, &slowdown);
674 if (pr->pr_item == NULL) /* || slowdown ? */
675 break;
676
677 pr = TAILQ_NEXT(pr, pr_entry);
678 }
679 pl_leave(pp, &pp->pr_lock);
680
681 while ((pr = TAILQ_FIRST(&prl)) != NULL &&
682 pr->pr_item != NULL) {
683 TAILQ_REMOVE(&prl, pr, pr_entry);
684 (*pr->pr_handler)(pp, pr->pr_cookie, pr->pr_item);
685 }
686
687 pl_enter(pp, &pp->pr_requests_lock);
688 } while (--pp->pr_requesting);
689
690 TAILQ_CONCAT(&pp->pr_requests, &prl, pr_entry);
691 }
692
693 void *
694 pool_do_get(struct pool *pp, int flags, int *slowdown)
695 {
696 struct pool_item *pi;
697 struct pool_page_header *ph;
698
699 pl_assert_locked(pp, &pp->pr_lock);
700
701 splassert(pp->pr_ipl);
702
703 /*
704 * Account for this item now to avoid races if we need to give up
705 * pr_lock to allocate a page.
706 */
707 pp->pr_nout++;
708
709 if (pp->pr_curpage == NULL) {
710 pl_leave(pp, &pp->pr_lock);
711 ph = pool_p_alloc(pp, flags, slowdown);
712 pl_enter(pp, &pp->pr_lock);
713
714 if (ph == NULL) {
715 pp->pr_nout--;
716 return (NULL);
717 }
718
719 pool_p_insert(pp, ph);
720 }
721
722 ph = pp->pr_curpage;
723 pi = XSIMPLEQ_FIRST(&ph->ph_items);
724 if (__predict_false(pi == NULL))
725 panic("%s: %s: page empty", __func__, pp->pr_wchan);
726
727 if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
728 panic("%s: %s free list modified: "
729 "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
730 __func__, pp->pr_wchan, ph->ph_page, pi,
731 0, pi->pi_magic, POOL_IMAGIC(ph, pi));
732 }
733
734 XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list);
735
736 #ifdef DIAGNOSTIC
737 if (pool_debug && POOL_PHPOISON(ph)) {
738 size_t pidx;
739 uint32_t pval;
740 if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
741 &pidx, &pval)) {
742 int *ip = (int *)(pi + 1);
743 panic("%s: %s free list modified: "
744 "page %p; item addr %p; offset 0x%zx=0x%x",
745 __func__, pp->pr_wchan, ph->ph_page, pi,
746 (pidx * sizeof(int)) + sizeof(*pi), ip[pidx]);
747 }
748 }
749 #endif /* DIAGNOSTIC */
750
751 if (ph->ph_nmissing++ == 0) {
752 /*
753 * This page was previously empty. Move it to the list of
754 * partially-full pages. This page is already curpage.
755 */
756 TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
757 TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
758
759 pp->pr_nidle--;
760 }
761
762 if (ph->ph_nmissing == pp->pr_itemsperpage) {
763 /*
764 * This page is now full. Move it to the full list
765 * and select a new current page.
766 */
767 TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
768 TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry);
769 pool_update_curpage(pp);
770 }
771
772 pp->pr_nget++;
773
774 return (pi);
775 }
776
777 /*
778 * Return resource to the pool.
779 */
780 void
781 pool_put(struct pool *pp, void *v)
782 {
783 struct pool_page_header *ph, *freeph = NULL;
784
785 #ifdef DIAGNOSTIC
786 if (v == NULL)
787 panic("%s: NULL item", __func__);
788 #endif
789
790 TRACEPOINT(uvm, pool_put, pp, v);
791
792 #ifdef MULTIPROCESSOR
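/*
 * Return items via the per-cpu cache unless requests are queued;
 * those waiters are only satisfied by pool_runqueue() out of the
 * pool proper.
 */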
793 if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
794 pool_cache_put(pp, v);
795 return;
796 }
797 #endif
798
799 pl_enter(pp, &pp->pr_lock);
800
801 pool_do_put(pp, v);
802
803 pp->pr_nout--;
804 pp->pr_nput++;
805
806 /* is it time to free a page? */
807 if (pp->pr_nidle > pp->pr_maxpages &&
808 (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
809 getnsecuptime() - ph->ph_timestamp > POOL_WAIT_FREE) {
810 freeph = ph;
811 pool_p_remove(pp, freeph);
812 }
813
814 pl_leave(pp, &pp->pr_lock);
815
816 if (freeph != NULL)
817 pool_p_free(pp, freeph);
818
819 pool_wakeup(pp);
820 }
821
822 void
823 pool_wakeup(struct pool *pp)
824 {
825 if (!TAILQ_EMPTY(&pp->pr_requests)) {
826 pl_enter(pp, &pp->pr_requests_lock);
827 pool_runqueue(pp, PR_NOWAIT);
828 pl_leave(pp, &pp->pr_requests_lock);
829 }
830 }
831
832 void
833 pool_do_put(struct pool *pp, void *v)
834 {
835 struct pool_item *pi = v;
836 struct pool_page_header *ph;
837
838 splassert(pp->pr_ipl);
839
840 ph = pr_find_pagehead(pp, v);
841
842 #ifdef DIAGNOSTIC
843 if (pool_debug) {
844 struct pool_item *qi;
845 XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) {
846 if (pi == qi) {
847 panic("%s: %s: double pool_put: %p", __func__,
848 pp->pr_wchan, pi);
849 }
850 }
851 }
852 #endif /* DIAGNOSTIC */
853
854 pi->pi_magic = POOL_IMAGIC(ph, pi);
855 XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
856 #ifdef DIAGNOSTIC
857 if (POOL_PHPOISON(ph))
858 poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
859 #endif /* DIAGNOSTIC */
860
861 if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
862 /*
863 * The page was previously completely full, move it to the
864 * partially-full list.
865 */
866 TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry);
867 TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
868 }
869
870 if (ph->ph_nmissing == 0) {
871 /*
872 * The page is now empty, so move it to the empty page list.
873 */
874 pp->pr_nidle++;
875
876 ph->ph_timestamp = getnsecuptime();
877 TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
878 TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
879 pool_update_curpage(pp);
880 }
881 }
882
883 /*
884 * Add N items to the pool.
885 */
886 int
887 pool_prime(struct pool *pp, int n)
888 {
889 struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
890 struct pool_page_header *ph;
891 int newpages;
892
893 newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
894
895 while (newpages-- > 0) {
896 int slowdown = 0;
897
898 ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
899 if (ph == NULL) /* or slowdown? */
900 break;
901
902 TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
903 }
904
905 pl_enter(pp, &pp->pr_lock);
906 while ((ph = TAILQ_FIRST(&pl)) != NULL) {
907 TAILQ_REMOVE(&pl, ph, ph_entry);
908 pool_p_insert(pp, ph);
909 }
910 pl_leave(pp, &pp->pr_lock);
911
912 return (0);
913 }
914
915 struct pool_page_header *
916 pool_p_alloc(struct pool *pp, int flags, int *slowdown)
917 {
918 struct pool_page_header *ph;
919 struct pool_item *pi;
920 caddr_t addr;
921 unsigned int order;
922 int o;
923 int n;
924
925 pl_assert_unlocked(pp, &pp->pr_lock);
926 KASSERT(pp->pr_size >= sizeof(*pi));
927
928 addr = pool_allocator_alloc(pp, flags, slowdown);
929 if (addr == NULL)
930 return (NULL);
931
932 if (POOL_INPGHDR(pp))
933 ph = (struct pool_page_header *)(addr + pp->pr_phoffset);
934 else {
935 ph = pool_get(&phpool, flags);
936 if (ph == NULL) {
937 pool_allocator_free(pp, addr);
938 return (NULL);
939 }
940 }
941
942 XSIMPLEQ_INIT(&ph->ph_items);
943 ph->ph_page = addr;
944 addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
945 ph->ph_colored = addr;
946 ph->ph_nmissing = 0;
947 arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
948 #ifdef DIAGNOSTIC
949 /* use a bit in ph_magic to record if we poison page items */
950 if (pool_debug)
951 SET(ph->ph_magic, POOL_MAGICBIT);
952 else
953 CLR(ph->ph_magic, POOL_MAGICBIT);
954 #endif /* DIAGNOSTIC */
955
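/*
 * Build the free list in a randomized order: consume one bit of
 * arc4random() output per item to pick head or tail insertion.
 */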
956 n = pp->pr_itemsperpage;
957 o = 32;
958 while (n--) {
959 pi = (struct pool_item *)addr;
960 pi->pi_magic = POOL_IMAGIC(ph, pi);
961
962 if (o == 32) {
963 order = arc4random();
964 o = 0;
965 }
966 if (ISSET(order, 1U << o++))
967 XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list);
968 else
969 XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
970
971 #ifdef DIAGNOSTIC
972 if (POOL_PHPOISON(ph))
973 poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
974 #endif /* DIAGNOSTIC */
975
976 addr += pp->pr_size;
977 }
978
979 return (ph);
980 }
981
982 void
983 pool_p_free(struct pool *pp, struct pool_page_header *ph)
984 {
985 struct pool_item *pi;
986
987 pl_assert_unlocked(pp, &pp->pr_lock);
988 KASSERT(ph->ph_nmissing == 0);
989
990 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
991 if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
992 panic("%s: %s free list modified: "
993 "page %p; item addr %p; offset 0x%x=0x%lx",
994 __func__, pp->pr_wchan, ph->ph_page, pi,
995 0, pi->pi_magic);
996 }
997
998 #ifdef DIAGNOSTIC
999 if (POOL_PHPOISON(ph)) {
1000 size_t pidx;
1001 uint32_t pval;
1002 if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
1003 &pidx, &pval)) {
1004 int *ip = (int *)(pi + 1);
1005 panic("%s: %s free list modified: "
1006 "page %p; item addr %p; offset 0x%zx=0x%x",
1007 __func__, pp->pr_wchan, ph->ph_page, pi,
1008 pidx * sizeof(int), ip[pidx]);
1009 }
1010 }
1011 #endif
1012 }
1013
1014 pool_allocator_free(pp, ph->ph_page);
1015
1016 if (!POOL_INPGHDR(pp))
1017 pool_put(&phpool, ph);
1018 }
1019
1020 void
1021 pool_p_insert(struct pool *pp, struct pool_page_header *ph)
1022 {
1023 pl_assert_locked(pp, &pp->pr_lock);
1024
1025 /* If the pool was depleted, point at the new page */
1026 if (pp->pr_curpage == NULL)
1027 pp->pr_curpage = ph;
1028
1029 TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
1030 if (!POOL_INPGHDR(pp))
1031 RBT_INSERT(phtree, &pp->pr_phtree, ph);
1032
1033 pp->pr_nitems += pp->pr_itemsperpage;
1034 pp->pr_nidle++;
1035
1036 pp->pr_npagealloc++;
1037 if (++pp->pr_npages > pp->pr_hiwat)
1038 pp->pr_hiwat = pp->pr_npages;
1039 }
1040
1041 void
1042 pool_p_remove(struct pool *pp, struct pool_page_header *ph)
1043 {
1044 pl_assert_locked(pp, &pp->pr_lock);
1045
1046 pp->pr_npagefree++;
1047 pp->pr_npages--;
1048 pp->pr_nidle--;
1049 pp->pr_nitems -= pp->pr_itemsperpage;
1050
1051 if (!POOL_INPGHDR(pp))
1052 RBT_REMOVE(phtree, &pp->pr_phtree, ph);
1053 TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
1054
1055 pool_update_curpage(pp);
1056 }
1057
1058 void
1059 pool_update_curpage(struct pool *pp)
1060 {
1061 pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
1062 if (pp->pr_curpage == NULL) {
1063 pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
1064 }
1065 }
1066
1067 void
1068 pool_setlowat(struct pool *pp, int n)
1069 {
1070 int prime = 0;
1071
1072 pl_enter(pp, &pp->pr_lock);
1073 pp->pr_minitems = n;
1074 pp->pr_minpages = (n == 0)
1075 ? 0
1076 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1077
1078 if (pp->pr_nitems < n)
1079 prime = n - pp->pr_nitems;
1080 pl_leave(pp, &pp->pr_lock);
1081
1082 if (prime > 0)
1083 pool_prime(pp, prime);
1084 }
1085
1086 void
1087 pool_sethiwat(struct pool *pp, int n)
1088 {
1089 pp->pr_maxpages = (n == 0)
1090 ? 0
1091 : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
1092 }
1093
1094 int
1095 pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
1096 {
1097 int error = 0;
1098
1099 if (n < pp->pr_nout) {
1100 error = EINVAL;
1101 goto done;
1102 }
1103
1104 pp->pr_hardlimit = n;
1105 pp->pr_hardlimit_warning = warnmsg;
1106 pp->pr_hardlimit_ratecap.tv_sec = ratecap;
1107 pp->pr_hardlimit_warning_last.tv_sec = 0;
1108 pp->pr_hardlimit_warning_last.tv_usec = 0;
1109
1110 done:
1111 return (error);
1112 }
1113
1114 void
1115 pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
1116 {
1117 pp->pr_crange = mode;
1118 }
1119
1120 /*
1121 * Release all complete pages that have not been used recently.
1122 *
1123 * Returns non-zero if any pages have been reclaimed.
1124 */
1125 int
1126 pool_reclaim(struct pool *pp)
1127 {
1128 struct pool_page_header *ph, *phnext;
1129 struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
1130
1131 pl_enter(pp, &pp->pr_lock);
1132 for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
1133 phnext = TAILQ_NEXT(ph, ph_entry);
1134
1135 /* Check our minimum page claim */
1136 if (pp->pr_npages <= pp->pr_minpages)
1137 break;
1138
1139 /*
1140 * If freeing this page would put us below
1141 * the low water mark, stop now.
1142 */
1143 if ((pp->pr_nitems - pp->pr_itemsperpage) <
1144 pp->pr_minitems)
1145 break;
1146
1147 pool_p_remove(pp, ph);
1148 TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
1149 }
1150 pl_leave(pp, &pp->pr_lock);
1151
1152 if (TAILQ_EMPTY(&pl))
1153 return (0);
1154
1155 while ((ph = TAILQ_FIRST(&pl)) != NULL) {
1156 TAILQ_REMOVE(&pl, ph, ph_entry);
1157 pool_p_free(pp, ph);
1158 }
1159
1160 return (1);
1161 }
1162
1163 /*
1164 * Release all complete pages that have not been used recently
1165 * from all pools.
1166 */
1167 void
1168 pool_reclaim_all(void)
1169 {
1170 struct pool *pp;
1171
1172 rw_enter_read(&pool_lock);
1173 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
1174 pool_reclaim(pp);
1175 rw_exit_read(&pool_lock);
1176 }
1177
1178 #ifdef DDB
1179 #include <machine/db_machdep.h>
1180 #include <ddb/db_output.h>
1181
1182 /*
1183 * Diagnostic helpers.
1184 */
1185 void
1186 pool_printit(struct pool *pp, const char *modif,
1187 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1188 {
1189 pool_print1(pp, modif, pr);
1190 }
1191
1192 void
1193 pool_print_pagelist(struct pool_pagelist *pl,
1194 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1195 {
1196 struct pool_page_header *ph;
1197 struct pool_item *pi;
1198
1199 TAILQ_FOREACH(ph, pl, ph_entry) {
1200 (*pr)("\t\tpage %p, color %p, nmissing %d\n",
1201 ph->ph_page, ph->ph_colored, ph->ph_nmissing);
1202 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
1203 if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1204 (*pr)("\t\t\titem %p, magic 0x%lx\n",
1205 pi, pi->pi_magic);
1206 }
1207 }
1208 }
1209 }
1210
1211 void
1212 pool_print1(struct pool *pp, const char *modif,
1213 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1214 {
1215 struct pool_page_header *ph;
1216 int print_pagelist = 0;
1217 char c;
1218
1219 while ((c = *modif++) != '\0') {
1220 if (c == 'p')
1221 print_pagelist = 1;
1222 modif++;
1223 }
1224
1225 (*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
1226 pp->pr_maxcolors);
1227 (*pr)("\talloc %p\n", pp->pr_alloc);
1228 (*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
1229 pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
1230 (*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
1231 pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);
1232
1233 (*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
1234 pp->pr_nget, pp->pr_nfail, pp->pr_nput);
1235 (*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
1236 pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);
1237
1238 if (print_pagelist == 0)
1239 return;
1240
1241 if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
1242 (*pr)("\n\tempty page list:\n");
1243 pool_print_pagelist(&pp->pr_emptypages, pr);
1244 if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
1245 (*pr)("\n\tfull page list:\n");
1246 pool_print_pagelist(&pp->pr_fullpages, pr);
1247 if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
1248 (*pr)("\n\tpartial-page list:\n");
1249 pool_print_pagelist(&pp->pr_partpages, pr);
1250
1251 if (pp->pr_curpage == NULL)
1252 (*pr)("\tno current page\n");
1253 else
1254 (*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
1255 }
1256
1257 void
1258 db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
1259 {
1260 struct pool *pp;
1261 char maxp[16];
1262 int ovflw;
1263 char mode;
1264
1265 mode = modif[0];
1266 if (mode != '\0' && mode != 'a') {
1267 db_printf("usage: show all pools [/a]\n");
1268 return;
1269 }
1270
1271 if (mode == '\0')
1272 db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
1273 "Name",
1274 "Size",
1275 "Requests",
1276 "Fail",
1277 "Releases",
1278 "Pgreq",
1279 "Pgrel",
1280 "Npage",
1281 "Hiwat",
1282 "Minpg",
1283 "Maxpg",
1284 "Idle");
1285 else
1286 db_printf("%-12s %18s %18s\n",
1287 "Name", "Address", "Allocator");
1288
1289 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1290 if (mode == 'a') {
1291 db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
1292 pp->pr_alloc);
1293 continue;
1294 }
1295
1296 if (!pp->pr_nget)
1297 continue;
1298
1299 if (pp->pr_maxpages == UINT_MAX)
1300 snprintf(maxp, sizeof maxp, "inf");
1301 else
1302 snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);
1303
1304 #define PRWORD(ovflw, fmt, width, fixed, val) do { \
1305 (ovflw) += db_printf((fmt), \
1306 (width) - (fixed) - (ovflw) > 0 ? \
1307 (width) - (fixed) - (ovflw) : 0, \
1308 (val)) - (width); \
1309 if ((ovflw) < 0) \
1310 (ovflw) = 0; \
1311 } while (/* CONSTCOND */0)
1312
1313 ovflw = 0;
1314 PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
1315 PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
1316 PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
1317 PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
1318 PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
1319 PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
1320 PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
1321 PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
1322 PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
1323 PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
1324 PRWORD(ovflw, " %*s", 6, 1, maxp);
1325 PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);
1326
1327 pool_chk(pp);
1328 }
1329 }
1330 #endif /* DDB */
1331
1332 #if defined(POOL_DEBUG) || defined(DDB)
1333 int
1334 pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected)
1335 {
1336 struct pool_item *pi;
1337 caddr_t page;
1338 int n;
1339 const char *label = pp->pr_wchan;
1340
1341 page = (caddr_t)((u_long)ph & pp->pr_pgmask);
1342 if (page != ph->ph_page && POOL_INPGHDR(pp)) {
1343 printf("%s: ", label);
1344 printf("pool(%p:%s): page inconsistency: page %p; "
1345 "at page head addr %p (p %p)\n",
1346 pp, pp->pr_wchan, ph->ph_page, ph, page);
1347 return 1;
1348 }
1349
1350 for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0;
1351 pi != NULL;
1352 pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) {
1353 if ((caddr_t)pi < ph->ph_page ||
1354 (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
1355 printf("%s: ", label);
1356 printf("pool(%p:%s): page inconsistency: page %p;"
1357 " item ordinal %d; addr %p\n", pp,
1358 pp->pr_wchan, ph->ph_page, n, pi);
1359 return (1);
1360 }
1361
1362 if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
1363 printf("%s: ", label);
1364 printf("pool(%p:%s): free list modified: "
1365 "page %p; item ordinal %d; addr %p "
1366 "(p %p); offset 0x%x=0x%lx\n",
1367 pp, pp->pr_wchan, ph->ph_page, n, pi, page,
1368 0, pi->pi_magic);
1369 }
1370
1371 #ifdef DIAGNOSTIC
1372 if (POOL_PHPOISON(ph)) {
1373 size_t pidx;
1374 uint32_t pval;
1375 if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
1376 &pidx, &pval)) {
1377 int *ip = (int *)(pi + 1);
1378 printf("pool(%s): free list modified: "
1379 "page %p; item ordinal %d; addr %p "
1380 "(p %p); offset 0x%zx=0x%x\n",
1381 pp->pr_wchan, ph->ph_page, n, pi,
1382 page, pidx * sizeof(int), ip[pidx]);
1383 }
1384 }
1385 #endif /* DIAGNOSTIC */
1386 }
1387 if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
1388 printf("pool(%p:%s): page inconsistency: page %p;"
1389 " %d on list, %d missing, %d items per page\n", pp,
1390 pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1391 pp->pr_itemsperpage);
1392 return 1;
1393 }
1394 if (expected >= 0 && n != expected) {
1395 printf("pool(%p:%s): page inconsistency: page %p;"
1396 " %d on list, %d missing, %d expected\n", pp,
1397 pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
1398 expected);
1399 return 1;
1400 }
1401 return 0;
1402 }
1403
1404 int
1405 pool_chk(struct pool *pp)
1406 {
1407 struct pool_page_header *ph;
1408 int r = 0;
1409
1410 TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry)
1411 r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
1412 TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry)
1413 r += pool_chk_page(pp, ph, 0);
1414 TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry)
1415 r += pool_chk_page(pp, ph, -1);
1416
1417 return (r);
1418 }
1419 #endif /* defined(POOL_DEBUG) || defined(DDB) */
1420
1421 #ifdef DDB
1422 void
1423 pool_walk(struct pool *pp, int full,
1424 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
1425 void (*func)(void *, int, int (*)(const char *, ...)
1426 __attribute__((__format__(__kprintf__,1,2)))))
1427 {
1428 struct pool_page_header *ph;
1429 struct pool_item *pi;
1430 caddr_t cp;
1431 int n;
1432
1433 TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) {
1434 cp = ph->ph_colored;
1435 n = ph->ph_nmissing;
1436
1437 while (n--) {
1438 func(cp, full, pr);
1439 cp += pp->pr_size;
1440 }
1441 }
1442
1443 TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) {
1444 cp = ph->ph_colored;
1445 n = ph->ph_nmissing;
1446
1447 do {
1448 XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
1449 if (cp == (caddr_t)pi)
1450 break;
1451 }
1452 if (cp != (caddr_t)pi) {
1453 func(cp, full, pr);
1454 n--;
1455 }
1456
1457 cp += pp->pr_size;
1458 } while (n > 0);
1459 }
1460 }
1461 #endif
1462
1463 /*
1464 * We have three different sysctls.
1465 * kern.pool.npools - the number of pools.
1466 * kern.pool.pool.<pool#> - the pool struct for the pool#.
1467 * kern.pool.name.<pool#> - the name for pool#.
1468 */
1469 int
1470 sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
1471 {
1472 struct kinfo_pool pi;
1473 struct pool *pp;
1474 int rv = EOPNOTSUPP;
1475
1476 switch (name[0]) {
1477 case KERN_POOL_NPOOLS:
1478 if (namelen != 1)
1479 return (ENOTDIR);
1480 return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));
1481
1482 case KERN_POOL_NAME:
1483 case KERN_POOL_POOL:
1484 case KERN_POOL_CACHE:
1485 case KERN_POOL_CACHE_CPUS:
1486 break;
1487 default:
1488 return (EOPNOTSUPP);
1489 }
1490
1491 if (namelen != 2)
1492 return (ENOTDIR);
1493
1494 rw_enter_read(&pool_lock);
1495 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1496 if (name[1] == pp->pr_serial) {
1497 refcnt_take(&pp->pr_refcnt);
1498 break;
1499 }
1500 }
1501 rw_exit_read(&pool_lock);
1502
1503 if (pp == NULL)
1504 return (ENOENT);
1505
1506 switch (name[0]) {
1507 case KERN_POOL_NAME:
1508 rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
1509 break;
1510 case KERN_POOL_POOL:
1511 memset(&pi, 0, sizeof(pi));
1512
1513 pl_enter(pp, &pp->pr_lock);
1514 pi.pr_size = pp->pr_size;
1515 pi.pr_pgsize = pp->pr_pgsize;
1516 pi.pr_itemsperpage = pp->pr_itemsperpage;
1517 pi.pr_npages = pp->pr_npages;
1518 pi.pr_minpages = pp->pr_minpages;
1519 pi.pr_maxpages = pp->pr_maxpages;
1520 pi.pr_hardlimit = pp->pr_hardlimit;
1521 pi.pr_nout = pp->pr_nout;
1522 pi.pr_nitems = pp->pr_nitems;
1523 pi.pr_nget = pp->pr_nget;
1524 pi.pr_nput = pp->pr_nput;
1525 pi.pr_nfail = pp->pr_nfail;
1526 pi.pr_npagealloc = pp->pr_npagealloc;
1527 pi.pr_npagefree = pp->pr_npagefree;
1528 pi.pr_hiwat = pp->pr_hiwat;
1529 pi.pr_nidle = pp->pr_nidle;
1530 pl_leave(pp, &pp->pr_lock);
1531
1532 pool_cache_pool_info(pp, &pi);
1533
1534 rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
1535 break;
1536
1537 case KERN_POOL_CACHE:
1538 rv = pool_cache_info(pp, oldp, oldlenp);
1539 break;
1540
1541 case KERN_POOL_CACHE_CPUS:
1542 rv = pool_cache_cpus_info(pp, oldp, oldlenp);
1543 break;
1544 }
1545
1546 refcnt_rele_wake(&pp->pr_refcnt);
1547
1548 return (rv);
1549 }
1550
1551 void
1552 pool_gc_sched(void *null)
1553 {
1554 task_add(systqmp, &pool_gc_task);
1555 }
1556
1557 void
1558 pool_gc_pages(void *null)
1559 {
1560 struct pool *pp;
1561 struct pool_page_header *ph, *freeph;
1562 int s;
1563
1564 rw_enter_read(&pool_lock);
1565 s = splvm(); /* XXX go to splvm until all pools _setipl properly */
1566 SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
1567 #ifdef MULTIPROCESSOR
1568 if (pp->pr_cache != NULL)
1569 pool_cache_gc(pp);
1570 #endif
1571
1572 if (pp->pr_nidle <= pp->pr_minpages || /* guess */
1573 !pl_enter_try(pp, &pp->pr_lock)) /* try */
1574 continue;
1575
1576 /* is it time to free a page? */
1577 if (pp->pr_nidle > pp->pr_minpages &&
1578 (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
1579 getnsecuptime() - ph->ph_timestamp > POOL_WAIT_GC) {
1580 freeph = ph;
1581 pool_p_remove(pp, freeph);
1582 } else
1583 freeph = NULL;
1584
1585 pl_leave(pp, &pp->pr_lock);
1586
1587 if (freeph != NULL)
1588 pool_p_free(pp, freeph);
1589 }
1590 splx(s);
1591 rw_exit_read(&pool_lock);
1592
1593 timeout_add_sec(&pool_gc_tick, 1);
1594 }
1595
1596 /*
1597 * Pool backend allocators.
1598 */
1599
1600 void *
1601 pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
1602 {
1603 void *v;
1604
1605 v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);
1606
1607 #ifdef DIAGNOSTIC
1608 if (v != NULL && POOL_INPGHDR(pp)) {
1609 vaddr_t addr = (vaddr_t)v;
1610 if ((addr & pp->pr_pgmask) != addr) {
1611 panic("%s: %s page address %p isn't aligned to %u",
1612 __func__, pp->pr_wchan, v, pp->pr_pgsize);
1613 }
1614 }
1615 #endif
1616
1617 return (v);
1618 }
1619
1620 void
1621 pool_allocator_free(struct pool *pp, void *v)
1622 {
1623 struct pool_allocator *pa = pp->pr_alloc;
1624
1625 (*pa->pa_free)(pp, v);
1626 }
1627
1628 void *
1629 pool_page_alloc(struct pool *pp, int flags, int *slowdown)
1630 {
1631 struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1632
1633 kd.kd_waitok = ISSET(flags, PR_WAITOK);
1634 kd.kd_slowdown = slowdown;
1635
1636 return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
1637 }
1638
1639 void
1640 pool_page_free(struct pool *pp, void *v)
1641 {
1642 km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
1643 }
1644
1645 void *
1646 pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
1647 {
1648 struct kmem_va_mode kv = kv_intrsafe;
1649 struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1650 void *v;
1651 int s;
1652
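/*
 * With an in-page header the allocation must be aligned to its own
 * size so pr_find_pagehead() can locate the header by masking an
 * item address.
 */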
1653 if (POOL_INPGHDR(pp))
1654 kv.kv_align = pp->pr_pgsize;
1655
1656 kd.kd_waitok = ISSET(flags, PR_WAITOK);
1657 kd.kd_slowdown = slowdown;
1658
1659 s = splvm();
1660 v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
1661 splx(s);
1662
1663 return (v);
1664 }
1665
1666 void
1667 pool_multi_free(struct pool *pp, void *v)
1668 {
1669 struct kmem_va_mode kv = kv_intrsafe;
1670 int s;
1671
1672 if (POOL_INPGHDR(pp))
1673 kv.kv_align = pp->pr_pgsize;
1674
1675 s = splvm();
1676 km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
1677 splx(s);
1678 }
1679
1680 void *
1681 pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
1682 {
1683 struct kmem_va_mode kv = kv_any;
1684 struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
1685 void *v;
1686
1687 if (POOL_INPGHDR(pp))
1688 kv.kv_align = pp->pr_pgsize;
1689
1690 kd.kd_waitok = ISSET(flags, PR_WAITOK);
1691 kd.kd_slowdown = slowdown;
1692
1693 KERNEL_LOCK();
1694 v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
1695 KERNEL_UNLOCK();
1696
1697 return (v);
1698 }
1699
1700 void
1701 pool_multi_free_ni(struct pool *pp, void *v)
1702 {
1703 struct kmem_va_mode kv = kv_any;
1704
1705 if (POOL_INPGHDR(pp))
1706 kv.kv_align = pp->pr_pgsize;
1707
1708 KERNEL_LOCK();
1709 km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
1710 KERNEL_UNLOCK();
1711 }
1712
1713 #ifdef MULTIPROCESSOR
1714
1715 struct pool pool_caches; /* per cpu cache entries */
1716
1717 void
1718 pool_cache_init(struct pool *pp)
1719 {
1720 struct cpumem *cm;
1721 struct pool_cache *pc;
1722 struct cpumem_iter i;
1723
1724 if (pool_caches.pr_size == 0) {
1725 pool_init(&pool_caches, sizeof(struct pool_cache),
1726 CACHELINESIZE, IPL_NONE, PR_WAITOK | PR_RWLOCK,
1727 "plcache", NULL);
1728 }
1729
1730 /* must be able to use the pool items as cache list items */
1731 KASSERT(pp->pr_size >= sizeof(struct pool_cache_item));
1732
1733 cm = cpumem_get(&pool_caches);
1734
1735 pl_init(pp, &pp->pr_cache_lock);
1736 arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
1737 TAILQ_INIT(&pp->pr_cache_lists);
1738 pp->pr_cache_nitems = 0;
1739 pp->pr_cache_timestamp = getnsecuptime();
1740 pp->pr_cache_items = 8;
1741 pp->pr_cache_contention = 0;
1742 pp->pr_cache_ngc = 0;
1743
1744 CPUMEM_FOREACH(pc, &i, cm) {
1745 pc->pc_actv = NULL;
1746 pc->pc_nactv = 0;
1747 pc->pc_prev = NULL;
1748
1749 pc->pc_nget = 0;
1750 pc->pc_nfail = 0;
1751 pc->pc_nput = 0;
1752 pc->pc_nlget = 0;
1753 pc->pc_nlfail = 0;
1754 pc->pc_nlput = 0;
1755 pc->pc_nout = 0;
1756 }
1757
1758 membar_producer();
1759
1760 pp->pr_cache = cm;
1761 }
1762
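/*
 * The TAILQ entry of a free cache item is unused while the item sits
 * on a per-cpu list, so it is recycled to hold checksums: the item
 * address and the ci_next pointer, each XORed with a per-pool secret.
 */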
1763 static inline void
1764 pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci)
1765 {
1766 unsigned long *entry = (unsigned long *)&ci->ci_nextl;
1767
1768 entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci;
1769 entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
1770 }
1771
1772 static inline void
1773 pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci)
1774 {
1775 unsigned long *entry;
1776 unsigned long val;
1777
1778 entry = (unsigned long *)&ci->ci_nextl;
1779 val = pp->pr_cache_magic[0] ^ (u_long)ci;
1780 if (*entry != val)
1781 goto fail;
1782
1783 entry++;
1784 val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
1785 if (*entry != val)
1786 goto fail;
1787
1788 return;
1789
1790 fail:
1791 panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx",
1792 __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci,
1793 *entry, val);
1794 }
1795
1796 static inline void
1797 pool_list_enter(struct pool *pp)
1798 {
1799 if (pl_enter_try(pp, &pp->pr_cache_lock) == 0) {
1800 pl_enter(pp, &pp->pr_cache_lock);
1801 pp->pr_cache_contention++;
1802 }
1803 }
1804
1805 static inline void
1806 pool_list_leave(struct pool *pp)
1807 {
1808 pl_leave(pp, &pp->pr_cache_lock);
1809 }
1810
1811 static inline struct pool_cache_item *
1812 pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc)
1813 {
1814 struct pool_cache_item *pl;
1815
1816 pool_list_enter(pp);
1817 pl = TAILQ_FIRST(&pp->pr_cache_lists);
1818 if (pl != NULL) {
1819 TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
1820 pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
1821
1822 pool_cache_item_magic(pp, pl);
1823
1824 pc->pc_nlget++;
1825 } else
1826 pc->pc_nlfail++;
1827
1828 /* fold this cpus nout into the global while we have the lock */
1829 pp->pr_cache_nout += pc->pc_nout;
1830 pc->pc_nout = 0;
1831 pool_list_leave(pp);
1832
1833 return (pl);
1834 }
1835
1836 static inline void
1837 pool_cache_list_free(struct pool *pp, struct pool_cache *pc,
1838 struct pool_cache_item *ci)
1839 {
1840 pool_list_enter(pp);
1841 if (TAILQ_EMPTY(&pp->pr_cache_lists))
1842 pp->pr_cache_timestamp = getnsecuptime();
1843
1844 pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci);
1845 TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl);
1846
1847 pc->pc_nlput++;
1848
1849 /* fold this cpus nout into the global while we have the lock */
1850 pp->pr_cache_nout += pc->pc_nout;
1851 pc->pc_nout = 0;
1852 pool_list_leave(pp);
1853 }
1854
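/*
 * pc_gen works like a seqlock: it is odd while this cpu is between
 * pool_cache_enter() and pool_cache_leave(), so pool_cache_pool_info()
 * can read the per-cpu counters without taking a lock.
 */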
1855 static inline struct pool_cache *
1856 pool_cache_enter(struct pool *pp, int *s)
1857 {
1858 struct pool_cache *pc;
1859
1860 pc = cpumem_enter(pp->pr_cache);
1861 *s = splraise(pp->pr_ipl);
1862 pc->pc_gen++;
1863
1864 return (pc);
1865 }
1866
1867 static inline void
1868 pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
1869 {
1870 pc->pc_gen++;
1871 splx(s);
1872 cpumem_leave(pp->pr_cache, pc);
1873 }
1874
1875 void *
1876 pool_cache_get(struct pool *pp)
1877 {
1878 struct pool_cache *pc;
1879 struct pool_cache_item *ci;
1880 int s;
1881
1882 pc = pool_cache_enter(pp, &s);
1883
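/* try the active list, then the previous one, then the shared lists */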
1884 if (pc->pc_actv != NULL) {
1885 ci = pc->pc_actv;
1886 } else if (pc->pc_prev != NULL) {
1887 ci = pc->pc_prev;
1888 pc->pc_prev = NULL;
1889 } else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) {
1890 pc->pc_nfail++;
1891 goto done;
1892 }
1893
1894 pool_cache_item_magic_check(pp, ci);
1895 #ifdef DIAGNOSTIC
1896 if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) {
1897 size_t pidx;
1898 uint32_t pval;
1899
1900 if (poison_check(ci + 1, pp->pr_size - sizeof(*ci),
1901 &pidx, &pval)) {
1902 int *ip = (int *)(ci + 1);
1903 ip += pidx;
1904
1905 panic("%s: %s cpu free list modified: "
1906 "item addr %p+%zu 0x%x!=0x%x",
1907 __func__, pp->pr_wchan, ci,
1908 (caddr_t)ip - (caddr_t)ci, *ip, pval);
1909 }
1910 }
1911 #endif
1912
1913 pc->pc_actv = ci->ci_next;
1914 pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1;
1915 pc->pc_nget++;
1916 pc->pc_nout++;
1917
1918 done:
1919 pool_cache_leave(pp, pc, s);
1920
1921 return (ci);
1922 }
1923
1924 void
1925 pool_cache_put(struct pool *pp, void *v)
1926 {
1927 struct pool_cache *pc;
1928 struct pool_cache_item *ci = v;
1929 unsigned long nitems;
1930 int s;
1931 #ifdef DIAGNOSTIC
1932 int poison = pool_debug && pp->pr_size > sizeof(*ci);
1933
1934 if (poison)
1935 poison_mem(ci + 1, pp->pr_size - sizeof(*ci));
1936 #endif
1937
1938 pc = pool_cache_enter(pp, &s);
1939
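/*
 * If the active list is full, push the previous list back to the
 * shared lists and retire the active list into its place.
 */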
1940 nitems = pc->pc_nactv;
1941 if (nitems >= pp->pr_cache_items) {
1942 if (pc->pc_prev != NULL)
1943 pool_cache_list_free(pp, pc, pc->pc_prev);
1944
1945 pc->pc_prev = pc->pc_actv;
1946
1947 pc->pc_actv = NULL;
1948 pc->pc_nactv = 0;
1949 nitems = 0;
1950 }
1951
1952 ci->ci_next = pc->pc_actv;
1953 ci->ci_nitems = ++nitems;
1954 #ifdef DIAGNOSTIC
1955 ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0;
1956 #endif
1957 pool_cache_item_magic(pp, ci);
1958
1959 pc->pc_actv = ci;
1960 pc->pc_nactv = nitems;
1961
1962 pc->pc_nput++;
1963 pc->pc_nout--;
1964
1965 pool_cache_leave(pp, pc, s);
1966 }
1967
1968 struct pool_cache_item *
1969 pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl)
1970 {
1971 struct pool_cache_item *rpl, *next;
1972
1973 if (pl == NULL)
1974 return (NULL);
1975
1976 rpl = TAILQ_NEXT(pl, ci_nextl);
1977
1978 pl_enter(pp, &pp->pr_lock);
1979 do {
1980 next = pl->ci_next;
1981 pool_do_put(pp, pl);
1982 pl = next;
1983 } while (pl != NULL);
1984 pl_leave(pp, &pp->pr_lock);
1985
1986 return (rpl);
1987 }
1988
1989 void
1990 pool_cache_destroy(struct pool *pp)
1991 {
1992 struct pool_cache *pc;
1993 struct pool_cache_item *pl;
1994 struct cpumem_iter i;
1995 struct cpumem *cm;
1996
1997 rw_enter_write(&pool_lock); /* serialise with the gc */
1998 cm = pp->pr_cache;
1999 pp->pr_cache = NULL; /* make pool_put avoid the cache */
2000 rw_exit_write(&pool_lock);
2001
2002 CPUMEM_FOREACH(pc, &i, cm) {
2003 pool_cache_list_put(pp, pc->pc_actv);
2004 pool_cache_list_put(pp, pc->pc_prev);
2005 }
2006
2007 cpumem_put(&pool_caches, cm);
2008
2009 pl = TAILQ_FIRST(&pp->pr_cache_lists);
2010 while (pl != NULL)
2011 pl = pool_cache_list_put(pp, pl);
2012 }
2013
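/*
 * Run from the pool gc: if the shared queue of free lists has been
 * left alone for more than POOL_WAIT_GC, give one list back to the
 * pool.  The contention counter is then compared against its
 * previous snapshot to grow or shrink pr_cache_items adaptively.
 */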
void
pool_cache_gc(struct pool *pp)
{
	unsigned int contention, delta;

	if (getnsecuptime() - pp->pr_cache_timestamp > POOL_WAIT_GC &&
	    !TAILQ_EMPTY(&pp->pr_cache_lists) &&
	    pl_enter_try(pp, &pp->pr_cache_lock)) {
		struct pool_cache_item *pl = NULL;

		pl = TAILQ_FIRST(&pp->pr_cache_lists);
		if (pl != NULL) {
			TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
			pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
			pp->pr_cache_timestamp = getnsecuptime();

			pp->pr_cache_ngc++;
		}

		pl_leave(pp, &pp->pr_cache_lock);

		pool_cache_list_put(pp, pl);
	}

	/*
	 * If there's a lot of contention on pr_cache_lock then consider
	 * growing the length of the per-CPU lists to reduce the need to
	 * access the global pool.
	 */

	contention = pp->pr_cache_contention;
	delta = contention - pp->pr_cache_contention_prev;
	if (delta > 8 /* magic */) {
		if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems)
			pp->pr_cache_items += 8;
	} else if (delta == 0) {
		if (pp->pr_cache_items > 8)
			pp->pr_cache_items--;
	}
	pp->pr_cache_contention_prev = contention;
}

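/*
 * Merge the per-CPU counters into a struct kinfo_pool.  pc_gen acts
 * like a seqlock: pool_cache_enter() makes it odd and
 * pool_cache_leave() makes it even again, so readers spin until
 * they observe the same even value on both sides of the read.
 */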
void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pp->pr_cache == NULL)
		return;

	/* loop through the caches twice to collect stats */

	/* once without the lock so we can yield while reading nget/nput */
	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
		uint64_t gen, nget, nput;

		do {
			while ((gen = pc->pc_gen) & 1)
				yield();

			nget = pc->pc_nget;
			nput = pc->pc_nput;
		} while (gen != pc->pc_gen);

		pi->pr_nget += nget;
		pi->pr_nput += nput;
	}

	/* and once with pr_cache_lock held for consistent nout values */
	pl_enter(pp, &pp->pr_cache_lock);
	CPUMEM_FOREACH(pc, &i, pp->pr_cache)
		pi->pr_nout += pc->pc_nout;

	pi->pr_nout += pp->pr_cache_nout;
	pl_leave(pp, &pp->pr_cache_lock);
}

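/*
 * Export the shared cache-list state for sysctl(2) as a struct
 * kinfo_pool_cache.
 */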
int
pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	struct kinfo_pool_cache kpc;

	if (pp->pr_cache == NULL)
		return (EOPNOTSUPP);

	memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */

	pl_enter(pp, &pp->pr_cache_lock);
	kpc.pr_ngc = pp->pr_cache_ngc;
	kpc.pr_len = pp->pr_cache_items;
	kpc.pr_nitems = pp->pr_cache_nitems;
	kpc.pr_contention = pp->pr_cache_contention;
	pl_leave(pp, &pp->pr_cache_lock);

	return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc)));
}

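/*
 * Export one struct kinfo_pool_cache_cpu per CPU for sysctl(2).
 * The records are staged in a temporary array, read under the same
 * pc_gen protocol as above, and copied out in one go.
 */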
int
pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	struct pool_cache *pc;
	struct kinfo_pool_cache_cpu *kpcc, *info;
	unsigned int cpu = 0;
	struct cpumem_iter i;
	int error = 0;
	size_t len;

	if (pp->pr_cache == NULL)
		return (EOPNOTSUPP);
	if (*oldlenp % sizeof(*kpcc))
		return (EINVAL);

	kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP,
	    M_WAITOK|M_CANFAIL|M_ZERO);
	if (kpcc == NULL)
		return (EIO);

	len = ncpusfound * sizeof(*kpcc);

	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
		uint64_t gen;

		if (cpu >= ncpusfound) {
			error = EIO;
			goto err;
		}

		info = &kpcc[cpu];
		info->pr_cpu = cpu;

		do {
			while ((gen = pc->pc_gen) & 1)
				yield();

			info->pr_nget = pc->pc_nget;
			info->pr_nfail = pc->pc_nfail;
			info->pr_nput = pc->pc_nput;
			info->pr_nlget = pc->pc_nlget;
			info->pr_nlfail = pc->pc_nlfail;
			info->pr_nlput = pc->pc_nlput;
		} while (gen != pc->pc_gen);

		cpu++;
	}

	error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len);
err:
	free(kpcc, M_TEMP, len);

	return (error);
}
#else /* MULTIPROCESSOR */
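/*
 * Uniprocessor kernels get stubs: there are no per-CPU caches to
 * maintain or report on.
 */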
void
pool_cache_init(struct pool *pp)
{
	/* nop */
}

void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	/* nop */
}

int
pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	return (EOPNOTSUPP);
}

int
pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	return (EOPNOTSUPP);
}
#endif /* MULTIPROCESSOR */


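/*
 * pool_lock_ops backed by a mutex.  The mutex is initialised at the
 * pool's IPL so the pool can be taken from interrupt context.
 */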
void
pool_lock_mtx_init(struct pool *pp, union pool_lock *lock,
    const struct lock_type *type)
{
	_mtx_init_flags(&lock->prl_mtx, pp->pr_ipl, pp->pr_wchan, 0, type);
}

void
pool_lock_mtx_enter(union pool_lock *lock)
{
	mtx_enter(&lock->prl_mtx);
}

int
pool_lock_mtx_enter_try(union pool_lock *lock)
{
	return (mtx_enter_try(&lock->prl_mtx));
}

void
pool_lock_mtx_leave(union pool_lock *lock)
{
	mtx_leave(&lock->prl_mtx);
}

void
pool_lock_mtx_assert_locked(union pool_lock *lock)
{
	MUTEX_ASSERT_LOCKED(&lock->prl_mtx);
}

void
pool_lock_mtx_assert_unlocked(union pool_lock *lock)
{
	MUTEX_ASSERT_UNLOCKED(&lock->prl_mtx);
}

int
pool_lock_mtx_sleep(void *ident, union pool_lock *lock, int priority,
    const char *wmesg)
{
	return msleep_nsec(ident, &lock->prl_mtx, priority, wmesg, INFSLP);
}

static const struct pool_lock_ops pool_lock_ops_mtx = {
	pool_lock_mtx_init,
	pool_lock_mtx_enter,
	pool_lock_mtx_enter_try,
	pool_lock_mtx_leave,
	pool_lock_mtx_assert_locked,
	pool_lock_mtx_assert_unlocked,
	pool_lock_mtx_sleep,
};

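/*
 * pool_lock_ops backed by a write rwlock, for pools whose users are
 * allowed to sleep.
 */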
void
pool_lock_rw_init(struct pool *pp, union pool_lock *lock,
    const struct lock_type *type)
{
	_rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type);
}

void
pool_lock_rw_enter(union pool_lock *lock)
{
	rw_enter_write(&lock->prl_rwlock);
}

int
pool_lock_rw_enter_try(union pool_lock *lock)
{
	return (rw_enter(&lock->prl_rwlock, RW_WRITE | RW_NOSLEEP) == 0);
}

void
pool_lock_rw_leave(union pool_lock *lock)
{
	rw_exit_write(&lock->prl_rwlock);
}

void
pool_lock_rw_assert_locked(union pool_lock *lock)
{
	rw_assert_wrlock(&lock->prl_rwlock);
}

void
pool_lock_rw_assert_unlocked(union pool_lock *lock)
{
	KASSERT(rw_status(&lock->prl_rwlock) != RW_WRITE);
}

int
pool_lock_rw_sleep(void *ident, union pool_lock *lock, int priority,
    const char *wmesg)
{
	return rwsleep_nsec(ident, &lock->prl_rwlock, priority, wmesg, INFSLP);
}

static const struct pool_lock_ops pool_lock_ops_rw = {
	pool_lock_rw_init,
	pool_lock_rw_enter,
	pool_lock_rw_enter_try,
	pool_lock_rw_leave,
	pool_lock_rw_assert_locked,
	pool_lock_rw_assert_unlocked,
	pool_lock_rw_sleep,
};
