/*	$OpenBSD: subr_pool.c,v 1.236 2022/08/14 01:58:28 jsg Exp $	*/
/*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/task.h>
#include <sys/time.h>
#include <sys/timeout.h>
#include <sys/percpu.h>
#include <sys/tracepoint.h>

#include <uvm/uvm_extern.h>

/*
 * Pool resource management utility.
 *
 * Memory is allocated in pages which are split into pieces according to
 * the pool item size. Each page is kept on one of three lists in the
 * pool structure: `pr_emptypages', `pr_fullpages' and `pr_partpages',
 * for empty, full and partially-full pages respectively. The individual
 * pool items are on a linked list headed by `ph_items' in each page
 * header. The memory for building the page list is either taken from
 * the allocated pages themselves (for small pool items) or taken from
 * an internal pool of page headers (`phpool').
 */
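
/*
 * A sketch of typical use (not code from this file; "struct foo" and
 * the pool/wchan names are hypothetical):
 *
 *	struct pool foo_pool;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, IPL_NONE, PR_WAITOK,
 *	    "foopl", NULL);
 *
 *	struct foo *f = pool_get(&foo_pool, PR_WAITOK | PR_ZERO);
 *	... use f ...
 *	pool_put(&foo_pool, f);
 */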

/* List of all pools */
SIMPLEQ_HEAD(, pool) pool_head = SIMPLEQ_HEAD_INITIALIZER(pool_head);

/*
 * Every pool gets a unique serial number assigned to it. If this counter
 * wraps, we're screwed, but we shouldn't create so many pools anyway.
 */
unsigned int pool_serial;
unsigned int pool_count;

/* Lock the previous variables making up the global pool state */
struct rwlock pool_lock = RWLOCK_INITIALIZER("pools");

/* Private pool for page header structures */
struct pool phpool;

struct pool_lock_ops {
	void	(*pl_init)(struct pool *, union pool_lock *,
		    const struct lock_type *);
	void	(*pl_enter)(union pool_lock *);
	int	(*pl_enter_try)(union pool_lock *);
	void	(*pl_leave)(union pool_lock *);
	void	(*pl_assert_locked)(union pool_lock *);
	void	(*pl_assert_unlocked)(union pool_lock *);
	int	(*pl_sleep)(void *, union pool_lock *, int, const char *);
};

static const struct pool_lock_ops pool_lock_ops_mtx;
static const struct pool_lock_ops pool_lock_ops_rw;

#ifdef WITNESS
#define pl_init(pp, pl) do {						\
	static const struct lock_type __lock_type = { .lt_name = #pl };	\
	(pp)->pr_lock_ops->pl_init(pp, pl, &__lock_type);		\
} while (0)
#else /* WITNESS */
#define pl_init(pp, pl) (pp)->pr_lock_ops->pl_init(pp, pl, NULL)
#endif /* WITNESS */

static inline void
pl_enter(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_enter(pl);
}
static inline int
pl_enter_try(struct pool *pp, union pool_lock *pl)
{
	return pp->pr_lock_ops->pl_enter_try(pl);
}
static inline void
pl_leave(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_leave(pl);
}
static inline void
pl_assert_locked(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_assert_locked(pl);
}
static inline void
pl_assert_unlocked(struct pool *pp, union pool_lock *pl)
{
	pp->pr_lock_ops->pl_assert_unlocked(pl);
}
static inline int
pl_sleep(struct pool *pp, void *ident, union pool_lock *lock, int priority,
    const char *wmesg)
{
	return pp->pr_lock_ops->pl_sleep(ident, lock, priority, wmesg);
}

struct pool_item {
	u_long				pi_magic;
	XSIMPLEQ_ENTRY(pool_item)	pi_list;
};
#define POOL_IMAGIC(ph, pi) ((u_long)(pi) ^ (ph)->ph_magic)

struct pool_page_header {
	/* Page headers */
	TAILQ_ENTRY(pool_page_header)
				ph_entry;	/* pool page list */
	XSIMPLEQ_HEAD(, pool_item)
				ph_items;	/* free items on the page */
	RBT_ENTRY(pool_page_header)
				ph_node;	/* off-page page headers */
	unsigned int		ph_nmissing;	/* # of chunks in use */
	caddr_t			ph_page;	/* this page's address */
	caddr_t			ph_colored;	/* page's colored address */
	unsigned long		ph_magic;
	uint64_t		ph_timestamp;
};
#define POOL_MAGICBIT (1 << 3) /* keep away from perturbed low bits */
#define POOL_PHPOISON(ph) ISSET((ph)->ph_magic, POOL_MAGICBIT)

#ifdef MULTIPROCESSOR
struct pool_cache_item {
	struct pool_cache_item	*ci_next;	/* next item in list */
	unsigned long		 ci_nitems;	/* number of items in list */
	TAILQ_ENTRY(pool_cache_item)
				 ci_nextl;	/* entry in list of lists */
};
/* whether the cached item is poisoned is kept in a spare high bit of nitems */
#define POOL_CACHE_ITEM_NITEMS_MASK	0x7ffffffUL
#define POOL_CACHE_ITEM_NITEMS_POISON	0x8000000UL

#define POOL_CACHE_ITEM_NITEMS(_ci)					\
	((_ci)->ci_nitems & POOL_CACHE_ITEM_NITEMS_MASK)

#define POOL_CACHE_ITEM_POISONED(_ci)					\
	ISSET((_ci)->ci_nitems, POOL_CACHE_ITEM_NITEMS_POISON)
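
/*
 * For example, the head of a three-item list whose items were poisoned
 * on free would (illustratively) carry
 * ci_nitems == (3 | POOL_CACHE_ITEM_NITEMS_POISON).
 */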

struct pool_cache {
	struct pool_cache_item	*pc_actv;	/* active list of items */
	unsigned long		 pc_nactv;	/* actv head nitems cache */
	struct pool_cache_item	*pc_prev;	/* previous list of items */

	uint64_t		 pc_gen;	/* generation number */
	uint64_t		 pc_nget;	/* # of successful requests */
	uint64_t		 pc_nfail;	/* # of unsuccessful reqs */
	uint64_t		 pc_nput;	/* # of releases */
	uint64_t		 pc_nlget;	/* # of list requests */
	uint64_t		 pc_nlfail;	/* # of fails getting a list */
	uint64_t		 pc_nlput;	/* # of list releases */

	int			 pc_nout;
};

void	*pool_cache_get(struct pool *);
void	 pool_cache_put(struct pool *, void *);
void	 pool_cache_destroy(struct pool *);
void	 pool_cache_gc(struct pool *);
#endif
void	 pool_cache_pool_info(struct pool *, struct kinfo_pool *);
int	 pool_cache_info(struct pool *, void *, size_t *);
int	 pool_cache_cpus_info(struct pool *, void *, size_t *);

#ifdef POOL_DEBUG
int	pool_debug = 1;
#else
int	pool_debug = 0;
#endif

#define POOL_INPGHDR(pp) ((pp)->pr_phoffset != 0)

struct pool_page_header *
	 pool_p_alloc(struct pool *, int, int *);
void	 pool_p_insert(struct pool *, struct pool_page_header *);
void	 pool_p_remove(struct pool *, struct pool_page_header *);
void	 pool_p_free(struct pool *, struct pool_page_header *);

void	 pool_update_curpage(struct pool *);
void	*pool_do_get(struct pool *, int, int *);
void	 pool_do_put(struct pool *, void *);
int	 pool_chk_page(struct pool *, struct pool_page_header *, int);
int	 pool_chk(struct pool *);
void	 pool_get_done(struct pool *, void *, void *);
void	 pool_runqueue(struct pool *, int);

void	*pool_allocator_alloc(struct pool *, int, int *);
void	 pool_allocator_free(struct pool *, void *);

/*
 * The default pool allocator.
 */
void	*pool_page_alloc(struct pool *, int, int *);
void	 pool_page_free(struct pool *, void *);

/*
 * safe for interrupts; this is the default allocator
 */
struct pool_allocator pool_allocator_single = {
	pool_page_alloc,
	pool_page_free,
	POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
};

void	*pool_multi_alloc(struct pool *, int, int *);
void	 pool_multi_free(struct pool *, void *);

struct pool_allocator pool_allocator_multi = {
	pool_multi_alloc,
	pool_multi_free,
	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
};

void	*pool_multi_alloc_ni(struct pool *, int, int *);
void	 pool_multi_free_ni(struct pool *, void *);

struct pool_allocator pool_allocator_multi_ni = {
	pool_multi_alloc_ni,
	pool_multi_free_ni,
	POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
};
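
/*
 * A pool may also name one of these allocators (or provide its own)
 * explicitly at pool_init() time; a sketch, with a hypothetical pool
 * and wchan name:
 *
 *	pool_init(&big_pool, 3 * PAGE_SIZE, 0, IPL_NONE, PR_WAITOK,
 *	    "bigpl", &pool_allocator_multi_ni);
 */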

#ifdef DDB
void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
void	 pool_print1(struct pool *, const char *, int (*)(const char *, ...)
	     __attribute__((__format__(__kprintf__,1,2))));
#endif

/* stale page garbage collectors */
void	pool_gc_sched(void *);
struct timeout pool_gc_tick = TIMEOUT_INITIALIZER(pool_gc_sched, NULL);
void	pool_gc_pages(void *);
struct task pool_gc_task = TASK_INITIALIZER(pool_gc_pages, NULL);

#define POOL_WAIT_FREE	SEC_TO_NSEC(1)
#define POOL_WAIT_GC	SEC_TO_NSEC(8)

RBT_PROTOTYPE(phtree, pool_page_header, ph_node, phtree_compare);

static inline int
phtree_compare(const struct pool_page_header *a,
    const struct pool_page_header *b)
{
	vaddr_t va = (vaddr_t)a->ph_page;
	vaddr_t vb = (vaddr_t)b->ph_page;

	/*
	 * Compare in reverse (descending page address) order. This is
	 * what lets RBT_NFIND in pr_find_pagehead() find the header
	 * with the greatest page address that is still <= the item
	 * address being looked up.
	 */
	if (vb < va)
		return (-1);
	if (vb > va)
		return (1);

	return (0);
}

RBT_GENERATE(phtree, pool_page_header, ph_node, phtree_compare);

/*
 * Return the pool page header based on page address.
 */
static inline struct pool_page_header *
pr_find_pagehead(struct pool *pp, void *v)
{
	struct pool_page_header *ph, key;

	if (POOL_INPGHDR(pp)) {
		caddr_t page;

		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);

		return ((struct pool_page_header *)(page + pp->pr_phoffset));
	}

	key.ph_page = v;
	ph = RBT_NFIND(phtree, &pp->pr_phtree, &key);
	if (ph == NULL)
		panic("%s: %s: page header missing", __func__, pp->pr_wchan);

	KASSERT(ph->ph_page <= (caddr_t)v);
	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v)
		panic("%s: %s: incorrect page", __func__, pp->pr_wchan);

	return (ph);
}

/*
 * Initialize the given pool resource structure.
 *
 * We export this routine to allow other kernel parts to declare
 * static pools that must be initialized before malloc() is available.
 */
void
pool_init(struct pool *pp, size_t size, u_int align, int ipl, int flags,
    const char *wchan, struct pool_allocator *palloc)
{
	int off = 0, space;
	unsigned int pgsize = PAGE_SIZE, items;
	size_t pa_pagesz;
#ifdef DIAGNOSTIC
	struct pool *iter;
#endif

	if (align == 0)
		align = ALIGN(1);

	if (size < sizeof(struct pool_item))
		size = sizeof(struct pool_item);

	size = roundup(size, align);

	while (size * 8 > pgsize)
		pgsize <<= 1;

	if (palloc == NULL) {
		if (pgsize > PAGE_SIZE) {
			palloc = ISSET(flags, PR_WAITOK) ?
			    &pool_allocator_multi_ni : &pool_allocator_multi;
		} else
			palloc = &pool_allocator_single;

		pa_pagesz = palloc->pa_pagesz;
	} else {
		size_t pgsizes;

		pa_pagesz = palloc->pa_pagesz;
		if (pa_pagesz == 0)
			pa_pagesz = POOL_ALLOC_DEFAULT;

		pgsizes = pa_pagesz & ~POOL_ALLOC_ALIGNED;

		/* make sure the allocator can fit at least one item */
		if (size > pgsizes) {
			panic("%s: pool %s item size 0x%zx > "
			    "allocator %p sizes 0x%zx", __func__, wchan,
			    size, palloc, pgsizes);
		}

		/* shrink pgsize until it fits into the range */
		while (!ISSET(pgsizes, pgsize))
			pgsize >>= 1;
	}
	KASSERT(ISSET(pa_pagesz, pgsize));

	items = pgsize / size;

	/*
	 * Decide whether to put the page header off page to avoid
	 * wasting too large a part of the page. Off-page page headers
	 * go into an RB tree, so we can match a returned item with
	 * its header based on the page address.
	 */
	if (ISSET(pa_pagesz, POOL_ALLOC_ALIGNED)) {
		if (pgsize - (size * items) >
		    sizeof(struct pool_page_header)) {
			off = pgsize - sizeof(struct pool_page_header);
		} else if (sizeof(struct pool_page_header) * 2 >= size) {
			off = pgsize - sizeof(struct pool_page_header);
			items = off / size;
		}
	}

	KASSERT(items > 0);

	/*
	 * Initialize the pool structure.
	 */
	memset(pp, 0, sizeof(*pp));
	if (ISSET(flags, PR_RWLOCK)) {
		KASSERT(flags & PR_WAITOK);
		pp->pr_lock_ops = &pool_lock_ops_rw;
	} else
		pp->pr_lock_ops = &pool_lock_ops_mtx;
	TAILQ_INIT(&pp->pr_emptypages);
	TAILQ_INIT(&pp->pr_fullpages);
	TAILQ_INIT(&pp->pr_partpages);
	pp->pr_curpage = NULL;
	pp->pr_npages = 0;
	pp->pr_minitems = 0;
	pp->pr_minpages = 0;
	pp->pr_maxpages = 8;
	pp->pr_size = size;
	pp->pr_pgsize = pgsize;
	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
	pp->pr_phoffset = off;
	pp->pr_itemsperpage = items;
	pp->pr_wchan = wchan;
	pp->pr_alloc = palloc;
	pp->pr_nitems = 0;
	pp->pr_nout = 0;
	pp->pr_hardlimit = UINT_MAX;
	pp->pr_hardlimit_warning = NULL;
	pp->pr_hardlimit_ratecap.tv_sec = 0;
	pp->pr_hardlimit_ratecap.tv_usec = 0;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;
	RBT_INIT(phtree, &pp->pr_phtree);

	/*
	 * Use the space between the chunks and the page header
	 * for cache coloring.
	 */
	space = POOL_INPGHDR(pp) ? pp->pr_phoffset : pp->pr_pgsize;
	space -= pp->pr_itemsperpage * pp->pr_size;
	pp->pr_align = align;
	pp->pr_maxcolors = (space / align) + 1;

	pp->pr_nget = 0;
	pp->pr_nfail = 0;
	pp->pr_nput = 0;
	pp->pr_npagealloc = 0;
	pp->pr_npagefree = 0;
	pp->pr_hiwat = 0;
	pp->pr_nidle = 0;

	pp->pr_ipl = ipl;
	pp->pr_flags = flags;

	pl_init(pp, &pp->pr_lock);
	pl_init(pp, &pp->pr_requests_lock);
	TAILQ_INIT(&pp->pr_requests);

	if (phpool.pr_size == 0) {
		pool_init(&phpool, sizeof(struct pool_page_header), 0,
		    IPL_HIGH, 0, "phpool", NULL);

		/* make sure phpool won't "recurse" */
		KASSERT(POOL_INPGHDR(&phpool));
	}

	/* pglistalloc/constraint parameters */
	pp->pr_crange = &kp_dirty;

	/* Insert this into the list of all pools. */
	rw_enter_write(&pool_lock);
#ifdef DIAGNOSTIC
	SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
		if (iter == pp)
			panic("%s: pool %s already on list", __func__, wchan);
	}
#endif

	pp->pr_serial = ++pool_serial;
	if (pool_serial == 0)
		panic("%s: too much uptime", __func__);

	SIMPLEQ_INSERT_HEAD(&pool_head, pp, pr_poollist);
	pool_count++;
	rw_exit_write(&pool_lock);
}

/*
 * Decommission a pool resource.
 */
void
pool_destroy(struct pool *pp)
{
	struct pool_page_header *ph;
	struct pool *prev, *iter;

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL)
		pool_cache_destroy(pp);
#endif

#ifdef DIAGNOSTIC
	if (pp->pr_nout != 0)
		panic("%s: pool busy: still out: %u", __func__, pp->pr_nout);
#endif

	/* Remove from global pool list */
	rw_enter_write(&pool_lock);
	pool_count--;
	if (pp == SIMPLEQ_FIRST(&pool_head))
		SIMPLEQ_REMOVE_HEAD(&pool_head, pr_poollist);
	else {
		prev = SIMPLEQ_FIRST(&pool_head);
		SIMPLEQ_FOREACH(iter, &pool_head, pr_poollist) {
			if (iter == pp) {
				SIMPLEQ_REMOVE_AFTER(&pool_head, prev,
				    pr_poollist);
				break;
			}
			prev = iter;
		}
	}
	rw_exit_write(&pool_lock);

	/* Remove all pages */
	while ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL) {
		pl_enter(pp, &pp->pr_lock);
		pool_p_remove(pp, ph);
		pl_leave(pp, &pp->pr_lock);
		pool_p_free(pp, ph);
	}
	KASSERT(TAILQ_EMPTY(&pp->pr_fullpages));
	KASSERT(TAILQ_EMPTY(&pp->pr_partpages));
}

void
pool_request_init(struct pool_request *pr,
    void (*handler)(struct pool *, void *, void *), void *cookie)
{
	pr->pr_handler = handler;
	pr->pr_cookie = cookie;
	pr->pr_item = NULL;
}

void
pool_request(struct pool *pp, struct pool_request *pr)
{
	pl_enter(pp, &pp->pr_requests_lock);
	TAILQ_INSERT_TAIL(&pp->pr_requests, pr, pr_entry);
	pool_runqueue(pp, PR_NOWAIT);
	pl_leave(pp, &pp->pr_requests_lock);
}
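
/*
 * A sketch of the asynchronous interface above (the handler, softc,
 * and field names are hypothetical). The handler is called from
 * pool_runqueue() once an item can be allocated:
 *
 *	void
 *	foo_fill(struct pool *pp, void *cookie, void *item)
 *	{
 *		struct foo_softc *sc = cookie;
 *		... hand "item" over to sc ...
 *	}
 *
 *	pool_request_init(&sc->sc_request, foo_fill, sc);
 *	pool_request(&foo_pool, &sc->sc_request);
 */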

struct pool_get_memory {
	union pool_lock lock;
	void * volatile v;
};

/*
 * Grab an item from the pool.
 */
void *
pool_get(struct pool *pp, int flags)
{
	void *v = NULL;
	int slowdown = 0;

	KASSERT(flags & (PR_WAITOK | PR_NOWAIT));
	if (pp->pr_flags & PR_RWLOCK)
		KASSERT(flags & PR_WAITOK);

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL) {
		v = pool_cache_get(pp);
		if (v != NULL)
			goto good;
	}
#endif

	pl_enter(pp, &pp->pr_lock);
	if (pp->pr_nout >= pp->pr_hardlimit) {
		if (ISSET(flags, PR_NOWAIT|PR_LIMITFAIL))
			goto fail;
	} else if ((v = pool_do_get(pp, flags, &slowdown)) == NULL) {
		if (ISSET(flags, PR_NOWAIT))
			goto fail;
	}
	pl_leave(pp, &pp->pr_lock);

	if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK))
		yield();

	if (v == NULL) {
		struct pool_get_memory mem = { .v = NULL };
		struct pool_request pr;

#ifdef DIAGNOSTIC
		if (ISSET(flags, PR_WAITOK) && curproc == &proc0)
			panic("%s: cannot sleep for memory during boot",
			    __func__);
#endif
		pl_init(pp, &mem.lock);
		pool_request_init(&pr, pool_get_done, &mem);
		pool_request(pp, &pr);

		pl_enter(pp, &mem.lock);
		while (mem.v == NULL)
			pl_sleep(pp, &mem, &mem.lock, PSWP, pp->pr_wchan);
		pl_leave(pp, &mem.lock);

		v = mem.v;
	}

#ifdef MULTIPROCESSOR
good:
#endif
	if (ISSET(flags, PR_ZERO))
		memset(v, 0, pp->pr_size);

	TRACEPOINT(uvm, pool_get, pp, v, flags);

	return (v);

fail:
	pp->pr_nfail++;
	pl_leave(pp, &pp->pr_lock);
	return (NULL);
}

void
pool_get_done(struct pool *pp, void *xmem, void *v)
{
	struct pool_get_memory *mem = xmem;

	pl_enter(pp, &mem->lock);
	mem->v = v;
	pl_leave(pp, &mem->lock);

	wakeup_one(mem);
}

void
pool_runqueue(struct pool *pp, int flags)
{
	struct pool_requests prl = TAILQ_HEAD_INITIALIZER(prl);
	struct pool_request *pr;

	pl_assert_unlocked(pp, &pp->pr_lock);
	pl_assert_locked(pp, &pp->pr_requests_lock);

	if (pp->pr_requesting++)
		return;

	do {
		pp->pr_requesting = 1;

		TAILQ_CONCAT(&prl, &pp->pr_requests, pr_entry);
		if (TAILQ_EMPTY(&prl))
			continue;

		pl_leave(pp, &pp->pr_requests_lock);

		pl_enter(pp, &pp->pr_lock);
		pr = TAILQ_FIRST(&prl);
		while (pr != NULL) {
			int slowdown = 0;

			if (pp->pr_nout >= pp->pr_hardlimit)
				break;

			pr->pr_item = pool_do_get(pp, flags, &slowdown);
			if (pr->pr_item == NULL) /* || slowdown ? */
				break;

			pr = TAILQ_NEXT(pr, pr_entry);
		}
		pl_leave(pp, &pp->pr_lock);

		while ((pr = TAILQ_FIRST(&prl)) != NULL &&
		    pr->pr_item != NULL) {
			TAILQ_REMOVE(&prl, pr, pr_entry);
			(*pr->pr_handler)(pp, pr->pr_cookie, pr->pr_item);
		}

		pl_enter(pp, &pp->pr_requests_lock);
	} while (--pp->pr_requesting);

	TAILQ_CONCAT(&pp->pr_requests, &prl, pr_entry);
}

void *
pool_do_get(struct pool *pp, int flags, int *slowdown)
{
	struct pool_item *pi;
	struct pool_page_header *ph;

	pl_assert_locked(pp, &pp->pr_lock);

	splassert(pp->pr_ipl);

	/*
	 * Account for this item now to avoid races if we need to give up
	 * pr_lock to allocate a page.
	 */
	pp->pr_nout++;

	if (pp->pr_curpage == NULL) {
		pl_leave(pp, &pp->pr_lock);
		ph = pool_p_alloc(pp, flags, slowdown);
		pl_enter(pp, &pp->pr_lock);

		if (ph == NULL) {
			pp->pr_nout--;
			return (NULL);
		}

		pool_p_insert(pp, ph);
	}

	ph = pp->pr_curpage;
	pi = XSIMPLEQ_FIRST(&ph->ph_items);
	if (__predict_false(pi == NULL))
		panic("%s: %s: page empty", __func__, pp->pr_wchan);

	if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
		panic("%s: %s free list modified: "
		    "page %p; item addr %p; offset 0x%x=0x%lx != 0x%lx",
		    __func__, pp->pr_wchan, ph->ph_page, pi,
		    0, pi->pi_magic, POOL_IMAGIC(ph, pi));
	}

	XSIMPLEQ_REMOVE_HEAD(&ph->ph_items, pi_list);

#ifdef DIAGNOSTIC
	if (pool_debug && POOL_PHPOISON(ph)) {
		size_t pidx;
		uint32_t pval;
		if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
		    &pidx, &pval)) {
			int *ip = (int *)(pi + 1);
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%zx=0x%x",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    (pidx * sizeof(int)) + sizeof(*pi), ip[pidx]);
		}
	}
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing++ == 0) {
		/*
		 * This page was previously empty. Move it to the list of
		 * partially-full pages. This page is already curpage.
		 */
		TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);

		pp->pr_nidle--;
	}

	if (ph->ph_nmissing == pp->pr_itemsperpage) {
		/*
		 * This page is now full. Move it to the full list
		 * and select a new current page.
		 */
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_fullpages, ph, ph_entry);
		pool_update_curpage(pp);
	}

	pp->pr_nget++;

	return (pi);
}

/*
 * Return resource to the pool.
 */
void
pool_put(struct pool *pp, void *v)
{
	struct pool_page_header *ph, *freeph = NULL;

#ifdef DIAGNOSTIC
	if (v == NULL)
		panic("%s: NULL item", __func__);
#endif

	TRACEPOINT(uvm, pool_put, pp, v);

#ifdef MULTIPROCESSOR
	if (pp->pr_cache != NULL && TAILQ_EMPTY(&pp->pr_requests)) {
		pool_cache_put(pp, v);
		return;
	}
#endif

	pl_enter(pp, &pp->pr_lock);

	pool_do_put(pp, v);

	pp->pr_nout--;
	pp->pr_nput++;

	/* is it time to free a page? */
	if (pp->pr_nidle > pp->pr_maxpages &&
	    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
	    getnsecuptime() - ph->ph_timestamp > POOL_WAIT_FREE) {
		freeph = ph;
		pool_p_remove(pp, freeph);
	}

	pl_leave(pp, &pp->pr_lock);

	if (freeph != NULL)
		pool_p_free(pp, freeph);

	pool_wakeup(pp);
}

void
pool_wakeup(struct pool *pp)
{
	if (!TAILQ_EMPTY(&pp->pr_requests)) {
		pl_enter(pp, &pp->pr_requests_lock);
		pool_runqueue(pp, PR_NOWAIT);
		pl_leave(pp, &pp->pr_requests_lock);
	}
}

void
pool_do_put(struct pool *pp, void *v)
{
	struct pool_item *pi = v;
	struct pool_page_header *ph;

	splassert(pp->pr_ipl);

	ph = pr_find_pagehead(pp, v);

#ifdef DIAGNOSTIC
	if (pool_debug) {
		struct pool_item *qi;
		XSIMPLEQ_FOREACH(qi, &ph->ph_items, pi_list) {
			if (pi == qi) {
				panic("%s: %s: double pool_put: %p", __func__,
				    pp->pr_wchan, pi);
			}
		}
	}
#endif /* DIAGNOSTIC */

	pi->pi_magic = POOL_IMAGIC(ph, pi);
	XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);
#ifdef DIAGNOSTIC
	if (POOL_PHPOISON(ph))
		poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

	if (ph->ph_nmissing-- == pp->pr_itemsperpage) {
		/*
		 * The page was previously completely full, move it to the
		 * partially-full list.
		 */
		TAILQ_REMOVE(&pp->pr_fullpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_partpages, ph, ph_entry);
	}

	if (ph->ph_nmissing == 0) {
		/*
		 * The page is now empty, so move it to the empty page list.
		 */
		pp->pr_nidle++;

		ph->ph_timestamp = getnsecuptime();
		TAILQ_REMOVE(&pp->pr_partpages, ph, ph_entry);
		TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
		pool_update_curpage(pp);
	}
}

/*
 * Add N items to the pool.
 */
int
pool_prime(struct pool *pp, int n)
{
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);
	struct pool_page_header *ph;
	int newpages;

	newpages = roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	while (newpages-- > 0) {
		int slowdown = 0;

		ph = pool_p_alloc(pp, PR_NOWAIT, &slowdown);
		if (ph == NULL) /* or slowdown? */
			break;

		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
	}

	pl_enter(pp, &pp->pr_lock);
	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_entry);
		pool_p_insert(pp, ph);
	}
	pl_leave(pp, &pp->pr_lock);

	return (0);
}

struct pool_page_header *
pool_p_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct pool_page_header *ph;
	struct pool_item *pi;
	caddr_t addr;
	unsigned int order;
	int o;
	int n;

	pl_assert_unlocked(pp, &pp->pr_lock);
	KASSERT(pp->pr_size >= sizeof(*pi));

	addr = pool_allocator_alloc(pp, flags, slowdown);
	if (addr == NULL)
		return (NULL);

	if (POOL_INPGHDR(pp))
		ph = (struct pool_page_header *)(addr + pp->pr_phoffset);
	else {
		ph = pool_get(&phpool, flags);
		if (ph == NULL) {
			pool_allocator_free(pp, addr);
			return (NULL);
		}
	}

	XSIMPLEQ_INIT(&ph->ph_items);
	ph->ph_page = addr;
	addr += pp->pr_align * (pp->pr_npagealloc % pp->pr_maxcolors);
	ph->ph_colored = addr;
	ph->ph_nmissing = 0;
	arc4random_buf(&ph->ph_magic, sizeof(ph->ph_magic));
#ifdef DIAGNOSTIC
	/* use a bit in ph_magic to record if we poison page items */
	if (pool_debug)
		SET(ph->ph_magic, POOL_MAGICBIT);
	else
		CLR(ph->ph_magic, POOL_MAGICBIT);
#endif /* DIAGNOSTIC */

	n = pp->pr_itemsperpage;
	o = 32;
	while (n--) {
		pi = (struct pool_item *)addr;
		pi->pi_magic = POOL_IMAGIC(ph, pi);

		if (o == 32) {
			order = arc4random();
			o = 0;
		}
		if (ISSET(order, 1U << o++))
			XSIMPLEQ_INSERT_TAIL(&ph->ph_items, pi, pi_list);
		else
			XSIMPLEQ_INSERT_HEAD(&ph->ph_items, pi, pi_list);

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph))
			poison_mem(pi + 1, pp->pr_size - sizeof(*pi));
#endif /* DIAGNOSTIC */

		addr += pp->pr_size;
	}

	return (ph);
}

void
pool_p_free(struct pool *pp, struct pool_page_header *ph)
{
	struct pool_item *pi;

	pl_assert_unlocked(pp, &pp->pr_lock);
	KASSERT(ph->ph_nmissing == 0);

	XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
		if (__predict_false(pi->pi_magic != POOL_IMAGIC(ph, pi))) {
			panic("%s: %s free list modified: "
			    "page %p; item addr %p; offset 0x%x=0x%lx",
			    __func__, pp->pr_wchan, ph->ph_page, pi,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				panic("%s: %s free list modified: "
				    "page %p; item addr %p; offset 0x%zx=0x%x",
				    __func__, pp->pr_wchan, ph->ph_page, pi,
				    pidx * sizeof(int), ip[pidx]);
			}
		}
#endif
	}

	pool_allocator_free(pp, ph->ph_page);

	if (!POOL_INPGHDR(pp))
		pool_put(&phpool, ph);
}

void
pool_p_insert(struct pool *pp, struct pool_page_header *ph)
{
	pl_assert_locked(pp, &pp->pr_lock);

	/* If the pool was depleted, point at the new page */
	if (pp->pr_curpage == NULL)
		pp->pr_curpage = ph;

	TAILQ_INSERT_TAIL(&pp->pr_emptypages, ph, ph_entry);
	if (!POOL_INPGHDR(pp))
		RBT_INSERT(phtree, &pp->pr_phtree, ph);

	pp->pr_nitems += pp->pr_itemsperpage;
	pp->pr_nidle++;

	pp->pr_npagealloc++;
	if (++pp->pr_npages > pp->pr_hiwat)
		pp->pr_hiwat = pp->pr_npages;
}

void
pool_p_remove(struct pool *pp, struct pool_page_header *ph)
{
	pl_assert_locked(pp, &pp->pr_lock);

	pp->pr_npagefree++;
	pp->pr_npages--;
	pp->pr_nidle--;
	pp->pr_nitems -= pp->pr_itemsperpage;

	if (!POOL_INPGHDR(pp))
		RBT_REMOVE(phtree, &pp->pr_phtree, ph);
	TAILQ_REMOVE(&pp->pr_emptypages, ph, ph_entry);

	pool_update_curpage(pp);
}

void
pool_update_curpage(struct pool *pp)
{
	pp->pr_curpage = TAILQ_LAST(&pp->pr_partpages, pool_pagelist);
	if (pp->pr_curpage == NULL) {
		pp->pr_curpage = TAILQ_LAST(&pp->pr_emptypages, pool_pagelist);
	}
}

void
pool_setlowat(struct pool *pp, int n)
{
	int prime = 0;

	pl_enter(pp, &pp->pr_lock);
	pp->pr_minitems = n;
	pp->pr_minpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;

	if (pp->pr_nitems < n)
		prime = n - pp->pr_nitems;
	pl_leave(pp, &pp->pr_lock);

	if (prime > 0)
		pool_prime(pp, prime);
}

void
pool_sethiwat(struct pool *pp, int n)
{
	pp->pr_maxpages = (n == 0)
	    ? 0
	    : roundup(n, pp->pr_itemsperpage) / pp->pr_itemsperpage;
}

int
pool_sethardlimit(struct pool *pp, u_int n, const char *warnmsg, int ratecap)
{
	int error = 0;

	if (n < pp->pr_nout) {
		error = EINVAL;
		goto done;
	}

	pp->pr_hardlimit = n;
	pp->pr_hardlimit_warning = warnmsg;
	pp->pr_hardlimit_ratecap.tv_sec = ratecap;
	pp->pr_hardlimit_warning_last.tv_sec = 0;
	pp->pr_hardlimit_warning_last.tv_usec = 0;

done:
	return (error);
}

void
pool_set_constraints(struct pool *pp, const struct kmem_pa_mode *mode)
{
	pp->pr_crange = mode;
}
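
/*
 * For example, a pool whose pages must be usable for DMA could
 * (hypothetically) be constrained after pool_init() with:
 *
 *	pool_set_constraints(&foo_pool, &kp_dma_contig);
 */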

/*
 * Release all complete pages that have not been used recently.
 *
 * Returns non-zero if any pages have been reclaimed.
 */
int
pool_reclaim(struct pool *pp)
{
	struct pool_page_header *ph, *phnext;
	struct pool_pagelist pl = TAILQ_HEAD_INITIALIZER(pl);

	pl_enter(pp, &pp->pr_lock);
	for (ph = TAILQ_FIRST(&pp->pr_emptypages); ph != NULL; ph = phnext) {
		phnext = TAILQ_NEXT(ph, ph_entry);

		/* Check our minimum page claim */
		if (pp->pr_npages <= pp->pr_minpages)
			break;

		/*
		 * If freeing this page would put us below
		 * the low water mark, stop now.
		 */
		if ((pp->pr_nitems - pp->pr_itemsperpage) <
		    pp->pr_minitems)
			break;

		pool_p_remove(pp, ph);
		TAILQ_INSERT_TAIL(&pl, ph, ph_entry);
	}
	pl_leave(pp, &pp->pr_lock);

	if (TAILQ_EMPTY(&pl))
		return (0);

	while ((ph = TAILQ_FIRST(&pl)) != NULL) {
		TAILQ_REMOVE(&pl, ph, ph_entry);
		pool_p_free(pp, ph);
	}

	return (1);
}

/*
 * Release all complete pages that have not been used recently
 * from all pools.
 */
void
pool_reclaim_all(void)
{
	struct pool *pp;

	rw_enter_read(&pool_lock);
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist)
		pool_reclaim(pp);
	rw_exit_read(&pool_lock);
}

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_output.h>

/*
 * Diagnostic helpers.
 */
void
pool_printit(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	pool_print1(pp, modif, pr);
}

void
pool_print_pagelist(struct pool_pagelist *pl,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_page_header *ph;
	struct pool_item *pi;

	TAILQ_FOREACH(ph, pl, ph_entry) {
		(*pr)("\t\tpage %p, color %p, nmissing %d\n",
		    ph->ph_page, ph->ph_colored, ph->ph_nmissing);
		XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
			if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
				(*pr)("\t\t\titem %p, magic 0x%lx\n",
				    pi, pi->pi_magic);
			}
		}
	}
}

void
pool_print1(struct pool *pp, const char *modif,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct pool_page_header *ph;
	int print_pagelist = 0;
	char c;

	while ((c = *modif++) != '\0') {
		if (c == 'p')
			print_pagelist = 1;
		modif++;
	}

	(*pr)("POOL %s: size %u maxcolors %u\n", pp->pr_wchan, pp->pr_size,
	    pp->pr_maxcolors);
	(*pr)("\talloc %p\n", pp->pr_alloc);
	(*pr)("\tminitems %u, minpages %u, maxpages %u, npages %u\n",
	    pp->pr_minitems, pp->pr_minpages, pp->pr_maxpages, pp->pr_npages);
	(*pr)("\titemsperpage %u, nitems %u, nout %u, hardlimit %u\n",
	    pp->pr_itemsperpage, pp->pr_nitems, pp->pr_nout, pp->pr_hardlimit);

	(*pr)("\n\tnget %lu, nfail %lu, nput %lu\n",
	    pp->pr_nget, pp->pr_nfail, pp->pr_nput);
	(*pr)("\tnpagealloc %lu, npagefree %lu, hiwat %u, nidle %lu\n",
	    pp->pr_npagealloc, pp->pr_npagefree, pp->pr_hiwat, pp->pr_nidle);

	if (print_pagelist == 0)
		return;

	if ((ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL)
		(*pr)("\n\tempty page list:\n");
	pool_print_pagelist(&pp->pr_emptypages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_fullpages)) != NULL)
		(*pr)("\n\tfull page list:\n");
	pool_print_pagelist(&pp->pr_fullpages, pr);
	if ((ph = TAILQ_FIRST(&pp->pr_partpages)) != NULL)
		(*pr)("\n\tpartial-page list:\n");
	pool_print_pagelist(&pp->pr_partpages, pr);

	if (pp->pr_curpage == NULL)
		(*pr)("\tno current page\n");
	else
		(*pr)("\tcurpage %p\n", pp->pr_curpage->ph_page);
}

void
db_show_all_pools(db_expr_t expr, int haddr, db_expr_t count, char *modif)
{
	struct pool *pp;
	char maxp[16];
	int ovflw;
	char mode;

	mode = modif[0];
	if (mode != '\0' && mode != 'a') {
		db_printf("usage: show all pools [/a]\n");
		return;
	}

	if (mode == '\0')
		db_printf("%-10s%4s%9s%5s%9s%6s%6s%6s%6s%6s%6s%5s\n",
		    "Name",
		    "Size",
		    "Requests",
		    "Fail",
		    "Releases",
		    "Pgreq",
		    "Pgrel",
		    "Npage",
		    "Hiwat",
		    "Minpg",
		    "Maxpg",
		    "Idle");
	else
		db_printf("%-12s %18s %18s\n",
		    "Name", "Address", "Allocator");

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (mode == 'a') {
			db_printf("%-12s %18p %18p\n", pp->pr_wchan, pp,
			    pp->pr_alloc);
			continue;
		}

		if (!pp->pr_nget)
			continue;

		if (pp->pr_maxpages == UINT_MAX)
			snprintf(maxp, sizeof maxp, "inf");
		else
			snprintf(maxp, sizeof maxp, "%u", pp->pr_maxpages);

#define PRWORD(ovflw, fmt, width, fixed, val) do {	\
	(ovflw) += db_printf((fmt),			\
	    (width) - (fixed) - (ovflw) > 0 ?		\
	    (width) - (fixed) - (ovflw) : 0,		\
	    (val)) - (width);				\
	if ((ovflw) < 0)				\
		(ovflw) = 0;				\
} while (/* CONSTCOND */0)

		ovflw = 0;
		PRWORD(ovflw, "%-*s", 10, 0, pp->pr_wchan);
		PRWORD(ovflw, " %*u", 4, 1, pp->pr_size);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nget);
		PRWORD(ovflw, " %*lu", 5, 1, pp->pr_nfail);
		PRWORD(ovflw, " %*lu", 9, 1, pp->pr_nput);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagealloc);
		PRWORD(ovflw, " %*lu", 6, 1, pp->pr_npagefree);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_npages);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_hiwat);
		PRWORD(ovflw, " %*d", 6, 1, pp->pr_minpages);
		PRWORD(ovflw, " %*s", 6, 1, maxp);
		PRWORD(ovflw, " %*lu\n", 5, 1, pp->pr_nidle);

		pool_chk(pp);
	}
}
#endif /* DDB */

#if defined(POOL_DEBUG) || defined(DDB)
int
pool_chk_page(struct pool *pp, struct pool_page_header *ph, int expected)
{
	struct pool_item *pi;
	caddr_t page;
	int n;
	const char *label = pp->pr_wchan;

	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
	if (page != ph->ph_page && POOL_INPGHDR(pp)) {
		printf("%s: ", label);
		printf("pool(%p:%s): page inconsistency: page %p; "
		    "at page head addr %p (p %p)\n",
		    pp, pp->pr_wchan, ph->ph_page, ph, page);
		return 1;
	}

	for (pi = XSIMPLEQ_FIRST(&ph->ph_items), n = 0;
	    pi != NULL;
	    pi = XSIMPLEQ_NEXT(&ph->ph_items, pi, pi_list), n++) {
		if ((caddr_t)pi < ph->ph_page ||
		    (caddr_t)pi >= ph->ph_page + pp->pr_pgsize) {
			printf("%s: ", label);
			printf("pool(%p:%s): page inconsistency: page %p;"
			    " item ordinal %d; addr %p\n", pp,
			    pp->pr_wchan, ph->ph_page, n, pi);
			return (1);
		}

		if (pi->pi_magic != POOL_IMAGIC(ph, pi)) {
			printf("%s: ", label);
			printf("pool(%p:%s): free list modified: "
			    "page %p; item ordinal %d; addr %p "
			    "(p %p); offset 0x%x=0x%lx\n",
			    pp, pp->pr_wchan, ph->ph_page, n, pi, page,
			    0, pi->pi_magic);
		}

#ifdef DIAGNOSTIC
		if (POOL_PHPOISON(ph)) {
			size_t pidx;
			uint32_t pval;
			if (poison_check(pi + 1, pp->pr_size - sizeof(*pi),
			    &pidx, &pval)) {
				int *ip = (int *)(pi + 1);
				printf("pool(%s): free list modified: "
				    "page %p; item ordinal %d; addr %p "
				    "(p %p); offset 0x%zx=0x%x\n",
				    pp->pr_wchan, ph->ph_page, n, pi,
				    page, pidx * sizeof(int), ip[pidx]);
			}
		}
#endif /* DIAGNOSTIC */
	}
	if (n + ph->ph_nmissing != pp->pr_itemsperpage) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d items per page\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    pp->pr_itemsperpage);
		return 1;
	}
	if (expected >= 0 && n != expected) {
		printf("pool(%p:%s): page inconsistency: page %p;"
		    " %d on list, %d missing, %d expected\n", pp,
		    pp->pr_wchan, ph->ph_page, n, ph->ph_nmissing,
		    expected);
		return 1;
	}
	return 0;
}

int
pool_chk(struct pool *pp)
{
	struct pool_page_header *ph;
	int r = 0;

	TAILQ_FOREACH(ph, &pp->pr_emptypages, ph_entry)
		r += pool_chk_page(pp, ph, pp->pr_itemsperpage);
	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry)
		r += pool_chk_page(pp, ph, 0);
	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry)
		r += pool_chk_page(pp, ph, -1);

	return (r);
}
#endif /* defined(POOL_DEBUG) || defined(DDB) */

#ifdef DDB
void
pool_walk(struct pool *pp, int full,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))),
    void (*func)(void *, int, int (*)(const char *, ...)
	__attribute__((__format__(__kprintf__,1,2)))))
{
	struct pool_page_header *ph;
	struct pool_item *pi;
	caddr_t cp;
	int n;

	TAILQ_FOREACH(ph, &pp->pr_fullpages, ph_entry) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		while (n--) {
			func(cp, full, pr);
			cp += pp->pr_size;
		}
	}

	TAILQ_FOREACH(ph, &pp->pr_partpages, ph_entry) {
		cp = ph->ph_colored;
		n = ph->ph_nmissing;

		do {
			XSIMPLEQ_FOREACH(pi, &ph->ph_items, pi_list) {
				if (cp == (caddr_t)pi)
					break;
			}
			if (cp != (caddr_t)pi) {
				func(cp, full, pr);
				n--;
			}

			cp += pp->pr_size;
		} while (n > 0);
	}
}
#endif

/*
 * We have three different sysctls.
 * kern.pool.npools - the number of pools.
 * kern.pool.pool.<pool#> - the pool struct for the pool#.
 * kern.pool.name.<pool#> - the name for pool#.
 */
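
/*
 * Userland consumers such as vmstat(8) and systat(1) walk these nodes
 * roughly as follows (a sketch; error handling omitted):
 *
 *	int mib[] = { CTL_KERN, KERN_POOL, KERN_POOL_POOL, serial };
 *	struct kinfo_pool pi;
 *	size_t len = sizeof(pi);
 *	sysctl(mib, 4, &pi, &len, NULL, 0);
 */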
int
sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
{
	struct kinfo_pool pi;
	struct pool *pp;
	int rv = ENOENT;

	switch (name[0]) {
	case KERN_POOL_NPOOLS:
		if (namelen != 1)
			return (ENOTDIR);
		return (sysctl_rdint(oldp, oldlenp, NULL, pool_count));

	case KERN_POOL_NAME:
	case KERN_POOL_POOL:
	case KERN_POOL_CACHE:
	case KERN_POOL_CACHE_CPUS:
		break;
	default:
		return (EOPNOTSUPP);
	}

	if (namelen != 2)
		return (ENOTDIR);

	rw_enter_read(&pool_lock);

	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
		if (name[1] == pp->pr_serial)
			break;
	}

	if (pp == NULL)
		goto done;

	switch (name[0]) {
	case KERN_POOL_NAME:
		rv = sysctl_rdstring(oldp, oldlenp, NULL, pp->pr_wchan);
		break;
	case KERN_POOL_POOL:
		memset(&pi, 0, sizeof(pi));

		pl_enter(pp, &pp->pr_lock);
		pi.pr_size = pp->pr_size;
		pi.pr_pgsize = pp->pr_pgsize;
		pi.pr_itemsperpage = pp->pr_itemsperpage;
		pi.pr_npages = pp->pr_npages;
		pi.pr_minpages = pp->pr_minpages;
		pi.pr_maxpages = pp->pr_maxpages;
		pi.pr_hardlimit = pp->pr_hardlimit;
		pi.pr_nout = pp->pr_nout;
		pi.pr_nitems = pp->pr_nitems;
		pi.pr_nget = pp->pr_nget;
		pi.pr_nput = pp->pr_nput;
		pi.pr_nfail = pp->pr_nfail;
		pi.pr_npagealloc = pp->pr_npagealloc;
		pi.pr_npagefree = pp->pr_npagefree;
		pi.pr_hiwat = pp->pr_hiwat;
		pi.pr_nidle = pp->pr_nidle;
		pl_leave(pp, &pp->pr_lock);

		pool_cache_pool_info(pp, &pi);

		rv = sysctl_rdstruct(oldp, oldlenp, NULL, &pi, sizeof(pi));
		break;

	case KERN_POOL_CACHE:
		rv = pool_cache_info(pp, oldp, oldlenp);
		break;

	case KERN_POOL_CACHE_CPUS:
		rv = pool_cache_cpus_info(pp, oldp, oldlenp);
		break;
	}

done:
	rw_exit_read(&pool_lock);

	return (rv);
}

void
pool_gc_sched(void *null)
{
	task_add(systqmp, &pool_gc_task);
}

void
pool_gc_pages(void *null)
{
	struct pool *pp;
	struct pool_page_header *ph, *freeph;
	int s;

	rw_enter_read(&pool_lock);
	s = splvm(); /* XXX go to splvm until all pools _setipl properly */
	SIMPLEQ_FOREACH(pp, &pool_head, pr_poollist) {
#ifdef MULTIPROCESSOR
		if (pp->pr_cache != NULL)
			pool_cache_gc(pp);
#endif

		if (pp->pr_nidle <= pp->pr_minpages || /* guess */
		    !pl_enter_try(pp, &pp->pr_lock)) /* try */
			continue;

		/* is it time to free a page? */
		if (pp->pr_nidle > pp->pr_minpages &&
		    (ph = TAILQ_FIRST(&pp->pr_emptypages)) != NULL &&
		    getnsecuptime() - ph->ph_timestamp > POOL_WAIT_GC) {
			freeph = ph;
			pool_p_remove(pp, freeph);
		} else
			freeph = NULL;

		pl_leave(pp, &pp->pr_lock);

		if (freeph != NULL)
			pool_p_free(pp, freeph);
	}
	splx(s);
	rw_exit_read(&pool_lock);

	timeout_add_sec(&pool_gc_tick, 1);
}

/*
 * Pool backend allocators.
 */

void *
pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
{
	void *v;

	v = (*pp->pr_alloc->pa_alloc)(pp, flags, slowdown);

#ifdef DIAGNOSTIC
	if (v != NULL && POOL_INPGHDR(pp)) {
		vaddr_t addr = (vaddr_t)v;
		if ((addr & pp->pr_pgmask) != addr) {
			panic("%s: %s page address %p isn't aligned to %u",
			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
		}
	}
#endif

	return (v);
}

void
pool_allocator_free(struct pool *pp, void *v)
{
	struct pool_allocator *pa = pp->pr_alloc;

	(*pa->pa_free)(pp, v);
}

void *
pool_page_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
}

void
pool_page_free(struct pool *pp, void *v)
{
	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
}

void *
pool_multi_alloc(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_intrsafe;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	s = splvm();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	splx(s);

	return (v);
}

void
pool_multi_free(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_intrsafe;
	int s;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	s = splvm();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	splx(s);
}

void *
pool_multi_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
	struct kmem_va_mode kv = kv_any;
	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
	void *v;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	kd.kd_waitok = ISSET(flags, PR_WAITOK);
	kd.kd_slowdown = slowdown;

	KERNEL_LOCK();
	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
	KERNEL_UNLOCK();

	return (v);
}

void
pool_multi_free_ni(struct pool *pp, void *v)
{
	struct kmem_va_mode kv = kv_any;

	if (POOL_INPGHDR(pp))
		kv.kv_align = pp->pr_pgsize;

	KERNEL_LOCK();
	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
	KERNEL_UNLOCK();
}

#ifdef MULTIPROCESSOR

struct pool pool_caches; /* per cpu cache entries */

void
pool_cache_init(struct pool *pp)
{
	struct cpumem *cm;
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pool_caches.pr_size == 0) {
		pool_init(&pool_caches, sizeof(struct pool_cache),
		    CACHELINESIZE, IPL_NONE, PR_WAITOK | PR_RWLOCK,
		    "plcache", NULL);
	}

	/* must be able to use the pool items as cache list items */
	KASSERT(pp->pr_size >= sizeof(struct pool_cache_item));

	cm = cpumem_get(&pool_caches);

	pl_init(pp, &pp->pr_cache_lock);
	arc4random_buf(pp->pr_cache_magic, sizeof(pp->pr_cache_magic));
	TAILQ_INIT(&pp->pr_cache_lists);
	pp->pr_cache_nitems = 0;
	pp->pr_cache_timestamp = getnsecuptime();
	pp->pr_cache_items = 8;
	pp->pr_cache_contention = 0;
	pp->pr_cache_ngc = 0;

	CPUMEM_FOREACH(pc, &i, cm) {
		pc->pc_actv = NULL;
		pc->pc_nactv = 0;
		pc->pc_prev = NULL;

		pc->pc_nget = 0;
		pc->pc_nfail = 0;
		pc->pc_nput = 0;
		pc->pc_nlget = 0;
		pc->pc_nlfail = 0;
		pc->pc_nlput = 0;
		pc->pc_nout = 0;
	}

	membar_producer();

	pp->pr_cache = cm;
}
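
/*
 * The per-CPU cache layer is opt-in. A subsystem with a heavily shared
 * pool enables it after pool_init(); a sketch with a hypothetical pool:
 *
 *	pool_init(&foo_pool, ...);
 *	pool_cache_init(&foo_pool);
 */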
1757
1758 static inline void
pool_cache_item_magic(struct pool * pp,struct pool_cache_item * ci)1759 pool_cache_item_magic(struct pool *pp, struct pool_cache_item *ci)
1760 {
1761 unsigned long *entry = (unsigned long *)&ci->ci_nextl;
1762
1763 entry[0] = pp->pr_cache_magic[0] ^ (u_long)ci;
1764 entry[1] = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
1765 }
1766
1767 static inline void
pool_cache_item_magic_check(struct pool * pp,struct pool_cache_item * ci)1768 pool_cache_item_magic_check(struct pool *pp, struct pool_cache_item *ci)
1769 {
1770 unsigned long *entry;
1771 unsigned long val;
1772
1773 entry = (unsigned long *)&ci->ci_nextl;
1774 val = pp->pr_cache_magic[0] ^ (u_long)ci;
1775 if (*entry != val)
1776 goto fail;
1777
1778 entry++;
1779 val = pp->pr_cache_magic[1] ^ (u_long)ci->ci_next;
1780 if (*entry != val)
1781 goto fail;
1782
1783 return;
1784
1785 fail:
1786 panic("%s: %s cpu free list modified: item addr %p+%zu 0x%lx!=0x%lx",
1787 __func__, pp->pr_wchan, ci, (caddr_t)entry - (caddr_t)ci,
1788 *entry, val);
1789 }
1790
1791 static inline void
pool_list_enter(struct pool * pp)1792 pool_list_enter(struct pool *pp)
1793 {
1794 if (pl_enter_try(pp, &pp->pr_cache_lock) == 0) {
1795 pl_enter(pp, &pp->pr_cache_lock);
1796 pp->pr_cache_contention++;
1797 }
1798 }
1799
1800 static inline void
pool_list_leave(struct pool * pp)1801 pool_list_leave(struct pool *pp)
1802 {
1803 pl_leave(pp, &pp->pr_cache_lock);
1804 }
1805
1806 static inline struct pool_cache_item *
pool_cache_list_alloc(struct pool * pp,struct pool_cache * pc)1807 pool_cache_list_alloc(struct pool *pp, struct pool_cache *pc)
1808 {
1809 struct pool_cache_item *pl;
1810
1811 pool_list_enter(pp);
1812 pl = TAILQ_FIRST(&pp->pr_cache_lists);
1813 if (pl != NULL) {
1814 TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
1815 pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
1816
1817 pool_cache_item_magic(pp, pl);
1818
1819 pc->pc_nlget++;
1820 } else
1821 pc->pc_nlfail++;
1822
1823 /* fold this cpus nout into the global while we have the lock */
1824 pp->pr_cache_nout += pc->pc_nout;
1825 pc->pc_nout = 0;
1826 pool_list_leave(pp);
1827
1828 return (pl);
1829 }
1830
1831 static inline void
pool_cache_list_free(struct pool * pp,struct pool_cache * pc,struct pool_cache_item * ci)1832 pool_cache_list_free(struct pool *pp, struct pool_cache *pc,
1833 struct pool_cache_item *ci)
1834 {
1835 pool_list_enter(pp);
1836 if (TAILQ_EMPTY(&pp->pr_cache_lists))
1837 pp->pr_cache_timestamp = getnsecuptime();
1838
1839 pp->pr_cache_nitems += POOL_CACHE_ITEM_NITEMS(ci);
1840 TAILQ_INSERT_TAIL(&pp->pr_cache_lists, ci, ci_nextl);
1841
1842 pc->pc_nlput++;
1843
1844 /* fold this cpus nout into the global while we have the lock */
1845 pp->pr_cache_nout += pc->pc_nout;
1846 pc->pc_nout = 0;
1847 pool_list_leave(pp);
1848 }
1849
1850 static inline struct pool_cache *
pool_cache_enter(struct pool * pp,int * s)1851 pool_cache_enter(struct pool *pp, int *s)
1852 {
1853 struct pool_cache *pc;
1854
1855 pc = cpumem_enter(pp->pr_cache);
1856 *s = splraise(pp->pr_ipl);
1857 pc->pc_gen++;
1858
1859 return (pc);
1860 }
1861
1862 static inline void
pool_cache_leave(struct pool * pp,struct pool_cache * pc,int s)1863 pool_cache_leave(struct pool *pp, struct pool_cache *pc, int s)
1864 {
1865 pc->pc_gen++;
1866 splx(s);
1867 cpumem_leave(pp->pr_cache, pc);
1868 }
1869
1870 void *
pool_cache_get(struct pool * pp)1871 pool_cache_get(struct pool *pp)
1872 {
1873 struct pool_cache *pc;
1874 struct pool_cache_item *ci;
1875 int s;
1876
1877 pc = pool_cache_enter(pp, &s);
1878
1879 if (pc->pc_actv != NULL) {
1880 ci = pc->pc_actv;
1881 } else if (pc->pc_prev != NULL) {
1882 ci = pc->pc_prev;
1883 pc->pc_prev = NULL;
1884 } else if ((ci = pool_cache_list_alloc(pp, pc)) == NULL) {
1885 pc->pc_nfail++;
1886 goto done;
1887 }
1888
1889 pool_cache_item_magic_check(pp, ci);
1890 #ifdef DIAGNOSTIC
1891 if (pool_debug && POOL_CACHE_ITEM_POISONED(ci)) {
1892 size_t pidx;
1893 uint32_t pval;
1894
1895 if (poison_check(ci + 1, pp->pr_size - sizeof(*ci),
1896 &pidx, &pval)) {
1897 int *ip = (int *)(ci + 1);
1898 ip += pidx;
1899
1900 panic("%s: %s cpu free list modified: "
1901 "item addr %p+%zu 0x%x!=0x%x",
1902 __func__, pp->pr_wchan, ci,
1903 (caddr_t)ip - (caddr_t)ci, *ip, pval);
1904 }
1905 }
1906 #endif
1907
1908 pc->pc_actv = ci->ci_next;
1909 pc->pc_nactv = POOL_CACHE_ITEM_NITEMS(ci) - 1;
1910 pc->pc_nget++;
1911 pc->pc_nout++;
1912
1913 done:
1914 pool_cache_leave(pp, pc, s);
1915
1916 return (ci);
1917 }
1918
1919 void
pool_cache_put(struct pool * pp,void * v)1920 pool_cache_put(struct pool *pp, void *v)
1921 {
1922 struct pool_cache *pc;
1923 struct pool_cache_item *ci = v;
1924 unsigned long nitems;
1925 int s;
1926 #ifdef DIAGNOSTIC
1927 int poison = pool_debug && pp->pr_size > sizeof(*ci);
1928
1929 if (poison)
1930 poison_mem(ci + 1, pp->pr_size - sizeof(*ci));
1931 #endif
1932
1933 pc = pool_cache_enter(pp, &s);
1934
1935 nitems = pc->pc_nactv;
1936 if (nitems >= pp->pr_cache_items) {
1937 if (pc->pc_prev != NULL)
1938 pool_cache_list_free(pp, pc, pc->pc_prev);
1939
1940 pc->pc_prev = pc->pc_actv;
1941
1942 pc->pc_actv = NULL;
1943 pc->pc_nactv = 0;
1944 nitems = 0;
1945 }
1946
1947 ci->ci_next = pc->pc_actv;
1948 ci->ci_nitems = ++nitems;
1949 #ifdef DIAGNOSTIC
1950 ci->ci_nitems |= poison ? POOL_CACHE_ITEM_NITEMS_POISON : 0;
1951 #endif
1952 pool_cache_item_magic(pp, ci);
1953
1954 pc->pc_actv = ci;
1955 pc->pc_nactv = nitems;
1956
1957 pc->pc_nput++;
1958 pc->pc_nout--;
1959
1960 pool_cache_leave(pp, pc, s);
1961 }
1962
1963 struct pool_cache_item *
pool_cache_list_put(struct pool * pp,struct pool_cache_item * pl)1964 pool_cache_list_put(struct pool *pp, struct pool_cache_item *pl)
1965 {
1966 struct pool_cache_item *rpl, *next;
1967
1968 if (pl == NULL)
1969 return (NULL);
1970
1971 rpl = TAILQ_NEXT(pl, ci_nextl);
1972
1973 pl_enter(pp, &pp->pr_lock);
1974 do {
1975 next = pl->ci_next;
1976 pool_do_put(pp, pl);
1977 pl = next;
1978 } while (pl != NULL);
1979 pl_leave(pp, &pp->pr_lock);
1980
1981 return (rpl);
1982 }
1983
void
pool_cache_destroy(struct pool *pp)
{
	struct pool_cache *pc;
	struct pool_cache_item *pl;
	struct cpumem_iter i;
	struct cpumem *cm;

	rw_enter_write(&pool_lock); /* serialise with the gc */
	cm = pp->pr_cache;
	pp->pr_cache = NULL; /* make pool_put avoid the cache */
	rw_exit_write(&pool_lock);

	CPUMEM_FOREACH(pc, &i, cm) {
		pool_cache_list_put(pp, pc->pc_actv);
		pool_cache_list_put(pp, pc->pc_prev);
	}

	cpumem_put(&pool_caches, cm);

	pl = TAILQ_FIRST(&pp->pr_cache_lists);
	while (pl != NULL)
		pl = pool_cache_list_put(pp, pl);
}

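/*
 * Garbage collect the cache depot. If the depot has been idle for
 * longer than POOL_WAIT_GC, one free list is released back to the
 * pool. The per-CPU list length is then tuned: contention on
 * pr_cache_lock grows the lists so CPUs visit the depot less often,
 * while a contention-free interval lets them shrink again.
 */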
void
pool_cache_gc(struct pool *pp)
{
	unsigned int contention, delta;

	if (getnsecuptime() - pp->pr_cache_timestamp > POOL_WAIT_GC &&
	    !TAILQ_EMPTY(&pp->pr_cache_lists) &&
	    pl_enter_try(pp, &pp->pr_cache_lock)) {
		struct pool_cache_item *pl = NULL;

		pl = TAILQ_FIRST(&pp->pr_cache_lists);
		if (pl != NULL) {
			TAILQ_REMOVE(&pp->pr_cache_lists, pl, ci_nextl);
			pp->pr_cache_nitems -= POOL_CACHE_ITEM_NITEMS(pl);
			pp->pr_cache_timestamp = getnsecuptime();

			pp->pr_cache_ngc++;
		}

		pl_leave(pp, &pp->pr_cache_lock);

		pool_cache_list_put(pp, pl);
	}

	/*
	 * If there's a lot of contention on pr_cache_lock then consider
	 * growing the length of the list to reduce the need to access the
	 * global pool.
	 */

	contention = pp->pr_cache_contention;
	delta = contention - pp->pr_cache_contention_prev;
	if (delta > 8 /* magic */) {
		if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems)
			pp->pr_cache_items += 8;
	} else if (delta == 0) {
		if (pp->pr_cache_items > 8)
			pp->pr_cache_items--;
	}
	pp->pr_cache_contention_prev = contention;
}

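/*
 * Fold the per-CPU cache counters into a pool's kinfo_pool stats.
 * pc_gen is used like a seqlock: it is odd while a CPU is inside its
 * cache, so readers spin until it is even and retry if it changed
 * while the counters were being copied.
 */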
void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	struct pool_cache *pc;
	struct cpumem_iter i;

	if (pp->pr_cache == NULL)
		return;

	/* loop through the caches twice to collect stats */

	/* once without the lock so we can yield while reading nget/nput */
	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
		uint64_t gen, nget, nput;

		do {
			while ((gen = pc->pc_gen) & 1)
				yield();

			nget = pc->pc_nget;
			nput = pc->pc_nput;
		} while (gen != pc->pc_gen);

		pi->pr_nget += nget;
		pi->pr_nput += nput;
	}

	/* and once with the lock held so we can get consistent nout values */
	pl_enter(pp, &pp->pr_cache_lock);
	CPUMEM_FOREACH(pc, &i, pp->pr_cache)
		pi->pr_nout += pc->pc_nout;

	pi->pr_nout += pp->pr_cache_nout;
	pl_leave(pp, &pp->pr_cache_lock);
}

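/*
 * Export the pool-wide cache state (gc passes, target list length,
 * items in the depot, contention count) via sysctl.
 */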
int
pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	struct kinfo_pool_cache kpc;

	if (pp->pr_cache == NULL)
		return (EOPNOTSUPP);

	memset(&kpc, 0, sizeof(kpc)); /* don't leak padding */

	pl_enter(pp, &pp->pr_cache_lock);
	kpc.pr_ngc = pp->pr_cache_ngc;
	kpc.pr_len = pp->pr_cache_items;
	kpc.pr_nitems = pp->pr_cache_nitems;
	kpc.pr_contention = pp->pr_cache_contention;
	pl_leave(pp, &pp->pr_cache_lock);

	return (sysctl_rdstruct(oldp, oldlenp, NULL, &kpc, sizeof(kpc)));
}

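/*
 * Export per-CPU cache counters via sysctl, one kinfo_pool_cache_cpu
 * record per CPU, read under the same pc_gen scheme as above.
 */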
int
pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	struct pool_cache *pc;
	struct kinfo_pool_cache_cpu *kpcc, *info;
	unsigned int cpu = 0;
	struct cpumem_iter i;
	int error = 0;
	size_t len;

	if (pp->pr_cache == NULL)
		return (EOPNOTSUPP);
	if (*oldlenp % sizeof(*kpcc))
		return (EINVAL);

	kpcc = mallocarray(ncpusfound, sizeof(*kpcc), M_TEMP,
	    M_WAITOK|M_CANFAIL|M_ZERO);
	if (kpcc == NULL)
		return (EIO);

	len = ncpusfound * sizeof(*kpcc);

	CPUMEM_FOREACH(pc, &i, pp->pr_cache) {
		uint64_t gen;

		if (cpu >= ncpusfound) {
			error = EIO;
			goto err;
		}

		info = &kpcc[cpu];
		info->pr_cpu = cpu;

		do {
			while ((gen = pc->pc_gen) & 1)
				yield();

			info->pr_nget = pc->pc_nget;
			info->pr_nfail = pc->pc_nfail;
			info->pr_nput = pc->pc_nput;
			info->pr_nlget = pc->pc_nlget;
			info->pr_nlfail = pc->pc_nlfail;
			info->pr_nlput = pc->pc_nlput;
		} while (gen != pc->pc_gen);

		cpu++;
	}

	error = sysctl_rdstruct(oldp, oldlenp, NULL, kpcc, len);
err:
	free(kpcc, M_TEMP, len);

	return (error);
}
#else /* MULTIPROCESSOR */
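/*
 * Uniprocessor kernels have no per-CPU caches, so the cache API
 * reduces to no-ops and EOPNOTSUPP.
 */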
void
pool_cache_init(struct pool *pp)
{
	/* nop */
}

void
pool_cache_pool_info(struct pool *pp, struct kinfo_pool *pi)
{
	/* nop */
}

int
pool_cache_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	return (EOPNOTSUPP);
}

int
pool_cache_cpus_info(struct pool *pp, void *oldp, size_t *oldlenp)
{
	return (EOPNOTSUPP);
}
#endif /* MULTIPROCESSOR */

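/*
 * pool_lock_ops backed by a mutex. The mutex is initialised at the
 * pool's IPL so the pool can be entered from interrupt context.
 */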
void
pool_lock_mtx_init(struct pool *pp, union pool_lock *lock,
    const struct lock_type *type)
{
	_mtx_init_flags(&lock->prl_mtx, pp->pr_ipl, pp->pr_wchan, 0, type);
}

void
pool_lock_mtx_enter(union pool_lock *lock)
{
	mtx_enter(&lock->prl_mtx);
}

int
pool_lock_mtx_enter_try(union pool_lock *lock)
{
	return (mtx_enter_try(&lock->prl_mtx));
}

void
pool_lock_mtx_leave(union pool_lock *lock)
{
	mtx_leave(&lock->prl_mtx);
}

void
pool_lock_mtx_assert_locked(union pool_lock *lock)
{
	MUTEX_ASSERT_LOCKED(&lock->prl_mtx);
}

void
pool_lock_mtx_assert_unlocked(union pool_lock *lock)
{
	MUTEX_ASSERT_UNLOCKED(&lock->prl_mtx);
}

int
pool_lock_mtx_sleep(void *ident, union pool_lock *lock, int priority,
    const char *wmesg)
{
	return msleep_nsec(ident, &lock->prl_mtx, priority, wmesg, INFSLP);
}

static const struct pool_lock_ops pool_lock_ops_mtx = {
	pool_lock_mtx_init,
	pool_lock_mtx_enter,
	pool_lock_mtx_enter_try,
	pool_lock_mtx_leave,
	pool_lock_mtx_assert_locked,
	pool_lock_mtx_assert_unlocked,
	pool_lock_mtx_sleep,
};

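/*
 * pool_lock_ops backed by a write lock, for pools serialised by an
 * rwlock instead of a mutex; an rwlock may be held while sleeping.
 */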
void
pool_lock_rw_init(struct pool *pp, union pool_lock *lock,
    const struct lock_type *type)
{
	_rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type);
}

void
pool_lock_rw_enter(union pool_lock *lock)
{
	rw_enter_write(&lock->prl_rwlock);
}

int
pool_lock_rw_enter_try(union pool_lock *lock)
{
	return (rw_enter(&lock->prl_rwlock, RW_WRITE | RW_NOSLEEP) == 0);
}

void
pool_lock_rw_leave(union pool_lock *lock)
{
	rw_exit_write(&lock->prl_rwlock);
}

void
pool_lock_rw_assert_locked(union pool_lock *lock)
{
	rw_assert_wrlock(&lock->prl_rwlock);
}

void
pool_lock_rw_assert_unlocked(union pool_lock *lock)
{
	KASSERT(rw_status(&lock->prl_rwlock) != RW_WRITE);
}

int
pool_lock_rw_sleep(void *ident, union pool_lock *lock, int priority,
    const char *wmesg)
{
	return rwsleep_nsec(ident, &lock->prl_rwlock, priority, wmesg, INFSLP);
}

static const struct pool_lock_ops pool_lock_ops_rw = {
	pool_lock_rw_init,
	pool_lock_rw_enter,
	pool_lock_rw_enter_try,
	pool_lock_rw_leave,
	pool_lock_rw_assert_locked,
	pool_lock_rw_assert_unlocked,
	pool_lock_rw_sleep,
};