xref: /netbsd/sys/sys/pool.h (revision 25ae68d5)
1 /*	$NetBSD: pool.h,v 1.96 2021/12/22 16:57:28 thorpej Exp $	*/
2 
3 /*-
4  * Copyright (c) 1997, 1998, 1999, 2000, 2007, 2020
5  *     The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
10  * Simulation Facility, NASA Ames Research Center.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef _SYS_POOL_H_
35 #define _SYS_POOL_H_
36 
37 #include <sys/stdbool.h>
38 #include <sys/stdint.h>
39 
40 struct pool_sysctl {
	/*
	 * Fixed-width snapshot of one pool's configuration and statistics.
	 * Declared outside __POOL_EXPOSE and built only from <stdint.h>
	 * types so kernel and userland agree on the layout regardless of
	 * ABI.  The counters appear to mirror the like-named members of
	 * struct pool / pool_cache_cpu_t below -- see those definitions
	 * for the authoritative field descriptions (NOTE(review): mapping
	 * inferred from names; confirm against the sysctl export code).
	 */
41 	char pr_wchan[16];		/* pool name (sleep identifier) */
42 	uint64_t pr_flags;		/* pool flags */
43 	uint64_t pr_size;		/* size of an item */
44 	uint64_t pr_pagesize;		/* back-end allocation unit */
45 	uint64_t pr_itemsperpage;	/* # items that fit in a page */
46 	uint64_t pr_nitems;		/* # of free items in pool */
47 	uint64_t pr_nout;		/* # items currently allocated */
48 	uint64_t pr_hardlimit;		/* hard limit on allocated items */
49 	uint64_t pr_npages;		/* # of pages allocated */
50 	uint64_t pr_minpages;		/* minimum # of pages to keep */
51 	uint64_t pr_maxpages;		/* maximum # of pages to keep */
52 
	/* Allocation statistics. */
53 	uint64_t pr_nget;		/* # of successful requests */
54 	uint64_t pr_nfail;		/* # of unsuccessful requests */
55 	uint64_t pr_nput;		/* # of releases */
56 	uint64_t pr_npagealloc;	/* # of pages allocated */
57 	uint64_t pr_npagefree;	/* # of pages released */
58 	uint64_t pr_hiwat;		/* max # of pages in pool */
59 	uint64_t pr_nidle;		/* # of idle pages */
60 
	/* pool_cache layer statistics. */
61 	uint64_t pr_cache_meta_size;	/* size of a cache group */
62 	uint64_t pr_cache_nfull;	/* # of full cache groups */
63 	uint64_t pr_cache_npartial;	/* # of partial cache groups */
64 	uint64_t pr_cache_nempty;	/* # of empty cache groups */
65 	uint64_t pr_cache_ncontended;	/* # of contended lock acquisitions */
66 	uint64_t pr_cache_nmiss_global;	/* misses in the global group lists */
67 	uint64_t pr_cache_nhit_global;	/* hits in the global group lists */
68 	uint64_t pr_cache_nmiss_pcpu;	/* misses in the per-CPU caches */
69 	uint64_t pr_cache_nhit_pcpu;	/* hits in the per-CPU caches */
70 };
71 
72 #ifdef _KERNEL
73 #define __POOL_EXPOSE
74 #endif
75 
76 #ifdef __POOL_EXPOSE
77 #include <sys/param.h>
78 #include <sys/mutex.h>
79 #include <sys/condvar.h>
80 #include <sys/queue.h>
81 #include <sys/time.h>
82 #include <sys/tree.h>
83 #include <sys/callback.h>
84 
85 #ifdef _KERNEL_OPT
86 #include "opt_pool.h"
87 #endif
88 
/*
 * Sentinel physical address meaning "no paddr supplied/available";
 * used e.g. by the pool_cache_put() convenience macro below.
 */
89 #define POOL_PADDR_INVALID	((paddr_t) -1)
90 
91 struct pool;			/* forward declaration for pool_allocator */
92 
93 struct pool_allocator {
	/*
	 * Back-end page allocator shared by one or more pools:
	 * pa_alloc obtains a page of pa_pagesz bytes for the given pool
	 * and pa_free returns one.  The int argument to pa_alloc is
	 * presumably the PR_WAITOK/PR_NOWAIT flags -- confirm against
	 * the pool_get(9) implementation.
	 */
94 	void		*(*pa_alloc)(struct pool *, int);	/* get a page */
95 	void		(*pa_free)(struct pool *, void *);	/* release a page */
96 	unsigned int	pa_pagesz;	/* allocation unit, in bytes */
97 
98 	/* The following fields are for internal use only. */
99 	kmutex_t	pa_lock;	/* serializes the fields below (review: verify scope) */
100 	TAILQ_HEAD(, pool) pa_list;	/* list of pools using this allocator */
101 	uint32_t	pa_refcnt;	/* number of pools using this allocator */
102 	int		pa_pagemask;	/* derived from pa_pagesz (presumably pa_pagesz - 1) */
103 	int		pa_pageshift;	/* derived from pa_pagesz (presumably log2) */
104 };
105 
/* A list of pool pages in one state, and a lookup tree of page headers. */
106 LIST_HEAD(pool_pagelist,pool_item_header);
107 SPLAY_HEAD(phtree, pool_item_header);
108 
/*
 * Fixed-size ring used by the optional POOL_QUARANTINE diagnostic
 * (see "opt_pool.h" above): freed items are apparently parked here to
 * delay their reuse -- NOTE(review): semantics inferred from the name,
 * confirm against subr_pool.c.
 */
109 #define POOL_QUARANTINE_DEPTH	128	/* entries in the ring */
110 typedef struct {
111 	size_t rotor;			/* index of the next slot to recycle */
112 	intptr_t list[POOL_QUARANTINE_DEPTH];	/* quarantined item addresses */
113 } pool_quar_t;
114 
/*
 * pool(9) descriptor: an allocator for fixed-size items carved out of
 * pages obtained from the back-end allocator pr_alloc.  Pages are kept
 * on three lists by fill state; see the pr_lock comment below for the
 * locking rules.
 */
115 struct pool {
116 	TAILQ_ENTRY(pool)
117 			pr_poollist;	/* link on the global pool list */
118 	struct pool_pagelist
119 			pr_emptypages;	/* Empty pages */
120 	struct pool_pagelist
121 			pr_fullpages;	/* Full pages */
122 	struct pool_pagelist
123 			pr_partpages;	/* Partially-allocated pages */
124 	struct pool_item_header	*pr_curpage;	/* current page to allocate from */
125 	struct pool	*pr_phpool;	/* Pool item header pool */
126 	struct pool_cache *pr_cache;	/* Cache for this pool */
127 	unsigned int	pr_size;	/* Size of item */
128 	unsigned int	pr_align;	/* Requested alignment, must be 2^n */
129 	unsigned int	pr_itemoffset;	/* offset of the item space */
130 	unsigned int	pr_minitems;	/* minimum # of free items to keep */
131 	unsigned int	pr_maxitems;	/* maximum # of free items to keep */
132 	unsigned int	pr_minpages;	/* minimum # of pages to keep */
133 	unsigned int	pr_maxpages;	/* maximum # of pages to keep */
134 	unsigned int	pr_npages;	/* # of pages allocated */
135 	unsigned int	pr_itemsperpage;/* # items that fit in a page */
136 	unsigned int	pr_poolid;	/* id of the pool */
137 	unsigned int	pr_nitems;	/* number of free items in pool */
138 	unsigned int	pr_nout;	/* # items currently allocated */
139 	unsigned int	pr_hardlimit;	/* hard limit to number of allocated
140 					   items */
141 	unsigned int	pr_refcnt;	/* ref count for pagedaemon, etc */
142 	struct pool_allocator *pr_alloc;/* back-end allocator */
143 	TAILQ_ENTRY(pool) pr_alloc_list;/* link on allocator's pool list */
144 
145 	/* Drain hook. */
146 	void		(*pr_drain_hook)(void *, int);	/* set via pool_set_drain_hook() */
147 	void		*pr_drain_hook_arg;		/* opaque arg for the hook */
148 
149 	const char	*pr_wchan;	/* tsleep(9) identifier */
150 	unsigned int	pr_flags;	/* r/w flags */
151 	unsigned int	pr_roflags;	/* r/o flags */
152 #define PR_WAITOK	0x01	/* Note: matches KM_SLEEP */
153 #define PR_NOWAIT	0x02	/* Note: matches KM_NOSLEEP */
154 #define PR_WANTED	0x04	/* waiting for free objects */
155 #define PR_PHINPAGE	0x40	/* page header in page */
156 #define PR_LIMITFAIL	0x100	/* even if waiting, fail if we hit limit */
157 #define PR_RECURSIVE	0x200	/* pool contains pools, for vmstat(8) */
158 #define PR_NOTOUCH	0x400	/* don't use free items to keep internal state*/
159 #define PR_NOALIGN	0x800	/* don't assume backend alignment */
160 #define PR_LARGECACHE	0x1000	/* use large cache groups */
161 #define PR_GROWING	0x2000	/* pool_grow in progress */
162 #define PR_GROWINGNOWAIT 0x4000	/* pool_grow in progress by PR_NOWAIT alloc */
163 #define PR_ZERO		0x8000	/* zero data before returning */
164 #define PR_USEBMAP	0x10000	/* use a bitmap to manage freed items */
165 #define PR_PSERIALIZE	0x20000	/* needs pserialize sync point before free */
166 
167 	/*
168 	 * `pr_lock' protects the pool's data structures when removing
169 	 * items from or returning items to the pool, or when reading
170 	 * or updating read/write fields in the pool descriptor.
171 	 *
172 	 * We assume back-end page allocators provide their own locking
173 	 * scheme.  They will be called with the pool descriptor _unlocked_,
174 	 * since the page allocators may block.
175 	 */
176 	kmutex_t	pr_lock;
177 	kcondvar_t	pr_cv;		/* sleep point for PR_WANTED waiters */
178 	int		pr_ipl;		/* IPL the pool is used at (review: confirm) */
179 
180 	struct phtree	pr_phtree;	/* lookup tree of page headers */
181 
182 	int		pr_maxcolor;	/* Cache colouring */
183 	int		pr_curcolor;
184 	int		pr_phoffset;	/* unused */
185 
186 	/*
187 	 * Warning message to be issued, and a per-time-delta rate cap,
188 	 * if the hard limit is reached.
189 	 */
190 	const char	*pr_hardlimit_warning;
191 	struct timeval	pr_hardlimit_ratecap;
192 	struct timeval	pr_hardlimit_warning_last;
193 
194 	/*
195 	 * Instrumentation
196 	 */
197 	unsigned long	pr_nget;	/* # of successful requests */
198 	unsigned long	pr_nfail;	/* # of unsuccessful requests */
199 	unsigned long	pr_nput;	/* # of releases */
200 	unsigned long	pr_npagealloc;	/* # of pages allocated */
201 	unsigned long	pr_npagefree;	/* # of pages released */
202 	unsigned int	pr_hiwat;	/* max # of pages in pool */
203 	unsigned long	pr_nidle;	/* # of idle pages */
204 
205 	/*
206 	 * Diagnostic aides.
207 	 */
208 	void		*pr_freecheck;	/* freecheck state -- TODO confirm */
209 	void		*pr_qcache;	/* quarantine/KASAN state -- TODO confirm */
210 	bool		pr_redzone;	/* true if items carry a redzone */
211 	size_t		pr_reqsize;	/* item size as requested by creator */
212 	size_t		pr_reqsize_with_redzone;	/* pr_reqsize + redzone bytes */
213 #ifdef POOL_QUARANTINE
214 	pool_quar_t	pr_quar;	/* deferred-free quarantine ring */
215 #endif
216 };
217 
218 /*
219  * Cache group sizes, assuming 4-byte paddr_t on !_LP64.
220  * All groups will be aligned to COHERENCY_UNIT.
221  */
222 #ifdef _LP64
/*
 * _LP64: pcg_t header (8-byte next pointer + two 4-byte counters) is
 * 16 bytes and each pcgpair_t is 16 bytes, so 16 + 15*16 = 256 and
 * 16 + 63*16 = 1024 -- whole cache-line multiples.
 */
223 #define PCG_NOBJECTS_NORMAL	15	/* 256 byte group */
224 #define PCG_NOBJECTS_LARGE	63	/* 1024 byte group */
225 #else
/* !_LP64 (4-byte paddr_t): 12-byte header + 8-byte pairs: 12 + 14*8 = 124. */
226 #define PCG_NOBJECTS_NORMAL	14	/* 124 byte group */
227 #define PCG_NOBJECTS_LARGE	62	/* 508 byte group */
228 #endif
229 
/*
 * One cached object: its virtual address paired with its physical
 * address, so pool_cache_get_paddr() can hand back the paddr without
 * a translation.
 */
230 typedef struct pcgpair {
231 	void	*pcgo_va;		/* object virtual address */
232 	paddr_t	pcgo_pa;		/* object physical address */
233 } pcgpair_t;
234 
235 /* The pool cache group. */
236 typedef struct pool_cache_group {
237 	struct pool_cache_group	*pcg_next;	/* link to next group */
238 	u_int			pcg_avail;	/* # available objects */
239 	u_int			pcg_size;	/* max number objects */
240 	pcgpair_t 		pcg_objects[1];	/* the objects */
241 } pcg_t;
242 
243 /* Pool cache CPU.  Sized to 64 bytes on _LP64. */
244 typedef struct pool_cache_cpu {
245 	struct pool_cache_group	*cc_current;
246 	struct pool_cache_group	*cc_previous;
247 	pcg_t *volatile 	*cc_pcgcache;
248 	uint64_t		cc_misses;
249 	uint64_t		cc_hits;
250 	uint64_t		cc_pcmisses;
251 	uint64_t		cc_contended;
252 	uint32_t		cc_nfull;
253 	uint32_t		cc_npart;
254 } pool_cache_cpu_t;
255 
/*
 * A pool with a constructed-object cache layered on top: objects pass
 * through pc_ctor on the way out of the pool and pc_dtor before going
 * back, and are staged in per-CPU and global groups of pcg_t.
 */
256 struct pool_cache {
257 	/* Pool layer. */
258 	struct pool	pc_pool;
259 
260 	/* Cache layer. */
261 	TAILQ_ENTRY(pool_cache)
262 			pc_cachelist;	/* entry on global cache list */
263 	struct pool	*pc_pcgpool;	/* Pool of cache groups */
264 	pcg_t *volatile *pc_pcgcache;	/* list of empty cache groups */
265 	int		pc_pcgsize;	/* Use large cache groups? */
266 	int		pc_ncpu;	/* number cpus set up */
267 	int		(*pc_ctor)(void *, void *, int);	/* object constructor */
268 	void		(*pc_dtor)(void *, void *);	/* object destructor */
269 	void		*pc_arg;	/* for ctor/dtor */
270 	unsigned int	pc_refcnt;	/* ref count for pagedaemon, etc */
271 	unsigned int	pc_roflags;	/* r/o cache flags */
272 	void		*pc_cpus[MAXCPUS];	/* per-CPU state (pool_cache_cpu_t) */
273 
274 	/* Diagnostic aides. */
275 	void		*pc_freecheck;	/* freecheck state -- TODO confirm */
276 	bool		pc_redzone;	/* true if items carry a redzone */
277 	size_t		pc_reqsize;	/* item size requested by creator */
278 
279 	/* Hot items. */
280 	pcg_t *volatile pc_fullgroups	/* list of full cache groups */
281 	    __aligned(CACHE_LINE_SIZE);
282 	pcg_t *volatile pc_partgroups;	/* groups for reclamation */
283 
284 	/* Boot cpu. */
285 	pool_cache_cpu_t pc_cpu0 __aligned(CACHE_LINE_SIZE);
286 };
287 
288 #endif /* __POOL_EXPOSE */
289 
290 typedef struct pool_cache *pool_cache_t;
291 
292 #ifdef _KERNEL
293 /*
294  * pool_allocator_kmem is the default that all pools get unless
295  * otherwise specified.  pool_allocator_nointr is provided for
296  * pools that know they will never be accessed in interrupt
297  * context.
298  */
299 extern struct pool_allocator pool_allocator_kmem;
300 extern struct pool_allocator pool_allocator_nointr;
/* Allocator for the pool subsystem's own metadata -- presumably; confirm. */
301 extern struct pool_allocator pool_allocator_meta;
302 
/* One-time initialization of the pool subsystem at boot. */
303 void		pool_subsystem_init(void);
304 
/* Pool lifecycle -- see pool(9) for the parameter semantics. */
305 void		pool_init(struct pool *, size_t, u_int, u_int,
306 		    int, const char *, struct pool_allocator *, int);
307 void		pool_destroy(struct pool *);
308 
/* Install a callback invoked when the pool needs memory back. */
309 void		pool_set_drain_hook(struct pool *,
310 		    void (*)(void *, int), void *);
311 
/* Item allocation and release; flags are PR_WAITOK/PR_NOWAIT etc. */
312 void		*pool_get(struct pool *, int);
313 void		pool_put(struct pool *, void *);
314 int		pool_reclaim(struct pool *);
315 
/* Tuning: pre-fill, watermarks, hard limit, and system-wide draining. */
316 void		pool_prime(struct pool *, int);
317 void		pool_setlowat(struct pool *, int);
318 void		pool_sethiwat(struct pool *, int);
319 void		pool_sethardlimit(struct pool *, int, const char *, int);
320 bool		pool_drain(struct pool **);
321 int		pool_totalpages(void);
322 int		pool_totalpages_locked(void);
323 
/* Counter accessors (cf. pr_nget/pr_nput in struct pool). */
324 unsigned int	pool_nget(struct pool *);
325 unsigned int	pool_nput(struct pool *);
326 
327 /*
328  * Debugging and diagnostic aides.
329  */
330 void		pool_printit(struct pool *, const char *,
331     void (*)(const char *, ...) __printflike(1, 2));
332 void		pool_printall(const char *, void (*)(const char *, ...)
333     __printflike(1, 2));
334 int		pool_chk(struct pool *, const char *);
335 
336 /*
337  * Pool cache routines.
338  */
339 pool_cache_t	pool_cache_init(size_t, u_int, u_int, u_int, const char *,
340 		    struct pool_allocator *, int, int (*)(void *, void *, int),
341 		    void (*)(void *, void *), void *);
/* Like pool_cache_init() but uses caller-provided (static) storage. */
342 void		pool_cache_bootstrap(pool_cache_t, size_t, u_int, u_int, u_int,
343 		    const char *, struct pool_allocator *, int,
344 		    int (*)(void *, void *, int), void (*)(void *, void *),
345 		    void *);
346 void		pool_cache_destroy(pool_cache_t);
347 void		pool_cache_bootstrap_destroy(pool_cache_t);
/* Get/put with the object's physical address; see pcgpair_t. */
348 void		*pool_cache_get_paddr(pool_cache_t, int, paddr_t *);
349 void		pool_cache_put_paddr(pool_cache_t, void *, paddr_t);
350 void		pool_cache_destruct_object(pool_cache_t, void *);
351 void		pool_cache_invalidate(pool_cache_t);
352 bool		pool_cache_reclaim(pool_cache_t);
353 void		pool_cache_set_drain_hook(pool_cache_t,
354 		    void (*)(void *, int), void *);
355 void		pool_cache_setlowat(pool_cache_t, int);
356 void		pool_cache_sethiwat(pool_cache_t, int);
357 void		pool_cache_sethardlimit(pool_cache_t, int, const char *, int);
358 void		pool_cache_prime(pool_cache_t, int);
/* Set up the per-CPU cache state for a (newly attached) CPU. */
359 void		pool_cache_cpu_init(struct cpu_info *);
360 
361 unsigned int	pool_cache_nget(pool_cache_t);
362 unsigned int	pool_cache_nput(pool_cache_t);
363 
/* Convenience wrappers for callers that do not need the paddr. */
364 #define		pool_cache_get(pc, f) pool_cache_get_paddr((pc), (f), NULL)
365 #define		pool_cache_put(pc, o) pool_cache_put_paddr((pc), (o), \
366 				          POOL_PADDR_INVALID)
367 
/* Diagnostic: report which pool (if any) an address belongs to. */
368 void		pool_whatis(uintptr_t, void (*)(const char *, ...)
369     __printflike(1, 2));
370 #endif /* _KERNEL */
371 
372 #endif /* _SYS_POOL_H_ */
373