/*	$NetBSD: pool.h,v 1.79 2015/07/29 00:10:25 christos Exp $	*/

/*-
 * Copyright (c) 1997, 1998, 1999, 2000, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace
 * Simulation Facility, NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef _SYS_POOL_H_
#define _SYS_POOL_H_

#include <sys/stdbool.h>
#include <sys/stdint.h>

struct pool_sysctl {
	char pr_wchan[16];
	uint64_t pr_flags;
	uint64_t pr_size;
	uint64_t pr_pagesize;
	uint64_t pr_itemsperpage;
	uint64_t pr_nitems;
	uint64_t pr_nout;
	uint64_t pr_hardlimit;
	uint64_t pr_npages;
	uint64_t pr_minpages;
	uint64_t pr_maxpages;

	uint64_t pr_nget;
	uint64_t pr_nfail;
	uint64_t pr_nput;
	uint64_t pr_npagealloc;
	uint64_t pr_npagefree;
	uint64_t pr_hiwat;
	uint64_t pr_nidle;

	uint64_t pr_cache_meta_size;
	uint64_t pr_cache_nfull;
	uint64_t pr_cache_npartial;
	uint64_t pr_cache_nempty;
	uint64_t pr_cache_ncontended;
	uint64_t pr_cache_nmiss_global;
	uint64_t pr_cache_nhit_global;
	uint64_t pr_cache_nmiss_pcpu;
	uint64_t pr_cache_nhit_pcpu;
};
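
/*
 * The kernel exports one struct pool_sysctl per pool.  A minimal
 * userland sketch of reading them (assumes the "kern.pool" sysctl
 * node; error handling omitted):
 *
 *	size_t len;
 *	struct pool_sysctl *pp;
 *
 *	sysctlbyname("kern.pool", NULL, &len, NULL, 0);
 *	pp = malloc(len);
 *	sysctlbyname("kern.pool", pp, &len, NULL, 0);
 *	for (size_t i = 0; i < len / sizeof(*pp); i++)
 *		printf("%s: %" PRIu64 " gets\n", pp[i].pr_wchan,
 *		    pp[i].pr_nget);
 */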

#ifdef _KERNEL
#define	__POOL_EXPOSE
#endif

#ifdef __POOL_EXPOSE
#include <sys/param.h>
#include <sys/mutex.h>
#include <sys/condvar.h>
#include <sys/queue.h>
#include <sys/time.h>
#include <sys/tree.h>
#include <sys/callback.h>

#define	POOL_PADDR_INVALID	((paddr_t) -1)

struct pool;

struct pool_allocator {
	void		*(*pa_alloc)(struct pool *, int);
	void		(*pa_free)(struct pool *, void *);
	unsigned int	pa_pagesz;

	/* The following fields are for internal use only. */
	kmutex_t	pa_lock;
	TAILQ_HEAD(, pool) pa_list;	/* list of pools using this allocator */
	uint32_t	pa_refcnt;	/* number of pools using this allocator */
	int		pa_pagemask;
	int		pa_pageshift;
};
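
/*
 * A custom back-end allocator supplies pa_alloc/pa_free and a page
 * size (0 means the default).  A hypothetical sketch only; the
 * "mypl_*" names are illustrative, not part of this interface:
 *
 *	static void *
 *	mypl_alloc(struct pool *pp, int flags)
 *	{
 *		return (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
 *		    UVM_KMF_WIRED |
 *		    ((flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT));
 *	}
 *
 *	static void
 *	mypl_free(struct pool *pp, void *v)
 *	{
 *		uvm_km_free(kernel_map, (vaddr_t)v, PAGE_SIZE,
 *		    UVM_KMF_WIRED);
 *	}
 *
 *	static struct pool_allocator mypl_allocator = {
 *		.pa_alloc = mypl_alloc,
 *		.pa_free = mypl_free,
 *		.pa_pagesz = 0,
 *	};
 */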

LIST_HEAD(pool_pagelist, pool_item_header);
SPLAY_HEAD(phtree, pool_item_header);

struct pool {
	TAILQ_ENTRY(pool)
			pr_poollist;
	struct pool_pagelist
			pr_emptypages;	/* Empty pages */
	struct pool_pagelist
			pr_fullpages;	/* Full pages */
	struct pool_pagelist
			pr_partpages;	/* Partially-allocated pages */
	struct pool_item_header	*pr_curpage;
	struct pool	*pr_phpool;	/* Pool item header pool */
	struct pool_cache *pr_cache;	/* Cache for this pool */
	unsigned int	pr_size;	/* Size of item */
	unsigned int	pr_align;	/* Requested alignment, must be 2^n */
	unsigned int	pr_itemoffset;	/* Align this offset in item */
	unsigned int	pr_minitems;	/* minimum # of items to keep */
	unsigned int	pr_minpages;	/* same in page units */
	unsigned int	pr_maxpages;	/* maximum # of pages to keep */
	unsigned int	pr_npages;	/* # of pages allocated */
	unsigned int	pr_itemsperpage;/* # items that fit in a page */
	unsigned int	pr_slack;	/* unused space in a page */
	unsigned int	pr_nitems;	/* number of available items in pool */
	unsigned int	pr_nout;	/* # items currently allocated */
	unsigned int	pr_hardlimit;	/* hard limit to number of allocated
					   items */
	unsigned int	pr_refcnt;	/* ref count for pagedaemon, etc */
	struct pool_allocator *pr_alloc;/* back-end allocator */
	TAILQ_ENTRY(pool) pr_alloc_list;/* link on allocator's pool list */

	/* Drain hook. */
	void		(*pr_drain_hook)(void *, int);
	void		*pr_drain_hook_arg;

	const char	*pr_wchan;	/* tsleep(9) identifier */
	unsigned int	pr_flags;	/* r/w flags */
	unsigned int	pr_roflags;	/* r/o flags */
#define	PR_WAITOK	0x01	/* Note: matches KM_SLEEP */
#define PR_NOWAIT	0x02	/* Note: matches KM_NOSLEEP */
#define PR_WANTED	0x04
#define PR_PHINPAGE	0x40
#define PR_LOGGING	0x80
#define PR_LIMITFAIL	0x100	/* even if waiting, fail if we hit limit */
#define PR_RECURSIVE	0x200	/* pool contains pools, for vmstat(8) */
#define PR_NOTOUCH	0x400	/* don't use free items to keep internal state */
#define PR_NOALIGN	0x800	/* don't assume backend alignment */
#define	PR_LARGECACHE	0x1000	/* use large cache groups */

	/*
	 * `pr_lock' protects the pool's data structures when removing
	 * items from or returning items to the pool, or when reading
	 * or updating read/write fields in the pool descriptor.
	 *
	 * We assume back-end page allocators provide their own locking
	 * scheme.  They will be called with the pool descriptor _unlocked_,
	 * since the page allocators may block.
	 */
	kmutex_t	pr_lock;
	kcondvar_t	pr_cv;
	int		pr_ipl;

	struct phtree	pr_phtree;

	int		pr_maxcolor;	/* Cache colouring */
	int		pr_curcolor;
	int		pr_phoffset;	/* Offset in page of page header */

	/*
	 * Warning message to be issued, and a per-time-delta rate cap,
	 * if the hard limit is reached.
	 */
	const char	*pr_hardlimit_warning;
	struct timeval	pr_hardlimit_ratecap;
	struct timeval	pr_hardlimit_warning_last;

	/*
	 * Instrumentation
	 */
	unsigned long	pr_nget;	/* # of successful requests */
	unsigned long	pr_nfail;	/* # of unsuccessful requests */
	unsigned long	pr_nput;	/* # of releases */
	unsigned long	pr_npagealloc;	/* # of pages allocated */
	unsigned long	pr_npagefree;	/* # of pages released */
	unsigned int	pr_hiwat;	/* max # of pages in pool */
	unsigned long	pr_nidle;	/* # of idle pages */
	/*
	 * Diagnostic aids.
	 */
	void		*pr_freecheck;
	void		*pr_qcache;
	bool		pr_redzone;
	size_t		pr_reqsize;
};

/*
 * Cache group sizes, assuming 4-byte paddr_t on !_LP64.
 * All groups will be aligned to CACHE_LINE_SIZE.
 */
#ifdef _LP64
#define	PCG_NOBJECTS_NORMAL	15	/* 256 byte group */
#define	PCG_NOBJECTS_LARGE	63	/* 1024 byte group */
#else
#define	PCG_NOBJECTS_NORMAL	14	/* 124 byte group */
#define	PCG_NOBJECTS_LARGE	62	/* 508 byte group */
#endif
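
/*
 * These counts follow from the group layout below: a group header
 * (pcg_next, pcg_avail, pcg_size) followed by pcg_size pcgpair_t
 * entries.  On _LP64 that is 16 + 15 * 16 = 256 bytes (normal) and
 * 16 + 63 * 16 = 1024 bytes (large); with 4-byte pointers and
 * paddr_t it is 12 + 14 * 8 = 124 and 12 + 62 * 8 = 508 bytes.
 */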

typedef struct pcgpair {
	void	*pcgo_va;		/* object virtual address */
	paddr_t	pcgo_pa;		/* object physical address */
} pcgpair_t;

/* The pool cache group. */
typedef struct pool_cache_group {
	struct pool_cache_group	*pcg_next;	/* link to next group */
	u_int			pcg_avail;	/* # available objects */
	u_int			pcg_size;	/* max # of objects */
	pcgpair_t		pcg_objects[1];	/* the objects */
} pcg_t;

typedef struct pool_cache_cpu {
	uint64_t		cc_misses;
	uint64_t		cc_hits;
	struct pool_cache_group	*cc_current;
	struct pool_cache_group	*cc_previous;
	struct pool_cache	*cc_cache;
	int			cc_ipl;
	int			cc_cpuindex;
#ifdef _KERNEL
	ipl_cookie_t		cc_iplcookie;
#endif
} pool_cache_cpu_t;

struct pool_cache {
	/* Pool layer. */
	struct pool	pc_pool;

	/* Cache layer. */
	kmutex_t	pc_lock;	/* locks cache layer */
	TAILQ_ENTRY(pool_cache)
			pc_cachelist;	/* entry on global cache list */
	pcg_t		*pc_emptygroups;/* list of empty cache groups */
	pcg_t		*pc_fullgroups;	/* list of full cache groups */
	pcg_t		*pc_partgroups;	/* groups for reclamation */
	struct pool	*pc_pcgpool;	/* Pool of cache groups */
	int		pc_pcgsize;	/* # of objects per cache group */
	int		pc_ncpu;	/* number of CPUs set up */
	int		(*pc_ctor)(void *, void *, int);
	void		(*pc_dtor)(void *, void *);
	void		*pc_arg;	/* for ctor/dtor */
	uint64_t	pc_hits;	/* cache layer hits */
	uint64_t	pc_misses;	/* cache layer misses */
	uint64_t	pc_contended;	/* contention events on cache */
	unsigned int	pc_nempty;	/* empty groups in cache */
	unsigned int	pc_nfull;	/* full groups in cache */
	unsigned int	pc_npart;	/* partial groups in cache */
	unsigned int	pc_refcnt;	/* ref count for pagedaemon, etc */

	/* Diagnostic aids. */
	void		*pc_freecheck;
	bool		pc_redzone;
	size_t		pc_reqsize;

	/* CPU layer. */
	pool_cache_cpu_t pc_cpu0 __aligned(CACHE_LINE_SIZE);
	void		*pc_cpus[MAXCPUS] __aligned(CACHE_LINE_SIZE);
};

#endif /* __POOL_EXPOSE */

typedef struct pool_cache *pool_cache_t;

#ifdef _KERNEL
/*
 * pool_allocator_kmem is the default that all pools get unless
 * otherwise specified.  pool_allocator_nointr is provided for
 * pools that know they will never be accessed in interrupt
 * context.  pool_allocator_meta is used internally by the pool
 * subsystem for its own metadata.
 */
extern struct pool_allocator pool_allocator_kmem;
extern struct pool_allocator pool_allocator_nointr;
extern struct pool_allocator pool_allocator_meta;
#ifdef POOL_SUBPAGE
/* The above are subpage allocators in this case. */
extern struct pool_allocator pool_allocator_kmem_fullpage;
extern struct pool_allocator pool_allocator_nointr_fullpage;
#endif

void		pool_subsystem_init(void);

void		pool_init(struct pool *, size_t, u_int, u_int,
		    int, const char *, struct pool_allocator *, int);
void		pool_destroy(struct pool *);
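
/*
 * Typical static pool setup (a sketch; the "foo" names are
 * hypothetical, and NULL selects the default allocator):
 *
 *	static struct pool foo_pool;
 *
 *	pool_init(&foo_pool, sizeof(struct foo), 0, 0, 0, "foopl",
 *	    NULL, IPL_NONE);
 *	...
 *	pool_destroy(&foo_pool);
 */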

void		pool_set_drain_hook(struct pool *,
		    void (*)(void *, int), void *);
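
/*
 * The drain hook lets a pool's owner release cached resources when
 * the pool runs short of items.  A sketch (hypothetical names):
 *
 *	static void
 *	foo_drain(void *arg, int flags)
 *	{
 *		(release whatever foo objects the subsystem is hoarding)
 *	}
 *
 *	pool_set_drain_hook(&foo_pool, foo_drain, NULL);
 */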

void		*pool_get(struct pool *, int);
void		pool_put(struct pool *, void *);
int		pool_reclaim(struct pool *);
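
/*
 * Allocation and release, continuing the hypothetical foo_pool:
 * PR_WAITOK may sleep for memory, PR_NOWAIT returns NULL instead.
 *
 *	struct foo *f = pool_get(&foo_pool, PR_WAITOK);
 *	...
 *	pool_put(&foo_pool, f);
 */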

int		pool_prime(struct pool *, int);
void		pool_setlowat(struct pool *, int);
void		pool_sethiwat(struct pool *, int);
void		pool_sethardlimit(struct pool *, int, const char *, int);
bool		pool_drain(struct pool **);
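
/*
 * Water marks and the hard limit bound the pool's caching and
 * growth.  A sketch of tuning the hypothetical foo_pool, warning
 * at most once per 60 seconds when the limit is hit:
 *
 *	pool_setlowat(&foo_pool, 16);
 *	pool_sethiwat(&foo_pool, 256);
 *	pool_sethardlimit(&foo_pool, 1024, "foo_pool limit reached", 60);
 */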

/*
 * Debugging and diagnostic aids.
 */
void		pool_printit(struct pool *, const char *,
    void (*)(const char *, ...) __printflike(1, 2));
void		pool_printall(const char *, void (*)(const char *, ...)
    __printflike(1, 2));
int		pool_chk(struct pool *, const char *);

/*
 * Pool cache routines.
 */
pool_cache_t	pool_cache_init(size_t, u_int, u_int, u_int, const char *,
		    struct pool_allocator *, int, int (*)(void *, void *, int),
		    void (*)(void *, void *), void *);
void		pool_cache_bootstrap(pool_cache_t, size_t, u_int, u_int, u_int,
		    const char *, struct pool_allocator *, int,
		    int (*)(void *, void *, int), void (*)(void *, void *),
		    void *);
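
/*
 * A minimal cache with a constructor/destructor pair (a sketch;
 * "foo_cache", foo_ctor and foo_dtor are hypothetical, and the
 * ctor returns 0 on success):
 *
 *	static int
 *	foo_ctor(void *arg, void *obj, int flags)
 *	{
 *		struct foo *f = obj;
 *
 *		mutex_init(&f->f_lock, MUTEX_DEFAULT, IPL_NONE);
 *		return 0;
 *	}
 *
 *	static void
 *	foo_dtor(void *arg, void *obj)
 *	{
 *		struct foo *f = obj;
 *
 *		mutex_destroy(&f->f_lock);
 *	}
 *
 *	pool_cache_t foo_cache = pool_cache_init(sizeof(struct foo),
 *	    coherency_unit, 0, 0, "foocache", NULL, IPL_NONE,
 *	    foo_ctor, foo_dtor, NULL);
 */
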
void		pool_cache_destroy(pool_cache_t);
void		pool_cache_bootstrap_destroy(pool_cache_t);
void		*pool_cache_get_paddr(pool_cache_t, int, paddr_t *);
void		pool_cache_put_paddr(pool_cache_t, void *, paddr_t);
void		pool_cache_destruct_object(pool_cache_t, void *);
void		pool_cache_invalidate(pool_cache_t);
bool		pool_cache_reclaim(pool_cache_t);
void		pool_cache_set_drain_hook(pool_cache_t,
		    void (*)(void *, int), void *);
void		pool_cache_setlowat(pool_cache_t, int);
void		pool_cache_sethiwat(pool_cache_t, int);
void		pool_cache_sethardlimit(pool_cache_t, int, const char *, int);
void		pool_cache_cpu_init(struct cpu_info *);

#define		pool_cache_get(pc, f) pool_cache_get_paddr((pc), (f), NULL)
#define		pool_cache_put(pc, o) pool_cache_put_paddr((pc), (o), \
				          POOL_PADDR_INVALID)
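
/*
 * pool_cache_get/pool_cache_put are the usual entry points; the
 * _paddr variants also return or accept the object's physical
 * address.  Continuing the hypothetical foo_cache:
 *
 *	struct foo *f = pool_cache_get(foo_cache, PR_WAITOK);
 *	...
 *	pool_cache_put(foo_cache, f);
 */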

void		pool_whatis(uintptr_t, void (*)(const char *, ...)
    __printflike(1, 2));
#endif /* _KERNEL */

#endif /* _SYS_POOL_H_ */