1 /* $NetBSD: pool.h,v 1.96 2021/12/22 16:57:28 thorpej Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998, 1999, 2000, 2007, 2020 5 * The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Paul Kranenburg; by Jason R. Thorpe of the Numerical Aerospace 10 * Simulation Facility, NASA Ames Research Center. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 
 */

#ifndef _SYS_POOL_H_
#define	_SYS_POOL_H_

#include <sys/stdbool.h>
#include <sys/stdint.h>

/*
 * Per-pool statistics record exported to userland (e.g. via sysctl).
 * The pr_* counters mirror the identically-named fields of struct pool
 * and struct pool_cache below, widened to fixed-size types so the
 * record layout is independent of the kernel ABI.
 */
struct pool_sysctl {
	char pr_wchan[16];		/* pool name / sleep identifier */
	uint64_t pr_flags;		/* pool flags (PR_* below) */
	uint64_t pr_size;		/* size of each item */
	uint64_t pr_pagesize;		/* size of backing pages */
	uint64_t pr_itemsperpage;	/* # items that fit in a page */
	uint64_t pr_nitems;		/* number of free items in pool */
	uint64_t pr_nout;		/* # items currently allocated */
	uint64_t pr_hardlimit;		/* hard limit on allocated items */
	uint64_t pr_npages;		/* # of pages allocated */
	uint64_t pr_minpages;		/* minimum # of pages to keep */
	uint64_t pr_maxpages;		/* maximum # of pages to keep */

	uint64_t pr_nget;		/* # of successful requests */
	uint64_t pr_nfail;		/* # of unsuccessful requests */
	uint64_t pr_nput;		/* # of releases */
	uint64_t pr_npagealloc;		/* # of pages allocated */
	uint64_t pr_npagefree;		/* # of pages released */
	uint64_t pr_hiwat;		/* max # of pages in pool */
	uint64_t pr_nidle;		/* # of idle pages */

	/* pool_cache layer statistics (zero when the pool has no cache). */
	uint64_t pr_cache_meta_size;
	uint64_t pr_cache_nfull;	/* full cache groups */
	uint64_t pr_cache_npartial;	/* partial cache groups */
	uint64_t pr_cache_nempty;	/* empty cache groups */
	uint64_t pr_cache_ncontended;	/* lock contention events */
	uint64_t pr_cache_nmiss_global;
	uint64_t pr_cache_nhit_global;
	uint64_t pr_cache_nmiss_pcpu;
	uint64_t pr_cache_nhit_pcpu;
};

#ifdef _KERNEL
#define	__POOL_EXPOSE
#endif

#ifdef __POOL_EXPOSE
#include <sys/param.h>
#include <sys/mutex.h>
#include <sys/condvar.h>
#include <sys/queue.h>
#include <sys/time.h>
#include <sys/tree.h>
#include <sys/callback.h>

#ifdef _KERNEL_OPT
#include "opt_pool.h"
#endif

/* Sentinel "no physical address" value returned with cached objects. */
#define	POOL_PADDR_INVALID	((paddr_t) -1)

struct pool;

/*
 * Back-end page allocator for a pool: supplies and releases the pages
 * a pool carves items out of.  Called with the pool unlocked (see the
 * pr_lock comment in struct pool below).
 */
struct pool_allocator {
	void		*(*pa_alloc)(struct pool *, int);
	void		(*pa_free)(struct pool *, void *);
	unsigned int	pa_pagesz;

	/* The following fields are for internal use only. */
	kmutex_t	pa_lock;
	TAILQ_HEAD(, pool) pa_list;	/* list of pools using this allocator */
	uint32_t	pa_refcnt;	/* number of pools using this allocator */
	int		pa_pagemask;
	int		pa_pageshift;
};

LIST_HEAD(pool_pagelist,pool_item_header);
SPLAY_HEAD(phtree, pool_item_header);

/*
 * Quarantine ring used when POOL_QUARANTINE is configured: freed items
 * are parked here (delaying reuse) to help catch use-after-free.
 */
#define POOL_QUARANTINE_DEPTH 128
typedef struct {
	size_t rotor;			/* next slot to recycle */
	intptr_t list[POOL_QUARANTINE_DEPTH];
} pool_quar_t;

struct pool {
	TAILQ_ENTRY(pool)
			pr_poollist;
	struct pool_pagelist
			pr_emptypages;	/* Empty pages */
	struct pool_pagelist
			pr_fullpages;	/* Full pages */
	struct pool_pagelist
			pr_partpages;	/* Partially-allocated pages */
	struct pool_item_header	*pr_curpage;
	struct pool		*pr_phpool;	/* Pool item header pool */
	struct pool_cache	*pr_cache;	/* Cache for this pool */
	unsigned int	pr_size;	/* Size of item */
	unsigned int	pr_align;	/* Requested alignment, must be 2^n */
	unsigned int	pr_itemoffset;	/* offset of the item space */
	unsigned int	pr_minitems;	/* minimum # of free items to keep */
	unsigned int	pr_maxitems;	/* maximum # of free items to keep */
	unsigned int	pr_minpages;	/* minimum # of pages to keep */
	unsigned int	pr_maxpages;	/* maximum # of pages to keep */
	unsigned int	pr_npages;	/* # of pages allocated */
	unsigned int	pr_itemsperpage;/* # items that fit in a page */
	unsigned int	pr_poolid;	/* id of the pool */
	unsigned int	pr_nitems;	/* number of free items in pool */
	unsigned int	pr_nout;	/* # items currently allocated */
	unsigned int	pr_hardlimit;	/* hard limit to number of allocated
					   items */
	unsigned int	pr_refcnt;	/* ref count for pagedaemon, etc */
	struct pool_allocator *pr_alloc;/* back-end allocator */
	TAILQ_ENTRY(pool) pr_alloc_list;/* link on allocator's pool list */

	/* Drain hook. */
	void		(*pr_drain_hook)(void *, int);
	void		*pr_drain_hook_arg;

	const char	*pr_wchan;	/* tsleep(9) identifier */
	unsigned int	pr_flags;	/* r/w flags */
	unsigned int	pr_roflags;	/* r/o flags */
#define	PR_WAITOK	0x01	/* Note: matches KM_SLEEP */
#define	PR_NOWAIT	0x02	/* Note: matches KM_NOSLEEP */
#define	PR_WANTED	0x04	/* waiting for free objects */
#define	PR_PHINPAGE	0x40	/* page header in page */
#define	PR_LIMITFAIL	0x100	/* even if waiting, fail if we hit limit */
#define	PR_RECURSIVE	0x200	/* pool contains pools, for vmstat(8) */
#define	PR_NOTOUCH	0x400	/* don't use free items to keep internal state*/
#define	PR_NOALIGN	0x800	/* don't assume backend alignment */
#define	PR_LARGECACHE	0x1000	/* use large cache groups */
#define	PR_GROWING	0x2000	/* pool_grow in progress */
#define	PR_GROWINGNOWAIT 0x4000	/* pool_grow in progress by PR_NOWAIT alloc */
#define	PR_ZERO		0x8000	/* zero data before returning */
#define	PR_USEBMAP	0x10000	/* use a bitmap to manage freed items */
#define	PR_PSERIALIZE	0x20000	/* needs pserialize sync point before free */

	/*
	 * `pr_lock' protects the pool's data structures when removing
	 * items from or returning items to the pool, or when reading
	 * or updating read/write fields in the pool descriptor.
	 *
	 * We assume back-end page allocators provide their own locking
	 * scheme.  They will be called with the pool descriptor _unlocked_,
	 * since the page allocators may block.
	 */
	kmutex_t	pr_lock;
	kcondvar_t	pr_cv;
	int		pr_ipl;

	struct phtree	pr_phtree;	/* item headers, keyed for lookup */

	int		pr_maxcolor;	/* Cache colouring */
	int		pr_curcolor;
	int		pr_phoffset;	/* unused */

	/*
	 * Warning message to be issued, and a per-time-delta rate cap,
	 * if the hard limit is reached.
	 */
	const char	*pr_hardlimit_warning;
	struct timeval	pr_hardlimit_ratecap;
	struct timeval	pr_hardlimit_warning_last;

	/*
	 * Instrumentation
	 */
	unsigned long	pr_nget;	/* # of successful requests */
	unsigned long	pr_nfail;	/* # of unsuccessful requests */
	unsigned long	pr_nput;	/* # of releases */
	unsigned long	pr_npagealloc;	/* # of pages allocated */
	unsigned long	pr_npagefree;	/* # of pages released */
	unsigned int	pr_hiwat;	/* max # of pages in pool */
	unsigned long	pr_nidle;	/* # of idle pages */

	/*
	 * Diagnostic aides.
	 */
	void		*pr_freecheck;
	void		*pr_qcache;
	bool		pr_redzone;	/* red-zone checking in effect */
	size_t		pr_reqsize;	/* caller-requested item size */
	size_t		pr_reqsize_with_redzone;
#ifdef POOL_QUARANTINE
	pool_quar_t	pr_quar;	/* delayed-reuse quarantine ring */
#endif
};

/*
 * Cache group sizes, assuming 4-byte paddr_t on !_LP64.
 * All groups will be aligned to COHERENCY_UNIT.
 */
#ifdef _LP64
#define	PCG_NOBJECTS_NORMAL	15	/* 256 byte group */
#define	PCG_NOBJECTS_LARGE	63	/* 1024 byte group */
#else
#define	PCG_NOBJECTS_NORMAL	14	/* 124 byte group */
#define	PCG_NOBJECTS_LARGE	62	/* 508 byte group */
#endif

/* One cached object: its virtual and (optional) physical address. */
typedef struct pcgpair {
	void	*pcgo_va;		/* object virtual address */
	paddr_t	pcgo_pa;		/* object physical address */
} pcgpair_t;

/* The pool cache group. */
typedef struct pool_cache_group {
	struct pool_cache_group	*pcg_next;	/* link to next group */
	u_int			pcg_avail;	/* # available objects */
	u_int			pcg_size;	/* max number objects */
	pcgpair_t		pcg_objects[1];	/* the objects */
} pcg_t;

/* Pool cache CPU.  Sized to 64 bytes on _LP64. */
typedef struct pool_cache_cpu {
	struct pool_cache_group	*cc_current;	/* cache group in use */
	struct pool_cache_group	*cc_previous;	/* spare cache group */
	pcg_t *volatile		*cc_pcgcache;	/* empty-group freelist */
	uint64_t		cc_misses;
	uint64_t		cc_hits;
	uint64_t		cc_pcmisses;
	uint64_t		cc_contended;
	uint32_t		cc_nfull;
	uint32_t		cc_npart;
} pool_cache_cpu_t;

/*
 * A pool with a constructed-object cache layered on top: per-CPU
 * groups of ready objects, backed by the embedded pool.
 */
struct pool_cache {
	/* Pool layer. */
	struct pool	pc_pool;

	/* Cache layer. */
	TAILQ_ENTRY(pool_cache)
			pc_cachelist;	/* entry on global cache list */
	struct pool	*pc_pcgpool;	/* Pool of cache groups */
	pcg_t *volatile *pc_pcgcache;	/* list of empty cache groups */
	int		pc_pcgsize;	/* Use large cache groups? */
	int		pc_ncpu;	/* number cpus set up */
	int		(*pc_ctor)(void *, void *, int);
	void		(*pc_dtor)(void *, void *);
	void		*pc_arg;	/* for ctor/dtor */
	unsigned int	pc_refcnt;	/* ref count for pagedaemon, etc */
	unsigned int	pc_roflags;	/* r/o cache flags */
	void		*pc_cpus[MAXCPUS];	/* per-CPU state (pool_cache_cpu_t) */

	/* Diagnostic aides. */
	void		*pc_freecheck;
	bool		pc_redzone;
	size_t		pc_reqsize;

	/* Hot items. */
	pcg_t *volatile pc_fullgroups	/* list of full cache groups */
	    __aligned(CACHE_LINE_SIZE);
	pcg_t *volatile pc_partgroups;	/* groups for reclamation */

	/* Boot cpu. */
	pool_cache_cpu_t pc_cpu0 __aligned(CACHE_LINE_SIZE);
};

#endif /* __POOL_EXPOSE */

typedef struct pool_cache *pool_cache_t;

#ifdef _KERNEL
/*
 * pool_allocator_kmem is the default that all pools get unless
 * otherwise specified.  pool_allocator_nointr is provided for
 * pools that know they will never be accessed in interrupt
 * context.
 */
extern struct pool_allocator pool_allocator_kmem;
extern struct pool_allocator pool_allocator_nointr;
extern struct pool_allocator pool_allocator_meta;

void		pool_subsystem_init(void);

void		pool_init(struct pool *, size_t, u_int, u_int,
		    int, const char *, struct pool_allocator *, int);
void		pool_destroy(struct pool *);

void		pool_set_drain_hook(struct pool *,
		    void (*)(void *, int), void *);

void		*pool_get(struct pool *, int);
void		pool_put(struct pool *, void *);
int		pool_reclaim(struct pool *);

void		pool_prime(struct pool *, int);
void		pool_setlowat(struct pool *, int);
void		pool_sethiwat(struct pool *, int);
void		pool_sethardlimit(struct pool *, int, const char *, int);
bool		pool_drain(struct pool **);
int		pool_totalpages(void);
int		pool_totalpages_locked(void);

unsigned int	pool_nget(struct pool *);
unsigned int	pool_nput(struct pool *);

/*
 * Debugging and diagnostic aides.
 */
void		pool_printit(struct pool *, const char *,
		    void (*)(const char *, ...) __printflike(1, 2));
void		pool_printall(const char *, void (*)(const char *, ...)
		    __printflike(1, 2));
int		pool_chk(struct pool *, const char *);

/*
 * Pool cache routines.
 */
pool_cache_t	pool_cache_init(size_t, u_int, u_int, u_int, const char *,
		    struct pool_allocator *, int, int (*)(void *, void *, int),
		    void (*)(void *, void *), void *);
void		pool_cache_bootstrap(pool_cache_t, size_t, u_int, u_int, u_int,
		    const char *, struct pool_allocator *, int,
		    int (*)(void *, void *, int), void (*)(void *, void *),
		    void *);
void		pool_cache_destroy(pool_cache_t);
void		pool_cache_bootstrap_destroy(pool_cache_t);
void		*pool_cache_get_paddr(pool_cache_t, int, paddr_t *);
void		pool_cache_put_paddr(pool_cache_t, void *, paddr_t);
void		pool_cache_destruct_object(pool_cache_t, void *);
void		pool_cache_invalidate(pool_cache_t);
bool		pool_cache_reclaim(pool_cache_t);
void		pool_cache_set_drain_hook(pool_cache_t,
		    void (*)(void *, int), void *);
void		pool_cache_setlowat(pool_cache_t, int);
void		pool_cache_sethiwat(pool_cache_t, int);
void		pool_cache_sethardlimit(pool_cache_t, int, const char *, int);
void		pool_cache_prime(pool_cache_t, int);
void		pool_cache_cpu_init(struct cpu_info *);

unsigned int	pool_cache_nget(pool_cache_t);
unsigned int	pool_cache_nput(pool_cache_t);

/* Convenience wrappers when the physical address is not needed. */
#define	pool_cache_get(pc, f) pool_cache_get_paddr((pc), (f), NULL)
#define	pool_cache_put(pc, o) pool_cache_put_paddr((pc), (o), \
				    POOL_PADDR_INVALID)

void		pool_whatis(uintptr_t, void (*)(const char *, ...)
		    __printflike(1, 2));
#endif /* _KERNEL */

#endif /* _SYS_POOL_H_ */