1 /*
2 * Copyright (c) 2005 Jeffrey M. Hsu. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Jeffrey M. Hsu.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of The DragonFly Project nor the names of its
16 * contributors may be used to endorse or promote products derived
17 * from this software without specific, prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 #include <sys/param.h>
34 #include <sys/kernel.h>
35 #include <sys/systm.h>
36 #include <sys/callout.h>
37 #include <sys/globaldata.h>
38 #include <sys/malloc.h>
39 #include <sys/queue.h>
40 #include <sys/objcache.h>
41 #include <sys/spinlock.h>
42 #include <sys/thread.h>
43 #include <sys/thread2.h>
44 #include <sys/spinlock2.h>
45 #include <sys/sysctl.h>
46
/*
 * Malloc type used in this file for the objcache structure, its
 * descriptor, and the malloc-backed allocator argument blocks.
 */
static MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");
/* Malloc type used in this file for magazine allocations. */
static MALLOC_DEFINE(M_OBJMAG, "objcache mag", "Object Cache Magazine");

/*
 * objcache_create() falls back to a nominal cache size of twice this
 * value when the caller supplies neither nom_cache nor cluster_limit.
 */
#define INITIAL_MAG_CAPACITY 64
51
/*
 * A magazine is a fixed-capacity stack of object pointers.  Objects are
 * pushed at objects[rounds++] and popped from objects[--rounds], so the
 * valid entries are objects[0 .. rounds - 1].
 */
struct magazine {
	int rounds;				/* objects currently held */
	int capacity;				/* slots in objects[] */
	SLIST_ENTRY(magazine) nextmagazine;	/* magazine list linkage */
	void *objects[];			/* cached object pointers */
};

/* List head type used for the depot lists and for temporary lists. */
SLIST_HEAD(magazinelist, magazine);

/* Size of the magazine header, i.e. the offset of objects[0]. */
#define MAGAZINE_HDRSIZE __offsetof(struct magazine, objects[0])
/*
 * Requested capacity bounds; objcache_init() rounds both with
 * mag_capacity_align() into magazine_capmax / magazine_capmin.
 */
#define MAGAZINE_CAPACITY_MAX 4096
#define MAGAZINE_CAPACITY_MIN 4
64
/*
 * per-cluster cache of magazines
 *
 * All fields in this structure are protected by the spinlock.
 */
struct magazinedepot {
	/*
	 * The per-cpu object caches only exchange completely full or
	 * completely empty magazines with the depot layer, so we only
	 * have to cache these two types of magazines.
	 */
	struct magazinelist fullmagazines;
	struct magazinelist emptymagazines;
	int magcapacity;	/* capacity chosen in objcache_create() */

	/* protect this structure */
	struct spinlock spin;

	/*
	 * Number of objects that may still be obtained from the back-end
	 * allocator before the limit is hit.  Can go negative after
	 * objcache_set_cluster_limit() lowers the limit.
	 */
	int unallocated_objects;
	int cluster_limit;	/* ref for adjustments */

	/* infrequently used fields */
	int waiting;		/* waiting for another cpu to
				 * return a full magazine to
				 * the depot */
	int contested;		/* depot contention count */
} __cachealign;
93
/*
 * per-cpu object cache
 * All fields in this structure are protected by crit_enter().
 */
struct percpu_objcache {
	struct magazine *loaded_magazine;	/* active magazine */
	struct magazine *previous_magazine;	/* backup magazine */

	/* statistics */
	u_long gets_cumulative;		/* calls to get; a get that
					 * returns NULL is backed out */
	u_long gets_null;		/* objcache_get returned NULL */
	u_long allocs_cumulative;	/* times objcache_get went to the
					 * back-end allocator */
	u_long puts_cumulative;		/* total calls to put */
	u_long gets_exhausted;		/* # of gets hit exhaustion */
#ifdef notyet
	u_long puts_othercluster;	/* returned to other cluster */
#endif

	/* infrequently used fields */
	int waiting;			/* waiting for a thread on this
					 * cpu to return an obj to the
					 * per-cpu cache */
} __cachealign;
117
/*
 * only until we have NUMA cluster topology information XXX
 *
 * With these placeholders there is a single depot and every object and
 * every cpu maps to cluster 0.
 */
#define MAXCLUSTERS 1
#define myclusterid 0
#define CLUSTER_OF(obj) 0
122
/*
 * Rarely accessed but useful bits of objcache.
 *
 * Descriptors are linked on the allobjcaches list, which the statistics
 * sysctl handler walks under objcachelist_spin.
 */
struct objcache_desc {
	LIST_ENTRY(objcache_desc) next;	/* allobjcaches linkage */
	struct objcache *objcache;	/* back pointer to the cache */
	int total_objects;		/* initial unallocated_objects plus
					 * later limit adjustments; reported
					 * as oc_limit by the sysctl */
	int reserved;			/* not referenced in this file */
	char name[OBJCACHE_NAMELEN];	/* copy of the creation name */
};
133
/*
 * Two-level object cache consisting of NUMA cluster-level depots of
 * fully loaded or completely empty magazines and cpu-level caches of
 * individual objects.
 *
 * The structure is allocated with room for ncpus entries in the
 * trailing cache_percpu[] array.
 */
struct objcache {
	/* object constructor and destructor from blank storage */
	objcache_ctor_fn *ctor;		/* never NULL; defaults to null_ctor */
	objcache_dtor_fn *dtor;		/* never NULL; defaults to null_dtor */
	void *privdata;			/* passed to ctor and dtor */

	/* interface to underlying allocator */
	objcache_alloc_fn *alloc;
	objcache_free_fn *free;
	void *allocator_args;		/* passed to alloc and free */

	struct objcache_desc *desc;	/* name and accounting info */

	/* NUMA-cluster level caches */
	struct magazinedepot depot[MAXCLUSTERS];

	struct percpu_objcache cache_percpu[];	/* per-cpu caches */
};
157
/* Parent sysctl node: kern.objcache */
SYSCTL_NODE(_kern, OID_AUTO, objcache, CTLFLAG_RW, 0, "objcache");

/* Protects the allobjcaches list. */
static struct spinlock objcachelist_spin;
/* Descriptors of all existing object caches. */
static LIST_HEAD(objcachelist, objcache_desc) allobjcaches;
/*
 * Cache-line aligned magazine capacity bounds, computed once in
 * objcache_init() and used to clamp capacities in objcache_create().
 */
static int magazine_capmin;
static int magazine_capmax;
164
165 static struct magazine *
mag_alloc(int capacity)166 mag_alloc(int capacity)
167 {
168 struct magazine *mag;
169 int size;
170
171 size = __offsetof(struct magazine, objects[capacity]);
172 KASSERT(size > 0 && (size & __VM_CACHELINE_MASK) == 0,
173 ("magazine size is not multiple cache line size"));
174
175 mag = kmalloc(size, M_OBJMAG, M_INTWAIT | M_ZERO | M_CACHEALIGN);
176 mag->capacity = capacity;
177 mag->rounds = 0;
178 return (mag);
179 }
180
181 static int
mag_capacity_align(int mag_capacity)182 mag_capacity_align(int mag_capacity)
183 {
184 int mag_size;
185
186 mag_size = __VM_CACHELINE_ALIGN(
187 __offsetof(struct magazine, objects[mag_capacity]));
188 mag_capacity = (mag_size - MAGAZINE_HDRSIZE) / sizeof(void *);
189
190 return mag_capacity;
191 }
192
/*
 * Default destructor, installed by objcache_create() when the caller
 * passes no destructor.  Objects need no tear-down, so both arguments
 * are ignored.
 */
static void
null_dtor(void *obj, void *privdata)
{
	(void)obj;
	(void)privdata;
}
202
203 static boolean_t
null_ctor(void * obj,void * privdata,int ocflags)204 null_ctor(void *obj, void *privdata, int ocflags)
205 {
206 return TRUE;
207 }
208
209 /*
210 * Create an object cache.
211 */
212 struct objcache *
objcache_create(const char * name,int cluster_limit,int nom_cache,objcache_ctor_fn * ctor,objcache_dtor_fn * dtor,void * privdata,objcache_alloc_fn * alloc,objcache_free_fn * free,void * allocator_args)213 objcache_create(const char *name, int cluster_limit, int nom_cache,
214 objcache_ctor_fn *ctor, objcache_dtor_fn *dtor, void *privdata,
215 objcache_alloc_fn *alloc, objcache_free_fn *free,
216 void *allocator_args)
217 {
218 struct objcache_desc *desc;
219 struct objcache *oc;
220 struct magazinedepot *depot;
221 int cpuid;
222 int nmagdepot;
223 int mag_capacity;
224 int i;
225
226 /*
227 * Allocate objcache descriptor.
228 */
229 desc = kmalloc(sizeof(*desc), M_OBJCACHE, M_WAITOK | M_ZERO);
230
231 /*
232 * Allocate object cache structure
233 */
234 oc = kmalloc(__offsetof(struct objcache, cache_percpu[ncpus]),
235 M_OBJCACHE,
236 M_WAITOK | M_ZERO | M_CACHEALIGN);
237 oc->ctor = ctor ? ctor : null_ctor;
238 oc->dtor = dtor ? dtor : null_dtor;
239 oc->privdata = privdata;
240 oc->alloc = alloc;
241 oc->free = free;
242 oc->allocator_args = allocator_args;
243
244 /*
245 * Link objcache and its descriptor.
246 */
247 oc->desc = desc;
248 desc->objcache = oc;
249 strlcpy(desc->name, name, sizeof(desc->name));
250
251 /*
252 * Initialize depot list(s).
253 */
254 depot = &oc->depot[0];
255
256 spin_init(&depot->spin, "objcachedepot");
257 SLIST_INIT(&depot->fullmagazines);
258 SLIST_INIT(&depot->emptymagazines);
259
260 /*
261 * Figure out the nominal number of free objects to cache and
262 * the magazine capacity. By default we want to cache up to
263 * half the cluster_limit. If there is no cluster_limit then
264 * we want to cache up to 128 objects.
265 */
266 if (nom_cache == 0)
267 nom_cache = cluster_limit / 2;
268 if (cluster_limit && nom_cache > cluster_limit)
269 nom_cache = cluster_limit;
270 if (nom_cache == 0)
271 nom_cache = INITIAL_MAG_CAPACITY * 2;
272
273 /*
274 * Magazine capacity for 2 active magazines per cpu plus 2
275 * magazines in the depot.
276 */
277 mag_capacity = mag_capacity_align(nom_cache / (ncpus + 1) / 2 + 1);
278 if (mag_capacity > magazine_capmax)
279 mag_capacity = magazine_capmax;
280 else if (mag_capacity < magazine_capmin)
281 mag_capacity = magazine_capmin;
282 depot->magcapacity = mag_capacity;
283
284 /*
285 * The cluster_limit must be sufficient to have two magazines per
286 * cpu plus at least two magazines in the depot. However, because
287 * partial magazines can stay on the cpus what we really need here
288 * is to specify the number of extra magazines we allocate for the
289 * depot.
290 *
291 * Use ~1B objects to mean 'unlimited'. A negative unallocated
292 * object count is possible due to dynamic adjustments so we can't
293 * use a negative number to mean 'unlimited'. We need some overflow
294 * capacity too due to the preallocated mags.
295 */
296 if (cluster_limit == 0) {
297 depot->unallocated_objects = OBJCACHE_UNLIMITED;
298 } else {
299 depot->unallocated_objects = ncpus * mag_capacity * 2 +
300 cluster_limit;
301 }
302
303 /* Save # of total objects. */
304 desc->total_objects = depot->unallocated_objects;
305
306 /*
307 * This is a dynamic adjustment aid initialized to the callers
308 * expectations of the current limit.
309 */
310 depot->cluster_limit = cluster_limit;
311
312 /*
313 * Initialize per-cpu caches
314 */
315 for (cpuid = 0; cpuid < ncpus; cpuid++) {
316 struct percpu_objcache *cache_percpu = &oc->cache_percpu[cpuid];
317
318 cache_percpu->loaded_magazine = mag_alloc(mag_capacity);
319 cache_percpu->previous_magazine = mag_alloc(mag_capacity);
320 }
321
322 /*
323 * Compute how many empty magazines to place in the depot. This
324 * determines the retained cache size and is based on nom_cache.
325 *
326 * The actual cache size is larger because there are two magazines
327 * for each cpu as well but those can be in any fill state so we
328 * just can't count them.
329 *
330 * There is a minimum of two magazines in the depot.
331 */
332 nmagdepot = nom_cache / mag_capacity + 1;
333 if (nmagdepot < 2)
334 nmagdepot = 2;
335
336 /*
337 * Put empty magazines in depot
338 */
339 for (i = 0; i < nmagdepot; i++) {
340 struct magazine *mag = mag_alloc(mag_capacity);
341 SLIST_INSERT_HEAD(&depot->emptymagazines, mag, nextmagazine);
342 }
343
344 spin_lock(&objcachelist_spin);
345 LIST_INSERT_HEAD(&allobjcaches, desc, next);
346 spin_unlock(&objcachelist_spin);
347
348 return (oc);
349 }
350
351 /*
352 * Adjust the cluster limit. This is allowed to cause unallocated_objects
353 * to go negative. Note that due to the magazine hysteresis there is a
354 * limit to how much of the objcache can be reclaimed using this API to
355 * reduce its size.
356 */
357 void
objcache_set_cluster_limit(struct objcache * oc,int cluster_limit)358 objcache_set_cluster_limit(struct objcache *oc, int cluster_limit)
359 {
360 struct magazinedepot *depot;
361
362 depot = &oc->depot[myclusterid];
363 if (depot->cluster_limit != cluster_limit) {
364 int delta;
365
366 spin_lock(&depot->spin);
367 delta = cluster_limit - depot->cluster_limit;
368 depot->unallocated_objects += delta;
369 depot->cluster_limit = cluster_limit;
370 spin_unlock(&depot->spin);
371 wakeup(depot);
372
373 oc->desc->total_objects += delta;
374 }
375 }
376
377 struct objcache *
objcache_create_simple(malloc_type_t mtype,size_t objsize)378 objcache_create_simple(malloc_type_t mtype, size_t objsize)
379 {
380 struct objcache_malloc_args *margs;
381 struct objcache *oc;
382
383 margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
384 margs->objsize = objsize;
385 margs->mtype = mtype;
386 oc = objcache_create(mtype->ks_shortdesc, 0, 0,
387 NULL, NULL, NULL,
388 objcache_malloc_alloc, objcache_malloc_free,
389 margs);
390 return (oc);
391 }
392
393 struct objcache *
objcache_create_mbacked(malloc_type_t mtype,size_t objsize,int cluster_limit,int nom_cache,objcache_ctor_fn * ctor,objcache_dtor_fn * dtor,void * privdata)394 objcache_create_mbacked(malloc_type_t mtype, size_t objsize,
395 int cluster_limit, int nom_cache,
396 objcache_ctor_fn *ctor, objcache_dtor_fn *dtor,
397 void *privdata)
398 {
399 struct objcache_malloc_args *margs;
400 struct objcache *oc;
401
402 margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
403 margs->objsize = objsize;
404 margs->mtype = mtype;
405 oc = objcache_create(mtype->ks_shortdesc,
406 cluster_limit, nom_cache,
407 ctor, dtor, privdata,
408 objcache_malloc_alloc, objcache_malloc_free,
409 margs);
410 return(oc);
411 }
412
413
/*
 * Magazine fill-state predicates.  'mag' is any expression yielding a
 * struct magazine pointer; it is parenthesized so that compound
 * expressions such as (base + 1) work.  MAGAZINE_FULL evaluates its
 * argument twice, so do not pass expressions with side effects.
 */
#define MAGAZINE_EMPTY(mag)	((mag)->rounds == 0)
#define MAGAZINE_NOTEMPTY(mag)	((mag)->rounds != 0)
#define MAGAZINE_FULL(mag)	((mag)->rounds == (mag)->capacity)

/*
 * Exchange two struct magazine pointer lvalues.  The temporary uses a
 * name callers are unlikely to have so that an argument called 't' is
 * not shadowed inside the macro.  Each argument is evaluated twice.
 */
#define swap(x, y) ({					\
	struct magazine *swap_tmp_ = (x);		\
	(x) = (y);					\
	(y) = swap_tmp_;				\
})
419
/*
 * Get an object from the object cache.
 *
 * The object is taken, in order of preference, from this cpu's loaded
 * magazine, this cpu's previous magazine, or a full magazine pulled from
 * the depot.  If none of those has an object and the depot's
 * unallocated_objects budget is positive, a new object is obtained from
 * the back-end allocator and constructed; if the constructor fails the
 * object is handed back to the back-end and NULL is returned.
 *
 * When the budget is exhausted the caller sleeps on the depot if ocflags
 * has M_WAITOK set without M_NULLOK, otherwise NULL is returned.
 *
 * ocflags must not contain M_ZERO (asserted).
 *
 * WARNING! ocflags are only used when we have to go to the underlying
 *	    allocator, so we cannot depend on flags such as M_ZERO.
 */
void *
objcache_get(struct objcache *oc, int ocflags)
{
	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
	struct magazine *loadedmag;
	struct magazine *emptymag;
	void *obj;
	struct magazinedepot *depot;

	KKASSERT((ocflags & M_ZERO) == 0);
	crit_enter();
	++cpucache->gets_cumulative;

retry:
	/*
	 * Loaded magazine has an object.  This is the hot path.
	 * It is lock-free and uses a critical section to block
	 * out interrupt handlers on the same processor.
	 */
	loadedmag = cpucache->loaded_magazine;
	if (MAGAZINE_NOTEMPTY(loadedmag)) {
		obj = loadedmag->objects[--loadedmag->rounds];
		crit_exit();
		return (obj);
	}

	/* Previous magazine has an object. */
	if (MAGAZINE_NOTEMPTY(cpucache->previous_magazine)) {
		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
		loadedmag = cpucache->loaded_magazine;
		obj = loadedmag->objects[--loadedmag->rounds];
		crit_exit();
		return (obj);
	}

	/*
	 * Both magazines empty.  Get a full magazine from the depot and
	 * move one of the empty ones to the depot.
	 *
	 * Obtain the depot spinlock.
	 *
	 * NOTE: Beyond this point, M_* flags are handled via oc->alloc()
	 */
	depot = &oc->depot[myclusterid];
	spin_lock(&depot->spin);

	/*
	 * Recheck the cpucache after obtaining the depot spinlock.  This
	 * shouldn't be necessary now but don't take any chances.
	 */
	if (MAGAZINE_NOTEMPTY(cpucache->loaded_magazine) ||
	    MAGAZINE_NOTEMPTY(cpucache->previous_magazine)
	) {
		spin_unlock(&depot->spin);
		goto retry;
	}

	/* Check if depot has a full magazine. */
	if (!SLIST_EMPTY(&depot->fullmagazines)) {
		/*
		 * Rotate: previous goes back to the depot, loaded becomes
		 * previous, and the depot's full magazine becomes loaded.
		 */
		emptymag = cpucache->previous_magazine;
		cpucache->previous_magazine = cpucache->loaded_magazine;
		cpucache->loaded_magazine = SLIST_FIRST(&depot->fullmagazines);
		SLIST_REMOVE_HEAD(&depot->fullmagazines, nextmagazine);

		/*
		 * Return emptymag to the depot.
		 */
		KKASSERT(MAGAZINE_EMPTY(emptymag));
		SLIST_INSERT_HEAD(&depot->emptymagazines,
				  emptymag, nextmagazine);
		spin_unlock(&depot->spin);
		goto retry;
	}

	/*
	 * The depot does not have any non-empty magazines.  If we have
	 * not hit our object limit we can allocate a new object using
	 * the back-end allocator.
	 *
	 * NOTE: unallocated_objects can wind up being negative due to
	 *	 objcache_set_cluster_limit() calls.
	 */
	if (__predict_true(depot->unallocated_objects > 0)) {
		/* Reserve one object from the budget before unlocking. */
		--depot->unallocated_objects;
		spin_unlock(&depot->spin);
		++cpucache->allocs_cumulative;
		crit_exit();

		obj = oc->alloc(oc->allocator_args, ocflags);
		if (obj) {
			if (oc->ctor(obj, oc->privdata, ocflags))
				return (obj);
			/* Constructor failed: give the storage back. */
			oc->free(obj, oc->allocator_args);
			obj = NULL;
		}
		if (obj == NULL) {
			/* Return the reservation and notify any waiter. */
			spin_lock(&depot->spin);
			++depot->unallocated_objects;
			spin_unlock(&depot->spin);
			if (depot->waiting)
				wakeup(depot);

			crit_enter();
			/*
			 * makes debugging easier when gets_cumulative does
			 * not include gets_null.
			 */
			++cpucache->gets_null;
			--cpucache->gets_cumulative;
			crit_exit();
		}
		return(obj);
	}
	/* Warn only the first time this cpu hits exhaustion. */
	if (__predict_false(cpucache->gets_exhausted++ == 0)) {
		kprintf("Warning: objcache(%s) exhausted on cpu%d!\n",
			oc->desc->name, mycpuid);
	}

	/*
	 * Otherwise block if allowed to.
	 */
	if ((ocflags & (M_WAITOK|M_NULLOK)) == M_WAITOK) {
		++cpucache->waiting;
		++depot->waiting;
		ssleep(depot, &depot->spin, 0, "objcache_get", 0);
		--cpucache->waiting;
		--depot->waiting;
		spin_unlock(&depot->spin);
		goto retry;
	}

	/*
	 * Otherwise fail
	 */
	++cpucache->gets_null;
	--cpucache->gets_cumulative;
	crit_exit();
	spin_unlock(&depot->spin);
	return (NULL);
}
566
567 /*
568 * Wrapper for malloc allocation routines.
569 */
570 void *
objcache_malloc_alloc(void * allocator_args,int ocflags)571 objcache_malloc_alloc(void *allocator_args, int ocflags)
572 {
573 struct objcache_malloc_args *alloc_args = allocator_args;
574
575 return (kmalloc(alloc_args->objsize, alloc_args->mtype,
576 ocflags & OC_MFLAGS));
577 }
578
579 /*
580 * Wrapper for malloc allocation routines, with initial zeroing
581 * (but objects are not zerod on reuse from cache).
582 */
583 void *
objcache_malloc_alloc_zero(void * allocator_args,int ocflags)584 objcache_malloc_alloc_zero(void *allocator_args, int ocflags)
585 {
586 struct objcache_malloc_args *alloc_args = allocator_args;
587
588 return (kmalloc(alloc_args->objsize, alloc_args->mtype,
589 (ocflags & OC_MFLAGS) | M_ZERO));
590 }
591
592
593 void
objcache_malloc_free(void * obj,void * allocator_args)594 objcache_malloc_free(void *obj, void *allocator_args)
595 {
596 struct objcache_malloc_args *alloc_args = allocator_args;
597
598 kfree(obj, alloc_args->mtype);
599 }
600
/*
 * Back-end allocation routine for caches that are populated up front
 * and never allocate at run time: always returns NULL, whatever the
 * arguments.
 */
void *
objcache_nop_alloc(void *allocator_args, int ocflags)
{
	(void)allocator_args;
	(void)ocflags;

	return (NULL);
}
610
/*
 * Back-end free routine that does nothing; counterpart of
 * objcache_nop_alloc().
 */
void
objcache_nop_free(void *obj, void *allocator_args)
{
	(void)obj;
	(void)allocator_args;
}
615
/*
 * Return an object to the object cache.
 *
 * The object is stored, in order of preference, in this cpu's loaded
 * magazine or this cpu's previous magazine.  If both are full and the
 * depot has an empty magazine, the magazines are cycled through the
 * depot and the store is retried.  If the depot has no empty magazine
 * the object is destructed and released to the back-end allocator
 * instead, and the depot's unallocated_objects budget is credited.
 */
void
objcache_put(struct objcache *oc, void *obj)
{
	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
	struct magazine *loadedmag;
	struct magazinedepot *depot;

	crit_enter();
	++cpucache->puts_cumulative;

	/*
	 * Placeholder for cross-cluster returns; with 'notyet' undefined
	 * the body of this conditional is empty.
	 */
	if (CLUSTER_OF(obj) != myclusterid) {
#ifdef notyet
		/* use lazy IPI to send object to owning cluster XXX todo */
		++cpucache->puts_othercluster;
		crit_exit();
		return;
#endif
	}

retry:
	/*
	 * Free slot available in loaded magazine.  This is the hot path.
	 * It is lock-free and uses a critical section to block out interrupt
	 * handlers on the same processor.
	 */
	loadedmag = cpucache->loaded_magazine;
	if (!MAGAZINE_FULL(loadedmag)) {
		loadedmag->objects[loadedmag->rounds++] = obj;
		/* A getter on this cpu is sleeping on the depot; wake it. */
		if (cpucache->waiting)
			wakeup_mycpu(&oc->depot[myclusterid]);
		crit_exit();
		return;
	}

	/*
	 * Current magazine full, but previous magazine has room.  XXX
	 */
	if (!MAGAZINE_FULL(cpucache->previous_magazine)) {
		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
		loadedmag = cpucache->loaded_magazine;
		loadedmag->objects[loadedmag->rounds++] = obj;
		if (cpucache->waiting)
			wakeup_mycpu(&oc->depot[myclusterid]);
		crit_exit();
		return;
	}

	/*
	 * Both magazines full.  Get an empty magazine from the depot and
	 * move a full loaded magazine to the depot.  Even though the
	 * magazine may wind up with space available after we block on
	 * the spinlock, we still cycle it through to avoid the non-optimal
	 * corner-case.
	 *
	 * Obtain the depot spinlock.
	 */
	depot = &oc->depot[myclusterid];
	spin_lock(&depot->spin);

	/*
	 * If an empty magazine is available in the depot, cycle it
	 * through and retry.
	 */
	if (!SLIST_EMPTY(&depot->emptymagazines)) {
		/*
		 * Rotate: previous goes back to the depot, loaded becomes
		 * previous, and the depot's empty magazine becomes loaded.
		 */
		loadedmag = cpucache->previous_magazine;
		cpucache->previous_magazine = cpucache->loaded_magazine;
		cpucache->loaded_magazine = SLIST_FIRST(&depot->emptymagazines);
		SLIST_REMOVE_HEAD(&depot->emptymagazines, nextmagazine);

		/*
		 * Return loadedmag to the depot.  Due to blocking it may
		 * not be entirely full and could even be empty.
		 */
		if (MAGAZINE_EMPTY(loadedmag)) {
			SLIST_INSERT_HEAD(&depot->emptymagazines,
					  loadedmag, nextmagazine);
			spin_unlock(&depot->spin);
		} else {
			SLIST_INSERT_HEAD(&depot->fullmagazines,
					  loadedmag, nextmagazine);
			spin_unlock(&depot->spin);
			/* Objects became available in the depot. */
			if (depot->waiting)
				wakeup(depot);
		}
		goto retry;
	}

	/*
	 * An empty mag is not available.  This is a corner case which can
	 * occur due to cpus holding partially full magazines.  Do not try
	 * to allocate a mag, just free the object.
	 */
	++depot->unallocated_objects;
	spin_unlock(&depot->spin);
	if (depot->waiting)
		wakeup(depot);
	crit_exit();
	oc->dtor(obj, oc->privdata);
	oc->free(obj, oc->allocator_args);
}
719
720 /*
721 * The object is being put back into the cache, but the caller has
722 * indicated that the object is not in any shape to be reused and should
723 * be dtor'd immediately.
724 */
725 void
objcache_dtor(struct objcache * oc,void * obj)726 objcache_dtor(struct objcache *oc, void *obj)
727 {
728 struct magazinedepot *depot;
729
730 depot = &oc->depot[myclusterid];
731 spin_lock(&depot->spin);
732 ++depot->unallocated_objects;
733 spin_unlock(&depot->spin);
734 if (depot->waiting)
735 wakeup(depot);
736 oc->dtor(obj, oc->privdata);
737 oc->free(obj, oc->allocator_args);
738 }
739
740 /*
741 * Deallocate all objects in a magazine and free the magazine if requested.
742 * When freeit is TRUE the magazine must already be disassociated from the
743 * depot.
744 *
745 * Must be called with a critical section held when called with a per-cpu
746 * magazine. The magazine may be indirectly modified during the loop.
747 *
748 * If the magazine moves during a dtor the operation is aborted. This is
749 * only allowed when freeit is FALSE.
750 *
751 * The number of objects freed is returned.
752 */
753 static int
mag_purge(struct objcache * oc,struct magazine ** magp,int freeit)754 mag_purge(struct objcache *oc, struct magazine **magp, int freeit)
755 {
756 struct magazine *mag = *magp;
757 int count;
758 void *obj;
759
760 count = 0;
761 while (mag->rounds) {
762 obj = mag->objects[--mag->rounds];
763 oc->dtor(obj, oc->privdata); /* MAY BLOCK */
764 oc->free(obj, oc->allocator_args); /* MAY BLOCK */
765 ++count;
766
767 /*
768 * Cycle for interrupts.
769 */
770 if ((count & 15) == 0) {
771 crit_exit();
772 crit_enter();
773 }
774
775 /*
776 * mag may have become invalid either due to dtor/free
777 * blocking or interrupt cycling, do not derefernce it
778 * until we check.
779 */
780 if (*magp != mag) {
781 kprintf("mag_purge: mag ripped out\n");
782 break;
783 }
784 }
785 if (freeit) {
786 KKASSERT(*magp == mag);
787 *magp = NULL;
788 kfree(mag, M_OBJMAG);
789 }
790 return(count);
791 }
792
793 /*
794 * Disassociate zero or more magazines from a magazine list associated with
795 * the depot, update the depot, and move the magazines to a temporary
796 * list.
797 *
798 * The caller must check the depot for waiters and wake it up, typically
799 * after disposing of the magazines this function loads onto the temporary
800 * list.
801 */
802 static void
maglist_disassociate(struct magazinedepot * depot,struct magazinelist * maglist,struct magazinelist * tmplist,boolean_t purgeall)803 maglist_disassociate(struct magazinedepot *depot, struct magazinelist *maglist,
804 struct magazinelist *tmplist, boolean_t purgeall)
805 {
806 struct magazine *mag;
807
808 while ((mag = SLIST_FIRST(maglist)) != NULL) {
809 SLIST_REMOVE_HEAD(maglist, nextmagazine);
810 SLIST_INSERT_HEAD(tmplist, mag, nextmagazine);
811 depot->unallocated_objects += mag->rounds;
812 }
813 }
814
815 /*
816 * Deallocate all magazines and their contents from the passed temporary
817 * list. The magazines have already been accounted for by their depots.
818 *
819 * The total number of rounds freed is returned. This number is typically
820 * only used to determine whether a wakeup on the depot is needed or not.
821 */
822 static int
maglist_purge(struct objcache * oc,struct magazinelist * maglist)823 maglist_purge(struct objcache *oc, struct magazinelist *maglist)
824 {
825 struct magazine *mag;
826 int count = 0;
827
828 /*
829 * can't use SLIST_FOREACH because blocking releases the depot
830 * spinlock
831 */
832 crit_enter();
833 while ((mag = SLIST_FIRST(maglist)) != NULL) {
834 SLIST_REMOVE_HEAD(maglist, nextmagazine);
835 count += mag_purge(oc, &mag, TRUE);
836 }
837 crit_exit();
838 return(count);
839 }
840
841 /*
842 * De-allocates all magazines on the full and empty magazine lists.
843 *
844 * Because this routine is called with a spinlock held, the magazines
845 * can only be disassociated and moved to a temporary list, not freed.
846 *
847 * The caller is responsible for freeing the magazines.
848 */
849 static void
depot_disassociate(struct magazinedepot * depot,struct magazinelist * tmplist)850 depot_disassociate(struct magazinedepot *depot, struct magazinelist *tmplist)
851 {
852 maglist_disassociate(depot, &depot->fullmagazines, tmplist, TRUE);
853 maglist_disassociate(depot, &depot->emptymagazines, tmplist, TRUE);
854 }
855
856 /*
857 * Try to free up some memory. Return as soon as some free memory is found.
858 * For each object cache on the reclaim list, first try the current per-cpu
859 * cache, then the full magazine depot.
860 */
861 boolean_t
objcache_reclaimlist(struct objcache * oclist[],int nlist)862 objcache_reclaimlist(struct objcache *oclist[], int nlist)
863 {
864 struct objcache *oc;
865 struct percpu_objcache *cpucache;
866 struct magazinedepot *depot;
867 struct magazinelist tmplist;
868 int i, count;
869
870 SLIST_INIT(&tmplist);
871
872 for (i = 0; i < nlist; i++) {
873 oc = oclist[i];
874 cpucache = &oc->cache_percpu[mycpuid];
875 depot = &oc->depot[myclusterid];
876
877 crit_enter();
878 count = mag_purge(oc, &cpucache->loaded_magazine, FALSE);
879 if (count == 0)
880 count += mag_purge(oc, &cpucache->previous_magazine, FALSE);
881 crit_exit();
882 if (count > 0) {
883 spin_lock(&depot->spin);
884 depot->unallocated_objects += count;
885 spin_unlock(&depot->spin);
886 if (depot->waiting)
887 wakeup(depot);
888 return (TRUE);
889 }
890 spin_lock(&depot->spin);
891 maglist_disassociate(depot, &depot->fullmagazines,
892 &tmplist, FALSE);
893 spin_unlock(&depot->spin);
894 count = maglist_purge(oc, &tmplist);
895 if (count > 0) {
896 if (depot->waiting)
897 wakeup(depot);
898 return (TRUE);
899 }
900 }
901 return (FALSE);
902 }
903
904 /*
905 * Destroy an object cache. Must have no existing references.
906 */
907 void
objcache_destroy(struct objcache * oc)908 objcache_destroy(struct objcache *oc)
909 {
910 struct objcache_desc *desc = oc->desc;
911 struct percpu_objcache *cache_percpu;
912 struct magazinedepot *depot;
913 int clusterid, cpuid;
914 struct magazinelist tmplist;
915
916 spin_lock(&objcachelist_spin);
917 LIST_REMOVE(desc, next);
918 spin_unlock(&objcachelist_spin);
919
920 SLIST_INIT(&tmplist);
921 for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++) {
922 depot = &oc->depot[clusterid];
923 spin_lock(&depot->spin);
924 depot_disassociate(depot, &tmplist);
925 spin_unlock(&depot->spin);
926 }
927 maglist_purge(oc, &tmplist);
928
929 for (cpuid = 0; cpuid < ncpus; cpuid++) {
930 cache_percpu = &oc->cache_percpu[cpuid];
931
932 crit_enter();
933 mag_purge(oc, &cache_percpu->loaded_magazine, TRUE);
934 mag_purge(oc, &cache_percpu->previous_magazine, TRUE);
935 crit_exit();
936 cache_percpu->loaded_magazine = NULL;
937 cache_percpu->previous_magazine = NULL;
938 /* don't bother adjusting depot->unallocated_objects */
939 }
940
941 kfree(desc, M_OBJCACHE);
942 kfree(oc, M_OBJCACHE);
943 }
944
945 static int
sysctl_ocstats(SYSCTL_HANDLER_ARGS)946 sysctl_ocstats(SYSCTL_HANDLER_ARGS)
947 {
948 struct objcache_stats stat;
949 struct objcache_desc marker, *desc;
950 int error;
951
952 memset(&marker, 0, sizeof(marker));
953
954 spin_lock(&objcachelist_spin);
955
956 LIST_INSERT_HEAD(&allobjcaches, &marker, next);
957 while ((desc = LIST_NEXT(&marker, next)) != NULL) {
958 u_long puts, unalloc;
959 int cpu;
960
961 LIST_REMOVE(&marker, next);
962 LIST_INSERT_AFTER(desc, &marker, next);
963
964 if (desc->total_objects == 0) {
965 /* Marker inserted by another thread. */
966 continue;
967 }
968
969 memset(&stat, 0, sizeof(stat));
970 strlcpy(stat.oc_name, desc->name, sizeof(stat.oc_name));
971 stat.oc_limit = desc->total_objects;
972 /* XXX domain aware */
973 unalloc = desc->objcache->depot[0].unallocated_objects;
974
975 puts = 0;
976 for (cpu = 0; cpu < ncpus; ++cpu) {
977 const struct percpu_objcache *cache;
978
979 cache = &desc->objcache->cache_percpu[cpu];
980 puts += cache->puts_cumulative;
981
982 stat.oc_requested += cache->gets_cumulative;
983 stat.oc_exhausted += cache->gets_exhausted;
984 stat.oc_failed += cache->gets_null;
985 stat.oc_allocated += cache->allocs_cumulative;
986 }
987 spin_unlock(&objcachelist_spin);
988
989 /*
990 * Apply fixup.
991 */
992 if (stat.oc_requested > puts)
993 stat.oc_used = stat.oc_requested - puts;
994 if (stat.oc_limit > unalloc + stat.oc_used) {
995 stat.oc_cached = stat.oc_limit -
996 (unalloc + stat.oc_used);
997 }
998 stat.oc_requested += stat.oc_failed;
999
1000 /* Send out. */
1001 error = SYSCTL_OUT(req, &stat, sizeof(stat));
1002
1003 /* Hold the lock before we return. */
1004 spin_lock(&objcachelist_spin);
1005
1006 if (error)
1007 break;
1008 }
1009 LIST_REMOVE(&marker, next);
1010
1011 spin_unlock(&objcachelist_spin);
1012
1013 return error;
1014 }
1015 SYSCTL_PROC(_kern_objcache, OID_AUTO, stats, (CTLTYPE_OPAQUE | CTLFLAG_RD),
1016 0, 0, sysctl_ocstats, "S,objcache_stats", "objcache statistics");
1017
1018 static void
objcache_init(void)1019 objcache_init(void)
1020 {
1021 spin_init(&objcachelist_spin, "objcachelist");
1022
1023 magazine_capmin = mag_capacity_align(MAGAZINE_CAPACITY_MIN);
1024 magazine_capmax = mag_capacity_align(MAGAZINE_CAPACITY_MAX);
1025 if (bootverbose) {
1026 kprintf("objcache: magazine cap [%d, %d]\n",
1027 magazine_capmin, magazine_capmax);
1028 }
1029 #if 0
1030 callout_init_mp(&objcache_callout);
1031 objcache_rebalance_period = 60 * hz;
1032 callout_reset(&objcache_callout, objcache_rebalance_period,
1033 objcache_timer, NULL);
1034 #endif
1035 }
1036 SYSINIT(objcache, SI_BOOT2_OBJCACHE, SI_ORDER_FIRST, objcache_init, 0);
1037