xref: /dragonfly/sys/kern/kern_objcache.c (revision 9dbd27e2)
1 /*
2  * Copyright (c) 2005 Jeffrey M. Hsu.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Jeffrey M. Hsu.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of The DragonFly Project nor the names of its
16  *    contributors may be used to endorse or promote products derived
17  *    from this software without specific, prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/param.h>
34 #include <sys/kernel.h>
35 #include <sys/systm.h>
36 #include <sys/callout.h>
37 #include <sys/globaldata.h>
38 #include <sys/malloc.h>
39 #include <sys/queue.h>
40 #include <sys/objcache.h>
41 #include <sys/spinlock.h>
42 #include <sys/thread.h>
43 #include <sys/thread2.h>
44 #include <sys/spinlock2.h>
45 
46 static MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");
47 static MALLOC_DEFINE(M_OBJMAG, "objcache mag", "Object Cache Magazine");
48 
49 #define	INITIAL_MAG_CAPACITY	64
50 
51 struct magazine {
52 	int			 rounds;
53 	int			 capacity;
54 	SLIST_ENTRY(magazine)	 nextmagazine;
55 	void			*objects[];
56 };
57 
58 SLIST_HEAD(magazinelist, magazine);
59 
60 #define MAGAZINE_HDRSIZE	__offsetof(struct magazine, objects[0])
61 #define MAGAZINE_CAPACITY_MAX	4096
62 #define MAGAZINE_CAPACITY_MIN	4
63 
64 /*
65  * per-cluster cache of magazines
66  *
67  * All fields in this structure are protected by the spinlock.
68  */
69 struct magazinedepot {
70 	/*
71 	 * The per-cpu object caches only exchange completely full or
72 	 * completely empty magazines with the depot layer, so the depot
73 	 * only has to cache these two types of magazines.
74 	 */
75 	struct magazinelist	fullmagazines;
76 	struct magazinelist	emptymagazines;
77 	int			magcapacity;
78 
79 	/* protect this structure */
80 	struct spinlock		spin;
81 
82 	/* objects not yet allocated towards limit */
83 	int			unallocated_objects;
84 	int			cluster_limit;	/* ref for adjustments */
85 
86 	/* infrequently used fields */
87 	int			waiting;	/* waiting for another cpu to
88 						 * return a full magazine to
89 						 * the depot */
90 	int			contested;	/* depot contention count */
91 } __cachealign;
92 
93 /*
94  * per-cpu object cache
95  * All fields in this structure are protected by crit_enter().
96  */
97 struct percpu_objcache {
98 	struct magazine	*loaded_magazine;	/* active magazine */
99 	struct magazine	*previous_magazine;	/* backup magazine */
100 
101 	/* statistics */
102 	int		gets_cumulative;	/* total calls to get */
103 	int		gets_null;		/* objcache_get returned NULL */
104 	int		puts_cumulative;	/* total calls to put */
105 	int		puts_othercluster;	/* returned to other cluster */
106 
107 	/* infrequently used fields */
108 	int		waiting;		/* waiting for a thread on this
109 						 * cpu to return an obj to the
110 						 * per-cpu cache */
111 } __cachealign;
112 
113 /* only until we have NUMA cluster topology information XXX */
114 #define MAXCLUSTERS 1
115 #define myclusterid 0
116 #define CLUSTER_OF(obj) 0
117 
118 /*
119  * Two-level object cache consisting of NUMA cluster-level depots of
120  * fully loaded or completely empty magazines and cpu-level caches of
121  * individual objects.
122  */
123 struct objcache {
124 	const char		*name;
125 
126 	/* object constructor and destructor from blank storage */
127 	objcache_ctor_fn	*ctor;
128 	objcache_dtor_fn	*dtor;
129 	void			*privdata;
130 
131 	/* interface to underlying allocator */
132 	objcache_alloc_fn	*alloc;
133 	objcache_free_fn	*free;
134 	void			*allocator_args;
135 
136 	LIST_ENTRY(objcache)	oc_next;
137 	int			exhausted;	/* exhaustion warning printed */
138 
139 	/* NUMA-cluster level caches */
140 	struct magazinedepot	depot[MAXCLUSTERS];
141 
142 	struct percpu_objcache	cache_percpu[];	/* per-cpu caches */
143 };
144 
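/*
 * Illustrative summary of the object flow (added commentary, not part of
 * the original source):
 *
 *	objcache_get:	loaded_magazine -> previous_magazine ->
 *			depot->fullmagazines -> oc->alloc() + oc->ctor()
 *	objcache_put:	loaded_magazine -> previous_magazine ->
 *			depot->emptymagazines -> oc->dtor() + oc->free()
 *
 * Only completely full or completely empty magazines cross the cpu/depot
 * boundary, which keeps the depot spinlock off the hot paths; the per-cpu
 * paths are protected by a critical section only.
 */
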
145 static struct spinlock objcachelist_spin;
146 static LIST_HEAD(objcachelist, objcache) allobjcaches;
147 static int magazine_capmin;
148 static int magazine_capmax;
149 
150 static struct magazine *
151 mag_alloc(int capacity)
152 {
153 	struct magazine *mag;
154 	int size;
155 
156 	size = __offsetof(struct magazine, objects[capacity]);
157 	KASSERT(size > 0 && (size & __VM_CACHELINE_MASK) == 0,
158 	    ("magazine size is not a multiple of the cache line size"));
159 
160 	mag = kmalloc_cachealign(size, M_OBJMAG, M_INTWAIT | M_ZERO);
161 	mag->capacity = capacity;
162 	mag->rounds = 0;
163 	return (mag);
164 }
165 
166 static int
167 mag_capacity_align(int mag_capacity)
168 {
169 	int mag_size;
170 
171 	mag_size = __VM_CACHELINE_ALIGN(
172 	    __offsetof(struct magazine, objects[mag_capacity]));
173 	mag_capacity = (mag_size - MAGAZINE_HDRSIZE) / sizeof(void *);
174 
175 	return mag_capacity;
176 }
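
/*
 * Worked example (illustrative, assuming 64-byte cache lines and an LP64
 * layout where MAGAZINE_HDRSIZE is 16 bytes and each object slot is 8
 * bytes): a request for 4 rounds sizes the magazine at 16 + 4*8 = 48
 * bytes, which __VM_CACHELINE_ALIGN() rounds up to 64, so the aligned
 * capacity becomes (64 - 16) / 8 = 6 rounds.  The padding that would
 * otherwise be wasted is turned into usable object slots; under the same
 * assumptions MAGAZINE_CAPACITY_MIN (4) also aligns to 6, which is the
 * value magazine_capmin receives in objcache_init().
 */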
177 
178 /*
179  * Utility routines for objects that need no construction or destruction.
180  */
181 
182 static void
183 null_dtor(void *obj, void *privdata)
184 {
185 	/* do nothing */
186 }
187 
188 static boolean_t
189 null_ctor(void *obj, void *privdata, int ocflags)
190 {
191 	return TRUE;
192 }
193 
194 /*
195  * Create an object cache.
196  */
197 struct objcache *
198 objcache_create(const char *name, int cluster_limit, int nom_cache,
199 		objcache_ctor_fn *ctor, objcache_dtor_fn *dtor, void *privdata,
200 		objcache_alloc_fn *alloc, objcache_free_fn *free,
201 		void *allocator_args)
202 {
203 	struct objcache *oc;
204 	struct magazinedepot *depot;
205 	int cpuid;
206 	int nmagdepot;
207 	int mag_capacity;
208 	int i;
209 
210 	/*
211 	 * Allocate object cache structure
212 	 */
213 	oc = kmalloc_cachealign(
214 	    __offsetof(struct objcache, cache_percpu[ncpus]),
215 	    M_OBJCACHE, M_WAITOK | M_ZERO);
216 	oc->name = kstrdup(name, M_TEMP);
217 	oc->ctor = ctor ? ctor : null_ctor;
218 	oc->dtor = dtor ? dtor : null_dtor;
219 	oc->privdata = privdata;
220 	oc->alloc = alloc;
221 	oc->free = free;
222 	oc->allocator_args = allocator_args;
223 
224 	/*
225 	 * Initialize depot list(s).
226 	 */
227 	depot = &oc->depot[0];
228 
229 	spin_init(&depot->spin, "objcachedepot");
230 	SLIST_INIT(&depot->fullmagazines);
231 	SLIST_INIT(&depot->emptymagazines);
232 
233 	/*
234 	 * Figure out the nominal number of free objects to cache and
235 	 * the magazine capacity.  By default we want to cache up to
236 	 * half the cluster_limit.  If there is no cluster_limit then
237 	 * we want to cache up to 128 objects.
238 	 */
239 	if (nom_cache == 0)
240 		nom_cache = cluster_limit / 2;
241 	if (cluster_limit && nom_cache > cluster_limit)
242 		nom_cache = cluster_limit;
243 	if (nom_cache == 0)
244 		nom_cache = INITIAL_MAG_CAPACITY * 2;
245 
246 	/*
247 	 * Magazine capacity for 2 active magazines per cpu plus 2
248 	 * magazines in the depot.
249 	 */
250 	mag_capacity = mag_capacity_align(nom_cache / (ncpus + 1) / 2 + 1);
251 	if (mag_capacity > magazine_capmax)
252 		mag_capacity = magazine_capmax;
253 	else if (mag_capacity < magazine_capmin)
254 		mag_capacity = magazine_capmin;
255 	depot->magcapacity = mag_capacity;
256 
257 	/*
258 	 * The cluster_limit must be sufficient to have two magazines per
259 	 * cpu plus at least two magazines in the depot.  However, because
260 	 * partial magazines can stay on the cpus, what we really need here
261 	 * is to specify the number of extra magazines we allocate for the
262 	 * depot.
263 	 *
264 	 * Use ~1B objects to mean 'unlimited'.  A negative unallocated
265 	 * object count is possible due to dynamic adjustments so we can't
266 	 * use a negative number to mean 'unlimited'.  We need some overflow
267 	 * capacity too due to the preallocated mags.
268 	 */
269 	if (cluster_limit == 0) {
270 		depot->unallocated_objects = 0x40000000;
271 	} else {
272 		depot->unallocated_objects = ncpus * mag_capacity * 2 +
273 					     cluster_limit;
274 	}
275 
276 	/*
277 	 * This is a dynamic adjustment aid initialized to the caller's
278 	 * expectation of the current limit.
279 	 */
280 	depot->cluster_limit = cluster_limit;
281 
282 	/*
283 	 * Initialize per-cpu caches
284 	 */
285 	for (cpuid = 0; cpuid < ncpus; cpuid++) {
286 		struct percpu_objcache *cache_percpu = &oc->cache_percpu[cpuid];
287 
288 		cache_percpu->loaded_magazine = mag_alloc(mag_capacity);
289 		cache_percpu->previous_magazine = mag_alloc(mag_capacity);
290 	}
291 
292 	/*
293 	 * Compute how many empty magazines to place in the depot.  This
294 	 * determines the retained cache size and is based on nom_cache.
295 	 *
296 	 * The actual cache size is larger because there are also two
297 	 * magazines for each cpu, but those can be in any fill state so
298 	 * we just can't count them.
299 	 *
300 	 * There is a minimum of two magazines in the depot.
301 	 */
302 	nmagdepot = nom_cache / mag_capacity + 1;
303 	if (nmagdepot < 2)
304 		nmagdepot = 2;
305 
306 	/*
307 	 * Put empty magazines in depot
308 	 */
309 	for (i = 0; i < nmagdepot; i++) {
310 		struct magazine *mag = mag_alloc(mag_capacity);
311 		SLIST_INSERT_HEAD(&depot->emptymagazines, mag, nextmagazine);
312 	}
313 
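	/*
	 * Worked example (illustrative figures, not from the original
	 * source; they assume 64-byte cache lines and 8-byte pointers):
	 * with ncpus = 4, cluster_limit = 1000 and nom_cache = 0, nom_cache
	 * becomes 500, the raw capacity is 500 / 5 / 2 + 1 = 51 and
	 * mag_capacity_align() bumps it to 54.  unallocated_objects is then
	 * 4 * 54 * 2 + 1000 = 1432 and nmagdepot is 500 / 54 + 1 = 10
	 * empty magazines pre-loaded into the depot above.
	 */
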
314 	spin_lock(&objcachelist_spin);
315 	LIST_INSERT_HEAD(&allobjcaches, oc, oc_next);
316 	spin_unlock(&objcachelist_spin);
317 
318 	return (oc);
319 }
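
/*
 * Illustrative usage sketch, not part of the original file: how a subsystem
 * might layer a typed object cache with a constructor/destructor on top of
 * the kmalloc-backed wrappers defined later in this file.  "struct foo",
 * foo_ctor(), foo_dtor(), foo_cache_init() and M_FOO are hypothetical
 * names used only for the example.
 */
#if 0
static MALLOC_DEFINE(M_FOO, "foo", "example foo objects");

struct foo {
	struct spinlock	spin;
	int		refs;
};

static struct objcache_malloc_args foo_malloc_args = {
	sizeof(struct foo), M_FOO
};

static struct objcache *foo_cache;

static boolean_t
foo_ctor(void *obj, void *privdata, int ocflags)
{
	struct foo *fp = obj;

	/* expensive one-time setup; skipped when reused from a magazine */
	spin_init(&fp->spin, "fooobj");
	fp->refs = 0;
	return (TRUE);
}

static void
foo_dtor(void *obj, void *privdata)
{
	struct foo *fp = obj;

	/* only called when the object is handed back to the allocator */
	spin_uninit(&fp->spin);
}

static void
foo_cache_init(void)
{
	/* no hard object limit, nominally keep up to 256 free objects */
	foo_cache = objcache_create("foo", 0, 256,
				    foo_ctor, foo_dtor, NULL,
				    objcache_malloc_alloc,
				    objcache_malloc_free,
				    &foo_malloc_args);
}
#endif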
320 
321 /*
322  * Adjust the cluster limit.  This is allowed to cause unallocated_objects
323  * to go negative.  Note that due to the magazine hysteresis there is a
324  * limit to how much of the objcache can be reclaimed using this API to
325  * reduce its size.
326  */
327 void
328 objcache_set_cluster_limit(struct objcache *oc, int cluster_limit)
329 {
330 	struct magazinedepot *depot;
331 	int delta;
332 
333 	depot = &oc->depot[myclusterid];
334 	if (depot->cluster_limit != cluster_limit) {
335 		spin_lock(&depot->spin);
336 		delta = cluster_limit - depot->cluster_limit;
337 		depot->unallocated_objects += delta;
338 		depot->cluster_limit = cluster_limit;
339 		spin_unlock(&depot->spin);
340 		wakeup(depot);
341 	}
342 }
343 
344 struct objcache *
345 objcache_create_simple(malloc_type_t mtype, size_t objsize)
346 {
347 	struct objcache_malloc_args *margs;
348 	struct objcache *oc;
349 
350 	margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
351 	margs->objsize = objsize;
352 	margs->mtype = mtype;
353 	oc = objcache_create(mtype->ks_shortdesc, 0, 0,
354 			     NULL, NULL, NULL,
355 			     objcache_malloc_alloc, objcache_malloc_free,
356 			     margs);
357 	return (oc);
358 }
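
/*
 * Minimal sketch (illustrative only): a kmalloc-backed cache of fixed-size
 * buffers created with objcache_create_simple() and a basic get/put round
 * trip.  The 128-byte object size is arbitrary and M_TEMP is reused here
 * only because it already exists; foo_simple_example() is hypothetical.
 */
#if 0
static void
foo_simple_example(void)
{
	struct objcache *oc;
	void *obj;

	oc = objcache_create_simple(M_TEMP, 128);
	obj = objcache_get(oc, M_WAITOK);
	/* ... use the 128-byte buffer ... */
	objcache_put(oc, obj);
	objcache_destroy(oc);
}
#endif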
359 
360 struct objcache *
361 objcache_create_mbacked(malloc_type_t mtype, size_t objsize,
362 			int cluster_limit, int nom_cache,
363 			objcache_ctor_fn *ctor, objcache_dtor_fn *dtor,
364 			void *privdata)
365 {
366 	struct objcache_malloc_args *margs;
367 	struct objcache *oc;
368 
369 	margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
370 	margs->objsize = objsize;
371 	margs->mtype = mtype;
372 	oc = objcache_create(mtype->ks_shortdesc,
373 			     cluster_limit, nom_cache,
374 			     ctor, dtor, privdata,
375 			     objcache_malloc_alloc, objcache_malloc_free,
376 			     margs);
377 	return(oc);
378 }
379 
380 
381 #define MAGAZINE_EMPTY(mag)	(mag->rounds == 0)
382 #define MAGAZINE_NOTEMPTY(mag)	(mag->rounds != 0)
383 #define MAGAZINE_FULL(mag)	(mag->rounds == mag->capacity)
384 
385 #define	swap(x, y)	({ struct magazine *t = x; x = y; y = t; })
386 
387 /*
388  * Get an object from the object cache.
389  *
390  * WARNING!  ocflags are only used when we have to go to the underlying
391  * allocator, so we cannot depend on flags such as M_ZERO.
392  */
393 void *
394 objcache_get(struct objcache *oc, int ocflags)
395 {
396 	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
397 	struct magazine *loadedmag;
398 	struct magazine *emptymag;
399 	void *obj;
400 	struct magazinedepot *depot;
401 
402 	KKASSERT((ocflags & M_ZERO) == 0);
403 	crit_enter();
404 	++cpucache->gets_cumulative;
405 
406 retry:
407 	/*
408 	 * Loaded magazine has an object.  This is the hot path.
409 	 * It is lock-free and uses a critical section to block
410 	 * out interrupt handlers on the same processor.
411 	 */
412 	loadedmag = cpucache->loaded_magazine;
413 	if (MAGAZINE_NOTEMPTY(loadedmag)) {
414 		obj = loadedmag->objects[--loadedmag->rounds];
415 		crit_exit();
416 		return (obj);
417 	}
418 
419 	/* Previous magazine has an object. */
420 	if (MAGAZINE_NOTEMPTY(cpucache->previous_magazine)) {
421 		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
422 		loadedmag = cpucache->loaded_magazine;
423 		obj = loadedmag->objects[--loadedmag->rounds];
424 		crit_exit();
425 		return (obj);
426 	}
427 
428 	/*
429 	 * Both magazines empty.  Get a full magazine from the depot and
430 	 * move one of the empty ones to the depot.
431 	 *
432 	 * Obtain the depot spinlock.
433 	 *
434 	 * NOTE: Beyond this point, M_* flags are handled via oc->alloc()
435 	 */
436 	depot = &oc->depot[myclusterid];
437 	spin_lock(&depot->spin);
438 
439 	/*
440 	 * Recheck the cpucache after obtaining the depot spinlock.  This
441 	 * shouldn't be necessary now but don't take any chances.
442 	 */
443 	if (MAGAZINE_NOTEMPTY(cpucache->loaded_magazine) ||
444 	    MAGAZINE_NOTEMPTY(cpucache->previous_magazine)
445 	) {
446 		spin_unlock(&depot->spin);
447 		goto retry;
448 	}
449 
450 	/* Check if depot has a full magazine. */
451 	if (!SLIST_EMPTY(&depot->fullmagazines)) {
452 		emptymag = cpucache->previous_magazine;
453 		cpucache->previous_magazine = cpucache->loaded_magazine;
454 		cpucache->loaded_magazine = SLIST_FIRST(&depot->fullmagazines);
455 		SLIST_REMOVE_HEAD(&depot->fullmagazines, nextmagazine);
456 
457 		/*
458 		 * Return emptymag to the depot.
459 		 */
460 		KKASSERT(MAGAZINE_EMPTY(emptymag));
461 		SLIST_INSERT_HEAD(&depot->emptymagazines,
462 				  emptymag, nextmagazine);
463 		spin_unlock(&depot->spin);
464 		goto retry;
465 	}
466 
467 	/*
468 	 * The depot does not have any non-empty magazines.  If we have
469 	 * not hit our object limit we can allocate a new object using
470 	 * the back-end allocator.
471 	 *
472 	 * NOTE: unallocated_objects can wind up being negative due to
473 	 *	 objcache_set_cluster_limit() calls.
474 	 */
475 	if (__predict_true(depot->unallocated_objects > 0)) {
476 		--depot->unallocated_objects;
477 		spin_unlock(&depot->spin);
478 		crit_exit();
479 
480 		obj = oc->alloc(oc->allocator_args, ocflags);
481 		if (obj) {
482 			if (oc->ctor(obj, oc->privdata, ocflags))
483 				return (obj);
484 			oc->free(obj, oc->allocator_args);
485 			obj = NULL;
486 		}
487 		if (obj == NULL) {
488 			spin_lock(&depot->spin);
489 			++depot->unallocated_objects;
490 			spin_unlock(&depot->spin);
491 			if (depot->waiting)
492 				wakeup(depot);
493 
494 			crit_enter();
495 			/*
496 			 * Count this as a failed get instead; debugging is
497 			 * easier when gets_cumulative does not include gets_null.
498 			 */
499 			++cpucache->gets_null;
500 			--cpucache->gets_cumulative;
501 			crit_exit();
502 		}
503 		return(obj);
504 	}
505 	if (__predict_false(oc->exhausted == 0)) {
506 		kprintf("Warning, objcache(%s): Exhausted!\n", oc->name);
507 		oc->exhausted = 1;
508 	}
509 
510 	/*
511 	 * Otherwise block if allowed to.
512 	 */
513 	if ((ocflags & (M_WAITOK|M_NULLOK)) == M_WAITOK) {
514 		++cpucache->waiting;
515 		++depot->waiting;
516 		ssleep(depot, &depot->spin, 0, "objcache_get", 0);
517 		--cpucache->waiting;
518 		--depot->waiting;
519 		spin_unlock(&depot->spin);
520 		goto retry;
521 	}
522 
523 	/*
524 	 * Otherwise fail
525 	 */
526 	++cpucache->gets_null;
527 	--cpucache->gets_cumulative;
528 	crit_exit();
529 	spin_unlock(&depot->spin);
530 	return (NULL);
531 }
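
/*
 * Caller-side sketch (illustrative, not from the original source): with
 * M_NULLOK set the code above does not sleep waiting for another cpu when
 * the object limit has been reached; it returns NULL instead, so callers
 * using that flag must be prepared to degrade.  foo_try_get() is a
 * hypothetical wrapper.
 */
#if 0
static void *
foo_try_get(struct objcache *oc)
{
	void *obj;

	obj = objcache_get(oc, M_WAITOK | M_NULLOK);
	if (obj == NULL)
		kprintf("foo: object cache limit reached, degrading\n");
	return (obj);
}
#endif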
532 
533 /*
534  * Wrapper for malloc allocation routines.
535  */
536 void *
537 objcache_malloc_alloc(void *allocator_args, int ocflags)
538 {
539 	struct objcache_malloc_args *alloc_args = allocator_args;
540 
541 	return (kmalloc(alloc_args->objsize, alloc_args->mtype,
542 		       ocflags & OC_MFLAGS));
543 }
544 
545 /*
546  * Wrapper for malloc allocation routines, with initial zeroing
547  * (but objects are not zeroed on reuse from the cache).
548  */
549 void *
550 objcache_malloc_alloc_zero(void *allocator_args, int ocflags)
551 {
552 	struct objcache_malloc_args *alloc_args = allocator_args;
553 
554 	return (kmalloc(alloc_args->objsize, alloc_args->mtype,
555 		       (ocflags & OC_MFLAGS) | M_ZERO));
556 }
557 
558 
559 void
560 objcache_malloc_free(void *obj, void *allocator_args)
561 {
562 	struct objcache_malloc_args *alloc_args = allocator_args;
563 
564 	kfree(obj, alloc_args->mtype);
565 }
566 
567 /*
568  * Wrapper for allocation policies that pre-allocate at initialization time
569  * and don't do run-time allocation.
570  */
571 void *
572 objcache_nop_alloc(void *allocator_args, int ocflags)
573 {
574 	return (NULL);
575 }
576 
577 void
578 objcache_nop_free(void *obj, void *allocator_args)
579 {
580 }
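
/*
 * One way such a pre-allocation policy might be wired up (an assumption
 * for illustration, not a pattern taken from this file): the objects are
 * allocated once at init time and donated to the cache with objcache_put().
 * nom_cache is set to the same count so enough empty magazines exist to
 * absorb every donated object; once the pool is drained, objcache_get()
 * returns NULL because the nop allocator cannot create more.
 */
#if 0
static struct objcache *foo_prealloc_cache;	/* hypothetical */

static void
foo_prealloc_init(void)
{
	void *obj;
	int i;

	foo_prealloc_cache = objcache_create("fooprealloc", 64, 64,
					     NULL, NULL, NULL,
					     objcache_nop_alloc,
					     objcache_nop_free, NULL);
	for (i = 0; i < 64; i++) {
		obj = kmalloc(1024, M_TEMP, M_WAITOK | M_ZERO);
		objcache_put(foo_prealloc_cache, obj);
	}
}
#endif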
581 
582 /*
583  * Return an object to the object cache.
584  */
585 void
586 objcache_put(struct objcache *oc, void *obj)
587 {
588 	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
589 	struct magazine *loadedmag;
590 	struct magazinedepot *depot;
591 
592 	crit_enter();
593 	++cpucache->puts_cumulative;
594 
595 	if (CLUSTER_OF(obj) != myclusterid) {
596 #ifdef notyet
597 		/* use lazy IPI to send object to owning cluster XXX todo */
598 		++cpucache->puts_othercluster;
599 		crit_exit();
600 		return;
601 #endif
602 	}
603 
604 retry:
605 	/*
606 	 * Free slot available in loaded magazine.  This is the hot path.
607 	 * It is lock-free and uses a critical section to block out interrupt
608 	 * handlers on the same processor.
609 	 */
610 	loadedmag = cpucache->loaded_magazine;
611 	if (!MAGAZINE_FULL(loadedmag)) {
612 		loadedmag->objects[loadedmag->rounds++] = obj;
613 		if (cpucache->waiting)
614 			wakeup_mycpu(&oc->depot[myclusterid]);
615 		crit_exit();
616 		return;
617 	}
618 
619 	/*
620 	 * Current magazine full, but previous magazine has room.  XXX
621 	 */
622 	if (!MAGAZINE_FULL(cpucache->previous_magazine)) {
623 		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
624 		loadedmag = cpucache->loaded_magazine;
625 		loadedmag->objects[loadedmag->rounds++] = obj;
626 		if (cpucache->waiting)
627 			wakeup_mycpu(&oc->depot[myclusterid]);
628 		crit_exit();
629 		return;
630 	}
631 
632 	/*
633 	 * Both magazines full.  Get an empty magazine from the depot and
634 	 * move a full loaded magazine to the depot.  Even though the
635 	 * magazine may wind up with space available after we block on
636 	 * the spinlock, we still cycle it through to avoid the non-optimal
637 	 * corner-case.
638 	 *
639 	 * Obtain the depot spinlock.
640 	 */
641 	depot = &oc->depot[myclusterid];
642 	spin_lock(&depot->spin);
643 
644 	/*
645 	 * If an empty magazine is available in the depot, cycle it
646 	 * through and retry.
647 	 */
648 	if (!SLIST_EMPTY(&depot->emptymagazines)) {
649 		loadedmag = cpucache->previous_magazine;
650 		cpucache->previous_magazine = cpucache->loaded_magazine;
651 		cpucache->loaded_magazine = SLIST_FIRST(&depot->emptymagazines);
652 		SLIST_REMOVE_HEAD(&depot->emptymagazines, nextmagazine);
653 
654 		/*
655 		 * Return loadedmag to the depot.  Due to blocking it may
656 		 * not be entirely full and could even be empty.
657 		 */
658 		if (MAGAZINE_EMPTY(loadedmag)) {
659 			SLIST_INSERT_HEAD(&depot->emptymagazines,
660 					  loadedmag, nextmagazine);
661 			spin_unlock(&depot->spin);
662 		} else {
663 			SLIST_INSERT_HEAD(&depot->fullmagazines,
664 					  loadedmag, nextmagazine);
665 			spin_unlock(&depot->spin);
666 			if (depot->waiting)
667 				wakeup(depot);
668 		}
669 		goto retry;
670 	}
671 
672 	/*
673 	 * An empty mag is not available.  This is a corner case which can
674 	 * occur due to cpus holding partially full magazines.  Do not try
675 	 * to allocate a mag, just free the object.
676 	 */
677 	++depot->unallocated_objects;
678 	spin_unlock(&depot->spin);
679 	if (depot->waiting)
680 		wakeup(depot);
681 	crit_exit();
682 	oc->dtor(obj, oc->privdata);
683 	oc->free(obj, oc->allocator_args);
684 }
685 
686 /*
687  * The object is being put back into the cache, but the caller has
688  * indicated that the object is not in any shape to be reused and should
689  * be dtor'd immediately.
690  */
691 void
692 objcache_dtor(struct objcache *oc, void *obj)
693 {
694 	struct magazinedepot *depot;
695 
696 	depot = &oc->depot[myclusterid];
697 	spin_lock(&depot->spin);
698 	++depot->unallocated_objects;
699 	spin_unlock(&depot->spin);
700 	if (depot->waiting)
701 		wakeup(depot);
702 	oc->dtor(obj, oc->privdata);
703 	oc->free(obj, oc->allocator_args);
704 }
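
/*
 * Illustrative caller sketch (hypothetical names): an object whose
 * post-get initialization failed is in an unknown state, so it is handed
 * back with objcache_dtor() and destroyed outright rather than being
 * recycled through a magazine with objcache_put().
 */
#if 0
static int
foo_open(struct objcache *oc)
{
	void *obj;
	int error;

	obj = objcache_get(oc, M_WAITOK);
	error = foo_payload_init(obj);		/* hypothetical helper */
	if (error) {
		objcache_dtor(oc, obj);		/* destroy, do not recycle */
		return (error);
	}
	/* ... use the object, then return it clean ... */
	objcache_put(oc, obj);
	return (0);
}
#endif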
705 
706 /*
707  * Deallocate all objects in a magazine and free the magazine if requested.
708  * When freeit is TRUE the magazine must already be disassociated from the
709  * depot.
710  *
711  * Must be called with a critical section held when called with a per-cpu
712  * magazine.  The magazine may be indirectly modified during the loop.
713  *
714  * If the magazine moves during a dtor the operation is aborted.  This is
715  * only allowed when freeit is FALSE.
716  *
717  * The number of objects freed is returned.
718  */
719 static int
720 mag_purge(struct objcache *oc, struct magazine **magp, int freeit)
721 {
722 	struct magazine *mag = *magp;
723 	int count;
724 	void *obj;
725 
726 	count = 0;
727 	while (mag->rounds) {
728 		obj = mag->objects[--mag->rounds];
729 		oc->dtor(obj, oc->privdata);		/* MAY BLOCK */
730 		oc->free(obj, oc->allocator_args);	/* MAY BLOCK */
731 		++count;
732 
733 		/*
734 		 * Cycle for interrupts.
735 		 */
736 		if ((count & 15) == 0) {
737 			crit_exit();
738 			crit_enter();
739 		}
740 
741 		/*
742 		 * mag may have become invalid either due to dtor/free
743 		 * blocking or interrupt cycling, do not dereference it
744 		 * until we check.
745 		 */
746 		if (*magp != mag) {
747 			kprintf("mag_purge: mag ripped out\n");
748 			break;
749 		}
750 	}
751 	if (freeit) {
752 		KKASSERT(*magp == mag);
753 		*magp = NULL;
754 		kfree(mag, M_OBJMAG);
755 	}
756 	return(count);
757 }
758 
759 /*
760  * Disassociate zero or more magazines from a magazine list associated with
761  * the depot, update the depot, and move the magazines to a temporary
762  * list.
763  *
764  * The caller must check the depot for waiters and wake it up, typically
765  * after disposing of the magazines this function loads onto the temporary
766  * list.
767  */
768 static void
769 maglist_disassociate(struct magazinedepot *depot, struct magazinelist *maglist,
770 		     struct magazinelist *tmplist, boolean_t purgeall)
771 {
772 	struct magazine *mag;
773 
774 	while ((mag = SLIST_FIRST(maglist)) != NULL) {
775 		SLIST_REMOVE_HEAD(maglist, nextmagazine);
776 		SLIST_INSERT_HEAD(tmplist, mag, nextmagazine);
777 		depot->unallocated_objects += mag->rounds;
778 	}
779 }
780 
781 /*
782  * Deallocate all magazines and their contents from the passed temporary
783  * list.  The magazines have already been accounted for by their depots.
784  *
785  * The total number of rounds freed is returned.  This number is typically
786  * only used to determine whether a wakeup on the depot is needed or not.
787  */
788 static int
789 maglist_purge(struct objcache *oc, struct magazinelist *maglist)
790 {
791 	struct magazine *mag;
792 	int count = 0;
793 
794 	/*
795 	 * can't use SLIST_FOREACH because blocking releases the depot
796 	 * spinlock
797 	 */
798 	crit_enter();
799 	while ((mag = SLIST_FIRST(maglist)) != NULL) {
800 		SLIST_REMOVE_HEAD(maglist, nextmagazine);
801 		count += mag_purge(oc, &mag, TRUE);
802 	}
803 	crit_exit();
804 	return(count);
805 }
806 
807 /*
808  * De-allocates all magazines on the full and empty magazine lists.
809  *
810  * Because this routine is called with a spinlock held, the magazines
811  * can only be disassociated and moved to a temporary list, not freed.
812  *
813  * The caller is responsible for freeing the magazines.
814  */
815 static void
816 depot_disassociate(struct magazinedepot *depot, struct magazinelist *tmplist)
817 {
818 	maglist_disassociate(depot, &depot->fullmagazines, tmplist, TRUE);
819 	maglist_disassociate(depot, &depot->emptymagazines, tmplist, TRUE);
820 }
821 
822 /*
823  * Try to free up some memory.  Return as soon as some free memory is found.
824  * For each object cache on the reclaim list, first try the current per-cpu
825  * cache, then the full magazine depot.
826  */
827 boolean_t
828 objcache_reclaimlist(struct objcache *oclist[], int nlist, int ocflags)
829 {
830 	struct objcache *oc;
831 	struct percpu_objcache *cpucache;
832 	struct magazinedepot *depot;
833 	struct magazinelist tmplist;
834 	int i, count;
835 
836 	SLIST_INIT(&tmplist);
837 
838 	for (i = 0; i < nlist; i++) {
839 		oc = oclist[i];
840 		cpucache = &oc->cache_percpu[mycpuid];
841 		depot = &oc->depot[myclusterid];
842 
843 		crit_enter();
844 		count = mag_purge(oc, &cpucache->loaded_magazine, FALSE);
845 		if (count == 0)
846 			count += mag_purge(oc, &cpucache->previous_magazine, FALSE);
847 		crit_exit();
848 		if (count > 0) {
849 			spin_lock(&depot->spin);
850 			depot->unallocated_objects += count;
851 			spin_unlock(&depot->spin);
852 			if (depot->waiting)
853 				wakeup(depot);
854 			return (TRUE);
855 		}
856 		spin_lock(&depot->spin);
857 		maglist_disassociate(depot, &depot->fullmagazines,
858 				     &tmplist, FALSE);
859 		spin_unlock(&depot->spin);
860 		count = maglist_purge(oc, &tmplist);
861 		if (count > 0) {
862 			if (depot->waiting)
863 				wakeup(depot);
864 			return (TRUE);
865 		}
866 	}
867 	return (FALSE);
868 }
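
/*
 * Illustrative low-memory hook (hypothetical caches and handler name): a
 * subsystem under memory pressure can ask its caches to give memory back,
 * stopping at the first cache that yields anything.
 */
#if 0
static void
foo_lowmem_handler(void)
{
	struct objcache *list[2] = { foo_cache, bar_cache };

	if (!objcache_reclaimlist(list, 2, M_NOWAIT))
		kprintf("foo: no objcache memory could be reclaimed\n");
}
#endif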
869 
870 /*
871  * Destroy an object cache.  Must have no existing references.
872  */
873 void
874 objcache_destroy(struct objcache *oc)
875 {
876 	struct percpu_objcache *cache_percpu;
877 	struct magazinedepot *depot;
878 	int clusterid, cpuid;
879 	struct magazinelist tmplist;
880 
881 	spin_lock(&objcachelist_spin);
882 	LIST_REMOVE(oc, oc_next);
883 	spin_unlock(&objcachelist_spin);
884 
885 	SLIST_INIT(&tmplist);
886 	for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++) {
887 		depot = &oc->depot[clusterid];
888 		spin_lock(&depot->spin);
889 		depot_disassociate(depot, &tmplist);
890 		spin_unlock(&depot->spin);
891 	}
892 	maglist_purge(oc, &tmplist);
893 
894 	for (cpuid = 0; cpuid < ncpus; cpuid++) {
895 		cache_percpu = &oc->cache_percpu[cpuid];
896 
897 		crit_enter();
898 		mag_purge(oc, &cache_percpu->loaded_magazine, TRUE);
899 		mag_purge(oc, &cache_percpu->previous_magazine, TRUE);
900 		crit_exit();
901 		cache_percpu->loaded_magazine = NULL;
902 		cache_percpu->previous_magazine = NULL;
903 		/* don't bother adjusting depot->unallocated_objects */
904 	}
905 
906 	kfree(__DECONST(void *, oc->name), M_TEMP);
907 	kfree(oc, M_OBJCACHE);
908 }
909 
910 static void
911 objcache_init(void)
912 {
913 	spin_init(&objcachelist_spin, "objcachelist");
914 
915 	magazine_capmin = mag_capacity_align(MAGAZINE_CAPACITY_MIN);
916 	magazine_capmax = mag_capacity_align(MAGAZINE_CAPACITY_MAX);
917 	if (bootverbose) {
918 		kprintf("objcache: magazine cap [%d, %d]\n",
919 		    magazine_capmin, magazine_capmax);
920 	}
921 #if 0
922 	callout_init_mp(&objcache_callout);
923 	objcache_rebalance_period = 60 * hz;
924 	callout_reset(&objcache_callout, objcache_rebalance_period,
925 		      objcache_timer, NULL);
926 #endif
927 }
928 SYSINIT(objcache, SI_BOOT2_OBJCACHE, SI_ORDER_FIRST, objcache_init, 0);
929