xref: /dragonfly/sys/kern/kern_objcache.c (revision 0720b42f)
1 /*
2  * Copyright (c) 2005 Jeffrey M. Hsu.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Jeffrey M. Hsu.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of The DragonFly Project nor the names of its
16  *    contributors may be used to endorse or promote products derived
17  *    from this software without specific, prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/param.h>
34 #include <sys/kernel.h>
35 #include <sys/systm.h>
36 #include <sys/callout.h>
37 #include <sys/globaldata.h>
38 #include <sys/malloc.h>
39 #include <sys/queue.h>
40 #include <sys/objcache.h>
41 #include <sys/spinlock.h>
42 #include <sys/thread.h>
43 #include <sys/thread2.h>
44 #include <sys/spinlock2.h>
45 
/* Malloc type used for objcache structures and the malloc-wrapper args. */
static MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");
/* Malloc type used for the magazines themselves (see mag_alloc()). */
static MALLOC_DEFINE(M_OBJMAG, "objcache magazine", "Object Cache Magazine");

/*
 * Basis for the default nominal cache size: objcache_create() falls back
 * to INITIAL_MAG_CAPACITY * 2 objects when neither nom_cache nor
 * cluster_limit supplies a value.
 */
#define	INITIAL_MAG_CAPACITY	64
50 
/*
 * A magazine is a fixed-capacity stack of object pointers.  Objects are
 * pushed at objects[rounds] and popped from objects[rounds - 1].
 */
struct magazine {
	int			 rounds;	/* objects currently held */
	int			 capacity;	/* number of objects[] slots */
	SLIST_ENTRY(magazine)	 nextmagazine;	/* magazine list linkage */
	void			*objects[];	/* the cached objects */
};

/* List head type for the depot magazine lists and temporary purge lists. */
SLIST_HEAD(magazinelist, magazine);
59 
/* Bytes occupied by the magazine header that precedes objects[]. */
#define MAGAZINE_HDRSIZE	__offsetof(struct magazine, objects[0])
/*
 * Requested capacity bounds.  objcache_init() passes both through
 * mag_capacity_align() to produce magazine_capmax / magazine_capmin.
 */
#define MAGAZINE_CAPACITY_MAX	128
#define MAGAZINE_CAPACITY_MIN	4
63 
/*
 * per-cluster cache of magazines
 *
 * Fields in this structure are modified with the spinlock held.  Note
 * that 'waiting' and 'cluster_limit' are also peeked at without the
 * spinlock (to decide whether a wakeup() or an adjustment is needed).
 */
struct magazinedepot {
	/*
	 * The per-cpu object caches exchange whole magazines with the
	 * depot layer, so the depot only keeps two lists: magazines
	 * holding objects and magazines holding none.
	 */
	struct magazinelist	fullmagazines;
	struct magazinelist	emptymagazines;
	int			magcapacity;	/* capacity of each magazine */

	/* protect this structure */
	struct spinlock		spin;

	/*
	 * Objects not yet allocated towards the limit.  May become
	 * negative after objcache_set_cluster_limit() lowers the limit.
	 */
	int			unallocated_objects;
	int			cluster_limit;	/* ref for adjustments */

	/* infrequently used fields */
	int			waiting;	/* threads sleeping on the
						 * depot in objcache_get() */
	int			contested;	/* depot contention count;
						 * NOTE(review): not updated
						 * by the code in this file */
} __cachealign;
92 
/*
 * per-cpu object cache
 *
 * All fields in this structure are protected by crit_enter().
 */
struct percpu_objcache {
	struct magazine	*loaded_magazine;	/* active magazine */
	struct magazine	*previous_magazine;	/* backup magazine */

	/* statistics */
	int		gets_cumulative;	/* successful gets (failed
						 * gets are backed out) */
	int		gets_null;		/* objcache_get returned NULL */
	int		puts_cumulative;	/* total calls to put */
	int		puts_othercluster;	/* returned to other cluster
						 * (only under 'notyet') */

	/* infrequently used fields */
	int		waiting;	/* threads that entered objcache_get()
					 * on this cpu and are sleeping;
					 * objcache_put() checks it to issue
					 * wakeup_mycpu() */
} __cachealign;
111 
/*
 * Placeholders used only until we have NUMA cluster topology information
 * (XXX): there is a single cluster, the current cluster is always 0, and
 * every object is considered to belong to cluster 0.
 */
#define MAXCLUSTERS 1
#define myclusterid 0
#define CLUSTER_OF(obj) 0
116 
/*
 * Two-level object cache consisting of NUMA cluster-level depots of
 * fully loaded or completely empty magazines and cpu-level caches of
 * individual objects.
 *
 * The structure is allocated with room for ncpus entries in the
 * trailing cache_percpu[] array (see objcache_create()).
 */
struct objcache {
	char			*name;		/* kstrdup()'d copy (M_TEMP) */

	/* object constructor and destructor from blank storage */
	objcache_ctor_fn	*ctor;		/* never NULL after create */
	objcache_dtor_fn	*dtor;		/* never NULL after create */
	void			*privdata;	/* passed to ctor and dtor */

	/* interface to underlying allocator */
	objcache_alloc_fn	*alloc;
	objcache_free_fn	*free;
	void			*allocator_args; /* passed to alloc and free */

	LIST_ENTRY(objcache)	oc_next;	/* allobjcaches linkage */
	int			exhausted;	/* set once the "Exhausted!"
						 * warning has been printed */

	/* NUMA-cluster level caches */
	struct magazinedepot	depot[MAXCLUSTERS];

	struct percpu_objcache	cache_percpu[];		/* per-cpu caches */
};
143 
/* Protects the allobjcaches list. */
static struct spinlock objcachelist_spin;
/* Every objcache between objcache_create() and objcache_destroy(). */
static LIST_HEAD(objcachelist, objcache) allobjcaches;
/* Cache-line adjusted magazine capacity bounds, set by objcache_init(). */
static int magazine_capmin;
static int magazine_capmax;
148 
149 static struct magazine *
150 mag_alloc(int capacity)
151 {
152 	struct magazine *mag;
153 	int size;
154 
155 	size = __offsetof(struct magazine, objects[capacity]);
156 	KASSERT(size > 0 && (size & __VM_CACHELINE_MASK) == 0,
157 	    ("magazine size is not multiple cache line size"));
158 
159 	mag = kmalloc_cachealign(size, M_OBJMAG, M_INTWAIT | M_ZERO);
160 	mag->capacity = capacity;
161 	mag->rounds = 0;
162 	return (mag);
163 }
164 
165 static int
166 mag_capacity_align(int mag_capacity)
167 {
168 	int mag_size;
169 
170 	mag_size = __VM_CACHELINE_ALIGN(
171 	    __offsetof(struct magazine, objects[mag_capacity]));
172 	mag_capacity = (mag_size - MAGAZINE_HDRSIZE) / sizeof(void *);
173 
174 	return mag_capacity;
175 }
176 
/*
 * No-op destructor for objects that need no de-construction.  Installed
 * by objcache_create() when the caller passes a NULL dtor.
 */
static void
null_dtor(void *obj, void *privdata)
{
	(void)obj;
	(void)privdata;
}
186 
187 static boolean_t
188 null_ctor(void *obj, void *privdata, int ocflags)
189 {
190 	return TRUE;
191 }
192 
193 /*
194  * Create an object cache.
195  */
196 struct objcache *
197 objcache_create(const char *name, int cluster_limit, int nom_cache,
198 		objcache_ctor_fn *ctor, objcache_dtor_fn *dtor, void *privdata,
199 		objcache_alloc_fn *alloc, objcache_free_fn *free,
200 		void *allocator_args)
201 {
202 	struct objcache *oc;
203 	struct magazinedepot *depot;
204 	int cpuid;
205 	int nmagdepot;
206 	int mag_capacity;
207 	int i;
208 
209 	/*
210 	 * Allocate object cache structure
211 	 */
212 	oc = kmalloc_cachealign(
213 	    __offsetof(struct objcache, cache_percpu[ncpus]),
214 	    M_OBJCACHE, M_WAITOK | M_ZERO);
215 	oc->name = kstrdup(name, M_TEMP);
216 	oc->ctor = ctor ? ctor : null_ctor;
217 	oc->dtor = dtor ? dtor : null_dtor;
218 	oc->privdata = privdata;
219 	oc->alloc = alloc;
220 	oc->free = free;
221 	oc->allocator_args = allocator_args;
222 
223 	/*
224 	 * Initialize depot list(s).
225 	 */
226 	depot = &oc->depot[0];
227 
228 	spin_init(&depot->spin, "objcachedepot");
229 	SLIST_INIT(&depot->fullmagazines);
230 	SLIST_INIT(&depot->emptymagazines);
231 
232 	/*
233 	 * Figure out the nominal number of free objects to cache and
234 	 * the magazine capacity.  By default we want to cache up to
235 	 * half the cluster_limit.  If there is no cluster_limit then
236 	 * we want to cache up to 128 objects.
237 	 */
238 	if (nom_cache == 0)
239 		nom_cache = cluster_limit / 2;
240 	if (cluster_limit && nom_cache > cluster_limit)
241 		nom_cache = cluster_limit;
242 	if (nom_cache == 0)
243 		nom_cache = INITIAL_MAG_CAPACITY * 2;
244 
245 	/*
246 	 * Magazine capacity for 2 active magazines per cpu plus 2
247 	 * magazines in the depot.
248 	 */
249 	mag_capacity = mag_capacity_align(nom_cache / (ncpus + 1) / 2 + 1);
250 	if (mag_capacity > magazine_capmax)
251 		mag_capacity = magazine_capmax;
252 	else if (mag_capacity < magazine_capmin)
253 		mag_capacity = magazine_capmin;
254 	depot->magcapacity = mag_capacity;
255 
256 	/*
257 	 * The cluster_limit must be sufficient to have two magazines per
258 	 * cpu plus at least two magazines in the depot.  However, because
259 	 * partial magazines can stay on the cpus what we really need here
260 	 * is to specify the number of extra magazines we allocate for the
261 	 * depot.
262 	 *
263 	 * Use ~1B objects to mean 'unlimited'.  A negative unallocated
264 	 * object count is possible due to dynamic adjustments so we can't
265 	 * use a negative number to mean 'unlimited'.  We need some overflow
266 	 * capacity too due to the preallocated mags.
267 	 */
268 	if (cluster_limit == 0) {
269 		depot->unallocated_objects = 0x40000000;
270 	} else {
271 		depot->unallocated_objects = ncpus * mag_capacity * 2 +
272 					     cluster_limit;
273 	}
274 
275 	/*
276 	 * This is a dynamic adjustment aid initialized to the callers
277 	 * expectations of the current limit.
278 	 */
279 	depot->cluster_limit = cluster_limit;
280 
281 	/*
282 	 * Initialize per-cpu caches
283 	 */
284 	for (cpuid = 0; cpuid < ncpus; cpuid++) {
285 		struct percpu_objcache *cache_percpu = &oc->cache_percpu[cpuid];
286 
287 		cache_percpu->loaded_magazine = mag_alloc(mag_capacity);
288 		cache_percpu->previous_magazine = mag_alloc(mag_capacity);
289 	}
290 
291 	/*
292 	 * Compute how many empty magazines to place in the depot.  This
293 	 * determines the retained cache size and is based on nom_cache.
294 	 *
295 	 * The actual cache size is larger because there are two magazines
296 	 * for each cpu as well but those can be in any fill state so we
297 	 * just can't count them.
298 	 *
299 	 * There is a minimum of two magazines in the depot.
300 	 */
301 	nmagdepot = nom_cache / mag_capacity + 1;
302 	if (nmagdepot < 2)
303 		nmagdepot = 2;
304 
305 	/*
306 	 * Put empty magazines in depot
307 	 */
308 	for (i = 0; i < nmagdepot; i++) {
309 		struct magazine *mag = mag_alloc(mag_capacity);
310 		SLIST_INSERT_HEAD(&depot->emptymagazines, mag, nextmagazine);
311 	}
312 
313 	spin_lock(&objcachelist_spin);
314 	LIST_INSERT_HEAD(&allobjcaches, oc, oc_next);
315 	spin_unlock(&objcachelist_spin);
316 
317 	return (oc);
318 }
319 
320 /*
321  * Adjust the cluster limit.  This is allowed to cause unallocated_objects
322  * to go negative.  Note that due to the magazine hysteresis there is a
323  * limit to how much of the objcache can be reclaimed using this API to
324  * reduce its size.
325  */
326 void
327 objcache_set_cluster_limit(struct objcache *oc, int cluster_limit)
328 {
329 	struct magazinedepot *depot;
330 	int delta;
331 
332 	depot = &oc->depot[myclusterid];
333 	if (depot->cluster_limit != cluster_limit) {
334 		spin_lock(&depot->spin);
335 		delta = cluster_limit - depot->cluster_limit;
336 		depot->unallocated_objects += delta;
337 		depot->cluster_limit = cluster_limit;
338 		spin_unlock(&depot->spin);
339 		wakeup(depot);
340 	}
341 }
342 
343 struct objcache *
344 objcache_create_simple(malloc_type_t mtype, size_t objsize)
345 {
346 	struct objcache_malloc_args *margs;
347 	struct objcache *oc;
348 
349 	margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
350 	margs->objsize = objsize;
351 	margs->mtype = mtype;
352 	oc = objcache_create(mtype->ks_shortdesc, 0, 0,
353 			     NULL, NULL, NULL,
354 			     objcache_malloc_alloc, objcache_malloc_free,
355 			     margs);
356 	return (oc);
357 }
358 
359 struct objcache *
360 objcache_create_mbacked(malloc_type_t mtype, size_t objsize,
361 			int cluster_limit, int nom_cache,
362 			objcache_ctor_fn *ctor, objcache_dtor_fn *dtor,
363 			void *privdata)
364 {
365 	struct objcache_malloc_args *margs;
366 	struct objcache *oc;
367 
368 	margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
369 	margs->objsize = objsize;
370 	margs->mtype = mtype;
371 	oc = objcache_create(mtype->ks_shortdesc,
372 			     cluster_limit, nom_cache,
373 			     ctor, dtor, privdata,
374 			     objcache_malloc_alloc, objcache_malloc_free,
375 			     margs);
376 	return(oc);
377 }
378 
379 
/*
 * Magazine fill-state predicates.  'mag' may be any expression yielding
 * a struct magazine pointer; it is parenthesized so that arguments such
 * as '&m' or 'c ? a : b' expand correctly.  MAGAZINE_FULL() evaluates
 * its argument twice, so do not pass expressions with side effects.
 */
#define MAGAZINE_EMPTY(mag)	((mag)->rounds == 0)
#define MAGAZINE_NOTEMPTY(mag)	((mag)->rounds != 0)
#define MAGAZINE_FULL(mag)	((mag)->rounds == (mag)->capacity)

/*
 * Exchange two struct magazine pointer lvalues.  The temporary has a
 * name unlikely to collide with a caller's variable.
 */
#define	swap(x, y)	({ struct magazine *swap_tmp_ = (x);		\
			   (x) = (y); (y) = swap_tmp_; })
385 
386 /*
387  * Get an object from the object cache.
388  *
389  * WARNING!  ocflags are only used when we have to go to the underlying
390  * allocator, so we cannot depend on flags such as M_ZERO.
391  */
392 void *
393 objcache_get(struct objcache *oc, int ocflags)
394 {
395 	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
396 	struct magazine *loadedmag;
397 	struct magazine *emptymag;
398 	void *obj;
399 	struct magazinedepot *depot;
400 
401 	KKASSERT((ocflags & M_ZERO) == 0);
402 	crit_enter();
403 	++cpucache->gets_cumulative;
404 
405 retry:
406 	/*
407 	 * Loaded magazine has an object.  This is the hot path.
408 	 * It is lock-free and uses a critical section to block
409 	 * out interrupt handlers on the same processor.
410 	 */
411 	loadedmag = cpucache->loaded_magazine;
412 	if (MAGAZINE_NOTEMPTY(loadedmag)) {
413 		obj = loadedmag->objects[--loadedmag->rounds];
414 		crit_exit();
415 		return (obj);
416 	}
417 
418 	/* Previous magazine has an object. */
419 	if (MAGAZINE_NOTEMPTY(cpucache->previous_magazine)) {
420 		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
421 		loadedmag = cpucache->loaded_magazine;
422 		obj = loadedmag->objects[--loadedmag->rounds];
423 		crit_exit();
424 		return (obj);
425 	}
426 
427 	/*
428 	 * Both magazines empty.  Get a full magazine from the depot and
429 	 * move one of the empty ones to the depot.
430 	 *
431 	 * Obtain the depot spinlock.
432 	 *
433 	 * NOTE: Beyond this point, M_* flags are handled via oc->alloc()
434 	 */
435 	depot = &oc->depot[myclusterid];
436 	spin_lock(&depot->spin);
437 
438 	/*
439 	 * Recheck the cpucache after obtaining the depot spinlock.  This
440 	 * shouldn't be necessary now but don't take any chances.
441 	 */
442 	if (MAGAZINE_NOTEMPTY(cpucache->loaded_magazine) ||
443 	    MAGAZINE_NOTEMPTY(cpucache->previous_magazine)
444 	) {
445 		spin_unlock(&depot->spin);
446 		goto retry;
447 	}
448 
449 	/* Check if depot has a full magazine. */
450 	if (!SLIST_EMPTY(&depot->fullmagazines)) {
451 		emptymag = cpucache->previous_magazine;
452 		cpucache->previous_magazine = cpucache->loaded_magazine;
453 		cpucache->loaded_magazine = SLIST_FIRST(&depot->fullmagazines);
454 		SLIST_REMOVE_HEAD(&depot->fullmagazines, nextmagazine);
455 
456 		/*
457 		 * Return emptymag to the depot.
458 		 */
459 		KKASSERT(MAGAZINE_EMPTY(emptymag));
460 		SLIST_INSERT_HEAD(&depot->emptymagazines,
461 				  emptymag, nextmagazine);
462 		spin_unlock(&depot->spin);
463 		goto retry;
464 	}
465 
466 	/*
467 	 * The depot does not have any non-empty magazines.  If we have
468 	 * not hit our object limit we can allocate a new object using
469 	 * the back-end allocator.
470 	 *
471 	 * NOTE: unallocated_objects can wind up being negative due to
472 	 *	 objcache_set_cluster_limit() calls.
473 	 */
474 	if (depot->unallocated_objects > 0) {
475 		--depot->unallocated_objects;
476 		spin_unlock(&depot->spin);
477 		crit_exit();
478 
479 		obj = oc->alloc(oc->allocator_args, ocflags);
480 		if (obj) {
481 			if (oc->ctor(obj, oc->privdata, ocflags))
482 				return (obj);
483 			oc->free(obj, oc->allocator_args);
484 			obj = NULL;
485 		}
486 		if (obj == NULL) {
487 			spin_lock(&depot->spin);
488 			++depot->unallocated_objects;
489 			spin_unlock(&depot->spin);
490 			if (depot->waiting)
491 				wakeup(depot);
492 
493 			crit_enter();
494 			/*
495 			 * makes debugging easier when gets_cumulative does
496 			 * not include gets_null.
497 			 */
498 			++cpucache->gets_null;
499 			--cpucache->gets_cumulative;
500 			crit_exit();
501 		}
502 		return(obj);
503 	}
504 	if (oc->exhausted == 0) {
505 		kprintf("Warning, objcache(%s): Exhausted!\n", oc->name);
506 		oc->exhausted = 1;
507 	}
508 
509 	/*
510 	 * Otherwise block if allowed to.
511 	 */
512 	if ((ocflags & (M_WAITOK|M_NULLOK)) == M_WAITOK) {
513 		++cpucache->waiting;
514 		++depot->waiting;
515 		ssleep(depot, &depot->spin, 0, "objcache_get", 0);
516 		--cpucache->waiting;
517 		--depot->waiting;
518 		spin_unlock(&depot->spin);
519 		goto retry;
520 	}
521 
522 	/*
523 	 * Otherwise fail
524 	 */
525 	++cpucache->gets_null;
526 	--cpucache->gets_cumulative;
527 	crit_exit();
528 	spin_unlock(&depot->spin);
529 	return (NULL);
530 }
531 
532 /*
533  * Wrapper for malloc allocation routines.
534  */
535 void *
536 objcache_malloc_alloc(void *allocator_args, int ocflags)
537 {
538 	struct objcache_malloc_args *alloc_args = allocator_args;
539 
540 	return (kmalloc(alloc_args->objsize, alloc_args->mtype,
541 		       ocflags & OC_MFLAGS));
542 }
543 
544 /*
545  * Wrapper for malloc allocation routines, with initial zeroing
546  * (but objects are not zerod on reuse from cache).
547  */
548 void *
549 objcache_malloc_alloc_zero(void *allocator_args, int ocflags)
550 {
551 	struct objcache_malloc_args *alloc_args = allocator_args;
552 
553 	return (kmalloc(alloc_args->objsize, alloc_args->mtype,
554 		       (ocflags & OC_MFLAGS) | M_ZERO));
555 }
556 
557 
558 void
559 objcache_malloc_free(void *obj, void *allocator_args)
560 {
561 	struct objcache_malloc_args *alloc_args = allocator_args;
562 
563 	kfree(obj, alloc_args->mtype);
564 }
565 
/*
 * Backing allocator for policies that pre-allocate at initialization
 * time and do no run-time allocation: it never produces an object.
 */
void *
objcache_nop_alloc(void *allocator_args, int ocflags)
{
	(void)allocator_args;
	(void)ocflags;

	return (NULL);
}
575 
/*
 * Backing free routine that does nothing; the counterpart of
 * objcache_nop_alloc().
 */
void
objcache_nop_free(void *obj, void *allocator_args)
{
	(void)obj;
	(void)allocator_args;
}
580 
581 /*
582  * Return an object to the object cache.
583  */
584 void
585 objcache_put(struct objcache *oc, void *obj)
586 {
587 	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
588 	struct magazine *loadedmag;
589 	struct magazinedepot *depot;
590 
591 	crit_enter();
592 	++cpucache->puts_cumulative;
593 
594 	if (CLUSTER_OF(obj) != myclusterid) {
595 #ifdef notyet
596 		/* use lazy IPI to send object to owning cluster XXX todo */
597 		++cpucache->puts_othercluster;
598 		crit_exit();
599 		return;
600 #endif
601 	}
602 
603 retry:
604 	/*
605 	 * Free slot available in loaded magazine.  This is the hot path.
606 	 * It is lock-free and uses a critical section to block out interrupt
607 	 * handlers on the same processor.
608 	 */
609 	loadedmag = cpucache->loaded_magazine;
610 	if (!MAGAZINE_FULL(loadedmag)) {
611 		loadedmag->objects[loadedmag->rounds++] = obj;
612 		if (cpucache->waiting)
613 			wakeup_mycpu(&oc->depot[myclusterid]);
614 		crit_exit();
615 		return;
616 	}
617 
618 	/*
619 	 * Current magazine full, but previous magazine has room.  XXX
620 	 */
621 	if (!MAGAZINE_FULL(cpucache->previous_magazine)) {
622 		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
623 		loadedmag = cpucache->loaded_magazine;
624 		loadedmag->objects[loadedmag->rounds++] = obj;
625 		if (cpucache->waiting)
626 			wakeup_mycpu(&oc->depot[myclusterid]);
627 		crit_exit();
628 		return;
629 	}
630 
631 	/*
632 	 * Both magazines full.  Get an empty magazine from the depot and
633 	 * move a full loaded magazine to the depot.  Even though the
634 	 * magazine may wind up with space available after we block on
635 	 * the spinlock, we still cycle it through to avoid the non-optimal
636 	 * corner-case.
637 	 *
638 	 * Obtain the depot spinlock.
639 	 */
640 	depot = &oc->depot[myclusterid];
641 	spin_lock(&depot->spin);
642 
643 	/*
644 	 * If an empty magazine is available in the depot, cycle it
645 	 * through and retry.
646 	 */
647 	if (!SLIST_EMPTY(&depot->emptymagazines)) {
648 		loadedmag = cpucache->previous_magazine;
649 		cpucache->previous_magazine = cpucache->loaded_magazine;
650 		cpucache->loaded_magazine = SLIST_FIRST(&depot->emptymagazines);
651 		SLIST_REMOVE_HEAD(&depot->emptymagazines, nextmagazine);
652 
653 		/*
654 		 * Return loadedmag to the depot.  Due to blocking it may
655 		 * not be entirely full and could even be empty.
656 		 */
657 		if (MAGAZINE_EMPTY(loadedmag)) {
658 			SLIST_INSERT_HEAD(&depot->emptymagazines,
659 					  loadedmag, nextmagazine);
660 			spin_unlock(&depot->spin);
661 		} else {
662 			SLIST_INSERT_HEAD(&depot->fullmagazines,
663 					  loadedmag, nextmagazine);
664 			spin_unlock(&depot->spin);
665 			if (depot->waiting)
666 				wakeup(depot);
667 		}
668 		goto retry;
669 	}
670 
671 	/*
672 	 * An empty mag is not available.  This is a corner case which can
673 	 * occur due to cpus holding partially full magazines.  Do not try
674 	 * to allocate a mag, just free the object.
675 	 */
676 	++depot->unallocated_objects;
677 	spin_unlock(&depot->spin);
678 	if (depot->waiting)
679 		wakeup(depot);
680 	crit_exit();
681 	oc->dtor(obj, oc->privdata);
682 	oc->free(obj, oc->allocator_args);
683 }
684 
685 /*
686  * The object is being put back into the cache, but the caller has
687  * indicated that the object is not in any shape to be reused and should
688  * be dtor'd immediately.
689  */
690 void
691 objcache_dtor(struct objcache *oc, void *obj)
692 {
693 	struct magazinedepot *depot;
694 
695 	depot = &oc->depot[myclusterid];
696 	spin_lock(&depot->spin);
697 	++depot->unallocated_objects;
698 	spin_unlock(&depot->spin);
699 	if (depot->waiting)
700 		wakeup(depot);
701 	oc->dtor(obj, oc->privdata);
702 	oc->free(obj, oc->allocator_args);
703 }
704 
705 /*
706  * Deallocate all objects in a magazine and free the magazine if requested.
707  * When freeit is TRUE the magazine must already be disassociated from the
708  * depot.
709  *
710  * Must be called with a critical section held when called with a per-cpu
711  * magazine.  The magazine may be indirectly modified during the loop.
712  *
713  * If the magazine moves during a dtor the operation is aborted.  This is
714  * only allowed when freeit is FALSE.
715  *
716  * The number of objects freed is returned.
717  */
718 static int
719 mag_purge(struct objcache *oc, struct magazine **magp, int freeit)
720 {
721 	struct magazine *mag = *magp;
722 	int count;
723 	void *obj;
724 
725 	count = 0;
726 	while (mag->rounds) {
727 		obj = mag->objects[--mag->rounds];
728 		oc->dtor(obj, oc->privdata);		/* MAY BLOCK */
729 		oc->free(obj, oc->allocator_args);	/* MAY BLOCK */
730 		++count;
731 
732 		/*
733 		 * Cycle for interrupts.
734 		 */
735 		if ((count & 15) == 0) {
736 			crit_exit();
737 			crit_enter();
738 		}
739 
740 		/*
741 		 * mag may have become invalid either due to dtor/free
742 		 * blocking or interrupt cycling, do not derefernce it
743 		 * until we check.
744 		 */
745 		if (*magp != mag) {
746 			kprintf("mag_purge: mag ripped out\n");
747 			break;
748 		}
749 	}
750 	if (freeit) {
751 		KKASSERT(*magp == mag);
752 		*magp = NULL;
753 		kfree(mag, M_OBJMAG);
754 	}
755 	return(count);
756 }
757 
758 /*
759  * Disassociate zero or more magazines from a magazine list associated with
760  * the depot, update the depot, and move the magazines to a temporary
761  * list.
762  *
763  * The caller must check the depot for waiters and wake it up, typically
764  * after disposing of the magazines this function loads onto the temporary
765  * list.
766  */
767 static void
768 maglist_disassociate(struct magazinedepot *depot, struct magazinelist *maglist,
769 		     struct magazinelist *tmplist, boolean_t purgeall)
770 {
771 	struct magazine *mag;
772 
773 	while ((mag = SLIST_FIRST(maglist)) != NULL) {
774 		SLIST_REMOVE_HEAD(maglist, nextmagazine);
775 		SLIST_INSERT_HEAD(tmplist, mag, nextmagazine);
776 		depot->unallocated_objects += mag->rounds;
777 	}
778 }
779 
780 /*
781  * Deallocate all magazines and their contents from the passed temporary
782  * list.  The magazines have already been accounted for by their depots.
783  *
784  * The total number of rounds freed is returned.  This number is typically
785  * only used to determine whether a wakeup on the depot is needed or not.
786  */
787 static int
788 maglist_purge(struct objcache *oc, struct magazinelist *maglist)
789 {
790 	struct magazine *mag;
791 	int count = 0;
792 
793 	/*
794 	 * can't use SLIST_FOREACH because blocking releases the depot
795 	 * spinlock
796 	 */
797 	crit_enter();
798 	while ((mag = SLIST_FIRST(maglist)) != NULL) {
799 		SLIST_REMOVE_HEAD(maglist, nextmagazine);
800 		count += mag_purge(oc, &mag, TRUE);
801 	}
802 	crit_exit();
803 	return(count);
804 }
805 
806 /*
807  * De-allocates all magazines on the full and empty magazine lists.
808  *
809  * Because this routine is called with a spinlock held, the magazines
810  * can only be disassociated and moved to a temporary list, not freed.
811  *
812  * The caller is responsible for freeing the magazines.
813  */
814 static void
815 depot_disassociate(struct magazinedepot *depot, struct magazinelist *tmplist)
816 {
817 	maglist_disassociate(depot, &depot->fullmagazines, tmplist, TRUE);
818 	maglist_disassociate(depot, &depot->emptymagazines, tmplist, TRUE);
819 }
820 
821 /*
822  * Try to free up some memory.  Return as soon as some free memory is found.
823  * For each object cache on the reclaim list, first try the current per-cpu
824  * cache, then the full magazine depot.
825  */
826 boolean_t
827 objcache_reclaimlist(struct objcache *oclist[], int nlist, int ocflags)
828 {
829 	struct objcache *oc;
830 	struct percpu_objcache *cpucache;
831 	struct magazinedepot *depot;
832 	struct magazinelist tmplist;
833 	int i, count;
834 
835 	kprintf("objcache_reclaimlist\n");
836 
837 	SLIST_INIT(&tmplist);
838 
839 	for (i = 0; i < nlist; i++) {
840 		oc = oclist[i];
841 		cpucache = &oc->cache_percpu[mycpuid];
842 		depot = &oc->depot[myclusterid];
843 
844 		crit_enter();
845 		count = mag_purge(oc, &cpucache->loaded_magazine, FALSE);
846 		if (count == 0)
847 			count += mag_purge(oc, &cpucache->previous_magazine, FALSE);
848 		crit_exit();
849 		if (count > 0) {
850 			spin_lock(&depot->spin);
851 			depot->unallocated_objects += count;
852 			spin_unlock(&depot->spin);
853 			if (depot->waiting)
854 				wakeup(depot);
855 			return (TRUE);
856 		}
857 		spin_lock(&depot->spin);
858 		maglist_disassociate(depot, &depot->fullmagazines,
859 				     &tmplist, FALSE);
860 		spin_unlock(&depot->spin);
861 		count = maglist_purge(oc, &tmplist);
862 		if (count > 0) {
863 			if (depot->waiting)
864 				wakeup(depot);
865 			return (TRUE);
866 		}
867 	}
868 	return (FALSE);
869 }
870 
871 /*
872  * Destroy an object cache.  Must have no existing references.
873  */
874 void
875 objcache_destroy(struct objcache *oc)
876 {
877 	struct percpu_objcache *cache_percpu;
878 	struct magazinedepot *depot;
879 	int clusterid, cpuid;
880 	struct magazinelist tmplist;
881 
882 	spin_lock(&objcachelist_spin);
883 	LIST_REMOVE(oc, oc_next);
884 	spin_unlock(&objcachelist_spin);
885 
886 	SLIST_INIT(&tmplist);
887 	for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++) {
888 		depot = &oc->depot[clusterid];
889 		spin_lock(&depot->spin);
890 		depot_disassociate(depot, &tmplist);
891 		spin_unlock(&depot->spin);
892 	}
893 	maglist_purge(oc, &tmplist);
894 
895 	for (cpuid = 0; cpuid < ncpus; cpuid++) {
896 		cache_percpu = &oc->cache_percpu[cpuid];
897 
898 		crit_enter();
899 		mag_purge(oc, &cache_percpu->loaded_magazine, TRUE);
900 		mag_purge(oc, &cache_percpu->previous_magazine, TRUE);
901 		crit_exit();
902 		cache_percpu->loaded_magazine = NULL;
903 		cache_percpu->previous_magazine = NULL;
904 		/* don't bother adjusting depot->unallocated_objects */
905 	}
906 
907 	kfree(oc->name, M_TEMP);
908 	kfree(oc, M_OBJCACHE);
909 }
910 
911 static void
912 objcache_init(void)
913 {
914 	spin_init(&objcachelist_spin, "objcachelist");
915 
916 	magazine_capmin = mag_capacity_align(MAGAZINE_CAPACITY_MIN);
917 	magazine_capmax = mag_capacity_align(MAGAZINE_CAPACITY_MAX);
918 	if (bootverbose) {
919 		kprintf("objcache: magazine cap [%d, %d]\n",
920 		    magazine_capmin, magazine_capmax);
921 	}
922 #if 0
923 	callout_init_mp(&objcache_callout);
924 	objcache_rebalance_period = 60 * hz;
925 	callout_reset(&objcache_callout, objcache_rebalance_period,
926 		      objcache_timer, NULL);
927 #endif
928 }
929 SYSINIT(objcache, SI_BOOT2_OBJCACHE, SI_ORDER_FIRST, objcache_init, 0);
930