xref: /dragonfly/sys/kern/kern_objcache.c (revision 9a92bb4c)
1 /*
2  * Copyright (c) 2005 Jeffrey M. Hsu.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Jeffrey M. Hsu.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of The DragonFly Project nor the names of its
16  *    contributors may be used to endorse or promote products derived
17  *    from this software without specific, prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * $DragonFly: src/sys/kern/kern_objcache.c,v 1.23 2008/10/26 04:29:19 sephe Exp $
33  */
34 
35 #include <sys/param.h>
36 #include <sys/kernel.h>
37 #include <sys/systm.h>
38 #include <sys/callout.h>
39 #include <sys/globaldata.h>
40 #include <sys/malloc.h>
41 #include <sys/queue.h>
42 #include <sys/objcache.h>
43 #include <sys/spinlock.h>
44 #include <sys/thread.h>
45 #include <sys/thread2.h>
46 #include <sys/spinlock2.h>
47 
48 static MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");
49 static MALLOC_DEFINE(M_OBJMAG, "objcache magazine", "Object Cache Magazine");
50 
51 #define	INITIAL_MAG_CAPACITY	64
52 
/*
 * A magazine is a fixed-capacity stack of cached object pointers.
 * The trailing objects[] array is sized at allocation time by mag_alloc().
 */
struct magazine {
	/* number of object pointers currently stored in objects[] */
	int rounds;
	/* number of slots in objects[] */
	int capacity;
	/* raised by mag_purge() for the duration of a purge */
	int cleaning;
	/* linkage on a depot list or a temporary purge list */
	SLIST_ENTRY(magazine) nextmagazine;
	/* cached objects; entries [0, rounds) are valid */
	void *objects[];
};

SLIST_HEAD(magazinelist, magazine);
62 
63 /*
64  * per-cluster cache of magazines
65  *
66  * All fields in this structure are protected by the spinlock.
67  */
68 struct magazinedepot {
69 	/*
70 	 * The per-cpu object caches only exchanges completely full or
71 	 * completely empty magazines with the depot layer, so only have
72 	 * to cache these two types of magazines.
73 	 */
74 	struct magazinelist	fullmagazines;
75 	struct magazinelist	emptymagazines;
76 	int			magcapacity;
77 
78 	/* protect this structure */
79 	struct spinlock		spin;
80 
81 	/* magazines not yet allocated towards limit */
82 	int			unallocated_objects;
83 
84 	/* infrequently used fields */
85 	int			waiting;	/* waiting for another cpu to
86 						 * return a full magazine to
87 						 * the depot */
88 	int			contested;	/* depot contention count */
89 };
90 
/*
 * Per-cpu object cache.
 *
 * Every field is protected by a critical section (crit_enter()).
 */
struct percpu_objcache {
	/* magazine objects are taken from / returned to first */
	struct magazine *loaded_magazine;
	/* backup magazine, swapped in when the loaded one cannot serve */
	struct magazine *previous_magazine;

	/* statistics */
	int gets_cumulative;	/* calls to objcache_get(), failures excluded */
	int gets_null;		/* objcache_get() calls that returned NULL */
	int puts_cumulative;	/* calls to objcache_put() */
	int puts_othercluster;	/* objects returned to another cluster */

	/*
	 * Infrequently used: number of threads on this cpu sleeping in
	 * objcache_get(); objcache_put() issues a wakeup when non-zero.
	 */
	int waiting;
};
109 
/*
 * Placeholders until NUMA cluster topology information is available (XXX):
 * there is exactly one cluster, and every cpu and object belongs to it.
 */
#define MAXCLUSTERS	1
#define myclusterid	0
#define CLUSTER_OF(obj)	0
114 
115 /*
116  * Two-level object cache consisting of NUMA cluster-level depots of
117  * fully loaded or completely empty magazines and cpu-level caches of
118  * individual objects.
119  */
120 struct objcache {
121 	char			*name;
122 
123 	/* object constructor and destructor from blank storage */
124 	objcache_ctor_fn	*ctor;
125 	objcache_dtor_fn	*dtor;
126 	void			*privdata;
127 
128 	/* interface to underlying allocator */
129 	objcache_alloc_fn	*alloc;
130 	objcache_free_fn	*free;
131 	void			*allocator_args;
132 
133 	LIST_ENTRY(objcache)	oc_next;
134 	int			exhausted;	/* oops */
135 
136 	/* NUMA-cluster level caches */
137 	struct magazinedepot	depot[MAXCLUSTERS];
138 
139 	struct percpu_objcache	cache_percpu[];		/* per-cpu caches */
140 };
141 
142 static struct spinlock objcachelist_spin;
143 static LIST_HEAD(objcachelist, objcache) allobjcaches;
144 
145 static struct magazine *
146 mag_alloc(int capacity)
147 {
148 	struct magazine *mag;
149 
150 	mag = kmalloc(__offsetof(struct magazine, objects[capacity]),
151 			M_OBJMAG, M_INTWAIT | M_ZERO);
152 	mag->capacity = capacity;
153 	mag->rounds = 0;
154 	mag->cleaning = 0;
155 	return (mag);
156 }
157 
/*
 * Default destructor, installed by objcache_create() when the caller
 * passes a NULL dtor: objects need no de-construction.
 */
static void
null_dtor(void *obj, void *privdata)
{
	/* intentionally empty; both arguments are ignored */
	(void)obj;
	(void)privdata;
}
167 
168 static boolean_t
169 null_ctor(void *obj, void *privdata, int ocflags)
170 {
171 	return TRUE;
172 }
173 
174 /*
175  * Create an object cache.
176  */
177 struct objcache *
178 objcache_create(const char *name, int *cluster_limit0, int mag_capacity,
179 		objcache_ctor_fn *ctor, objcache_dtor_fn *dtor, void *privdata,
180 		objcache_alloc_fn *alloc, objcache_free_fn *free,
181 		void *allocator_args)
182 {
183 	struct objcache *oc;
184 	struct magazinedepot *depot;
185 	int cpuid;
186 	int need;
187 	int factor;
188 	int nmagdepot;
189 	int i;
190 	int cluster_limit;
191 
192 	if (cluster_limit0 == NULL)
193 		cluster_limit = 0;
194 	else
195 		cluster_limit = *cluster_limit0;
196 
197 	/* allocate object cache structure */
198 	oc = kmalloc(__offsetof(struct objcache, cache_percpu[ncpus]),
199 		    M_OBJCACHE, M_WAITOK | M_ZERO);
200 	oc->name = kstrdup(name, M_TEMP);
201 	oc->ctor = ctor ? ctor : null_ctor;
202 	oc->dtor = dtor ? dtor : null_dtor;
203 	oc->privdata = privdata;
204 	oc->free = free;
205 	oc->allocator_args = allocator_args;
206 
207 	/* initialize depots */
208 	depot = &oc->depot[0];
209 
210 	spin_init(&depot->spin);
211 	SLIST_INIT(&depot->fullmagazines);
212 	SLIST_INIT(&depot->emptymagazines);
213 
214 	if (mag_capacity == 0)
215 		mag_capacity = INITIAL_MAG_CAPACITY;
216 
217 	/*
218 	 * The cluster_limit must be sufficient to have three magazines per
219 	 * cpu.  If we have a lot of cpus the mag_capacity might just be
220 	 * too big, reduce it if necessary.
221 	 *
222 	 * Each cpu can hold up to two magazines, with the remainder in the
223 	 * depot.  If many objects are allocated fewer magazines are
224 	 * available.  We have to make sure that each cpu has access to
225 	 * free objects until the object cache hits 75% of its limit.
226 	 */
227 	if (cluster_limit == 0) {
228 		depot->unallocated_objects = -1;
229 	} else {
230 		factor = 8;
231 		need = mag_capacity * ncpus * factor;
232 		if (cluster_limit < need && mag_capacity > 16) {
233 			kprintf("objcache(%s): too small for ncpus"
234 				", adjusting mag_capacity %d->",
235 				name, mag_capacity);
236 			while (need > cluster_limit && mag_capacity > 16) {
237 				mag_capacity >>= 1;
238 				need = mag_capacity * ncpus * factor;
239 			}
240 			kprintf("%d\n", mag_capacity);
241 		}
242 		if (cluster_limit < need) {
243 			kprintf("objcache(%s): too small for ncpus"
244 				", adjusting cluster_limit %d->%d\n",
245 				name, cluster_limit, need);
246 			cluster_limit = need;
247 		}
248 		depot->unallocated_objects = cluster_limit;
249 	}
250 	depot->magcapacity = mag_capacity;
251 	oc->alloc = alloc;
252 
253 	/* initialize per-cpu caches */
254 	for (cpuid = 0; cpuid < ncpus; cpuid++) {
255 		struct percpu_objcache *cache_percpu = &oc->cache_percpu[cpuid];
256 
257 		cache_percpu->loaded_magazine = mag_alloc(mag_capacity);
258 		cache_percpu->previous_magazine = mag_alloc(mag_capacity);
259 	}
260 
261 	/* compute initial number of empty magazines in depot */
262 	nmagdepot = 0;
263 	if (cluster_limit > 0) {
264 		/* max number of magazines in depot */
265 		nmagdepot = (cluster_limit - ncpus * 2 * mag_capacity) /
266 				mag_capacity;
267 
268 		/* retain at most 50% of the limit */
269 		nmagdepot /= 2;
270 	}
271 	/* bound result to acceptable range */
272 	if (nmagdepot < 2)
273 		nmagdepot = 2;
274 	if (nmagdepot > 10)
275 		nmagdepot = 10;
276 
277 	/* put empty magazines in depot */
278 	for (i = 0; i < nmagdepot; i++) {
279 		struct magazine *mag = mag_alloc(mag_capacity);
280 		SLIST_INSERT_HEAD(&depot->emptymagazines, mag, nextmagazine);
281 	}
282 
283 	spin_lock_wr(&objcachelist_spin);
284 	LIST_INSERT_HEAD(&allobjcaches, oc, oc_next);
285 	spin_unlock_wr(&objcachelist_spin);
286 
287 	if (cluster_limit0 != NULL)
288 		*cluster_limit0 = cluster_limit;
289 	return (oc);
290 }
291 
292 struct objcache *
293 objcache_create_simple(malloc_type_t mtype, size_t objsize)
294 {
295 	struct objcache_malloc_args *margs;
296 	struct objcache *oc;
297 
298 	margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
299 	margs->objsize = objsize;
300 	margs->mtype = mtype;
301 	oc = objcache_create(mtype->ks_shortdesc, NULL, 0,
302 			     NULL, NULL, NULL,
303 			     objcache_malloc_alloc, objcache_malloc_free,
304 			     margs);
305 	return (oc);
306 }
307 
308 struct objcache *
309 objcache_create_mbacked(malloc_type_t mtype, size_t objsize,
310 			int *cluster_limit, int mag_capacity,
311 			objcache_ctor_fn *ctor, objcache_dtor_fn *dtor,
312 			void *privdata)
313 {
314 	struct objcache_malloc_args *margs;
315 	struct objcache *oc;
316 
317 	margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
318 	margs->objsize = objsize;
319 	margs->mtype = mtype;
320 	oc = objcache_create(mtype->ks_shortdesc,
321 			     cluster_limit, mag_capacity,
322 			     ctor, dtor, privdata,
323 			     objcache_malloc_alloc, objcache_malloc_free,
324 			     margs);
325 	return(oc);
326 }
327 
328 
/*
 * Magazine fill-state predicates.  'mag' is a struct magazine pointer
 * expression; it is parenthesized so that arguments such as '&m' or
 * 'cond ? a : b' expand correctly.  MAGAZINE_FULL evaluates 'mag' twice.
 */
#define MAGAZINE_EMPTY(mag)	((mag)->rounds == 0)
#define MAGAZINE_NOTEMPTY(mag)	((mag)->rounds != 0)
#define MAGAZINE_FULL(mag)	((mag)->rounds == (mag)->capacity)

/*
 * Exchange two struct magazine pointer lvalues.  Each argument is
 * evaluated twice, so it must be free of side effects.
 */
#define	swap(x, y)	({ struct magazine *t = (x); (x) = (y); (y) = t; })
334 
335 /*
336  * Get an object from the object cache.
337  *
338  * WARNING!  ocflags are only used when we have to go to the underlying
339  * allocator, so we cannot depend on flags such as M_ZERO.
340  */
341 void *
342 objcache_get(struct objcache *oc, int ocflags)
343 {
344 	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
345 	struct magazine *loadedmag;
346 	struct magazine *emptymag;
347 	void *obj;
348 	struct magazinedepot *depot;
349 
350 	KKASSERT((ocflags & M_ZERO) == 0);
351 	crit_enter();
352 	++cpucache->gets_cumulative;
353 
354 retry:
355 	/*
356 	 * Loaded magazine has an object.  This is the hot path.
357 	 * It is lock-free and uses a critical section to block
358 	 * out interrupt handlers on the same processor.
359 	 */
360 	loadedmag = cpucache->loaded_magazine;
361 	if (MAGAZINE_NOTEMPTY(loadedmag)) {
362 		obj = loadedmag->objects[--loadedmag->rounds];
363 		crit_exit();
364 		return (obj);
365 	}
366 
367 	/* Previous magazine has an object. */
368 	if (MAGAZINE_NOTEMPTY(cpucache->previous_magazine)) {
369 		KKASSERT(cpucache->previous_magazine->cleaning +
370 			 cpucache->loaded_magazine->cleaning == 0);
371 		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
372 		loadedmag = cpucache->loaded_magazine;
373 		obj = loadedmag->objects[--loadedmag->rounds];
374 		crit_exit();
375 		return (obj);
376 	}
377 
378 	/*
379 	 * Both magazines empty.  Get a full magazine from the depot and
380 	 * move one of the empty ones to the depot.
381 	 *
382 	 * Obtain the depot spinlock.
383 	 *
384 	 * NOTE: Beyond this point, M_* flags are handled via oc->alloc()
385 	 */
386 	depot = &oc->depot[myclusterid];
387 	spin_lock_wr(&depot->spin);
388 
389 	/*
390 	 * Recheck the cpucache after obtaining the depot spinlock.  This
391 	 * shouldn't be necessary now but don't take any chances.
392 	 */
393 	if (MAGAZINE_NOTEMPTY(cpucache->loaded_magazine) ||
394 	    MAGAZINE_NOTEMPTY(cpucache->previous_magazine)
395 	) {
396 		spin_unlock_wr(&depot->spin);
397 		goto retry;
398 	}
399 
400 	/* Check if depot has a full magazine. */
401 	if (!SLIST_EMPTY(&depot->fullmagazines)) {
402 		emptymag = cpucache->previous_magazine;
403 		cpucache->previous_magazine = cpucache->loaded_magazine;
404 		cpucache->loaded_magazine = SLIST_FIRST(&depot->fullmagazines);
405 		SLIST_REMOVE_HEAD(&depot->fullmagazines, nextmagazine);
406 
407 		/*
408 		 * Return emptymag to the depot.
409 		 */
410 		KKASSERT(MAGAZINE_EMPTY(emptymag));
411 		SLIST_INSERT_HEAD(&depot->emptymagazines,
412 				  emptymag, nextmagazine);
413 		spin_unlock_wr(&depot->spin);
414 		goto retry;
415 	}
416 
417 	/*
418 	 * The depot does not have any non-empty magazines.  If we have
419 	 * not hit our object limit we can allocate a new object using
420 	 * the back-end allocator.
421 	 *
422 	 * note: unallocated_objects can be initialized to -1, which has
423 	 * the effect of removing any allocation limits.
424 	 */
425 	if (depot->unallocated_objects) {
426 		--depot->unallocated_objects;
427 		spin_unlock_wr(&depot->spin);
428 		crit_exit();
429 
430 		obj = oc->alloc(oc->allocator_args, ocflags);
431 		if (obj) {
432 			if (oc->ctor(obj, oc->privdata, ocflags))
433 				return (obj);
434 			oc->free(obj, oc->allocator_args);
435 			spin_lock_wr(&depot->spin);
436 			++depot->unallocated_objects;
437 			spin_unlock_wr(&depot->spin);
438 			if (depot->waiting)
439 				wakeup(depot);
440 			obj = NULL;
441 		}
442 		if (obj == NULL) {
443 			crit_enter();
444 			/*
445 			 * makes debugging easier when gets_cumulative does
446 			 * not include gets_null.
447 			 */
448 			++cpucache->gets_null;
449 			--cpucache->gets_cumulative;
450 			crit_exit();
451 		}
452 		return(obj);
453 	}
454 	if (oc->exhausted == 0) {
455 		kprintf("Warning, objcache(%s): Exhausted!\n", oc->name);
456 		oc->exhausted = 1;
457 	}
458 
459 	/*
460 	 * Otherwise block if allowed to.
461 	 */
462 	if ((ocflags & (M_WAITOK|M_NULLOK)) == M_WAITOK) {
463 		++cpucache->waiting;
464 		++depot->waiting;
465 		msleep(depot, &depot->spin, 0, "objcache_get", 0);
466 		--cpucache->waiting;
467 		--depot->waiting;
468 		spin_unlock_wr(&depot->spin);
469 		goto retry;
470 	}
471 
472 	/*
473 	 * Otherwise fail
474 	 */
475 	++cpucache->gets_null;
476 	--cpucache->gets_cumulative;
477 	crit_exit();
478 	spin_unlock_wr(&depot->spin);
479 	return (NULL);
480 }
481 
482 /*
483  * Wrapper for malloc allocation routines.
484  */
485 void *
486 objcache_malloc_alloc(void *allocator_args, int ocflags)
487 {
488 	struct objcache_malloc_args *alloc_args = allocator_args;
489 
490 	return (kmalloc(alloc_args->objsize, alloc_args->mtype,
491 		       ocflags & OC_MFLAGS));
492 }
493 
494 void
495 objcache_malloc_free(void *obj, void *allocator_args)
496 {
497 	struct objcache_malloc_args *alloc_args = allocator_args;
498 
499 	kfree(obj, alloc_args->mtype);
500 }
501 
/*
 * Allocation hook for policies that pre-allocate at initialization time
 * and never allocate at run time: it always fails by returning NULL.
 */
void *
objcache_nop_alloc(void *allocator_args, int ocflags)
{
	(void)allocator_args;
	(void)ocflags;

	return (NULL);
}
511 
/*
 * Free hook matching objcache_nop_alloc(): the object is left untouched.
 */
void
objcache_nop_free(void *obj, void *allocator_args)
{
	(void)obj;
	(void)allocator_args;
}
516 
517 /*
518  * Return an object to the object cache.
519  */
520 void
521 objcache_put(struct objcache *oc, void *obj)
522 {
523 	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
524 	struct magazine *loadedmag;
525 	struct magazinedepot *depot;
526 
527 	crit_enter();
528 	++cpucache->puts_cumulative;
529 
530 	if (CLUSTER_OF(obj) != myclusterid) {
531 #ifdef notyet
532 		/* use lazy IPI to send object to owning cluster XXX todo */
533 		++cpucache->puts_othercluster;
534 		crit_exit();
535 		return;
536 #endif
537 	}
538 
539 retry:
540 	/*
541 	 * Free slot available in loaded magazine.  This is the hot path.
542 	 * It is lock-free and uses a critical section to block out interrupt
543 	 * handlers on the same processor.
544 	 */
545 	loadedmag = cpucache->loaded_magazine;
546 	if (!MAGAZINE_FULL(loadedmag)) {
547 		loadedmag->objects[loadedmag->rounds++] = obj;
548 		if (cpucache->waiting)
549 			wakeup_mycpu(&oc->depot[myclusterid]);
550 		crit_exit();
551 		return;
552 	}
553 
554 	/*
555 	 * Current magazine full, but previous magazine has room.  XXX
556 	 */
557 	if (!MAGAZINE_FULL(cpucache->previous_magazine)) {
558 		KKASSERT(cpucache->previous_magazine->cleaning +
559 			 cpucache->loaded_magazine->cleaning == 0);
560 		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
561 		loadedmag = cpucache->loaded_magazine;
562 		loadedmag->objects[loadedmag->rounds++] = obj;
563 		if (cpucache->waiting)
564 			wakeup_mycpu(&oc->depot[myclusterid]);
565 		crit_exit();
566 		return;
567 	}
568 
569 	/*
570 	 * Both magazines full.  Get an empty magazine from the depot and
571 	 * move a full loaded magazine to the depot.  Even though the
572 	 * magazine may wind up with space available after we block on
573 	 * the spinlock, we still cycle it through to avoid the non-optimal
574 	 * corner-case.
575 	 *
576 	 * Obtain the depot spinlock.
577 	 */
578 	depot = &oc->depot[myclusterid];
579 	spin_lock_wr(&depot->spin);
580 
581 	/*
582 	 * If an empty magazine is available in the depot, cycle it
583 	 * through and retry.
584 	 */
585 	if (!SLIST_EMPTY(&depot->emptymagazines)) {
586 		KKASSERT(cpucache->previous_magazine->cleaning +
587 			 cpucache->loaded_magazine->cleaning == 0);
588 		loadedmag = cpucache->previous_magazine;
589 		cpucache->previous_magazine = cpucache->loaded_magazine;
590 		cpucache->loaded_magazine = SLIST_FIRST(&depot->emptymagazines);
591 		SLIST_REMOVE_HEAD(&depot->emptymagazines, nextmagazine);
592 
593 		/*
594 		 * Return loadedmag to the depot.  Due to blocking it may
595 		 * not be entirely full and could even be empty.
596 		 */
597 		if (MAGAZINE_EMPTY(loadedmag)) {
598 			SLIST_INSERT_HEAD(&depot->emptymagazines,
599 					  loadedmag, nextmagazine);
600 			spin_unlock_wr(&depot->spin);
601 		} else {
602 			SLIST_INSERT_HEAD(&depot->fullmagazines,
603 					  loadedmag, nextmagazine);
604 			spin_unlock_wr(&depot->spin);
605 			if (depot->waiting)
606 				wakeup(depot);
607 		}
608 		goto retry;
609 	}
610 
611 	/*
612 	 * An empty mag is not available.  This is a corner case which can
613 	 * occur due to cpus holding partially full magazines.  Do not try
614 	 * to allocate a mag, just free the object.
615 	 */
616 	++depot->unallocated_objects;
617 	spin_unlock_wr(&depot->spin);
618 	if (depot->waiting)
619 		wakeup(depot);
620 	crit_exit();
621 	oc->dtor(obj, oc->privdata);
622 	oc->free(obj, oc->allocator_args);
623 }
624 
625 /*
626  * The object is being put back into the cache, but the caller has
627  * indicated that the object is not in any shape to be reused and should
628  * be dtor'd immediately.
629  */
630 void
631 objcache_dtor(struct objcache *oc, void *obj)
632 {
633 	struct magazinedepot *depot;
634 
635 	depot = &oc->depot[myclusterid];
636 	spin_lock_wr(&depot->spin);
637 	++depot->unallocated_objects;
638 	spin_unlock_wr(&depot->spin);
639 	if (depot->waiting)
640 		wakeup(depot);
641 	oc->dtor(obj, oc->privdata);
642 	oc->free(obj, oc->allocator_args);
643 }
644 
645 /*
646  * Deallocate all objects in a magazine and free the magazine if requested.
647  * The magazine must already be disassociated from the depot.
648  *
649  * Must be called with a critical section held when called with a per-cpu
650  * magazine.  The magazine may be indirectly modified during the loop.
651  *
652  * The number of objects freed is returned.
653  */
654 static int
655 mag_purge(struct objcache *oc, struct magazine *mag, int freeit)
656 {
657 	int count;
658 	void *obj;
659 
660 	count = 0;
661 	++mag->cleaning;
662 	while (mag->rounds) {
663 		obj = mag->objects[--mag->rounds];
664 		oc->dtor(obj, oc->privdata);		/* MAY BLOCK */
665 		oc->free(obj, oc->allocator_args);	/* MAY BLOCK */
666 		++count;
667 
668 		/*
669 		 * Cycle for interrupts
670 		 */
671 		if ((count & 15) == 0) {
672 			crit_exit();
673 			crit_enter();
674 		}
675 	}
676 	--mag->cleaning;
677 	if (freeit)
678 		kfree(mag, M_OBJMAG);
679 	return(count);
680 }
681 
682 /*
683  * Disassociate zero or more magazines from a magazine list associated with
684  * the depot, update the depot, and move the magazines to a temporary
685  * list.
686  *
687  * The caller must check the depot for waiters and wake it up, typically
688  * after disposing of the magazines this function loads onto the temporary
689  * list.
690  */
691 static void
692 maglist_disassociate(struct magazinedepot *depot, struct magazinelist *maglist,
693 		     struct magazinelist *tmplist, boolean_t purgeall)
694 {
695 	struct magazine *mag;
696 
697 	while ((mag = SLIST_FIRST(maglist)) != NULL) {
698 		SLIST_REMOVE_HEAD(maglist, nextmagazine);
699 		SLIST_INSERT_HEAD(tmplist, mag, nextmagazine);
700 		depot->unallocated_objects += mag->rounds;
701 	}
702 }
703 
704 /*
705  * Deallocate all magazines and their contents from the passed temporary
706  * list.  The magazines have already been accounted for by their depots.
707  *
708  * The total number of rounds freed is returned.  This number is typically
709  * only used to determine whether a wakeup on the depot is needed or not.
710  */
711 static int
712 maglist_purge(struct objcache *oc, struct magazinelist *maglist)
713 {
714 	struct magazine *mag;
715 	int count = 0;
716 
717 	/*
718 	 * can't use SLIST_FOREACH because blocking releases the depot
719 	 * spinlock
720 	 */
721 	while ((mag = SLIST_FIRST(maglist)) != NULL) {
722 		SLIST_REMOVE_HEAD(maglist, nextmagazine);
723 		count += mag_purge(oc, mag, TRUE);
724 	}
725 	return(count);
726 }
727 
728 /*
729  * De-allocates all magazines on the full and empty magazine lists.
730  *
731  * Because this routine is called with a spinlock held, the magazines
732  * can only be disassociated and moved to a temporary list, not freed.
733  *
734  * The caller is responsible for freeing the magazines.
735  */
736 static void
737 depot_disassociate(struct magazinedepot *depot, struct magazinelist *tmplist)
738 {
739 	maglist_disassociate(depot, &depot->fullmagazines, tmplist, TRUE);
740 	maglist_disassociate(depot, &depot->emptymagazines, tmplist, TRUE);
741 }
742 
#ifdef notneeded
/*
 * Release everything cached for the current cpu and in the current
 * cluster's depot: purge both per-cpu magazines, then detach and purge
 * all depot magazines, and wake any waiters if objects were freed.
 */
void
objcache_reclaim(struct objcache *oc)
{
	/* the per-cpu array is indexed by cpu id, not by cluster id */
	struct percpu_objcache *cache_percpu = &oc->cache_percpu[mycpuid];
	struct magazinedepot *depot = &oc->depot[myclusterid];
	struct magazinelist tmplist;
	int count;

	SLIST_INIT(&tmplist);
	crit_enter();
	count = mag_purge(oc, cache_percpu->loaded_magazine, FALSE);
	count += mag_purge(oc, cache_percpu->previous_magazine, FALSE);
	crit_exit();

	spin_lock_wr(&depot->spin);
	/* credit the objects freed from the per-cpu magazines */
	depot->unallocated_objects += count;
	depot_disassociate(depot, &tmplist);
	spin_unlock_wr(&depot->spin);
	count += maglist_purge(oc, &tmplist);
	if (count && depot->waiting)
		wakeup(depot);
}
#endif
767 
768 /*
769  * Try to free up some memory.  Return as soon as some free memory is found.
770  * For each object cache on the reclaim list, first try the current per-cpu
771  * cache, then the full magazine depot.
772  */
773 boolean_t
774 objcache_reclaimlist(struct objcache *oclist[], int nlist, int ocflags)
775 {
776 	struct objcache *oc;
777 	struct percpu_objcache *cpucache;
778 	struct magazinedepot *depot;
779 	struct magazinelist tmplist;
780 	int i, count;
781 
782 	SLIST_INIT(&tmplist);
783 
784 	for (i = 0; i < nlist; i++) {
785 		oc = oclist[i];
786 		cpucache = &oc->cache_percpu[mycpuid];
787 		depot = &oc->depot[myclusterid];
788 
789 		crit_enter();
790 		count = mag_purge(oc, cpucache->loaded_magazine, FALSE);
791 		if (count == 0)
792 			count += mag_purge(oc, cpucache->previous_magazine, FALSE);
793 		crit_exit();
794 		if (count > 0) {
795 			spin_lock_wr(&depot->spin);
796 			depot->unallocated_objects += count;
797 			spin_unlock_wr(&depot->spin);
798 			if (depot->waiting)
799 				wakeup(depot);
800 			return (TRUE);
801 		}
802 		spin_lock_wr(&depot->spin);
803 		maglist_disassociate(depot, &depot->fullmagazines,
804 				     &tmplist, FALSE);
805 		spin_unlock_wr(&depot->spin);
806 		count = maglist_purge(oc, &tmplist);
807 		if (count > 0) {
808 			if (depot->waiting)
809 				wakeup(depot);
810 			return (TRUE);
811 		}
812 	}
813 	return (FALSE);
814 }
815 
816 /*
817  * Destroy an object cache.  Must have no existing references.
818  */
819 void
820 objcache_destroy(struct objcache *oc)
821 {
822 	struct percpu_objcache *cache_percpu;
823 	struct magazinedepot *depot;
824 	int clusterid, cpuid;
825 	struct magazinelist tmplist;
826 
827 	spin_lock_wr(&objcachelist_spin);
828 	LIST_REMOVE(oc, oc_next);
829 	spin_unlock_wr(&objcachelist_spin);
830 
831 	SLIST_INIT(&tmplist);
832 	for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++) {
833 		depot = &oc->depot[clusterid];
834 		spin_lock_wr(&depot->spin);
835 		depot_disassociate(depot, &tmplist);
836 		spin_unlock_wr(&depot->spin);
837 	}
838 	maglist_purge(oc, &tmplist);
839 
840 	for (cpuid = 0; cpuid < ncpus; cpuid++) {
841 		cache_percpu = &oc->cache_percpu[cpuid];
842 
843 		mag_purge(oc, cache_percpu->loaded_magazine, TRUE);
844 		mag_purge(oc, cache_percpu->previous_magazine, TRUE);
845 		cache_percpu->loaded_magazine = NULL;
846 		cache_percpu->previous_magazine = NULL;
847 		/* don't bother adjusting depot->unallocated_objects */
848 	}
849 
850 	kfree(oc->name, M_TEMP);
851 	kfree(oc, M_OBJCACHE);
852 }
853 
#if 0
/*
 * Populate the per-cluster depot with elements from a linear block
 * of memory.  Must be called individually for each cluster.
 * Populated depots should not be destroyed.
 *
 * base   start of the memory block
 * nelts  number of elements in the block
 * size   size of each element in bytes
 */
void
objcache_populate_linear(struct objcache *oc, void *base, int nelts, int size)
{
	char *p = base;
	char *end = (char *)base + (nelts * size);
	struct magazinedepot *depot = &oc->depot[myclusterid];
	struct magazine *emptymag = mag_alloc(depot->magcapacity);

	while (p < end) {
		emptymag->objects[emptymag->rounds++] = p;
		if (MAGAZINE_FULL(emptymag)) {
			/* hand the filled magazine over and start a new one */
			spin_lock_wr(&depot->spin);
			SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
					  nextmagazine);
			depot->unallocated_objects += emptymag->rounds;
			spin_unlock_wr(&depot->spin);
			if (depot->waiting)
				wakeup(depot);
			emptymag = mag_alloc(depot->magcapacity);
		}
		p += size;
	}
	if (MAGAZINE_EMPTY(emptymag)) {
		/* nothing left over; just free the unused magazine */
		mag_purge(oc, emptymag, TRUE);
	} else {
		/*
		 * Hand over the final, partially filled magazine.  No
		 * replacement is allocated: nothing would use or free it.
		 */
		spin_lock_wr(&depot->spin);
		SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
				  nextmagazine);
		depot->unallocated_objects += emptymag->rounds;
		spin_unlock_wr(&depot->spin);
		if (depot->waiting)
			wakeup(depot);
	}
}
#endif
896 
#if 0
/*
 * Check depot contention once a minute.
 * 2 contested locks per second allowed.
 */
static int objcache_rebalance_period;
static const int objcache_contention_rate = 120;
static struct callout objcache_callout;

#define MAXMAGSIZE 512

/*
 * Check depot contention and increase magazine size if necessary.
 *
 * For every cache whose depot was contested more often than
 * objcache_contention_rate since the last run, the depot's magazines
 * are discarded and its magazine capacity is doubled (while below
 * MAXMAGSIZE).  The callout then re-arms itself.
 */
static void
objcache_timer(void *dummy)
{
	struct objcache *oc;
	struct magazinedepot *depot;
	struct magazinelist tmplist;

	/*
	 * XXX we need to detect when an objcache is destroyed out from
	 * under us XXX
	 */

	SLIST_INIT(&tmplist);

	spin_lock_wr(&objcachelist_spin);
	LIST_FOREACH(oc, &allobjcaches, oc_next) {
		depot = &oc->depot[myclusterid];
		if (depot->magcapacity < MAXMAGSIZE) {
			if (depot->contested > objcache_contention_rate) {
				spin_lock_wr(&depot->spin);
				depot_disassociate(depot, &tmplist);
				depot->magcapacity *= 2;
				spin_unlock_wr(&depot->spin);
				kprintf("objcache_timer: increasing cache %s"
				       " magsize to %d, contested %d times\n",
				    oc->name, depot->magcapacity,
				    depot->contested);
			}
			depot->contested = 0;
		}
		/* the list lock is dropped while the magazines are purged */
		spin_unlock_wr(&objcachelist_spin);
		if (maglist_purge(oc, &tmplist) > 0 && depot->waiting)
			wakeup(depot);
		spin_lock_wr(&objcachelist_spin);
	}
	spin_unlock_wr(&objcachelist_spin);

	callout_reset(&objcache_callout, objcache_rebalance_period,
		      objcache_timer, NULL);
}

#endif
951 
952 static void
953 objcache_init(void)
954 {
955 	spin_init(&objcachelist_spin);
956 #if 0
957 	callout_init(&objcache_callout);
958 	objcache_rebalance_period = 60 * hz;
959 	callout_reset(&objcache_callout, objcache_rebalance_period,
960 		      objcache_timer, NULL);
961 #endif
962 }
963 SYSINIT(objcache, SI_BOOT2_OBJCACHE, SI_ORDER_FIRST, objcache_init, 0);
964