xref: /dragonfly/sys/kern/kern_objcache.c (revision 8e1c6f81)
1 /*
2  * Copyright (c) 2005 Jeffrey M. Hsu.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Jeffrey M. Hsu.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of The DragonFly Project nor the names of its
16  *    contributors may be used to endorse or promote products derived
17  *    from this software without specific, prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * $DragonFly: src/sys/kern/kern_objcache.c,v 1.22 2008/02/03 13:37:56 nth Exp $
33  */
34 
35 #include <sys/param.h>
36 #include <sys/kernel.h>
37 #include <sys/systm.h>
38 #include <sys/callout.h>
39 #include <sys/globaldata.h>
40 #include <sys/malloc.h>
41 #include <sys/queue.h>
42 #include <sys/objcache.h>
43 #include <sys/spinlock.h>
44 #include <sys/thread.h>
45 #include <sys/thread2.h>
46 #include <sys/spinlock2.h>
47 
48 static MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");
49 static MALLOC_DEFINE(M_OBJMAG, "objcache magazine", "Object Cache Magazine");
50 
51 #define	INITIAL_MAG_CAPACITY	64
52 
/*
 * A magazine is a stack of cached object pointers.  Slots
 * objects[0 .. rounds-1] are occupied; mag_alloc() sizes the trailing
 * array so that it can hold `capacity' pointers.
 */
struct magazine {
	int			 rounds;	/* objects currently held */
	int			 capacity;	/* slots available in objects[] */
	int			 cleaning;	/* raised while mag_purge() drains it */
	SLIST_ENTRY(magazine)	 nextmagazine;	/* linkage on a magazinelist */
	void			*objects[];	/* the cached object pointers */
};

/* List head type for magazines chained through nextmagazine. */
SLIST_HEAD(magazinelist, magazine);
62 
/*
 * Per-cluster cache of magazines.
 *
 * Updates to the fields in this structure are made with the spinlock
 * held.  NOTE(review): `waiting' is also read without the spinlock when
 * deciding whether to issue a wakeup.
 */
struct magazinedepot {
	/*
	 * The per-cpu object caches exchange whole magazines with the
	 * depot layer.  Magazines handed back are filed as either empty
	 * or non-empty ("full"), so only these two lists are needed.
	 */
	struct magazinelist	fullmagazines;
	struct magazinelist	emptymagazines;
	int			magcapacity;	/* capacity given to mag_alloc() */

	/* protects this structure */
	struct spinlock		spin;

	/*
	 * Objects that may still be obtained from the back-end allocator
	 * before the limit is hit.  Initialized to -1 when the cache is
	 * created without a limit.
	 */
	int			unallocated_objects;

	/* infrequently used fields */
	int			waiting;	/* waiting for another cpu to
						 * return a full magazine to
						 * the depot */
	int			contested;	/* depot contention count */
};
90 
/*
 * Per-cpu object cache: two magazines plus statistics.
 *
 * All fields in this structure are protected by crit_enter().
 */
struct percpu_objcache {
	struct magazine	*loaded_magazine;	/* active magazine */
	struct magazine	*previous_magazine;	/* backup magazine */

	/* statistics */
	int		gets_cumulative;	/* total calls to get */
	int		gets_null;		/* objcache_get returned NULL */
	int		puts_cumulative;	/* total calls to put */
	int		puts_othercluster;	/* returned to other cluster */

	/* infrequently used fields */
	int		waiting;	/* waiting for a thread on this cpu to
					 * return an obj to the per-cpu cache */
};
109 
/*
 * Placeholders used only until NUMA cluster topology information is
 * available (XXX): there is a single cluster, the current cluster is
 * always 0, and every object is reported as belonging to cluster 0.
 */
#define MAXCLUSTERS 1
#define myclusterid 0
#define CLUSTER_OF(obj) 0
114 
/*
 * Two-level object cache consisting of NUMA cluster-level depots of
 * fully loaded or completely empty magazines and cpu-level caches of
 * individual objects.
 *
 * The structure ends in a flexible array; objcache_create() allocates
 * one cache_percpu[] entry per cpu.
 */
struct objcache {
	char			*name;		/* private copy of the name */

	/* object constructor and destructor from blank storage */
	objcache_ctor_fn	*ctor;
	objcache_dtor_fn	*dtor;
	void			*privdata;	/* passed to ctor and dtor */

	/* interface to underlying allocator */
	objcache_alloc_fn	*alloc;
	objcache_free_fn	*free;
	void			*allocator_args; /* passed to alloc and free */

	LIST_ENTRY(objcache)	oc_next;	/* linkage on allobjcaches */
	int			exhausted;	/* set once the "Exhausted!"
						 * warning has been printed */

	/* NUMA-cluster level caches */
	struct magazinedepot	depot[MAXCLUSTERS];

	struct percpu_objcache	cache_percpu[];		/* per-cpu caches */
};
141 
/*
 * List of all object caches.  Insertions and removals are done with
 * objcachelist_spin held.
 */
static struct spinlock objcachelist_spin;
static LIST_HEAD(objcachelist, objcache) allobjcaches;
144 
145 static struct magazine *
146 mag_alloc(int capacity)
147 {
148 	struct magazine *mag;
149 
150 	mag = kmalloc(__offsetof(struct magazine, objects[capacity]),
151 			M_OBJMAG, M_INTWAIT | M_ZERO);
152 	mag->capacity = capacity;
153 	mag->rounds = 0;
154 	mag->cleaning = 0;
155 	return (mag);
156 }
157 
/*
 * Default destructor, installed by objcache_create() when the caller
 * passes a NULL dtor.  Objects need no tear-down, so it leaves both
 * arguments untouched.
 */
static void
null_dtor(void *obj, void *privdata)
{
	(void)obj;
	(void)privdata;
}
167 
168 static boolean_t
169 null_ctor(void *obj, void *privdata, int ocflags)
170 {
171 	return TRUE;
172 }
173 
174 /*
175  * Create an object cache.
176  */
177 struct objcache *
178 objcache_create(const char *name, int cluster_limit, int mag_capacity,
179 		objcache_ctor_fn *ctor, objcache_dtor_fn *dtor, void *privdata,
180 		objcache_alloc_fn *alloc, objcache_free_fn *free,
181 		void *allocator_args)
182 {
183 	struct objcache *oc;
184 	struct magazinedepot *depot;
185 	int cpuid;
186 	int need;
187 	int factor;
188 	int nmagdepot;
189 	int i;
190 
191 	/* allocate object cache structure */
192 	oc = kmalloc(__offsetof(struct objcache, cache_percpu[ncpus]),
193 		    M_OBJCACHE, M_WAITOK | M_ZERO);
194 	oc->name = kstrdup(name, M_TEMP);
195 	oc->ctor = ctor ? ctor : null_ctor;
196 	oc->dtor = dtor ? dtor : null_dtor;
197 	oc->privdata = privdata;
198 	oc->free = free;
199 	oc->allocator_args = allocator_args;
200 
201 	/* initialize depots */
202 	depot = &oc->depot[0];
203 
204 	spin_init(&depot->spin);
205 	SLIST_INIT(&depot->fullmagazines);
206 	SLIST_INIT(&depot->emptymagazines);
207 
208 	if (mag_capacity == 0)
209 		mag_capacity = INITIAL_MAG_CAPACITY;
210 
211 	/*
212 	 * The cluster_limit must be sufficient to have three magazines per
213 	 * cpu.  If we have a lot of cpus the mag_capacity might just be
214 	 * too big, reduce it if necessary.
215 	 *
216 	 * Each cpu can hold up to two magazines, with the remainder in the
217 	 * depot.  If many objects are allocated fewer magazines are
218 	 * available.  We have to make sure that each cpu has access to
219 	 * free objects until the object cache hits 75% of its limit.
220 	 */
221 	if (cluster_limit == 0) {
222 		depot->unallocated_objects = -1;
223 	} else {
224 		factor = 8;
225 		need = mag_capacity * ncpus * factor;
226 		if (cluster_limit < need && mag_capacity > 16) {
227 			kprintf("objcache(%s): too small for ncpus"
228 				", adjusting mag_capacity %d->",
229 				name, mag_capacity);
230 			while (need > cluster_limit && mag_capacity > 16) {
231 				mag_capacity >>= 1;
232 				need = mag_capacity * ncpus * factor;
233 			}
234 			kprintf("%d\n", mag_capacity);
235 		}
236 		if (cluster_limit < need) {
237 			kprintf("objcache(%s): too small for ncpus"
238 				", adjusting cluster_limit %d->%d\n",
239 				name, cluster_limit, need);
240 			cluster_limit = need;
241 		}
242 		depot->unallocated_objects = cluster_limit;
243 	}
244 	depot->magcapacity = mag_capacity;
245 	oc->alloc = alloc;
246 
247 	/* initialize per-cpu caches */
248 	for (cpuid = 0; cpuid < ncpus; cpuid++) {
249 		struct percpu_objcache *cache_percpu = &oc->cache_percpu[cpuid];
250 
251 		cache_percpu->loaded_magazine = mag_alloc(mag_capacity);
252 		cache_percpu->previous_magazine = mag_alloc(mag_capacity);
253 	}
254 
255 	/* compute initial number of empty magazines in depot */
256 	nmagdepot = 0;
257 	if (cluster_limit > 0) {
258 		/* max number of magazines in depot */
259 		nmagdepot = (cluster_limit - ncpus * 2 * mag_capacity) /
260 				mag_capacity;
261 
262 		/* retain at most 50% of the limit */
263 		nmagdepot /= 2;
264 	}
265 	/* bound result to acceptable range */
266 	if (nmagdepot < 2)
267 		nmagdepot = 2;
268 	if (nmagdepot > 10)
269 		nmagdepot = 10;
270 
271 	/* put empty magazines in depot */
272 	for (i = 0; i < nmagdepot; i++) {
273 		struct magazine *mag = mag_alloc(mag_capacity);
274 		SLIST_INSERT_HEAD(&depot->emptymagazines, mag, nextmagazine);
275 	}
276 
277 	spin_lock_wr(&objcachelist_spin);
278 	LIST_INSERT_HEAD(&allobjcaches, oc, oc_next);
279 	spin_unlock_wr(&objcachelist_spin);
280 
281 	return (oc);
282 }
283 
284 struct objcache *
285 objcache_create_simple(malloc_type_t mtype, size_t objsize)
286 {
287 	struct objcache_malloc_args *margs;
288 	struct objcache *oc;
289 
290 	margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
291 	margs->objsize = objsize;
292 	margs->mtype = mtype;
293 	oc = objcache_create(mtype->ks_shortdesc, 0, 0,
294 			     NULL, NULL, NULL,
295 			     objcache_malloc_alloc, objcache_malloc_free,
296 			     margs);
297 	return (oc);
298 }
299 
300 struct objcache *
301 objcache_create_mbacked(malloc_type_t mtype, size_t objsize,
302 			int cluster_limit, int mag_capacity,
303 			objcache_ctor_fn *ctor, objcache_dtor_fn *dtor,
304 			void *privdata)
305 {
306 	struct objcache_malloc_args *margs;
307 	struct objcache *oc;
308 
309 	margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
310 	margs->objsize = objsize;
311 	margs->mtype = mtype;
312 	oc = objcache_create(mtype->ks_shortdesc,
313 			     cluster_limit, mag_capacity,
314 			     ctor, dtor, privdata,
315 			     objcache_malloc_alloc, objcache_malloc_free,
316 			     margs);
317 	return(oc);
318 }
319 
320 
/*
 * Magazine fill-state predicates.  `mag' is any expression yielding a
 * struct magazine pointer; it is parenthesized in the expansion so that
 * compound expressions such as "base + 1" work, and MAGAZINE_FULL
 * evaluates it twice.
 */
#define MAGAZINE_EMPTY(mag)	((mag)->rounds == 0)
#define MAGAZINE_NOTEMPTY(mag)	((mag)->rounds != 0)
#define MAGAZINE_FULL(mag)	((mag)->rounds == (mag)->capacity)

/*
 * Exchange two struct magazine pointer lvalues.  The temporary has a
 * name unlikely to collide with an argument, so swap(t, u) works.
 */
#define	swap(x, y)	({ struct magazine *swap_tmp_ = (x);		\
			   (x) = (y); (y) = swap_tmp_; })
326 
327 /*
328  * Get an object from the object cache.
329  *
330  * WARNING!  ocflags are only used when we have to go to the underlying
331  * allocator, so we cannot depend on flags such as M_ZERO.
332  */
333 void *
334 objcache_get(struct objcache *oc, int ocflags)
335 {
336 	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
337 	struct magazine *loadedmag;
338 	struct magazine *emptymag;
339 	void *obj;
340 	struct magazinedepot *depot;
341 
342 	KKASSERT((ocflags & M_ZERO) == 0);
343 	crit_enter();
344 	++cpucache->gets_cumulative;
345 
346 retry:
347 	/*
348 	 * Loaded magazine has an object.  This is the hot path.
349 	 * It is lock-free and uses a critical section to block
350 	 * out interrupt handlers on the same processor.
351 	 */
352 	loadedmag = cpucache->loaded_magazine;
353 	if (MAGAZINE_NOTEMPTY(loadedmag)) {
354 		obj = loadedmag->objects[--loadedmag->rounds];
355 		crit_exit();
356 		return (obj);
357 	}
358 
359 	/* Previous magazine has an object. */
360 	if (MAGAZINE_NOTEMPTY(cpucache->previous_magazine)) {
361 		KKASSERT(cpucache->previous_magazine->cleaning +
362 			 cpucache->loaded_magazine->cleaning == 0);
363 		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
364 		loadedmag = cpucache->loaded_magazine;
365 		obj = loadedmag->objects[--loadedmag->rounds];
366 		crit_exit();
367 		return (obj);
368 	}
369 
370 	/*
371 	 * Both magazines empty.  Get a full magazine from the depot and
372 	 * move one of the empty ones to the depot.
373 	 *
374 	 * Obtain the depot spinlock.
375 	 *
376 	 * NOTE: Beyond this point, M_* flags are handled via oc->alloc()
377 	 */
378 	depot = &oc->depot[myclusterid];
379 	spin_lock_wr(&depot->spin);
380 
381 	/*
382 	 * Recheck the cpucache after obtaining the depot spinlock.  This
383 	 * shouldn't be necessary now but don't take any chances.
384 	 */
385 	if (MAGAZINE_NOTEMPTY(cpucache->loaded_magazine) ||
386 	    MAGAZINE_NOTEMPTY(cpucache->previous_magazine)
387 	) {
388 		spin_unlock_wr(&depot->spin);
389 		goto retry;
390 	}
391 
392 	/* Check if depot has a full magazine. */
393 	if (!SLIST_EMPTY(&depot->fullmagazines)) {
394 		emptymag = cpucache->previous_magazine;
395 		cpucache->previous_magazine = cpucache->loaded_magazine;
396 		cpucache->loaded_magazine = SLIST_FIRST(&depot->fullmagazines);
397 		SLIST_REMOVE_HEAD(&depot->fullmagazines, nextmagazine);
398 
399 		/*
400 		 * Return emptymag to the depot.
401 		 */
402 		KKASSERT(MAGAZINE_EMPTY(emptymag));
403 		SLIST_INSERT_HEAD(&depot->emptymagazines,
404 				  emptymag, nextmagazine);
405 		spin_unlock_wr(&depot->spin);
406 		goto retry;
407 	}
408 
409 	/*
410 	 * The depot does not have any non-empty magazines.  If we have
411 	 * not hit our object limit we can allocate a new object using
412 	 * the back-end allocator.
413 	 *
414 	 * note: unallocated_objects can be initialized to -1, which has
415 	 * the effect of removing any allocation limits.
416 	 */
417 	if (depot->unallocated_objects) {
418 		--depot->unallocated_objects;
419 		spin_unlock_wr(&depot->spin);
420 		crit_exit();
421 
422 		obj = oc->alloc(oc->allocator_args, ocflags);
423 		if (obj) {
424 			if (oc->ctor(obj, oc->privdata, ocflags))
425 				return (obj);
426 			oc->free(obj, oc->allocator_args);
427 			spin_lock_wr(&depot->spin);
428 			++depot->unallocated_objects;
429 			spin_unlock_wr(&depot->spin);
430 			if (depot->waiting)
431 				wakeup(depot);
432 			obj = NULL;
433 		}
434 		if (obj == NULL) {
435 			crit_enter();
436 			/*
437 			 * makes debugging easier when gets_cumulative does
438 			 * not include gets_null.
439 			 */
440 			++cpucache->gets_null;
441 			--cpucache->gets_cumulative;
442 			crit_exit();
443 		}
444 		return(obj);
445 	}
446 	if (oc->exhausted == 0) {
447 		kprintf("Warning, objcache(%s): Exhausted!\n", oc->name);
448 		oc->exhausted = 1;
449 	}
450 
451 	/*
452 	 * Otherwise block if allowed to.
453 	 */
454 	if ((ocflags & (M_WAITOK|M_NULLOK)) == M_WAITOK) {
455 		++cpucache->waiting;
456 		++depot->waiting;
457 		msleep(depot, &depot->spin, 0, "objcache_get", 0);
458 		--cpucache->waiting;
459 		--depot->waiting;
460 		spin_unlock_wr(&depot->spin);
461 		goto retry;
462 	}
463 
464 	/*
465 	 * Otherwise fail
466 	 */
467 	++cpucache->gets_null;
468 	--cpucache->gets_cumulative;
469 	crit_exit();
470 	spin_unlock_wr(&depot->spin);
471 	return (NULL);
472 }
473 
474 /*
475  * Wrapper for malloc allocation routines.
476  */
477 void *
478 objcache_malloc_alloc(void *allocator_args, int ocflags)
479 {
480 	struct objcache_malloc_args *alloc_args = allocator_args;
481 
482 	return (kmalloc(alloc_args->objsize, alloc_args->mtype,
483 		       ocflags & OC_MFLAGS));
484 }
485 
486 void
487 objcache_malloc_free(void *obj, void *allocator_args)
488 {
489 	struct objcache_malloc_args *alloc_args = allocator_args;
490 
491 	kfree(obj, alloc_args->mtype);
492 }
493 
/*
 * Allocation wrapper for policies that pre-allocate everything at
 * initialization time and never allocate at run time: it ignores its
 * arguments and always reports failure by returning NULL.
 */
void *
objcache_nop_alloc(void *allocator_args, int ocflags)
{
	void *nothing = NULL;

	return (nothing);
}
503 
/*
 * Free wrapper matching objcache_nop_alloc(): the object is left alone
 * and nothing is released.
 */
void
objcache_nop_free(void *obj, void *allocator_args)
{
	(void)obj;
	(void)allocator_args;
}
508 
509 /*
510  * Return an object to the object cache.
511  */
512 void
513 objcache_put(struct objcache *oc, void *obj)
514 {
515 	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
516 	struct magazine *loadedmag;
517 	struct magazinedepot *depot;
518 
519 	crit_enter();
520 	++cpucache->puts_cumulative;
521 
522 	if (CLUSTER_OF(obj) != myclusterid) {
523 #ifdef notyet
524 		/* use lazy IPI to send object to owning cluster XXX todo */
525 		++cpucache->puts_othercluster;
526 		crit_exit();
527 		return;
528 #endif
529 	}
530 
531 retry:
532 	/*
533 	 * Free slot available in loaded magazine.  This is the hot path.
534 	 * It is lock-free and uses a critical section to block out interrupt
535 	 * handlers on the same processor.
536 	 */
537 	loadedmag = cpucache->loaded_magazine;
538 	if (!MAGAZINE_FULL(loadedmag)) {
539 		loadedmag->objects[loadedmag->rounds++] = obj;
540 		if (cpucache->waiting)
541 			wakeup_mycpu(&oc->depot[myclusterid]);
542 		crit_exit();
543 		return;
544 	}
545 
546 	/*
547 	 * Current magazine full, but previous magazine has room.  XXX
548 	 */
549 	if (!MAGAZINE_FULL(cpucache->previous_magazine)) {
550 		KKASSERT(cpucache->previous_magazine->cleaning +
551 			 cpucache->loaded_magazine->cleaning == 0);
552 		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
553 		loadedmag = cpucache->loaded_magazine;
554 		loadedmag->objects[loadedmag->rounds++] = obj;
555 		if (cpucache->waiting)
556 			wakeup_mycpu(&oc->depot[myclusterid]);
557 		crit_exit();
558 		return;
559 	}
560 
561 	/*
562 	 * Both magazines full.  Get an empty magazine from the depot and
563 	 * move a full loaded magazine to the depot.  Even though the
564 	 * magazine may wind up with space available after we block on
565 	 * the spinlock, we still cycle it through to avoid the non-optimal
566 	 * corner-case.
567 	 *
568 	 * Obtain the depot spinlock.
569 	 */
570 	depot = &oc->depot[myclusterid];
571 	spin_lock_wr(&depot->spin);
572 
573 	/*
574 	 * If an empty magazine is available in the depot, cycle it
575 	 * through and retry.
576 	 */
577 	if (!SLIST_EMPTY(&depot->emptymagazines)) {
578 		KKASSERT(cpucache->previous_magazine->cleaning +
579 			 cpucache->loaded_magazine->cleaning == 0);
580 		loadedmag = cpucache->previous_magazine;
581 		cpucache->previous_magazine = cpucache->loaded_magazine;
582 		cpucache->loaded_magazine = SLIST_FIRST(&depot->emptymagazines);
583 		SLIST_REMOVE_HEAD(&depot->emptymagazines, nextmagazine);
584 
585 		/*
586 		 * Return loadedmag to the depot.  Due to blocking it may
587 		 * not be entirely full and could even be empty.
588 		 */
589 		if (MAGAZINE_EMPTY(loadedmag)) {
590 			SLIST_INSERT_HEAD(&depot->emptymagazines,
591 					  loadedmag, nextmagazine);
592 			spin_unlock_wr(&depot->spin);
593 		} else {
594 			SLIST_INSERT_HEAD(&depot->fullmagazines,
595 					  loadedmag, nextmagazine);
596 			spin_unlock_wr(&depot->spin);
597 			if (depot->waiting)
598 				wakeup(depot);
599 		}
600 		goto retry;
601 	}
602 
603 	/*
604 	 * An empty mag is not available.  This is a corner case which can
605 	 * occur due to cpus holding partially full magazines.  Do not try
606 	 * to allocate a mag, just free the object.
607 	 */
608 	++depot->unallocated_objects;
609 	spin_unlock_wr(&depot->spin);
610 	if (depot->waiting)
611 		wakeup(depot);
612 	crit_exit();
613 	oc->dtor(obj, oc->privdata);
614 	oc->free(obj, oc->allocator_args);
615 }
616 
617 /*
618  * The object is being put back into the cache, but the caller has
619  * indicated that the object is not in any shape to be reused and should
620  * be dtor'd immediately.
621  */
622 void
623 objcache_dtor(struct objcache *oc, void *obj)
624 {
625 	struct magazinedepot *depot;
626 
627 	depot = &oc->depot[myclusterid];
628 	spin_lock_wr(&depot->spin);
629 	++depot->unallocated_objects;
630 	spin_unlock_wr(&depot->spin);
631 	if (depot->waiting)
632 		wakeup(depot);
633 	oc->dtor(obj, oc->privdata);
634 	oc->free(obj, oc->allocator_args);
635 }
636 
637 /*
638  * Deallocate all objects in a magazine and free the magazine if requested.
639  * The magazine must already be disassociated from the depot.
640  *
641  * Must be called with a critical section held when called with a per-cpu
642  * magazine.  The magazine may be indirectly modified during the loop.
643  *
644  * The number of objects freed is returned.
645  */
646 static int
647 mag_purge(struct objcache *oc, struct magazine *mag, int freeit)
648 {
649 	int count;
650 	void *obj;
651 
652 	count = 0;
653 	++mag->cleaning;
654 	while (mag->rounds) {
655 		obj = mag->objects[--mag->rounds];
656 		oc->dtor(obj, oc->privdata);		/* MAY BLOCK */
657 		oc->free(obj, oc->allocator_args);	/* MAY BLOCK */
658 		++count;
659 
660 		/*
661 		 * Cycle for interrupts
662 		 */
663 		if ((count & 15) == 0) {
664 			crit_exit();
665 			crit_enter();
666 		}
667 	}
668 	--mag->cleaning;
669 	if (freeit)
670 		kfree(mag, M_OBJMAG);
671 	return(count);
672 }
673 
674 /*
675  * Disassociate zero or more magazines from a magazine list associated with
676  * the depot, update the depot, and move the magazines to a temporary
677  * list.
678  *
679  * The caller must check the depot for waiters and wake it up, typically
680  * after disposing of the magazines this function loads onto the temporary
681  * list.
682  */
683 static void
684 maglist_disassociate(struct magazinedepot *depot, struct magazinelist *maglist,
685 		     struct magazinelist *tmplist, boolean_t purgeall)
686 {
687 	struct magazine *mag;
688 
689 	while ((mag = SLIST_FIRST(maglist)) != NULL) {
690 		SLIST_REMOVE_HEAD(maglist, nextmagazine);
691 		SLIST_INSERT_HEAD(tmplist, mag, nextmagazine);
692 		depot->unallocated_objects += mag->rounds;
693 	}
694 }
695 
696 /*
697  * Deallocate all magazines and their contents from the passed temporary
698  * list.  The magazines have already been accounted for by their depots.
699  *
700  * The total number of rounds freed is returned.  This number is typically
701  * only used to determine whether a wakeup on the depot is needed or not.
702  */
703 static int
704 maglist_purge(struct objcache *oc, struct magazinelist *maglist)
705 {
706 	struct magazine *mag;
707 	int count = 0;
708 
709 	/*
710 	 * can't use SLIST_FOREACH because blocking releases the depot
711 	 * spinlock
712 	 */
713 	while ((mag = SLIST_FIRST(maglist)) != NULL) {
714 		SLIST_REMOVE_HEAD(maglist, nextmagazine);
715 		count += mag_purge(oc, mag, TRUE);
716 	}
717 	return(count);
718 }
719 
720 /*
721  * De-allocates all magazines on the full and empty magazine lists.
722  *
723  * Because this routine is called with a spinlock held, the magazines
724  * can only be disassociated and moved to a temporary list, not freed.
725  *
726  * The caller is responsible for freeing the magazines.
727  */
728 static void
729 depot_disassociate(struct magazinedepot *depot, struct magazinelist *tmplist)
730 {
731 	maglist_disassociate(depot, &depot->fullmagazines, tmplist, TRUE);
732 	maglist_disassociate(depot, &depot->emptymagazines, tmplist, TRUE);
733 }
734 
#ifdef notneeded
/*
 * Release everything this cpu and the depot are caching for `oc': both
 * per-cpu magazines are emptied, all depot magazines are purged and
 * freed, and waiters on the depot are woken if anything was freed.
 */
void
objcache_reclaim(struct objcache *oc)
{
	/* the per-cpu array is indexed by cpu id, not by cluster id */
	struct percpu_objcache *cache_percpu = &oc->cache_percpu[mycpuid];
	struct magazinedepot *depot = &oc->depot[myclusterid];
	struct magazinelist tmplist;
	int count;

	SLIST_INIT(&tmplist);
	crit_enter();
	count = mag_purge(oc, cache_percpu->loaded_magazine, FALSE);
	count += mag_purge(oc, cache_percpu->previous_magazine, FALSE);
	crit_exit();

	spin_lock_wr(&depot->spin);
	depot->unallocated_objects += count;
	depot_disassociate(depot, &tmplist);
	spin_unlock_wr(&depot->spin);
	count += maglist_purge(oc, &tmplist);
	if (count && depot->waiting)
		wakeup(depot);
}
#endif
759 
760 /*
761  * Try to free up some memory.  Return as soon as some free memory is found.
762  * For each object cache on the reclaim list, first try the current per-cpu
763  * cache, then the full magazine depot.
764  */
765 boolean_t
766 objcache_reclaimlist(struct objcache *oclist[], int nlist, int ocflags)
767 {
768 	struct objcache *oc;
769 	struct percpu_objcache *cpucache;
770 	struct magazinedepot *depot;
771 	struct magazinelist tmplist;
772 	int i, count;
773 
774 	SLIST_INIT(&tmplist);
775 
776 	for (i = 0; i < nlist; i++) {
777 		oc = oclist[i];
778 		cpucache = &oc->cache_percpu[mycpuid];
779 		depot = &oc->depot[myclusterid];
780 
781 		crit_enter();
782 		count = mag_purge(oc, cpucache->loaded_magazine, FALSE);
783 		if (count == 0)
784 			count += mag_purge(oc, cpucache->previous_magazine, FALSE);
785 		crit_exit();
786 		if (count > 0) {
787 			spin_lock_wr(&depot->spin);
788 			depot->unallocated_objects += count;
789 			spin_unlock_wr(&depot->spin);
790 			if (depot->waiting)
791 				wakeup(depot);
792 			return (TRUE);
793 		}
794 		spin_lock_wr(&depot->spin);
795 		maglist_disassociate(depot, &depot->fullmagazines,
796 				     &tmplist, FALSE);
797 		spin_unlock_wr(&depot->spin);
798 		count = maglist_purge(oc, &tmplist);
799 		if (count > 0) {
800 			if (depot->waiting)
801 				wakeup(depot);
802 			return (TRUE);
803 		}
804 	}
805 	return (FALSE);
806 }
807 
808 /*
809  * Destroy an object cache.  Must have no existing references.
810  */
811 void
812 objcache_destroy(struct objcache *oc)
813 {
814 	struct percpu_objcache *cache_percpu;
815 	struct magazinedepot *depot;
816 	int clusterid, cpuid;
817 	struct magazinelist tmplist;
818 
819 	spin_lock_wr(&objcachelist_spin);
820 	LIST_REMOVE(oc, oc_next);
821 	spin_unlock_wr(&objcachelist_spin);
822 
823 	SLIST_INIT(&tmplist);
824 	for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++) {
825 		depot = &oc->depot[clusterid];
826 		spin_lock_wr(&depot->spin);
827 		depot_disassociate(depot, &tmplist);
828 		spin_unlock_wr(&depot->spin);
829 	}
830 	maglist_purge(oc, &tmplist);
831 
832 	for (cpuid = 0; cpuid < ncpus; cpuid++) {
833 		cache_percpu = &oc->cache_percpu[cpuid];
834 
835 		mag_purge(oc, cache_percpu->loaded_magazine, TRUE);
836 		mag_purge(oc, cache_percpu->previous_magazine, TRUE);
837 		cache_percpu->loaded_magazine = NULL;
838 		cache_percpu->previous_magazine = NULL;
839 		/* don't bother adjusting depot->unallocated_objects */
840 	}
841 
842 	kfree(oc->name, M_TEMP);
843 	kfree(oc, M_OBJCACHE);
844 }
845 
#if 0
/*
 * Populate the per-cluster depot with elements from a linear block
 * of memory.  Must be called individually for each cluster.
 * Populated depots should not be destroyed.
 *
 *	base	start of the block
 *	nelts	number of elements in the block
 *	size	size of one element in bytes
 *
 * Elements are packed into magazines which are queued on the depot's
 * full list; a final, partially filled magazine is queued as well.
 */
void
objcache_populate_linear(struct objcache *oc, void *base, int nelts, int size)
{
	char *p = base;
	char *end = (char *)base + (nelts * size);
	struct magazinedepot *depot = &oc->depot[myclusterid];
	struct magazine *emptymag = mag_alloc(depot->magcapacity);

	while (p < end) {
		emptymag->objects[emptymag->rounds++] = p;
		if (MAGAZINE_FULL(emptymag)) {
			spin_lock_wr(&depot->spin);
			SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
					  nextmagazine);
			depot->unallocated_objects += emptymag->rounds;
			spin_unlock_wr(&depot->spin);
			if (depot->waiting)
				wakeup(depot);
			emptymag = mag_alloc(depot->magcapacity);
		}
		p += size;
	}
	if (MAGAZINE_EMPTY(emptymag)) {
		/* nothing left over, just release the spare magazine */
		mag_purge(oc, emptymag, TRUE);
	} else {
		/*
		 * Queue the partially filled magazine.  No replacement is
		 * allocated here: nothing would use or free it.
		 */
		spin_lock_wr(&depot->spin);
		SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
				  nextmagazine);
		depot->unallocated_objects += emptymag->rounds;
		spin_unlock_wr(&depot->spin);
		if (depot->waiting)
			wakeup(depot);
	}
}
#endif
888 
#if 0
/*
 * Check depot contention once a minute.
 * 2 contested locks per second allowed.
 */
static int objcache_rebalance_period;
static const int objcache_contention_rate = 120;
static struct callout objcache_callout;

#define MAXMAGSIZE 512

/*
 * Check depot contention and increase magazine size if necessary.
 *
 * For every cache whose depot magazine capacity is below MAXMAGSIZE and
 * whose contention count exceeds objcache_contention_rate, the depot's
 * magazines are discarded and the capacity is doubled.  The contention
 * count is then reset and the callout re-armed.
 */
static void
objcache_timer(void *dummy)
{
	struct objcache *oc;
	struct magazinedepot *depot;
	struct magazinelist tmplist;

	/*
	 * XXX we need to detect when an objcache is destroyed out from under
	 * us XXX
	 *
	 * The list spinlock is dropped inside the loop below while the
	 * collected magazines are purged, so do not enable this code until
	 * that has been addressed.
	 */
	SLIST_INIT(&tmplist);

	spin_lock_wr(&objcachelist_spin);
	LIST_FOREACH(oc, &allobjcaches, oc_next) {
		depot = &oc->depot[myclusterid];
		if (depot->magcapacity < MAXMAGSIZE) {
			if (depot->contested > objcache_contention_rate) {
				spin_lock_wr(&depot->spin);
				depot_disassociate(depot, &tmplist);
				depot->magcapacity *= 2;
				spin_unlock_wr(&depot->spin);
				kprintf("objcache_timer: increasing cache %s"
				       " magsize to %d, contested %d times\n",
				    oc->name, depot->magcapacity,
				    depot->contested);
			}
			depot->contested = 0;
		}
		spin_unlock_wr(&objcachelist_spin);
		if (maglist_purge(oc, &tmplist) > 0 && depot->waiting)
			wakeup(depot);
		spin_lock_wr(&objcachelist_spin);
	}
	spin_unlock_wr(&objcachelist_spin);

	callout_reset(&objcache_callout, objcache_rebalance_period,
		      objcache_timer, NULL);
}

#endif
943 
/*
 * Subsystem initialization: set up the spinlock that guards the global
 * list of object caches.  Arming the periodic contention timer is
 * compiled out.
 */
static void
objcache_init(void)
{
	spin_init(&objcachelist_spin);
#if 0
	callout_init(&objcache_callout);
	objcache_rebalance_period = 60 * hz;
	callout_reset(&objcache_callout, objcache_rebalance_period,
		      objcache_timer, NULL);
#endif
}
/* Register objcache_init() with the SYSINIT mechanism. */
SYSINIT(objcache, SI_BOOT2_OBJCACHE, SI_ORDER_FIRST, objcache_init, 0);
956