xref: /dragonfly/sys/kern/kern_objcache.c (revision 52f9f0d9)
1 /*
2  * Copyright (c) 2005 Jeffrey M. Hsu.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Jeffrey M. Hsu.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of The DragonFly Project nor the names of its
16  *    contributors may be used to endorse or promote products derived
17  *    from this software without specific, prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * $DragonFly: src/sys/kern/kern_objcache.c,v 1.23 2008/10/26 04:29:19 sephe Exp $
33  */
34 
35 #include <sys/param.h>
36 #include <sys/kernel.h>
37 #include <sys/systm.h>
38 #include <sys/callout.h>
39 #include <sys/globaldata.h>
40 #include <sys/malloc.h>
41 #include <sys/queue.h>
42 #include <sys/objcache.h>
43 #include <sys/spinlock.h>
44 #include <sys/thread.h>
45 #include <sys/thread2.h>
46 #include <sys/spinlock2.h>
47 
48 static MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");
49 static MALLOC_DEFINE(M_OBJMAG, "objcache magazine", "Object Cache Magazine");
50 
51 #define	INITIAL_MAG_CAPACITY	64
52 
53 struct magazine {
54 	int			 rounds;
55 	int			 capacity;
56 	SLIST_ENTRY(magazine)	 nextmagazine;
57 	void			*objects[];
58 };
59 
60 SLIST_HEAD(magazinelist, magazine);
61 
62 /*
63  * per-cluster cache of magazines
64  *
65  * All fields in this structure are protected by the spinlock.
66  */
67 struct magazinedepot {
68 	/*
69 	 * The per-cpu object caches only exchanges completely full or
70 	 * completely empty magazines with the depot layer, so only have
71 	 * to cache these two types of magazines.
72 	 */
73 	struct magazinelist	fullmagazines;
74 	struct magazinelist	emptymagazines;
75 	int			magcapacity;
76 
77 	/* protect this structure */
78 	struct spinlock		spin;
79 
80 	/* magazines not yet allocated towards limit */
81 	int			unallocated_objects;
82 
83 	/* infrequently used fields */
84 	int			waiting;	/* waiting for another cpu to
85 						 * return a full magazine to
86 						 * the depot */
87 	int			contested;	/* depot contention count */
88 };
89 
/*
 * Per-cpu object cache.
 *
 * Every field in this structure is protected by crit_enter().
 */
struct percpu_objcache {
	struct magazine	*loaded_magazine;	/* magazine in active use */
	struct magazine	*previous_magazine;	/* backup magazine */

	/* statistics */
	int		gets_cumulative;	/* calls to objcache_get() */
	int		gets_null;		/* gets that returned NULL */
	int		puts_cumulative;	/* calls to objcache_put() */
	int		puts_othercluster;	/* puts sent to another cluster */

	/* infrequently used fields */
	int		waiting;	/* threads sleeping until a thread on
					 * this cpu returns an object to the
					 * per-cpu cache */
};
108 
/*
 * Placeholder cluster mapping, only until NUMA cluster topology
 * information is available (XXX): there is a single cluster, id 0, and
 * every object belongs to it.
 */
#define MAXCLUSTERS 1
#define myclusterid 0
#define CLUSTER_OF(obj) 0
113 
114 /*
115  * Two-level object cache consisting of NUMA cluster-level depots of
116  * fully loaded or completely empty magazines and cpu-level caches of
117  * individual objects.
118  */
119 struct objcache {
120 	char			*name;
121 
122 	/* object constructor and destructor from blank storage */
123 	objcache_ctor_fn	*ctor;
124 	objcache_dtor_fn	*dtor;
125 	void			*privdata;
126 
127 	/* interface to underlying allocator */
128 	objcache_alloc_fn	*alloc;
129 	objcache_free_fn	*free;
130 	void			*allocator_args;
131 
132 	LIST_ENTRY(objcache)	oc_next;
133 	int			exhausted;	/* oops */
134 
135 	/* NUMA-cluster level caches */
136 	struct magazinedepot	depot[MAXCLUSTERS];
137 
138 	struct percpu_objcache	cache_percpu[];		/* per-cpu caches */
139 };
140 
141 static struct spinlock objcachelist_spin;
142 static LIST_HEAD(objcachelist, objcache) allobjcaches;
143 
144 static struct magazine *
145 mag_alloc(int capacity)
146 {
147 	struct magazine *mag;
148 
149 	mag = kmalloc(__offsetof(struct magazine, objects[capacity]),
150 			M_OBJMAG, M_INTWAIT | M_ZERO);
151 	mag->capacity = capacity;
152 	mag->rounds = 0;
153 	return (mag);
154 }
155 
/*
 * Default destructor, installed by objcache_create() when the caller
 * supplies none.  Objects of such caches need no de-construction.
 */
static void
null_dtor(void *obj, void *privdata)
{
	(void)obj;
	(void)privdata;
}
165 
166 static boolean_t
167 null_ctor(void *obj, void *privdata, int ocflags)
168 {
169 	return TRUE;
170 }
171 
172 /*
173  * Create an object cache.
174  */
175 struct objcache *
176 objcache_create(const char *name, int *cluster_limit0, int nom_cache,
177 		objcache_ctor_fn *ctor, objcache_dtor_fn *dtor, void *privdata,
178 		objcache_alloc_fn *alloc, objcache_free_fn *free,
179 		void *allocator_args)
180 {
181 	struct objcache *oc;
182 	struct magazinedepot *depot;
183 	int cpuid;
184 	int nmagdepot;
185 	int mag_capacity;
186 	int i;
187 	int cluster_limit;
188 
189 	if (cluster_limit0 == NULL)
190 		cluster_limit = 0;
191 	else
192 		cluster_limit = *cluster_limit0;
193 
194 	/*
195 	 * Allocate object cache structure
196 	 */
197 	oc = kmalloc(__offsetof(struct objcache, cache_percpu[ncpus]),
198 		    M_OBJCACHE, M_WAITOK | M_ZERO);
199 	oc->name = kstrdup(name, M_TEMP);
200 	oc->ctor = ctor ? ctor : null_ctor;
201 	oc->dtor = dtor ? dtor : null_dtor;
202 	oc->privdata = privdata;
203 	oc->alloc = alloc;
204 	oc->free = free;
205 	oc->allocator_args = allocator_args;
206 
207 	/*
208 	 * Initialize depot list(s).
209 	 */
210 	depot = &oc->depot[0];
211 
212 	spin_init(&depot->spin);
213 	SLIST_INIT(&depot->fullmagazines);
214 	SLIST_INIT(&depot->emptymagazines);
215 
216 	/*
217 	 * Figure out the nominal number of free objects to cache and
218 	 * the magazine capacity.  By default we want to cache up to
219 	 * half the cluster_limit.  If there is no cluster_limit then
220 	 * we want to cache up to 128 objects.
221 	 */
222 	if (nom_cache == 0)
223 		nom_cache = cluster_limit / 2;
224 	if (cluster_limit && nom_cache > cluster_limit)
225 		nom_cache = cluster_limit;
226 	if (nom_cache == 0)
227 		nom_cache = INITIAL_MAG_CAPACITY * 2;
228 
229 	/*
230 	 * Magazine capacity for 2 active magazines per cpu plus 2
231 	 * magazines in the depot.  Minimum capacity is 4 objects.
232 	 */
233 	mag_capacity = nom_cache / (ncpus + 1) / 2 + 1;
234 	if (mag_capacity > 128)
235 		mag_capacity = 128;
236 	if (mag_capacity < 4)
237 		mag_capacity = 4;
238 	depot->magcapacity = mag_capacity;
239 
240 	/*
241 	 * The cluster_limit must be sufficient to have two magazines per
242 	 * cpu plus at least two magazines in the depot.  However, because
243 	 * partial magazines can stay on the cpus what we really need here
244 	 * is to specify the number of extra magazines we allocate for the
245 	 * depot.
246 	 */
247 	if (cluster_limit == 0) {
248 		depot->unallocated_objects = -1;
249 	} else {
250 		depot->unallocated_objects = ncpus * mag_capacity * 2 +
251 					     cluster_limit;
252 	}
253 
254 	/*
255 	 * Initialize per-cpu caches
256 	 */
257 	for (cpuid = 0; cpuid < ncpus; cpuid++) {
258 		struct percpu_objcache *cache_percpu = &oc->cache_percpu[cpuid];
259 
260 		cache_percpu->loaded_magazine = mag_alloc(mag_capacity);
261 		cache_percpu->previous_magazine = mag_alloc(mag_capacity);
262 	}
263 
264 	/*
265 	 * Compute how many empty magazines to place in the depot.  This
266 	 * determines the retained cache size and is based on nom_cache.
267 	 *
268 	 * The actual cache size is larger because there are two magazines
269 	 * for each cpu as well but those can be in any fill state so we
270 	 * just can't count them.
271 	 *
272 	 * There is a minimum of two magazines in the depot.
273 	 */
274 	nmagdepot = nom_cache / mag_capacity + 1;
275 	if (nmagdepot < 2)
276 		nmagdepot = 2;
277 	if (bootverbose) {
278 		kprintf("ndepotmags=%-3d x mag_cap=%-3d for %s\n",
279 			nmagdepot, mag_capacity, name);
280 	}
281 
282 	/*
283 	 * Put empty magazines in depot
284 	 */
285 	for (i = 0; i < nmagdepot; i++) {
286 		struct magazine *mag = mag_alloc(mag_capacity);
287 		SLIST_INSERT_HEAD(&depot->emptymagazines, mag, nextmagazine);
288 	}
289 
290 	spin_lock(&objcachelist_spin);
291 	LIST_INSERT_HEAD(&allobjcaches, oc, oc_next);
292 	spin_unlock(&objcachelist_spin);
293 
294 	if (cluster_limit0 != NULL)
295 		*cluster_limit0 = cluster_limit;
296 	return (oc);
297 }
298 
299 struct objcache *
300 objcache_create_simple(malloc_type_t mtype, size_t objsize)
301 {
302 	struct objcache_malloc_args *margs;
303 	struct objcache *oc;
304 
305 	margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
306 	margs->objsize = objsize;
307 	margs->mtype = mtype;
308 	oc = objcache_create(mtype->ks_shortdesc, NULL, 0,
309 			     NULL, NULL, NULL,
310 			     objcache_malloc_alloc, objcache_malloc_free,
311 			     margs);
312 	return (oc);
313 }
314 
315 struct objcache *
316 objcache_create_mbacked(malloc_type_t mtype, size_t objsize,
317 			int *cluster_limit, int nom_cache,
318 			objcache_ctor_fn *ctor, objcache_dtor_fn *dtor,
319 			void *privdata)
320 {
321 	struct objcache_malloc_args *margs;
322 	struct objcache *oc;
323 
324 	margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
325 	margs->objsize = objsize;
326 	margs->mtype = mtype;
327 	oc = objcache_create(mtype->ks_shortdesc,
328 			     cluster_limit, nom_cache,
329 			     ctor, dtor, privdata,
330 			     objcache_malloc_alloc, objcache_malloc_free,
331 			     margs);
332 	return(oc);
333 }
334 
335 
/*
 * Magazine fill-state predicates.  'mag' is a pointer to a struct
 * magazine; it is parenthesized so any pointer expression may be passed.
 */
#define MAGAZINE_EMPTY(mag)	((mag)->rounds == 0)
#define MAGAZINE_NOTEMPTY(mag)	((mag)->rounds != 0)
#define MAGAZINE_FULL(mag)	((mag)->rounds == (mag)->capacity)

/*
 * Exchange two magazine pointers.  Both arguments must be lvalues and
 * are evaluated more than once.  The temporary has a reserved-looking
 * name so that it cannot shadow either argument.
 */
#define	swap(x, y)	({ struct magazine *swap_tmp_ = (x);		\
			   (x) = (y); (y) = swap_tmp_; })
341 
342 /*
343  * Get an object from the object cache.
344  *
345  * WARNING!  ocflags are only used when we have to go to the underlying
346  * allocator, so we cannot depend on flags such as M_ZERO.
347  */
348 void *
349 objcache_get(struct objcache *oc, int ocflags)
350 {
351 	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
352 	struct magazine *loadedmag;
353 	struct magazine *emptymag;
354 	void *obj;
355 	struct magazinedepot *depot;
356 
357 	KKASSERT((ocflags & M_ZERO) == 0);
358 	crit_enter();
359 	++cpucache->gets_cumulative;
360 
361 retry:
362 	/*
363 	 * Loaded magazine has an object.  This is the hot path.
364 	 * It is lock-free and uses a critical section to block
365 	 * out interrupt handlers on the same processor.
366 	 */
367 	loadedmag = cpucache->loaded_magazine;
368 	if (MAGAZINE_NOTEMPTY(loadedmag)) {
369 		obj = loadedmag->objects[--loadedmag->rounds];
370 		crit_exit();
371 		return (obj);
372 	}
373 
374 	/* Previous magazine has an object. */
375 	if (MAGAZINE_NOTEMPTY(cpucache->previous_magazine)) {
376 		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
377 		loadedmag = cpucache->loaded_magazine;
378 		obj = loadedmag->objects[--loadedmag->rounds];
379 		crit_exit();
380 		return (obj);
381 	}
382 
383 	/*
384 	 * Both magazines empty.  Get a full magazine from the depot and
385 	 * move one of the empty ones to the depot.
386 	 *
387 	 * Obtain the depot spinlock.
388 	 *
389 	 * NOTE: Beyond this point, M_* flags are handled via oc->alloc()
390 	 */
391 	depot = &oc->depot[myclusterid];
392 	spin_lock(&depot->spin);
393 
394 	/*
395 	 * Recheck the cpucache after obtaining the depot spinlock.  This
396 	 * shouldn't be necessary now but don't take any chances.
397 	 */
398 	if (MAGAZINE_NOTEMPTY(cpucache->loaded_magazine) ||
399 	    MAGAZINE_NOTEMPTY(cpucache->previous_magazine)
400 	) {
401 		spin_unlock(&depot->spin);
402 		goto retry;
403 	}
404 
405 	/* Check if depot has a full magazine. */
406 	if (!SLIST_EMPTY(&depot->fullmagazines)) {
407 		emptymag = cpucache->previous_magazine;
408 		cpucache->previous_magazine = cpucache->loaded_magazine;
409 		cpucache->loaded_magazine = SLIST_FIRST(&depot->fullmagazines);
410 		SLIST_REMOVE_HEAD(&depot->fullmagazines, nextmagazine);
411 
412 		/*
413 		 * Return emptymag to the depot.
414 		 */
415 		KKASSERT(MAGAZINE_EMPTY(emptymag));
416 		SLIST_INSERT_HEAD(&depot->emptymagazines,
417 				  emptymag, nextmagazine);
418 		spin_unlock(&depot->spin);
419 		goto retry;
420 	}
421 
422 	/*
423 	 * The depot does not have any non-empty magazines.  If we have
424 	 * not hit our object limit we can allocate a new object using
425 	 * the back-end allocator.
426 	 *
427 	 * note: unallocated_objects can be initialized to -1, which has
428 	 * the effect of removing any allocation limits.
429 	 */
430 	if (depot->unallocated_objects) {
431 		--depot->unallocated_objects;
432 		spin_unlock(&depot->spin);
433 		crit_exit();
434 
435 		obj = oc->alloc(oc->allocator_args, ocflags);
436 		if (obj) {
437 			if (oc->ctor(obj, oc->privdata, ocflags))
438 				return (obj);
439 			oc->free(obj, oc->allocator_args);
440 			obj = NULL;
441 		}
442 		if (obj == NULL) {
443 			spin_lock(&depot->spin);
444 			++depot->unallocated_objects;
445 			spin_unlock(&depot->spin);
446 			if (depot->waiting)
447 				wakeup(depot);
448 
449 			crit_enter();
450 			/*
451 			 * makes debugging easier when gets_cumulative does
452 			 * not include gets_null.
453 			 */
454 			++cpucache->gets_null;
455 			--cpucache->gets_cumulative;
456 			crit_exit();
457 		}
458 		return(obj);
459 	}
460 	if (oc->exhausted == 0) {
461 		kprintf("Warning, objcache(%s): Exhausted!\n", oc->name);
462 		oc->exhausted = 1;
463 	}
464 
465 	/*
466 	 * Otherwise block if allowed to.
467 	 */
468 	if ((ocflags & (M_WAITOK|M_NULLOK)) == M_WAITOK) {
469 		++cpucache->waiting;
470 		++depot->waiting;
471 		ssleep(depot, &depot->spin, 0, "objcache_get", 0);
472 		--cpucache->waiting;
473 		--depot->waiting;
474 		spin_unlock(&depot->spin);
475 		goto retry;
476 	}
477 
478 	/*
479 	 * Otherwise fail
480 	 */
481 	++cpucache->gets_null;
482 	--cpucache->gets_cumulative;
483 	crit_exit();
484 	spin_unlock(&depot->spin);
485 	return (NULL);
486 }
487 
488 /*
489  * Wrapper for malloc allocation routines.
490  */
491 void *
492 objcache_malloc_alloc(void *allocator_args, int ocflags)
493 {
494 	struct objcache_malloc_args *alloc_args = allocator_args;
495 
496 	return (kmalloc(alloc_args->objsize, alloc_args->mtype,
497 		       ocflags & OC_MFLAGS));
498 }
499 
500 void
501 objcache_malloc_free(void *obj, void *allocator_args)
502 {
503 	struct objcache_malloc_args *alloc_args = allocator_args;
504 
505 	kfree(obj, alloc_args->mtype);
506 }
507 
/*
 * Back-end allocation hook for caches that are filled at initialization
 * time and never allocate at run time: every request fails.
 */
void *
objcache_nop_alloc(void *allocator_args, int ocflags)
{
	(void)allocator_args;
	(void)ocflags;
	return (NULL);
}
517 
/*
 * Back-end free hook matching objcache_nop_alloc(): the object is left
 * untouched.
 */
void
objcache_nop_free(void *obj, void *allocator_args)
{
	(void)obj;
	(void)allocator_args;
}
522 
523 /*
524  * Return an object to the object cache.
525  */
526 void
527 objcache_put(struct objcache *oc, void *obj)
528 {
529 	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
530 	struct magazine *loadedmag;
531 	struct magazinedepot *depot;
532 
533 	crit_enter();
534 	++cpucache->puts_cumulative;
535 
536 	if (CLUSTER_OF(obj) != myclusterid) {
537 #ifdef notyet
538 		/* use lazy IPI to send object to owning cluster XXX todo */
539 		++cpucache->puts_othercluster;
540 		crit_exit();
541 		return;
542 #endif
543 	}
544 
545 retry:
546 	/*
547 	 * Free slot available in loaded magazine.  This is the hot path.
548 	 * It is lock-free and uses a critical section to block out interrupt
549 	 * handlers on the same processor.
550 	 */
551 	loadedmag = cpucache->loaded_magazine;
552 	if (!MAGAZINE_FULL(loadedmag)) {
553 		loadedmag->objects[loadedmag->rounds++] = obj;
554 		if (cpucache->waiting)
555 			wakeup_mycpu(&oc->depot[myclusterid]);
556 		crit_exit();
557 		return;
558 	}
559 
560 	/*
561 	 * Current magazine full, but previous magazine has room.  XXX
562 	 */
563 	if (!MAGAZINE_FULL(cpucache->previous_magazine)) {
564 		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
565 		loadedmag = cpucache->loaded_magazine;
566 		loadedmag->objects[loadedmag->rounds++] = obj;
567 		if (cpucache->waiting)
568 			wakeup_mycpu(&oc->depot[myclusterid]);
569 		crit_exit();
570 		return;
571 	}
572 
573 	/*
574 	 * Both magazines full.  Get an empty magazine from the depot and
575 	 * move a full loaded magazine to the depot.  Even though the
576 	 * magazine may wind up with space available after we block on
577 	 * the spinlock, we still cycle it through to avoid the non-optimal
578 	 * corner-case.
579 	 *
580 	 * Obtain the depot spinlock.
581 	 */
582 	depot = &oc->depot[myclusterid];
583 	spin_lock(&depot->spin);
584 
585 	/*
586 	 * If an empty magazine is available in the depot, cycle it
587 	 * through and retry.
588 	 */
589 	if (!SLIST_EMPTY(&depot->emptymagazines)) {
590 		loadedmag = cpucache->previous_magazine;
591 		cpucache->previous_magazine = cpucache->loaded_magazine;
592 		cpucache->loaded_magazine = SLIST_FIRST(&depot->emptymagazines);
593 		SLIST_REMOVE_HEAD(&depot->emptymagazines, nextmagazine);
594 
595 		/*
596 		 * Return loadedmag to the depot.  Due to blocking it may
597 		 * not be entirely full and could even be empty.
598 		 */
599 		if (MAGAZINE_EMPTY(loadedmag)) {
600 			SLIST_INSERT_HEAD(&depot->emptymagazines,
601 					  loadedmag, nextmagazine);
602 			spin_unlock(&depot->spin);
603 		} else {
604 			SLIST_INSERT_HEAD(&depot->fullmagazines,
605 					  loadedmag, nextmagazine);
606 			spin_unlock(&depot->spin);
607 			if (depot->waiting)
608 				wakeup(depot);
609 		}
610 		goto retry;
611 	}
612 
613 	/*
614 	 * An empty mag is not available.  This is a corner case which can
615 	 * occur due to cpus holding partially full magazines.  Do not try
616 	 * to allocate a mag, just free the object.
617 	 */
618 	++depot->unallocated_objects;
619 	spin_unlock(&depot->spin);
620 	if (depot->waiting)
621 		wakeup(depot);
622 	crit_exit();
623 	oc->dtor(obj, oc->privdata);
624 	oc->free(obj, oc->allocator_args);
625 }
626 
627 /*
628  * The object is being put back into the cache, but the caller has
629  * indicated that the object is not in any shape to be reused and should
630  * be dtor'd immediately.
631  */
632 void
633 objcache_dtor(struct objcache *oc, void *obj)
634 {
635 	struct magazinedepot *depot;
636 
637 	depot = &oc->depot[myclusterid];
638 	spin_lock(&depot->spin);
639 	++depot->unallocated_objects;
640 	spin_unlock(&depot->spin);
641 	if (depot->waiting)
642 		wakeup(depot);
643 	oc->dtor(obj, oc->privdata);
644 	oc->free(obj, oc->allocator_args);
645 }
646 
647 /*
648  * Deallocate all objects in a magazine and free the magazine if requested.
649  * When freeit is TRUE the magazine must already be disassociated from the
650  * depot.
651  *
652  * Must be called with a critical section held when called with a per-cpu
653  * magazine.  The magazine may be indirectly modified during the loop.
654  *
655  * If the magazine moves during a dtor the operation is aborted.  This is
656  * only allowed when freeit is FALSE.
657  *
658  * The number of objects freed is returned.
659  */
660 static int
661 mag_purge(struct objcache *oc, struct magazine **magp, int freeit)
662 {
663 	struct magazine *mag = *magp;
664 	int count;
665 	void *obj;
666 
667 	count = 0;
668 	while (mag->rounds) {
669 		obj = mag->objects[--mag->rounds];
670 		oc->dtor(obj, oc->privdata);		/* MAY BLOCK */
671 		oc->free(obj, oc->allocator_args);	/* MAY BLOCK */
672 		++count;
673 
674 		/*
675 		 * Cycle for interrupts.
676 		 */
677 		if ((count & 15) == 0) {
678 			crit_exit();
679 			crit_enter();
680 		}
681 
682 		/*
683 		 * mag may have become invalid either due to dtor/free
684 		 * blocking or interrupt cycling, do not derefernce it
685 		 * until we check.
686 		 */
687 		if (*magp != mag) {
688 			kprintf("mag_purge: mag ripped out\n");
689 			break;
690 		}
691 	}
692 	if (freeit) {
693 		KKASSERT(*magp == mag);
694 		*magp = NULL;
695 		kfree(mag, M_OBJMAG);
696 	}
697 	return(count);
698 }
699 
700 /*
701  * Disassociate zero or more magazines from a magazine list associated with
702  * the depot, update the depot, and move the magazines to a temporary
703  * list.
704  *
705  * The caller must check the depot for waiters and wake it up, typically
706  * after disposing of the magazines this function loads onto the temporary
707  * list.
708  */
709 static void
710 maglist_disassociate(struct magazinedepot *depot, struct magazinelist *maglist,
711 		     struct magazinelist *tmplist, boolean_t purgeall)
712 {
713 	struct magazine *mag;
714 
715 	while ((mag = SLIST_FIRST(maglist)) != NULL) {
716 		SLIST_REMOVE_HEAD(maglist, nextmagazine);
717 		SLIST_INSERT_HEAD(tmplist, mag, nextmagazine);
718 		depot->unallocated_objects += mag->rounds;
719 	}
720 }
721 
722 /*
723  * Deallocate all magazines and their contents from the passed temporary
724  * list.  The magazines have already been accounted for by their depots.
725  *
726  * The total number of rounds freed is returned.  This number is typically
727  * only used to determine whether a wakeup on the depot is needed or not.
728  */
729 static int
730 maglist_purge(struct objcache *oc, struct magazinelist *maglist)
731 {
732 	struct magazine *mag;
733 	int count = 0;
734 
735 	/*
736 	 * can't use SLIST_FOREACH because blocking releases the depot
737 	 * spinlock
738 	 */
739 	crit_enter();
740 	while ((mag = SLIST_FIRST(maglist)) != NULL) {
741 		SLIST_REMOVE_HEAD(maglist, nextmagazine);
742 		count += mag_purge(oc, &mag, TRUE);
743 	}
744 	crit_exit();
745 	return(count);
746 }
747 
748 /*
749  * De-allocates all magazines on the full and empty magazine lists.
750  *
751  * Because this routine is called with a spinlock held, the magazines
752  * can only be disassociated and moved to a temporary list, not freed.
753  *
754  * The caller is responsible for freeing the magazines.
755  */
756 static void
757 depot_disassociate(struct magazinedepot *depot, struct magazinelist *tmplist)
758 {
759 	maglist_disassociate(depot, &depot->fullmagazines, tmplist, TRUE);
760 	maglist_disassociate(depot, &depot->emptymagazines, tmplist, TRUE);
761 }
762 
763 #ifdef notneeded
764 void
765 objcache_reclaim(struct objcache *oc)
766 {
767 	struct percpu_objcache *cache_percpu = &oc->cache_percpu[myclusterid];
768 	struct magazinedepot *depot = &oc->depot[myclusterid];
769 	struct magazinelist tmplist;
770 	int count;
771 
772 	SLIST_INIT(&tmplist);
773 	crit_enter();
774 	count = mag_purge(oc, &cache_percpu->loaded_magazine, FALSE);
775 	count += mag_purge(oc, &cache_percpu->previous_magazine, FALSE);
776 	crit_exit();
777 
778 	spin_lock(&depot->spin);
779 	depot->unallocated_objects += count;
780 	depot_disassociate(depot, &tmplist);
781 	spin_unlock(&depot->spin);
782 	count += maglist_purge(oc, &tmplist);
783 	if (count && depot->waiting)
784 		wakeup(depot);
785 }
786 #endif
787 
788 /*
789  * Try to free up some memory.  Return as soon as some free memory is found.
790  * For each object cache on the reclaim list, first try the current per-cpu
791  * cache, then the full magazine depot.
792  */
793 boolean_t
794 objcache_reclaimlist(struct objcache *oclist[], int nlist, int ocflags)
795 {
796 	struct objcache *oc;
797 	struct percpu_objcache *cpucache;
798 	struct magazinedepot *depot;
799 	struct magazinelist tmplist;
800 	int i, count;
801 
802 	kprintf("objcache_reclaimlist\n");
803 
804 	SLIST_INIT(&tmplist);
805 
806 	for (i = 0; i < nlist; i++) {
807 		oc = oclist[i];
808 		cpucache = &oc->cache_percpu[mycpuid];
809 		depot = &oc->depot[myclusterid];
810 
811 		crit_enter();
812 		count = mag_purge(oc, &cpucache->loaded_magazine, FALSE);
813 		if (count == 0)
814 			count += mag_purge(oc, &cpucache->previous_magazine, FALSE);
815 		crit_exit();
816 		if (count > 0) {
817 			spin_lock(&depot->spin);
818 			depot->unallocated_objects += count;
819 			spin_unlock(&depot->spin);
820 			if (depot->waiting)
821 				wakeup(depot);
822 			return (TRUE);
823 		}
824 		spin_lock(&depot->spin);
825 		maglist_disassociate(depot, &depot->fullmagazines,
826 				     &tmplist, FALSE);
827 		spin_unlock(&depot->spin);
828 		count = maglist_purge(oc, &tmplist);
829 		if (count > 0) {
830 			if (depot->waiting)
831 				wakeup(depot);
832 			return (TRUE);
833 		}
834 	}
835 	return (FALSE);
836 }
837 
838 /*
839  * Destroy an object cache.  Must have no existing references.
840  */
841 void
842 objcache_destroy(struct objcache *oc)
843 {
844 	struct percpu_objcache *cache_percpu;
845 	struct magazinedepot *depot;
846 	int clusterid, cpuid;
847 	struct magazinelist tmplist;
848 
849 	spin_lock(&objcachelist_spin);
850 	LIST_REMOVE(oc, oc_next);
851 	spin_unlock(&objcachelist_spin);
852 
853 	SLIST_INIT(&tmplist);
854 	for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++) {
855 		depot = &oc->depot[clusterid];
856 		spin_lock(&depot->spin);
857 		depot_disassociate(depot, &tmplist);
858 		spin_unlock(&depot->spin);
859 	}
860 	maglist_purge(oc, &tmplist);
861 
862 	for (cpuid = 0; cpuid < ncpus; cpuid++) {
863 		cache_percpu = &oc->cache_percpu[cpuid];
864 
865 		crit_enter();
866 		mag_purge(oc, &cache_percpu->loaded_magazine, TRUE);
867 		mag_purge(oc, &cache_percpu->previous_magazine, TRUE);
868 		crit_exit();
869 		cache_percpu->loaded_magazine = NULL;
870 		cache_percpu->previous_magazine = NULL;
871 		/* don't bother adjusting depot->unallocated_objects */
872 	}
873 
874 	kfree(oc->name, M_TEMP);
875 	kfree(oc, M_OBJCACHE);
876 }
877 
878 #if 0
879 /*
880  * Populate the per-cluster depot with elements from a linear block
881  * of memory.  Must be called for individually for each cluster.
882  * Populated depots should not be destroyed.
883  */
884 void
885 objcache_populate_linear(struct objcache *oc, void *base, int nelts, int size)
886 {
887 	char *p = base;
888 	char *end = (char *)base + (nelts * size);
889 	struct magazinedepot *depot = &oc->depot[myclusterid];
890 	struct magazine *emptymag = mag_alloc(depot->magcapcity);
891 
892 	while (p < end) {
893 		emptymag->objects[emptymag->rounds++] = p;
894 		if (MAGAZINE_FULL(emptymag)) {
895 			spin_lock_wr(&depot->spin);
896 			SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
897 					  nextmagazine);
898 			depot->unallocated_objects += emptymag->rounds;
899 			spin_unlock_wr(&depot->spin);
900 			if (depot->waiting)
901 				wakeup(depot);
902 			emptymag = mag_alloc(depot->magcapacity);
903 		}
904 		p += size;
905 	}
906 	if (MAGAZINE_EMPTY(emptymag)) {
907 		crit_enter();
908 		mag_purge(oc, &emptymag, TRUE);
909 		crit_exit();
910 	} else {
911 		spin_lock_wr(&depot->spin);
912 		SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
913 				  nextmagazine);
914 		depot->unallocated_objects += emptymag->rounds;
915 		spin_unlock_wr(&depot->spin);
916 		if (depot->waiting)
917 			wakeup(depot);
918 		emptymag = mag_alloc(depot->magcapacity);
919 	}
920 }
921 #endif
922 
923 #if 0
924 /*
925  * Check depot contention once a minute.
926  * 2 contested locks per second allowed.
927  */
928 static int objcache_rebalance_period;
929 static const int objcache_contention_rate = 120;
930 static struct callout objcache_callout;
931 
932 #define MAXMAGSIZE 512
933 
934 /*
935  * Check depot contention and increase magazine size if necessary.
936  */
937 static void
938 objcache_timer(void *dummy)
939 {
940 	struct objcache *oc;
941 	struct magazinedepot *depot;
942 	struct magazinelist tmplist;
943 
944 	XXX we need to detect when an objcache is destroyed out from under
945 	    us XXX
946 
947 	SLIST_INIT(&tmplist);
948 
949 	spin_lock_wr(&objcachelist_spin);
950 	LIST_FOREACH(oc, &allobjcaches, oc_next) {
951 		depot = &oc->depot[myclusterid];
952 		if (depot->magcapacity < MAXMAGSIZE) {
953 			if (depot->contested > objcache_contention_rate) {
954 				spin_lock_wr(&depot->spin);
955 				depot_disassociate(depot, &tmplist);
956 				depot->magcapacity *= 2;
957 				spin_unlock_wr(&depot->spin);
958 				kprintf("objcache_timer: increasing cache %s"
959 				       " magsize to %d, contested %d times\n",
960 				    oc->name, depot->magcapacity,
961 				    depot->contested);
962 			}
963 			depot->contested = 0;
964 		}
965 		spin_unlock_wr(&objcachelist_spin);
966 		if (maglist_purge(oc, &tmplist) > 0 && depot->waiting)
967 			wakeup(depot);
968 		spin_lock_wr(&objcachelist_spin);
969 	}
970 	spin_unlock_wr(&objcachelist_spin);
971 
972 	callout_reset(&objcache_callout, objcache_rebalance_period,
973 		      objcache_timer, NULL);
974 }
975 
976 #endif
977 
978 static void
979 objcache_init(void)
980 {
981 	spin_init(&objcachelist_spin);
982 #if 0
983 	callout_init_mp(&objcache_callout);
984 	objcache_rebalance_period = 60 * hz;
985 	callout_reset(&objcache_callout, objcache_rebalance_period,
986 		      objcache_timer, NULL);
987 #endif
988 }
989 SYSINIT(objcache, SI_BOOT2_OBJCACHE, SI_ORDER_FIRST, objcache_init, 0);
990