xref: /dragonfly/sys/kern/kern_objcache.c (revision 267c04fd)
1 /*
2  * Copyright (c) 2005 Jeffrey M. Hsu.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Jeffrey M. Hsu.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of The DragonFly Project nor the names of its
16  *    contributors may be used to endorse or promote products derived
17  *    from this software without specific, prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/param.h>
34 #include <sys/kernel.h>
35 #include <sys/systm.h>
36 #include <sys/callout.h>
37 #include <sys/globaldata.h>
38 #include <sys/malloc.h>
39 #include <sys/queue.h>
40 #include <sys/objcache.h>
41 #include <sys/spinlock.h>
42 #include <sys/thread.h>
43 #include <sys/thread2.h>
44 #include <sys/spinlock2.h>
45 
/*
 * Malloc types: M_OBJCACHE backs the objcache structure itself and the
 * malloc-wrapper argument blocks, M_OBJMAG backs the magazines.
 */
static MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");
static MALLOC_DEFINE(M_OBJMAG, "objcache magazine", "Object Cache Magazine");

/*
 * objcache_create() uses twice this value as the nominal cache size when
 * neither nom_cache nor cluster_limit is specified.
 */
#define	INITIAL_MAG_CAPACITY	64
50 
/*
 * A magazine is a fixed-capacity stack of object pointers.  Objects are
 * pushed at objects[rounds++] and popped from objects[--rounds], so the
 * valid entries are objects[0 .. rounds - 1].
 */
struct magazine {
	int			 rounds;	/* objects currently held */
	int			 capacity;	/* number of slots in objects[] */
	SLIST_ENTRY(magazine)	 nextmagazine;	/* linkage on a magazinelist */
	void			*objects[];	/* sized at allocation time */
};
57 
/* Singly-linked list head type used to chain magazines together. */
SLIST_HEAD(magazinelist, magazine);

/* Number of bytes in a magazine that precede the objects[] array. */
#define MAGAZINE_HDRSIZE	__offsetof(struct magazine, objects[0])
/*
 * Requested bounds on magazine capacity.  objcache_init() passes these
 * through mag_capacity_align() to produce the limits actually enforced.
 */
#define MAGAZINE_CAPACITY_MAX	128
#define MAGAZINE_CAPACITY_MIN	4
63 
/*
 * per-cluster cache of magazines
 *
 * All fields in this structure are protected by the spinlock.
 */
struct magazinedepot {
	/*
	 * The per-cpu object caches only exchange completely full or
	 * completely empty magazines with the depot layer, so the depot
	 * only has to cache these two types of magazines.
	 */
	struct magazinelist	fullmagazines;
	struct magazinelist	emptymagazines;
	int			magcapacity;	/* capacity of each magazine */

	/* protect this structure */
	struct spinlock		spin;

	/*
	 * Objects that may still be obtained from the back-end allocator
	 * before the limit is hit.  objcache_create() sets this to -1 when
	 * there is no limit.
	 */
	int			unallocated_objects;

	/* infrequently used fields */
	int			waiting;	/* waiting for another cpu to
						 * return a full magazine to
						 * the depot */
	int			contested;	/* depot contention count */
} __cachealign;
91 
/*
 * per-cpu object cache
 *
 * Each cpu owns two magazines.  objcache_get() and objcache_put() work
 * on the loaded magazine first and swap in the previous magazine when
 * the loaded one cannot satisfy the request.
 *
 * All fields in this structure are protected by crit_enter().
 */
struct percpu_objcache {
	struct magazine	*loaded_magazine;	/* active magazine */
	struct magazine	*previous_magazine;	/* backup magazine */

	/* statistics */
	int		gets_cumulative;	/* total calls to get */
	int		gets_null;		/* objcache_get returned NULL */
	int		puts_cumulative;	/* total calls to put */
	int		puts_othercluster;	/* returned to other cluster */

	/* infrequently used fields */
	int		waiting;	/* waiting for a thread on this cpu to
					 * return an obj to the per-cpu cache */
} __cachealign;
110 
/*
 * only until we have NUMA cluster topology information XXX
 *
 * Until then there is a single cluster: every cpu and every object maps
 * to cluster 0.
 */
#define MAXCLUSTERS 1
#define myclusterid 0
#define CLUSTER_OF(obj) 0
115 
/*
 * Two-level object cache consisting of NUMA cluster-level depots of
 * fully loaded or completely empty magazines and cpu-level caches of
 * individual objects.
 *
 * The structure is allocated with room for ncpus entries in the trailing
 * cache_percpu[] array.
 */
struct objcache {
	char			*name;		/* kstrdup()'d copy (M_TEMP) */

	/* object constructor and destructor from blank storage */
	objcache_ctor_fn	*ctor;
	objcache_dtor_fn	*dtor;
	void			*privdata;	/* passed to ctor and dtor */

	/* interface to underlying allocator */
	objcache_alloc_fn	*alloc;
	objcache_free_fn	*free;
	void			*allocator_args; /* passed to alloc and free */

	LIST_ENTRY(objcache)	oc_next;	/* linkage on allobjcaches */
	int			exhausted;	/* set once the "Exhausted!"
						 * warning has been printed */

	/* NUMA-cluster level caches */
	struct magazinedepot	depot[MAXCLUSTERS];

	struct percpu_objcache	cache_percpu[];		/* per-cpu caches */
};
142 
/* List of all object caches; objcachelist_spin guards the list. */
static struct spinlock objcachelist_spin;
static LIST_HEAD(objcachelist, objcache) allobjcaches;
/* Cache-line-aligned magazine capacity bounds, set by objcache_init(). */
static int magazine_capmin;
static int magazine_capmax;
147 
148 static struct magazine *
149 mag_alloc(int capacity)
150 {
151 	struct magazine *mag;
152 	int size;
153 
154 	size = __offsetof(struct magazine, objects[capacity]);
155 	KASSERT(size > 0 && (size & __VM_CACHELINE_MASK) == 0,
156 	    ("magazine size is not multiple cache line size"));
157 
158 	mag = kmalloc_cachealign(size, M_OBJMAG, M_INTWAIT | M_ZERO);
159 	mag->capacity = capacity;
160 	mag->rounds = 0;
161 	return (mag);
162 }
163 
164 static int
165 mag_capacity_align(int mag_capacity)
166 {
167 	int mag_size;
168 
169 	mag_size = __VM_CACHELINE_ALIGN(
170 	    __offsetof(struct magazine, objects[mag_capacity]));
171 	mag_capacity = (mag_size - MAGAZINE_HDRSIZE) / sizeof(void *);
172 
173 	return mag_capacity;
174 }
175 
/*
 * Default destructor for objects that need no tear-down.
 * objcache_create() installs it when the caller passes a NULL dtor.
 */
static void
null_dtor(void *obj, void *privdata)
{
	/* nothing to tear down */
	return;
}
185 
186 static boolean_t
187 null_ctor(void *obj, void *privdata, int ocflags)
188 {
189 	return TRUE;
190 }
191 
192 /*
193  * Create an object cache.
194  */
195 struct objcache *
196 objcache_create(const char *name, int cluster_limit, int nom_cache,
197 		objcache_ctor_fn *ctor, objcache_dtor_fn *dtor, void *privdata,
198 		objcache_alloc_fn *alloc, objcache_free_fn *free,
199 		void *allocator_args)
200 {
201 	struct objcache *oc;
202 	struct magazinedepot *depot;
203 	int cpuid;
204 	int nmagdepot;
205 	int mag_capacity;
206 	int i;
207 
208 	/*
209 	 * Allocate object cache structure
210 	 */
211 	oc = kmalloc_cachealign(
212 	    __offsetof(struct objcache, cache_percpu[ncpus]),
213 	    M_OBJCACHE, M_WAITOK | M_ZERO);
214 	oc->name = kstrdup(name, M_TEMP);
215 	oc->ctor = ctor ? ctor : null_ctor;
216 	oc->dtor = dtor ? dtor : null_dtor;
217 	oc->privdata = privdata;
218 	oc->alloc = alloc;
219 	oc->free = free;
220 	oc->allocator_args = allocator_args;
221 
222 	/*
223 	 * Initialize depot list(s).
224 	 */
225 	depot = &oc->depot[0];
226 
227 	spin_init(&depot->spin, "objcachedepot");
228 	SLIST_INIT(&depot->fullmagazines);
229 	SLIST_INIT(&depot->emptymagazines);
230 
231 	/*
232 	 * Figure out the nominal number of free objects to cache and
233 	 * the magazine capacity.  By default we want to cache up to
234 	 * half the cluster_limit.  If there is no cluster_limit then
235 	 * we want to cache up to 128 objects.
236 	 */
237 	if (nom_cache == 0)
238 		nom_cache = cluster_limit / 2;
239 	if (cluster_limit && nom_cache > cluster_limit)
240 		nom_cache = cluster_limit;
241 	if (nom_cache == 0)
242 		nom_cache = INITIAL_MAG_CAPACITY * 2;
243 
244 	/*
245 	 * Magazine capacity for 2 active magazines per cpu plus 2
246 	 * magazines in the depot.
247 	 */
248 	mag_capacity = mag_capacity_align(nom_cache / (ncpus + 1) / 2 + 1);
249 	if (mag_capacity > magazine_capmax)
250 		mag_capacity = magazine_capmax;
251 	else if (mag_capacity < magazine_capmin)
252 		mag_capacity = magazine_capmin;
253 	depot->magcapacity = mag_capacity;
254 
255 	/*
256 	 * The cluster_limit must be sufficient to have two magazines per
257 	 * cpu plus at least two magazines in the depot.  However, because
258 	 * partial magazines can stay on the cpus what we really need here
259 	 * is to specify the number of extra magazines we allocate for the
260 	 * depot.
261 	 */
262 	if (cluster_limit == 0) {
263 		depot->unallocated_objects = -1;
264 	} else {
265 		depot->unallocated_objects = ncpus * mag_capacity * 2 +
266 					     cluster_limit;
267 	}
268 
269 	/*
270 	 * Initialize per-cpu caches
271 	 */
272 	for (cpuid = 0; cpuid < ncpus; cpuid++) {
273 		struct percpu_objcache *cache_percpu = &oc->cache_percpu[cpuid];
274 
275 		cache_percpu->loaded_magazine = mag_alloc(mag_capacity);
276 		cache_percpu->previous_magazine = mag_alloc(mag_capacity);
277 	}
278 
279 	/*
280 	 * Compute how many empty magazines to place in the depot.  This
281 	 * determines the retained cache size and is based on nom_cache.
282 	 *
283 	 * The actual cache size is larger because there are two magazines
284 	 * for each cpu as well but those can be in any fill state so we
285 	 * just can't count them.
286 	 *
287 	 * There is a minimum of two magazines in the depot.
288 	 */
289 	nmagdepot = nom_cache / mag_capacity + 1;
290 	if (nmagdepot < 2)
291 		nmagdepot = 2;
292 
293 	/*
294 	 * Put empty magazines in depot
295 	 */
296 	for (i = 0; i < nmagdepot; i++) {
297 		struct magazine *mag = mag_alloc(mag_capacity);
298 		SLIST_INSERT_HEAD(&depot->emptymagazines, mag, nextmagazine);
299 	}
300 
301 	spin_lock(&objcachelist_spin);
302 	LIST_INSERT_HEAD(&allobjcaches, oc, oc_next);
303 	spin_unlock(&objcachelist_spin);
304 
305 	return (oc);
306 }
307 
308 struct objcache *
309 objcache_create_simple(malloc_type_t mtype, size_t objsize)
310 {
311 	struct objcache_malloc_args *margs;
312 	struct objcache *oc;
313 
314 	margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
315 	margs->objsize = objsize;
316 	margs->mtype = mtype;
317 	oc = objcache_create(mtype->ks_shortdesc, 0, 0,
318 			     NULL, NULL, NULL,
319 			     objcache_malloc_alloc, objcache_malloc_free,
320 			     margs);
321 	return (oc);
322 }
323 
324 struct objcache *
325 objcache_create_mbacked(malloc_type_t mtype, size_t objsize,
326 			int cluster_limit, int nom_cache,
327 			objcache_ctor_fn *ctor, objcache_dtor_fn *dtor,
328 			void *privdata)
329 {
330 	struct objcache_malloc_args *margs;
331 	struct objcache *oc;
332 
333 	margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
334 	margs->objsize = objsize;
335 	margs->mtype = mtype;
336 	oc = objcache_create(mtype->ks_shortdesc,
337 			     cluster_limit, nom_cache,
338 			     ctor, dtor, privdata,
339 			     objcache_malloc_alloc, objcache_malloc_free,
340 			     margs);
341 	return(oc);
342 }
343 
344 
/*
 * Magazine fill-state predicates.  The argument is a pointer to a
 * struct magazine; it is parenthesized in the expansion so that any
 * pointer-valued expression (for example &mag) can be passed safely.
 */
#define MAGAZINE_EMPTY(mag)	((mag)->rounds == 0)
#define MAGAZINE_NOTEMPTY(mag)	((mag)->rounds != 0)
#define MAGAZINE_FULL(mag)	((mag)->rounds == (mag)->capacity)

/*
 * Exchange two magazine pointers.  Both arguments must be modifiable
 * lvalues of type struct magazine * and are evaluated more than once.
 * The temporary has a name unlikely to collide with a caller's variable.
 */
#define	swap(x, y)	({ struct magazine *swap_tmp_ = (x);		\
			   (x) = (y); (y) = swap_tmp_; })
350 
/*
 * Get an object from the object cache.
 *
 * Sources are tried in this order: the current cpu's loaded magazine,
 * its previous magazine, a full magazine taken from the depot, and
 * finally the back-end allocator (oc->alloc followed by oc->ctor) when
 * the depot's object budget permits.
 *
 * If the budget is exhausted the call sleeps on the depot and retries
 * when ocflags contains M_WAITOK without M_NULLOK; otherwise it returns
 * NULL.  NULL is also returned, without sleeping, when the back-end
 * allocator or the constructor fails.
 *
 * ocflags must not contain M_ZERO (asserted).
 *
 * WARNING!  ocflags are only used when we have to go to the underlying
 * allocator, so we cannot depend on flags such as M_ZERO.
 */
void *
objcache_get(struct objcache *oc, int ocflags)
{
	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
	struct magazine *loadedmag;
	struct magazine *emptymag;
	void *obj;
	struct magazinedepot *depot;

	KKASSERT((ocflags & M_ZERO) == 0);
	crit_enter();
	++cpucache->gets_cumulative;

retry:
	/*
	 * Loaded magazine has an object.  This is the hot path.
	 * It is lock-free and uses a critical section to block
	 * out interrupt handlers on the same processor.
	 */
	loadedmag = cpucache->loaded_magazine;
	if (MAGAZINE_NOTEMPTY(loadedmag)) {
		obj = loadedmag->objects[--loadedmag->rounds];
		crit_exit();
		return (obj);
	}

	/* Previous magazine has an object. */
	if (MAGAZINE_NOTEMPTY(cpucache->previous_magazine)) {
		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
		loadedmag = cpucache->loaded_magazine;
		obj = loadedmag->objects[--loadedmag->rounds];
		crit_exit();
		return (obj);
	}

	/*
	 * Both magazines empty.  Get a full magazine from the depot and
	 * move one of the empty ones to the depot.
	 *
	 * Obtain the depot spinlock.
	 *
	 * NOTE: Beyond this point, M_* flags are handled via oc->alloc()
	 */
	depot = &oc->depot[myclusterid];
	spin_lock(&depot->spin);

	/*
	 * Recheck the cpucache after obtaining the depot spinlock.  This
	 * shouldn't be necessary now but don't take any chances.
	 */
	if (MAGAZINE_NOTEMPTY(cpucache->loaded_magazine) ||
	    MAGAZINE_NOTEMPTY(cpucache->previous_magazine)
	) {
		spin_unlock(&depot->spin);
		goto retry;
	}

	/* Check if depot has a full magazine. */
	if (!SLIST_EMPTY(&depot->fullmagazines)) {
		/*
		 * Rotate: the depot's full magazine becomes loaded, the
		 * old loaded magazine becomes previous, and the old
		 * previous magazine goes back to the depot.
		 */
		emptymag = cpucache->previous_magazine;
		cpucache->previous_magazine = cpucache->loaded_magazine;
		cpucache->loaded_magazine = SLIST_FIRST(&depot->fullmagazines);
		SLIST_REMOVE_HEAD(&depot->fullmagazines, nextmagazine);

		/*
		 * Return emptymag to the depot.
		 */
		KKASSERT(MAGAZINE_EMPTY(emptymag));
		SLIST_INSERT_HEAD(&depot->emptymagazines,
				  emptymag, nextmagazine);
		spin_unlock(&depot->spin);
		goto retry;
	}

	/*
	 * The depot does not have any non-empty magazines.  If we have
	 * not hit our object limit we can allocate a new object using
	 * the back-end allocator.
	 *
	 * note: unallocated_objects can be initialized to -1, which has
	 * the effect of removing any allocation limits.
	 */
	if (depot->unallocated_objects) {
		/*
		 * Reserve a slot in the budget, then drop the spinlock
		 * and the critical section before calling the allocator.
		 */
		--depot->unallocated_objects;
		spin_unlock(&depot->spin);
		crit_exit();

		obj = oc->alloc(oc->allocator_args, ocflags);
		if (obj) {
			if (oc->ctor(obj, oc->privdata, ocflags))
				return (obj);
			/* Constructor failed: release the raw storage. */
			oc->free(obj, oc->allocator_args);
			obj = NULL;
		}
		if (obj == NULL) {
			/* Give the reserved slot back to the budget. */
			spin_lock(&depot->spin);
			++depot->unallocated_objects;
			spin_unlock(&depot->spin);
			if (depot->waiting)
				wakeup(depot);

			crit_enter();
			/*
			 * makes debugging easier when gets_cumulative does
			 * not include gets_null.
			 */
			++cpucache->gets_null;
			--cpucache->gets_cumulative;
			crit_exit();
		}
		return(obj);
	}
	/* Budget exhausted; warn once per cache. */
	if (oc->exhausted == 0) {
		kprintf("Warning, objcache(%s): Exhausted!\n", oc->name);
		oc->exhausted = 1;
	}

	/*
	 * Otherwise block if allowed to.
	 */
	if ((ocflags & (M_WAITOK|M_NULLOK)) == M_WAITOK) {
		++cpucache->waiting;
		++depot->waiting;
		ssleep(depot, &depot->spin, 0, "objcache_get", 0);
		--cpucache->waiting;
		--depot->waiting;
		spin_unlock(&depot->spin);
		goto retry;
	}

	/*
	 * Otherwise fail
	 */
	++cpucache->gets_null;
	--cpucache->gets_cumulative;
	crit_exit();
	spin_unlock(&depot->spin);
	return (NULL);
}
496 
497 /*
498  * Wrapper for malloc allocation routines.
499  */
500 void *
501 objcache_malloc_alloc(void *allocator_args, int ocflags)
502 {
503 	struct objcache_malloc_args *alloc_args = allocator_args;
504 
505 	return (kmalloc(alloc_args->objsize, alloc_args->mtype,
506 		       ocflags & OC_MFLAGS));
507 }
508 
509 /*
510  * Wrapper for malloc allocation routines, with initial zeroing
511  * (but objects are not zerod on reuse from cache).
512  */
513 void *
514 objcache_malloc_alloc_zero(void *allocator_args, int ocflags)
515 {
516 	struct objcache_malloc_args *alloc_args = allocator_args;
517 
518 	return (kmalloc(alloc_args->objsize, alloc_args->mtype,
519 		       (ocflags & OC_MFLAGS) | M_ZERO));
520 }
521 
522 
523 void
524 objcache_malloc_free(void *obj, void *allocator_args)
525 {
526 	struct objcache_malloc_args *alloc_args = allocator_args;
527 
528 	kfree(obj, alloc_args->mtype);
529 }
530 
/*
 * Back-end allocator for caches that are filled once at initialization
 * time and never allocate at run time: it always returns NULL.
 */
void *
objcache_nop_alloc(void *allocator_args, int ocflags)
{
	void *none = NULL;

	return (none);
}
540 
/*
 * Release counterpart of objcache_nop_alloc(): does nothing with obj.
 */
void
objcache_nop_free(void *obj, void *allocator_args)
{
	return;
}
545 
/*
 * Return an object to the object cache.
 *
 * The object is stored in the current cpu's loaded magazine if it has
 * room, else in the previous magazine.  When both are full an empty
 * magazine is fetched from the depot and the operation is retried.  If
 * the depot has no empty magazine the object is destructed and freed
 * through the back-end allocator instead of being cached.
 */
void
objcache_put(struct objcache *oc, void *obj)
{
	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
	struct magazine *loadedmag;
	struct magazinedepot *depot;

	crit_enter();
	++cpucache->puts_cumulative;

	/* Cross-cluster returns are not implemented; this is a no-op. */
	if (CLUSTER_OF(obj) != myclusterid) {
#ifdef notyet
		/* use lazy IPI to send object to owning cluster XXX todo */
		++cpucache->puts_othercluster;
		crit_exit();
		return;
#endif
	}

retry:
	/*
	 * Free slot available in loaded magazine.  This is the hot path.
	 * It is lock-free and uses a critical section to block out interrupt
	 * handlers on the same processor.
	 */
	loadedmag = cpucache->loaded_magazine;
	if (!MAGAZINE_FULL(loadedmag)) {
		loadedmag->objects[loadedmag->rounds++] = obj;
		if (cpucache->waiting)
			wakeup_mycpu(&oc->depot[myclusterid]);
		crit_exit();
		return;
	}

	/*
	 * Current magazine full, but previous magazine has room.  XXX
	 */
	if (!MAGAZINE_FULL(cpucache->previous_magazine)) {
		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
		loadedmag = cpucache->loaded_magazine;
		loadedmag->objects[loadedmag->rounds++] = obj;
		if (cpucache->waiting)
			wakeup_mycpu(&oc->depot[myclusterid]);
		crit_exit();
		return;
	}

	/*
	 * Both magazines full.  Get an empty magazine from the depot and
	 * move a full loaded magazine to the depot.  Even though the
	 * magazine may wind up with space available after we block on
	 * the spinlock, we still cycle it through to avoid the non-optimal
	 * corner-case.
	 *
	 * Obtain the depot spinlock.
	 */
	depot = &oc->depot[myclusterid];
	spin_lock(&depot->spin);

	/*
	 * If an empty magazine is available in the depot, cycle it
	 * through and retry.
	 */
	if (!SLIST_EMPTY(&depot->emptymagazines)) {
		/*
		 * Rotate: the depot's empty magazine becomes loaded, the
		 * old loaded magazine becomes previous, and the old
		 * previous magazine (held in loadedmag) goes to the depot.
		 */
		loadedmag = cpucache->previous_magazine;
		cpucache->previous_magazine = cpucache->loaded_magazine;
		cpucache->loaded_magazine = SLIST_FIRST(&depot->emptymagazines);
		SLIST_REMOVE_HEAD(&depot->emptymagazines, nextmagazine);

		/*
		 * Return loadedmag to the depot.  Due to blocking it may
		 * not be entirely full and could even be empty.
		 */
		if (MAGAZINE_EMPTY(loadedmag)) {
			SLIST_INSERT_HEAD(&depot->emptymagazines,
					  loadedmag, nextmagazine);
			spin_unlock(&depot->spin);
		} else {
			SLIST_INSERT_HEAD(&depot->fullmagazines,
					  loadedmag, nextmagazine);
			spin_unlock(&depot->spin);
			/* Objects became available to depot sleepers. */
			if (depot->waiting)
				wakeup(depot);
		}
		goto retry;
	}

	/*
	 * An empty mag is not available.  This is a corner case which can
	 * occur due to cpus holding partially full magazines.  Do not try
	 * to allocate a mag, just free the object.
	 */
	++depot->unallocated_objects;
	spin_unlock(&depot->spin);
	if (depot->waiting)
		wakeup(depot);
	crit_exit();
	oc->dtor(obj, oc->privdata);
	oc->free(obj, oc->allocator_args);
}
649 
650 /*
651  * The object is being put back into the cache, but the caller has
652  * indicated that the object is not in any shape to be reused and should
653  * be dtor'd immediately.
654  */
655 void
656 objcache_dtor(struct objcache *oc, void *obj)
657 {
658 	struct magazinedepot *depot;
659 
660 	depot = &oc->depot[myclusterid];
661 	spin_lock(&depot->spin);
662 	++depot->unallocated_objects;
663 	spin_unlock(&depot->spin);
664 	if (depot->waiting)
665 		wakeup(depot);
666 	oc->dtor(obj, oc->privdata);
667 	oc->free(obj, oc->allocator_args);
668 }
669 
/*
 * Deallocate all objects in a magazine and free the magazine if requested.
 * When freeit is TRUE the magazine must already be disassociated from the
 * depot.
 *
 * magp points at the location holding the magazine pointer.  It is
 * re-read after every object to detect the magazine being replaced, and
 * it is set to NULL when the magazine is freed.
 *
 * Must be called with a critical section held when called with a per-cpu
 * magazine.  The magazine may be indirectly modified during the loop.
 *
 * If the magazine moves during a dtor the operation is aborted.  This is
 * only allowed when freeit is FALSE.
 *
 * The number of objects freed is returned.
 */
static int
mag_purge(struct objcache *oc, struct magazine **magp, int freeit)
{
	struct magazine *mag = *magp;
	int count;
	void *obj;

	count = 0;
	while (mag->rounds) {
		obj = mag->objects[--mag->rounds];
		oc->dtor(obj, oc->privdata);		/* MAY BLOCK */
		oc->free(obj, oc->allocator_args);	/* MAY BLOCK */
		++count;

		/*
		 * Cycle for interrupts.
		 */
		if ((count & 15) == 0) {
			crit_exit();
			crit_enter();
		}

		/*
		 * mag may have become invalid either due to dtor/free
		 * blocking or interrupt cycling, do not dereference it
		 * until we check.
		 */
		if (*magp != mag) {
			kprintf("mag_purge: mag ripped out\n");
			break;
		}
	}
	if (freeit) {
		/* The magazine must not have been replaced in this mode. */
		KKASSERT(*magp == mag);
		*magp = NULL;
		kfree(mag, M_OBJMAG);
	}
	return(count);
}
722 
723 /*
724  * Disassociate zero or more magazines from a magazine list associated with
725  * the depot, update the depot, and move the magazines to a temporary
726  * list.
727  *
728  * The caller must check the depot for waiters and wake it up, typically
729  * after disposing of the magazines this function loads onto the temporary
730  * list.
731  */
732 static void
733 maglist_disassociate(struct magazinedepot *depot, struct magazinelist *maglist,
734 		     struct magazinelist *tmplist, boolean_t purgeall)
735 {
736 	struct magazine *mag;
737 
738 	while ((mag = SLIST_FIRST(maglist)) != NULL) {
739 		SLIST_REMOVE_HEAD(maglist, nextmagazine);
740 		SLIST_INSERT_HEAD(tmplist, mag, nextmagazine);
741 		depot->unallocated_objects += mag->rounds;
742 	}
743 }
744 
745 /*
746  * Deallocate all magazines and their contents from the passed temporary
747  * list.  The magazines have already been accounted for by their depots.
748  *
749  * The total number of rounds freed is returned.  This number is typically
750  * only used to determine whether a wakeup on the depot is needed or not.
751  */
752 static int
753 maglist_purge(struct objcache *oc, struct magazinelist *maglist)
754 {
755 	struct magazine *mag;
756 	int count = 0;
757 
758 	/*
759 	 * can't use SLIST_FOREACH because blocking releases the depot
760 	 * spinlock
761 	 */
762 	crit_enter();
763 	while ((mag = SLIST_FIRST(maglist)) != NULL) {
764 		SLIST_REMOVE_HEAD(maglist, nextmagazine);
765 		count += mag_purge(oc, &mag, TRUE);
766 	}
767 	crit_exit();
768 	return(count);
769 }
770 
771 /*
772  * De-allocates all magazines on the full and empty magazine lists.
773  *
774  * Because this routine is called with a spinlock held, the magazines
775  * can only be disassociated and moved to a temporary list, not freed.
776  *
777  * The caller is responsible for freeing the magazines.
778  */
779 static void
780 depot_disassociate(struct magazinedepot *depot, struct magazinelist *tmplist)
781 {
782 	maglist_disassociate(depot, &depot->fullmagazines, tmplist, TRUE);
783 	maglist_disassociate(depot, &depot->emptymagazines, tmplist, TRUE);
784 }
785 
#ifdef notneeded
/*
 * Release everything cached for this cpu and cluster: purge both of the
 * current cpu's magazines, then strip the depot of all its magazines and
 * free them.  Threads sleeping on the depot are woken if any object was
 * freed.
 */
void
objcache_reclaim(struct objcache *oc)
{
	/*
	 * cache_percpu[] is indexed by cpu, not by cluster, so the entry
	 * for the running cpu must be selected with mycpuid.
	 */
	struct percpu_objcache *cache_percpu = &oc->cache_percpu[mycpuid];
	struct magazinedepot *depot = &oc->depot[myclusterid];
	struct magazinelist tmplist;
	int count;

	SLIST_INIT(&tmplist);
	crit_enter();
	count = mag_purge(oc, &cache_percpu->loaded_magazine, FALSE);
	count += mag_purge(oc, &cache_percpu->previous_magazine, FALSE);
	crit_exit();

	/*
	 * Credit the per-cpu objects just freed, then unlink the depot's
	 * magazines under the lock and free them after dropping it.
	 */
	spin_lock(&depot->spin);
	depot->unallocated_objects += count;
	depot_disassociate(depot, &tmplist);
	spin_unlock(&depot->spin);
	count += maglist_purge(oc, &tmplist);
	if (count && depot->waiting)
		wakeup(depot);
}
#endif
810 
811 /*
812  * Try to free up some memory.  Return as soon as some free memory is found.
813  * For each object cache on the reclaim list, first try the current per-cpu
814  * cache, then the full magazine depot.
815  */
816 boolean_t
817 objcache_reclaimlist(struct objcache *oclist[], int nlist, int ocflags)
818 {
819 	struct objcache *oc;
820 	struct percpu_objcache *cpucache;
821 	struct magazinedepot *depot;
822 	struct magazinelist tmplist;
823 	int i, count;
824 
825 	kprintf("objcache_reclaimlist\n");
826 
827 	SLIST_INIT(&tmplist);
828 
829 	for (i = 0; i < nlist; i++) {
830 		oc = oclist[i];
831 		cpucache = &oc->cache_percpu[mycpuid];
832 		depot = &oc->depot[myclusterid];
833 
834 		crit_enter();
835 		count = mag_purge(oc, &cpucache->loaded_magazine, FALSE);
836 		if (count == 0)
837 			count += mag_purge(oc, &cpucache->previous_magazine, FALSE);
838 		crit_exit();
839 		if (count > 0) {
840 			spin_lock(&depot->spin);
841 			depot->unallocated_objects += count;
842 			spin_unlock(&depot->spin);
843 			if (depot->waiting)
844 				wakeup(depot);
845 			return (TRUE);
846 		}
847 		spin_lock(&depot->spin);
848 		maglist_disassociate(depot, &depot->fullmagazines,
849 				     &tmplist, FALSE);
850 		spin_unlock(&depot->spin);
851 		count = maglist_purge(oc, &tmplist);
852 		if (count > 0) {
853 			if (depot->waiting)
854 				wakeup(depot);
855 			return (TRUE);
856 		}
857 	}
858 	return (FALSE);
859 }
860 
861 /*
862  * Destroy an object cache.  Must have no existing references.
863  */
864 void
865 objcache_destroy(struct objcache *oc)
866 {
867 	struct percpu_objcache *cache_percpu;
868 	struct magazinedepot *depot;
869 	int clusterid, cpuid;
870 	struct magazinelist tmplist;
871 
872 	spin_lock(&objcachelist_spin);
873 	LIST_REMOVE(oc, oc_next);
874 	spin_unlock(&objcachelist_spin);
875 
876 	SLIST_INIT(&tmplist);
877 	for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++) {
878 		depot = &oc->depot[clusterid];
879 		spin_lock(&depot->spin);
880 		depot_disassociate(depot, &tmplist);
881 		spin_unlock(&depot->spin);
882 	}
883 	maglist_purge(oc, &tmplist);
884 
885 	for (cpuid = 0; cpuid < ncpus; cpuid++) {
886 		cache_percpu = &oc->cache_percpu[cpuid];
887 
888 		crit_enter();
889 		mag_purge(oc, &cache_percpu->loaded_magazine, TRUE);
890 		mag_purge(oc, &cache_percpu->previous_magazine, TRUE);
891 		crit_exit();
892 		cache_percpu->loaded_magazine = NULL;
893 		cache_percpu->previous_magazine = NULL;
894 		/* don't bother adjusting depot->unallocated_objects */
895 	}
896 
897 	kfree(oc->name, M_TEMP);
898 	kfree(oc, M_OBJCACHE);
899 }
900 
#if 0
/*
 * Populate the per-cluster depot with elements from a linear block
 * of memory.  Must be called individually for each cluster.
 * Populated depots should not be destroyed.
 *
 *	base	start of the memory block
 *	nelts	number of elements in the block
 *	size	size of one element in bytes
 *
 * Elements are loaded into magazines of the depot's magazine capacity;
 * each magazine is placed on the depot's full list as it fills up.  A
 * final, partially filled magazine is placed on the full list as well,
 * and a final magazine that received no element is freed.
 */
void
objcache_populate_linear(struct objcache *oc, void *base, int nelts, int size)
{
	char *p = base;
	char *end = (char *)base + (nelts * size);
	struct magazinedepot *depot = &oc->depot[myclusterid];
	struct magazine *emptymag = mag_alloc(depot->magcapacity);

	while (p < end) {
		emptymag->objects[emptymag->rounds++] = p;
		if (MAGAZINE_FULL(emptymag)) {
			/*
			 * Hand the filled magazine to the depot using the
			 * same spin_lock()/spin_unlock() calls as the rest
			 * of this file, then start a fresh one.
			 */
			spin_lock(&depot->spin);
			SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
					  nextmagazine);
			depot->unallocated_objects += emptymag->rounds;
			spin_unlock(&depot->spin);
			if (depot->waiting)
				wakeup(depot);
			emptymag = mag_alloc(depot->magcapacity);
		}
		p += size;
	}
	if (MAGAZINE_EMPTY(emptymag)) {
		/* The last magazine was never used; release it. */
		crit_enter();
		mag_purge(oc, &emptymag, TRUE);
		crit_exit();
	} else {
		/*
		 * Hand over the partially filled remainder.  No
		 * replacement magazine is allocated here: nothing would
		 * ever use or free it.
		 */
		spin_lock(&depot->spin);
		SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
				  nextmagazine);
		depot->unallocated_objects += emptymag->rounds;
		spin_unlock(&depot->spin);
		if (depot->waiting)
			wakeup(depot);
	}
}
#endif
945 
#if 0
/*
 * Check depot contention once a minute.
 * 2 contested locks per second allowed.
 */
static int objcache_rebalance_period;
static const int objcache_contention_rate = 120;
static struct callout objcache_callout;

/* Upper bound on the magazine capacity the timer will grow to. */
#define MAXMAGSIZE 512

/*
 * Check depot contention and increase magazine size if necessary.
 *
 * Walks all object caches; for a depot whose contention count exceeds
 * objcache_contention_rate, the depot's magazines are stripped and the
 * magazine capacity is doubled (while below MAXMAGSIZE).  The callout
 * re-arms itself at the end.
 *
 * NOTE(review): the XXX text at the top of the function body is not
 * inside a comment, so this block will not compile if the #if 0 is
 * simply removed.
 */
static void
objcache_timer(void *dummy)
{
	struct objcache *oc;
	struct magazinedepot *depot;
	struct magazinelist tmplist;

	XXX we need to detect when an objcache is destroyed out from under
	    us XXX

	SLIST_INIT(&tmplist);

	spin_lock_wr(&objcachelist_spin);
	LIST_FOREACH(oc, &allobjcaches, oc_next) {
		depot = &oc->depot[myclusterid];
		if (depot->magcapacity < MAXMAGSIZE) {
			if (depot->contested > objcache_contention_rate) {
				spin_lock_wr(&depot->spin);
				depot_disassociate(depot, &tmplist);
				depot->magcapacity *= 2;
				spin_unlock_wr(&depot->spin);
				kprintf("objcache_timer: increasing cache %s"
				       " magsize to %d, contested %d times\n",
				    oc->name, depot->magcapacity,
				    depot->contested);
			}
			depot->contested = 0;
		}
		/*
		 * The list lock is dropped while the stripped magazines
		 * are freed and re-taken before the walk continues.
		 */
		spin_unlock_wr(&objcachelist_spin);
		if (maglist_purge(oc, &tmplist) > 0 && depot->waiting)
			wakeup(depot);
		spin_lock_wr(&objcachelist_spin);
	}
	spin_unlock_wr(&objcachelist_spin);

	callout_reset(&objcache_callout, objcache_rebalance_period,
		      objcache_timer, NULL);
}

#endif
1000 
1001 static void
1002 objcache_init(void)
1003 {
1004 	spin_init(&objcachelist_spin, "objcachelist");
1005 
1006 	magazine_capmin = mag_capacity_align(MAGAZINE_CAPACITY_MIN);
1007 	magazine_capmax = mag_capacity_align(MAGAZINE_CAPACITY_MAX);
1008 	if (bootverbose) {
1009 		kprintf("objcache: magazine cap [%d, %d]\n",
1010 		    magazine_capmin, magazine_capmax);
1011 	}
1012 
1013 #if 0
1014 	callout_init_mp(&objcache_callout);
1015 	objcache_rebalance_period = 60 * hz;
1016 	callout_reset(&objcache_callout, objcache_rebalance_period,
1017 		      objcache_timer, NULL);
1018 #endif
1019 }
1020 SYSINIT(objcache, SI_BOOT2_OBJCACHE, SI_ORDER_FIRST, objcache_init, 0);
1021