xref: /dragonfly/sys/kern/kern_objcache.c (revision f7df6c8e)
1 /*
2  * Copyright (c) 2005 Jeffrey M. Hsu.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Jeffrey M. Hsu.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of The DragonFly Project nor the names of its
16  *    contributors may be used to endorse or promote products derived
17  *    from this software without specific, prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/param.h>
34 #include <sys/kernel.h>
35 #include <sys/systm.h>
36 #include <sys/callout.h>
37 #include <sys/globaldata.h>
38 #include <sys/malloc.h>
39 #include <sys/queue.h>
40 #include <sys/objcache.h>
41 #include <sys/spinlock.h>
42 #include <sys/thread.h>
43 #include <sys/thread2.h>
44 #include <sys/spinlock2.h>
45 
46 static MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");
47 static MALLOC_DEFINE(M_OBJMAG, "objcache magazine", "Object Cache Magazine");
48 
49 #define	INITIAL_MAG_CAPACITY	64
50 
51 struct magazine {
52 	int			 rounds;
53 	int			 capacity;
54 	SLIST_ENTRY(magazine)	 nextmagazine;
55 	void			*objects[];
56 };
57 
58 SLIST_HEAD(magazinelist, magazine);
59 
60 #define MAGAZINE_HDRSIZE	__offsetof(struct magazine, objects[0])
61 #define MAGAZINE_CAPACITY_MAX	128
62 #define MAGAZINE_CAPACITY_MIN	4
63 
64 /*
65  * per-cluster cache of magazines
66  *
67  * All fields in this structure are protected by the spinlock.
68  */
69 struct magazinedepot {
70 	/*
71 	 * The per-cpu object caches only exchanges completely full or
72 	 * completely empty magazines with the depot layer, so only have
73 	 * to cache these two types of magazines.
74 	 */
75 	struct magazinelist	fullmagazines;
76 	struct magazinelist	emptymagazines;
77 	int			magcapacity;
78 
79 	/* protect this structure */
80 	struct spinlock		spin;
81 
82 	/* magazines not yet allocated towards limit */
83 	int			unallocated_objects;
84 
85 	/* infrequently used fields */
86 	int			waiting;	/* waiting for another cpu to
87 						 * return a full magazine to
88 						 * the depot */
89 	int			contested;	/* depot contention count */
90 } __cachealign;
91 
/*
 * Per-cpu object cache.
 *
 * Every field in this structure is protected by crit_enter().
 */
struct percpu_objcache {
	struct magazine	*loaded_magazine;	/* magazine in active use */
	struct magazine	*previous_magazine;	/* backup magazine */

	/* statistics */
	int		gets_cumulative;	/* calls to objcache_get */
	int		gets_null;		/* gets that returned NULL */
	int		puts_cumulative;	/* calls to objcache_put */
	int		puts_othercluster;	/* handed to another cluster */

	/* infrequently used fields */
	int		waiting;	/* threads waiting for an object to be
					 * returned to this cpu's cache */
} __cachealign;
110 
/*
 * Placeholder single-cluster mapping, in use only until NUMA cluster
 * topology information is available.  XXX
 */
#define MAXCLUSTERS 1
#define myclusterid 0
#define CLUSTER_OF(obj) 0
115 
116 /*
117  * Two-level object cache consisting of NUMA cluster-level depots of
118  * fully loaded or completely empty magazines and cpu-level caches of
119  * individual objects.
120  */
121 struct objcache {
122 	char			*name;
123 
124 	/* object constructor and destructor from blank storage */
125 	objcache_ctor_fn	*ctor;
126 	objcache_dtor_fn	*dtor;
127 	void			*privdata;
128 
129 	/* interface to underlying allocator */
130 	objcache_alloc_fn	*alloc;
131 	objcache_free_fn	*free;
132 	void			*allocator_args;
133 
134 	LIST_ENTRY(objcache)	oc_next;
135 	int			exhausted;	/* oops */
136 
137 	/* NUMA-cluster level caches */
138 	struct magazinedepot	depot[MAXCLUSTERS];
139 
140 	struct percpu_objcache	cache_percpu[];		/* per-cpu caches */
141 };
142 
143 static struct spinlock objcachelist_spin;
144 static LIST_HEAD(objcachelist, objcache) allobjcaches;
145 static int magazine_capmin;
146 static int magazine_capmax;
147 
148 static struct magazine *
149 mag_alloc(int capacity)
150 {
151 	struct magazine *mag;
152 	int size;
153 
154 	size = __offsetof(struct magazine, objects[capacity]);
155 	KASSERT(size > 0 && (size & __VM_CACHELINE_MASK) == 0,
156 	    ("magazine size is not multiple cache line size"));
157 
158 	mag = kmalloc_cachealign(size, M_OBJMAG, M_INTWAIT | M_ZERO);
159 	mag->capacity = capacity;
160 	mag->rounds = 0;
161 	return (mag);
162 }
163 
164 static int
165 mag_capacity_align(int mag_capacity)
166 {
167 	int mag_size;
168 
169 	mag_size = __VM_CACHELINE_ALIGN(
170 	    __offsetof(struct magazine, objects[mag_capacity]));
171 	mag_capacity = (mag_size - MAGAZINE_HDRSIZE) / sizeof(void *);
172 
173 	return mag_capacity;
174 }
175 
/*
 * Destructor for objects that need no tear-down.  objcache_create()
 * installs it when the caller passes a NULL dtor.
 */
static void
null_dtor(void *obj, void *privdata)
{
	/* intentionally empty */
	(void)obj;
	(void)privdata;
}
185 
186 static boolean_t
187 null_ctor(void *obj, void *privdata, int ocflags)
188 {
189 	return TRUE;
190 }
191 
192 /*
193  * Create an object cache.
194  */
195 struct objcache *
196 objcache_create(const char *name, int cluster_limit, int nom_cache,
197 		objcache_ctor_fn *ctor, objcache_dtor_fn *dtor, void *privdata,
198 		objcache_alloc_fn *alloc, objcache_free_fn *free,
199 		void *allocator_args)
200 {
201 	struct objcache *oc;
202 	struct magazinedepot *depot;
203 	int cpuid;
204 	int nmagdepot;
205 	int mag_capacity;
206 	int i;
207 
208 	/*
209 	 * Allocate object cache structure
210 	 */
211 	oc = kmalloc_cachealign(
212 	    __offsetof(struct objcache, cache_percpu[ncpus]),
213 	    M_OBJCACHE, M_WAITOK | M_ZERO);
214 	oc->name = kstrdup(name, M_TEMP);
215 	oc->ctor = ctor ? ctor : null_ctor;
216 	oc->dtor = dtor ? dtor : null_dtor;
217 	oc->privdata = privdata;
218 	oc->alloc = alloc;
219 	oc->free = free;
220 	oc->allocator_args = allocator_args;
221 
222 	/*
223 	 * Initialize depot list(s).
224 	 */
225 	depot = &oc->depot[0];
226 
227 	spin_init(&depot->spin, "objcachedepot");
228 	SLIST_INIT(&depot->fullmagazines);
229 	SLIST_INIT(&depot->emptymagazines);
230 
231 	/*
232 	 * Figure out the nominal number of free objects to cache and
233 	 * the magazine capacity.  By default we want to cache up to
234 	 * half the cluster_limit.  If there is no cluster_limit then
235 	 * we want to cache up to 128 objects.
236 	 */
237 	if (nom_cache == 0)
238 		nom_cache = cluster_limit / 2;
239 	if (cluster_limit && nom_cache > cluster_limit)
240 		nom_cache = cluster_limit;
241 	if (nom_cache == 0)
242 		nom_cache = INITIAL_MAG_CAPACITY * 2;
243 
244 	/*
245 	 * Magazine capacity for 2 active magazines per cpu plus 2
246 	 * magazines in the depot.
247 	 */
248 	mag_capacity = mag_capacity_align(nom_cache / (ncpus + 1) / 2 + 1);
249 	if (mag_capacity > magazine_capmax)
250 		mag_capacity = magazine_capmax;
251 	else if (mag_capacity < magazine_capmin)
252 		mag_capacity = magazine_capmin;
253 	depot->magcapacity = mag_capacity;
254 
255 	/*
256 	 * The cluster_limit must be sufficient to have two magazines per
257 	 * cpu plus at least two magazines in the depot.  However, because
258 	 * partial magazines can stay on the cpus what we really need here
259 	 * is to specify the number of extra magazines we allocate for the
260 	 * depot.
261 	 */
262 	if (cluster_limit == 0) {
263 		depot->unallocated_objects = -1;
264 	} else {
265 		depot->unallocated_objects = ncpus * mag_capacity * 2 +
266 					     cluster_limit;
267 	}
268 
269 	/*
270 	 * Initialize per-cpu caches
271 	 */
272 	for (cpuid = 0; cpuid < ncpus; cpuid++) {
273 		struct percpu_objcache *cache_percpu = &oc->cache_percpu[cpuid];
274 
275 		cache_percpu->loaded_magazine = mag_alloc(mag_capacity);
276 		cache_percpu->previous_magazine = mag_alloc(mag_capacity);
277 	}
278 
279 	/*
280 	 * Compute how many empty magazines to place in the depot.  This
281 	 * determines the retained cache size and is based on nom_cache.
282 	 *
283 	 * The actual cache size is larger because there are two magazines
284 	 * for each cpu as well but those can be in any fill state so we
285 	 * just can't count them.
286 	 *
287 	 * There is a minimum of two magazines in the depot.
288 	 */
289 	nmagdepot = nom_cache / mag_capacity + 1;
290 	if (nmagdepot < 2)
291 		nmagdepot = 2;
292 
293 	/*
294 	 * Put empty magazines in depot
295 	 */
296 	for (i = 0; i < nmagdepot; i++) {
297 		struct magazine *mag = mag_alloc(mag_capacity);
298 		SLIST_INSERT_HEAD(&depot->emptymagazines, mag, nextmagazine);
299 	}
300 
301 	spin_lock(&objcachelist_spin);
302 	LIST_INSERT_HEAD(&allobjcaches, oc, oc_next);
303 	spin_unlock(&objcachelist_spin);
304 
305 	return (oc);
306 }
307 
308 struct objcache *
309 objcache_create_simple(malloc_type_t mtype, size_t objsize)
310 {
311 	struct objcache_malloc_args *margs;
312 	struct objcache *oc;
313 
314 	margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
315 	margs->objsize = objsize;
316 	margs->mtype = mtype;
317 	oc = objcache_create(mtype->ks_shortdesc, 0, 0,
318 			     NULL, NULL, NULL,
319 			     objcache_malloc_alloc, objcache_malloc_free,
320 			     margs);
321 	return (oc);
322 }
323 
324 struct objcache *
325 objcache_create_mbacked(malloc_type_t mtype, size_t objsize,
326 			int cluster_limit, int nom_cache,
327 			objcache_ctor_fn *ctor, objcache_dtor_fn *dtor,
328 			void *privdata)
329 {
330 	struct objcache_malloc_args *margs;
331 	struct objcache *oc;
332 
333 	margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
334 	margs->objsize = objsize;
335 	margs->mtype = mtype;
336 	oc = objcache_create(mtype->ks_shortdesc,
337 			     cluster_limit, nom_cache,
338 			     ctor, dtor, privdata,
339 			     objcache_malloc_alloc, objcache_malloc_free,
340 			     margs);
341 	return(oc);
342 }
343 
344 
/*
 * Magazine fill-state predicates.  `mag' is any expression yielding a
 * struct magazine pointer; it is parenthesized in the expansion so that
 * compound expressions such as `base + 1' work.  MAGAZINE_FULL evaluates
 * its argument twice, so do not pass expressions with side effects.
 */
#define MAGAZINE_EMPTY(mag)	((mag)->rounds == 0)
#define MAGAZINE_NOTEMPTY(mag)	((mag)->rounds != 0)
#define MAGAZINE_FULL(mag)	((mag)->rounds == (mag)->capacity)

/*
 * Exchange two struct magazine pointer lvalues.  Both arguments are
 * evaluated twice.  The temporary has a name unlikely to collide with a
 * caller's variable.
 */
#define	swap(x, y)	({ struct magazine *swap_tmp_ = (x);		\
			   (x) = (y); (y) = swap_tmp_; })
350 
351 /*
352  * Get an object from the object cache.
353  *
354  * WARNING!  ocflags are only used when we have to go to the underlying
355  * allocator, so we cannot depend on flags such as M_ZERO.
356  */
357 void *
358 objcache_get(struct objcache *oc, int ocflags)
359 {
360 	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
361 	struct magazine *loadedmag;
362 	struct magazine *emptymag;
363 	void *obj;
364 	struct magazinedepot *depot;
365 
366 	KKASSERT((ocflags & M_ZERO) == 0);
367 	crit_enter();
368 	++cpucache->gets_cumulative;
369 
370 retry:
371 	/*
372 	 * Loaded magazine has an object.  This is the hot path.
373 	 * It is lock-free and uses a critical section to block
374 	 * out interrupt handlers on the same processor.
375 	 */
376 	loadedmag = cpucache->loaded_magazine;
377 	if (MAGAZINE_NOTEMPTY(loadedmag)) {
378 		obj = loadedmag->objects[--loadedmag->rounds];
379 		crit_exit();
380 		return (obj);
381 	}
382 
383 	/* Previous magazine has an object. */
384 	if (MAGAZINE_NOTEMPTY(cpucache->previous_magazine)) {
385 		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
386 		loadedmag = cpucache->loaded_magazine;
387 		obj = loadedmag->objects[--loadedmag->rounds];
388 		crit_exit();
389 		return (obj);
390 	}
391 
392 	/*
393 	 * Both magazines empty.  Get a full magazine from the depot and
394 	 * move one of the empty ones to the depot.
395 	 *
396 	 * Obtain the depot spinlock.
397 	 *
398 	 * NOTE: Beyond this point, M_* flags are handled via oc->alloc()
399 	 */
400 	depot = &oc->depot[myclusterid];
401 	spin_lock(&depot->spin);
402 
403 	/*
404 	 * Recheck the cpucache after obtaining the depot spinlock.  This
405 	 * shouldn't be necessary now but don't take any chances.
406 	 */
407 	if (MAGAZINE_NOTEMPTY(cpucache->loaded_magazine) ||
408 	    MAGAZINE_NOTEMPTY(cpucache->previous_magazine)
409 	) {
410 		spin_unlock(&depot->spin);
411 		goto retry;
412 	}
413 
414 	/* Check if depot has a full magazine. */
415 	if (!SLIST_EMPTY(&depot->fullmagazines)) {
416 		emptymag = cpucache->previous_magazine;
417 		cpucache->previous_magazine = cpucache->loaded_magazine;
418 		cpucache->loaded_magazine = SLIST_FIRST(&depot->fullmagazines);
419 		SLIST_REMOVE_HEAD(&depot->fullmagazines, nextmagazine);
420 
421 		/*
422 		 * Return emptymag to the depot.
423 		 */
424 		KKASSERT(MAGAZINE_EMPTY(emptymag));
425 		SLIST_INSERT_HEAD(&depot->emptymagazines,
426 				  emptymag, nextmagazine);
427 		spin_unlock(&depot->spin);
428 		goto retry;
429 	}
430 
431 	/*
432 	 * The depot does not have any non-empty magazines.  If we have
433 	 * not hit our object limit we can allocate a new object using
434 	 * the back-end allocator.
435 	 *
436 	 * note: unallocated_objects can be initialized to -1, which has
437 	 * the effect of removing any allocation limits.
438 	 */
439 	if (depot->unallocated_objects) {
440 		--depot->unallocated_objects;
441 		spin_unlock(&depot->spin);
442 		crit_exit();
443 
444 		obj = oc->alloc(oc->allocator_args, ocflags);
445 		if (obj) {
446 			if (oc->ctor(obj, oc->privdata, ocflags))
447 				return (obj);
448 			oc->free(obj, oc->allocator_args);
449 			obj = NULL;
450 		}
451 		if (obj == NULL) {
452 			spin_lock(&depot->spin);
453 			++depot->unallocated_objects;
454 			spin_unlock(&depot->spin);
455 			if (depot->waiting)
456 				wakeup(depot);
457 
458 			crit_enter();
459 			/*
460 			 * makes debugging easier when gets_cumulative does
461 			 * not include gets_null.
462 			 */
463 			++cpucache->gets_null;
464 			--cpucache->gets_cumulative;
465 			crit_exit();
466 		}
467 		return(obj);
468 	}
469 	if (oc->exhausted == 0) {
470 		kprintf("Warning, objcache(%s): Exhausted!\n", oc->name);
471 		oc->exhausted = 1;
472 	}
473 
474 	/*
475 	 * Otherwise block if allowed to.
476 	 */
477 	if ((ocflags & (M_WAITOK|M_NULLOK)) == M_WAITOK) {
478 		++cpucache->waiting;
479 		++depot->waiting;
480 		ssleep(depot, &depot->spin, 0, "objcache_get", 0);
481 		--cpucache->waiting;
482 		--depot->waiting;
483 		spin_unlock(&depot->spin);
484 		goto retry;
485 	}
486 
487 	/*
488 	 * Otherwise fail
489 	 */
490 	++cpucache->gets_null;
491 	--cpucache->gets_cumulative;
492 	crit_exit();
493 	spin_unlock(&depot->spin);
494 	return (NULL);
495 }
496 
497 /*
498  * Wrapper for malloc allocation routines.
499  */
500 void *
501 objcache_malloc_alloc(void *allocator_args, int ocflags)
502 {
503 	struct objcache_malloc_args *alloc_args = allocator_args;
504 
505 	return (kmalloc(alloc_args->objsize, alloc_args->mtype,
506 		       ocflags & OC_MFLAGS));
507 }
508 
509 void
510 objcache_malloc_free(void *obj, void *allocator_args)
511 {
512 	struct objcache_malloc_args *alloc_args = allocator_args;
513 
514 	kfree(obj, alloc_args->mtype);
515 }
516 
/*
 * Back-end allocator for policies that pre-allocate everything at
 * initialization time and never allocate at run-time: it always fails.
 */
void *
objcache_nop_alloc(void *allocator_args, int ocflags)
{
	(void)allocator_args;
	(void)ocflags;

	return (NULL);
}
526 
/*
 * Back-end free routine paired with objcache_nop_alloc(); the object is
 * left untouched.
 */
void
objcache_nop_free(void *obj, void *allocator_args)
{
	(void)obj;
	(void)allocator_args;
}
531 
532 /*
533  * Return an object to the object cache.
534  */
535 void
536 objcache_put(struct objcache *oc, void *obj)
537 {
538 	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
539 	struct magazine *loadedmag;
540 	struct magazinedepot *depot;
541 
542 	crit_enter();
543 	++cpucache->puts_cumulative;
544 
545 	if (CLUSTER_OF(obj) != myclusterid) {
546 #ifdef notyet
547 		/* use lazy IPI to send object to owning cluster XXX todo */
548 		++cpucache->puts_othercluster;
549 		crit_exit();
550 		return;
551 #endif
552 	}
553 
554 retry:
555 	/*
556 	 * Free slot available in loaded magazine.  This is the hot path.
557 	 * It is lock-free and uses a critical section to block out interrupt
558 	 * handlers on the same processor.
559 	 */
560 	loadedmag = cpucache->loaded_magazine;
561 	if (!MAGAZINE_FULL(loadedmag)) {
562 		loadedmag->objects[loadedmag->rounds++] = obj;
563 		if (cpucache->waiting)
564 			wakeup_mycpu(&oc->depot[myclusterid]);
565 		crit_exit();
566 		return;
567 	}
568 
569 	/*
570 	 * Current magazine full, but previous magazine has room.  XXX
571 	 */
572 	if (!MAGAZINE_FULL(cpucache->previous_magazine)) {
573 		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
574 		loadedmag = cpucache->loaded_magazine;
575 		loadedmag->objects[loadedmag->rounds++] = obj;
576 		if (cpucache->waiting)
577 			wakeup_mycpu(&oc->depot[myclusterid]);
578 		crit_exit();
579 		return;
580 	}
581 
582 	/*
583 	 * Both magazines full.  Get an empty magazine from the depot and
584 	 * move a full loaded magazine to the depot.  Even though the
585 	 * magazine may wind up with space available after we block on
586 	 * the spinlock, we still cycle it through to avoid the non-optimal
587 	 * corner-case.
588 	 *
589 	 * Obtain the depot spinlock.
590 	 */
591 	depot = &oc->depot[myclusterid];
592 	spin_lock(&depot->spin);
593 
594 	/*
595 	 * If an empty magazine is available in the depot, cycle it
596 	 * through and retry.
597 	 */
598 	if (!SLIST_EMPTY(&depot->emptymagazines)) {
599 		loadedmag = cpucache->previous_magazine;
600 		cpucache->previous_magazine = cpucache->loaded_magazine;
601 		cpucache->loaded_magazine = SLIST_FIRST(&depot->emptymagazines);
602 		SLIST_REMOVE_HEAD(&depot->emptymagazines, nextmagazine);
603 
604 		/*
605 		 * Return loadedmag to the depot.  Due to blocking it may
606 		 * not be entirely full and could even be empty.
607 		 */
608 		if (MAGAZINE_EMPTY(loadedmag)) {
609 			SLIST_INSERT_HEAD(&depot->emptymagazines,
610 					  loadedmag, nextmagazine);
611 			spin_unlock(&depot->spin);
612 		} else {
613 			SLIST_INSERT_HEAD(&depot->fullmagazines,
614 					  loadedmag, nextmagazine);
615 			spin_unlock(&depot->spin);
616 			if (depot->waiting)
617 				wakeup(depot);
618 		}
619 		goto retry;
620 	}
621 
622 	/*
623 	 * An empty mag is not available.  This is a corner case which can
624 	 * occur due to cpus holding partially full magazines.  Do not try
625 	 * to allocate a mag, just free the object.
626 	 */
627 	++depot->unallocated_objects;
628 	spin_unlock(&depot->spin);
629 	if (depot->waiting)
630 		wakeup(depot);
631 	crit_exit();
632 	oc->dtor(obj, oc->privdata);
633 	oc->free(obj, oc->allocator_args);
634 }
635 
636 /*
637  * The object is being put back into the cache, but the caller has
638  * indicated that the object is not in any shape to be reused and should
639  * be dtor'd immediately.
640  */
641 void
642 objcache_dtor(struct objcache *oc, void *obj)
643 {
644 	struct magazinedepot *depot;
645 
646 	depot = &oc->depot[myclusterid];
647 	spin_lock(&depot->spin);
648 	++depot->unallocated_objects;
649 	spin_unlock(&depot->spin);
650 	if (depot->waiting)
651 		wakeup(depot);
652 	oc->dtor(obj, oc->privdata);
653 	oc->free(obj, oc->allocator_args);
654 }
655 
656 /*
657  * Deallocate all objects in a magazine and free the magazine if requested.
658  * When freeit is TRUE the magazine must already be disassociated from the
659  * depot.
660  *
661  * Must be called with a critical section held when called with a per-cpu
662  * magazine.  The magazine may be indirectly modified during the loop.
663  *
664  * If the magazine moves during a dtor the operation is aborted.  This is
665  * only allowed when freeit is FALSE.
666  *
667  * The number of objects freed is returned.
668  */
669 static int
670 mag_purge(struct objcache *oc, struct magazine **magp, int freeit)
671 {
672 	struct magazine *mag = *magp;
673 	int count;
674 	void *obj;
675 
676 	count = 0;
677 	while (mag->rounds) {
678 		obj = mag->objects[--mag->rounds];
679 		oc->dtor(obj, oc->privdata);		/* MAY BLOCK */
680 		oc->free(obj, oc->allocator_args);	/* MAY BLOCK */
681 		++count;
682 
683 		/*
684 		 * Cycle for interrupts.
685 		 */
686 		if ((count & 15) == 0) {
687 			crit_exit();
688 			crit_enter();
689 		}
690 
691 		/*
692 		 * mag may have become invalid either due to dtor/free
693 		 * blocking or interrupt cycling, do not derefernce it
694 		 * until we check.
695 		 */
696 		if (*magp != mag) {
697 			kprintf("mag_purge: mag ripped out\n");
698 			break;
699 		}
700 	}
701 	if (freeit) {
702 		KKASSERT(*magp == mag);
703 		*magp = NULL;
704 		kfree(mag, M_OBJMAG);
705 	}
706 	return(count);
707 }
708 
709 /*
710  * Disassociate zero or more magazines from a magazine list associated with
711  * the depot, update the depot, and move the magazines to a temporary
712  * list.
713  *
714  * The caller must check the depot for waiters and wake it up, typically
715  * after disposing of the magazines this function loads onto the temporary
716  * list.
717  */
718 static void
719 maglist_disassociate(struct magazinedepot *depot, struct magazinelist *maglist,
720 		     struct magazinelist *tmplist, boolean_t purgeall)
721 {
722 	struct magazine *mag;
723 
724 	while ((mag = SLIST_FIRST(maglist)) != NULL) {
725 		SLIST_REMOVE_HEAD(maglist, nextmagazine);
726 		SLIST_INSERT_HEAD(tmplist, mag, nextmagazine);
727 		depot->unallocated_objects += mag->rounds;
728 	}
729 }
730 
731 /*
732  * Deallocate all magazines and their contents from the passed temporary
733  * list.  The magazines have already been accounted for by their depots.
734  *
735  * The total number of rounds freed is returned.  This number is typically
736  * only used to determine whether a wakeup on the depot is needed or not.
737  */
738 static int
739 maglist_purge(struct objcache *oc, struct magazinelist *maglist)
740 {
741 	struct magazine *mag;
742 	int count = 0;
743 
744 	/*
745 	 * can't use SLIST_FOREACH because blocking releases the depot
746 	 * spinlock
747 	 */
748 	crit_enter();
749 	while ((mag = SLIST_FIRST(maglist)) != NULL) {
750 		SLIST_REMOVE_HEAD(maglist, nextmagazine);
751 		count += mag_purge(oc, &mag, TRUE);
752 	}
753 	crit_exit();
754 	return(count);
755 }
756 
757 /*
758  * De-allocates all magazines on the full and empty magazine lists.
759  *
760  * Because this routine is called with a spinlock held, the magazines
761  * can only be disassociated and moved to a temporary list, not freed.
762  *
763  * The caller is responsible for freeing the magazines.
764  */
765 static void
766 depot_disassociate(struct magazinedepot *depot, struct magazinelist *tmplist)
767 {
768 	maglist_disassociate(depot, &depot->fullmagazines, tmplist, TRUE);
769 	maglist_disassociate(depot, &depot->emptymagazines, tmplist, TRUE);
770 }
771 
#ifdef notneeded
/*
 * Release everything cached for `oc' on the current cpu and in the
 * current cluster's depot: both per-cpu magazines are emptied, then all
 * depot magazines are disassociated and freed.  Waiters on the depot
 * are woken if anything was freed.
 */
void
objcache_reclaim(struct objcache *oc)
{
	/* the per-cpu cache array is indexed by cpu id, not cluster id */
	struct percpu_objcache *cache_percpu = &oc->cache_percpu[mycpuid];
	struct magazinedepot *depot = &oc->depot[myclusterid];
	struct magazinelist tmplist;
	int count;

	SLIST_INIT(&tmplist);
	crit_enter();
	count = mag_purge(oc, &cache_percpu->loaded_magazine, FALSE);
	count += mag_purge(oc, &cache_percpu->previous_magazine, FALSE);
	crit_exit();

	/*
	 * Credit the purged per-cpu objects back to the limit; the depot
	 * magazines are accounted for by depot_disassociate() itself.
	 */
	spin_lock(&depot->spin);
	depot->unallocated_objects += count;
	depot_disassociate(depot, &tmplist);
	spin_unlock(&depot->spin);
	count += maglist_purge(oc, &tmplist);
	if (count && depot->waiting)
		wakeup(depot);
}
#endif
796 
797 /*
798  * Try to free up some memory.  Return as soon as some free memory is found.
799  * For each object cache on the reclaim list, first try the current per-cpu
800  * cache, then the full magazine depot.
801  */
802 boolean_t
803 objcache_reclaimlist(struct objcache *oclist[], int nlist, int ocflags)
804 {
805 	struct objcache *oc;
806 	struct percpu_objcache *cpucache;
807 	struct magazinedepot *depot;
808 	struct magazinelist tmplist;
809 	int i, count;
810 
811 	kprintf("objcache_reclaimlist\n");
812 
813 	SLIST_INIT(&tmplist);
814 
815 	for (i = 0; i < nlist; i++) {
816 		oc = oclist[i];
817 		cpucache = &oc->cache_percpu[mycpuid];
818 		depot = &oc->depot[myclusterid];
819 
820 		crit_enter();
821 		count = mag_purge(oc, &cpucache->loaded_magazine, FALSE);
822 		if (count == 0)
823 			count += mag_purge(oc, &cpucache->previous_magazine, FALSE);
824 		crit_exit();
825 		if (count > 0) {
826 			spin_lock(&depot->spin);
827 			depot->unallocated_objects += count;
828 			spin_unlock(&depot->spin);
829 			if (depot->waiting)
830 				wakeup(depot);
831 			return (TRUE);
832 		}
833 		spin_lock(&depot->spin);
834 		maglist_disassociate(depot, &depot->fullmagazines,
835 				     &tmplist, FALSE);
836 		spin_unlock(&depot->spin);
837 		count = maglist_purge(oc, &tmplist);
838 		if (count > 0) {
839 			if (depot->waiting)
840 				wakeup(depot);
841 			return (TRUE);
842 		}
843 	}
844 	return (FALSE);
845 }
846 
847 /*
848  * Destroy an object cache.  Must have no existing references.
849  */
850 void
851 objcache_destroy(struct objcache *oc)
852 {
853 	struct percpu_objcache *cache_percpu;
854 	struct magazinedepot *depot;
855 	int clusterid, cpuid;
856 	struct magazinelist tmplist;
857 
858 	spin_lock(&objcachelist_spin);
859 	LIST_REMOVE(oc, oc_next);
860 	spin_unlock(&objcachelist_spin);
861 
862 	SLIST_INIT(&tmplist);
863 	for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++) {
864 		depot = &oc->depot[clusterid];
865 		spin_lock(&depot->spin);
866 		depot_disassociate(depot, &tmplist);
867 		spin_unlock(&depot->spin);
868 	}
869 	maglist_purge(oc, &tmplist);
870 
871 	for (cpuid = 0; cpuid < ncpus; cpuid++) {
872 		cache_percpu = &oc->cache_percpu[cpuid];
873 
874 		crit_enter();
875 		mag_purge(oc, &cache_percpu->loaded_magazine, TRUE);
876 		mag_purge(oc, &cache_percpu->previous_magazine, TRUE);
877 		crit_exit();
878 		cache_percpu->loaded_magazine = NULL;
879 		cache_percpu->previous_magazine = NULL;
880 		/* don't bother adjusting depot->unallocated_objects */
881 	}
882 
883 	kfree(oc->name, M_TEMP);
884 	kfree(oc, M_OBJCACHE);
885 }
886 
#if 0
/*
 * Populate the per-cluster depot with elements from a linear block
 * of memory.  Must be called individually for each cluster.
 * Populated depots should not be destroyed.
 *
 * base		start of the block
 * nelts	number of elements in the block
 * size		size of each element in bytes
 *
 * Elements are packed into magazines which are placed on the depot's
 * full list; the last magazine may be only partially filled.
 */
void
objcache_populate_linear(struct objcache *oc, void *base, int nelts, int size)
{
	char *p = base;
	char *end = (char *)base + (nelts * size);
	struct magazinedepot *depot = &oc->depot[myclusterid];
	struct magazine *emptymag = mag_alloc(depot->magcapacity);

	while (p < end) {
		emptymag->objects[emptymag->rounds++] = p;
		if (MAGAZINE_FULL(emptymag)) {
			spin_lock(&depot->spin);
			SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
					  nextmagazine);
			depot->unallocated_objects += emptymag->rounds;
			spin_unlock(&depot->spin);
			if (depot->waiting)
				wakeup(depot);
			/* start filling a fresh magazine */
			emptymag = mag_alloc(depot->magcapacity);
		}
		p += size;
	}
	if (MAGAZINE_EMPTY(emptymag)) {
		/* nothing left over, the spare magazine is not needed */
		crit_enter();
		mag_purge(oc, &emptymag, TRUE);
		crit_exit();
	} else {
		/*
		 * Hand the partially filled magazine to the depot.  No
		 * replacement is allocated: there is nothing left to
		 * put in it and it would simply be leaked.
		 */
		spin_lock(&depot->spin);
		SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
				  nextmagazine);
		depot->unallocated_objects += emptymag->rounds;
		spin_unlock(&depot->spin);
		if (depot->waiting)
			wakeup(depot);
	}
}
#endif
931 
#if 0
/*
 * Check depot contention once a minute.
 * 2 contested locks per second allowed.
 */
static int objcache_rebalance_period;
static const int objcache_contention_rate = 120;
static struct callout objcache_callout;

#define MAXMAGSIZE 512

/*
 * Check depot contention and increase magazine size if necessary.
 *
 * Callout handler.  For every cache whose depot was contested more often
 * than objcache_contention_rate since the last run, the depot's
 * magazines are discarded and the magazine capacity is doubled (while
 * below MAXMAGSIZE).  The handler re-arms itself before returning.
 */
static void
objcache_timer(void *dummy)
{
	struct objcache *oc;
	struct magazinedepot *depot;
	struct magazinelist tmplist;

	/*
	 * XXX we need to detect when an objcache is destroyed out from
	 * under us XXX
	 *
	 * The list spinlock is dropped inside the loop below while the
	 * discarded magazines are purged.
	 */

	SLIST_INIT(&tmplist);

	spin_lock(&objcachelist_spin);
	LIST_FOREACH(oc, &allobjcaches, oc_next) {
		depot = &oc->depot[myclusterid];
		if (depot->magcapacity < MAXMAGSIZE) {
			if (depot->contested > objcache_contention_rate) {
				spin_lock(&depot->spin);
				depot_disassociate(depot, &tmplist);
				depot->magcapacity *= 2;
				spin_unlock(&depot->spin);
				kprintf("objcache_timer: increasing cache %s"
				       " magsize to %d, contested %d times\n",
				    oc->name, depot->magcapacity,
				    depot->contested);
			}
			depot->contested = 0;
		}
		/* purging may block, so it is done without the list lock */
		spin_unlock(&objcachelist_spin);
		if (maglist_purge(oc, &tmplist) > 0 && depot->waiting)
			wakeup(depot);
		spin_lock(&objcachelist_spin);
	}
	spin_unlock(&objcachelist_spin);

	callout_reset(&objcache_callout, objcache_rebalance_period,
		      objcache_timer, NULL);
}

#endif
986 
987 static void
988 objcache_init(void)
989 {
990 	spin_init(&objcachelist_spin, "objcachelist");
991 
992 	magazine_capmin = mag_capacity_align(MAGAZINE_CAPACITY_MIN);
993 	magazine_capmax = mag_capacity_align(MAGAZINE_CAPACITY_MAX);
994 	if (bootverbose) {
995 		kprintf("objcache: magazine cap [%d, %d]\n",
996 		    magazine_capmin, magazine_capmax);
997 	}
998 
999 #if 0
1000 	callout_init_mp(&objcache_callout);
1001 	objcache_rebalance_period = 60 * hz;
1002 	callout_reset(&objcache_callout, objcache_rebalance_period,
1003 		      objcache_timer, NULL);
1004 #endif
1005 }
1006 SYSINIT(objcache, SI_BOOT2_OBJCACHE, SI_ORDER_FIRST, objcache_init, 0);
1007