1 /*
2  * Copyright (c) 2005 Jeffrey M. Hsu.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Jeffrey M. Hsu.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of The DragonFly Project nor the names of its
16  *    contributors may be used to endorse or promote products derived
17  *    from this software without specific, prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * $DragonFly: src/sys/kern/kern_objcache.c,v 1.23 2008/10/26 04:29:19 sephe Exp $
33  */
34 
35 #include <sys/param.h>
36 #include <sys/kernel.h>
37 #include <sys/systm.h>
38 #include <sys/callout.h>
39 #include <sys/globaldata.h>
40 #include <sys/malloc.h>
41 #include <sys/queue.h>
42 #include <sys/objcache.h>
43 #include <sys/spinlock.h>
44 #include <sys/thread.h>
45 #include <sys/thread2.h>
46 #include <sys/spinlock2.h>
47 
48 static MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");
49 static MALLOC_DEFINE(M_OBJMAG, "objcache magazine", "Object Cache Magazine");
50 
51 #define	INITIAL_MAG_CAPACITY	64
52 
53 struct magazine {
54 	int			 rounds;
55 	int			 capacity;
56 	SLIST_ENTRY(magazine)	 nextmagazine;
57 	void			*objects[];
58 };
59 
60 SLIST_HEAD(magazinelist, magazine);
61 
62 #define MAGAZINE_HDRSIZE	__offsetof(struct magazine, objects[0])
63 #define MAGAZINE_CAPACITY_MAX	128
64 #define MAGAZINE_CAPACITY_MIN	4
65 
66 /*
67  * per-cluster cache of magazines
68  *
69  * All fields in this structure are protected by the spinlock.
70  */
71 struct magazinedepot {
72 	/*
73 	 * The per-cpu object caches only exchange completely full or
74 	 * completely empty magazines with the depot layer, so the depot
75 	 * only has to cache these two types of magazines.
76 	 */
77 	struct magazinelist	fullmagazines;
78 	struct magazinelist	emptymagazines;
79 	int			magcapacity;
80 
81 	/* protect this structure */
82 	struct spinlock		spin;
83 
84 	/* objects not yet allocated towards the limit */
85 	int			unallocated_objects;
86 
87 	/* infrequently used fields */
88 	int			waiting;	/* waiting for another cpu to
89 						 * return a full magazine to
90 						 * the depot */
91 	int			contested;	/* depot contention count */
92 } __cachealign;
93 
94 /*
95  * per-cpu object cache
96  * All fields in this structure are protected by crit_enter().
97  */
98 struct percpu_objcache {
99 	struct magazine	*loaded_magazine;	/* active magazine */
100 	struct magazine	*previous_magazine;	/* backup magazine */
101 
102 	/* statistics */
103 	int		gets_cumulative;	/* total calls to get */
104 	int		gets_null;		/* objcache_get returned NULL */
105 	int		puts_cumulative;	/* total calls to put */
106 	int		puts_othercluster;	/* returned to other cluster */
107 
108 	/* infrequently used fields */
109 	int		waiting;	/* waiting for a thread on this cpu to
110 					 * return an obj to the per-cpu cache */
111 } __cachealign;
112 
113 /* only until we have NUMA cluster topology information XXX */
114 #define MAXCLUSTERS 1
115 #define myclusterid 0
116 #define CLUSTER_OF(obj) 0
117 
118 /*
119  * Two-level object cache consisting of NUMA cluster-level depots of
120  * fully loaded or completely empty magazines and cpu-level caches of
121  * individual objects.
122  */
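/*
 * Rough schematic of the object flow:
 *
 *	objcache_get() / objcache_put()
 *	       |
 *	per-cpu layer:	loaded_magazine <-> previous_magazine	(crit_enter)
 *	       |	exchanges completely full / empty magazines
 *	depot layer:	fullmagazines / emptymagazines		(depot->spin)
 *	       |	allocates and frees individual objects
 *	back-end:	oc->alloc() / oc->free()
 */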
123 struct objcache {
124 	char			*name;
125 
126 	/* object constructor and destructor from blank storage */
127 	objcache_ctor_fn	*ctor;
128 	objcache_dtor_fn	*dtor;
129 	void			*privdata;
130 
131 	/* interface to underlying allocator */
132 	objcache_alloc_fn	*alloc;
133 	objcache_free_fn	*free;
134 	void			*allocator_args;
135 
136 	LIST_ENTRY(objcache)	oc_next;
137 	int			exhausted;	/* object limit reached */
138 
139 	/* NUMA-cluster level caches */
140 	struct magazinedepot	depot[MAXCLUSTERS];
141 
142 	struct percpu_objcache	cache_percpu[];		/* per-cpu caches */
143 };
144 
145 static struct spinlock objcachelist_spin;
146 static LIST_HEAD(objcachelist, objcache) allobjcaches;
147 static int magazine_capmin;
148 static int magazine_capmax;
149 
150 static struct magazine *
151 mag_alloc(int capacity)
152 {
153 	struct magazine *mag;
154 	int size;
155 
156 	size = __offsetof(struct magazine, objects[capacity]);
157 	KASSERT(size > 0 && (size & __VM_CACHELINE_MASK) == 0,
158 	    ("magazine size is not a multiple of the cache line size"));
159 
160 	mag = kmalloc_cachealign(size, M_OBJMAG, M_INTWAIT | M_ZERO);
161 	mag->capacity = capacity;
162 	mag->rounds = 0;
163 	return (mag);
164 }
165 
166 static int
167 mag_capacity_align(int mag_capacity)
168 {
169 	int mag_size;
170 
171 	mag_size = __VM_CACHELINE_ALIGN(
172 	    __offsetof(struct magazine, objects[mag_capacity]));
173 	mag_capacity = (mag_size - MAGAZINE_HDRSIZE) / sizeof(void *);
174 
175 	return mag_capacity;
176 }
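/*
 * Worked example, assuming 64-byte cache lines and 8-byte pointers:
 * MAGAZINE_HDRSIZE is then 16, so a requested capacity of 4 yields a
 * raw size of 16 + 4*8 = 48, which is padded out to one cache line
 * (64 bytes), and the aligned capacity becomes (64 - 16) / 8 = 6
 * rounds per magazine.
 */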
177 
178 /*
179  * Utility routines for objects that don't require any construction or destruction.
180  */
181 
182 static void
183 null_dtor(void *obj, void *privdata)
184 {
185 	/* do nothing */
186 }
187 
188 static boolean_t
189 null_ctor(void *obj, void *privdata, int ocflags)
190 {
191 	return TRUE;
192 }
193 
194 /*
195  * Create an object cache.
196  */
197 struct objcache *
198 objcache_create(const char *name, int cluster_limit, int nom_cache,
199 		objcache_ctor_fn *ctor, objcache_dtor_fn *dtor, void *privdata,
200 		objcache_alloc_fn *alloc, objcache_free_fn *free,
201 		void *allocator_args)
202 {
203 	struct objcache *oc;
204 	struct magazinedepot *depot;
205 	int cpuid;
206 	int nmagdepot;
207 	int mag_capacity;
208 	int i;
209 
210 	/*
211 	 * Allocate object cache structure
212 	 */
213 	oc = kmalloc_cachealign(
214 	    __offsetof(struct objcache, cache_percpu[ncpus]),
215 	    M_OBJCACHE, M_WAITOK | M_ZERO);
216 	oc->name = kstrdup(name, M_TEMP);
217 	oc->ctor = ctor ? ctor : null_ctor;
218 	oc->dtor = dtor ? dtor : null_dtor;
219 	oc->privdata = privdata;
220 	oc->alloc = alloc;
221 	oc->free = free;
222 	oc->allocator_args = allocator_args;
223 
224 	/*
225 	 * Initialize depot list(s).
226 	 */
227 	depot = &oc->depot[0];
228 
229 	spin_init(&depot->spin);
230 	SLIST_INIT(&depot->fullmagazines);
231 	SLIST_INIT(&depot->emptymagazines);
232 
233 	/*
234 	 * Figure out the nominal number of free objects to cache and
235 	 * the magazine capacity.  By default we want to cache up to
236 	 * half the cluster_limit.  If there is no cluster_limit then
237 	 * we want to cache up to 128 objects.
238 	 */
239 	if (nom_cache == 0)
240 		nom_cache = cluster_limit / 2;
241 	if (cluster_limit && nom_cache > cluster_limit)
242 		nom_cache = cluster_limit;
243 	if (nom_cache == 0)
244 		nom_cache = INITIAL_MAG_CAPACITY * 2;
245 
246 	/*
247 	 * Magazine capacity for 2 active magazines per cpu plus 2
248 	 * magazines in the depot.
249 	 */
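	/*
	 * For example, a sketch assuming 4 cpus, 64-byte cache lines and
	 * 8-byte pointers: with cluster_limit == 0 the defaults above give
	 * nom_cache = 128, so the request below is 128 / 5 / 2 + 1 = 13
	 * rounds, which mag_capacity_align() pads out to 14.
	 */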
250 	mag_capacity = mag_capacity_align(nom_cache / (ncpus + 1) / 2 + 1);
251 	if (mag_capacity > magazine_capmax)
252 		mag_capacity = magazine_capmax;
253 	else if (mag_capacity < magazine_capmin)
254 		mag_capacity = magazine_capmin;
255 	depot->magcapacity = mag_capacity;
256 
257 	/*
258 	 * The cluster_limit must be sufficient to have two magazines per
259 	 * cpu plus at least two magazines in the depot.  However, because
260 	 * partial magazines can stay on the cpus, what we effectively
261 	 * budget here is two full magazines per cpu plus cluster_limit
262 	 * extra objects for the depot.
263 	 */
264 	if (cluster_limit == 0) {
265 		depot->unallocated_objects = -1;
266 	} else {
267 		depot->unallocated_objects = ncpus * mag_capacity * 2 +
268 					     cluster_limit;
269 	}
270 
271 	/*
272 	 * Initialize per-cpu caches
273 	 */
274 	for (cpuid = 0; cpuid < ncpus; cpuid++) {
275 		struct percpu_objcache *cache_percpu = &oc->cache_percpu[cpuid];
276 
277 		cache_percpu->loaded_magazine = mag_alloc(mag_capacity);
278 		cache_percpu->previous_magazine = mag_alloc(mag_capacity);
279 	}
280 
281 	/*
282 	 * Compute how many empty magazines to place in the depot.  This
283 	 * determines the retained cache size and is based on nom_cache.
284 	 *
285 	 * The actual cache size is larger because there are also two
286 	 * magazines for each cpu, but those can be in any fill state so
287 	 * we cannot count them.
288 	 *
289 	 * There is a minimum of two magazines in the depot.
290 	 */
291 	nmagdepot = nom_cache / mag_capacity + 1;
292 	if (nmagdepot < 2)
293 		nmagdepot = 2;
294 	if (bootverbose) {
295 		kprintf("ndepotmags=%-3d x mag_cap=%-3d for %s\n",
296 			nmagdepot, mag_capacity, name);
297 	}
298 
299 	/*
300 	 * Put empty magazines in depot
301 	 */
302 	for (i = 0; i < nmagdepot; i++) {
303 		struct magazine *mag = mag_alloc(mag_capacity);
304 		SLIST_INSERT_HEAD(&depot->emptymagazines, mag, nextmagazine);
305 	}
306 
307 	spin_lock(&objcachelist_spin);
308 	LIST_INSERT_HEAD(&allobjcaches, oc, oc_next);
309 	spin_unlock(&objcachelist_spin);
310 
311 	return (oc);
312 }
313 
314 struct objcache *
315 objcache_create_simple(malloc_type_t mtype, size_t objsize)
316 {
317 	struct objcache_malloc_args *margs;
318 	struct objcache *oc;
319 
320 	margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
321 	margs->objsize = objsize;
322 	margs->mtype = mtype;
323 	oc = objcache_create(mtype->ks_shortdesc, 0, 0,
324 			     NULL, NULL, NULL,
325 			     objcache_malloc_alloc, objcache_malloc_free,
326 			     margs);
327 	return (oc);
328 }
329 
330 struct objcache *
331 objcache_create_mbacked(malloc_type_t mtype, size_t objsize,
332 			int cluster_limit, int nom_cache,
333 			objcache_ctor_fn *ctor, objcache_dtor_fn *dtor,
334 			void *privdata)
335 {
336 	struct objcache_malloc_args *margs;
337 	struct objcache *oc;
338 
339 	margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
340 	margs->objsize = objsize;
341 	margs->mtype = mtype;
342 	oc = objcache_create(mtype->ks_shortdesc,
343 			     cluster_limit, nom_cache,
344 			     ctor, dtor, privdata,
345 			     objcache_malloc_alloc, objcache_malloc_free,
346 			     margs);
347 	return(oc);
348 }
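/*
 * Example usage of the constructors above (a minimal sketch; struct foo,
 * foo_ctor(), foo_dtor(), foo_cache and foo_example() are hypothetical
 * names, not part of this file):
 */
#if 0
struct foo {
	int	state;
};

static struct objcache *foo_cache;

/* one-time initialization of blank storage from the back-end allocator */
static boolean_t
foo_ctor(void *obj, void *privdata, int ocflags)
{
	struct foo *fp = obj;

	fp->state = 0;
	return (TRUE);
}

/* final teardown before storage is returned to the back-end allocator */
static void
foo_dtor(void *obj, void *privdata)
{
}

static void
foo_example(void)
{
	struct foo *fp;

	/* unlimited cluster_limit, default nominal cache size */
	foo_cache = objcache_create_mbacked(M_TEMP, sizeof(struct foo),
					    0, 0, foo_ctor, foo_dtor, NULL);

	fp = objcache_get(foo_cache, M_WAITOK);
	/* ... use fp; a recycled object is not zeroed ... */
	objcache_put(foo_cache, fp);
}
#endif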
349 
350 
351 #define MAGAZINE_EMPTY(mag)	(mag->rounds == 0)
352 #define MAGAZINE_NOTEMPTY(mag)	(mag->rounds != 0)
353 #define MAGAZINE_FULL(mag)	(mag->rounds == mag->capacity)
354 
355 #define	swap(x, y)	({ struct magazine *t = x; x = y; y = t; })
356 
357 /*
358  * Get an object from the object cache.
359  *
360  * WARNING!  ocflags are only used when we have to go to the underlying
361  * allocator, so we cannot depend on flags such as M_ZERO.
362  */
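/*
 * For example, a caller that needs a zeroed object must clear it after
 * the fact (a sketch; objsize stands for whatever size the cache serves):
 *
 *	obj = objcache_get(oc, M_WAITOK);
 *	if (obj != NULL)
 *		bzero(obj, objsize);
 *
 * A recycled object never passes through oc->alloc(), and the KKASSERT
 * below rejects M_ZERO outright.
 */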
363 void *
364 objcache_get(struct objcache *oc, int ocflags)
365 {
366 	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
367 	struct magazine *loadedmag;
368 	struct magazine *emptymag;
369 	void *obj;
370 	struct magazinedepot *depot;
371 
372 	KKASSERT((ocflags & M_ZERO) == 0);
373 	crit_enter();
374 	++cpucache->gets_cumulative;
375 
376 retry:
377 	/*
378 	 * Loaded magazine has an object.  This is the hot path.
379 	 * It is lock-free and uses a critical section to block
380 	 * out interrupt handlers on the same processor.
381 	 */
382 	loadedmag = cpucache->loaded_magazine;
383 	if (MAGAZINE_NOTEMPTY(loadedmag)) {
384 		obj = loadedmag->objects[--loadedmag->rounds];
385 		crit_exit();
386 		return (obj);
387 	}
388 
389 	/* Previous magazine has an object. */
390 	if (MAGAZINE_NOTEMPTY(cpucache->previous_magazine)) {
391 		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
392 		loadedmag = cpucache->loaded_magazine;
393 		obj = loadedmag->objects[--loadedmag->rounds];
394 		crit_exit();
395 		return (obj);
396 	}
397 
398 	/*
399 	 * Both magazines empty.  Get a full magazine from the depot and
400 	 * move one of the empty ones to the depot.
401 	 *
402 	 * Obtain the depot spinlock.
403 	 *
404 	 * NOTE: Beyond this point, M_* flags are handled via oc->alloc()
405 	 */
406 	depot = &oc->depot[myclusterid];
407 	spin_lock(&depot->spin);
408 
409 	/*
410 	 * Recheck the cpucache after obtaining the depot spinlock.  This
411 	 * shouldn't be necessary now but don't take any chances.
412 	 */
413 	if (MAGAZINE_NOTEMPTY(cpucache->loaded_magazine) ||
414 	    MAGAZINE_NOTEMPTY(cpucache->previous_magazine)
415 	) {
416 		spin_unlock(&depot->spin);
417 		goto retry;
418 	}
419 
420 	/* Check if depot has a full magazine. */
421 	if (!SLIST_EMPTY(&depot->fullmagazines)) {
422 		emptymag = cpucache->previous_magazine;
423 		cpucache->previous_magazine = cpucache->loaded_magazine;
424 		cpucache->loaded_magazine = SLIST_FIRST(&depot->fullmagazines);
425 		SLIST_REMOVE_HEAD(&depot->fullmagazines, nextmagazine);
426 
427 		/*
428 		 * Return emptymag to the depot.
429 		 */
430 		KKASSERT(MAGAZINE_EMPTY(emptymag));
431 		SLIST_INSERT_HEAD(&depot->emptymagazines,
432 				  emptymag, nextmagazine);
433 		spin_unlock(&depot->spin);
434 		goto retry;
435 	}
436 
437 	/*
438 	 * The depot does not have any non-empty magazines.  If we have
439 	 * not hit our object limit we can allocate a new object using
440 	 * the back-end allocator.
441 	 *
442 	 * note: unallocated_objects can be initialized to -1, which has
443 	 * the effect of removing any allocation limits.
444 	 */
445 	if (depot->unallocated_objects) {
446 		--depot->unallocated_objects;
447 		spin_unlock(&depot->spin);
448 		crit_exit();
449 
450 		obj = oc->alloc(oc->allocator_args, ocflags);
451 		if (obj) {
452 			if (oc->ctor(obj, oc->privdata, ocflags))
453 				return (obj);
454 			oc->free(obj, oc->allocator_args);
455 			obj = NULL;
456 		}
457 		if (obj == NULL) {
458 			spin_lock(&depot->spin);
459 			++depot->unallocated_objects;
460 			spin_unlock(&depot->spin);
461 			if (depot->waiting)
462 				wakeup(depot);
463 
464 			crit_enter();
465 			/*
466 			 * makes debugging easier when gets_cumulative does
467 			 * not include gets_null.
468 			 */
469 			++cpucache->gets_null;
470 			--cpucache->gets_cumulative;
471 			crit_exit();
472 		}
473 		return(obj);
474 	}
475 	if (oc->exhausted == 0) {
476 		kprintf("Warning, objcache(%s): Exhausted!\n", oc->name);
477 		oc->exhausted = 1;
478 	}
479 
480 	/*
481 	 * Otherwise block if allowed to.
482 	 */
483 	if ((ocflags & (M_WAITOK|M_NULLOK)) == M_WAITOK) {
484 		++cpucache->waiting;
485 		++depot->waiting;
486 		ssleep(depot, &depot->spin, 0, "objcache_get", 0);
487 		--cpucache->waiting;
488 		--depot->waiting;
489 		spin_unlock(&depot->spin);
490 		goto retry;
491 	}
492 
493 	/*
494 	 * Otherwise fail
495 	 */
496 	++cpucache->gets_null;
497 	--cpucache->gets_cumulative;
498 	crit_exit();
499 	spin_unlock(&depot->spin);
500 	return (NULL);
501 }
502 
503 /*
504  * Wrapper for malloc allocation routines.
505  */
506 void *
507 objcache_malloc_alloc(void *allocator_args, int ocflags)
508 {
509 	struct objcache_malloc_args *alloc_args = allocator_args;
510 
511 	return (kmalloc(alloc_args->objsize, alloc_args->mtype,
512 		       ocflags & OC_MFLAGS));
513 }
514 
515 void
516 objcache_malloc_free(void *obj, void *allocator_args)
517 {
518 	struct objcache_malloc_args *alloc_args = allocator_args;
519 
520 	kfree(obj, alloc_args->mtype);
521 }
522 
523 /*
524  * Wrapper for allocation policies that pre-allocate at initialization time
525  * and don't do run-time allocation.
526  */
527 void *
528 objcache_nop_alloc(void *allocator_args, int ocflags)
529 {
530 	return (NULL);
531 }
532 
533 void
534 objcache_nop_free(void *obj, void *allocator_args)
535 {
536 }
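/*
 * These are meant to pair with caches whose objects are pre-loaded into
 * the depot (e.g. via the disabled objcache_populate_linear() below);
 * once the pre-loaded objects run out, objcache_get() simply returns
 * NULL instead of allocating more.
 */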
537 
538 /*
539  * Return an object to the object cache.
540  */
541 void
542 objcache_put(struct objcache *oc, void *obj)
543 {
544 	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
545 	struct magazine *loadedmag;
546 	struct magazinedepot *depot;
547 
548 	crit_enter();
549 	++cpucache->puts_cumulative;
550 
551 	if (CLUSTER_OF(obj) != myclusterid) {
552 #ifdef notyet
553 		/* use lazy IPI to send object to owning cluster XXX todo */
554 		++cpucache->puts_othercluster;
555 		crit_exit();
556 		return;
557 #endif
558 	}
559 
560 retry:
561 	/*
562 	 * Free slot available in loaded magazine.  This is the hot path.
563 	 * It is lock-free and uses a critical section to block out interrupt
564 	 * handlers on the same processor.
565 	 */
566 	loadedmag = cpucache->loaded_magazine;
567 	if (!MAGAZINE_FULL(loadedmag)) {
568 		loadedmag->objects[loadedmag->rounds++] = obj;
569 		if (cpucache->waiting)
570 			wakeup_mycpu(&oc->depot[myclusterid]);
571 		crit_exit();
572 		return;
573 	}
574 
575 	/*
576 	 * Current magazine full, but previous magazine has room.  XXX
577 	 */
578 	if (!MAGAZINE_FULL(cpucache->previous_magazine)) {
579 		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
580 		loadedmag = cpucache->loaded_magazine;
581 		loadedmag->objects[loadedmag->rounds++] = obj;
582 		if (cpucache->waiting)
583 			wakeup_mycpu(&oc->depot[myclusterid]);
584 		crit_exit();
585 		return;
586 	}
587 
588 	/*
589 	 * Both magazines full.  Get an empty magazine from the depot and
590 	 * move a full loaded magazine to the depot.  Even though the
591 	 * magazine may wind up with space available after we block on
592 	 * the spinlock, we still cycle it through to avoid the non-optimal
593 	 * corner-case.
594 	 *
595 	 * Obtain the depot spinlock.
596 	 */
597 	depot = &oc->depot[myclusterid];
598 	spin_lock(&depot->spin);
599 
600 	/*
601 	 * If an empty magazine is available in the depot, cycle it
602 	 * through and retry.
603 	 */
604 	if (!SLIST_EMPTY(&depot->emptymagazines)) {
605 		loadedmag = cpucache->previous_magazine;
606 		cpucache->previous_magazine = cpucache->loaded_magazine;
607 		cpucache->loaded_magazine = SLIST_FIRST(&depot->emptymagazines);
608 		SLIST_REMOVE_HEAD(&depot->emptymagazines, nextmagazine);
609 
610 		/*
611 		 * Return loadedmag to the depot.  Due to blocking it may
612 		 * not be entirely full and could even be empty.
613 		 */
614 		if (MAGAZINE_EMPTY(loadedmag)) {
615 			SLIST_INSERT_HEAD(&depot->emptymagazines,
616 					  loadedmag, nextmagazine);
617 			spin_unlock(&depot->spin);
618 		} else {
619 			SLIST_INSERT_HEAD(&depot->fullmagazines,
620 					  loadedmag, nextmagazine);
621 			spin_unlock(&depot->spin);
622 			if (depot->waiting)
623 				wakeup(depot);
624 		}
625 		goto retry;
626 	}
627 
628 	/*
629 	 * An empty mag is not available.  This is a corner case which can
630 	 * occur due to cpus holding partially full magazines.  Do not try
631 	 * to allocate a mag, just free the object.
632 	 */
633 	++depot->unallocated_objects;
634 	spin_unlock(&depot->spin);
635 	if (depot->waiting)
636 		wakeup(depot);
637 	crit_exit();
638 	oc->dtor(obj, oc->privdata);
639 	oc->free(obj, oc->allocator_args);
640 }
641 
642 /*
643  * The object is being put back into the cache, but the caller has
644  * indicated that the object is not in any shape to be reused and should
645  * be dtor'd immediately.
646  */
647 void
648 objcache_dtor(struct objcache *oc, void *obj)
649 {
650 	struct magazinedepot *depot;
651 
652 	depot = &oc->depot[myclusterid];
653 	spin_lock(&depot->spin);
654 	++depot->unallocated_objects;
655 	spin_unlock(&depot->spin);
656 	if (depot->waiting)
657 		wakeup(depot);
658 	oc->dtor(obj, oc->privdata);
659 	oc->free(obj, oc->allocator_args);
660 }
661 
662 /*
663  * Deallocate all objects in a magazine and free the magazine if requested.
664  * When freeit is TRUE the magazine must already be disassociated from the
665  * depot.
666  *
667  * Must be called with a critical section held when called with a per-cpu
668  * magazine.  The magazine may be indirectly modified during the loop.
669  *
670  * If the magazine moves during a dtor the operation is aborted.  This is
671  * only allowed when freeit is FALSE.
672  *
673  * The number of objects freed is returned.
674  */
675 static int
676 mag_purge(struct objcache *oc, struct magazine **magp, int freeit)
677 {
678 	struct magazine *mag = *magp;
679 	int count;
680 	void *obj;
681 
682 	count = 0;
683 	while (mag->rounds) {
684 		obj = mag->objects[--mag->rounds];
685 		oc->dtor(obj, oc->privdata);		/* MAY BLOCK */
686 		oc->free(obj, oc->allocator_args);	/* MAY BLOCK */
687 		++count;
688 
689 		/*
690 		 * Cycle for interrupts.
691 		 */
692 		if ((count & 15) == 0) {
693 			crit_exit();
694 			crit_enter();
695 		}
696 
697 		/*
698 		 * mag may have become invalid either due to dtor/free
699 		 * blocking or interrupt cycling, do not dereference it
700 		 * until we check.
701 		 */
702 		if (*magp != mag) {
703 			kprintf("mag_purge: mag ripped out\n");
704 			break;
705 		}
706 	}
707 	if (freeit) {
708 		KKASSERT(*magp == mag);
709 		*magp = NULL;
710 		kfree(mag, M_OBJMAG);
711 	}
712 	return(count);
713 }
714 
715 /*
716  * Disassociate zero or more magazines from a magazine list associated with
717  * the depot, update the depot, and move the magazines to a temporary
718  * list.
719  *
720  * The caller must check the depot for waiters and wake it up, typically
721  * after disposing of the magazines this function loads onto the temporary
722  * list.
723  */
724 static void
725 maglist_disassociate(struct magazinedepot *depot, struct magazinelist *maglist,
726 		     struct magazinelist *tmplist, boolean_t purgeall)
727 {
728 	struct magazine *mag;
729 
730 	while ((mag = SLIST_FIRST(maglist)) != NULL) {
731 		SLIST_REMOVE_HEAD(maglist, nextmagazine);
732 		SLIST_INSERT_HEAD(tmplist, mag, nextmagazine);
733 		depot->unallocated_objects += mag->rounds;
734 	}
735 }
736 
737 /*
738  * Deallocate all magazines and their contents from the passed temporary
739  * list.  The magazines have already been accounted for by their depots.
740  *
741  * The total number of rounds freed is returned.  This number is typically
742  * only used to determine whether a wakeup on the depot is needed or not.
743  */
744 static int
745 maglist_purge(struct objcache *oc, struct magazinelist *maglist)
746 {
747 	struct magazine *mag;
748 	int count = 0;
749 
750 	/*
751 	 * can't use SLIST_FOREACH because blocking releases the depot
752 	 * spinlock
753 	 */
754 	crit_enter();
755 	while ((mag = SLIST_FIRST(maglist)) != NULL) {
756 		SLIST_REMOVE_HEAD(maglist, nextmagazine);
757 		count += mag_purge(oc, &mag, TRUE);
758 	}
759 	crit_exit();
760 	return(count);
761 }
762 
763 /*
764  * De-allocates all magazines on the full and empty magazine lists.
765  *
766  * Because this routine is called with a spinlock held, the magazines
767  * can only be disassociated and moved to a temporary list, not freed.
768  *
769  * The caller is responsible for freeing the magazines.
770  */
771 static void
772 depot_disassociate(struct magazinedepot *depot, struct magazinelist *tmplist)
773 {
774 	maglist_disassociate(depot, &depot->fullmagazines, tmplist, TRUE);
775 	maglist_disassociate(depot, &depot->emptymagazines, tmplist, TRUE);
776 }
777 
778 #ifdef notneeded
779 void
780 objcache_reclaim(struct objcache *oc)
781 {
782 	struct percpu_objcache *cache_percpu = &oc->cache_percpu[mycpuid];
783 	struct magazinedepot *depot = &oc->depot[myclusterid];
784 	struct magazinelist tmplist;
785 	int count;
786 
787 	SLIST_INIT(&tmplist);
788 	crit_enter();
789 	count = mag_purge(oc, &cache_percpu->loaded_magazine, FALSE);
790 	count += mag_purge(oc, &cache_percpu->previous_magazine, FALSE);
791 	crit_exit();
792 
793 	spin_lock(&depot->spin);
794 	depot->unallocated_objects += count;
795 	depot_disassociate(depot, &tmplist);
796 	spin_unlock(&depot->spin);
797 	count += maglist_purge(oc, &tmplist);
798 	if (count && depot->waiting)
799 		wakeup(depot);
800 }
801 #endif
802 
803 /*
804  * Try to free up some memory.  Return as soon as some free memory is found.
805  * For each object cache on the reclaim list, first try the current per-cpu
806  * cache, then the full magazine depot.
807  */
808 boolean_t
809 objcache_reclaimlist(struct objcache *oclist[], int nlist, int ocflags)
810 {
811 	struct objcache *oc;
812 	struct percpu_objcache *cpucache;
813 	struct magazinedepot *depot;
814 	struct magazinelist tmplist;
815 	int i, count;
816 
817 	kprintf("objcache_reclaimlist\n");
818 
819 	SLIST_INIT(&tmplist);
820 
821 	for (i = 0; i < nlist; i++) {
822 		oc = oclist[i];
823 		cpucache = &oc->cache_percpu[mycpuid];
824 		depot = &oc->depot[myclusterid];
825 
826 		crit_enter();
827 		count = mag_purge(oc, &cpucache->loaded_magazine, FALSE);
828 		if (count == 0)
829 			count += mag_purge(oc, &cpucache->previous_magazine, FALSE);
830 		crit_exit();
831 		if (count > 0) {
832 			spin_lock(&depot->spin);
833 			depot->unallocated_objects += count;
834 			spin_unlock(&depot->spin);
835 			if (depot->waiting)
836 				wakeup(depot);
837 			return (TRUE);
838 		}
839 		spin_lock(&depot->spin);
840 		maglist_disassociate(depot, &depot->fullmagazines,
841 				     &tmplist, FALSE);
842 		spin_unlock(&depot->spin);
843 		count = maglist_purge(oc, &tmplist);
844 		if (count > 0) {
845 			if (depot->waiting)
846 				wakeup(depot);
847 			return (TRUE);
848 		}
849 	}
850 	return (FALSE);
851 }
852 
853 /*
854  * Destroy an object cache.  Must have no existing references.
855  */
856 void
857 objcache_destroy(struct objcache *oc)
858 {
859 	struct percpu_objcache *cache_percpu;
860 	struct magazinedepot *depot;
861 	int clusterid, cpuid;
862 	struct magazinelist tmplist;
863 
864 	spin_lock(&objcachelist_spin);
865 	LIST_REMOVE(oc, oc_next);
866 	spin_unlock(&objcachelist_spin);
867 
868 	SLIST_INIT(&tmplist);
869 	for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++) {
870 		depot = &oc->depot[clusterid];
871 		spin_lock(&depot->spin);
872 		depot_disassociate(depot, &tmplist);
873 		spin_unlock(&depot->spin);
874 	}
875 	maglist_purge(oc, &tmplist);
876 
877 	for (cpuid = 0; cpuid < ncpus; cpuid++) {
878 		cache_percpu = &oc->cache_percpu[cpuid];
879 
880 		crit_enter();
881 		mag_purge(oc, &cache_percpu->loaded_magazine, TRUE);
882 		mag_purge(oc, &cache_percpu->previous_magazine, TRUE);
883 		crit_exit();
884 		cache_percpu->loaded_magazine = NULL;
885 		cache_percpu->previous_magazine = NULL;
886 		/* don't bother adjusting depot->unallocated_objects */
887 	}
888 
889 	kfree(oc->name, M_TEMP);
890 	kfree(oc, M_OBJCACHE);
891 }
892 
893 #if 0
894 /*
895  * Populate the per-cluster depot with elements from a linear block
896  * of memory.  Must be called individually for each cluster.
897  * Populated depots should not be destroyed.
898  */
899 void
900 objcache_populate_linear(struct objcache *oc, void *base, int nelts, int size)
901 {
902 	char *p = base;
903 	char *end = (char *)base + (nelts * size);
904 	struct magazinedepot *depot = &oc->depot[myclusterid];
905 	struct magazine *emptymag = mag_alloc(depot->magcapacity);
906 
907 	while (p < end) {
908 		emptymag->objects[emptymag->rounds++] = p;
909 		if (MAGAZINE_FULL(emptymag)) {
910 			spin_lock(&depot->spin);
911 			SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
912 					  nextmagazine);
913 			depot->unallocated_objects += emptymag->rounds;
914 			spin_unlock(&depot->spin);
915 			if (depot->waiting)
916 				wakeup(depot);
917 			emptymag = mag_alloc(depot->magcapacity);
918 		}
919 		p += size;
920 	}
921 	if (MAGAZINE_EMPTY(emptymag)) {
922 		crit_enter();
923 		mag_purge(oc, &emptymag, TRUE);
924 		crit_exit();
925 	} else {
926 		spin_lock(&depot->spin);
927 		SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
928 				  nextmagazine);
929 		depot->unallocated_objects += emptymag->rounds;
930 		spin_unlock(&depot->spin);
931 		if (depot->waiting)
932 			wakeup(depot);
934 	}
935 }
936 #endif
937 
938 #if 0
939 /*
940  * Check depot contention once a minute.
941  * 2 contested locks per second allowed.
942  */
943 static int objcache_rebalance_period;
944 static const int objcache_contention_rate = 120;
945 static struct callout objcache_callout;
946 
947 #define MAXMAGSIZE 512
948 
949 /*
950  * Check depot contention and increase magazine size if necessary.
951  */
952 static void
953 objcache_timer(void *dummy)
954 {
955 	struct objcache *oc;
956 	struct magazinedepot *depot;
957 	struct magazinelist tmplist;
958 
959 	/* XXX we need to detect when an objcache is destroyed out from
960 	 * under us XXX */
961 
962 	SLIST_INIT(&tmplist);
963 
964 	spin_lock(&objcachelist_spin);
965 	LIST_FOREACH(oc, &allobjcaches, oc_next) {
966 		depot = &oc->depot[myclusterid];
967 		if (depot->magcapacity < MAXMAGSIZE) {
968 			if (depot->contested > objcache_contention_rate) {
969 				spin_lock(&depot->spin);
970 				depot_disassociate(depot, &tmplist);
971 				depot->magcapacity *= 2;
972 				spin_unlock(&depot->spin);
973 				kprintf("objcache_timer: increasing cache %s"
974 				       " magsize to %d, contested %d times\n",
975 				    oc->name, depot->magcapacity,
976 				    depot->contested);
977 			}
978 			depot->contested = 0;
979 		}
980 		spin_unlock(&objcachelist_spin);
981 		if (maglist_purge(oc, &tmplist) > 0 && depot->waiting)
982 			wakeup(depot);
983 		spin_lock(&objcachelist_spin);
984 	}
985 	spin_unlock(&objcachelist_spin);
986 
987 	callout_reset(&objcache_callout, objcache_rebalance_period,
988 		      objcache_timer, NULL);
989 }
990 
991 #endif
992 
993 static void
994 objcache_init(void)
995 {
996 	spin_init(&objcachelist_spin);
997 
998 	magazine_capmin = mag_capacity_align(MAGAZINE_CAPACITY_MIN);
999 	magazine_capmax = mag_capacity_align(MAGAZINE_CAPACITY_MAX);
1000 	if (bootverbose) {
1001 		kprintf("objcache: magazine cap [%d, %d]\n",
1002 		    magazine_capmin, magazine_capmax);
1003 	}
1004 
1005 #if 0
1006 	callout_init_mp(&objcache_callout);
1007 	objcache_rebalance_period = 60 * hz;
1008 	callout_reset(&objcache_callout, objcache_rebalance_period,
1009 		      objcache_timer, NULL);
1010 #endif
1011 }
1012 SYSINIT(objcache, SI_BOOT2_OBJCACHE, SI_ORDER_FIRST, objcache_init, 0);
1013