xref: /dragonfly/sys/kern/kern_objcache.c (revision e0b1d537)
1 /*
2  * Copyright (c) 2005 Jeffrey M. Hsu.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Jeffrey M. Hsu.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of The DragonFly Project nor the names of its
16  *    contributors may be used to endorse or promote products derived
17  *    from this software without specific, prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * $DragonFly: src/sys/kern/kern_objcache.c,v 1.23 2008/10/26 04:29:19 sephe Exp $
33  */
34 
35 #include <sys/param.h>
36 #include <sys/kernel.h>
37 #include <sys/systm.h>
38 #include <sys/callout.h>
39 #include <sys/globaldata.h>
40 #include <sys/malloc.h>
41 #include <sys/queue.h>
42 #include <sys/objcache.h>
43 #include <sys/spinlock.h>
44 #include <sys/thread.h>
45 #include <sys/thread2.h>
46 #include <sys/spinlock2.h>
47 
48 static MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");
49 static MALLOC_DEFINE(M_OBJMAG, "objcache magazine", "Object Cache Magazine");
50 
51 #define	INITIAL_MAG_CAPACITY	64
52 
53 struct magazine {
54 	int			 rounds;
55 	int			 capacity;
56 	SLIST_ENTRY(magazine)	 nextmagazine;
57 	void			*objects[];
58 };
59 
60 SLIST_HEAD(magazinelist, magazine);
61 
62 #define MAGAZINE_HDRSIZE	__offsetof(struct magazine, objects[0])
63 #define MAGAZINE_CAPACITY_MAX	128
64 #define MAGAZINE_CAPACITY_MIN	4
65 
66 /*
67  * per-cluster cache of magazines
68  *
69  * All fields in this structure are protected by the spinlock.
70  */
71 struct magazinedepot {
72 	/*
73 	 * The per-cpu object caches only exchange completely full or
74 	 * completely empty magazines with the depot layer, so the depot
75 	 * only has to cache these two types of magazines.
76 	 */
77 	struct magazinelist	fullmagazines;
78 	struct magazinelist	emptymagazines;
79 	int			magcapacity;
80 
81 	/* protect this structure */
82 	struct spinlock		spin;
83 
84 	/* objects not yet allocated towards the limit */
85 	int			unallocated_objects;
86 
87 	/* infrequently used fields */
88 	int			waiting;	/* waiting for another cpu to
89 						 * return a full magazine to
90 						 * the depot */
91 	int			contested;	/* depot contention count */
92 } __cachealign;
93 
94 /*
95  * per-cpu object cache
96  * All fields in this structure are protected by crit_enter().
97  */
98 struct percpu_objcache {
99 	struct magazine	*loaded_magazine;	/* active magazine */
100 	struct magazine	*previous_magazine;	/* backup magazine */
101 
102 	/* statistics */
103 	int		gets_cumulative;	/* total calls to get */
104 	int		gets_null;		/* objcache_get returned NULL */
105 	int		puts_cumulative;	/* total calls to put */
106 	int		puts_othercluster;	/* returned to other cluster */
107 
108 	/* infrequently used fields */
109 	int		waiting;	/* waiting for a thread on this cpu to
110 					 * return an obj to the per-cpu cache */
111 } __cachealign;
112 
113 /* only until we have NUMA cluster topology information XXX */
114 #define MAXCLUSTERS 1
115 #define myclusterid 0
116 #define CLUSTER_OF(obj) 0
117 
118 /*
119  * Two-level object cache consisting of NUMA cluster-level depots of
120  * fully loaded or completely empty magazines and cpu-level caches of
121  * individual objects.
122  */
123 struct objcache {
124 	char			*name;
125 
126 	/* object constructor and destructor from blank storage */
127 	objcache_ctor_fn	*ctor;
128 	objcache_dtor_fn	*dtor;
129 	void			*privdata;
130 
131 	/* interface to underlying allocator */
132 	objcache_alloc_fn	*alloc;
133 	objcache_free_fn	*free;
134 	void			*allocator_args;
135 
136 	LIST_ENTRY(objcache)	oc_next;
137 	int			exhausted;	/* oops */
138 
139 	/* NUMA-cluster level caches */
140 	struct magazinedepot	depot[MAXCLUSTERS];
141 
142 	struct percpu_objcache	cache_percpu[];		/* per-cpu caches */
143 };
144 
145 static struct spinlock objcachelist_spin;
146 static LIST_HEAD(objcachelist, objcache) allobjcaches;
147 static int magazine_capmin;
148 static int magazine_capmax;
149 
150 static struct magazine *
151 mag_alloc(int capacity)
152 {
153 	struct magazine *mag;
154 	int size;
155 
156 	size = __offsetof(struct magazine, objects[capacity]);
157 	KASSERT(size > 0 && (size & __VM_CACHELINE_MASK) == 0,
158 	    ("magazine size is not a multiple of the cache line size"));
159 
160 	mag = kmalloc_cachealign(size, M_OBJMAG, M_INTWAIT | M_ZERO);
161 	mag->capacity = capacity;
162 	mag->rounds = 0;
163 	return (mag);
164 }
165 
166 static int
167 mag_capacity_align(int mag_capacity)
168 {
169 	int mag_size;
170 
171 	mag_size = __VM_CACHELINE_ALIGN(
172 	    __offsetof(struct magazine, objects[mag_capacity]));
173 	mag_capacity = (mag_size - MAGAZINE_HDRSIZE) / sizeof(void *);
174 
175 	return mag_capacity;
176 }
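
/*
 * Worked example of the alignment above (an illustrative sketch only;
 * it assumes an LP64 build with 64-byte cache lines, neither of which
 * is guaranteed here):
 *
 *	MAGAZINE_HDRSIZE = __offsetof(struct magazine, objects[0])
 *			 = 4 (rounds) + 4 (capacity) + 8 (SLIST_ENTRY) = 16
 *
 *	mag_capacity_align(13):
 *		mag_size     = __VM_CACHELINE_ALIGN(16 + 13 * 8)
 *			     = __VM_CACHELINE_ALIGN(120) = 128
 *		mag_capacity = (128 - 16) / 8 = 14
 *
 * i.e. the requested capacity is rounded up until the magazine exactly
 * fills whole cache lines, which is what mag_alloc()'s KASSERT demands.
 */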
177 
178 /*
179  * Utility routines for objects that don't require any construction or de-construction.
180  */
181 
182 static void
183 null_dtor(void *obj, void *privdata)
184 {
185 	/* do nothing */
186 }
187 
188 static boolean_t
189 null_ctor(void *obj, void *privdata, int ocflags)
190 {
191 	return TRUE;
192 }
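
/*
 * For caches whose objects do need preparation, the caller supplies its
 * own ctor/dtor pair.  The sketch below is purely illustrative and is
 * not used anywhere in this file (the "example_obj" structure and its
 * buffer size are hypothetical): the ctor turns blank storage from the
 * back-end allocator into a usable object and returns TRUE on success,
 * and the dtor undoes that work just before the storage is handed back
 * to the back-end allocator.
 */
#if 0
struct example_obj {
	struct spinlock	eo_spin;
	void		*eo_buf;
};

static boolean_t
example_ctor(void *obj, void *privdata, int ocflags)
{
	struct example_obj *eo = obj;

	spin_init(&eo->eo_spin);
	eo->eo_buf = kmalloc(128, M_TEMP, ocflags & OC_MFLAGS);
	return (eo->eo_buf != NULL);
}

static void
example_dtor(void *obj, void *privdata)
{
	struct example_obj *eo = obj;

	kfree(eo->eo_buf, M_TEMP);
}
#endif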
193 
194 /*
195  * Create an object cache.
196  */
197 struct objcache *
198 objcache_create(const char *name, int cluster_limit, int nom_cache,
199 		objcache_ctor_fn *ctor, objcache_dtor_fn *dtor, void *privdata,
200 		objcache_alloc_fn *alloc, objcache_free_fn *free,
201 		void *allocator_args)
202 {
203 	struct objcache *oc;
204 	struct magazinedepot *depot;
205 	int cpuid;
206 	int nmagdepot;
207 	int mag_capacity;
208 	int i;
209 
210 	/*
211 	 * Allocate object cache structure
212 	 */
213 	oc = kmalloc_cachealign(
214 	    __offsetof(struct objcache, cache_percpu[ncpus]),
215 	    M_OBJCACHE, M_WAITOK | M_ZERO);
216 	oc->name = kstrdup(name, M_TEMP);
217 	oc->ctor = ctor ? ctor : null_ctor;
218 	oc->dtor = dtor ? dtor : null_dtor;
219 	oc->privdata = privdata;
220 	oc->alloc = alloc;
221 	oc->free = free;
222 	oc->allocator_args = allocator_args;
223 
224 	/*
225 	 * Initialize depot list(s).
226 	 */
227 	depot = &oc->depot[0];
228 
229 	spin_init(&depot->spin);
230 	SLIST_INIT(&depot->fullmagazines);
231 	SLIST_INIT(&depot->emptymagazines);
232 
233 	/*
234 	 * Figure out the nominal number of free objects to cache and
235 	 * the magazine capacity.  By default we want to cache up to
236 	 * half the cluster_limit.  If there is no cluster_limit then
237 	 * we want to cache up to 128 objects.
238 	 */
239 	if (nom_cache == 0)
240 		nom_cache = cluster_limit / 2;
241 	if (cluster_limit && nom_cache > cluster_limit)
242 		nom_cache = cluster_limit;
243 	if (nom_cache == 0)
244 		nom_cache = INITIAL_MAG_CAPACITY * 2;
245 
246 	/*
247 	 * Magazine capacity for 2 active magazines per cpu plus 2
248 	 * magazines in the depot.
249 	 */
250 	mag_capacity = mag_capacity_align(nom_cache / (ncpus + 1) / 2 + 1);
251 	if (mag_capacity > magazine_capmax)
252 		mag_capacity = magazine_capmax;
253 	else if (mag_capacity < magazine_capmin)
254 		mag_capacity = magazine_capmin;
255 	depot->magcapacity = mag_capacity;
256 
257 	/*
258 	 * The cluster_limit must be sufficient to have two magazines per
259 	 * cpu plus at least two magazines in the depot.  However, because
260 	 * partial magazines can stay on the cpus, what we really need here
261 	 * is to specify the number of extra magazines we allocate for the
262 	 * depot.
263 	 */
264 	if (cluster_limit == 0) {
265 		depot->unallocated_objects = -1;
266 	} else {
267 		depot->unallocated_objects = ncpus * mag_capacity * 2 +
268 					     cluster_limit;
269 	}
270 
271 	/*
272 	 * Initialize per-cpu caches
273 	 */
274 	for (cpuid = 0; cpuid < ncpus; cpuid++) {
275 		struct percpu_objcache *cache_percpu = &oc->cache_percpu[cpuid];
276 
277 		cache_percpu->loaded_magazine = mag_alloc(mag_capacity);
278 		cache_percpu->previous_magazine = mag_alloc(mag_capacity);
279 	}
280 
281 	/*
282 	 * Compute how many empty magazines to place in the depot.  This
283 	 * determines the retained cache size and is based on nom_cache.
284 	 *
285 	 * The actual cache size is larger because there are two magazines
286 	 * for each cpu as well, but those can be in any fill state, so we
287 	 * just can't count them.
288 	 *
289 	 * There is a minimum of two magazines in the depot.
290 	 */
291 	nmagdepot = nom_cache / mag_capacity + 1;
292 	if (nmagdepot < 2)
293 		nmagdepot = 2;
294 
295 	/*
296 	 * Put empty magazines in depot
297 	 */
298 	for (i = 0; i < nmagdepot; i++) {
299 		struct magazine *mag = mag_alloc(mag_capacity);
300 		SLIST_INSERT_HEAD(&depot->emptymagazines, mag, nextmagazine);
301 	}
302 
303 	spin_lock(&objcachelist_spin);
304 	LIST_INSERT_HEAD(&allobjcaches, oc, oc_next);
305 	spin_unlock(&objcachelist_spin);
306 
307 	return (oc);
308 }
309 
310 struct objcache *
311 objcache_create_simple(malloc_type_t mtype, size_t objsize)
312 {
313 	struct objcache_malloc_args *margs;
314 	struct objcache *oc;
315 
316 	margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
317 	margs->objsize = objsize;
318 	margs->mtype = mtype;
319 	oc = objcache_create(mtype->ks_shortdesc, 0, 0,
320 			     NULL, NULL, NULL,
321 			     objcache_malloc_alloc, objcache_malloc_free,
322 			     margs);
323 	return (oc);
324 }
325 
326 struct objcache *
327 objcache_create_mbacked(malloc_type_t mtype, size_t objsize,
328 			int cluster_limit, int nom_cache,
329 			objcache_ctor_fn *ctor, objcache_dtor_fn *dtor,
330 			void *privdata)
331 {
332 	struct objcache_malloc_args *margs;
333 	struct objcache *oc;
334 
335 	margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK|M_ZERO);
336 	margs->objsize = objsize;
337 	margs->mtype = mtype;
338 	oc = objcache_create(mtype->ks_shortdesc,
339 			     cluster_limit, nom_cache,
340 			     ctor, dtor, privdata,
341 			     objcache_malloc_alloc, objcache_malloc_free,
342 			     margs);
343 	return(oc);
344 }
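
/*
 * Typical life cycle of a simple malloc-backed cache (an illustrative
 * sketch only, not code used by this file; "M_EXAMPLE", "excache" and
 * struct example_obj are hypothetical).  Note that objcache_get() does
 * not honor M_ZERO, so callers must initialize objects themselves.
 */
#if 0
static struct objcache *excache;

static void
example_cache_init(void)
{
	/* no cluster limit; roughly 128 objects are nominally cached */
	excache = objcache_create_simple(M_EXAMPLE,
					 sizeof(struct example_obj));
}

static void
example_cache_use(void)
{
	struct example_obj *eo;

	eo = objcache_get(excache, M_WAITOK);
	bzero(eo, sizeof(*eo));		/* cached objects are not zeroed */
	/* ... use the object ... */
	objcache_put(excache, eo);
}

static void
example_cache_uninit(void)
{
	objcache_destroy(excache);	/* no references may remain */
}
#endif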
345 
346 
347 #define MAGAZINE_EMPTY(mag)	(mag->rounds == 0)
348 #define MAGAZINE_NOTEMPTY(mag)	(mag->rounds != 0)
349 #define MAGAZINE_FULL(mag)	(mag->rounds == mag->capacity)
350 
351 #define	swap(x, y)	({ struct magazine *t = x; x = y; y = t; })
352 
353 /*
354  * Get an object from the object cache.
355  *
356  * WARNING!  ocflags are only used when we have to go to the underlying
357  * allocator, so we cannot depend on flags such as M_ZERO.
358  */
359 void *
360 objcache_get(struct objcache *oc, int ocflags)
361 {
362 	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
363 	struct magazine *loadedmag;
364 	struct magazine *emptymag;
365 	void *obj;
366 	struct magazinedepot *depot;
367 
368 	KKASSERT((ocflags & M_ZERO) == 0);
369 	crit_enter();
370 	++cpucache->gets_cumulative;
371 
372 retry:
373 	/*
374 	 * Loaded magazine has an object.  This is the hot path.
375 	 * It is lock-free and uses a critical section to block
376 	 * out interrupt handlers on the same processor.
377 	 */
378 	loadedmag = cpucache->loaded_magazine;
379 	if (MAGAZINE_NOTEMPTY(loadedmag)) {
380 		obj = loadedmag->objects[--loadedmag->rounds];
381 		crit_exit();
382 		return (obj);
383 	}
384 
385 	/* Previous magazine has an object. */
386 	if (MAGAZINE_NOTEMPTY(cpucache->previous_magazine)) {
387 		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
388 		loadedmag = cpucache->loaded_magazine;
389 		obj = loadedmag->objects[--loadedmag->rounds];
390 		crit_exit();
391 		return (obj);
392 	}
393 
394 	/*
395 	 * Both magazines empty.  Get a full magazine from the depot and
396 	 * move one of the empty ones to the depot.
397 	 *
398 	 * Obtain the depot spinlock.
399 	 *
400 	 * NOTE: Beyond this point, M_* flags are handled via oc->alloc()
401 	 */
402 	depot = &oc->depot[myclusterid];
403 	spin_lock(&depot->spin);
404 
405 	/*
406 	 * Recheck the cpucache after obtaining the depot spinlock.  This
407 	 * shouldn't be necessary now but don't take any chances.
408 	 */
409 	if (MAGAZINE_NOTEMPTY(cpucache->loaded_magazine) ||
410 	    MAGAZINE_NOTEMPTY(cpucache->previous_magazine)
411 	) {
412 		spin_unlock(&depot->spin);
413 		goto retry;
414 	}
415 
416 	/* Check if depot has a full magazine. */
417 	if (!SLIST_EMPTY(&depot->fullmagazines)) {
418 		emptymag = cpucache->previous_magazine;
419 		cpucache->previous_magazine = cpucache->loaded_magazine;
420 		cpucache->loaded_magazine = SLIST_FIRST(&depot->fullmagazines);
421 		SLIST_REMOVE_HEAD(&depot->fullmagazines, nextmagazine);
422 
423 		/*
424 		 * Return emptymag to the depot.
425 		 */
426 		KKASSERT(MAGAZINE_EMPTY(emptymag));
427 		SLIST_INSERT_HEAD(&depot->emptymagazines,
428 				  emptymag, nextmagazine);
429 		spin_unlock(&depot->spin);
430 		goto retry;
431 	}
432 
433 	/*
434 	 * The depot does not have any non-empty magazines.  If we have
435 	 * not hit our object limit we can allocate a new object using
436 	 * the back-end allocator.
437 	 *
438 	 * note: unallocated_objects can be initialized to -1, which has
439 	 * the effect of removing any allocation limits.
440 	 */
441 	if (depot->unallocated_objects) {
442 		--depot->unallocated_objects;
443 		spin_unlock(&depot->spin);
444 		crit_exit();
445 
446 		obj = oc->alloc(oc->allocator_args, ocflags);
447 		if (obj) {
448 			if (oc->ctor(obj, oc->privdata, ocflags))
449 				return (obj);
450 			oc->free(obj, oc->allocator_args);
451 			obj = NULL;
452 		}
453 		if (obj == NULL) {
454 			spin_lock(&depot->spin);
455 			++depot->unallocated_objects;
456 			spin_unlock(&depot->spin);
457 			if (depot->waiting)
458 				wakeup(depot);
459 
460 			crit_enter();
461 			/*
462 			 * makes debugging easier when gets_cumulative does
463 			 * not include gets_null.
464 			 */
465 			++cpucache->gets_null;
466 			--cpucache->gets_cumulative;
467 			crit_exit();
468 		}
469 		return(obj);
470 	}
471 	if (oc->exhausted == 0) {
472 		kprintf("Warning, objcache(%s): Exhausted!\n", oc->name);
473 		oc->exhausted = 1;
474 	}
475 
476 	/*
477 	 * Otherwise block if allowed to.
478 	 */
479 	if ((ocflags & (M_WAITOK|M_NULLOK)) == M_WAITOK) {
480 		++cpucache->waiting;
481 		++depot->waiting;
482 		ssleep(depot, &depot->spin, 0, "objcache_get", 0);
483 		--cpucache->waiting;
484 		--depot->waiting;
485 		spin_unlock(&depot->spin);
486 		goto retry;
487 	}
488 
489 	/*
490 	 * Otherwise fail
491 	 */
492 	++cpucache->gets_null;
493 	--cpucache->gets_cumulative;
494 	crit_exit();
495 	spin_unlock(&depot->spin);
496 	return (NULL);
497 }
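
/*
 * Caller-visible behavior of the limit handling above (illustrative;
 * "oc" is any object cache created with a non-zero cluster_limit):
 *
 *	obj = objcache_get(oc, M_WAITOK);		sleeps on the depot
 *							until an object is
 *							returned by some cpu
 *	obj = objcache_get(oc, M_WAITOK | M_NULLOK);	fails with NULL
 *							instead of sleeping
 *							once the limit is hit
 */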
498 
499 /*
500  * Wrapper for malloc allocation routines.
501  */
502 void *
503 objcache_malloc_alloc(void *allocator_args, int ocflags)
504 {
505 	struct objcache_malloc_args *alloc_args = allocator_args;
506 
507 	return (kmalloc(alloc_args->objsize, alloc_args->mtype,
508 		       ocflags & OC_MFLAGS));
509 }
510 
511 void
512 objcache_malloc_free(void *obj, void *allocator_args)
513 {
514 	struct objcache_malloc_args *alloc_args = allocator_args;
515 
516 	kfree(obj, alloc_args->mtype);
517 }
518 
519 /*
520  * Wrapper for allocation policies that pre-allocate at initialization time
521  * and don't do run-time allocation.
522  */
523 void *
524 objcache_nop_alloc(void *allocator_args, int ocflags)
525 {
526 	return (NULL);
527 }
528 
529 void
530 objcache_nop_free(void *obj, void *allocator_args)
531 {
532 }
533 
534 /*
535  * Return an object to the object cache.
536  */
537 void
538 objcache_put(struct objcache *oc, void *obj)
539 {
540 	struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
541 	struct magazine *loadedmag;
542 	struct magazinedepot *depot;
543 
544 	crit_enter();
545 	++cpucache->puts_cumulative;
546 
547 	if (CLUSTER_OF(obj) != myclusterid) {
548 #ifdef notyet
549 		/* use lazy IPI to send object to owning cluster XXX todo */
550 		++cpucache->puts_othercluster;
551 		crit_exit();
552 		return;
553 #endif
554 	}
555 
556 retry:
557 	/*
558 	 * Free slot available in loaded magazine.  This is the hot path.
559 	 * It is lock-free and uses a critical section to block out interrupt
560 	 * handlers on the same processor.
561 	 */
562 	loadedmag = cpucache->loaded_magazine;
563 	if (!MAGAZINE_FULL(loadedmag)) {
564 		loadedmag->objects[loadedmag->rounds++] = obj;
565 		if (cpucache->waiting)
566 			wakeup_mycpu(&oc->depot[myclusterid]);
567 		crit_exit();
568 		return;
569 	}
570 
571 	/*
572 	 * Current magazine full, but previous magazine has room.  XXX
573 	 */
574 	if (!MAGAZINE_FULL(cpucache->previous_magazine)) {
575 		swap(cpucache->loaded_magazine, cpucache->previous_magazine);
576 		loadedmag = cpucache->loaded_magazine;
577 		loadedmag->objects[loadedmag->rounds++] = obj;
578 		if (cpucache->waiting)
579 			wakeup_mycpu(&oc->depot[myclusterid]);
580 		crit_exit();
581 		return;
582 	}
583 
584 	/*
585 	 * Both magazines full.  Get an empty magazine from the depot and
586 	 * move a full loaded magazine to the depot.  Even though the
587 	 * magazine may wind up with space available after we block on
588 	 * the spinlock, we still cycle it through to avoid the non-optimal
589 	 * corner-case.
590 	 *
591 	 * Obtain the depot spinlock.
592 	 */
593 	depot = &oc->depot[myclusterid];
594 	spin_lock(&depot->spin);
595 
596 	/*
597 	 * If an empty magazine is available in the depot, cycle it
598 	 * through and retry.
599 	 */
600 	if (!SLIST_EMPTY(&depot->emptymagazines)) {
601 		loadedmag = cpucache->previous_magazine;
602 		cpucache->previous_magazine = cpucache->loaded_magazine;
603 		cpucache->loaded_magazine = SLIST_FIRST(&depot->emptymagazines);
604 		SLIST_REMOVE_HEAD(&depot->emptymagazines, nextmagazine);
605 
606 		/*
607 		 * Return loadedmag to the depot.  Due to blocking it may
608 		 * not be entirely full and could even be empty.
609 		 */
610 		if (MAGAZINE_EMPTY(loadedmag)) {
611 			SLIST_INSERT_HEAD(&depot->emptymagazines,
612 					  loadedmag, nextmagazine);
613 			spin_unlock(&depot->spin);
614 		} else {
615 			SLIST_INSERT_HEAD(&depot->fullmagazines,
616 					  loadedmag, nextmagazine);
617 			spin_unlock(&depot->spin);
618 			if (depot->waiting)
619 				wakeup(depot);
620 		}
621 		goto retry;
622 	}
623 
624 	/*
625 	 * An empty mag is not available.  This is a corner case which can
626 	 * occur due to cpus holding partially full magazines.  Do not try
627 	 * to allocate a mag, just free the object.
628 	 */
629 	++depot->unallocated_objects;
630 	spin_unlock(&depot->spin);
631 	if (depot->waiting)
632 		wakeup(depot);
633 	crit_exit();
634 	oc->dtor(obj, oc->privdata);
635 	oc->free(obj, oc->allocator_args);
636 }
637 
638 /*
639  * The object is being put back into the cache, but the caller has
640  * indicated that the object is not in any shape to be reused and should
641  * be dtor'd immediately.
642  */
643 void
644 objcache_dtor(struct objcache *oc, void *obj)
645 {
646 	struct magazinedepot *depot;
647 
648 	depot = &oc->depot[myclusterid];
649 	spin_lock(&depot->spin);
650 	++depot->unallocated_objects;
651 	spin_unlock(&depot->spin);
652 	if (depot->waiting)
653 		wakeup(depot);
654 	oc->dtor(obj, oc->privdata);
655 	oc->free(obj, oc->allocator_args);
656 }
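
/*
 * Illustrative contrast with objcache_put() (hypothetical names):
 *
 *	if (eo->eo_corrupted)
 *		objcache_dtor(excache, eo);	destroy, do not recycle
 *	else
 *		objcache_put(excache, eo);	recycle via the cache
 */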
657 
658 /*
659  * Deallocate all objects in a magazine and free the magazine if requested.
660  * When freeit is TRUE the magazine must already be disassociated from the
661  * depot.
662  *
663  * Must be called with a critical section held when called with a per-cpu
664  * magazine.  The magazine may be indirectly modified during the loop.
665  *
666  * If the magazine moves during a dtor the operation is aborted.  This is
667  * only allowed when freeit is FALSE.
668  *
669  * The number of objects freed is returned.
670  */
671 static int
672 mag_purge(struct objcache *oc, struct magazine **magp, int freeit)
673 {
674 	struct magazine *mag = *magp;
675 	int count;
676 	void *obj;
677 
678 	count = 0;
679 	while (mag->rounds) {
680 		obj = mag->objects[--mag->rounds];
681 		oc->dtor(obj, oc->privdata);		/* MAY BLOCK */
682 		oc->free(obj, oc->allocator_args);	/* MAY BLOCK */
683 		++count;
684 
685 		/*
686 		 * Cycle for interrupts.
687 		 */
688 		if ((count & 15) == 0) {
689 			crit_exit();
690 			crit_enter();
691 		}
692 
693 		/*
694 		 * mag may have become invalid either due to dtor/free
695 		 * blocking or interrupt cycling, do not dereference it
696 		 * until we check.
697 		 */
698 		if (*magp != mag) {
699 			kprintf("mag_purge: mag ripped out\n");
700 			break;
701 		}
702 	}
703 	if (freeit) {
704 		KKASSERT(*magp == mag);
705 		*magp = NULL;
706 		kfree(mag, M_OBJMAG);
707 	}
708 	return(count);
709 }
710 
711 /*
712  * Disassociate zero or more magazines from a magazine list associated with
713  * the depot, update the depot, and move the magazines to a temporary
714  * list.
715  *
716  * The caller must check the depot for waiters and wake it up, typically
717  * after disposing of the magazines this function loads onto the temporary
718  * list.
719  */
720 static void
721 maglist_disassociate(struct magazinedepot *depot, struct magazinelist *maglist,
722 		     struct magazinelist *tmplist, boolean_t purgeall)
723 {
724 	struct magazine *mag;
725 
726 	while ((mag = SLIST_FIRST(maglist)) != NULL) {
727 		SLIST_REMOVE_HEAD(maglist, nextmagazine);
728 		SLIST_INSERT_HEAD(tmplist, mag, nextmagazine);
729 		depot->unallocated_objects += mag->rounds;
730 	}
731 }
732 
733 /*
734  * Deallocate all magazines and their contents from the passed temporary
735  * list.  The magazines have already been accounted for by their depots.
736  *
737  * The total number of rounds freed is returned.  This number is typically
738  * only used to determine whether a wakeup on the depot is needed or not.
739  */
740 static int
741 maglist_purge(struct objcache *oc, struct magazinelist *maglist)
742 {
743 	struct magazine *mag;
744 	int count = 0;
745 
746 	/*
747 	 * can't use SLIST_FOREACH because blocking releases the depot
748 	 * spinlock
749 	 */
750 	crit_enter();
751 	while ((mag = SLIST_FIRST(maglist)) != NULL) {
752 		SLIST_REMOVE_HEAD(maglist, nextmagazine);
753 		count += mag_purge(oc, &mag, TRUE);
754 	}
755 	crit_exit();
756 	return(count);
757 }
758 
759 /*
760  * De-allocates all magazines on the full and empty magazine lists.
761  *
762  * Because this routine is called with a spinlock held, the magazines
763  * can only be disassociated and moved to a temporary list, not freed.
764  *
765  * The caller is responsible for freeing the magazines.
766  */
767 static void
768 depot_disassociate(struct magazinedepot *depot, struct magazinelist *tmplist)
769 {
770 	maglist_disassociate(depot, &depot->fullmagazines, tmplist, TRUE);
771 	maglist_disassociate(depot, &depot->emptymagazines, tmplist, TRUE);
772 }
773 
774 #ifdef notneeded
775 void
776 objcache_reclaim(struct objcache *oc)
777 {
778 	struct percpu_objcache *cache_percpu = &oc->cache_percpu[myclusterid];
779 	struct magazinedepot *depot = &oc->depot[myclusterid];
780 	struct magazinelist tmplist;
781 	int count;
782 
783 	SLIST_INIT(&tmplist);
784 	crit_enter();
785 	count = mag_purge(oc, &cache_percpu->loaded_magazine, FALSE);
786 	count += mag_purge(oc, &cache_percpu->previous_magazine, FALSE);
787 	crit_exit();
788 
789 	spin_lock(&depot->spin);
790 	depot->unallocated_objects += count;
791 	depot_disassociate(depot, &tmplist);
792 	spin_unlock(&depot->spin);
793 	count += maglist_purge(oc, &tmplist);
794 	if (count && depot->waiting)
795 		wakeup(depot);
796 }
797 #endif
798 
799 /*
800  * Try to free up some memory.  Return as soon as some free memory is found.
801  * For each object cache on the reclaim list, first try the current per-cpu
802  * cache, then the full magazine depot.
803  */
804 boolean_t
805 objcache_reclaimlist(struct objcache *oclist[], int nlist, int ocflags)
806 {
807 	struct objcache *oc;
808 	struct percpu_objcache *cpucache;
809 	struct magazinedepot *depot;
810 	struct magazinelist tmplist;
811 	int i, count;
812 
813 	kprintf("objcache_reclaimlist\n");
814 
815 	SLIST_INIT(&tmplist);
816 
817 	for (i = 0; i < nlist; i++) {
818 		oc = oclist[i];
819 		cpucache = &oc->cache_percpu[mycpuid];
820 		depot = &oc->depot[myclusterid];
821 
822 		crit_enter();
823 		count = mag_purge(oc, &cpucache->loaded_magazine, FALSE);
824 		if (count == 0)
825 			count += mag_purge(oc, &cpucache->previous_magazine, FALSE);
826 		crit_exit();
827 		if (count > 0) {
828 			spin_lock(&depot->spin);
829 			depot->unallocated_objects += count;
830 			spin_unlock(&depot->spin);
831 			if (depot->waiting)
832 				wakeup(depot);
833 			return (TRUE);
834 		}
835 		spin_lock(&depot->spin);
836 		maglist_disassociate(depot, &depot->fullmagazines,
837 				     &tmplist, FALSE);
838 		spin_unlock(&depot->spin);
839 		count = maglist_purge(oc, &tmplist);
840 		if (count > 0) {
841 			if (depot->waiting)
842 				wakeup(depot);
843 			return (TRUE);
844 		}
845 	}
846 	return (FALSE);
847 }
848 
849 /*
850  * Destroy an object cache.  Must have no existing references.
851  */
852 void
853 objcache_destroy(struct objcache *oc)
854 {
855 	struct percpu_objcache *cache_percpu;
856 	struct magazinedepot *depot;
857 	int clusterid, cpuid;
858 	struct magazinelist tmplist;
859 
860 	spin_lock(&objcachelist_spin);
861 	LIST_REMOVE(oc, oc_next);
862 	spin_unlock(&objcachelist_spin);
863 
864 	SLIST_INIT(&tmplist);
865 	for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++) {
866 		depot = &oc->depot[clusterid];
867 		spin_lock(&depot->spin);
868 		depot_disassociate(depot, &tmplist);
869 		spin_unlock(&depot->spin);
870 	}
871 	maglist_purge(oc, &tmplist);
872 
873 	for (cpuid = 0; cpuid < ncpus; cpuid++) {
874 		cache_percpu = &oc->cache_percpu[cpuid];
875 
876 		crit_enter();
877 		mag_purge(oc, &cache_percpu->loaded_magazine, TRUE);
878 		mag_purge(oc, &cache_percpu->previous_magazine, TRUE);
879 		crit_exit();
880 		cache_percpu->loaded_magazine = NULL;
881 		cache_percpu->previous_magazine = NULL;
882 		/* don't bother adjusting depot->unallocated_objects */
883 	}
884 
885 	kfree(oc->name, M_TEMP);
886 	kfree(oc, M_OBJCACHE);
887 }
888 
889 #if 0
890 /*
891  * Populate the per-cluster depot with elements from a linear block
892  * of memory.  Must be called individually for each cluster.
893  * Populated depots should not be destroyed.
894  */
895 void
896 objcache_populate_linear(struct objcache *oc, void *base, int nelts, int size)
897 {
898 	char *p = base;
899 	char *end = (char *)base + (nelts * size);
900 	struct magazinedepot *depot = &oc->depot[myclusterid];
901 	struct magazine *emptymag = mag_alloc(depot->magcapacity);
902 
903 	while (p < end) {
904 		emptymag->objects[emptymag->rounds++] = p;
905 		if (MAGAZINE_FULL(emptymag)) {
906 			spin_lock_wr(&depot->spin);
907 			SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
908 					  nextmagazine);
909 			depot->unallocated_objects += emptymag->rounds;
910 			spin_unlock_wr(&depot->spin);
911 			if (depot->waiting)
912 				wakeup(depot);
913 			emptymag = mag_alloc(depot->magcapacity);
914 		}
915 		p += size;
916 	}
917 	if (MAGAZINE_EMPTY(emptymag)) {
918 		crit_enter();
919 		mag_purge(oc, &emptymag, TRUE);
920 		crit_exit();
921 	} else {
922 		spin_lock_wr(&depot->spin);
923 		SLIST_INSERT_HEAD(&depot->fullmagazines, emptymag,
924 				  nextmagazine);
925 		depot->unallocated_objects += emptymag->rounds;
926 		spin_unlock_wr(&depot->spin);
927 		if (depot->waiting)
928 			wakeup(depot);
929 		emptymag = mag_alloc(depot->magcapacity);
930 	}
931 }
932 #endif
933 
934 #if 0
935 /*
936  * Check depot contention once a minute.
937  * 2 contested locks per second allowed.
938  */
939 static int objcache_rebalance_period;
940 static const int objcache_contention_rate = 120;
941 static struct callout objcache_callout;
942 
943 #define MAXMAGSIZE 512
944 
945 /*
946  * Check depot contention and increase magazine size if necessary.
947  */
948 static void
949 objcache_timer(void *dummy)
950 {
951 	struct objcache *oc;
952 	struct magazinedepot *depot;
953 	struct magazinelist tmplist;
954 
955 	XXX we need to detect when an objcache is destroyed out from under
956 	    us XXX
957 
958 	SLIST_INIT(&tmplist);
959 
960 	spin_lock_wr(&objcachelist_spin);
961 	LIST_FOREACH(oc, &allobjcaches, oc_next) {
962 		depot = &oc->depot[myclusterid];
963 		if (depot->magcapacity < MAXMAGSIZE) {
964 			if (depot->contested > objcache_contention_rate) {
965 				spin_lock_wr(&depot->spin);
966 				depot_disassociate(depot, &tmplist);
967 				depot->magcapacity *= 2;
968 				spin_unlock_wr(&depot->spin);
969 				kprintf("objcache_timer: increasing cache %s"
970 				       " magsize to %d, contested %d times\n",
971 				    oc->name, depot->magcapacity,
972 				    depot->contested);
973 			}
974 			depot->contested = 0;
975 		}
976 		spin_unlock_wr(&objcachelist_spin);
977 		if (maglist_purge(oc, &tmplist) > 0 && depot->waiting)
978 			wakeup(depot);
979 		spin_lock_wr(&objcachelist_spin);
980 	}
981 	spin_unlock_wr(&objcachelist_spin);
982 
983 	callout_reset(&objcache_callout, objcache_rebalance_period,
984 		      objcache_timer, NULL);
985 }
986 
987 #endif
988 
989 static void
990 objcache_init(void)
991 {
992 	spin_init(&objcachelist_spin);
993 
994 	magazine_capmin = mag_capacity_align(MAGAZINE_CAPACITY_MIN);
995 	magazine_capmax = mag_capacity_align(MAGAZINE_CAPACITY_MAX);
996 	if (bootverbose) {
997 		kprintf("objcache: magazine cap [%d, %d]\n",
998 		    magazine_capmin, magazine_capmax);
999 	}
1000 
1001 #if 0
1002 	callout_init_mp(&objcache_callout);
1003 	objcache_rebalance_period = 60 * hz;
1004 	callout_reset(&objcache_callout, objcache_rebalance_period,
1005 		      objcache_timer, NULL);
1006 #endif
1007 }
1008 SYSINIT(objcache, SI_BOOT2_OBJCACHE, SI_ORDER_FIRST, objcache_init, 0);
1009