xref: /dragonfly/sys/kern/kern_sysref.c (revision 81c11cd3)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/kern/kern_sysref.c,v 1.7 2008/10/26 04:29:19 sephe Exp $
35  */
36 /*
37  * System resource control module for all cluster-addressable system resource
38  * structures.
39  *
40  * This module implements the core ref counting, sysid registration, and
41  * objcache-backed allocation mechanism for all major system resource
42  * structures.
43  *
44  * sysid registrations operate via the objcache ctor/dtor mechanism and
45  * sysids will be reused if the resource is not explicitly accessed via
46  * its sysid.  This removes all RB tree handling overhead from the critical
47  * path for locally used resources.
48  */
49 
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/kernel.h>
53 #include <sys/tree.h>
54 #include <sys/spinlock.h>
55 #include <machine/atomic.h>
56 #include <machine/cpufunc.h>
57 
58 #include <sys/spinlock2.h>
59 #include <sys/sysref2.h>
60 
61 static boolean_t sysref_ctor(void *data, void *privdata, int ocflags);
62 static void sysref_dtor(void *data, void *privdata);
63 
64 /*
65  * Red-Black tree support
66  */
67 static int rb_sysref_compare(struct sysref *sr1, struct sysref *sr2);
68 RB_GENERATE2(sysref_rb_tree, sysref, rbnode, rb_sysref_compare, sysid_t, sysid);
69 
70 static struct srpercpu {
71 	struct sysref_rb_tree rbtree;
72 	struct spinlock spin;
73 } sysref_array[MAXCPU];
74 
75 static void
76 sysrefbootinit(void *dummy __unused)
77 {
78 	struct srpercpu *sa;
79 	int i;
80 
81 	for (i = 0; i < ncpus; ++i) {
82 		sa = &sysref_array[i];
83 		spin_init(&sa->spin);
84 		RB_INIT(&sa->rbtree);
85 	}
86 }
87 
88 SYSINIT(sysref, SI_BOOT2_MACHDEP, SI_ORDER_ANY, sysrefbootinit, NULL);
89 
90 static
91 int
92 rb_sysref_compare(struct sysref *sr1, struct sysref *sr2)
93 {
94 	if (sr1->sysid < sr2->sysid)
95 		return(-1);
96 	if (sr1->sysid > sr2->sysid)
97 		return(1);
98 	return(0);
99 }
100 
101 /*
102  * Manual initialization of a resource structure's sysref, only used during
103  * booting to set up certain statically declared resources which cannot
104  * be deallocated.
105  */
106 void
107 sysref_init(struct sysref *sr, struct sysref_class *srclass)
108 {
109 	struct srpercpu *sa;
110 	globaldata_t gd;
111 
112 	gd = mycpu;
113 	crit_enter_gd(gd);
114 	gd->gd_sysid_alloc += ncpus_fit; /* next unique sysid */
115 	sr->sysid = gd->gd_sysid_alloc;
116 	KKASSERT(((int)sr->sysid & ncpus_fit_mask) == gd->gd_cpuid);
117 	sr->refcnt = -0x40000000;
118 	sr->flags = 0;
119 	sr->srclass = srclass;
120 
121 	sa = &sysref_array[gd->gd_cpuid];
122 	spin_lock(&sa->spin);
123 	sysref_rb_tree_RB_INSERT(&sa->rbtree, sr);
124 	spin_unlock(&sa->spin);
125 	crit_exit_gd(gd);
126 }
127 
128 /*
129  * Allocate a resource structure of the specified class, initialize a
130  * sysid and add the resource to the RB tree.  The caller must complete
131  * initialization of the resource and call sysref_activate() to activate it.
132  */
133 void *
134 sysref_alloc(struct sysref_class *srclass)
135 {
136 	struct sysref *sr;
137 	char *data;
138 	int n;
139 
140 	/*
141 	 * Create the object cache backing store.
142 	 */
143 	if (srclass->oc == NULL) {
144 		KKASSERT(srclass->mtype != NULL);
145 		srclass->oc = objcache_create_mbacked(
146 				srclass->mtype, srclass->objsize,
147 				NULL, srclass->mag_capacity,
148 				sysref_ctor, sysref_dtor, srclass);
149 	}
150 
151 	/*
152 	 * Allocate the resource.
153 	 */
154 	data = objcache_get(srclass->oc, M_WAITOK);
155 	sr = (struct sysref *)(data + srclass->offset);
156 	KKASSERT(sr->flags & SRF_PUTAWAY);
157 	sr->flags &= ~SRF_PUTAWAY;
158 
159 	/*
160 	 * Refcnt isn't touched while it is zero.  The objcache ctor
161 	 * function has already allocated a sysid and emplaced the
162 	 * structure in the RB tree.
163 	 */
164 	KKASSERT(sr->refcnt == 0);
165 	sr->refcnt = -0x40000000;
166 
167 	/*
168 	 * Clean out the structure unless the caller wants to deal with
169 	 * it (e.g. like the vmspace code).
170 	 */
171 	if ((srclass->flags & SRC_MANAGEDINIT) == 0) {
172 		if (srclass->offset != 0)
173 			bzero(data, srclass->offset);
174 		n = srclass->offset + sizeof(struct sysref);
175 		KKASSERT(n <= srclass->objsize);
176 		if (n != srclass->objsize)
177 			bzero(data + n, srclass->objsize - n);
178 	}
179 	return(data);
180 }
181 
182 /*
183  * Object cache backing store ctor function.
184  *
185  * This allocates the sysid and associates the structure with the
186  * red-black tree, allowing it to be looked up.  The actual resource
187  * structure has NOT yet been allocated so it is marked free.
188  *
189  * If the sysid is not used to access the resource, we will just
190  * allow the sysid to be reused when the resource structure is reused,
191  * allowing the RB tree operation to be 'cached'.  This results in
192  * virtually no performance penalty for using the sysref facility.
193  */
194 static
195 boolean_t
196 sysref_ctor(void *data, void *privdata, int ocflags)
197 {
198 	globaldata_t gd;
199 	struct srpercpu *sa;
200 	struct sysref_class *srclass = privdata;
201 	struct sysref *sr = (void *)((char *)data + srclass->offset);
202 
203 	/*
204 	 * Resource structures need to be cleared when allocating from
205 	 * malloc backing store.  This is different from the zeroing
206 	 * that we do in sysref_alloc().
207 	 */
208 	bzero(data, srclass->objsize);
209 
210 	/*
211 	 * Resources managed by our objcache do the sysid and RB tree
212 	 * handling in the objcache ctor/dtor, so we can reuse the
213 	 * structure without re-treeing it over and over again.
214 	 */
215 	gd = mycpu;
216 	crit_enter_gd(gd);
217 	gd->gd_sysid_alloc += ncpus_fit; /* next unique sysid */
218 	sr->sysid = gd->gd_sysid_alloc;
219 	KKASSERT(((int)sr->sysid & ncpus_fit_mask) == gd->gd_cpuid);
220 	/* sr->refcnt= 0; already zero */
221 	sr->flags = SRF_ALLOCATED | SRF_PUTAWAY;
222 	sr->srclass = srclass;
223 
224 	sa = &sysref_array[gd->gd_cpuid];
225 	spin_lock(&sa->spin);
226 	sysref_rb_tree_RB_INSERT(&sa->rbtree, sr);
227 	spin_unlock(&sa->spin);
228 	crit_exit_gd(gd);
229 
230 	/*
231 	 * Execute the class's ctor function, if any.  NOTE: The class
232 	 * should not try to zero out the structure, we've already handled
233 	 * that and preinitialized the sysref.
234 	 *
235 	 * XXX ignores return value for now
236 	 */
237 	if (srclass->ctor)
238 		srclass->ctor(data, privdata, ocflags);
239 	return TRUE;
240 }
241 
242 /*
243  * Object cache destructor, allowing the structure to be returned
244  * to the system memory pool.  The resource structure must be
245  * removed from the RB tree.  All other references have already
246  * been destroyed and the RB tree will not create any new references
247  * to the structure in its current state.
248  */
249 static
250 void
251 sysref_dtor(void *data, void *privdata)
252 {
253 	struct srpercpu *sa;
254 	struct sysref_class *srclass = privdata;
255 	struct sysref *sr = (void *)((char *)data + srclass->offset);
256 
257 	KKASSERT(sr->refcnt == 0);
258 	sa = &sysref_array[(int)sr->sysid & ncpus_fit_mask];
259 	spin_lock(&sa->spin);
260 	sysref_rb_tree_RB_REMOVE(&sa->rbtree, sr);
261 	spin_unlock(&sa->spin);
262 	if (srclass->dtor)
263 		srclass->dtor(data, privdata);
264 }
265 
266 /*
267  * Activate or reactivate a resource. 0x40000001 is added to the ref count
268  * so -0x40000000 (during initialization) will translate to a ref count of 1.
269  * Any references made during initialization will translate to additional
270  * positive ref counts.
271  *
272  * MPSAFE
273  */
274 void
275 sysref_activate(struct sysref *sr)
276 {
277 	int count;
278 
279 	for (;;) {
280 		count = sr->refcnt;
281 		KASSERT(count < 0 && count + 0x40000001 > 0,
282 			("sysref_activate: bad count %08x", count));
283 		if (atomic_cmpset_int(&sr->refcnt, count, count + 0x40000001))
284 			break;
285 		cpu_pause();
286 	}
287 }
288 
289 /*
290  * Release a reference under special circumstances.  This call is made
291  * from the sysref_put() inline from sys/sysref2.h for any 1->0 transitions,
292  * negative->negative 'termination in progress' transitions, and when the
293  * cmpset instruction fails during a normal transition.
294  *
295  * This function is called from the sysref_put() inline in sys/sysref2.h,
296  * but handles all cases regardless.
297  */
298 void
299 _sysref_put(struct sysref *sr)
300 {
301 	int count;
302 	void *data;
303 
304 	KKASSERT((sr->flags & SRF_PUTAWAY) == 0);
305 
306 	for (;;) {
307 		count = sr->refcnt;
308 		if (count > 1) {
309 			/*
310 			 * release 1 count, nominal case, active resource
311 			 * structure, no other action required.
312 			 */
313 			if (atomic_cmpset_int(&sr->refcnt, count, count - 1))
314 				break;
315 		} else if (count == 1) {
316 			/*
317 			 * 1->0 transitions transition to -0x40000000 instead,
318 			 * placing the resource structure into a termination-
319 			 * in-progress state.  The termination function is
320 			 * then called.
321 			 */
322 			data = (char *)sr - sr->srclass->offset;
323 			sr->srclass->ops.lock(data);
324 			if (atomic_cmpset_int(&sr->refcnt, count, -0x40000000)) {
325 				sr->srclass->ops.terminate(data);
326 				break;
327 			}
328 			sr->srclass->ops.unlock(data);
329 		} else if (count > -0x40000000) {
330 			/*
331 			 * release 1 count, nominal case, resource undergoing
332 			 * termination.  The Resource can be ref'd and
333 			 * deref'd while undergoing termination.
334 			 */
335 			if (atomic_cmpset_int(&sr->refcnt, count, count - 1))
336 				break;
337 		} else {
338 			/*
339 			 * Final release, set refcnt to 0.
340 			 * Resource must have been allocated.
341 			 *
342 			 * If SRF_SYSIDUSED is not set just objcache_put() the
343 			 * resource, otherwise objcache_dtor() the resource.
344 			 */
345 			KKASSERT(count == -0x40000000);
346 			if (atomic_cmpset_int(&sr->refcnt, count, 0)) {
347 				KKASSERT(sr->flags & SRF_ALLOCATED);
348 				sr->flags |= SRF_PUTAWAY;
349 				data = (char *)sr - sr->srclass->offset;
350 				if (sr->flags & SRF_SYSIDUSED)
351 					objcache_dtor(sr->srclass->oc, data);
352 				else
353 					objcache_put(sr->srclass->oc, data);
354 				break;
355 			}
356 		}
357 		/* loop until the cmpset succeeds */
358 		cpu_pause();
359 	}
360 }
361 
362 sysid_t
363 allocsysid(void)
364 {
365 	globaldata_t gd = mycpu;
366 	sysid_t sysid;
367 
368 	crit_enter_gd(gd);
369 	gd->gd_sysid_alloc += ncpus_fit;
370 	sysid = gd->gd_sysid_alloc;
371 	crit_exit_gd(gd);
372 	return(sysid);
373 }
374 
375