/*
 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * System resource control module for all cluster-addressable system resource
 * structures.
 *
 * This module implements the core ref counting, sysid registration, and
 * objcache-backed allocation mechanism for all major system resource
 * structures.
 *
 * sysid registrations operate via the objcache ctor/dtor mechanism and
 * sysids will be reused if the resource is not explicitly accessed via
 * its sysid.  This removes all RB tree handling overhead from the critical
 * path for locally used resources.
 *
 * Reference count values as manipulated by the functions in this file:
 *
 *	0		The structure is sitting in the objcache
 *			(sysref_ctor() leaves it zero, the final release
 *			in _sysref_put() sets it back to zero).
 *	-0x40000000	Set by sysref_init()/sysref_alloc() while the
 *			resource is being initialized, and by _sysref_put()
 *			on a 1->0 transition while it is being terminated.
 *	> 0		Active resource, after sysref_activate() has added
 *			0x40000001.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/tree.h>
#include <sys/spinlock.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>

#include <sys/spinlock2.h>
#include <sys/sysref2.h>

static boolean_t sysref_ctor(void *data, void *privdata, int ocflags);
static void sysref_dtor(void *data, void *privdata);

/*
 * Red-Black tree support
 *
 * The tree is keyed on the sysid field of struct sysref and linked through
 * its rbnode field.
 */
static int rb_sysref_compare(struct sysref *sr1, struct sysref *sr2);
RB_GENERATE2(sysref_rb_tree, sysref, rbnode, rb_sysref_compare, sysid_t, sysid);

/*
 * Per-cpu registration state: one RB tree of registered sysrefs and the
 * spinlock that is held around every insert into / remove from that tree.
 * Inserts index the array by the current cpu's gd_cpuid; sysref_dtor()
 * indexes it by the low bits of the sysid.
 */
static struct srpercpu {
	struct sysref_rb_tree rbtree;
	struct spinlock	spin;
} sysref_array[MAXCPU];

/*
 * SYSINIT hook: initialize the spinlock and the (empty) RB tree for the
 * first ncpus entries of sysref_array[].  The argument is unused.
 */
static void
sysrefbootinit(void *dummy __unused)
{
	struct srpercpu *sa;
	int i;

	for (i = 0; i < ncpus; ++i) {
		sa = &sysref_array[i];
		spin_init(&sa->spin, "sysrefbootinit");
		RB_INIT(&sa->rbtree);
	}
}

SYSINIT(sysref, SI_BOOT2_MACHDEP, SI_ORDER_ANY, sysrefbootinit, NULL);

/*
 * RB tree comparison function.  Orders two sysrefs by sysid, returning
 * -1, 0 or 1 when sr1's sysid is less than, equal to, or greater than
 * sr2's sysid.
 */
static
int
rb_sysref_compare(struct sysref *sr1, struct sysref *sr2)
{
	if (sr1->sysid < sr2->sysid)
		return(-1);
	if (sr1->sysid > sr2->sysid)
		return(1);
	return(0);
}

/*
 * Manual initialization of a resource structure's sysref, only used during
 * booting to set up certain statically declared resources which cannot
 * be deallocated.
 *
 * A new sysid is taken from the current cpu's gd_sysid_alloc counter, the
 * ref count is placed in the -0x40000000 initialization state, the flags
 * are cleared (SRF_ALLOCATED is NOT set, unlike the objcache ctor path),
 * and the sysref is inserted into the current cpu's RB tree.  The caller
 * is expected to call sysref_activate() once the resource is ready.
 */
void
sysref_init(struct sysref *sr, struct sysref_class *srclass)
{
	struct srpercpu *sa;
	globaldata_t gd;

	/*
	 * NOTE(review): the critical section presumably keeps the per-cpu
	 * counter update and the choice of per-cpu tree consistent with
	 * each other -- confirm against crit_enter(9).
	 */
	gd = mycpu;
	crit_enter_gd(gd);
	gd->gd_sysid_alloc += ncpus_fit; /* next unique sysid */
	sr->sysid = gd->gd_sysid_alloc;
	/* the low bits of the sysid must identify the allocating cpu */
	KKASSERT(((int)sr->sysid & ncpus_fit_mask) == gd->gd_cpuid);
	sr->refcnt = -0x40000000;
	sr->flags = 0;
	sr->srclass = srclass;

	sa = &sysref_array[gd->gd_cpuid];
	spin_lock(&sa->spin);
	sysref_rb_tree_RB_INSERT(&sa->rbtree, sr);
	spin_unlock(&sa->spin);
	crit_exit_gd(gd);
}

/*
 * Allocate a resource structure of the specified class, initialize a
 * sysid and add the resource to the RB tree.  The caller must complete
 * initialization of the resource and call sysref_activate() to activate it.
 *
 * Returns a pointer to the start of the resource structure (not to the
 * embedded sysref, which lives srclass->offset bytes into it).
 */
void *
sysref_alloc(struct sysref_class *srclass)
{
	struct sysref *sr;
	char *data;
	int n;

	/*
	 * Create the object cache backing store.
	 *
	 * This happens lazily on the first allocation for the class.
	 * NOTE(review): nothing in this function serializes the NULL check
	 * against a concurrent first allocation of the same class --
	 * confirm that callers guarantee this.
	 */
	if (srclass->oc == NULL) {
		KKASSERT(srclass->mtype != NULL);
		srclass->oc = objcache_create_mbacked(
				srclass->mtype, srclass->objsize,
				0, srclass->nom_cache,
				sysref_ctor, sysref_dtor, srclass);
	}

	/*
	 * Allocate the resource.
	 *
	 * Objects coming out of the cache must carry SRF_PUTAWAY (set by
	 * sysref_ctor() and by the final release in _sysref_put()); the
	 * flag is cleared while the object is handed out.
	 */
	data = objcache_get(srclass->oc, M_WAITOK);
	sr = (struct sysref *)(data + srclass->offset);
	KKASSERT(sr->flags & SRF_PUTAWAY);
	sr->flags &= ~SRF_PUTAWAY;

	/*
	 * Refcnt isn't touched while it is zero.  The objcache ctor
	 * function has already allocated a sysid and emplaced the
	 * structure in the RB tree.
	 */
	KKASSERT(sr->refcnt == 0);
	sr->refcnt = -0x40000000;

	/*
	 * Clean out the structure unless the caller wants to deal with
	 * it (e.g. like the vmspace code).
	 *
	 * Only the bytes before and after the embedded struct sysref are
	 * zeroed; the sysref itself is left intact.
	 */
	if ((srclass->flags & SRC_MANAGEDINIT) == 0) {
		if (srclass->offset != 0)
			bzero(data, srclass->offset);
		/* n = offset of the first byte past the embedded sysref */
		n = srclass->offset + sizeof(struct sysref);
		KKASSERT(n <= srclass->objsize);
		if (n != srclass->objsize)
			bzero(data + n, srclass->objsize - n);
	}
	return(data);
}

/*
 * Object cache backing store ctor function.
 *
 * This allocates the sysid and associates the structure with the
 * red-black tree, allowing it to be looked up.  The actual resource
 * structure has NOT yet been allocated so it is marked free.
 *
 * If the sysid is not used to access the resource, we will just
 * allow the sysid to be reused when the resource structure is reused,
 * allowing the RB tree operation to be 'cached'.  This results in
 * virtually no performance penalty for using the sysref facility.
 *
 * data is the start of the resource structure, privdata is the
 * sysref_class the objcache was created with, and ocflags is passed
 * through unchanged to the class's own ctor.  Always returns TRUE.
 */
static
boolean_t
sysref_ctor(void *data, void *privdata, int ocflags)
{
	globaldata_t gd;
	struct srpercpu *sa;
	struct sysref_class *srclass = privdata;
	/* the sysref is embedded srclass->offset bytes into the object */
	struct sysref *sr = (void *)((char *)data + srclass->offset);

	/*
	 * Resource structures need to be cleared when allocating from
	 * malloc backing store.  This is different from the zeroing
	 * that we do in sysref_alloc().
	 */
	bzero(data, srclass->objsize);

	/*
	 * Resources managed by our objcache do the sysid and RB tree
	 * handling in the objcache ctor/dtor, so we can reuse the
	 * structure without re-treeing it over and over again.
	 */
	gd = mycpu;
	crit_enter_gd(gd);
	gd->gd_sysid_alloc += ncpus_fit; /* next unique sysid */
	sr->sysid = gd->gd_sysid_alloc;
	/* the low bits of the sysid must identify the allocating cpu */
	KKASSERT(((int)sr->sysid & ncpus_fit_mask) == gd->gd_cpuid);
	/* sr->refcnt= 0; already zero */
	sr->flags = SRF_ALLOCATED | SRF_PUTAWAY;
	sr->srclass = srclass;

	sa = &sysref_array[gd->gd_cpuid];
	spin_lock(&sa->spin);
	sysref_rb_tree_RB_INSERT(&sa->rbtree, sr);
	spin_unlock(&sa->spin);
	crit_exit_gd(gd);

	/*
	 * Execute the class's ctor function, if any.  NOTE: The class
	 * should not try to zero out the structure, we've already handled
	 * that and preinitialized the sysref.
	 *
	 * XXX ignores return value for now
	 */
	if (srclass->ctor)
		srclass->ctor(data, privdata, ocflags);
	return TRUE;
}

/*
 * Object cache destructor, allowing the structure to be returned
 * to the system memory pool.  The resource structure must be
 * removed from the RB tree.  All other references have already
 * been destroyed and the RB tree will not create any new references
 * to the structure in its current state.
 *
 * data is the start of the resource structure and privdata is the
 * sysref_class the objcache was created with.  The class's own dtor,
 * if any, is called after the sysref has been removed from the tree.
 */
static
void
sysref_dtor(void *data, void *privdata)
{
	struct srpercpu *sa;
	struct sysref_class *srclass = privdata;
	struct sysref *sr = (void *)((char *)data + srclass->offset);

	KKASSERT(sr->refcnt == 0);
	/*
	 * Select the tree from the low bits of the sysid rather than from
	 * the current cpu; the insert paths assert that those bits equal
	 * the id of the cpu whose tree received the sysref.
	 */
	sa = &sysref_array[(int)sr->sysid & ncpus_fit_mask];
	spin_lock(&sa->spin);
	sysref_rb_tree_RB_REMOVE(&sa->rbtree, sr);
	spin_unlock(&sa->spin);
	if (srclass->dtor)
		srclass->dtor(data, privdata);
}

/*
 * Activate or reactivate a resource.  0x40000001 is added to the ref count
 * so -0x40000000 (during initialization) will translate to a ref count of 1.
 * Any references made during initialization will translate to additional
 * positive ref counts.
 *
 * The addition is performed with a compare-and-set retry loop; the
 * KASSERT requires the current count to be negative and the resulting
 * count to be positive.
 *
 * MPSAFE
 */
void
sysref_activate(struct sysref *sr)
{
	int count;

	for (;;) {
		count = sr->refcnt;
		KASSERT(count < 0 && count + 0x40000001 > 0,
			("sysref_activate: bad count %08x", count));
		if (atomic_cmpset_int(&sr->refcnt, count, count + 0x40000001))
			break;
		/* lost a race with another ref count update, retry */
		cpu_pause();
	}
}

/*
 * Release a reference under special circumstances.  This call is made
 * from the sysref_put() inline from sys/sysref2.h for any 1->0 transitions,
 * negative->negative 'termination in progress' transitions, and when the
 * cmpset instruction fails during a normal transition.
 *
 * Although only those cases reach this function from the inline, it
 * handles all cases regardless.
 *
 * The resource must not currently be put away in the objcache
 * (SRF_PUTAWAY must be clear).
 */
void
_sysref_put(struct sysref *sr)
{
	int count;
	void *data;

	KKASSERT((sr->flags & SRF_PUTAWAY) == 0);

	for (;;) {
		/* re-sample the count on every retry */
		count = sr->refcnt;
		if (count > 1) {
			/*
			 * release 1 count, nominal case, active resource
			 * structure, no other action required.
			 */
			if (atomic_cmpset_int(&sr->refcnt, count, count - 1))
				break;
		} else if (count == 1) {
			/*
			 * 1->0 transitions transition to -0x40000000 instead,
			 * placing the resource structure into a termination-
			 * in-progress state.  The termination function is
			 * then called.
			 *
			 * The class lock is acquired before the cmpset.  If
			 * the cmpset succeeds, terminate() is called with the
			 * lock still held and no unlock is issued here
			 * (NOTE(review): terminate() is presumably
			 * responsible for releasing it -- confirm against
			 * the class implementations).  If the cmpset fails
			 * the lock is dropped and the loop retries.
			 */
			data = (char *)sr - sr->srclass->offset;
			sr->srclass->ops.lock(data);
			if (atomic_cmpset_int(&sr->refcnt, count, -0x40000000)) {
				sr->srclass->ops.terminate(data);
				break;
			}
			sr->srclass->ops.unlock(data);
		} else if (count > -0x40000000) {
			/*
			 * release 1 count, nominal case, resource undergoing
			 * termination.  The Resource can be ref'd and
			 * deref'd while undergoing termination.
			 */
			if (atomic_cmpset_int(&sr->refcnt, count, count - 1))
				break;
		} else {
			/*
			 * Final release, set refcnt to 0.
			 * Resource must have been allocated.
			 *
			 * If SRF_SYSIDUSED is not set just objcache_put() the
			 * resource, otherwise objcache_dtor() the resource.
			 */
			KKASSERT(count == -0x40000000);
			if (atomic_cmpset_int(&sr->refcnt, count, 0)) {
				KKASSERT(sr->flags & SRF_ALLOCATED);
				/* mark as back in the cache for sysref_alloc() */
				sr->flags |= SRF_PUTAWAY;
				data = (char *)sr - sr->srclass->offset;
				if (sr->flags & SRF_SYSIDUSED)
					objcache_dtor(sr->srclass->oc, data);
				else
					objcache_put(sr->srclass->oc, data);
				break;
			}
		}
		/* loop until the cmpset succeeds */
		cpu_pause();
	}
}

/*
 * Allocate and return the next unique sysid from the current cpu's
 * gd_sysid_alloc counter.  The counter is advanced by ncpus_fit, exactly
 * as in sysref_init() and sysref_ctor(), but the sysid is not associated
 * with any sysref and nothing is inserted into an RB tree.
 */
sysid_t
allocsysid(void)
{
	globaldata_t gd = mycpu;
	sysid_t sysid;

	crit_enter_gd(gd);
	gd->gd_sysid_alloc += ncpus_fit;
	sysid = gd->gd_sysid_alloc;
	crit_exit_gd(gd);
	return(sysid);
}
