/*
 * Copyright (c) 2005 Jeffrey M. Hsu.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Jeffrey M. Hsu.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/globaldata.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/objcache.h>
#include <sys/spinlock.h>
#include <sys/thread.h>
#include <sys/thread2.h>
#include <sys/spinlock2.h>
#include <sys/sysctl.h>

static MALLOC_DEFINE(M_OBJCACHE, "objcache", "Object Cache");
static MALLOC_DEFINE(M_OBJMAG, "objcache mag", "Object Cache Magazine");

#define INITIAL_MAG_CAPACITY    64

struct magazine {
        int             rounds;
        int             capacity;
        SLIST_ENTRY(magazine) nextmagazine;
        void            *objects[];
};

SLIST_HEAD(magazinelist, magazine);

#define MAGAZINE_HDRSIZE        __offsetof(struct magazine, objects[0])
#define MAGAZINE_CAPACITY_MAX   4096
#define MAGAZINE_CAPACITY_MIN   4
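/*
 * A magazine's objects[] array is operated as a simple LIFO stack, with
 * rounds counting the valid entries: a get pops objects[--rounds] and a
 * put pushes objects[rounds++].  The magazine is empty at rounds == 0
 * and full at rounds == capacity.  See objcache_get() and objcache_put()
 * below.
 */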
/*
 * per-cluster cache of magazines
 *
 * All fields in this structure are protected by the spinlock.
 */
struct magazinedepot {
        /*
         * The per-cpu object caches only exchange completely full or
         * completely empty magazines with the depot layer, so we only
         * have to cache these two types of magazines.
         */
        struct magazinelist fullmagazines;
        struct magazinelist emptymagazines;
        int             magcapacity;

        /* protect this structure */
        struct spinlock spin;

        /* magazines not yet allocated towards limit */
        int             unallocated_objects;
        int             cluster_limit;  /* ref for adjustments */

        /* infrequently used fields */
        int             waiting;        /* waiting for another cpu to
                                         * return a full magazine to
                                         * the depot */
        int             contested;      /* depot contention count */
} __cachealign;

/*
 * per-cpu object cache
 *
 * All fields in this structure are protected by crit_enter().
 */
struct percpu_objcache {
        struct magazine *loaded_magazine;       /* active magazine */
        struct magazine *previous_magazine;     /* backup magazine */

        /* statistics */
        u_long          gets_cumulative;        /* total calls to get */
        u_long          gets_null;              /* objcache_get returned NULL */
        u_long          allocs_cumulative;      /* total calls to alloc */
        u_long          puts_cumulative;        /* total calls to put */
        u_long          gets_exhausted;         /* # of gets that hit exhaustion */
#ifdef notyet
        u_long          puts_othercluster;      /* returned to other cluster */
#endif

        /* infrequently used fields */
        int             waiting;        /* waiting for a thread on this
                                         * cpu to return an obj to the
                                         * per-cpu cache */
} __cachealign;

/* only until we have NUMA cluster topology information XXX */
#define MAXCLUSTERS     1
#define myclusterid     0
#define CLUSTER_OF(obj) 0

/*
 * Rarely accessed but useful bits of objcache.
 */
struct objcache_desc {
        LIST_ENTRY(objcache_desc) next;
        struct objcache *objcache;
        int             total_objects;
        int             reserved;
        char            name[OBJCACHE_NAMELEN];
};

/*
 * Two-level object cache consisting of NUMA cluster-level depots of
 * fully loaded or completely empty magazines and cpu-level caches of
 * individual objects.
 */
struct objcache {
        /* object constructor and destructor from blank storage */
        objcache_ctor_fn *ctor;
        objcache_dtor_fn *dtor;
        void            *privdata;

        /* interface to underlying allocator */
        objcache_alloc_fn *alloc;
        objcache_free_fn *free;
        void            *allocator_args;

        struct objcache_desc *desc;

        /* NUMA-cluster level caches */
        struct magazinedepot depot[MAXCLUSTERS];

        struct percpu_objcache cache_percpu[];  /* per-cpu caches */
};

SYSCTL_NODE(_kern, OID_AUTO, objcache, CTLFLAG_RW, 0, "objcache");

static struct spinlock objcachelist_spin;
static LIST_HEAD(objcachelist, objcache_desc) allobjcaches;
static int magazine_capmin;
static int magazine_capmax;

static struct magazine *
mag_alloc(int capacity)
{
        struct magazine *mag;
        int size;

        size = __offsetof(struct magazine, objects[capacity]);
        KASSERT(size > 0 && (size & __VM_CACHELINE_MASK) == 0,
            ("magazine size is not a multiple of the cache line size"));

        mag = kmalloc_cachealign(size, M_OBJMAG, M_INTWAIT | M_ZERO);
        mag->capacity = capacity;
        mag->rounds = 0;
        return (mag);
}

static int
mag_capacity_align(int mag_capacity)
{
        int mag_size;

        mag_size = __VM_CACHELINE_ALIGN(
            __offsetof(struct magazine, objects[mag_capacity]));
        mag_capacity = (mag_size - MAGAZINE_HDRSIZE) / sizeof(void *);

        return mag_capacity;
}
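/*
 * Worked example (assuming a 64-byte cache line and LP64, giving a
 * 16-byte magazine header and 8-byte object pointers):
 * mag_capacity_align(4) computes a raw size of 16 + 4*8 = 48 bytes,
 * rounds it up to 64, and returns (64 - 16) / 8 = 6.  The capacity is
 * thus bumped up to whatever fits in the cacheline-aligned allocation
 * so none of the allocated space is wasted.
 */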
/*
 * Utility routines for objects that don't require any construction or
 * destruction.
 */
static void
null_dtor(void *obj, void *privdata)
{
        /* do nothing */
}

static boolean_t
null_ctor(void *obj, void *privdata, int ocflags)
{
        return TRUE;
}

/*
 * Create an object cache.
 */
struct objcache *
objcache_create(const char *name, int cluster_limit, int nom_cache,
                objcache_ctor_fn *ctor, objcache_dtor_fn *dtor, void *privdata,
                objcache_alloc_fn *alloc, objcache_free_fn *free,
                void *allocator_args)
{
        struct objcache_desc *desc;
        struct objcache *oc;
        struct magazinedepot *depot;
        int cpuid;
        int nmagdepot;
        int mag_capacity;
        int i;

        /*
         * Allocate the objcache descriptor.
         */
        desc = kmalloc(sizeof(*desc), M_OBJCACHE, M_WAITOK | M_ZERO);

        /*
         * Allocate the object cache structure.
         */
        oc = kmalloc_cachealign(
            __offsetof(struct objcache, cache_percpu[ncpus]),
            M_OBJCACHE, M_WAITOK | M_ZERO);
        oc->ctor = ctor ? ctor : null_ctor;
        oc->dtor = dtor ? dtor : null_dtor;
        oc->privdata = privdata;
        oc->alloc = alloc;
        oc->free = free;
        oc->allocator_args = allocator_args;

        /*
         * Link the objcache and its descriptor.
         */
        oc->desc = desc;
        desc->objcache = oc;
        strlcpy(desc->name, name, sizeof(desc->name));

        /*
         * Initialize depot list(s).
         */
        depot = &oc->depot[0];

        spin_init(&depot->spin, "objcachedepot");
        SLIST_INIT(&depot->fullmagazines);
        SLIST_INIT(&depot->emptymagazines);

        /*
         * Figure out the nominal number of free objects to cache and
         * the magazine capacity.  By default we want to cache up to
         * half the cluster_limit.  If there is no cluster_limit then
         * we want to cache up to 128 objects.
         */
        if (nom_cache == 0)
                nom_cache = cluster_limit / 2;
        if (cluster_limit && nom_cache > cluster_limit)
                nom_cache = cluster_limit;
        if (nom_cache == 0)
                nom_cache = INITIAL_MAG_CAPACITY * 2;

        /*
         * Magazine capacity for 2 active magazines per cpu plus 2
         * magazines in the depot.
         */
        mag_capacity = mag_capacity_align(nom_cache / (ncpus + 1) / 2 + 1);
        if (mag_capacity > magazine_capmax)
                mag_capacity = magazine_capmax;
        else if (mag_capacity < magazine_capmin)
                mag_capacity = magazine_capmin;
        depot->magcapacity = mag_capacity;

        /*
         * The cluster_limit must be sufficient to have two magazines per
         * cpu plus at least two magazines in the depot.  However, because
         * partial magazines can stay on the cpus, what we really need here
         * is to specify the number of extra magazines we allocate for the
         * depot.
         *
         * Use ~1B objects to mean 'unlimited'.  A negative unallocated
         * object count is possible due to dynamic adjustments so we can't
         * use a negative number to mean 'unlimited'.  We need some overflow
         * capacity too due to the preallocated mags.
         */
        if (cluster_limit == 0) {
                depot->unallocated_objects = OBJCACHE_UNLIMITED;
        } else {
                depot->unallocated_objects = ncpus * mag_capacity * 2 +
                    cluster_limit;
        }

        /* Save the total number of objects. */
        desc->total_objects = depot->unallocated_objects;

        /*
         * This is a dynamic adjustment aid initialized to the caller's
         * expectations of the current limit.
         */
        depot->cluster_limit = cluster_limit;

        /*
         * Initialize per-cpu caches.
         */
        for (cpuid = 0; cpuid < ncpus; cpuid++) {
                struct percpu_objcache *cache_percpu = &oc->cache_percpu[cpuid];

                cache_percpu->loaded_magazine = mag_alloc(mag_capacity);
                cache_percpu->previous_magazine = mag_alloc(mag_capacity);
        }

        /*
         * Compute how many empty magazines to place in the depot.  This
         * determines the retained cache size and is based on nom_cache.
         *
         * The actual cache size is larger because there are two magazines
         * for each cpu as well, but those can be in any fill state so we
         * just can't count them.
         *
         * There is a minimum of two magazines in the depot.
         */
        nmagdepot = nom_cache / mag_capacity + 1;
        if (nmagdepot < 2)
                nmagdepot = 2;

        /*
         * Put empty magazines in the depot.
         */
        for (i = 0; i < nmagdepot; i++) {
                struct magazine *mag = mag_alloc(mag_capacity);
                SLIST_INSERT_HEAD(&depot->emptymagazines, mag, nextmagazine);
        }

        spin_lock(&objcachelist_spin);
        LIST_INSERT_HEAD(&allobjcaches, desc, next);
        spin_unlock(&objcachelist_spin);

        return (oc);
}
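/*
 * Example (hypothetical caller, for illustration only; struct foo, M_FOO
 * and foo_ctor() are assumptions, not part of this file): cache objects
 * of a subsystem-private type with a one-time constructor, backed by the
 * kmalloc wrappers defined later in this file.
 *
 *      static boolean_t
 *      foo_ctor(void *obj, void *privdata, int ocflags)
 *      {
 *              bzero(obj, sizeof(struct foo));
 *              return (TRUE);
 *      }
 *
 *      static struct objcache_malloc_args foo_margs = {
 *              .objsize = sizeof(struct foo),
 *              .mtype = M_FOO
 *      };
 *      struct objcache *foo_cache;
 *
 *      foo_cache = objcache_create("foo", 0, 0,
 *                                  foo_ctor, NULL, NULL,
 *                                  objcache_malloc_alloc,
 *                                  objcache_malloc_free, &foo_margs);
 *
 * A cluster_limit of 0 means OBJCACHE_UNLIMITED and a nom_cache of 0
 * selects the default of 128 cached objects.
 */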
/*
 * Adjust the cluster limit.  This is allowed to cause unallocated_objects
 * to go negative.  Note that due to the magazine hysteresis there is a
 * limit to how much of the objcache can be reclaimed using this API to
 * reduce its size.
 */
void
objcache_set_cluster_limit(struct objcache *oc, int cluster_limit)
{
        struct magazinedepot *depot;

        depot = &oc->depot[myclusterid];
        if (depot->cluster_limit != cluster_limit) {
                int delta;

                spin_lock(&depot->spin);
                delta = cluster_limit - depot->cluster_limit;
                depot->unallocated_objects += delta;
                depot->cluster_limit = cluster_limit;
                spin_unlock(&depot->spin);
                wakeup(depot);

                oc->desc->total_objects += delta;
        }
}

struct objcache *
objcache_create_simple(malloc_type_t mtype, size_t objsize)
{
        struct objcache_malloc_args *margs;
        struct objcache *oc;

        margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK | M_ZERO);
        margs->objsize = objsize;
        margs->mtype = mtype;
        oc = objcache_create(mtype->ks_shortdesc, 0, 0,
                             NULL, NULL, NULL,
                             objcache_malloc_alloc, objcache_malloc_free,
                             margs);
        return (oc);
}

struct objcache *
objcache_create_mbacked(malloc_type_t mtype, size_t objsize,
                        int cluster_limit, int nom_cache,
                        objcache_ctor_fn *ctor, objcache_dtor_fn *dtor,
                        void *privdata)
{
        struct objcache_malloc_args *margs;
        struct objcache *oc;

        margs = kmalloc(sizeof(*margs), M_OBJCACHE, M_WAITOK | M_ZERO);
        margs->objsize = objsize;
        margs->mtype = mtype;
        oc = objcache_create(mtype->ks_shortdesc,
                             cluster_limit, nom_cache,
                             ctor, dtor, privdata,
                             objcache_malloc_alloc, objcache_malloc_free,
                             margs);
        return (oc);
}
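/*
 * Example (hypothetical, for illustration): for objects that need no
 * constructor or destructor the convenience wrapper removes the
 * boilerplate.  M_FOO and struct foo are assumptions.
 *
 *      struct objcache *oc;
 *
 *      oc = objcache_create_simple(M_FOO, sizeof(struct foo));
 *
 * This is equivalent to objcache_create() with no cluster limit, the
 * default nominal cache size, NULL ctor/dtor, and kmalloc-backed storage.
 */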
#define MAGAZINE_EMPTY(mag)     (mag->rounds == 0)
#define MAGAZINE_NOTEMPTY(mag)  (mag->rounds != 0)
#define MAGAZINE_FULL(mag)      (mag->rounds == mag->capacity)

#define swap(x, y)      ({ struct magazine *t = x; x = y; y = t; })

/*
 * Get an object from the object cache.
 *
 * WARNING!  ocflags are only used when we have to go to the underlying
 * allocator, so we cannot depend on flags such as M_ZERO.
 */
void *
objcache_get(struct objcache *oc, int ocflags)
{
        struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
        struct magazine *loadedmag;
        struct magazine *emptymag;
        void *obj;
        struct magazinedepot *depot;

        KKASSERT((ocflags & M_ZERO) == 0);
        crit_enter();
        ++cpucache->gets_cumulative;

retry:
        /*
         * Loaded magazine has an object.  This is the hot path.
         * It is lock-free and uses a critical section to block
         * out interrupt handlers on the same processor.
         */
        loadedmag = cpucache->loaded_magazine;
        if (MAGAZINE_NOTEMPTY(loadedmag)) {
                obj = loadedmag->objects[--loadedmag->rounds];
                crit_exit();
                return (obj);
        }

        /* Previous magazine has an object. */
        if (MAGAZINE_NOTEMPTY(cpucache->previous_magazine)) {
                swap(cpucache->loaded_magazine, cpucache->previous_magazine);
                loadedmag = cpucache->loaded_magazine;
                obj = loadedmag->objects[--loadedmag->rounds];
                crit_exit();
                return (obj);
        }

        /*
         * Both magazines empty.  Get a full magazine from the depot and
         * move one of the empty ones to the depot.
         *
         * Obtain the depot spinlock.
         *
         * NOTE: Beyond this point, M_* flags are handled via oc->alloc()
         */
        depot = &oc->depot[myclusterid];
        spin_lock(&depot->spin);

        /*
         * Recheck the cpucache after obtaining the depot spinlock.  This
         * shouldn't be necessary now but don't take any chances.
         */
        if (MAGAZINE_NOTEMPTY(cpucache->loaded_magazine) ||
            MAGAZINE_NOTEMPTY(cpucache->previous_magazine)
        ) {
                spin_unlock(&depot->spin);
                goto retry;
        }

        /* Check if the depot has a full magazine. */
        if (!SLIST_EMPTY(&depot->fullmagazines)) {
                emptymag = cpucache->previous_magazine;
                cpucache->previous_magazine = cpucache->loaded_magazine;
                cpucache->loaded_magazine = SLIST_FIRST(&depot->fullmagazines);
                SLIST_REMOVE_HEAD(&depot->fullmagazines, nextmagazine);

                /*
                 * Return emptymag to the depot.
                 */
                KKASSERT(MAGAZINE_EMPTY(emptymag));
                SLIST_INSERT_HEAD(&depot->emptymagazines,
                                  emptymag, nextmagazine);
                spin_unlock(&depot->spin);
                goto retry;
        }

        /*
         * The depot does not have any non-empty magazines.  If we have
         * not hit our object limit we can allocate a new object using
         * the back-end allocator.
         *
         * NOTE: unallocated_objects can wind up being negative due to
         *       objcache_set_cluster_limit() calls.
         */
        if (__predict_true(depot->unallocated_objects > 0)) {
                --depot->unallocated_objects;
                spin_unlock(&depot->spin);
                ++cpucache->allocs_cumulative;
                crit_exit();

                obj = oc->alloc(oc->allocator_args, ocflags);
                if (obj) {
                        if (oc->ctor(obj, oc->privdata, ocflags))
                                return (obj);
                        oc->free(obj, oc->allocator_args);
                        obj = NULL;
                }
                if (obj == NULL) {
                        spin_lock(&depot->spin);
                        ++depot->unallocated_objects;
                        spin_unlock(&depot->spin);
                        if (depot->waiting)
                                wakeup(depot);

                        crit_enter();
                        /*
                         * Makes debugging easier when gets_cumulative does
                         * not include gets_null.
                         */
                        ++cpucache->gets_null;
                        --cpucache->gets_cumulative;
                        crit_exit();
                }
                return (obj);
        }
        if (__predict_false(cpucache->gets_exhausted++ == 0)) {
                kprintf("Warning: objcache(%s) exhausted on cpu%d!\n",
                    oc->desc->name, mycpuid);
        }

        /*
         * Otherwise block if allowed to.
         */
        if ((ocflags & (M_WAITOK | M_NULLOK)) == M_WAITOK) {
                ++cpucache->waiting;
                ++depot->waiting;
                ssleep(depot, &depot->spin, 0, "objcache_get", 0);
                --cpucache->waiting;
                --depot->waiting;
                spin_unlock(&depot->spin);
                goto retry;
        }

        /*
         * Otherwise fail.
         */
        ++cpucache->gets_null;
        --cpucache->gets_cumulative;
        crit_exit();
        spin_unlock(&depot->spin);
        return (NULL);
}
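/*
 * Typical lifecycle (illustrative sketch; foo_cache is the hypothetical
 * cache from the objcache_create() example above):
 *
 *      struct foo *fo;
 *
 *      fo = objcache_get(foo_cache, M_WAITOK);
 *      ... use the object ...
 *      objcache_put(foo_cache, fo);
 *
 * With M_WAITOK alone the get blocks until an object becomes available.
 * With M_WAITOK | M_NULLOK, or with M_NOWAIT, it can return NULL and the
 * caller must check.  M_ZERO must not be passed (see the KKASSERT and
 * WARNING above); cached objects are recycled as-is.
 */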
/*
 * Wrapper for malloc allocation routines.
 */
void *
objcache_malloc_alloc(void *allocator_args, int ocflags)
{
        struct objcache_malloc_args *alloc_args = allocator_args;

        return (kmalloc(alloc_args->objsize, alloc_args->mtype,
                        ocflags & OC_MFLAGS));
}

/*
 * Wrapper for malloc allocation routines, with initial zeroing
 * (but objects are not zeroed on reuse from cache).
 */
void *
objcache_malloc_alloc_zero(void *allocator_args, int ocflags)
{
        struct objcache_malloc_args *alloc_args = allocator_args;

        return (kmalloc(alloc_args->objsize, alloc_args->mtype,
                        (ocflags & OC_MFLAGS) | M_ZERO));
}

void
objcache_malloc_free(void *obj, void *allocator_args)
{
        struct objcache_malloc_args *alloc_args = allocator_args;

        kfree(obj, alloc_args->mtype);
}

/*
 * Wrappers for allocation policies that pre-allocate at initialization time
 * and don't do run-time allocation.
 */
void *
objcache_nop_alloc(void *allocator_args, int ocflags)
{
        return (NULL);
}

void
objcache_nop_free(void *obj, void *allocator_args)
{
}
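/*
 * Illustrative pattern for the nop allocator (hypothetical, not taken
 * from a real caller): the subsystem preallocates its objects at init
 * time and seeds the cache with objcache_put(), so the cache never
 * calls down into an allocator at run time.
 *
 *      oc = objcache_create("foo", nfoo, 0, NULL, NULL, NULL,
 *                           objcache_nop_alloc, objcache_nop_free, NULL);
 *      for (i = 0; i < nfoo; ++i)
 *              objcache_put(oc, &foo_array[i]);
 *
 * The sketch assumes nfoo fits within the cache's magazine capacity;
 * puts that overflow it are handed to the (nop) free function and those
 * objects are lost to the cache.
 */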
/*
 * Return an object to the object cache.
 */
void
objcache_put(struct objcache *oc, void *obj)
{
        struct percpu_objcache *cpucache = &oc->cache_percpu[mycpuid];
        struct magazine *loadedmag;
        struct magazinedepot *depot;

        crit_enter();
        ++cpucache->puts_cumulative;

        if (CLUSTER_OF(obj) != myclusterid) {
#ifdef notyet
                /* use lazy IPI to send object to owning cluster XXX todo */
                ++cpucache->puts_othercluster;
                crit_exit();
                return;
#endif
        }

retry:
        /*
         * Free slot available in loaded magazine.  This is the hot path.
         * It is lock-free and uses a critical section to block out interrupt
         * handlers on the same processor.
         */
        loadedmag = cpucache->loaded_magazine;
        if (!MAGAZINE_FULL(loadedmag)) {
                loadedmag->objects[loadedmag->rounds++] = obj;
                if (cpucache->waiting)
                        wakeup_mycpu(&oc->depot[myclusterid]);
                crit_exit();
                return;
        }

        /*
         * Current magazine full, but previous magazine has room.  XXX
         */
        if (!MAGAZINE_FULL(cpucache->previous_magazine)) {
                swap(cpucache->loaded_magazine, cpucache->previous_magazine);
                loadedmag = cpucache->loaded_magazine;
                loadedmag->objects[loadedmag->rounds++] = obj;
                if (cpucache->waiting)
                        wakeup_mycpu(&oc->depot[myclusterid]);
                crit_exit();
                return;
        }

        /*
         * Both magazines full.  Get an empty magazine from the depot and
         * move a full loaded magazine to the depot.  Even though the
         * magazine may wind up with space available after we block on
         * the spinlock, we still cycle it through to avoid the non-optimal
         * corner-case.
         *
         * Obtain the depot spinlock.
         */
        depot = &oc->depot[myclusterid];
        spin_lock(&depot->spin);

        /*
         * If an empty magazine is available in the depot, cycle it
         * through and retry.
         */
        if (!SLIST_EMPTY(&depot->emptymagazines)) {
                loadedmag = cpucache->previous_magazine;
                cpucache->previous_magazine = cpucache->loaded_magazine;
                cpucache->loaded_magazine = SLIST_FIRST(&depot->emptymagazines);
                SLIST_REMOVE_HEAD(&depot->emptymagazines, nextmagazine);

                /*
                 * Return loadedmag to the depot.  Due to blocking it may
                 * not be entirely full and could even be empty.
                 */
                if (MAGAZINE_EMPTY(loadedmag)) {
                        SLIST_INSERT_HEAD(&depot->emptymagazines,
                                          loadedmag, nextmagazine);
                        spin_unlock(&depot->spin);
                } else {
                        SLIST_INSERT_HEAD(&depot->fullmagazines,
                                          loadedmag, nextmagazine);
                        spin_unlock(&depot->spin);
                        if (depot->waiting)
                                wakeup(depot);
                }
                goto retry;
        }

        /*
         * An empty mag is not available.  This is a corner case which can
         * occur due to cpus holding partially full magazines.  Do not try
         * to allocate a mag, just free the object.
         */
        ++depot->unallocated_objects;
        spin_unlock(&depot->spin);
        if (depot->waiting)
                wakeup(depot);
        crit_exit();
        oc->dtor(obj, oc->privdata);
        oc->free(obj, oc->allocator_args);
}

/*
 * The object is being put back into the cache, but the caller has
 * indicated that the object is not in any shape to be reused and should
 * be dtor'd immediately.
 */
void
objcache_dtor(struct objcache *oc, void *obj)
{
        struct magazinedepot *depot;

        depot = &oc->depot[myclusterid];
        spin_lock(&depot->spin);
        ++depot->unallocated_objects;
        spin_unlock(&depot->spin);
        if (depot->waiting)
                wakeup(depot);
        oc->dtor(obj, oc->privdata);
        oc->free(obj, oc->allocator_args);
}
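/*
 * Illustrative distinction (hypothetical caller; foo_still_valid() is an
 * assumption): return reusable objects with objcache_put(), but hand
 * known-bad objects to objcache_dtor() so they are destroyed immediately
 * instead of being recycled.
 *
 *      if (foo_still_valid(fo))
 *              objcache_put(foo_cache, fo);
 *      else
 *              objcache_dtor(foo_cache, fo);
 */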
/*
 * Deallocate all objects in a magazine and free the magazine if requested.
 * When freeit is TRUE the magazine must already be disassociated from the
 * depot.
 *
 * Must be called with a critical section held when called with a per-cpu
 * magazine.  The magazine may be indirectly modified during the loop.
 *
 * If the magazine moves during a dtor the operation is aborted.  This is
 * only allowed when freeit is FALSE.
 *
 * The number of objects freed is returned.
 */
static int
mag_purge(struct objcache *oc, struct magazine **magp, int freeit)
{
        struct magazine *mag = *magp;
        int count;
        void *obj;

        count = 0;
        while (mag->rounds) {
                obj = mag->objects[--mag->rounds];
                oc->dtor(obj, oc->privdata);            /* MAY BLOCK */
                oc->free(obj, oc->allocator_args);      /* MAY BLOCK */
                ++count;

                /*
                 * Cycle for interrupts.
                 */
                if ((count & 15) == 0) {
                        crit_exit();
                        crit_enter();
                }

                /*
                 * mag may have become invalid either due to dtor/free
                 * blocking or interrupt cycling, do not dereference it
                 * until we check.
                 */
                if (*magp != mag) {
                        kprintf("mag_purge: mag ripped out\n");
                        break;
                }
        }
        if (freeit) {
                KKASSERT(*magp == mag);
                *magp = NULL;
                kfree(mag, M_OBJMAG);
        }
        return (count);
}

/*
 * Disassociate zero or more magazines from a magazine list associated with
 * the depot, update the depot, and move the magazines to a temporary
 * list.
 *
 * The caller must check the depot for waiters and wake it up, typically
 * after disposing of the magazines this function loads onto the temporary
 * list.
 */
static void
maglist_disassociate(struct magazinedepot *depot, struct magazinelist *maglist,
                     struct magazinelist *tmplist, boolean_t purgeall)
{
        struct magazine *mag;

        while ((mag = SLIST_FIRST(maglist)) != NULL) {
                SLIST_REMOVE_HEAD(maglist, nextmagazine);
                SLIST_INSERT_HEAD(tmplist, mag, nextmagazine);
                depot->unallocated_objects += mag->rounds;
        }
}

/*
 * Deallocate all magazines and their contents from the passed temporary
 * list.  The magazines have already been accounted for by their depots.
 *
 * The total number of rounds freed is returned.  This number is typically
 * only used to determine whether a wakeup on the depot is needed or not.
 */
static int
maglist_purge(struct objcache *oc, struct magazinelist *maglist)
{
        struct magazine *mag;
        int count = 0;

        /*
         * Can't use SLIST_FOREACH because blocking releases the depot
         * spinlock.
         */
        crit_enter();
        while ((mag = SLIST_FIRST(maglist)) != NULL) {
                SLIST_REMOVE_HEAD(maglist, nextmagazine);
                count += mag_purge(oc, &mag, TRUE);
        }
        crit_exit();
        return (count);
}

/*
 * Deallocates all magazines on the full and empty magazine lists.
 *
 * Because this routine is called with a spinlock held, the magazines
 * can only be disassociated and moved to a temporary list, not freed.
 *
 * The caller is responsible for freeing the magazines.
 */
static void
depot_disassociate(struct magazinedepot *depot, struct magazinelist *tmplist)
{
        maglist_disassociate(depot, &depot->fullmagazines, tmplist, TRUE);
        maglist_disassociate(depot, &depot->emptymagazines, tmplist, TRUE);
}

/*
 * Try to free up some memory.  Return as soon as some free memory is found.
 * For each object cache on the reclaim list, first try the current per-cpu
 * cache, then the full magazine depot.
 */
boolean_t
objcache_reclaimlist(struct objcache *oclist[], int nlist, int ocflags)
{
        struct objcache *oc;
        struct percpu_objcache *cpucache;
        struct magazinedepot *depot;
        struct magazinelist tmplist;
        int i, count;

        SLIST_INIT(&tmplist);

        for (i = 0; i < nlist; i++) {
                oc = oclist[i];
                cpucache = &oc->cache_percpu[mycpuid];
                depot = &oc->depot[myclusterid];

                crit_enter();
                count = mag_purge(oc, &cpucache->loaded_magazine, FALSE);
                if (count == 0)
                        count += mag_purge(oc, &cpucache->previous_magazine, FALSE);
                crit_exit();
                if (count > 0) {
                        spin_lock(&depot->spin);
                        depot->unallocated_objects += count;
                        spin_unlock(&depot->spin);
                        if (depot->waiting)
                                wakeup(depot);
                        return (TRUE);
                }
                spin_lock(&depot->spin);
                maglist_disassociate(depot, &depot->fullmagazines,
                                     &tmplist, FALSE);
                spin_unlock(&depot->spin);
                count = maglist_purge(oc, &tmplist);
                if (count > 0) {
                        if (depot->waiting)
                                wakeup(depot);
                        return (TRUE);
                }
        }
        return (FALSE);
}
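/*
 * Illustrative use (hypothetical low-memory hook; the caches named are
 * assumptions): a subsystem under memory pressure can offer a list of
 * its caches and stop as soon as anything is freed back to the
 * underlying allocator.
 *
 *      struct objcache *list[] = { foo_cache, bar_cache };
 *
 *      if (objcache_reclaimlist(list, 2, M_NOWAIT))
 *              return;         (something was freed)
 */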
/*
 * Destroy an object cache.  Must have no existing references.
 */
void
objcache_destroy(struct objcache *oc)
{
        struct objcache_desc *desc = oc->desc;
        struct percpu_objcache *cache_percpu;
        struct magazinedepot *depot;
        int clusterid, cpuid;
        struct magazinelist tmplist;

        spin_lock(&objcachelist_spin);
        LIST_REMOVE(desc, next);
        spin_unlock(&objcachelist_spin);

        SLIST_INIT(&tmplist);
        for (clusterid = 0; clusterid < MAXCLUSTERS; clusterid++) {
                depot = &oc->depot[clusterid];
                spin_lock(&depot->spin);
                depot_disassociate(depot, &tmplist);
                spin_unlock(&depot->spin);
        }
        maglist_purge(oc, &tmplist);

        for (cpuid = 0; cpuid < ncpus; cpuid++) {
                cache_percpu = &oc->cache_percpu[cpuid];

                crit_enter();
                mag_purge(oc, &cache_percpu->loaded_magazine, TRUE);
                mag_purge(oc, &cache_percpu->previous_magazine, TRUE);
                crit_exit();
                cache_percpu->loaded_magazine = NULL;
                cache_percpu->previous_magazine = NULL;
                /* don't bother adjusting depot->unallocated_objects */
        }

        kfree(desc, M_OBJCACHE);
        kfree(oc, M_OBJCACHE);
}

static int
sysctl_ocstats(SYSCTL_HANDLER_ARGS)
{
        struct objcache_stats stat;
        struct objcache_desc marker, *desc;
        int error;

        memset(&marker, 0, sizeof(marker));

        spin_lock(&objcachelist_spin);

        LIST_INSERT_HEAD(&allobjcaches, &marker, next);
        while ((desc = LIST_NEXT(&marker, next)) != NULL) {
                u_long puts, unalloc;
                int cpu;

                LIST_REMOVE(&marker, next);
                LIST_INSERT_AFTER(desc, &marker, next);

                if (desc->total_objects == 0) {
                        /* Marker inserted by another thread. */
                        continue;
                }

                memset(&stat, 0, sizeof(stat));
                strlcpy(stat.oc_name, desc->name, sizeof(stat.oc_name));
                stat.oc_limit = desc->total_objects;
                /* XXX domain aware */
                unalloc = desc->objcache->depot[0].unallocated_objects;

                puts = 0;
                for (cpu = 0; cpu < ncpus; ++cpu) {
                        const struct percpu_objcache *cache;

                        cache = &desc->objcache->cache_percpu[cpu];
                        puts += cache->puts_cumulative;

                        stat.oc_requested += cache->gets_cumulative;
                        stat.oc_exhausted += cache->gets_exhausted;
                        stat.oc_failed += cache->gets_null;
                        stat.oc_allocated += cache->allocs_cumulative;
                }
                spin_unlock(&objcachelist_spin);

                /*
                 * Apply fixup.
                 */
                if (stat.oc_requested > puts)
                        stat.oc_used = stat.oc_requested - puts;
                if (stat.oc_limit > unalloc + stat.oc_used) {
                        stat.oc_cached = stat.oc_limit -
                            (unalloc + stat.oc_used);
                }
                stat.oc_requested += stat.oc_failed;

                /* Send out. */
                error = SYSCTL_OUT(req, &stat, sizeof(stat));

                /* Reacquire the lock before we continue the loop. */
                spin_lock(&objcachelist_spin);

                if (error)
                        break;
        }
        LIST_REMOVE(&marker, next);

        spin_unlock(&objcachelist_spin);

        return error;
}
SYSCTL_PROC(_kern_objcache, OID_AUTO, stats, (CTLTYPE_OPAQUE | CTLFLAG_RD),
    0, 0, sysctl_ocstats, "S,objcache_stats", "objcache statistics");
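/*
 * Example consumer (hypothetical userland sketch, assuming the
 * struct objcache_stats layout from <sys/objcache.h>): the node emits
 * one objcache_stats record per cache.
 *
 *      struct objcache_stats *sts;
 *      size_t len, i;
 *
 *      sysctlbyname("kern.objcache.stats", NULL, &len, NULL, 0);
 *      sts = malloc(len);
 *      sysctlbyname("kern.objcache.stats", sts, &len, NULL, 0);
 *      for (i = 0; i < len / sizeof(*sts); ++i)
 *              printf("%s: used %ju\n", sts[i].oc_name,
 *                  (uintmax_t)sts[i].oc_used);
 */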
static void
objcache_init(void)
{
        spin_init(&objcachelist_spin, "objcachelist");

        magazine_capmin = mag_capacity_align(MAGAZINE_CAPACITY_MIN);
        magazine_capmax = mag_capacity_align(MAGAZINE_CAPACITY_MAX);
        if (bootverbose) {
                kprintf("objcache: magazine cap [%d, %d]\n",
                    magazine_capmin, magazine_capmax);
        }
#if 0
        callout_init_mp(&objcache_callout);
        objcache_rebalance_period = 60 * hz;
        callout_reset(&objcache_callout, objcache_rebalance_period,
            objcache_timer, NULL);
#endif
}
SYSINIT(objcache, SI_BOOT2_OBJCACHE, SI_ORDER_FIRST, objcache_init, 0);