/*
 * Copyright (c) 2004,2014,2019-2020 The DragonFly Project.
 * All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * The original callout mechanism was based on the work of Adam M. Costello
 * and George Varghese, published in a technical report entitled "Redesigning
 * the BSD Callout and Timer Facilities" and modified slightly for inclusion
 * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
 * used in this implementation was published by G. Varghese and T. Lauck in
 * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
 * the Efficient Implementation of a Timer Facility" in the Proceedings of
 * the 11th ACM Annual Symposium on Operating Systems Principles,
 * Austin, Texas Nov 1987.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/spinlock.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/interrupt.h>
#include <sys/thread.h>
#include <sys/sysctl.h>
#include <sys/exislock.h>
#include <vm/vm_extern.h>
#include <machine/atomic.h>

#include <sys/spinlock2.h>
#include <sys/thread2.h>
#include <sys/mplock2.h>
#include <sys/exislock2.h>

TAILQ_HEAD(colist, _callout);
struct softclock_pcpu;

/*
 * DID_INIT	- Sanity check
 * PREVENTED	- A callback was prevented
 * RESET	- Callout_reset requested
 * STOP		- Callout_stop requested
 * INPROG	- Softclock_handler thread processing in-progress on callout,
 *		  queue linkage is indeterminate.  Third parties must queue
 *		  a STOP or CANCEL and await completion.
 * SET		- Callout is linked to queue (if INPROG not set)
 * AUTOLOCK	- Lockmgr cancelable interlock (copied from frontend)
 * MPSAFE	- Callout is MPSAFE (copied from frontend)
 * CANCEL	- callout_cancel requested
 * ACTIVE	- active/inactive (frontend only, see documentation).
 *		  This is *NOT* the same as whether a callout is queued or
 *		  not.
 */
#define CALLOUT_DID_INIT	0x00000001	/* frontend */
#define CALLOUT_PREVENTED	0x00000002	/* backend */
#define CALLOUT_FREELIST	0x00000004	/* backend */
#define CALLOUT_UNUSED0008	0x00000008
#define CALLOUT_UNUSED0010	0x00000010
#define CALLOUT_RESET		0x00000020	/* backend */
#define CALLOUT_STOP		0x00000040	/* backend */
#define CALLOUT_INPROG		0x00000080	/* backend */
#define CALLOUT_SET		0x00000100	/* backend */
#define CALLOUT_AUTOLOCK	0x00000200	/* both */
#define CALLOUT_MPSAFE		0x00000400	/* both */
#define CALLOUT_CANCEL		0x00000800	/* backend */
#define CALLOUT_ACTIVE		0x00001000	/* frontend */

struct wheel {
	struct spinlock	spin;
	struct colist	list;
};

struct softclock_pcpu {
	struct wheel	*callwheel;
	struct _callout *running;
	struct _callout * volatile next;
	struct colist	freelist;
	int		softticks;	/* softticks index */
	int		curticks;	/* per-cpu ticks counter */
	int		isrunning;
	struct thread	thread;
};

typedef struct softclock_pcpu *softclock_pcpu_t;

static int callout_debug = 0;
SYSCTL_INT(_debug, OID_AUTO, callout_debug, CTLFLAG_RW,
	   &callout_debug, 0, "");

static MALLOC_DEFINE(M_CALLOUT, "callouts", "softclock callouts");

static int cwheelsize;
static int cwheelmask;
static softclock_pcpu_t softclock_pcpu_ary[MAXCPU];

static void softclock_handler(void *arg);
static void slotimer_callback(void *arg);

/*
 * Handle pending requests.  No action can be taken if the callout is still
 * flagged INPROG.  Called from softclock for post-processing and from
 * various API functions.
 *
 * This routine does not block in any way.
 * Caller must hold c->spin.
 *
 * NOTE: Flags can be adjusted without holding c->spin, so atomic ops
 *	 must be used at all times.
 *
 * NOTE: The related (sc) might refer to another cpu.
 *
 * NOTE: The cc-vs-c frontend-vs-backend might be disconnected during the
 *	 operation, but the EXIS lock prevents (c) from being destroyed.
 */
static __inline
void
_callout_update_spinlocked(struct _callout *c)
{
	struct wheel *wheel;

	if ((c->flags & CALLOUT_INPROG) && curthread != &c->qsc->thread) {
		/*
		 * If the callout is in-progress the SET queuing state is
		 * indeterminate and no action can be taken at this time.
		 *
		 * (however, recursive calls from the call-back are not
		 * indeterminate and must be processed at this time).
		 */
		/* nop */
	} else if (c->flags & CALLOUT_SET) {
		/*
		 * If the callout is SET it is queued on a callwheel, process
		 * various requests relative to it being in this queued state.
		 *
		 * c->q* fields are stable while we hold c->spin and
		 * wheel->spin.
		 */
		softclock_pcpu_t sc;

		sc = c->qsc;
		wheel = &sc->callwheel[c->qtick & cwheelmask];
		spin_lock(&wheel->spin);

		if ((c->flags & CALLOUT_INPROG) &&
		    curthread != &c->qsc->thread) {
			/*
			 * Raced against INPROG getting set by the softclock
			 * handler while we were acquiring wheel->spin.  We
			 * can do nothing at this time.
			 *
			 * (however, recursive calls from the call-back are
			 * not indeterminate and must be processed at this
			 * time).
			 */
			/* nop */
		} else if (c->flags & CALLOUT_CANCEL) {
			/*
			 * CANCEL requests override everything else.
			 */
			if (sc->next == c)
				sc->next = TAILQ_NEXT(c, entry);
			TAILQ_REMOVE(&wheel->list, c, entry);
			atomic_clear_int(&c->flags, CALLOUT_SET |
						    CALLOUT_STOP |
						    CALLOUT_CANCEL |
						    CALLOUT_RESET);
			atomic_set_int(&c->flags, CALLOUT_PREVENTED);
			if (c->waiters)
				wakeup(c);
		} else if (c->flags & CALLOUT_RESET) {
			/*
			 * RESET requests reload the callout, potentially
			 * to a different cpu.  Once removed from the wheel,
			 * the retention of c->spin prevents further races.
			 *
			 * Leave SET intact.
			 */
			if (sc->next == c)
				sc->next = TAILQ_NEXT(c, entry);
			TAILQ_REMOVE(&wheel->list, c, entry);
			spin_unlock(&wheel->spin);

			atomic_clear_int(&c->flags, CALLOUT_RESET);
			sc = c->rsc;
			c->qsc = sc;
			c->qarg = c->rarg;
			c->qfunc = c->rfunc;
			c->qtick = c->rtick;

			/*
			 * Do not queue to a current or past wheel slot or
			 * the callout will be lost for ages.  Handle
			 * potential races against soft ticks.
			 */
			wheel = &sc->callwheel[c->qtick & cwheelmask];
			spin_lock(&wheel->spin);
			while (c->qtick - sc->softticks <= 0) {
				c->qtick = sc->softticks + 1;
				spin_unlock(&wheel->spin);
				wheel = &sc->callwheel[c->qtick & cwheelmask];
				spin_lock(&wheel->spin);
			}
			TAILQ_INSERT_TAIL(&wheel->list, c, entry);
		} else if (c->flags & CALLOUT_STOP) {
			/*
			 * STOP request simply unloads the callout.
			 */
			if (sc->next == c)
				sc->next = TAILQ_NEXT(c, entry);
			TAILQ_REMOVE(&wheel->list, c, entry);
			atomic_clear_int(&c->flags, CALLOUT_STOP |
						    CALLOUT_SET);

			atomic_set_int(&c->flags, CALLOUT_PREVENTED);
			if (c->waiters)
				wakeup(c);
		} else {
			/*
			 * Do nothing if no request is pending.
			 */
			/* nop */
		}
		spin_unlock(&wheel->spin);
	} else {
		/*
		 * If the callout is not SET it is not queued to any callwheel,
		 * process various requests relative to it not being queued.
		 *
		 * c->q* fields are stable while we hold c->spin.
		 */
		if (c->flags & CALLOUT_CANCEL) {
			/*
			 * CANCEL requests override everything else.
			 *
			 * There is no state being canceled in this case,
			 * so do not set the PREVENTED flag.
			 */
			atomic_clear_int(&c->flags, CALLOUT_STOP |
						    CALLOUT_CANCEL |
						    CALLOUT_RESET);
			if (c->waiters)
				wakeup(c);
		} else if (c->flags & CALLOUT_RESET) {
			/*
			 * RESET requests get queued.  Do not queue to the
			 * currently-processing tick.
			 */
			softclock_pcpu_t sc;

			sc = c->rsc;
			c->qsc = sc;
			c->qarg = c->rarg;
			c->qfunc = c->rfunc;
			c->qtick = c->rtick;

			/*
			 * Do not queue to current or past wheel or the
			 * callout will be lost for ages.
			 */
			wheel = &sc->callwheel[c->qtick & cwheelmask];
			spin_lock(&wheel->spin);
			while (c->qtick - sc->softticks <= 0) {
				c->qtick = sc->softticks + 1;
				spin_unlock(&wheel->spin);
				wheel = &sc->callwheel[c->qtick & cwheelmask];
				spin_lock(&wheel->spin);
			}
			TAILQ_INSERT_TAIL(&wheel->list, c, entry);
			atomic_clear_int(&c->flags, CALLOUT_RESET);
			atomic_set_int(&c->flags, CALLOUT_SET);
			spin_unlock(&wheel->spin);
		} else if (c->flags & CALLOUT_STOP) {
			/*
			 * STOP requests.
			 *
			 * There is no state being stopped in this case,
			 * so do not set the PREVENTED flag.
			 */
			atomic_clear_int(&c->flags, CALLOUT_STOP);
			if (c->waiters)
				wakeup(c);
		} else {
			/*
			 * No request pending (someone else processed the
			 * request before we could)
			 */
			/* nop */
		}
	}
}

static __inline
void
_callout_free(struct _callout *c)
{
	softclock_pcpu_t sc;

	sc = softclock_pcpu_ary[mycpu->gd_cpuid];

	crit_enter();
	exis_terminate(&c->exis);
	atomic_set_int(&c->flags, CALLOUT_FREELIST);
	atomic_clear_int(&c->flags, CALLOUT_DID_INIT);
	TAILQ_INSERT_TAIL(&sc->freelist, c, entry);
	crit_exit();
}

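/*
 * Illustrative note (editorial, not referenced by the code): the requeue
 * loops in _callout_update_spinlocked() above refuse to queue to the
 * current or a past softticks slot because the handler only executes
 * entries whose qtick exactly equals sc->softticks when it sweeps slot
 * (softticks & cwheelmask).  softticks only moves forward, so a skipped
 * entry would not match again until the 32-bit tick counter wraps
 * (2^32 ticks, on the order of a year at hz = 100).  Clamping qtick to
 * softticks + 1 before insertion guarantees the entry is seen on the
 * next sweep.
 */
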
/*
 * System init
 */
static void
swi_softclock_setup(void *arg)
{
	int cpu;
	int i;
	int target;

	/*
	 * Figure out how large a callwheel we need.  It must be a power of 2.
	 *
	 * ncallout is primarily based on available memory, don't explode
	 * the allocations if the system has a lot of cpus.
	 */
	target = ncallout / ncpus + 16;

	cwheelsize = 1;
	while (cwheelsize < target)
		cwheelsize <<= 1;
	cwheelmask = cwheelsize - 1;

	/*
	 * Initialize per-cpu data structures.
	 */
	for (cpu = 0; cpu < ncpus; ++cpu) {
		softclock_pcpu_t sc;
		int wheel_sz;

		sc = (void *)kmem_alloc3(&kernel_map, sizeof(*sc),
					 VM_SUBSYS_GD, KM_CPU(cpu));
		memset(sc, 0, sizeof(*sc));
		TAILQ_INIT(&sc->freelist);
		softclock_pcpu_ary[cpu] = sc;

		wheel_sz = sizeof(*sc->callwheel) * cwheelsize;
		sc->callwheel = (void *)kmem_alloc3(&kernel_map, wheel_sz,
						    VM_SUBSYS_GD, KM_CPU(cpu));
		memset(sc->callwheel, 0, wheel_sz);
		for (i = 0; i < cwheelsize; ++i) {
			spin_init(&sc->callwheel[i].spin, "wheel");
			TAILQ_INIT(&sc->callwheel[i].list);
		}

		/*
		 * Mark the softclock handler as being an interrupt thread
		 * even though it really isn't, but do not allow it to
		 * preempt other threads (do not assign td_preemptable).
		 *
		 * Kernel code now assumes that callouts do not preempt
		 * the cpu they were scheduled on.
		 */
		lwkt_create(softclock_handler, sc, NULL, &sc->thread,
			    TDF_NOSTART | TDF_INTTHREAD,
			    cpu, "softclock %d", cpu);
	}
}

/*
 * Must occur after ncpus has been initialized.
 */
SYSINIT(softclock_setup, SI_BOOT2_SOFTCLOCK, SI_ORDER_SECOND,
	swi_softclock_setup, NULL);

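/*
 * Sizing sketch (editorial, illustrative numbers only): with ncallout =
 * 16384 and ncpus = 8, the target in swi_softclock_setup() above is
 * 16384 / 8 + 16 = 2064, which rounds up to the next power of 2, giving
 * cwheelsize = 4096 and cwheelmask = 0x0fff.  A callout scheduled for
 * qtick then hashes to slot (qtick & cwheelmask), so each wheel slot
 * holds ticks that are cwheelsize apart and the softclock handler only
 * has to scan one slot per tick.
 */
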
/*
 * This routine is called from the hardclock() (basically a FASTint/IPI) on
 * each cpu in the system.  sc->curticks is this cpu's notion of the timebase.
 * It IS NOT NECESSARILY SYNCHRONIZED WITH 'ticks'!  sc->softticks is where
 * the callwheel is currently indexed.
 *
 * sc->softticks is adjusted by either this routine or our helper thread
 * depending on whether the helper thread is running or not.
 *
 * sc->curticks and sc->softticks are adjusted using atomic ops in order
 * to ensure that remote cpu callout installation does not race the thread.
 */
void
hardclock_softtick(globaldata_t gd)
{
	softclock_pcpu_t sc;
	struct wheel *wheel;

	sc = softclock_pcpu_ary[gd->gd_cpuid];
	atomic_add_int(&sc->curticks, 1);
	if (sc->isrunning)
		return;
	if (sc->softticks == sc->curticks) {
		/*
		 * In sync, only wakeup the thread if there is something to
		 * do.
		 */
		wheel = &sc->callwheel[sc->softticks & cwheelmask];
		spin_lock(&wheel->spin);
		if (TAILQ_FIRST(&wheel->list)) {
			sc->isrunning = 1;
			spin_unlock(&wheel->spin);
			lwkt_schedule(&sc->thread);
		} else {
			atomic_add_int(&sc->softticks, 1);
			spin_unlock(&wheel->spin);
		}
	} else {
		/*
		 * Out of sync, wakeup the thread unconditionally so it can
		 * catch up.
		 */
		sc->isrunning = 1;
		lwkt_schedule(&sc->thread);
	}
}

/*
 * This procedure is the main loop of our per-cpu helper thread.  The
 * sc->isrunning flag prevents us from racing hardclock_softtick().
 *
 * The thread starts with the MP lock released and not in a critical
 * section.  The loop itself is MP safe while individual callbacks
 * may or may not be, so we obtain or release the MP lock as appropriate.
 */
static void
softclock_handler(void *arg)
{
	softclock_pcpu_t sc;
	struct _callout *c;
	struct wheel *wheel;
	struct callout slotimer1;
	struct _callout slotimer2;
	int mpsafe = 1;

	/*
	 * Setup pcpu slow clocks which we want to run from the callout
	 * thread.  This thread starts very early and cannot kmalloc(),
	 * so use internal functions to supply the _callout.
	 */
	_callout_setup_quick(&slotimer1, &slotimer2, hz * 10,
			     slotimer_callback, &slotimer1);

	/*
	 * Run the callout thread at the same priority as other kernel
	 * threads so it can be round-robined.
	 */
	/*lwkt_setpri_self(TDPRI_SOFT_NORM);*/

	sc = arg;
loop:
	while (sc->softticks != (int)(sc->curticks + 1)) {
		wheel = &sc->callwheel[sc->softticks & cwheelmask];

		spin_lock(&wheel->spin);
		sc->next = TAILQ_FIRST(&wheel->list);
		while ((c = sc->next) != NULL) {
			int error;

			/*
			 * Match callouts for this tick.
			 */
			sc->next = TAILQ_NEXT(c, entry);
			if (c->qtick != sc->softticks)
				continue;

			/*
			 * Double check the validity of the callout, detect
			 * if the originator's structure has been ripped out.
			 */
			if (c->verifier->toc != c) {
				spin_unlock(&wheel->spin);
				panic("_callout %p verifier %p failed",
				      c, c->verifier);
			}

			/*
			 * The wheel spinlock is sufficient to set INPROG and
			 * remove (c) from the list.  Once INPROG is set,
			 * other threads can only make limited changes to (c).
			 *
			 * Setting INPROG masks SET tests in all other
			 * conditionals except the 'quick' code (which is
			 * always same-cpu and doesn't race).  This means
			 * that we can clear SET here without obtaining
			 * c->spin.
			 */
			TAILQ_REMOVE(&wheel->list, c, entry);
			atomic_set_int(&c->flags, CALLOUT_INPROG);
			atomic_clear_int(&c->flags, CALLOUT_SET);
			sc->running = c;
			spin_unlock(&wheel->spin);

			/*
			 * Legacy mplock support
			 */
			if (c->flags & CALLOUT_MPSAFE) {
				if (mpsafe == 0) {
					mpsafe = 1;
					rel_mplock();
				}
			} else {
				if (mpsafe) {
					mpsafe = 0;
					get_mplock();
				}
			}

			/*
			 * Execute the 'q' function (protected by INPROG)
			 */
			if (c->flags & (CALLOUT_STOP | CALLOUT_CANCEL)) {
				/*
				 * Raced a stop or cancel request, do
				 * not execute.  The processing code
				 * thinks it's a normal completion so
				 * flag the fact that cancel/stop actually
				 * prevented a callout here.
				 */
				if (c->flags &
				    (CALLOUT_CANCEL | CALLOUT_STOP)) {
					atomic_set_int(&c->verifier->flags,
						       CALLOUT_PREVENTED);
				}
			} else if (c->flags & CALLOUT_RESET) {
				/*
				 * A RESET raced, make it seem like it
				 * didn't.  Do nothing here and let the
				 * update procedure requeue us.
				 */
			} else if (c->flags & CALLOUT_AUTOLOCK) {
				/*
				 * Interlocked cancelable call.  If the
				 * lock gets canceled we have to flag the
				 * fact that the cancel/stop actually
				 * prevented the callout here.
				 */
				error = lockmgr(c->lk, LK_EXCLUSIVE |
						       LK_CANCELABLE);
				if (error == 0) {
					c->qfunc(c->qarg);
					lockmgr(c->lk, LK_RELEASE);
				} else if (c->flags &
					   (CALLOUT_CANCEL | CALLOUT_STOP)) {
					atomic_set_int(&c->verifier->flags,
						       CALLOUT_PREVENTED);
				}
			} else {
				/*
				 * Normal call
				 */
				c->qfunc(c->qarg);
			}

			/*
			 * INPROG will prevent SET from being set again.
			 * Once we clear INPROG, update the callout to
			 * handle any pending operations that have built-up.
			 */

			/*
			 * Interlocked clearing of INPROG, then handle any
			 * queued request (such as a callout_reset() request).
			 */
			spin_lock(&c->spin);
			atomic_clear_int(&c->flags, CALLOUT_INPROG);
			sc->running = NULL;
			_callout_update_spinlocked(c);
			spin_unlock(&c->spin);

			spin_lock(&wheel->spin);
		}
		spin_unlock(&wheel->spin);
		atomic_add_int(&sc->softticks, 1);

		/*
		 * Clean up any _callout structures which are now allowed
		 * to be freed.
		 */
		crit_enter();
		while ((c = TAILQ_FIRST(&sc->freelist)) != NULL) {
			if (!exis_freeable(&c->exis))
				break;
			TAILQ_REMOVE(&sc->freelist, c, entry);
			c->flags = 0;
			kfree(c, M_CALLOUT);
			if (callout_debug)
				kprintf("KFREEB %p\n", c);
		}
		crit_exit();
	}

	/*
	 * Don't leave us holding the MP lock when we deschedule ourselves.
	 */
	if (mpsafe == 0) {
		mpsafe = 1;
		rel_mplock();
	}

	/*
	 * Recheck in critical section to interlock against hardclock
	 */
	crit_enter();
	if (sc->softticks == (int)(sc->curticks + 1)) {
		sc->isrunning = 0;
		lwkt_deschedule_self(&sc->thread);	/* == curthread */
		lwkt_switch();
	}
	crit_exit();
	goto loop;
	/* NOT REACHED */
}

/*
 * A very slow system cleanup timer (10 second interval), per-cpu.
 */
static void
slotimer_callback(void *arg)
{
	struct callout *c = arg;

	slab_cleanup();
	callout_reset(c, hz * 10, slotimer_callback, c);
}

/*
 * API FUNCTIONS
 */

static __inline
struct _callout *
_callout_gettoc(struct callout *cc)
{
	globaldata_t gd = mycpu;
	struct _callout *c;
	softclock_pcpu_t sc;

	KKASSERT(cc->flags & CALLOUT_DID_INIT);
	exis_hold_gd(gd);
	for (;;) {
		c = cc->toc;
		cpu_ccfence();
		if (c) {
			KKASSERT(c->verifier == cc);
			spin_lock(&c->spin);
			break;
		}
		sc = softclock_pcpu_ary[gd->gd_cpuid];
		c = kmalloc(sizeof(*c), M_CALLOUT, M_INTWAIT | M_ZERO);
		if (callout_debug)
			kprintf("ALLOC %p\n", c);
		c->flags = cc->flags;
		c->lk = cc->lk;
		c->verifier = cc;
		exis_init(&c->exis);
		spin_init(&c->spin, "calou");
		spin_lock(&c->spin);
		if (atomic_cmpset_ptr(&cc->toc, NULL, c))
			break;
		spin_unlock(&c->spin);
		c->verifier = NULL;
		kfree(c, M_CALLOUT);
		if (callout_debug)
			kprintf("KFREEA %p\n", c);
	}
	exis_drop_gd(gd);

	/*
	 * Return internal _callout with spin-lock held
	 */
	return c;
}

/*
 * Macroed in sys/callout.h for debugging
 *
 * WARNING! tsleep() assumes this will not block
 */
void
_callout_init(struct callout *cc CALLOUT_DEBUG_ARGS)
{
	bzero(cc, sizeof(*cc));
	cc->flags = CALLOUT_DID_INIT;
}

void
_callout_init_mp(struct callout *cc CALLOUT_DEBUG_ARGS)
{
	bzero(cc, sizeof(*cc));
	cc->flags = CALLOUT_DID_INIT | CALLOUT_MPSAFE;
}

void
_callout_init_lk(struct callout *cc, struct lock *lk CALLOUT_DEBUG_ARGS)
{
	bzero(cc, sizeof(*cc));
	cc->flags = CALLOUT_DID_INIT | CALLOUT_MPSAFE | CALLOUT_AUTOLOCK;
	cc->lk = lk;
}

/*
 * Start or restart a timeout.  Installs the callout structure on the
 * callwheel of the current cpu.  Callers may legally pass any value, even
 * if 0 or negative, but since the sc->curticks index may have already
 * been processed a minimum timeout of 1 tick will be enforced.
 *
 * New timeouts can be installed while the current one is running; this
 * function will not deadlock against a running call.
 *
 * WARNING! tsleep() assumes this will not block
 */
void
callout_reset(struct callout *cc, int to_ticks, void (*ftn)(void *), void *arg)
{
	softclock_pcpu_t sc;
	struct _callout *c;

	/*
	 * We need to acquire/associate a _callout.
	 * gettoc spin-locks (c).
	 */
	KKASSERT(cc->flags & CALLOUT_DID_INIT);
	atomic_set_int(&cc->flags, CALLOUT_ACTIVE);
	c = _callout_gettoc(cc);

	/*
	 * Request a RESET.  This automatically overrides a STOP in
	 * _callout_update_spinlocked().
	 */
	atomic_set_int(&c->flags, CALLOUT_RESET);
	sc = softclock_pcpu_ary[mycpu->gd_cpuid];
	c->rsc = sc;
	c->rtick = sc->curticks + to_ticks;
	c->rfunc = ftn;
	c->rarg = arg;
	_callout_update_spinlocked(c);
	spin_unlock(&c->spin);
}

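/*
 * Usage sketch (editorial, illustrative only; "mydev_softc", "mydev_poll"
 * and the one-second interval are hypothetical, not part of this API):
 *
 *	struct mydev_softc {
 *		struct callout	timer;
 *		...
 *	};
 *
 *	callout_init_mp(&sc->timer);
 *	callout_reset(&sc->timer, hz, mydev_poll, sc);	    arm, ~1 second
 *
 *	static void
 *	mydev_poll(void *arg)
 *	{
 *		struct mydev_softc *sc = arg;
 *		...
 *		callout_reset(&sc->timer, hz, mydev_poll, sc);	 re-arm
 *	}
 *
 * callout_stop() or callout_cancel() disarms the timer, and
 * callout_terminate() must be called before the structure is freed.
 */
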
/*
 * Same as callout_reset() but the timeout will run on a particular cpu.
 */
void
callout_reset_bycpu(struct callout *cc, int to_ticks, void (*ftn)(void *),
		    void *arg, int cpuid)
{
	softclock_pcpu_t sc;
	struct _callout *c;

	/*
	 * We need to acquire/associate a _callout.
	 * gettoc spin-locks (c).
	 */
	KKASSERT(cc->flags & CALLOUT_DID_INIT);
	atomic_set_int(&cc->flags, CALLOUT_ACTIVE);
	c = _callout_gettoc(cc);

	/*
	 * Set RESET.  Do not clear STOP here (let the process code do it).
	 */
	atomic_set_int(&c->flags, CALLOUT_RESET);

	sc = softclock_pcpu_ary[cpuid];
	c->rsc = sc;
	c->rtick = sc->curticks + to_ticks;
	c->rfunc = ftn;
	c->rarg = arg;
	_callout_update_spinlocked(c);
	spin_unlock(&c->spin);
}

/*
 * Issue synchronous or asynchronous cancel or stop
 */
static __inline
int
_callout_cancel_or_stop(struct callout *cc, uint32_t flags, int sync)
{
	globaldata_t gd = mycpu;
	struct _callout *c;
	int res;

	/*
	 * Callout is inactive after cancel or stop.  Degenerate case if
	 * no _callout is currently associated.
	 */
	atomic_clear_int(&cc->flags, CALLOUT_ACTIVE);
	if (cc->toc == NULL)
		return 0;

	/*
	 * Ensure that the related (c) is not destroyed.  Set the CANCEL
	 * or STOP request flag, clear the PREVENTED status flag, and update.
	 */
	exis_hold_gd(gd);
	c = _callout_gettoc(cc);
	atomic_clear_int(&c->flags, CALLOUT_PREVENTED);
	atomic_set_int(&c->flags, flags);
	_callout_update_spinlocked(c);
	spin_unlock(&c->spin);

	/*
	 * If the operation is still in-progress then re-acquire the spin-lock
	 * and block if necessary.  Also initiate the lock cancel.
	 */
	if (sync == 0 || (c->flags & (CALLOUT_INPROG | CALLOUT_SET)) == 0) {
		exis_drop_gd(gd);
		return 0;
	}
	if (c->flags & CALLOUT_AUTOLOCK)
		lockmgr(c->lk, LK_CANCEL_BEG);
	spin_lock(&c->spin);
	if ((c->flags & (CALLOUT_INPROG | CALLOUT_SET)) == 0) {
		spin_unlock(&c->spin);
		if (c->flags & CALLOUT_AUTOLOCK)
			lockmgr(c->lk, LK_CANCEL_END);
		exis_drop_gd(gd);
		return ((c->flags & CALLOUT_PREVENTED) != 0);
	}

	/*
	 * With c->spin held we can synchronously wait for completion of
	 * our request.
	 *
	 * If INPROG is set and we are recursing from the callback the
	 * function completes immediately.
	 */
	++c->waiters;
	for (;;) {
		cpu_ccfence();
		if ((c->flags & flags) == 0)
			break;
		if ((c->flags & CALLOUT_INPROG) &&
		    curthread == &c->qsc->thread) {
			_callout_update_spinlocked(c);
			break;
		}
		ssleep(c, &c->spin, 0, "costp", 0);
	}
	--c->waiters;
	spin_unlock(&c->spin);
	if (c->flags & CALLOUT_AUTOLOCK)
		lockmgr(c->lk, LK_CANCEL_END);
	res = ((c->flags & CALLOUT_PREVENTED) != 0);
	exis_drop_gd(gd);

	return res;
}

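/*
 * AUTOLOCK sketch (editorial, illustrative; the "mydev" names are
 * hypothetical): a callout initialized with callout_init_lk(&sc->timer,
 * &sc->lk) runs its callback with sc->lk held via a cancelable lockmgr
 * acquisition.  A synchronous callout_cancel()/callout_stop() issues
 * LK_CANCEL_BEG on that lock in _callout_cancel_or_stop() above, so a
 * callback blocked waiting for the lock aborts (and is flagged PREVENTED)
 * instead of deadlocking against a canceling thread that already holds it:
 *
 *	lockmgr(&sc->lk, LK_EXCLUSIVE);
 *	callout_cancel(&sc->timer);	callback cannot wedge on sc->lk
 *	lockmgr(&sc->lk, LK_RELEASE);
 */
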
/*
 * Internalized special low-overhead version without normal safety
 * checks or allocations.  Used by tsleep().
 *
 * Must be called from a critical section.  Specify both the external
 * and internal callout structure and set the timeout on the current cpu.
 */
void
_callout_setup_quick(struct callout *cc, struct _callout *c, int ticks,
		     void (*ftn)(void *), void *arg)
{
	softclock_pcpu_t sc;
	struct wheel *wheel;

	/*
	 * Directly set up and queue the callout on the current cpu.
	 * No RESET request is needed; the caller's critical section
	 * prevents local races.
	 */
	sc = softclock_pcpu_ary[mycpu->gd_cpuid];

	cc->flags = CALLOUT_DID_INIT | CALLOUT_MPSAFE;
	cc->toc = c;
	cc->lk = NULL;
	c->flags = cc->flags | CALLOUT_SET;
	c->lk = NULL;
	c->verifier = cc;
	c->qsc = sc;
	c->qtick = sc->curticks + ticks;
	c->qfunc = ftn;
	c->qarg = arg;
	spin_init(&c->spin, "calou");

	/*
	 * Since we are on the same cpu with a critical section, we can
	 * do this with only the wheel spinlock.
	 */
	if (c->qtick - sc->softticks <= 0)
		c->qtick = sc->softticks + 1;
	wheel = &sc->callwheel[c->qtick & cwheelmask];

	spin_lock(&wheel->spin);
	TAILQ_INSERT_TAIL(&wheel->list, c, entry);
	spin_unlock(&wheel->spin);
}

/*
 * Internalized special low-overhead version without normal safety
 * checks or allocations.  Used by tsleep().
 *
 * Must be called on the same cpu that queued the timeout.
 * Must be called with a critical section already held.
 */
void
_callout_cancel_quick(struct _callout *c)
{
	softclock_pcpu_t sc;
	struct wheel *wheel;

	/*
	 * Wakeup callouts for tsleep() should never block, so this flag
	 * had better never be found set.
	 */
	KKASSERT((c->flags & CALLOUT_INPROG) == 0);

	/*
	 * Remove from queue if necessary.  Since we are in a critical
	 * section on the same cpu, the queueing status should not change.
	 */
	if (c->flags & CALLOUT_SET) {
		sc = c->qsc;
		KKASSERT(sc == softclock_pcpu_ary[mycpu->gd_cpuid]);
		wheel = &sc->callwheel[c->qtick & cwheelmask];

		/*
		 * NOTE: We must still spin-lock the wheel because other
		 *	 cpus can manipulate the list.
		 */
		spin_lock(&wheel->spin);
		TAILQ_REMOVE(&wheel->list, c, entry);
		c->flags &= ~(CALLOUT_SET | CALLOUT_STOP |
			      CALLOUT_CANCEL | CALLOUT_RESET);
		spin_unlock(&wheel->spin);
	}
	c->verifier = NULL;
}

/*
 * This is a synchronous STOP which cancels the callout.  If AUTOLOCK
 * then a CANCEL will be issued to the lock holder.  Unlike STOP, the
 * cancel function prevents any new callout_reset()s from being issued
 * in addition to canceling the lock.  The lock will also be deactivated.
 *
 * Returns 0 if the callout was not active (or was active and completed,
 * but didn't try to start a new timeout).
 * Returns 1 if the cancel is responsible for stopping the callout.
 */
int
callout_cancel(struct callout *cc)
{
	return _callout_cancel_or_stop(cc, CALLOUT_CANCEL, 1);
}

/*
 * Currently the same as callout_cancel.  Ultimately we may wish the
 * drain function to allow a pending callout to proceed, but for now
 * we will attempt to cancel it.
 *
 * Returns 0 if the callout was not active (or was active and completed,
 * but didn't try to start a new timeout).
 * Returns 1 if the drain is responsible for stopping the callout.
 */
int
callout_drain(struct callout *cc)
{
	return _callout_cancel_or_stop(cc, CALLOUT_CANCEL, 1);
}

/*
 * Stops a callout if it is pending or queued, does not block.
 * This function does not interlock against a callout that is in-progress.
 *
 * Returns whether the STOP operation was responsible for removing a
 * queued or pending callout.
 */
int
callout_stop_async(struct callout *cc)
{
	return _callout_cancel_or_stop(cc, CALLOUT_STOP, 0);
}

/*
 * Callout deactivate merely clears the CALLOUT_ACTIVE bit and stops a
 * callout if it is pending or queued.  However, this cannot stop a callout
 * whose callback is in-progress.
 *
 * This function does not interlock against a callout that is in-progress.
 */
void
callout_deactivate(struct callout *cc)
{
	atomic_clear_int(&cc->flags, CALLOUT_ACTIVE);
	callout_stop_async(cc);
}

/*
 * Lock-aided callouts are STOPped synchronously using STOP semantics
 * (meaning that another thread can start the callout again before we
 * return).
 *
 * Non-lock-aided callouts:
 *
 * Stops a callout if it is pending or queued, does not block.
 * This function does not interlock against a callout that is in-progress.
 */
int
callout_stop(struct callout *cc)
{
	return _callout_cancel_or_stop(cc, CALLOUT_STOP, 1);
}

/*
 * Destroy the callout.  Synchronously cancel any operation in progress,
 * clear the INIT flag, and disconnect the internal _callout.  The internal
 * callout will be safely freed via EXIS.
 *
 * Upon return, the callout structure may only be reused if re-initialized.
 */
void
callout_terminate(struct callout *cc)
{
	struct _callout *c;

	exis_hold();

	_callout_cancel_or_stop(cc, CALLOUT_CANCEL, 1);
	KKASSERT(cc->flags & CALLOUT_DID_INIT);
	atomic_clear_int(&cc->flags, CALLOUT_DID_INIT);
	c = atomic_swap_ptr((void *)&cc->toc, NULL);
	if (c) {
		KKASSERT(c->verifier == cc);
		c->verifier = NULL;
		_callout_free(c);
	}

	exis_drop();
}

/*
 * Returns whether a callout is queued and the time has not yet
 * arrived (the callout is not yet in-progress).
 */
int
callout_pending(struct callout *cc)
{
	struct _callout *c;

	/*
	 * Don't instantiate toc to test pending
	 */
	if (cc->toc == NULL)
		return 0;
	c = _callout_gettoc(cc);
	if ((c->flags & (CALLOUT_SET | CALLOUT_INPROG)) == CALLOUT_SET) {
		spin_unlock(&c->spin);
		return 1;
	}
	spin_unlock(&c->spin);

	return 0;
}

/*
 * Returns whether a callout is active or not.  A callout is active when
 * a timeout is set and remains active upon normal termination, even if
 * it does not issue a new timeout.  A callout is inactive if a timeout has
 * never been set or if the callout has been stopped or canceled.  The next
 * timeout that is set will re-set the active state.
 */
int
callout_active(struct callout *cc)
{
	return ((cc->flags & CALLOUT_ACTIVE) ? 1 : 0);
}
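
/*
 * Teardown sketch (editorial, illustrative; "mydev_detach", "mydev_softc"
 * and M_MYDEV are hypothetical): because the _callout backend is freed via
 * EXIS after the frontend is disconnected, an owner only has to guarantee
 * that no callback is queued or running before releasing its own structure:
 *
 *	static void
 *	mydev_detach(struct mydev_softc *sc)
 *	{
 *		callout_terminate(&sc->timer);	synchronous cancel + detach
 *		kfree(sc, M_MYDEV);		sc->timer may not be reused
 *						unless re-initialized
 *	}
 */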