/*
 * Copyright (c) 2004,2014,2019-2020 The DragonFly Project.
 * All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * The original callout mechanism was based on the work of Adam M. Costello
 * and George Varghese, published in a technical report entitled "Redesigning
 * the BSD Callout and Timer Facilities" and modified slightly for inclusion
 * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
 * used in this implementation was published by G. Varghese and T. Lauck in
 * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
 * the Efficient Implementation of a Timer Facility" in the Proceedings of
 * the 11th ACM Annual Symposium on Operating Systems Principles,
 * Austin, Texas Nov 1987.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/spinlock.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/interrupt.h>
#include <sys/thread.h>
#include <sys/sysctl.h>
#include <sys/exislock.h>
#include <vm/vm_extern.h>
#include <machine/atomic.h>

#include <sys/spinlock2.h>
#include <sys/thread2.h>
#include <sys/mplock2.h>
#include <sys/exislock2.h>

TAILQ_HEAD(colist, _callout);
struct softclock_pcpu;

/*
 * DID_INIT	- Sanity check
 * PREVENTED	- A callback was prevented
 * RESET	- Callout_reset requested
 * STOP		- Callout_stop requested
 * INPROG	- Softclock_handler thread processing in-progress on callout,
 *		  queue linkage is indeterminate.  Third parties must queue
 *		  a STOP or CANCEL and await completion.
 * SET		- Callout is linked to queue (if INPROG not set)
 * AUTOLOCK	- Lockmgr cancelable interlock (copied from frontend)
 * MPSAFE	- Callout is MPSAFE (copied from frontend)
 * CANCEL	- callout_cancel requested
 * ACTIVE	- active/inactive (frontend only, see documentation).
 *		  This is *NOT* the same as whether a callout is queued or
 *		  not.
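 *
 * Typical flag flow (illustrative sketch only, derived from the code below):
 * callout_reset() sets RESET, _callout_update_spinlocked() converts RESET
 * into SET and queues the callout, the softclock thread sets INPROG and
 * clears SET while the callback runs, and any pending STOP/CANCEL/RESET
 * request is resolved by _callout_update_spinlocked() once INPROG clears.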
 */
#define CALLOUT_DID_INIT	0x00000001	/* frontend */
#define CALLOUT_PREVENTED	0x00000002	/* backend */
#define CALLOUT_FREELIST	0x00000004	/* backend */
#define CALLOUT_UNUSED0008	0x00000008
#define CALLOUT_UNUSED0010	0x00000010
#define CALLOUT_RESET		0x00000020	/* backend */
#define CALLOUT_STOP		0x00000040	/* backend */
#define CALLOUT_INPROG		0x00000080	/* backend */
#define CALLOUT_SET		0x00000100	/* backend */
#define CALLOUT_AUTOLOCK	0x00000200	/* both */
#define CALLOUT_MPSAFE		0x00000400	/* both */
#define CALLOUT_CANCEL		0x00000800	/* backend */
#define CALLOUT_ACTIVE		0x00001000	/* frontend */

struct wheel {
	struct spinlock	spin;
	struct colist	list;
};

struct softclock_pcpu {
	struct wheel	*callwheel;
	struct _callout	*running;
	struct _callout	* volatile next;
	struct colist	freelist;
	int		softticks;	/* softticks index */
	int		curticks;	/* per-cpu ticks counter */
	int		isrunning;
	struct thread	thread;
};

typedef struct softclock_pcpu *softclock_pcpu_t;

static int callout_debug = 0;
SYSCTL_INT(_debug, OID_AUTO, callout_debug, CTLFLAG_RW,
	   &callout_debug, 0, "");

static MALLOC_DEFINE(M_CALLOUT, "callouts", "softclock callouts");

static int cwheelsize;
static int cwheelmask;
static softclock_pcpu_t softclock_pcpu_ary[MAXCPU];

static void softclock_handler(void *arg);
static void slotimer_callback(void *arg);

/*
 * Handle pending requests.  No action can be taken if the callout is still
 * flagged INPROG.  Called from softclock for post-processing and from
 * various API functions.
 *
 * This routine does not block in any way.
 * Caller must hold c->spin.
 *
 * NOTE: Flags can be adjusted without holding c->spin, so atomic ops
 *	 must be used at all times.
 *
 * NOTE: The related (sc) might refer to another cpu.
 *
 * NOTE: The cc-vs-c frontend-vs-backend might be disconnected during the
 *	 operation, but the EXIS lock prevents (c) from being destroyed.
 */
static __inline
void
_callout_update_spinlocked(struct _callout *c)
{
	struct wheel *wheel;

	if ((c->flags & CALLOUT_INPROG) && curthread != &c->qsc->thread) {
		/*
		 * If the callout is in-progress the SET queuing state is
		 * indeterminate and no action can be taken at this time.
		 *
		 * (however, recursive calls from the call-back are not
		 * indeterminate and must be processed at this time).
		 */
		/* nop */
	} else if (c->flags & CALLOUT_SET) {
		/*
		 * If the callout is SET it is queued on a callwheel, process
		 * various requests relative to it being in this queued state.
		 *
		 * c->q* fields are stable while we hold c->spin and
		 * wheel->spin.
		 */
		softclock_pcpu_t sc;

		sc = c->qsc;
		wheel = &sc->callwheel[c->qtick & cwheelmask];
		spin_lock(&wheel->spin);

		if ((c->flags & CALLOUT_INPROG) &&
		    curthread != &c->qsc->thread) {
			/*
			 * Raced against INPROG getting set by the softclock
			 * handler while we were acquiring wheel->spin.  We
			 * can do nothing at this time.
			 *
			 * (however, recursive calls from the call-back are not
			 * indeterminate and must be processed at this time).
			 */
			/* nop */
		} else if (c->flags & CALLOUT_CANCEL) {
			/*
			 * CANCEL requests override everything else.
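			 * The callout is dequeued, all pending request
			 * flags are cleared, PREVENTED is set to record
			 * that a queued callout was stopped before it
			 * could run, and any threads sleeping in
			 * _callout_cancel_or_stop() are woken up.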
			 */
			if (sc->next == c)
				sc->next = TAILQ_NEXT(c, entry);
			TAILQ_REMOVE(&wheel->list, c, entry);
			atomic_clear_int(&c->flags, CALLOUT_SET |
						    CALLOUT_STOP |
						    CALLOUT_CANCEL |
						    CALLOUT_RESET);
			atomic_set_int(&c->flags, CALLOUT_PREVENTED);
			if (c->waiters)
				wakeup(c);
		} else if (c->flags & CALLOUT_RESET) {
			/*
			 * RESET requests reload the callout, potentially
			 * to a different cpu.  Once removed from the wheel,
			 * the retention of c->spin prevents further races.
			 *
			 * Leave SET intact.
			 */
			if (sc->next == c)
				sc->next = TAILQ_NEXT(c, entry);
			TAILQ_REMOVE(&wheel->list, c, entry);
			spin_unlock(&wheel->spin);

			atomic_clear_int(&c->flags, CALLOUT_RESET);
			sc = c->rsc;
			c->qsc = sc;
			c->qarg = c->rarg;
			c->qfunc = c->rfunc;
			c->qtick = c->rtick;

			/*
			 * Do not queue to a current or past wheel slot or
			 * the callout will be lost for ages.  Handle
			 * potential races against soft ticks.
			 */
			wheel = &sc->callwheel[c->qtick & cwheelmask];
			spin_lock(&wheel->spin);
			while (c->qtick - sc->softticks <= 0) {
				c->qtick = sc->softticks + 1;
				spin_unlock(&wheel->spin);
				wheel = &sc->callwheel[c->qtick & cwheelmask];
				spin_lock(&wheel->spin);
			}
			TAILQ_INSERT_TAIL(&wheel->list, c, entry);
		} else if (c->flags & CALLOUT_STOP) {
			/*
			 * STOP request simply unloads the callout.
			 */
			if (sc->next == c)
				sc->next = TAILQ_NEXT(c, entry);
			TAILQ_REMOVE(&wheel->list, c, entry);
			atomic_clear_int(&c->flags, CALLOUT_STOP |
						    CALLOUT_SET);

			atomic_set_int(&c->flags, CALLOUT_PREVENTED);
			if (c->waiters)
				wakeup(c);
		} else {
			/*
			 * Do nothing if no request is pending.
			 */
			/* nop */
		}
		spin_unlock(&wheel->spin);
	} else {
		/*
		 * If the callout is not SET it is not queued to any callwheel,
		 * process various requests relative to it not being queued.
		 *
		 * c->q* fields are stable while we hold c->spin.
		 */
		if (c->flags & CALLOUT_CANCEL) {
			/*
			 * CANCEL requests override everything else.
			 *
			 * There is no state being canceled in this case,
			 * so do not set the PREVENTED flag.
			 */
			atomic_clear_int(&c->flags, CALLOUT_STOP |
						    CALLOUT_CANCEL |
						    CALLOUT_RESET);
			if (c->waiters)
				wakeup(c);
		} else if (c->flags & CALLOUT_RESET) {
			/*
			 * RESET requests get queued.  Do not queue to the
			 * currently-processing tick.
			 */
			softclock_pcpu_t sc;

			sc = c->rsc;
			c->qsc = sc;
			c->qarg = c->rarg;
			c->qfunc = c->rfunc;
			c->qtick = c->rtick;

			/*
			 * Do not queue to current or past wheel or the
			 * callout will be lost for ages.
			 */
			wheel = &sc->callwheel[c->qtick & cwheelmask];
			spin_lock(&wheel->spin);
			while (c->qtick - sc->softticks <= 0) {
				c->qtick = sc->softticks + 1;
				spin_unlock(&wheel->spin);
				wheel = &sc->callwheel[c->qtick & cwheelmask];
				spin_lock(&wheel->spin);
			}
			TAILQ_INSERT_TAIL(&wheel->list, c, entry);
			atomic_clear_int(&c->flags, CALLOUT_RESET);
			atomic_set_int(&c->flags, CALLOUT_SET);
			spin_unlock(&wheel->spin);
		} else if (c->flags & CALLOUT_STOP) {
			/*
			 * STOP requests.
			 *
			 * There is no state being stopped in this case,
			 * so do not set the PREVENTED flag.
			 */
			atomic_clear_int(&c->flags, CALLOUT_STOP);
			if (c->waiters)
				wakeup(c);
		} else {
			/*
			 * No request pending (someone else processed the
			 * request before we could).
			 */
			/* nop */
		}
	}
}

static __inline
void
_callout_free(struct _callout *c)
{
	softclock_pcpu_t sc;

	sc = softclock_pcpu_ary[mycpu->gd_cpuid];

	crit_enter();
	exis_terminate(&c->exis);
	atomic_set_int(&c->flags, CALLOUT_FREELIST);
	atomic_clear_int(&c->flags, CALLOUT_DID_INIT);
	TAILQ_INSERT_TAIL(&sc->freelist, c, entry);
	crit_exit();
}

/*
 * System init
 */
static void
swi_softclock_setup(void *arg)
{
	int cpu;
	int i;
	int target;

	/*
	 * Figure out how large a callwheel we need.  It must be a power of 2.
	 *
	 * ncallout is primarily based on available memory, don't explode
	 * the allocations if the system has a lot of cpus.
	 */
	target = ncallout / ncpus + 16;

	cwheelsize = 1;
	while (cwheelsize < target)
		cwheelsize <<= 1;
	cwheelmask = cwheelsize - 1;

	/*
	 * Initialize per-cpu data structures.
	 */
	for (cpu = 0; cpu < ncpus; ++cpu) {
		softclock_pcpu_t sc;
		int wheel_sz;

		sc = (void *)kmem_alloc3(&kernel_map, sizeof(*sc),
					 VM_SUBSYS_GD, KM_CPU(cpu));
		memset(sc, 0, sizeof(*sc));
		TAILQ_INIT(&sc->freelist);
		softclock_pcpu_ary[cpu] = sc;

		wheel_sz = sizeof(*sc->callwheel) * cwheelsize;
		sc->callwheel = (void *)kmem_alloc3(&kernel_map, wheel_sz,
						    VM_SUBSYS_GD, KM_CPU(cpu));
		memset(sc->callwheel, 0, wheel_sz);
		for (i = 0; i < cwheelsize; ++i) {
			spin_init(&sc->callwheel[i].spin, "wheel");
			TAILQ_INIT(&sc->callwheel[i].list);
		}

		/*
		 * Mark the softclock handler as being an interrupt thread
		 * even though it really isn't, but do not allow it to
		 * preempt other threads (do not assign td_preemptable).
		 *
		 * Kernel code now assumes that callouts do not preempt
		 * the cpu they were scheduled on.
		 */
		lwkt_create(softclock_handler, sc, NULL, &sc->thread,
			    TDF_NOSTART | TDF_INTTHREAD,
			    cpu, "softclock %d", cpu);
	}
}

/*
 * Must occur after ncpus has been initialized.
 */
SYSINIT(softclock_setup, SI_BOOT2_SOFTCLOCK, SI_ORDER_SECOND,
	swi_softclock_setup, NULL);

/*
 * This routine is called from hardclock() (basically a FASTint/IPI) on
 * each cpu in the system.  sc->curticks is this cpu's notion of the timebase.
 * It IS NOT NECESSARILY SYNCHRONIZED WITH 'ticks'!  sc->softticks is where
 * the callwheel is currently indexed.
 *
 * sc->softticks is adjusted by either this routine or our helper thread
 * depending on whether the helper thread is running or not.
 *
 * sc->curticks and sc->softticks are adjusted using atomic ops in order
 * to ensure that remote cpu callout installation does not race the thread.
 */
void
hardclock_softtick(globaldata_t gd)
{
	softclock_pcpu_t sc;
	struct wheel *wheel;

	sc = softclock_pcpu_ary[gd->gd_cpuid];
	atomic_add_int(&sc->curticks, 1);
	if (sc->isrunning)
		return;
	if (sc->softticks == sc->curticks) {
		/*
		 * In sync, only wake the thread if there is something to
		 * do.
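		 * If the wheel slot for this tick is empty we advance
		 * softticks ourselves and avoid waking the helper thread.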
		 */
		wheel = &sc->callwheel[sc->softticks & cwheelmask];
		spin_lock(&wheel->spin);
		if (TAILQ_FIRST(&wheel->list)) {
			sc->isrunning = 1;
			spin_unlock(&wheel->spin);
			lwkt_schedule(&sc->thread);
		} else {
			atomic_add_int(&sc->softticks, 1);
			spin_unlock(&wheel->spin);
		}
	} else {
		/*
		 * Out of sync, wake the thread unconditionally so it can
		 * catch up.
		 */
		sc->isrunning = 1;
		lwkt_schedule(&sc->thread);
	}
}

/*
 * This procedure is the main loop of our per-cpu helper thread.  The
 * sc->isrunning flag prevents us from racing hardclock_softtick().
 *
 * The thread starts with the MP lock released and not in a critical
 * section.  The loop itself is MP safe while individual callbacks
 * may or may not be, so we obtain or release the MP lock as appropriate.
 */
static void
softclock_handler(void *arg)
{
	softclock_pcpu_t sc;
	struct _callout *c;
	struct wheel *wheel;
	struct callout slotimer1;
	struct _callout slotimer2;
	int mpsafe = 1;

	/*
	 * Set up pcpu slow clocks which we want to run from the callout
	 * thread.  This thread starts very early and cannot kmalloc(),
	 * so use internal functions to supply the _callout.
	 */
	_callout_setup_quick(&slotimer1, &slotimer2, hz * 10,
			     slotimer_callback, &slotimer1);

	/*
	 * Run the callout thread at the same priority as other kernel
	 * threads so it can be round-robined.
	 */
	/*lwkt_setpri_self(TDPRI_SOFT_NORM);*/

	sc = arg;
loop:
	while (sc->softticks != (int)(sc->curticks + 1)) {
		wheel = &sc->callwheel[sc->softticks & cwheelmask];

		spin_lock(&wheel->spin);
		sc->next = TAILQ_FIRST(&wheel->list);
		while ((c = sc->next) != NULL) {
			int error;

			/*
			 * Match callouts for this tick.
			 */
			sc->next = TAILQ_NEXT(c, entry);
			if (c->qtick != sc->softticks)
				continue;

			/*
			 * Double check the validity of the callout, detect
			 * if the originator's structure has been ripped out.
			 */
			if ((uintptr_t)c->verifier < VM_MAX_USER_ADDRESS) {
				spin_unlock(&wheel->spin);
				panic("_callout %p verifier %p failed "
				      "func %p/%p\n",
				      c, c->verifier, c->rfunc, c->qfunc);
			}

			if (c->verifier->toc != c) {
				spin_unlock(&wheel->spin);
				panic("_callout %p verifier %p failed "
				      "func %p/%p\n",
				      c, c->verifier, c->rfunc, c->qfunc);
			}

			/*
			 * The wheel spinlock is sufficient to set INPROG and
			 * remove (c) from the list.  Once INPROG is set,
			 * other threads can only make limited changes to (c).
			 *
			 * Setting INPROG masks SET tests in all other
			 * conditionals except the 'quick' code (which is
			 * always same-cpu and doesn't race).  This means
			 * that we can clear SET here without obtaining
			 * c->spin.
			 */
			TAILQ_REMOVE(&wheel->list, c, entry);
			atomic_set_int(&c->flags, CALLOUT_INPROG);
			atomic_clear_int(&c->flags, CALLOUT_SET);
			sc->running = c;
			spin_unlock(&wheel->spin);

			/*
			 * Legacy mplock support
			 */
			if (c->flags & CALLOUT_MPSAFE) {
				if (mpsafe == 0) {
					mpsafe = 1;
					rel_mplock();
				}
			} else {
				if (mpsafe) {
					mpsafe = 0;
					get_mplock();
				}
			}

			/*
			 * Execute the 'q' function (protected by INPROG)
			 */
			if (c->flags & (CALLOUT_STOP | CALLOUT_CANCEL)) {
				/*
				 * Raced a stop or cancel request, do
				 * not execute.  The processing code
				 * thinks it's a normal completion so
				 * flag the fact that the cancel/stop
				 * actually prevented a callout here.
				 */
				if (c->flags &
				    (CALLOUT_CANCEL | CALLOUT_STOP)) {
					atomic_set_int(&c->verifier->flags,
						       CALLOUT_PREVENTED);
				}
			} else if (c->flags & CALLOUT_RESET) {
				/*
				 * A RESET raced, make it seem like it
				 * didn't.  Do nothing here and let the
				 * update procedure requeue us.
				 */
			} else if (c->flags & CALLOUT_AUTOLOCK) {
				/*
				 * Interlocked cancelable call.  If the
				 * lock gets canceled we have to flag the
				 * fact that the cancel/stop actually
				 * prevented the callout here.
				 */
				error = lockmgr(c->lk, LK_EXCLUSIVE |
						       LK_CANCELABLE);
				if (error == 0) {
					c->qfunc(c->qarg);
					lockmgr(c->lk, LK_RELEASE);
				} else if (c->flags &
					   (CALLOUT_CANCEL | CALLOUT_STOP)) {
					atomic_set_int(&c->verifier->flags,
						       CALLOUT_PREVENTED);
				}
			} else {
				/*
				 * Normal call
				 */
				c->qfunc(c->qarg);
			}

			/*
			 * INPROG will prevent SET from being set again.
			 * Once we clear INPROG, update the callout to
			 * handle any pending operations that have built up.
			 */

			/*
			 * Interlocked clearing of INPROG, then handle any
			 * queued request (such as a callout_reset() request).
			 */
			spin_lock(&c->spin);
			atomic_clear_int(&c->flags, CALLOUT_INPROG);
			sc->running = NULL;
			_callout_update_spinlocked(c);
			spin_unlock(&c->spin);

			spin_lock(&wheel->spin);
		}
		spin_unlock(&wheel->spin);
		atomic_add_int(&sc->softticks, 1);

		/*
		 * Clean up any _callout structures which are now allowed
		 * to be freed.
		 */
		crit_enter();
		while ((c = TAILQ_FIRST(&sc->freelist)) != NULL) {
			if (!exis_freeable(&c->exis))
				break;
			TAILQ_REMOVE(&sc->freelist, c, entry);
			c->flags = 0;
			kfree(c, M_CALLOUT);
			if (callout_debug)
				kprintf("KFREEB %p\n", c);
		}
		crit_exit();
	}

	/*
	 * Don't leave us holding the MP lock when we deschedule ourselves.
	 */
	if (mpsafe == 0) {
		mpsafe = 1;
		rel_mplock();
	}

	/*
	 * Recheck in critical section to interlock against hardclock
	 */
	crit_enter();
	if (sc->softticks == (int)(sc->curticks + 1)) {
		sc->isrunning = 0;
		lwkt_deschedule_self(&sc->thread);	/* == curthread */
		lwkt_switch();
	}
	crit_exit();
	goto loop;
	/* NOT REACHED */
}

/*
 * A very slow system cleanup timer (10 second interval),
 * per-cpu.
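 * It runs from the softclock thread itself (set up via
 * _callout_setup_quick() above) and reschedules itself with
 * callout_reset() on every invocation.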
 */
static void
slotimer_callback(void *arg)
{
	struct callout *c = arg;

	slab_cleanup();
	callout_reset(c, hz * 10, slotimer_callback, c);
}

/*
 * API FUNCTIONS
 */

static __inline
struct _callout *
_callout_gettoc(struct callout *cc)
{
	globaldata_t gd = mycpu;
	struct _callout *c;
	softclock_pcpu_t sc;

	KKASSERT(cc->flags & CALLOUT_DID_INIT);
	exis_hold_gd(gd);
	for (;;) {
		c = cc->toc;
		cpu_ccfence();
		if (c) {
			KKASSERT(c->verifier == cc);
			spin_lock(&c->spin);
			break;
		}
		sc = softclock_pcpu_ary[gd->gd_cpuid];
		c = kmalloc(sizeof(*c), M_CALLOUT, M_INTWAIT | M_ZERO);
		if (callout_debug)
			kprintf("ALLOC %p\n", c);
		c->flags = cc->flags;
		c->lk = cc->lk;
		c->verifier = cc;
		exis_init(&c->exis);
		spin_init(&c->spin, "calou");
		spin_lock(&c->spin);
		if (atomic_cmpset_ptr(&cc->toc, NULL, c))
			break;
		spin_unlock(&c->spin);
		c->verifier = NULL;
		kfree(c, M_CALLOUT);
		if (callout_debug)
			kprintf("KFREEA %p\n", c);
	}
	exis_drop_gd(gd);

	/*
	 * Return internal _callout with spin-lock held
	 */
	return c;
}

/*
 * Macro'd in sys/callout.h for debugging
 *
 * WARNING! tsleep() assumes this will not block
 */
void
_callout_init(struct callout *cc CALLOUT_DEBUG_ARGS)
{
	bzero(cc, sizeof(*cc));
	cc->flags = CALLOUT_DID_INIT;
}

void
_callout_init_mp(struct callout *cc CALLOUT_DEBUG_ARGS)
{
	bzero(cc, sizeof(*cc));
	cc->flags = CALLOUT_DID_INIT | CALLOUT_MPSAFE;
}

void
_callout_init_lk(struct callout *cc, struct lock *lk CALLOUT_DEBUG_ARGS)
{
	bzero(cc, sizeof(*cc));
	cc->flags = CALLOUT_DID_INIT | CALLOUT_MPSAFE | CALLOUT_AUTOLOCK;
	cc->lk = lk;
}

/*
 * Start or restart a timeout.  New timeouts can be installed while the
 * current one is running.
 *
 * Installs the callout structure on the callwheel of the current cpu.
 * Callers may legally pass any value, even if 0 or negative, but since
 * the sc->curticks index may have already been processed a minimum
 * timeout of 1 tick will be enforced.
 *
 * This function will not deadlock against a running call.
 *
 * WARNING! tsleep() assumes this will not block
 */
void
callout_reset(struct callout *cc, int to_ticks, void (*ftn)(void *), void *arg)
{
	softclock_pcpu_t sc;
	struct _callout *c;

	/*
	 * We need to acquire/associate a _callout.
	 * gettoc spin-locks (c).
	 */
	KKASSERT(cc->flags & CALLOUT_DID_INIT);
	atomic_set_int(&cc->flags, CALLOUT_ACTIVE);
	c = _callout_gettoc(cc);

	/*
	 * Request a RESET.  This automatically overrides a STOP in
	 * _callout_update_spinlocked().
	 */
	atomic_set_int(&c->flags, CALLOUT_RESET);
	sc = softclock_pcpu_ary[mycpu->gd_cpuid];
	c->rsc = sc;
	c->rtick = sc->curticks + to_ticks;
	c->rfunc = ftn;
	c->rarg = arg;
	_callout_update_spinlocked(c);
	spin_unlock(&c->spin);
}

/*
 * Same as callout_reset() but the timeout will run on a particular cpu.
 */
void
callout_reset_bycpu(struct callout *cc, int to_ticks, void (*ftn)(void *),
		    void *arg, int cpuid)
{
	softclock_pcpu_t sc;
	struct _callout *c;

	/*
	 * We need to acquire/associate a _callout.
	 * gettoc spin-locks (c).
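	 * The _callout is allocated on first use and remains associated
	 * with the frontend callout until callout_terminate().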
	 */
	KKASSERT(cc->flags & CALLOUT_DID_INIT);
	atomic_set_int(&cc->flags, CALLOUT_ACTIVE);
	c = _callout_gettoc(cc);

	/*
	 * Set RESET.  Do not clear STOP here (let the process code do it).
	 */
	atomic_set_int(&c->flags, CALLOUT_RESET);

	sc = softclock_pcpu_ary[cpuid];
	c->rsc = sc;
	c->rtick = sc->curticks + to_ticks;
	c->rfunc = ftn;
	c->rarg = arg;
	_callout_update_spinlocked(c);
	spin_unlock(&c->spin);
}

/*
 * Issue synchronous or asynchronous cancel or stop
 */
static __inline
int
_callout_cancel_or_stop(struct callout *cc, uint32_t flags, int sync)
{
	globaldata_t gd = mycpu;
	struct _callout *c;
	int res;

	/*
	 * Callout is inactive after cancel or stop.  Degenerate case if
	 * no _callout is currently associated.
	 */
	atomic_clear_int(&cc->flags, CALLOUT_ACTIVE);
	if (cc->toc == NULL)
		return 0;

	/*
	 * Ensure that the related (c) is not destroyed.  Set the CANCEL
	 * or STOP request flag, clear the PREVENTED status flag, and update.
	 */
	exis_hold_gd(gd);
	c = _callout_gettoc(cc);
	atomic_clear_int(&c->flags, CALLOUT_PREVENTED);
	atomic_set_int(&c->flags, flags);
	_callout_update_spinlocked(c);
	spin_unlock(&c->spin);

	/*
	 * If the operation is still in-progress then re-acquire the spin-lock
	 * and block if necessary.  Also initiate the lock cancel.
	 */
	if (sync == 0 || (c->flags & (CALLOUT_INPROG | CALLOUT_SET)) == 0) {
		exis_drop_gd(gd);
		return 0;
	}
	if (c->flags & CALLOUT_AUTOLOCK)
		lockmgr(c->lk, LK_CANCEL_BEG);
	spin_lock(&c->spin);
	if ((c->flags & (CALLOUT_INPROG | CALLOUT_SET)) == 0) {
		spin_unlock(&c->spin);
		if (c->flags & CALLOUT_AUTOLOCK)
			lockmgr(c->lk, LK_CANCEL_END);
		exis_drop_gd(gd);
		return ((c->flags & CALLOUT_PREVENTED) != 0);
	}

	/*
	 * With c->spin held we can synchronously wait for completion of our
	 * request.
	 *
	 * If INPROG is set and we are recursing from the callback the
	 * function completes immediately.
	 */
	++c->waiters;
	for (;;) {
		cpu_ccfence();
		if ((c->flags & flags) == 0)
			break;
		if ((c->flags & CALLOUT_INPROG) &&
		    curthread == &c->qsc->thread) {
			_callout_update_spinlocked(c);
			break;
		}
		ssleep(c, &c->spin, 0, "costp", 0);
	}
	--c->waiters;
	spin_unlock(&c->spin);
	if (c->flags & CALLOUT_AUTOLOCK)
		lockmgr(c->lk, LK_CANCEL_END);
	res = ((c->flags & CALLOUT_PREVENTED) != 0);
	exis_drop_gd(gd);

	return res;
}

/*
 * Internalized special low-overhead version without normal safety
 * checks or allocations.  Used by tsleep().
 *
 * Must be called from a critical section.  Specify both the external
 * and internal callout structure; the timeout is set on the current cpu.
 */
void
_callout_setup_quick(struct callout *cc, struct _callout *c, int ticks,
		     void (*ftn)(void *), void *arg)
{
	softclock_pcpu_t sc;
	struct wheel *wheel;

	/*
	 * Set up the frontend (cc) and backend (c) structures and queue
	 * the callout directly on the current cpu's callwheel.
	 */
	sc = softclock_pcpu_ary[mycpu->gd_cpuid];

	cc->flags = CALLOUT_DID_INIT | CALLOUT_MPSAFE;
	cc->toc = c;
	cc->lk = NULL;
	c->flags = cc->flags | CALLOUT_SET;
	c->lk = NULL;
	c->verifier = cc;
	c->qsc = sc;
	c->qtick = sc->curticks + ticks;
	c->qfunc = ftn;
	c->qarg = arg;
	spin_init(&c->spin, "calou");

	/*
	 * Since we are on the same cpu with a critical section, we can
	 * do this with only the wheel spinlock.
	 */
	if (c->qtick - sc->softticks <= 0)
		c->qtick = sc->softticks + 1;
	wheel = &sc->callwheel[c->qtick & cwheelmask];

	spin_lock(&wheel->spin);
	TAILQ_INSERT_TAIL(&wheel->list, c, entry);
	spin_unlock(&wheel->spin);
}

/*
 * Internalized special low-overhead version without normal safety
 * checks or allocations.  Used by tsleep().
 *
 * Must be called on the same cpu that queued the timeout.
 * Must be called with a critical section already held.
 */
void
_callout_cancel_quick(struct _callout *c)
{
	softclock_pcpu_t sc;
	struct wheel *wheel;

	/*
	 * Wakeup callouts for tsleep() should never block, so this flag
	 * had better never be found set.
	 */
	KKASSERT((c->flags & CALLOUT_INPROG) == 0);

	/*
	 * Remove from queue if necessary.  Since we are in a critical
	 * section on the same cpu, the queueing status should not change.
	 */
	if (c->flags & CALLOUT_SET) {
		sc = c->qsc;
		KKASSERT(sc == softclock_pcpu_ary[mycpu->gd_cpuid]);
		wheel = &sc->callwheel[c->qtick & cwheelmask];

		/*
		 * NOTE: We must still spin-lock the wheel because other
		 *	 cpus can manipulate the list.
		 */
		spin_lock(&wheel->spin);
		TAILQ_REMOVE(&wheel->list, c, entry);
		c->flags &= ~(CALLOUT_SET | CALLOUT_STOP |
			      CALLOUT_CANCEL | CALLOUT_RESET);
		spin_unlock(&wheel->spin);
	}
	c->verifier = NULL;
}

/*
 * This is a synchronous STOP which cancels the callout.  If AUTOLOCK
 * then a CANCEL will be issued to the lock holder.  Unlike STOP, the
 * cancel function prevents any new callout_reset()s from being issued
 * in addition to canceling the lock.  The lock will also be deactivated.
 *
 * Returns 0 if the callout was not active (or was active and completed,
 * but didn't try to start a new timeout).
 * Returns 1 if the cancel is responsible for stopping the callout.
 */
int
callout_cancel(struct callout *cc)
{
	return _callout_cancel_or_stop(cc, CALLOUT_CANCEL, 1);
}

/*
 * Currently the same as callout_cancel.  Ultimately we may wish the
 * drain function to allow a pending callout to proceed, but for now
 * we will attempt to cancel it.
 *
 * Returns 0 if the callout was not active (or was active and completed,
 * but didn't try to start a new timeout).
 * Returns 1 if the drain is responsible for stopping the callout.
 */
int
callout_drain(struct callout *cc)
{
	return _callout_cancel_or_stop(cc, CALLOUT_CANCEL, 1);
}

/*
 * Stops a callout if it is pending or queued, does not block.
 * This function does not interlock against a callout that is in-progress.
 *
 * Returns whether the STOP operation was responsible for removing a
 * queued or pending callout.
 */
int
callout_stop_async(struct callout *cc)
{
	return _callout_cancel_or_stop(cc, CALLOUT_STOP, 0);
}

/*
 * Callout deactivate merely clears the CALLOUT_ACTIVE bit and stops a
 * callout if it is pending or queued.  However, this cannot stop a callout
 * whose callback is in-progress.
 *
 * This function does not interlock against a callout that is in-progress.
 */
void
callout_deactivate(struct callout *cc)
{
	atomic_clear_int(&cc->flags, CALLOUT_ACTIVE);
	callout_stop_async(cc);
}

/*
 * Lock-aided callouts are STOPped synchronously using STOP semantics
 * (meaning that another thread can start the callout again before we
 * return).
 *
 * Non-lock-aided callouts:
 *
 * Stops a callout if it is pending or queued, does not block.
 * This function does not interlock against a callout that is in-progress.
 */
int
callout_stop(struct callout *cc)
{
	return _callout_cancel_or_stop(cc, CALLOUT_STOP, 1);
}

/*
 * Destroy the callout.  Synchronously cancel any operation in progress,
 * clear the INIT flag, and disconnect the internal _callout.  The internal
 * callout will be safely freed via EXIS.
 *
 * Upon return, the callout structure may only be reused if re-initialized.
 */
void
callout_terminate(struct callout *cc)
{
	struct _callout *c;

	exis_hold();

	_callout_cancel_or_stop(cc, CALLOUT_CANCEL, 1);
	KKASSERT(cc->flags & CALLOUT_DID_INIT);
	atomic_clear_int(&cc->flags, CALLOUT_DID_INIT);
	c = atomic_swap_ptr((void *)&cc->toc, NULL);
	if (c) {
		KKASSERT(c->verifier == cc);
		c->verifier = NULL;
		_callout_free(c);
	}

	exis_drop();
}

/*
 * Returns whether a callout is queued and the time has not yet
 * arrived (the callout is not yet in-progress).
 */
int
callout_pending(struct callout *cc)
{
	struct _callout *c;

	/*
	 * Don't instantiate toc to test pending
	 */
	if (cc->toc == NULL)
		return 0;
	c = _callout_gettoc(cc);
	if ((c->flags & (CALLOUT_SET | CALLOUT_INPROG)) == CALLOUT_SET) {
		spin_unlock(&c->spin);
		return 1;
	}
	spin_unlock(&c->spin);

	return 0;
}

/*
 * Returns whether a callout is active or not.  A callout is active when
 * a timeout is set and remains active upon normal termination, even if
 * it does not issue a new timeout.  A callout is inactive if a timeout has
 * never been set or if the callout has been stopped or canceled.  The next
 * timeout that is set will re-set the active state.
 */
int
callout_active(struct callout *cc)
{
	return ((cc->flags & CALLOUT_ACTIVE) ? 1 : 0);
}
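
/*
 * Illustrative consumer sketch (hypothetical driver code, not part of this
 * file): a typical user initializes the frontend callout once, arms it with
 * callout_reset(), and tears it down with callout_terminate() before the
 * containing structure is freed.  The names my_softc and my_timeout are
 * made up for the example.
 *
 *	struct my_softc {
 *		struct callout	timer;
 *		...
 *	};
 *
 *	static void
 *	my_timeout(void *arg)
 *	{
 *		struct my_softc *sc = arg;
 *
 *		// periodic work here, then re-arm for one second later
 *		callout_reset(&sc->timer, hz, my_timeout, sc);
 *	}
 *
 *	// setup:    callout_init_mp(&sc->timer);
 *	//           callout_reset(&sc->timer, hz, my_timeout, sc);
 *	// teardown: callout_terminate(&sc->timer);
 */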