/*
 * Copyright (c) 2004,2014,2019 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * The original callout mechanism was based on the work of Adam M. Costello
 * and George Varghese, published in a technical report entitled "Redesigning
 * the BSD Callout and Timer Facilities" and modified slightly for inclusion
 * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
 * used in this implementation was published by G. Varghese and T. Lauck in
 * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
 * the Efficient Implementation of a Timer Facility" in the Proceedings of
 * the 11th ACM Annual Symposium on Operating Systems Principles,
 * Austin, Texas Nov 1987.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/spinlock.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/interrupt.h>
#include <sys/thread.h>
#include <sys/sysctl.h>
#ifdef CALLOUT_TYPESTABLE
#include <sys/typestable.h>
#endif
#include <vm/vm_extern.h>
#include <machine/atomic.h>

#include <sys/spinlock2.h>
#include <sys/thread2.h>
#include <sys/mplock2.h>

TAILQ_HEAD(colist, _callout);
struct softclock_pcpu;
struct _callout_mag;

/*
 * DID_INIT	- Sanity check
 * SYNC		- (no longer used) Synchronous waiter, request SYNCDONE
 *		  and wakeup()
 * CANCEL_RES	- Flags that a cancel/stop prevented a callback
 * STOP_RES
 * RESET	- Callout_reset request queued
 * STOP		- Callout_stop request queued
 * INPROG	- Softclock_handler thread processing in-progress on callout
 * SET		- Callout is linked to queue (if INPROG not set)
 * AUTOLOCK	- Lockmgr cancelable interlock
 * MPSAFE	- Callout is MPSAFE
 * CANCEL	- callout_cancel request queued
 * ACTIVE	- active/inactive tracking (see documentation).  This is
 *		  *NOT* the same as whether a callout is queued or not.
 */
#define CALLOUT_DID_INIT	0x00000001	/* frontend */
#define CALLOUT_UNUSED0002	0x00000002
#define CALLOUT_UNUSED0004	0x00000004
#define CALLOUT_CANCEL_RES	0x00000008	/* frontend */
#define CALLOUT_STOP_RES	0x00000010	/* frontend */
#define CALLOUT_RESET		0x00000020	/* backend */
#define CALLOUT_STOP		0x00000040	/* backend */
#define CALLOUT_INPROG		0x00000080	/* backend */
#define CALLOUT_SET		0x00000100	/* backend */
#define CALLOUT_AUTOLOCK	0x00000200	/* both */
#define CALLOUT_MPSAFE		0x00000400	/* both */
#define CALLOUT_CANCEL		0x00000800	/* backend */
#define CALLOUT_ACTIVE		0x00001000	/* frontend */

struct wheel {
	struct spinlock spin;
	struct colist	list;
};

struct softclock_pcpu {
	struct wheel	*callwheel;
	struct _callout *running;
	struct _callout * volatile next;
#ifdef CALLOUT_TYPESTABLE
	struct _callout *quick_obj;
#endif
	int		softticks;	/* softticks index */
	int		curticks;	/* per-cpu ticks counter */
	int		isrunning;
	struct thread	thread;
};

typedef struct softclock_pcpu *softclock_pcpu_t;

TAILQ_HEAD(maglist, _callout_mag);

#if 0
static int callout_debug = 0;
SYSCTL_INT(_debug, OID_AUTO, callout_debug, CTLFLAG_RW,
	   &callout_debug, 0, "");
#endif

#ifdef CALLOUT_TYPESTABLE
static MALLOC_DEFINE(M_CALLOUT, "callouts", "softclock callouts");
#endif

static int cwheelsize;
static int cwheelmask;
static softclock_pcpu_t softclock_pcpu_ary[MAXCPU];
#ifdef CALLOUT_TYPESTABLE
static struct typestable_glob callout_tsg;
#endif

static void softclock_handler(void *arg);
static void slotimer_callback(void *arg);

#ifdef CALLOUT_TYPESTABLE
/*
 * typestable callback functions.  The init function pre-initializes
 * the structure in order to allow for reuse without complete
 * reinitialization (i.e. our spinlock).
 *
 * The test function allows us to reject an allocation attempt due
 * to the object being reassociated out-of-band.
 */
static
void
_callout_typestable_init(void *obj)
{
	struct _callout *c = obj;

	spin_init(&c->spin, "_callout");
}

/*
 * Object may have been reassociated out-of-band.
 *
 * Return 1 on success with the spin-lock held, allowing reallocation.
 * Return 0 on failure with no side effects, rejecting reallocation.
 */
static
int
_callout_typestable_test(void *obj)
{
	struct _callout *c = obj;

	if (c->flags & (CALLOUT_SET | CALLOUT_INPROG))
		return 0;
	spin_lock(&c->spin);
	if (c->flags & (CALLOUT_SET | CALLOUT_INPROG)) {
		spin_unlock(&c->spin);
		return 0;
	} else {
		return 1;
	}
}

/*
 * NOTE: sc might refer to a different cpu.
 */
static __inline
void
_callout_typestable_free(softclock_pcpu_t sc, void *obj, int tentative)
{
	if (tentative == 0) {
		obj = atomic_swap_ptr((void *)&sc->quick_obj, obj);
		if (obj == NULL)
			return;
	}
	typestable_free(&callout_tsg, obj, tentative);
}
#endif
/*
 * Post-processing helper for a callout executes any pending request.
 * This routine handles post-processing from the softclock thread and
 * also handles request processing from the API.
 *
 * This routine does not block in any way.
 * Caller must hold c->spin.
 *
 * INPROG - Callback is in-processing / in-progress.
 *
 * SET    - Assigned to queue or is in-processing.  If INPROG is set,
 *	    however, the _callout is no longer in the queue.
 *
 * RESET  - New timeout was installed.
 *
 * STOP   - Stop requested.
 *
 * ACTIVE - Set on callout_reset(), cleared by callout_stop()
 *	    or callout_cancel().  Starts out cleared.
 *
 * NOTE: Flags can be adjusted without holding c->spin, so atomic ops
 *	 must be used at all times.
 *
 * NOTE: The passed-in (sc) might refer to another cpu.
 */
static __inline
int
_callout_process_spinlocked(struct _callout *c, int fromsoftclock)
{
	struct wheel *wheel;
	int res = -1;

	/*
	 * If a callback manipulates the callout-in-progress we do
	 * a partial 'completion' of the operation so the operation
	 * can be processed synchronously and tell the softclock_handler
	 * to stop messing with it.
	 */
	if (fromsoftclock == 0 && curthread == &c->qsc->thread &&
	    c->qsc->running == c) {
		c->qsc->running = NULL;
		atomic_clear_int(&c->flags, CALLOUT_SET |
					    CALLOUT_INPROG);
	}

	/*
	 * Based on source and state
	 */
	if (fromsoftclock) {
		/*
		 * From control thread, INPROG is set, handle pending
		 * request and normal termination.
		 */
#ifdef CALLOUT_TYPESTABLE
		KASSERT(c->verifier->toc == c,
			("callout corrupt: c=%p %s/%d\n",
			 c, c->ident, c->lineno));
#else
		KASSERT(&c->verifier->toc == c,
			("callout corrupt: c=%p %s/%d\n",
			 c, c->ident, c->lineno));
#endif
		if (c->flags & CALLOUT_CANCEL) {
			/*
			 * CANCEL overrides everything.
			 *
			 * If a RESET is pending it counts as canceling a
			 * running timer.
			 */
			if (c->flags & CALLOUT_RESET)
				atomic_set_int(&c->verifier->flags,
					       CALLOUT_CANCEL_RES |
					       CALLOUT_STOP_RES);
			atomic_clear_int(&c->flags, CALLOUT_SET |
						    CALLOUT_INPROG |
						    CALLOUT_STOP |
						    CALLOUT_CANCEL |
						    CALLOUT_RESET);
			if (c->waiters)
				wakeup(c->verifier);
			res = 0;
		} else if (c->flags & CALLOUT_RESET) {
			/*
			 * RESET request pending, requeue appropriately.
			 */
			atomic_clear_int(&c->flags, CALLOUT_RESET |
						    CALLOUT_INPROG);
			atomic_set_int(&c->flags, CALLOUT_SET);
			c->qsc = c->rsc;
			c->qarg = c->rarg;
			c->qfunc = c->rfunc;
			c->qtick = c->rtick;

			/*
			 * Do not queue to current or past wheel or the
			 * callout will be lost for ages.
			 */
			wheel = &c->qsc->callwheel[c->qtick & cwheelmask];
			spin_lock(&wheel->spin);
			while (c->qtick - c->qsc->softticks <= 0) {
				c->qtick = c->qsc->softticks + 1;
				spin_unlock(&wheel->spin);
				wheel = &c->qsc->callwheel[c->qtick &
							   cwheelmask];
				spin_lock(&wheel->spin);
			}
			TAILQ_INSERT_TAIL(&wheel->list, c, entry);
			spin_unlock(&wheel->spin);
		} else {
			/*
			 * STOP request pending or normal termination.  Since
			 * this is from our control thread the callout has
			 * already been removed from the queue.
			 */
			atomic_clear_int(&c->flags, CALLOUT_SET |
						    CALLOUT_INPROG |
						    CALLOUT_STOP);
			if (c->waiters)
				wakeup(c->verifier);
			res = 1;
		}
	} else if (c->flags & CALLOUT_SET) {
		/*
		 * Process request from an API function.  qtick and ACTIVE
		 * are stable while we hold c->spin.  Checking INPROG requires
		 * holding wheel->spin.
		 *
		 * If INPROG is set the control thread must handle the request
		 * for us.
		 */
		softclock_pcpu_t sc;

		sc = c->qsc;

		wheel = &sc->callwheel[c->qtick & cwheelmask];
		spin_lock(&wheel->spin);
		if (c->flags & CALLOUT_INPROG) {
			/*
			 * API requests are deferred if a callback is in
			 * progress and will be handled after the callback
			 * returns.
			 */
		} else if (c->flags & CALLOUT_CANCEL) {
			/*
			 * CANCEL request overrides everything except INPROG
			 * (for INPROG the CANCEL is handled upon completion).
			 */
			if (sc->next == c)
				sc->next = TAILQ_NEXT(c, entry);
			TAILQ_REMOVE(&wheel->list, c, entry);
			atomic_set_int(&c->verifier->flags,
				       CALLOUT_CANCEL_RES |
				       CALLOUT_STOP_RES);
			atomic_clear_int(&c->flags, CALLOUT_STOP |
						    CALLOUT_SET |
						    CALLOUT_CANCEL |
						    CALLOUT_RESET);
			if (c->waiters)
				wakeup(c->verifier);
			res = 0;
		} else if (c->flags & CALLOUT_RESET) {
			/*
			 * RESET request pending, requeue appropriately.
			 *
			 * (ACTIVE is governed by c->spin so we do not have
			 * to clear it prior to releasing wheel->spin).
			 */
			if (sc->next == c)
				sc->next = TAILQ_NEXT(c, entry);
			TAILQ_REMOVE(&wheel->list, c, entry);
			spin_unlock(&wheel->spin);

			atomic_clear_int(&c->flags, CALLOUT_RESET);
			/* remain ACTIVE */
			sc = c->rsc;
			c->qsc = sc;
			c->qarg = c->rarg;
			c->qfunc = c->rfunc;
			c->qtick = c->rtick;

			/*
			 * Do not queue to current or past wheel or the
			 * callout will be lost for ages.
			 */
			wheel = &sc->callwheel[c->qtick & cwheelmask];
			spin_lock(&wheel->spin);
			while (c->qtick - sc->softticks <= 0) {
				c->qtick = sc->softticks + 1;
				spin_unlock(&wheel->spin);
				wheel = &sc->callwheel[c->qtick & cwheelmask];
				spin_lock(&wheel->spin);
			}
			TAILQ_INSERT_TAIL(&wheel->list, c, entry);
		} else if (c->flags & CALLOUT_STOP) {
			/*
			 * STOP request
			 */
			if (sc->next == c)
				sc->next = TAILQ_NEXT(c, entry);
			TAILQ_REMOVE(&wheel->list, c, entry);
			atomic_set_int(&c->verifier->flags, CALLOUT_STOP_RES);
			atomic_clear_int(&c->flags, CALLOUT_STOP |
						    CALLOUT_SET);
			if (c->waiters)
				wakeup(c->verifier);
			res = 1;
		} else {
			/*
			 * No request pending (someone else processed the
			 * request before we could)
			 */
			/* nop */
		}
		spin_unlock(&wheel->spin);
	} else {
		/*
		 * Process request from API function.  callout is not
		 * active so there's nothing for us to remove.
		 */
		KKASSERT((c->flags & CALLOUT_INPROG) == 0);
		if (c->flags & CALLOUT_CANCEL) {
			/*
			 * CANCEL request (nothing to cancel)
			 */
			if (c->flags & CALLOUT_RESET) {
				atomic_set_int(&c->verifier->flags,
					       CALLOUT_CANCEL_RES |
					       CALLOUT_STOP_RES);
			}
			atomic_clear_int(&c->flags, CALLOUT_STOP |
						    CALLOUT_CANCEL |
						    CALLOUT_RESET);
			if (c->waiters)
				wakeup(c->verifier);
			res = 0;
		} else if (c->flags & CALLOUT_RESET) {
			/*
			 * RESET request pending, queue appropriately.
			 * Do not queue to currently-processing tick.
			 */
			softclock_pcpu_t sc;

			sc = c->rsc;
			atomic_clear_int(&c->flags, CALLOUT_RESET);
			atomic_set_int(&c->flags, CALLOUT_SET);
			c->qsc = sc;
			c->qarg = c->rarg;
			c->qfunc = c->rfunc;
			c->qtick = c->rtick;

			/*
			 * Do not queue to current or past wheel or the
			 * callout will be lost for ages.
			 */
			wheel = &sc->callwheel[c->qtick & cwheelmask];
			spin_lock(&wheel->spin);
			while (c->qtick - sc->softticks <= 0) {
				c->qtick = sc->softticks + 1;
				spin_unlock(&wheel->spin);
				wheel = &sc->callwheel[c->qtick & cwheelmask];
				spin_lock(&wheel->spin);
			}
			TAILQ_INSERT_TAIL(&wheel->list, c, entry);
			spin_unlock(&wheel->spin);
		} else if (c->flags & CALLOUT_STOP) {
			/*
			 * STOP request (nothing to stop)
			 */
			atomic_clear_int(&c->flags, CALLOUT_STOP);
			if (c->waiters)
				wakeup(c->verifier);
			res = 1;
		} else {
			/*
			 * No request pending (someone else processed the
			 * request before we could)
			 */
			/* nop */
		}
	}
	return res;
}

/*
 * System init
 */
static void
swi_softclock_setup(void *arg)
{
	int cpu;
	int i;
	int target;

	/*
	 * Figure out how large a callwheel we need.  It must be a power of 2.
	 *
	 * ncallout is primarily based on available memory, don't explode
	 * the allocations if the system has a lot of cpus.
	 */
	target = ncallout / ncpus + 16;

	cwheelsize = 1;
	while (cwheelsize < target)
		cwheelsize <<= 1;
	cwheelmask = cwheelsize - 1;

#ifdef CALLOUT_TYPESTABLE
	typestable_init_glob(&callout_tsg, M_CALLOUT,
			     sizeof(struct _callout),
			     _callout_typestable_test,
			     _callout_typestable_init);
#endif

	/*
	 * Initialize per-cpu data structures.
	 */
	for (cpu = 0; cpu < ncpus; ++cpu) {
		softclock_pcpu_t sc;
		int wheel_sz;

		sc = (void *)kmem_alloc3(&kernel_map, sizeof(*sc),
					 VM_SUBSYS_GD, KM_CPU(cpu));
		memset(sc, 0, sizeof(*sc));
		softclock_pcpu_ary[cpu] = sc;

		wheel_sz = sizeof(*sc->callwheel) * cwheelsize;
		sc->callwheel = (void *)kmem_alloc3(&kernel_map, wheel_sz,
						    VM_SUBSYS_GD, KM_CPU(cpu));
		memset(sc->callwheel, 0, wheel_sz);
		for (i = 0; i < cwheelsize; ++i) {
			spin_init(&sc->callwheel[i].spin, "wheel");
			TAILQ_INIT(&sc->callwheel[i].list);
		}

		/*
		 * Mark the softclock handler as being an interrupt thread
		 * even though it really isn't, but do not allow it to
		 * preempt other threads (do not assign td_preemptable).
		 *
		 * Kernel code now assumes that callouts do not preempt
		 * the cpu they were scheduled on.
		 */
		lwkt_create(softclock_handler, sc, NULL, &sc->thread,
			    TDF_NOSTART | TDF_INTTHREAD,
			    cpu, "softclock %d", cpu);
	}
}

/*
 * Must occur after ncpus has been initialized.
 */
SYSINIT(softclock_setup, SI_BOOT2_SOFTCLOCK, SI_ORDER_SECOND,
	swi_softclock_setup, NULL);
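
/*
 * Worked example (illustrative only, not compiled in): how a timeout
 * tick maps onto the per-cpu wheel.  If ncallout / ncpus + 16 works out
 * to, say, 240, the sizing loop above rounds cwheelsize up to 256, so
 * cwheelmask = 255 and a callout due at qtick = 1025 hashes to wheel
 * slot (1025 & 255) = 1.  Any tick value lands in exactly one of the
 * cwheelsize slots; the requeue loops in
 * _callout_process_spinlocked() then guarantee the slot chosen is
 * strictly in the future relative to sc->softticks.
 */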
/*
 * This routine is called from the hardclock() (basically a FASTint/IPI) on
 * each cpu in the system.  sc->curticks is this cpu's notion of the timebase.
 * It IS NOT NECESSARILY SYNCHRONIZED WITH 'ticks'!  sc->softticks is where
 * the callwheel is currently indexed.
 *
 * sc->softticks is adjusted by either this routine or our helper thread
 * depending on whether the helper thread is running or not.
 *
 * sc->curticks and sc->softticks are adjusted using atomic ops in order
 * to ensure that remote cpu callout installation does not race the thread.
 */
void
hardclock_softtick(globaldata_t gd)
{
	softclock_pcpu_t sc;
	struct wheel *wheel;

	sc = softclock_pcpu_ary[gd->gd_cpuid];
	atomic_add_int(&sc->curticks, 1);
	if (sc->isrunning)
		return;
	if (sc->softticks == sc->curticks) {
		/*
		 * In sync, only wakeup the thread if there is something to
		 * do.
		 */
		wheel = &sc->callwheel[sc->softticks & cwheelmask];
		spin_lock(&wheel->spin);
		if (TAILQ_FIRST(&wheel->list)) {
			sc->isrunning = 1;
			spin_unlock(&wheel->spin);
			lwkt_schedule(&sc->thread);
		} else {
			atomic_add_int(&sc->softticks, 1);
			spin_unlock(&wheel->spin);
		}
	} else {
		/*
		 * Out of sync, wakeup the thread unconditionally so it can
		 * catch up.
		 */
		sc->isrunning = 1;
		lwkt_schedule(&sc->thread);
	}
}
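
/*
 * Worked example (illustrative only): if sc->curticks has advanced to
 * 100 while sc->softticks is still 97, the helper thread is behind and
 * must process wheel slots 97 through 100, stopping only when
 * softticks reaches curticks + 1, before it may deschedule itself.
 * hardclock_softtick() therefore schedules the thread unconditionally
 * in that case, and bumps softticks directly only when the two are in
 * sync and the current wheel slot is empty.
 */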
/*
 * This procedure is the main loop of our per-cpu helper thread.  The
 * sc->isrunning flag prevents us from racing hardclock_softtick().
 *
 * The thread starts with the MP lock released and not in a critical
 * section.  The loop itself is MP safe while individual callbacks
 * may or may not be, so we obtain or release the MP lock as appropriate.
 */
static void
softclock_handler(void *arg)
{
	softclock_pcpu_t sc;
	struct _callout *c;
	struct wheel *wheel;
	struct callout slotimer;
	int mpsafe = 1;

	/*
	 * Setup pcpu slow clocks which we want to run from the callout
	 * thread.
	 */
	callout_init_mp(&slotimer);
	callout_reset(&slotimer, hz * 10, slotimer_callback, &slotimer);

	/*
	 * Run the callout thread at the same priority as other kernel
	 * threads so it can be round-robined.
	 */
	/*lwkt_setpri_self(TDPRI_SOFT_NORM);*/

	sc = arg;
loop:
	while (sc->softticks != (int)(sc->curticks + 1)) {
		wheel = &sc->callwheel[sc->softticks & cwheelmask];

		spin_lock(&wheel->spin);
		sc->next = TAILQ_FIRST(&wheel->list);
		while ((c = sc->next) != NULL) {
			int error;
			int res;

			/*
			 * Match callouts for this tick.  The wheel spinlock
			 * is sufficient to set INPROG.  Once set, other
			 * threads can make only limited changes to (c).
			 */
			sc->next = TAILQ_NEXT(c, entry);
			if (c->qtick != sc->softticks)
				continue;
			TAILQ_REMOVE(&wheel->list, c, entry);
			atomic_set_int(&c->flags, CALLOUT_INPROG);
			sc->running = c;
			spin_unlock(&wheel->spin);

			/*
			 * legacy mplock support
			 */
			if (c->flags & CALLOUT_MPSAFE) {
				if (mpsafe == 0) {
					mpsafe = 1;
					rel_mplock();
				}
			} else {
				if (mpsafe) {
					mpsafe = 0;
					get_mplock();
				}
			}

			/*
			 * Execute function (protected by INPROG)
			 */
			if (c->flags & (CALLOUT_STOP | CALLOUT_CANCEL)) {
				/*
				 * Raced a stop or cancel request, do
				 * not execute.  The processing code
				 * thinks it's a normal completion so
				 * flag the fact that cancel/stop actually
				 * prevented a callout here.
				 */
				if (c->flags & CALLOUT_CANCEL) {
					atomic_set_int(&c->verifier->flags,
						       CALLOUT_CANCEL_RES |
						       CALLOUT_STOP_RES);
				} else if (c->flags & CALLOUT_STOP) {
					atomic_set_int(&c->verifier->flags,
						       CALLOUT_STOP_RES);
				}
			} else if (c->flags & CALLOUT_RESET) {
				/*
				 * A RESET raced, make it seem like it
				 * didn't.  Do nothing here and let the
				 * process routine requeue us.
				 */
			} else if (c->flags & CALLOUT_AUTOLOCK) {
				/*
				 * Interlocked cancelable call.  If the
				 * lock gets canceled we have to flag the
				 * fact that the cancel/stop actually
				 * prevented the callout here.
				 */
				error = lockmgr(c->lk, LK_EXCLUSIVE |
						       LK_CANCELABLE);
				if (error == 0) {
					c->qfunc(c->qarg);
					lockmgr(c->lk, LK_RELEASE);
				} else if (c->flags & CALLOUT_CANCEL) {
					atomic_set_int(&c->verifier->flags,
						       CALLOUT_CANCEL_RES |
						       CALLOUT_STOP_RES);
				} else if (c->flags & CALLOUT_STOP) {
					atomic_set_int(&c->verifier->flags,
						       CALLOUT_STOP_RES);
				}
			} else {
				/*
				 * Normal call
				 */
				c->qfunc(c->qarg);
			}

			if (sc->running == c) {
				/*
				 * We are still INPROG so (c) remains valid,
				 * but the callout is now governed by its
				 * internal spin-lock.
				 */
				spin_lock(&c->spin);
				res = _callout_process_spinlocked(c, 1);
				spin_unlock(&c->spin);
#ifdef CALLOUT_TYPESTABLE
				if (res >= 0)
					_callout_typestable_free(sc, c, res);
#endif
			}
			spin_lock(&wheel->spin);
		}
		sc->running = NULL;
		spin_unlock(&wheel->spin);
		atomic_add_int(&sc->softticks, 1);
	}

	/*
	 * Don't leave us holding the MP lock when we deschedule ourselves.
	 */
	if (mpsafe == 0) {
		mpsafe = 1;
		rel_mplock();
	}

	/*
	 * Recheck in critical section to interlock against hardclock
	 */
	crit_enter();
	if (sc->softticks == (int)(sc->curticks + 1)) {
		sc->isrunning = 0;
		lwkt_deschedule_self(&sc->thread);	/* == curthread */
		lwkt_switch();
	}
	crit_exit();
	goto loop;
	/* NOT REACHED */
}

/*
 * A very slow system cleanup timer (10 second interval),
 * per-cpu.
 */
void
slotimer_callback(void *arg)
{
	struct callout *c = arg;

	slab_cleanup();
	callout_reset(c, hz * 10, slotimer_callback, c);
}

/*
 * API FUNCTIONS
 */

/*
 * Prepare a callout structure for use by callout_reset() and/or
 * callout_stop().
 *
 * The MP version of this routine requires that the callback
 * function installed by callout_reset() be MP safe.
 *
 * The LK version of this routine is also MPsafe and will automatically
 * acquire the specified lock for the duration of the function call,
 * and release it after the function returns.  In addition, when autolocking
 * is used, callout_stop() becomes synchronous if the caller owns the lock.
 * callout_reset(), callout_stop(), and callout_cancel() will block
 * normally instead of spinning when a cpu race occurs.  Lock cancelation
 * is used to avoid deadlocks against the callout ring dispatch.
 *
 * The init functions can be called from any cpu and do not have to be
 * called from the cpu that the timer will eventually run on.
 */
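
/*
 * Usage sketch (illustrative only, not compiled in).  A hypothetical
 * driver with an embedded lock can use the LK flavor so the callback
 * runs with the lock held and stops synchronize against it; all names
 * below (mysoftc, mytimer, mysc) are made up for the example:
 *
 *	struct mysoftc {
 *		struct lock	lk;
 *		struct callout	timer;
 *	};
 *
 *	static void
 *	mytimer(void *arg)		// called with mysc->lk held
 *	{
 *		struct mysoftc *mysc = arg;
 *
 *		// ... periodic work ...
 *		callout_reset(&mysc->timer, hz, mytimer, mysc);
 *	}
 *
 *	// initialization
 *	lockinit(&mysc->lk, "mysc", 0, 0);
 *	callout_init_lk(&mysc->timer, &mysc->lk);
 *	callout_reset(&mysc->timer, hz, mytimer, mysc);
 *
 *	// teardown, before freeing mysc
 *	callout_terminate(&mysc->timer);
 */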
static __inline void
_callout_setup(struct callout *cc, int flags CALLOUT_DEBUG_ARGS)
{
	bzero(cc, sizeof(*cc));
	cc->flags = flags;		/* frontend flags */
#ifdef CALLOUT_DEBUG
#ifdef CALLOUT_TYPESTABLE
	cc->ident = ident;
	cc->lineno = lineno;
#else
	cc->toc.verifier = cc;		/* corruption detector */
	cc->toc.ident = ident;
	cc->toc.lineno = lineno;
	cc->toc.flags = flags;		/* backend flags */
#endif
#endif
}

/*
 * Associate an internal _callout with the external callout and
 * verify that the type-stable structure is still applicable (inactive
 * type-stable _callouts might have been reused for a different callout).
 * If not, a new internal structure will be allocated.
 *
 * Returns the _callout already spin-locked.
 */
static __inline
struct _callout *
_callout_gettoc(struct callout *cc)
{
	struct _callout *c;
#ifdef CALLOUT_TYPESTABLE
	softclock_pcpu_t sc;

	KKASSERT(cc->flags & CALLOUT_DID_INIT);
	for (;;) {
		c = cc->toc;
		cpu_ccfence();
		if (c == NULL) {
			sc = softclock_pcpu_ary[mycpu->gd_cpuid];
			c = atomic_swap_ptr((void *)&sc->quick_obj, NULL);
			if (c == NULL || _callout_typestable_test(c) == 0)
				c = typestable_alloc(&callout_tsg);
			/* returns spin-locked */
			c->verifier = cc;
			c->flags = cc->flags;
			c->lk = cc->lk;
			c->ident = cc->ident;
			c->lineno = cc->lineno;
			if (atomic_cmpset_ptr(&cc->toc, NULL, c)) {
				break;
			}
			c->verifier = NULL;
			spin_unlock(&c->spin);
			_callout_typestable_free(sc, c, 0);
		} else {
			spin_lock(&c->spin);
			if (c->verifier == cc)
				break;
			spin_unlock(&c->spin);
			/* ok if atomic op fails */
			(void)atomic_cmpset_ptr(&cc->toc, c, NULL);
		}
	}
#else
	c = &cc->toc;
	spin_lock(&c->spin);
#endif
	/* returns with spin-lock held */
	return c;
}

/*
 * Macroed in sys/callout.h for debugging
 *
 * WARNING! tsleep() assumes this will not block
 */
void
_callout_init(struct callout *cc CALLOUT_DEBUG_ARGS)
{
	_callout_setup(cc, CALLOUT_DID_INIT
			   CALLOUT_DEBUG_PASSTHRU);
}

void
_callout_init_mp(struct callout *cc CALLOUT_DEBUG_ARGS)
{
	_callout_setup(cc, CALLOUT_DID_INIT | CALLOUT_MPSAFE
			   CALLOUT_DEBUG_PASSTHRU);
}

void
_callout_init_lk(struct callout *cc, struct lock *lk CALLOUT_DEBUG_ARGS)
{
	_callout_setup(cc, CALLOUT_DID_INIT | CALLOUT_MPSAFE |
			   CALLOUT_AUTOLOCK
			   CALLOUT_DEBUG_PASSTHRU);
#ifdef CALLOUT_TYPESTABLE
	cc->lk = lk;
#else
	cc->toc.lk = lk;
#endif
}

/*
 * Start or restart a timeout.  New timeouts can be installed while the
 * current one is running.
 *
 * Installs the callout structure on the callwheel of the current cpu.
 * Callers may legally pass any value, even if 0 or negative, but since
 * the sc->curticks index may have already been processed a minimum
 * timeout of 1 tick will be enforced.
 *
 * This function will not deadlock against a running call.
 *
 * WARNING! tsleep() assumes this will not block
 */
void
callout_reset(struct callout *cc, int to_ticks, void (*ftn)(void *), void *arg)
{
	softclock_pcpu_t sc;
	struct _callout *c;
	int res;

	atomic_set_int(&cc->flags, CALLOUT_ACTIVE);
	c = _callout_gettoc(cc);

	/*
	 * Set RESET.  Do not clear STOP here (let the process code do it).
	 */
	atomic_set_int(&c->flags, CALLOUT_RESET);
	sc = softclock_pcpu_ary[mycpu->gd_cpuid];
	c->rsc = sc;
	c->rtick = sc->curticks + to_ticks;
	c->rfunc = ftn;
	c->rarg = arg;
#ifdef CALLOUT_TYPESTABLE
	cc->arg = arg;	/* only used by callout_arg() */
#endif
	res = _callout_process_spinlocked(c, 0);
	spin_unlock(&c->spin);
#ifdef CALLOUT_TYPESTABLE
	if (res >= 0)
		_callout_typestable_free(sc, c, res);
#endif
}
/*
 * Same as callout_reset() but the timeout will run on a particular cpu.
 */
void
callout_reset_bycpu(struct callout *cc, int to_ticks, void (*ftn)(void *),
		    void *arg, int cpuid)
{
	softclock_pcpu_t sc;
	struct _callout *c;
	int res;

	atomic_set_int(&cc->flags, CALLOUT_ACTIVE);
	c = _callout_gettoc(cc);

	/*
	 * Set RESET.  Do not clear STOP here (let the process code do it).
	 */
	atomic_set_int(&c->flags, CALLOUT_RESET);

	sc = softclock_pcpu_ary[cpuid];
	c->rsc = sc;
	c->rtick = sc->curticks + to_ticks;
	c->rfunc = ftn;
	c->rarg = arg;
#ifdef CALLOUT_TYPESTABLE
	cc->arg = arg;	/* only used by callout_arg() */
#endif
	res = _callout_process_spinlocked(c, 0);
	spin_unlock(&c->spin);
#ifdef CALLOUT_TYPESTABLE
	if (res >= 0)
		_callout_typestable_free(sc, c, res);
#endif
}

static __inline
void
_callout_cancel_or_stop(struct callout *cc, uint32_t flags)
{
	struct _callout *c;
	softclock_pcpu_t sc;
	int res;

#ifdef CALLOUT_TYPESTABLE
	if (cc->toc == NULL || cc->toc->verifier != cc)
		return;
#else
	KKASSERT(cc->toc.verifier == cc);
#endif
	/*
	 * Setup for synchronous wait.
	 */
	atomic_clear_int(&cc->flags, CALLOUT_ACTIVE);
	c = _callout_gettoc(cc);

	/*
	 * Set STOP or CANCEL request.  If this is a STOP, clear a queued
	 * RESET now.
	 */
	atomic_set_int(&c->flags, flags);
	if (flags & CALLOUT_STOP) {
		if (c->flags & CALLOUT_RESET) {
			atomic_set_int(&cc->flags, CALLOUT_STOP_RES);
			atomic_clear_int(&c->flags, CALLOUT_RESET);
		}
	}
	sc = softclock_pcpu_ary[mycpu->gd_cpuid];
	res = _callout_process_spinlocked(c, 0);
	spin_unlock(&c->spin);
#ifdef CALLOUT_TYPESTABLE
	if (res >= 0)
		_callout_typestable_free(sc, c, res);
#endif

	/*
	 * Wait for the CANCEL or STOP to finish.
	 *
	 * WARNING! (c) can go stale now, so do not use (c) after this
	 *	    point. XXX
	 */
	if (c->flags & flags) {
		atomic_add_int(&c->waiters, 1);
#ifdef CALLOUT_TYPESTABLE
		if (cc->flags & CALLOUT_AUTOLOCK)
			lockmgr(cc->lk, LK_CANCEL_BEG);
#else
		if (cc->flags & CALLOUT_AUTOLOCK)
			lockmgr(c->lk, LK_CANCEL_BEG);
#endif
		for (;;) {
			tsleep_interlock(cc, 0);
			if ((atomic_fetchadd_int(&c->flags, 0) & flags) == 0)
				break;
			tsleep(cc, PINTERLOCKED, "costp", 0);
		}
#ifdef CALLOUT_TYPESTABLE
		if (cc->flags & CALLOUT_AUTOLOCK)
			lockmgr(cc->lk, LK_CANCEL_END);
#else
		if (cc->flags & CALLOUT_AUTOLOCK)
			lockmgr(c->lk, LK_CANCEL_END);
#endif
		atomic_add_int(&c->waiters, -1);
	}
#ifndef CALLOUT_TYPESTABLE
	KKASSERT(cc->toc.verifier == cc);
#endif
}

/*
 * This is a synchronous STOP which cancels the callout.  If AUTOLOCK
 * then a CANCEL will be issued to the lock holder.  Unlike STOP, the
 * cancel function prevents any new callout_reset()s from being issued
 * in addition to canceling the lock.  The lock will also be deactivated.
 *
 * Returns 0 if the callout was not active (or was active and completed,
 *	     but didn't try to start a new timeout).
 * Returns 1 if the cancel is responsible for stopping the callout.
 */
int
callout_cancel(struct callout *cc)
{
	atomic_clear_int(&cc->flags, CALLOUT_CANCEL_RES);
	_callout_cancel_or_stop(cc, CALLOUT_CANCEL);

	return ((cc->flags & CALLOUT_CANCEL_RES) ? 1 : 0);
}
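
/*
 * Illustrative teardown pattern (hypothetical consumer code, not
 * compiled in): the return value distinguishes "we prevented a
 * callback" from "there was nothing left to stop":
 *
 *	if (callout_cancel(&mysc->timer))
 *		...	// cancel removed a queued/pending callback
 *	else
 *		...	// callout was idle or had already completed
 */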
/*
 * Currently the same as callout_cancel.  Ultimately we may wish the
 * drain function to allow a pending callout to proceed, but for now
 * we will attempt to cancel it.
 *
 * Returns 0 if the callout was not active (or was active and completed,
 *	     but didn't try to start a new timeout).
 * Returns 1 if the drain is responsible for stopping the callout.
 */
int
callout_drain(struct callout *cc)
{
	atomic_clear_int(&cc->flags, CALLOUT_CANCEL_RES);
	_callout_cancel_or_stop(cc, CALLOUT_CANCEL);

	return ((cc->flags & CALLOUT_CANCEL_RES) ? 1 : 0);
}

/*
 * Stops a callout if it is pending or queued, does not block.
 * This function does not interlock against a callout that is in-progress.
 *
 * Returns whether the STOP operation was responsible for removing a
 * queued or pending callout.
 */
int
callout_stop_async(struct callout *cc)
{
	softclock_pcpu_t sc;
	struct _callout *c;
	uint32_t flags;
	int res;

	atomic_clear_int(&cc->flags, CALLOUT_STOP_RES | CALLOUT_ACTIVE);
#ifdef CALLOUT_TYPESTABLE
	if (cc->toc == NULL || cc->toc->verifier != cc)
		return 0;
#else
	KKASSERT(cc->toc.verifier == cc);
#endif
	c = _callout_gettoc(cc);

	/*
	 * Set STOP request.  If a RESET is queued, clear it now and flag
	 * the fact that the STOP removed it.
	 */
	atomic_set_int(&c->flags, CALLOUT_STOP);
	if (c->flags & CALLOUT_RESET) {
		atomic_set_int(&cc->flags, CALLOUT_STOP_RES);
		atomic_clear_int(&c->flags, CALLOUT_RESET);
	}
	sc = softclock_pcpu_ary[mycpu->gd_cpuid];
	res = _callout_process_spinlocked(c, 0);
	flags = cc->flags;
	spin_unlock(&c->spin);
#ifdef CALLOUT_TYPESTABLE
	if (res >= 0)
		_callout_typestable_free(sc, c, res);
#endif

	return ((flags & CALLOUT_STOP_RES) ? 1 : 0);
}

/*
 * Callout deactivate merely clears the CALLOUT_ACTIVE bit.  It does not
 * stop a pending or queued callout, does not block, and does not
 * interlock against a callout that is in-progress.
 */
void
callout_deactivate(struct callout *cc)
{
	atomic_clear_int(&cc->flags, CALLOUT_ACTIVE);
}

/*
 * Lock-aided callouts are STOPped synchronously using STOP semantics
 * (meaning that another thread can start the callout again before we
 * return).
 *
 * Non-lock-aided callouts are stopped asynchronously: the callout is
 * removed if it is pending or queued, without blocking.  This function
 * does not interlock against a callout that is in-progress.
 */
int
callout_stop(struct callout *cc)
{
	if (cc->flags & CALLOUT_AUTOLOCK) {
		atomic_clear_int(&cc->flags, CALLOUT_STOP_RES);
		_callout_cancel_or_stop(cc, CALLOUT_STOP);
		return ((cc->flags & CALLOUT_STOP_RES) ? 1 : 0);
	} else {
		return callout_stop_async(cc);
	}
}

/*
 * Terminates a callout by canceling operations and then clearing the
 * INIT bit.  Upon return, the callout structure must no longer be used.
 */
void
callout_terminate(struct callout *cc)
{
	_callout_cancel_or_stop(cc, CALLOUT_CANCEL);
	atomic_clear_int(&cc->flags, CALLOUT_DID_INIT);
#ifdef CALLOUT_TYPESTABLE
	atomic_swap_ptr((void *)&cc->toc, NULL);
#else
	cc->toc.verifier = NULL;
#endif
}
/*
 * Returns whether a callout is queued and the time has not yet
 * arrived (the callout is not yet in-progress).
 */
int
callout_pending(struct callout *cc)
{
	struct _callout *c;
	int res = 0;

	/*
	 * Don't instantiate toc to test pending
	 */
#ifdef CALLOUT_TYPESTABLE
	if ((c = cc->toc) != NULL) {
#else
	c = &cc->toc;
	KKASSERT(c->verifier == cc);
	{
#endif
		spin_lock(&c->spin);
		if (c->verifier == cc) {
			res = ((c->flags & (CALLOUT_SET|CALLOUT_INPROG)) ==
			       CALLOUT_SET);
		}
		spin_unlock(&c->spin);
	}
	return res;
}

/*
 * Returns whether a callout is active or not.  A callout is active when
 * a timeout is set and remains active upon normal termination, even if
 * it does not issue a new timeout.  A callout is inactive if a timeout has
 * never been set or if the callout has been stopped or canceled.  The next
 * timeout that is set will re-set the active state.
 */
int
callout_active(struct callout *cc)
{
	return ((cc->flags & CALLOUT_ACTIVE) ? 1 : 0);
}
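
/*
 * Usage sketch (illustrative only, not compiled in): the classic
 * pattern for a consumer deciding whether a wakeup raced its timer,
 * reusing the hypothetical per-softc callout from the earlier example:
 *
 *	if (callout_pending(&mysc->timer)) {
 *		// timeout still queued; the callback has not started
 *	} else if (callout_active(&mysc->timer)) {
 *		// callback ran (or is running) and nobody stopped it
 *		callout_deactivate(&mysc->timer);
 *		// ... handle the timeout ...
 *	} else {
 *		// callout was stopped or canceled; nothing to do
 *	}
 */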