/*
 * Copyright (c) 2004,2014 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * The original callout mechanism was based on the work of Adam M. Costello
 * and George Varghese, published in a technical report entitled "Redesigning
 * the BSD Callout and Timer Facilities" and modified slightly for inclusion
 * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
 * used in this implementation was published by G. Varghese and T. Lauck in
 * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
 * the Efficient Implementation of a Timer Facility" in the Proceedings of
 * the 11th ACM Annual Symposium on Operating Systems Principles,
 * Austin, Texas Nov 1987.
 *
 * The per-cpu augmentation was done by Matthew Dillon.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/interrupt.h>
#include <sys/thread.h>

#include <sys/thread2.h>
#include <sys/mplock2.h>

struct softclock_pcpu {
	struct callout_tailq *callwheel;
	struct callout * volatile next;
	intptr_t running;	/* NOTE! Bit 0 used to flag wakeup */
	int softticks;		/* softticks index */
	int curticks;		/* per-cpu ticks counter */
	int isrunning;
	struct thread thread;
};

typedef struct softclock_pcpu *softclock_pcpu_t;

static MALLOC_DEFINE(M_CALLOUT, "callout", "callout structures");
static int cwheelsize;
static int cwheelmask;
static struct softclock_pcpu softclock_pcpu_ary[MAXCPU];

static void softclock_handler(void *arg);
static void slotimer_callback(void *arg);
static void callout_reset_ipi(void *arg);
static void callout_stop_ipi(void *arg, int issync, struct intrframe *frame);


static void
swi_softclock_setup(void *arg)
{
	int cpu;
	int i;
	int target;

	/*
	 * Figure out how large a callwheel we need.  It must be a power of 2.
	 *
	 * ncallout is primarily based on available memory, don't explode
	 * the allocations if the system has a lot of cpus.
	 */
	target = ncallout / ncpus + 16;

	cwheelsize = 1;
	while (cwheelsize < target)
		cwheelsize <<= 1;
	cwheelmask = cwheelsize - 1;

	/*
	 * Initialize per-cpu data structures.
	 */
	for (cpu = 0; cpu < ncpus; ++cpu) {
		softclock_pcpu_t sc;

		sc = &softclock_pcpu_ary[cpu];

		sc->callwheel = kmalloc(sizeof(*sc->callwheel) * cwheelsize,
					M_CALLOUT, M_WAITOK | M_ZERO);
		for (i = 0; i < cwheelsize; ++i)
			TAILQ_INIT(&sc->callwheel[i]);

		/*
		 * Mark the softclock handler as being an interrupt thread
		 * even though it really isn't, but do not allow it to
		 * preempt other threads (do not assign td_preemptable).
		 *
		 * Kernel code now assumes that callouts do not preempt
		 * the cpu they were scheduled on.
		 */
		lwkt_create(softclock_handler, sc, NULL,
			    &sc->thread, TDF_NOSTART | TDF_INTTHREAD,
			    cpu, "softclock %d", cpu);
	}
}

/*
 * Must occur after ncpus has been initialized.
 */
SYSINIT(softclock_setup, SI_BOOT2_SOFTCLOCK, SI_ORDER_SECOND,
	swi_softclock_setup, NULL);
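
/*
 * Illustrative note (added commentary): with the power-of-2 sizing above,
 * a callout scheduled for absolute tick c_time simply hashes into bucket
 * (c_time & cwheelmask).  For example, with cwheelsize = 256 (cwheelmask =
 * 0xff), callouts expiring at ticks 5, 261, and 517 all land in bucket 5;
 * softclock_handler() below distinguishes them by comparing each entry's
 * c_time against sc->softticks and skipping entries that have not yet
 * expired.
 */
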
/*
 * Clear PENDING and, if possible, also clear ARMED and WAITING.  Returns
 * the flags prior to the clear, atomically (used to check for WAITING).
 *
 * Clearing the cpu association (ARMED) can significantly improve the
 * performance of the next callout_reset*() call.
 */
static __inline
int
callout_unpend_disarm(struct callout *c)
{
	int flags;
	int nflags;

	for (;;) {
		flags = c->c_flags;
		cpu_ccfence();
		nflags = flags & ~(CALLOUT_PENDING | CALLOUT_WAITING);
		if ((flags & CALLOUT_IPI_MASK) == 0)
			nflags &= ~CALLOUT_ARMED;
		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
			break;
		}
		cpu_pause();
		/* retry */
	}
	return flags;
}

/*
 * Clear ARMED after finishing adjustments to the callout, potentially
 * allowing other cpus to take over.  We can only do this if the IPI mask
 * is 0.
 */
static __inline
int
callout_maybe_clear_armed(struct callout *c)
{
	int flags;
	int nflags;

	for (;;) {
		flags = c->c_flags;
		cpu_ccfence();
		if (flags & (CALLOUT_PENDING | CALLOUT_IPI_MASK))
			break;
		nflags = flags & ~CALLOUT_ARMED;
		if (atomic_cmpset_int(&c->c_flags, flags, nflags))
			break;
		cpu_pause();
		/* retry */
	}
	return flags;
}

/*
 * This routine is called from the hardclock() (basically a FASTint/IPI) on
 * each cpu in the system.  sc->curticks is this cpu's notion of the timebase.
 * It IS NOT NECESSARILY SYNCHRONIZED WITH 'ticks'!  sc->softticks is where
 * the callwheel is currently indexed.
 *
 * WARNING!  The MP lock is not necessarily held on call, nor can it be
 * safely obtained.
 *
 * sc->softticks is adjusted by either this routine or our helper thread
 * depending on whether the helper thread is running or not.
 */
void
hardclock_softtick(globaldata_t gd)
{
	softclock_pcpu_t sc;

	sc = &softclock_pcpu_ary[gd->gd_cpuid];
	++sc->curticks;
	if (sc->isrunning)
		return;
	if (sc->softticks == sc->curticks) {
		/*
		 * In sync, only wakeup the thread if there is something to
		 * do.
		 */
		if (TAILQ_FIRST(&sc->callwheel[sc->softticks & cwheelmask])) {
			sc->isrunning = 1;
			lwkt_schedule(&sc->thread);
		} else {
			++sc->softticks;
		}
	} else {
		/*
		 * Out of sync, wakeup the thread unconditionally so it can
		 * catch up.
		 */
		sc->isrunning = 1;
		lwkt_schedule(&sc->thread);
	}
}

/*
 * This procedure is the main loop of our per-cpu helper thread.  The
 * sc->isrunning flag prevents us from racing hardclock_softtick() and
 * a critical section is sufficient to interlock sc->curticks and protect
 * us from remote IPI's / list removal.
 *
 * The thread starts with the MP lock released and not in a critical
 * section.  The loop itself is MP safe while individual callbacks
 * may or may not be, so we obtain or release the MP lock as appropriate.
 */
static void
softclock_handler(void *arg)
{
	softclock_pcpu_t sc;
	struct callout *c;
	struct callout_tailq *bucket;
	struct callout slotimer;
	int mpsafe = 1;
	int flags;

	/*
	 * Setup pcpu slow clocks which we want to run from the callout
	 * thread.
	 */
	callout_init_mp(&slotimer);
	callout_reset(&slotimer, hz * 10, slotimer_callback, &slotimer);

	/*
	 * Run the callout thread at the same priority as other kernel
	 * threads so it can be round-robined.
	 */
	/*lwkt_setpri_self(TDPRI_SOFT_NORM);*/

	/*
	 * Loop critical section against ipi operations to this cpu.
	 */
	sc = arg;
	crit_enter();
loop:
	while (sc->softticks != (int)(sc->curticks + 1)) {
		bucket = &sc->callwheel[sc->softticks & cwheelmask];

		for (c = TAILQ_FIRST(bucket); c; c = sc->next) {
			if (c->c_time != sc->softticks) {
				sc->next = TAILQ_NEXT(c, c_links.tqe);
				continue;
			}

			flags = c->c_flags;
			if (flags & CALLOUT_MPSAFE) {
				if (mpsafe == 0) {
					mpsafe = 1;
					rel_mplock();
				}
			} else {
				/*
				 * The request might be removed while we
				 * are waiting to get the MP lock.  If it
				 * was removed sc->next will point to the
				 * next valid request or NULL, loop up.
				 */
				if (mpsafe) {
					mpsafe = 0;
					sc->next = c;
					get_mplock();
					if (c != sc->next)
						continue;
				}
			}

			/*
			 * Queue protection only exists while we hold the
			 * critical section uninterrupted.
			 *
			 * Adjust sc->next when removing (c) from the queue,
			 * note that an IPI on this cpu may make further
			 * adjustments to sc->next.
			 */
			sc->next = TAILQ_NEXT(c, c_links.tqe);
			TAILQ_REMOVE(bucket, c, c_links.tqe);

			KASSERT((c->c_flags & CALLOUT_ARMED) &&
				(c->c_flags & CALLOUT_PENDING) &&
				CALLOUT_FLAGS_TO_CPU(c->c_flags) ==
				mycpu->gd_cpuid,
				("callout %p: bad flags %08x", c, c->c_flags));

			/*
			 * Once CALLOUT_PENDING is cleared, sc->running
			 * protects the callout structure's existence but
			 * only until we call c_func().  A callout_stop()
			 * or callout_reset() issued from within c_func()
			 * will not block.  The callout can also be kfree()d
			 * by c_func().
			 *
			 * We set EXECUTED before calling c_func() so a
			 * callout_stop() issued from within c_func() returns
			 * the correct status.
			 */
			if ((flags & (CALLOUT_AUTOLOCK | CALLOUT_ACTIVE)) ==
			    (CALLOUT_AUTOLOCK | CALLOUT_ACTIVE)) {
				void (*c_func)(void *);
				void *c_arg;
				struct lock *c_lk;
				int error;

				/*
				 * NOTE: sc->running must be set prior to
				 *	 CALLOUT_PENDING being cleared to
				 *	 avoid missed CANCELs and *_stop()
				 *	 races.
				 */
				sc->running = (intptr_t)c;
				c_func = c->c_func;
				c_arg = c->c_arg;
				c_lk = c->c_lk;
				c->c_func = NULL;
				KKASSERT(c->c_flags & CALLOUT_DID_INIT);
				flags = callout_unpend_disarm(c);
				error = lockmgr(c_lk, LK_EXCLUSIVE |
						      LK_CANCELABLE);
				if (error == 0) {
					atomic_set_int(&c->c_flags,
						       CALLOUT_EXECUTED);
					crit_exit();
					c_func(c_arg);
					crit_enter();
					lockmgr(c_lk, LK_RELEASE);
				}
			} else if (flags & CALLOUT_ACTIVE) {
				void (*c_func)(void *);
				void *c_arg;

				sc->running = (intptr_t)c;
				c_func = c->c_func;
				c_arg = c->c_arg;
				c->c_func = NULL;
				KKASSERT(c->c_flags & CALLOUT_DID_INIT);
				flags = callout_unpend_disarm(c);
				atomic_set_int(&c->c_flags, CALLOUT_EXECUTED);
				crit_exit();
				c_func(c_arg);
				crit_enter();
			} else {
				flags = callout_unpend_disarm(c);
			}

			/*
			 * Read and clear sc->running.  If bit 0 was set,
			 * a callout_stop() is likely blocked waiting for
			 * the callback to complete.
			 *
			 * The callout_unpend_disarm() above also cleared
			 * CALLOUT_WAITING and returned the contents of
			 * flags prior to clearing any bits.
			 *
			 * Interlock wakeup any _stop's waiting on us.  Note
			 * that once c_func() was called, the callout
			 * structure (c) pointer may no longer be valid.  It
			 * can only be used for the wakeup.
			 */
			if ((atomic_readandclear_ptr(&sc->running) & 1) ||
			    (flags & CALLOUT_WAITING)) {
				wakeup(c);
			}
			/* NOTE: list may have changed */
		}
		++sc->softticks;
	}

	/*
	 * Don't leave us holding the MP lock when we deschedule ourselves.
	 */
	if (mpsafe == 0) {
		mpsafe = 1;
		rel_mplock();
	}
	sc->isrunning = 0;
	lwkt_deschedule_self(&sc->thread);	/* == curthread */
	lwkt_switch();
	goto loop;
	/* NOT REACHED */
}

/*
 * A very slow system cleanup timer (10 second interval),
 * per-cpu.
 */
void
slotimer_callback(void *arg)
{
	struct callout *c = arg;

	slab_cleanup();
	callout_reset(c, hz * 10, slotimer_callback, c);
}

/*
 * Start or restart a timeout.  Installs the callout structure on the
 * callwheel.  Callers may legally pass any value, even if 0 or negative,
 * but since the sc->curticks index may have already been processed a
 * minimum timeout of 1 tick will be enforced.
 *
 * This function will block if the callout is currently queued to a different
 * cpu or the callback is currently running in another thread.
 */
void
callout_reset(struct callout *c, int to_ticks, void (*ftn)(void *), void *arg)
{
	softclock_pcpu_t sc;
	globaldata_t gd;

#ifdef INVARIANTS
	if ((c->c_flags & CALLOUT_DID_INIT) == 0) {
		callout_init(c);
		kprintf(
		    "callout_reset(%p) from %p: callout was not initialized\n",
		    c, ((int **)&c)[-1]);
		print_backtrace(-1);
	}
#endif
	gd = mycpu;
	sc = &softclock_pcpu_ary[gd->gd_cpuid];
	crit_enter_gd(gd);

	/*
	 * Our cpu must gain ownership of the callout and cancel anything
	 * still running, which is complex.  The easiest way to do it is to
	 * issue a callout_stop().
	 *
	 * Clearing bits on flags is a way to guarantee they are not set,
	 * as the cmpset atomic op will fail otherwise.  PENDING and ARMED
	 * must not be set, if we find them set we loop up and call
	 * stop_sync() again.
	 */
	for (;;) {
		int flags;
		int nflags;

		callout_stop_sync(c);
		flags = c->c_flags & ~(CALLOUT_PENDING | CALLOUT_ARMED);
		nflags = (flags & ~(CALLOUT_CPU_MASK |
				    CALLOUT_EXECUTED)) |
			 CALLOUT_CPU_TO_FLAGS(gd->gd_cpuid) |
			 CALLOUT_ARMED |
			 CALLOUT_PENDING |
			 CALLOUT_ACTIVE;
		if (atomic_cmpset_int(&c->c_flags, flags, nflags))
			break;
	}

	if (to_ticks <= 0)
		to_ticks = 1;

	c->c_arg = arg;
	c->c_func = ftn;
	c->c_time = sc->curticks + to_ticks;

	TAILQ_INSERT_TAIL(&sc->callwheel[c->c_time & cwheelmask],
			  c, c_links.tqe);
	crit_exit_gd(gd);
}

/*
 * Setup a callout to run on the specified cpu.  Should generally be used
 * to run a callout on a specific cpu which does not nominally change.
 */
void
callout_reset_bycpu(struct callout *c, int to_ticks, void (*ftn)(void *),
		    void *arg, int cpuid)
{
	globaldata_t gd;
	globaldata_t tgd;

#ifdef INVARIANTS
	if ((c->c_flags & CALLOUT_DID_INIT) == 0) {
		callout_init(c);
		kprintf(
		    "callout_reset(%p) from %p: callout was not initialized\n",
		    c, ((int **)&c)[-1]);
		print_backtrace(-1);
	}
#endif
	gd = mycpu;
	crit_enter_gd(gd);

	tgd = globaldata_find(cpuid);

	/*
	 * Our cpu must temporarily gain ownership of the callout and cancel
	 * anything still running, which is complex.  The easiest way to do
	 * it is to issue a callout_stop().
	 *
	 * Clearing bits on flags (vs nflags) is a way to guarantee they were
	 * not previously set, by forcing the atomic op to fail.  The callout
	 * must not be pending or armed after the stop_sync, if it is we have
	 * to loop up and stop_sync() again.
	 */
	for (;;) {
		int flags;
		int nflags;

		callout_stop_sync(c);
		flags = c->c_flags & ~(CALLOUT_PENDING | CALLOUT_ARMED);
		nflags = (flags & ~(CALLOUT_CPU_MASK |
				    CALLOUT_EXECUTED)) |
			 CALLOUT_CPU_TO_FLAGS(tgd->gd_cpuid) |
			 CALLOUT_ARMED |
			 CALLOUT_ACTIVE;
		nflags = nflags + 1;		/* bump IPI count */
		if (atomic_cmpset_int(&c->c_flags, flags, nflags))
			break;
		cpu_pause();
	}

	/*
	 * Even though we are not the cpu that now owns the callout, our
	 * bumping of the IPI count (and in a situation where the callout is
	 * not queued to the callwheel) will prevent anyone else from
	 * depending on or acting on the contents of the callout structure.
	 */
	if (to_ticks <= 0)
		to_ticks = 1;

	c->c_arg = arg;
	c->c_func = ftn;
	c->c_load = to_ticks;	/* IPI will add curticks */

	lwkt_send_ipiq(tgd, callout_reset_ipi, c);
	crit_exit_gd(gd);
}

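/*
 * Illustrative sketch (added commentary, not compiled): a hypothetical
 * subsystem pinning one periodic callout to each cpu with
 * callout_reset_bycpu().  The "percpu_*" names are invented for this
 * example only.
 */
#if 0
static struct callout percpu_timer[MAXCPU];

static void
percpu_tick(void *arg)
{
	struct callout *c = arg;

	/* ... per-cpu work; re-arms on the cpu it was scheduled on ... */
	callout_reset(c, hz, percpu_tick, c);
}

static void
percpu_timer_start(void)
{
	int cpu;

	for (cpu = 0; cpu < ncpus; ++cpu) {
		callout_init_mp(&percpu_timer[cpu]);
		callout_reset_bycpu(&percpu_timer[cpu], hz,
				    percpu_tick, &percpu_timer[cpu], cpu);
	}
}
#endif
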
/*
 * Remote IPI for callout_reset_bycpu().  The operation is performed only
 * on the 1->0 transition of the counter, otherwise there are callout_stop()s
 * pending after us.
 *
 * The IPI counter and PENDING flags must be set atomically with the
 * 1->0 transition.  The ACTIVE flag was set prior to the ipi being
 * sent and we do not want to race a caller on the original cpu trying
 * to deactivate() the flag concurrent with our installation of the
 * callout.
 */
static void
callout_reset_ipi(void *arg)
{
	struct callout *c = arg;
	globaldata_t gd = mycpu;
	globaldata_t tgd;
	int flags;
	int nflags;

	for (;;) {
		flags = c->c_flags;
		cpu_ccfence();
		KKASSERT((flags & CALLOUT_IPI_MASK) > 0);

		/*
		 * We should already be armed for our cpu.  If armed to
		 * another cpu, chain the IPI.  If for some reason we are
		 * not armed, we can arm ourselves.
		 */
		if (flags & CALLOUT_ARMED) {
			if (CALLOUT_FLAGS_TO_CPU(flags) != gd->gd_cpuid) {
				tgd = globaldata_find(
						CALLOUT_FLAGS_TO_CPU(flags));
				lwkt_send_ipiq(tgd, callout_reset_ipi, c);
				return;
			}
			nflags = (flags & ~CALLOUT_EXECUTED);
		} else {
			nflags = (flags & ~(CALLOUT_CPU_MASK |
					    CALLOUT_EXECUTED)) |
				 CALLOUT_ARMED |
				 CALLOUT_CPU_TO_FLAGS(gd->gd_cpuid);
		}

		/*
		 * Decrement the IPI count, retain and clear the WAITING
		 * status, clear EXECUTED.
		 *
		 * NOTE: It is possible for the callout to already have been
		 *	 marked pending due to SMP races.
		 */
		nflags = nflags - 1;
		if ((flags & CALLOUT_IPI_MASK) == 1) {
			nflags &= ~(CALLOUT_WAITING | CALLOUT_EXECUTED);
			nflags |= CALLOUT_PENDING;
		}

		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
			/*
			 * Only install the callout on the 1->0 transition
			 * of the IPI count, and only if PENDING was not
			 * already set.  The latter situation should never
			 * occur but we check anyway.
			 */
			if ((flags & (CALLOUT_PENDING|CALLOUT_IPI_MASK)) == 1) {
				softclock_pcpu_t sc;

				sc = &softclock_pcpu_ary[gd->gd_cpuid];
				c->c_time = sc->curticks + c->c_load;
				TAILQ_INSERT_TAIL(
					&sc->callwheel[c->c_time & cwheelmask],
					c, c_links.tqe);
			}
			break;
		}
		/* retry */
		cpu_pause();
	}

	/*
	 * Issue wakeup if requested.
	 */
	if (flags & CALLOUT_WAITING)
		wakeup(c);
}

/*
 * Stop a running timer and ensure that any running callout completes before
 * returning.  If the timer is running on another cpu this function may block
 * to interlock against the callout.  If the callout is currently executing
 * or blocked in another thread this function may also block to interlock
 * against the callout.
 *
 * The caller must be careful to avoid deadlocks, either by using
 * callout_init_lk() (which uses the lockmgr lock cancelation feature),
 * by using tokens and dealing with breaks in the serialization, or by
 * using the lockmgr lock cancelation feature yourself in the callout
 * callback function.
 *
 * callout_stop() returns non-zero if the callout was pending.
 */
static int
_callout_stop(struct callout *c, int issync)
{
	globaldata_t gd = mycpu;
	globaldata_t tgd;
	softclock_pcpu_t sc;
	int flags;
	int nflags;
	int rc;
	int cpuid;

#ifdef INVARIANTS
	if ((c->c_flags & CALLOUT_DID_INIT) == 0) {
		callout_init(c);
		kprintf(
		    "callout_stop(%p) from %p: callout was not initialized\n",
		    c, ((int **)&c)[-1]);
		print_backtrace(-1);
	}
#endif
	crit_enter_gd(gd);

	/*
	 * Fast path operations:
	 *
	 * If ARMED and owned by our cpu, or not ARMED, and other simple
	 * conditions are met, we can just clear ACTIVE and EXECUTED
	 * and we are done.
	 */
	for (;;) {
		flags = c->c_flags;
		cpu_ccfence();

		cpuid = CALLOUT_FLAGS_TO_CPU(flags);

		/*
		 * Can't handle an armed callout in the fast path if it is
		 * not on the current cpu.  We must atomically increment the
		 * IPI count for the IPI we intend to send and break out of
		 * the fast path to enter the slow path.
		 */
		if (flags & CALLOUT_ARMED) {
			if (gd->gd_cpuid != cpuid) {
				nflags = flags + 1;
				if (atomic_cmpset_int(&c->c_flags,
						      flags, nflags)) {
					/* break to slow path */
					break;
				}
				continue;	/* retry */
			}
		} else {
			cpuid = gd->gd_cpuid;
			KKASSERT((flags & CALLOUT_IPI_MASK) == 0);
			KKASSERT((flags & CALLOUT_PENDING) == 0);
		}

		/*
		 * Process pending IPIs and retry (only if not called from
		 * an IPI).
		 */
		if (flags & CALLOUT_IPI_MASK) {
			lwkt_process_ipiq();
			continue;	/* retry */
		}

		/*
		 * Transition to the stopped state, recover the EXECUTED
		 * status.  If pending we cannot clear ARMED until after
		 * we have removed (c) from the callwheel.
		 *
		 * NOTE: The callout might already not be armed but in this
		 *	 case it should also not be pending.
		 */
		nflags = flags & ~(CALLOUT_ACTIVE |
				   CALLOUT_EXECUTED |
				   CALLOUT_WAITING |
				   CALLOUT_PENDING);

		/* NOTE: IPI_MASK already tested */
		if ((flags & CALLOUT_PENDING) == 0)
			nflags &= ~CALLOUT_ARMED;
		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
			/*
			 * Can only remove from callwheel if currently
			 * pending.
			 */
			if (flags & CALLOUT_PENDING) {
				sc = &softclock_pcpu_ary[gd->gd_cpuid];
				if (sc->next == c)
					sc->next = TAILQ_NEXT(c, c_links.tqe);
				TAILQ_REMOVE(
					&sc->callwheel[c->c_time & cwheelmask],
					c,
					c_links.tqe);
				c->c_func = NULL;

				/*
				 * NOTE: Can't clear ARMED until we have
				 *	 physically removed (c) from the
				 *	 callwheel.
				 *
				 * NOTE: WAITING bit race exists when doing
				 *	 unconditional bit clears.
				 */
				callout_maybe_clear_armed(c);
				if (c->c_flags & CALLOUT_WAITING)
					flags |= CALLOUT_WAITING;
			}

			/*
			 * ARMED has been cleared at this point and (c)
			 * might now be stale.  Only good for wakeup()s.
			 */
			if (flags & CALLOUT_WAITING)
				wakeup(c);

			goto skip_slow;
		}
		/* retry */
	}

	/*
	 * Slow path (and not called via an IPI).
	 *
	 * When ARMED to a different cpu the stop must be processed on that
	 * cpu.  Issue the IPI and wait for completion.  We have already
	 * incremented the IPI count.
	 */
	tgd = globaldata_find(cpuid);
	lwkt_send_ipiq3(tgd, callout_stop_ipi, c, issync);

	for (;;) {
		int flags;
		int nflags;

		flags = c->c_flags;
		cpu_ccfence();
		if ((flags & CALLOUT_IPI_MASK) == 0)	/* fast path */
			break;
		nflags = flags | CALLOUT_WAITING;
		tsleep_interlock(c, 0);
		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
			tsleep(c, PINTERLOCKED, "cstp1", 0);
		}
	}

skip_slow:

	/*
	 * If (issync) we must also wait for any in-progress callbacks to
	 * complete, unless the stop is being executed from the callback
	 * itself.  The EXECUTED flag is set prior to the callback
	 * being made so our existing flags status already has it.
	 *
	 * If auto-lock mode is being used, this is where we cancel any
	 * blocked lock that is potentially preventing the target cpu
	 * from completing the callback.
	 */
	while (issync) {
		intptr_t *runp;
		intptr_t runco;

		sc = &softclock_pcpu_ary[cpuid];
		if (gd->gd_curthread == &sc->thread)	/* stop from cb */
			break;
		runp = &sc->running;
		runco = *runp;
		cpu_ccfence();
		if ((runco & ~(intptr_t)1) != (intptr_t)c)
			break;
		if (c->c_flags & CALLOUT_AUTOLOCK)
			lockmgr(c->c_lk, LK_CANCEL_BEG);
		tsleep_interlock(c, 0);
		if (atomic_cmpset_long(runp, runco, runco | 1))
			tsleep(c, PINTERLOCKED, "cstp3", 0);
		if (c->c_flags & CALLOUT_AUTOLOCK)
			lockmgr(c->c_lk, LK_CANCEL_END);
	}

	crit_exit_gd(gd);
	rc = (flags & CALLOUT_EXECUTED) != 0;

	return rc;
}

static
void
callout_stop_ipi(void *arg, int issync, struct intrframe *frame)
{
	globaldata_t gd = mycpu;
	struct callout *c = arg;
	softclock_pcpu_t sc;

	/*
	 * Only the fast path can run in an IPI.  Chain the stop request
	 * if we are racing cpu changes.
	 */
	for (;;) {
		globaldata_t tgd;
		int flags;
		int nflags;
		int cpuid;

		flags = c->c_flags;
		cpu_ccfence();

		/*
		 * Can't handle an armed callout in the fast path if it is
		 * not on the current cpu.  We must atomically increment the
		 * IPI count and break out of the fast path.
		 *
		 * If called from an IPI we chain the IPI instead.
		 */
		if (flags & CALLOUT_ARMED) {
			cpuid = CALLOUT_FLAGS_TO_CPU(flags);
			if (gd->gd_cpuid != cpuid) {
				tgd = globaldata_find(cpuid);
				lwkt_send_ipiq3(tgd, callout_stop_ipi,
						c, issync);
				break;
			}
		}

		/*
		 * NOTE: As an IPI ourselves we cannot wait for other IPIs
		 *	 to complete, and we are being executed in-order.
		 */

		/*
		 * Transition to the stopped state, recover the EXECUTED
		 * status, decrement the IPI count.  If pending we cannot
		 * clear ARMED until after we have removed (c) from the
		 * callwheel, and only if there are no more IPIs pending.
		 */
		nflags = flags & ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
		nflags = nflags - 1;			/* dec ipi count */
		if ((flags & (CALLOUT_IPI_MASK | CALLOUT_PENDING)) == 1)
			nflags &= ~CALLOUT_ARMED;
		if ((flags & CALLOUT_IPI_MASK) == 1)
			nflags &= ~(CALLOUT_WAITING | CALLOUT_EXECUTED);

		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
			/*
			 * Can only remove from callwheel if currently
			 * pending.
			 */
			if (flags & CALLOUT_PENDING) {
				sc = &softclock_pcpu_ary[gd->gd_cpuid];
				if (sc->next == c)
					sc->next = TAILQ_NEXT(c, c_links.tqe);
				TAILQ_REMOVE(
					&sc->callwheel[c->c_time & cwheelmask],
					c,
					c_links.tqe);
				c->c_func = NULL;

				/*
				 * NOTE: Can't clear ARMED until we have
				 *	 physically removed (c) from the
				 *	 callwheel.
				 *
				 * NOTE: WAITING bit race exists when doing
				 *	 unconditional bit clears.
				 */
				callout_maybe_clear_armed(c);
				if (c->c_flags & CALLOUT_WAITING)
					flags |= CALLOUT_WAITING;
			}

			/*
			 * ARMED has been cleared at this point and (c)
			 * might now be stale.  Only good for wakeup()s.
			 */
			if (flags & CALLOUT_WAITING)
				wakeup(c);
			break;
		}
		/* retry */
	}
}

int
callout_stop(struct callout *c)
{
	return _callout_stop(c, 0);
}

int
callout_stop_sync(struct callout *c)
{
	return _callout_stop(c, 1);
}

void
callout_stop_async(struct callout *c)
{
	_callout_stop(c, 0);
}

void
callout_terminate(struct callout *c)
{
	_callout_stop(c, 1);
	atomic_clear_int(&c->c_flags, CALLOUT_DID_INIT);
}

/*
 * Prepare a callout structure for use by callout_reset() and/or
 * callout_stop().
 *
 * The MP version of this routine requires that the callback
 * function installed by callout_reset() be MP safe.
 *
 * The LK version of this routine is also MP safe and will automatically
 * acquire the specified lock for the duration of the function call,
 * and release it after the function returns.  In addition, when autolocking
 * is used, callout_stop() becomes synchronous if the caller owns the lock.
 * callout_reset(), callout_stop(), and callout_stop_sync() will block
 * normally instead of spinning when a cpu race occurs.  Lock cancelation
 * is used to avoid deadlocks against the callout ring dispatch.
 *
 * The init functions can be called from any cpu and do not have to be
 * called from the cpu that the timer will eventually run on.
 */
static __inline
void
_callout_init(struct callout *c, int flags)
{
	bzero(c, sizeof *c);
	c->c_flags = flags;
}

void
callout_init(struct callout *c)
{
	_callout_init(c, CALLOUT_DID_INIT);
}

void
callout_init_mp(struct callout *c)
{
	_callout_init(c, CALLOUT_DID_INIT | CALLOUT_MPSAFE);
}

void
callout_init_lk(struct callout *c, struct lock *lk)
{
	_callout_init(c, CALLOUT_DID_INIT | CALLOUT_MPSAFE | CALLOUT_AUTOLOCK);
	c->c_lk = lk;
}
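
/*
 * Illustrative usage sketch (added commentary, not compiled): a hypothetical
 * driver arming a periodic timer whose callback runs under the driver's
 * lockmgr lock via callout_init_lk(), and tearing it down safely with
 * callout_terminate() before the softc is freed.  All "xxdrv_*" names are
 * invented for this example; lockinit()/lockuninit() are the standard
 * lockmgr setup/teardown routines.
 */
#if 0
struct xxdrv_softc {
	struct lock	xx_lk;
	struct callout	xx_timer;
};

static void
xxdrv_tick(void *arg)
{
	struct xxdrv_softc *xsc = arg;

	/* xx_lk is held here because the callout was set up with AUTOLOCK */
	/* ... periodic work ... */
	callout_reset(&xsc->xx_timer, hz, xxdrv_tick, xsc);
}

static void
xxdrv_attach(struct xxdrv_softc *xsc)
{
	lockinit(&xsc->xx_lk, "xxdrv", 0, 0);
	callout_init_lk(&xsc->xx_timer, &xsc->xx_lk);
	callout_reset(&xsc->xx_timer, hz, xxdrv_tick, xsc);
}

static void
xxdrv_detach(struct xxdrv_softc *xsc)
{
	/*
	 * Waits for any in-flight callback; lock cancelation prevents a
	 * deadlock against the autolock held in xxdrv_tick().
	 */
	callout_terminate(&xsc->xx_timer);
	lockuninit(&xsc->xx_lk);
}
#endif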