xref: /dragonfly/sys/kern/kern_timeout.c (revision e10ffbc2)
1 /*
2  * Copyright (c) 2004,2014 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 /*
35  * Copyright (c) 1982, 1986, 1991, 1993
36  *	The Regents of the University of California.  All rights reserved.
37  * (c) UNIX System Laboratories, Inc.
38  * All or some portions of this file are derived from material licensed
39  * to the University of California by American Telephone and Telegraph
40  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
41  * the permission of UNIX System Laboratories, Inc.
42  *
43  * Redistribution and use in source and binary forms, with or without
44  * modification, are permitted provided that the following conditions
45  * are met:
46  * 1. Redistributions of source code must retain the above copyright
47  *    notice, this list of conditions and the following disclaimer.
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  * 3. Neither the name of the University nor the names of its contributors
52  *    may be used to endorse or promote products derived from this software
53  *    without specific prior written permission.
54  *
55  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
56  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
57  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
58  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
59  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
60  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
61  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
62  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
63  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
64  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65  * SUCH DAMAGE.
66  */
67 /*
68  * The original callout mechanism was based on the work of Adam M. Costello
69  * and George Varghese, published in a technical report entitled "Redesigning
70  * the BSD Callout and Timer Facilities" and modified slightly for inclusion
71  * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
72  * used in this implementation was published by G. Varghese and T. Lauck in
73  * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
74  * the Efficient Implementation of a Timer Facility" in the Proceedings of
75  * the 11th ACM Annual Symposium on Operating Systems Principles,
76  * Austin, Texas Nov 1987.
77  *
78  * The per-cpu augmentation was done by Matthew Dillon.  This file has
79  * essentially been rewritten from scratch by Matt.
80  */
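
/*
 * Implementation overview: each cpu owns a private callwheel made up of
 * cwheelsize (a power of 2) bucket lists.  A callout armed for c_time
 * hashes to bucket (c_time & cwheelmask), so insertion and removal are
 * O(1) and the per-cpu softclock thread only has to scan a single bucket
 * for each tick it processes.
 */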
81 
82 #include <sys/param.h>
83 #include <sys/systm.h>
84 #include <sys/callout.h>
85 #include <sys/kernel.h>
86 #include <sys/interrupt.h>
87 #include <sys/thread.h>
88 
89 #include <sys/thread2.h>
90 #include <sys/mplock2.h>
91 
92 struct softclock_pcpu {
93 	struct callout_tailq *callwheel; /* per-cpu callwheel buckets */
94 	struct callout * volatile next;	/* scan cursor, IPIs may adjust */
95 	intptr_t running;	/* NOTE! Bit 0 used to flag wakeup */
96 	int softticks;		/* where the callwheel is currently indexed */
97 	int curticks;		/* per-cpu ticks counter */
98 	int isrunning;		/* helper thread is scheduled/running */
99 	struct thread *thread;	/* per-cpu softclock helper thread */
100 };
101 
102 typedef struct softclock_pcpu *softclock_pcpu_t;
103 
104 static MALLOC_DEFINE(M_CALLOUT, "callout", "callout structures");
105 static int cwheelsize;
106 static int cwheelmask;
107 static struct softclock_pcpu softclock_pcpu_ary[MAXCPU];
108 
109 static void softclock_handler(void *arg);
110 static void slotimer_callback(void *arg);
111 static void callout_reset_ipi(void *arg);
112 static void callout_stop_ipi(void *arg, int issync, struct intrframe *frame);
113 
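/*
 * Atomically set the bits in (sflags) and clear the bits in (cflags)
 * in c->c_flags, returning c_flags as it was prior to the modification.
 */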
114 static __inline
115 int
116 callout_setclear(struct callout *c, int sflags, int cflags)
117 {
118 	int flags;
119 	int nflags;
120 
121 	for (;;) {
122 		flags = c->c_flags;
123 		cpu_ccfence();
124 		nflags = (flags | sflags) & ~cflags;
125 		if (atomic_cmpset_int(&c->c_flags, flags, nflags))
126 			break;
127 	}
128 	return flags;
129 }
130 
131 static void
132 swi_softclock_setup(void *arg)
133 {
134 	int cpu;
135 	int i;
136 	int target;
137 
138 	/*
139 	 * Figure out how large a callwheel we need.  It must be a power of 2.
140 	 *
141 	 * ncallout is primarily based on available memory; don't explode
142 	 * the allocations if the system has a lot of cpus.
143 	 */
144 	target = ncallout / ncpus + 16;
145 
146 	cwheelsize = 1;
147 	while (cwheelsize < target)
148 		cwheelsize <<= 1;
149 	cwheelmask = cwheelsize - 1;
150 
151 	/*
152 	 * Initialize per-cpu data structures.
153 	 */
154 	for (cpu = 0; cpu < ncpus; ++cpu) {
155 		softclock_pcpu_t sc;
156 
157 		sc = &softclock_pcpu_ary[cpu];
158 
159 		sc->callwheel = kmalloc(sizeof(*sc->callwheel) * cwheelsize,
160 					M_CALLOUT, M_WAITOK|M_ZERO);
161 		for (i = 0; i < cwheelsize; ++i)
162 			TAILQ_INIT(&sc->callwheel[i]);
163 
164 		/*
165 		 * Mark the softclock handler as being an interrupt thread
166 		 * even though it really isn't, but do not allow it to
167 		 * preempt other threads (do not assign td_preemptable).
168 		 *
169 		 * Kernel code now assumes that callouts do not preempt
170 		 * the cpu they were scheduled on.
171 		 */
172 		lwkt_create(softclock_handler, sc, &sc->thread, NULL,
173 			    TDF_NOSTART | TDF_INTTHREAD,
174 			    cpu, "softclock %d", cpu);
175 	}
176 }
177 
178 /*
179  * Must occur after ncpus has been initialized.
180  */
181 SYSINIT(softclock_setup, SI_BOOT2_SOFTCLOCK, SI_ORDER_SECOND,
182 	swi_softclock_setup, NULL);
183 
184 /*
185  * This routine is called from the hardclock() (basically a FASTint/IPI) on
186  * each cpu in the system.  sc->curticks is this cpu's notion of the timebase.
187  * It IS NOT NECESSARILY SYNCHRONIZED WITH 'ticks'!  sc->softticks is where
188  * the callwheel is currently indexed.
189  *
190  * WARNING!  The MP lock is not necessarily held on call, nor can it be
191  * safely obtained.
192  *
193  * sc->softticks is adjusted by either this routine or our helper thread
194  * depending on whether the helper thread is running or not.
195  */
196 void
197 hardclock_softtick(globaldata_t gd)
198 {
199 	softclock_pcpu_t sc;
200 
201 	sc = &softclock_pcpu_ary[gd->gd_cpuid];
202 	++sc->curticks;
203 	if (sc->isrunning)
204 		return;
205 	if (sc->softticks == sc->curticks) {
206 		/*
207 		 * In sync; only wake the thread up if there is something
208 		 * to do.
209 		 */
210 		if (TAILQ_FIRST(&sc->callwheel[sc->softticks & cwheelmask])) {
211 			sc->isrunning = 1;
212 			lwkt_schedule(sc->thread);
213 		} else {
214 			++sc->softticks;
215 		}
216 	} else {
217 		/*
218 		 * Out of sync; wake the thread up unconditionally so it
219 		 * can catch up.
220 		 */
221 		sc->isrunning = 1;
222 		lwkt_schedule(sc->thread);
223 	}
224 }
225 
226 /*
227  * This procedure is the main loop of our per-cpu helper thread.  The
228  * sc->isrunning flag prevents us from racing hardclock_softtick() and
229  * a critical section is sufficient to interlock sc->curticks and protect
230  * us from remote IPIs / list removal.
231  *
232  * The thread starts with the MP lock released and not in a critical
233  * section.  The loop itself is MP safe while individual callbacks
234  * may or may not be, so we obtain or release the MP lock as appropriate.
235  */
236 static void
237 softclock_handler(void *arg)
238 {
239 	softclock_pcpu_t sc;
240 	struct callout *c;
241 	struct callout_tailq *bucket;
242 	struct callout slotimer;
243 	int mpsafe = 1;
244 	int flags;
245 
246 	/*
247 	 * Set up pcpu slow clocks which we want to run from the callout
248 	 * thread.
249 	 */
250 	callout_init_mp(&slotimer);
251 	callout_reset(&slotimer, hz * 10, slotimer_callback, &slotimer);
252 
253 	/*
254 	 * Run the callout thread at the same priority as other kernel
255 	 * threads so it can be round-robined.
256 	 */
257 	/*lwkt_setpri_self(TDPRI_SOFT_NORM);*/
258 
259 	/*
260 	 * Loop critical section against ipi operations to this cpu.
261 	 */
262 	sc = arg;
263 	crit_enter();
264 loop:
265 	while (sc->softticks != (int)(sc->curticks + 1)) {
266 		bucket = &sc->callwheel[sc->softticks & cwheelmask];
267 
268 		for (c = TAILQ_FIRST(bucket); c; c = sc->next) {
269 			void (*c_func)(void *);
270 			void *c_arg;
271 			struct lock *c_lk;
272 			int error;
273 
274 			if (c->c_time != sc->softticks) {
275 				sc->next = TAILQ_NEXT(c, c_links.tqe);
276 				continue;
277 			}
278 
279 			/*
280 			 * Synchronize with mpsafe requirements
281 			 */
282 			flags = c->c_flags;
283 			if (flags & CALLOUT_MPSAFE) {
284 				if (mpsafe == 0) {
285 					mpsafe = 1;
286 					rel_mplock();
287 				}
288 			} else {
289 				/*
290 				 * The request might be removed while we
291 				 * are waiting to get the MP lock.  If it
292 				 * was removed sc->next will point to the
293 				 * next valid request or NULL, loop up.
294 				 */
295 				if (mpsafe) {
296 					mpsafe = 0;
297 					sc->next = c;
298 					get_mplock();
299 					if (c != sc->next)
300 						continue;
301 				}
302 			}
303 
304 			/*
305 			 * Queue protection only exists while we hold the
306 			 * critical section uninterrupted.
307 			 *
308 			 * Adjust sc->next when removing (c) from the queue,
309 			 * note that an IPI on this cpu may make further
310 			 * adjustments to sc->next.
311 			 */
312 			sc->next = TAILQ_NEXT(c, c_links.tqe);
313 			TAILQ_REMOVE(bucket, c, c_links.tqe);
314 
315 			KASSERT((c->c_flags & CALLOUT_DID_INIT) &&
316 				(c->c_flags & CALLOUT_PENDING) &&
317 				CALLOUT_FLAGS_TO_CPU(c->c_flags) ==
318 				mycpu->gd_cpuid,
319 				("callout %p: bad flags %08x", c, c->c_flags));
320 
321 			/*
322 			 * Once CALLOUT_PENDING is cleared only the IPI_MASK
323 			 * prevents the callout from being moved to another
324 			 * cpu.  However, callout_stop() will also check
325 			 * sc->running on the assigned cpu if CALLOUT_EXECUTED
326 			 * is set.  CALLOUT_EXECUTED implies a callback
327 			 * interlock is needed when cross-cpu.
328 			 */
329 			sc->running = (intptr_t)c;
330 			c_func = c->c_func;
331 			c_arg = c->c_arg;
332 			c_lk = c->c_lk;
333 			c->c_func = NULL;
334 
335 			if ((flags & (CALLOUT_AUTOLOCK | CALLOUT_ACTIVE)) ==
336 			    (CALLOUT_AUTOLOCK | CALLOUT_ACTIVE)) {
337 				error = lockmgr(c_lk, LK_EXCLUSIVE |
338 						      LK_CANCELABLE);
339 				if (error == 0) {
340 					flags = callout_setclear(c,
341 							CALLOUT_EXECUTED,
342 							CALLOUT_PENDING |
343 							CALLOUT_WAITING);
344 					crit_exit();
345 					c_func(c_arg);
346 					crit_enter();
347 					lockmgr(c_lk, LK_RELEASE);
348 				} else {
349 					flags = callout_setclear(c,
350 							0,
351 							CALLOUT_PENDING);
352 				}
353 			} else if (flags & CALLOUT_ACTIVE) {
354 				flags = callout_setclear(c,
355 						CALLOUT_EXECUTED,
356 						CALLOUT_PENDING |
357 						CALLOUT_WAITING);
358 				crit_exit();
359 				c_func(c_arg);
360 				crit_enter();
361 			} else {
362 				flags = callout_setclear(c,
363 						0,
364 						CALLOUT_PENDING |
365 						CALLOUT_WAITING);
366 			}
367 
368 			/*
369 			 * Read and clear sc->running.  If bit 0 was set,
370 			 * a callout_stop() is likely blocked waiting for
371 			 * the callback to complete.
372 			 *
373 			 * The callout_setclear() above also cleared
374 			 * CALLOUT_WAITING and returned the contents of flags
375 			 * prior to clearing any bits.
376 			 *
377 			 * Interlock wakeup of any callout_stop() waiting on
378 			 * us.  Note that once c_func() has been called the
379 			 * callout structure (c) pointer may no longer be
380 			 * valid.  It can only be used for the wakeup.
381 			 */
382 			if ((atomic_readandclear_ptr(&sc->running) & 1) ||
383 			    (flags & CALLOUT_WAITING)) {
384 				wakeup(c);
385 			}
386 			/* NOTE: list may have changed */
387 		}
388 		++sc->softticks;
389 	}
390 
391 	/*
392 	 * Don't leave us holding the MP lock when we deschedule ourselves.
393 	 */
394 	if (mpsafe == 0) {
395 		mpsafe = 1;
396 		rel_mplock();
397 	}
398 	sc->isrunning = 0;
399 	lwkt_deschedule_self(sc->thread);	/* == curthread */
400 	lwkt_switch();
401 	goto loop;
402 	/* NOT REACHED */
403 }
404 
405 /*
406  * A very slow system cleanup timer (10 second interval),
407  * per-cpu.
408  */
409 static void
410 slotimer_callback(void *arg)
411 {
412 	struct callout *c = arg;
413 
414 	slab_cleanup();
415 	callout_reset(c, hz * 10, slotimer_callback, c);
416 }
417 
418 /*
419  * Start or restart a timeout.  Installs the callout structure on the
420  * callwheel of the current cpu.  Callers may legally pass any value, even
421  * if 0 or negative, but since the sc->curticks index may have already
422  * been processed, a minimum timeout of 1 tick will be enforced.
423  *
424  * This function will block if the callout is currently queued to a different
425  * cpu or the callback is currently running in another thread.
426  */
427 void
428 callout_reset(struct callout *c, int to_ticks, void (*ftn)(void *), void *arg)
429 {
430 	softclock_pcpu_t sc;
431 	globaldata_t gd;
432 
433 #ifdef INVARIANTS
434 	if ((c->c_flags & CALLOUT_DID_INIT) == 0) {
435 		callout_init(c);
436 		kprintf(
437 		    "callout_reset(%p) from %p: callout was not initialized\n",
438 		    c, ((int **)&c)[-1]);
439 		print_backtrace(-1);
440 	}
441 #endif
442 	gd = mycpu;
443 	sc = &softclock_pcpu_ary[gd->gd_cpuid];
444 	crit_enter_gd(gd);
445 
446 	/*
447 	 * Our cpu must gain ownership of the callout and cancel anything
448 	 * still running, which is complex.  The easiest way to do it is to
449 	 * issue a callout_stop_sync().  callout_stop_sync() will also
450 	 * handle CALLOUT_EXECUTED (dispatch waiting), and clear it.
451 	 *
452 	 * WARNING: callout_stop_sync()'s return state can race other
453 	 *	    callout_*() calls due to blocking, so we must re-check.
454 	 */
455 	for (;;) {
456 		int flags;
457 		int nflags;
458 
459 		if (c->c_flags & (CALLOUT_ARMED_MASK | CALLOUT_EXECUTED))
460 			callout_stop_sync(c);
461 		flags = c->c_flags & ~(CALLOUT_ARMED_MASK | CALLOUT_EXECUTED);
462 		nflags = (flags & ~CALLOUT_CPU_MASK) |
463 			 CALLOUT_CPU_TO_FLAGS(gd->gd_cpuid) |
464 			 CALLOUT_PENDING |
465 			 CALLOUT_ACTIVE;
466 		if (atomic_cmpset_int(&c->c_flags, flags, nflags))
467 			break;
468 		cpu_pause();
469 	}
470 
471 	/*
472 	 * With the critical section held and PENDING set we now 'own' the
473 	 * callout.
474 	 */
475 	if (to_ticks <= 0)
476 		to_ticks = 1;
477 
478 	c->c_arg = arg;
479 	c->c_func = ftn;
480 	c->c_time = sc->curticks + to_ticks;
481 
482 	TAILQ_INSERT_TAIL(&sc->callwheel[c->c_time & cwheelmask],
483 			  c, c_links.tqe);
484 	crit_exit_gd(gd);
485 }
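
/*
 * Illustrative sketch with hypothetical names (my_timer, my_timer_cb):
 * a periodic timer is normally built by re-arming the callout from its
 * own callback, the same pattern slotimer_callback() uses above.
 *
 *	static struct callout my_timer;
 *
 *	static void
 *	my_timer_cb(void *arg)
 *	{
 *		(do the periodic work, then re-arm one second out)
 *		callout_reset(&my_timer, hz, my_timer_cb, arg);
 *	}
 *
 *	callout_init_mp(&my_timer);
 *	callout_reset(&my_timer, hz, my_timer_cb, NULL);
 */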
486 
487 /*
488  * Set up a callout to run on the specified cpu.  Should generally be used
489  * to run a callout on a specific cpu which does not nominally change.  The
490  * reset operation is issued asynchronously to the target cpu via an IPI.
491  */
492 void
493 callout_reset_bycpu(struct callout *c, int to_ticks, void (*ftn)(void *),
494 		    void *arg, int cpuid)
495 {
496 	globaldata_t gd;
497 	globaldata_t tgd;
498 
499 #ifdef INVARIANTS
500 	if ((c->c_flags & CALLOUT_DID_INIT) == 0) {
501 		callout_init(c);
502 		kprintf(
503 		    "callout_reset_bycpu(%p) from %p: callout was not initialized\n",
504 		    c, ((int **)&c)[-1]);
505 		print_backtrace(-1);
506 	}
507 #endif
508 	gd = mycpu;
509 	crit_enter_gd(gd);
510 
511 	tgd = globaldata_find(cpuid);
512 
513 	/*
514 	 * This code is similar to the code in callout_reset() but we assign
515 	 * the callout to the target cpu.  We cannot set PENDING here since
516 	 * we cannot atomically add the callout to the target cpu's queue.
517 	 * However, incrementing the IPI count has the effect of locking
518 	 * the cpu assignment.
519 	 *
520 	 * WARNING: callout_stop_sync()'s return state can race other
521 	 *	    callout_*() calls due to blocking, so we must re-check.
522 	 */
523 	for (;;) {
524 		int flags;
525 		int nflags;
526 
527 		if (c->c_flags & (CALLOUT_ARMED_MASK | CALLOUT_EXECUTED))
528 			callout_stop_sync(c);
529 		flags = c->c_flags & ~(CALLOUT_ARMED_MASK | CALLOUT_EXECUTED);
530 		nflags = (flags & ~(CALLOUT_CPU_MASK |
531 				    CALLOUT_EXECUTED)) |
532 			 CALLOUT_CPU_TO_FLAGS(tgd->gd_cpuid) |
533 			 CALLOUT_ACTIVE;
534 		nflags = nflags + 1;		/* bump IPI count */
535 		if (atomic_cmpset_int(&c->c_flags, flags, nflags))
536 			break;
537 		cpu_pause();
538 	}
539 
540 	/*
541 	 * Since we control our +1 in the IPI count, the target cpu cannot
542 	 * now change until our IPI is processed.
543 	 */
544 	if (to_ticks <= 0)
545 		to_ticks = 1;
546 
547 	c->c_arg = arg;
548 	c->c_func = ftn;
549 	c->c_load = to_ticks;	/* IPI will add curticks */
550 
551 	lwkt_send_ipiq(tgd, callout_reset_ipi, c);
552 	crit_exit_gd(gd);
553 }
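
/*
 * Illustrative sketch with hypothetical names (pcpu_timer, pcpu_cb):
 * arm a callout that is dispatched by a specific cpu's softclock thread,
 * e.g. for per-cpu housekeeping.
 *
 *	callout_init_mp(&pcpu_timer[cpu]);
 *	callout_reset_bycpu(&pcpu_timer[cpu], hz, pcpu_cb, NULL, cpu);
 *
 * The callback then runs on the target cpu rather than on the arming cpu.
 */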
554 
555 /*
556  * Remote IPI for callout_reset_bycpu().  The cpu assignment cannot be
557  * ripped out from under us due to the count in IPI_MASK, but it is possible
558  * that other IPIs executed so we must deal with other flags that might
559  * have been set or cleared.
560  */
561 static void
562 callout_reset_ipi(void *arg)
563 {
564 	struct callout *c = arg;
565 	globaldata_t gd = mycpu;
566 	softclock_pcpu_t sc;
567 	int flags;
568 	int nflags;
569 
570 	sc = &softclock_pcpu_ary[gd->gd_cpuid];
571 
572 	for (;;) {
573 		flags = c->c_flags;
574 		cpu_ccfence();
575 		KKASSERT((flags & CALLOUT_IPI_MASK) > 0 &&
576 			 CALLOUT_FLAGS_TO_CPU(flags) == gd->gd_cpuid);
577 
578 		nflags = (flags - 1) & ~(CALLOUT_EXECUTED | CALLOUT_WAITING);
579 		nflags |= CALLOUT_PENDING;
580 
581 		/*
582 		 * Put us on the queue
583 		 */
584 		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
585 			if (flags & CALLOUT_PENDING) {
586 				if (sc->next == c)
587 					sc->next = TAILQ_NEXT(c, c_links.tqe);
588 				TAILQ_REMOVE(
589 					&sc->callwheel[c->c_time & cwheelmask],
590 					c,
591 					c_links.tqe);
592 			}
593 			c->c_time = sc->curticks + c->c_load;
594 			TAILQ_INSERT_TAIL(
595 				&sc->callwheel[c->c_time & cwheelmask],
596 				c, c_links.tqe);
597 			break;
598 		}
599 		/* retry */
600 		cpu_pause();
601 	}
602 
603 	/*
604 	 * Issue wakeup if requested.
605 	 */
606 	if (flags & CALLOUT_WAITING)
607 		wakeup(c);
608 }
609 
610 /*
611  * Stop a running timer and ensure that any running callout completes before
612  * returning.  If the timer is running on another cpu this function may block
613  * to interlock against the callout.  If the callout is currently executing
614  * or blocked in another thread this function may also block to interlock
615  * against the callout.
616  *
617  * The caller must be careful to avoid deadlocks, either by using
618  * callout_init_lk() (which uses the lockmgr lock cancelation feature),
619  * by using tokens and dealing with breaks in the serialization, or by
620  * using the lockmgr lock cancelation feature directly in the callout
621  * callback function.
622  *
623  * callout_stop() returns non-zero if the callout was pending.
624  */
625 static int
626 _callout_stop(struct callout *c, int issync)
627 {
628 	globaldata_t gd = mycpu;
629 	globaldata_t tgd;
630 	softclock_pcpu_t sc;
631 	int flags;
632 	int nflags;
633 	int rc;
634 	int cpuid;
635 
636 #ifdef INVARIANTS
637 	if ((c->c_flags & CALLOUT_DID_INIT) == 0) {
638 		callout_init(c);
639 		kprintf(
640 		    "callout_stop(%p) from %p: callout was not initialized\n",
641 		    c, ((int **)&c)[-1]);
642 		print_backtrace(-1);
643 	}
644 #endif
645 	crit_enter_gd(gd);
646 
647 retry:
648 	/*
649 	 * Adjust flags for the required operation.  If the callout is
650 	 * armed on another cpu we break out into the remote-cpu code which
651 	 * will issue an IPI.  If it is not armed we are trivially done,
652 	 * but may still need to test EXECUTED.
653 	 */
654 	for (;;) {
655 		flags = c->c_flags;
656 		cpu_ccfence();
657 
658 		cpuid = CALLOUT_FLAGS_TO_CPU(flags);
659 
660 		/*
661 		 * Armed on remote cpu (break to remote-cpu code)
662 		 */
663 		if ((flags & CALLOUT_ARMED_MASK) && gd->gd_cpuid != cpuid) {
664 			nflags = flags + 1;
665 			if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
666 				/*
667 				 * BREAK TO REMOTE-CPU CODE HERE
668 				 */
669 				break;
670 			}
671 			cpu_pause();
672 			continue;
673 		}
674 
675 		/*
676 		 * Armed or armable on current cpu
677 		 */
678 		if (flags & CALLOUT_IPI_MASK) {
679 			lwkt_process_ipiq();
680 			cpu_pause();
681 			continue;	/* retry */
682 		}
683 
684 		/*
685 		 * If PENDING is set we can remove the callout from our
686 		 * queue and also use the side effect that the bit causes
687 		 * the callout to be locked to our cpu.
688 		 */
689 		if (flags & CALLOUT_PENDING) {
690 			sc = &softclock_pcpu_ary[gd->gd_cpuid];
691 			if (sc->next == c)
692 				sc->next = TAILQ_NEXT(c, c_links.tqe);
693 			TAILQ_REMOVE(
694 				&sc->callwheel[c->c_time & cwheelmask],
695 				c,
696 				c_links.tqe);
697 			c->c_func = NULL;
698 
699 			for (;;) {
700 				flags = c->c_flags;
701 				cpu_ccfence();
702 				nflags = flags & ~(CALLOUT_ACTIVE |
703 						   CALLOUT_EXECUTED |
704 						   CALLOUT_WAITING |
705 						   CALLOUT_PENDING);
706 				if (atomic_cmpset_int(&c->c_flags,
707 						      flags, nflags)) {
708 					goto skip_slow;
709 				}
710 				cpu_pause();
711 			}
712 			/* NOT REACHED */
713 		}
714 
715 		/*
716 		 * If PENDING was not set the callout might not be locked
717 		 * to this cpu.
718 		 */
719 		nflags = flags & ~(CALLOUT_ACTIVE |
720 				   CALLOUT_EXECUTED |
721 				   CALLOUT_WAITING |
722 				   CALLOUT_PENDING);
723 		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
724 			goto skip_slow;
725 		}
726 		cpu_pause();
727 		/* retry */
728 	}
729 
730 	/*
731 	 * Remote cpu path.  We incremented the IPI_MASK count so the callout
732 	 * is now locked to the remote cpu and we can safely send an IPI
733 	 * to it.
734 	 *
735 	 * Once sent, wait for all IPIs to be processed.  If PENDING remains
736  * set after all IPIs have been processed we raced a callout_reset()
737  * and must retry.  Callers expect the callout to
738 	 * be completely stopped upon return, so make sure it is.
739 	 */
740 	tgd = globaldata_find(cpuid);
741 	lwkt_send_ipiq3(tgd, callout_stop_ipi, c, issync);
742 
743 	for (;;) {
744 		flags = c->c_flags;
745 		cpu_ccfence();
746 
747 		if ((flags & CALLOUT_IPI_MASK) == 0)
748 			break;
749 
750 		nflags = flags | CALLOUT_WAITING;
751 		tsleep_interlock(c, 0);
752 		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
753 			tsleep(c, PINTERLOCKED, "cstp1", 0);
754 		}
755 	}
756 	if (flags & CALLOUT_PENDING)
757 		goto retry;
758 
759 	/*
760 	 * Caller expects callout_stop_sync() to clear EXECUTED and return
761 	 * its previous status.
762 	 */
763 	atomic_clear_int(&c->c_flags, CALLOUT_EXECUTED);
764 
765 skip_slow:
766 	if (flags & CALLOUT_WAITING)
767 		wakeup(c);
768 
769 	/*
770 	 * If (issync) we must also wait for any in-progress callbacks to
771 	 * complete, unless the stop is being executed from the callback
772 	 * itself.  The EXECUTED flag is set prior to the callback
773 	 * being made so our existing flags status already has it.
774 	 *
775 	 * If auto-lock mode is being used, this is where we cancel any
776 	 * blocked lock that is potentially preventing the target cpu
777 	 * from completing the callback.
778 	 */
779 	while (issync) {
780 		intptr_t *runp;
781 		intptr_t runco;
782 
783 		sc = &softclock_pcpu_ary[cpuid];
784 		if (gd->gd_curthread == sc->thread)	/* stop from cb */
785 			break;
786 		runp = &sc->running;
787 		runco = *runp;
788 		cpu_ccfence();
789 		if ((runco & ~(intptr_t)1) != (intptr_t)c)
790 			break;
791 		if (c->c_flags & CALLOUT_AUTOLOCK)
792 			lockmgr(c->c_lk, LK_CANCEL_BEG);
793 		tsleep_interlock(c, 0);
794 		if (atomic_cmpset_long(runp, runco, runco | 1))
795 			tsleep(c, PINTERLOCKED, "cstp3", 0);
796 		if (c->c_flags & CALLOUT_AUTOLOCK)
797 			lockmgr(c->c_lk, LK_CANCEL_END);
798 	}
799 
800 	crit_exit_gd(gd);
801 	rc = (flags & CALLOUT_EXECUTED) != 0;
802 
803 	return rc;
804 }
805 
806 /*
807  * IPI for stop function.  The callout is locked to the receiving cpu
808  * by the IPI_MASK count.
809  */
810 static
811 void
812 callout_stop_ipi(void *arg, int issync, struct intrframe *frame)
813 {
814 	globaldata_t gd = mycpu;
815 	struct callout *c = arg;
816 	softclock_pcpu_t sc;
817 	int flags;
818 	int nflags;
819 
820 	flags = c->c_flags;
821 	cpu_ccfence();
822 
823 	KKASSERT(CALLOUT_FLAGS_TO_CPU(flags) == gd->gd_cpuid);
824 
825 	/*
826 	 * We can handle the PENDING flag immediately.
827 	 */
828 	if (flags & CALLOUT_PENDING) {
829 		sc = &softclock_pcpu_ary[gd->gd_cpuid];
830 		if (sc->next == c)
831 			sc->next = TAILQ_NEXT(c, c_links.tqe);
832 		TAILQ_REMOVE(
833 			&sc->callwheel[c->c_time & cwheelmask],
834 			c,
835 			c_links.tqe);
836 		c->c_func = NULL;
837 	}
838 
839 	/*
840 	 * Transition to the stopped state and decrement the IPI count.
841 	 * Leave the EXECUTED bit alone (the next callout_reset() will
842 	 * have to deal with it).
843 	 */
844 	for (;;) {
845 		flags = c->c_flags;
846 		cpu_ccfence();
847 		nflags = (flags - 1) & ~(CALLOUT_ACTIVE |
848 					 CALLOUT_PENDING |
849 					 CALLOUT_WAITING);
850 
851 		if (atomic_cmpset_int(&c->c_flags, flags, nflags))
852 			break;
853 		cpu_pause();
854 	}
855 	if (flags & CALLOUT_WAITING)
856 		wakeup(c);
857 }
858 
859 int
860 callout_stop(struct callout *c)
861 {
862 	return _callout_stop(c, 0);
863 }
864 
865 int
866 callout_stop_sync(struct callout *c)
867 {
868 	return _callout_stop(c, 1);
869 }
870 
871 void
872 callout_stop_async(struct callout *c)
873 {
874 	_callout_stop(c, 0);
875 }
876 
877 void
878 callout_terminate(struct callout *c)
879 {
880 	_callout_stop(c, 1);
881 	atomic_clear_int(&c->c_flags, CALLOUT_DID_INIT);
882 }
883 
884 /*
885  * Prepare a callout structure for use by callout_reset() and/or
886  * callout_stop().
887  *
888  * The MP version of this routine requires that the callback
889  * function installed by callout_reset() be MP safe.
890  *
891  * The LK version of this routine is also MP safe and will automatically
892  * acquire the specified lock for the duration of the function call,
893  * and release it after the function returns.  In addition, when autolocking
894  * is used, callout_stop() becomes synchronous if the caller owns the lock.
895  * callout_reset(), callout_stop(), and callout_stop_sync() will block
896  * normally instead of spinning when a cpu race occurs.  Lock cancelation
897  * is used to avoid deadlocks against the callout ring dispatch.
898  *
899  * The init functions can be called from any cpu and do not have to be
900  * called from the cpu that the timer will eventually run on.
901  */
902 static __inline
903 void
904 _callout_init(struct callout *c, int flags)
905 {
906 	bzero(c, sizeof *c);
907 	c->c_flags = flags;
908 }
909 
910 void
911 callout_init(struct callout *c)
912 {
913 	_callout_init(c, CALLOUT_DID_INIT);
914 }
915 
916 void
917 callout_init_mp(struct callout *c)
918 {
919 	_callout_init(c, CALLOUT_DID_INIT | CALLOUT_MPSAFE);
920 }
921 
922 void
923 callout_init_lk(struct callout *c, struct lock *lk)
924 {
925 	_callout_init(c, CALLOUT_DID_INIT | CALLOUT_MPSAFE | CALLOUT_AUTOLOCK);
926 	c->c_lk = lk;
927 }
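
/*
 * Illustrative sketch with hypothetical names (my_lk, my_co, my_cb) of the
 * auto-lock pattern described above: the callback runs with the lock held,
 * and a callout_stop_sync() issued while the dispatcher is blocked on the
 * lock uses lock cancelation instead of deadlocking.
 *
 *	struct lock my_lk;
 *	struct callout my_co;
 *
 *	lockinit(&my_lk, "mylk", 0, 0);
 *	callout_init_lk(&my_co, &my_lk);
 *	callout_reset(&my_co, hz, my_cb, NULL);
 *	...
 *	lockmgr(&my_lk, LK_EXCLUSIVE);
 *	callout_stop_sync(&my_co);	(synchronous; cancels a blocked lock)
 *	lockmgr(&my_lk, LK_RELEASE);
 */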
928