1 /*
2  * Copyright (c) 2004,2014,2019 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 /*
35  * Copyright (c) 1982, 1986, 1991, 1993
36  *	The Regents of the University of California.  All rights reserved.
37  * (c) UNIX System Laboratories, Inc.
38  * All or some portions of this file are derived from material licensed
39  * to the University of California by American Telephone and Telegraph
40  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
41  * the permission of UNIX System Laboratories, Inc.
42  *
43  * Redistribution and use in source and binary forms, with or without
44  * modification, are permitted provided that the following conditions
45  * are met:
46  * 1. Redistributions of source code must retain the above copyright
47  *    notice, this list of conditions and the following disclaimer.
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  * 3. Neither the name of the University nor the names of its contributors
52  *    may be used to endorse or promote products derived from this software
53  *    without specific prior written permission.
54  *
55  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
56  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
57  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
58  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
59  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
60  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
61  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
62  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
63  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
64  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65  * SUCH DAMAGE.
66  */
67 /*
68  * The original callout mechanism was based on the work of Adam M. Costello
69  * and George Varghese, published in a technical report entitled "Redesigning
70  * the BSD Callout and Timer Facilities" and modified slightly for inclusion
71  * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
72  * used in this implementation was published by G. Varghese and T. Lauck in
73  * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
74  * the Efficient Implementation of a Timer Facility" in the Proceedings of
75  * the 11th ACM Annual Symposium on Operating Systems Principles,
76  * Austin, Texas Nov 1987.
77  */
78 
79 #include <sys/param.h>
80 #include <sys/systm.h>
81 #include <sys/spinlock.h>
82 #include <sys/callout.h>
83 #include <sys/kernel.h>
84 #include <sys/malloc.h>
85 #include <sys/interrupt.h>
86 #include <sys/thread.h>
87 #include <sys/sysctl.h>
88 #ifdef CALLOUT_TYPESTABLE
89 #include <sys/typestable.h>
90 #endif
91 #include <vm/vm_extern.h>
92 #include <machine/atomic.h>
93 
94 #include <sys/spinlock2.h>
95 #include <sys/thread2.h>
96 #include <sys/mplock2.h>
97 
98 TAILQ_HEAD(colist, _callout);
99 struct softclock_pcpu;
100 struct _callout_mag;
101 
102 /*
103  * DID_INIT	- Sanity check
104  * SYNC		- Synchronous waiter, request SYNCDONE and wakeup()
105  * CANCEL_RES	- Flag indicating a cancel prevented a callback
106  * STOP_RES	- Flag indicating a stop or cancel prevented a callback
107  * RESET	- callout_reset() request queued
108  * STOP		- callout_stop() request queued
109  * INPROG	- softclock_handler thread processing in-progress on callout
110  * SET		- Callout is linked to queue (if INPROG not set)
111  * AUTOLOCK	- Lockmgr cancelable interlock
112  * MPSAFE	- Callout is MPSAFE
113  * CANCEL	- callout_cancel() request queued
114  * ACTIVE	- active/inactive tracking (see documentation).  This is
115  *		  *NOT* the same as whether a callout is queued or not.
116  */
117 #define CALLOUT_DID_INIT	0x00000001	/* frontend */
118 #define CALLOUT_UNUSED0002	0x00000002
119 #define CALLOUT_UNUSED0004	0x00000004
120 #define CALLOUT_CANCEL_RES	0x00000008	/* frontend */
121 #define CALLOUT_STOP_RES	0x00000010	/* frontend */
122 #define CALLOUT_RESET		0x00000020	/* backend */
123 #define CALLOUT_STOP		0x00000040	/* backend */
124 #define CALLOUT_INPROG		0x00000080	/* backend */
125 #define CALLOUT_SET		0x00000100	/* backend */
126 #define CALLOUT_AUTOLOCK	0x00000200	/* both */
127 #define CALLOUT_MPSAFE		0x00000400	/* both */
128 #define CALLOUT_CANCEL		0x00000800	/* backend */
129 #define CALLOUT_ACTIVE		0x00001000	/* frontend */
130 
131 struct wheel {
132 	struct spinlock spin;
133 	struct colist	list;
134 };
135 
136 struct softclock_pcpu {
137 	struct wheel	*callwheel;
138 	struct _callout *running;
139 	struct _callout * volatile next;
140 #ifdef CALLOUT_TYPESTABLE
141 	struct _callout *quick_obj;
142 #endif
143 	int		softticks;	/* softticks index */
144 	int		curticks;	/* per-cpu ticks counter */
145 	int		isrunning;
146 	struct thread	thread;
147 };
148 
149 typedef struct softclock_pcpu *softclock_pcpu_t;
150 
151 TAILQ_HEAD(maglist, _callout_mag);
152 
153 #if 0
154 static int callout_debug = 0;
155 SYSCTL_INT(_debug, OID_AUTO, callout_debug, CTLFLAG_RW,
156 	   &callout_debug, 0, "");
157 #endif
158 
159 #ifdef CALLOUT_TYPESTABLE
160 static MALLOC_DEFINE(M_CALLOUT, "callouts", "softclock callouts");
161 #endif
162 
163 static int cwheelsize;
164 static int cwheelmask;
165 static softclock_pcpu_t softclock_pcpu_ary[MAXCPU];
166 #ifdef CALLOUT_TYPESTABLE
167 static struct typestable_glob callout_tsg;
168 #endif
169 
170 static void softclock_handler(void *arg);
171 static void slotimer_callback(void *arg);
172 
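/*
 * Illustrative sketch (guarded out, hypothetical helper not used by this
 * file): how a target tick hashes to a callwheel bucket.  cwheelmask is
 * cwheelsize - 1, so the bucket index is simply the low bits of the tick;
 * callouts further out than cwheelsize ticks share a bucket and are
 * filtered by comparing their qtick against sc->softticks when the bucket
 * is scanned.
 */
#if 0
static __inline struct wheel *
example_wheel_for_tick(softclock_pcpu_t sc, int tick)
{
	return (&sc->callwheel[tick & cwheelmask]);
}
#endif
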
173 #ifdef CALLOUT_TYPESTABLE
174 /*
175  * typestable callback functions.  The init function pre-initializes
176  * the structure in order to allow for reuse without complete
177  * reinitialization (i.e. our spinlock).
178  *
179  * The test function allows us to reject an allocation attempt due
180  * to the object being reassociated out-of-band.
181  */
182 static
183 void
184 _callout_typestable_init(void *obj)
185 {
186 	struct _callout *c = obj;
187 
188 	spin_init(&c->spin, "_callout");
189 }
190 
191 /*
192  * Object may have been reassociated out-of-band.
193  *
194  * Return 1 on success with the spin-lock held, allowing reallocation.
195  * Return 0 on failure with no side effects, rejecting reallocation.
196  */
197 static
198 int
199 _callout_typestable_test(void *obj)
200 {
201 	struct _callout *c = obj;
202 
203 	if (c->flags & (CALLOUT_SET | CALLOUT_INPROG))
204 		return 0;
205 	spin_lock(&c->spin);
206 	if (c->flags & (CALLOUT_SET | CALLOUT_INPROG)) {
207 		spin_unlock(&c->spin);
208 		return 0;
209 	} else {
210 		return 1;
211 	}
212 }
213 
214 /*
215  * NOTE: sc might refer to a different cpu.
216  */
217 static __inline
218 void
219 _callout_typestable_free(softclock_pcpu_t sc, void *obj, int tentative)
220 {
221 	if (tentative == 0) {
222 		obj = atomic_swap_ptr((void *)&sc->quick_obj, obj);
223 		if (obj == NULL)
224 			return;
225 	}
226 	typestable_free(&callout_tsg, obj, tentative);
227 }
228 #endif
229 
230 /*
231  * Post-processing helper for a callout; executes any pending request.
232  * This routine handles post-processing from the softclock thread and
233  * also handles request processing from the API.
234  *
235  * This routine does not block in any way.
236  * Caller must hold c->spin.
237  *
238  * INPROG  - Callback is in-processing / in-progress.
239  *
240  * SET     - Assigned to queue or is in-processing.  If INPROG is set,
241  *	     however, the _callout is no longer in the queue.
242  *
243  * RESET   - New timeout was installed.
244  *
245  * STOP    - Stop requested.
246  *
247  * ACTIVE  - Set on callout_reset(), cleared by callout_stop()
248  *	     or callout_cancel().  Starts out cleared.
249  *
250  * NOTE: Flags can be adjusted without holding c->spin, so atomic ops
251  *	 must be used at all times.
252  *
253  * NOTE: The passed-in (sc) might refer to another cpu.
254  */
255 static __inline
256 int
257 _callout_process_spinlocked(struct _callout *c, int fromsoftclock)
258 {
259 	struct wheel *wheel;
260 	int res = -1;
261 
262 	/*
263 	 * If a callback manipulates its own in-progress callout, we do
264 	 * a partial 'completion' of the operation so the operation can
265 	 * be processed synchronously, and we tell the softclock_handler
266 	 * to stop messing with it.
267 	 */
268 	if (fromsoftclock == 0 && curthread == &c->qsc->thread &&
269 	    c->qsc->running == c) {
270 		c->qsc->running = NULL;
271 		atomic_clear_int(&c->flags, CALLOUT_SET |
272 					    CALLOUT_INPROG);
273 	}
274 
275 	/*
276 	 * Based on source and state
277 	 */
278 	if (fromsoftclock) {
279 		/*
280 		 * From control thread, INPROG is set, handle pending
281 		 * request and normal termination.
282 		 */
283 #ifdef CALLOUT_TYPESTABLE
284 		KASSERT(c->verifier->toc == c,
285 			("callout corrupt: c=%p %s/%d\n",
286 			 c, c->ident, c->lineno));
287 #else
288 		KASSERT(&c->verifier->toc == c,
289 			("callout corrupt: c=%p %s/%d\n",
290 			 c, c->ident, c->lineno));
291 #endif
292 		if (c->flags & CALLOUT_CANCEL) {
293 			/*
294 			 * CANCEL overrides everything.
295 			 *
296 			 * If a RESET is pending it counts as canceling a
297 			 * running timer.
298 			 */
299 			if (c->flags & CALLOUT_RESET)
300 				atomic_set_int(&c->verifier->flags,
301 					       CALLOUT_CANCEL_RES |
302 					       CALLOUT_STOP_RES);
303 			atomic_clear_int(&c->flags, CALLOUT_SET |
304 						    CALLOUT_INPROG |
305 						    CALLOUT_STOP |
306 						    CALLOUT_CANCEL |
307 						    CALLOUT_RESET);
308 			if (c->waiters)
309 				wakeup(c->verifier);
310 			res = 0;
311 		} else if (c->flags & CALLOUT_RESET) {
312 			/*
313 			 * RESET request pending, requeue appropriately.
314 			 */
315 			atomic_clear_int(&c->flags, CALLOUT_RESET |
316 						    CALLOUT_INPROG);
317 			atomic_set_int(&c->flags, CALLOUT_SET);
318 			c->qsc = c->rsc;
319 			c->qarg = c->rarg;
320 			c->qfunc = c->rfunc;
321 			c->qtick = c->rtick;
322 
323 			/*
324 			 * Do not queue to current or past wheel or the
325 			 * callout will be lost for ages.
326 			 */
327 			wheel = &c->qsc->callwheel[c->qtick & cwheelmask];
328 			spin_lock(&wheel->spin);
329 			while (c->qtick - c->qsc->softticks <= 0) {
330 				c->qtick = c->qsc->softticks + 1;
331 				spin_unlock(&wheel->spin);
332 				wheel = &c->qsc->callwheel[c->qtick &
333 							   cwheelmask];
334 				spin_lock(&wheel->spin);
335 			}
336 			TAILQ_INSERT_TAIL(&wheel->list, c, entry);
337 			spin_unlock(&wheel->spin);
338 		} else {
339 			/*
340 			 * STOP request pending or normal termination.  Since
341 			 * this is from our control thread the callout has
342 			 * already been removed from the queue.
343 			 */
344 			atomic_clear_int(&c->flags, CALLOUT_SET |
345 						    CALLOUT_INPROG |
346 						    CALLOUT_STOP);
347 			if (c->waiters)
348 				wakeup(c->verifier);
349 			res = 1;
350 		}
351 	} else if (c->flags & CALLOUT_SET) {
352 		/*
353 		 * Process request from an API function.  qtick and ACTIVE
354 		 * are stable while we hold c->spin.  Checking INPROG requires
355 		 * holding wheel->spin.
356 		 *
357 		 * If INPROG is set the control thread must handle the request
358 		 * for us.
359 		 */
360 		softclock_pcpu_t sc;
361 
362 		sc = c->qsc;
363 
364 		wheel = &sc->callwheel[c->qtick & cwheelmask];
365 		spin_lock(&wheel->spin);
366 		if (c->flags & CALLOUT_INPROG) {
367 			/*
368 			 * API requests are deferred if a callback is in
369 			 * progress and will be handled after the callback
370 			 * returns.
371 			 */
372 		} else if (c->flags & CALLOUT_CANCEL) {
373 			/*
374 			 * CANCEL request overrides everything except INPROG
375 			 * (for INPROG the CANCEL is handled upon completion).
376 			 */
377 			if (sc->next == c)
378 				sc->next = TAILQ_NEXT(c, entry);
379 			TAILQ_REMOVE(&wheel->list, c, entry);
380 			atomic_set_int(&c->verifier->flags, CALLOUT_CANCEL_RES |
381 							    CALLOUT_STOP_RES);
382 			atomic_clear_int(&c->flags, CALLOUT_STOP |
383 						    CALLOUT_SET |
384 						    CALLOUT_CANCEL |
385 						    CALLOUT_RESET);
386 			if (c->waiters)
387 				wakeup(c->verifier);
388 			res = 0;
389 		} else if (c->flags & CALLOUT_RESET) {
390 			/*
391 			 * RESET request pending, requeue appropriately.
392 			 *
393 			 * (ACTIVE is governed by c->spin so we do not have
394 			 *  to clear it prior to releasing wheel->spin).
395 			 */
396 			if (sc->next == c)
397 				sc->next = TAILQ_NEXT(c, entry);
398 			TAILQ_REMOVE(&wheel->list, c, entry);
399 			spin_unlock(&wheel->spin);
400 
401 			atomic_clear_int(&c->flags, CALLOUT_RESET);
402 			/* remain ACTIVE */
403 			sc = c->rsc;
404 			c->qsc = sc;
405 			c->qarg = c->rarg;
406 			c->qfunc = c->rfunc;
407 			c->qtick = c->rtick;
408 
409 			/*
410 			 * Do not queue to current or past wheel or the
411 			 * callout will be lost for ages.
412 			 */
413 			wheel = &sc->callwheel[c->qtick & cwheelmask];
414 			spin_lock(&wheel->spin);
415 			while (c->qtick - sc->softticks <= 0) {
416 				c->qtick = sc->softticks + 1;
417 				spin_unlock(&wheel->spin);
418 				wheel = &sc->callwheel[c->qtick & cwheelmask];
419 				spin_lock(&wheel->spin);
420 			}
421 			TAILQ_INSERT_TAIL(&wheel->list, c, entry);
422 		} else if (c->flags & CALLOUT_STOP) {
423 			/*
424 			 * STOP request
425 			 */
426 			if (sc->next == c)
427 				sc->next = TAILQ_NEXT(c, entry);
428 			TAILQ_REMOVE(&wheel->list, c, entry);
429 			atomic_set_int(&c->verifier->flags, CALLOUT_STOP_RES);
430 			atomic_clear_int(&c->flags, CALLOUT_STOP |
431 						    CALLOUT_SET);
432 			if (c->waiters)
433 				wakeup(c->verifier);
434 			res = 1;
435 		} else {
436 			/*
437 			 * No request pending (someone else processed the
438 			 * request before we could)
439 			 */
440 			/* nop */
441 		}
442 		spin_unlock(&wheel->spin);
443 	} else {
444 		/*
445 		 * Process request from API function.  callout is not
446 		 * active so there's nothing for us to remove.
447 		 */
448 		KKASSERT((c->flags & CALLOUT_INPROG) == 0);
449 		if (c->flags & CALLOUT_CANCEL) {
450 			/*
451 			 * CANCEL request (nothing to cancel)
452 			 */
453 			if (c->flags & CALLOUT_RESET) {
454 				atomic_set_int(&c->verifier->flags,
455 					       CALLOUT_CANCEL_RES |
456 					       CALLOUT_STOP_RES);
457 			}
458 			atomic_clear_int(&c->flags, CALLOUT_STOP |
459 						    CALLOUT_CANCEL |
460 						    CALLOUT_RESET);
461 			if (c->waiters)
462 				wakeup(c->verifier);
463 			res = 0;
464 		} else if (c->flags & CALLOUT_RESET) {
465 			/*
466 			 * RESET request pending, queue appropriately.
467 			 * Do not queue to currently-processing tick.
468 			 */
469 			softclock_pcpu_t sc;
470 
471 			sc = c->rsc;
472 			atomic_clear_int(&c->flags, CALLOUT_RESET);
473 			atomic_set_int(&c->flags, CALLOUT_SET);
474 			c->qsc = sc;
475 			c->qarg = c->rarg;
476 			c->qfunc = c->rfunc;
477 			c->qtick = c->rtick;
478 
479 			/*
480 			 * Do not queue to current or past wheel or the
481 			 * callout will be lost for ages.
482 			 */
483 			wheel = &sc->callwheel[c->qtick & cwheelmask];
484 			spin_lock(&wheel->spin);
485 			while (c->qtick - sc->softticks <= 0) {
486 				c->qtick = sc->softticks + 1;
487 				spin_unlock(&wheel->spin);
488 				wheel = &sc->callwheel[c->qtick & cwheelmask];
489 				spin_lock(&wheel->spin);
490 			}
491 			TAILQ_INSERT_TAIL(&wheel->list, c, entry);
492 			spin_unlock(&wheel->spin);
493 		} else if (c->flags & CALLOUT_STOP) {
494 			/*
495 			 * STOP request (nothing to stop)
496 			 */
497 			atomic_clear_int(&c->flags, CALLOUT_STOP);
498 			if (c->waiters)
499 				wakeup(c->verifier);
500 			res = 1;
501 		} else {
502 			/*
503 			 * No request pending (someone else processed the
504 			 * request before we could)
505 			 */
506 			/* nop */
507 		}
508 	}
509 	return res;
510 }
511 
512 /*
513  * System init
514  */
515 static void
516 swi_softclock_setup(void *arg)
517 {
518 	int cpu;
519 	int i;
520 	int target;
521 
522 	/*
523 	 * Figure out how large a callwheel we need.  It must be a power of 2.
524 	 *
525 	 * ncallout is primarily based on available memory; don't explode
526 	 * the allocations if the system has a lot of cpus.
527 	 */
528 	target = ncallout / ncpus + 16;
529 
530 	cwheelsize = 1;
531 	while (cwheelsize < target)
532 		cwheelsize <<= 1;
533 	cwheelmask = cwheelsize - 1;
534 
535 #ifdef CALLOUT_TYPESTABLE
536 	typestable_init_glob(&callout_tsg, M_CALLOUT,
537 			     sizeof(struct _callout),
538 			     _callout_typestable_test,
539 			     _callout_typestable_init);
540 #endif
541 
542 	/*
543 	 * Initialize per-cpu data structures.
544 	 */
545 	for (cpu = 0; cpu < ncpus; ++cpu) {
546 		softclock_pcpu_t sc;
547 		int wheel_sz;
548 
549 		sc = (void *)kmem_alloc3(&kernel_map, sizeof(*sc),
550 					 VM_SUBSYS_GD, KM_CPU(cpu));
551 		memset(sc, 0, sizeof(*sc));
552 		softclock_pcpu_ary[cpu] = sc;
553 
554 		wheel_sz = sizeof(*sc->callwheel) * cwheelsize;
555 		sc->callwheel = (void *)kmem_alloc3(&kernel_map, wheel_sz,
556 						    VM_SUBSYS_GD, KM_CPU(cpu));
557 		memset(sc->callwheel, 0, wheel_sz);
558 		for (i = 0; i < cwheelsize; ++i) {
559 			spin_init(&sc->callwheel[i].spin, "wheel");
560 			TAILQ_INIT(&sc->callwheel[i].list);
561 		}
562 
563 		/*
564 		 * Mark the softclock handler as being an interrupt thread
565 		 * even though it really isn't, but do not allow it to
566 		 * preempt other threads (do not assign td_preemptable).
567 		 *
568 		 * Kernel code now assumes that callouts do not preempt
569 		 * the cpu they were scheduled on.
570 		 */
571 		lwkt_create(softclock_handler, sc, NULL, &sc->thread,
572 			    TDF_NOSTART | TDF_INTTHREAD,
573 			    cpu, "softclock %d", cpu);
574 	}
575 }
576 
577 /*
578  * Must occur after ncpus has been initialized.
579  */
580 SYSINIT(softclock_setup, SI_BOOT2_SOFTCLOCK, SI_ORDER_SECOND,
581 	swi_softclock_setup, NULL);
582 
583 /*
584  * This routine is called from the hardclock() (basically a FASTint/IPI) on
585  * each cpu in the system.  sc->curticks is this cpu's notion of the timebase.
586  * It IS NOT NECESSARILY SYNCHRONIZED WITH 'ticks'!  sc->softticks is where
587  * the callwheel is currently indexed.
588  *
589  * sc->softticks is adjusted by either this routine or our helper thread
590  * depending on whether the helper thread is running or not.
591  *
592  * sc->curticks and sc->softticks are adjusted using atomic ops in order
593  * to ensure that remote cpu callout installation does not race the thread.
594  */
595 void
596 hardclock_softtick(globaldata_t gd)
597 {
598 	softclock_pcpu_t sc;
599 	struct wheel *wheel;
600 
601 	sc = softclock_pcpu_ary[gd->gd_cpuid];
602 	atomic_add_int(&sc->curticks, 1);
603 	if (sc->isrunning)
604 		return;
605 	if (sc->softticks == sc->curticks) {
606 		/*
607 		 * In sync, only wakeup the thread if there is something to
608 		 * do.
609 		 */
610 		wheel = &sc->callwheel[sc->softticks & cwheelmask];
611 		spin_lock(&wheel->spin);
612 		if (TAILQ_FIRST(&wheel->list)) {
613 			sc->isrunning = 1;
614 			spin_unlock(&wheel->spin);
615 			lwkt_schedule(&sc->thread);
616 		} else {
617 			atomic_add_int(&sc->softticks, 1);
618 			spin_unlock(&wheel->spin);
619 		}
620 	} else {
621 		/*
622 		 * out of sync, wakeup the thread unconditionally so it can
623 		 * catch up.
624 		 */
625 		sc->isrunning = 1;
626 		lwkt_schedule(&sc->thread);
627 	}
628 }
629 
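/*
 * Illustrative sketch (guarded out, hypothetical helper): the handler loop
 * below drains wheel buckets until softticks has advanced one past
 * curticks, i.e. every tick accounted for by hardclock_softtick() has been
 * processed.  This is also the condition rechecked, inside a critical
 * section, before the thread deschedules itself.
 */
#if 0
static __inline int
example_softclock_caught_up(softclock_pcpu_t sc)
{
	return (sc->softticks == (int)(sc->curticks + 1));
}
#endif
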
630 /*
631  * This procedure is the main loop of our per-cpu helper thread.  The
632  * sc->isrunning flag prevents us from racing hardclock_softtick().
633  *
634  * The thread starts with the MP lock released and not in a critical
635  * section.  The loop itself is MP safe while individual callbacks
636  * may or may not be, so we obtain or release the MP lock as appropriate.
637  */
638 static void
639 softclock_handler(void *arg)
640 {
641 	softclock_pcpu_t sc;
642 	struct _callout *c;
643 	struct wheel *wheel;
644 	struct callout slotimer;
645 	int mpsafe = 1;
646 
647 	/*
648 	 * Setup pcpu slow clocks which we want to run from the callout
649 	 * thread.
650 	 */
651 	callout_init_mp(&slotimer);
652 	callout_reset(&slotimer, hz * 10, slotimer_callback, &slotimer);
653 
654 	/*
655 	 * Run the callout thread at the same priority as other kernel
656 	 * threads so it can be round-robined.
657 	 */
658 	/*lwkt_setpri_self(TDPRI_SOFT_NORM);*/
659 
660 	sc = arg;
661 loop:
662 	while (sc->softticks != (int)(sc->curticks + 1)) {
663 		wheel = &sc->callwheel[sc->softticks & cwheelmask];
664 
665 		spin_lock(&wheel->spin);
666 		sc->next = TAILQ_FIRST(&wheel->list);
667 		while ((c = sc->next) != NULL) {
668 			int error;
669 			int res;
670 
671 			/*
672 			 * Match callouts for this tick.  The wheel spinlock
673 			 * is sufficient to set INPROG.  Once set, other
674 			 * threads can make only limited changes to (c)
675 			 */
676 			sc->next = TAILQ_NEXT(c, entry);
677 			if (c->qtick != sc->softticks)
678 				continue;
679 			TAILQ_REMOVE(&wheel->list, c, entry);
680 			atomic_set_int(&c->flags, CALLOUT_INPROG);
681 			sc->running = c;
682 			spin_unlock(&wheel->spin);
683 
684 			/*
685 			 * legacy mplock support
686 			 */
687 			if (c->flags & CALLOUT_MPSAFE) {
688 				if (mpsafe == 0) {
689 					mpsafe = 1;
690 					rel_mplock();
691 				}
692 			} else {
693 				if (mpsafe) {
694 					mpsafe = 0;
695 					get_mplock();
696 				}
697 			}
698 
699 			/*
700 			 * Execute function (protected by INPROG)
701 			 */
702 			if (c->flags & (CALLOUT_STOP | CALLOUT_CANCEL)) {
703 				/*
704 				 * Raced a stop or cancel request, do
705 				 * not execute.  The processing code
706 				 * thinks it's a normal completion, so
707 				 * flag the fact that cancel/stop actually
708 				 * prevented a callout here.
709 				 */
710 				if (c->flags & CALLOUT_CANCEL) {
711 					atomic_set_int(&c->verifier->flags,
712 						       CALLOUT_CANCEL_RES |
713 						       CALLOUT_STOP_RES);
714 				} else if (c->flags & CALLOUT_STOP) {
715 					atomic_set_int(&c->verifier->flags,
716 						       CALLOUT_STOP_RES);
717 				}
718 			} else if (c->flags & CALLOUT_RESET) {
719 				/*
720 				 * A RESET raced, make it seem like it
721 				 * didn't.  Do nothing here and let the
722 				 * process routine requeue us.
723 				 */
724 			} else if (c->flags & CALLOUT_AUTOLOCK) {
725 				/*
726 				 * Interlocked cancelable call.  If the
727 				 * lock gets canceled we have to flag the
728 				 * fact that the cancel/stop actually
729 				 * prevented the callout here.
730 				 */
731 				error = lockmgr(c->lk, LK_EXCLUSIVE |
732 						       LK_CANCELABLE);
733 				if (error == 0) {
734 					c->qfunc(c->qarg);
735 					lockmgr(c->lk, LK_RELEASE);
736 				} else if (c->flags & CALLOUT_CANCEL) {
737 					atomic_set_int(&c->verifier->flags,
738 						       CALLOUT_CANCEL_RES |
739 						       CALLOUT_STOP_RES);
740 				} else if (c->flags & CALLOUT_STOP) {
741 					atomic_set_int(&c->verifier->flags,
742 						       CALLOUT_STOP_RES);
743 				}
744 			} else {
745 				/*
746 				 * Normal call
747 				 */
748 				c->qfunc(c->qarg);
749 			}
750 
751 			if (sc->running == c) {
752 				/*
753 				 * We are still INPROG so (c) remains valid, but
754 				 * the callout is now governed by its internal
755 				 * spin-lock.
756 				 */
757 				spin_lock(&c->spin);
758 				res = _callout_process_spinlocked(c, 1);
759 				spin_unlock(&c->spin);
760 #ifdef CALLOUT_TYPESTABLE
761 				if (res >= 0)
762 					_callout_typestable_free(sc, c, res);
763 #endif
764 			}
765 			spin_lock(&wheel->spin);
766 		}
767 		sc->running = NULL;
768 		spin_unlock(&wheel->spin);
769 		atomic_add_int(&sc->softticks, 1);
770 	}
771 
772 	/*
773 	 * Don't leave us holding the MP lock when we deschedule ourselves.
774 	 */
775 	if (mpsafe == 0) {
776 		mpsafe = 1;
777 		rel_mplock();
778 	}
779 
780 	/*
781 	 * Recheck in critical section to interlock against hardclock
782 	 */
783 	crit_enter();
784 	if (sc->softticks == (int)(sc->curticks + 1)) {
785 		sc->isrunning = 0;
786 		lwkt_deschedule_self(&sc->thread);	/* == curthread */
787 		lwkt_switch();
788 	}
789 	crit_exit();
790 	goto loop;
791 	/* NOT REACHED */
792 }
793 
794 /*
795  * A very slow system cleanup timer (10 second interval),
796  * per-cpu.
797  */
798 void
799 slotimer_callback(void *arg)
800 {
801 	struct callout *c = arg;
802 
803 	slab_cleanup();
804 	callout_reset(c, hz * 10, slotimer_callback, c);
805 }
806 
807 /*
808  * API FUNCTIONS
809  */
810 
811 /*
812  * Prepare a callout structure for use by callout_reset() and/or
813  * callout_stop().
814  *
815  * The MP version of this routine requires that the callback
816  * function installed by callout_reset() be MP safe.
817  *
818  * The LK version of this routine is also MPsafe and will automatically
819  * acquire the specified lock for the duration of the function call,
820  * and release it after the function returns.  In addition, when autolocking
821  * is used, callout_stop() becomes synchronous if the caller owns the lock.
822  * callout_reset(), callout_stop(), and callout_cancel() will block
823  * normally instead of spinning when a cpu race occurs.  Lock cancelation
824  * is used to avoid deadlocks against the callout ring dispatch.
825  *
826  * The init functions can be called from any cpu and do not have to be
827  * called from the cpu that the timer will eventually run on.
828  */
829 static __inline void
830 _callout_setup(struct callout *cc, int flags CALLOUT_DEBUG_ARGS)
831 {
832 	bzero(cc, sizeof(*cc));
833 	cc->flags = flags;		/* frontend flags */
834 #ifdef CALLOUT_DEBUG
835 #ifdef CALLOUT_TYPESTABLE
836 	cc->ident = ident;
837 	cc->lineno = lineno;
838 #else
839 	cc->toc.verifier = cc;		/* corruption detector */
840 	cc->toc.ident = ident;
841 	cc->toc.lineno = lineno;
842 	cc->toc.flags = flags;		/* backend flags */
843 #endif
844 #endif
845 }
846 
847 /*
848  * Associate an internal _callout with the external callout and
849  * verify that the type-stable structure is still applicable (inactive
850  * type-stable _callouts might have been reused for a different callout).
851  * If not, a new internal structure will be allocated.
852  *
853  * Returns the _callout already spin-locked.
854  */
855 static __inline
856 struct _callout *
857 _callout_gettoc(struct callout *cc)
858 {
859 	struct _callout *c;
860 #ifdef CALLOUT_TYPESTABLE
861 	softclock_pcpu_t sc;
862 
863 	KKASSERT(cc->flags & CALLOUT_DID_INIT);
864 	for (;;) {
865 		c = cc->toc;
866 		cpu_ccfence();
867 		if (c == NULL) {
868 			sc = softclock_pcpu_ary[mycpu->gd_cpuid];
869 			c = atomic_swap_ptr((void *)&sc->quick_obj, NULL);
870 			if (c == NULL || _callout_typestable_test(c) == 0)
871 				c = typestable_alloc(&callout_tsg);
872 			/* returns spin-locked */
873 			c->verifier = cc;
874 			c->flags = cc->flags;
875 			c->lk = cc->lk;
876 			c->ident = cc->ident;
877 			c->lineno = cc->lineno;
878 			if (atomic_cmpset_ptr(&cc->toc, NULL, c)) {
879 				break;
880 			}
881 			c->verifier = NULL;
882 			spin_unlock(&c->spin);
883 			_callout_typestable_free(sc, c, 0);
884 		} else {
885 			spin_lock(&c->spin);
886 			if (c->verifier == cc)
887 				break;
888 			spin_unlock(&c->spin);
889 			/* ok if atomic op fails */
890 			(void)atomic_cmpset_ptr(&cc->toc, c, NULL);
891 		}
892 	}
893 #else
894 	c = &cc->toc;
895 	spin_lock(&c->spin);
896 #endif
897 	/* returns with spin-lock held */
898 	return c;
899 }
900 
901 /*
902  * Macro'd in sys/callout.h for debugging
903  *
904  * WARNING! tsleep() assumes this will not block
905  */
906 void
907 _callout_init(struct callout *cc CALLOUT_DEBUG_ARGS)
908 {
909 	_callout_setup(cc, CALLOUT_DID_INIT
910 			CALLOUT_DEBUG_PASSTHRU);
911 }
912 
913 void
914 _callout_init_mp(struct callout *cc CALLOUT_DEBUG_ARGS)
915 {
916 	_callout_setup(cc, CALLOUT_DID_INIT | CALLOUT_MPSAFE
917 			CALLOUT_DEBUG_PASSTHRU);
918 }
919 
920 void
921 _callout_init_lk(struct callout *cc, struct lock *lk CALLOUT_DEBUG_ARGS)
922 {
923 	_callout_setup(cc, CALLOUT_DID_INIT | CALLOUT_MPSAFE |
924 			   CALLOUT_AUTOLOCK
925 			CALLOUT_DEBUG_PASSTHRU);
926 #ifdef CALLOUT_TYPESTABLE
927 	cc->lk = lk;
928 #else
929 	cc->toc.lk = lk;
930 #endif
931 }
932 
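/*
 * Usage sketch (guarded out, hypothetical names): an MPSAFE periodic timer
 * that re-arms itself from its own callback, mirroring the slotimer pattern
 * used by the softclock thread above.
 */
#if 0
static struct callout example_timer;

static void
example_tick(void *arg)
{
	struct callout *c = arg;

	/* ... periodic work ... */
	callout_reset(c, hz, example_tick, c);
}

static void
example_timer_start(void)
{
	callout_init_mp(&example_timer);
	callout_reset(&example_timer, hz, example_tick, &example_timer);
}
#endif
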
933 /*
934  * Start or restart a timeout.  New timeouts can be installed while the
935  * current one is running.
936  *
937  * Start or restart a timeout.  Installs the callout structure on the
938  * callwheel of the current cpu.  Callers may legally pass any value, even
939  * if 0 or negative, but since the sc->curticks index may have already
940  * been processed a minimum timeout of 1 tick will be enforced.
941  *
942  * This function will not deadlock against a running call.
943  *
944  * WARNING! tsleep() assumes this will not block
945  */
946 void
947 callout_reset(struct callout *cc, int to_ticks, void (*ftn)(void *), void *arg)
948 {
949 	softclock_pcpu_t sc;
950 	struct _callout *c;
951 	int res;
952 
953 	atomic_set_int(&cc->flags, CALLOUT_ACTIVE);
954 	c = _callout_gettoc(cc);
955 
956 	/*
957 	 * Set RESET.  Do not clear STOP here (let the process code do it).
958 	 */
959 	atomic_set_int(&c->flags, CALLOUT_RESET);
960 	sc = softclock_pcpu_ary[mycpu->gd_cpuid];
961 	c->rsc = sc;
962 	c->rtick = sc->curticks + to_ticks;
963 	c->rfunc = ftn;
964 	c->rarg = arg;
965 #ifdef CALLOUT_TYPESTABLE
966 	cc->arg = arg;	/* only used by callout_arg() */
967 #endif
968 	res = _callout_process_spinlocked(c, 0);
969 	spin_unlock(&c->spin);
970 #ifdef CALLOUT_TYPESTABLE
971 	if (res >= 0)
972 		_callout_typestable_free(sc, c, res);
973 #endif
974 }
975 
976 /*
977  * Same as callout_reset() but the timeout will run on a particular cpu.
978  */
979 void
980 callout_reset_bycpu(struct callout *cc, int to_ticks, void (*ftn)(void *),
981 		    void *arg, int cpuid)
982 {
983 	softclock_pcpu_t sc;
984 	struct _callout *c;
985 	int res;
986 
987 	atomic_set_int(&cc->flags, CALLOUT_ACTIVE);
988 	c = _callout_gettoc(cc);
989 
990 	/*
991 	 * Set RESET.  Do not clear STOP here (let the process code do it).
992 	 */
993 	atomic_set_int(&c->flags, CALLOUT_RESET);
994 
995 	sc = softclock_pcpu_ary[cpuid];
996 	c->rsc = sc;
997 	c->rtick = sc->curticks + to_ticks;
998 	c->rfunc = ftn;
999 	c->rarg = arg;
1000 #ifdef CALLOUT_TYPESTABLE
1001 	cc->arg = arg;	/* only used by callout_arg() */
1002 #endif
1003 	res = _callout_process_spinlocked(c, 0);
1004 	spin_unlock(&c->spin);
1005 #ifdef CALLOUT_TYPESTABLE
1006 	if (res >= 0)
1007 		_callout_typestable_free(sc, c, res);
1008 #endif
1009 }
1010 
1011 static __inline
1012 void
1013 _callout_cancel_or_stop(struct callout *cc, uint32_t flags)
1014 {
1015 	struct _callout *c;
1016 	softclock_pcpu_t sc;
1017 	int res;
1018 
1019 #ifdef CALLOUT_TYPESTABLE
1020 	if (cc->toc == NULL || cc->toc->verifier != cc)
1021 		return;
1022 #else
1023 	KKASSERT(cc->toc.verifier == cc);
1024 #endif
1025 	/*
1026 	 * Setup for synchronous
1027 	 */
1028 	atomic_clear_int(&cc->flags, CALLOUT_ACTIVE);
1029 	c = _callout_gettoc(cc);
1030 
1031 	/*
1032 	 * Set STOP or CANCEL request.  If this is a STOP, clear a queued
1033 	 * RESET now.
1034 	 */
1035 	atomic_set_int(&c->flags, flags);
1036 	if (flags & CALLOUT_STOP) {
1037 		if (c->flags & CALLOUT_RESET) {
1038 			atomic_set_int(&cc->flags, CALLOUT_STOP_RES);
1039 			atomic_clear_int(&c->flags, CALLOUT_RESET);
1040 		}
1041 	}
1042 	sc = softclock_pcpu_ary[mycpu->gd_cpuid];
1043 	res = _callout_process_spinlocked(c, 0);
1044 	spin_unlock(&c->spin);
1045 #ifdef CALLOUT_TYPESTABLE
1046 	if (res >= 0)
1047 		_callout_typestable_free(sc, c, res);
1048 #endif
1049 
1050 	/*
1051 	 * Wait for the CANCEL or STOP to finish.
1052 	 *
1053 	 * WARNING! (c) can go stale now, so do not use (c) after this
1054 	 *	    point. XXX
1055 	 */
1056 	if (c->flags & flags) {
1057 		atomic_add_int(&c->waiters, 1);
1058 #ifdef CALLOUT_TYPESTABLE
1059 		if (cc->flags & CALLOUT_AUTOLOCK)
1060 			lockmgr(cc->lk, LK_CANCEL_BEG);
1061 #else
1062 		if (cc->flags & CALLOUT_AUTOLOCK)
1063 			lockmgr(c->lk, LK_CANCEL_BEG);
1064 #endif
1065 		for (;;) {
1066 			tsleep_interlock(cc, 0);
1067 			if ((atomic_fetchadd_int(&c->flags, 0) & flags) == 0)
1068 				break;
1069 			tsleep(cc, PINTERLOCKED, "costp", 0);
1070 		}
1071 #ifdef CALLOUT_TYPESTABLE
1072 		if (cc->flags & CALLOUT_AUTOLOCK)
1073 			lockmgr(cc->lk, LK_CANCEL_END);
1074 #else
1075 		if (cc->flags & CALLOUT_AUTOLOCK)
1076 			lockmgr(c->lk, LK_CANCEL_END);
1077 #endif
1078 		atomic_add_int(&c->waiters, -1);
1079 	}
1080 	KKASSERT(cc->toc.verifier == cc);
1081 }
1082 
1083 /*
1084  * This is a synchronous STOP which cancels the callout.  If AUTOLOCK is
1085  * set, a CANCEL will be issued to the lock holder.  Unlike STOP, the
1086  * cancel function prevents any new callout_reset()s from being issued
1087  * in addition to canceling the lock.  The lock will also be deactivated.
1088  *
1089  * Returns 0 if the callout was not active (or was active and completed,
1090  *	     but didn't try to start a new timeout).
1091  * Returns 1 if the cancel is responsible for stopping the callout.
1092  */
1093 int
1094 callout_cancel(struct callout *cc)
1095 {
1096 	atomic_clear_int(&cc->flags, CALLOUT_CANCEL_RES);
1097 	_callout_cancel_or_stop(cc, CALLOUT_CANCEL);
1098 
1099 	return ((cc->flags & CALLOUT_CANCEL_RES) ? 1 : 0);
1100 }
1101 
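/*
 * Teardown sketch (guarded out, hypothetical driver structure): an AUTOLOCK
 * callout initialized with callout_init_lk() runs its callback with the
 * lock held, and callout_cancel() blocks until any in-progress callback
 * has completed or been canceled, after which it is safe to terminate the
 * callout and tear down the lock.
 */
#if 0
struct example_softc {
	struct lock	ex_lk;
	struct callout	ex_timer;
};

static void
example_detach(struct example_softc *ex)
{
	callout_cancel(&ex->ex_timer);		/* synchronous */
	callout_terminate(&ex->ex_timer);	/* no further use allowed */
	lockuninit(&ex->ex_lk);
}
#endif
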
1102 /*
1103  * Currently the same as callout_cancel.  Ultimately we may wish the
1104  * drain function to allow a pending callout to proceed, but for now
1105  * we will attempt to cancel it.
1106  *
1107  * Returns 0 if the callout was not active (or was active and completed,
1108  *	     but didn't try to start a new timeout).
1109  * Returns 1 if the drain is responsible for stopping the callout.
1110  */
1111 int
1112 callout_drain(struct callout *cc)
1113 {
1114 	atomic_clear_int(&cc->flags, CALLOUT_CANCEL_RES);
1115 	_callout_cancel_or_stop(cc, CALLOUT_CANCEL);
1116 
1117 	return ((cc->flags & CALLOUT_CANCEL_RES) ? 1 : 0);
1118 }
1119 
1120 /*
1121  * Stops a callout if it is pending or queued, does not block.
1122  * This function does not interlock against a callout that is in-progress.
1123  *
1124  * Returns whether the STOP operation was responsible for removing a
1125  * queued or pending callout.
1126  */
1127 int
1128 callout_stop_async(struct callout *cc)
1129 {
1130 	softclock_pcpu_t sc;
1131 	struct _callout *c;
1132 	uint32_t flags;
1133 	int res;
1134 
1135 	atomic_clear_int(&cc->flags, CALLOUT_STOP_RES | CALLOUT_ACTIVE);
1136 #ifdef CALLOUT_TYPESTABLE
1137 	if (cc->toc == NULL || cc->toc->verifier != cc)
1138 		return 0;
1139 #else
1140 	KKASSERT(cc->toc.verifier == cc);
1141 #endif
1142 	c = _callout_gettoc(cc);
1143 
1144 	/*
1145 	 * Set STOP or CANCEL request.  If this is a STOP, clear a queued
1146 	 * RESET now.
1147 	 */
1148 	atomic_set_int(&c->flags, CALLOUT_STOP);
1149 	if (c->flags & CALLOUT_RESET) {
1150 		atomic_set_int(&cc->flags, CALLOUT_STOP_RES);
1151 		atomic_clear_int(&c->flags, CALLOUT_RESET);
1152 	}
1153 	sc = softclock_pcpu_ary[mycpu->gd_cpuid];
1154 	res = _callout_process_spinlocked(c, 0);
1155 	flags = cc->flags;
1156 	spin_unlock(&c->spin);
1157 #ifdef CALLOUT_TYPESTABLE
1158 	if (res >= 0)
1159 		_callout_typestable_free(sc, c, res);
1160 #endif
1161 
1162 	return ((flags & CALLOUT_STOP_RES) ? 1 : 0);
1163 }
1164 
1165 /*
1166  * callout_deactivate() merely clears the CALLOUT_ACTIVE bit.  It does
1167  * not stop or dequeue a pending or queued callout, does not block, and
1168  * does not interlock against a callout that is in-progress.
1169  */
1170 void
1171 callout_deactivate(struct callout *cc)
1172 {
1173 	atomic_clear_int(&cc->flags, CALLOUT_ACTIVE);
1174 }
1175 
1176 /*
1177  * Lock-aided callouts are STOPped synchronously using STOP semantics,
1178  * meaning that another thread can start the callout again before we
1179  * return.
1180  *
1181  * Non-lock-aided callouts are stopped asynchronously:
1182  *
1183  * Stops a callout if it is pending or queued, does not block.
1184  * This function does not interlock against a callout that is in-progress.
1185  */
1186 int
1187 callout_stop(struct callout *cc)
1188 {
1189 	if (cc->flags & CALLOUT_AUTOLOCK) {
1190 		atomic_clear_int(&cc->flags, CALLOUT_STOP_RES);
1191 		_callout_cancel_or_stop(cc, CALLOUT_STOP);
1192 		return ((cc->flags & CALLOUT_STOP_RES) ? 1 : 0);
1193 	} else {
1194 		return callout_stop_async(cc);
1195 	}
1196 }
1197 
1198 /*
1199  * Terminates a callout by canceling any pending operation and clearing
1200  * the INIT bit.  Upon return, the callout structure must not be used.
1201  */
1202 void
1203 callout_terminate(struct callout *cc)
1204 {
1205 	_callout_cancel_or_stop(cc, CALLOUT_CANCEL);
1206 	atomic_clear_int(&cc->flags, CALLOUT_DID_INIT);
1207 #ifdef CALLOUT_TYPESTABLE
1208 	atomic_swap_ptr((void *)&cc->toc, NULL);
1209 #else
1210 	cc->toc.verifier = NULL;
1211 #endif
1212 }
1213 
1214 /*
1215  * Returns whether a callout is queued and the time has not yet
1216  * arrived (the callout is not yet in-progress).
1217  */
1218 int
1219 callout_pending(struct callout *cc)
1220 {
1221 	struct _callout *c;
1222 	int res = 0;
1223 
1224 	/*
1225 	 * Don't instantiate toc to test pending
1226 	 */
1227 #ifdef CALLOUT_TYPESTABLE
1228 	if ((c = cc->toc) != NULL) {
1229 #else
1230 	c = &cc->toc;
1231 	KKASSERT(c->verifier == cc);
1232 	{
1233 #endif
1234 		spin_lock(&c->spin);
1235 		if (c->verifier == cc) {
1236 			res = ((c->flags & (CALLOUT_SET|CALLOUT_INPROG)) ==
1237 			       CALLOUT_SET);
1238 		}
1239 		spin_unlock(&c->spin);
1240 	}
1241 	return res;
1242 }
1243 
1244 /*
1245  * Returns whether a callout is active or not.  A callout is active when
1246  * a timeout is set and remains active upon normal termination, even if
1247  * it does not issue a new timeout.  A callout is inactive if a timeout has
1248  * never been set or if the callout has been stopped or canceled.  The next
1249  * timeout that is set will re-set the active state.
1250  */
1251 int
1252 callout_active(struct callout *cc)
1253 {
1254 	return ((cc->flags & CALLOUT_ACTIVE) ? 1 : 0);
1255 }
1256
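/*
 * Callback-side sketch (guarded out, hypothetical): the traditional
 * BSD-style pattern for consulting the predicates above.  The callback
 * skips its work if the callout has been re-armed or deactivated, and
 * clears ACTIVE itself when it does not intend to re-arm.
 */
#if 0
static void
example_callback(void *arg)
{
	struct callout *c = arg;

	if (callout_pending(c) || callout_active(c) == 0)
		return;			/* re-armed or stopped/canceled */
	callout_deactivate(c);		/* we will not re-arm */
	/* ... timeout work ... */
}
#endif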