xref: /dragonfly/sys/kern/kern_timeout.c (revision 3c7e5806)
1 /*
2  * Copyright (c) 2004,2014,2019 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 /*
35  * Copyright (c) 1982, 1986, 1991, 1993
36  *	The Regents of the University of California.  All rights reserved.
37  * (c) UNIX System Laboratories, Inc.
38  * All or some portions of this file are derived from material licensed
39  * to the University of California by American Telephone and Telegraph
40  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
41  * the permission of UNIX System Laboratories, Inc.
42  *
43  * Redistribution and use in source and binary forms, with or without
44  * modification, are permitted provided that the following conditions
45  * are met:
46  * 1. Redistributions of source code must retain the above copyright
47  *    notice, this list of conditions and the following disclaimer.
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  * 3. Neither the name of the University nor the names of its contributors
52  *    may be used to endorse or promote products derived from this software
53  *    without specific prior written permission.
54  *
55  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
56  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
57  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
58  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
59  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
60  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
61  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
62  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
63  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
64  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65  * SUCH DAMAGE.
66  */
67 /*
68  * The original callout mechanism was based on the work of Adam M. Costello
69  * and George Varghese, published in a technical report entitled "Redesigning
70  * the BSD Callout and Timer Facilities" and modified slightly for inclusion
71  * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
72  * used in this implementation was published by G. Varghese and T. Lauck in
73  * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
74  * the Efficient Implementation of a Timer Facility" in the Proceedings of
75  * the 11th ACM Annual Symposium on Operating Systems Principles,
76  * Austin, Texas Nov 1987.
77  */
78 
79 #include <sys/param.h>
80 #include <sys/systm.h>
81 #include <sys/spinlock.h>
82 #include <sys/callout.h>
83 #include <sys/kernel.h>
84 #include <sys/interrupt.h>
85 #include <sys/thread.h>
86 #include <sys/sysctl.h>
87 #ifdef CALLOUT_TYPESTABLE
88 #include <sys/typestable.h>
89 #endif
90 #include <vm/vm_extern.h>
91 #include <machine/atomic.h>
92 
93 #include <sys/spinlock2.h>
94 #include <sys/thread2.h>
95 #include <sys/mplock2.h>
96 
97 TAILQ_HEAD(colist, _callout);
98 struct softclock_pcpu;
99 struct _callout_mag;
100 
101 /*
102  * DID_INIT	- Sanity check
103  * SYNC		- Synchronous waiter, request SYNCDONE and wakeup()
104  * CANCEL_RES	- Flags that a cancel prevented a callback
105  * STOP_RES	- Flags that a stop or cancel prevented a callback
106  * RESET	- Callout_reset request queued
107  * STOP		- Callout_stop request queued
108  * INPROG	- Softclock_handler thread processing in-progress on callout
109  * SET		- Callout is linked to queue (if INPROG not set)
110  * AUTOLOCK	- Lockmgr cancelable interlock
111  * MPSAFE	- Callout is MPSAFE
112  * CANCEL	- callout_cancel request queued
113  * ACTIVE	- active/inactive tracking (see documentation).  This is
114  *		  *NOT* the same as whether a callout is queued or not.
115  */
116 #define CALLOUT_DID_INIT	0x00000001	/* frontend */
117 #define CALLOUT_UNUSED0002	0x00000002
118 #define CALLOUT_UNUSED0004	0x00000004
119 #define CALLOUT_CANCEL_RES	0x00000008	/* frontend */
120 #define CALLOUT_STOP_RES	0x00000010	/* frontend */
121 #define CALLOUT_RESET		0x00000020	/* backend */
122 #define CALLOUT_STOP		0x00000040	/* backend */
123 #define CALLOUT_INPROG		0x00000080	/* backend */
124 #define CALLOUT_SET		0x00000100	/* backend */
125 #define CALLOUT_AUTOLOCK	0x00000200	/* both */
126 #define CALLOUT_MPSAFE		0x00000400	/* both */
127 #define CALLOUT_CANCEL		0x00000800	/* backend */
128 #define CALLOUT_ACTIVE		0x00001000	/* frontend */
129 
130 struct wheel {
131 	struct spinlock spin;
132 	struct colist	list;
133 };
134 
135 struct softclock_pcpu {
136 	struct wheel	*callwheel;
137 	struct _callout *running;
138 	struct _callout * volatile next;
139 #ifdef CALLOUT_TYPESTABLE
140 	struct _callout *quick_obj;
141 #endif
142 	int		softticks;	/* softticks index */
143 	int		curticks;	/* per-cpu ticks counter */
144 	int		isrunning;
145 	struct thread	thread;
146 };
147 
148 typedef struct softclock_pcpu *softclock_pcpu_t;
149 
150 TAILQ_HEAD(maglist, _callout_mag);
151 
152 #if 0
153 static int callout_debug = 0;
154 SYSCTL_INT(_debug, OID_AUTO, callout_debug, CTLFLAG_RW,
155 	   &callout_debug, 0, "");
156 #endif
157 
158 #ifdef CALLOUT_TYPESTABLE
159 static MALLOC_DEFINE(M_CALLOUT, "callouts", "softclock callouts");
160 #endif
161 
162 static int cwheelsize;
163 static int cwheelmask;
164 static softclock_pcpu_t softclock_pcpu_ary[MAXCPU];
165 #ifdef CALLOUT_TYPESTABLE
166 static struct typestable_glob callout_tsg;
167 #endif
168 
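
/*
 * Illustrative sketch (not compiled): how a callout's absolute tick maps
 * to a callwheel bucket.  cwheelsize is a power of 2 and cwheelmask is
 * cwheelsize - 1, so (tick & cwheelmask) selects the bucket; ticks that
 * are cwheelsize apart share a bucket and are told apart by comparing
 * c->qtick against sc->softticks when the bucket is scanned.  The helper
 * name below is hypothetical.
 */
#if 0
static __inline struct wheel *
callout_tick_to_wheel(softclock_pcpu_t sc, int qtick)
{
	/*
	 * e.g. with cwheelsize == 1024 (cwheelmask == 1023), qtick 5000
	 * maps to bucket 5000 & 1023 == 904, as does qtick 6024.
	 */
	return (&sc->callwheel[qtick & cwheelmask]);
}
#endif
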
169 static void softclock_handler(void *arg);
170 static void slotimer_callback(void *arg);
171 
172 #ifdef CALLOUT_TYPESTABLE
173 /*
174  * typestable callback functions.  The init function pre-initializes
175  * the structure in order to allow for reuse without complete
176  * reinitialization (i.e. our spinlock).
177  *
178  * The test function allows us to reject an allocation attempt due
179  * to the object being reassociated out-of-band.
180  */
181 static
182 void
183 _callout_typestable_init(void *obj)
184 {
185 	struct _callout *c = obj;
186 
187 	spin_init(&c->spin, "_callout");
188 }
189 
190 /*
191  * Object may have been reassociated out-of-band.
192  *
193  * Return 1 on success with the spin-lock held, allowing reallocation.
194  * Return 0 on failure with no side effects, rejecting reallocation.
195  */
196 static
197 int
198 _callout_typestable_test(void *obj)
199 {
200 	struct _callout *c = obj;
201 
202 	if (c->flags & (CALLOUT_SET | CALLOUT_INPROG))
203 		return 0;
204 	spin_lock(&c->spin);
205 	if (c->flags & (CALLOUT_SET | CALLOUT_INPROG)) {
206 		spin_unlock(&c->spin);
207 		return 0;
208 	} else {
209 		return 1;
210 	}
211 }
212 
213 /*
214  * NOTE: sc might refer to a different cpu.
215  */
216 static __inline
217 void
218 _callout_typestable_free(softclock_pcpu_t sc, void *obj, int tentative)
219 {
220 	if (tentative == 0) {
221 		obj = atomic_swap_ptr((void *)&sc->quick_obj, obj);
222 		if (obj == NULL)
223 			return;
224 	}
225 	typestable_free(&callout_tsg, obj, tentative);
226 }
227 #endif
228 
229 /*
230  * Post-processing helper for a callout; executes any pending request.
231  * This routine handles post-processing from the softclock thread and
232  * also handles request processing from the API.
233  *
234  * This routine does not block in any way.
235  * Caller must hold c->spin.
236  *
237  * INPROG  - Callback is in-processing / in-progress.
238  *
239  * SET     - Assigned to queue or is in-processing.  If INPROG is set,
240  *	     however, the _callout is no longer in the queue.
241  *
242  * RESET   - New timeout was installed.
243  *
244  * STOP    - Stop requested.
245  *
246  * ACTIVE  - Set on callout_reset(), cleared by callout_stop()
247  *	     or callout_cancel().  Starts out cleared.
248  *
249  * NOTE: Flags can be adjusted without holding c->spin, so atomic ops
250  *	 must be used at all times.
251  *
252  * NOTE: The passed-in (sc) might refer to another cpu.
253  */
254 static __inline
255 int
256 _callout_process_spinlocked(struct _callout *c, int fromsoftclock)
257 {
258 	struct wheel *wheel;
259 	int res = -1;
260 
261 	/*
262 	 * If a callback manipulates the callout that is currently
263 	 * in-progress, we do a partial 'completion' of the operation
264 	 * so it can be processed synchronously, and tell the
265 	 * softclock_handler to stop messing with it.
266 	 */
267 	if (fromsoftclock == 0 && curthread == &c->qsc->thread &&
268 	    c->qsc->running == c) {
269 		c->qsc->running = NULL;
270 		atomic_clear_int(&c->flags, CALLOUT_SET |
271 					    CALLOUT_INPROG);
272 	}
273 
274 	/*
275 	 * Based on source and state
276 	 */
277 	if (fromsoftclock) {
278 		/*
279 		 * From control thread, INPROG is set, handle pending
280 		 * request and normal termination.
281 		 */
282 #ifdef CALLOUT_TYPESTABLE
283 		KASSERT(c->verifier->toc == c,
284 			("callout corrupt: c=%p %s/%d\n",
285 			 c, c->ident, c->lineno));
286 #else
287 		KASSERT(&c->verifier->toc == c,
288 			("callout corrupt: c=%p %s/%d\n",
289 			 c, c->ident, c->lineno));
290 #endif
291 		if (c->flags & CALLOUT_CANCEL) {
292 			/*
293 			 * CANCEL overrides everything.
294 			 *
295 			 * If a RESET is pending it counts as canceling a
296 			 * running timer.
297 			 */
298 			if (c->flags & CALLOUT_RESET)
299 				atomic_set_int(&c->verifier->flags,
300 					       CALLOUT_CANCEL_RES |
301 					       CALLOUT_STOP_RES);
302 			atomic_clear_int(&c->flags, CALLOUT_SET |
303 						    CALLOUT_INPROG |
304 						    CALLOUT_STOP |
305 						    CALLOUT_CANCEL |
306 						    CALLOUT_RESET);
307 			if (c->waiters)
308 				wakeup(c->verifier);
309 			res = 0;
310 		} else if (c->flags & CALLOUT_RESET) {
311 			/*
312 			 * RESET request pending, requeue appropriately.
313 			 */
314 			atomic_clear_int(&c->flags, CALLOUT_RESET |
315 						    CALLOUT_INPROG);
316 			atomic_set_int(&c->flags, CALLOUT_SET);
317 			c->qsc = c->rsc;
318 			c->qarg = c->rarg;
319 			c->qfunc = c->rfunc;
320 			c->qtick = c->rtick;
321 
322 			/*
323 			 * Do not queue to current or past wheel or the
324 			 * callout will be lost for ages.
325 			 */
326 			wheel = &c->qsc->callwheel[c->qtick & cwheelmask];
327 			spin_lock(&wheel->spin);
328 			while (c->qtick - c->qsc->softticks <= 0) {
329 				c->qtick = c->qsc->softticks + 1;
330 				spin_unlock(&wheel->spin);
331 				wheel = &c->qsc->callwheel[c->qtick &
332 							   cwheelmask];
333 				spin_lock(&wheel->spin);
334 			}
335 			TAILQ_INSERT_TAIL(&wheel->list, c, entry);
336 			spin_unlock(&wheel->spin);
337 		} else {
338 			/*
339 			 * STOP request pending or normal termination.  Since
340 			 * this is from our control thread the callout has
341 			 * already been removed from the queue.
342 			 */
343 			atomic_clear_int(&c->flags, CALLOUT_SET |
344 						    CALLOUT_INPROG |
345 						    CALLOUT_STOP);
346 			if (c->waiters)
347 				wakeup(c->verifier);
348 			res = 1;
349 		}
350 	} else if (c->flags & CALLOUT_SET) {
351 		/*
352 		 * Process request from an API function.  qtick and ACTIVE
353 		 * are stable while we hold c->spin.  Checking INPROG requires
354 		 * holding wheel->spin.
355 		 *
356 		 * If INPROG is set the control thread must handle the request
357 		 * for us.
358 		 */
359 		softclock_pcpu_t sc;
360 
361 		sc = c->qsc;
362 
363 		wheel = &sc->callwheel[c->qtick & cwheelmask];
364 		spin_lock(&wheel->spin);
365 		if (c->flags & CALLOUT_INPROG) {
366 			/*
367 			 * API requests are deferred if a callback is in
368 			 * progress and will be handled after the callback
369 			 * returns.
370 			 */
371 		} else if (c->flags & CALLOUT_CANCEL) {
372 			/*
373 			 * CANCEL request overrides everything except INPROG
374 			 * (for INPROG the CANCEL is handled upon completion).
375 			 */
376 			if (sc->next == c)
377 				sc->next = TAILQ_NEXT(c, entry);
378 			TAILQ_REMOVE(&wheel->list, c, entry);
379 			atomic_set_int(&c->verifier->flags, CALLOUT_CANCEL_RES |
380 							    CALLOUT_STOP_RES);
381 			atomic_clear_int(&c->flags, CALLOUT_STOP |
382 						    CALLOUT_SET |
383 						    CALLOUT_CANCEL |
384 						    CALLOUT_RESET);
385 			if (c->waiters)
386 				wakeup(c->verifier);
387 			res = 0;
388 		} else if (c->flags & CALLOUT_RESET) {
389 			/*
390 			 * RESET request pending, requeue appropriately.
391 			 *
392 			 * (ACTIVE is governed by c->spin so we do not have
393 			 *  to clear it prior to releasing wheel->spin).
394 			 */
395 			if (sc->next == c)
396 				sc->next = TAILQ_NEXT(c, entry);
397 			TAILQ_REMOVE(&wheel->list, c, entry);
398 			spin_unlock(&wheel->spin);
399 
400 			atomic_clear_int(&c->flags, CALLOUT_RESET);
401 			/* remain ACTIVE */
402 			sc = c->rsc;
403 			c->qsc = sc;
404 			c->qarg = c->rarg;
405 			c->qfunc = c->rfunc;
406 			c->qtick = c->rtick;
407 
408 			/*
409 			 * Do not queue to current or past wheel or the
410 			 * callout will be lost for ages.
411 			 */
412 			wheel = &sc->callwheel[c->qtick & cwheelmask];
413 			spin_lock(&wheel->spin);
414 			while (c->qtick - sc->softticks <= 0) {
415 				c->qtick = sc->softticks + 1;
416 				spin_unlock(&wheel->spin);
417 				wheel = &sc->callwheel[c->qtick & cwheelmask];
418 				spin_lock(&wheel->spin);
419 			}
420 			TAILQ_INSERT_TAIL(&wheel->list, c, entry);
421 		} else if (c->flags & CALLOUT_STOP) {
422 			/*
423 			 * STOP request
424 			 */
425 			if (sc->next == c)
426 				sc->next = TAILQ_NEXT(c, entry);
427 			TAILQ_REMOVE(&wheel->list, c, entry);
428 			atomic_set_int(&c->verifier->flags, CALLOUT_STOP_RES);
429 			atomic_clear_int(&c->flags, CALLOUT_STOP |
430 						    CALLOUT_SET);
431 			if (c->waiters)
432 				wakeup(c->verifier);
433 			res = 1;
434 		} else {
435 			/*
436 			 * No request pending (someone else processed the
437 			 * request before we could)
438 			 */
439 			/* nop */
440 		}
441 		spin_unlock(&wheel->spin);
442 	} else {
443 		/*
444 		 * Process request from API function.  callout is not
445 		 * active so there's nothing for us to remove.
446 		 */
447 		KKASSERT((c->flags & CALLOUT_INPROG) == 0);
448 		if (c->flags & CALLOUT_CANCEL) {
449 			/*
450 			 * CANCEL request (nothing to cancel)
451 			 */
452 			if (c->flags & CALLOUT_RESET) {
453 				atomic_set_int(&c->verifier->flags,
454 					       CALLOUT_CANCEL_RES |
455 					       CALLOUT_STOP_RES);
456 			}
457 			atomic_clear_int(&c->flags, CALLOUT_STOP |
458 						    CALLOUT_CANCEL |
459 						    CALLOUT_RESET);
460 			if (c->waiters)
461 				wakeup(c->verifier);
462 			res = 0;
463 		} else if (c->flags & CALLOUT_RESET) {
464 			/*
465 			 * RESET request pending, queue appropriately.
466 			 * Do not queue to currently-processing tick.
467 			 */
468 			softclock_pcpu_t sc;
469 
470 			sc = c->rsc;
471 			atomic_clear_int(&c->flags, CALLOUT_RESET);
472 			atomic_set_int(&c->flags, CALLOUT_SET);
473 			c->qsc = sc;
474 			c->qarg = c->rarg;
475 			c->qfunc = c->rfunc;
476 			c->qtick = c->rtick;
477 
478 			/*
479 			 * Do not queue to current or past wheel or the
480 			 * callout will be lost for ages.
481 			 */
482 			wheel = &sc->callwheel[c->qtick & cwheelmask];
483 			spin_lock(&wheel->spin);
484 			while (c->qtick - sc->softticks <= 0) {
485 				c->qtick = sc->softticks + 1;
486 				spin_unlock(&wheel->spin);
487 				wheel = &sc->callwheel[c->qtick & cwheelmask];
488 				spin_lock(&wheel->spin);
489 			}
490 			TAILQ_INSERT_TAIL(&wheel->list, c, entry);
491 			spin_unlock(&wheel->spin);
492 		} else if (c->flags & CALLOUT_STOP) {
493 			/*
494 			 * STOP request (nothing to stop)
495 			 */
496 			atomic_clear_int(&c->flags, CALLOUT_STOP);
497 			if (c->waiters)
498 				wakeup(c->verifier);
499 			res = 1;
500 		} else {
501 			/*
502 			 * No request pending (someone else processed the
503 			 * request before we could)
504 			 */
505 			/* nop */
506 		}
507 	}
508 	return res;
509 }
510 
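
/*
 * Sketch (not compiled): the requeue loops above compare ticks using
 * (c->qtick - sc->softticks <= 0) rather than (c->qtick <= sc->softticks)
 * so that the test still behaves when the per-cpu tick counters wrap.
 * The helper below spells the idiom out; its name is hypothetical.
 */
#if 0
static __inline int
callout_tick_reached(int qtick, int softticks)
{
	/*
	 * Signed subtraction of two wrapping counters yields a small
	 * delta as long as the two values are within INT_MAX/2 of each
	 * other, which hz-scale timeouts always are.
	 */
	return (qtick - softticks <= 0);
}
#endif
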
511 /*
512  * System init
513  */
514 static void
515 swi_softclock_setup(void *arg)
516 {
517 	int cpu;
518 	int i;
519 	int target;
520 
521 	/*
522 	 * Figure out how large a callwheel we need.  It must be a power of 2.
523 	 *
524 	 * ncallout is primarily based on available memory, don't explode
525 	 * the allocations if the system has a lot of cpus.
526 	 */
527 	target = ncallout / ncpus + 16;
528 
529 	cwheelsize = 1;
530 	while (cwheelsize < target)
531 		cwheelsize <<= 1;
532 	cwheelmask = cwheelsize - 1;
533 
534 #ifdef CALLOUT_TYPESTABLE
535 	typestable_init_glob(&callout_tsg, M_CALLOUT,
536 			     sizeof(struct _callout),
537 			     _callout_typestable_test,
538 			     _callout_typestable_init);
539 #endif
540 
541 	/*
542 	 * Initialize per-cpu data structures.
543 	 */
544 	for (cpu = 0; cpu < ncpus; ++cpu) {
545 		softclock_pcpu_t sc;
546 		int wheel_sz;
547 
548 		sc = (void *)kmem_alloc3(&kernel_map, sizeof(*sc),
549 					 VM_SUBSYS_GD, KM_CPU(cpu));
550 		memset(sc, 0, sizeof(*sc));
551 		softclock_pcpu_ary[cpu] = sc;
552 
553 		wheel_sz = sizeof(*sc->callwheel) * cwheelsize;
554 		sc->callwheel = (void *)kmem_alloc3(&kernel_map, wheel_sz,
555 						    VM_SUBSYS_GD, KM_CPU(cpu));
556 		memset(sc->callwheel, 0, wheel_sz);
557 		for (i = 0; i < cwheelsize; ++i) {
558 			spin_init(&sc->callwheel[i].spin, "wheel");
559 			TAILQ_INIT(&sc->callwheel[i].list);
560 		}
561 
562 		/*
563 		 * Mark the softclock handler as being an interrupt thread
564 		 * even though it really isn't one, but do not allow it to
565 		 * preempt other threads (do not assign td_preemptable).
566 		 *
567 		 * Kernel code now assumes that callouts do not preempt
568 		 * the cpu they were scheduled on.
569 		 */
570 		lwkt_create(softclock_handler, sc, NULL, &sc->thread,
571 			    TDF_NOSTART | TDF_INTTHREAD,
572 			    cpu, "softclock %d", cpu);
573 	}
574 }
575 
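
/*
 * Sketch (not compiled) of the callwheel sizing performed above, with
 * illustrative numbers: ncallout == 16384 on a 16-cpu machine gives
 * target == 16384 / 16 + 16 == 1040, which rounds up to a cwheelsize of
 * 2048 and a cwheelmask of 2047.  The helper name is hypothetical.
 */
#if 0
static int
callout_round_wheelsize(int target)
{
	int size = 1;

	while (size < target)
		size <<= 1;
	return (size);			/* caller uses size - 1 as the mask */
}
#endif
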
576 /*
577  * Must occur after ncpus has been initialized.
578  */
579 SYSINIT(softclock_setup, SI_BOOT2_SOFTCLOCK, SI_ORDER_SECOND,
580 	swi_softclock_setup, NULL);
581 
582 /*
583  * This routine is called from the hardclock() (basically a FASTint/IPI) on
584  * each cpu in the system.  sc->curticks is this cpu's notion of the timebase.
585  * It IS NOT NECESSARILY SYNCHRONIZED WITH 'ticks'!  sc->softticks is where
586  * the callwheel is currently indexed.
587  *
588  * sc->softticks is adjusted by either this routine or our helper thread
589  * depending on whether the helper thread is running or not.
590  *
591  * sc->curticks and sc->softticks are adjusted using atomic ops in order
592  * to ensure that remote cpu callout installation does not race the thread.
593  */
594 void
595 hardclock_softtick(globaldata_t gd)
596 {
597 	softclock_pcpu_t sc;
598 	struct wheel *wheel;
599 
600 	sc = softclock_pcpu_ary[gd->gd_cpuid];
601 	atomic_add_int(&sc->curticks, 1);
602 	if (sc->isrunning)
603 		return;
604 	if (sc->softticks == sc->curticks) {
605 		/*
606 		 * In sync, only wakeup the thread if there is something to
607 		 * do.
608 		 */
609 		wheel = &sc->callwheel[sc->softticks & cwheelmask];
610 		spin_lock(&wheel->spin);
611 		if (TAILQ_FIRST(&wheel->list)) {
612 			sc->isrunning = 1;
613 			spin_unlock(&wheel->spin);
614 			lwkt_schedule(&sc->thread);
615 		} else {
616 			atomic_add_int(&sc->softticks, 1);
617 			spin_unlock(&wheel->spin);
618 		}
619 	} else {
620 		/*
621 		 * Out of sync, wakeup the thread unconditionally so it can
622 		 * catch up.
623 		 */
624 		sc->isrunning = 1;
625 		lwkt_schedule(&sc->thread);
626 	}
627 }
628 
629 /*
630  * This procedure is the main loop of our per-cpu helper thread.  The
631  * sc->isrunning flag prevents us from racing hardclock_softtick().
632  *
633  * The thread starts with the MP lock released and not in a critical
634  * section.  The loop itself is MP safe while individual callbacks
635  * may or may not be, so we obtain or release the MP lock as appropriate.
636  */
637 static void
638 softclock_handler(void *arg)
639 {
640 	softclock_pcpu_t sc;
641 	struct _callout *c;
642 	struct wheel *wheel;
643 	struct callout slotimer;
644 	int mpsafe = 1;
645 
646 	/*
647 	 * Setup pcpu slow clocks which we want to run from the callout
648 	 * thread.
649 	 */
650 	callout_init_mp(&slotimer);
651 	callout_reset(&slotimer, hz * 10, slotimer_callback, &slotimer);
652 
653 	/*
654 	 * Run the callout thread at the same priority as other kernel
655 	 * threads so it can be round-robined.
656 	 */
657 	/*lwkt_setpri_self(TDPRI_SOFT_NORM);*/
658 
659 	sc = arg;
660 loop:
661 	while (sc->softticks != (int)(sc->curticks + 1)) {
662 		wheel = &sc->callwheel[sc->softticks & cwheelmask];
663 
664 		spin_lock(&wheel->spin);
665 		sc->next = TAILQ_FIRST(&wheel->list);
666 		while ((c = sc->next) != NULL) {
667 			int error;
668 			int res;
669 
670 			/*
671 			 * Match callouts for this tick.  The wheel spinlock
672 			 * is sufficient to set INPROG.  Once set, other
673 			 * threads can make only limited changes to (c)
674 			 */
675 			sc->next = TAILQ_NEXT(c, entry);
676 			if (c->qtick != sc->softticks)
677 				continue;
678 			TAILQ_REMOVE(&wheel->list, c, entry);
679 			atomic_set_int(&c->flags, CALLOUT_INPROG);
680 			sc->running = c;
681 			spin_unlock(&wheel->spin);
682 
683 			/*
684 			 * legacy mplock support
685 			 */
686 			if (c->flags & CALLOUT_MPSAFE) {
687 				if (mpsafe == 0) {
688 					mpsafe = 1;
689 					rel_mplock();
690 				}
691 			} else {
692 				if (mpsafe) {
693 					mpsafe = 0;
694 					get_mplock();
695 				}
696 			}
697 
698 			/*
699 			 * Execute function (protected by INPROG)
700 			 */
701 			if (c->flags & (CALLOUT_STOP | CALLOUT_CANCEL)) {
702 				/*
703 				 * Raced a stop or cancel request, do
704 				 * not execute.  The processing code
705 				 * thinks it's a normal completion so
706 				 * flag the fact that cancel/stop actually
707 				 * prevented a callout here.
708 				 */
709 				if (c->flags & CALLOUT_CANCEL) {
710 					atomic_set_int(&c->verifier->flags,
711 						       CALLOUT_CANCEL_RES |
712 						       CALLOUT_STOP_RES);
713 				} else if (c->flags & CALLOUT_STOP) {
714 					atomic_set_int(&c->verifier->flags,
715 						       CALLOUT_STOP_RES);
716 				}
717 			} else if (c->flags & CALLOUT_RESET) {
718 				/*
719 				 * A RESET raced, make it seem like it
720 				 * didn't.  Do nothing here and let the
721 				 * process routine requeue us.
722 				 */
723 			} else if (c->flags & CALLOUT_AUTOLOCK) {
724 				/*
725 				 * Interlocked cancelable call.  If the
726 				 * lock gets canceled we have to flag the
727 				 * fact that the cancel/stop actually
728 				 * prevented the callout here.
729 				 */
730 				error = lockmgr(c->lk, LK_EXCLUSIVE |
731 						       LK_CANCELABLE);
732 				if (error == 0) {
733 					c->qfunc(c->qarg);
734 					lockmgr(c->lk, LK_RELEASE);
735 				} else if (c->flags & CALLOUT_CANCEL) {
736 					atomic_set_int(&c->verifier->flags,
737 						       CALLOUT_CANCEL_RES |
738 						       CALLOUT_STOP_RES);
739 				} else if (c->flags & CALLOUT_STOP) {
740 					atomic_set_int(&c->verifier->flags,
741 						       CALLOUT_STOP_RES);
742 				}
743 			} else {
744 				/*
745 				 * Normal call
746 				 */
747 				c->qfunc(c->qarg);
748 			}
749 
750 			if (sc->running == c) {
751 				/*
752 				 * We are still INPROG so (c) remains valid, but
753 				 * the callout is now governed by its internal
754 				 * spin-lock.
755 				 */
756 				spin_lock(&c->spin);
757 				res = _callout_process_spinlocked(c, 1);
758 				spin_unlock(&c->spin);
759 #ifdef CALLOUT_TYPESTABLE
760 				if (res >= 0)
761 					_callout_typestable_free(sc, c, res);
762 #endif
763 			}
764 			spin_lock(&wheel->spin);
765 		}
766 		sc->running = NULL;
767 		spin_unlock(&wheel->spin);
768 		atomic_add_int(&sc->softticks, 1);
769 	}
770 
771 	/*
772 	 * Don't leave us holding the MP lock when we deschedule ourselves.
773 	 */
774 	if (mpsafe == 0) {
775 		mpsafe = 1;
776 		rel_mplock();
777 	}
778 
779 	/*
780 	 * Recheck in critical section to interlock against hardclock
781 	 */
782 	crit_enter();
783 	if (sc->softticks == (int)(sc->curticks + 1)) {
784 		sc->isrunning = 0;
785 		lwkt_deschedule_self(&sc->thread);	/* == curthread */
786 		lwkt_switch();
787 	}
788 	crit_exit();
789 	goto loop;
790 	/* NOT REACHED */
791 }
792 
793 /*
794  * A very slow system cleanup timer (10 second interval),
795  * per-cpu.
796  */
797 void
798 slotimer_callback(void *arg)
799 {
800 	struct callout *c = arg;
801 
802 	slab_cleanup();
803 	callout_reset(c, hz * 10, slotimer_callback, c);
804 }
805 
806 /*
807  * API FUNCTIONS
808  */
809 
810 /*
811  * Prepare a callout structure for use by callout_reset() and/or
812  * callout_stop().
813  *
814  * The MP version of this routine requires that the callback
815  * function installed by callout_reset() be MP safe.
816  *
817  * The LK version of this routine is also MPsafe and will automatically
818  * acquire the specified lock for the duration of the function call,
819  * and release it after the function returns.  In addition, when autolocking
820  * is used, callout_stop() becomes synchronous if the caller owns the lock.
821  * callout_reset(), callout_stop(), and callout_cancel() will block
822  * normally instead of spinning when a cpu race occurs.  Lock cancelation
823  * is used to avoid deadlocks against the callout ring dispatch.
824  *
825  * The init functions can be called from any cpu and do not have to be
826  * called from the cpu that the timer will eventually run on.
827  */
828 static __inline void
829 _callout_setup(struct callout *cc, int flags CALLOUT_DEBUG_ARGS)
830 {
831 	bzero(cc, sizeof(*cc));
832 	cc->flags = flags;		/* frontend flags */
833 #ifdef CALLOUT_DEBUG
834 #ifdef CALLOUT_TYPESTABLE
835 	cc->ident = ident;
836 	cc->lineno = lineno;
837 #else
838 	cc->toc.verifier = cc;		/* corruption detector */
839 	cc->toc.ident = ident;
840 	cc->toc.lineno = lineno;
841 	cc->toc.flags = flags;		/* backend flags */
842 #endif
843 #endif
844 }
845 
846 /*
847  * Associate an internal _callout with the external callout and
848  * verify that the type-stable structure is still applicable (inactive
849  * type-stable _callouts might have been reused for a different callout).
850  * If not, a new internal structure will be allocated.
851  *
852  * Returns the _callout already spin-locked.
853  */
854 static __inline
855 struct _callout *
856 _callout_gettoc(struct callout *cc)
857 {
858 	struct _callout *c;
859 #ifdef CALLOUT_TYPESTABLE
860 	softclock_pcpu_t sc;
861 
862 	KKASSERT(cc->flags & CALLOUT_DID_INIT);
863 	for (;;) {
864 		c = cc->toc;
865 		cpu_ccfence();
866 		if (c == NULL) {
867 			sc = softclock_pcpu_ary[mycpu->gd_cpuid];
868 			c = atomic_swap_ptr((void *)&sc->quick_obj, NULL);
869 			if (c == NULL || _callout_typestable_test(c) == 0)
870 				c = typestable_alloc(&callout_tsg);
871 			/* returns spin-locked */
872 			c->verifier = cc;
873 			c->flags = cc->flags;
874 			c->lk = cc->lk;
875 			c->ident = cc->ident;
876 			c->lineno = cc->lineno;
877 			if (atomic_cmpset_ptr(&cc->toc, NULL, c)) {
878 				break;
879 			}
880 			c->verifier = NULL;
881 			spin_unlock(&c->spin);
882 			_callout_typestable_free(sc, c, 0);
883 		} else {
884 			spin_lock(&c->spin);
885 			if (c->verifier == cc)
886 				break;
887 			spin_unlock(&c->spin);
888 			/* ok if atomic op fails */
889 			(void)atomic_cmpset_ptr(&cc->toc, c, NULL);
890 		}
891 	}
892 #else
893 	c = &cc->toc;
894 	spin_lock(&c->spin);
895 #endif
896 	/* returns with spin-lock held */
897 	return c;
898 }
899 
900 /*
901  * Macroed in sys/callout.h for debugging
902  *
903  * WARNING! tsleep() assumes this will not block
904  */
905 void
906 _callout_init(struct callout *cc CALLOUT_DEBUG_ARGS)
907 {
908 	_callout_setup(cc, CALLOUT_DID_INIT
909 			CALLOUT_DEBUG_PASSTHRU);
910 }
911 
912 void
913 _callout_init_mp(struct callout *cc CALLOUT_DEBUG_ARGS)
914 {
915 	_callout_setup(cc, CALLOUT_DID_INIT | CALLOUT_MPSAFE
916 			CALLOUT_DEBUG_PASSTHRU);
917 }
918 
919 void
920 _callout_init_lk(struct callout *cc, struct lock *lk CALLOUT_DEBUG_ARGS)
921 {
922 	_callout_setup(cc, CALLOUT_DID_INIT | CALLOUT_MPSAFE |
923 			   CALLOUT_AUTOLOCK
924 			CALLOUT_DEBUG_PASSTHRU);
925 #ifdef CALLOUT_TYPESTABLE
926 	cc->lk = lk;
927 #else
928 	cc->toc.lk = lk;
929 #endif
930 }
931 
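
/*
 * Usage sketch (not compiled): choosing an init variant.  The softc,
 * lock, and function names below are hypothetical; only the callout and
 * lockmgr APIs are taken as-is.
 */
#if 0
struct mydev_softc {
	struct lock	lk;		/* lockmgr lock protecting the softc */
	struct callout	timer;
};

static void
mydev_setup_timer(struct mydev_softc *sc)
{
	/*
	 * MPSAFE callback with an automatic lockmgr interlock: the
	 * callback runs with sc->lk held and the stop/cancel paths use
	 * lock cancelation to avoid deadlocking against the dispatch.
	 */
	callout_init_lk(&sc->timer, &sc->lk);

	/* or, for a callback that does its own locking: */
	/* callout_init_mp(&sc->timer); */
}
#endif
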
932 /*
933  * Start or restart a timeout.  New timeouts can be installed while the
934  * current one is running.
935  *
936  * Installs the callout structure on the callwheel of the current cpu.
937  * Callers may legally pass any value, even if 0 or negative, but since
938  * the sc->curticks index may have already been processed, a minimum
939  * timeout of 1 tick will be enforced.
940  *
941  * This function will not deadlock against a running call.
942  *
943  * WARNING! tsleep() assumes this will not block
944  */
945 void
946 callout_reset(struct callout *cc, int to_ticks, void (*ftn)(void *), void *arg)
947 {
948 	softclock_pcpu_t sc;
949 	struct _callout *c;
950 	int res;
951 
952 	atomic_set_int(&cc->flags, CALLOUT_ACTIVE);
953 	c = _callout_gettoc(cc);
954 
955 	/*
956 	 * Set RESET.  Do not clear STOP here (let the process code do it).
957 	 */
958 	atomic_set_int(&c->flags, CALLOUT_RESET);
959 	sc = softclock_pcpu_ary[mycpu->gd_cpuid];
960 	c->rsc = sc;
961 	c->rtick = sc->curticks + to_ticks;
962 	c->rfunc = ftn;
963 	c->rarg = arg;
964 #ifdef CALLOUT_TYPESTABLE
965 	cc->arg = arg;	/* only used by callout_arg() */
966 #endif
967 	res = _callout_process_spinlocked(c, 0);
968 	spin_unlock(&c->spin);
969 #ifdef CALLOUT_TYPESTABLE
970 	if (res >= 0)
971 		_callout_typestable_free(sc, c, res);
972 #endif
973 }
974 
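
/*
 * Usage sketch (not compiled): arming a one-shot timeout and re-arming it
 * from its own callback, mirroring the slotimer pattern above.  The
 * device structure and function names are hypothetical.
 */
#if 0
static void
mydev_watchdog(void *arg)
{
	struct mydev_softc *sc = arg;

	/* ... check device state ... */

	/* re-arm for another 5 seconds; a minimum of 1 tick is enforced */
	callout_reset(&sc->timer, 5 * hz, mydev_watchdog, sc);
}

static void
mydev_start_watchdog(struct mydev_softc *sc)
{
	callout_reset(&sc->timer, 5 * hz, mydev_watchdog, sc);
}
#endif
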
975 /*
976  * Same as callout_reset() but the timeout will run on a particular cpu.
977  */
978 void
979 callout_reset_bycpu(struct callout *cc, int to_ticks, void (*ftn)(void *),
980 		    void *arg, int cpuid)
981 {
982 	softclock_pcpu_t sc;
983 	struct _callout *c;
984 	int res;
985 
986 	atomic_set_int(&cc->flags, CALLOUT_ACTIVE);
987 	c = _callout_gettoc(cc);
988 
989 	/*
990 	 * Set RESET.  Do not clear STOP here (let the process code do it).
991 	 */
992 	atomic_set_int(&c->flags, CALLOUT_RESET);
993 
994 	sc = softclock_pcpu_ary[cpuid];
995 	c->rsc = sc;
996 	c->rtick = sc->curticks + to_ticks;
997 	c->rfunc = ftn;
998 	c->rarg = arg;
999 #ifdef CALLOUT_TYPESTABLE
1000 	cc->arg = arg;	/* only used by callout_arg() */
1001 #endif
1002 	res = _callout_process_spinlocked(c, 0);
1003 	spin_unlock(&c->spin);
1004 #ifdef CALLOUT_TYPESTABLE
1005 	if (res >= 0)
1006 		_callout_typestable_free(sc, c, res);
1007 #endif
1008 }
1009 
1010 static __inline
1011 void
1012 _callout_cancel_or_stop(struct callout *cc, uint32_t flags)
1013 {
1014 	struct _callout *c;
1015 	softclock_pcpu_t sc;
1016 	int res;
1017 
1018 #ifdef CALLOUT_TYPESTABLE
1019 	if (cc->toc == NULL || cc->toc->verifier != cc)
1020 		return;
1021 #else
1022 	KKASSERT(cc->toc.verifier == cc);
1023 #endif
1024 	/*
1025 	 * Setup for synchronous
1026 	 */
1027 	atomic_clear_int(&cc->flags, CALLOUT_ACTIVE);
1028 	c = _callout_gettoc(cc);
1029 
1030 	/*
1031 	 * Set STOP or CANCEL request.  If this is a STOP, clear a queued
1032 	 * RESET now.
1033 	 */
1034 	atomic_set_int(&c->flags, flags);
1035 	if (flags & CALLOUT_STOP) {
1036 		if (c->flags & CALLOUT_RESET) {
1037 			atomic_set_int(&cc->flags, CALLOUT_STOP_RES);
1038 			atomic_clear_int(&c->flags, CALLOUT_RESET);
1039 		}
1040 	}
1041 	sc = softclock_pcpu_ary[mycpu->gd_cpuid];
1042 	res = _callout_process_spinlocked(c, 0);
1043 	spin_unlock(&c->spin);
1044 #ifdef CALLOUT_TYPESTABLE
1045 	if (res >= 0)
1046 		_callout_typestable_free(sc, c, res);
1047 #endif
1048 
1049 	/*
1050 	 * Wait for the CANCEL or STOP to finish.
1051 	 *
1052 	 * WARNING! (c) can go stale now, so do not use (c) after this
1053 	 *	    point. XXX
1054 	 */
1055 	if (c->flags & flags) {
1056 		atomic_add_int(&c->waiters, 1);
1057 #ifdef CALLOUT_TYPESTABLE
1058 		if (cc->flags & CALLOUT_AUTOLOCK)
1059 			lockmgr(cc->lk, LK_CANCEL_BEG);
1060 #else
1061 		if (cc->flags & CALLOUT_AUTOLOCK)
1062 			lockmgr(c->lk, LK_CANCEL_BEG);
1063 #endif
1064 		for (;;) {
1065 			tsleep_interlock(cc, 0);
1066 			if ((atomic_fetchadd_int(&c->flags, 0) & flags) == 0)
1067 				break;
1068 			tsleep(cc, PINTERLOCKED, "costp", 0);
1069 		}
1070 #ifdef CALLOUT_TYPESTABLE
1071 		if (cc->flags & CALLOUT_AUTOLOCK)
1072 			lockmgr(cc->lk, LK_CANCEL_END);
1073 #else
1074 		if (cc->flags & CALLOUT_AUTOLOCK)
1075 			lockmgr(c->lk, LK_CANCEL_END);
1076 #endif
1077 		atomic_add_int(&c->waiters, -1);
1078 	}
1079 	KKASSERT(cc->toc.verifier == cc);
1080 }
1081 
1082 /*
1083  * This is a synchronous STOP which cancels the callout.  If AUTOLOCK
1084  * then a CANCEL will be issued to the lock holder.  Unlike STOP, the
1085  * cancel function prevents any new callout_reset()s from being issued
1086  * in addition to canceling the lock.  The lock will also be deactivated.
1087  *
1088  * Returns 0 if the callout was not active (or was active and completed,
1089  *	     but didn't try to start a new timeout).
1090  * Returns 1 if the cancel is responsible for stopping the callout.
1091  */
1092 int
1093 callout_cancel(struct callout *cc)
1094 {
1095 	atomic_clear_int(&cc->flags, CALLOUT_CANCEL_RES);
1096 	_callout_cancel_or_stop(cc, CALLOUT_CANCEL);
1097 
1098 	return ((cc->flags & CALLOUT_CANCEL_RES) ? 1 : 0);
1099 }
1100 
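
/*
 * Usage sketch (not compiled): callout_cancel() is synchronous, so once
 * it returns the callback is guaranteed not to be running and will not
 * run again until the callout is reset.  The structure and function
 * names are hypothetical.
 */
#if 0
static void
mydev_stop_timer(struct mydev_softc *sc)
{
	if (callout_cancel(&sc->timer)) {
		/* our cancel stopped a queued or in-progress callout */
	} else {
		/* nothing was pending, or the callback already completed */
	}
	/* state referenced by the callback may be torn down from here on */
}
#endif
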
1101 /*
1102  * Currently the same as callout_cancel.  Ultimately we may wish the
1103  * drain function to allow a pending callout to proceed, but for now
1104  * we will attempt to cancel it.
1105  *
1106  * Returns 0 if the callout was not active (or was active and completed,
1107  *	     but didn't try to start a new timeout).
1108  * Returns 1 if the drain is responsible for stopping the callout.
1109  */
1110 int
1111 callout_drain(struct callout *cc)
1112 {
1113 	atomic_clear_int(&cc->flags, CALLOUT_CANCEL_RES);
1114 	_callout_cancel_or_stop(cc, CALLOUT_CANCEL);
1115 
1116 	return ((cc->flags & CALLOUT_CANCEL_RES) ? 1 : 0);
1117 }
1118 
1119 /*
1120  * Stops a callout if it is pending or queued, does not block.
1121  * This function does not interlock against a callout that is in-progress.
1122  *
1123  * Returns whether the STOP operation was responsible for removing a
1124  * queued or pending callout.
1125  */
1126 int
1127 callout_stop_async(struct callout *cc)
1128 {
1129 	softclock_pcpu_t sc;
1130 	struct _callout *c;
1131 	uint32_t flags;
1132 	int res;
1133 
1134 	atomic_clear_int(&cc->flags, CALLOUT_STOP_RES | CALLOUT_ACTIVE);
1135 #ifdef CALLOUT_TYPESTABLE
1136 	if (cc->toc == NULL || cc->toc->verifier != cc)
1137 		return 0;
1138 #else
1139 	KKASSERT(cc->toc.verifier == cc);
1140 #endif
1141 	c = _callout_gettoc(cc);
1142 
1143 	/*
1144 	 * Set STOP or CANCEL request.  If this is a STOP, clear a queued
1145 	 * RESET now.
1146 	 */
1147 	atomic_set_int(&c->flags, CALLOUT_STOP);
1148 	if (c->flags & CALLOUT_RESET) {
1149 		atomic_set_int(&cc->flags, CALLOUT_STOP_RES);
1150 		atomic_clear_int(&c->flags, CALLOUT_RESET);
1151 	}
1152 	sc = softclock_pcpu_ary[mycpu->gd_cpuid];
1153 	res = _callout_process_spinlocked(c, 0);
1154 	flags = cc->flags;
1155 	spin_unlock(&c->spin);
1156 #ifdef CALLOUT_TYPESTABLE
1157 	if (res >= 0)
1158 		_callout_typestable_free(sc, c, res);
1159 #endif
1160 
1161 	return ((flags & CALLOUT_STOP_RES) ? 1 : 0);
1162 }
1163 
1164 /*
1165  * Callout deactivate merely clears the CALLOUT_ACTIVE bit.  It does
1166  * not remove a pending or queued callout, does not block, and does not
1167  * interlock against a callout that is in-progress.
1168  */
1169 void
1170 callout_deactivate(struct callout *cc)
1171 {
1172 	atomic_clear_int(&cc->flags, CALLOUT_ACTIVE);
1173 }
1174 
1175 /*
1176  * Lock-aided callouts are STOPped synchronously using STOP semantics
1177  * (meaning that another thread can start the callout again before we
1178  * return).
1179  *
1180  * Non-lock-aided callouts are stopped asynchronously: the callout is
1181  * removed if it is pending or queued, but the call does not block and
1182  * does not interlock against a callout that is in-progress.
1184  */
1185 int
1186 callout_stop(struct callout *cc)
1187 {
1188 	if (cc->flags & CALLOUT_AUTOLOCK) {
1189 		atomic_clear_int(&cc->flags, CALLOUT_STOP_RES);
1190 		_callout_cancel_or_stop(cc, CALLOUT_STOP);
1191 		return ((cc->flags & CALLOUT_STOP_RES) ? 1 : 0);
1192 	} else {
1193 		return callout_stop_async(cc);
1194 	}
1195 }
1196 
1197 /*
1198  * Terminates a callout by canceling operations and then clears the
1199  * INIT bit.  Upon return, the callout structure must not be used.
1200  */
1201 void
1202 callout_terminate(struct callout *cc)
1203 {
1204 	_callout_cancel_or_stop(cc, CALLOUT_CANCEL);
1205 	atomic_clear_int(&cc->flags, CALLOUT_DID_INIT);
1206 #ifdef CALLOUT_TYPESTABLE
1207 	atomic_swap_ptr((void *)&cc->toc, NULL);
1208 #else
1209 	cc->toc.verifier = NULL;
1210 #endif
1211 }
1212 
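
/*
 * Usage sketch (not compiled): callout_terminate() is the final call made
 * against a callout before the memory containing it is freed or reused.
 * The structure and malloc type names are hypothetical.
 */
#if 0
static void
mydev_destroy(struct mydev_softc *sc)
{
	callout_terminate(&sc->timer);	/* cancel and clear DID_INIT */
	kfree(sc, M_MYDEV);		/* the struct callout goes with it */
}
#endif
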
1213 /*
1214  * Returns whether a callout is queued and the time has not yet
1215  * arrived (the callout is not yet in-progress).
1216  */
1217 int
1218 callout_pending(struct callout *cc)
1219 {
1220 	struct _callout *c;
1221 	int res = 0;
1222 
1223 	/*
1224 	 * Don't instantiate toc to test pending
1225 	 */
1226 #ifdef CALLOUT_TYPESTABLE
1227 	if ((c = cc->toc) != NULL) {
1228 #else
1229 	c = &cc->toc;
1230 	KKASSERT(c->verifier == cc);
1231 	{
1232 #endif
1233 		spin_lock(&c->spin);
1234 		if (c->verifier == cc) {
1235 			res = ((c->flags & (CALLOUT_SET|CALLOUT_INPROG)) ==
1236 			       CALLOUT_SET);
1237 		}
1238 		spin_unlock(&c->spin);
1239 	}
1240 	return res;
1241 }
1242 
1243 /*
1244  * Returns whether a callout is active or not.  A callout is active when
1245  * a timeout is set and remains active upon normal termination, even if
1246  * it does not issue a new timeout.  A callout is inactive if a timeout has
1247  * never been set or if the callout has been stopped or canceled.  The next
1248  * timeout that is set will re-set the active state.
1249  */
1250 int
1251 callout_active(struct callout *cc)
1252 {
1253 	return ((cc->flags & CALLOUT_ACTIVE) ? 1 : 0);
1254 }
1255
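
/*
 * Usage sketch (not compiled): a common BSD-style pattern for a callback
 * that uses callout_pending()/callout_active() under the caller's lock to
 * detect a reset or stop that raced the dispatch.  The softc, lock, and
 * function names are hypothetical.
 */
#if 0
static void
mydev_timeout(void *arg)
{
	struct mydev_softc *sc = arg;

	lockmgr(&sc->lk, LK_EXCLUSIVE);
	if (callout_pending(&sc->timer) || !callout_active(&sc->timer)) {
		/* re-armed or stopped while we were being dispatched */
		lockmgr(&sc->lk, LK_RELEASE);
		return;
	}
	callout_deactivate(&sc->timer);

	/* ... perform the actual timeout work ... */

	lockmgr(&sc->lk, LK_RELEASE);
}
#endif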