xref: /dragonfly/sys/kern/kern_timeout.c (revision d4ef6694)
/*
 * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	From: @(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 * $FreeBSD: src/sys/kern/kern_timeout.c,v 1.59.2.1 2001/11/13 18:24:52 archie Exp $
 */
/*
 * DRAGONFLY BGL STATUS
 *
 *	All the API functions should be MP safe.
 *
 *	The callback functions will be flagged as being MP safe if the
 *	timeout structure is initialized with callout_init_mp() instead of
 *	callout_init().
 *
 *	The helper threads cannot be made preempt-capable until after we
 *	clean up all the uses of splsoftclock() and related interlocks (which
 *	require the related functions to be MP safe as well).
 */
/*
 * The callout mechanism is based on the work of Adam M. Costello and
 * George Varghese, published in a technical report entitled "Redesigning
 * the BSD Callout and Timer Facilities" and modified slightly for inclusion
 * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
 * used in this implementation was published by G. Varghese and T. Lauck in
 * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
 * the Efficient Implementation of a Timer Facility" in the Proceedings of
 * the 11th ACM Annual Symposium on Operating Systems Principles,
 * Austin, Texas Nov 1987.
 *
 * The per-cpu augmentation was done by Matthew Dillon.
 */
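/*
 * Sketch of the bucket hashing (illustrative only; the concrete numbers
 * below are made up and not derived from this file):
 *
 * A callout expiring at tick c_time (in the per-cpu curticks timebase)
 * is placed in bucket (c_time & callwheelmask), so insertion and removal
 * are O(1) and each softclock tick only scans a single bucket.  For
 * example, with callwheelsize = 256 and callwheelmask = 255, a callout
 * scheduled for tick 1000 hashes to bucket 1000 & 255 == 232.  Callouts
 * for ticks 1256, 1512, ... share that bucket; softclock_handler()
 * skips them via its c_time != softticks check until their tick comes
 * around.
 */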

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/interrupt.h>
#include <sys/thread.h>

#include <sys/thread2.h>
#include <sys/mplock2.h>

#ifndef MAX_SOFTCLOCK_STEPS
#define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */
#endif

struct softclock_pcpu {
	struct callout_tailq *callwheel;
	struct callout * volatile next;
	struct callout *running;	/* currently running callout */
	int softticks;			/* softticks index */
	int curticks;			/* per-cpu ticks counter */
	int isrunning;
	struct thread thread;
};

typedef struct softclock_pcpu *softclock_pcpu_t;

/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */
static MALLOC_DEFINE(M_CALLOUT, "callout", "callout structures");
static int callwheelsize;
static int callwheelmask;
static struct softclock_pcpu softclock_pcpu_ary[MAXCPU];

static void softclock_handler(void *arg);
static void slotimer_callback(void *arg);

static void
swi_softclock_setup(void *arg)
{
	int cpu;
	int i;
	int target;

	/*
	 * Figure out how large a callwheel we need.  It must be a power of 2.
	 *
	 * ncallout is primarily based on available memory; don't explode
	 * the allocations if the system has a lot of cpus.
	 */
	target = ncallout / ncpus + 16;

	callwheelsize = 1;
	while (callwheelsize < target)
		callwheelsize <<= 1;
	callwheelmask = callwheelsize - 1;

	/*
	 * Initialize per-cpu data structures.
	 */
	for (cpu = 0; cpu < ncpus; ++cpu) {
		softclock_pcpu_t sc;

		sc = &softclock_pcpu_ary[cpu];

		sc->callwheel = kmalloc(sizeof(*sc->callwheel) * callwheelsize,
					M_CALLOUT, M_WAITOK|M_ZERO);
		for (i = 0; i < callwheelsize; ++i)
			TAILQ_INIT(&sc->callwheel[i]);

		/*
		 * Mark the softclock handler as being an interrupt thread
		 * even though it really isn't, but do not allow it to
		 * preempt other threads (do not assign td_preemptable).
		 *
		 * Kernel code now assumes that callouts do not preempt
		 * the cpu they were scheduled on.
		 */
		lwkt_create(softclock_handler, sc, NULL,
			    &sc->thread, TDF_NOSTART | TDF_INTTHREAD,
			    cpu, "softclock %d", cpu);
	}
}

/*
 * Must occur after ncpus has been initialized.
 */
SYSINIT(softclock_setup, SI_BOOT2_SOFTCLOCK, SI_ORDER_SECOND,
	swi_softclock_setup, NULL);

/*
 * This routine is called from hardclock() (basically a FASTint/IPI) on
 * each cpu in the system.  sc->curticks is this cpu's notion of the timebase.
 * It IS NOT NECESSARILY SYNCHRONIZED WITH 'ticks'!  sc->softticks is where
 * the callwheel is currently indexed.
 *
 * WARNING!  The MP lock is not necessarily held on call, nor can it be
 * safely obtained.
 *
 * sc->softticks is adjusted by either this routine or our helper thread
 * depending on whether the helper thread is running or not.
 */
void
hardclock_softtick(globaldata_t gd)
{
	softclock_pcpu_t sc;

	sc = &softclock_pcpu_ary[gd->gd_cpuid];
	++sc->curticks;
	if (sc->isrunning)
		return;
	if (sc->softticks == sc->curticks) {
		/*
		 * In sync, only wake up the thread if there is something to
		 * do.
		 */
		if (TAILQ_FIRST(&sc->callwheel[sc->softticks & callwheelmask]))
		{
			sc->isrunning = 1;
			lwkt_schedule(&sc->thread);
		} else {
			++sc->softticks;
		}
	} else {
		/*
		 * Out of sync, wake up the thread unconditionally so it can
		 * catch up.
		 */
		sc->isrunning = 1;
		lwkt_schedule(&sc->thread);
	}
}

/*
 * This procedure is the main loop of our per-cpu helper thread.  The
 * sc->isrunning flag prevents us from racing hardclock_softtick() and
 * a critical section is sufficient to interlock sc->curticks and protect
 * us from remote IPIs / list removal.
 *
 * The thread starts with the MP lock released and not in a critical
 * section.  The loop itself is MP safe while individual callbacks
 * may or may not be, so we obtain or release the MP lock as appropriate.
 */
static void
softclock_handler(void *arg)
{
	softclock_pcpu_t sc;
	struct callout *c;
	struct callout_tailq *bucket;
	struct callout slotimer;
	void (*c_func)(void *);
	void *c_arg;
	int mpsafe = 1;

	/*
	 * Set up pcpu slow clocks which we want to run from the callout
	 * thread.
	 */
	callout_init_mp(&slotimer);
	callout_reset(&slotimer, hz * 10, slotimer_callback, &slotimer);

	/*
	 * Run the callout thread at the same priority as other kernel
	 * threads so it can be round-robined.
	 */
	/*lwkt_setpri_self(TDPRI_SOFT_NORM);*/

	sc = arg;
	crit_enter();
loop:
	while (sc->softticks != (int)(sc->curticks + 1)) {
		bucket = &sc->callwheel[sc->softticks & callwheelmask];

		for (c = TAILQ_FIRST(bucket); c; c = sc->next) {
			if (c->c_time != sc->softticks) {
				sc->next = TAILQ_NEXT(c, c_links.tqe);
				continue;
			}
			if (c->c_flags & CALLOUT_MPSAFE) {
				if (mpsafe == 0) {
					mpsafe = 1;
					rel_mplock();
				}
			} else {
				/*
				 * The request might be removed while we
				 * are waiting to get the MP lock.  If it
				 * was removed sc->next will point to the
				 * next valid request or NULL, loop up.
				 */
				if (mpsafe) {
					mpsafe = 0;
					sc->next = c;
					get_mplock();
					if (c != sc->next)
						continue;
				}
			}
			sc->next = TAILQ_NEXT(c, c_links.tqe);
			TAILQ_REMOVE(bucket, c, c_links.tqe);

			sc->running = c;
			c_func = c->c_func;
			c_arg = c->c_arg;
			c->c_func = NULL;
			KKASSERT(c->c_flags & CALLOUT_DID_INIT);
			c->c_flags &= ~CALLOUT_PENDING;
			crit_exit();
			c_func(c_arg);
			crit_enter();
			sc->running = NULL;
			/* NOTE: list may have changed */
		}
		++sc->softticks;
	}

	/*
	 * Don't leave us holding the MP lock when we deschedule ourselves.
	 */
	if (mpsafe == 0) {
		mpsafe = 1;
		rel_mplock();
	}
	sc->isrunning = 0;
	lwkt_deschedule_self(&sc->thread);	/* == curthread */
	lwkt_switch();
	goto loop;
	/* NOT REACHED */
}

/*
 * A very slow system cleanup timer (10 second interval),
 * per-cpu.
 */
static void
slotimer_callback(void *arg)
{
	struct callout *c = arg;

	slab_cleanup();
	callout_reset(c, hz * 10, slotimer_callback, c);
}

/*
 * New interface; clients allocate their own callout structures.
 *
 * callout_reset() - establish or change a timeout
 * callout_stop() - disestablish a timeout
 * callout_init() - initialize a callout structure so that it can
 *			safely be passed to callout_reset() and callout_stop()
 * callout_init_mp() - same but any installed functions must be MP safe.
 *
 * <sys/callout.h> defines three convenience macros:
 *
 * callout_active() - returns truth if callout has not been serviced
 * callout_pending() - returns truth if callout is still waiting for timeout
 * callout_deactivate() - marks the callout as having been serviced
 */
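/*
 * Typical usage sketch (illustrative only; "sc", "sc_timer" and
 * "mydev_timeout" are made-up names and the 5 second period is
 * arbitrary, none of them come from this file):
 *
 *	callout_init_mp(&sc->sc_timer);				(attach)
 *	callout_reset(&sc->sc_timer, 5 * hz, mydev_timeout, sc);
 *	...
 *	callout_stop_sync(&sc->sc_timer);			(detach)
 *
 * The callback may re-arm the callout from within itself, as
 * slotimer_callback() above does.
 */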

/*
 * Start or restart a timeout.  Install the callout structure in the
 * callwheel.  Callers may legally pass any value, even if 0 or negative,
 * but since the sc->curticks index may have already been processed a
 * minimum timeout of 1 tick will be enforced.
 *
 * The callout is installed on and will be processed on the current cpu's
 * callout wheel.
 *
 * WARNING! This function may be called from any cpu but the caller must
 * serialize callout_stop() and callout_reset() calls on the passed
 * structure regardless of cpu.
 */
void
callout_reset(struct callout *c, int to_ticks, void (*ftn)(void *),
		void *arg)
{
	softclock_pcpu_t sc;
	globaldata_t gd;

#ifdef INVARIANTS
	if ((c->c_flags & CALLOUT_DID_INIT) == 0) {
		callout_init(c);
		kprintf(
		    "callout_reset(%p) from %p: callout was not initialized\n",
		    c, ((int **)&c)[-1]);
		print_backtrace(-1);
	}
#endif
	gd = mycpu;
	sc = &softclock_pcpu_ary[gd->gd_cpuid];
	crit_enter_gd(gd);

	if (c->c_flags & CALLOUT_ACTIVE)
		callout_stop(c);

	if (to_ticks <= 0)
		to_ticks = 1;

	c->c_arg = arg;
	c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
	c->c_func = ftn;
	c->c_time = sc->curticks + to_ticks;
	c->c_gd = gd;

	TAILQ_INSERT_TAIL(&sc->callwheel[c->c_time & callwheelmask],
			  c, c_links.tqe);
	crit_exit_gd(gd);
}

struct callout_remote_arg {
	struct callout	*c;
	void		(*ftn)(void *);
	void		*arg;
	int		to_ticks;
};

static void
callout_reset_ipi(void *arg)
{
	struct callout_remote_arg *rmt = arg;

	callout_reset(rmt->c, rmt->to_ticks, rmt->ftn, rmt->arg);
}

void
callout_reset_bycpu(struct callout *c, int to_ticks, void (*ftn)(void *),
    void *arg, int cpuid)
{
	KASSERT(cpuid >= 0 && cpuid < ncpus, ("invalid cpuid %d", cpuid));

	if (cpuid == mycpuid) {
		callout_reset(c, to_ticks, ftn, arg);
	} else {
		struct globaldata *target_gd;
		struct callout_remote_arg rmt;
		int seq;

		rmt.c = c;
		rmt.ftn = ftn;
		rmt.arg = arg;
		rmt.to_ticks = to_ticks;

		target_gd = globaldata_find(cpuid);

		seq = lwkt_send_ipiq(target_gd, callout_reset_ipi, &rmt);
		lwkt_wait_ipiq(target_gd, seq);
	}
}

/*
 * Stop a running timer.  WARNING!  If called on a cpu other than the one
 * the callout was started on this function will liveloop on its IPI to
 * the target cpu to process the request.  It is possible for the callout
 * to execute in that case.
 *
 * WARNING! This function may be called from any cpu but the caller must
 * serialize callout_stop() and callout_reset() calls on the passed
 * structure regardless of cpu.
 *
 * WARNING! This routine may be called from an IPI.
 *
 * WARNING! This function can return while its c_func is still running
 *	    in the callout thread; a secondary check may be needed.
 *	    Use callout_stop_sync() to wait for any callout function to
 *	    complete before returning, being sure that no deadlock is
 *	    possible if you do.
 */
int
callout_stop(struct callout *c)
{
	globaldata_t gd = mycpu;
	globaldata_t tgd;
	softclock_pcpu_t sc;

#ifdef INVARIANTS
	if ((c->c_flags & CALLOUT_DID_INIT) == 0) {
		callout_init(c);
		kprintf(
		    "callout_stop(%p) from %p: callout was not initialized\n",
		    c, ((int **)&c)[-1]);
		print_backtrace(-1);
	}
#endif
	crit_enter_gd(gd);

	/*
	 * Don't attempt to delete a callout that's not on the queue.  The
	 * callout may not have a cpu assigned to it.  Callers do not have
	 * to be on the issuing cpu but must still serialize access to the
	 * callout structure.
	 *
	 * We are not cpu-localized here and cannot safely modify the
	 * flags field in the callout structure.  Note that most of the
	 * time CALLOUT_ACTIVE will be 0 if CALLOUT_PENDING is also 0.
	 *
	 * If we race another cpu's dispatch of this callout it is possible
	 * for CALLOUT_ACTIVE to be set with CALLOUT_PENDING unset.  This
	 * will cause us to fall through and synchronize with the other
	 * cpu.
	 */
	if ((c->c_flags & CALLOUT_PENDING) == 0) {
		if ((c->c_flags & CALLOUT_ACTIVE) == 0) {
			crit_exit_gd(gd);
			return (0);
		}
		if (c->c_gd == NULL || c->c_gd == gd) {
			c->c_flags &= ~CALLOUT_ACTIVE;
			crit_exit_gd(gd);
			return (0);
		}
	}
	if ((tgd = c->c_gd) != gd) {
		/*
		 * If the callout is owned by a different CPU we have to
		 * execute the function synchronously on the target cpu.
		 */
		int seq;

		cpu_ccfence();	/* don't let tgd alias c_gd */
		seq = lwkt_send_ipiq(tgd, (void *)callout_stop, c);
		lwkt_wait_ipiq(tgd, seq);
	} else {
		/*
		 * If the callout is owned by the same CPU we can
		 * process it directly, but if we are racing our helper
		 * thread (sc->next), we have to adjust sc->next.  The
		 * race is interlocked by a critical section.
		 */
		sc = &softclock_pcpu_ary[gd->gd_cpuid];

		c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
		if (sc->next == c)
			sc->next = TAILQ_NEXT(c, c_links.tqe);

		TAILQ_REMOVE(&sc->callwheel[c->c_time & callwheelmask],
				c, c_links.tqe);
		c->c_func = NULL;
	}
	crit_exit_gd(gd);
	return (1);
}

/*
 * Issue a callout_stop() and ensure that any callout race completes
 * before returning.  Does NOT de-initialize the callout.
 */
void
callout_stop_sync(struct callout *c)
{
	softclock_pcpu_t sc;

	while (c->c_flags & CALLOUT_DID_INIT) {
		callout_stop(c);
		if (c->c_gd) {
			sc = &softclock_pcpu_ary[c->c_gd->gd_cpuid];
			if (sc->running == c) {
				while (sc->running == c)
					tsleep(&sc->running, 0, "crace", 1);
			}
		}
		if ((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) == 0)
			break;
		kprintf("Warning: %s: callout race\n", curthread->td_comm);
	}
}

/*
 * Terminate a callout
 *
 * This function will stop any pending callout and also block while the
 * callout's function is running.  It should only be used when the caller
 * is ready to destroy the callout structure and no deadlock is possible
 * (a deadlock would occur if the callout function tries to acquire locks
 * that the current caller of callout_terminate() already holds).
 *
 * This function clears the CALLOUT_DID_INIT flag.
 *
 * lwkt_token locks are ok.
 */
void
callout_terminate(struct callout *c)
{
	softclock_pcpu_t sc;

	if (c->c_flags & CALLOUT_DID_INIT) {
		callout_stop(c);
		if (c->c_gd) {
			sc = &softclock_pcpu_ary[c->c_gd->gd_cpuid];
			if (sc->running == c) {
				while (sc->running == c)
					tsleep(&sc->running, 0, "crace", 1);
			}
		}
		KKASSERT((c->c_flags & (CALLOUT_PENDING|CALLOUT_ACTIVE)) == 0);
		c->c_flags &= ~CALLOUT_DID_INIT;
	}
}
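
/*
 * Example teardown sequence (illustrative only; "sc", "sc_timer" and the
 * M_DEVBUF usage are made up, not taken from this file):
 *
 *	callout_terminate(&sc->sc_timer);
 *	kfree(sc, M_DEVBUF);
 *
 * Once callout_terminate() returns the callout is neither pending nor
 * running and, because CALLOUT_DID_INIT has been cleared, the structure
 * must be re-initialized with callout_init() or callout_init_mp()
 * before it can be used again.
 */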

/*
 * Prepare a callout structure for use by callout_reset() and/or
 * callout_stop().  The MP version of this routine requires that the callback
 * function installed by callout_reset() be MP safe.
 *
 * The init functions can be called from any cpu and do not have to be
 * called from the cpu that the timer will eventually run on.
 */
void
callout_init(struct callout *c)
{
	bzero(c, sizeof *c);
	c->c_flags = CALLOUT_DID_INIT;
}

void
callout_init_mp(struct callout *c)
{
	callout_init(c);
	c->c_flags |= CALLOUT_MPSAFE;
}
619