/*	$OpenBSD: kern_timeout.c,v 1.97 2024/02/23 16:51:39 cheloha Exp $	*/
/*
 * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org>
 * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kthread.h>
#include <sys/proc.h>
#include <sys/timeout.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/queue.h>			/* _Q_INVALIDATE */
#include <sys/sysctl.h>
#include <sys/witness.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>
#endif

#include "kcov.h"
#if NKCOV > 0
#include <sys/kcov.h>
#endif

/*
 * Locks used to protect global variables in this file:
 *
 *	I	immutable after initialization
 *	T	timeout_mutex
 */
struct mutex timeout_mutex = MUTEX_INITIALIZER(IPL_HIGH);

void *softclock_si;			/* [I] softclock() interrupt handle */
struct timeoutstat tostat;		/* [T] statistics and totals */

/*
 * Timeouts are kept in a hierarchical timing wheel.  The to_time is the value
 * of the global variable "ticks" when the timeout should be called.  There are
 * four levels with 256 buckets each.
 */
#define WHEELCOUNT 4
#define WHEELSIZE 256
#define WHEELMASK 255
#define WHEELBITS 8
#define BUCKETS (WHEELCOUNT * WHEELSIZE)
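
/*
 * Editor's worked example of the level spans (an assumption drawn from
 * the BUCKET() macro below, not part of the original file): level n
 * covers relative times up to 1 << ((n + 1) * WHEELBITS) ticks, so
 * level 0 spans 256 ticks, level 1 spans 65536, and so on.  A timeout
 * due 300 ticks out overflows level 0 and is filed on level 1.
 */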

struct circq timeout_wheel[BUCKETS];	/* [T] Tick-based timeouts */
struct circq timeout_wheel_kc[BUCKETS];	/* [T] Clock-based timeouts */
struct circq timeout_new;		/* [T] New, unscheduled timeouts */
struct circq timeout_todo;		/* [T] Due or needs rescheduling */
struct circq timeout_proc;		/* [T] Due + needs process context */
#ifdef MULTIPROCESSOR
struct circq timeout_proc_mp;		/* [T] Process ctx + no kernel lock */
#endif

time_t timeout_level_width[WHEELCOUNT];	/* [I] Wheel level width (seconds) */
struct timespec tick_ts;		/* [I] Length of a tick (1/hz secs) */

struct kclock {
	struct timespec kc_lastscan;	/* [T] Clock time at last wheel scan */
	struct timespec kc_late;	/* [T] Late if due prior */
	struct timespec kc_offset;	/* [T] Offset from primary kclock */
} timeout_kclock[KCLOCK_MAX];

#define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

#define BUCKET(rel, abs)						\
    (timeout_wheel[							\
	((rel) <= (1 << (2*WHEELBITS)))					\
	    ? ((rel) <= (1 << WHEELBITS))				\
		? MASKWHEEL(0, (abs))					\
		: MASKWHEEL(1, (abs)) + WHEELSIZE			\
	    : ((rel) <= (1 << (3*WHEELBITS)))				\
		? MASKWHEEL(2, (abs)) + 2*WHEELSIZE			\
		: MASKWHEEL(3, (abs)) + 3*WHEELSIZE])

#define MOVEBUCKET(wheel, time)						\
    CIRCQ_CONCAT(&timeout_todo,						\
        &timeout_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])
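
/*
 * Editor's worked example of the bucket arithmetic (assumption, not in
 * the original): with WHEELBITS 8, MASKWHEEL(1, t) isolates bits 8-15
 * of the absolute tick count t, e.g. MASKWHEEL(1, 0x12345) == 0x23.
 * BUCKET(rel, abs) first picks a level from the relative time, then
 * hashes the absolute time with MASKWHEEL to pick the bucket within it.
 */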

/*
 * Circular queue definitions.
 */

#define CIRCQ_INIT(elem) do {			\
	(elem)->next = (elem);			\
	(elem)->prev = (elem);			\
} while (0)

#define CIRCQ_INSERT_TAIL(list, elem) do {	\
	(elem)->prev = (list)->prev;		\
	(elem)->next = (list);			\
	(list)->prev->next = (elem);		\
	(list)->prev = (elem);			\
	tostat.tos_pending++;			\
} while (0)

#define CIRCQ_CONCAT(fst, snd) do {		\
	if (!CIRCQ_EMPTY(snd)) {		\
		(fst)->prev->next = (snd)->next;\
		(snd)->next->prev = (fst)->prev;\
		(snd)->prev->next = (fst);	\
		(fst)->prev = (snd)->prev;	\
		CIRCQ_INIT(snd);		\
	}					\
} while (0)

#define CIRCQ_REMOVE(elem) do {			\
	(elem)->next->prev = (elem)->prev;	\
	(elem)->prev->next = (elem)->next;	\
	_Q_INVALIDATE((elem)->prev);		\
	_Q_INVALIDATE((elem)->next);		\
	tostat.tos_pending--;			\
} while (0)

#define CIRCQ_FIRST(elem) ((elem)->next)

#define CIRCQ_EMPTY(elem) (CIRCQ_FIRST(elem) == (elem))

#define CIRCQ_FOREACH(elem, list)		\
	for ((elem) = CIRCQ_FIRST(list);	\
	    (elem) != (list);			\
	    (elem) = CIRCQ_FIRST(elem))
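
/*
 * Minimal usage sketch for the CIRCQ primitives (editor's example;
 * "example_q" and "elem" are hypothetical names):
 *
 *	struct circq example_q, elem;
 *
 *	CIRCQ_INIT(&example_q);			(empty: head points at itself)
 *	CIRCQ_INSERT_TAIL(&example_q, &elem);	(bumps tostat.tos_pending)
 *	CIRCQ_REMOVE(&elem);			(drops tostat.tos_pending)
 *
 * Heads and elements share one type, so a queue is empty exactly when
 * CIRCQ_FIRST(head) == head.
 */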

#ifdef WITNESS
struct lock_object timeout_sleeplock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_object timeout_spinlock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED |
	    (LO_CLASS_MUTEX << LO_CLASSSHIFT)
};
struct lock_type timeout_sleeplock_type = {
	.lt_name = "timeout"
};
struct lock_type timeout_spinlock_type = {
	.lt_name = "timeout"
};
#define TIMEOUT_LOCK_OBJ(needsproc) \
	((needsproc) ? &timeout_sleeplock_obj : &timeout_spinlock_obj)
#endif

void softclock(void *);
void softclock_create_thread(void *);
void softclock_process_kclock_timeout(struct timeout *, int);
void softclock_process_tick_timeout(struct timeout *, int);
void softclock_thread(void *);
#ifdef MULTIPROCESSOR
void softclock_thread_mp(void *);
#endif
void timeout_barrier_timeout(void *);
uint32_t timeout_bucket(const struct timeout *);
uint32_t timeout_maskwheel(uint32_t, const struct timespec *);
void timeout_run(struct timeout *);

/*
 * The first thing in a struct timeout is its struct circq, so we
 * can get back from a pointer to the latter to a pointer to the
 * whole timeout with just a cast.
 */
static inline struct timeout *
timeout_from_circq(struct circq *p)
{
	return ((struct timeout *)(p));
}

static inline void
timeout_sync_order(int needsproc)
{
	WITNESS_CHECKORDER(TIMEOUT_LOCK_OBJ(needsproc), LOP_NEWORDER, NULL);
}

static inline void
timeout_sync_enter(int needsproc)
{
	timeout_sync_order(needsproc);
	WITNESS_LOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

static inline void
timeout_sync_leave(int needsproc)
{
	WITNESS_UNLOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

/*
 * Some of the "math" in here is a bit tricky.
 *
 * We have to beware of wrapping ints.
 * We use the fact that any element added to the queue must be added with a
 * positive time.  That means that any element `to' on the queue cannot be
 * scheduled to timeout further in time than INT_MAX, but to->to_time can
 * be positive or negative so comparing it with anything is dangerous.
 * The only way we can use the to->to_time value in any predictable way
 * is when we calculate how far in the future `to' will timeout -
 * "to->to_time - ticks".  The result will always be positive for future
 * timeouts and 0 or negative for due timeouts.
 */
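
/*
 * Editor's illustration of the wrap-safe idiom above (assumed example,
 * not in the original).  The signed difference stays meaningful even
 * after "ticks" wraps past INT_MAX:
 *
 *	int until = to->to_time - ticks;	(future: > 0, due: <= 0)
 *
 * whereas a direct comparison such as "to->to_time > ticks" breaks
 * near the wrap boundary: with ticks == INT_MAX - 5 and
 * to_time == INT_MIN + 5 the timeout is only 11 ticks in the future,
 * yet the direct comparison would call it long overdue.
 */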

void
timeout_startup(void)
{
	int b, level;

	CIRCQ_INIT(&timeout_new);
	CIRCQ_INIT(&timeout_todo);
	CIRCQ_INIT(&timeout_proc);
#ifdef MULTIPROCESSOR
	CIRCQ_INIT(&timeout_proc_mp);
#endif
	for (b = 0; b < nitems(timeout_wheel); b++)
		CIRCQ_INIT(&timeout_wheel[b]);
	for (b = 0; b < nitems(timeout_wheel_kc); b++)
		CIRCQ_INIT(&timeout_wheel_kc[b]);

	for (level = 0; level < nitems(timeout_level_width); level++)
		timeout_level_width[level] = 2 << (level * WHEELBITS);
	NSEC_TO_TIMESPEC(tick_nsec, &tick_ts);
}

void
timeout_proc_init(void)
{
	softclock_si = softintr_establish(IPL_SOFTCLOCK, softclock, NULL);
	if (softclock_si == NULL)
		panic("%s: unable to register softclock interrupt", __func__);

	WITNESS_INIT(&timeout_sleeplock_obj, &timeout_sleeplock_type);
	WITNESS_INIT(&timeout_spinlock_obj, &timeout_spinlock_type);

	kthread_create_deferred(softclock_create_thread, NULL);
}

void
timeout_set(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set_flags(new, fn, arg, KCLOCK_NONE, 0);
}

void
timeout_set_flags(struct timeout *to, void (*fn)(void *), void *arg, int kclock,
    int flags)
{
	KASSERT(!ISSET(flags, ~(TIMEOUT_PROC | TIMEOUT_MPSAFE)));
	KASSERT(kclock >= KCLOCK_NONE && kclock < KCLOCK_MAX);

	to->to_func = fn;
	to->to_arg = arg;
	to->to_kclock = kclock;
	to->to_flags = flags | TIMEOUT_INITIALIZED;

	/* For now, only process context timeouts may be marked MP-safe. */
	if (ISSET(to->to_flags, TIMEOUT_MPSAFE))
		KASSERT(ISSET(to->to_flags, TIMEOUT_PROC));
}

void
timeout_set_proc(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set_flags(new, fn, arg, KCLOCK_NONE, TIMEOUT_PROC);
}

int
timeout_add(struct timeout *new, int to_ticks)
{
	int old_time;
	int ret = 1;

	KASSERT(ISSET(new->to_flags, TIMEOUT_INITIALIZED));
	KASSERT(new->to_kclock == KCLOCK_NONE);
	KASSERT(to_ticks >= 0);

	mtx_enter(&timeout_mutex);

	/* Initialize the time here; it won't change. */
	old_time = new->to_time;
	new->to_time = to_ticks + ticks;
	CLR(new->to_flags, TIMEOUT_TRIGGERED);

	/*
	 * If this timeout is already scheduled and is now being moved
	 * earlier, reschedule it now.  Otherwise leave it in place and
	 * let it be rescheduled later.
	 */
	if (ISSET(new->to_flags, TIMEOUT_ONQUEUE)) {
		if (new->to_time - ticks < old_time - ticks) {
			CIRCQ_REMOVE(&new->to_list);
			CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(new->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
	}
#if NKCOV > 0
	if (!kcov_cold)
		new->to_process = curproc->p_p;
#endif
	tostat.tos_added++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_add_tv(struct timeout *to, const struct timeval *tv)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * tv->tv_sec + tv->tv_usec / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && tv->tv_usec > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_sec(struct timeout *to, int secs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * secs;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_msec(struct timeout *to, int msecs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)msecs * 1000 / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && msecs > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_usec(struct timeout *to, int usecs)
{
	int to_ticks = usecs / tick;

	if (to_ticks == 0 && usecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}

int
timeout_add_nsec(struct timeout *to, int nsecs)
{
	int to_ticks = nsecs / (tick * 1000);

	if (to_ticks == 0 && nsecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}
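
/*
 * Editor's usage sketch (hypothetical "sc" and "example_tick", not in
 * the original file):
 *
 *	timeout_set(&sc->sc_tmo, example_tick, sc);
 *	timeout_add_msec(&sc->sc_tmo, 100);	(fire in ~100ms)
 *
 * The tv/sec/msec helpers above clamp the converted count to INT_MAX
 * ticks, and every helper rounds a nonzero request up to at least one
 * tick so a short but nonzero delay is never dropped to zero.
 */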

int
timeout_abs_ts(struct timeout *to, const struct timespec *abstime)
{
	struct timespec old_abstime;
	int ret = 1;

	mtx_enter(&timeout_mutex);

	KASSERT(ISSET(to->to_flags, TIMEOUT_INITIALIZED));
	KASSERT(to->to_kclock == KCLOCK_UPTIME);

	old_abstime = to->to_abstime;
	to->to_abstime = *abstime;
	CLR(to->to_flags, TIMEOUT_TRIGGERED);

	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		if (timespeccmp(abstime, &old_abstime, <)) {
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(to->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
	}
#if NKCOV > 0
	if (!kcov_cold)
		to->to_process = curproc->p_p;
#endif
	tostat.tos_added++;

	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_del(struct timeout *to)
{
	int ret = 0;

	mtx_enter(&timeout_mutex);
	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		CIRCQ_REMOVE(&to->to_list);
		CLR(to->to_flags, TIMEOUT_ONQUEUE);
		tostat.tos_cancelled++;
		ret = 1;
	}
	CLR(to->to_flags, TIMEOUT_TRIGGERED);
	tostat.tos_deleted++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_del_barrier(struct timeout *to)
{
	int removed;

	timeout_sync_order(ISSET(to->to_flags, TIMEOUT_PROC));

	removed = timeout_del(to);
	if (!removed)
		timeout_barrier(to);

	return removed;
}
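
/*
 * Editor's usage sketch (hypothetical "sc"): a detach path that must
 * guarantee the handler has finished before freeing its argument:
 *
 *	timeout_del_barrier(&sc->sc_tmo);	(may sleep)
 *	free(sc, M_DEVBUF, sizeof(*sc));
 *
 * If the handler could not be cancelled, timeout_barrier() below
 * queues a barrier timeout behind it and waits for the barrier to run.
 */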

void
timeout_barrier(struct timeout *to)
{
	struct timeout barrier;
	struct cond c;
	int flags;

	flags = to->to_flags & (TIMEOUT_PROC | TIMEOUT_MPSAFE);
	timeout_sync_order(ISSET(flags, TIMEOUT_PROC));

	timeout_set_flags(&barrier, timeout_barrier_timeout, &c, KCLOCK_NONE,
	    flags);
	barrier.to_process = curproc->p_p;
	cond_init(&c);

	mtx_enter(&timeout_mutex);

	barrier.to_time = ticks;
	SET(barrier.to_flags, TIMEOUT_ONQUEUE);
	if (ISSET(flags, TIMEOUT_PROC)) {
#ifdef MULTIPROCESSOR
		if (ISSET(flags, TIMEOUT_MPSAFE))
			CIRCQ_INSERT_TAIL(&timeout_proc_mp, &barrier.to_list);
		else
#endif
			CIRCQ_INSERT_TAIL(&timeout_proc, &barrier.to_list);
	} else
		CIRCQ_INSERT_TAIL(&timeout_todo, &barrier.to_list);

	mtx_leave(&timeout_mutex);

	if (ISSET(flags, TIMEOUT_PROC)) {
#ifdef MULTIPROCESSOR
		if (ISSET(flags, TIMEOUT_MPSAFE))
			wakeup_one(&timeout_proc_mp);
		else
#endif
			wakeup_one(&timeout_proc);
	} else
		softintr_schedule(softclock_si);

	cond_wait(&c, "tmobar");
}

void
timeout_barrier_timeout(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}

uint32_t
timeout_bucket(const struct timeout *to)
{
	struct timespec diff, shifted_abstime;
	struct kclock *kc;
	uint32_t level;

	KASSERT(to->to_kclock == KCLOCK_UPTIME);
	kc = &timeout_kclock[to->to_kclock];

	KASSERT(timespeccmp(&kc->kc_lastscan, &to->to_abstime, <));
	timespecsub(&to->to_abstime, &kc->kc_lastscan, &diff);
	for (level = 0; level < nitems(timeout_level_width) - 1; level++) {
		if (diff.tv_sec < timeout_level_width[level])
			break;
	}
	timespecadd(&to->to_abstime, &kc->kc_offset, &shifted_abstime);
	return level * WHEELSIZE + timeout_maskwheel(level, &shifted_abstime);
}

/*
 * Hash the absolute time into a bucket on a given level of the wheel.
 *
 * The complete hash is 32 bits.  The upper 25 bits are seconds, the
 * lower 7 bits are nanoseconds.  tv_nsec is a positive value less
 * than one billion so we need to divide it to isolate the desired
 * bits.  We can't just shift it.
 *
 * The level is used to isolate an 8-bit portion of the hash.  The
 * resulting number indicates which bucket the absolute time belongs
 * in on the given level of the wheel.
 */
uint32_t
timeout_maskwheel(uint32_t level, const struct timespec *abstime)
{
	uint32_t hi, lo;

	hi = abstime->tv_sec << 7;
	lo = abstime->tv_nsec / 7812500;

	return ((hi | lo) >> (level * WHEELBITS)) & WHEELMASK;
}
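
/*
 * Editor's worked example (assumption, not in the original): 7812500
 * is one billion / 128, so tv_nsec / 7812500 is always a 7-bit value.
 * For tv_sec == 3, tv_nsec == 500000000: hi == 3 << 7 == 0x180 and
 * lo == 64, so the hash is 0x1c0 and the level-0 bucket is
 * 0x1c0 & WHEELMASK == 0xc0.
 */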

/*
 * This is called from hardclock() on the primary CPU at the start of
 * every tick.
 */
void
timeout_hardclock_update(void)
{
	struct timespec elapsed, now;
	struct kclock *kc;
	struct timespec *lastscan = &timeout_kclock[KCLOCK_UPTIME].kc_lastscan;
	int b, done, first, i, last, level, need_softclock = 1, off;

	mtx_enter(&timeout_mutex);

	MOVEBUCKET(0, ticks);
	if (MASKWHEEL(0, ticks) == 0) {
		MOVEBUCKET(1, ticks);
		if (MASKWHEEL(1, ticks) == 0) {
			MOVEBUCKET(2, ticks);
			if (MASKWHEEL(2, ticks) == 0)
				MOVEBUCKET(3, ticks);
		}
	}

	/*
	 * Dump the buckets that expired while we were away.
	 *
	 * If the elapsed time has exceeded a level's limit then we need
	 * to dump every bucket in the level.  We have necessarily completed
	 * a lap of that level, too, so we need to process buckets in the
	 * next level.
	 *
	 * Otherwise we need to compare indices: if the index of the first
	 * expired bucket is greater than that of the last then we have
	 * completed a lap of the level and need to process buckets in the
	 * next level.
	 */
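	/*
	 * Editor's example (assumption): with WHEELSIZE 256, first == 250
	 * and last == 3 means the scan wrapped past bucket 255, so this
	 * level completed a lap and the next level must be scanned too.
	 */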
	nanouptime(&now);
	timespecsub(&now, lastscan, &elapsed);
	for (level = 0; level < nitems(timeout_level_width); level++) {
		first = timeout_maskwheel(level, lastscan);
		if (elapsed.tv_sec >= timeout_level_width[level]) {
			last = (first == 0) ? WHEELSIZE - 1 : first - 1;
			done = 0;
		} else {
			last = timeout_maskwheel(level, &now);
			done = first <= last;
		}
		off = level * WHEELSIZE;
		for (b = first;; b = (b + 1) % WHEELSIZE) {
			CIRCQ_CONCAT(&timeout_todo, &timeout_wheel_kc[off + b]);
			if (b == last)
				break;
		}
		if (done)
			break;
	}

	/*
	 * Update the cached state for each kclock.
	 */
	for (i = 0; i < nitems(timeout_kclock); i++) {
		kc = &timeout_kclock[i];
		timespecadd(&now, &kc->kc_offset, &kc->kc_lastscan);
		timespecsub(&kc->kc_lastscan, &tick_ts, &kc->kc_late);
	}

	if (CIRCQ_EMPTY(&timeout_new) && CIRCQ_EMPTY(&timeout_todo))
		need_softclock = 0;

	mtx_leave(&timeout_mutex);

	if (need_softclock)
		softintr_schedule(softclock_si);
}

void
timeout_run(struct timeout *to)
{
	void (*fn)(void *);
	void *arg;
	int needsproc;

	MUTEX_ASSERT_LOCKED(&timeout_mutex);

	CLR(to->to_flags, TIMEOUT_ONQUEUE);
	SET(to->to_flags, TIMEOUT_TRIGGERED);

	fn = to->to_func;
	arg = to->to_arg;
	needsproc = ISSET(to->to_flags, TIMEOUT_PROC);
#if NKCOV > 0
	struct process *kcov_process = to->to_process;
#endif

	mtx_leave(&timeout_mutex);
	timeout_sync_enter(needsproc);
#if NKCOV > 0
	kcov_remote_enter(KCOV_REMOTE_COMMON, kcov_process);
#endif
	fn(arg);
#if NKCOV > 0
	kcov_remote_leave(KCOV_REMOTE_COMMON, kcov_process);
#endif
	timeout_sync_leave(needsproc);
	mtx_enter(&timeout_mutex);
}

void
softclock_process_kclock_timeout(struct timeout *to, int new)
{
	struct kclock *kc = &timeout_kclock[to->to_kclock];

	if (timespeccmp(&to->to_abstime, &kc->kc_lastscan, >)) {
		tostat.tos_scheduled++;
		if (!new)
			tostat.tos_rescheduled++;
		CIRCQ_INSERT_TAIL(&timeout_wheel_kc[timeout_bucket(to)],
		    &to->to_list);
		return;
	}
	if (!new && timespeccmp(&to->to_abstime, &kc->kc_late, <=))
		tostat.tos_late++;
	if (ISSET(to->to_flags, TIMEOUT_PROC)) {
#ifdef MULTIPROCESSOR
		if (ISSET(to->to_flags, TIMEOUT_MPSAFE))
			CIRCQ_INSERT_TAIL(&timeout_proc_mp, &to->to_list);
		else
#endif
			CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
		return;
	}
	timeout_run(to);
	tostat.tos_run_softclock++;
}

void
softclock_process_tick_timeout(struct timeout *to, int new)
{
	int delta = to->to_time - ticks;

	if (delta > 0) {
		tostat.tos_scheduled++;
		if (!new)
			tostat.tos_rescheduled++;
		CIRCQ_INSERT_TAIL(&BUCKET(delta, to->to_time), &to->to_list);
		return;
	}
	if (!new && delta < 0)
		tostat.tos_late++;
	if (ISSET(to->to_flags, TIMEOUT_PROC)) {
#ifdef MULTIPROCESSOR
		if (ISSET(to->to_flags, TIMEOUT_MPSAFE))
			CIRCQ_INSERT_TAIL(&timeout_proc_mp, &to->to_list);
		else
#endif
			CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
		return;
	}
	timeout_run(to);
	tostat.tos_run_softclock++;
}

/*
 * Timeouts are processed here instead of timeout_hardclock_update()
 * to avoid doing any more work at IPL_CLOCK than absolutely necessary.
 * Down here at IPL_SOFTCLOCK other interrupts can be serviced promptly
 * so the system remains responsive even if there is a surge of timeouts.
 */
void
softclock(void *arg)
{
	struct timeout *first_new, *to;
	int needsproc, new;
#ifdef MULTIPROCESSOR
	int need_proc_mp;
#endif

	first_new = NULL;
	new = 0;

	mtx_enter(&timeout_mutex);
	if (!CIRCQ_EMPTY(&timeout_new))
		first_new = timeout_from_circq(CIRCQ_FIRST(&timeout_new));
	CIRCQ_CONCAT(&timeout_todo, &timeout_new);
	while (!CIRCQ_EMPTY(&timeout_todo)) {
		to = timeout_from_circq(CIRCQ_FIRST(&timeout_todo));
		CIRCQ_REMOVE(&to->to_list);
		if (to == first_new)
			new = 1;
		if (to->to_kclock == KCLOCK_NONE)
			softclock_process_tick_timeout(to, new);
		else if (to->to_kclock == KCLOCK_UPTIME)
			softclock_process_kclock_timeout(to, new);
		else {
			panic("%s: invalid to_kclock: %d",
			    __func__, to->to_kclock);
		}
	}
	tostat.tos_softclocks++;
	needsproc = !CIRCQ_EMPTY(&timeout_proc);
#ifdef MULTIPROCESSOR
	need_proc_mp = !CIRCQ_EMPTY(&timeout_proc_mp);
#endif
	mtx_leave(&timeout_mutex);

	if (needsproc)
		wakeup(&timeout_proc);
#ifdef MULTIPROCESSOR
	if (need_proc_mp)
		wakeup(&timeout_proc_mp);
#endif
}

void
softclock_create_thread(void *arg)
{
	if (kthread_create(softclock_thread, NULL, NULL, "softclock"))
		panic("fork softclock");
#ifdef MULTIPROCESSOR
	if (kthread_create(softclock_thread_mp, NULL, NULL, "softclockmp"))
		panic("kthread_create softclock_thread_mp");
#endif
}

void
softclock_thread(void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct timeout *to;
	int s;

	KERNEL_ASSERT_LOCKED();

	/* Be conservative for the moment: peg this thread to the primary CPU. */
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci))
			break;
	}
	KASSERT(ci != NULL);
	sched_peg_curproc(ci);

	s = splsoftclock();
	mtx_enter(&timeout_mutex);
	for (;;) {
		while (!CIRCQ_EMPTY(&timeout_proc)) {
			to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc));
			CIRCQ_REMOVE(&to->to_list);
			timeout_run(to);
			tostat.tos_run_thread++;
		}
		tostat.tos_thread_wakeups++;
		msleep_nsec(&timeout_proc, &timeout_mutex, PSWP, "tmoslp",
		    INFSLP);
	}
	splx(s);
}

#ifdef MULTIPROCESSOR
void
softclock_thread_mp(void *arg)
{
	struct timeout *to;

	KERNEL_ASSERT_LOCKED();
	KERNEL_UNLOCK();

	mtx_enter(&timeout_mutex);
	for (;;) {
		while (!CIRCQ_EMPTY(&timeout_proc_mp)) {
			to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc_mp));
			CIRCQ_REMOVE(&to->to_list);
			timeout_run(to);
			tostat.tos_run_thread++;
		}
		tostat.tos_thread_wakeups++;
		msleep_nsec(&timeout_proc_mp, &timeout_mutex, PSWP, "tmoslp",
		    INFSLP);
	}
}
#endif /* MULTIPROCESSOR */

#ifndef SMALL_KERNEL
void
timeout_adjust_ticks(int adj)
{
	struct timeout *to;
	struct circq *p;
	int new_ticks, b;

	/* adjusting the monotonic clock backwards would be a Bad Thing */
	if (adj <= 0)
		return;

	mtx_enter(&timeout_mutex);
	new_ticks = ticks + adj;
	for (b = 0; b < nitems(timeout_wheel); b++) {
		p = CIRCQ_FIRST(&timeout_wheel[b]);
		while (p != &timeout_wheel[b]) {
			to = timeout_from_circq(p);
			p = CIRCQ_FIRST(p);

			/* when moving a timeout forward need to reinsert it */
			if (to->to_time - ticks < adj)
				to->to_time = new_ticks;
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_todo, &to->to_list);
		}
	}
	ticks = new_ticks;
	mtx_leave(&timeout_mutex);
}
#endif

int
timeout_sysctl(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	struct timeoutstat status;

	mtx_enter(&timeout_mutex);
	memcpy(&status, &tostat, sizeof(status));
	mtx_leave(&timeout_mutex);

	return sysctl_rdstruct(oldp, oldlenp, newp, &status, sizeof(status));
}

#ifdef DDB
const char *db_kclock(int);
void db_show_callout_bucket(struct circq *);
void db_show_timeout(struct timeout *, struct circq *);
const char *db_timespec(const struct timespec *);

const char *
db_kclock(int kclock)
{
	switch (kclock) {
	case KCLOCK_UPTIME:
		return "uptime";
	default:
		return "invalid";
	}
}

const char *
db_timespec(const struct timespec *ts)
{
	static char buf[32];
	struct timespec tmp, zero;

	if (ts->tv_sec >= 0) {
		snprintf(buf, sizeof(buf), "%lld.%09ld",
		    ts->tv_sec, ts->tv_nsec);
		return buf;
	}

	timespecclear(&zero);
	timespecsub(&zero, ts, &tmp);
	snprintf(buf, sizeof(buf), "-%lld.%09ld", tmp.tv_sec, tmp.tv_nsec);
	return buf;
}

void
db_show_callout_bucket(struct circq *bucket)
{
	struct circq *p;

	CIRCQ_FOREACH(p, bucket)
		db_show_timeout(timeout_from_circq(p), bucket);
}

void
db_show_timeout(struct timeout *to, struct circq *bucket)
{
	struct timespec remaining;
	struct kclock *kc;
	char buf[8];
	db_expr_t offset;
	struct circq *wheel;
	char *name, *where;
	int width = sizeof(long) * 2;

	db_find_sym_and_offset((vaddr_t)to->to_func, &name, &offset);
	name = name ? name : "?";
	if (bucket == &timeout_new)
		where = "new";
	else if (bucket == &timeout_todo)
		where = "softint";
	else if (bucket == &timeout_proc)
		where = "thread";
#ifdef MULTIPROCESSOR
	else if (bucket == &timeout_proc_mp)
		where = "thread-mp";
#endif
	else {
		if (to->to_kclock == KCLOCK_UPTIME)
			wheel = timeout_wheel_kc;
		else if (to->to_kclock == KCLOCK_NONE)
			wheel = timeout_wheel;
		else
			goto invalid;
		snprintf(buf, sizeof(buf), "%3ld/%1ld",
		    (bucket - wheel) % WHEELSIZE,
		    (bucket - wheel) / WHEELSIZE);
		where = buf;
	}
	if (to->to_kclock == KCLOCK_UPTIME) {
		kc = &timeout_kclock[to->to_kclock];
		timespecsub(&to->to_abstime, &kc->kc_lastscan, &remaining);
		db_printf("%20s %8s %9s 0x%0*lx %s\n",
		    db_timespec(&remaining), db_kclock(to->to_kclock), where,
		    width, (ulong)to->to_arg, name);
	} else if (to->to_kclock == KCLOCK_NONE) {
		db_printf("%20d %8s %9s 0x%0*lx %s\n",
		    to->to_time - ticks, "ticks", where,
		    width, (ulong)to->to_arg, name);
	} else
		goto invalid;
	return;

invalid:
	db_printf("%s: timeout 0x%p: invalid to_kclock: %d",
	    __func__, to, to->to_kclock);
}

void
db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif)
{
	struct kclock *kc;
	int width = sizeof(long) * 2 + 2;
	int b, i;

	db_printf("%20s %8s\n", "lastscan", "clock");
	db_printf("%20d %8s\n", ticks, "ticks");
	for (i = 0; i < nitems(timeout_kclock); i++) {
		kc = &timeout_kclock[i];
		db_printf("%20s %8s\n",
		    db_timespec(&kc->kc_lastscan), db_kclock(i));
	}
	db_printf("\n");
	db_printf("%20s %8s %9s %*s %s\n",
	    "remaining", "clock", "wheel", width, "arg", "func");
	db_show_callout_bucket(&timeout_new);
	db_show_callout_bucket(&timeout_todo);
	db_show_callout_bucket(&timeout_proc);
#ifdef MULTIPROCESSOR
	db_show_callout_bucket(&timeout_proc_mp);
#endif
	for (b = 0; b < nitems(timeout_wheel); b++)
		db_show_callout_bucket(&timeout_wheel[b]);
	for (b = 0; b < nitems(timeout_wheel_kc); b++)
		db_show_callout_bucket(&timeout_wheel_kc[b]);
}
#endif