1 /* $OpenBSD: kern_tc.c,v 1.83 2024/02/23 23:01:15 cheloha Exp $ */
2
3 /*
4 * Copyright (c) 2000 Poul-Henning Kamp <phk@FreeBSD.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 /*
20 * If we meet some day, and you think this stuff is worth it, you
21 * can buy me a beer in return. Poul-Henning Kamp
22 */
23
24 #include <sys/param.h>
25 #include <sys/atomic.h>
26 #include <sys/kernel.h>
27 #include <sys/mutex.h>
28 #include <sys/rwlock.h>
29 #include <sys/stdint.h>
30 #include <sys/timeout.h>
31 #include <sys/sysctl.h>
32 #include <sys/syslog.h>
33 #include <sys/systm.h>
34 #include <sys/timetc.h>
35 #include <sys/queue.h>
36 #include <sys/malloc.h>
37
38 u_int dummy_get_timecount(struct timecounter *);
39
40 int sysctl_tc_hardware(void *, size_t *, void *, size_t);
41 int sysctl_tc_choice(void *, size_t *, void *, size_t);
42
43 /*
44 * Implement a dummy timecounter which we can use until we get a real one
45 * in the air. This allows the console and other early stuff to use
46 * time services.
47 */
48
/*
 * Fake counter for early boot: each read returns the next integer, so
 * time appears to advance even before a hardware counter attaches.
 */
u_int
dummy_get_timecount(struct timecounter *tc)
{
	static u_int now;

	/* Atomic so concurrent early readers still see a monotonic value. */
	return atomic_inc_int_nv(&now);
}
56
/* The dummy counter: negative quality keeps it from being auto-selected. */
static struct timecounter dummy_timecounter = {
	.tc_get_timecount = dummy_get_timecount,
	.tc_counter_mask = ~0u,
	.tc_frequency = 1000000,	/* pretend 1 MHz */
	.tc_name = "dummy",
	.tc_quality = -1000000,		/* worse than any real counter */
	.tc_priv = NULL,
	.tc_user = 0,			/* not exported to userland */
};
66
67 /*
68 * Locks used to protect struct members, global variables in this file:
69 * I immutable after initialization
70 * T tc_lock
71 * W windup_mtx
72 */
73
/*
 * One snapshot of timekeeping state.  Lockless readers rely on
 * th_generation: tc_windup() zeroes it while rewriting the other
 * fields and stores a fresh non-zero value when the struct is
 * consistent again, so readers retry on zero or a changed value.
 */
struct timehands {
	/* These fields must be initialized by the driver. */
	struct timecounter *th_counter;		/* [W] */
	int64_t th_adjtimedelta;		/* [T,W] */
	struct bintime th_next_ntp_update;	/* [T,W] */
	int64_t th_adjustment;			/* [W] */
	u_int64_t th_scale;			/* [W] */
	u_int th_offset_count;			/* [W] */
	struct bintime th_boottime;		/* [T,W] */
	struct bintime th_offset;		/* [W] */
	struct bintime th_naptime;		/* [W] */
	struct timeval th_microtime;		/* [W] */
	struct timespec th_nanotime;		/* [W] */
	/* Fields not to be copied in tc_windup start with th_generation. */
	volatile u_int th_generation;		/* [W] */
	struct timehands *th_next;		/* [I] */
};
91
/*
 * Two timehands chained in a ring: tc_windup() fills in the inactive
 * one and then flips the global "timehands" pointer to it.  th0 starts
 * on the dummy counter so time services work from the very beginning.
 */
static struct timehands th0;
static struct timehands th1 = {
	.th_next = &th0
};
static struct timehands th0 = {
	.th_counter = &dummy_timecounter,
	.th_scale = UINT64_MAX / 1000000,	/* matches dummy's 1 MHz */
	.th_offset = { .sec = 0, .frac = 0 },
	.th_generation = 1,			/* non-zero: valid for readers */
	.th_next = &th1
};
103
/* Serializes updates to UTC-affecting state (boottime, adjtimedelta). */
struct rwlock tc_lock = RWLOCK_INITIALIZER("tc_lock");

/*
 * tc_windup() must be called before leaving this mutex.
 */
struct mutex windup_mtx = MUTEX_INITIALIZER(IPL_CLOCK);

/* The currently active snapshot; flipped by tc_windup(). */
static struct timehands *volatile timehands = &th0;		/* [W] */
/* The hardware counter tc_windup() should switch to, if different. */
struct timecounter *timecounter = &dummy_timecounter;		/* [T] */
/* All registered timecounters; grown by tc_init(). */
static SLIST_HEAD(, timecounter) tc_list = SLIST_HEAD_INITIALIZER(tc_list);

/*
 * These are updated from tc_windup(). They are useful when
 * examining kernel core dumps.
 */
volatile time_t naptime = 0;
volatile time_t time_second = 0;
volatile time_t time_uptime = 0;

/* Non-zero: log whenever the realtime clock is stepped. */
static int timestepwarnings;

void ntp_update_second(struct timehands *);
void tc_windup(struct bintime *, struct bintime *, int64_t *);
127
128 /*
129 * Return the difference between the timehands' counter value now and what
130 * was when we copied it to the timehands' offset_count.
131 */
132 static __inline u_int
tc_delta(struct timehands * th)133 tc_delta(struct timehands *th)
134 {
135 struct timecounter *tc;
136
137 tc = th->th_counter;
138 return ((tc->tc_get_timecount(tc) - th->th_offset_count) &
139 tc->tc_counter_mask);
140 }
141
142 /*
143 * Functions for reading the time. We have to loop until we are sure that
144 * the timehands that we operated on was not updated under our feet. See
145 * the comment in <sys/time.h> for a description of these functions.
146 */
147
/*
 * Copy the boot time (UTC at boot) into *bt.  Lockless read: retry
 * until the generation is non-zero and unchanged across the copy,
 * proving tc_windup() did not rewrite the timehands underneath us.
 */
void
binboottime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();	/* generation read before data read */
		*bt = th->th_boottime;
		membar_consumer();	/* data read before generation re-check */
	} while (gen == 0 || gen != th->th_generation);
}
162
163 void
microboottime(struct timeval * tvp)164 microboottime(struct timeval *tvp)
165 {
166 struct bintime bt;
167
168 binboottime(&bt);
169 BINTIME_TO_TIMEVAL(&bt, tvp);
170 }
171
172 void
nanoboottime(struct timespec * tsp)173 nanoboottime(struct timespec *tsp)
174 {
175 struct bintime bt;
176
177 binboottime(&bt);
178 BINTIME_TO_TIMESPEC(&bt, tsp);
179 }
180
/*
 * Uptime (time since boot, including any suspend-adjusted offset) at
 * full counter precision: offset at last windup plus ticks since then.
 */
void
binuptime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();	/* generation read before data reads */
		TIMECOUNT_TO_BINTIME(tc_delta(th), th->th_scale, bt);
		bintimeadd(bt, &th->th_offset, bt);
		membar_consumer();	/* data reads before generation re-check */
	} while (gen == 0 || gen != th->th_generation);
}
196
/*
 * Cheap uptime: the cached offset from the last tc_windup() call,
 * without reading the hardware counter.  Coarser but faster.
 */
void
getbinuptime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();	/* generation read before data read */
		*bt = th->th_offset;
		membar_consumer();	/* data read before generation re-check */
	} while (gen == 0 || gen != th->th_generation);
}
211
212 void
nanouptime(struct timespec * tsp)213 nanouptime(struct timespec *tsp)
214 {
215 struct bintime bt;
216
217 binuptime(&bt);
218 BINTIME_TO_TIMESPEC(&bt, tsp);
219 }
220
221 void
microuptime(struct timeval * tvp)222 microuptime(struct timeval *tvp)
223 {
224 struct bintime bt;
225
226 binuptime(&bt);
227 BINTIME_TO_TIMEVAL(&bt, tvp);
228 }
229
/*
 * Cached uptime in whole seconds.
 */
time_t
getuptime(void)
{
#if defined(__LP64__)
	return time_uptime;	/* atomic */
#else
	/* A 32-bit load of a 64-bit time_t can tear; use the gen loop. */
	time_t now;
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();	/* generation read before data read */
		now = th->th_offset.sec;
		membar_consumer();	/* data read before generation re-check */
	} while (gen == 0 || gen != th->th_generation);

	return now;
#endif
}
251
252 uint64_t
nsecuptime(void)253 nsecuptime(void)
254 {
255 struct bintime bt;
256
257 binuptime(&bt);
258 return BINTIME_TO_NSEC(&bt);
259 }
260
261 uint64_t
getnsecuptime(void)262 getnsecuptime(void)
263 {
264 struct bintime bt;
265
266 getbinuptime(&bt);
267 return BINTIME_TO_NSEC(&bt);
268 }
269
/*
 * Runtime: uptime minus accumulated naptime (time spent suspended),
 * at full counter precision.
 */
void
binruntime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();	/* generation read before data reads */
		TIMECOUNT_TO_BINTIME(tc_delta(th), th->th_scale, bt);
		bintimeadd(bt, &th->th_offset, bt);
		bintimesub(bt, &th->th_naptime, bt);
		membar_consumer();	/* data reads before generation re-check */
	} while (gen == 0 || gen != th->th_generation);
}
286
287 void
nanoruntime(struct timespec * ts)288 nanoruntime(struct timespec *ts)
289 {
290 struct bintime bt;
291
292 binruntime(&bt);
293 BINTIME_TO_TIMESPEC(&bt, ts);
294 }
295
/*
 * Cheap runtime: cached offset minus cached naptime, no counter read.
 */
void
getbinruntime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();	/* generation read before data reads */
		bintimesub(&th->th_offset, &th->th_naptime, bt);
		membar_consumer();	/* data reads before generation re-check */
	} while (gen == 0 || gen != th->th_generation);
}
310
311 uint64_t
getnsecruntime(void)312 getnsecruntime(void)
313 {
314 struct bintime bt;
315
316 getbinruntime(&bt);
317 return BINTIME_TO_NSEC(&bt);
318 }
319
/*
 * Current UTC at full counter precision: uptime plus boot time.
 */
void
bintime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();	/* generation read before data reads */
		TIMECOUNT_TO_BINTIME(tc_delta(th), th->th_scale, bt);
		bintimeadd(bt, &th->th_offset, bt);
		bintimeadd(bt, &th->th_boottime, bt);
		membar_consumer();	/* data reads before generation re-check */
	} while (gen == 0 || gen != th->th_generation);
}
336
337 void
nanotime(struct timespec * tsp)338 nanotime(struct timespec *tsp)
339 {
340 struct bintime bt;
341
342 bintime(&bt);
343 BINTIME_TO_TIMESPEC(&bt, tsp);
344 }
345
346 void
microtime(struct timeval * tvp)347 microtime(struct timeval *tvp)
348 {
349 struct bintime bt;
350
351 bintime(&bt);
352 BINTIME_TO_TIMEVAL(&bt, tvp);
353 }
354
/*
 * Cached UTC in whole seconds.
 */
time_t
gettime(void)
{
#if defined(__LP64__)
	return time_second;	/* atomic */
#else
	/* A 32-bit load of a 64-bit time_t can tear; use the gen loop. */
	time_t now;
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();	/* generation read before data read */
		now = th->th_microtime.tv_sec;
		membar_consumer();	/* data read before generation re-check */
	} while (gen == 0 || gen != th->th_generation);

	return now;
#endif
}
376
/*
 * Cached uptime as a timespec (precision limited by tc_windup() rate).
 */
void
getnanouptime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();	/* generation read before data read */
		BINTIME_TO_TIMESPEC(&th->th_offset, tsp);
		membar_consumer();	/* data read before generation re-check */
	} while (gen == 0 || gen != th->th_generation);
}
391
/*
 * Cached uptime as a timeval (precision limited by tc_windup() rate).
 */
void
getmicrouptime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();	/* generation read before data read */
		BINTIME_TO_TIMEVAL(&th->th_offset, tvp);
		membar_consumer();	/* data read before generation re-check */
	} while (gen == 0 || gen != th->th_generation);
}
406
/*
 * Cached UTC as a timespec, precomputed by tc_windup().
 */
void
getnanotime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();	/* generation read before data read */
		*tsp = th->th_nanotime;
		membar_consumer();	/* data read before generation re-check */
	} while (gen == 0 || gen != th->th_generation);
}
421
/*
 * Cached UTC as a timeval, precomputed by tc_windup().
 */
void
getmicrotime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		membar_consumer();	/* generation read before data read */
		*tvp = th->th_microtime;
		membar_consumer();	/* data read before generation re-check */
	} while (gen == 0 || gen != th->th_generation);
}
436
437 /*
438 * Initialize a new timecounter and possibly use it.
439 */
void
tc_init(struct timecounter *tc)
{
	u_int64_t tmp;
	u_int u;

	/* Windup rate needed to keep this counter from wrapping. */
	u = tc->tc_frequency / tc->tc_counter_mask;
	/* XXX: We need some margin here, 10% is a guess */
	u *= 11;
	u /= 10;
	if (tc->tc_quality >= 0) {
		if (u > hz) {
			/* hz too low to service this counter; demote it. */
			tc->tc_quality = -2000;
			printf("Timecounter \"%s\" frequency %lu Hz",
			    tc->tc_name, (unsigned long)tc->tc_frequency);
			printf(" -- Insufficient hz, needs at least %u\n", u);
		}
	}

	/* Determine the counter's precision. */
	for (tmp = 1; (tmp & tc->tc_counter_mask) == 0; tmp <<= 1)
		continue;
	tc->tc_precision = tmp;

	SLIST_INSERT_HEAD(&tc_list, tc, tc_next);

	/*
	 * Never automatically use a timecounter with negative quality.
	 * Even though we run on the dummy counter, switching here may be
	 * worse since this timecounter may not be monotonic.
	 */
	if (tc->tc_quality < 0)
		return;
	if (tc->tc_quality < timecounter->tc_quality)
		return;
	if (tc->tc_quality == timecounter->tc_quality &&
	    tc->tc_frequency < timecounter->tc_frequency)
		return;
	/* Warm up the counter and feed a reading into the entropy pool. */
	(void)tc->tc_get_timecount(tc);
	enqueue_randomness(tc->tc_get_timecount(tc));

	/* Best counter so far: tc_windup() will switch to it. */
	timecounter = tc;
}
483
484 /*
485 * Change the given timecounter's quality. If it is the active
486 * counter and it is no longer the best counter, activate the
487 * best counter.
488 */
void
tc_reset_quality(struct timecounter *tc, int quality)
{
	struct timecounter *best = &dummy_timecounter, *tmp;

	if (tc == &dummy_timecounter)
		panic("%s: cannot change dummy counter quality", __func__);

	tc->tc_quality = quality;
	if (timecounter == tc) {
		/*
		 * The active counter's quality changed; rescan the list
		 * for the best eligible counter.  Higher quality wins,
		 * then higher frequency.
		 */
		SLIST_FOREACH(tmp, &tc_list, tc_next) {
			if (tmp->tc_quality < 0)
				continue;
			if (tmp->tc_quality < best->tc_quality)
				continue;
			if (tmp->tc_quality == best->tc_quality &&
			    tmp->tc_frequency < best->tc_frequency)
				continue;
			best = tmp;
		}
		if (best != tc) {
			/* Feed the switchover into the entropy pool. */
			enqueue_randomness(best->tc_get_timecount(best));
			timecounter = best;
			printf("timecounter: active counter changed: %s -> %s\n",
			    tc->tc_name, best->tc_name);
		}
	}
}
517
518 /* Report the frequency of the current timecounter. */
519 u_int64_t
tc_getfrequency(void)520 tc_getfrequency(void)
521 {
522 return (timehands->th_counter->tc_frequency);
523 }
524
525 /* Report the precision of the current timecounter. */
526 u_int64_t
tc_getprecision(void)527 tc_getprecision(void)
528 {
529 return (timehands->th_counter->tc_precision);
530 }
531
532 /*
533 * Step our concept of UTC, aka the realtime clock.
534 * This is done by modifying our estimate of when we booted.
535 *
536 * Any ongoing adjustment is meaningless after a clock jump,
537 * so we zero adjtimedelta here as well.
538 */
void
tc_setrealtimeclock(const struct timespec *ts)
{
	struct bintime boottime, old_utc, uptime, utc;
	struct timespec tmp;
	int64_t zero = 0;

	TIMESPEC_TO_BINTIME(ts, &utc);

	/* tc_lock before windup_mtx: we are changing UTC-related state. */
	rw_enter_write(&tc_lock);
	mtx_enter(&windup_mtx);

	/* New boot time = requested UTC minus current uptime. */
	binuptime(&uptime);
	bintimesub(&utc, &uptime, &boottime);
	/* Remember the old UTC so the step can be logged below. */
	bintimeadd(&timehands->th_boottime, &uptime, &old_utc);
	/* XXX fiddle all the little crinkly bits around the fiords... */
	tc_windup(&boottime, NULL, &zero);	/* also zeroes adjtimedelta */

	mtx_leave(&windup_mtx);
	rw_exit_write(&tc_lock);

	enqueue_randomness(ts->tv_sec);

	if (timestepwarnings) {
		BINTIME_TO_TIMESPEC(&old_utc, &tmp);
		log(LOG_INFO, "Time stepped from %lld.%09ld to %lld.%09ld\n",
		    (long long)tmp.tv_sec, tmp.tv_nsec,
		    (long long)ts->tv_sec, ts->tv_nsec);
	}
}
569
570 /*
571 * Step the monotonic and realtime clocks, triggering any timeouts that
572 * should have occurred across the interval.
573 */
void
tc_setclock(const struct timespec *ts)
{
	struct bintime new_naptime, old_naptime, uptime, utc;
	static int first = 1;
#ifndef SMALL_KERNEL
	struct bintime elapsed;
	long long adj_ticks;
#endif

	/*
	 * When we're called for the first time, during boot when
	 * the root partition is mounted, we need to set boottime.
	 */
	if (first) {
		tc_setrealtimeclock(ts);
		first = 0;
		return;
	}

	enqueue_randomness(ts->tv_sec);

	TIMESPEC_TO_BINTIME(ts, &utc);

	mtx_enter(&windup_mtx);

	/* New uptime = requested UTC minus (unchanged) boot time. */
	bintimesub(&utc, &timehands->th_boottime, &uptime);
	old_naptime = timehands->th_naptime;
	/* XXX fiddle all the little crinkly bits around the fiords... */
	tc_windup(NULL, &uptime, NULL);
	new_naptime = timehands->th_naptime;

	mtx_leave(&windup_mtx);

#ifndef SMALL_KERNEL
	/* convert the bintime to ticks */
	bintimesub(&new_naptime, &old_naptime, &elapsed);
	adj_ticks = BINTIME_TO_NSEC(&elapsed) / tick_nsec;
	if (adj_ticks > 0) {
		/* Clamp: timeout_adjust_ticks() takes an int-sized count. */
		if (adj_ticks > INT_MAX)
			adj_ticks = INT_MAX;
		/* Fire timeouts that should have expired while asleep. */
		timeout_adjust_ticks(adj_ticks);
	}
#endif
}
619
620 void
tc_update_timekeep(void)621 tc_update_timekeep(void)
622 {
623 static struct timecounter *last_tc = NULL;
624 struct timehands *th;
625
626 MUTEX_ASSERT_LOCKED(&windup_mtx);
627
628 if (timekeep == NULL)
629 return;
630
631 th = timehands;
632 timekeep->tk_generation = 0;
633 membar_producer();
634 timekeep->tk_scale = th->th_scale;
635 timekeep->tk_offset_count = th->th_offset_count;
636 timekeep->tk_offset = th->th_offset;
637 timekeep->tk_naptime = th->th_naptime;
638 timekeep->tk_boottime = th->th_boottime;
639 if (last_tc != th->th_counter) {
640 timekeep->tk_counter_mask = th->th_counter->tc_counter_mask;
641 timekeep->tk_user = th->th_counter->tc_user;
642 last_tc = th->th_counter;
643 }
644 membar_producer();
645 timekeep->tk_generation = th->th_generation;
646
647 return;
648 }
649
650 /*
651 * Initialize the next struct timehands in the ring and make
652 * it the active timehands. Along the way we might switch to a different
653 * timecounter and/or do seconds processing in NTP. Slightly magic.
654 */
void
tc_windup(struct bintime *new_boottime, struct bintime *new_offset,
    int64_t *new_adjtimedelta)
{
	struct bintime bt;
	struct timecounter *active_tc;
	struct timehands *th, *tho;
	u_int64_t scale;
	u_int delta, ncount, ogen;

	/* Changing UTC-related state additionally requires tc_lock. */
	if (new_boottime != NULL || new_adjtimedelta != NULL)
		rw_assert_wrlock(&tc_lock);
	MUTEX_ASSERT_LOCKED(&windup_mtx);

	active_tc = timecounter;

	/*
	 * Make the next timehands a copy of the current one, but do not
	 * overwrite the generation or next pointer. While we update
	 * the contents, the generation must be zero.
	 */
	tho = timehands;
	ogen = tho->th_generation;
	th = tho->th_next;
	th->th_generation = 0;
	membar_producer();	/* zero generation visible before the copy */
	memcpy(th, tho, offsetof(struct timehands, th_generation));

	/*
	 * Capture a timecounter delta on the current timecounter and if
	 * changing timecounters, a counter value from the new timecounter.
	 * Update the offset fields accordingly.
	 */
	delta = tc_delta(th);
	if (th->th_counter != active_tc)
		ncount = active_tc->tc_get_timecount(active_tc);
	else
		ncount = 0;
	th->th_offset_count += delta;
	th->th_offset_count &= th->th_counter->tc_counter_mask;
	TIMECOUNT_TO_BINTIME(delta, th->th_scale, &bt);
	bintimeadd(&th->th_offset, &bt, &th->th_offset);

	/*
	 * Ignore new offsets that predate the current offset.
	 * If changing the offset, first increase the naptime
	 * accordingly.
	 */
	if (new_offset != NULL && bintimecmp(&th->th_offset, new_offset, <)) {
		bintimesub(new_offset, &th->th_offset, &bt);
		bintimeadd(&th->th_naptime, &bt, &th->th_naptime);
		naptime = th->th_naptime.sec;
		th->th_offset = *new_offset;
	}

	/*
	 * If changing the boot time or clock adjustment, do so before
	 * NTP processing.
	 */
	if (new_boottime != NULL)
		th->th_boottime = *new_boottime;
	if (new_adjtimedelta != NULL) {
		th->th_adjtimedelta = *new_adjtimedelta;
		/* Reset the NTP update period. */
		bintimesub(&th->th_offset, &th->th_naptime,
		    &th->th_next_ntp_update);
	}

	/*
	 * Deal with NTP second processing. The while-loop normally
	 * iterates at most once, but in extreme situations it might
	 * keep NTP sane if tc_windup() is not run for several seconds.
	 */
	bintimesub(&th->th_offset, &th->th_naptime, &bt);
	while (bintimecmp(&th->th_next_ntp_update, &bt, <=)) {
		ntp_update_second(th);
		th->th_next_ntp_update.sec++;
	}

	/* Update the UTC timestamps used by the get*() functions. */
	bintimeadd(&th->th_boottime, &th->th_offset, &bt);
	BINTIME_TO_TIMEVAL(&bt, &th->th_microtime);
	BINTIME_TO_TIMESPEC(&bt, &th->th_nanotime);

	/* Now is a good time to change timecounters. */
	if (th->th_counter != active_tc) {
		th->th_counter = active_tc;
		th->th_offset_count = ncount;
	}

	/*-
	 * Recalculate the scaling factor. We want the number of 1/2^64
	 * fractions of a second per period of the hardware counter, taking
	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
	 * processing provides us with.
	 *
	 * The th_adjustment is nanoseconds per second with 32 bit binary
	 * fraction and we want 64 bit binary fraction of second:
	 *
	 * x = a * 2^32 / 10^9 = a * 4.294967296
	 *
	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
	 * we can only multiply by about 850 without overflowing, but that
	 * leaves suitably precise fractions for multiply before divide.
	 *
	 * Divide before multiply with a fraction of 2199/512 results in a
	 * systematic undercompensation of 10PPM of th_adjustment. On a
	 * 5000PPM adjustment this is a 0.05PPM error. This is acceptable.
	 *
	 * We happily sacrifice the lowest of the 64 bits of our result
	 * to the goddess of code clarity.
	 *
	 */
	scale = (u_int64_t)1 << 63;
	scale += \
	    ((th->th_adjustment + th->th_counter->tc_freq_adj) / 1024) * 2199;
	scale /= th->th_counter->tc_frequency;
	th->th_scale = scale * 2;

	/*
	 * Now that the struct timehands is again consistent, set the new
	 * generation number, making sure to not make it zero.
	 */
	if (++ogen == 0)
		ogen = 1;
	membar_producer();	/* all field stores before the new generation */
	th->th_generation = ogen;

	/* Go live with the new struct timehands. */
	time_second = th->th_microtime.tv_sec;
	time_uptime = th->th_offset.sec;
	membar_producer();	/* snapshot complete before the pointer flip */
	timehands = th;

	tc_update_timekeep();
}
791
792 /* Report or change the active timecounter hardware. */
int
sysctl_tc_hardware(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	char newname[32];
	struct timecounter *newtc, *tc;
	int error;

	tc = timecounter;
	strlcpy(newname, tc->tc_name, sizeof(newname));

	/* Report the current name; copy in a replacement name, if any. */
	error = sysctl_string(oldp, oldlenp, newp, newlen, newname, sizeof(newname));
	if (error != 0 || strcmp(newname, tc->tc_name) == 0)
		return (error);
	/* Look up the requested counter by name. */
	SLIST_FOREACH(newtc, &tc_list, tc_next) {
		if (strcmp(newname, newtc->tc_name) != 0)
			continue;

		/* Warm up new timecounter. */
		(void)newtc->tc_get_timecount(newtc);
		(void)newtc->tc_get_timecount(newtc);

		rw_enter_write(&tc_lock);
		timecounter = newtc;
		rw_exit_write(&tc_lock);

		return (0);
	}
	/* No registered counter has the requested name. */
	return (EINVAL);
}
822
823 /* Report or change the active timecounter hardware. */
int
sysctl_tc_choice(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	char buf[32], *spc, *choices;
	struct timecounter *tc;
	int error, maxlen;

	if (SLIST_EMPTY(&tc_list))
		return (sysctl_rdstring(oldp, oldlenp, newp, ""));

	spc = "";
	maxlen = 0;
	/* Reserve one buf-sized slot per registered counter. */
	SLIST_FOREACH(tc, &tc_list, tc_next)
		maxlen += sizeof(buf);
	choices = malloc(maxlen, M_TEMP, M_WAITOK);
	*choices = '\0';
	SLIST_FOREACH(tc, &tc_list, tc_next) {
		/* Format "name(quality)", space-separated after the first. */
		snprintf(buf, sizeof(buf), "%s%s(%d)",
		    spc, tc->tc_name, tc->tc_quality);
		spc = " ";
		strlcat(choices, buf, maxlen);
	}
	error = sysctl_rdstring(oldp, oldlenp, newp, choices);
	free(choices, M_TEMP, maxlen);
	return (error);
}
850
851 /*
852 * Timecounters need to be updated every so often to prevent the hardware
853 * counter from overflowing. Updating also recalculates the cached values
854 * used by the get*() family of functions, so their precision depends on
855 * the update frequency.
856 */
857 static int tc_tick;
858
859 void
tc_ticktock(void)860 tc_ticktock(void)
861 {
862 static int count;
863
864 if (++count < tc_tick)
865 return;
866 if (!mtx_enter_try(&windup_mtx))
867 return;
868 count = 0;
869 tc_windup(NULL, NULL, NULL);
870 mtx_leave(&windup_mtx);
871 }
872
873 void
inittimecounter(void)874 inittimecounter(void)
875 {
876 #ifdef DEBUG
877 u_int p;
878 #endif
879
880 /*
881 * Set the initial timeout to
882 * max(1, <approx. number of hardclock ticks in a millisecond>).
883 * People should probably not use the sysctl to set the timeout
884 * to smaller than its initial value, since that value is the
885 * smallest reasonable one. If they want better timestamps they
886 * should use the non-"get"* functions.
887 */
888 if (hz > 1000)
889 tc_tick = (hz + 500) / 1000;
890 else
891 tc_tick = 1;
892 #ifdef DEBUG
893 p = (tc_tick * 1000000) / hz;
894 printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000);
895 #endif
896
897 /* warm up new timecounter (again) and get rolling. */
898 (void)timecounter->tc_get_timecount(timecounter);
899 (void)timecounter->tc_get_timecount(timecounter);
900 }
901
902 const struct sysctl_bounded_args tc_vars[] = {
903 { KERN_TIMECOUNTER_TICK, &tc_tick, SYSCTL_INT_READONLY },
904 { KERN_TIMECOUNTER_TIMESTEPWARNINGS, ×tepwarnings, 0, 1 },
905 };
906
907 /*
908 * Return timecounter-related information.
909 */
910 int
sysctl_tc(int * name,u_int namelen,void * oldp,size_t * oldlenp,void * newp,size_t newlen)911 sysctl_tc(int *name, u_int namelen, void *oldp, size_t *oldlenp,
912 void *newp, size_t newlen)
913 {
914 if (namelen != 1)
915 return (ENOTDIR);
916
917 switch (name[0]) {
918 case KERN_TIMECOUNTER_HARDWARE:
919 return (sysctl_tc_hardware(oldp, oldlenp, newp, newlen));
920 case KERN_TIMECOUNTER_CHOICE:
921 return (sysctl_tc_choice(oldp, oldlenp, newp, newlen));
922 default:
923 return (sysctl_bounded_arr(tc_vars, nitems(tc_vars), name,
924 namelen, oldp, oldlenp, newp, newlen));
925 }
926 /* NOTREACHED */
927 }
928
929 /*
930 * Skew the timehands according to any adjtime(2) adjustment.
931 */
/*
 * Skew the timehands according to any adjtime(2) adjustment.
 */
void
ntp_update_second(struct timehands *th)
{
	int64_t adj;

	MUTEX_ASSERT_LOCKED(&windup_mtx);

	/* Consume at most 5000 units of the pending delta per second. */
	if (th->th_adjtimedelta > 0)
		adj = MIN(5000, th->th_adjtimedelta);
	else
		adj = MAX(-5000, th->th_adjtimedelta);
	th->th_adjtimedelta -= adj;
	/*
	 * Scale into the 32.32 fixed-point nanoseconds-per-second form
	 * that tc_windup() folds into th_scale (see the comment there).
	 */
	th->th_adjustment = (adj * 1000) << 32;
}
946
/*
 * Read and/or set the active counter's frequency adjustment.
 * Reading needs any hold on tc_lock; writing needs it exclusively
 * and re-winds the timehands so the new skew takes effect at once.
 */
void
tc_adjfreq(int64_t *old, int64_t *new)
{
	if (old != NULL) {
		rw_assert_anylock(&tc_lock);
		*old = timecounter->tc_freq_adj;
	}
	if (new != NULL) {
		rw_assert_wrlock(&tc_lock);
		mtx_enter(&windup_mtx);
		timecounter->tc_freq_adj = *new;
		tc_windup(NULL, NULL, NULL);
		mtx_leave(&windup_mtx);
	}
}
962
/*
 * Read and/or replace the pending adjtime(2) delta.  Reading is
 * lockless via the generation loop; writing needs tc_lock exclusively
 * and is applied through tc_windup().
 */
void
tc_adjtime(int64_t *old, int64_t *new)
{
	struct timehands *th;
	u_int gen;

	if (old != NULL) {
		do {
			th = timehands;
			gen = th->th_generation;
			membar_consumer();	/* generation before data */
			*old = th->th_adjtimedelta;
			membar_consumer();	/* data before re-check */
		} while (gen == 0 || gen != th->th_generation);
	}
	if (new != NULL) {
		rw_assert_wrlock(&tc_lock);
		mtx_enter(&windup_mtx);
		tc_windup(NULL, NULL, new);
		mtx_leave(&windup_mtx);
	}
}
985