1 /* $NetBSD: kern_tc.c,v 1.42 2010/04/13 22:46:10 pooka Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /*- 33 * ---------------------------------------------------------------------------- 34 * "THE BEER-WARE LICENSE" (Revision 42): 35 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 36 * can do whatever you want with this stuff. If we meet some day, and you think 37 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 38 * --------------------------------------------------------------------------- 39 */ 40 41 #include <sys/cdefs.h> 42 /* __FBSDID("$FreeBSD: src/sys/kern/kern_tc.c,v 1.166 2005/09/19 22:16:31 andre Exp $"); */ 43 __KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.42 2010/04/13 22:46:10 pooka Exp $"); 44 45 #ifdef _KERNEL_OPT 46 #include "opt_ntp.h" 47 #endif 48 49 #include <sys/param.h> 50 #include <sys/kernel.h> 51 #include <sys/reboot.h> /* XXX just to get AB_VERBOSE */ 52 #include <sys/sysctl.h> 53 #include <sys/syslog.h> 54 #include <sys/systm.h> 55 #include <sys/timepps.h> 56 #include <sys/timetc.h> 57 #include <sys/timex.h> 58 #include <sys/evcnt.h> 59 #include <sys/kauth.h> 60 #include <sys/mutex.h> 61 #include <sys/atomic.h> 62 #include <sys/xcall.h> 63 64 /* 65 * A large step happens on boot. This constant detects such steps. 66 * It is relatively small so that ntp_update_second gets called enough 67 * in the typical 'missed a couple of seconds' case, but doesn't loop 68 * forever when the time step is large. 69 */ 70 #define LARGE_STEP 200 71 72 /* 73 * Implement a dummy timecounter which we can use until we get a real one 74 * in the air. This allows the console and other early stuff to use 75 * time services. 76 */ 77 78 static u_int 79 dummy_get_timecount(struct timecounter *tc) 80 { 81 static u_int now; 82 83 return (++now); 84 } 85 86 static struct timecounter dummy_timecounter = { 87 dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000, NULL, NULL, 88 }; 89 90 struct timehands { 91 /* These fields must be initialized by the driver. */ 92 struct timecounter *th_counter; /* active timecounter */ 93 int64_t th_adjustment; /* frequency adjustment */ 94 /* (NTP/adjtime) */ 95 u_int64_t th_scale; /* scale factor (counter */ 96 /* tick->time) */ 97 u_int64_t th_offset_count; /* offset at last time */ 98 /* update (tc_windup()) */ 99 struct bintime th_offset; /* bin (up)time at windup */ 100 struct timeval th_microtime; /* cached microtime */ 101 struct timespec th_nanotime; /* cached nanotime */ 102 /* Fields not to be copied in tc_windup start with th_generation. */ 103 volatile u_int th_generation; /* current genration */ 104 struct timehands *th_next; /* next timehand */ 105 }; 106 107 static struct timehands th0; 108 static struct timehands th9 = { .th_next = &th0, }; 109 static struct timehands th8 = { .th_next = &th9, }; 110 static struct timehands th7 = { .th_next = &th8, }; 111 static struct timehands th6 = { .th_next = &th7, }; 112 static struct timehands th5 = { .th_next = &th6, }; 113 static struct timehands th4 = { .th_next = &th5, }; 114 static struct timehands th3 = { .th_next = &th4, }; 115 static struct timehands th2 = { .th_next = &th3, }; 116 static struct timehands th1 = { .th_next = &th2, }; 117 static struct timehands th0 = { 118 .th_counter = &dummy_timecounter, 119 .th_scale = (uint64_t)-1 / 1000000, 120 .th_offset = { .sec = 1, .frac = 0 }, 121 .th_generation = 1, 122 .th_next = &th1, 123 }; 124 125 static struct timehands *volatile timehands = &th0; 126 struct timecounter *timecounter = &dummy_timecounter; 127 static struct timecounter *timecounters = &dummy_timecounter; 128 129 time_t time_second = 1; 130 time_t time_uptime = 1; 131 132 static struct bintime timebasebin; 133 134 static int timestepwarnings; 135 136 kmutex_t timecounter_lock; 137 static u_int timecounter_mods; 138 static volatile int timecounter_removals = 1; 139 static u_int timecounter_bad; 140 141 #ifdef __FreeBSD__ 142 SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW, 143 ×tepwarnings, 0, ""); 144 #endif /* __FreeBSD__ */ 145 146 /* 147 * sysctl helper routine for kern.timercounter.hardware 148 */ 149 static int 150 sysctl_kern_timecounter_hardware(SYSCTLFN_ARGS) 151 { 152 struct sysctlnode node; 153 int error; 154 char newname[MAX_TCNAMELEN]; 155 struct timecounter *newtc, *tc; 156 157 tc = timecounter; 158 159 strlcpy(newname, tc->tc_name, sizeof(newname)); 160 161 node = *rnode; 162 node.sysctl_data = newname; 163 node.sysctl_size = sizeof(newname); 164 165 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 166 167 if (error || 168 newp == NULL || 169 strncmp(newname, tc->tc_name, sizeof(newname)) == 0) 170 return error; 171 172 if (l != NULL && (error = kauth_authorize_system(l->l_cred, 173 KAUTH_SYSTEM_TIME, KAUTH_REQ_SYSTEM_TIME_TIMECOUNTERS, newname, 174 NULL, NULL)) != 0) 175 return (error); 176 177 if (!cold) 178 mutex_spin_enter(&timecounter_lock); 179 error = EINVAL; 180 for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) { 181 if (strcmp(newname, newtc->tc_name) != 0) 182 continue; 183 /* Warm up new timecounter. */ 184 (void)newtc->tc_get_timecount(newtc); 185 (void)newtc->tc_get_timecount(newtc); 186 timecounter = newtc; 187 error = 0; 188 break; 189 } 190 if (!cold) 191 mutex_spin_exit(&timecounter_lock); 192 return error; 193 } 194 195 static int 196 sysctl_kern_timecounter_choice(SYSCTLFN_ARGS) 197 { 198 char buf[MAX_TCNAMELEN+48]; 199 char *where; 200 const char *spc; 201 struct timecounter *tc; 202 size_t needed, left, slen; 203 int error, mods; 204 205 if (newp != NULL) 206 return (EPERM); 207 if (namelen != 0) 208 return (EINVAL); 209 210 mutex_spin_enter(&timecounter_lock); 211 retry: 212 spc = ""; 213 error = 0; 214 needed = 0; 215 left = *oldlenp; 216 where = oldp; 217 for (tc = timecounters; error == 0 && tc != NULL; tc = tc->tc_next) { 218 if (where == NULL) { 219 needed += sizeof(buf); /* be conservative */ 220 } else { 221 slen = snprintf(buf, sizeof(buf), "%s%s(q=%d, f=%" PRId64 222 " Hz)", spc, tc->tc_name, tc->tc_quality, 223 tc->tc_frequency); 224 if (left < slen + 1) 225 break; 226 mods = timecounter_mods; 227 mutex_spin_exit(&timecounter_lock); 228 error = copyout(buf, where, slen + 1); 229 mutex_spin_enter(&timecounter_lock); 230 if (mods != timecounter_mods) { 231 goto retry; 232 } 233 spc = " "; 234 where += slen; 235 needed += slen; 236 left -= slen; 237 } 238 } 239 mutex_spin_exit(&timecounter_lock); 240 241 *oldlenp = needed; 242 return (error); 243 } 244 245 SYSCTL_SETUP(sysctl_timecounter_setup, "sysctl timecounter setup") 246 { 247 const struct sysctlnode *node; 248 249 sysctl_createv(clog, 0, NULL, &node, 250 CTLFLAG_PERMANENT, 251 CTLTYPE_NODE, "timecounter", 252 SYSCTL_DESCR("time counter information"), 253 NULL, 0, NULL, 0, 254 CTL_KERN, CTL_CREATE, CTL_EOL); 255 256 if (node != NULL) { 257 sysctl_createv(clog, 0, NULL, NULL, 258 CTLFLAG_PERMANENT, 259 CTLTYPE_STRING, "choice", 260 SYSCTL_DESCR("available counters"), 261 sysctl_kern_timecounter_choice, 0, NULL, 0, 262 CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL); 263 264 sysctl_createv(clog, 0, NULL, NULL, 265 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 266 CTLTYPE_STRING, "hardware", 267 SYSCTL_DESCR("currently active time counter"), 268 sysctl_kern_timecounter_hardware, 0, NULL, MAX_TCNAMELEN, 269 CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL); 270 271 sysctl_createv(clog, 0, NULL, NULL, 272 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 273 CTLTYPE_INT, "timestepwarnings", 274 SYSCTL_DESCR("log time steps"), 275 NULL, 0, ×tepwarnings, 0, 276 CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL); 277 } 278 } 279 280 #ifdef TC_COUNTERS 281 #define TC_STATS(name) \ 282 static struct evcnt n##name = \ 283 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "timecounter", #name); \ 284 EVCNT_ATTACH_STATIC(n##name) 285 TC_STATS(binuptime); TC_STATS(nanouptime); TC_STATS(microuptime); 286 TC_STATS(bintime); TC_STATS(nanotime); TC_STATS(microtime); 287 TC_STATS(getbinuptime); TC_STATS(getnanouptime); TC_STATS(getmicrouptime); 288 TC_STATS(getbintime); TC_STATS(getnanotime); TC_STATS(getmicrotime); 289 TC_STATS(setclock); 290 #define TC_COUNT(var) var.ev_count++ 291 #undef TC_STATS 292 #else 293 #define TC_COUNT(var) /* nothing */ 294 #endif /* TC_COUNTERS */ 295 296 static void tc_windup(void); 297 298 /* 299 * Return the difference between the timehands' counter value now and what 300 * was when we copied it to the timehands' offset_count. 301 */ 302 static inline u_int 303 tc_delta(struct timehands *th) 304 { 305 struct timecounter *tc; 306 307 tc = th->th_counter; 308 return ((tc->tc_get_timecount(tc) - 309 th->th_offset_count) & tc->tc_counter_mask); 310 } 311 312 /* 313 * Functions for reading the time. We have to loop until we are sure that 314 * the timehands that we operated on was not updated under our feet. See 315 * the comment in <sys/timevar.h> for a description of these 12 functions. 316 */ 317 318 void 319 binuptime(struct bintime *bt) 320 { 321 struct timehands *th; 322 lwp_t *l; 323 u_int lgen, gen; 324 325 TC_COUNT(nbinuptime); 326 327 /* 328 * Provide exclusion against tc_detach(). 329 * 330 * We record the number of timecounter removals before accessing 331 * timecounter state. Note that the LWP can be using multiple 332 * "generations" at once, due to interrupts (interrupted while in 333 * this function). Hardware interrupts will borrow the interrupted 334 * LWP's l_tcgen value for this purpose, and can themselves be 335 * interrupted by higher priority interrupts. In this case we need 336 * to ensure that the oldest generation in use is recorded. 337 * 338 * splsched() is too expensive to use, so we take care to structure 339 * this code in such a way that it is not required. Likewise, we 340 * do not disable preemption. 341 * 342 * Memory barriers are also too expensive to use for such a 343 * performance critical function. The good news is that we do not 344 * need memory barriers for this type of exclusion, as the thread 345 * updating timecounter_removals will issue a broadcast cross call 346 * before inspecting our l_tcgen value (this elides memory ordering 347 * issues). 348 */ 349 l = curlwp; 350 lgen = l->l_tcgen; 351 if (__predict_true(lgen == 0)) { 352 l->l_tcgen = timecounter_removals; 353 } 354 __insn_barrier(); 355 356 do { 357 th = timehands; 358 gen = th->th_generation; 359 *bt = th->th_offset; 360 bintime_addx(bt, th->th_scale * tc_delta(th)); 361 } while (gen == 0 || gen != th->th_generation); 362 363 __insn_barrier(); 364 l->l_tcgen = lgen; 365 } 366 367 void 368 nanouptime(struct timespec *tsp) 369 { 370 struct bintime bt; 371 372 TC_COUNT(nnanouptime); 373 binuptime(&bt); 374 bintime2timespec(&bt, tsp); 375 } 376 377 void 378 microuptime(struct timeval *tvp) 379 { 380 struct bintime bt; 381 382 TC_COUNT(nmicrouptime); 383 binuptime(&bt); 384 bintime2timeval(&bt, tvp); 385 } 386 387 void 388 bintime(struct bintime *bt) 389 { 390 391 TC_COUNT(nbintime); 392 binuptime(bt); 393 bintime_add(bt, &timebasebin); 394 } 395 396 void 397 nanotime(struct timespec *tsp) 398 { 399 struct bintime bt; 400 401 TC_COUNT(nnanotime); 402 bintime(&bt); 403 bintime2timespec(&bt, tsp); 404 } 405 406 void 407 microtime(struct timeval *tvp) 408 { 409 struct bintime bt; 410 411 TC_COUNT(nmicrotime); 412 bintime(&bt); 413 bintime2timeval(&bt, tvp); 414 } 415 416 void 417 getbinuptime(struct bintime *bt) 418 { 419 struct timehands *th; 420 u_int gen; 421 422 TC_COUNT(ngetbinuptime); 423 do { 424 th = timehands; 425 gen = th->th_generation; 426 *bt = th->th_offset; 427 } while (gen == 0 || gen != th->th_generation); 428 } 429 430 void 431 getnanouptime(struct timespec *tsp) 432 { 433 struct timehands *th; 434 u_int gen; 435 436 TC_COUNT(ngetnanouptime); 437 do { 438 th = timehands; 439 gen = th->th_generation; 440 bintime2timespec(&th->th_offset, tsp); 441 } while (gen == 0 || gen != th->th_generation); 442 } 443 444 void 445 getmicrouptime(struct timeval *tvp) 446 { 447 struct timehands *th; 448 u_int gen; 449 450 TC_COUNT(ngetmicrouptime); 451 do { 452 th = timehands; 453 gen = th->th_generation; 454 bintime2timeval(&th->th_offset, tvp); 455 } while (gen == 0 || gen != th->th_generation); 456 } 457 458 void 459 getbintime(struct bintime *bt) 460 { 461 struct timehands *th; 462 u_int gen; 463 464 TC_COUNT(ngetbintime); 465 do { 466 th = timehands; 467 gen = th->th_generation; 468 *bt = th->th_offset; 469 } while (gen == 0 || gen != th->th_generation); 470 bintime_add(bt, &timebasebin); 471 } 472 473 void 474 getnanotime(struct timespec *tsp) 475 { 476 struct timehands *th; 477 u_int gen; 478 479 TC_COUNT(ngetnanotime); 480 do { 481 th = timehands; 482 gen = th->th_generation; 483 *tsp = th->th_nanotime; 484 } while (gen == 0 || gen != th->th_generation); 485 } 486 487 void 488 getmicrotime(struct timeval *tvp) 489 { 490 struct timehands *th; 491 u_int gen; 492 493 TC_COUNT(ngetmicrotime); 494 do { 495 th = timehands; 496 gen = th->th_generation; 497 *tvp = th->th_microtime; 498 } while (gen == 0 || gen != th->th_generation); 499 } 500 501 /* 502 * Initialize a new timecounter and possibly use it. 503 */ 504 void 505 tc_init(struct timecounter *tc) 506 { 507 u_int u; 508 509 u = tc->tc_frequency / tc->tc_counter_mask; 510 /* XXX: We need some margin here, 10% is a guess */ 511 u *= 11; 512 u /= 10; 513 if (u > hz && tc->tc_quality >= 0) { 514 tc->tc_quality = -2000; 515 aprint_verbose( 516 "timecounter: Timecounter \"%s\" frequency %ju Hz", 517 tc->tc_name, (uintmax_t)tc->tc_frequency); 518 aprint_verbose(" -- Insufficient hz, needs at least %u\n", u); 519 } else if (tc->tc_quality >= 0 || bootverbose) { 520 aprint_verbose( 521 "timecounter: Timecounter \"%s\" frequency %ju Hz " 522 "quality %d\n", tc->tc_name, (uintmax_t)tc->tc_frequency, 523 tc->tc_quality); 524 } 525 526 mutex_spin_enter(&timecounter_lock); 527 tc->tc_next = timecounters; 528 timecounters = tc; 529 timecounter_mods++; 530 /* 531 * Never automatically use a timecounter with negative quality. 532 * Even though we run on the dummy counter, switching here may be 533 * worse since this timecounter may not be monotonous. 534 */ 535 if (tc->tc_quality >= 0 && (tc->tc_quality > timecounter->tc_quality || 536 (tc->tc_quality == timecounter->tc_quality && 537 tc->tc_frequency > timecounter->tc_frequency))) { 538 (void)tc->tc_get_timecount(tc); 539 (void)tc->tc_get_timecount(tc); 540 timecounter = tc; 541 tc_windup(); 542 } 543 mutex_spin_exit(&timecounter_lock); 544 } 545 546 /* 547 * Pick a new timecounter due to the existing counter going bad. 548 */ 549 static void 550 tc_pick(void) 551 { 552 struct timecounter *best, *tc; 553 554 KASSERT(mutex_owned(&timecounter_lock)); 555 556 for (best = tc = timecounters; tc != NULL; tc = tc->tc_next) { 557 if (tc->tc_quality > best->tc_quality) 558 best = tc; 559 else if (tc->tc_quality < best->tc_quality) 560 continue; 561 else if (tc->tc_frequency > best->tc_frequency) 562 best = tc; 563 } 564 (void)best->tc_get_timecount(best); 565 (void)best->tc_get_timecount(best); 566 timecounter = best; 567 } 568 569 /* 570 * A timecounter has gone bad, arrange to pick a new one at the next 571 * clock tick. 572 */ 573 void 574 tc_gonebad(struct timecounter *tc) 575 { 576 577 tc->tc_quality = -100; 578 membar_producer(); 579 atomic_inc_uint(&timecounter_bad); 580 } 581 582 /* 583 * Stop using a timecounter and remove it from the timecounters list. 584 */ 585 int 586 tc_detach(struct timecounter *target) 587 { 588 struct timecounter *tc; 589 struct timecounter **tcp = NULL; 590 int removals; 591 uint64_t where; 592 lwp_t *l; 593 594 /* First, find the timecounter. */ 595 mutex_spin_enter(&timecounter_lock); 596 for (tcp = &timecounters, tc = timecounters; 597 tc != NULL; 598 tcp = &tc->tc_next, tc = tc->tc_next) { 599 if (tc == target) 600 break; 601 } 602 if (tc == NULL) { 603 mutex_spin_exit(&timecounter_lock); 604 return ESRCH; 605 } 606 607 /* And now, remove it. */ 608 *tcp = tc->tc_next; 609 if (timecounter == target) { 610 tc_pick(); 611 tc_windup(); 612 } 613 timecounter_mods++; 614 removals = timecounter_removals++; 615 mutex_spin_exit(&timecounter_lock); 616 617 /* 618 * We now have to determine if any threads in the system are still 619 * making use of this timecounter. 620 * 621 * We issue a broadcast cross call to elide memory ordering issues, 622 * then scan all LWPs in the system looking at each's timecounter 623 * generation number. We need to see a value of zero (not actively 624 * using a timecounter) or a value greater than our removal value. 625 * 626 * We may race with threads that read `timecounter_removals' and 627 * and then get preempted before updating `l_tcgen'. This is not 628 * a problem, since it means that these threads have not yet started 629 * accessing timecounter state. All we do need is one clean 630 * snapshot of the system where every thread appears not to be using 631 * old timecounter state. 632 */ 633 for (;;) { 634 where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL); 635 xc_wait(where); 636 637 mutex_enter(proc_lock); 638 LIST_FOREACH(l, &alllwp, l_list) { 639 if (l->l_tcgen == 0 || l->l_tcgen > removals) { 640 /* 641 * Not using timecounter or old timecounter 642 * state at time of our xcall or later. 643 */ 644 continue; 645 } 646 break; 647 } 648 mutex_exit(proc_lock); 649 650 /* 651 * If the timecounter is still in use, wait at least 10ms 652 * before retrying. 653 */ 654 if (l == NULL) { 655 return 0; 656 } 657 (void)kpause("tcdetach", false, mstohz(10), NULL); 658 } 659 } 660 661 /* Report the frequency of the current timecounter. */ 662 u_int64_t 663 tc_getfrequency(void) 664 { 665 666 return (timehands->th_counter->tc_frequency); 667 } 668 669 /* 670 * Step our concept of UTC. This is done by modifying our estimate of 671 * when we booted. 672 */ 673 void 674 tc_setclock(const struct timespec *ts) 675 { 676 struct timespec ts2; 677 struct bintime bt, bt2; 678 679 mutex_spin_enter(&timecounter_lock); 680 TC_COUNT(nsetclock); 681 binuptime(&bt2); 682 timespec2bintime(ts, &bt); 683 bintime_sub(&bt, &bt2); 684 bintime_add(&bt2, &timebasebin); 685 timebasebin = bt; 686 tc_windup(); 687 mutex_spin_exit(&timecounter_lock); 688 689 if (timestepwarnings) { 690 bintime2timespec(&bt2, &ts2); 691 log(LOG_INFO, "Time stepped from %lld.%09ld to %lld.%09ld\n", 692 (long long)ts2.tv_sec, ts2.tv_nsec, 693 (long long)ts->tv_sec, ts->tv_nsec); 694 } 695 } 696 697 /* 698 * Initialize the next struct timehands in the ring and make 699 * it the active timehands. Along the way we might switch to a different 700 * timecounter and/or do seconds processing in NTP. Slightly magic. 701 */ 702 static void 703 tc_windup(void) 704 { 705 struct bintime bt; 706 struct timehands *th, *tho; 707 u_int64_t scale; 708 u_int delta, ncount, ogen; 709 int i, s_update; 710 time_t t; 711 712 KASSERT(mutex_owned(&timecounter_lock)); 713 714 s_update = 0; 715 716 /* 717 * Make the next timehands a copy of the current one, but do not 718 * overwrite the generation or next pointer. While we update 719 * the contents, the generation must be zero. Ensure global 720 * visibility of the generation before proceeding. 721 */ 722 tho = timehands; 723 th = tho->th_next; 724 ogen = th->th_generation; 725 th->th_generation = 0; 726 membar_producer(); 727 bcopy(tho, th, offsetof(struct timehands, th_generation)); 728 729 /* 730 * Capture a timecounter delta on the current timecounter and if 731 * changing timecounters, a counter value from the new timecounter. 732 * Update the offset fields accordingly. 733 */ 734 delta = tc_delta(th); 735 if (th->th_counter != timecounter) 736 ncount = timecounter->tc_get_timecount(timecounter); 737 else 738 ncount = 0; 739 th->th_offset_count += delta; 740 bintime_addx(&th->th_offset, th->th_scale * delta); 741 742 /* 743 * Hardware latching timecounters may not generate interrupts on 744 * PPS events, so instead we poll them. There is a finite risk that 745 * the hardware might capture a count which is later than the one we 746 * got above, and therefore possibly in the next NTP second which might 747 * have a different rate than the current NTP second. It doesn't 748 * matter in practice. 749 */ 750 if (tho->th_counter->tc_poll_pps) 751 tho->th_counter->tc_poll_pps(tho->th_counter); 752 753 /* 754 * Deal with NTP second processing. The for loop normally 755 * iterates at most once, but in extreme situations it might 756 * keep NTP sane if timeouts are not run for several seconds. 757 * At boot, the time step can be large when the TOD hardware 758 * has been read, so on really large steps, we call 759 * ntp_update_second only twice. We need to call it twice in 760 * case we missed a leap second. 761 * If NTP is not compiled in ntp_update_second still calculates 762 * the adjustment resulting from adjtime() calls. 763 */ 764 bt = th->th_offset; 765 bintime_add(&bt, &timebasebin); 766 i = bt.sec - tho->th_microtime.tv_sec; 767 if (i > LARGE_STEP) 768 i = 2; 769 for (; i > 0; i--) { 770 t = bt.sec; 771 ntp_update_second(&th->th_adjustment, &bt.sec); 772 s_update = 1; 773 if (bt.sec != t) 774 timebasebin.sec += bt.sec - t; 775 } 776 777 /* Update the UTC timestamps used by the get*() functions. */ 778 /* XXX shouldn't do this here. Should force non-`get' versions. */ 779 bintime2timeval(&bt, &th->th_microtime); 780 bintime2timespec(&bt, &th->th_nanotime); 781 /* Now is a good time to change timecounters. */ 782 if (th->th_counter != timecounter) { 783 th->th_counter = timecounter; 784 th->th_offset_count = ncount; 785 s_update = 1; 786 } 787 788 /*- 789 * Recalculate the scaling factor. We want the number of 1/2^64 790 * fractions of a second per period of the hardware counter, taking 791 * into account the th_adjustment factor which the NTP PLL/adjtime(2) 792 * processing provides us with. 793 * 794 * The th_adjustment is nanoseconds per second with 32 bit binary 795 * fraction and we want 64 bit binary fraction of second: 796 * 797 * x = a * 2^32 / 10^9 = a * 4.294967296 798 * 799 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int 800 * we can only multiply by about 850 without overflowing, but that 801 * leaves suitably precise fractions for multiply before divide. 802 * 803 * Divide before multiply with a fraction of 2199/512 results in a 804 * systematic undercompensation of 10PPM of th_adjustment. On a 805 * 5000PPM adjustment this is a 0.05PPM error. This is acceptable. 806 * 807 * We happily sacrifice the lowest of the 64 bits of our result 808 * to the goddess of code clarity. 809 * 810 */ 811 if (s_update) { 812 scale = (u_int64_t)1 << 63; 813 scale += (th->th_adjustment / 1024) * 2199; 814 scale /= th->th_counter->tc_frequency; 815 th->th_scale = scale * 2; 816 } 817 /* 818 * Now that the struct timehands is again consistent, set the new 819 * generation number, making sure to not make it zero. Ensure 820 * changes are globally visible before changing. 821 */ 822 if (++ogen == 0) 823 ogen = 1; 824 membar_producer(); 825 th->th_generation = ogen; 826 827 /* 828 * Go live with the new struct timehands. Ensure changes are 829 * globally visible before changing. 830 */ 831 time_second = th->th_microtime.tv_sec; 832 time_uptime = th->th_offset.sec; 833 membar_producer(); 834 timehands = th; 835 836 /* 837 * Force users of the old timehand to move on. This is 838 * necessary for MP systems; we need to ensure that the 839 * consumers will move away from the old timehand before 840 * we begin updating it again when we eventually wrap 841 * around. 842 */ 843 if (++tho->th_generation == 0) 844 tho->th_generation = 1; 845 } 846 847 /* 848 * RFC 2783 PPS-API implementation. 849 */ 850 851 int 852 pps_ioctl(u_long cmd, void *data, struct pps_state *pps) 853 { 854 pps_params_t *app; 855 pps_info_t *pipi; 856 #ifdef PPS_SYNC 857 int *epi; 858 #endif 859 860 KASSERT(mutex_owned(&timecounter_lock)); 861 862 KASSERT(pps != NULL); /* XXX ("NULL pps pointer in pps_ioctl") */ 863 switch (cmd) { 864 case PPS_IOC_CREATE: 865 return (0); 866 case PPS_IOC_DESTROY: 867 return (0); 868 case PPS_IOC_SETPARAMS: 869 app = (pps_params_t *)data; 870 if (app->mode & ~pps->ppscap) 871 return (EINVAL); 872 pps->ppsparam = *app; 873 return (0); 874 case PPS_IOC_GETPARAMS: 875 app = (pps_params_t *)data; 876 *app = pps->ppsparam; 877 app->api_version = PPS_API_VERS_1; 878 return (0); 879 case PPS_IOC_GETCAP: 880 *(int*)data = pps->ppscap; 881 return (0); 882 case PPS_IOC_FETCH: 883 pipi = (pps_info_t *)data; 884 pps->ppsinfo.current_mode = pps->ppsparam.mode; 885 *pipi = pps->ppsinfo; 886 return (0); 887 case PPS_IOC_KCBIND: 888 #ifdef PPS_SYNC 889 epi = (int *)data; 890 /* XXX Only root should be able to do this */ 891 if (*epi & ~pps->ppscap) 892 return (EINVAL); 893 pps->kcmode = *epi; 894 return (0); 895 #else 896 return (EOPNOTSUPP); 897 #endif 898 default: 899 return (EPASSTHROUGH); 900 } 901 } 902 903 void 904 pps_init(struct pps_state *pps) 905 { 906 907 KASSERT(mutex_owned(&timecounter_lock)); 908 909 pps->ppscap |= PPS_TSFMT_TSPEC; 910 if (pps->ppscap & PPS_CAPTUREASSERT) 911 pps->ppscap |= PPS_OFFSETASSERT; 912 if (pps->ppscap & PPS_CAPTURECLEAR) 913 pps->ppscap |= PPS_OFFSETCLEAR; 914 } 915 916 void 917 pps_capture(struct pps_state *pps) 918 { 919 struct timehands *th; 920 921 KASSERT(mutex_owned(&timecounter_lock)); 922 KASSERT(pps != NULL); 923 924 th = timehands; 925 pps->capgen = th->th_generation; 926 pps->capth = th; 927 pps->capcount = (u_int64_t)tc_delta(th) + th->th_offset_count; 928 if (pps->capgen != th->th_generation) 929 pps->capgen = 0; 930 } 931 932 void 933 pps_event(struct pps_state *pps, int event) 934 { 935 struct bintime bt; 936 struct timespec ts, *tsp, *osp; 937 u_int64_t tcount, *pcount; 938 int foff, fhard; 939 pps_seq_t *pseq; 940 941 KASSERT(mutex_owned(&timecounter_lock)); 942 943 KASSERT(pps != NULL); /* XXX ("NULL pps pointer in pps_event") */ 944 /* If the timecounter was wound up underneath us, bail out. */ 945 if (pps->capgen == 0 || pps->capgen != pps->capth->th_generation) 946 return; 947 948 /* Things would be easier with arrays. */ 949 if (event == PPS_CAPTUREASSERT) { 950 tsp = &pps->ppsinfo.assert_timestamp; 951 osp = &pps->ppsparam.assert_offset; 952 foff = pps->ppsparam.mode & PPS_OFFSETASSERT; 953 fhard = pps->kcmode & PPS_CAPTUREASSERT; 954 pcount = &pps->ppscount[0]; 955 pseq = &pps->ppsinfo.assert_sequence; 956 } else { 957 tsp = &pps->ppsinfo.clear_timestamp; 958 osp = &pps->ppsparam.clear_offset; 959 foff = pps->ppsparam.mode & PPS_OFFSETCLEAR; 960 fhard = pps->kcmode & PPS_CAPTURECLEAR; 961 pcount = &pps->ppscount[1]; 962 pseq = &pps->ppsinfo.clear_sequence; 963 } 964 965 /* 966 * If the timecounter changed, we cannot compare the count values, so 967 * we have to drop the rest of the PPS-stuff until the next event. 968 */ 969 if (pps->ppstc != pps->capth->th_counter) { 970 pps->ppstc = pps->capth->th_counter; 971 *pcount = pps->capcount; 972 pps->ppscount[2] = pps->capcount; 973 return; 974 } 975 976 /* Convert the count to a timespec. */ 977 tcount = pps->capcount - pps->capth->th_offset_count; 978 bt = pps->capth->th_offset; 979 bintime_addx(&bt, pps->capth->th_scale * tcount); 980 bintime_add(&bt, &timebasebin); 981 bintime2timespec(&bt, &ts); 982 983 /* If the timecounter was wound up underneath us, bail out. */ 984 if (pps->capgen != pps->capth->th_generation) 985 return; 986 987 *pcount = pps->capcount; 988 (*pseq)++; 989 *tsp = ts; 990 991 if (foff) { 992 timespecadd(tsp, osp, tsp); 993 if (tsp->tv_nsec < 0) { 994 tsp->tv_nsec += 1000000000; 995 tsp->tv_sec -= 1; 996 } 997 } 998 #ifdef PPS_SYNC 999 if (fhard) { 1000 u_int64_t scale; 1001 1002 /* 1003 * Feed the NTP PLL/FLL. 1004 * The FLL wants to know how many (hardware) nanoseconds 1005 * elapsed since the previous event. 1006 */ 1007 tcount = pps->capcount - pps->ppscount[2]; 1008 pps->ppscount[2] = pps->capcount; 1009 tcount &= pps->capth->th_counter->tc_counter_mask; 1010 scale = (u_int64_t)1 << 63; 1011 scale /= pps->capth->th_counter->tc_frequency; 1012 scale *= 2; 1013 bt.sec = 0; 1014 bt.frac = 0; 1015 bintime_addx(&bt, scale * tcount); 1016 bintime2timespec(&bt, &ts); 1017 hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec); 1018 } 1019 #endif 1020 } 1021 1022 /* 1023 * Timecounters need to be updated every so often to prevent the hardware 1024 * counter from overflowing. Updating also recalculates the cached values 1025 * used by the get*() family of functions, so their precision depends on 1026 * the update frequency. 1027 */ 1028 1029 static int tc_tick; 1030 1031 void 1032 tc_ticktock(void) 1033 { 1034 static int count; 1035 1036 if (++count < tc_tick) 1037 return; 1038 count = 0; 1039 mutex_spin_enter(&timecounter_lock); 1040 if (timecounter_bad != 0) { 1041 /* An existing timecounter has gone bad, pick a new one. */ 1042 (void)atomic_swap_uint(&timecounter_bad, 0); 1043 if (timecounter->tc_quality < 0) { 1044 tc_pick(); 1045 } 1046 } 1047 tc_windup(); 1048 mutex_spin_exit(&timecounter_lock); 1049 } 1050 1051 void 1052 inittimecounter(void) 1053 { 1054 u_int p; 1055 1056 mutex_init(&timecounter_lock, MUTEX_DEFAULT, IPL_HIGH); 1057 1058 /* 1059 * Set the initial timeout to 1060 * max(1, <approx. number of hardclock ticks in a millisecond>). 1061 * People should probably not use the sysctl to set the timeout 1062 * to smaller than its inital value, since that value is the 1063 * smallest reasonable one. If they want better timestamps they 1064 * should use the non-"get"* functions. 1065 */ 1066 if (hz > 1000) 1067 tc_tick = (hz + 500) / 1000; 1068 else 1069 tc_tick = 1; 1070 p = (tc_tick * 1000000) / hz; 1071 aprint_verbose("timecounter: Timecounters tick every %d.%03u msec\n", 1072 p / 1000, p % 1000); 1073 1074 /* warm up new timecounter (again) and get rolling. */ 1075 (void)timecounter->tc_get_timecount(timecounter); 1076 (void)timecounter->tc_get_timecount(timecounter); 1077 } 1078