1 /*- 2 * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org> 3 * Copyright (c) 1982, 1986, 1991, 1993 4 * The Regents of the University of California. All rights reserved. 5 * (c) UNIX System Laboratories, Inc. 6 * All or some portions of this file are derived from material licensed 7 * to the University of California by American Telephone and Telegraph 8 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 9 * the permission of UNIX System Laboratories, Inc. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 * $FreeBSD: src/sys/kern/kern_clock.c,v 1.105.2.10 2002/10/17 13:19:40 maxim Exp $
 * $DragonFly: src/sys/kern/kern_clock.c,v 1.11 2003/08/26 21:09:02 rob Exp $
 */

#include "opt_ntp.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/timex.h>
#include <sys/timepps.h>
#include <vm/vm.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>
#include <machine/limits.h>
#include <machine/smp.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

#ifdef DEVICE_POLLING
extern void init_device_poll(void);
extern void hardclock_device_poll(void);
#endif /* DEVICE_POLLING */

/*
 * Number of timecounters used to implement stable storage
 * (a small ring of snapshot copies per hardware counter; see
 * init_timecounter()/sync_other_counter()).
 */
#ifndef NTIMECOUNTER
#define NTIMECOUNTER	5
#endif

static MALLOC_DEFINE(M_TIMECOUNTER, "timecounter",
	"Timecounter stable storage");

static void initclocks (void *dummy);
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)

static void tco_forward (int force);
static void tco_setscales (struct timecounter *tc);
static __inline unsigned tco_delta (struct timecounter *tc);

/*
 * Some of these don't belong here, but it's easiest to concentrate them.
 * Note that cp_time[] counts in microseconds, but most userland programs
 * just compare relative times against the total by delta.
 */
long cp_time[CPUSTATES];

SYSCTL_OPAQUE(_kern, OID_AUTO, cp_time, CTLFLAG_RD, &cp_time, sizeof(cp_time),
    "LU", "CPU time statistics");

long tk_cancc;
long tk_nin;
long tk_nout;
long tk_rawcc;

time_t time_second;

struct timeval boottime;
SYSCTL_STRUCT(_kern, KERN_BOOTTIME, boottime, CTLFLAG_RD,
    &boottime, timeval, "System boottime");

/*
 * Which update policy to use.
 *    0 - every tick, bad hardware may fail with "calcru negative..."
 *    1 - more resistent to the above hardware, but less efficient.
 */
static int tco_method;

/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air.  This allows the console and other early stuff to use
 * timeservices.
 */

/*
 * Fake "hardware" read routine: a monotonically increasing counter that
 * advances by one on every query.
 */
static unsigned
dummy_get_timecount(struct timecounter *tc)
{
	static unsigned now;
	return (++now);
}

static struct timecounter dummy_timecounter = {
	dummy_get_timecount,
	0,
	~0u,
	1000000,
	"dummy"
};

/* The currently active timecounter; boot starts on the dummy. */
struct timecounter *timecounter = &dummy_timecounter;

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.
 *
 * The main timer, running hz times per second, is used to trigger interval
 * timers, timeouts and rescheduling as needed.
 *
 * The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.
 * Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 *
 * Time-of-day is maintained using a "timecounter", which may or may
 * not be related to the hardware generating the above mentioned
 * interrupts.
 */

int	stathz;				/* statistics clock rate, 0 if none */
int	profhz;				/* profiling clock rate */
static int profprocs;			/* # of procs with profiling enabled */
int	ticks;				/* hardclock tick count since boot */
static int psticks;			/* profiler ticks */
static int psdiv;			/* prof / stat divider */
int	psratio;			/* ratio: prof * 100 / stat */

/*
 * Initialize clock frequencies and start both clocks running.
 */
/* ARGSUSED*/
static void
initclocks(dummy)
	void *dummy;
{
	int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = 1;
	cpu_initclocks();

#ifdef DEVICE_POLLING
	init_device_poll();
#endif

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
}

/*
 * The real-time timer, interrupting hz times per second.  This is implemented
 * as a FAST interrupt so it is in the context of the thread it interrupted,
 * and not in an interrupt thread.  YYY needs help.
218 */ 219 void 220 hardclock(frame) 221 struct clockframe *frame; 222 { 223 struct proc *p; 224 225 p = curproc; 226 if (p) { 227 struct pstats *pstats; 228 229 /* 230 * Run current process's virtual and profile time, as needed. 231 */ 232 pstats = p->p_stats; 233 if (CLKF_USERMODE(frame) && 234 timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && 235 itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) 236 psignal(p, SIGVTALRM); 237 if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) && 238 itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) 239 psignal(p, SIGPROF); 240 } 241 242 #if 0 /* SMP and BETTER_CLOCK */ 243 forward_hardclock(pscnt); 244 #endif 245 246 /* 247 * If no separate statistics clock is available, run it from here. 248 */ 249 if (stathz == 0) 250 statclock(frame); 251 252 tco_forward(0); 253 ticks++; 254 255 #ifdef DEVICE_POLLING 256 hardclock_device_poll(); /* this is very short and quick */ 257 #endif /* DEVICE_POLLING */ 258 259 /* 260 * Process callouts at a very low cpu priority, so we don't keep the 261 * relatively high clock interrupt priority any longer than necessary. 262 */ 263 if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) { 264 setsoftclock(); 265 } else if (softticks + 1 == ticks) { 266 ++softticks; 267 } 268 } 269 270 /* 271 * Compute number of ticks in the specified amount of time. 272 */ 273 int 274 tvtohz(tv) 275 struct timeval *tv; 276 { 277 unsigned long ticks; 278 long sec, usec; 279 280 /* 281 * If the number of usecs in the whole seconds part of the time 282 * difference fits in a long, then the total number of usecs will 283 * fit in an unsigned long. Compute the total and convert it to 284 * ticks, rounding up and adding 1 to allow for the current tick 285 * to expire. Rounding also depends on unsigned long arithmetic 286 * to avoid overflow. 
287 * 288 * Otherwise, if the number of ticks in the whole seconds part of 289 * the time difference fits in a long, then convert the parts to 290 * ticks separately and add, using similar rounding methods and 291 * overflow avoidance. This method would work in the previous 292 * case but it is slightly slower and assumes that hz is integral. 293 * 294 * Otherwise, round the time difference down to the maximum 295 * representable value. 296 * 297 * If ints have 32 bits, then the maximum value for any timeout in 298 * 10ms ticks is 248 days. 299 */ 300 sec = tv->tv_sec; 301 usec = tv->tv_usec; 302 if (usec < 0) { 303 sec--; 304 usec += 1000000; 305 } 306 if (sec < 0) { 307 #ifdef DIAGNOSTIC 308 if (usec > 0) { 309 sec++; 310 usec -= 1000000; 311 } 312 printf("tvotohz: negative time difference %ld sec %ld usec\n", 313 sec, usec); 314 #endif 315 ticks = 1; 316 } else if (sec <= LONG_MAX / 1000000) 317 ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1)) 318 / tick + 1; 319 else if (sec <= LONG_MAX / hz) 320 ticks = sec * hz 321 + ((unsigned long)usec + (tick - 1)) / tick + 1; 322 else 323 ticks = LONG_MAX; 324 if (ticks > INT_MAX) 325 ticks = INT_MAX; 326 return ((int)ticks); 327 } 328 329 /* 330 * Start profiling on a process. 331 * 332 * Kernel profiling passes proc0 which never exits and hence 333 * keeps the profile clock running constantly. 334 */ 335 void 336 startprofclock(p) 337 struct proc *p; 338 { 339 int s; 340 341 if ((p->p_flag & P_PROFIL) == 0) { 342 p->p_flag |= P_PROFIL; 343 if (++profprocs == 1 && stathz != 0) { 344 s = splstatclock(); 345 psdiv = psratio; 346 setstatclockrate(profhz); 347 splx(s); 348 } 349 } 350 } 351 352 /* 353 * Stop profiling on a process. 
 */
void
stopprofclock(p)
	struct proc *p;
{
	int s;

	if (p->p_flag & P_PROFIL) {
		p->p_flag &= ~P_PROFIL;
		/* Last profiled process gone: drop stat clock back to stathz. */
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.  Most of the statistics are only
 * used by user-level statistics programs.  The main exceptions are
 * p->p_uticks, p->p_sticks, p->p_iticks, and p->p_estcpu.
 *
 * The statclock should be called from an exclusive, fast interrupt,
 * so the context should be the thread/process that got interrupted and
 * not an interrupt thread.
 */
void
statclock(frame)
	struct clockframe *frame;
{
#ifdef GPROF
	struct gmonparam *g;
	int i;
#endif
	thread_t td;
	struct pstats *pstats;
	long rss;
	struct rusage *ru;
	struct vmspace *vm;
	struct proc *p;
	int bump;		/* usecs elapsed since the previous statclock */
	struct timeval tv;
	struct timeval *stv;

	/*
	 * How big was our timeslice relative to the last time.
	 * gd_stattv remembers the last statclock timestamp for this cpu;
	 * a zero tv_sec means "first call", charged as 1 usec.  The bump
	 * is clamped to [0, 1000000] to contain bad hardware deltas.
	 */
	microuptime(&tv);
	stv = &mycpu->gd_stattv;
	if (stv->tv_sec == 0) {
		bump = 1;
	} else {
		bump = tv.tv_usec - stv->tv_usec +
			(tv.tv_sec - stv->tv_sec) * 1000000;
		if (bump < 0)
			bump = 0;
		if (bump > 1000000)
			bump = 1000000;
	}
	*stv = tv;

	td = curthread;
	p = td->td_proc;

	if (CLKF_USERMODE(frame)) {
		/*
		 * Came from userland, handle user time and deal with
		 * possible process.
		 */
		if (p && (p->p_flag & P_PROFIL))
			addupc_intr(p, CLKF_PC(frame), 1);
#if 0	/* SMP and BETTER_CLOCK */
		if (stathz != 0)
			forward_statclock(pscnt);
#endif
		td->td_uticks += bump;

		/*
		 * Charge the time as appropriate
		 */
		if (p && p->p_nice > NZERO)
			cp_time[CP_NICE] += bump;
		else
			cp_time[CP_USER] += bump;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#if 0	/* SMP and BETTER_CLOCK */
		if (stathz != 0)
			forward_statclock(pscnt);
#endif
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		if (CLKF_INTR(frame))
			td->td_iticks += bump;
		else
			td->td_sticks += bump;

		if (CLKF_INTR(frame)) {
			cp_time[CP_INTR] += bump;
		} else {
			if (td == &mycpu->gd_idlethread)
				cp_time[CP_IDLE] += bump;
			else
				cp_time[CP_SYS] += bump;
		}
	}

	/*
	 * bump psticks and check against gd_psticks.  When we hit the
	 * 1*hz mark (psdiv ticks) we do the more expensive stuff.  If
	 * psdiv changes we reset everything to avoid confusion.
	 */
	++psticks;
	if (psticks < mycpu->gd_psticks && psdiv == mycpu->gd_psdiv)
		return;

	mycpu->gd_psdiv = psdiv;
	mycpu->gd_psticks = psticks + psdiv;

	if (p != NULL) {
		schedclock(p);

		/* Update resource usage integrals and maximums. */
		if ((pstats = p->p_stats) != NULL &&
		    (ru = &pstats->p_ru) != NULL &&
		    (vm = p->p_vmspace) != NULL) {
			ru->ru_ixrss += pgtok(vm->vm_tsize);
			ru->ru_idrss += pgtok(vm->vm_dsize);
			ru->ru_isrss += pgtok(vm->vm_ssize);
			rss = pgtok(vmspace_resident_count(vm));
			if (ru->ru_maxrss < rss)
				ru->ru_maxrss = rss;
		}
	}
}

/*
 * Return information about system clocks.
 * Sysctl handler for kern.clockrate; copies a struct clockinfo out.
 */
static int
sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
{
	struct clockinfo clkinfo;
	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.tickadj = tickadj;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
	0, 0, sysctl_kern_clockrate, "S,clockinfo","");

/*
 * Hardware counter ticks elapsed since tc's offset snapshot, masked to
 * the counter's width (handles wraparound of narrow counters).
 */
static __inline unsigned
tco_delta(struct timecounter *tc)
{

	return ((tc->tc_get_timecount(tc) - tc->tc_offset_count) &
	    tc->tc_counter_mask);
}

/*
 * We have eight functions for looking at the clock, four for
 * microseconds and four for nanoseconds.  For each there is fast
 * but less precise version "get{nano|micro}[up]time" which will
 * return a time which is up to 1/HZ previous to the call, whereas
 * the raw version "{nano|micro}[up]time" will return a timestamp
 * which is as precise as possible.  The "up" variants return the
 * time relative to system boot, these are well suited for time
 * interval measurements.
 */

/* Fast wall-clock time: last value cached by tco_forward() (<= 1/hz old). */
void
getmicrotime(struct timeval *tvp)
{
	struct timecounter *tc;

	if (!tco_method) {
		tc = timecounter;
		*tvp = tc->tc_microtime;
	} else {
		microtime(tvp);
	}
}

/* Fast wall-clock time in nanoseconds; cached, <= 1/hz old. */
void
getnanotime(struct timespec *tsp)
{
	struct timecounter *tc;

	if (!tco_method) {
		tc = timecounter;
		*tsp = tc->tc_nanotime;
	} else {
		nanotime(tsp);
	}
}

/* Precise wall-clock time: uptime offset plus boottime, normalized. */
void
microtime(struct timeval *tv)
{
	struct timecounter *tc;

	tc = timecounter;
	tv->tv_sec = tc->tc_offset_sec;
	tv->tv_usec = tc->tc_offset_micro;
	tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32;
	tv->tv_usec += boottime.tv_usec;
	tv->tv_sec += boottime.tv_sec;
	while (tv->tv_usec < 0) {
		tv->tv_usec += 1000000;
		if (tv->tv_sec > 0)
			tv->tv_sec--;
	}
	while (tv->tv_usec >= 1000000) {
		tv->tv_usec -= 1000000;
		tv->tv_sec++;
	}
}

/* Precise wall-clock time in nanoseconds (64.32 fixed-point accumulation). */
void
nanotime(struct timespec *ts)
{
	unsigned count;
	u_int64_t delta;
	struct timecounter *tc;

	tc = timecounter;
	ts->tv_sec = tc->tc_offset_sec;
	count = tco_delta(tc);
	delta = tc->tc_offset_nano;
	delta += ((u_int64_t)count * tc->tc_scale_nano_f);
	delta >>= 32;
	delta += ((u_int64_t)count * tc->tc_scale_nano_i);
	delta += boottime.tv_usec * 1000;
	ts->tv_sec += boottime.tv_sec;
	/*
	 * NOTE(review): delta is u_int64_t, so "delta < 0" is always false
	 * and this loop is dead code (same in nanouptime below).  Left
	 * untouched; the normalization is handled by the following loop.
	 */
	while (delta < 0) {
		delta += 1000000000;
		if (ts->tv_sec > 0)
			ts->tv_sec--;
	}
	while (delta >= 1000000000) {
		delta -= 1000000000;
		ts->tv_sec++;
	}
	ts->tv_nsec = delta;
}

/* Fast uptime: the current offset snapshot, <= 1/hz old. */
void
getmicrouptime(struct timeval *tvp)
{
	struct timecounter *tc;

	if (!tco_method) {
		tc = timecounter;
		tvp->tv_sec = tc->tc_offset_sec;
		tvp->tv_usec = tc->tc_offset_micro;
	} else {
		microuptime(tvp);
	}
}

/* Fast uptime in nanoseconds; top 32 bits of the 64.32 nano offset. */
void
getnanouptime(struct timespec *tsp)
{
	struct timecounter *tc;

	if (!tco_method) {
		tc = timecounter;
		tsp->tv_sec = tc->tc_offset_sec;
		tsp->tv_nsec = tc->tc_offset_nano >> 32;
	} else {
		nanouptime(tsp);
	}
}

/* Precise uptime: offset snapshot plus hardware delta, no boottime. */
void
microuptime(struct timeval *tv)
{
	struct timecounter *tc;

	tc = timecounter;
	tv->tv_sec = tc->tc_offset_sec;
	tv->tv_usec = tc->tc_offset_micro;
	tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32;
	while (tv->tv_usec < 0) {
		tv->tv_usec += 1000000;
		if (tv->tv_sec > 0)
			tv->tv_sec--;
	}
	while (tv->tv_usec >= 1000000) {
		tv->tv_usec -= 1000000;
		tv->tv_sec++;
	}
}

/* Precise uptime in nanoseconds. */
void
nanouptime(struct timespec *ts)
{
	unsigned count;
	u_int64_t delta;
	struct timecounter *tc;

	tc = timecounter;
	ts->tv_sec = tc->tc_offset_sec;
	count = tco_delta(tc);
	delta = tc->tc_offset_nano;
	delta += ((u_int64_t)count * tc->tc_scale_nano_f);
	delta >>= 32;
	delta += ((u_int64_t)count * tc->tc_scale_nano_i);
	/* NOTE(review): dead loop, delta is unsigned -- see nanotime(). */
	while (delta < 0) {
		delta += 1000000000;
		if (ts->tv_sec > 0)
			ts->tv_sec--;
	}
	while (delta >= 1000000000) {
		delta -= 1000000000;
		ts->tv_sec++;
	}
	ts->tv_nsec = delta;
}

/*
 * Recompute the scaling factors for tc from its tweak's frequency and
 * the current NTP adjustment: scale_micro is usec-per-count in 32.32,
 * scale_nano_i/_f are the integer/fraction parts of nsec-per-count.
 */
static void
tco_setscales(struct timecounter *tc)
{
	u_int64_t scale;

	scale = 1000000000LL << 32;
	scale += tc->tc_adjustment;
	scale /= tc->tc_tweak->tc_frequency;
	tc->tc_scale_micro = scale / 1000;
	tc->tc_scale_nano_f = scale & 0xffffffff;
	tc->tc_scale_nano_i = scale >> 32;
}

void
update_timecounter(struct timecounter *tc)
{
	tco_setscales(tc);
}

/*
 * Register a new hardware timecounter: validate that hz is high enough
 * for the counter's width, link it into the avail ring, allocate its
 * NTIMECOUNTER stable-storage copies, and start using it.
 */
void
init_timecounter(struct timecounter *tc)
{
	struct timespec ts1;
	struct timecounter *t1, *t2, *t3;
	unsigned u;
	int i;

	/*
	 * The counter must not wrap more than once between two hardclock
	 * ticks, or tco_delta() would lose time.
	 */
	u = tc->tc_frequency / tc->tc_counter_mask;
	if (u > hz) {
		printf("Timecounter \"%s\" frequency %lu Hz"
		       " -- Insufficient hz, needs at least %u\n",
		       tc->tc_name, (u_long) tc->tc_frequency, u);
		return;
	}

	tc->tc_adjustment = 0;
	tc->tc_tweak = tc;
	tco_setscales(tc);
	tc->tc_offset_count = tc->tc_get_timecount(tc);
	if (timecounter == &dummy_timecounter)
		tc->tc_avail = tc;
	else {
		tc->tc_avail = timecounter->tc_tweak->tc_avail;
		timecounter->tc_tweak->tc_avail = tc;
	}
	/* Build the circular ring of NTIMECOUNTER stable-storage copies. */
	MALLOC(t1, struct timecounter *, sizeof *t1, M_TIMECOUNTER, M_WAITOK);
	tc->tc_other = t1;
	*t1 = *tc;
	t2 = t1;
	for (i = 1; i < NTIMECOUNTER; i++) {
		MALLOC(t3, struct timecounter *, sizeof *t3,
		    M_TIMECOUNTER, M_WAITOK);
		*t3 = *tc;
		t3->tc_other = t2;
		t2 = t3;
	}
	t1->tc_other = t3;
	tc = t1;

	printf("Timecounter \"%s\" frequency %lu Hz\n",
	    tc->tc_name, (u_long)tc->tc_frequency);

	/* XXX: For now always start using the counter. */
	tc->tc_offset_count = tc->tc_get_timecount(tc);
	nanouptime(&ts1);
	tc->tc_offset_nano = (u_int64_t)ts1.tv_nsec << 32;
	tc->tc_offset_micro = ts1.tv_nsec / 1000;
	tc->tc_offset_sec = ts1.tv_sec;
	timecounter = tc;
}

/*
 * Step the wall clock: recompute boottime so that uptime + boottime
 * equals the requested wall-clock time *ts.
 */
void
set_timecounter(struct timespec *ts)
{
	struct timespec ts2;

	nanouptime(&ts2);
	boottime.tv_sec = ts->tv_sec - ts2.tv_sec;
	boottime.tv_usec = (ts->tv_nsec - ts2.tv_nsec) / 1000;
	if (boottime.tv_usec < 0) {
		boottime.tv_usec += 1000000;
		boottime.tv_sec--;
	}
	/* fiddle all the little crinkly bits around the fiords...
	 */
	tco_forward(1);
}

/*
 * Switch to a different hardware timecounter: seed the new counter's
 * offsets from the current uptime so time is continuous across the
 * switch.  Runs at splclock() since the interrupt path reads timecounter.
 */
static void
switch_timecounter(struct timecounter *newtc)
{
	int s;
	struct timecounter *tc;
	struct timespec ts;

	s = splclock();
	tc = timecounter;
	if (newtc->tc_tweak == tc->tc_tweak) {
		splx(s);
		return;
	}
	newtc = newtc->tc_tweak->tc_other;
	nanouptime(&ts);
	newtc->tc_offset_sec = ts.tv_sec;
	newtc->tc_offset_nano = (u_int64_t)ts.tv_nsec << 32;
	newtc->tc_offset_micro = ts.tv_nsec / 1000;
	newtc->tc_offset_count = newtc->tc_get_timecount(newtc);
	tco_setscales(newtc);
	timecounter = newtc;
	splx(s);
}

/*
 * Copy the active timecounter state into the next stable-storage slot
 * of the ring and roll its offsets forward by the hardware delta.
 * Readers keep using the old copy untouched, which is what makes the
 * lockless get*time() functions safe.
 */
static struct timecounter *
sync_other_counter(void)
{
	struct timecounter *tc, *tcn, *tco;
	unsigned delta;

	tco = timecounter;
	tc = tco->tc_other;
	tcn = tc->tc_other;
	*tc = *tco;
	tc->tc_other = tcn;
	delta = tco_delta(tc);
	tc->tc_offset_count += delta;
	tc->tc_offset_count &= tc->tc_counter_mask;
	tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_f;
	tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_i << 32;
	return (tc);
}

/*
 * Advance the timecounter state; called from hardclock() every tick
 * (force=0) and from set_timecounter() (force=1).  Applies adjtime()
 * skew, NTP second processing, and republishes the cached nanotime/
 * microtime values.
 */
static void
tco_forward(int force)
{
	struct timecounter *tc, *tco;
	struct timeval tvt;

	tco = timecounter;
	tc = sync_other_counter();
	/*
	 * We may be inducing a tiny error here, the tc_poll_pps() may
	 * process a latched count which happens after the tco_delta()
	 * in sync_other_counter(), which would extend the previous
	 * counters parameters into the domain of this new one.
	 * Since the timewindow is very small for this, the error is
	 * going to be only a few weenieseconds (as Dave Mills would
	 * say), so lets just not talk more about it, OK ?
	 */
	if (tco->tc_poll_pps)
		tco->tc_poll_pps(tco);
	/* Apply one tick's worth of adjtime() skew to boottime. */
	if (timedelta != 0) {
		tvt = boottime;
		tvt.tv_usec += tickdelta;
		if (tvt.tv_usec >= 1000000) {
			tvt.tv_sec++;
			tvt.tv_usec -= 1000000;
		} else if (tvt.tv_usec < 0) {
			tvt.tv_sec--;
			tvt.tv_usec += 1000000;
		}
		boottime = tvt;
		timedelta -= tickdelta;
	}

	/* Carry whole seconds out of the 64.32 nanosecond offset. */
	while (tc->tc_offset_nano >= 1000000000ULL << 32) {
		tc->tc_offset_nano -= 1000000000ULL << 32;
		tc->tc_offset_sec++;
		ntp_update_second(tc);	/* XXX only needed if xntpd runs */
		tco_setscales(tc);
		force++;
	}

	if (tco_method && !force)
		return;

	tc->tc_offset_micro = (tc->tc_offset_nano / 1000) >> 32;

	/* Figure out the wall-clock time */
	tc->tc_nanotime.tv_sec = tc->tc_offset_sec + boottime.tv_sec;
	tc->tc_nanotime.tv_nsec =
	    (tc->tc_offset_nano >> 32) + boottime.tv_usec * 1000;
	tc->tc_microtime.tv_usec = tc->tc_offset_micro + boottime.tv_usec;
	while (tc->tc_nanotime.tv_nsec >= 1000000000) {
		tc->tc_nanotime.tv_nsec -= 1000000000;
		tc->tc_microtime.tv_usec -= 1000000;
		tc->tc_nanotime.tv_sec++;
	}
	time_second = tc->tc_microtime.tv_sec = tc->tc_nanotime.tv_sec;

	/* Publish the updated copy; readers switch over atomically. */
	timecounter = tc;
}

SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");

SYSCTL_INT(_kern_timecounter, OID_AUTO, method, CTLFLAG_RW, &tco_method, 0,
    "This variable determines the method used for updating timecounters. "
    "If the default algorithm (0) fails with \"calcru negative...\" messages "
    "try the alternate algorithm (1) which handles bad hardware better."
);

/*
 * Sysctl handler for kern.timecounter.hardware: report the active
 * counter's name, or on write walk the avail ring looking for a counter
 * with the requested name and switch to it.
 */
static int
sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS)
{
	char newname[32];
	struct timecounter *newtc, *tc;
	int error;

	tc = timecounter->tc_tweak;
	/*
	 * NOTE(review): strncpy does not NUL-terminate if tc_name is
	 * >= 32 chars; presumably all registered names are short --
	 * verify against the counter drivers.
	 */
	strncpy(newname, tc->tc_name, sizeof(newname));
	error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req);
	if (error == 0 && req->newptr != NULL &&
	    strcmp(newname, tc->tc_name) != 0) {
		for (newtc = tc->tc_avail; newtc != tc;
		    newtc = newtc->tc_avail) {
			if (strcmp(newname, newtc->tc_name) == 0) {
				/* Warm up new timecounter. */
				(void)newtc->tc_get_timecount(newtc);

				switch_timecounter(newtc);
				return (0);
			}
		}
		return (EINVAL);
	}
	return (error);
}

SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware, CTLTYPE_STRING | CTLFLAG_RW,
    0, 0, sysctl_kern_timecounter_hardware, "A", "");


/*
 * PPS API (RFC 2783 style) ioctl dispatcher.  Validates the request
 * against the driver-declared capability mask in pps->ppscap and reads
 * or updates the pps_state accordingly.
 */
int
pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps)
{
	pps_params_t *app;
	struct pps_fetch_args *fapi;
#ifdef PPS_SYNC
	struct pps_kcbind_args *kapi;
#endif

	switch (cmd) {
	case PPS_IOC_CREATE:
		return (0);
	case PPS_IOC_DESTROY:
		return (0);
	case PPS_IOC_SETPARAMS:
		app = (pps_params_t *)data;
		if (app->mode & ~pps->ppscap)
			return (EINVAL);
		pps->ppsparam = *app;
		return (0);
	case PPS_IOC_GETPARAMS:
		app = (pps_params_t *)data;
		*app = pps->ppsparam;
		app->api_version = PPS_API_VERS_1;
		return (0);
	case PPS_IOC_GETCAP:
		*(int*)data = pps->ppscap;
		return (0);
	case PPS_IOC_FETCH:
		fapi = (struct pps_fetch_args *)data;
		if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC)
			return (EINVAL);
		/* Blocking fetch (non-zero timeout) is not supported. */
		if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec)
			return (EOPNOTSUPP);
		pps->ppsinfo.current_mode = pps->ppsparam.mode;
		fapi->pps_info_buf = pps->ppsinfo;
		return (0);
	case PPS_IOC_KCBIND:
#ifdef PPS_SYNC
		kapi = (struct pps_kcbind_args *)data;
		/* XXX Only root
		   should be able to do this */
		if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC)
			return (EINVAL);
		if (kapi->kernel_consumer != PPS_KC_HARDPPS)
			return (EINVAL);
		if (kapi->edge & ~pps->ppscap)
			return (EINVAL);
		pps->kcmode = kapi->edge;
		return (0);
#else
		return (EOPNOTSUPP);
#endif
	default:
		return (ENOTTY);
	}
}

/*
 * Initialize a driver's pps_state: every PPS source can return
 * timespec-format timestamps, and any capture edge the driver supports
 * also supports a user-supplied offset correction.
 */
void
pps_init(struct pps_state *pps)
{
	pps->ppscap |= PPS_TSFMT_TSPEC;
	if (pps->ppscap & PPS_CAPTUREASSERT)
		pps->ppscap |= PPS_OFFSETASSERT;
	if (pps->ppscap & PPS_CAPTURECLEAR)
		pps->ppscap |= PPS_OFFSETCLEAR;
}

/*
 * Called by a driver when a PPS edge (assert or clear) has been captured.
 * Converts the latched counter value to a timestamp, bumps the event
 * sequence number, applies the configured offset, and optionally feeds
 * hardpps() for kernel PLL discipline (PPS_SYNC).
 */
void
pps_event(struct pps_state *pps, struct timecounter *tc, unsigned count, int event)
{
	struct timespec ts, *tsp, *osp;
	u_int64_t delta;
	unsigned tcount, *pcount;
	int foff, fhard;
	pps_seq_t *pseq;

	/* Things would be easier with arrays... */
	if (event == PPS_CAPTUREASSERT) {
		tsp = &pps->ppsinfo.assert_timestamp;
		osp = &pps->ppsparam.assert_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
		fhard = pps->kcmode & PPS_CAPTUREASSERT;
		pcount = &pps->ppscount[0];
		pseq = &pps->ppsinfo.assert_sequence;
	} else {
		tsp = &pps->ppsinfo.clear_timestamp;
		osp = &pps->ppsparam.clear_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
		fhard = pps->kcmode & PPS_CAPTURECLEAR;
		pcount = &pps->ppscount[1];
		pseq = &pps->ppsinfo.clear_sequence;
	}

	/*
	 * The timecounter changed: bail.  Note this compares tc_name
	 * *pointers* (identity of the underlying hardware counter), not
	 * string contents -- the stable-storage copies share the name
	 * pointer, while a different counter has a different one.
	 */
	if (!pps->ppstc ||
	    pps->ppstc->tc_name != tc->tc_name ||
	    tc->tc_name != timecounter->tc_name) {
		pps->ppstc = tc;
		*pcount = count;
		return;
	}

	/* Nothing really happened */
	if (*pcount == count)
		return;

	*pcount = count;

	/* Convert the count to timespec */
	ts.tv_sec = tc->tc_offset_sec;
	tcount = count - tc->tc_offset_count;
	tcount &= tc->tc_counter_mask;
	delta = tc->tc_offset_nano;
	delta += ((u_int64_t)tcount * tc->tc_scale_nano_f);
	delta >>= 32;
	delta += ((u_int64_t)tcount * tc->tc_scale_nano_i);
	delta += boottime.tv_usec * 1000;
	ts.tv_sec += boottime.tv_sec;
	while (delta >= 1000000000) {
		delta -= 1000000000;
		ts.tv_sec++;
	}
	ts.tv_nsec = delta;

	(*pseq)++;
	*tsp = ts;

	/* Apply the user-configured offset and renormalize. */
	if (foff) {
		timespecadd(tsp, osp);
		if (tsp->tv_nsec < 0) {
			tsp->tv_nsec += 1000000000;
			tsp->tv_sec -= 1;
		}
	}
#ifdef PPS_SYNC
	if (fhard) {
		/* magic, at its best... */
		tcount = count - pps->ppscount[2];
		pps->ppscount[2] = count;
		tcount &= tc->tc_counter_mask;
		delta = ((u_int64_t)tcount * tc->tc_tweak->tc_scale_nano_f);
		delta >>= 32;
		delta += ((u_int64_t)tcount * tc->tc_tweak->tc_scale_nano_i);
		hardpps(tsp, delta);
	}
#endif
}