1 /*- 2 * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org> 3 * Copyright (c) 1982, 1986, 1991, 1993 4 * The Regents of the University of California. All rights reserved. 5 * (c) UNIX System Laboratories, Inc. 6 * All or some portions of this file are derived from material licensed 7 * to the University of California by American Telephone and Telegraph 8 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 9 * the permission of UNIX System Laboratories, Inc. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 40 * $FreeBSD: src/sys/kern/kern_clock.c,v 1.105.2.10 2002/10/17 13:19:40 maxim Exp $ 41 * $DragonFly: src/sys/kern/kern_clock.c,v 1.12 2003/10/17 07:30:42 dillon Exp $ 42 */ 43 44 #include "opt_ntp.h" 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/dkstat.h> 49 #include <sys/callout.h> 50 #include <sys/kernel.h> 51 #include <sys/proc.h> 52 #include <sys/malloc.h> 53 #include <sys/resourcevar.h> 54 #include <sys/signalvar.h> 55 #include <sys/timex.h> 56 #include <sys/timepps.h> 57 #include <vm/vm.h> 58 #include <sys/lock.h> 59 #include <vm/pmap.h> 60 #include <vm/vm_map.h> 61 #include <sys/sysctl.h> 62 63 #include <machine/cpu.h> 64 #include <machine/limits.h> 65 #include <machine/smp.h> 66 67 #ifdef GPROF 68 #include <sys/gmon.h> 69 #endif 70 71 #ifdef DEVICE_POLLING 72 extern void init_device_poll(void); 73 extern void hardclock_device_poll(void); 74 #endif /* DEVICE_POLLING */ 75 76 /* 77 * Number of timecounters used to implement stable storage 78 */ 79 #ifndef NTIMECOUNTER 80 #define NTIMECOUNTER 5 81 #endif 82 83 static MALLOC_DEFINE(M_TIMECOUNTER, "timecounter", 84 "Timecounter stable storage"); 85 86 static void initclocks (void *dummy); 87 SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL) 88 89 static void tco_forward (int force); 90 static void tco_setscales (struct timecounter *tc); 91 static __inline unsigned tco_delta (struct timecounter *tc); 92 93 /* 94 * Some of these don't belong here, but it's easiest to concentrate them. 95 * Note that cp_time[] counts in microseconds, but most userland programs 96 * just compare relative times against the total by delta. 97 */ 98 long cp_time[CPUSTATES]; 99 100 SYSCTL_OPAQUE(_kern, OID_AUTO, cp_time, CTLFLAG_RD, &cp_time, sizeof(cp_time), 101 "LU", "CPU time statistics"); 102 103 long tk_cancc; 104 long tk_nin; 105 long tk_nout; 106 long tk_rawcc; 107 108 time_t time_second; 109 110 struct timeval boottime; 111 SYSCTL_STRUCT(_kern, KERN_BOOTTIME, boottime, CTLFLAG_RD, 112 &boottime, timeval, "System boottime"); 113 114 /* 115 * Which update policy to use. 116 * 0 - every tick, bad hardware may fail with "calcru negative..." 117 * 1 - more resistent to the above hardware, but less efficient. 118 */ 119 static int tco_method; 120 121 /* 122 * Implement a dummy timecounter which we can use until we get a real one 123 * in the air. This allows the console and other early stuff to use 124 * timeservices. 125 */ 126 127 static unsigned 128 dummy_get_timecount(struct timecounter *tc) 129 { 130 static unsigned now; 131 return (++now); 132 } 133 134 static struct timecounter dummy_timecounter = { 135 dummy_get_timecount, 136 0, 137 ~0u, 138 1000000, 139 "dummy" 140 }; 141 142 struct timecounter *timecounter = &dummy_timecounter; 143 144 /* 145 * Clock handling routines. 146 * 147 * This code is written to operate with two timers that run independently of 148 * each other. 149 * 150 * The main timer, running hz times per second, is used to trigger interval 151 * timers, timeouts and rescheduling as needed. 152 * 153 * The second timer handles kernel and user profiling, 154 * and does resource use estimation. If the second timer is programmable, 155 * it is randomized to avoid aliasing between the two clocks. For example, 156 * the randomization prevents an adversary from always giving up the cpu 157 * just before its quantum expires. Otherwise, it would never accumulate 158 * cpu ticks. The mean frequency of the second timer is stathz. 159 * 160 * If no second timer exists, stathz will be zero; in this case we drive 161 * profiling and statistics off the main clock. This WILL NOT be accurate; 162 * do not do it unless absolutely necessary. 163 * 164 * The statistics clock may (or may not) be run at a higher rate while 165 * profiling. This profile clock runs at profhz. We require that profhz 166 * be an integral multiple of stathz. 167 * 168 * If the statistics clock is running fast, it must be divided by the ratio 169 * profhz/stathz for statistics. (For profiling, every tick counts.) 170 * 171 * Time-of-day is maintained using a "timecounter", which may or may 172 * not be related to the hardware generating the above mentioned 173 * interrupts. 174 */ 175 176 int stathz; 177 int profhz; 178 static int profprocs; 179 int ticks; 180 static int psticks; /* profiler ticks */ 181 static int psdiv; /* prof / stat divider */ 182 int psratio; /* ratio: prof * 100 / stat */ 183 184 /* 185 * Initialize clock frequencies and start both clocks running. 186 */ 187 /* ARGSUSED*/ 188 static void 189 initclocks(dummy) 190 void *dummy; 191 { 192 int i; 193 194 /* 195 * Set divisors to 1 (normal case) and let the machine-specific 196 * code do its bit. 197 */ 198 psdiv = 1; 199 cpu_initclocks(); 200 201 #ifdef DEVICE_POLLING 202 init_device_poll(); 203 #endif 204 205 /* 206 * Compute profhz/stathz, and fix profhz if needed. 207 */ 208 i = stathz ? stathz : hz; 209 if (profhz == 0) 210 profhz = i; 211 psratio = profhz / i; 212 } 213 214 /* 215 * The real-time timer, interrupting hz times per second. This is implemented 216 * as a FAST interrupt so it is in the context of the thread it interrupted, 217 * and not in an interrupt thread. YYY needs help. 218 */ 219 void 220 hardclock(frame) 221 struct clockframe *frame; 222 { 223 struct proc *p; 224 225 p = curproc; 226 if (p) { 227 struct pstats *pstats; 228 229 /* 230 * Run current process's virtual and profile time, as needed. 231 */ 232 pstats = p->p_stats; 233 if (CLKF_USERMODE(frame) && 234 timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && 235 itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) 236 psignal(p, SIGVTALRM); 237 if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) && 238 itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) 239 psignal(p, SIGPROF); 240 } 241 242 #if 0 /* SMP and BETTER_CLOCK */ 243 forward_hardclock(pscnt); 244 #endif 245 246 /* 247 * If no separate statistics clock is available, run it from here. 248 */ 249 if (stathz == 0) 250 statclock(frame); 251 252 tco_forward(0); 253 ticks++; 254 255 #ifdef DEVICE_POLLING 256 hardclock_device_poll(); /* this is very short and quick */ 257 #endif /* DEVICE_POLLING */ 258 259 /* 260 * Process callouts at a very low cpu priority, so we don't keep the 261 * relatively high clock interrupt priority any longer than necessary. 262 */ 263 if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) { 264 setsoftclock(); 265 } else if (softticks + 1 == ticks) { 266 ++softticks; 267 } 268 } 269 270 /* 271 * Compute number of ticks in the specified amount of time. 272 */ 273 int 274 tvtohz(tv) 275 struct timeval *tv; 276 { 277 unsigned long ticks; 278 long sec, usec; 279 280 /* 281 * If the number of usecs in the whole seconds part of the time 282 * difference fits in a long, then the total number of usecs will 283 * fit in an unsigned long. Compute the total and convert it to 284 * ticks, rounding up and adding 1 to allow for the current tick 285 * to expire. Rounding also depends on unsigned long arithmetic 286 * to avoid overflow. 287 * 288 * Otherwise, if the number of ticks in the whole seconds part of 289 * the time difference fits in a long, then convert the parts to 290 * ticks separately and add, using similar rounding methods and 291 * overflow avoidance. This method would work in the previous 292 * case but it is slightly slower and assumes that hz is integral. 293 * 294 * Otherwise, round the time difference down to the maximum 295 * representable value. 296 * 297 * If ints have 32 bits, then the maximum value for any timeout in 298 * 10ms ticks is 248 days. 299 */ 300 sec = tv->tv_sec; 301 usec = tv->tv_usec; 302 if (usec < 0) { 303 sec--; 304 usec += 1000000; 305 } 306 if (sec < 0) { 307 #ifdef DIAGNOSTIC 308 if (usec > 0) { 309 sec++; 310 usec -= 1000000; 311 } 312 printf("tvotohz: negative time difference %ld sec %ld usec\n", 313 sec, usec); 314 #endif 315 ticks = 1; 316 } else if (sec <= LONG_MAX / 1000000) 317 ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1)) 318 / tick + 1; 319 else if (sec <= LONG_MAX / hz) 320 ticks = sec * hz 321 + ((unsigned long)usec + (tick - 1)) / tick + 1; 322 else 323 ticks = LONG_MAX; 324 if (ticks > INT_MAX) 325 ticks = INT_MAX; 326 return ((int)ticks); 327 } 328 329 /* 330 * Start profiling on a process. 331 * 332 * Kernel profiling passes proc0 which never exits and hence 333 * keeps the profile clock running constantly. 334 */ 335 void 336 startprofclock(p) 337 struct proc *p; 338 { 339 int s; 340 341 if ((p->p_flag & P_PROFIL) == 0) { 342 p->p_flag |= P_PROFIL; 343 if (++profprocs == 1 && stathz != 0) { 344 s = splstatclock(); 345 psdiv = psratio; 346 setstatclockrate(profhz); 347 splx(s); 348 } 349 } 350 } 351 352 /* 353 * Stop profiling on a process. 354 */ 355 void 356 stopprofclock(p) 357 struct proc *p; 358 { 359 int s; 360 361 if (p->p_flag & P_PROFIL) { 362 p->p_flag &= ~P_PROFIL; 363 if (--profprocs == 0 && stathz != 0) { 364 s = splstatclock(); 365 psdiv = 1; 366 setstatclockrate(stathz); 367 splx(s); 368 } 369 } 370 } 371 372 /* 373 * Statistics clock. Grab profile sample, and if divider reaches 0, 374 * do process and kernel statistics. Most of the statistics are only 375 * used by user-level statistics programs. The main exceptions are 376 * p->p_uticks, p->p_sticks, p->p_iticks, and p->p_estcpu. 377 * 378 * The statclock should be called from an exclusive, fast interrupt, 379 * so the context should be the thread/process that got interrupted and 380 * not an interrupt thread. 381 */ 382 void 383 statclock(frame) 384 struct clockframe *frame; 385 { 386 #ifdef GPROF 387 struct gmonparam *g; 388 int i; 389 #endif 390 thread_t td; 391 struct pstats *pstats; 392 long rss; 393 struct rusage *ru; 394 struct vmspace *vm; 395 struct proc *p; 396 int bump; 397 struct timeval tv; 398 struct timeval *stv; 399 400 /* 401 * How big was our timeslice relative to the last time 402 */ 403 microuptime(&tv); 404 stv = &mycpu->gd_stattv; 405 if (stv->tv_sec == 0) { 406 bump = 1; 407 } else { 408 bump = tv.tv_usec - stv->tv_usec + 409 (tv.tv_sec - stv->tv_sec) * 1000000; 410 if (bump < 0) 411 bump = 0; 412 if (bump > 1000000) 413 bump = 1000000; 414 } 415 *stv = tv; 416 417 td = curthread; 418 p = td->td_proc; 419 420 if (CLKF_USERMODE(frame)) { 421 /* 422 * Came from userland, handle user time and deal with 423 * possible process. 424 */ 425 if (p && (p->p_flag & P_PROFIL)) 426 addupc_intr(p, CLKF_PC(frame), 1); 427 #if 0 /* SMP and BETTER_CLOCK */ 428 if (stathz != 0) 429 forward_statclock(pscnt); 430 #endif 431 td->td_uticks += bump; 432 433 /* 434 * Charge the time as appropriate 435 */ 436 if (p && p->p_nice > NZERO) 437 cp_time[CP_NICE] += bump; 438 else 439 cp_time[CP_USER] += bump; 440 } else { 441 #ifdef GPROF 442 /* 443 * Kernel statistics are just like addupc_intr, only easier. 444 */ 445 g = &_gmonparam; 446 if (g->state == GMON_PROF_ON) { 447 i = CLKF_PC(frame) - g->lowpc; 448 if (i < g->textsize) { 449 i /= HISTFRACTION * sizeof(*g->kcount); 450 g->kcount[i]++; 451 } 452 } 453 #endif 454 #if 0 /* SMP and BETTER_CLOCK */ 455 if (stathz != 0) 456 forward_statclock(pscnt); 457 #endif 458 /* 459 * Came from kernel mode, so we were: 460 * - handling an interrupt, 461 * - doing syscall or trap work on behalf of the current 462 * user process, or 463 * - spinning in the idle loop. 464 * Whichever it is, charge the time as appropriate. 465 * Note that we charge interrupts to the current process, 466 * regardless of whether they are ``for'' that process, 467 * so that we know how much of its real time was spent 468 * in ``non-process'' (i.e., interrupt) work. 469 */ 470 if (CLKF_INTR(frame)) 471 td->td_iticks += bump; 472 else 473 td->td_sticks += bump; 474 475 if (CLKF_INTR(frame)) { 476 cp_time[CP_INTR] += bump; 477 } else { 478 if (td == &mycpu->gd_idlethread) 479 cp_time[CP_IDLE] += bump; 480 else 481 cp_time[CP_SYS] += bump; 482 } 483 } 484 485 /* 486 * bump psticks and check against gd_psticks. When we hit the 487 * 1*hz mark (psdiv ticks) we do the more expensive stuff. If 488 * psdiv changes we reset everything to avoid confusion. 489 */ 490 ++psticks; 491 if (psticks < mycpu->gd_psticks && psdiv == mycpu->gd_psdiv) 492 return; 493 494 mycpu->gd_psdiv = psdiv; 495 mycpu->gd_psticks = psticks + psdiv; 496 497 /* 498 * XXX YYY DragonFly... need to rewrite all of this, 499 * only schedclock is distributed at the moment 500 */ 501 schedclock(NULL); 502 #ifdef SMP 503 if (smp_started && invltlb_ok && !cold && !panicstr) /* YYY */ 504 lwkt_send_ipiq_mask(mycpu->gd_other_cpus, schedclock, NULL); 505 #endif 506 507 if (p != NULL) { 508 /* Update resource usage integrals and maximums. */ 509 if ((pstats = p->p_stats) != NULL && 510 (ru = &pstats->p_ru) != NULL && 511 (vm = p->p_vmspace) != NULL) { 512 ru->ru_ixrss += pgtok(vm->vm_tsize); 513 ru->ru_idrss += pgtok(vm->vm_dsize); 514 ru->ru_isrss += pgtok(vm->vm_ssize); 515 rss = pgtok(vmspace_resident_count(vm)); 516 if (ru->ru_maxrss < rss) 517 ru->ru_maxrss = rss; 518 } 519 } 520 } 521 522 /* 523 * Return information about system clocks. 524 */ 525 static int 526 sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS) 527 { 528 struct clockinfo clkinfo; 529 /* 530 * Construct clockinfo structure. 531 */ 532 clkinfo.hz = hz; 533 clkinfo.tick = tick; 534 clkinfo.tickadj = tickadj; 535 clkinfo.profhz = profhz; 536 clkinfo.stathz = stathz ? stathz : hz; 537 return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req)); 538 } 539 540 SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD, 541 0, 0, sysctl_kern_clockrate, "S,clockinfo",""); 542 543 static __inline unsigned 544 tco_delta(struct timecounter *tc) 545 { 546 547 return ((tc->tc_get_timecount(tc) - tc->tc_offset_count) & 548 tc->tc_counter_mask); 549 } 550 551 /* 552 * We have eight functions for looking at the clock, four for 553 * microseconds and four for nanoseconds. For each there is fast 554 * but less precise version "get{nano|micro}[up]time" which will 555 * return a time which is up to 1/HZ previous to the call, whereas 556 * the raw version "{nano|micro}[up]time" will return a timestamp 557 * which is as precise as possible. The "up" variants return the 558 * time relative to system boot, these are well suited for time 559 * interval measurements. 560 */ 561 562 void 563 getmicrotime(struct timeval *tvp) 564 { 565 struct timecounter *tc; 566 567 if (!tco_method) { 568 tc = timecounter; 569 *tvp = tc->tc_microtime; 570 } else { 571 microtime(tvp); 572 } 573 } 574 575 void 576 getnanotime(struct timespec *tsp) 577 { 578 struct timecounter *tc; 579 580 if (!tco_method) { 581 tc = timecounter; 582 *tsp = tc->tc_nanotime; 583 } else { 584 nanotime(tsp); 585 } 586 } 587 588 void 589 microtime(struct timeval *tv) 590 { 591 struct timecounter *tc; 592 593 tc = timecounter; 594 tv->tv_sec = tc->tc_offset_sec; 595 tv->tv_usec = tc->tc_offset_micro; 596 tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32; 597 tv->tv_usec += boottime.tv_usec; 598 tv->tv_sec += boottime.tv_sec; 599 while (tv->tv_usec < 0) { 600 tv->tv_usec += 1000000; 601 if (tv->tv_sec > 0) 602 tv->tv_sec--; 603 } 604 while (tv->tv_usec >= 1000000) { 605 tv->tv_usec -= 1000000; 606 tv->tv_sec++; 607 } 608 } 609 610 void 611 nanotime(struct timespec *ts) 612 { 613 unsigned count; 614 u_int64_t delta; 615 struct timecounter *tc; 616 617 tc = timecounter; 618 ts->tv_sec = tc->tc_offset_sec; 619 count = tco_delta(tc); 620 delta = tc->tc_offset_nano; 621 delta += ((u_int64_t)count * tc->tc_scale_nano_f); 622 delta >>= 32; 623 delta += ((u_int64_t)count * tc->tc_scale_nano_i); 624 delta += boottime.tv_usec * 1000; 625 ts->tv_sec += boottime.tv_sec; 626 while (delta < 0) { 627 delta += 1000000000; 628 if (ts->tv_sec > 0) 629 ts->tv_sec--; 630 } 631 while (delta >= 1000000000) { 632 delta -= 1000000000; 633 ts->tv_sec++; 634 } 635 ts->tv_nsec = delta; 636 } 637 638 void 639 getmicrouptime(struct timeval *tvp) 640 { 641 struct timecounter *tc; 642 643 if (!tco_method) { 644 tc = timecounter; 645 tvp->tv_sec = tc->tc_offset_sec; 646 tvp->tv_usec = tc->tc_offset_micro; 647 } else { 648 microuptime(tvp); 649 } 650 } 651 652 void 653 getnanouptime(struct timespec *tsp) 654 { 655 struct timecounter *tc; 656 657 if (!tco_method) { 658 tc = timecounter; 659 tsp->tv_sec = tc->tc_offset_sec; 660 tsp->tv_nsec = tc->tc_offset_nano >> 32; 661 } else { 662 nanouptime(tsp); 663 } 664 } 665 666 void 667 microuptime(struct timeval *tv) 668 { 669 struct timecounter *tc; 670 671 tc = timecounter; 672 tv->tv_sec = tc->tc_offset_sec; 673 tv->tv_usec = tc->tc_offset_micro; 674 tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32; 675 while (tv->tv_usec < 0) { 676 tv->tv_usec += 1000000; 677 if (tv->tv_sec > 0) 678 tv->tv_sec--; 679 } 680 while (tv->tv_usec >= 1000000) { 681 tv->tv_usec -= 1000000; 682 tv->tv_sec++; 683 } 684 } 685 686 void 687 nanouptime(struct timespec *ts) 688 { 689 unsigned count; 690 u_int64_t delta; 691 struct timecounter *tc; 692 693 tc = timecounter; 694 ts->tv_sec = tc->tc_offset_sec; 695 count = tco_delta(tc); 696 delta = tc->tc_offset_nano; 697 delta += ((u_int64_t)count * tc->tc_scale_nano_f); 698 delta >>= 32; 699 delta += ((u_int64_t)count * tc->tc_scale_nano_i); 700 while (delta < 0) { 701 delta += 1000000000; 702 if (ts->tv_sec > 0) 703 ts->tv_sec--; 704 } 705 while (delta >= 1000000000) { 706 delta -= 1000000000; 707 ts->tv_sec++; 708 } 709 ts->tv_nsec = delta; 710 } 711 712 static void 713 tco_setscales(struct timecounter *tc) 714 { 715 u_int64_t scale; 716 717 scale = 1000000000LL << 32; 718 scale += tc->tc_adjustment; 719 scale /= tc->tc_tweak->tc_frequency; 720 tc->tc_scale_micro = scale / 1000; 721 tc->tc_scale_nano_f = scale & 0xffffffff; 722 tc->tc_scale_nano_i = scale >> 32; 723 } 724 725 void 726 update_timecounter(struct timecounter *tc) 727 { 728 tco_setscales(tc); 729 } 730 731 void 732 init_timecounter(struct timecounter *tc) 733 { 734 struct timespec ts1; 735 struct timecounter *t1, *t2, *t3; 736 unsigned u; 737 int i; 738 739 u = tc->tc_frequency / tc->tc_counter_mask; 740 if (u > hz) { 741 printf("Timecounter \"%s\" frequency %lu Hz" 742 " -- Insufficient hz, needs at least %u\n", 743 tc->tc_name, (u_long) tc->tc_frequency, u); 744 return; 745 } 746 747 tc->tc_adjustment = 0; 748 tc->tc_tweak = tc; 749 tco_setscales(tc); 750 tc->tc_offset_count = tc->tc_get_timecount(tc); 751 if (timecounter == &dummy_timecounter) 752 tc->tc_avail = tc; 753 else { 754 tc->tc_avail = timecounter->tc_tweak->tc_avail; 755 timecounter->tc_tweak->tc_avail = tc; 756 } 757 MALLOC(t1, struct timecounter *, sizeof *t1, M_TIMECOUNTER, M_WAITOK); 758 tc->tc_other = t1; 759 *t1 = *tc; 760 t2 = t1; 761 for (i = 1; i < NTIMECOUNTER; i++) { 762 MALLOC(t3, struct timecounter *, sizeof *t3, 763 M_TIMECOUNTER, M_WAITOK); 764 *t3 = *tc; 765 t3->tc_other = t2; 766 t2 = t3; 767 } 768 t1->tc_other = t3; 769 tc = t1; 770 771 printf("Timecounter \"%s\" frequency %lu Hz\n", 772 tc->tc_name, (u_long)tc->tc_frequency); 773 774 /* XXX: For now always start using the counter. */ 775 tc->tc_offset_count = tc->tc_get_timecount(tc); 776 nanouptime(&ts1); 777 tc->tc_offset_nano = (u_int64_t)ts1.tv_nsec << 32; 778 tc->tc_offset_micro = ts1.tv_nsec / 1000; 779 tc->tc_offset_sec = ts1.tv_sec; 780 timecounter = tc; 781 } 782 783 void 784 set_timecounter(struct timespec *ts) 785 { 786 struct timespec ts2; 787 788 nanouptime(&ts2); 789 boottime.tv_sec = ts->tv_sec - ts2.tv_sec; 790 boottime.tv_usec = (ts->tv_nsec - ts2.tv_nsec) / 1000; 791 if (boottime.tv_usec < 0) { 792 boottime.tv_usec += 1000000; 793 boottime.tv_sec--; 794 } 795 /* fiddle all the little crinkly bits around the fiords... */ 796 tco_forward(1); 797 } 798 799 static void 800 switch_timecounter(struct timecounter *newtc) 801 { 802 int s; 803 struct timecounter *tc; 804 struct timespec ts; 805 806 s = splclock(); 807 tc = timecounter; 808 if (newtc->tc_tweak == tc->tc_tweak) { 809 splx(s); 810 return; 811 } 812 newtc = newtc->tc_tweak->tc_other; 813 nanouptime(&ts); 814 newtc->tc_offset_sec = ts.tv_sec; 815 newtc->tc_offset_nano = (u_int64_t)ts.tv_nsec << 32; 816 newtc->tc_offset_micro = ts.tv_nsec / 1000; 817 newtc->tc_offset_count = newtc->tc_get_timecount(newtc); 818 tco_setscales(newtc); 819 timecounter = newtc; 820 splx(s); 821 } 822 823 static struct timecounter * 824 sync_other_counter(void) 825 { 826 struct timecounter *tc, *tcn, *tco; 827 unsigned delta; 828 829 tco = timecounter; 830 tc = tco->tc_other; 831 tcn = tc->tc_other; 832 *tc = *tco; 833 tc->tc_other = tcn; 834 delta = tco_delta(tc); 835 tc->tc_offset_count += delta; 836 tc->tc_offset_count &= tc->tc_counter_mask; 837 tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_f; 838 tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_i << 32; 839 return (tc); 840 } 841 842 static void 843 tco_forward(int force) 844 { 845 struct timecounter *tc, *tco; 846 struct timeval tvt; 847 848 tco = timecounter; 849 tc = sync_other_counter(); 850 /* 851 * We may be inducing a tiny error here, the tc_poll_pps() may 852 * process a latched count which happens after the tco_delta() 853 * in sync_other_counter(), which would extend the previous 854 * counters parameters into the domain of this new one. 855 * Since the timewindow is very small for this, the error is 856 * going to be only a few weenieseconds (as Dave Mills would 857 * say), so lets just not talk more about it, OK ? 858 */ 859 if (tco->tc_poll_pps) 860 tco->tc_poll_pps(tco); 861 if (timedelta != 0) { 862 tvt = boottime; 863 tvt.tv_usec += tickdelta; 864 if (tvt.tv_usec >= 1000000) { 865 tvt.tv_sec++; 866 tvt.tv_usec -= 1000000; 867 } else if (tvt.tv_usec < 0) { 868 tvt.tv_sec--; 869 tvt.tv_usec += 1000000; 870 } 871 boottime = tvt; 872 timedelta -= tickdelta; 873 } 874 875 while (tc->tc_offset_nano >= 1000000000ULL << 32) { 876 tc->tc_offset_nano -= 1000000000ULL << 32; 877 tc->tc_offset_sec++; 878 ntp_update_second(tc); /* XXX only needed if xntpd runs */ 879 tco_setscales(tc); 880 force++; 881 } 882 883 if (tco_method && !force) 884 return; 885 886 tc->tc_offset_micro = (tc->tc_offset_nano / 1000) >> 32; 887 888 /* Figure out the wall-clock time */ 889 tc->tc_nanotime.tv_sec = tc->tc_offset_sec + boottime.tv_sec; 890 tc->tc_nanotime.tv_nsec = 891 (tc->tc_offset_nano >> 32) + boottime.tv_usec * 1000; 892 tc->tc_microtime.tv_usec = tc->tc_offset_micro + boottime.tv_usec; 893 while (tc->tc_nanotime.tv_nsec >= 1000000000) { 894 tc->tc_nanotime.tv_nsec -= 1000000000; 895 tc->tc_microtime.tv_usec -= 1000000; 896 tc->tc_nanotime.tv_sec++; 897 } 898 time_second = tc->tc_microtime.tv_sec = tc->tc_nanotime.tv_sec; 899 900 timecounter = tc; 901 } 902 903 SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, ""); 904 905 SYSCTL_INT(_kern_timecounter, OID_AUTO, method, CTLFLAG_RW, &tco_method, 0, 906 "This variable determines the method used for updating timecounters. " 907 "If the default algorithm (0) fails with \"calcru negative...\" messages " 908 "try the alternate algorithm (1) which handles bad hardware better." 909 910 ); 911 912 static int 913 sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS) 914 { 915 char newname[32]; 916 struct timecounter *newtc, *tc; 917 int error; 918 919 tc = timecounter->tc_tweak; 920 strncpy(newname, tc->tc_name, sizeof(newname)); 921 error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req); 922 if (error == 0 && req->newptr != NULL && 923 strcmp(newname, tc->tc_name) != 0) { 924 for (newtc = tc->tc_avail; newtc != tc; 925 newtc = newtc->tc_avail) { 926 if (strcmp(newname, newtc->tc_name) == 0) { 927 /* Warm up new timecounter. */ 928 (void)newtc->tc_get_timecount(newtc); 929 930 switch_timecounter(newtc); 931 return (0); 932 } 933 } 934 return (EINVAL); 935 } 936 return (error); 937 } 938 939 SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware, CTLTYPE_STRING | CTLFLAG_RW, 940 0, 0, sysctl_kern_timecounter_hardware, "A", ""); 941 942 943 int 944 pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps) 945 { 946 pps_params_t *app; 947 struct pps_fetch_args *fapi; 948 #ifdef PPS_SYNC 949 struct pps_kcbind_args *kapi; 950 #endif 951 952 switch (cmd) { 953 case PPS_IOC_CREATE: 954 return (0); 955 case PPS_IOC_DESTROY: 956 return (0); 957 case PPS_IOC_SETPARAMS: 958 app = (pps_params_t *)data; 959 if (app->mode & ~pps->ppscap) 960 return (EINVAL); 961 pps->ppsparam = *app; 962 return (0); 963 case PPS_IOC_GETPARAMS: 964 app = (pps_params_t *)data; 965 *app = pps->ppsparam; 966 app->api_version = PPS_API_VERS_1; 967 return (0); 968 case PPS_IOC_GETCAP: 969 *(int*)data = pps->ppscap; 970 return (0); 971 case PPS_IOC_FETCH: 972 fapi = (struct pps_fetch_args *)data; 973 if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC) 974 return (EINVAL); 975 if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec) 976 return (EOPNOTSUPP); 977 pps->ppsinfo.current_mode = pps->ppsparam.mode; 978 fapi->pps_info_buf = pps->ppsinfo; 979 return (0); 980 case PPS_IOC_KCBIND: 981 #ifdef PPS_SYNC 982 kapi = (struct pps_kcbind_args *)data; 983 /* XXX Only root should be able to do this */ 984 if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC) 985 return (EINVAL); 986 if (kapi->kernel_consumer != PPS_KC_HARDPPS) 987 return (EINVAL); 988 if (kapi->edge & ~pps->ppscap) 989 return (EINVAL); 990 pps->kcmode = kapi->edge; 991 return (0); 992 #else 993 return (EOPNOTSUPP); 994 #endif 995 default: 996 return (ENOTTY); 997 } 998 } 999 1000 void 1001 pps_init(struct pps_state *pps) 1002 { 1003 pps->ppscap |= PPS_TSFMT_TSPEC; 1004 if (pps->ppscap & PPS_CAPTUREASSERT) 1005 pps->ppscap |= PPS_OFFSETASSERT; 1006 if (pps->ppscap & PPS_CAPTURECLEAR) 1007 pps->ppscap |= PPS_OFFSETCLEAR; 1008 } 1009 1010 void 1011 pps_event(struct pps_state *pps, struct timecounter *tc, unsigned count, int event) 1012 { 1013 struct timespec ts, *tsp, *osp; 1014 u_int64_t delta; 1015 unsigned tcount, *pcount; 1016 int foff, fhard; 1017 pps_seq_t *pseq; 1018 1019 /* Things would be easier with arrays... */ 1020 if (event == PPS_CAPTUREASSERT) { 1021 tsp = &pps->ppsinfo.assert_timestamp; 1022 osp = &pps->ppsparam.assert_offset; 1023 foff = pps->ppsparam.mode & PPS_OFFSETASSERT; 1024 fhard = pps->kcmode & PPS_CAPTUREASSERT; 1025 pcount = &pps->ppscount[0]; 1026 pseq = &pps->ppsinfo.assert_sequence; 1027 } else { 1028 tsp = &pps->ppsinfo.clear_timestamp; 1029 osp = &pps->ppsparam.clear_offset; 1030 foff = pps->ppsparam.mode & PPS_OFFSETCLEAR; 1031 fhard = pps->kcmode & PPS_CAPTURECLEAR; 1032 pcount = &pps->ppscount[1]; 1033 pseq = &pps->ppsinfo.clear_sequence; 1034 } 1035 1036 /* The timecounter changed: bail */ 1037 if (!pps->ppstc || 1038 pps->ppstc->tc_name != tc->tc_name || 1039 tc->tc_name != timecounter->tc_name) { 1040 pps->ppstc = tc; 1041 *pcount = count; 1042 return; 1043 } 1044 1045 /* Nothing really happened */ 1046 if (*pcount == count) 1047 return; 1048 1049 *pcount = count; 1050 1051 /* Convert the count to timespec */ 1052 ts.tv_sec = tc->tc_offset_sec; 1053 tcount = count - tc->tc_offset_count; 1054 tcount &= tc->tc_counter_mask; 1055 delta = tc->tc_offset_nano; 1056 delta += ((u_int64_t)tcount * tc->tc_scale_nano_f); 1057 delta >>= 32; 1058 delta += ((u_int64_t)tcount * tc->tc_scale_nano_i); 1059 delta += boottime.tv_usec * 1000; 1060 ts.tv_sec += boottime.tv_sec; 1061 while (delta >= 1000000000) { 1062 delta -= 1000000000; 1063 ts.tv_sec++; 1064 } 1065 ts.tv_nsec = delta; 1066 1067 (*pseq)++; 1068 *tsp = ts; 1069 1070 if (foff) { 1071 timespecadd(tsp, osp); 1072 if (tsp->tv_nsec < 0) { 1073 tsp->tv_nsec += 1000000000; 1074 tsp->tv_sec -= 1; 1075 } 1076 } 1077 #ifdef PPS_SYNC 1078 if (fhard) { 1079 /* magic, at its best... */ 1080 tcount = count - pps->ppscount[2]; 1081 pps->ppscount[2] = count; 1082 tcount &= tc->tc_counter_mask; 1083 delta = ((u_int64_t)tcount * tc->tc_tweak->tc_scale_nano_f); 1084 delta >>= 32; 1085 delta += ((u_int64_t)tcount * tc->tc_tweak->tc_scale_nano_i); 1086 hardpps(tsp, delta); 1087 } 1088 #endif 1089 } 1090