1 /* 2 * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org> 35 * Copyright (c) 1982, 1986, 1991, 1993 36 * The Regents of the University of California. All rights reserved. 37 * (c) UNIX System Laboratories, Inc. 38 * All or some portions of this file are derived from material licensed 39 * to the University of California by American Telephone and Telegraph 40 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 41 * the permission of UNIX System Laboratories, Inc. 42 * 43 * Redistribution and use in source and binary forms, with or without 44 * modification, are permitted provided that the following conditions 45 * are met: 46 * 1. Redistributions of source code must retain the above copyright 47 * notice, this list of conditions and the following disclaimer. 48 * 2. Redistributions in binary form must reproduce the above copyright 49 * notice, this list of conditions and the following disclaimer in the 50 * documentation and/or other materials provided with the distribution. 51 * 3. All advertising materials mentioning features or use of this software 52 * must display the following acknowledgement: 53 * This product includes software developed by the University of 54 * California, Berkeley and its contributors. 55 * 4. Neither the name of the University nor the names of its contributors 56 * may be used to endorse or promote products derived from this software 57 * without specific prior written permission. 58 * 59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 62 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 69 * SUCH DAMAGE. 70 * 71 * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 72 * $FreeBSD: src/sys/kern/kern_clock.c,v 1.105.2.10 2002/10/17 13:19:40 maxim Exp $ 73 * $DragonFly: src/sys/kern/kern_clock.c,v 1.23 2004/08/02 23:20:30 dillon Exp $ 74 */ 75 76 #include "opt_ntp.h" 77 78 #include <sys/param.h> 79 #include <sys/systm.h> 80 #include <sys/dkstat.h> 81 #include <sys/callout.h> 82 #include <sys/kernel.h> 83 #include <sys/proc.h> 84 #include <sys/malloc.h> 85 #include <sys/resourcevar.h> 86 #include <sys/signalvar.h> 87 #include <sys/timex.h> 88 #include <sys/timepps.h> 89 #include <vm/vm.h> 90 #include <sys/lock.h> 91 #include <vm/pmap.h> 92 #include <vm/vm_map.h> 93 #include <sys/sysctl.h> 94 #include <sys/thread2.h> 95 96 #include <machine/cpu.h> 97 #include <machine/limits.h> 98 #include <machine/smp.h> 99 100 #ifdef GPROF 101 #include <sys/gmon.h> 102 #endif 103 104 #ifdef DEVICE_POLLING 105 extern void init_device_poll(void); 106 extern void hardclock_device_poll(void); 107 #endif /* DEVICE_POLLING */ 108 109 static void initclocks (void *dummy); 110 SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL) 111 112 /* 113 * Some of these don't belong here, but it's easiest to concentrate them. 114 * Note that cp_time[] counts in microseconds, but most userland programs 115 * just compare relative times against the total by delta. 116 */ 117 long cp_time[CPUSTATES]; 118 119 SYSCTL_OPAQUE(_kern, OID_AUTO, cp_time, CTLFLAG_RD, &cp_time, sizeof(cp_time), 120 "LU", "CPU time statistics"); 121 122 long tk_cancc; 123 long tk_nin; 124 long tk_nout; 125 long tk_rawcc; 126 127 /* 128 * boottime is used to calculate the 'real' uptime. Do not confuse this with 129 * microuptime(). microtime() is not drift compensated. The real uptime 130 * with compensation is nanotime() - bootime. boottime is recalculated 131 * whenever the real time is set based on the compensated elapsed time 132 * in seconds (gd->gd_time_seconds). 133 * 134 * basetime is used to calculate the compensated real time of day. Chunky 135 * changes to the time, aka settimeofday(), are made by modifying basetime. 136 * 137 * The gd_time_seconds and gd_cpuclock_base fields remain fairly monotonic. 138 * Slight adjustments to gd_cpuclock_base are made to phase-lock it to 139 * the real time. 140 */ 141 struct timespec boottime; /* boot time (realtime) for reference only */ 142 struct timespec basetime; /* base time adjusts uptime -> realtime */ 143 time_t time_second; /* read-only 'passive' uptime in seconds */ 144 145 SYSCTL_STRUCT(_kern, KERN_BOOTTIME, boottime, CTLFLAG_RD, 146 &boottime, timeval, "System boottime"); 147 SYSCTL_STRUCT(_kern, OID_AUTO, basetime, CTLFLAG_RD, 148 &basetime, timeval, "System basetime"); 149 150 static void hardclock(systimer_t info, struct intrframe *frame); 151 static void statclock(systimer_t info, struct intrframe *frame); 152 static void schedclock(systimer_t info, struct intrframe *frame); 153 154 int ticks; /* system master ticks at hz */ 155 int clocks_running; /* tsleep/timeout clocks operational */ 156 int64_t nsec_adj; /* ntpd per-tick adjustment in nsec << 32 */ 157 int64_t nsec_acc; /* accumulator */ 158 159 /* 160 * Finish initializing clock frequencies and start all clocks running. 161 */ 162 /* ARGSUSED*/ 163 static void 164 initclocks(void *dummy) 165 { 166 cpu_initclocks(); 167 #ifdef DEVICE_POLLING 168 init_device_poll(); 169 #endif 170 /*psratio = profhz / stathz;*/ 171 initclocks_pcpu(); 172 clocks_running = 1; 173 } 174 175 /* 176 * Called on a per-cpu basis 177 */ 178 void 179 initclocks_pcpu(void) 180 { 181 struct globaldata *gd = mycpu; 182 183 crit_enter(); 184 if (gd->gd_cpuid == 0) { 185 gd->gd_time_seconds = 1; 186 gd->gd_cpuclock_base = cputimer_count(); 187 } else { 188 /* XXX */ 189 gd->gd_time_seconds = globaldata_find(0)->gd_time_seconds; 190 gd->gd_cpuclock_base = globaldata_find(0)->gd_cpuclock_base; 191 } 192 systimer_init_periodic(&gd->gd_hardclock, hardclock, NULL, hz); 193 systimer_init_periodic(&gd->gd_statclock, statclock, NULL, stathz); 194 /* XXX correct the frequency for scheduler / estcpu tests */ 195 systimer_init_periodic(&gd->gd_schedclock, schedclock, 196 NULL, ESTCPUFREQ); 197 crit_exit(); 198 } 199 200 /* 201 * Resynchronize gd_cpuclock_base after the system has been woken up from 202 * a sleep. It is absolutely essential that all the cpus be properly 203 * synchronized. Resynching is required because nanouptime() and friends 204 * will overflow intermediate multiplications if more then 2 seconds 205 * worth of cputimer_cont() delta has built up. 206 */ 207 #ifdef SMP 208 209 static 210 void 211 restoreclocks_remote(lwkt_cpusync_t poll) 212 { 213 mycpu->gd_cpuclock_base = *(sysclock_t *)poll->cs_data; 214 mycpu->gd_time_seconds = globaldata_find(0)->gd_time_seconds; 215 } 216 217 #endif 218 219 void 220 restoreclocks(void) 221 { 222 sysclock_t base = cputimer_count(); 223 #ifdef SMP 224 lwkt_cpusync_simple(-1, restoreclocks_remote, &base); 225 #else 226 mycpu->gd_cpuclock_base = base; 227 #endif 228 } 229 230 /* 231 * This sets the current real time of day. Timespecs are in seconds and 232 * nanoseconds. We do not mess with gd_time_seconds and gd_cpuclock_base, 233 * instead we adjust basetime so basetime + gd_* results in the current 234 * time of day. This way the gd_* fields are guarenteed to represent 235 * a monotonically increasing 'uptime' value. 236 */ 237 void 238 set_timeofday(struct timespec *ts) 239 { 240 struct timespec ts2; 241 242 /* 243 * XXX SMP / non-atomic basetime updates 244 */ 245 crit_enter(); 246 nanouptime(&ts2); 247 basetime.tv_sec = ts->tv_sec - ts2.tv_sec; 248 basetime.tv_nsec = ts->tv_nsec - ts2.tv_nsec; 249 if (basetime.tv_nsec < 0) { 250 basetime.tv_nsec += 1000000000; 251 --basetime.tv_sec; 252 } 253 boottime.tv_sec = basetime.tv_sec - mycpu->gd_time_seconds; 254 timedelta = 0; 255 crit_exit(); 256 } 257 258 /* 259 * Each cpu has its own hardclock, but we only increments ticks and softticks 260 * on cpu #0. 261 * 262 * NOTE! systimer! the MP lock might not be held here. We can only safely 263 * manipulate objects owned by the current cpu. 264 */ 265 static void 266 hardclock(systimer_t info, struct intrframe *frame) 267 { 268 sysclock_t cputicks; 269 struct proc *p; 270 struct pstats *pstats; 271 struct globaldata *gd = mycpu; 272 273 /* 274 * Realtime updates are per-cpu. Note that timer corrections as 275 * returned by microtime() and friends make an additional adjustment 276 * using a system-wise 'basetime', but the running time is always 277 * taken from the per-cpu globaldata area. Since the same clock 278 * is distributing (XXX SMP) to all cpus, the per-cpu timebases 279 * stay in synch. 280 * 281 * Note that we never allow info->time (aka gd->gd_hardclock.time) 282 * to reverse index gd_cpuclock_base. 283 */ 284 cputicks = info->time - gd->gd_cpuclock_base; 285 if (cputicks > cputimer_freq) { 286 ++gd->gd_time_seconds; 287 gd->gd_cpuclock_base += cputimer_freq; 288 } 289 290 /* 291 * The system-wide ticks and softticks are only updated by cpu #0. 292 * Callwheel actions are also (at the moment) only handled by cpu #0. 293 * Finally, we also do NTP related timedelta/tickdelta adjustments 294 * by adjusting basetime. 295 */ 296 if (gd->gd_cpuid == 0) { 297 struct timespec nts; 298 int leap; 299 300 ++ticks; 301 302 #ifdef DEVICE_POLLING 303 hardclock_device_poll(); /* mpsafe, short and quick */ 304 #endif /* DEVICE_POLLING */ 305 306 if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) { 307 setsoftclock(); 308 } else if (softticks + 1 == ticks) { 309 ++softticks; 310 } 311 312 #if 0 313 if (tco->tc_poll_pps) 314 tco->tc_poll_pps(tco); 315 #endif 316 /* 317 * Apply adjtime corrections. At the moment only do this if 318 * we can get the MP lock to interlock with adjtime's modification 319 * of these variables. Note that basetime adjustments are not 320 * MP safe either XXX. 321 */ 322 if (timedelta != 0 && try_mplock()) { 323 basetime.tv_nsec += tickdelta * 1000; 324 if (basetime.tv_nsec >= 1000000000) { 325 basetime.tv_nsec -= 1000000000; 326 ++basetime.tv_sec; 327 } else if (basetime.tv_nsec < 0) { 328 basetime.tv_nsec += 1000000000; 329 --basetime.tv_sec; 330 } 331 timedelta -= tickdelta; 332 rel_mplock(); 333 } 334 335 /* 336 * Apply per-tick compensation. ticks_adj adjusts for both 337 * offset and frequency, and could be negative. 338 */ 339 if (nsec_adj != 0 && try_mplock()) { 340 nsec_acc += nsec_adj; 341 if (nsec_acc >= 0x100000000LL) { 342 basetime.tv_nsec += nsec_acc >> 32; 343 nsec_acc = (nsec_acc & 0xFFFFFFFFLL); 344 } else if (nsec_acc <= -0x100000000LL) { 345 basetime.tv_nsec -= -nsec_acc >> 32; 346 nsec_acc = -(-nsec_acc & 0xFFFFFFFFLL); 347 } 348 if (basetime.tv_nsec >= 1000000000) { 349 basetime.tv_nsec -= 1000000000; 350 ++basetime.tv_sec; 351 } else if (basetime.tv_nsec < 0) { 352 basetime.tv_nsec += 1000000000; 353 --basetime.tv_sec; 354 } 355 rel_mplock(); 356 } 357 358 /* 359 * If the realtime-adjusted seconds hand rolls over then tell 360 * ntp_update_second() what we did in the last second so it can 361 * calculate what to do in the next second. It may also add 362 * or subtract a leap second. 363 */ 364 getnanotime(&nts); 365 if (time_second != nts.tv_sec) { 366 leap = ntp_update_second(time_second, &nsec_adj); 367 basetime.tv_sec += leap; 368 time_second = nts.tv_sec + leap; 369 nsec_adj /= hz; 370 } 371 } 372 373 /* 374 * ITimer handling is per-tick, per-cpu. I don't think psignal() 375 * is mpsafe on curproc, so XXX get the mplock. 376 */ 377 if ((p = curproc) != NULL && try_mplock()) { 378 pstats = p->p_stats; 379 if (frame && CLKF_USERMODE(frame) && 380 timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && 381 itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) 382 psignal(p, SIGVTALRM); 383 if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) && 384 itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) 385 psignal(p, SIGPROF); 386 rel_mplock(); 387 } 388 setdelayed(); 389 } 390 391 /* 392 * The statistics clock typically runs at a 125Hz rate, and is intended 393 * to be frequency offset from the hardclock (typ 100Hz). It is per-cpu. 394 * 395 * NOTE! systimer! the MP lock might not be held here. We can only safely 396 * manipulate objects owned by the current cpu. 397 * 398 * The stats clock is responsible for grabbing a profiling sample. 399 * Most of the statistics are only used by user-level statistics programs. 400 * The main exceptions are p->p_uticks, p->p_sticks, p->p_iticks, and 401 * p->p_estcpu. 402 * 403 * Like the other clocks, the stat clock is called from what is effectively 404 * a fast interrupt, so the context should be the thread/process that got 405 * interrupted. 406 */ 407 static void 408 statclock(systimer_t info, struct intrframe *frame) 409 { 410 #ifdef GPROF 411 struct gmonparam *g; 412 int i; 413 #endif 414 thread_t td; 415 struct proc *p; 416 int bump; 417 struct timeval tv; 418 struct timeval *stv; 419 420 /* 421 * How big was our timeslice relative to the last time? 422 */ 423 microuptime(&tv); /* mpsafe */ 424 stv = &mycpu->gd_stattv; 425 if (stv->tv_sec == 0) { 426 bump = 1; 427 } else { 428 bump = tv.tv_usec - stv->tv_usec + 429 (tv.tv_sec - stv->tv_sec) * 1000000; 430 if (bump < 0) 431 bump = 0; 432 if (bump > 1000000) 433 bump = 1000000; 434 } 435 *stv = tv; 436 437 td = curthread; 438 p = td->td_proc; 439 440 if (frame && CLKF_USERMODE(frame)) { 441 /* 442 * Came from userland, handle user time and deal with 443 * possible process. 444 */ 445 if (p && (p->p_flag & P_PROFIL)) 446 addupc_intr(p, CLKF_PC(frame), 1); 447 td->td_uticks += bump; 448 449 /* 450 * Charge the time as appropriate 451 */ 452 if (p && p->p_nice > NZERO) 453 cp_time[CP_NICE] += bump; 454 else 455 cp_time[CP_USER] += bump; 456 } else { 457 #ifdef GPROF 458 /* 459 * Kernel statistics are just like addupc_intr, only easier. 460 */ 461 g = &_gmonparam; 462 if (g->state == GMON_PROF_ON && frame) { 463 i = CLKF_PC(frame) - g->lowpc; 464 if (i < g->textsize) { 465 i /= HISTFRACTION * sizeof(*g->kcount); 466 g->kcount[i]++; 467 } 468 } 469 #endif 470 /* 471 * Came from kernel mode, so we were: 472 * - handling an interrupt, 473 * - doing syscall or trap work on behalf of the current 474 * user process, or 475 * - spinning in the idle loop. 476 * Whichever it is, charge the time as appropriate. 477 * Note that we charge interrupts to the current process, 478 * regardless of whether they are ``for'' that process, 479 * so that we know how much of its real time was spent 480 * in ``non-process'' (i.e., interrupt) work. 481 * 482 * XXX assume system if frame is NULL. A NULL frame 483 * can occur if ipi processing is done from an splx(). 484 */ 485 if (frame && CLKF_INTR(frame)) 486 td->td_iticks += bump; 487 else 488 td->td_sticks += bump; 489 490 if (frame && CLKF_INTR(frame)) { 491 cp_time[CP_INTR] += bump; 492 } else { 493 if (td == &mycpu->gd_idlethread) 494 cp_time[CP_IDLE] += bump; 495 else 496 cp_time[CP_SYS] += bump; 497 } 498 } 499 } 500 501 /* 502 * The scheduler clock typically runs at a 20Hz rate. NOTE! systimer, 503 * the MP lock might not be held. We can safely manipulate parts of curproc 504 * but that's about it. 505 */ 506 static void 507 schedclock(systimer_t info, struct intrframe *frame) 508 { 509 struct proc *p; 510 struct pstats *pstats; 511 struct rusage *ru; 512 struct vmspace *vm; 513 long rss; 514 515 schedulerclock(NULL); /* mpsafe */ 516 if ((p = curproc) != NULL) { 517 /* Update resource usage integrals and maximums. */ 518 if ((pstats = p->p_stats) != NULL && 519 (ru = &pstats->p_ru) != NULL && 520 (vm = p->p_vmspace) != NULL) { 521 ru->ru_ixrss += pgtok(vm->vm_tsize); 522 ru->ru_idrss += pgtok(vm->vm_dsize); 523 ru->ru_isrss += pgtok(vm->vm_ssize); 524 rss = pgtok(vmspace_resident_count(vm)); 525 if (ru->ru_maxrss < rss) 526 ru->ru_maxrss = rss; 527 } 528 } 529 } 530 531 /* 532 * Compute number of ticks for the specified amount of time. The 533 * return value is intended to be used in a clock interrupt timed 534 * operation and guarenteed to meet or exceed the requested time. 535 * If the representation overflows, return INT_MAX. The minimum return 536 * value is 1 ticks and the function will average the calculation up. 537 * If any value greater then 0 microseconds is supplied, a value 538 * of at least 2 will be returned to ensure that a near-term clock 539 * interrupt does not cause the timeout to occur (degenerately) early. 540 * 541 * Note that limit checks must take into account microseconds, which is 542 * done simply by using the smaller signed long maximum instead of 543 * the unsigned long maximum. 544 * 545 * If ints have 32 bits, then the maximum value for any timeout in 546 * 10ms ticks is 248 days. 547 */ 548 int 549 tvtohz_high(struct timeval *tv) 550 { 551 int ticks; 552 long sec, usec; 553 554 sec = tv->tv_sec; 555 usec = tv->tv_usec; 556 if (usec < 0) { 557 sec--; 558 usec += 1000000; 559 } 560 if (sec < 0) { 561 #ifdef DIAGNOSTIC 562 if (usec > 0) { 563 sec++; 564 usec -= 1000000; 565 } 566 printf("tvotohz: negative time difference %ld sec %ld usec\n", 567 sec, usec); 568 #endif 569 ticks = 1; 570 } else if (sec <= INT_MAX / hz) { 571 ticks = (int)(sec * hz + 572 ((u_long)usec + (tick - 1)) / tick) + 1; 573 } else { 574 ticks = INT_MAX; 575 } 576 return (ticks); 577 } 578 579 /* 580 * Compute number of ticks for the specified amount of time, erroring on 581 * the side of it being too low to ensure that sleeping the returned number 582 * of ticks will not result in a late return. 583 * 584 * The supplied timeval may not be negative and should be normalized. A 585 * return value of 0 is possible if the timeval converts to less then 586 * 1 tick. 587 * 588 * If ints have 32 bits, then the maximum value for any timeout in 589 * 10ms ticks is 248 days. 590 */ 591 int 592 tvtohz_low(struct timeval *tv) 593 { 594 int ticks; 595 long sec; 596 597 sec = tv->tv_sec; 598 if (sec <= INT_MAX / hz) 599 ticks = (int)(sec * hz + (u_long)tv->tv_usec / tick); 600 else 601 ticks = INT_MAX; 602 return (ticks); 603 } 604 605 606 /* 607 * Start profiling on a process. 608 * 609 * Kernel profiling passes proc0 which never exits and hence 610 * keeps the profile clock running constantly. 611 */ 612 void 613 startprofclock(struct proc *p) 614 { 615 if ((p->p_flag & P_PROFIL) == 0) { 616 p->p_flag |= P_PROFIL; 617 #if 0 /* XXX */ 618 if (++profprocs == 1 && stathz != 0) { 619 s = splstatclock(); 620 psdiv = psratio; 621 setstatclockrate(profhz); 622 splx(s); 623 } 624 #endif 625 } 626 } 627 628 /* 629 * Stop profiling on a process. 630 */ 631 void 632 stopprofclock(struct proc *p) 633 { 634 if (p->p_flag & P_PROFIL) { 635 p->p_flag &= ~P_PROFIL; 636 #if 0 /* XXX */ 637 if (--profprocs == 0 && stathz != 0) { 638 s = splstatclock(); 639 psdiv = 1; 640 setstatclockrate(stathz); 641 splx(s); 642 } 643 #endif 644 } 645 } 646 647 /* 648 * Return information about system clocks. 649 */ 650 static int 651 sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS) 652 { 653 struct clockinfo clkinfo; 654 /* 655 * Construct clockinfo structure. 656 */ 657 clkinfo.hz = hz; 658 clkinfo.tick = tick; 659 clkinfo.tickadj = tickadj; 660 clkinfo.profhz = profhz; 661 clkinfo.stathz = stathz ? stathz : hz; 662 return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req)); 663 } 664 665 SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD, 666 0, 0, sysctl_kern_clockrate, "S,clockinfo",""); 667 668 /* 669 * We have eight functions for looking at the clock, four for 670 * microseconds and four for nanoseconds. For each there is fast 671 * but less precise version "get{nano|micro}[up]time" which will 672 * return a time which is up to 1/HZ previous to the call, whereas 673 * the raw version "{nano|micro}[up]time" will return a timestamp 674 * which is as precise as possible. The "up" variants return the 675 * time relative to system boot, these are well suited for time 676 * interval measurements. 677 * 678 * Each cpu independantly maintains the current time of day, so all 679 * we need to do to protect ourselves from changes is to do a loop 680 * check on the seconds field changing out from under us. 681 */ 682 void 683 getmicrouptime(struct timeval *tvp) 684 { 685 struct globaldata *gd = mycpu; 686 sysclock_t delta; 687 688 do { 689 tvp->tv_sec = gd->gd_time_seconds; 690 delta = gd->gd_hardclock.time - gd->gd_cpuclock_base; 691 } while (tvp->tv_sec != gd->gd_time_seconds); 692 tvp->tv_usec = (cputimer_freq64_usec * delta) >> 32; 693 if (tvp->tv_usec >= 1000000) { 694 tvp->tv_usec -= 1000000; 695 ++tvp->tv_sec; 696 } 697 } 698 699 void 700 getnanouptime(struct timespec *tsp) 701 { 702 struct globaldata *gd = mycpu; 703 sysclock_t delta; 704 705 do { 706 tsp->tv_sec = gd->gd_time_seconds; 707 delta = gd->gd_hardclock.time - gd->gd_cpuclock_base; 708 } while (tsp->tv_sec != gd->gd_time_seconds); 709 tsp->tv_nsec = (cputimer_freq64_nsec * delta) >> 32; 710 if (tsp->tv_nsec >= 1000000000) { 711 tsp->tv_nsec -= 1000000000; 712 ++tsp->tv_sec; 713 } 714 } 715 716 void 717 microuptime(struct timeval *tvp) 718 { 719 struct globaldata *gd = mycpu; 720 sysclock_t delta; 721 722 do { 723 tvp->tv_sec = gd->gd_time_seconds; 724 delta = cputimer_count() - gd->gd_cpuclock_base; 725 } while (tvp->tv_sec != gd->gd_time_seconds); 726 tvp->tv_usec = (cputimer_freq64_usec * delta) >> 32; 727 if (tvp->tv_usec >= 1000000) { 728 tvp->tv_usec -= 1000000; 729 ++tvp->tv_sec; 730 } 731 } 732 733 void 734 nanouptime(struct timespec *tsp) 735 { 736 struct globaldata *gd = mycpu; 737 sysclock_t delta; 738 739 do { 740 tsp->tv_sec = gd->gd_time_seconds; 741 delta = cputimer_count() - gd->gd_cpuclock_base; 742 } while (tsp->tv_sec != gd->gd_time_seconds); 743 tsp->tv_nsec = (cputimer_freq64_nsec * delta) >> 32; 744 if (tsp->tv_nsec >= 1000000000) { 745 tsp->tv_nsec -= 1000000000; 746 ++tsp->tv_sec; 747 } 748 } 749 750 /* 751 * realtime routines 752 */ 753 754 void 755 getmicrotime(struct timeval *tvp) 756 { 757 struct globaldata *gd = mycpu; 758 sysclock_t delta; 759 760 do { 761 tvp->tv_sec = gd->gd_time_seconds; 762 delta = gd->gd_hardclock.time - gd->gd_cpuclock_base; 763 } while (tvp->tv_sec != gd->gd_time_seconds); 764 tvp->tv_usec = (cputimer_freq64_usec * delta) >> 32; 765 766 tvp->tv_sec += basetime.tv_sec; 767 tvp->tv_usec += basetime.tv_nsec / 1000; 768 while (tvp->tv_usec >= 1000000) { 769 tvp->tv_usec -= 1000000; 770 ++tvp->tv_sec; 771 } 772 } 773 774 void 775 getnanotime(struct timespec *tsp) 776 { 777 struct globaldata *gd = mycpu; 778 sysclock_t delta; 779 780 do { 781 tsp->tv_sec = gd->gd_time_seconds; 782 delta = gd->gd_hardclock.time - gd->gd_cpuclock_base; 783 } while (tsp->tv_sec != gd->gd_time_seconds); 784 tsp->tv_nsec = (cputimer_freq64_nsec * delta) >> 32; 785 786 tsp->tv_sec += basetime.tv_sec; 787 tsp->tv_nsec += basetime.tv_nsec; 788 while (tsp->tv_nsec >= 1000000000) { 789 tsp->tv_nsec -= 1000000000; 790 ++tsp->tv_sec; 791 } 792 } 793 794 void 795 microtime(struct timeval *tvp) 796 { 797 struct globaldata *gd = mycpu; 798 sysclock_t delta; 799 800 do { 801 tvp->tv_sec = gd->gd_time_seconds; 802 delta = cputimer_count() - gd->gd_cpuclock_base; 803 } while (tvp->tv_sec != gd->gd_time_seconds); 804 tvp->tv_usec = (cputimer_freq64_usec * delta) >> 32; 805 806 tvp->tv_sec += basetime.tv_sec; 807 tvp->tv_usec += basetime.tv_nsec / 1000; 808 while (tvp->tv_usec >= 1000000) { 809 tvp->tv_usec -= 1000000; 810 ++tvp->tv_sec; 811 } 812 } 813 814 void 815 nanotime(struct timespec *tsp) 816 { 817 struct globaldata *gd = mycpu; 818 sysclock_t delta; 819 820 do { 821 tsp->tv_sec = gd->gd_time_seconds; 822 delta = cputimer_count() - gd->gd_cpuclock_base; 823 } while (tsp->tv_sec != gd->gd_time_seconds); 824 tsp->tv_nsec = (cputimer_freq64_nsec * delta) >> 32; 825 826 tsp->tv_sec += basetime.tv_sec; 827 tsp->tv_nsec += basetime.tv_nsec; 828 while (tsp->tv_nsec >= 1000000000) { 829 tsp->tv_nsec -= 1000000000; 830 ++tsp->tv_sec; 831 } 832 } 833 834 int 835 pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps) 836 { 837 pps_params_t *app; 838 struct pps_fetch_args *fapi; 839 #ifdef PPS_SYNC 840 struct pps_kcbind_args *kapi; 841 #endif 842 843 switch (cmd) { 844 case PPS_IOC_CREATE: 845 return (0); 846 case PPS_IOC_DESTROY: 847 return (0); 848 case PPS_IOC_SETPARAMS: 849 app = (pps_params_t *)data; 850 if (app->mode & ~pps->ppscap) 851 return (EINVAL); 852 pps->ppsparam = *app; 853 return (0); 854 case PPS_IOC_GETPARAMS: 855 app = (pps_params_t *)data; 856 *app = pps->ppsparam; 857 app->api_version = PPS_API_VERS_1; 858 return (0); 859 case PPS_IOC_GETCAP: 860 *(int*)data = pps->ppscap; 861 return (0); 862 case PPS_IOC_FETCH: 863 fapi = (struct pps_fetch_args *)data; 864 if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC) 865 return (EINVAL); 866 if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec) 867 return (EOPNOTSUPP); 868 pps->ppsinfo.current_mode = pps->ppsparam.mode; 869 fapi->pps_info_buf = pps->ppsinfo; 870 return (0); 871 case PPS_IOC_KCBIND: 872 #ifdef PPS_SYNC 873 kapi = (struct pps_kcbind_args *)data; 874 /* XXX Only root should be able to do this */ 875 if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC) 876 return (EINVAL); 877 if (kapi->kernel_consumer != PPS_KC_HARDPPS) 878 return (EINVAL); 879 if (kapi->edge & ~pps->ppscap) 880 return (EINVAL); 881 pps->kcmode = kapi->edge; 882 return (0); 883 #else 884 return (EOPNOTSUPP); 885 #endif 886 default: 887 return (ENOTTY); 888 } 889 } 890 891 void 892 pps_init(struct pps_state *pps) 893 { 894 pps->ppscap |= PPS_TSFMT_TSPEC; 895 if (pps->ppscap & PPS_CAPTUREASSERT) 896 pps->ppscap |= PPS_OFFSETASSERT; 897 if (pps->ppscap & PPS_CAPTURECLEAR) 898 pps->ppscap |= PPS_OFFSETCLEAR; 899 } 900 901 void 902 pps_event(struct pps_state *pps, sysclock_t count, int event) 903 { 904 struct globaldata *gd; 905 struct timespec *tsp; 906 struct timespec *osp; 907 struct timespec ts; 908 sysclock_t *pcount; 909 #ifdef PPS_SYNC 910 sysclock_t tcount; 911 #endif 912 sysclock_t delta; 913 pps_seq_t *pseq; 914 int foff; 915 int fhard; 916 917 gd = mycpu; 918 919 /* Things would be easier with arrays... */ 920 if (event == PPS_CAPTUREASSERT) { 921 tsp = &pps->ppsinfo.assert_timestamp; 922 osp = &pps->ppsparam.assert_offset; 923 foff = pps->ppsparam.mode & PPS_OFFSETASSERT; 924 fhard = pps->kcmode & PPS_CAPTUREASSERT; 925 pcount = &pps->ppscount[0]; 926 pseq = &pps->ppsinfo.assert_sequence; 927 } else { 928 tsp = &pps->ppsinfo.clear_timestamp; 929 osp = &pps->ppsparam.clear_offset; 930 foff = pps->ppsparam.mode & PPS_OFFSETCLEAR; 931 fhard = pps->kcmode & PPS_CAPTURECLEAR; 932 pcount = &pps->ppscount[1]; 933 pseq = &pps->ppsinfo.clear_sequence; 934 } 935 936 /* Nothing really happened */ 937 if (*pcount == count) 938 return; 939 940 *pcount = count; 941 942 do { 943 ts.tv_sec = gd->gd_time_seconds; 944 delta = count - gd->gd_cpuclock_base; 945 } while (ts.tv_sec != gd->gd_time_seconds); 946 if (delta > cputimer_freq) { 947 ts.tv_sec += delta / cputimer_freq; 948 delta %= cputimer_freq; 949 } 950 ts.tv_nsec = (cputimer_freq64_nsec * delta) >> 32; 951 ts.tv_sec += basetime.tv_sec; 952 ts.tv_nsec += basetime.tv_nsec; 953 while (ts.tv_nsec >= 1000000000) { 954 ts.tv_nsec -= 1000000000; 955 ++ts.tv_sec; 956 } 957 958 (*pseq)++; 959 *tsp = ts; 960 961 if (foff) { 962 timespecadd(tsp, osp); 963 if (tsp->tv_nsec < 0) { 964 tsp->tv_nsec += 1000000000; 965 tsp->tv_sec -= 1; 966 } 967 } 968 #ifdef PPS_SYNC 969 if (fhard) { 970 /* magic, at its best... */ 971 tcount = count - pps->ppscount[2]; 972 pps->ppscount[2] = count; 973 delta = (cputimer_freq64_nsec * tcount) >> 32; 974 hardpps(tsp, delta); 975 } 976 #endif 977 } 978 979