1 /* 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)kern_time.c 8.1 (Berkeley) 6/10/93 30 * $FreeBSD: src/sys/kern/kern_time.c,v 1.68.2.1 2002/10/01 08:00:41 bde Exp $ 31 */ 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/buf.h> 36 #include <sys/sysproto.h> 37 #include <sys/resourcevar.h> 38 #include <sys/signalvar.h> 39 #include <sys/kernel.h> 40 #include <sys/sysent.h> 41 #include <sys/sysunion.h> 42 #include <sys/proc.h> 43 #include <sys/priv.h> 44 #include <sys/time.h> 45 #include <sys/vnode.h> 46 #include <sys/sysctl.h> 47 #include <sys/kern_syscall.h> 48 #include <vm/vm.h> 49 #include <vm/vm_extern.h> 50 51 #include <sys/msgport2.h> 52 #include <sys/spinlock2.h> 53 #include <sys/thread2.h> 54 55 extern struct spinlock ntp_spin; 56 57 struct timezone tz; 58 59 /* 60 * Time of day and interval timer support. 61 * 62 * These routines provide the kernel entry points to get and set 63 * the time-of-day and per-process interval timers. Subroutines 64 * here provide support for adding and subtracting timeval structures 65 * and decrementing interval timers, optionally reloading the interval 66 * timers when they expire. 67 */ 68 69 static int settime(struct timeval *); 70 static void timevalfix(struct timeval *); 71 static void realitexpire(void *arg); 72 73 /* 74 * Nanosleep tries very hard to sleep for a precisely requested time 75 * interval, down to 1uS. The administrator can impose a minimum delay 76 * and a delay below which we hard-loop instead of initiate a timer 77 * interrupt and sleep. 78 * 79 * For machines under high loads it might be beneficial to increase min_us 80 * to e.g. 1000uS (1ms) so spining processes sleep meaningfully. 81 */ 82 static int nanosleep_min_us = 10; 83 static int nanosleep_hard_us = 100; 84 static int gettimeofday_quick = 0; 85 SYSCTL_INT(_kern, OID_AUTO, nanosleep_min_us, CTLFLAG_RW, 86 &nanosleep_min_us, 0, ""); 87 SYSCTL_INT(_kern, OID_AUTO, nanosleep_hard_us, CTLFLAG_RW, 88 &nanosleep_hard_us, 0, ""); 89 SYSCTL_INT(_kern, OID_AUTO, gettimeofday_quick, CTLFLAG_RW, 90 &gettimeofday_quick, 0, ""); 91 92 static struct lock masterclock_lock = LOCK_INITIALIZER("mstrclk", 0, 0); 93 94 static int 95 settime(struct timeval *tv) 96 { 97 struct timeval delta, tv1, tv2; 98 static struct timeval maxtime, laststep; 99 struct timespec ts; 100 int origcpu; 101 102 if ((origcpu = mycpu->gd_cpuid) != 0) 103 lwkt_setcpu_self(globaldata_find(0)); 104 105 crit_enter(); 106 microtime(&tv1); 107 delta = *tv; 108 timevalsub(&delta, &tv1); 109 110 /* 111 * If the system is secure, we do not allow the time to be 112 * set to a value earlier than 1 second less than the highest 113 * time we have yet seen. The worst a miscreant can do in 114 * this circumstance is "freeze" time. He couldn't go 115 * back to the past. 116 * 117 * We similarly do not allow the clock to be stepped more 118 * than one second, nor more than once per second. This allows 119 * a miscreant to make the clock march double-time, but no worse. 120 */ 121 if (securelevel > 1) { 122 if (delta.tv_sec < 0 || delta.tv_usec < 0) { 123 /* 124 * Update maxtime to latest time we've seen. 125 */ 126 if (tv1.tv_sec > maxtime.tv_sec) 127 maxtime = tv1; 128 tv2 = *tv; 129 timevalsub(&tv2, &maxtime); 130 if (tv2.tv_sec < -1) { 131 tv->tv_sec = maxtime.tv_sec - 1; 132 kprintf("Time adjustment clamped to -1 second\n"); 133 } 134 } else { 135 if (tv1.tv_sec == laststep.tv_sec) { 136 crit_exit(); 137 return (EPERM); 138 } 139 if (delta.tv_sec > 1) { 140 tv->tv_sec = tv1.tv_sec + 1; 141 kprintf("Time adjustment clamped to +1 second\n"); 142 } 143 laststep = *tv; 144 } 145 } 146 147 ts.tv_sec = tv->tv_sec; 148 ts.tv_nsec = tv->tv_usec * 1000; 149 set_timeofday(&ts); 150 crit_exit(); 151 152 if (origcpu != 0) 153 lwkt_setcpu_self(globaldata_find(origcpu)); 154 155 resettodr(); 156 return (0); 157 } 158 159 static void 160 get_process_cputime(struct proc *p, struct timespec *ats) 161 { 162 struct rusage ru; 163 164 lwkt_gettoken(&p->p_token); 165 calcru_proc(p, &ru); 166 lwkt_reltoken(&p->p_token); 167 timevaladd(&ru.ru_utime, &ru.ru_stime); 168 TIMEVAL_TO_TIMESPEC(&ru.ru_utime, ats); 169 } 170 171 static void 172 get_process_usertime(struct proc *p, struct timespec *ats) 173 { 174 struct rusage ru; 175 176 lwkt_gettoken(&p->p_token); 177 calcru_proc(p, &ru); 178 lwkt_reltoken(&p->p_token); 179 TIMEVAL_TO_TIMESPEC(&ru.ru_utime, ats); 180 } 181 182 static void 183 get_curthread_cputime(struct timespec *ats) 184 { 185 struct thread *td = curthread; 186 struct timeval sys, user; 187 188 calcru(td->td_lwp, &user, &sys); 189 timevaladd(&user, &sys); 190 TIMEVAL_TO_TIMESPEC(&user, ats); 191 } 192 193 /* 194 * MPSAFE 195 */ 196 int 197 kern_clock_gettime(clockid_t clock_id, struct timespec *ats) 198 { 199 struct proc *p; 200 201 p = curproc; 202 switch(clock_id) { 203 case CLOCK_REALTIME: 204 case CLOCK_REALTIME_PRECISE: 205 nanotime(ats); 206 break; 207 case CLOCK_REALTIME_FAST: 208 getnanotime(ats); 209 break; 210 case CLOCK_MONOTONIC: 211 case CLOCK_MONOTONIC_PRECISE: 212 case CLOCK_UPTIME: 213 case CLOCK_UPTIME_PRECISE: 214 nanouptime(ats); 215 break; 216 case CLOCK_MONOTONIC_FAST: 217 case CLOCK_UPTIME_FAST: 218 getnanouptime(ats); 219 break; 220 case CLOCK_VIRTUAL: 221 get_process_usertime(p, ats); 222 break; 223 case CLOCK_PROF: 224 case CLOCK_PROCESS_CPUTIME_ID: 225 get_process_cputime(p, ats); 226 break; 227 case CLOCK_SECOND: 228 ats->tv_sec = time_second; 229 ats->tv_nsec = 0; 230 break; 231 case CLOCK_THREAD_CPUTIME_ID: 232 get_curthread_cputime(ats); 233 break; 234 default: 235 return (EINVAL); 236 } 237 return (0); 238 } 239 240 /* 241 * MPSAFE 242 */ 243 int 244 sys_clock_gettime(struct clock_gettime_args *uap) 245 { 246 struct timespec ats; 247 int error; 248 249 error = kern_clock_gettime(uap->clock_id, &ats); 250 if (error == 0) 251 error = copyout(&ats, uap->tp, sizeof(ats)); 252 253 return (error); 254 } 255 256 int 257 kern_clock_settime(clockid_t clock_id, struct timespec *ats) 258 { 259 struct thread *td = curthread; 260 struct timeval atv; 261 int error; 262 263 if ((error = priv_check(td, PRIV_CLOCK_SETTIME)) != 0) 264 return (error); 265 if (clock_id != CLOCK_REALTIME) 266 return (EINVAL); 267 if (ats->tv_nsec < 0 || ats->tv_nsec >= 1000000000) 268 return (EINVAL); 269 270 lockmgr(&masterclock_lock, LK_EXCLUSIVE); 271 TIMESPEC_TO_TIMEVAL(&atv, ats); 272 error = settime(&atv); 273 lockmgr(&masterclock_lock, LK_RELEASE); 274 275 return (error); 276 } 277 278 /* 279 * MPALMOSTSAFE 280 */ 281 int 282 sys_clock_settime(struct clock_settime_args *uap) 283 { 284 struct timespec ats; 285 int error; 286 287 if ((error = copyin(uap->tp, &ats, sizeof(ats))) != 0) 288 return (error); 289 290 error = kern_clock_settime(uap->clock_id, &ats); 291 292 return (error); 293 } 294 295 /* 296 * MPSAFE 297 */ 298 int 299 kern_clock_getres(clockid_t clock_id, struct timespec *ts) 300 { 301 ts->tv_sec = 0; 302 switch(clock_id) { 303 case CLOCK_REALTIME: 304 case CLOCK_REALTIME_FAST: 305 case CLOCK_REALTIME_PRECISE: 306 case CLOCK_MONOTONIC: 307 case CLOCK_MONOTONIC_FAST: 308 case CLOCK_MONOTONIC_PRECISE: 309 case CLOCK_UPTIME: 310 case CLOCK_UPTIME_FAST: 311 case CLOCK_UPTIME_PRECISE: 312 /* 313 * Round up the result of the division cheaply 314 * by adding 1. Rounding up is especially important 315 * if rounding down would give 0. Perfect rounding 316 * is unimportant. 317 */ 318 ts->tv_nsec = 1000000000 / sys_cputimer->freq + 1; 319 break; 320 case CLOCK_VIRTUAL: 321 case CLOCK_PROF: 322 /* Accurately round up here because we can do so cheaply. */ 323 ts->tv_nsec = (1000000000 + hz - 1) / hz; 324 break; 325 case CLOCK_SECOND: 326 ts->tv_sec = 1; 327 ts->tv_nsec = 0; 328 break; 329 case CLOCK_THREAD_CPUTIME_ID: 330 case CLOCK_PROCESS_CPUTIME_ID: 331 ts->tv_nsec = 1000; 332 break; 333 default: 334 return (EINVAL); 335 } 336 337 return (0); 338 } 339 340 /* 341 * MPSAFE 342 */ 343 int 344 sys_clock_getres(struct clock_getres_args *uap) 345 { 346 int error; 347 struct timespec ts; 348 349 error = kern_clock_getres(uap->clock_id, &ts); 350 if (error == 0) 351 error = copyout(&ts, uap->tp, sizeof(ts)); 352 353 return (error); 354 } 355 356 /* 357 * nanosleep1() 358 * 359 * This is a general helper function for nanosleep() (aka sleep() aka 360 * usleep()). 361 * 362 * If there is less then one tick's worth of time left and 363 * we haven't done a yield, or the remaining microseconds is 364 * ridiculously low, do a yield. This avoids having 365 * to deal with systimer overheads when the system is under 366 * heavy loads. If we have done a yield already then use 367 * a systimer and an uninterruptable thread wait. 368 * 369 * If there is more then a tick's worth of time left, 370 * calculate the baseline ticks and use an interruptable 371 * tsleep, then handle the fine-grained delay on the next 372 * loop. This usually results in two sleeps occuring, a long one 373 * and a short one. 374 * 375 * MPSAFE 376 */ 377 static void 378 ns1_systimer(systimer_t info, int in_ipi __unused, 379 struct intrframe *frame __unused) 380 { 381 lwkt_schedule(info->data); 382 } 383 384 int 385 nanosleep1(struct timespec *rqt, struct timespec *rmt) 386 { 387 static int nanowait; 388 struct timespec ts, ts2, ts3; 389 struct timeval tv; 390 int error; 391 392 if (rqt->tv_nsec < 0 || rqt->tv_nsec >= 1000000000) 393 return (EINVAL); 394 /* XXX: imho this should return EINVAL at least for tv_sec < 0 */ 395 if (rqt->tv_sec < 0 || (rqt->tv_sec == 0 && rqt->tv_nsec == 0)) 396 return (0); 397 nanouptime(&ts); 398 timespecadd(&ts, rqt); /* ts = target timestamp compare */ 399 TIMESPEC_TO_TIMEVAL(&tv, rqt); /* tv = sleep interval */ 400 401 for (;;) { 402 int ticks; 403 struct systimer info; 404 405 ticks = tv.tv_usec / ustick; /* approximate */ 406 407 if (tv.tv_sec == 0 && ticks == 0) { 408 thread_t td = curthread; 409 if (tv.tv_usec > 0 && tv.tv_usec < nanosleep_min_us) 410 tv.tv_usec = nanosleep_min_us; 411 if (tv.tv_usec < nanosleep_hard_us) { 412 lwkt_user_yield(); 413 cpu_pause(); 414 } else { 415 crit_enter_quick(td); 416 systimer_init_oneshot(&info, ns1_systimer, 417 td, tv.tv_usec); 418 lwkt_deschedule_self(td); 419 crit_exit_quick(td); 420 lwkt_switch(); 421 systimer_del(&info); /* make sure it's gone */ 422 } 423 error = iscaught(td->td_lwp); 424 } else if (tv.tv_sec == 0) { 425 error = tsleep(&nanowait, PCATCH, "nanslp", ticks); 426 } else { 427 ticks = tvtohz_low(&tv); /* also handles overflow */ 428 error = tsleep(&nanowait, PCATCH, "nanslp", ticks); 429 } 430 nanouptime(&ts2); 431 if (error && error != EWOULDBLOCK) { 432 if (error == ERESTART) 433 error = EINTR; 434 if (rmt != NULL) { 435 timespecsub(&ts, &ts2); 436 if (ts.tv_sec < 0) 437 timespecclear(&ts); 438 *rmt = ts; 439 } 440 return (error); 441 } 442 if (timespeccmp(&ts2, &ts, >=)) 443 return (0); 444 ts3 = ts; 445 timespecsub(&ts3, &ts2); 446 TIMESPEC_TO_TIMEVAL(&tv, &ts3); 447 } 448 } 449 450 /* 451 * MPSAFE 452 */ 453 int 454 sys_nanosleep(struct nanosleep_args *uap) 455 { 456 int error; 457 struct timespec rqt; 458 struct timespec rmt; 459 460 error = copyin(uap->rqtp, &rqt, sizeof(rqt)); 461 if (error) 462 return (error); 463 464 error = nanosleep1(&rqt, &rmt); 465 466 /* 467 * copyout the residual if nanosleep was interrupted. 468 */ 469 if (error && uap->rmtp) { 470 int error2; 471 472 error2 = copyout(&rmt, uap->rmtp, sizeof(rmt)); 473 if (error2) 474 error = error2; 475 } 476 return (error); 477 } 478 479 /* 480 * The gettimeofday() system call is supposed to return a fine-grained 481 * realtime stamp. However, acquiring a fine-grained stamp can create a 482 * bottleneck when multiple cpu cores are trying to accessing e.g. the 483 * HPET hardware timer all at the same time, so we have a sysctl that 484 * allows its behavior to be changed to a more coarse-grained timestamp 485 * which does not have to access a hardware timer. 486 */ 487 int 488 sys_gettimeofday(struct gettimeofday_args *uap) 489 { 490 struct timeval atv; 491 int error = 0; 492 493 if (uap->tp) { 494 if (gettimeofday_quick) 495 getmicrotime(&atv); 496 else 497 microtime(&atv); 498 if ((error = copyout((caddr_t)&atv, (caddr_t)uap->tp, 499 sizeof (atv)))) 500 return (error); 501 } 502 if (uap->tzp) 503 error = copyout((caddr_t)&tz, (caddr_t)uap->tzp, 504 sizeof (tz)); 505 return (error); 506 } 507 508 /* 509 * MPALMOSTSAFE 510 */ 511 int 512 sys_settimeofday(struct settimeofday_args *uap) 513 { 514 struct thread *td = curthread; 515 struct timeval atv; 516 struct timezone atz; 517 int error; 518 519 if ((error = priv_check(td, PRIV_SETTIMEOFDAY))) 520 return (error); 521 /* 522 * Verify all parameters before changing time. 523 * 524 * XXX: We do not allow the time to be set to 0.0, which also by 525 * happy coincidence works around a pkgsrc bulk build bug. 526 */ 527 if (uap->tv) { 528 if ((error = copyin((caddr_t)uap->tv, (caddr_t)&atv, 529 sizeof(atv)))) 530 return (error); 531 if (atv.tv_usec < 0 || atv.tv_usec >= 1000000) 532 return (EINVAL); 533 if (atv.tv_sec == 0 && atv.tv_usec == 0) 534 return (EINVAL); 535 } 536 if (uap->tzp && 537 (error = copyin((caddr_t)uap->tzp, (caddr_t)&atz, sizeof(atz)))) 538 return (error); 539 540 lockmgr(&masterclock_lock, LK_EXCLUSIVE); 541 if (uap->tv && (error = settime(&atv))) { 542 lockmgr(&masterclock_lock, LK_RELEASE); 543 return (error); 544 } 545 lockmgr(&masterclock_lock, LK_RELEASE); 546 547 if (uap->tzp) 548 tz = atz; 549 return (0); 550 } 551 552 /* 553 * WARNING! Run with ntp_spin held 554 */ 555 static void 556 kern_adjtime_common(void) 557 { 558 if ((ntp_delta >= 0 && ntp_delta < ntp_default_tick_delta) || 559 (ntp_delta < 0 && ntp_delta > -ntp_default_tick_delta)) 560 ntp_tick_delta = ntp_delta; 561 else if (ntp_delta > ntp_big_delta) 562 ntp_tick_delta = 10 * ntp_default_tick_delta; 563 else if (ntp_delta < -ntp_big_delta) 564 ntp_tick_delta = -10 * ntp_default_tick_delta; 565 else if (ntp_delta > 0) 566 ntp_tick_delta = ntp_default_tick_delta; 567 else 568 ntp_tick_delta = -ntp_default_tick_delta; 569 } 570 571 void 572 kern_adjtime(int64_t delta, int64_t *odelta) 573 { 574 spin_lock(&ntp_spin); 575 *odelta = ntp_delta; 576 ntp_delta = delta; 577 kern_adjtime_common(); 578 spin_unlock(&ntp_spin); 579 } 580 581 static void 582 kern_get_ntp_delta(int64_t *delta) 583 { 584 *delta = ntp_delta; 585 } 586 587 void 588 kern_reladjtime(int64_t delta) 589 { 590 spin_lock(&ntp_spin); 591 ntp_delta += delta; 592 kern_adjtime_common(); 593 spin_unlock(&ntp_spin); 594 } 595 596 static void 597 kern_adjfreq(int64_t rate) 598 { 599 spin_lock(&ntp_spin); 600 ntp_tick_permanent = rate; 601 spin_unlock(&ntp_spin); 602 } 603 604 /* 605 * MPALMOSTSAFE 606 */ 607 int 608 sys_adjtime(struct adjtime_args *uap) 609 { 610 struct thread *td = curthread; 611 struct timeval atv; 612 int64_t ndelta, odelta; 613 int error; 614 615 if ((error = priv_check(td, PRIV_ADJTIME))) 616 return (error); 617 error = copyin(uap->delta, &atv, sizeof(struct timeval)); 618 if (error) 619 return (error); 620 621 /* 622 * Compute the total correction and the rate at which to apply it. 623 * Round the adjustment down to a whole multiple of the per-tick 624 * delta, so that after some number of incremental changes in 625 * hardclock(), tickdelta will become zero, lest the correction 626 * overshoot and start taking us away from the desired final time. 627 */ 628 ndelta = (int64_t)atv.tv_sec * 1000000000 + atv.tv_usec * 1000; 629 kern_adjtime(ndelta, &odelta); 630 631 if (uap->olddelta) { 632 atv.tv_sec = odelta / 1000000000; 633 atv.tv_usec = odelta % 1000000000 / 1000; 634 copyout(&atv, uap->olddelta, sizeof(struct timeval)); 635 } 636 return (0); 637 } 638 639 static int 640 sysctl_adjtime(SYSCTL_HANDLER_ARGS) 641 { 642 int64_t delta; 643 int error; 644 645 if (req->newptr != NULL) { 646 if (priv_check(curthread, PRIV_ROOT)) 647 return (EPERM); 648 error = SYSCTL_IN(req, &delta, sizeof(delta)); 649 if (error) 650 return (error); 651 kern_reladjtime(delta); 652 } 653 654 if (req->oldptr) 655 kern_get_ntp_delta(&delta); 656 error = SYSCTL_OUT(req, &delta, sizeof(delta)); 657 return (error); 658 } 659 660 /* 661 * delta is in nanoseconds. 662 */ 663 static int 664 sysctl_delta(SYSCTL_HANDLER_ARGS) 665 { 666 int64_t delta, old_delta; 667 int error; 668 669 if (req->newptr != NULL) { 670 if (priv_check(curthread, PRIV_ROOT)) 671 return (EPERM); 672 error = SYSCTL_IN(req, &delta, sizeof(delta)); 673 if (error) 674 return (error); 675 kern_adjtime(delta, &old_delta); 676 } 677 678 if (req->oldptr != NULL) 679 kern_get_ntp_delta(&old_delta); 680 error = SYSCTL_OUT(req, &old_delta, sizeof(old_delta)); 681 return (error); 682 } 683 684 /* 685 * frequency is in nanoseconds per second shifted left 32. 686 * kern_adjfreq() needs it in nanoseconds per tick shifted left 32. 687 */ 688 static int 689 sysctl_adjfreq(SYSCTL_HANDLER_ARGS) 690 { 691 int64_t freqdelta; 692 int error; 693 694 if (req->newptr != NULL) { 695 if (priv_check(curthread, PRIV_ROOT)) 696 return (EPERM); 697 error = SYSCTL_IN(req, &freqdelta, sizeof(freqdelta)); 698 if (error) 699 return (error); 700 701 freqdelta /= hz; 702 kern_adjfreq(freqdelta); 703 } 704 705 if (req->oldptr != NULL) 706 freqdelta = ntp_tick_permanent * hz; 707 error = SYSCTL_OUT(req, &freqdelta, sizeof(freqdelta)); 708 if (error) 709 return (error); 710 711 return (0); 712 } 713 714 SYSCTL_NODE(_kern, OID_AUTO, ntp, CTLFLAG_RW, 0, "NTP related controls"); 715 SYSCTL_PROC(_kern_ntp, OID_AUTO, permanent, 716 CTLTYPE_QUAD|CTLFLAG_RW, 0, 0, 717 sysctl_adjfreq, "Q", "permanent correction per second"); 718 SYSCTL_PROC(_kern_ntp, OID_AUTO, delta, 719 CTLTYPE_QUAD|CTLFLAG_RW, 0, 0, 720 sysctl_delta, "Q", "one-time delta"); 721 SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, big_delta, CTLFLAG_RD, 722 &ntp_big_delta, sizeof(ntp_big_delta), "Q", 723 "threshold for fast adjustment"); 724 SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, tick_delta, CTLFLAG_RD, 725 &ntp_tick_delta, sizeof(ntp_tick_delta), "LU", 726 "per-tick adjustment"); 727 SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, default_tick_delta, CTLFLAG_RD, 728 &ntp_default_tick_delta, sizeof(ntp_default_tick_delta), "LU", 729 "default per-tick adjustment"); 730 SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, next_leap_second, CTLFLAG_RW, 731 &ntp_leap_second, sizeof(ntp_leap_second), "LU", 732 "next leap second"); 733 SYSCTL_INT(_kern_ntp, OID_AUTO, insert_leap_second, CTLFLAG_RW, 734 &ntp_leap_insert, 0, "insert or remove leap second"); 735 SYSCTL_PROC(_kern_ntp, OID_AUTO, adjust, 736 CTLTYPE_QUAD|CTLFLAG_RW, 0, 0, 737 sysctl_adjtime, "Q", "relative adjust for delta"); 738 739 /* 740 * Get value of an interval timer. The process virtual and 741 * profiling virtual time timers are kept in the p_stats area, since 742 * they can be swapped out. These are kept internally in the 743 * way they are specified externally: in time until they expire. 744 * 745 * The real time interval timer is kept in the process table slot 746 * for the process, and its value (it_value) is kept as an 747 * absolute time rather than as a delta, so that it is easy to keep 748 * periodic real-time signals from drifting. 749 * 750 * Virtual time timers are processed in the hardclock() routine of 751 * kern_clock.c. The real time timer is processed by a timeout 752 * routine, called from the softclock() routine. Since a callout 753 * may be delayed in real time due to interrupt processing in the system, 754 * it is possible for the real time timeout routine (realitexpire, given below), 755 * to be delayed in real time past when it is supposed to occur. It 756 * does not suffice, therefore, to reload the real timer .it_value from the 757 * real time timers .it_interval. Rather, we compute the next time in 758 * absolute time the timer should go off. 759 * 760 * MPALMOSTSAFE 761 */ 762 int 763 sys_getitimer(struct getitimer_args *uap) 764 { 765 struct proc *p = curproc; 766 struct timeval ctv; 767 struct itimerval aitv; 768 769 if (uap->which > ITIMER_PROF) 770 return (EINVAL); 771 lwkt_gettoken(&p->p_token); 772 if (uap->which == ITIMER_REAL) { 773 /* 774 * Convert from absolute to relative time in .it_value 775 * part of real time timer. If time for real time timer 776 * has passed return 0, else return difference between 777 * current time and time for the timer to go off. 778 */ 779 aitv = p->p_realtimer; 780 if (timevalisset(&aitv.it_value)) { 781 getmicrouptime(&ctv); 782 if (timevalcmp(&aitv.it_value, &ctv, <)) 783 timevalclear(&aitv.it_value); 784 else 785 timevalsub(&aitv.it_value, &ctv); 786 } 787 } else { 788 aitv = p->p_timer[uap->which]; 789 } 790 lwkt_reltoken(&p->p_token); 791 return (copyout(&aitv, uap->itv, sizeof (struct itimerval))); 792 } 793 794 /* 795 * MPALMOSTSAFE 796 */ 797 int 798 sys_setitimer(struct setitimer_args *uap) 799 { 800 struct itimerval aitv; 801 struct timeval ctv; 802 struct itimerval *itvp; 803 struct proc *p = curproc; 804 int error; 805 806 if (uap->which > ITIMER_PROF) 807 return (EINVAL); 808 itvp = uap->itv; 809 if (itvp && (error = copyin((caddr_t)itvp, (caddr_t)&aitv, 810 sizeof(struct itimerval)))) 811 return (error); 812 if ((uap->itv = uap->oitv) && 813 (error = sys_getitimer((struct getitimer_args *)uap))) 814 return (error); 815 if (itvp == NULL) 816 return (0); 817 if (itimerfix(&aitv.it_value)) 818 return (EINVAL); 819 if (!timevalisset(&aitv.it_value)) 820 timevalclear(&aitv.it_interval); 821 else if (itimerfix(&aitv.it_interval)) 822 return (EINVAL); 823 lwkt_gettoken(&p->p_token); 824 if (uap->which == ITIMER_REAL) { 825 if (timevalisset(&p->p_realtimer.it_value)) 826 callout_stop_sync(&p->p_ithandle); 827 if (timevalisset(&aitv.it_value)) 828 callout_reset(&p->p_ithandle, 829 tvtohz_high(&aitv.it_value), realitexpire, p); 830 getmicrouptime(&ctv); 831 timevaladd(&aitv.it_value, &ctv); 832 p->p_realtimer = aitv; 833 } else { 834 p->p_timer[uap->which] = aitv; 835 switch(uap->which) { 836 case ITIMER_VIRTUAL: 837 p->p_flags &= ~P_SIGVTALRM; 838 break; 839 case ITIMER_PROF: 840 p->p_flags &= ~P_SIGPROF; 841 break; 842 } 843 } 844 lwkt_reltoken(&p->p_token); 845 return (0); 846 } 847 848 /* 849 * Real interval timer expired: 850 * send process whose timer expired an alarm signal. 851 * If time is not set up to reload, then just return. 852 * Else compute next time timer should go off which is > current time. 853 * This is where delay in processing this timeout causes multiple 854 * SIGALRM calls to be compressed into one. 855 * tvtohz_high() always adds 1 to allow for the time until the next clock 856 * interrupt being strictly less than 1 clock tick, but we don't want 857 * that here since we want to appear to be in sync with the clock 858 * interrupt even when we're delayed. 859 */ 860 static 861 void 862 realitexpire(void *arg) 863 { 864 struct proc *p; 865 struct timeval ctv, ntv; 866 867 p = (struct proc *)arg; 868 PHOLD(p); 869 lwkt_gettoken(&p->p_token); 870 ksignal(p, SIGALRM); 871 if (!timevalisset(&p->p_realtimer.it_interval)) { 872 timevalclear(&p->p_realtimer.it_value); 873 goto done; 874 } 875 for (;;) { 876 timevaladd(&p->p_realtimer.it_value, 877 &p->p_realtimer.it_interval); 878 getmicrouptime(&ctv); 879 if (timevalcmp(&p->p_realtimer.it_value, &ctv, >)) { 880 ntv = p->p_realtimer.it_value; 881 timevalsub(&ntv, &ctv); 882 callout_reset(&p->p_ithandle, tvtohz_low(&ntv), 883 realitexpire, p); 884 goto done; 885 } 886 } 887 done: 888 lwkt_reltoken(&p->p_token); 889 PRELE(p); 890 } 891 892 /* 893 * Used to validate itimer timeouts and utimes*() timespecs. 894 */ 895 int 896 itimerfix(struct timeval *tv) 897 { 898 if (tv->tv_sec < 0 || tv->tv_usec < 0 || tv->tv_usec >= 1000000) 899 return (EINVAL); 900 if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < ustick) 901 tv->tv_usec = ustick; 902 return (0); 903 } 904 905 /* 906 * Used to validate timeouts and utimes*() timespecs. 907 */ 908 int 909 itimespecfix(struct timespec *ts) 910 { 911 if (ts->tv_sec < 0 || ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000ULL) 912 return (EINVAL); 913 if (ts->tv_sec == 0 && ts->tv_nsec != 0 && ts->tv_nsec < nstick) 914 ts->tv_nsec = nstick; 915 return (0); 916 } 917 918 /* 919 * Decrement an interval timer by a specified number 920 * of microseconds, which must be less than a second, 921 * i.e. < 1000000. If the timer expires, then reload 922 * it. In this case, carry over (usec - old value) to 923 * reduce the value reloaded into the timer so that 924 * the timer does not drift. This routine assumes 925 * that it is called in a context where the timers 926 * on which it is operating cannot change in value. 927 */ 928 int 929 itimerdecr(struct itimerval *itp, int usec) 930 { 931 932 if (itp->it_value.tv_usec < usec) { 933 if (itp->it_value.tv_sec == 0) { 934 /* expired, and already in next interval */ 935 usec -= itp->it_value.tv_usec; 936 goto expire; 937 } 938 itp->it_value.tv_usec += 1000000; 939 itp->it_value.tv_sec--; 940 } 941 itp->it_value.tv_usec -= usec; 942 usec = 0; 943 if (timevalisset(&itp->it_value)) 944 return (1); 945 /* expired, exactly at end of interval */ 946 expire: 947 if (timevalisset(&itp->it_interval)) { 948 itp->it_value = itp->it_interval; 949 itp->it_value.tv_usec -= usec; 950 if (itp->it_value.tv_usec < 0) { 951 itp->it_value.tv_usec += 1000000; 952 itp->it_value.tv_sec--; 953 } 954 } else 955 itp->it_value.tv_usec = 0; /* sec is already 0 */ 956 return (0); 957 } 958 959 /* 960 * Add and subtract routines for timevals. 961 * N.B.: subtract routine doesn't deal with 962 * results which are before the beginning, 963 * it just gets very confused in this case. 964 * Caveat emptor. 965 */ 966 void 967 timevaladd(struct timeval *t1, const struct timeval *t2) 968 { 969 970 t1->tv_sec += t2->tv_sec; 971 t1->tv_usec += t2->tv_usec; 972 timevalfix(t1); 973 } 974 975 void 976 timevalsub(struct timeval *t1, const struct timeval *t2) 977 { 978 979 t1->tv_sec -= t2->tv_sec; 980 t1->tv_usec -= t2->tv_usec; 981 timevalfix(t1); 982 } 983 984 static void 985 timevalfix(struct timeval *t1) 986 { 987 988 if (t1->tv_usec < 0) { 989 t1->tv_sec--; 990 t1->tv_usec += 1000000; 991 } 992 if (t1->tv_usec >= 1000000) { 993 t1->tv_sec++; 994 t1->tv_usec -= 1000000; 995 } 996 } 997 998 /* 999 * ratecheck(): simple time-based rate-limit checking. 1000 */ 1001 int 1002 ratecheck(struct timeval *lasttime, const struct timeval *mininterval) 1003 { 1004 struct timeval tv, delta; 1005 int rv = 0; 1006 1007 getmicrouptime(&tv); /* NB: 10ms precision */ 1008 delta = tv; 1009 timevalsub(&delta, lasttime); 1010 1011 /* 1012 * check for 0,0 is so that the message will be seen at least once, 1013 * even if interval is huge. 1014 */ 1015 if (timevalcmp(&delta, mininterval, >=) || 1016 (lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) { 1017 *lasttime = tv; 1018 rv = 1; 1019 } 1020 1021 return (rv); 1022 } 1023 1024 /* 1025 * ppsratecheck(): packets (or events) per second limitation. 1026 * 1027 * Return 0 if the limit is to be enforced (e.g. the caller 1028 * should drop a packet because of the rate limitation). 1029 * 1030 * maxpps of 0 always causes zero to be returned. maxpps of -1 1031 * always causes 1 to be returned; this effectively defeats rate 1032 * limiting. 1033 * 1034 * Note that we maintain the struct timeval for compatibility 1035 * with other bsd systems. We reuse the storage and just monitor 1036 * clock ticks for minimal overhead. 1037 */ 1038 int 1039 ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps) 1040 { 1041 int now; 1042 1043 /* 1044 * Reset the last time and counter if this is the first call 1045 * or more than a second has passed since the last update of 1046 * lasttime. 1047 */ 1048 now = ticks; 1049 if (lasttime->tv_sec == 0 || (u_int)(now - lasttime->tv_sec) >= hz) { 1050 lasttime->tv_sec = now; 1051 *curpps = 1; 1052 return (maxpps != 0); 1053 } else { 1054 (*curpps)++; /* NB: ignore potential overflow */ 1055 return (maxpps < 0 || *curpps < maxpps); 1056 } 1057 } 1058