/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_time.c	8.1 (Berkeley) 6/10/93
 * $FreeBSD: src/sys/kern/kern_time.c,v 1.68.2.1 2002/10/01 08:00:41 bde Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/sysproto.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
#include <sys/sysent.h>
#include <sys/sysunion.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/sysctl.h>
#include <sys/kern_syscall.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>

#include <sys/msgport2.h>
#include <sys/thread2.h>
#include <sys/mplock2.h>

struct timezone tz;

/*
 * Time of day and interval timer support.
 *
 * These routines provide the kernel entry points to get and set
 * the time-of-day and per-process interval timers.  Subroutines
 * here provide support for adding and subtracting timeval structures
 * and decrementing interval timers, optionally reloading the interval
 * timers when they expire.
 */

static int	settime(struct timeval *);
static void	timevalfix(struct timeval *);

/*
 * Nanosleep tries very hard to sleep for a precisely requested time
 * interval, down to 1uS.  The administrator can impose a minimum delay
 * and a delay below which we hard-loop instead of initiating a timer
 * interrupt and sleeping.
 *
 * For machines under high loads it might be beneficial to increase min_us
 * to e.g. 1000uS (1ms) so spinning processes sleep meaningfully.
 */
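/*
 * Illustrative sketch (not part of the original file): both knobs are
 * exported read-write below, so an administrator or a userland tool could
 * raise the floor, e.g. via sysctlbyname(3):
 *
 *	int one_ms = 1000;	// microseconds
 *	sysctlbyname("kern.nanosleep_min_us", NULL, NULL,
 *	    &one_ms, sizeof(one_ms));
 *
 * Requests shorter than kern.nanosleep_min_us are rounded up to it, and
 * anything below kern.nanosleep_hard_us is satisfied by yielding/spinning
 * rather than arming a one-shot systimer.
 */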
static int nanosleep_min_us = 10;
static int nanosleep_hard_us = 100;
SYSCTL_INT(_kern, OID_AUTO, nanosleep_min_us, CTLFLAG_RW,
	   &nanosleep_min_us, 0, "");
SYSCTL_INT(_kern, OID_AUTO, nanosleep_hard_us, CTLFLAG_RW,
	   &nanosleep_hard_us, 0, "");

static int
settime(struct timeval *tv)
{
	struct timeval delta, tv1, tv2;
	static struct timeval maxtime, laststep;
	struct timespec ts;
	int origcpu;

	if ((origcpu = mycpu->gd_cpuid) != 0)
		lwkt_setcpu_self(globaldata_find(0));

	crit_enter();
	microtime(&tv1);
	delta = *tv;
	timevalsub(&delta, &tv1);

	/*
	 * If the system is secure, we do not allow the time to be
	 * set to a value earlier than 1 second less than the highest
	 * time we have yet seen.  The worst a miscreant can do in
	 * this circumstance is "freeze" time; he cannot set the clock
	 * back to the past.
	 *
	 * We similarly do not allow the clock to be stepped more
	 * than one second, nor more than once per second.  This allows
	 * a miscreant to make the clock march double-time, but no worse.
	 */
	if (securelevel > 1) {
		if (delta.tv_sec < 0 || delta.tv_usec < 0) {
			/*
			 * Update maxtime to latest time we've seen.
			 */
			if (tv1.tv_sec > maxtime.tv_sec)
				maxtime = tv1;
			tv2 = *tv;
			timevalsub(&tv2, &maxtime);
			if (tv2.tv_sec < -1) {
				tv->tv_sec = maxtime.tv_sec - 1;
				kprintf("Time adjustment clamped to -1 second\n");
			}
		} else {
			if (tv1.tv_sec == laststep.tv_sec) {
				crit_exit();
				return (EPERM);
			}
			if (delta.tv_sec > 1) {
				tv->tv_sec = tv1.tv_sec + 1;
				kprintf("Time adjustment clamped to +1 second\n");
			}
			laststep = *tv;
		}
	}

	ts.tv_sec = tv->tv_sec;
	ts.tv_nsec = tv->tv_usec * 1000;
	set_timeofday(&ts);
	crit_exit();

	if (origcpu != 0)
		lwkt_setcpu_self(globaldata_find(origcpu));

	resettodr();
	return (0);
}

/*
 * MPSAFE
 */
int
kern_clock_gettime(clockid_t clock_id, struct timespec *ats)
{
	int error = 0;

	switch(clock_id) {
	case CLOCK_REALTIME:
		nanotime(ats);
		break;
	case CLOCK_MONOTONIC:
		nanouptime(ats);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

/*
 * MPSAFE
 */
int
sys_clock_gettime(struct clock_gettime_args *uap)
{
	struct timespec ats;
	int error;

	error = kern_clock_gettime(uap->clock_id, &ats);
	if (error == 0)
		error = copyout(&ats, uap->tp, sizeof(ats));

	return (error);
}

int
kern_clock_settime(clockid_t clock_id, struct timespec *ats)
{
	struct thread *td = curthread;
	struct timeval atv;
	int error;

	if ((error = priv_check(td, PRIV_CLOCK_SETTIME)) != 0)
		return (error);
	if (clock_id != CLOCK_REALTIME)
		return (EINVAL);
	if (ats->tv_nsec < 0 || ats->tv_nsec >= 1000000000)
		return (EINVAL);

	TIMESPEC_TO_TIMEVAL(&atv, ats);
	error = settime(&atv);
	return (error);
}

/*
 * MPALMOSTSAFE
 */
int
sys_clock_settime(struct clock_settime_args *uap)
{
	struct timespec ats;
	int error;

	if ((error = copyin(uap->tp, &ats, sizeof(ats))) != 0)
		return (error);

	get_mplock();
	error = kern_clock_settime(uap->clock_id, &ats);
	rel_mplock();
	return (error);
}
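/*
 * Illustrative sketch (not part of the original file): the securelevel
 * clamping in settime() above is what a privileged userland caller runs
 * into.  With securelevel > 1, stepping CLOCK_REALTIME back by more than
 * a second is clamped to -1 second, and a second forward step within the
 * same wall-clock second fails with EPERM:
 *
 *	struct timespec ts;
 *
 *	clock_gettime(CLOCK_REALTIME, &ts);
 *	ts.tv_sec -= 3600;			// try to step back an hour
 *	if (clock_settime(CLOCK_REALTIME, &ts) == -1)
 *		err(1, "clock_settime");	// EPERM/EINVAL as above
 */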
/*
 * MPSAFE
 */
int
kern_clock_getres(clockid_t clock_id, struct timespec *ts)
{
	int error;

	switch(clock_id) {
	case CLOCK_REALTIME:
	case CLOCK_MONOTONIC:
		/*
		 * Round up the result of the division cheaply
		 * by adding 1.  Rounding up is especially important
		 * if rounding down would give 0.  Perfect rounding
		 * is unimportant.
		 */
		ts->tv_sec = 0;
		ts->tv_nsec = 1000000000 / sys_cputimer->freq + 1;
		error = 0;
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}

/*
 * MPSAFE
 */
int
sys_clock_getres(struct clock_getres_args *uap)
{
	int error;
	struct timespec ts;

	error = kern_clock_getres(uap->clock_id, &ts);
	if (error == 0)
		error = copyout(&ts, uap->tp, sizeof(ts));

	return (error);
}

/*
 * nanosleep1()
 *
 *	This is a general helper function for nanosleep() (aka sleep() aka
 *	usleep()).
 *
 *	If there is less than one tick's worth of time left and
 *	we haven't done a yield, or the remaining microseconds is
 *	ridiculously low, do a yield.  This avoids having
 *	to deal with systimer overheads when the system is under
 *	heavy loads.  If we have done a yield already then use
 *	a systimer and an uninterruptible thread wait.
 *
 *	If there is more than a tick's worth of time left,
 *	calculate the baseline ticks and use an interruptible
 *	tsleep, then handle the fine-grained delay on the next
 *	loop.  This usually results in two sleeps occurring, a long one
 *	and a short one.
 *
 * MPSAFE
 */
static void
ns1_systimer(systimer_t info, int in_ipi __unused,
    struct intrframe *frame __unused)
{
	lwkt_schedule(info->data);
}

int
nanosleep1(struct timespec *rqt, struct timespec *rmt)
{
	static int nanowait;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (rqt->tv_nsec < 0 || rqt->tv_nsec >= 1000000000)
		return (EINVAL);
	/* XXX: imho this should return EINVAL at least for tv_sec < 0 */
	if (rqt->tv_sec < 0 || (rqt->tv_sec == 0 && rqt->tv_nsec == 0))
		return (0);
	nanouptime(&ts);
	timespecadd(&ts, rqt);		/* ts = target timestamp compare */
	TIMESPEC_TO_TIMEVAL(&tv, rqt);	/* tv = sleep interval */

	for (;;) {
		int ticks;
		struct systimer info;

		ticks = tv.tv_usec / ustick;	/* approximate */

		if (tv.tv_sec == 0 && ticks == 0) {
			thread_t td = curthread;
			if (tv.tv_usec > 0 && tv.tv_usec < nanosleep_min_us)
				tv.tv_usec = nanosleep_min_us;
			if (tv.tv_usec < nanosleep_hard_us) {
				lwkt_user_yield();
				cpu_pause();
			} else {
				crit_enter_quick(td);
				systimer_init_oneshot(&info, ns1_systimer,
						td, tv.tv_usec);
				lwkt_deschedule_self(td);
				crit_exit_quick(td);
				lwkt_switch();
				systimer_del(&info); /* make sure it's gone */
			}
			error = iscaught(td->td_lwp);
		} else if (tv.tv_sec == 0) {
			error = tsleep(&nanowait, PCATCH, "nanslp", ticks);
		} else {
			ticks = tvtohz_low(&tv); /* also handles overflow */
			error = tsleep(&nanowait, PCATCH, "nanslp", ticks);
		}
		nanouptime(&ts2);
		if (error && error != EWOULDBLOCK) {
			if (error == ERESTART)
				error = EINTR;
			if (rmt != NULL) {
				timespecsub(&ts, &ts2);
				if (ts.tv_sec < 0)
					timespecclear(&ts);
				*rmt = ts;
			}
			return (error);
		}
		if (timespeccmp(&ts2, &ts, >=))
			return (0);
		ts3 = ts;
		timespecsub(&ts3, &ts2);
		TIMESPEC_TO_TIMEVAL(&tv, &ts3);
	}
}

/*
 * MPSAFE
 */
int
sys_nanosleep(struct nanosleep_args *uap)
{
	int error;
	struct timespec rqt;
	struct timespec rmt;

	error = copyin(uap->rqtp, &rqt, sizeof(rqt));
	if (error)
		return (error);

	error = nanosleep1(&rqt, &rmt);

	/*
	 * Copy out the residual if nanosleep was interrupted.
	 */
	if (error && uap->rmtp) {
		int error2;

		error2 = copyout(&rmt, uap->rmtp, sizeof(rmt));
		if (error2)
			error = error2;
	}
	return (error);
}
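/*
 * Illustrative sketch (not part of the original file): the rmt residual
 * computed above is what lets an interrupted userland sleeper pick up
 * where it left off:
 *
 *	struct timespec req = { 5, 0 };		// 5 second nap
 *	struct timespec rem;
 *
 *	while (nanosleep(&req, &rem) == -1 && errno == EINTR)
 *		req = rem;			// resume with what is left
 */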
/*
 * MPSAFE
 */
int
sys_gettimeofday(struct gettimeofday_args *uap)
{
	struct timeval atv;
	int error = 0;

	if (uap->tp) {
		microtime(&atv);
		if ((error = copyout((caddr_t)&atv, (caddr_t)uap->tp,
		    sizeof (atv))))
			return (error);
	}
	if (uap->tzp)
		error = copyout((caddr_t)&tz, (caddr_t)uap->tzp,
		    sizeof (tz));
	return (error);
}

/*
 * MPALMOSTSAFE
 */
int
sys_settimeofday(struct settimeofday_args *uap)
{
	struct thread *td = curthread;
	struct timeval atv;
	struct timezone atz;
	int error;

	if ((error = priv_check(td, PRIV_SETTIMEOFDAY)))
		return (error);
	/* Verify all parameters before changing time. */
	if (uap->tv) {
		if ((error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
		    sizeof(atv))))
			return (error);
		if (atv.tv_usec < 0 || atv.tv_usec >= 1000000)
			return (EINVAL);
	}
	if (uap->tzp &&
	    (error = copyin((caddr_t)uap->tzp, (caddr_t)&atz, sizeof(atz))))
		return (error);

	get_mplock();
	if (uap->tv && (error = settime(&atv))) {
		rel_mplock();
		return (error);
	}
	rel_mplock();
	if (uap->tzp)
		tz = atz;
	return (0);
}

static void
kern_adjtime_common(void)
{
	if ((ntp_delta >= 0 && ntp_delta < ntp_default_tick_delta) ||
	    (ntp_delta < 0 && ntp_delta > -ntp_default_tick_delta))
		ntp_tick_delta = ntp_delta;
	else if (ntp_delta > ntp_big_delta)
		ntp_tick_delta = 10 * ntp_default_tick_delta;
	else if (ntp_delta < -ntp_big_delta)
		ntp_tick_delta = -10 * ntp_default_tick_delta;
	else if (ntp_delta > 0)
		ntp_tick_delta = ntp_default_tick_delta;
	else
		ntp_tick_delta = -ntp_default_tick_delta;
}

void
kern_adjtime(int64_t delta, int64_t *odelta)
{
	int origcpu;

	if ((origcpu = mycpu->gd_cpuid) != 0)
		lwkt_setcpu_self(globaldata_find(0));

	crit_enter();
	*odelta = ntp_delta;
	ntp_delta = delta;
	kern_adjtime_common();
	crit_exit();

	if (origcpu != 0)
		lwkt_setcpu_self(globaldata_find(origcpu));
}

static void
kern_get_ntp_delta(int64_t *delta)
{
	int origcpu;

	if ((origcpu = mycpu->gd_cpuid) != 0)
		lwkt_setcpu_self(globaldata_find(0));

	crit_enter();
	*delta = ntp_delta;
	crit_exit();

	if (origcpu != 0)
		lwkt_setcpu_self(globaldata_find(origcpu));
}

void
kern_reladjtime(int64_t delta)
{
	int origcpu;

	if ((origcpu = mycpu->gd_cpuid) != 0)
		lwkt_setcpu_self(globaldata_find(0));

	crit_enter();
	ntp_delta += delta;
	kern_adjtime_common();
	crit_exit();

	if (origcpu != 0)
		lwkt_setcpu_self(globaldata_find(origcpu));
}

static void
kern_adjfreq(int64_t rate)
{
	int origcpu;

	if ((origcpu = mycpu->gd_cpuid) != 0)
		lwkt_setcpu_self(globaldata_find(0));

	crit_enter();
	ntp_tick_permanent = rate;
	crit_exit();

	if (origcpu != 0)
		lwkt_setcpu_self(globaldata_find(origcpu));
}
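/*
 * Illustrative sketch (not part of the original file): kern_adjtime()
 * and kern_reladjtime() back the adjtime(2) path below; slewing the
 * clock forward 50ms from userland, without stepping it, looks like:
 *
 *	struct timeval delta = { 0, 50000 };	// +50ms
 *	struct timeval olddelta;
 *
 *	adjtime(&delta, &olddelta);	// olddelta = unapplied remainder
 */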
/*
 * MPALMOSTSAFE
 */
int
sys_adjtime(struct adjtime_args *uap)
{
	struct thread *td = curthread;
	struct timeval atv;
	int64_t ndelta, odelta;
	int error;

	if ((error = priv_check(td, PRIV_ADJTIME)))
		return (error);
	error = copyin(uap->delta, &atv, sizeof(struct timeval));
	if (error)
		return (error);

	/*
	 * Compute the total correction and the rate at which to apply it.
	 * Round the adjustment down to a whole multiple of the per-tick
	 * delta, so that after some number of incremental changes in
	 * hardclock(), tickdelta will become zero, lest the correction
	 * overshoot and start taking us away from the desired final time.
	 */
	ndelta = (int64_t)atv.tv_sec * 1000000000 + atv.tv_usec * 1000;
	get_mplock();
	kern_adjtime(ndelta, &odelta);
	rel_mplock();

	if (uap->olddelta) {
		atv.tv_sec = odelta / 1000000000;
		atv.tv_usec = odelta % 1000000000 / 1000;
		copyout(&atv, uap->olddelta, sizeof(struct timeval));
	}
	return (0);
}

static int
sysctl_adjtime(SYSCTL_HANDLER_ARGS)
{
	int64_t delta;
	int error;

	if (req->newptr != NULL) {
		if (priv_check(curthread, PRIV_ROOT))
			return (EPERM);
		error = SYSCTL_IN(req, &delta, sizeof(delta));
		if (error)
			return (error);
		kern_reladjtime(delta);
	}

	if (req->oldptr)
		kern_get_ntp_delta(&delta);
	error = SYSCTL_OUT(req, &delta, sizeof(delta));
	return (error);
}

/*
 * delta is in nanoseconds.
 */
static int
sysctl_delta(SYSCTL_HANDLER_ARGS)
{
	int64_t delta, old_delta;
	int error;

	if (req->newptr != NULL) {
		if (priv_check(curthread, PRIV_ROOT))
			return (EPERM);
		error = SYSCTL_IN(req, &delta, sizeof(delta));
		if (error)
			return (error);
		kern_adjtime(delta, &old_delta);
	}

	if (req->oldptr != NULL)
		kern_get_ntp_delta(&old_delta);
	error = SYSCTL_OUT(req, &old_delta, sizeof(old_delta));
	return (error);
}
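/*
 * Illustrative sketch (not part of the original file): both handlers
 * above speak nanoseconds.  Writing the kern.ntp.delta node registered
 * below queues a one-time 100ms slew:
 *
 *	int64_t delta = 100000000LL;	// +100ms, in nanoseconds
 *	sysctlbyname("kern.ntp.delta", NULL, NULL, &delta, sizeof(delta));
 */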
/*
 * frequency is in nanoseconds per second shifted left 32.
 * kern_adjfreq() needs it in nanoseconds per tick shifted left 32.
 */
static int
sysctl_adjfreq(SYSCTL_HANDLER_ARGS)
{
	int64_t freqdelta;
	int error;

	if (req->newptr != NULL) {
		if (priv_check(curthread, PRIV_ROOT))
			return (EPERM);
		error = SYSCTL_IN(req, &freqdelta, sizeof(freqdelta));
		if (error)
			return (error);

		freqdelta /= hz;
		kern_adjfreq(freqdelta);
	}

	if (req->oldptr != NULL)
		freqdelta = ntp_tick_permanent * hz;
	error = SYSCTL_OUT(req, &freqdelta, sizeof(freqdelta));
	if (error)
		return (error);

	return (0);
}

SYSCTL_NODE(_kern, OID_AUTO, ntp, CTLFLAG_RW, 0, "NTP related controls");
SYSCTL_PROC(_kern_ntp, OID_AUTO, permanent,
    CTLTYPE_QUAD|CTLFLAG_RW, 0, 0,
    sysctl_adjfreq, "Q", "permanent correction per second");
SYSCTL_PROC(_kern_ntp, OID_AUTO, delta,
    CTLTYPE_QUAD|CTLFLAG_RW, 0, 0,
    sysctl_delta, "Q", "one-time delta");
SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, big_delta, CTLFLAG_RD,
    &ntp_big_delta, sizeof(ntp_big_delta), "Q",
    "threshold for fast adjustment");
SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, tick_delta, CTLFLAG_RD,
    &ntp_tick_delta, sizeof(ntp_tick_delta), "LU",
    "per-tick adjustment");
SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, default_tick_delta, CTLFLAG_RD,
    &ntp_default_tick_delta, sizeof(ntp_default_tick_delta), "LU",
    "default per-tick adjustment");
SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, next_leap_second, CTLFLAG_RW,
    &ntp_leap_second, sizeof(ntp_leap_second), "LU",
    "next leap second");
SYSCTL_INT(_kern_ntp, OID_AUTO, insert_leap_second, CTLFLAG_RW,
    &ntp_leap_insert, 0, "insert or remove leap second");
SYSCTL_PROC(_kern_ntp, OID_AUTO, adjust,
    CTLTYPE_QUAD|CTLFLAG_RW, 0, 0,
    sysctl_adjtime, "Q", "relative adjust for delta");
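/*
 * Worked example (illustrative, not part of the original file): a
 * permanent correction of +1 ppm means gaining 1000 nanoseconds per
 * second, so the value written to kern.ntp.permanent is 1000 << 32.
 * With hz = 100, sysctl_adjfreq() above divides by hz and hands
 * kern_adjfreq() (1000 << 32) / 100, i.e. 10ns per tick in 32.32
 * fixed point.
 */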
/*
 * Get value of an interval timer.  The process virtual and
 * profiling virtual time timers are kept in the p_stats area, since
 * they can be swapped out.  These are kept internally in the
 * way they are specified externally: in time until they expire.
 *
 * The real time interval timer is kept in the process table slot
 * for the process, and its value (it_value) is kept as an
 * absolute time rather than as a delta, so that it is easy to keep
 * periodic real-time signals from drifting.
 *
 * Virtual time timers are processed in the hardclock() routine of
 * kern_clock.c.  The real time timer is processed by a timeout
 * routine, called from the softclock() routine.  Since a callout
 * may be delayed in real time due to interrupt processing in the system,
 * it is possible for the real time timeout routine (realitexpire, given
 * below) to be delayed in real time past when it is supposed to occur.
 * It does not suffice, therefore, to reload the real timer's .it_value
 * from its .it_interval.  Rather, we compute the next time in absolute
 * time the timer should go off.
 *
 * MPALMOSTSAFE
 */
int
sys_getitimer(struct getitimer_args *uap)
{
	struct proc *p = curproc;
	struct timeval ctv;
	struct itimerval aitv;

	if (uap->which > ITIMER_PROF)
		return (EINVAL);
	lwkt_gettoken(&p->p_token);
	if (uap->which == ITIMER_REAL) {
		/*
		 * Convert from absolute to relative time in .it_value
		 * part of real time timer.  If time for real time timer
		 * has passed return 0, else return difference between
		 * current time and time for the timer to go off.
		 */
		aitv = p->p_realtimer;
		if (timevalisset(&aitv.it_value)) {
			getmicrouptime(&ctv);
			if (timevalcmp(&aitv.it_value, &ctv, <))
				timevalclear(&aitv.it_value);
			else
				timevalsub(&aitv.it_value, &ctv);
		}
	} else {
		aitv = p->p_timer[uap->which];
	}
	lwkt_reltoken(&p->p_token);
	return (copyout(&aitv, uap->itv, sizeof (struct itimerval)));
}

/*
 * MPALMOSTSAFE
 */
int
sys_setitimer(struct setitimer_args *uap)
{
	struct itimerval aitv;
	struct timeval ctv;
	struct itimerval *itvp;
	struct proc *p = curproc;
	int error;

	if (uap->which > ITIMER_PROF)
		return (EINVAL);
	itvp = uap->itv;
	if (itvp && (error = copyin((caddr_t)itvp, (caddr_t)&aitv,
	    sizeof(struct itimerval))))
		return (error);
	if ((uap->itv = uap->oitv) &&
	    (error = sys_getitimer((struct getitimer_args *)uap)))
		return (error);
	if (itvp == NULL)
		return (0);
	if (itimerfix(&aitv.it_value))
		return (EINVAL);
	if (!timevalisset(&aitv.it_value))
		timevalclear(&aitv.it_interval);
	else if (itimerfix(&aitv.it_interval))
		return (EINVAL);
	lwkt_gettoken(&p->p_token);
	if (uap->which == ITIMER_REAL) {
		if (timevalisset(&p->p_realtimer.it_value))
			callout_stop_sync(&p->p_ithandle);
		if (timevalisset(&aitv.it_value))
			callout_reset(&p->p_ithandle,
			    tvtohz_high(&aitv.it_value), realitexpire, p);
		getmicrouptime(&ctv);
		timevaladd(&aitv.it_value, &ctv);
		p->p_realtimer = aitv;
	} else {
		p->p_timer[uap->which] = aitv;
		switch(uap->which) {
		case ITIMER_VIRTUAL:
			p->p_flags &= ~P_SIGVTALRM;
			break;
		case ITIMER_PROF:
			p->p_flags &= ~P_SIGPROF;
			break;
		}
	}
	lwkt_reltoken(&p->p_token);
	return (0);
}

/*
 * Real interval timer expired:
 * send process whose timer expired an alarm signal.
 * If time is not set up to reload, then just return.
 * Else compute next time timer should go off which is > current time.
 * This is where delay in processing this timeout causes multiple
 * SIGALRM calls to be compressed into one.
 * tvtohz_high() always adds 1 to allow for the time until the next clock
 * interrupt being strictly less than 1 clock tick, but we don't want
 * that here since we want to appear to be in sync with the clock
 * interrupt even when we're delayed.
 */
void
realitexpire(void *arg)
{
	struct proc *p;
	struct timeval ctv, ntv;

	p = (struct proc *)arg;
	PHOLD(p);
	lwkt_gettoken(&p->p_token);
	ksignal(p, SIGALRM);
	if (!timevalisset(&p->p_realtimer.it_interval)) {
		timevalclear(&p->p_realtimer.it_value);
		goto done;
	}
	for (;;) {
		timevaladd(&p->p_realtimer.it_value,
		    &p->p_realtimer.it_interval);
		getmicrouptime(&ctv);
		if (timevalcmp(&p->p_realtimer.it_value, &ctv, >)) {
			ntv = p->p_realtimer.it_value;
			timevalsub(&ntv, &ctv);
			callout_reset(&p->p_ithandle, tvtohz_low(&ntv),
			    realitexpire, p);
			goto done;
		}
	}
done:
	lwkt_reltoken(&p->p_token);
	PRELE(p);
}
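/*
 * Illustrative sketch (not part of the original file): the absolute
 * it_value reload above keeps a periodic userland timer from drifting
 * even when the callout fires late:
 *
 *	struct itimerval itv;
 *
 *	itv.it_interval.tv_sec = 0;
 *	itv.it_interval.tv_usec = 100000;	// 100ms period
 *	itv.it_value = itv.it_interval;		// first expiry
 *	signal(SIGALRM, handler);		// handler is hypothetical
 *	setitimer(ITIMER_REAL, &itv, NULL);	// SIGALRM every ~100ms
 */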
832 * 833 * MPSAFE 834 */ 835 int 836 itimerfix(struct timeval *tv) 837 { 838 839 if (tv->tv_sec < 0 || tv->tv_sec > 100000000 || 840 tv->tv_usec < 0 || tv->tv_usec >= 1000000) 841 return (EINVAL); 842 if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < ustick) 843 tv->tv_usec = ustick; 844 return (0); 845 } 846 847 /* 848 * Decrement an interval timer by a specified number 849 * of microseconds, which must be less than a second, 850 * i.e. < 1000000. If the timer expires, then reload 851 * it. In this case, carry over (usec - old value) to 852 * reduce the value reloaded into the timer so that 853 * the timer does not drift. This routine assumes 854 * that it is called in a context where the timers 855 * on which it is operating cannot change in value. 856 */ 857 int 858 itimerdecr(struct itimerval *itp, int usec) 859 { 860 861 if (itp->it_value.tv_usec < usec) { 862 if (itp->it_value.tv_sec == 0) { 863 /* expired, and already in next interval */ 864 usec -= itp->it_value.tv_usec; 865 goto expire; 866 } 867 itp->it_value.tv_usec += 1000000; 868 itp->it_value.tv_sec--; 869 } 870 itp->it_value.tv_usec -= usec; 871 usec = 0; 872 if (timevalisset(&itp->it_value)) 873 return (1); 874 /* expired, exactly at end of interval */ 875 expire: 876 if (timevalisset(&itp->it_interval)) { 877 itp->it_value = itp->it_interval; 878 itp->it_value.tv_usec -= usec; 879 if (itp->it_value.tv_usec < 0) { 880 itp->it_value.tv_usec += 1000000; 881 itp->it_value.tv_sec--; 882 } 883 } else 884 itp->it_value.tv_usec = 0; /* sec is already 0 */ 885 return (0); 886 } 887 888 /* 889 * Add and subtract routines for timevals. 890 * N.B.: subtract routine doesn't deal with 891 * results which are before the beginning, 892 * it just gets very confused in this case. 893 * Caveat emptor. 894 */ 895 void 896 timevaladd(struct timeval *t1, const struct timeval *t2) 897 { 898 899 t1->tv_sec += t2->tv_sec; 900 t1->tv_usec += t2->tv_usec; 901 timevalfix(t1); 902 } 903 904 void 905 timevalsub(struct timeval *t1, const struct timeval *t2) 906 { 907 908 t1->tv_sec -= t2->tv_sec; 909 t1->tv_usec -= t2->tv_usec; 910 timevalfix(t1); 911 } 912 913 static void 914 timevalfix(struct timeval *t1) 915 { 916 917 if (t1->tv_usec < 0) { 918 t1->tv_sec--; 919 t1->tv_usec += 1000000; 920 } 921 if (t1->tv_usec >= 1000000) { 922 t1->tv_sec++; 923 t1->tv_usec -= 1000000; 924 } 925 } 926 927 /* 928 * ratecheck(): simple time-based rate-limit checking. 929 */ 930 int 931 ratecheck(struct timeval *lasttime, const struct timeval *mininterval) 932 { 933 struct timeval tv, delta; 934 int rv = 0; 935 936 getmicrouptime(&tv); /* NB: 10ms precision */ 937 delta = tv; 938 timevalsub(&delta, lasttime); 939 940 /* 941 * check for 0,0 is so that the message will be seen at least once, 942 * even if interval is huge. 943 */ 944 if (timevalcmp(&delta, mininterval, >=) || 945 (lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) { 946 *lasttime = tv; 947 rv = 1; 948 } 949 950 return (rv); 951 } 952 953 /* 954 * ppsratecheck(): packets (or events) per second limitation. 955 * 956 * Return 0 if the limit is to be enforced (e.g. the caller 957 * should drop a packet because of the rate limitation). 958 * 959 * maxpps of 0 always causes zero to be returned. maxpps of -1 960 * always causes 1 to be returned; this effectively defeats rate 961 * limiting. 962 * 963 * Note that we maintain the struct timeval for compatibility 964 * with other bsd systems. We reuse the storage and just monitor 965 * clock ticks for minimal overhead. 
/*
 * ppsratecheck(): packets (or events) per second limitation.
 *
 * Return 0 if the limit is to be enforced (e.g. the caller
 * should drop a packet because of the rate limitation).
 *
 * maxpps of 0 always causes zero to be returned.  maxpps of -1
 * always causes 1 to be returned; this effectively defeats rate
 * limiting.
 *
 * Note that we maintain the struct timeval for compatibility
 * with other BSD systems.  We reuse the storage and just monitor
 * clock ticks for minimal overhead.
 */
int
ppsratecheck(struct timeval *lasttime, int *curpps, int maxpps)
{
	int now;

	/*
	 * Reset the last time and counter if this is the first call
	 * or more than a second has passed since the last update of
	 * lasttime.
	 */
	now = ticks;
	if (lasttime->tv_sec == 0 || (u_int)(now - lasttime->tv_sec) >= hz) {
		lasttime->tv_sec = now;
		*curpps = 1;
		return (maxpps != 0);
	} else {
		(*curpps)++;		/* NB: ignore potential overflow */
		return (maxpps < 0 || *curpps < maxpps);
	}
}
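/*
 * Illustrative sketch (not part of the original file): a caller enforces
 * a per-second event budget and drops the excess:
 *
 *	static struct timeval lasttime;
 *	static int curpps;
 *
 *	if (!ppsratecheck(&lasttime, &curpps, 100))
 *		return;			// over 100 events/sec, drop it
 */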