/*-
 * Copyright (c) 1982, 1986, 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)kern_clock.c	7.21 (Berkeley) 06/20/92
 */

#include "param.h"
#include "systm.h"
#include "dkstat.h"
#include "callout.h"
#include "kernel.h"
#include "proc.h"
#include "resourcevar.h"

#include "machine/cpu.h"

#ifdef GPROF
#include "gprof.h"
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run
 * independently of each other.  The main clock, running hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine stathz times a second.  Both functions
 * can be performed by a single clock (i.e., hz == stathz); however, the
 * statistics will be much more prone to error.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow a
 * non-approximate measure of resource utilization.
 */

/*
 * TODO:
 *	time of day, system/user timing, timeouts, profiling on separate timers
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register struct timeval *tp = (t); \
 \
	tp->tv_usec += (usec); \
	if (tp->tv_usec >= 1000000) { \
		tp->tv_usec -= 1000000; \
		tp->tv_sec++; \
	} \
}

int	ticks;
int	stathz;
int	profhz;
int	profprocs;
struct	timeval time;
struct	timeval mono_time;
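/*
 * Illustrative sketch, not part of the original file: how BUMPTIME's
 * single-carry normalization behaves.  It is only correct when the
 * increment is less than one second, which always holds for a tick.
 * The CLOCK_EXAMPLE guard is hypothetical, so this is never compiled
 * into the kernel.
 */
#ifdef CLOCK_EXAMPLE
static void
bumptime_example()
{
	struct timeval t;

	t.tv_sec = 41;
	t.tv_usec = 999900;
	BUMPTIME(&t, 200);	/* 999900 + 200 = 1000100: one carry */
	/* now t.tv_sec == 42 and t.tv_usec == 100 */
}
#endif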
/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics gathering routine as well.
 */
hardclock(frame)
	clockframe frame;
{
	register struct callout *p1;
	register struct proc *p = curproc;
	register struct pstats *pstats;
	register int s;
	int needsoft = 0;
	time_t secs;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
	p1 = calltodo.c_next;
	while (p1) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
		p1 = p1->c_next;
	}

	/*
	 * Curproc (now in p) is null if no process is running.
	 * We assume that curproc is set in user mode!
	 */
	if (p)
		pstats = p->p_stats;
	/*
	 * Charge the time out based on the mode the cpu is in.
	 * Here again we fudge for the lack of proper interval timers,
	 * assuming that the current state has been around for at least
	 * one tick.
	 */
	if (CLKF_USERMODE(&frame)) {
		if (pstats->p_prof.pr_scale)
			needsoft = 1;
		/*
		 * CPU was in user state.  Increment the user time
		 * counter, and process the process-virtual time
		 * interval timer.
		 */
		BUMPTIME(&p->p_utime, tick);
		if (timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
	} else {
		/*
		 * CPU was in system state.
		 */
		if (p)
			BUMPTIME(&p->p_stime, tick);
	}

	/*
	 * If the cpu is currently scheduled to a process, then
	 * charge it with resource utilization for a tick, updating
	 * statistics which run in (user+system) virtual time,
	 * such as the cpu time limit and profiling timers.
	 * This assumes that the current process has been running
	 * the entire last tick.
	 */
	if (p) {
		secs = p->p_utime.tv_sec + p->p_stime.tv_sec + 1;
		if (secs > p->p_rlimit[RLIMIT_CPU].rlim_cur) {
			if (secs > p->p_rlimit[RLIMIT_CPU].rlim_max)
				psignal(p, SIGKILL);
			else {
				psignal(p, SIGXCPU);
				if (p->p_rlimit[RLIMIT_CPU].rlim_cur <
				    p->p_rlimit[RLIMIT_CPU].rlim_max)
					p->p_rlimit[RLIMIT_CPU].rlim_cur += 5;
			}
		}
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);

		/*
		 * We adjust the priority of the current process.
		 * The priority of a process gets worse as it accumulates
		 * CPU time.  The cpu usage estimator (p_cpu) is increased here
		 * and the formula for computing priorities (in kern_synch.c)
		 * will compute a different value each time p_cpu increases
		 * by 4.  The cpu usage estimator ramps up quite quickly when
		 * the process is running (linearly), and decays away
		 * exponentially, at a rate which is proportionally slower
		 * when the system is busy.  The basic principle is that the
		 * system will 90% forget that a process used a lot of CPU
		 * time in 5*loadav seconds.  This causes the system to favor
		 * processes which haven't run much recently, and to
		 * round-robin among other processes.
		 */
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu&3) == 0) {
			setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}

	/*
	 * If the alternate clock has not made itself known, then
	 * we must gather the statistics.
	 */
	if (stathz == 0)
		gatherstats(&frame);

	/*
	 * Increment the time-of-day, and schedule
	 * processing of the callouts at a very low cpu priority,
	 * so we don't keep the relatively high clock interrupt
	 * priority any longer than necessary.
	 */
	ticks++;
	if (timedelta == 0) {
		BUMPTIME(&time, tick)
		BUMPTIME(&mono_time, tick)
	} else {
		register int delta;

		if (timedelta < 0) {
			delta = tick - tickdelta;
			timedelta += tickdelta;
		} else {
			delta = tick + tickdelta;
			timedelta -= tickdelta;
		}
		BUMPTIME(&time, delta);
		BUMPTIME(&mono_time, delta)
	}
	if (needsoft) {
		if (CLKF_BASEPRI(&frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void) splsoftclock();
			softclock(frame);
		} else
			setsoftclock();
	}
}

int	dk_ndrive = DK_NDRIVE;
/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 */
gatherstats(framep)
	clockframe *framep;
{
	register int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
	if (CLKF_USERMODE(framep)) {
		/*
		 * CPU was in user state.
		 */
		if (curproc->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling the kernel,
		 * increment a counter.  If no process is running,
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
		 */
		cpstate = CP_SYS;
		if (curproc == NULL && CLKF_BASEPRI(framep))
			cpstate = CP_IDLE;
#ifdef GPROF
		s = CLKF_PC(framep) - s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 */
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}
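/*
 * Illustrative sketch, not part of the original file: the delta encoding
 * that lets hardclock() age the whole callout queue by touching only its
 * head.  Events due at absolute ticks 3, 5, and 9 are stored as the
 * deltas 3, 2, 4; the absolute time of an entry is the prefix sum of the
 * deltas up to it, so decrementing only the head advances every pending
 * event by one tick.  The CLOCK_EXAMPLE guard is hypothetical.
 */
#ifdef CLOCK_EXAMPLE
static void
callout_delta_example()
{
	int delta[3], due, i;

	delta[0] = 3;
	delta[1] = 2;
	delta[2] = 4;
	delta[0]--;		/* one hardclock tick */
	for (due = 0, i = 0; i < 3; i++)
		due += delta[i];
	/* due == 8: the last event, once 9 ticks away, is now 8 away */
}
#endif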
/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
softclock(frame)
	clockframe frame;
{

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register int (*func)();
		register int a, s;

		s = splhigh();
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		(*func)(arg, a);
	}
	/*
	 * If trapped from user mode and profiling, give the process
	 * a profiling tick.
	 */
	if (CLKF_USERMODE(&frame)) {
		register struct proc *p = curproc;

		if (p->p_stats->p_prof.pr_scale)
			profile_tick(p, &frame);
		/*
		 * Check to see if the process has accumulated
		 * more than 10 minutes of user time.  If so,
		 * reduce priority to give others a chance.
		 */
		if (p->p_ucred->cr_uid && p->p_nice == NZERO &&
		    p->p_utime.tv_sec > 10 * 60) {
			p->p_nice = NZERO + 4;
			setpri(p);
			p->p_pri = p->p_usrpri;
		}
	}
}

/*
 * Notification of start of profiling clock.
 *
 * Kernel profiling passes proc0, which never exits and hence
 * keeps the profile clock running constantly.
 */
startprofclock(p)
	struct proc *p;
{

	if (p->p_flag & SPROFIL)
		return;
	profprocs++;
	p->p_flag |= SPROFIL;
#ifdef PROFTIMER
	initprofclock(profprocs);
#else
	profhz = hz;
#endif
}

/*
 * Notification of stopping of profile clock.
 */
stopprofclock(p)
	struct proc *p;
{

	if ((p->p_flag & SPROFIL) == 0)
		return;
	profprocs--;
	p->p_flag &= ~SPROFIL;
#ifdef PROFTIMER
	initprofclock(profprocs);
#endif
}

/*
 * Arrange that (*func)(arg) is called in t/hz seconds.
 */
timeout(func, arg, t)
	int (*func)();
	caddr_t arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s = splhigh();

	if (t <= 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = func;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
untimeout(func, arg)
	int (*func)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = splhigh();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == func && p2->c_arg == arg) {
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}
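/*
 * Illustrative sketch, not part of the original file: typical use of
 * timeout()/untimeout() by a driver.  The example_poll and example_detach
 * routines are hypothetical; example_poll reschedules itself once per
 * second (hz ticks), and example_detach cancels the pending callout by
 * the same (func, arg) pair used to register it.  The CLOCK_EXAMPLE
 * guard is hypothetical as well.
 */
#ifdef CLOCK_EXAMPLE
example_poll(arg, t)
	caddr_t arg;
	int t;
{

	/* ... poll the (hypothetical) device described by arg ... */
	timeout(example_poll, arg, hz);		/* again in one second */
}

example_detach(arg)
	caddr_t arg;
{

	untimeout(example_poll, arg);	/* cancel any pending callout */
}
#endif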
/*
 * Compute number of hz until specified time.
 * Used to compute the third argument to timeout() from an
 * absolute time.
 */
hzto(tv)
	struct timeval *tv;
{
	register long ticks;
	register long sec;
	int s = splhigh();

	/*
	 * If the number of milliseconds will fit in 32 bit arithmetic,
	 * then compute the number of milliseconds to the time and scale
	 * to ticks.  Otherwise just compute the number of hz in the time,
	 * rounding times greater than representable to the maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

/*
 * Return information about system clocks.
 */
/* ARGSUSED */
kinfo_clockrate(op, where, acopysize, arg, aneeded)
	int op;
	register char *where;
	int *acopysize, arg, *aneeded;
{
	int buflen, error;
	struct clockinfo clockinfo;

	*aneeded = sizeof(clockinfo);
	if (where == NULL)
		return (0);
	/*
	 * Check for enough buffering.
	 */
	buflen = *acopysize;
	if (buflen < sizeof(clockinfo)) {
		*acopysize = 0;
		return (0);
	}
	/*
	 * Copyout clockinfo structure.
	 */
	clockinfo.hz = hz;
	clockinfo.stathz = stathz;
	clockinfo.tick = tick;
#ifdef PROFTIMER
	initprofclock(2);
#else
	profhz = hz;
#endif
	clockinfo.profhz = profhz;
	if (error = copyout((caddr_t)&clockinfo, where, sizeof(clockinfo)))
		return (error);
	*acopysize = sizeof(clockinfo);
	return (0);
}
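/*
 * Illustrative sketch, not part of the original file: the hzto()
 * arithmetic for a target 2.5 seconds in the future, assuming hz == 100
 * so that tick == 10000 usec.  The millisecond path computes
 * (2 * 1000 + 500000 / 1000) msec divided by (10000 / 1000) msec per
 * tick, i.e. 2500 / 10 == 250 ticks.  The CLOCK_EXAMPLE guard is
 * hypothetical.
 */
#ifdef CLOCK_EXAMPLE
static void
hzto_example()
{
	struct timeval target;
	int t;

	target = time;			/* current time of day */
	target.tv_sec += 2;
	target.tv_usec += 500000;	/* 2.5 seconds from now */
	if (target.tv_usec >= 1000000) {
		target.tv_usec -= 1000000;
		target.tv_sec++;
	}
	t = hzto(&target);		/* 250 ticks when hz == 100 */
}
#endif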