1 /*- 2 * Copyright (c) 1982, 1986, 1991 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)kern_clock.c 7.19 (Berkeley) 03/18/92 8 */ 9 10 #include "param.h" 11 #include "systm.h" 12 #include "dkstat.h" 13 #include "callout.h" 14 #include "kernel.h" 15 #include "proc.h" 16 #include "resourcevar.h" 17 18 #include "machine/cpu.h" 19 20 #ifdef GPROF 21 #include "gprof.h" 22 #endif 23 24 /* 25 * Clock handling routines. 26 * 27 * This code is written to operate with two timers which run 28 * independently of each other. The main clock, running at hz 29 * times per second, is used to do scheduling and timeout calculations. 30 * The second timer does resource utilization estimation statistically 31 * based on the state of the machine phz times a second. Both functions 32 * can be performed by a single clock (ie hz == phz), however the 33 * statistics will be much more prone to errors. Ideally a machine 34 * would have separate clocks measuring time spent in user state, system 35 * state, interrupt state, and idle state. These clocks would allow a non- 36 * approximate measure of resource utilization. 37 */ 38 39 /* 40 * TODO: 41 * time of day, system/user timing, timeouts, profiling on separate timers 42 * allocate more timeout table slots when table overflows. 43 */ 44 45 /* 46 * Bump a timeval by a small number of usec's. 47 */ 48 #define BUMPTIME(t, usec) { \ 49 register struct timeval *tp = (t); \ 50 \ 51 tp->tv_usec += (usec); \ 52 if (tp->tv_usec >= 1000000) { \ 53 tp->tv_usec -= 1000000; \ 54 tp->tv_sec++; \ 55 } \ 56 } 57 58 int ticks; 59 int phz; 60 int profhz; 61 struct timeval time; 62 struct timeval mono_time; 63 /* 64 * The hz hardware interval timer. 65 * We update the events relating to real time. 66 * If this timer is also being used to gather statistics, 67 * we run through the statistics gathering routine as well. 68 */ 69 hardclock(frame) 70 clockframe frame; 71 { 72 register struct callout *p1; 73 register struct proc *p = curproc; 74 register struct pstats *pstats; 75 register int s; 76 int needsoft = 0; 77 time_t secs; 78 extern int tickdelta; 79 extern long timedelta; 80 81 /* 82 * Update real-time timeout queue. 83 * At front of queue are some number of events which are ``due''. 84 * The time to these is <= 0 and if negative represents the 85 * number of ticks which have passed since it was supposed to happen. 86 * The rest of the q elements (times > 0) are events yet to happen, 87 * where the time for each is given as a delta from the previous. 88 * Decrementing just the first of these serves to decrement the time 89 * to all events. 90 */ 91 p1 = calltodo.c_next; 92 while (p1) { 93 if (--p1->c_time > 0) 94 break; 95 needsoft = 1; 96 if (p1->c_time == 0) 97 break; 98 p1 = p1->c_next; 99 } 100 101 /* 102 * Curproc (now in p) is null if no process is running. 103 * We assume that curproc is set in user mode! 104 */ 105 if (p) 106 pstats = p->p_stats; 107 /* 108 * Charge the time out based on the mode the cpu is in. 109 * Here again we fudge for the lack of proper interval timers 110 * assuming that the current state has been around at least 111 * one tick. 112 */ 113 if (CLKF_USERMODE(&frame)) { 114 if (pstats->p_prof.pr_scale) 115 needsoft = 1; 116 /* 117 * CPU was in user state. Increment 118 * user time counter, and process process-virtual time 119 * interval timer. 120 */ 121 BUMPTIME(&p->p_utime, tick); 122 if (timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && 123 itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) 124 psignal(p, SIGVTALRM); 125 } else { 126 /* 127 * CPU was in system state. 128 */ 129 if (p) 130 BUMPTIME(&p->p_stime, tick); 131 } 132 133 /* 134 * If the cpu is currently scheduled to a process, then 135 * charge it with resource utilization for a tick, updating 136 * statistics which run in (user+system) virtual time, 137 * such as the cpu time limit and profiling timers. 138 * This assumes that the current process has been running 139 * the entire last tick. 140 */ 141 if (p) { 142 secs = p->p_utime.tv_sec + p->p_stime.tv_sec + 1; 143 if (secs > p->p_rlimit[RLIMIT_CPU].rlim_cur) { 144 if (secs > p->p_rlimit[RLIMIT_CPU].rlim_max) 145 psignal(p, SIGKILL); 146 else { 147 psignal(p, SIGXCPU); 148 if (p->p_rlimit[RLIMIT_CPU].rlim_cur < 149 p->p_rlimit[RLIMIT_CPU].rlim_max) 150 p->p_rlimit[RLIMIT_CPU].rlim_cur += 5; 151 } 152 } 153 if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && 154 itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) 155 psignal(p, SIGPROF); 156 157 /* 158 * We adjust the priority of the current process. 159 * The priority of a process gets worse as it accumulates 160 * CPU time. The cpu usage estimator (p_cpu) is increased here 161 * and the formula for computing priorities (in kern_synch.c) 162 * will compute a different value each time the p_cpu increases 163 * by 4. The cpu usage estimator ramps up quite quickly when 164 * the process is running (linearly), and decays away 165 * exponentially, * at a rate which is proportionally slower 166 * when the system is busy. The basic principal is that the 167 * system will 90% forget that a process used a lot of CPU 168 * time in 5*loadav seconds. This causes the system to favor 169 * processes which haven't run much recently, and to 170 * round-robin among other processes. 171 */ 172 p->p_cpticks++; 173 if (++p->p_cpu == 0) 174 p->p_cpu--; 175 if ((p->p_cpu&3) == 0) { 176 setpri(p); 177 if (p->p_pri >= PUSER) 178 p->p_pri = p->p_usrpri; 179 } 180 } 181 182 /* 183 * If the alternate clock has not made itself known then 184 * we must gather the statistics. 185 */ 186 if (phz == 0) 187 gatherstats(&frame); 188 189 /* 190 * Increment the time-of-day, and schedule 191 * processing of the callouts at a very low cpu priority, 192 * so we don't keep the relatively high clock interrupt 193 * priority any longer than necessary. 194 */ 195 ticks++; 196 if (timedelta == 0) { 197 BUMPTIME(&time, tick) 198 BUMPTIME(&mono_time, tick) 199 } else { 200 register delta; 201 202 if (timedelta < 0) { 203 delta = tick - tickdelta; 204 timedelta += tickdelta; 205 } else { 206 delta = tick + tickdelta; 207 timedelta -= tickdelta; 208 } 209 BUMPTIME(&time, delta); 210 BUMPTIME(&mono_time, delta) 211 } 212 if (needsoft) { 213 if (CLKF_BASEPRI(&frame)) { 214 /* 215 * Save the overhead of a software interrupt; 216 * it will happen as soon as we return, so do it now. 217 */ 218 (void) splsoftclock(); 219 softclock(frame); 220 } else 221 setsoftclock(); 222 } 223 } 224 225 int dk_ndrive = DK_NDRIVE; 226 /* 227 * Gather statistics on resource utilization. 228 * 229 * We make a gross assumption: that the system has been in the 230 * state it is in (user state, kernel state, interrupt state, 231 * or idle state) for the entire last time interval, and 232 * update statistics accordingly. 233 */ 234 gatherstats(framep) 235 clockframe *framep; 236 { 237 register int cpstate, s; 238 239 /* 240 * Determine what state the cpu is in. 241 */ 242 if (CLKF_USERMODE(framep)) { 243 /* 244 * CPU was in user state. 245 */ 246 if (curproc->p_nice > NZERO) 247 cpstate = CP_NICE; 248 else 249 cpstate = CP_USER; 250 } else { 251 /* 252 * CPU was in system state. If profiling kernel 253 * increment a counter. If no process is running 254 * then this is a system tick if we were running 255 * at a non-zero IPL (in a driver). If a process is running, 256 * then we charge it with system time even if we were 257 * at a non-zero IPL, since the system often runs 258 * this way during processing of system calls. 259 * This is approximate, but the lack of true interval 260 * timers makes doing anything else difficult. 261 */ 262 cpstate = CP_SYS; 263 if (curproc == NULL && CLKF_BASEPRI(framep)) 264 cpstate = CP_IDLE; 265 #ifdef GPROF 266 s = CLKF_PC(framep) - s_lowpc; 267 if (profiling < 2 && s < s_textsize) 268 kcount[s / (HISTFRACTION * sizeof (*kcount))]++; 269 #endif 270 } 271 /* 272 * We maintain statistics shown by user-level statistics 273 * programs: the amount of time in each cpu state, and 274 * the amount of time each of DK_NDRIVE ``drives'' is busy. 275 */ 276 cp_time[cpstate]++; 277 for (s = 0; s < DK_NDRIVE; s++) 278 if (dk_busy&(1<<s)) 279 dk_time[s]++; 280 } 281 282 /* 283 * Software priority level clock interrupt. 284 * Run periodic events from timeout queue. 285 */ 286 /*ARGSUSED*/ 287 softclock(frame) 288 clockframe frame; 289 { 290 291 for (;;) { 292 register struct callout *p1; 293 register caddr_t arg; 294 register int (*func)(); 295 register int a, s; 296 297 s = splhigh(); 298 if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) { 299 splx(s); 300 break; 301 } 302 arg = p1->c_arg; func = p1->c_func; a = p1->c_time; 303 calltodo.c_next = p1->c_next; 304 p1->c_next = callfree; 305 callfree = p1; 306 splx(s); 307 (*func)(arg, a); 308 } 309 /* 310 * If trapped user-mode and profiling, give it 311 * a profiling tick. 312 */ 313 if (CLKF_USERMODE(&frame)) { 314 register struct proc *p = curproc; 315 316 if (p->p_stats->p_prof.pr_scale) 317 profile_tick(p, &frame); 318 /* 319 * Check to see if process has accumulated 320 * more than 10 minutes of user time. If so 321 * reduce priority to give others a chance. 322 */ 323 if (p->p_ucred->cr_uid && p->p_nice == NZERO && 324 p->p_utime.tv_sec > 10 * 60) { 325 p->p_nice = NZERO + 4; 326 setpri(p); 327 p->p_pri = p->p_usrpri; 328 } 329 } 330 } 331 332 /* 333 * Arrange that (*func)(arg) is called in t/hz seconds. 334 */ 335 timeout(func, arg, t) 336 int (*func)(); 337 caddr_t arg; 338 register int t; 339 { 340 register struct callout *p1, *p2, *pnew; 341 register int s = splhigh(); 342 343 if (t <= 0) 344 t = 1; 345 pnew = callfree; 346 if (pnew == NULL) 347 panic("timeout table overflow"); 348 callfree = pnew->c_next; 349 pnew->c_arg = arg; 350 pnew->c_func = func; 351 for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2) 352 if (p2->c_time > 0) 353 t -= p2->c_time; 354 p1->c_next = pnew; 355 pnew->c_next = p2; 356 pnew->c_time = t; 357 if (p2) 358 p2->c_time -= t; 359 splx(s); 360 } 361 362 /* 363 * untimeout is called to remove a function timeout call 364 * from the callout structure. 365 */ 366 untimeout(func, arg) 367 int (*func)(); 368 caddr_t arg; 369 { 370 register struct callout *p1, *p2; 371 register int s; 372 373 s = splhigh(); 374 for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) { 375 if (p2->c_func == func && p2->c_arg == arg) { 376 if (p2->c_next && p2->c_time > 0) 377 p2->c_next->c_time += p2->c_time; 378 p1->c_next = p2->c_next; 379 p2->c_next = callfree; 380 callfree = p2; 381 break; 382 } 383 } 384 splx(s); 385 } 386 387 /* 388 * Compute number of hz until specified time. 389 * Used to compute third argument to timeout() from an 390 * absolute time. 391 */ 392 hzto(tv) 393 struct timeval *tv; 394 { 395 register long ticks; 396 register long sec; 397 int s = splhigh(); 398 399 /* 400 * If number of milliseconds will fit in 32 bit arithmetic, 401 * then compute number of milliseconds to time and scale to 402 * ticks. Otherwise just compute number of hz in time, rounding 403 * times greater than representible to maximum value. 404 * 405 * Delta times less than 25 days can be computed ``exactly''. 406 * Maximum value for any timeout in 10ms ticks is 250 days. 407 */ 408 sec = tv->tv_sec - time.tv_sec; 409 if (sec <= 0x7fffffff / 1000 - 1000) 410 ticks = ((tv->tv_sec - time.tv_sec) * 1000 + 411 (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000); 412 else if (sec <= 0x7fffffff / hz) 413 ticks = sec * hz; 414 else 415 ticks = 0x7fffffff; 416 splx(s); 417 return (ticks); 418 } 419 420 /* 421 * Return information about system clocks. 422 */ 423 /* ARGSUSED */ 424 kinfo_clockrate(op, where, acopysize, arg, aneeded) 425 int op; 426 register char *where; 427 int *acopysize, arg, *aneeded; 428 { 429 int buflen, error; 430 struct clockinfo clockinfo; 431 432 *aneeded = sizeof(clockinfo); 433 if (where == NULL) 434 return (0); 435 /* 436 * Check for enough buffering. 437 */ 438 buflen = *acopysize; 439 if (buflen < sizeof(clockinfo)) { 440 *acopysize = 0; 441 return (0); 442 } 443 /* 444 * Copyout clockinfo structure. 445 */ 446 clockinfo.hz = hz; 447 clockinfo.phz = phz; 448 clockinfo.tick = tick; 449 clockinfo.profhz = profhz; 450 if (error = copyout((caddr_t)&clockinfo, where, sizeof(clockinfo))) 451 return (error); 452 *acopysize = sizeof(clockinfo); 453 return (0); 454 } 455