/*
 * Copyright (c) 1982, 1986, 1991 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	@(#)kern_clock.c	7.12 (Berkeley) 03/17/91
 */

#include "param.h"
#include "systm.h"
#include "dkstat.h"
#include "callout.h"
#include "user.h"
#include "kernel.h"
#include "proc.h"

#include "machine/cpu.h"

#ifdef GPROF
#include "gprof.h"
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers which run
 * independently of each other.  The main clock, running at hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine phz times a second.  Both functions
 * can be performed by a single clock (i.e. hz == phz), however the
 * statistics will be much more prone to errors.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow a
 * non-approximate measure of resource utilization.
 */

/*
 * TODO:
 *	time of day, system/user timing, timeouts, profiling on separate timers
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register struct timeval *tp = (t); \
 \
	tp->tv_usec += (usec); \
	if (tp->tv_usec >= 1000000) { \
		tp->tv_usec -= 1000000; \
		tp->tv_sec++; \
	} \
}

/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics gathering routine as well.
 */
hardclock(frame)
	clockframe frame;
{
	register struct callout *p1;
	register struct proc *p = curproc;
	register struct pstats *pstats = p->p_stats;
	register int s;
	int needsoft = 0;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
	p1 = calltodo.c_next;
	while (p1) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
		p1 = p1->c_next;
	}

	/*
	 * Charge the time out based on the mode the cpu is in.
	 * Here again we fudge for the lack of proper interval timers
	 * assuming that the current state has been around at least
	 * one tick.
	 */
	if (CLKF_USERMODE(&frame)) {
		if (pstats->p_prof.pr_scale)
			needsoft = 1;
		/*
		 * CPU was in user state.  Increment
		 * user time counter, and process process-virtual time
		 * interval timer.
		 */
		BUMPTIME(&p->p_utime, tick);
		if (timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
	} else {
		/*
		 * CPU was in system state.
		 */
		if (!noproc)
			BUMPTIME(&p->p_stime, tick);
	}

	/*
	 * If the cpu is currently scheduled to a process, then
	 * charge it with resource utilization for a tick, updating
	 * statistics which run in (user+system) virtual time,
	 * such as the cpu time limit and profiling timers.
	 * This assumes that the current process has been running
	 * the entire last tick.
	 */
	if (noproc == 0) {
		if ((p->p_utime.tv_sec+p->p_stime.tv_sec+1) >
		    p->p_rlimit[RLIMIT_CPU].rlim_cur) {
			psignal(p, SIGXCPU);
			if (p->p_rlimit[RLIMIT_CPU].rlim_cur <
			    p->p_rlimit[RLIMIT_CPU].rlim_max)
				p->p_rlimit[RLIMIT_CPU].rlim_cur += 5;
		}
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);

		/*
		 * We adjust the priority of the current process.
		 * The priority of a process gets worse as it accumulates
		 * CPU time.  The cpu usage estimator (p_cpu) is increased here
		 * and the formula for computing priorities (in kern_synch.c)
		 * will compute a different value each time the p_cpu increases
		 * by 4.  The cpu usage estimator ramps up quite quickly when
		 * the process is running (linearly), and decays away
		 * exponentially, at a rate which is proportionally slower
		 * when the system is busy.  The basic principle is that the
		 * system will 90% forget that a process used a lot of CPU
		 * time in 5*loadav seconds.  This causes the system to favor
		 * processes which haven't run much recently, and to
		 * round-robin among other processes.
		 */
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu&3) == 0) {
			setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}

	/*
	 * If the alternate clock has not made itself known then
	 * we must gather the statistics.
	 */
	if (phz == 0)
		gatherstats(&frame);

	/*
	 * Increment the time-of-day, and schedule
	 * processing of the callouts at a very low cpu priority,
	 * so we don't keep the relatively high clock interrupt
	 * priority any longer than necessary.
	 */
	if (timedelta == 0)
		BUMPTIME(&time, tick)
	else {
		register delta;

		if (timedelta < 0) {
			delta = tick - tickdelta;
			timedelta += tickdelta;
		} else {
			delta = tick + tickdelta;
			timedelta -= tickdelta;
		}
		BUMPTIME(&time, delta);
	}
	if (needsoft) {
		if (CLKF_BASEPRI(&frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void) splsoftclock();
			softclock(frame);
		} else
			setsoftclock();
	}
}
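
/*
 * Illustrative sketch, not part of the original file: because the
 * callout queue stores each c_time as a delta from the previous entry,
 * the absolute number of ticks until a given entry fires is the sum of
 * the deltas up to and including that entry, which is why hardclock()
 * only has to decrement the first positive c_time.  The hypothetical
 * helper below (guarded out of compilation) shows the arithmetic; the
 * name callremaining is an assumption, for illustration only.
 */
#ifdef notdef
callremaining(target)
	register struct callout *target;
{
	register struct callout *p1;
	register int ticks = 0;

	for (p1 = calltodo.c_next; p1; p1 = p1->c_next) {
		ticks += p1->c_time;	/* accumulate delta-encoded times */
		if (p1 == target)
			return (ticks);	/* <= 0 means already due */
	}
	return (-1);			/* not on the queue */
}
#endif /* notdef */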

int	dk_ndrive = DK_NDRIVE;
/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 */
gatherstats(framep)
	clockframe *framep;
{
	register int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
	if (CLKF_USERMODE(framep)) {
		/*
		 * CPU was in user state.
		 */
		if (curproc->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling kernel
		 * increment a counter.  If no process is running
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
		 */
		cpstate = CP_SYS;
		if (noproc && CLKF_BASEPRI(framep))
			cpstate = CP_IDLE;
#ifdef GPROF
		s = CLKF_PC(framep) - s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 */
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}

/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
softclock(frame)
	clockframe frame;
{

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register int (*func)();
		register int a, s;

		s = splhigh();
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		(*func)(arg, a);
	}
	/*
	 * If trapped user-mode and profiling, give it
	 * a profiling tick.
	 */
	if (CLKF_USERMODE(&frame)) {
		register struct proc *p = curproc;

		if (p->p_stats->p_prof.pr_scale)
			profile_tick(p, &frame);
		/*
		 * Check to see if process has accumulated
		 * more than 10 minutes of user time.  If so
		 * reduce priority to give others a chance.
		 */
		if (p->p_ucred->cr_uid && p->p_nice == NZERO &&
		    p->p_utime.tv_sec > 10 * 60) {
			p->p_nice = NZERO + 4;
			setpri(p);
			p->p_pri = p->p_usrpri;
		}
	}
}
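
/*
 * Illustrative sketch, not part of the original file: the cp_time
 * counters bumped by gatherstats() are typically turned into the
 * familiar user/nice/system/idle percentages by a user-level program
 * (vmstat(8), for example) that differences two samples and divides
 * by the total ticks in the interval.  The hypothetical helper below
 * shows only that arithmetic.
 */
#ifdef notdef
cpupercent(old, new, pct)
	long old[CPUSTATES], new[CPUSTATES];
	long pct[CPUSTATES];
{
	register int i;
	register long total = 0;

	for (i = 0; i < CPUSTATES; i++)
		total += new[i] - old[i];
	if (total == 0)
		total = 1;		/* avoid division by zero */
	for (i = 0; i < CPUSTATES; i++)
		pct[i] = ((new[i] - old[i]) * 100) / total;
}
#endif /* notdef */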

/*
 * Arrange that (*func)(arg) is called in t/hz seconds.
 */
timeout(func, arg, t)
	int (*func)();
	caddr_t arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s = splhigh();

	if (t <= 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = func;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
untimeout(func, arg)
	int (*func)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = splhigh();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == func && p2->c_arg == arg) {
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}
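
/*
 * Illustrative sketch, not part of the original file: a typical driver
 * arms a callout with timeout() and cancels it with untimeout(), using
 * the same function pointer and argument for both calls.  The names
 * xxwatchdog, xxstart, and xxdone below are hypothetical, for
 * illustration only; hz is the kernel's ticks-per-second value.
 */
#ifdef notdef
int	xxwatchdog();

xxstart(sc)
	caddr_t sc;
{
	/* call xxwatchdog(sc) roughly two seconds from now */
	timeout(xxwatchdog, sc, 2 * hz);
}

xxdone(sc)
	caddr_t sc;
{
	/* the operation finished in time; remove the pending watchdog */
	untimeout(xxwatchdog, sc);
}
#endif /* notdef */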

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */
hzto(tv)
	struct timeval *tv;
{
	register long ticks;
	register long sec;
	int s = splhigh();

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

/* ARGSUSED */
profil(p, uap, retval)
	struct proc *p;
	register struct args {
		short	*bufbase;
		unsigned bufsize;
		unsigned pcoffset;
		unsigned pcscale;
	} *uap;
	int *retval;
{
	register struct uprof *upp = &p->p_stats->p_prof;

	upp->pr_base = uap->bufbase;
	upp->pr_size = uap->bufsize;
	upp->pr_off = uap->pcoffset;
	upp->pr_scale = uap->pcscale;
	return (0);
}
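
/*
 * Illustrative sketch, not part of the original file: hzto() is the
 * usual way to turn an absolute struct timeval, such as a real-time
 * interval timer's expiration, into the relative tick count that
 * timeout() expects (kern_time.c uses it this way for ITIMER_REAL).
 * The names xxexpire and xxsleep_until below are hypothetical.
 */
#ifdef notdef
int	xxexpire();

xxsleep_until(deadline, arg)
	struct timeval *deadline;
	caddr_t arg;
{
	register int ticks;

	ticks = hzto(deadline);		/* ticks from now until *deadline */
	timeout(xxexpire, arg, ticks);	/* timeout() clamps ticks <= 0 to 1 */
}
#endif /* notdef */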