/*-
 * Copyright (c) 1982, 1986, 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)kern_clock.c	7.16 (Berkeley) 05/09/91
 */

#include "param.h"
#include "systm.h"
#include "dkstat.h"
#include "callout.h"
#include "kernel.h"
#include "proc.h"
#include "resourcevar.h"

#include "machine/cpu.h"

#ifdef GPROF
#include "gprof.h"
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers which run
 * independently of each other.  The main clock, running at hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine phz times a second.  Both functions
 * can be performed by a single clock (ie hz == phz), however the
 * statistics will be much more prone to errors.  Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state.  These clocks would allow a non-
 * approximate measure of resource utilization.
 */

/*
 * TODO:
 *	time of day, system/user timing, timeouts, profiling on separate timers
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 *
 * NOTE: the increment must be less than 1000000 usec; only a single
 * carry into tv_sec is performed.  The macro deliberately expands to a
 * bare brace block (not do { } while (0)): hardclock() invokes it as
 * "BUMPTIME(&time, tick)" with no trailing semicolon immediately before
 * an "else", which a do/while form (or an added semicolon) would break.
 */
#define BUMPTIME(t, usec) { \
	register struct timeval *tp = (t); \
 \
	tp->tv_usec += (usec); \
	if (tp->tv_usec >= 1000000) { \
		tp->tv_usec -= 1000000; \
		tp->tv_sec++; \
	} \
}

/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics gathering routine as well.
 */
hardclock(frame)
	clockframe frame;
{
	register struct callout *p1;
	register struct proc *p = curproc;
	register struct pstats *pstats;
	register int s;		/* XXX declared but unused in this function */
	int needsoft = 0;	/* set when softclock() work is pending */
	extern int tickdelta;	/* adjustment applied per tick while slewing */
	extern long timedelta;	/* total outstanding clock adjustment */

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
	p1 = calltodo.c_next;
	while (p1) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
		p1 = p1->c_next;
	}

	/*
	 * Curproc (now in p) is null if no process is running.
	 * We assume that curproc is set in user mode!
	 * NOTE(review): if that assumption ever fails, pstats is read
	 * uninitialized in the CLKF_USERMODE branch below.
	 */
	if (p)
		pstats = p->p_stats;
	/*
	 * Charge the time out based on the mode the cpu is in.
	 * Here again we fudge for the lack of proper interval timers
	 * assuming that the current state has been around at least
	 * one tick.
	 */
	if (CLKF_USERMODE(&frame)) {
		if (pstats->p_prof.pr_scale)
			needsoft = 1;	/* deliver profiling tick in softclock */
		/*
		 * CPU was in user state.  Increment
		 * user time counter, and process process-virtual time
		 * interval timer.
		 */
		BUMPTIME(&p->p_utime, tick);
		if (timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
	} else {
		/*
		 * CPU was in system state.
		 */
		if (p)
			BUMPTIME(&p->p_stime, tick);
	}

	/*
	 * If the cpu is currently scheduled to a process, then
	 * charge it with resource utilization for a tick, updating
	 * statistics which run in (user+system) virtual time,
	 * such as the cpu time limit and profiling timers.
	 * This assumes that the current process has been running
	 * the entire last tick.
	 */
	if (p) {
		/* CPU rlimit: signal on exceeding soft limit, then bump
		 * the soft limit 5 seconds at a time up to the hard limit
		 * so SIGXCPU is re-sent periodically rather than every tick. */
		if ((p->p_utime.tv_sec+p->p_stime.tv_sec+1) >
		    p->p_rlimit[RLIMIT_CPU].rlim_cur) {
			psignal(p, SIGXCPU);
			if (p->p_rlimit[RLIMIT_CPU].rlim_cur <
			    p->p_rlimit[RLIMIT_CPU].rlim_max)
				p->p_rlimit[RLIMIT_CPU].rlim_cur += 5;
		}
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);

		/*
		 * We adjust the priority of the current process.
		 * The priority of a process gets worse as it accumulates
		 * CPU time.  The cpu usage estimator (p_cpu) is increased here
		 * and the formula for computing priorities (in kern_synch.c)
		 * will compute a different value each time the p_cpu increases
		 * by 4.  The cpu usage estimator ramps up quite quickly when
		 * the process is running (linearly), and decays away
		 * exponentially, at a rate which is proportionally slower
		 * when the system is busy.  The basic principle is that the
		 * system will 90% forget that a process used a lot of CPU
		 * time in 5*loadav seconds.  This causes the system to favor
		 * processes which haven't run much recently, and to
		 * round-robin among other processes.
		 */
		p->p_cpticks++;
		/* saturate p_cpu rather than wrapping to zero */
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu&3) == 0) {
			setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}

	/*
	 * If the alternate clock has not made itself known then
	 * we must gather the statistics.
	 */
	if (phz == 0)
		gatherstats(&frame);

	/*
	 * Increment the time-of-day, and schedule
	 * processing of the callouts at a very low cpu priority,
	 * so we don't keep the relatively high clock interrupt
	 * priority any longer than necessary.
	 */
	/* no semicolon after BUMPTIME: the macro is a bare brace block
	 * and a semicolon here would detach the else (see macro def.) */
	if (timedelta == 0)
		BUMPTIME(&time, tick)
	else {
		register delta;

		/* slew the clock by tickdelta usec per tick until the
		 * outstanding timedelta adjustment is consumed */
		if (timedelta < 0) {
			delta = tick - tickdelta;
			timedelta += tickdelta;
		} else {
			delta = tick + tickdelta;
			timedelta -= tickdelta;
		}
		BUMPTIME(&time, delta);
	}
	if (needsoft) {
		if (CLKF_BASEPRI(&frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void) splsoftclock();
			softclock(frame);
		} else
			setsoftclock();
	}
}

int	dk_ndrive = DK_NDRIVE;
/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 */
gatherstats(framep)
	clockframe *framep;
{
	register int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
	if (CLKF_USERMODE(framep)) {
		/*
		 * CPU was in user state.
		 */
		if (curproc->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling kernel
		 * increment a counter.  If no process is running
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
		 */
		cpstate = CP_SYS;
		if (curproc == NULL && CLKF_BASEPRI(framep))
			cpstate = CP_IDLE;
#ifdef GPROF
		/* bump the kernel-profiling histogram bucket for the PC */
		s = CLKF_PC(framep) - s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 */
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}

/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
softclock(frame)
	clockframe frame;
{

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register int (*func)();
		register int a, s;

		/* pop one due entry (c_time <= 0) with interrupts blocked,
		 * then drop back to run the callback at soft level */
		s = splhigh();
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		(*func)(arg, a);
	}
	/*
	 * If trapped user-mode and profiling, give it
	 * a profiling tick.
	 */
	if (CLKF_USERMODE(&frame)) {
		register struct proc *p = curproc;

		if (p->p_stats->p_prof.pr_scale)
			profile_tick(p, &frame);
		/*
		 * Check to see if process has accumulated
		 * more than 10 minutes of user time.  If so
		 * reduce priority to give others a chance.
		 * (Only for non-root, un-niced processes.)
		 */
		if (p->p_ucred->cr_uid && p->p_nice == NZERO &&
		    p->p_utime.tv_sec > 10 * 60) {
			p->p_nice = NZERO + 4;
			setpri(p);
			p->p_pri = p->p_usrpri;
		}
	}
}

/*
 * Arrange that (*func)(arg) is called in t/hz seconds.
 */
timeout(func, arg, t)
	int (*func)();
	caddr_t arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s = splhigh();

	/* never schedule for "now or earlier"; round up to one tick */
	if (t <= 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = func;
	/*
	 * The queue stores each entry's time as a delta from its
	 * predecessor.  Walk forward, converting t from ticks-from-now
	 * into a delta by subtracting the (positive) deltas we pass;
	 * overdue entries at the head (c_time <= 0) do not reduce t.
	 */
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	/* successor's delta is now measured from the new entry */
	if (p2)
		p2->c_time -= t;
	splx(s);
}

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
untimeout(func, arg)
	int (*func)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = splhigh();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == func && p2->c_arg == arg) {
			/* give the removed entry's delta to its successor
			 * so later entries still fire at the right time */
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;	/* remove only the first match */
		}
	}
	splx(s);
}

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */
hzto(tv)
	struct timeval *tv;
{
	register long ticks;
	register long sec;
	int s = splhigh();

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representible to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 *
	 * NOTE(review): the (tick / 1000) divisor assumes tick >= 1000
	 * usec, i.e. hz <= 1000 — with a faster clock this divides by
	 * zero.  Times already in the past yield ticks <= 0; callers
	 * rely on timeout() clamping that to 1.
	 */
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}