1 /* kern_clock.c 6.1 83/07/29 */ 2 3 #include "../machine/reg.h" 4 #include "../machine/psl.h" 5 6 #include "../h/param.h" 7 #include "../h/systm.h" 8 #include "../h/dk.h" 9 #include "../h/callout.h" 10 #include "../h/dir.h" 11 #include "../h/user.h" 12 #include "../h/kernel.h" 13 #include "../h/proc.h" 14 #include "../h/vm.h" 15 #include "../h/text.h" 16 17 #ifdef vax 18 #include "../vax/mtpr.h" 19 #endif 20 21 #ifdef GPROF 22 #include "../h/gprof.h" 23 #endif 24 25 /* 26 * Clock handling routines. 27 * 28 * This code is written to operate with two timers which run 29 * independently of each other. The main clock, running at hz 30 * times per second, is used to do scheduling and timeout calculations. 31 * The second timer does resource utilization estimation statistically 32 * based on the state of the machine phz times a second. Both functions 33 * can be performed by a single clock (ie hz == phz), however the 34 * statistics will be much more prone to errors. Ideally a machine 35 * would have separate clocks measuring time spent in user state, system 36 * state, interrupt state, and idle state. These clocks would allow a non- 37 * approximate measure of resource utilization. 38 */ 39 40 /* 41 * TODO: 42 * time of day, system/user timing, timeouts, profiling on separate timers 43 * allocate more timeout table slots when table overflows. 44 */ 45 46 /* 47 * The hz hardware interval timer. 48 * We update the events relating to real time. 49 * If this timer is also being used to gather statistics, 50 * we run through the statistics gathering routine as well. 51 */ 52 /*ARGSUSED*/ 53 hardclock(pc, ps) 54 caddr_t pc; 55 int ps; 56 { 57 register struct callout *p1; 58 register struct proc *p; 59 register int s, cpstate; 60 int needsoft = 0; 61 62 /* 63 * Update real-time timeout queue. 64 * At front of queue are some number of events which are ``due''. 65 * The time to these is <= 0 and if negative represents the 66 * number of ticks which have passed since it was supposed to happen. 67 * The rest of the q elements (times > 0) are events yet to happen, 68 * where the time for each is given as a delta from the previous. 69 * Decrementing just the first of these serves to decrement the time 70 * to all events. 71 */ 72 p1 = calltodo.c_next; 73 while (p1) { 74 if (--p1->c_time > 0) 75 break; 76 needsoft = 1; 77 if (p1->c_time == 0) 78 break; 79 p1 = p1->c_next; 80 } 81 82 /* 83 * Charge the time out based on the mode the cpu is in. 84 * Here again we fudge for the lack of proper interval timers 85 * assuming that the current state has been around at least 86 * one tick. 87 */ 88 if (USERMODE(ps)) { 89 if (u.u_prof.pr_scale) 90 needsoft = 1; 91 /* 92 * CPU was in user state. Increment 93 * user time counter, and process process-virtual time 94 * interval timer. 95 */ 96 bumptime(&u.u_ru.ru_utime, tick); 97 if (timerisset(&u.u_timer[ITIMER_VIRTUAL].it_value) && 98 itimerdecr(&u.u_timer[ITIMER_VIRTUAL], tick) == 0) 99 psignal(u.u_procp, SIGVTALRM); 100 if (u.u_procp->p_nice > NZERO) 101 cpstate = CP_NICE; 102 else 103 cpstate = CP_USER; 104 } else { 105 /* 106 * CPU was in system state. If profiling kernel 107 * increment a counter. If no process is running 108 * then this is a system tick if we were running 109 * at a non-zero IPL (in a driver). If a process is running, 110 * then we charge it with system time even if we were 111 * at a non-zero IPL, since the system often runs 112 * this way during processing of system calls. 113 * This is approximate, but the lack of true interval 114 * timers makes doing anything else difficult. 115 */ 116 cpstate = CP_SYS; 117 if (noproc) { 118 if (BASEPRI(ps)) 119 cpstate = CP_IDLE; 120 } else { 121 bumptime(&u.u_ru.ru_stime, tick); 122 } 123 } 124 125 /* 126 * If the cpu is currently scheduled to a process, then 127 * charge it with resource utilization for a tick, updating 128 * statistics which run in (user+system) virtual time, 129 * such as the cpu time limit and profiling timers. 130 * This assumes that the current process has been running 131 * the entire last tick. 132 */ 133 if (noproc == 0 && cpstate != CP_IDLE) { 134 if ((u.u_ru.ru_utime.tv_sec+u.u_ru.ru_stime.tv_sec+1) > 135 u.u_rlimit[RLIMIT_CPU].rlim_cur) { 136 psignal(u.u_procp, SIGXCPU); 137 if (u.u_rlimit[RLIMIT_CPU].rlim_cur < 138 u.u_rlimit[RLIMIT_CPU].rlim_max) 139 u.u_rlimit[RLIMIT_CPU].rlim_cur += 5; 140 } 141 if (timerisset(&u.u_timer[ITIMER_PROF].it_value) && 142 itimerdecr(&u.u_timer[ITIMER_PROF], tick) == 0) 143 psignal(u.u_procp, SIGPROF); 144 s = u.u_procp->p_rssize; 145 u.u_ru.ru_idrss += s; u.u_ru.ru_isrss += 0; /* XXX */ 146 if (u.u_procp->p_textp) { 147 register int xrss = u.u_procp->p_textp->x_rssize; 148 149 s += xrss; 150 u.u_ru.ru_ixrss += xrss; 151 } 152 if (s > u.u_ru.ru_maxrss) 153 u.u_ru.ru_maxrss = s; 154 } 155 156 /* 157 * We adjust the priority of the current process. 158 * The priority of a process gets worse as it accumulates 159 * CPU time. The cpu usage estimator (p_cpu) is increased here 160 * and the formula for computing priorities (in kern_synch.c) 161 * will compute a different value each time the p_cpu increases 162 * by 4. The cpu usage estimator ramps up quite quickly when 163 * the process is running (linearly), and decays away exponentially, 164 * at a rate which is proportionally slower when the system is 165 * busy. The basic principal is that the system will 90% forget 166 * that a process used a lot of CPU time in 5*loadav seconds. 167 * This causes the system to favor processes which haven't run 168 * much recently, and to round-robin among other processes. 169 */ 170 if (!noproc) { 171 p = u.u_procp; 172 p->p_cpticks++; 173 if (++p->p_cpu == 0) 174 p->p_cpu--; 175 if ((p->p_cpu&3) == 0) { 176 (void) setpri(p); 177 if (p->p_pri >= PUSER) 178 p->p_pri = p->p_usrpri; 179 } 180 } 181 182 /* 183 * If the alternate clock has not made itself known then 184 * we must gather the statistics. 185 */ 186 if (phz == 0) 187 gatherstats(pc, ps); 188 189 /* 190 * Increment the time-of-day, and schedule 191 * processing of the callouts at a very low cpu priority, 192 * so we don't keep the relatively high clock interrupt 193 * priority any longer than necessary. 194 */ 195 bumptime(&time, tick); 196 if (needsoft) 197 setsoftclock(); 198 } 199 200 /* 201 * Gather statistics on resource utilization. 202 * 203 * We make a gross assumption: that the system has been in the 204 * state it is in (user state, kernel state, interrupt state, 205 * or idle state) for the entire last time interval, and 206 * update statistics accordingly. 207 */ 208 /*ARGSUSED*/ 209 gatherstats(pc, ps) 210 caddr_t pc; 211 int ps; 212 { 213 int cpstate, s; 214 215 /* 216 * Determine what state the cpu is in. 217 */ 218 if (USERMODE(ps)) { 219 /* 220 * CPU was in user state. 221 */ 222 if (u.u_procp->p_nice > NZERO) 223 cpstate = CP_NICE; 224 else 225 cpstate = CP_USER; 226 } else { 227 /* 228 * CPU was in system state. If profiling kernel 229 * increment a counter. 230 */ 231 cpstate = CP_SYS; 232 if (noproc && BASEPRI(ps)) 233 cpstate = CP_IDLE; 234 #ifdef GPROF 235 s = pc - s_lowpc; 236 if (profiling < 2 && s < s_textsize) 237 kcount[s / (HISTFRACTION * sizeof (*kcount))]++; 238 #endif 239 } 240 /* 241 * We maintain statistics shown by user-level statistics 242 * programs: the amount of time in each cpu state, and 243 * the amount of time each of DK_NDRIVE ``drives'' is busy. 244 */ 245 cp_time[cpstate]++; 246 for (s = 0; s < DK_NDRIVE; s++) 247 if (dk_busy&(1<<s)) 248 dk_time[s]++; 249 } 250 251 /* 252 * Software priority level clock interrupt. 253 * Run periodic events from timeout queue. 254 */ 255 /*ARGSUSED*/ 256 softclock(pc, ps) 257 caddr_t pc; 258 int ps; 259 { 260 261 for (;;) { 262 register struct callout *p1; 263 register caddr_t arg; 264 register int (*func)(); 265 register int a, s; 266 267 s = spl7(); 268 if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) { 269 splx(s); 270 break; 271 } 272 arg = p1->c_arg; func = p1->c_func; a = p1->c_time; 273 calltodo.c_next = p1->c_next; 274 p1->c_next = callfree; 275 callfree = p1; 276 splx(s); 277 (*func)(arg, a); 278 } 279 /* 280 * If trapped user-mode and profiling, give it 281 * a profiling tick. 282 */ 283 if (USERMODE(ps)) { 284 register struct proc *p = u.u_procp; 285 286 if (u.u_prof.pr_scale) { 287 p->p_flag |= SOWEUPC; 288 aston(); 289 } 290 /* 291 * Check to see if process has accumulated 292 * more than 10 minutes of user time. If so 293 * reduce priority to give others a chance. 294 */ 295 if (p->p_uid && p->p_nice == NZERO && 296 u.u_ru.ru_utime.tv_sec > 10 * 60) { 297 p->p_nice = NZERO+4; 298 (void) setpri(p); 299 p->p_pri = p->p_usrpri; 300 } 301 } 302 } 303 304 /* 305 * Bump a timeval by a small number of usec's. 306 */ 307 bumptime(tp, usec) 308 register struct timeval *tp; 309 int usec; 310 { 311 312 tp->tv_usec += usec; 313 if (tp->tv_usec >= 1000000) { 314 tp->tv_usec -= 1000000; 315 tp->tv_sec++; 316 } 317 } 318 319 /* 320 * Arrange that (*fun)(arg) is called in t/hz seconds. 321 */ 322 timeout(fun, arg, t) 323 int (*fun)(); 324 caddr_t arg; 325 register int t; 326 { 327 register struct callout *p1, *p2, *pnew; 328 register int s = spl7(); 329 330 if (t == 0) 331 t = 1; 332 pnew = callfree; 333 if (pnew == NULL) 334 panic("timeout table overflow"); 335 callfree = pnew->c_next; 336 pnew->c_arg = arg; 337 pnew->c_func = fun; 338 for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2) 339 if (p2->c_time > 0) 340 t -= p2->c_time; 341 p1->c_next = pnew; 342 pnew->c_next = p2; 343 pnew->c_time = t; 344 if (p2) 345 p2->c_time -= t; 346 splx(s); 347 } 348 349 /* 350 * untimeout is called to remove a function timeout call 351 * from the callout structure. 352 */ 353 untimeout(fun, arg) 354 int (*fun)(); 355 caddr_t arg; 356 { 357 register struct callout *p1, *p2; 358 register int s; 359 360 s = spl7(); 361 for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) { 362 if (p2->c_func == fun && p2->c_arg == arg) { 363 if (p2->c_next && p2->c_time > 0) 364 p2->c_next->c_time += p2->c_time; 365 p1->c_next = p2->c_next; 366 p2->c_next = callfree; 367 callfree = p2; 368 break; 369 } 370 } 371 splx(s); 372 } 373 374 /* 375 * Compute number of hz until specified time. 376 * Used to compute third argument to timeout() from an 377 * absolute time. 378 */ 379 hzto(tv) 380 struct timeval *tv; 381 { 382 register long ticks; 383 register long sec; 384 int s = spl7(); 385 386 /* 387 * If number of milliseconds will fit in 32 bit arithmetic, 388 * then compute number of milliseconds to time and scale to 389 * ticks. Otherwise just compute number of hz in time, rounding 390 * times greater than representible to maximum value. 391 * 392 * Delta times less than 25 days can be computed ``exactly''. 393 * Maximum value for any timeout in 10ms ticks is 250 days. 394 */ 395 sec = tv->tv_sec - time.tv_sec; 396 if (sec <= 0x7fffffff / 1000 - 1000) 397 ticks = ((tv->tv_sec - time.tv_sec) * 1000 + 398 (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000); 399 else if (sec <= 0x7fffffff / hz) 400 ticks = sec * hz; 401 else 402 ticks = 0x7fffffff; 403 splx(s); 404 return (ticks); 405 } 406 407 profil() 408 { 409 register struct a { 410 short *bufbase; 411 unsigned bufsize; 412 unsigned pcoffset; 413 unsigned pcscale; 414 } *uap = (struct a *)u.u_ap; 415 register struct uprof *upp = &u.u_prof; 416 417 upp->pr_base = uap->bufbase; 418 upp->pr_size = uap->bufsize; 419 upp->pr_off = uap->pcoffset; 420 upp->pr_scale = uap->pcscale; 421 } 422 423 opause() 424 { 425 426 for (;;) 427 sleep((caddr_t)&u, PSLEP); 428 } 429