/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	@(#)kern_clock.c	7.11 (Berkeley) 12/05/90
 */

#include "param.h"
#include "systm.h"
#include "dkstat.h"
#include "callout.h"
#include "user.h"
#include "kernel.h"
#include "proc.h"

#include "machine/reg.h"
#include "machine/psl.h"

#if defined(vax) || defined(tahoe)
#include "machine/mtpr.h"
#include "machine/clock.h"
#endif
#if defined(hp300)
#include "machine/mtpr.h"
#endif
#ifdef i386
#include "machine/frame.h"
#include "machine/segments.h"
#endif

#ifdef GPROF
#include "gprof.h"
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers which run
 * independently of each other. The main clock, running at hz
 * times per second, is used to do scheduling and timeout calculations.
 * The second timer does resource utilization estimation statistically
 * based on the state of the machine phz times a second. Both functions
 * can be performed by a single clock (ie hz == phz), however the
 * statistics will be much more prone to errors. Ideally a machine
 * would have separate clocks measuring time spent in user state, system
 * state, interrupt state, and idle state. These clocks would allow a non-
 * approximate measure of resource utilization.
 */

/*
 * TODO:
 *	time of day, system/user timing, timeouts, profiling on separate timers
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 * NOTE: handles at most one second of carry, so the caller must
 * guarantee usec < 1000000 (true for all callers here: usec is at
 * most tick + tickdelta).
 */
#define BUMPTIME(t, usec) { \
	register struct timeval *tp = (t); \
 \
	tp->tv_usec += (usec); \
	if (tp->tv_usec >= 1000000) { \
		tp->tv_usec -= 1000000; \
		tp->tv_sec++; \
	} \
}

/*
 * The hz hardware interval timer.
 * We update the events relating to real time.
 * If this timer is also being used to gather statistics,
 * we run through the statistics gathering routine as well.
 *
 * Called from the low-level clock interrupt once per tick, at the
 * hardware clock's interrupt priority.  On i386 the whole interrupt
 * frame is passed; elsewhere just the interrupted pc and psl.
 */
/*ARGSUSED*/
#ifndef i386
hardclock(pc, ps)
	caddr_t pc;
	int ps;
#else
hardclock(frame)
	struct intrframe frame;
#define	pc	frame.if_eip
#endif
{
	register struct callout *p1;
	register struct proc *p = u.u_procp;
	register int s;
	int needsoft = 0;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
	p1 = calltodo.c_next;
	while (p1) {
		/* first positive delta absorbs the tick for all later events */
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		/* stop once the newly-due entry is found; earlier overdue
		 * entries (already <= 0) are left for softclock() */
		if (p1->c_time == 0)
			break;
		p1 = p1->c_next;
	}

	/*
	 * Charge the time out based on the mode the cpu is in.
	 * Here again we fudge for the lack of proper interval timers
	 * assuming that the current state has been around at least
	 * one tick.
	 */
#ifdef i386
	if (ISPL(frame.if_cs) == SEL_UPL) {
#else
	if (USERMODE(ps)) {
#endif
		/* user-mode profiling ticks are delivered from softclock() */
		if (u.u_prof.pr_scale)
			needsoft = 1;
		/*
		 * CPU was in user state.  Increment
		 * user time counter, and process process-virtual time
		 * interval timer.
		 */
		BUMPTIME(&p->p_utime, tick);
		if (timerisset(&u.u_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
	} else {
		/*
		 * CPU was in system state.
		 */
		if (!noproc)
			BUMPTIME(&p->p_stime, tick);
	}

	/*
	 * If the cpu is currently scheduled to a process, then
	 * charge it with resource utilization for a tick, updating
	 * statistics which run in (user+system) virtual time,
	 * such as the cpu time limit and profiling timers.
	 * This assumes that the current process has been running
	 * the entire last tick.
	 */
	if (noproc == 0) {
		if ((p->p_utime.tv_sec+p->p_stime.tv_sec+1) >
		    u.u_rlimit[RLIMIT_CPU].rlim_cur) {
			/* over the soft cpu limit: signal, then stretch the
			 * soft limit 5 seconds (up to the hard limit) so the
			 * process gets periodic SIGXCPUs rather than one */
			psignal(p, SIGXCPU);
			if (u.u_rlimit[RLIMIT_CPU].rlim_cur <
			    u.u_rlimit[RLIMIT_CPU].rlim_max)
				u.u_rlimit[RLIMIT_CPU].rlim_cur += 5;
		}
		if (timerisset(&u.u_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&u.u_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

	/*
	 * We adjust the priority of the current process.
	 * The priority of a process gets worse as it accumulates
	 * CPU time.  The cpu usage estimator (p_cpu) is increased here
	 * and the formula for computing priorities (in kern_synch.c)
	 * will compute a different value each time the p_cpu increases
	 * by 4.  The cpu usage estimator ramps up quite quickly when
	 * the process is running (linearly), and decays away exponentially,
	 * at a rate which is proportionally slower when the system is
	 * busy.  The basic principal is that the system will 90% forget
	 * that a process used a lot of CPU time in 5*loadav seconds.
	 * This causes the system to favor processes which haven't run
	 * much recently, and to round-robin among other processes.
	 */
	if (!noproc) {
		p->p_cpticks++;
		/* saturate p_cpu at its maximum rather than wrapping to 0 */
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu&3) == 0) {
			(void) setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}

	/*
	 * If the alternate clock has not made itself known then
	 * we must gather the statistics.
	 */
	if (phz == 0)
#ifdef i386
		gatherstats(pc, ISPL(frame.if_cs), frame.if_ppl);
#else
		gatherstats(pc, ps);
#endif

	/*
	 * Increment the time-of-day, and schedule
	 * processing of the callouts at a very low cpu priority,
	 * so we don't keep the relatively high clock interrupt
	 * priority any longer than necessary.
	 */
	if (timedelta == 0)
		BUMPTIME(&time, tick)	/* no `;': macro is a braced block */
	else {
		register delta;

		/* adjtime(2) in progress: skew each tick by tickdelta
		 * usec until timedelta is consumed */
		if (timedelta < 0) {
			delta = tick - tickdelta;
			timedelta += tickdelta;
		} else {
			delta = tick + tickdelta;
			timedelta -= tickdelta;
		}
		BUMPTIME(&time, delta);
	}
	if (needsoft) {
#ifdef i386
		if (frame.if_ppl == 0) {
#else
		if (BASEPRI(ps)) {
#endif
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void) splsoftclock();
#ifdef i386
			softclock(frame);
#else
			softclock(pc, ps);
#endif
		} else
			setsoftclock();
	}
}

int	dk_ndrive = DK_NDRIVE;
/*
 * Gather statistics on resource utilization.
 *
 * We make a gross assumption: that the system has been in the
 * state it is in (user state, kernel state, interrupt state,
 * or idle state) for the entire last time interval, and
 * update statistics accordingly.
 *
 * Called at hz from hardclock() when phz == 0, otherwise at phz
 * from the alternate clock's interrupt handler.
 */
/*ARGSUSED*/
#ifdef i386
#undef pc
gatherstats(pc, ps, ppl)
#else
gatherstats(pc, ps)
#endif
	caddr_t pc;
	int ps;
{
	register int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
#ifdef i386
	if (ps == SEL_UPL) {
#else
	if (USERMODE(ps)) {
#endif
		/*
		 * CPU was in user state.
		 */
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling kernel
		 * increment a counter.  If no process is running
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
		 */
		cpstate = CP_SYS;
#if defined(i386)
		if (noproc && ps == 0)
#else
		if (noproc && BASEPRI(ps))
#endif
			cpstate = CP_IDLE;
#ifdef GPROF
		/* kernel pc-sample histogram; `s' reused as text offset */
		s = pc - s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 */
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}

/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 *
 * Runs at splsoftclock; raises to splhigh only while unlinking each
 * due callout, then calls its function with interrupts enabled.
 */
/*ARGSUSED*/
#ifdef i386
softclock(frame)
	struct	intrframe frame;
#define	pc	frame.if_eip
#else
softclock(pc, ps)
	caddr_t pc;
	int ps;
#endif
{

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register int (*func)();
		register int a, s;

		s = splhigh();
		/* done when the queue is empty or the head is not yet due */
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		/* `a' (<= 0) tells the handler how late it is, in ticks */
		(*func)(arg, a);
	}
	/*
	 * If trapped user-mode and profiling, give it
	 * a profiling tick.
	 */
#ifdef i386
	if (ISPL(frame.if_cs) == SEL_UPL) {
#else
	if (USERMODE(ps)) {
#endif
		register struct proc *p = u.u_procp;

		if (u.u_prof.pr_scale) {
			/* defer the actual sample to the return-to-user path */
			p->p_flag |= SOWEUPC;
			aston();
		}
		/*
		 * Check to see if process has accumulated
		 * more than 10 minutes of user time.  If so
		 * reduce priority to give others a chance.
		 */
		if (p->p_uid && p->p_nice == NZERO &&
		    p->p_utime.tv_sec > 10 * 60) {
			p->p_nice = NZERO+4;
			(void) setpri(p);
			p->p_pri = p->p_usrpri;
		}
	}
}

/*
 * Arrange that (*fun)(arg) is called in t/hz seconds.
 * t <= 0 is clamped to 1 tick.  Panics if the callout table is full.
 */
timeout(fun, arg, t)
	int (*fun)();
	caddr_t arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s = splhigh();

	if (t <= 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	/* walk the delta-encoded queue, consuming earlier deltas from t,
	 * to find the insertion point */
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;	/* successor's delta is now relative to us */
	splx(s);
}

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 * Matching is on both function and argument; only the first
 * match is removed.  Silently does nothing if no entry matches.
 */
untimeout(fun, arg)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = splhigh();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == fun && p2->c_arg == arg) {
			/* give the removed entry's delta to its successor
			 * so later timeouts still fire on schedule */
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 * Returns at least 0; times already past yield 0 (or a small value),
 * which timeout() clamps to 1 tick.
 */
hzto(tv)
	struct timeval *tv;
{
	register long ticks;
	register long sec;
	int s = splhigh();

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representible to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		/* NOTE(review): integer divide by (tick / 1000) assumes
		 * tick >= 1000 usec, i.e. hz <= 1000 — true for hardware
		 * of this vintage */
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

/*
 * profil(2) system call: start (or stop, if pcscale == 0) pc-sample
 * profiling of the current process by recording the sample buffer
 * parameters in the u. area; samples are taken from softclock().
 */
/* ARGSUSED */
profil(p, uap, retval)
	struct proc *p;
	register struct args {
		short	*bufbase;	/* base of user sample buffer */
		unsigned bufsize;	/* buffer size in bytes */
		unsigned pcoffset;	/* pc offset subtracted before scaling */
		unsigned pcscale;	/* fixed-point scale; 0 disables */
	} *uap;
	int *retval;
{
	register struct uprof *upp = &u.u_prof;

	upp->pr_base = uap->bufbase;
	upp->pr_size = uap->bufsize;
	upp->pr_off = uap->pcoffset;
	upp->pr_scale = uap->pcscale;
	return (0);
}