1 /* 2 * Copyright (c) 1982, 1986 Regents of the University of California. 3 * All rights reserved. The Berkeley software License Agreement 4 * specifies the terms and conditions for redistribution. 5 * 6 * @(#)kern_clock.c 7.10 (Berkeley) 06/30/90 7 */ 8 9 #include "param.h" 10 #include "systm.h" 11 #include "dkstat.h" 12 #include "callout.h" 13 #include "user.h" 14 #include "kernel.h" 15 #include "proc.h" 16 #include "vm.h" 17 #include "text.h" 18 19 #include "machine/reg.h" 20 #include "machine/psl.h" 21 22 #if defined(vax) || defined(tahoe) 23 #include "machine/mtpr.h" 24 #include "machine/clock.h" 25 #endif 26 #if defined(hp300) 27 #include "machine/mtpr.h" 28 #endif 29 #ifdef i386 30 #include "machine/frame.h" 31 #include "machine/segments.h" 32 #endif 33 34 #ifdef GPROF 35 #include "gprof.h" 36 #endif 37 38 /* 39 * Clock handling routines. 40 * 41 * This code is written to operate with two timers which run 42 * independently of each other. The main clock, running at hz 43 * times per second, is used to do scheduling and timeout calculations. 44 * The second timer does resource utilization estimation statistically 45 * based on the state of the machine phz times a second. Both functions 46 * can be performed by a single clock (ie hz == phz), however the 47 * statistics will be much more prone to errors. Ideally a machine 48 * would have separate clocks measuring time spent in user state, system 49 * state, interrupt state, and idle state. These clocks would allow a non- 50 * approximate measure of resource utilization. 51 */ 52 53 /* 54 * TODO: 55 * time of day, system/user timing, timeouts, profiling on separate timers 56 * allocate more timeout table slots when table overflows. 57 */ 58 59 /* 60 * Bump a timeval by a small number of usec's. 
61 */ 62 #define BUMPTIME(t, usec) { \ 63 register struct timeval *tp = (t); \ 64 \ 65 tp->tv_usec += (usec); \ 66 if (tp->tv_usec >= 1000000) { \ 67 tp->tv_usec -= 1000000; \ 68 tp->tv_sec++; \ 69 } \ 70 } 71 72 /* 73 * The hz hardware interval timer. 74 * We update the events relating to real time. 75 * If this timer is also being used to gather statistics, 76 * we run through the statistics gathering routine as well. 77 */ 78 /*ARGSUSED*/ 79 #ifndef i386 80 hardclock(pc, ps) 81 caddr_t pc; 82 int ps; 83 #else 84 hardclock(frame) 85 struct intrframe frame; 86 #define pc frame.if_eip 87 #endif 88 { 89 register struct callout *p1; 90 register struct proc *p = u.u_procp; 91 register int s; 92 int needsoft = 0; 93 extern int tickdelta; 94 extern long timedelta; 95 96 /* 97 * Update real-time timeout queue. 98 * At front of queue are some number of events which are ``due''. 99 * The time to these is <= 0 and if negative represents the 100 * number of ticks which have passed since it was supposed to happen. 101 * The rest of the q elements (times > 0) are events yet to happen, 102 * where the time for each is given as a delta from the previous. 103 * Decrementing just the first of these serves to decrement the time 104 * to all events. 105 */ 106 p1 = calltodo.c_next; 107 while (p1) { 108 if (--p1->c_time > 0) 109 break; 110 needsoft = 1; 111 if (p1->c_time == 0) 112 break; 113 p1 = p1->c_next; 114 } 115 116 /* 117 * Charge the time out based on the mode the cpu is in. 118 * Here again we fudge for the lack of proper interval timers 119 * assuming that the current state has been around at least 120 * one tick. 121 */ 122 #ifdef i386 123 if (ISPL(frame.if_cs) == SEL_UPL) { 124 #else 125 if (USERMODE(ps)) { 126 #endif 127 if (u.u_prof.pr_scale) 128 needsoft = 1; 129 /* 130 * CPU was in user state. Increment 131 * user time counter, and process process-virtual time 132 * interval timer. 
133 */ 134 BUMPTIME(&p->p_utime, tick); 135 if (timerisset(&u.u_timer[ITIMER_VIRTUAL].it_value) && 136 itimerdecr(&u.u_timer[ITIMER_VIRTUAL], tick) == 0) 137 psignal(p, SIGVTALRM); 138 } else { 139 /* 140 * CPU was in system state. 141 */ 142 if (!noproc) 143 BUMPTIME(&p->p_stime, tick); 144 } 145 146 /* 147 * If the cpu is currently scheduled to a process, then 148 * charge it with resource utilization for a tick, updating 149 * statistics which run in (user+system) virtual time, 150 * such as the cpu time limit and profiling timers. 151 * This assumes that the current process has been running 152 * the entire last tick. 153 */ 154 if (noproc == 0) { 155 if ((p->p_utime.tv_sec+p->p_stime.tv_sec+1) > 156 u.u_rlimit[RLIMIT_CPU].rlim_cur) { 157 psignal(p, SIGXCPU); 158 if (u.u_rlimit[RLIMIT_CPU].rlim_cur < 159 u.u_rlimit[RLIMIT_CPU].rlim_max) 160 u.u_rlimit[RLIMIT_CPU].rlim_cur += 5; 161 } 162 if (timerisset(&u.u_timer[ITIMER_PROF].it_value) && 163 itimerdecr(&u.u_timer[ITIMER_PROF], tick) == 0) 164 psignal(p, SIGPROF); 165 s = p->p_rssize; 166 u.u_ru.ru_idrss += s; 167 #ifdef notdef 168 u.u_ru.ru_isrss += 0; /* XXX (haven't got this) */ 169 #endif 170 if (p->p_textp) { 171 register int xrss = p->p_textp->x_rssize; 172 173 s += xrss; 174 u.u_ru.ru_ixrss += xrss; 175 } 176 if (s > u.u_ru.ru_maxrss) 177 u.u_ru.ru_maxrss = s; 178 } 179 180 /* 181 * We adjust the priority of the current process. 182 * The priority of a process gets worse as it accumulates 183 * CPU time. The cpu usage estimator (p_cpu) is increased here 184 * and the formula for computing priorities (in kern_synch.c) 185 * will compute a different value each time the p_cpu increases 186 * by 4. The cpu usage estimator ramps up quite quickly when 187 * the process is running (linearly), and decays away exponentially, 188 * at a rate which is proportionally slower when the system is 189 * busy. 
The basic principal is that the system will 90% forget 190 * that a process used a lot of CPU time in 5*loadav seconds. 191 * This causes the system to favor processes which haven't run 192 * much recently, and to round-robin among other processes. 193 */ 194 if (!noproc) { 195 p->p_cpticks++; 196 if (++p->p_cpu == 0) 197 p->p_cpu--; 198 if ((p->p_cpu&3) == 0) { 199 (void) setpri(p); 200 if (p->p_pri >= PUSER) 201 p->p_pri = p->p_usrpri; 202 } 203 } 204 205 /* 206 * If the alternate clock has not made itself known then 207 * we must gather the statistics. 208 */ 209 if (phz == 0) 210 #ifdef i386 211 gatherstats(pc, ISPL(frame.if_cs), frame.if_ppl); 212 #else 213 gatherstats(pc, ps); 214 #endif 215 216 /* 217 * Increment the time-of-day, and schedule 218 * processing of the callouts at a very low cpu priority, 219 * so we don't keep the relatively high clock interrupt 220 * priority any longer than necessary. 221 */ 222 if (timedelta == 0) 223 BUMPTIME(&time, tick) 224 else { 225 register delta; 226 227 if (timedelta < 0) { 228 delta = tick - tickdelta; 229 timedelta += tickdelta; 230 } else { 231 delta = tick + tickdelta; 232 timedelta -= tickdelta; 233 } 234 BUMPTIME(&time, delta); 235 } 236 if (needsoft) { 237 #ifdef i386 238 if (frame.if_ppl == 0) { 239 #else 240 if (BASEPRI(ps)) { 241 #endif 242 /* 243 * Save the overhead of a software interrupt; 244 * it will happen as soon as we return, so do it now. 245 */ 246 (void) splsoftclock(); 247 #ifdef i386 248 softclock(frame); 249 #else 250 softclock(pc, ps); 251 #endif 252 } else 253 setsoftclock(); 254 } 255 } 256 257 int dk_ndrive = DK_NDRIVE; 258 /* 259 * Gather statistics on resource utilization. 260 * 261 * We make a gross assumption: that the system has been in the 262 * state it is in (user state, kernel state, interrupt state, 263 * or idle state) for the entire last time interval, and 264 * update statistics accordingly. 
 * Called from hardclock() when no separate statistics clock exists
 * (phz == 0); presumably also from the alternate clock interrupt
 * when one is configured -- that caller is outside this file.
 */
/*ARGSUSED*/
#ifdef i386
#undef pc
/*
 * The i386 variant receives a third argument, ppl, the interrupt
 * priority level that was in effect when the clock interrupted
 * (used below to distinguish idle time from system time).
 */
gatherstats(pc, ps, ppl)
#else
gatherstats(pc, ps)
#endif
	caddr_t pc;
	int ps;
{
	register int cpstate, s;

	/*
	 * Determine what state the cpu is in.
	 */
#ifdef i386
	if (ps == SEL_UPL) {
#else
	if (USERMODE(ps)) {
#endif
		/*
		 * CPU was in user state.  Niced processes are charged
		 * to CP_NICE rather than CP_USER.
		 */
		if (u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		/*
		 * CPU was in system state.  If profiling kernel
		 * increment a counter.  If no process is running
		 * then this is a system tick if we were running
		 * at a non-zero IPL (in a driver).  If a process is running,
		 * then we charge it with system time even if we were
		 * at a non-zero IPL, since the system often runs
		 * this way during processing of system calls.
		 * This is approximate, but the lack of true interval
		 * timers makes doing anything else difficult.
		 */
		cpstate = CP_SYS;
#if defined(i386)
		if (noproc && ps == 0)
#else
		if (noproc && BASEPRI(ps))
#endif
			cpstate = CP_IDLE;
#ifdef GPROF
		/*
		 * Kernel statistical profiling: bucket the interrupted
		 * pc into the kcount histogram (see gprof.h), unless
		 * profiling has been suspended (profiling >= 2).
		 */
		s = pc - s_lowpc;
		if (profiling < 2 && s < s_textsize)
			kcount[s / (HISTFRACTION * sizeof (*kcount))]++;
#endif
	}
	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 * (s is reused here as a drive index.)
	 */
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
}

/*
 * Software priority level clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
#ifdef i386
softclock(frame)
	struct intrframe frame;
#define	pc	frame.if_eip
#else
softclock(pc, ps)
	caddr_t pc;
	int ps;
#endif
{

	for (;;) {
		register struct callout *p1;
		register caddr_t arg;
		register int (*func)();
		register int a, s;

		/*
		 * Pop expired entries (c_time <= 0) off the front of
		 * the callout queue one at a time; the hardware clock
		 * also touches this queue, so it is only examined and
		 * unlinked at splhigh.
		 */
		s = splhigh();
		if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
			splx(s);
			break;
		}
		/*
		 * Copy the entry out and return it to the free list
		 * before dropping the ipl: the handler itself runs at
		 * softclock priority, outside the protected window.
		 */
		arg = p1->c_arg; func = p1->c_func; a = p1->c_time;
		calltodo.c_next = p1->c_next;
		p1->c_next = callfree;
		callfree = p1;
		splx(s);
		(*func)(arg, a);
	}
	/*
	 * If trapped user-mode and profiling, give it
	 * a profiling tick.  SOWEUPC plus aston() arranges for the
	 * profile counter to be bumped on return to user mode.
	 */
#ifdef i386
	if (ISPL(frame.if_cs) == SEL_UPL) {
#else
	if (USERMODE(ps)) {
#endif
		register struct proc *p = u.u_procp;

		if (u.u_prof.pr_scale) {
			p->p_flag |= SOWEUPC;
			aston();
		}
		/*
		 * Check to see if process has accumulated
		 * more than 10 minutes of user time.  If so
		 * reduce priority to give others a chance.
		 * Applies only to non-root (p_uid != 0) processes
		 * still at the default nice value.
		 */
		if (p->p_uid && p->p_nice == NZERO &&
		    p->p_utime.tv_sec > 10 * 60) {
			p->p_nice = NZERO+4;
			(void) setpri(p);
			p->p_pri = p->p_usrpri;
		}
	}
}

/*
 * Arrange that (*fun)(arg) is called in t/hz seconds.
 */
timeout(fun, arg, t)
	int (*fun)();
	caddr_t arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s = splhigh();

	/* A timeout in the past (or right now) fires at the next tick. */
	if (t <= 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	/*
	 * Queue times are stored as deltas from the preceding entry
	 * (see hardclock).  Walk forward converting t into such a
	 * delta; already-due entries (c_time <= 0) contribute nothing.
	 */
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;	/* successor is now relative to us */
	splx(s);
}

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
untimeout(fun, arg)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = splhigh();
	for (p1 = &calltodo; (p2 = p1->c_next) != 0; p1 = p2) {
		if (p2->c_func == fun && p2->c_arg == arg) {
			/*
			 * Give the removed entry's remaining delta back
			 * to its successor so later timeouts still fire
			 * at the right time.
			 */
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */
hzto(tv)
	struct timeval *tv;
{
	register long ticks;
	register long sec;
	int s = splhigh();	/* keep `time' stable while we read it */

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representible to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 *
	 * NOTE(review): the exact branch divides by (tick / 1000),
	 * which assumes tick >= 1000 usec, i.e. hz <= 1000 -- confirm
	 * for any port with a faster clock.
	 */
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

/*
 * profil system call: record the user's profiling buffer parameters
 * in the u. area.  No sampling happens here; hardclock/softclock
 * consult u_prof (pr_scale != 0 enables profiling) on each tick.
 * Always succeeds.
 */
/* ARGSUSED */
profil(p, uap, retval)
	struct proc *p;
	register struct args {
		short	*bufbase;	/* base of user profiling buffer */
		unsigned bufsize;	/* buffer size in bytes */
		unsigned pcoffset;	/* pc offset where profiling begins */
		unsigned pcscale;	/* pc-to-bucket scale; 0 disables */
	} *uap;
	int *retval;
{
	register struct uprof *upp = &u.u_prof;

	upp->pr_base = uap->bufbase;
	upp->pr_size = uap->bufsize;
	upp->pr_off = uap->pcoffset;
	upp->pr_scale = uap->pcscale;
	return (0);
}