1 /*- 2 * Copyright (c) 1982, 1986, 1991 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)kern_clock.c 7.22 (Berkeley) 07/08/92 8 */ 9 10 #include "param.h" 11 #include "systm.h" 12 #include "dkstat.h" 13 #include "callout.h" 14 #include "kernel.h" 15 #include "proc.h" 16 #include "resourcevar.h" 17 18 #include "machine/cpu.h" 19 20 #ifdef GPROF 21 #include "gmon.h" 22 extern u_short *kcount; 23 #endif 24 25 /* 26 * Clock handling routines. 27 * 28 * This code is written to operate with two timers that run independently of 29 * each other. The main clock, running hz times per second, is used to keep 30 * track of real time. The second timer handles kernel and user profiling, 31 * and does resource use estimation. If the second timer is programmable, 32 * it is randomized to avoid aliasing between the two clocks. For example, 33 * the randomization prevents an adversary from always giving up the cpu 34 * just before its quantum expires. Otherwise, it would never accumulate 35 * cpu ticks. The mean frequency of the second timer is stathz. 36 * 37 * If no second timer exists, stathz will be zero; in this case we drive 38 * profiling and statistics off the main clock. This WILL NOT be accurate; 39 * do not do it unless absolutely necessary. 40 * 41 * The statistics clock may (or may not) be run at a higher rate while 42 * profiling. This profile clock runs at profhz. We require that profhz 43 * be an integral multiple of stathz. 44 * 45 * If the statistics clock is running fast, it must be divided by the ratio 46 * profhz/stathz for statistics. (For profiling, every tick counts.) 47 */ 48 49 /* 50 * TODO: 51 * allocate more timeout table slots when table overflows. 52 */ 53 54 /* 55 * Bump a timeval by a small number of usec's. 
 */
/*
 * NOTE(review): (usec) is expected to be less than 1000000; only a single
 * second of carry is handled per invocation.
 */
#define BUMPTIME(t, usec) { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}

int	stathz;			/* statistics clock rate, 0 if none (see file comment) */
int	profhz;			/* profiling clock rate; integral multiple of stathz */
int	profprocs;		/* count of processes currently being profiled */
static int psratio, psdiv, pscnt;	/* prof => stat divider */

volatile struct	timeval time;		/* time of day */
volatile struct	timeval mono_time;	/* monotonically increasing time */

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks()
{
	register int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 * If there is no separate statistics clock (stathz == 0),
	 * profiling is driven from the main clock at hz.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
}

/*
 * The real-time timer, interrupting hz times per second.
 * Decrements the callout queue, runs the current process's interval
 * timers, bumps the time of day, and schedules softclock() if any
 * callouts have come due.
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct callout *p1;
	register struct proc *p;
	register int needsoft;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
	needsoft = 0;
	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
	}

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 * ITIMER_VIRTUAL counts only while in user mode;
		 * ITIMER_PROF counts in both user and kernel mode.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	/*
	 * Increment the time-of-day.  When timedelta is nonzero the clock
	 * is being slewed (presumably by adjtime(2) — set elsewhere):
	 * each tick is lengthened or shortened by tickdelta until
	 * timedelta is consumed.
	 */
	if (timedelta == 0) {
		BUMPTIME(&time, tick);
		BUMPTIME(&mono_time, tick);
	} else {
		register int delta;

		if (timedelta < 0) {
			delta = tick - tickdelta;
			timedelta += tickdelta;
		} else {
			delta = tick + tickdelta;
			timedelta -= tickdelta;
		}
		BUMPTIME(&time, delta);
		BUMPTIME(&mono_time, delta);
	}

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (needsoft) {
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void)splsoftclock();
			softclock();
		} else
			setsoftclock();
	}
}

/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
void
softclock()
{
	register struct callout *c;
	register void *arg;
	register void (*func) __P((void *));
	register int s;

	s = splhigh();
	/*
	 * Pop each due entry (c_time <= 0) off the front of the queue and
	 * return it to the free list while still at splhigh; drop priority
	 * before calling the handler so it runs at the caller's level.
	 */
	while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
		func = c->c_func;
		arg = c->c_arg;
		calltodo.c_next = c->c_next;
		c->c_next = callfree;
		callfree = c;
		splx(s);
		(*func)(arg);
		(void) splhigh();
	}
	splx(s);
}

/*
 * Arrange that (*func)(arg) is called in t/hz seconds.
221 */ 222 void 223 timeout(func, arg, t) 224 void (*func) __P((void *)); 225 void *arg; 226 register int t; 227 { 228 register struct callout *p1, *p2, *pnew; 229 register int s; 230 231 s = splhigh(); 232 if (t <= 0) 233 t = 1; 234 pnew = callfree; 235 if (pnew == NULL) 236 panic("timeout table overflow"); 237 callfree = pnew->c_next; 238 pnew->c_arg = arg; 239 pnew->c_func = func; 240 for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2) 241 if (p2->c_time > 0) 242 t -= p2->c_time; 243 p1->c_next = pnew; 244 pnew->c_next = p2; 245 pnew->c_time = t; 246 if (p2) 247 p2->c_time -= t; 248 splx(s); 249 } 250 251 /* 252 * untimeout is called to remove a function timeout call 253 * from the callout structure. 254 */ 255 void 256 untimeout(func, arg) 257 void (*func) __P((void *)); 258 void *arg; 259 { 260 register struct callout *p1, *p2; 261 register int s; 262 263 s = splhigh(); 264 for (p1 = &calltodo; (p2 = p1->c_next) != NULL; p1 = p2) { 265 if (p2->c_func == func && p2->c_arg == arg) { 266 if (p2->c_next && p2->c_time > 0) 267 p2->c_next->c_time += p2->c_time; 268 p1->c_next = p2->c_next; 269 p2->c_next = callfree; 270 callfree = p2; 271 break; 272 } 273 } 274 splx(s); 275 } 276 277 /* 278 * Compute number of hz until specified time. 279 * Used to compute third argument to timeout() from an 280 * absolute time. 281 */ 282 int 283 hzto(tv) 284 struct timeval *tv; 285 { 286 register long ticks, sec; 287 int s; 288 289 /* 290 * If number of milliseconds will fit in 32 bit arithmetic, 291 * then compute number of milliseconds to time and scale to 292 * ticks. Otherwise just compute number of hz in time, rounding 293 * times greater than representible to maximum value. 294 * 295 * Delta times less than 25 days can be computed ``exactly''. 296 * Maximum value for any timeout in 10ms ticks is 250 days. 
297 */ 298 s = splhigh(); 299 sec = tv->tv_sec - time.tv_sec; 300 if (sec <= 0x7fffffff / 1000 - 1000) 301 ticks = ((tv->tv_sec - time.tv_sec) * 1000 + 302 (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000); 303 else if (sec <= 0x7fffffff / hz) 304 ticks = sec * hz; 305 else 306 ticks = 0x7fffffff; 307 splx(s); 308 return (ticks); 309 } 310 311 /* 312 * Start profiling on a process. 313 * 314 * Kernel profiling passes proc0 which never exits and hence 315 * keeps the profile clock running constantly. 316 */ 317 void 318 startprofclock(p) 319 register struct proc *p; 320 { 321 int s; 322 323 if ((p->p_flag & SPROFIL) == 0) { 324 p->p_flag |= SPROFIL; 325 if (++profprocs == 1 && stathz != 0) { 326 s = splstatclock(); 327 psdiv = pscnt = psratio; 328 setstatclockrate(profhz); 329 splx(s); 330 } 331 } 332 } 333 334 /* 335 * Stop profiling on a process. 336 */ 337 void 338 stopprofclock(p) 339 register struct proc *p; 340 { 341 int s; 342 343 if (p->p_flag & SPROFIL) { 344 p->p_flag &= ~SPROFIL; 345 if (--profprocs == 0 && stathz != 0) { 346 s = splstatclock(); 347 psdiv = pscnt = 1; 348 setstatclockrate(stathz); 349 splx(s); 350 } 351 } 352 } 353 354 int dk_ndrive = DK_NDRIVE; 355 356 /* 357 * Statistics clock. Grab profile sample, and if divider reaches 0, 358 * do process and kernel statistics. 359 */ 360 void 361 statclock(frame) 362 register struct clockframe *frame; 363 { 364 #ifdef GPROF 365 register struct gmonparam *g; 366 #endif 367 register struct proc *p; 368 register int i; 369 370 if (CLKF_USERMODE(frame)) { 371 p = curproc; 372 if (p->p_flag & SPROFIL) 373 addupc_intr(p, CLKF_PC(frame), 1); 374 if (--pscnt > 0) 375 return; 376 /* 377 * Came from user mode; CPU was in user state. 378 * If this process is being profiled record the tick. 379 */ 380 p->p_uticks++; 381 if (p->p_nice > NZERO) 382 cp_time[CP_NICE]++; 383 else 384 cp_time[CP_USER]++; 385 } else { 386 #ifdef GPROF 387 /* 388 * Kernel statistics are just like addupc_intr, only easier. 
389 */ 390 g = &_gmonparam; 391 if (g->state == GMON_PROF_ON) { 392 i = CLKF_PC(frame) - g->lowpc; 393 if (i < g->textsize) 394 kcount[s / (HISTFRACTION * sizeof(*kcount))]++; 395 } 396 #endif 397 if (--pscnt > 0) 398 return; 399 /* 400 * Came from kernel mode, so we were: 401 * - handling an interrupt, 402 * - doing syscall or trap work on behalf of the current 403 * user process, or 404 * - spinning in the idle loop. 405 * Whichever it is, charge the time as appropriate. 406 * Note that we charge interrupts to the current process, 407 * regardless of whether they are ``for'' that process, 408 * so that we know how much of its real time was spent 409 * in ``non-process'' (i.e., interrupt) work. 410 */ 411 p = curproc; 412 if (CLKF_INTR(frame)) { 413 if (p != NULL) 414 p->p_iticks++; 415 cp_time[CP_INTR]++; 416 } else if (p != NULL) { 417 p->p_sticks++; 418 cp_time[CP_SYS]++; 419 } else 420 cp_time[CP_IDLE]++; 421 } 422 pscnt = psdiv; 423 424 /* 425 * We maintain statistics shown by user-level statistics 426 * programs: the amount of time in each cpu state, and 427 * the amount of time each of DK_NDRIVE ``drives'' is busy. 428 * 429 * XXX should either run linked list of drives, or (better) 430 * grab timestamps in the start & done code. 431 */ 432 for (i = 0; i < DK_NDRIVE; i++) 433 if (dk_busy & (1 << i)) 434 dk_time[i]++; 435 436 /* 437 * We adjust the priority of the current process. 438 * The priority of a process gets worse as it accumulates 439 * CPU time. The cpu usage estimator (p_cpu) is increased here 440 * and the formula for computing priorities (in kern_synch.c) 441 * will compute a different value each time the p_cpu increases 442 * by 4. The cpu usage estimator ramps up quite quickly when 443 * the process is running (linearly), and decays away 444 * exponentially, at a rate which is proportionally slower 445 * when the system is busy. 
The basic principal is that the 446 * system will 90% forget that a process used a lot of CPU 447 * time in 5*loadav seconds. This causes the system to favor 448 * processes which haven't run much recently, and to 449 * round-robin among other processes. 450 */ 451 if (p != NULL) { 452 p->p_cpticks++; 453 if (++p->p_cpu == 0) 454 p->p_cpu--; 455 if ((p->p_cpu & 3) == 0) { 456 setpri(p); 457 if (p->p_pri >= PUSER) 458 p->p_pri = p->p_usrpri; 459 } 460 } 461 } 462 463 /* 464 * Return information about system clocks. 465 */ 466 /* ARGSUSED */ 467 kinfo_clockrate(op, where, acopysize, arg, aneeded) 468 int op; 469 register char *where; 470 int *acopysize, arg, *aneeded; 471 { 472 int buflen, error; 473 struct clockinfo clockinfo; 474 475 *aneeded = sizeof(clockinfo); 476 if (where == NULL) 477 return (0); 478 /* 479 * Check for enough buffering. 480 */ 481 buflen = *acopysize; 482 if (buflen < sizeof(clockinfo)) { 483 *acopysize = 0; 484 return (0); 485 } 486 /* 487 * Copyout clockinfo structure. 488 */ 489 clockinfo.hz = hz; 490 clockinfo.tick = tick; 491 clockinfo.profhz = profhz; 492 clockinfo.stathz = stathz ? stathz : hz; 493 if (error = copyout((caddr_t)&clockinfo, where, sizeof(clockinfo))) 494 return (error); 495 *acopysize = sizeof(clockinfo); 496 return (0); 497 } 498