1 /*- 2 * Copyright (c) 1982, 1986, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)kern_clock.c 8.1 (Berkeley) 06/10/93 8 */ 9 10 #include <sys/param.h> 11 #include <sys/systm.h> 12 #include <sys/dkstat.h> 13 #include <sys/callout.h> 14 #include <sys/kernel.h> 15 #include <sys/proc.h> 16 #include <sys/resourcevar.h> 17 18 #include <machine/cpu.h> 19 20 #ifdef GPROF 21 #include <sys/gmon.h> 22 #endif 23 24 /* 25 * Clock handling routines. 26 * 27 * This code is written to operate with two timers that run independently of 28 * each other. The main clock, running hz times per second, is used to keep 29 * track of real time. The second timer handles kernel and user profiling, 30 * and does resource use estimation. If the second timer is programmable, 31 * it is randomized to avoid aliasing between the two clocks. For example, 32 * the randomization prevents an adversary from always giving up the cpu 33 * just before its quantum expires. Otherwise, it would never accumulate 34 * cpu ticks. The mean frequency of the second timer is stathz. 35 * 36 * If no second timer exists, stathz will be zero; in this case we drive 37 * profiling and statistics off the main clock. This WILL NOT be accurate; 38 * do not do it unless absolutely necessary. 39 * 40 * The statistics clock may (or may not) be run at a higher rate while 41 * profiling. This profile clock runs at profhz. We require that profhz 42 * be an integral multiple of stathz. 43 * 44 * If the statistics clock is running fast, it must be divided by the ratio 45 * profhz/stathz for statistics. (For profiling, every tick counts.) 46 */ 47 48 /* 49 * TODO: 50 * allocate more timeout table slots when table overflows. 51 */ 52 53 /* 54 * Bump a timeval by a small number of usec's. 
 */
/*
 * A single carry into tv_sec suffices: the only increments passed in
 * (tick, or tick + tickdelta from hardclock()) are well under one second.
 */
#define BUMPTIME(t, usec) { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}

int	stathz;			/* statistics clock rate, or 0 if none */
int	profhz;			/* profiling clock rate; integral multiple of stathz */
int	profprocs;		/* number of processes currently being profiled */
int	ticks;			/* count of hardclock() ticks since boot */
static int psdiv, pscnt;	/* prof => stat divider */
int	psratio;		/* ratio: prof / stat */

volatile struct timeval time;		/* wall-clock time of day */
volatile struct timeval mono_time;	/* bumped in lockstep with time below;
					 * presumably never stepped by
					 * settimeofday() — set elsewhere */

/*
 * Initialize clock frequencies and start both clocks running.
 *
 * cpu_initclocks() is expected to set stathz/profhz as appropriate
 * for the hardware; we then derive psratio from whatever it chose.
 */
void
initclocks()
{
	register int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 * With no separate statistics clock both rates default to hz.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
}

/*
 * The real-time timer, interrupting hz times per second.
 *
 * Runs at clock interrupt priority.  Ages the callout queue, runs the
 * current process's virtual/profiling interval timers, advances the
 * time of day, and — if there is no separate statistics clock —
 * drives statclock() as well.
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct callout *p1;
	register struct proc *p;
	register int delta, needsoft;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
	needsoft = 0;
	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
		if (--p1->c_time > 0)
			break;		/* first pending event not yet due */
		needsoft = 1;
		if (p1->c_time == 0)
			break;		/* just became due; later deltas unchanged */
	}

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 * ITIMER_VIRTUAL decrements only while in user mode;
		 * ITIMER_PROF decrements whenever the process is running.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	/*
	 * Increment the time-of-day.  The increment is just ``tick'' unless
	 * we are still adjusting the clock; see adjtime().
	 */
	ticks++;
	if (timedelta == 0)
		delta = tick;
	else {
		/* Slew a little extra (tickdelta may be negative) each tick
		 * until the adjtime() correction is used up. */
		delta = tick + tickdelta;
		timedelta -= tickdelta;
	}
	BUMPTIME(&time, delta);
	BUMPTIME(&mono_time, delta);

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (needsoft) {
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void)splsoftclock();
			softclock();
		} else
			setsoftclock();
	}
}

/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
void
softclock()
{
	register struct callout *c;
	register void *arg;
	register void (*func) __P((void *));
	register int s;

	/*
	 * Pop due entries (c_time <= 0) off the front of the queue one at
	 * a time.  The interrupt mask is dropped around each handler call
	 * so hardclock()/timeout() are not locked out while handlers run;
	 * the queue head is re-read after each call for the same reason.
	 */
	s = splhigh();
	while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
		func = c->c_func;
		arg = c->c_arg;
		calltodo.c_next = c->c_next;
		/* Return the slot to the free list before calling out. */
		c->c_next = callfree;
		callfree = c;
		splx(s);
		(*func)(arg);
		(void) splhigh();
	}
	splx(s);
}

/*
 * Arrange that (*func)(arg) is called in t/hz seconds.
 *
 * Queue times are deltas from the previous entry, so inserting
 * subtracts each earlier (positive) delta from t, and reduces the
 * successor's delta by the amount charged to the new entry.
 */
void
timeout(func, arg, t)
	void (*func) __P((void *));
	void *arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s;

	s = splhigh();
	if (t <= 0)			/* never schedule in the past */
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = func;
	/*
	 * Walk past entries due sooner than t; overdue entries at the
	 * front (c_time <= 0) consume none of t.
	 */
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;	/* successor now relative to new entry */
	splx(s);
}

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
void
untimeout(func, arg)
	void (*func) __P((void *));
	void *arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = splhigh();
	for (p1 = &calltodo; (p2 = p1->c_next) != NULL; p1 = p2) {
		if (p2->c_func == func && p2->c_arg == arg) {
			/*
			 * Give the removed entry's remaining delta to its
			 * successor so later events still fire on time.
			 */
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;		/* remove only the first match */
		}
	}
	splx(s);
}

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
276 */ 277 int 278 hzto(tv) 279 struct timeval *tv; 280 { 281 register long ticks, sec; 282 int s; 283 284 /* 285 * If number of milliseconds will fit in 32 bit arithmetic, 286 * then compute number of milliseconds to time and scale to 287 * ticks. Otherwise just compute number of hz in time, rounding 288 * times greater than representible to maximum value. 289 * 290 * Delta times less than 25 days can be computed ``exactly''. 291 * Maximum value for any timeout in 10ms ticks is 250 days. 292 */ 293 s = splhigh(); 294 sec = tv->tv_sec - time.tv_sec; 295 if (sec <= 0x7fffffff / 1000 - 1000) 296 ticks = ((tv->tv_sec - time.tv_sec) * 1000 + 297 (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000); 298 else if (sec <= 0x7fffffff / hz) 299 ticks = sec * hz; 300 else 301 ticks = 0x7fffffff; 302 splx(s); 303 return (ticks); 304 } 305 306 /* 307 * Start profiling on a process. 308 * 309 * Kernel profiling passes proc0 which never exits and hence 310 * keeps the profile clock running constantly. 311 */ 312 void 313 startprofclock(p) 314 register struct proc *p; 315 { 316 int s; 317 318 if ((p->p_flag & SPROFIL) == 0) { 319 p->p_flag |= SPROFIL; 320 if (++profprocs == 1 && stathz != 0) { 321 s = splstatclock(); 322 psdiv = pscnt = psratio; 323 setstatclockrate(profhz); 324 splx(s); 325 } 326 } 327 } 328 329 /* 330 * Stop profiling on a process. 331 */ 332 void 333 stopprofclock(p) 334 register struct proc *p; 335 { 336 int s; 337 338 if (p->p_flag & SPROFIL) { 339 p->p_flag &= ~SPROFIL; 340 if (--profprocs == 0 && stathz != 0) { 341 s = splstatclock(); 342 psdiv = pscnt = 1; 343 setstatclockrate(stathz); 344 splx(s); 345 } 346 } 347 } 348 349 int dk_ndrive = DK_NDRIVE; 350 351 /* 352 * Statistics clock. Grab profile sample, and if divider reaches 0, 353 * do process and kernel statistics. 
 */
/*
 * Runs profhz times per second while any process is being profiled,
 * stathz times otherwise (or from hardclock() if stathz is 0).
 * Every tick takes a profiling sample; only every psdiv-th tick
 * (pscnt reaching 0) charges cpu-state and scheduling statistics,
 * so statistics always accumulate at the stathz rate.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;

	if (CLKF_USERMODE(frame)) {
		/*
		 * Came from user mode.  If this process is being
		 * profiled, record the tick at the interrupted pc.
		 */
		p = curproc;
		if (p->p_flag & SPROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
		if (--pscnt > 0)
			return;
		/* Divider hit zero: charge the tick as user time. */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	pscnt = psdiv;		/* reload the prof => stat divider */

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.
	 * The priority of a process gets worse as it accumulates
	 * CPU time.  The cpu usage estimator (p_cpu) is increased here
	 * and the formula for computing priorities (in kern_synch.c)
	 * will compute a different value each time the p_cpu increases
	 * by 4.  The cpu usage estimator ramps up quite quickly when
	 * the process is running (linearly), and decays away
	 * exponentially, at a rate which is proportionally slower
	 * when the system is busy.  The basic principle is that the
	 * system will 90% forget that a process used a lot of CPU
	 * time in 5*loadav seconds.  This causes the system to favor
	 * processes which haven't run much recently, and to
	 * round-robin among other processes.
	 */
	if (p != NULL) {
		p->p_cpticks++;
		/* Saturate p_cpu at its maximum rather than wrapping to 0. */
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu & 3) == 0) {
			setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}
}

/*
 * Return information about system clocks.
 * Copies a struct clockinfo out via sysctl_rdstruct() and returns
 * its (errno-style) result.
 *
 * NOTE(review): pre-ANSI implicit-int return type; should be
 * declared ``int'' explicitly.
 */
sysctl_clockrate(where, sizep)
	register char *where;
	size_t *sizep;
{
	struct clockinfo clkinfo;

	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;	/* report hz if no stat clock */
	return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
}