/*-
 * Copyright (c) 1982, 1986, 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)kern_clock.c	7.30 (Berkeley) 03/04/93
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>

#include <machine/cpu.h>

#ifdef GPROF
#include <sys/gmon.h>
extern u_short *kcount;
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */

/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
56 */ 57 #define BUMPTIME(t, usec) { \ 58 register volatile struct timeval *tp = (t); \ 59 register long us; \ 60 \ 61 tp->tv_usec = us = tp->tv_usec + (usec); \ 62 if (us >= 1000000) { \ 63 tp->tv_usec = us - 1000000; \ 64 tp->tv_sec++; \ 65 } \ 66 } 67 68 int stathz; 69 int profhz; 70 int profprocs; 71 int ticks; 72 static int psdiv, pscnt; /* prof => stat divider */ 73 int psratio; /* ratio: prof / stat */ 74 75 volatile struct timeval time; 76 volatile struct timeval mono_time; 77 78 /* 79 * Initialize clock frequencies and start both clocks running. 80 */ 81 void 82 initclocks() 83 { 84 register int i; 85 86 /* 87 * Set divisors to 1 (normal case) and let the machine-specific 88 * code do its bit. 89 */ 90 psdiv = pscnt = 1; 91 cpu_initclocks(); 92 93 /* 94 * Compute profhz/stathz, and fix profhz if needed. 95 */ 96 i = stathz ? stathz : hz; 97 if (profhz == 0) 98 profhz = i; 99 psratio = profhz / i; 100 } 101 102 /* 103 * The real-time timer, interrupting hz times per second. 104 */ 105 void 106 hardclock(frame) 107 register struct clockframe *frame; 108 { 109 register struct callout *p1; 110 register struct proc *p; 111 register int delta, needsoft; 112 extern int tickdelta; 113 extern long timedelta; 114 115 /* 116 * Update real-time timeout queue. 117 * At front of queue are some number of events which are ``due''. 118 * The time to these is <= 0 and if negative represents the 119 * number of ticks which have passed since it was supposed to happen. 120 * The rest of the q elements (times > 0) are events yet to happen, 121 * where the time for each is given as a delta from the previous. 122 * Decrementing just the first of these serves to decrement the time 123 * to all events. 
124 */ 125 needsoft = 0; 126 for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) { 127 if (--p1->c_time > 0) 128 break; 129 needsoft = 1; 130 if (p1->c_time == 0) 131 break; 132 } 133 134 p = curproc; 135 if (p) { 136 register struct pstats *pstats; 137 138 /* 139 * Run current process's virtual and profile time, as needed. 140 */ 141 pstats = p->p_stats; 142 if (CLKF_USERMODE(frame) && 143 timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && 144 itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) 145 psignal(p, SIGVTALRM); 146 if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && 147 itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) 148 psignal(p, SIGPROF); 149 } 150 151 /* 152 * If no separate statistics clock is available, run it from here. 153 */ 154 if (stathz == 0) 155 statclock(frame); 156 157 /* 158 * Increment the time-of-day. The increment is just ``tick'' unless 159 * we are still adjusting the clock; see adjtime(). 160 */ 161 ticks++; 162 if (timedelta == 0) 163 delta = tick; 164 else { 165 delta = tick + tickdelta; 166 timedelta -= tickdelta; 167 } 168 BUMPTIME(&time, delta); 169 BUMPTIME(&mono_time, delta); 170 171 /* 172 * Process callouts at a very low cpu priority, so we don't keep the 173 * relatively high clock interrupt priority any longer than necessary. 174 */ 175 if (needsoft) { 176 if (CLKF_BASEPRI(frame)) { 177 /* 178 * Save the overhead of a software interrupt; 179 * it will happen as soon as we return, so do it now. 180 */ 181 (void)splsoftclock(); 182 softclock(); 183 } else 184 setsoftclock(); 185 } 186 } 187 188 /* 189 * Software (low priority) clock interrupt. 190 * Run periodic events from timeout queue. 
191 */ 192 /*ARGSUSED*/ 193 void 194 softclock() 195 { 196 register struct callout *c; 197 register void *arg; 198 register void (*func) __P((void *)); 199 register int s; 200 201 s = splhigh(); 202 while ((c = calltodo.c_next) != NULL && c->c_time <= 0) { 203 func = c->c_func; 204 arg = c->c_arg; 205 calltodo.c_next = c->c_next; 206 c->c_next = callfree; 207 callfree = c; 208 splx(s); 209 (*func)(arg); 210 (void) splhigh(); 211 } 212 splx(s); 213 } 214 215 /* 216 * Arrange that (*func)(arg) is called in t/hz seconds. 217 */ 218 void 219 timeout(func, arg, t) 220 void (*func) __P((void *)); 221 void *arg; 222 register int t; 223 { 224 register struct callout *p1, *p2, *pnew; 225 register int s; 226 227 s = splhigh(); 228 if (t <= 0) 229 t = 1; 230 pnew = callfree; 231 if (pnew == NULL) 232 panic("timeout table overflow"); 233 callfree = pnew->c_next; 234 pnew->c_arg = arg; 235 pnew->c_func = func; 236 for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2) 237 if (p2->c_time > 0) 238 t -= p2->c_time; 239 p1->c_next = pnew; 240 pnew->c_next = p2; 241 pnew->c_time = t; 242 if (p2) 243 p2->c_time -= t; 244 splx(s); 245 } 246 247 /* 248 * untimeout is called to remove a function timeout call 249 * from the callout structure. 250 */ 251 void 252 untimeout(func, arg) 253 void (*func) __P((void *)); 254 void *arg; 255 { 256 register struct callout *p1, *p2; 257 register int s; 258 259 s = splhigh(); 260 for (p1 = &calltodo; (p2 = p1->c_next) != NULL; p1 = p2) { 261 if (p2->c_func == func && p2->c_arg == arg) { 262 if (p2->c_next && p2->c_time > 0) 263 p2->c_next->c_time += p2->c_time; 264 p1->c_next = p2->c_next; 265 p2->c_next = callfree; 266 callfree = p2; 267 break; 268 } 269 } 270 splx(s); 271 } 272 273 /* 274 * Compute number of hz until specified time. 275 * Used to compute third argument to timeout() from an 276 * absolute time. 
277 */ 278 int 279 hzto(tv) 280 struct timeval *tv; 281 { 282 register long ticks, sec; 283 int s; 284 285 /* 286 * If number of milliseconds will fit in 32 bit arithmetic, 287 * then compute number of milliseconds to time and scale to 288 * ticks. Otherwise just compute number of hz in time, rounding 289 * times greater than representible to maximum value. 290 * 291 * Delta times less than 25 days can be computed ``exactly''. 292 * Maximum value for any timeout in 10ms ticks is 250 days. 293 */ 294 s = splhigh(); 295 sec = tv->tv_sec - time.tv_sec; 296 if (sec <= 0x7fffffff / 1000 - 1000) 297 ticks = ((tv->tv_sec - time.tv_sec) * 1000 + 298 (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000); 299 else if (sec <= 0x7fffffff / hz) 300 ticks = sec * hz; 301 else 302 ticks = 0x7fffffff; 303 splx(s); 304 return (ticks); 305 } 306 307 /* 308 * Start profiling on a process. 309 * 310 * Kernel profiling passes proc0 which never exits and hence 311 * keeps the profile clock running constantly. 312 */ 313 void 314 startprofclock(p) 315 register struct proc *p; 316 { 317 int s; 318 319 if ((p->p_flag & SPROFIL) == 0) { 320 p->p_flag |= SPROFIL; 321 if (++profprocs == 1 && stathz != 0) { 322 s = splstatclock(); 323 psdiv = pscnt = psratio; 324 setstatclockrate(profhz); 325 splx(s); 326 } 327 } 328 } 329 330 /* 331 * Stop profiling on a process. 332 */ 333 void 334 stopprofclock(p) 335 register struct proc *p; 336 { 337 int s; 338 339 if (p->p_flag & SPROFIL) { 340 p->p_flag &= ~SPROFIL; 341 if (--profprocs == 0 && stathz != 0) { 342 s = splstatclock(); 343 psdiv = pscnt = 1; 344 setstatclockrate(stathz); 345 splx(s); 346 } 347 } 348 } 349 350 int dk_ndrive = DK_NDRIVE; 351 352 /* 353 * Statistics clock. Grab profile sample, and if divider reaches 0, 354 * do process and kernel statistics. 
355 */ 356 void 357 statclock(frame) 358 register struct clockframe *frame; 359 { 360 #ifdef GPROF 361 register struct gmonparam *g; 362 #endif 363 register struct proc *p; 364 register int i; 365 366 if (CLKF_USERMODE(frame)) { 367 p = curproc; 368 if (p->p_flag & SPROFIL) 369 addupc_intr(p, CLKF_PC(frame), 1); 370 if (--pscnt > 0) 371 return; 372 /* 373 * Came from user mode; CPU was in user state. 374 * If this process is being profiled record the tick. 375 */ 376 p->p_uticks++; 377 if (p->p_nice > NZERO) 378 cp_time[CP_NICE]++; 379 else 380 cp_time[CP_USER]++; 381 } else { 382 #ifdef GPROF 383 /* 384 * Kernel statistics are just like addupc_intr, only easier. 385 */ 386 g = &_gmonparam; 387 if (g->state == GMON_PROF_ON) { 388 i = CLKF_PC(frame) - g->lowpc; 389 if (i < g->textsize) 390 kcount[i / (HISTFRACTION * sizeof(*kcount))]++; 391 } 392 #endif 393 if (--pscnt > 0) 394 return; 395 /* 396 * Came from kernel mode, so we were: 397 * - handling an interrupt, 398 * - doing syscall or trap work on behalf of the current 399 * user process, or 400 * - spinning in the idle loop. 401 * Whichever it is, charge the time as appropriate. 402 * Note that we charge interrupts to the current process, 403 * regardless of whether they are ``for'' that process, 404 * so that we know how much of its real time was spent 405 * in ``non-process'' (i.e., interrupt) work. 406 */ 407 p = curproc; 408 if (CLKF_INTR(frame)) { 409 if (p != NULL) 410 p->p_iticks++; 411 cp_time[CP_INTR]++; 412 } else if (p != NULL) { 413 p->p_sticks++; 414 cp_time[CP_SYS]++; 415 } else 416 cp_time[CP_IDLE]++; 417 } 418 pscnt = psdiv; 419 420 /* 421 * We maintain statistics shown by user-level statistics 422 * programs: the amount of time in each cpu state, and 423 * the amount of time each of DK_NDRIVE ``drives'' is busy. 424 * 425 * XXX should either run linked list of drives, or (better) 426 * grab timestamps in the start & done code. 
427 */ 428 for (i = 0; i < DK_NDRIVE; i++) 429 if (dk_busy & (1 << i)) 430 dk_time[i]++; 431 432 /* 433 * We adjust the priority of the current process. 434 * The priority of a process gets worse as it accumulates 435 * CPU time. The cpu usage estimator (p_cpu) is increased here 436 * and the formula for computing priorities (in kern_synch.c) 437 * will compute a different value each time the p_cpu increases 438 * by 4. The cpu usage estimator ramps up quite quickly when 439 * the process is running (linearly), and decays away 440 * exponentially, at a rate which is proportionally slower 441 * when the system is busy. The basic principal is that the 442 * system will 90% forget that a process used a lot of CPU 443 * time in 5*loadav seconds. This causes the system to favor 444 * processes which haven't run much recently, and to 445 * round-robin among other processes. 446 */ 447 if (p != NULL) { 448 p->p_cpticks++; 449 if (++p->p_cpu == 0) 450 p->p_cpu--; 451 if ((p->p_cpu & 3) == 0) { 452 setpri(p); 453 if (p->p_pri >= PUSER) 454 p->p_pri = p->p_usrpri; 455 } 456 } 457 } 458 459 /* 460 * Return information about system clocks. 461 */ 462 sysctl_clockrate(where, sizep) 463 register char *where; 464 size_t *sizep; 465 { 466 struct clockinfo clkinfo; 467 468 /* 469 * Construct clockinfo structure. 470 */ 471 clkinfo.hz = hz; 472 clkinfo.tick = tick; 473 clkinfo.profhz = profhz; 474 clkinfo.stathz = stathz ? stathz : hz; 475 return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo))); 476 } 477