1 /*- 2 * Copyright (c) 1982, 1986, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)kern_clock.c 8.3 (Berkeley) 09/23/93 8 */ 9 10 #include <sys/param.h> 11 #include <sys/systm.h> 12 #include <sys/dkstat.h> 13 #include <sys/callout.h> 14 #include <sys/kernel.h> 15 #include <sys/proc.h> 16 #include <sys/resourcevar.h> 17 18 #include <machine/cpu.h> 19 20 #ifdef GPROF 21 #include <sys/gmon.h> 22 #endif 23 24 /* 25 * Clock handling routines. 26 * 27 * This code is written to operate with two timers that run independently of 28 * each other. The main clock, running hz times per second, is used to keep 29 * track of real time. The second timer handles kernel and user profiling, 30 * and does resource use estimation. If the second timer is programmable, 31 * it is randomized to avoid aliasing between the two clocks. For example, 32 * the randomization prevents an adversary from always giving up the cpu 33 * just before its quantum expires. Otherwise, it would never accumulate 34 * cpu ticks. The mean frequency of the second timer is stathz. 35 * 36 * If no second timer exists, stathz will be zero; in this case we drive 37 * profiling and statistics off the main clock. This WILL NOT be accurate; 38 * do not do it unless absolutely necessary. 39 * 40 * The statistics clock may (or may not) be run at a higher rate while 41 * profiling. This profile clock runs at profhz. We require that profhz 42 * be an integral multiple of stathz. 43 * 44 * If the statistics clock is running fast, it must be divided by the ratio 45 * profhz/stathz for statistics. (For profiling, every tick counts.) 46 */ 47 48 /* 49 * TODO: 50 * allocate more timeout table slots when table overflows. 51 */ 52 53 /* 54 * Bump a timeval by a small number of usec's. 
 */
/*
 * BUMPTIME: add "usec" microseconds to timeval *t, carrying into tv_sec.
 * Written so the volatile timeval is updated with at most two stores;
 * assumes the final usec value carries at most once (usec < 1000000).
 */
#define BUMPTIME(t, usec) { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}

int	stathz;			/* statistics clock rate; 0 if none exists */
int	profhz;			/* profiling clock rate (multiple of stathz) */
int	profprocs;		/* count of processes with profiling enabled */
int	ticks;			/* hardclock ticks elapsed since boot */
static int psdiv, pscnt;	/* prof => stat divider */
int	psratio;		/* ratio: prof / stat */

volatile struct	timeval time;		/* time of day */
volatile struct	timeval mono_time;	/* monotonically increasing time */

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks()
{
	register int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.  cpu_initclocks() is expected to set stathz
	 * and profhz if separate clocks exist.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 * With no separate statistics clock, both run off hz.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
}

/*
 * The real-time timer, interrupting hz times per second.
 * Runs at clock interrupt priority.
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct callout *p1;
	register struct proc *p;
	register int delta, needsoft;
	extern int tickdelta;		/* per-tick slew set by adjtime() */
	extern long timedelta;		/* remaining adjtime() correction */

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
	needsoft = 0;
	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
	}

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 * ITIMER_VIRTUAL counts only user-mode ticks; ITIMER_PROF
		 * counts both.  itimerdecr() returning 0 means it expired.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	/*
	 * Increment the time-of-day.  The increment is just ``tick'' unless
	 * we are still adjusting the clock; see adjtime().
	 */
	ticks++;
	if (timedelta == 0)
		delta = tick;
	else {
		delta = tick + tickdelta;
		timedelta -= tickdelta;
	}
	BUMPTIME(&time, delta);
	BUMPTIME(&mono_time, delta);

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (needsoft) {
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void)splsoftclock();
			softclock();
		} else
			setsoftclock();
	}
}

/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
void
softclock()
{
	register struct callout *c;
	register void *arg;
	register void (*func) __P((void *));
	register int s;

	/*
	 * Pop each ``due'' entry (c_time <= 0) off the front of the queue,
	 * return it to the free list, then drop priority while running the
	 * handler so the clock is blocked only during queue manipulation.
	 */
	s = splhigh();
	while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
		func = c->c_func;
		arg = c->c_arg;
		calltodo.c_next = c->c_next;
		c->c_next = callfree;
		callfree = c;
		splx(s);
		(*func)(arg);
		/* Re-block the clock before re-examining the queue. */
		(void) splhigh();
	}
	splx(s);
}

/*
 * timeout --
 *	Execute a function after a specified length of time.
 *
 * untimeout --
 *	Cancel previous timeout function call.
 *
 * See AT&T BCI Driver Reference Manual for specification.  This
 * implementation differs from that one in that no identification
 * value is returned from timeout, rather, the original arguments
 * to timeout are used to identify entries for untimeout.
 */
void
timeout(ftn, arg, ticks)
	void (*ftn) __P((void *));
	void *arg;
	register int ticks;
{
	register struct callout *new, *p, *t;
	register int s;

	/* A non-positive delay means ``next tick''. */
	if (ticks <= 0)
		ticks = 1;

	/* Lock out the clock. */
	s = splhigh();

	/*
	 * Fill in the next free callout structure.
	 * The table is statically sized; see the TODO at the top of
	 * the file about growing it on overflow.
	 */
	if (callfree == NULL)
		panic("timeout table full");
	new = callfree;
	callfree = new->c_next;
	new->c_arg = arg;
	new->c_func = ftn;

	/*
	 * The time for each event is stored as a difference from the time
	 * of the previous event on the queue.  Walk the queue, correcting
	 * the ticks argument for queue entries passed.  Correct the ticks
	 * value for the queue entry immediately after the insertion point
	 * as well.
	 */
	for (p = &calltodo;
	    (t = p->c_next) != NULL && ticks > t->c_time; p = t)
		ticks -= t->c_time;
	new->c_time = ticks;
	if (t != NULL)
		t->c_time -= ticks;

	/* Insert the new entry into the queue. */
	p->c_next = new;
	new->c_next = t;
	splx(s);
}

void
untimeout(ftn, arg)
	void (*ftn) __P((void *));
	void *arg;
{
	register struct callout *p, *t;
	register int s;

	/* Search for the (func, arg) pair; only the first match is removed. */
	s = splhigh();
	for (p = &calltodo; (t = p->c_next) != NULL; p = t)
		if (t->c_func == ftn && t->c_arg == arg) {
			/*
			 * Increment next entry's tick count to preserve
			 * the delta encoding of the remaining queue.
			 */
			if (t->c_next && t->c_time > 0)
				t->c_next->c_time += t->c_time;

			/* Move entry from callout queue to callfree queue. */
			p->c_next = t->c_next;
			t->c_next = callfree;
			callfree = t;
			break;
		}
	splx(s);
}

/*
 * Compute number of hz until specified time.  Used to
 * compute third argument to timeout() from an absolute time.
 */
int
hzto(tv)
	struct timeval *tv;
{
	register long ticks, sec;
	int s;

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representible to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 *
	 * NOTE(review): the millisecond path divides by (tick / 1000),
	 * which presumes tick >= 1000 usec (hz <= 1000) — confirm for
	 * high-hz configurations.
	 */
	s = splhigh();
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
	register struct proc *p;
{
	int s;

	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
		/*
		 * First profiled process: speed the statistics clock up
		 * to profhz and set the divider so statistics are still
		 * accumulated at the stathz rate.
		 */
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
	register struct proc *p;
{
	int s;

	if (p->p_flag & P_PROFIL) {
		p->p_flag &= ~P_PROFIL;
		/*
		 * Last profiled process gone: restore the statistics
		 * clock to its normal rate and divider.
		 */
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}

int	dk_ndrive = DK_NDRIVE;		/* number of disk-statistics slots */

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.  Runs at every profhz tick while
 * profiling; statistics work happens only every psdiv-th call.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;

	if (CLKF_USERMODE(frame)) {
		/* A user-mode interrupt implies a current process. */
		p = curproc;
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	/* Divider expired; reload it for the next statistics period. */
	pscnt = psdiv;

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.  The priority of
	 * a process gets worse as it accumulates CPU time.  The cpu usage
	 * estimator (p_estcpu) is increased here.  The formula for computing
	 * priorities (in kern_synch.c) will compute a different value each
	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
	 * quite quickly when the process is running (linearly), and decays
	 * away exponentially, at a rate which is proportionally slower when
	 * the system is busy.  The basic principal is that the system will
	 * 90% forget that the process used a lot of CPU time in 5 * loadav
	 * seconds.  This causes the system to favor processes which haven't
	 * run much recently, and to round-robin among other processes.
	 */
	if (p != NULL) {
		p->p_cpticks++;
		/* Saturate rather than wrap the usage estimator. */
		if (++p->p_estcpu == 0)
			p->p_estcpu--;
		if ((p->p_estcpu & 3) == 0) {
			resetpriority(p);
			if (p->p_priority >= PUSER)
				p->p_priority = p->p_usrpri;
		}
	}
}

/*
 * Return information about system clocks.
480 */ 481 sysctl_clockrate(where, sizep) 482 register char *where; 483 size_t *sizep; 484 { 485 struct clockinfo clkinfo; 486 487 /* 488 * Construct clockinfo structure. 489 */ 490 clkinfo.hz = hz; 491 clkinfo.tick = tick; 492 clkinfo.profhz = profhz; 493 clkinfo.stathz = stathz ? stathz : hz; 494 return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo))); 495 } 496