1 /*- 2 * Copyright (c) 1982, 1986, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * %sccs.include.redist.c% 11 * 12 * @(#)kern_clock.c 8.5 (Berkeley) 01/21/94 13 */ 14 15 #include <sys/param.h> 16 #include <sys/systm.h> 17 #include <sys/dkstat.h> 18 #include <sys/callout.h> 19 #include <sys/kernel.h> 20 #include <sys/proc.h> 21 #include <sys/resourcevar.h> 22 23 #include <machine/cpu.h> 24 25 #ifdef GPROF 26 #include <sys/gmon.h> 27 #endif 28 29 /* 30 * Clock handling routines. 31 * 32 * This code is written to operate with two timers that run independently of 33 * each other. The main clock, running hz times per second, is used to keep 34 * track of real time. The second timer handles kernel and user profiling, 35 * and does resource use estimation. If the second timer is programmable, 36 * it is randomized to avoid aliasing between the two clocks. For example, 37 * the randomization prevents an adversary from always giving up the cpu 38 * just before its quantum expires. Otherwise, it would never accumulate 39 * cpu ticks. The mean frequency of the second timer is stathz. 40 * 41 * If no second timer exists, stathz will be zero; in this case we drive 42 * profiling and statistics off the main clock. This WILL NOT be accurate; 43 * do not do it unless absolutely necessary. 44 * 45 * The statistics clock may (or may not) be run at a higher rate while 46 * profiling. This profile clock runs at profhz. We require that profhz 47 * be an integral multiple of stathz. 48 * 49 * If the statistics clock is running fast, it must be divided by the ratio 50 * profhz/stathz for statistics. (For profiling, every tick counts.) 51 */ 52 53 /* 54 * TODO: 55 * allocate more timeout table slots when table overflows. 56 */ 57 58 /* 59 * Bump a timeval by a small number of usec's. 60 */ 61 #define BUMPTIME(t, usec) { \ 62 register volatile struct timeval *tp = (t); \ 63 register long us; \ 64 \ 65 tp->tv_usec = us = tp->tv_usec + (usec); \ 66 if (us >= 1000000) { \ 67 tp->tv_usec = us - 1000000; \ 68 tp->tv_sec++; \ 69 } \ 70 } 71 72 int stathz; 73 int profhz; 74 int profprocs; 75 int ticks; 76 static int psdiv, pscnt; /* prof => stat divider */ 77 int psratio; /* ratio: prof / stat */ 78 79 volatile struct timeval time; 80 volatile struct timeval mono_time; 81 82 /* 83 * Initialize clock frequencies and start both clocks running. 84 */ 85 void 86 initclocks() 87 { 88 register int i; 89 90 /* 91 * Set divisors to 1 (normal case) and let the machine-specific 92 * code do its bit. 93 */ 94 psdiv = pscnt = 1; 95 cpu_initclocks(); 96 97 /* 98 * Compute profhz/stathz, and fix profhz if needed. 99 */ 100 i = stathz ? stathz : hz; 101 if (profhz == 0) 102 profhz = i; 103 psratio = profhz / i; 104 } 105 106 /* 107 * The real-time timer, interrupting hz times per second. 108 */ 109 void 110 hardclock(frame) 111 register struct clockframe *frame; 112 { 113 register struct callout *p1; 114 register struct proc *p; 115 register int delta, needsoft; 116 extern int tickdelta; 117 extern long timedelta; 118 119 /* 120 * Update real-time timeout queue. 121 * At front of queue are some number of events which are ``due''. 122 * The time to these is <= 0 and if negative represents the 123 * number of ticks which have passed since it was supposed to happen. 124 * The rest of the q elements (times > 0) are events yet to happen, 125 * where the time for each is given as a delta from the previous. 126 * Decrementing just the first of these serves to decrement the time 127 * to all events. 128 */ 129 needsoft = 0; 130 for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) { 131 if (--p1->c_time > 0) 132 break; 133 needsoft = 1; 134 if (p1->c_time == 0) 135 break; 136 } 137 138 p = curproc; 139 if (p) { 140 register struct pstats *pstats; 141 142 /* 143 * Run current process's virtual and profile time, as needed. 144 */ 145 pstats = p->p_stats; 146 if (CLKF_USERMODE(frame) && 147 timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && 148 itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) 149 psignal(p, SIGVTALRM); 150 if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && 151 itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) 152 psignal(p, SIGPROF); 153 } 154 155 /* 156 * If no separate statistics clock is available, run it from here. 157 */ 158 if (stathz == 0) 159 statclock(frame); 160 161 /* 162 * Increment the time-of-day. The increment is just ``tick'' unless 163 * we are still adjusting the clock; see adjtime(). 164 */ 165 ticks++; 166 if (timedelta == 0) 167 delta = tick; 168 else { 169 delta = tick + tickdelta; 170 timedelta -= tickdelta; 171 } 172 BUMPTIME(&time, delta); 173 BUMPTIME(&mono_time, delta); 174 175 /* 176 * Process callouts at a very low cpu priority, so we don't keep the 177 * relatively high clock interrupt priority any longer than necessary. 178 */ 179 if (needsoft) { 180 if (CLKF_BASEPRI(frame)) { 181 /* 182 * Save the overhead of a software interrupt; 183 * it will happen as soon as we return, so do it now. 184 */ 185 (void)splsoftclock(); 186 softclock(); 187 } else 188 setsoftclock(); 189 } 190 } 191 192 /* 193 * Software (low priority) clock interrupt. 194 * Run periodic events from timeout queue. 195 */ 196 /*ARGSUSED*/ 197 void 198 softclock() 199 { 200 register struct callout *c; 201 register void *arg; 202 register void (*func) __P((void *)); 203 register int s; 204 205 s = splhigh(); 206 while ((c = calltodo.c_next) != NULL && c->c_time <= 0) { 207 func = c->c_func; 208 arg = c->c_arg; 209 calltodo.c_next = c->c_next; 210 c->c_next = callfree; 211 callfree = c; 212 splx(s); 213 (*func)(arg); 214 (void) splhigh(); 215 } 216 splx(s); 217 } 218 219 /* 220 * timeout -- 221 * Execute a function after a specified length of time. 222 * 223 * untimeout -- 224 * Cancel previous timeout function call. 225 * 226 * See AT&T BCI Driver Reference Manual for specification. This 227 * implementation differs from that one in that no identification 228 * value is returned from timeout, rather, the original arguments 229 * to timeout are used to identify entries for untimeout. 230 */ 231 void 232 timeout(ftn, arg, ticks) 233 void (*ftn) __P((void *)); 234 void *arg; 235 register int ticks; 236 { 237 register struct callout *new, *p, *t; 238 register int s; 239 240 if (ticks <= 0) 241 ticks = 1; 242 243 /* Lock out the clock. */ 244 s = splhigh(); 245 246 /* Fill in the next free callout structure. */ 247 if (callfree == NULL) 248 panic("timeout table full"); 249 new = callfree; 250 callfree = new->c_next; 251 new->c_arg = arg; 252 new->c_func = ftn; 253 254 /* 255 * The time for each event is stored as a difference from the time 256 * of the previous event on the queue. Walk the queue, correcting 257 * the ticks argument for queue entries passed. Correct the ticks 258 * value for the queue entry immediately after the insertion point 259 * as well. Watch out for negative c_time values; these represent 260 * overdue events. 261 */ 262 for (p = &calltodo; 263 (t = p->c_next) != NULL && ticks > t->c_time; p = t) 264 if (t->c_time > 0) 265 ticks -= t->c_time; 266 new->c_time = ticks; 267 if (t != NULL) 268 t->c_time -= ticks; 269 270 /* Insert the new entry into the queue. */ 271 p->c_next = new; 272 new->c_next = t; 273 splx(s); 274 } 275 276 void 277 untimeout(ftn, arg) 278 void (*ftn) __P((void *)); 279 void *arg; 280 { 281 register struct callout *p, *t; 282 register int s; 283 284 s = splhigh(); 285 for (p = &calltodo; (t = p->c_next) != NULL; p = t) 286 if (t->c_func == ftn && t->c_arg == arg) { 287 /* Increment next entry's tick count. */ 288 if (t->c_next && t->c_time > 0) 289 t->c_next->c_time += t->c_time; 290 291 /* Move entry from callout queue to callfree queue. */ 292 p->c_next = t->c_next; 293 t->c_next = callfree; 294 callfree = t; 295 break; 296 } 297 splx(s); 298 } 299 300 /* 301 * Compute number of hz until specified time. Used to 302 * compute third argument to timeout() from an absolute time. 303 */ 304 int 305 hzto(tv) 306 struct timeval *tv; 307 { 308 register long ticks, sec; 309 int s; 310 311 /* 312 * If number of milliseconds will fit in 32 bit arithmetic, 313 * then compute number of milliseconds to time and scale to 314 * ticks. Otherwise just compute number of hz in time, rounding 315 * times greater than representible to maximum value. 316 * 317 * Delta times less than 25 days can be computed ``exactly''. 318 * Maximum value for any timeout in 10ms ticks is 250 days. 319 */ 320 s = splhigh(); 321 sec = tv->tv_sec - time.tv_sec; 322 if (sec <= 0x7fffffff / 1000 - 1000) 323 ticks = ((tv->tv_sec - time.tv_sec) * 1000 + 324 (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000); 325 else if (sec <= 0x7fffffff / hz) 326 ticks = sec * hz; 327 else 328 ticks = 0x7fffffff; 329 splx(s); 330 return (ticks); 331 } 332 333 /* 334 * Start profiling on a process. 335 * 336 * Kernel profiling passes proc0 which never exits and hence 337 * keeps the profile clock running constantly. 338 */ 339 void 340 startprofclock(p) 341 register struct proc *p; 342 { 343 int s; 344 345 if ((p->p_flag & P_PROFIL) == 0) { 346 p->p_flag |= P_PROFIL; 347 if (++profprocs == 1 && stathz != 0) { 348 s = splstatclock(); 349 psdiv = pscnt = psratio; 350 setstatclockrate(profhz); 351 splx(s); 352 } 353 } 354 } 355 356 /* 357 * Stop profiling on a process. 358 */ 359 void 360 stopprofclock(p) 361 register struct proc *p; 362 { 363 int s; 364 365 if (p->p_flag & P_PROFIL) { 366 p->p_flag &= ~P_PROFIL; 367 if (--profprocs == 0 && stathz != 0) { 368 s = splstatclock(); 369 psdiv = pscnt = 1; 370 setstatclockrate(stathz); 371 splx(s); 372 } 373 } 374 } 375 376 int dk_ndrive = DK_NDRIVE; 377 378 /* 379 * Statistics clock. Grab profile sample, and if divider reaches 0, 380 * do process and kernel statistics. 381 */ 382 void 383 statclock(frame) 384 register struct clockframe *frame; 385 { 386 #ifdef GPROF 387 register struct gmonparam *g; 388 #endif 389 register struct proc *p; 390 register int i; 391 392 if (CLKF_USERMODE(frame)) { 393 p = curproc; 394 if (p->p_flag & P_PROFIL) 395 addupc_intr(p, CLKF_PC(frame), 1); 396 if (--pscnt > 0) 397 return; 398 /* 399 * Came from user mode; CPU was in user state. 400 * If this process is being profiled record the tick. 401 */ 402 p->p_uticks++; 403 if (p->p_nice > NZERO) 404 cp_time[CP_NICE]++; 405 else 406 cp_time[CP_USER]++; 407 } else { 408 #ifdef GPROF 409 /* 410 * Kernel statistics are just like addupc_intr, only easier. 411 */ 412 g = &_gmonparam; 413 if (g->state == GMON_PROF_ON) { 414 i = CLKF_PC(frame) - g->lowpc; 415 if (i < g->textsize) { 416 i /= HISTFRACTION * sizeof(*g->kcount); 417 g->kcount[i]++; 418 } 419 } 420 #endif 421 if (--pscnt > 0) 422 return; 423 /* 424 * Came from kernel mode, so we were: 425 * - handling an interrupt, 426 * - doing syscall or trap work on behalf of the current 427 * user process, or 428 * - spinning in the idle loop. 429 * Whichever it is, charge the time as appropriate. 430 * Note that we charge interrupts to the current process, 431 * regardless of whether they are ``for'' that process, 432 * so that we know how much of its real time was spent 433 * in ``non-process'' (i.e., interrupt) work. 434 */ 435 p = curproc; 436 if (CLKF_INTR(frame)) { 437 if (p != NULL) 438 p->p_iticks++; 439 cp_time[CP_INTR]++; 440 } else if (p != NULL) { 441 p->p_sticks++; 442 cp_time[CP_SYS]++; 443 } else 444 cp_time[CP_IDLE]++; 445 } 446 pscnt = psdiv; 447 448 /* 449 * We maintain statistics shown by user-level statistics 450 * programs: the amount of time in each cpu state, and 451 * the amount of time each of DK_NDRIVE ``drives'' is busy. 452 * 453 * XXX should either run linked list of drives, or (better) 454 * grab timestamps in the start & done code. 455 */ 456 for (i = 0; i < DK_NDRIVE; i++) 457 if (dk_busy & (1 << i)) 458 dk_time[i]++; 459 460 /* 461 * We adjust the priority of the current process. The priority of 462 * a process gets worse as it accumulates CPU time. The cpu usage 463 * estimator (p_estcpu) is increased here. The formula for computing 464 * priorities (in kern_synch.c) will compute a different value each 465 * time p_estcpu increases by 4. The cpu usage estimator ramps up 466 * quite quickly when the process is running (linearly), and decays 467 * away exponentially, at a rate which is proportionally slower when 468 * the system is busy. The basic principal is that the system will 469 * 90% forget that the process used a lot of CPU time in 5 * loadav 470 * seconds. This causes the system to favor processes which haven't 471 * run much recently, and to round-robin among other processes. 472 */ 473 if (p != NULL) { 474 p->p_cpticks++; 475 if (++p->p_estcpu == 0) 476 p->p_estcpu--; 477 if ((p->p_estcpu & 3) == 0) { 478 resetpriority(p); 479 if (p->p_priority >= PUSER) 480 p->p_priority = p->p_usrpri; 481 } 482 } 483 } 484 485 /* 486 * Return information about system clocks. 487 */ 488 sysctl_clockrate(where, sizep) 489 register char *where; 490 size_t *sizep; 491 { 492 struct clockinfo clkinfo; 493 494 /* 495 * Construct clockinfo structure. 496 */ 497 clkinfo.hz = hz; 498 clkinfo.tick = tick; 499 clkinfo.profhz = profhz; 500 clkinfo.stathz = stathz ? stathz : hz; 501 return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo))); 502 } 503