1 /* kern_clock.c 4.23 81/07/09 */ 2 3 #include "../h/param.h" 4 #include "../h/systm.h" 5 #include "../h/dk.h" 6 #include "../h/callout.h" 7 #include "../h/seg.h" 8 #include "../h/dir.h" 9 #include "../h/user.h" 10 #include "../h/proc.h" 11 #include "../h/reg.h" 12 #include "../h/psl.h" 13 #include "../h/vm.h" 14 #include "../h/buf.h" 15 #include "../h/text.h" 16 #include "../h/vlimit.h" 17 #include "../h/mtpr.h" 18 #include "../h/clock.h" 19 #include "../h/cpu.h" 20 21 #include "bk.h" 22 #include "dh.h" 23 #include "dz.h" 24 25 /* 26 * Hardclock is called straight from 27 * the real time clock interrupt. 28 * We limit the work we do at real clock interrupt time to: 29 * reloading clock 30 * decrementing time to callouts 31 * recording cpu time usage 32 * modifying priority of current process 33 * arrange for soft clock interrupt 34 * kernel pc profiling 35 * 36 * At software (softclock) interrupt time we: 37 * implement callouts 38 * maintain date 39 * lightning bolt wakeup (every second) 40 * alarm clock signals 41 * jab the scheduler 42 * 43 * On the vax softclock interrupts are implemented by 44 * software interrupts. Note that we may have multiple softclock 45 * interrupts compressed into one (due to excessive interrupt load), 46 * but that hardclock interrupts should never be lost. 
 */

/*
 * hardclock -- real time clock interrupt handler.
 *
 * pc is the interrupted program counter (unused here, hence ARGSUSED);
 * ps is the saved processor status word, tested with USERMODE() and
 * BASEPRI() to learn what the machine was doing when the tick arrived.
 */
/*ARGSUSED*/
hardclock(pc, ps)
	caddr_t pc;
{
	register struct callout *p1;
	register struct proc *pp;
	register int s, cpstate;

	/*
	 * reprime clock
	 */
	clkreld();

	/*
	 * Update callout times.  Entries on the calltodo list keep their
	 * time as a delta from the previous entry, so aging the whole
	 * list costs a single decrement: skip entries that are already
	 * due (c_time <= 0) and decrement the first pending one.
	 */
	for (p1 = calltodo.c_next; p1 && p1->c_time <= 0; p1 = p1->c_next)
		;
	if (p1)
		p1->c_time--;

	/*
	 * Maintain iostat and per-process cpu statistics
	 */
	if (!noproc) {
		/*
		 * Charge this tick's resident-set sizes (data/stack plus
		 * shared text, if any) to the current process and track
		 * its high-water mark.
		 */
		s = u.u_procp->p_rssize;
		u.u_vm.vm_idsrss += s;
		if (u.u_procp->p_textp) {
			register int xrss = u.u_procp->p_textp->x_rssize;

			s += xrss;
			u.u_vm.vm_ixrss += xrss;
		}
		if (s > u.u_vm.vm_maxrss)
			u.u_vm.vm_maxrss = s;
		/*
		 * Enforce the cpu-time limit: post SIGXCPU when total
		 * user+system seconds exceed it, then raise the limit by
		 * 5 seconds so the signal recurs periodically instead of
		 * on every tick.
		 */
		if ((u.u_vm.vm_utime+u.u_vm.vm_stime+1)/hz > u.u_limit[LIM_CPU]) {
			psignal(u.u_procp, SIGXCPU);
			if (u.u_limit[LIM_CPU] < INFINITY - 5)
				u.u_limit[LIM_CPU] += 5;
		}
	}
	/*
	 * Update iostat information: attribute this tick to user, nice,
	 * system or idle time, and count a tick for each busy disk.
	 */
	if (USERMODE(ps)) {
		u.u_vm.vm_utime++;
		if(u.u_procp->p_nice > NZERO)
			cpstate = CP_NICE;
		else
			cpstate = CP_USER;
	} else {
		cpstate = CP_SYS;
		if (noproc)
			cpstate = CP_IDLE;
		else
			u.u_vm.vm_stime++;
	}
	cp_time[cpstate]++;
	for (s = 0; s < DK_NDRIVE; s++)
		if (dk_busy&(1<<s))
			dk_time[s]++;
	/*
	 * Adjust priority of current process.  p_cpu saturates rather
	 * than wrapping to 0, and every 4th increment the scheduling
	 * priority is recomputed so a cpu hog sinks gradually.
	 */
	if (!noproc) {
		pp = u.u_procp;
		pp->p_cpticks++;
		if(++pp->p_cpu == 0)
			pp->p_cpu--;
		if(pp->p_cpu % 4 == 0) {
			(void) setpri(pp);
			if (pp->p_pri >= PUSER)
				pp->p_pri = pp->p_usrpri;
		}
	}
	/*
	 * Time moves on.
	 */
	++lbolt;
#if VAX780
	/*
	 * On 780's, implement a fast UBA watcher,
	 * to make sure uba's don't get stuck.
	 */
	if (cpu == VAX_780 && panicstr == 0 && !BASEPRI(ps))
		unhang();
#endif
	/*
	 * Schedule a software interrupt for the rest
	 * of clock activities.
	 */
	setsoftclock();
}

/*
 * The digital decay cpu usage priority assignment is scaled to run in
 * time as expanded by the 1 minute load average.  Each second we
 * multiply the previous cpu usage estimate by
 *		nrscale*avenrun[0]
 * The following relates the load average to the period over which
 * cpu usage is 90% forgotten:
 *	loadav 1	 5 seconds
 *	loadav 5	24 seconds
 *	loadav 10	47 seconds
 *	loadav 20	93 seconds
 * This is a great improvement on the previous algorithm which
 * decayed the priorities by a constant, and decayed away all knowledge
 * of previous activity in about 20 seconds.  Under heavy load,
 * the previous algorithm degenerated to round-robin with poor response
 * time when there was a high load average.
 */
#undef ave
#define	ave(a,b) ((int)(((int)(a*b))/(b+1)))
int	nrscale = 2;
double	avenrun[];		/* load averages; defined and filled elsewhere */

/*
 * Constant for decay filter for cpu usage field
 * in process table (used by ps au).
 */
double	ccpu = 0.95122942450071400909;	/* exp(-1/20) */

/*
 * Software clock interrupt.
 * This routine runs at lower priority than device interrupts.
 * pc/ps are as for hardclock(); ps is consulted to skip the
 * once-a-second work when the interrupt did not come from base
 * priority, and to detect returns to user mode.
 */
/*ARGSUSED*/
softclock(pc, ps)
	caddr_t pc;
{
	register struct callout *p1;
	register struct proc *pp;
	register int a, s;
	caddr_t arg;
	int (*func)();

	/*
	 * Perform callouts (but not after panic's!).  Each expired
	 * entry is unlinked from the head of calltodo and returned to
	 * the free list at spl7; ipl is dropped again before the
	 * function itself is called so it runs unblocked.
	 *
	 * NOTE(review): when the loop exits via break, ipl is left at
	 * spl7 with no matching splx; presumably the return from the
	 * software interrupt restores it -- confirm against the vax
	 * interrupt glue.
	 */
	if (panicstr == 0) {
		for (;;) {
			s = spl7();
			if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0)
				break;
			calltodo.c_next = p1->c_next;
			arg = p1->c_arg;
			func = p1->c_func;
			p1->c_next = callfree;
			callfree = p1;
			(void) splx(s);
			(*func)(arg);
		}
	}

	/*
	 * Drain silos.
	 */
#if NBK > 0
#if NDH > 0
	s = spl5(); dhtimer(); splx(s);
#endif
#if NDZ > 0
	s = spl5(); dztimer(); splx(s);
#endif
#endif

	/*
	 * If idling and processes are waiting to swap in,
	 * check on them.
	 */
	if (noproc && runin) {
		runin = 0;
		wakeup((caddr_t)&runin);
	}

	/*
	 * Run paging daemon every 1/4 sec.
	 */
	if (lbolt % (hz/4) == 0) {
		vmpago();
	}

	/*
	 * Reschedule every 1/10 sec.
	 */
	if (lbolt % (hz/10) == 0) {
		runrun++;
		aston();
	}

	/*
	 * Lightning bolt every second:
	 *	sleep timeouts
	 *	process priority recomputation
	 *	process %cpu averaging
	 *	virtual memory metering
	 *	kick swapper if processes want in
	 */
	if (lbolt >= hz) {
		/*
		 * This doesn't mean much on VAX since we run at
		 * software interrupt time... if hardclock()
		 * calls softclock() directly, it prevents
		 * this code from running when the priority
		 * was raised when the clock interrupt occurred.
		 */
		if (BASEPRI(ps))
			return;

		/*
		 * If we didn't run a few times because of
		 * long blockage at high ipl, we don't
		 * really want to run this code several times,
		 * so squish out all multiples of hz here.
		 */
		time += lbolt / hz;
		lbolt %= hz;

		/*
		 * Wakeup lightning bolt sleepers.
		 * Processes sleep on lbolt to wait
		 * for short amounts of time (e.g. 1 second).
		 */
		wakeup((caddr_t)&lbolt);

		/*
		 * Recompute process priority and process
		 * sleep() system calls as well as internal
		 * sleeps with timeouts (tsleep() kernel routine).
		 */
		for (pp = proc; pp < procNPROC; pp++)
		if (pp->p_stat && pp->p_stat!=SZOMB) {
			/*
			 * Increase resident time, to max of 127 seconds
			 * (it is kept in a character.)  For
			 * loaded processes this is time in core; for
			 * swapped processes, this is time on drum.
			 */
			if (pp->p_time != 127)
				pp->p_time++;
			/*
			 * If process has clock counting down, and it
			 * expires, set it running (if this is a tsleep()),
			 * or give it an SIGALRM (if the user process
			 * is using alarm signals).
			 */
			if (pp->p_clktim && --pp->p_clktim == 0)
				if (pp->p_flag & STIMO) {
					/*
					 * Kernel timeout: restart the
					 * sleeper at spl6 so its state
					 * can't change underfoot.
					 */
					s = spl6();
					switch (pp->p_stat) {

					case SSLEEP:
						setrun(pp);
						break;

					case SSTOP:
						unsleep(pp);
						break;
					}
					pp->p_flag &= ~STIMO;
					splx(s);
				} else
					psignal(pp, SIGALRM);
			/*
			 * If process is blocked, increment computed
			 * time blocked.  This is used in swap scheduling.
			 */
			if (pp->p_stat==SSLEEP || pp->p_stat==SSTOP)
				if (pp->p_slptime != 127)
					pp->p_slptime++;
			/*
			 * Update digital filter estimation of process
			 * cpu utilization for loaded processes.
			 */
			if (pp->p_flag&SLOAD)
				pp->p_pctcpu = ccpu * pp->p_pctcpu +
				    (1.0 - ccpu) * (pp->p_cpticks/(float)hz);
			/*
			 * Recompute process priority.  The number p_cpu
			 * is a weighted estimate of cpu time consumed.
			 * A process which consumes cpu time has this
			 * increase regularly.  We here decrease it by
			 * a fraction based on load average giving a digital
			 * decay filter which damps out in about 5 seconds
			 * when seconds are measured in time expanded by the
			 * load average.
			 *
			 * If a process is niced, then the nice directly
			 * affects the new priority.  The final priority
			 * is in the range 0 to 255, to fit in a character.
			 */
			pp->p_cpticks = 0;
			a = ave((pp->p_cpu & 0377), avenrun[0]*nrscale) +
			     pp->p_nice - NZERO;
			if (a < 0)
				a = 0;
			if (a > 255)
				a = 255;
			pp->p_cpu = a;
			(void) setpri(pp);
			/*
			 * Now have computed new process priority
			 * in p->p_usrpri.  Carefully change p->p_pri.
			 * A process is on a run queue associated with
			 * this priority, so we must block out process
			 * state changes during the transition.
			 */
			s = spl6();
			if (pp->p_pri >= PUSER) {
				if ((pp != u.u_procp || noproc) &&
				    pp->p_stat == SRUN &&
				    (pp->p_flag & SLOAD) &&
				    pp->p_pri != pp->p_usrpri) {
					/*
					 * Runnable and in core: move it
					 * to the run queue matching its
					 * new priority.
					 */
					remrq(pp);
					pp->p_pri = pp->p_usrpri;
					setrq(pp);
				} else
					pp->p_pri = pp->p_usrpri;
			}
			splx(s);
		}

		/*
		 * Perform virtual memory metering.
		 */
		vmmeter();

		/*
		 * If the swap process is trying to bring
		 * a process in, have it look again to see
		 * if it is possible now.
		 */
		if (runin!=0) {
			runin = 0;
			wakeup((caddr_t)&runin);
		}

		/*
		 * If there are pages that have been cleaned,
		 * jolt the pageout daemon to process them.
		 * We do this here so that these pages will be
		 * freed if there is an abundance of memory and the
		 * daemon would not be awakened otherwise.
		 */
		if (bclnlist != NULL)
			wakeup((caddr_t)&proc[2]);

		/*
		 * If the trap occurred from usermode,
		 * then check to see if it has now been
		 * running more than 10 minutes of user time
		 * and should thus run with reduced priority
		 * to give other processes a chance.
		 */
		if (USERMODE(ps)) {
			pp = u.u_procp;
			if (pp->p_uid && pp->p_nice == NZERO &&
			    u.u_vm.vm_utime > 600 * hz)
				pp->p_nice = NZERO+4;
			(void) setpri(pp);
			pp->p_pri = pp->p_usrpri;
		}
	}
	/*
	 * If trapped user-mode, give it a profiling tick.
	 */
	if (USERMODE(ps) && u.u_prof.pr_scale) {
		u.u_procp->p_flag |= SOWEUPC;
		aston();
	}
}

/*
 * Timeout is called to arrange that
 * fun(arg) is called in tim/hz seconds.
 * An entry is linked into the callout
 * structure.  The time in each structure
 * entry is the number of hz's more
 * than the previous entry.
 * In this way, decrementing the
 * first entry has the effect of
 * updating all entries.
 *
 * The panic is there because there is nothing
 * intelligent to be done if an entry won't fit.
 */
timeout(fun, arg, tim)
	int (*fun)();
	caddr_t arg;
{
	register struct callout *p1, *p2, *pnew;
	register int t;
	int s;

	/* DEBUGGING CODE */
	int ttrstrt();

	if (fun == ttrstrt && arg == 0)
		panic("timeout ttrstr arg");
	/* END DEBUGGING CODE */
	t = tim;
	/*
	 * Run at spl7 so hardclock cannot age the list
	 * while we walk and splice it.
	 */
	s = spl7();
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = fun;
	/*
	 * Walk the delta-encoded list, consuming each entry's delta
	 * from t until the insertion point is found; the new entry
	 * carries the remainder and the follower is adjusted down.
	 */
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}