1 /* 2 * Copyright (c) 1982, 1986, 1990 Regents of the University of California. 3 * All rights reserved. The Berkeley software License Agreement 4 * specifies the terms and conditions for redistribution. 5 * 6 * @(#)kern_synch.c 7.13 (Berkeley) 12/05/90 7 */ 8 9 #include "param.h" 10 #include "systm.h" 11 #include "user.h" 12 #include "proc.h" 13 #include "kernel.h" 14 #include "buf.h" 15 16 #include "machine/psl.h" 17 #include "machine/mtpr.h" 18 19 /* 20 * Force switch among equal priority processes every 100ms. 21 */ 22 roundrobin() 23 { 24 25 runrun++; 26 aston(); 27 timeout(roundrobin, (caddr_t)0, hz / 10); 28 } 29 30 /* 31 * constants for digital decay and forget 32 * 90% of (p_cpu) usage in 5*loadav time 33 * 95% of (p_pctcpu) usage in 60 seconds (load insensitive) 34 * Note that, as ps(1) mentions, this can let percentages 35 * total over 100% (I've seen 137.9% for 3 processes). 36 * 37 * Note that hardclock updates p_cpu and p_cpticks independently. 38 * 39 * We wish to decay away 90% of p_cpu in (5 * loadavg) seconds. 40 * That is, the system wants to compute a value of decay such 41 * that the following for loop: 42 * for (i = 0; i < (5 * loadavg); i++) 43 * p_cpu *= decay; 44 * will compute 45 * p_cpu *= 0.1; 46 * for all values of loadavg: 47 * 48 * Mathematically this loop can be expressed by saying: 49 * decay ** (5 * loadavg) ~= .1 50 * 51 * The system computes decay as: 52 * decay = (2 * loadavg) / (2 * loadavg + 1) 53 * 54 * We wish to prove that the system's computation of decay 55 * will always fulfill the equation: 56 * decay ** (5 * loadavg) ~= .1 57 * 58 * If we compute b as: 59 * b = 2 * loadavg 60 * then 61 * decay = b / (b + 1) 62 * 63 * We now need to prove two things: 64 * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1) 65 * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg) 66 * 67 * Facts: 68 * For x close to zero, exp(x) =~ 1 + x, since 69 * exp(x) = 0! + x**1/1! + x**2/2! + ... . 70 * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b. 71 * For x close to zero, ln(1+x) =~ x, since 72 * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1 73 * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1). 74 * ln(.1) =~ -2.30 75 * 76 * Proof of (1): 77 * Solve (factor)**(power) =~ .1 given power (5*loadav): 78 * solving for factor, 79 * ln(factor) =~ (-2.30/5*loadav), or 80 * factor =~ exp(-1/((5/2.30)*loadav) =~ exp(-1/(2*loadav)) = 81 * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED 82 * 83 * Proof of (2): 84 * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)): 85 * solving for power, 86 * power*ln(b/(b+1)) =~ -2.30, or 87 * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED 88 * 89 * Actual power values for the implemented algorithm are as follows: 90 * loadav: 1 2 3 4 91 * power: 5.68 10.32 14.94 19.55 92 */ 93 94 /* calculations for digital decay to forget 90% of usage in 5*loadav sec */ 95 #define get_b(loadav) (2 * (loadav)) 96 #define get_pcpu(b, cpu) (((b) * ((cpu) & 0377)) / ((b) + FSCALE)) 97 98 /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 99 fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 100 101 /* 102 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the 103 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below 104 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT). 105 * 106 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used: 107 * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits). 108 * 109 * If you dont want to bother with the faster/more-accurate formula, you 110 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate 111 * (more general) method of calculating the %age of CPU used by a process. 112 */ 113 #define CCPU_SHIFT 11 114 115 /* 116 * Recompute process priorities, once a second 117 */ 118 schedcpu() 119 { 120 register fixpt_t b = get_b(averunnable[0]); 121 register struct proc *p; 122 register int s, a; 123 124 wakeup((caddr_t)&lbolt); 125 for (p = allproc; p != NULL; p = p->p_nxt) { 126 if (p->p_time != 127) 127 p->p_time++; 128 if (p->p_stat==SSLEEP || p->p_stat==SSTOP) 129 if (p->p_slptime != 127) 130 p->p_slptime++; 131 p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT; 132 /* 133 * If the process has slept the entire second, 134 * stop recalculating its priority until it wakes up. 135 */ 136 if (p->p_slptime > 1) 137 continue; 138 /* 139 * p_pctcpu is only for ps. 140 */ 141 #if (FSHIFT >= CCPU_SHIFT) 142 p->p_pctcpu += (hz == 100)? 143 ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT): 144 100 * (((fixpt_t) p->p_cpticks) 145 << (FSHIFT - CCPU_SHIFT)) / hz; 146 #else 147 p->p_pctcpu += ((FSCALE - ccpu) * 148 (p->p_cpticks * FSCALE / hz)) >> FSHIFT; 149 #endif 150 p->p_cpticks = 0; 151 a = (int) get_pcpu(b, p->p_cpu) + p->p_nice; 152 if (a < 0) 153 a = 0; 154 if (a > 255) 155 a = 255; 156 p->p_cpu = a; 157 (void) setpri(p); 158 s = splhigh(); /* prevent state changes */ 159 if (p->p_pri >= PUSER) { 160 #define PPQ (128 / NQS) 161 if ((p != u.u_procp || noproc) && 162 p->p_stat == SRUN && 163 (p->p_flag & SLOAD) && 164 (p->p_pri / PPQ) != (p->p_usrpri / PPQ)) { 165 remrq(p); 166 p->p_pri = p->p_usrpri; 167 setrq(p); 168 } else 169 p->p_pri = p->p_usrpri; 170 } 171 splx(s); 172 } 173 vmmeter(); 174 if (runin!=0) { 175 runin = 0; 176 wakeup((caddr_t)&runin); 177 } 178 if (bclnlist != NULL) 179 wakeup((caddr_t)&proc[2]); 180 timeout(schedcpu, (caddr_t)0, hz); 181 } 182 183 /* 184 * Recalculate the priority of a process after it has slept for a while. 185 */ 186 updatepri(p) 187 register struct proc *p; 188 { 189 register int a = p->p_cpu & 0377; 190 register fixpt_t b = get_b(averunnable[0]); 191 192 p->p_slptime--; /* the first time was done in schedcpu */ 193 while (a && --p->p_slptime) 194 a = (int) get_pcpu(b, a) /* + p->p_nice */; 195 p->p_slptime = 0; 196 if (a < 0) 197 a = 0; 198 if (a > 255) 199 a = 255; 200 p->p_cpu = a; 201 (void) setpri(p); 202 } 203 204 #define SQSIZE 0100 /* Must be power of 2 */ 205 #define HASH(x) (( (int) x >> 5) & (SQSIZE-1)) 206 struct slpque { 207 struct proc *sq_head; 208 struct proc **sq_tailp; 209 } slpque[SQSIZE]; 210 211 /* 212 * During autoconfiguration or after a panic, a sleep will simply 213 * lower the priority briefly to allow interrupts, then return. 214 * The priority to be used (safepri) is machine-dependent, thus this 215 * value is initialized and maintained in the machine-dependent layers. 216 * This priority will typically be 0, or the lowest priority 217 * that is safe for use on the interrupt stack; it can be made 218 * higher to block network software interrupts after panics. 219 */ 220 int safepri; 221 222 /* 223 * General sleep call. 224 * Suspends current process until a wakeup is made on chan. 225 * The process will then be made runnable with priority pri. 226 * Sleeps at most timo/hz seconds (0 means no timeout). 227 * If pri includes PCATCH flag, signals are checked 228 * before and after sleeping, else signals are not checked. 229 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires. 230 * If PCATCH is set and a signal needs to be delivered, 231 * ERESTART is returned if the current system call should be restarted 232 * if possible, and EINTR is returned if the system call should 233 * be interrupted by the signal (return EINTR). 234 */ 235 tsleep(chan, pri, wmesg, timo) 236 caddr_t chan; 237 int pri; 238 char *wmesg; 239 int timo; 240 { 241 register struct proc *rp; 242 register struct slpque *qp; 243 register s; 244 int sig, catch = pri & PCATCH; 245 extern int cold; 246 int endtsleep(); 247 248 rp = u.u_procp; 249 s = splhigh(); 250 if (cold || panicstr) { 251 /* 252 * After a panic, or during autoconfiguration, 253 * just give interrupts a chance, then just return; 254 * don't run any other procs or panic below, 255 * in case this is the idle process and already asleep. 256 */ 257 splx(safepri); 258 splx(s); 259 return (0); 260 } 261 #ifdef DIAGNOSTIC 262 if (chan == 0 || rp->p_stat != SRUN || rp->p_rlink) 263 panic("tsleep"); 264 #endif 265 rp->p_wchan = chan; 266 rp->p_wmesg = wmesg; 267 rp->p_slptime = 0; 268 rp->p_pri = pri & PRIMASK; 269 qp = &slpque[HASH(chan)]; 270 if (qp->sq_head == 0) 271 qp->sq_head = rp; 272 else 273 *qp->sq_tailp = rp; 274 *(qp->sq_tailp = &rp->p_link) = 0; 275 if (timo) 276 timeout(endtsleep, (caddr_t)rp, timo); 277 /* 278 * If we stop in CURSIG/issig(), a wakeup or a SIGCONT 279 * (or both) could occur while we were stopped. 280 * A SIGCONT would cause us to be marked as SSLEEP 281 * without resuming us, thus we must be ready for sleep 282 * when CURSIG is called. If the wakeup happens while we're 283 * stopped, rp->p_wchan will be 0 upon return from CURSIG. 284 */ 285 if (catch) { 286 rp->p_flag |= SSINTR; 287 if (sig = CURSIG(rp)) { 288 if (rp->p_wchan) 289 unsleep(rp); 290 rp->p_stat = SRUN; 291 goto resume; 292 } 293 if (rp->p_wchan == 0) { 294 catch = 0; 295 goto resume; 296 } 297 } 298 rp->p_stat = SSLEEP; 299 (void) spl0(); 300 u.u_ru.ru_nvcsw++; 301 swtch(); 302 resume: 303 curpri = rp->p_usrpri; 304 splx(s); 305 rp->p_flag &= ~SSINTR; 306 if (rp->p_flag & STIMO) { 307 rp->p_flag &= ~STIMO; 308 if (catch == 0 || sig == 0) 309 return (EWOULDBLOCK); 310 } else if (timo) 311 untimeout(endtsleep, (caddr_t)rp); 312 if (catch && (sig != 0 || (sig = CURSIG(rp)))) { 313 if (u.u_sigintr & sigmask(sig)) 314 return (EINTR); 315 return (ERESTART); 316 } 317 return (0); 318 } 319 320 /* 321 * Implement timeout for tsleep. 322 * If process hasn't been awakened (wchan non-zero), 323 * set timeout flag and undo the sleep. If proc 324 * is stopped, just unsleep so it will remain stopped. 325 */ 326 endtsleep(p) 327 register struct proc *p; 328 { 329 int s = splhigh(); 330 331 if (p->p_wchan) { 332 if (p->p_stat == SSLEEP) 333 setrun(p); 334 else 335 unsleep(p); 336 p->p_flag |= STIMO; 337 } 338 splx(s); 339 } 340 341 /* 342 * Short-term, non-interruptable sleep. 343 */ 344 sleep(chan, pri) 345 caddr_t chan; 346 int pri; 347 { 348 register struct proc *rp; 349 register struct slpque *qp; 350 register s; 351 extern int cold; 352 353 #ifdef DIAGNOSTIC 354 if (pri > PZERO) { 355 printf("sleep called with pri %d > PZERO, wchan: %x\n", 356 pri, chan); 357 panic("old sleep"); 358 } 359 #endif 360 rp = u.u_procp; 361 s = splhigh(); 362 if (cold || panicstr) { 363 /* 364 * After a panic, or during autoconfiguration, 365 * just give interrupts a chance, then just return; 366 * don't run any other procs or panic below, 367 * in case this is the idle process and already asleep. 368 */ 369 splx(safepri); 370 splx(s); 371 return; 372 } 373 #ifdef DIAGNOSTIC 374 if (chan==0 || rp->p_stat != SRUN || rp->p_rlink) 375 panic("sleep"); 376 #endif 377 rp->p_wchan = chan; 378 rp->p_wmesg = NULL; 379 rp->p_slptime = 0; 380 rp->p_pri = pri; 381 qp = &slpque[HASH(chan)]; 382 if (qp->sq_head == 0) 383 qp->sq_head = rp; 384 else 385 *qp->sq_tailp = rp; 386 *(qp->sq_tailp = &rp->p_link) = 0; 387 rp->p_stat = SSLEEP; 388 (void) spl0(); 389 u.u_ru.ru_nvcsw++; 390 swtch(); 391 curpri = rp->p_usrpri; 392 splx(s); 393 } 394 395 /* 396 * Remove a process from its wait queue 397 */ 398 unsleep(p) 399 register struct proc *p; 400 { 401 register struct slpque *qp; 402 register struct proc **hp; 403 int s; 404 405 s = splhigh(); 406 if (p->p_wchan) { 407 hp = &(qp = &slpque[HASH(p->p_wchan)])->sq_head; 408 while (*hp != p) 409 hp = &(*hp)->p_link; 410 *hp = p->p_link; 411 if (qp->sq_tailp == &p->p_link) 412 qp->sq_tailp = hp; 413 p->p_wchan = 0; 414 } 415 splx(s); 416 } 417 418 /* 419 * Wake up all processes sleeping on chan. 420 */ 421 wakeup(chan) 422 register caddr_t chan; 423 { 424 register struct slpque *qp; 425 register struct proc *p, **q; 426 int s; 427 428 s = splhigh(); 429 qp = &slpque[HASH(chan)]; 430 restart: 431 for (q = &qp->sq_head; p = *q; ) { 432 #ifdef DIAGNOSTIC 433 if (p->p_rlink || p->p_stat != SSLEEP && p->p_stat != SSTOP) 434 panic("wakeup"); 435 #endif 436 if (p->p_wchan==chan) { 437 p->p_wchan = 0; 438 *q = p->p_link; 439 if (qp->sq_tailp == &p->p_link) 440 qp->sq_tailp = q; 441 if (p->p_stat == SSLEEP) { 442 /* OPTIMIZED INLINE EXPANSION OF setrun(p) */ 443 if (p->p_slptime > 1) 444 updatepri(p); 445 p->p_stat = SRUN; 446 if (p->p_flag & SLOAD) 447 setrq(p); 448 /* 449 * Since curpri is a usrpri, 450 * p->p_pri is always better than curpri. 451 */ 452 runrun++; 453 aston(); 454 if ((p->p_flag&SLOAD) == 0) { 455 if (runout != 0) { 456 runout = 0; 457 wakeup((caddr_t)&runout); 458 } 459 wantin++; 460 } 461 /* END INLINE EXPANSION */ 462 goto restart; 463 } 464 } else 465 q = &p->p_link; 466 } 467 splx(s); 468 } 469 470 /* 471 * Initialize the (doubly-linked) run queues 472 * to be empty. 473 */ 474 rqinit() 475 { 476 register int i; 477 478 for (i = 0; i < NQS; i++) 479 qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i]; 480 } 481 482 /* 483 * Set the process running; 484 * arrange for it to be swapped in if necessary. 485 */ 486 setrun(p) 487 register struct proc *p; 488 { 489 register int s; 490 491 s = splhigh(); 492 switch (p->p_stat) { 493 494 case 0: 495 case SWAIT: 496 case SRUN: 497 case SZOMB: 498 default: 499 panic("setrun"); 500 501 case SSTOP: 502 case SSLEEP: 503 unsleep(p); /* e.g. when sending signals */ 504 break; 505 506 case SIDL: 507 break; 508 } 509 p->p_stat = SRUN; 510 if (p->p_flag & SLOAD) 511 setrq(p); 512 splx(s); 513 if (p->p_slptime > 1) 514 updatepri(p); 515 if (p->p_pri < curpri) { 516 runrun++; 517 aston(); 518 } 519 if ((p->p_flag&SLOAD) == 0) { 520 if (runout != 0) { 521 runout = 0; 522 wakeup((caddr_t)&runout); 523 } 524 wantin++; 525 } 526 } 527 528 /* 529 * Set user priority. 530 * The rescheduling flag (runrun) 531 * is set if the priority is better 532 * than the currently running process. 533 */ 534 setpri(pp) 535 register struct proc *pp; 536 { 537 register int p; 538 539 p = (pp->p_cpu & 0377)/4; 540 p += PUSER + 2 * pp->p_nice; 541 if (p > 127) 542 p = 127; 543 if (p < curpri) { 544 runrun++; 545 aston(); 546 } 547 pp->p_usrpri = p; 548 return (p); 549 } 550