1 /*- 2 * Copyright (c) 1982, 1986, 1990 The Regents of the University of California. 3 * Copyright (c) 1991 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * %sccs.include.redist.c% 7 * 8 * @(#)kern_synch.c 7.23 (Berkeley) 02/28/92 9 */ 10 11 #include "param.h" 12 #include "systm.h" 13 #include "proc.h" 14 #include "kernel.h" 15 #include "buf.h" 16 #include "signalvar.h" 17 #include "resourcevar.h" 18 #ifdef KTRACE 19 #include "ktrace.h" 20 #endif 21 22 #include "machine/cpu.h" 23 24 u_char curpri; /* usrpri of curproc */ 25 int lbolt; /* once a second sleep address */ 26 27 /* 28 * Force switch among equal priority processes every 100ms. 29 */ 30 roundrobin() 31 { 32 33 need_resched(); 34 timeout(roundrobin, (caddr_t)0, hz / 10); 35 } 36 37 /* 38 * constants for digital decay and forget 39 * 90% of (p_cpu) usage in 5*loadav time 40 * 95% of (p_pctcpu) usage in 60 seconds (load insensitive) 41 * Note that, as ps(1) mentions, this can let percentages 42 * total over 100% (I've seen 137.9% for 3 processes). 43 * 44 * Note that hardclock updates p_cpu and p_cpticks independently. 45 * 46 * We wish to decay away 90% of p_cpu in (5 * loadavg) seconds. 
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 * 	for (i = 0; i < (5 * loadavg); i++)
 * 		p_cpu *= decay;
 * will compute
 * 	p_cpu *= 0.1;
 * for all values of loadavg:
 *
 * Mathematically this loop can be expressed by saying:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 * 	decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 * 	b = 2 * loadavg
 * then
 * 	decay = b / (b + 1)
 *
 * We now need to prove two things:
 *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *	For x close to zero, exp(x) =~ 1 + x, since
 *		exp(x) = 0! + x**1/1! + x**2/2! + ... .
 *		therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *	For x close to zero, ln(1+x) =~ x, since
 *		ln(1+x) = x - x**2/2 + x**3/3 - ...	-1 < x < 1
 *		therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *	ln(.1) =~ -2.30
 *
 * Proof of (1):
 *	Solve (factor)**(power) =~ .1 given power (5*loadav):
 *	solving for factor,
 *	ln(factor) =~ (-2.30/5*loadav), or
 *	factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *	    exp(-1/b) =~ (b-1)/b =~ b/(b+1).  QED
 *
 * Proof of (2):
 *	Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *	solving for power,
 *	power*ln(b/(b+1)) =~ -2.30, or
 *	power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
QED 95 * 96 * Actual power values for the implemented algorithm are as follows: 97 * loadav: 1 2 3 4 98 * power: 5.68 10.32 14.94 19.55 99 */ 100 101 /* calculations for digital decay to forget 90% of usage in 5*loadav sec */ 102 #define loadfactor(loadav) (2 * (loadav)) 103 #define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE)) 104 105 /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 106 fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 107 108 /* 109 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the 110 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below 111 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT). 112 * 113 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used: 114 * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits). 115 * 116 * If you dont want to bother with the faster/more-accurate formula, you 117 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate 118 * (more general) method of calculating the %age of CPU used by a process. 119 */ 120 #define CCPU_SHIFT 11 121 122 /* 123 * Recompute process priorities, once a second 124 */ 125 schedcpu() 126 { 127 register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); 128 register struct proc *p; 129 register int s; 130 register unsigned int newcpu; 131 132 wakeup((caddr_t)&lbolt); 133 for (p = allproc; p != NULL; p = p->p_nxt) { 134 /* 135 * Increment time in/out of memory and sleep time 136 * (if sleeping). We ignore overflow; with 16-bit int's 137 * (remember them?) overflow takes 45 days. 138 */ 139 p->p_time++; 140 if (p->p_stat == SSLEEP || p->p_stat == SSTOP) 141 p->p_slptime++; 142 p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT; 143 /* 144 * If the process has slept the entire second, 145 * stop recalculating its priority until it wakes up. 146 */ 147 if (p->p_slptime > 1) 148 continue; 149 /* 150 * p_pctcpu is only for ps. 
151 */ 152 #if (FSHIFT >= CCPU_SHIFT) 153 p->p_pctcpu += (hz == 100)? 154 ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT): 155 100 * (((fixpt_t) p->p_cpticks) 156 << (FSHIFT - CCPU_SHIFT)) / hz; 157 #else 158 p->p_pctcpu += ((FSCALE - ccpu) * 159 (p->p_cpticks * FSCALE / hz)) >> FSHIFT; 160 #endif 161 p->p_cpticks = 0; 162 newcpu = (u_int) decay_cpu(loadfac, p->p_cpu) + p->p_nice; 163 p->p_cpu = min(newcpu, UCHAR_MAX); 164 setpri(p); 165 s = splhigh(); /* prevent state changes */ 166 if (p->p_pri >= PUSER) { 167 #define PPQ (128 / NQS) /* priorities per queue */ 168 if ((p != curproc) && 169 p->p_stat == SRUN && 170 (p->p_flag & SLOAD) && 171 (p->p_pri / PPQ) != (p->p_usrpri / PPQ)) { 172 remrq(p); 173 p->p_pri = p->p_usrpri; 174 setrq(p); 175 } else 176 p->p_pri = p->p_usrpri; 177 } 178 splx(s); 179 } 180 vmmeter(); 181 if (bclnlist != NULL) 182 wakeup((caddr_t)pageproc); 183 timeout(schedcpu, (caddr_t)0, hz); 184 } 185 186 /* 187 * Recalculate the priority of a process after it has slept for a while. 188 * For all load averages >= 1 and max p_cpu of 255, sleeping for at least 189 * six times the loadfactor will decay p_cpu to zero. 190 */ 191 updatepri(p) 192 register struct proc *p; 193 { 194 register unsigned int newcpu = p->p_cpu; 195 register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); 196 197 if (p->p_slptime > 5 * loadfac) 198 p->p_cpu = 0; 199 else { 200 p->p_slptime--; /* the first time was done in schedcpu */ 201 while (newcpu && --p->p_slptime) 202 newcpu = (int) decay_cpu(loadfac, newcpu); 203 p->p_cpu = min(newcpu, UCHAR_MAX); 204 } 205 setpri(p); 206 } 207 208 #define SQSIZE 0100 /* Must be power of 2 */ 209 #define HASH(x) (( (int) x >> 5) & (SQSIZE-1)) 210 struct slpque { 211 struct proc *sq_head; 212 struct proc **sq_tailp; 213 } slpque[SQSIZE]; 214 215 /* 216 * During autoconfiguration or after a panic, a sleep will simply 217 * lower the priority briefly to allow interrupts, then return. 
218 * The priority to be used (safepri) is machine-dependent, thus this 219 * value is initialized and maintained in the machine-dependent layers. 220 * This priority will typically be 0, or the lowest priority 221 * that is safe for use on the interrupt stack; it can be made 222 * higher to block network software interrupts after panics. 223 */ 224 int safepri; 225 226 /* 227 * General sleep call. 228 * Suspends current process until a wakeup is made on chan. 229 * The process will then be made runnable with priority pri. 230 * Sleeps at most timo/hz seconds (0 means no timeout). 231 * If pri includes PCATCH flag, signals are checked 232 * before and after sleeping, else signals are not checked. 233 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires. 234 * If PCATCH is set and a signal needs to be delivered, 235 * ERESTART is returned if the current system call should be restarted 236 * if possible, and EINTR is returned if the system call should 237 * be interrupted by the signal (return EINTR). 238 */ 239 tsleep(chan, pri, wmesg, timo) 240 void *chan; 241 int pri; 242 char *wmesg; 243 int timo; 244 { 245 register struct proc *p = curproc; 246 register struct slpque *qp; 247 register s; 248 int sig, catch = pri & PCATCH; 249 extern int cold; 250 int endtsleep(); 251 252 #ifdef KTRACE 253 if (KTRPOINT(p, KTR_CSW)) 254 ktrcsw(p->p_tracep, 1, 0); 255 #endif 256 s = splhigh(); 257 if (cold || panicstr) { 258 /* 259 * After a panic, or during autoconfiguration, 260 * just give interrupts a chance, then just return; 261 * don't run any other procs or panic below, 262 * in case this is the idle process and already asleep. 
263 */ 264 splx(safepri); 265 splx(s); 266 return (0); 267 } 268 #ifdef DIAGNOSTIC 269 if (chan == NULL || p->p_stat != SRUN || p->p_rlink) 270 panic("tsleep"); 271 #endif 272 p->p_wchan = chan; 273 p->p_wmesg = wmesg; 274 p->p_slptime = 0; 275 p->p_pri = pri & PRIMASK; 276 qp = &slpque[HASH(chan)]; 277 if (qp->sq_head == 0) 278 qp->sq_head = p; 279 else 280 *qp->sq_tailp = p; 281 *(qp->sq_tailp = &p->p_link) = 0; 282 if (timo) 283 timeout(endtsleep, (caddr_t)p, timo); 284 /* 285 * We put ourselves on the sleep queue and start our timeout 286 * before calling CURSIG, as we could stop there, and a wakeup 287 * or a SIGCONT (or both) could occur while we were stopped. 288 * A SIGCONT would cause us to be marked as SSLEEP 289 * without resuming us, thus we must be ready for sleep 290 * when CURSIG is called. If the wakeup happens while we're 291 * stopped, p->p_wchan will be 0 upon return from CURSIG. 292 */ 293 if (catch) { 294 p->p_flag |= SSINTR; 295 if (sig = CURSIG(p)) { 296 if (p->p_wchan) 297 unsleep(p); 298 p->p_stat = SRUN; 299 goto resume; 300 } 301 if (p->p_wchan == 0) { 302 catch = 0; 303 goto resume; 304 } 305 } else 306 sig = 0; 307 p->p_stat = SSLEEP; 308 p->p_stats->p_ru.ru_nvcsw++; 309 swtch(); 310 resume: 311 curpri = p->p_usrpri; 312 splx(s); 313 p->p_flag &= ~SSINTR; 314 if (p->p_flag & STIMO) { 315 p->p_flag &= ~STIMO; 316 if (sig == 0) { 317 #ifdef KTRACE 318 if (KTRPOINT(p, KTR_CSW)) 319 ktrcsw(p->p_tracep, 0, 0); 320 #endif 321 return (EWOULDBLOCK); 322 } 323 } else if (timo) 324 untimeout(endtsleep, (caddr_t)p); 325 if (catch && (sig != 0 || (sig = CURSIG(p)))) { 326 #ifdef KTRACE 327 if (KTRPOINT(p, KTR_CSW)) 328 ktrcsw(p->p_tracep, 0, 0); 329 #endif 330 if (p->p_sigacts->ps_sigintr & sigmask(sig)) 331 return (EINTR); 332 return (ERESTART); 333 } 334 #ifdef KTRACE 335 if (KTRPOINT(p, KTR_CSW)) 336 ktrcsw(p->p_tracep, 0, 0); 337 #endif 338 return (0); 339 } 340 341 /* 342 * Implement timeout for tsleep. 
343 * If process hasn't been awakened (wchan non-zero), 344 * set timeout flag and undo the sleep. If proc 345 * is stopped, just unsleep so it will remain stopped. 346 */ 347 endtsleep(p) 348 register struct proc *p; 349 { 350 int s = splhigh(); 351 352 if (p->p_wchan) { 353 if (p->p_stat == SSLEEP) 354 setrun(p); 355 else 356 unsleep(p); 357 p->p_flag |= STIMO; 358 } 359 splx(s); 360 } 361 362 /* 363 * Short-term, non-interruptable sleep. 364 */ 365 sleep(chan, pri) 366 void *chan; 367 int pri; 368 { 369 register struct proc *p = curproc; 370 register struct slpque *qp; 371 register s; 372 extern int cold; 373 374 #ifdef DIAGNOSTIC 375 if (pri > PZERO) { 376 printf("sleep called with pri %d > PZERO, wchan: %x\n", 377 pri, chan); 378 panic("old sleep"); 379 } 380 #endif 381 s = splhigh(); 382 if (cold || panicstr) { 383 /* 384 * After a panic, or during autoconfiguration, 385 * just give interrupts a chance, then just return; 386 * don't run any other procs or panic below, 387 * in case this is the idle process and already asleep. 
388 */ 389 splx(safepri); 390 splx(s); 391 return; 392 } 393 #ifdef DIAGNOSTIC 394 if (chan == NULL || p->p_stat != SRUN || p->p_rlink) 395 panic("sleep"); 396 #endif 397 p->p_wchan = chan; 398 p->p_wmesg = NULL; 399 p->p_slptime = 0; 400 p->p_pri = pri; 401 qp = &slpque[HASH(chan)]; 402 if (qp->sq_head == 0) 403 qp->sq_head = p; 404 else 405 *qp->sq_tailp = p; 406 *(qp->sq_tailp = &p->p_link) = 0; 407 p->p_stat = SSLEEP; 408 p->p_stats->p_ru.ru_nvcsw++; 409 #ifdef KTRACE 410 if (KTRPOINT(p, KTR_CSW)) 411 ktrcsw(p->p_tracep, 1, 0); 412 #endif 413 swtch(); 414 #ifdef KTRACE 415 if (KTRPOINT(p, KTR_CSW)) 416 ktrcsw(p->p_tracep, 0, 0); 417 #endif 418 curpri = p->p_usrpri; 419 splx(s); 420 } 421 422 /* 423 * Remove a process from its wait queue 424 */ 425 unsleep(p) 426 register struct proc *p; 427 { 428 register struct slpque *qp; 429 register struct proc **hp; 430 int s; 431 432 s = splhigh(); 433 if (p->p_wchan) { 434 hp = &(qp = &slpque[HASH(p->p_wchan)])->sq_head; 435 while (*hp != p) 436 hp = &(*hp)->p_link; 437 *hp = p->p_link; 438 if (qp->sq_tailp == &p->p_link) 439 qp->sq_tailp = hp; 440 p->p_wchan = 0; 441 } 442 splx(s); 443 } 444 445 /* 446 * Wakeup on "chan"; set all processes 447 * sleeping on chan to run state. 
448 */ 449 wakeup(chan) 450 register void *chan; 451 { 452 register struct slpque *qp; 453 register struct proc *p, **q; 454 int s; 455 456 s = splhigh(); 457 qp = &slpque[HASH(chan)]; 458 restart: 459 for (q = &qp->sq_head; p = *q; ) { 460 #ifdef DIAGNOSTIC 461 if (p->p_rlink || p->p_stat != SSLEEP && p->p_stat != SSTOP) 462 panic("wakeup"); 463 #endif 464 if (p->p_wchan == chan) { 465 p->p_wchan = 0; 466 *q = p->p_link; 467 if (qp->sq_tailp == &p->p_link) 468 qp->sq_tailp = q; 469 if (p->p_stat == SSLEEP) { 470 /* OPTIMIZED INLINE EXPANSION OF setrun(p) */ 471 if (p->p_slptime > 1) 472 updatepri(p); 473 p->p_slptime = 0; 474 p->p_stat = SRUN; 475 if (p->p_flag & SLOAD) 476 setrq(p); 477 /* 478 * Since curpri is a usrpri, 479 * p->p_pri is always better than curpri. 480 */ 481 if ((p->p_flag&SLOAD) == 0) 482 wakeup((caddr_t)&proc0); 483 else 484 need_resched(); 485 /* END INLINE EXPANSION */ 486 goto restart; 487 } 488 } else 489 q = &p->p_link; 490 } 491 splx(s); 492 } 493 494 /* 495 * Initialize the (doubly-linked) run queues 496 * to be empty. 497 */ 498 rqinit() 499 { 500 register int i; 501 502 for (i = 0; i < NQS; i++) 503 qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i]; 504 } 505 506 /* 507 * Change process state to be runnable, 508 * placing it on the run queue if it is in memory, 509 * and awakening the swapper if it isn't in memory. 510 */ 511 setrun(p) 512 register struct proc *p; 513 { 514 register int s; 515 516 s = splhigh(); 517 switch (p->p_stat) { 518 519 case 0: 520 case SWAIT: 521 case SRUN: 522 case SZOMB: 523 default: 524 panic("setrun"); 525 526 case SSTOP: 527 case SSLEEP: 528 unsleep(p); /* e.g. 
when sending signals */ 529 break; 530 531 case SIDL: 532 break; 533 } 534 p->p_stat = SRUN; 535 if (p->p_flag & SLOAD) 536 setrq(p); 537 splx(s); 538 if (p->p_slptime > 1) 539 updatepri(p); 540 p->p_slptime = 0; 541 if ((p->p_flag&SLOAD) == 0) 542 wakeup((caddr_t)&proc0); 543 else if (p->p_pri < curpri) 544 need_resched(); 545 } 546 547 /* 548 * Compute priority of process when running in user mode. 549 * Arrange to reschedule if the resulting priority 550 * is better than that of the current process. 551 */ 552 setpri(p) 553 register struct proc *p; 554 { 555 register unsigned int newpri; 556 557 newpri = PUSER + p->p_cpu / 4 + 2 * p->p_nice; 558 newpri = min(newpri, MAXPRI); 559 p->p_usrpri = newpri; 560 if (newpri < curpri) 561 need_resched(); 562 } 563