1 /*- 2 * Copyright (c) 1982, 1986, 1990 The Regents of the University of California. 3 * Copyright (c) 1991 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * %sccs.include.redist.c% 7 * 8 * @(#)kern_synch.c 7.17 (Berkeley) 05/09/91 9 */ 10 11 #include "param.h" 12 #include "systm.h" 13 #include "proc.h" 14 #include "kernel.h" 15 #include "buf.h" 16 #include "signalvar.h" 17 #include "resourcevar.h" 18 19 #include "machine/cpu.h" 20 21 u_char curpri; /* usrpri of curproc */ 22 23 /* 24 * Force switch among equal priority processes every 100ms. 25 */ 26 roundrobin() 27 { 28 29 need_resched(); 30 timeout(roundrobin, (caddr_t)0, hz / 10); 31 } 32 33 /* 34 * constants for digital decay and forget 35 * 90% of (p_cpu) usage in 5*loadav time 36 * 95% of (p_pctcpu) usage in 60 seconds (load insensitive) 37 * Note that, as ps(1) mentions, this can let percentages 38 * total over 100% (I've seen 137.9% for 3 processes). 39 * 40 * Note that hardclock updates p_cpu and p_cpticks independently. 41 * 42 * We wish to decay away 90% of p_cpu in (5 * loadavg) seconds. 43 * That is, the system wants to compute a value of decay such 44 * that the following for loop: 45 * for (i = 0; i < (5 * loadavg); i++) 46 * p_cpu *= decay; 47 * will compute 48 * p_cpu *= 0.1; 49 * for all values of loadavg: 50 * 51 * Mathematically this loop can be expressed by saying: 52 * decay ** (5 * loadavg) ~= .1 53 * 54 * The system computes decay as: 55 * decay = (2 * loadavg) / (2 * loadavg + 1) 56 * 57 * We wish to prove that the system's computation of decay 58 * will always fulfill the equation: 59 * decay ** (5 * loadavg) ~= .1 60 * 61 * If we compute b as: 62 * b = 2 * loadavg 63 * then 64 * decay = b / (b + 1) 65 * 66 * We now need to prove two things: 67 * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1) 68 * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg) 69 * 70 * Facts: 71 * For x close to zero, exp(x) =~ 1 + x, since 72 * exp(x) = 0! + x**1/1! + x**2/2! + ... . 73 * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b. 74 * For x close to zero, ln(1+x) =~ x, since 75 * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1 76 * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1). 77 * ln(.1) =~ -2.30 78 * 79 * Proof of (1): 80 * Solve (factor)**(power) =~ .1 given power (5*loadav): 81 * solving for factor, 82 * ln(factor) =~ (-2.30/5*loadav), or 83 * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) = 84 * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED 85 * 86 * Proof of (2): 87 * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)): 88 * solving for power, 89 * power*ln(b/(b+1)) =~ -2.30, or 90 * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED 91 * 92 * Actual power values for the implemented algorithm are as follows: 93 * loadav: 1 2 3 4 94 * power: 5.68 10.32 14.94 19.55 95 */ 96 97 /* calculations for digital decay to forget 90% of usage in 5*loadav sec */ 98 #define loadfactor(loadav) (2 * (loadav)) 99 #define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE)) 100 101 /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 102 fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 103 104 /* 105 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the 106 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below 107 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT). 108 * 109 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used: 110 * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits). 111 * 112 * If you dont want to bother with the faster/more-accurate formula, you 113 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate 114 * (more general) method of calculating the %age of CPU used by a process. 115 */ 116 #define CCPU_SHIFT 11 117 118 /* 119 * Recompute process priorities, once a second 120 */ 121 schedcpu() 122 { 123 register fixpt_t loadfac = loadfactor(averunnable[0]); 124 register struct proc *p; 125 register int s; 126 register unsigned int newcpu; 127 128 wakeup((caddr_t)&lbolt); 129 for (p = allproc; p != NULL; p = p->p_nxt) { 130 /* 131 * Increment time in/out of memory and sleep time 132 * (if sleeping). We ignore overflow; with 16-bit int's 133 * (remember them?) overflow takes 45 days. 134 */ 135 p->p_time++; 136 if (p->p_stat == SSLEEP || p->p_stat == SSTOP) 137 p->p_slptime++; 138 p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT; 139 /* 140 * If the process has slept the entire second, 141 * stop recalculating its priority until it wakes up. 142 */ 143 if (p->p_slptime > 1) 144 continue; 145 /* 146 * p_pctcpu is only for ps. 147 */ 148 #if (FSHIFT >= CCPU_SHIFT) 149 p->p_pctcpu += (hz == 100)? 150 ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT): 151 100 * (((fixpt_t) p->p_cpticks) 152 << (FSHIFT - CCPU_SHIFT)) / hz; 153 #else 154 p->p_pctcpu += ((FSCALE - ccpu) * 155 (p->p_cpticks * FSCALE / hz)) >> FSHIFT; 156 #endif 157 p->p_cpticks = 0; 158 newcpu = (u_int) decay_cpu(loadfac, p->p_cpu) + p->p_nice; 159 p->p_cpu = min(newcpu, UCHAR_MAX); 160 setpri(p); 161 s = splhigh(); /* prevent state changes */ 162 if (p->p_pri >= PUSER) { 163 #define PPQ (128 / NQS) /* priorities per queue */ 164 if ((p != curproc) && 165 p->p_stat == SRUN && 166 (p->p_flag & SLOAD) && 167 (p->p_pri / PPQ) != (p->p_usrpri / PPQ)) { 168 remrq(p); 169 p->p_pri = p->p_usrpri; 170 setrq(p); 171 } else 172 p->p_pri = p->p_usrpri; 173 } 174 splx(s); 175 } 176 vmmeter(); 177 if (bclnlist != NULL) 178 wakeup((caddr_t)pageproc); 179 timeout(schedcpu, (caddr_t)0, hz); 180 } 181 182 /* 183 * Recalculate the priority of a process after it has slept for a while. 184 * For all load averages >= 1 and max p_cpu of 255, sleeping for at least 185 * six times the loadfactor will decay p_cpu to zero. 186 */ 187 updatepri(p) 188 register struct proc *p; 189 { 190 register unsigned int newcpu = p->p_cpu; 191 register fixpt_t loadfac = loadfactor(averunnable[0]); 192 193 if (p->p_slptime > 5 * loadfac) 194 p->p_cpu = 0; 195 else { 196 p->p_slptime--; /* the first time was done in schedcpu */ 197 while (newcpu && --p->p_slptime) 198 newcpu = (int) decay_cpu(loadfac, newcpu); 199 p->p_cpu = min(newcpu, UCHAR_MAX); 200 } 201 setpri(p); 202 } 203 204 #define SQSIZE 0100 /* Must be power of 2 */ 205 #define HASH(x) (( (int) x >> 5) & (SQSIZE-1)) 206 struct slpque { 207 struct proc *sq_head; 208 struct proc **sq_tailp; 209 } slpque[SQSIZE]; 210 211 /* 212 * During autoconfiguration or after a panic, a sleep will simply 213 * lower the priority briefly to allow interrupts, then return. 214 * The priority to be used (safepri) is machine-dependent, thus this 215 * value is initialized and maintained in the machine-dependent layers. 216 * This priority will typically be 0, or the lowest priority 217 * that is safe for use on the interrupt stack; it can be made 218 * higher to block network software interrupts after panics. 219 */ 220 int safepri; 221 222 /* 223 * General sleep call. 224 * Suspends current process until a wakeup is made on chan. 225 * The process will then be made runnable with priority pri. 226 * Sleeps at most timo/hz seconds (0 means no timeout). 227 * If pri includes PCATCH flag, signals are checked 228 * before and after sleeping, else signals are not checked. 229 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires. 230 * If PCATCH is set and a signal needs to be delivered, 231 * ERESTART is returned if the current system call should be restarted 232 * if possible, and EINTR is returned if the system call should 233 * be interrupted by the signal (return EINTR). 234 */ 235 tsleep(chan, pri, wmesg, timo) 236 caddr_t chan; 237 int pri; 238 char *wmesg; 239 int timo; 240 { 241 register struct proc *p = curproc; 242 register struct slpque *qp; 243 register s; 244 int sig, catch = pri & PCATCH; 245 extern int cold; 246 int endtsleep(); 247 248 s = splhigh(); 249 if (cold || panicstr) { 250 /* 251 * After a panic, or during autoconfiguration, 252 * just give interrupts a chance, then just return; 253 * don't run any other procs or panic below, 254 * in case this is the idle process and already asleep. 255 */ 256 splx(safepri); 257 splx(s); 258 return (0); 259 } 260 #ifdef DIAGNOSTIC 261 if (chan == 0 || p->p_stat != SRUN || p->p_rlink) 262 panic("tsleep"); 263 #endif 264 p->p_wchan = chan; 265 p->p_wmesg = wmesg; 266 p->p_slptime = 0; 267 p->p_pri = pri & PRIMASK; 268 qp = &slpque[HASH(chan)]; 269 if (qp->sq_head == 0) 270 qp->sq_head = p; 271 else 272 *qp->sq_tailp = p; 273 *(qp->sq_tailp = &p->p_link) = 0; 274 if (timo) 275 timeout(endtsleep, (caddr_t)p, timo); 276 /* 277 * We put ourselves on the sleep queue and start our timeout 278 * before calling CURSIG, as we could stop there, and a wakeup 279 * or a SIGCONT (or both) could occur while we were stopped. 280 * A SIGCONT would cause us to be marked as SSLEEP 281 * without resuming us, thus we must be ready for sleep 282 * when CURSIG is called. If the wakeup happens while we're 283 * stopped, p->p_wchan will be 0 upon return from CURSIG. 284 */ 285 if (catch) { 286 p->p_flag |= SSINTR; 287 if (sig = CURSIG(p)) { 288 if (p->p_wchan) 289 unsleep(p); 290 p->p_stat = SRUN; 291 goto resume; 292 } 293 if (p->p_wchan == 0) { 294 catch = 0; 295 goto resume; 296 } 297 } 298 p->p_stat = SSLEEP; 299 (void) spl0(); 300 p->p_stats->p_ru.ru_nvcsw++; 301 swtch(); 302 resume: 303 curpri = p->p_usrpri; 304 splx(s); 305 p->p_flag &= ~SSINTR; 306 if (p->p_flag & STIMO) { 307 p->p_flag &= ~STIMO; 308 if (catch == 0 || sig == 0) 309 return (EWOULDBLOCK); 310 } else if (timo) 311 untimeout(endtsleep, (caddr_t)p); 312 if (catch && (sig != 0 || (sig = CURSIG(p)))) { 313 if (p->p_sigacts->ps_sigintr & sigmask(sig)) 314 return (EINTR); 315 return (ERESTART); 316 } 317 return (0); 318 } 319 320 /* 321 * Implement timeout for tsleep. 322 * If process hasn't been awakened (wchan non-zero), 323 * set timeout flag and undo the sleep. If proc 324 * is stopped, just unsleep so it will remain stopped. 325 */ 326 endtsleep(p) 327 register struct proc *p; 328 { 329 int s = splhigh(); 330 331 if (p->p_wchan) { 332 if (p->p_stat == SSLEEP) 333 setrun(p); 334 else 335 unsleep(p); 336 p->p_flag |= STIMO; 337 } 338 splx(s); 339 } 340 341 /* 342 * Short-term, non-interruptable sleep. 343 */ 344 sleep(chan, pri) 345 caddr_t chan; 346 int pri; 347 { 348 register struct proc *p = curproc; 349 register struct slpque *qp; 350 register s; 351 extern int cold; 352 353 #ifdef DIAGNOSTIC 354 if (pri > PZERO) { 355 printf("sleep called with pri %d > PZERO, wchan: %x\n", 356 pri, chan); 357 panic("old sleep"); 358 } 359 #endif 360 s = splhigh(); 361 if (cold || panicstr) { 362 /* 363 * After a panic, or during autoconfiguration, 364 * just give interrupts a chance, then just return; 365 * don't run any other procs or panic below, 366 * in case this is the idle process and already asleep. 367 */ 368 splx(safepri); 369 splx(s); 370 return; 371 } 372 #ifdef DIAGNOSTIC 373 if (chan==0 || p->p_stat != SRUN || p->p_rlink) 374 panic("sleep"); 375 #endif 376 p->p_wchan = chan; 377 p->p_wmesg = NULL; 378 p->p_slptime = 0; 379 p->p_pri = pri; 380 qp = &slpque[HASH(chan)]; 381 if (qp->sq_head == 0) 382 qp->sq_head = p; 383 else 384 *qp->sq_tailp = p; 385 *(qp->sq_tailp = &p->p_link) = 0; 386 p->p_stat = SSLEEP; 387 (void) spl0(); 388 p->p_stats->p_ru.ru_nvcsw++; 389 swtch(); 390 curpri = p->p_usrpri; 391 splx(s); 392 } 393 394 /* 395 * Remove a process from its wait queue 396 */ 397 unsleep(p) 398 register struct proc *p; 399 { 400 register struct slpque *qp; 401 register struct proc **hp; 402 int s; 403 404 s = splhigh(); 405 if (p->p_wchan) { 406 hp = &(qp = &slpque[HASH(p->p_wchan)])->sq_head; 407 while (*hp != p) 408 hp = &(*hp)->p_link; 409 *hp = p->p_link; 410 if (qp->sq_tailp == &p->p_link) 411 qp->sq_tailp = hp; 412 p->p_wchan = 0; 413 } 414 splx(s); 415 } 416 417 /* 418 * Wakeup on "chan"; set all processes 419 * sleeping on chan to run state. 420 */ 421 wakeup(chan) 422 register caddr_t chan; 423 { 424 register struct slpque *qp; 425 register struct proc *p, **q; 426 int s; 427 428 s = splhigh(); 429 qp = &slpque[HASH(chan)]; 430 restart: 431 for (q = &qp->sq_head; p = *q; ) { 432 #ifdef DIAGNOSTIC 433 if (p->p_rlink || p->p_stat != SSLEEP && p->p_stat != SSTOP) 434 panic("wakeup"); 435 #endif 436 if (p->p_wchan == chan) { 437 p->p_wchan = 0; 438 *q = p->p_link; 439 if (qp->sq_tailp == &p->p_link) 440 qp->sq_tailp = q; 441 if (p->p_stat == SSLEEP) { 442 /* OPTIMIZED INLINE EXPANSION OF setrun(p) */ 443 if (p->p_slptime > 1) 444 updatepri(p); 445 p->p_slptime = 0; 446 p->p_stat = SRUN; 447 if (p->p_flag & SLOAD) 448 setrq(p); 449 /* 450 * Since curpri is a usrpri, 451 * p->p_pri is always better than curpri. 452 */ 453 if ((p->p_flag&SLOAD) == 0) 454 wakeup((caddr_t)&proc0); 455 else 456 need_resched(); 457 /* END INLINE EXPANSION */ 458 goto restart; 459 } 460 } else 461 q = &p->p_link; 462 } 463 splx(s); 464 } 465 466 /* 467 * Initialize the (doubly-linked) run queues 468 * to be empty. 469 */ 470 rqinit() 471 { 472 register int i; 473 474 for (i = 0; i < NQS; i++) 475 qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i]; 476 } 477 478 /* 479 * Change process state to be runnable, 480 * placing it on the run queue if it is in memory, 481 * and awakening the swapper if it isn't in memory. 482 */ 483 setrun(p) 484 register struct proc *p; 485 { 486 register int s; 487 488 s = splhigh(); 489 switch (p->p_stat) { 490 491 case 0: 492 case SWAIT: 493 case SRUN: 494 case SZOMB: 495 default: 496 panic("setrun"); 497 498 case SSTOP: 499 case SSLEEP: 500 unsleep(p); /* e.g. when sending signals */ 501 break; 502 503 case SIDL: 504 break; 505 } 506 p->p_stat = SRUN; 507 if (p->p_flag & SLOAD) 508 setrq(p); 509 splx(s); 510 if (p->p_slptime > 1) 511 updatepri(p); 512 p->p_slptime = 0; 513 if ((p->p_flag&SLOAD) == 0) 514 wakeup((caddr_t)&proc0); 515 else if (p->p_pri < curpri) 516 need_resched(); 517 } 518 519 /* 520 * Compute priority of process when running in user mode. 521 * Arrange to reschedule if the resulting priority 522 * is better than that of the current process. 523 */ 524 setpri(p) 525 register struct proc *p; 526 { 527 register unsigned int newpri; 528 529 newpri = PUSER + p->p_cpu / 4 + 2 * p->p_nice; 530 newpri = min(newpri, MAXPRI); 531 p->p_usrpri = newpri; 532 if (newpri < curpri) 533 need_resched(); 534 } 535