xref: /original-bsd/sys/kern/kern_synch.c (revision a3968605)
1 /*
2  * Copyright (c) 1982, 1986, 1990 Regents of the University of California.
3  * All rights reserved.  The Berkeley software License Agreement
4  * specifies the terms and conditions for redistribution.
5  *
6  *	@(#)kern_synch.c	7.12 (Berkeley) 12/01/90
7  */
8 
9 #include "machine/pte.h"
10 #include "machine/psl.h"
11 #include "machine/mtpr.h"
12 
13 #include "param.h"
14 #include "systm.h"
15 #include "user.h"
16 #include "proc.h"
17 #include "vm.h"
18 #include "kernel.h"
19 #include "buf.h"
20 
21 /*
22  * Force switch among equal priority processes every 100ms.
23  */
roundrobin()
{

	runrun++;	/* flag that a reschedule is wanted */
	aston();	/* force the switch to be noticed on return to user mode */
	timeout(roundrobin, (caddr_t)0, hz / 10);	/* re-arm self: every 100ms */
}
31 
32 /*
33  * constants for digital decay and forget
34  *	90% of (p_cpu) usage in 5*loadav time
35  *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
36  *          Note that, as ps(1) mentions, this can let percentages
37  *          total over 100% (I've seen 137.9% for 3 processes).
38  *
39  * Note that hardclock updates p_cpu and p_cpticks independently.
40  *
41  * We wish to decay away 90% of p_cpu in (5 * loadavg) seconds.
42  * That is, the system wants to compute a value of decay such
43  * that the following for loop:
44  * 	for (i = 0; i < (5 * loadavg); i++)
45  * 		p_cpu *= decay;
46  * will compute
47  * 	p_cpu *= 0.1;
48  * for all values of loadavg:
49  *
50  * Mathematically this loop can be expressed by saying:
51  * 	decay ** (5 * loadavg) ~= .1
52  *
53  * The system computes decay as:
54  * 	decay = (2 * loadavg) / (2 * loadavg + 1)
55  *
56  * We wish to prove that the system's computation of decay
57  * will always fulfill the equation:
58  * 	decay ** (5 * loadavg) ~= .1
59  *
60  * If we compute b as:
61  * 	b = 2 * loadavg
62  * then
63  * 	decay = b / (b + 1)
64  *
65  * We now need to prove two things:
66  *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
67  *	2) Given (b/(b+1)) ** power ~= .1, prove power == (5 * loadavg)
68  *
69  * Facts:
70  *         For x close to zero, exp(x) =~ 1 + x, since
71  *              exp(x) = 0! + x**1/1! + x**2/2! + ... .
72  *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
73  *         For x close to zero, ln(1+x) =~ x, since
74  *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
75  *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
76  *         ln(.1) =~ -2.30
77  *
78  * Proof of (1):
79  *    Solve (factor)**(power) =~ .1 given power (5*loadav):
80  *	solving for factor,
81  *      ln(factor) =~ (-2.30/5*loadav), or
82  *      factor =~ exp(-1/((5/2.30)*loadav) =~ exp(-1/(2*loadav)) =
83  *          exp(-1/b) =~ (b-1)/b =~ b/(b+1).                    QED
84  *
85  * Proof of (2):
86  *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
87  *	solving for power,
88  *      power*ln(b/(b+1)) =~ -2.30, or
89  *      power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
90  *
91  * Actual power values for the implemented algorithm are as follows:
92  *      loadav: 1       2       3       4
93  *      power:  5.68    10.32   14.94   19.55
94  */
95 
96 /* calculations for digital decay to forget 90% of usage in 5*loadav sec */
97 #define	get_b(loadav)		(2 * (loadav))
98 #define	get_pcpu(b, cpu)	(((b) * ((cpu) & 0377)) / ((b) + FSCALE))
99 
100 /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
101 fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */
102 
103 /*
104  * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
105  * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
106  * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
107  *
108  * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
109  *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
110  *
111  * If you don't want to bother with the faster/more-accurate formula, you
112  * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
113  * (more general) method of calculating the %age of CPU used by a process.
114  */
115 #define	CCPU_SHIFT	11
116 
117 /*
118  * Recompute process priorities, once a second
119  */
schedcpu()
{
	/* decay factor b = 2*loadavg (fixed point); see proof above */
	register fixpt_t b = get_b(averunnable[0]);
	register struct proc *p;
	register int s, a;

	/* once-a-second wakeup for anyone sleeping on lbolt */
	wakeup((caddr_t)&lbolt);
	for (p = allproc; p != NULL; p = p->p_nxt) {
		/*
		 * Bump the residence and sleep timers; both saturate
		 * at 127 (they are char-sized counters).
		 */
		if (p->p_time != 127)
			p->p_time++;
		if (p->p_stat==SSLEEP || p->p_stat==SSTOP)
			if (p->p_slptime != 127)
				p->p_slptime++;
		/* decay p_pctcpu by ccpu == exp(-1/20) each second */
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up.
		 */
		if (p->p_slptime > 1)
			continue;
		/*
		 * p_pctcpu is only for ps.
		 */
#if	(FSHIFT >= CCPU_SHIFT)
		/* fast path: fold this second's ticks in with shifts only */
		p->p_pctcpu += (hz == 100)?
			((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
                	100 * (((fixpt_t) p->p_cpticks)
				<< (FSHIFT - CCPU_SHIFT)) / hz;
#else
		/* general (slower) fixed-point formula; see CCPU_SHIFT note */
		p->p_pctcpu += ((FSCALE - ccpu) *
			(p->p_cpticks * FSCALE / hz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		/* decay accumulated cpu usage, add nice, clamp to 0..255 */
		a = (int) get_pcpu(b, p->p_cpu) + p->p_nice;
		if (a < 0)
			a = 0;
		if (a > 255)
			a = 255;
		p->p_cpu = a;
		(void) setpri(p);
		s = splhigh();	/* prevent state changes */
		if (p->p_pri >= PUSER) {
#define	PPQ	(128 / NQS)
			/*
			 * Re-queue a runnable loaded process only when its
			 * new priority falls in a different run queue;
			 * otherwise just store the new priority in place.
			 * The current process (unless noproc) is not on a
			 * run queue, so it is never remrq'd.
			 */
			if ((p != u.u_procp || noproc) &&
			    p->p_stat == SRUN &&
			    (p->p_flag & SLOAD) &&
			    (p->p_pri / PPQ) != (p->p_usrpri / PPQ)) {
				remrq(p);
				p->p_pri = p->p_usrpri;
				setrq(p);
			} else
				p->p_pri = p->p_usrpri;
		}
		splx(s);
	}
	vmmeter();
	/* wake the swapper if it is sleeping on runin */
	if (runin!=0) {
		runin = 0;
		wakeup((caddr_t)&runin);
	}
	/* clean swap buffers pending: wake proc[2] (presumably the pager) */
	if (bclnlist != NULL)
		wakeup((caddr_t)&proc[2]);
	timeout(schedcpu, (caddr_t)0, hz);	/* re-arm for next second */
}
184 
185 /*
186  * Recalculate the priority of a process after it has slept for a while.
187  */
updatepri(p)
	register struct proc *p;
{
	register int a = p->p_cpu & 0377;
	register fixpt_t b = get_b(averunnable[0]);

	p->p_slptime--;		/* the first time was done in schedcpu */
	/* apply one second's worth of cpu decay per remaining slept second */
	while (a && --p->p_slptime)
		a = (int) get_pcpu(b, a) /* + p->p_nice */;
	p->p_slptime = 0;
	/* clamp to the 0..255 range p_cpu can hold */
	if (a < 0)
		a = 0;
	if (a > 255)
		a = 255;
	p->p_cpu = a;
	(void) setpri(p);
}
205 
#define SQSIZE 0100	/* Must be power of 2 */
#define HASH(x)	(( (int) x >> 5) & (SQSIZE-1))
/*
 * Sleep queues: sleeping processes are kept on one of SQSIZE
 * singly-linked (via p_link) queues, selected by hashing the wait
 * channel address.  The tail pointer makes appending O(1).
 */
struct slpque {
	struct proc *sq_head;	/* first sleeping process, or 0 if empty */
	struct proc **sq_tailp;	/* points at the last process's p_link */
} slpque[SQSIZE];
212 
213 /*
214  * During autoconfiguration or after a panic, a sleep will simply
215  * lower the priority briefly to allow interrupts, then return.
216  * The priority to be used (safepri) is machine-dependent, thus this
217  * value is initialized and maintained in the machine-dependent layers.
218  * This priority will typically be 0, or the lowest priority
219  * that is safe for use on the interrupt stack; it can be made
220  * higher to block network software interrupts after panics.
221  */
222 int safepri;
223 
224 /*
225  * General sleep call.
226  * Suspends current process until a wakeup is made on chan.
227  * The process will then be made runnable with priority pri.
228  * Sleeps at most timo/hz seconds (0 means no timeout).
229  * If pri includes PCATCH flag, signals are checked
230  * before and after sleeping, else signals are not checked.
231  * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.
232  * If PCATCH is set and a signal needs to be delivered,
233  * ERESTART is returned if the current system call should be restarted
234  * if possible, and EINTR is returned if the system call should
235  * be interrupted by the signal (return EINTR).
236  */
tsleep(chan, pri, wmesg, timo)
	caddr_t chan;
	int pri;
	char *wmesg;
	int timo;
{
	register struct proc *rp;
	register struct slpque *qp;
	register s;
	int sig, catch = pri & PCATCH;
	extern int cold;
	int endtsleep();

	rp = u.u_procp;
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		splx(safepri);
		splx(s);
		return (0);
	}
#ifdef DIAGNOSTIC
	if (chan == 0 || rp->p_stat != SRUN || rp->p_rlink)
		panic("tsleep");
#endif
	rp->p_wchan = chan;
	rp->p_wmesg = wmesg;
	rp->p_slptime = 0;
	rp->p_pri = pri & PRIMASK;	/* strip PCATCH from the priority */
	/* append ourselves at the tail of the channel's hashed sleep queue */
	qp = &slpque[HASH(chan)];
	if (qp->sq_head == 0)
		qp->sq_head = rp;
	else
		*qp->sq_tailp = rp;
	*(qp->sq_tailp = &rp->p_link) = 0;
	if (timo)
		timeout(endtsleep, (caddr_t)rp, timo);
	/*
	 * If we stop in CURSIG/issig(), a wakeup or a SIGCONT
	 * (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, rp->p_wchan will be 0 upon return from CURSIG.
	 */
	if (catch) {
		rp->p_flag |= SSINTR;
		if (sig = CURSIG(rp)) {
			/* a signal is already pending: abort the sleep */
			if (rp->p_wchan)
				unsleep(rp);
			rp->p_stat = SRUN;
			goto resume;
		}
		if (rp->p_wchan == 0) {
			/* wakeup arrived while we were stopped in CURSIG */
			catch = 0;
			goto resume;
		}
	}
	rp->p_stat = SSLEEP;
	(void) spl0();
	u.u_ru.ru_nvcsw++;	/* charge a voluntary context switch */
	swtch();
resume:
	curpri = rp->p_usrpri;
	splx(s);
	rp->p_flag &= ~SSINTR;
	if (rp->p_flag & STIMO) {
		/* endtsleep fired: report timeout unless a caught signal wins */
		rp->p_flag &= ~STIMO;
		if (catch == 0 || sig == 0)
			return (EWOULDBLOCK);
	} else if (timo)
		untimeout(endtsleep, (caddr_t)rp);	/* cancel unexpired timer */
	if (catch && (sig != 0 || (sig = CURSIG(rp)))) {
		if (u.u_sigintr & sigmask(sig))
			return (EINTR);
		return (ERESTART);
	}
	return (0);
}
321 
322 /*
323  * Implement timeout for tsleep.
324  * If process hasn't been awakened (wchan non-zero),
325  * set timeout flag and undo the sleep.  If proc
326  * is stopped, just unsleep so it will remain stopped.
327  */
328 endtsleep(p)
329 	register struct proc *p;
330 {
331 	int s = splhigh();
332 
333 	if (p->p_wchan) {
334 		if (p->p_stat == SSLEEP)
335 			setrun(p);
336 		else
337 			unsleep(p);
338 		p->p_flag |= STIMO;
339 	}
340 	splx(s);
341 }
342 
343 /*
344  * Short-term, non-interruptable sleep.
345  */
sleep(chan, pri)
	caddr_t chan;
	int pri;
{
	register struct proc *rp;
	register struct slpque *qp;
	register s;
	extern int cold;

#ifdef DIAGNOSTIC
	/* old-style sleep must not be interruptible: pri must be <= PZERO */
	if (pri > PZERO) {
		printf("sleep called with pri %d > PZERO, wchan: %x\n",
			pri, chan);
		panic("old sleep");
	}
#endif
	rp = u.u_procp;
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		splx(safepri);
		splx(s);
		return;
	}
#ifdef DIAGNOSTIC
	if (chan==0 || rp->p_stat != SRUN || rp->p_rlink)
		panic("sleep");
#endif
	rp->p_wchan = chan;
	rp->p_wmesg = NULL;
	rp->p_slptime = 0;
	rp->p_pri = pri;
	/* append ourselves at the tail of the channel's hashed sleep queue */
	qp = &slpque[HASH(chan)];
	if (qp->sq_head == 0)
		qp->sq_head = rp;
	else
		*qp->sq_tailp = rp;
	*(qp->sq_tailp = &rp->p_link) = 0;
	rp->p_stat = SSLEEP;
	(void) spl0();
	u.u_ru.ru_nvcsw++;	/* charge a voluntary context switch */
	swtch();
	curpri = rp->p_usrpri;
	splx(s);
}
396 
397 /*
398  * Remove a process from its wait queue
399  */
unsleep(p)
	register struct proc *p;
{
	register struct slpque *qp;
	register struct proc **hp;
	int s;

	s = splhigh();
	if (p->p_wchan) {
		/* walk the hashed queue to find the link that points at p */
		hp = &(qp = &slpque[HASH(p->p_wchan)])->sq_head;
		while (*hp != p)
			hp = &(*hp)->p_link;
		*hp = p->p_link;	/* splice p out of the list */
		/* if p was the tail, back the tail pointer up */
		if (qp->sq_tailp == &p->p_link)
			qp->sq_tailp = hp;
		p->p_wchan = 0;		/* no longer waiting */
	}
	splx(s);
}
419 
420 /*
421  * Wake up all processes sleeping on chan.
422  */
wakeup(chan)
	register caddr_t chan;
{
	register struct slpque *qp;
	register struct proc *p, **q;
	int s;

	s = splhigh();
	qp = &slpque[HASH(chan)];
restart:
	/*
	 * Scan the hashed queue; channels that collide in the hash
	 * share a queue, so only entries whose p_wchan matches are
	 * removed and awakened.
	 */
	for (q = &qp->sq_head; p = *q; ) {
#ifdef DIAGNOSTIC
		if (p->p_rlink || p->p_stat != SSLEEP && p->p_stat != SSTOP)
			panic("wakeup");
#endif
		if (p->p_wchan==chan) {
			p->p_wchan = 0;
			*q = p->p_link;		/* unlink p from the queue */
			if (qp->sq_tailp == &p->p_link)
				qp->sq_tailp = q;
			/* a stopped (SSTOP) process is unlinked but stays stopped */
			if (p->p_stat == SSLEEP) {
				/* OPTIMIZED INLINE EXPANSION OF setrun(p) */
				if (p->p_slptime > 1)
					updatepri(p);
				p->p_stat = SRUN;
				if (p->p_flag & SLOAD)
					setrq(p);
				/*
				 * Since curpri is a usrpri,
				 * p->p_pri is always better than curpri.
				 */
				runrun++;
				aston();
				if ((p->p_flag&SLOAD) == 0) {
					/* swapped out: ask the swapper to bring it in */
					if (runout != 0) {
						runout = 0;
						wakeup((caddr_t)&runout);
					}
					wantin++;
				}
				/* END INLINE EXPANSION */
				/* updatepri/setrq may have used the queue: rescan */
				goto restart;
			}
		} else
			q = &p->p_link;
	}
	splx(s);
}
471 
472 /*
473  * Initialize the (doubly-linked) run queues
474  * to be empty.
475  */
476 rqinit()
477 {
478 	register int i;
479 
480 	for (i = 0; i < NQS; i++)
481 		qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
482 }
483 
484 /*
485  * Set the process running;
486  * arrange for it to be swapped in if necessary.
487  */
setrun(p)
	register struct proc *p;
{
	register int s;

	s = splhigh();
	switch (p->p_stat) {

	case 0:
	case SWAIT:
	case SRUN:
	case SZOMB:
	default:
		/* these states can never legally be made runnable */
		panic("setrun");

	case SSTOP:
	case SSLEEP:
		unsleep(p);		/* e.g. when sending signals */
		break;

	case SIDL:
		break;
	}
	p->p_stat = SRUN;
	if (p->p_flag & SLOAD)
		setrq(p);
	splx(s);
	if (p->p_slptime > 1)
		updatepri(p);	/* give back priority decayed while asleep */
	if (p->p_pri < curpri) {
		/* better priority than the running process: reschedule */
		runrun++;
		aston();
	}
	if ((p->p_flag&SLOAD) == 0) {
		/* not in core: wake the swapper so it gets swapped in */
		if (runout != 0) {
			runout = 0;
			wakeup((caddr_t)&runout);
		}
		wantin++;
	}
}
529 
530 /*
531  * Set user priority.
532  * The rescheduling flag (runrun)
533  * is set if the priority is better
534  * than the currently running process.
535  */
536 setpri(pp)
537 	register struct proc *pp;
538 {
539 	register int p;
540 
541 	p = (pp->p_cpu & 0377)/4;
542 	p += PUSER + 2 * pp->p_nice;
543 	if (pp->p_rssize > pp->p_maxrss && freemem < desfree)
544 		p += 2*4;	/* effectively, nice(4) */
545 	if (p > 127)
546 		p = 127;
547 	if (p < curpri) {
548 		runrun++;
549 		aston();
550 	}
551 	pp->p_usrpri = p;
552 	return (p);
553 }
554