xref: /original-bsd/sys/kern/kern_synch.c (revision 753853ba)
/*-
 * Copyright (c) 1982, 1986, 1990, 1991
 *	The Regents of the University of California.  All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)kern_synch.c	7.23 (Berkeley) 02/28/92
 */

#include "param.h"
#include "systm.h"
#include "proc.h"
#include "kernel.h"
#include "buf.h"
#include "signalvar.h"
#include "resourcevar.h"
#ifdef KTRACE
#include "ktrace.h"
#endif

#include "machine/cpu.h"

u_char	curpri;			/* usrpri of curproc */
int	lbolt;			/* once a second sleep address */

/*
 * Force switch among equal priority processes every 100ms.
 */
roundrobin()
{

	need_resched();
	timeout(roundrobin, (caddr_t)0, hz / 10);
}
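
/*
 * Note on the pattern above: roundrobin() re-arms itself via timeout()
 * each time it runs, so it fires every hz / 10 clock ticks.  With the
 * then-typical hz == 100 (special-cased in schedcpu below) that is 10
 * ticks, i.e. the 100ms quantum mentioned in the comment.
 */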

/*
 * Constants for digital decay and forget:
 *	90% of (p_cpu) usage in 5*loadav time
 *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
 *          Note that, as ps(1) mentions, this can let percentages
 *          total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that hardclock updates p_cpu and p_cpticks independently.
 *
 * We wish to decay away 90% of p_cpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 * 	for (i = 0; i < (5 * loadavg); i++)
 * 		p_cpu *= decay;
 * will compute
 * 	p_cpu *= 0.1;
 * for all values of loadavg.
 *
 * Mathematically this loop can be expressed by saying:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 * 	decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 * 	b = 2 * loadavg
 * then
 * 	decay = b / (b + 1)
 *
 * We now need to prove two things:
 *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *	2) Given (b/(b+1)) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *         For x close to zero, exp(x) =~ 1 + x, since
 *              exp(x) = 1 + x/1! + x**2/2! + ... ;
 *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *         For x close to zero, ln(1+x) =~ x, since
 *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1;
 *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *         ln(.1) =~ -2.30
 *
 * Proof of (1):
 *    Solve (factor)**(power) =~ .1 given power (5*loadav):
 *	solving for factor,
 *      ln(factor) =~ -2.30 / (5*loadav), or
 *      factor =~ exp(-2.30/(5*loadav)) = exp(-1/((5/2.30)*loadav))
 *          =~ exp(-1/(2*loadav)) = exp(-1/b) =~ (b-1)/b =~ b/(b+1).  QED
 *
 * Proof of (2):
 *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *	solving for power,
 *      power*ln(b/(b+1)) =~ -2.30, or
 *      power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *      loadav: 1       2       3       4
 *      power:  5.68    10.32   14.94   19.55
 */

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))
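
/*
 * Illustrative only: a minimal user-space sketch (not part of the kernel)
 * that checks the "power" table above by solving (b/(b+1))**power == 0.1
 * for power, with b = 2*loadav.  The function name check_decay is invented
 * for illustration.
 */
#ifdef notdef
#include <math.h>
#include <stdio.h>

static void
check_decay(double loadav)
{
	double b = 2 * loadav;

	/* e.g. loadav 1 prints power 5.68, matching the table above */
	printf("loadav %.0f: power %.2f (expect about %g)\n",
	    loadav, log(0.1) / log(b / (b + 1)), 5 * loadav);
}
#endif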

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the percentage of CPU used by a
 * process.
 */
#define	CCPU_SHIFT	11
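
/*
 * Worked check of the shift above: 0.0488 ~= 100/2048, so with hz == 100
 * the exact increment (1 - ccpu) * (p_cpticks * FSCALE / hz) reduces to
 * approximately p_cpticks * FSCALE / 2048, i.e. p_cpticks shifted left
 * by FSHIFT - 11 bits; hence CCPU_SHIFT == 11.
 */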

/*
 * Recompute process priorities, once a second.
 */
schedcpu()
{
	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	register struct proc *p;
	register int s;
	register unsigned int newcpu;

	wakeup((caddr_t)&lbolt);
	for (p = allproc; p != NULL; p = p->p_nxt) {
		/*
		 * Increment time in/out of memory and sleep time
		 * (if sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		p->p_time++;
		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
			p->p_slptime++;
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up.
		 */
		if (p->p_slptime > 1)
			continue;
		/*
		 * p_pctcpu is only for ps.
		 */
#if	(FSHIFT >= CCPU_SHIFT)
		p->p_pctcpu += (hz == 100) ?
		    ((fixpt_t)p->p_cpticks) << (FSHIFT - CCPU_SHIFT) :
		    100 * (((fixpt_t)p->p_cpticks) << (FSHIFT - CCPU_SHIFT)) / hz;
#else
		p->p_pctcpu += ((FSCALE - ccpu) *
		    (p->p_cpticks * FSCALE / hz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		newcpu = (u_int)decay_cpu(loadfac, p->p_cpu) + p->p_nice;
		p->p_cpu = min(newcpu, UCHAR_MAX);
		setpri(p);
		s = splhigh();	/* prevent state changes */
		if (p->p_pri >= PUSER) {
#define	PPQ	(128 / NQS)		/* priorities per queue */
			if ((p != curproc) &&
			    p->p_stat == SRUN &&
			    (p->p_flag & SLOAD) &&
			    (p->p_pri / PPQ) != (p->p_usrpri / PPQ)) {
				remrq(p);
				p->p_pri = p->p_usrpri;
				setrq(p);
			} else
				p->p_pri = p->p_usrpri;
		}
		splx(s);
	}
	vmmeter();
	if (bclnlist != NULL)
		wakeup((caddr_t)pageproc);
	timeout(schedcpu, (caddr_t)0, hz);
}
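
/*
 * Note on the run-queue adjustment above: with the traditional NQS of 32
 * run queues, PPQ is 128 / 32 == 4, so a runnable process is moved between
 * queues (remrq/setrq) only when its recomputed priority crosses into a
 * different queue; otherwise updating p_pri in place is sufficient.
 */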

/*
 * Recalculate the priority of a process after it has slept for a while.
 * For all load averages >= 1 and max p_cpu of 255, sleeping for at least
 * six times the loadfactor will decay p_cpu to zero.
 */
updatepri(p)
	register struct proc *p;
{
	register unsigned int newcpu = p->p_cpu;
	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);

	if (p->p_slptime > 5 * loadfac)
		p->p_cpu = 0;
	else {
		p->p_slptime--;	/* the first time was done in schedcpu */
		while (newcpu && --p->p_slptime)
			newcpu = (int)decay_cpu(loadfac, newcpu);
		p->p_cpu = min(newcpu, UCHAR_MAX);
	}
	setpri(p);
}
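
/*
 * Worked example for the comment above: at loadav 1 each decay step
 * multiplies p_cpu by roughly 2/3, and 255 * (2/3)**n drops below 1 only
 * after n > ln(255)/ln(3/2) =~ 13.6 steps; sleeps longer than 5 * loadfac
 * skip the loop entirely and zero p_cpu outright.
 */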

#define SQSIZE 0100	/* Must be power of 2 */
#define HASH(x)	(((int)(x) >> 5) & (SQSIZE - 1))
struct slpque {
	struct proc *sq_head;
	struct proc **sq_tailp;
} slpque[SQSIZE];
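
/*
 * The sleep-queue hash: wait channels are kernel addresses, so the low
 * bits are mostly alignment; shifting right by 5 before masking with
 * SQSIZE - 1 (0100 octal == 64 queues) spreads nearby channels across
 * the buckets.
 */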

/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
int safepri;

/*
 * General sleep call.
 * Suspends the current process until a wakeup is made on chan.
 * The process will then be made runnable with priority pri.
 * Sleeps at most timo/hz seconds (0 means no timeout).
 * If pri includes the PCATCH flag, signals are checked
 * before and after sleeping; otherwise signals are not checked.
 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.
 * If PCATCH is set and a signal needs to be delivered,
 * ERESTART is returned if the current system call should be restarted
 * if possible, and EINTR is returned if the system call should
 * be interrupted by the signal.
 */
tsleep(chan, pri, wmesg, timo)
	void *chan;
	int pri;
	char *wmesg;
	int timo;
{
	register struct proc *p = curproc;
	register struct slpque *qp;
	register int s;
	int sig, catch = pri & PCATCH;
	extern int cold;
	int endtsleep();

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 1, 0);
#endif
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		splx(safepri);
		splx(s);
		return (0);
	}
#ifdef DIAGNOSTIC
	if (chan == NULL || p->p_stat != SRUN || p->p_rlink)
		panic("tsleep");
#endif
	p->p_wchan = chan;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_pri = pri & PRIMASK;
	qp = &slpque[HASH(chan)];
	if (qp->sq_head == 0)
		qp->sq_head = p;
	else
		*qp->sq_tailp = p;
	*(qp->sq_tailp = &p->p_link) = 0;
	if (timo)
		timeout(endtsleep, (caddr_t)p, timo);
	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	if (catch) {
		p->p_flag |= SSINTR;
		if (sig = CURSIG(p)) {
			if (p->p_wchan)
				unsleep(p);
			p->p_stat = SRUN;
			goto resume;
		}
		if (p->p_wchan == 0) {
			catch = 0;
			goto resume;
		}
	} else
		sig = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;
	swtch();
resume:
	curpri = p->p_usrpri;
	splx(s);
	p->p_flag &= ~SSINTR;
	if (p->p_flag & STIMO) {
		p->p_flag &= ~STIMO;
		if (sig == 0) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(p->p_tracep, 0, 0);
#endif
			return (EWOULDBLOCK);
		}
	} else if (timo)
		untimeout(endtsleep, (caddr_t)p);
	if (catch && (sig != 0 || (sig = CURSIG(p)))) {
#ifdef KTRACE
		if (KTRPOINT(p, KTR_CSW))
			ktrcsw(p->p_tracep, 0, 0);
#endif
		if (p->p_sigacts->ps_sigintr & sigmask(sig))
			return (EINTR);
		return (ERESTART);
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 0, 0);
#endif
	return (0);
}
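
/*
 * A minimal usage sketch (hypothetical; not part of this file): a driver
 * waits for a completion flag with a one-second timeout and catches
 * signals, while its interrupt handler calls wakeup() on the same channel.
 * The names xx_softc, sc_done, and xx_wait are invented for illustration.
 */
#ifdef notdef
struct xx_softc {
	int	sc_done;		/* set by the interrupt handler */
} xx_softc;

xx_wait()
{
	register struct xx_softc *sc = &xx_softc;
	int error;

	while (sc->sc_done == 0) {
		/* sleep at a signal-catching priority; wake on &sc->sc_done */
		if (error = tsleep((caddr_t)&sc->sc_done,
		    (PZERO + 1) | PCATCH, "xxwait", hz))
			return (error);	/* EWOULDBLOCK, EINTR, or ERESTART */
	}
	return (0);
}

/* interrupt side:
 *	sc->sc_done = 1;
 *	wakeup((caddr_t)&sc->sc_done);
 */
#endif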

/*
 * Implement timeout for tsleep.
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
endtsleep(p)
	register struct proc *p;
{
	int s = splhigh();

	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrun(p);
		else
			unsleep(p);
		p->p_flag |= STIMO;
	}
	splx(s);
}

/*
 * Short-term, non-interruptible sleep.
 */
sleep(chan, pri)
	void *chan;
	int pri;
{
	register struct proc *p = curproc;
	register struct slpque *qp;
	register int s;
	extern int cold;

#ifdef DIAGNOSTIC
	if (pri > PZERO) {
		printf("sleep called with pri %d > PZERO, wchan: %x\n",
		    pri, chan);
		panic("old sleep");
	}
#endif
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		splx(safepri);
		splx(s);
		return;
	}
#ifdef DIAGNOSTIC
	if (chan == NULL || p->p_stat != SRUN || p->p_rlink)
		panic("sleep");
#endif
	p->p_wchan = chan;
	p->p_wmesg = NULL;
	p->p_slptime = 0;
	p->p_pri = pri;
	qp = &slpque[HASH(chan)];
	if (qp->sq_head == 0)
		qp->sq_head = p;
	else
		*qp->sq_tailp = p;
	*(qp->sq_tailp = &p->p_link) = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 1, 0);
#endif
	swtch();
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 0, 0);
#endif
	curpri = p->p_usrpri;
	splx(s);
}

/*
 * Remove a process from its wait queue.
 */
unsleep(p)
	register struct proc *p;
{
	register struct slpque *qp;
	register struct proc **hp;
	int s;

	s = splhigh();
	if (p->p_wchan) {
		hp = &(qp = &slpque[HASH(p->p_wchan)])->sq_head;
		while (*hp != p)
			hp = &(*hp)->p_link;
		*hp = p->p_link;
		if (qp->sq_tailp == &p->p_link)
			qp->sq_tailp = hp;
		p->p_wchan = 0;
	}
	splx(s);
}
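
/*
 * Note on the removal loop above: hp always points at the link field that
 * currently references p (starting from sq_head), so p can be unlinked
 * with a single store and no back pointers; only the tail pointer needs
 * patching when p was last on the queue.
 */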

/*
 * Wakeup on "chan"; set all processes
 * sleeping on chan to run state.
 */
wakeup(chan)
	register void *chan;
{
	register struct slpque *qp;
	register struct proc *p, **q;
	int s;

	s = splhigh();
	qp = &slpque[HASH(chan)];
restart:
	for (q = &qp->sq_head; p = *q; ) {
#ifdef DIAGNOSTIC
		if (p->p_rlink || (p->p_stat != SSLEEP && p->p_stat != SSTOP))
			panic("wakeup");
#endif
		if (p->p_wchan == chan) {
			p->p_wchan = 0;
			*q = p->p_link;
			if (qp->sq_tailp == &p->p_link)
				qp->sq_tailp = q;
			if (p->p_stat == SSLEEP) {
				/* OPTIMIZED INLINE EXPANSION OF setrun(p) */
				if (p->p_slptime > 1)
					updatepri(p);
				p->p_slptime = 0;
				p->p_stat = SRUN;
				if (p->p_flag & SLOAD)
					setrq(p);
				/*
				 * Since curpri is a usrpri,
				 * p->p_pri is always better than curpri.
				 */
				if ((p->p_flag & SLOAD) == 0)
					wakeup((caddr_t)&proc0);
				else
					need_resched();
				/* END INLINE EXPANSION */
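				/*
				 * Rescan conservatively from the head: the
				 * recursive wakeup of proc0 above may have
				 * modified this sleep queue behind q.
				 */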
				goto restart;
			}
		} else
			q = &p->p_link;
	}
	splx(s);
}

/*
 * Initialize the (doubly-linked) run queues
 * to be empty.
 */
rqinit()
{
	register int i;

	for (i = 0; i < NQS; i++)
		qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
}

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
setrun(p)
	register struct proc *p;
{
	register int s;

	s = splhigh();
	switch (p->p_stat) {

	case 0:
	case SWAIT:
	case SRUN:
	case SZOMB:
	default:
		panic("setrun");

	case SSTOP:
	case SSLEEP:
		unsleep(p);		/* e.g. when sending signals */
		break;

	case SIDL:
		break;
	}
	p->p_stat = SRUN;
	if (p->p_flag & SLOAD)
		setrq(p);
	splx(s);
	if (p->p_slptime > 1)
		updatepri(p);
	p->p_slptime = 0;
	if ((p->p_flag & SLOAD) == 0)
		wakeup((caddr_t)&proc0);
	else if (p->p_pri < curpri)
		need_resched();
}

/*
 * Compute priority of process when running in user mode.
 * Arrange to reschedule if the resulting priority
 * is better than that of the current process.
 */
setpri(p)
	register struct proc *p;
{
	register unsigned int newpri;

	newpri = PUSER + p->p_cpu / 4 + 2 * p->p_nice;
	newpri = min(newpri, MAXPRI);
	p->p_usrpri = newpri;
	if (newpri < curpri)
		need_resched();
}
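
/*
 * Worked example of the formula above (values illustrative): a process
 * with p_cpu == 100 and p_nice == 0 gets PUSER + 100/4 + 0 == PUSER + 25;
 * raising nice to 10 adds 20 more.  Lower numbers are better priorities,
 * which is why need_resched() fires when newpri < curpri.
 */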