xref: /original-bsd/sys/vm/vm_meter.c (revision e61f0abc)
1 /*
2  * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)vm_meter.c	7.9 (Berkeley) 06/28/90
8  */
9 
10 #include "param.h"
11 #include "systm.h"
12 #include "user.h"
13 #include "proc.h"
14 #include "text.h"
15 #include "vm.h"
16 #include "kernel.h"
17 
18 int	maxslp = MAXSLP;
19 int	saferss = SAFERSS;
20 
21 /*
22  * The following parameters control operation of the page replacement
23  * algorithm.  They are initialized to 0, and then computed at boot time
24  * based on the size of the system.  If they are patched non-zero in
25  * a loaded vmunix they are left alone and may thus be changed per system
26  * using adb on the loaded system.
27  */
28 int	maxpgio = 0;
29 int	minfree = 0;
30 int	desfree = 0;
31 int	lotsfree = 0;
32 int	slowscan = 0;
33 int	fastscan = 0;
34 int	klin = KLIN;
35 int	klseql = KLSEQL;
36 int	klsdist = KLSDIST;
37 int	kltxt = KLTXT;
38 int	klout = KLOUT;
39 int	multprog = -1;		/* so we don't count process 2 */
40 
41 fixpt_t	averunnable[3];		/* load average, of runnable procs */
42 
43 /*
44  * The main loop of the scheduling (swapping) process.
45  *
46  * The basic idea is:
47  *	see if anyone wants to be swapped in;
48  *	swap out processes until there is room;
49  *	swap him in;
50  *	repeat.
51  * If the paging rate is too high, or the average free memory
52  * is very low, then we do not consider swapping anyone in,
53  * but rather look for someone to swap out.
54  *
55  * The runout flag is set whenever someone is swapped out.
56  * Sched sleeps on it awaiting work.
57  *
58  * Sched sleeps on runin whenever it cannot find enough
59  * core (by swapping out or otherwise) to fit the
60  * selected swapped process.  It is awakened when the
61  * core situation changes and in any case once per second.
62  *
63  * sched DOESN'T ACCOUNT FOR PAGE TABLE SIZE IN CALCULATIONS.
64  */
65 
66 #define	swappable(p) \
67 	(((p)->p_flag&(SSYS|SLOCK|SULOCK|SLOAD|SPAGE|SKEEP|SWEXIT|SPHYSIO))==SLOAD)
68 
69 /* insure non-zero */
70 #define	nz(x)	(x != 0 ? x : 1)
71 
72 #define	NBIG	4
73 #define	MAXNBIG	10
74 int	nbig = NBIG;
75 
76 struct bigp {
77 	struct	proc *bp_proc;
78 	int	bp_pri;
79 	struct	bigp *bp_link;
80 } bigp[MAXNBIG], bplist;
81 
82 sched()
83 {
84 	register struct proc *rp, *p, *inp;
85 	int outpri, inpri, rppri;
86 	int sleeper, desperate, deservin, needs, divisor;
87 	register struct bigp *bp, *nbp;
88 	int biggot, gives;
89 
90 loop:
91 	wantin = 0;
92 	deservin = 0;
93 	sleeper = 0;
94 	p = 0;
95 	/*
96 	 * See if paging system is overloaded; if so swap someone out.
97 	 * Conditions for hard outswap are:
98 	 *	if need kernel map (mix it up).
99 	 * or
100 	 *	1. if there are at least 2 runnable processes (on the average)
101 	 * and	2. the paging rate is excessive or memory is now VERY low.
102 	 * and	3. the short (5-second) and longer (30-second) average
103 	 *	   memory is less than desirable.
104 	 */
105 	if (kmapwnt ||
106 	    (averunnable[0] >= 2 * FSCALE &&
107 	     imax(avefree, avefree30) < desfree &&
108 	     (rate.v_pgin + rate.v_pgout > maxpgio || avefree < minfree))) {
109 		desperate = 1;
110 		goto hardswap;
111 	}
112 	desperate = 0;
113 	/*
114 	 * Not desperate for core,
115 	 * look for someone who deserves to be brought in.
116 	 */
117 	outpri = -20000;
118 	for (rp = allproc; rp != NULL; rp = rp->p_nxt) switch(rp->p_stat) {
119 
120 	case SRUN:
121 		if ((rp->p_flag&SLOAD) == 0) {
122 			rppri = rp->p_time -
123 			    rp->p_swrss / nz((maxpgio/2) * (klin * CLSIZE)) +
124 			    rp->p_slptime - (rp->p_nice-NZERO)*8;
125 			if (rppri > outpri) {
126 				if (rp->p_poip)
127 					continue;
128 				if (rp->p_textp && rp->p_textp->x_poip)
129 					continue;
130 				p = rp;
131 				outpri = rppri;
132 			}
133 		}
134 		continue;
135 
136 	case SSLEEP:
137 	case SSTOP:
138 		if ((freemem < desfree || rp->p_rssize == 0) &&
139 		    rp->p_slptime > maxslp &&
140 		    (!rp->p_textp || (rp->p_textp->x_flag&XLOCK)==0) &&
141 		    swappable(rp)) {
142 			/*
143 			 * Kick out deadwood.
144 			 */
145 			rp->p_flag &= ~SLOAD;
146 			(void) swapout(rp, rp->p_dsize, rp->p_mmsize, rp->p_ssize);
147 		}
148 		continue;
149 	}
150 
151 	/*
152 	 * If something came ready after we checked it,
153 	 * wantin will be set.  Otherwise,
154 	 * no one wants in, so nothing to do.
155 	 */
156 	if (outpri == -20000) {
157 		(void) splhigh();
158 		if (wantin == 0) {
159 			runout++;
160 			sleep((caddr_t)&runout, PSWP);
161 		}
162 		(void) spl0();
163 		goto loop;
164 	}
165 	/*
166 	 * Decide how deserving this guy is.  If he is deserving
167 	 * we will be willing to work harder to bring him in.
168 	 * Needs is an estimate of how much core he will need.
169 	 * If he has been out for a while, then we will
170 	 * bring him in with 1/2 the core he will need, otherwise
171 	 * we are conservative.
172 	 */
173 	deservin = 0;
174 	divisor = 1;
175 	if (outpri > maxslp/2) {
176 		deservin = 1;
177 		divisor = 2;
178 	}
179 	needs = p->p_swrss;
180 	if (p->p_textp && p->p_textp->x_ccount == 0)
181 		needs += p->p_textp->x_swrss;
182 	needs = imin(needs, lotsfree);
183 	if (freemem - deficit > needs / divisor) {
184 		deficit += needs;
185 		if (swapin(p))
186 			goto loop;
187 		deficit -= imin(needs, deficit);
188 	}
189 
190 hardswap:
191 	/*
192 	 * Need resources (kernel map or memory), swap someone out.
193 	 * Select the nbig largest jobs, then the oldest of these
194 	 * is ``most likely to get booted.''
195 	 */
196 	inp = p;
197 	sleeper = 0;
198 	if (nbig > MAXNBIG)
199 		nbig = MAXNBIG;
200 	if (nbig < 1)
201 		nbig = 1;
202 	biggot = 0;
203 	bplist.bp_link = 0;
204 	for (rp = allproc; rp != NULL; rp = rp->p_nxt) {
205 		if (!swappable(rp))
206 			continue;
207 		if (rp == inp)
208 			continue;
209 		if (rp->p_textp && rp->p_textp->x_flag&XLOCK)
210 			continue;
211 		if (rp->p_slptime > maxslp &&
212 		    (rp->p_stat==SSLEEP&&rp->p_pri>PZERO||rp->p_stat==SSTOP)) {
213 			if (sleeper < rp->p_slptime) {
214 				p = rp;
215 				sleeper = rp->p_slptime;
216 			}
217 		} else if (!sleeper && (rp->p_stat==SRUN||rp->p_stat==SSLEEP)) {
218 			rppri = rp->p_rssize;
219 			if (rp->p_textp)
220 				rppri += rp->p_textp->x_rssize/rp->p_textp->x_ccount;
221 			if (biggot < nbig)
222 				nbp = &bigp[biggot++];
223 			else {
224 				nbp = bplist.bp_link;
225 				if (nbp->bp_pri > rppri)
226 					continue;
227 				bplist.bp_link = nbp->bp_link;
228 			}
229 			for (bp = &bplist; bp->bp_link; bp = bp->bp_link)
230 				if (rppri < bp->bp_link->bp_pri)
231 					break;
232 			nbp->bp_link = bp->bp_link;
233 			bp->bp_link = nbp;
234 			nbp->bp_pri = rppri;
235 			nbp->bp_proc = rp;
236 		}
237 	}
238 	if (!sleeper) {
239 		p = NULL;
240 		inpri = -1000;
241 		for (bp = bplist.bp_link; bp; bp = bp->bp_link) {
242 			rp = bp->bp_proc;
243 			rppri = rp->p_time+rp->p_nice-NZERO;
244 			if (rppri >= inpri) {
245 				p = rp;
246 				inpri = rppri;
247 			}
248 		}
249 	}
250 	/*
251 	 * If we found a long-time sleeper, or we are desperate and
252 	 * found anyone to swap out, or if someone deserves to come
253 	 * in and we didn't find a sleeper, but found someone who
254 	 * has been in core for a reasonable length of time, then
255 	 * we kick the poor luser out.
256 	 */
257 	if (sleeper || desperate && p || deservin && inpri > maxslp) {
258 		(void) splhigh();
259 		p->p_flag &= ~SLOAD;
260 		if (p->p_stat == SRUN)
261 			remrq(p);
262 		(void) spl0();
263 		if (desperate) {
264 			/*
265 			 * Want to give this space to the rest of
266 			 * the processes in core so give them a chance
267 			 * by increasing the deficit.
268 			 */
269 			gives = p->p_rssize;
270 			if (p->p_textp)
271 				gives += p->p_textp->x_rssize / p->p_textp->x_ccount;
272 			gives = imin(gives, lotsfree);
273 			deficit += gives;
274 		} else
275 			gives = 0;	/* someone else taketh away */
276 		if (swapout(p, p->p_dsize, p->p_mmsize, p->p_ssize) == 0)
277 			deficit -= imin(gives, deficit);
278 		else
279 			goto loop;
280 	}
281 	/*
282 	 * Want to swap someone in, but can't
283 	 * so wait on runin.
284 	 */
285 	(void) splhigh();
286 	runin++;
287 	sleep((caddr_t)&runin, PSWP);
288 	(void) spl0();
289 	goto loop;
290 }
291 
292 vmmeter()
293 {
294 	register unsigned *cp, *rp, *sp;
295 
296 	deficit -= imin(deficit,
297 	    imax(deficit / 10, ((klin * CLSIZE) / 2) * maxpgio / 2));
298 	ave(avefree, freemem, 5);
299 	ave(avefree30, freemem, 30);
300 	/* v_pgin is maintained by clock.c */
301 	cp = &cnt.v_first; rp = &rate.v_first; sp = &sum.v_first;
302 	while (cp <= &cnt.v_last) {
303 		ave(*rp, *cp, 5);
304 		*sp += *cp;
305 		*cp = 0;
306 		rp++, cp++, sp++;
307 	}
308 	if (time.tv_sec % 5 == 0) {
309 		vmtotal();
310 		rate.v_swpin = cnt.v_swpin;
311 		sum.v_swpin += cnt.v_swpin;
312 		cnt.v_swpin = 0;
313 		rate.v_swpout = cnt.v_swpout;
314 		sum.v_swpout += cnt.v_swpout;
315 		cnt.v_swpout = 0;
316 	}
317 	if (avefree < minfree && runout || proc[0].p_slptime > maxslp/2) {
318 		runout = 0;
319 		runin = 0;
320 		wakeup((caddr_t)&runin);
321 		wakeup((caddr_t)&runout);
322 	}
323 }
324 
325 /*
326  * Schedule rate for paging.
327  * Rate is linear interpolation between
328  * slowscan with lotsfree and fastscan when out of memory.
329  */
330 schedpaging()
331 {
332 	register int vavail;
333 
334 	nscan = desscan = 0;
335 	vavail = freemem - deficit;
336 	if (vavail < 0)
337 		vavail = 0;
338 	if (freemem < lotsfree) {
339 		desscan = (slowscan * vavail + fastscan * (lotsfree - vavail)) /
340 			nz(lotsfree) / RATETOSCHEDPAGING;
341 		wakeup((caddr_t)&proc[2]);
342 	}
343 	timeout(schedpaging, (caddr_t)0, hz / RATETOSCHEDPAGING);
344 }
345 
346 vmtotal()
347 {
348 	register struct proc *p;
349 	register struct text *xp;
350 	int nrun = 0;
351 
352 	total.t_vmtxt = 0;
353 	total.t_avmtxt = 0;
354 	total.t_rmtxt = 0;
355 	total.t_armtxt = 0;
356 	for (xp = text; xp < textNTEXT; xp++)
357 		if (xp->x_vptr) {
358 			total.t_vmtxt += xp->x_size;
359 			total.t_rmtxt += xp->x_rssize;
360 			for (p = xp->x_caddr; p; p = p->p_xlink)
361 			switch (p->p_stat) {
362 
363 			case SSTOP:
364 			case SSLEEP:
365 				if (p->p_slptime >= maxslp)
366 					continue;
367 				/* fall into... */
368 
369 			case SRUN:
370 			case SIDL:
371 				total.t_avmtxt += xp->x_size;
372 				total.t_armtxt += xp->x_rssize;
373 				goto next;
374 			}
375 next:
376 			;
377 		}
378 	total.t_vm = 0;
379 	total.t_avm = 0;
380 	total.t_rm = 0;
381 	total.t_arm = 0;
382 	total.t_rq = 0;
383 	total.t_dw = 0;
384 	total.t_pw = 0;
385 	total.t_sl = 0;
386 	total.t_sw = 0;
387 	for (p = allproc; p != NULL; p = p->p_nxt) {
388 		if (p->p_flag & SSYS)
389 			continue;
390 		if (p->p_stat) {
391 			if (p->p_stat != SZOMB) {
392 				total.t_vm += p->p_dsize + p->p_ssize;
393 				total.t_rm += p->p_rssize;
394 			}
395 			switch (p->p_stat) {
396 
397 			case SSLEEP:
398 				if (p->p_pri <= PZERO && p->p_slptime == 0)
399 					nrun++;
400 				/* fall through */
401 			case SSTOP:
402 				if (p->p_flag & SPAGE)
403 					total.t_pw++;
404 				else if (p->p_flag & SLOAD) {
405 					if (p->p_pri <= PZERO)
406 						total.t_dw++;
407 					else if (p->p_slptime < maxslp)
408 						total.t_sl++;
409 				} else if (p->p_slptime < maxslp)
410 					total.t_sw++;
411 				if (p->p_slptime < maxslp)
412 					goto active;
413 				break;
414 
415 			case SRUN:
416 			case SIDL:
417 				nrun++;
418 				if (p->p_flag & SLOAD)
419 					total.t_rq++;
420 				else
421 					total.t_sw++;
422 active:
423 				total.t_avm += p->p_dsize + p->p_ssize;
424 				total.t_arm += p->p_rssize;
425 				break;
426 			}
427 		}
428 	}
429 	total.t_vm += total.t_vmtxt;
430 	total.t_avm += total.t_avmtxt;
431 	total.t_rm += total.t_rmtxt;
432 	total.t_arm += total.t_armtxt;
433 	total.t_free = avefree;
434 	loadav(averunnable, nrun);
435 }
436 
437 /*
438  * Constants for averages over 1, 5, and 15 minutes
439  * when sampling at 5 second intervals.
440  */
441 fixpt_t	cexp[3] = {
442 	0.9200444146293232 * FSCALE,	/* exp(-1/12) */
443 	0.9834714538216174 * FSCALE,	/* exp(-1/60) */
444 	0.9944598480048967 * FSCALE,	/* exp(-1/180) */
445 };
446 
447 /*
448  * Compute a tenex style load average of a quantity on
449  * 1, 5 and 15 minute intervals.
450  */
451 loadav(avg, n)
452 	register fixpt_t *avg;
453 	int n;
454 {
455 	register int i;
456 
457 	for (i = 0; i < 3; i++)
458 		avg[i] = (cexp[i] * avg[i] + n * FSCALE * (FSCALE - cexp[i]))
459 		         >> FSHIFT;
460 #if defined(COMPAT_43) && (defined(vax) || defined(tahoe))
461 	for (i = 0; i < 3; i++)
462 		avenrun[i] = (double) averunnable[i] / FSCALE;
463 #endif /* COMPAT_43 */
464 }
465