xref: /original-bsd/sys/vm/vm_meter.c (revision 77936e01)
1 /*
2  * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms are permitted
6  * provided that the above copyright notice and this paragraph are
7  * duplicated in all such forms and that any documentation,
8  * advertising materials, and other materials related to such
9  * distribution and use acknowledge that the software was developed
10  * by the University of California, Berkeley.  The name of the
11  * University may not be used to endorse or promote products derived
12  * from this software without specific prior written permission.
13  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
14  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
15  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16  *
17  *	@(#)vm_meter.c	7.7 (Berkeley) 05/02/90
18  */
19 
20 #include "param.h"
21 #include "systm.h"
22 #include "user.h"
23 #include "proc.h"
24 #include "text.h"
25 #include "vm.h"
26 #include "kernel.h"
27 
/* Tunables exported to the rest of the VM system. */
int	maxslp = MAXSLP;	/* seconds asleep after which a process is
				 * treated as deadwood for swap-out, and no
				 * longer counted as ``active'' in vmtotal() */
int	saferss = SAFERSS;	/* not referenced in this file; presumably a
				 * minimal safe resident-set size -- confirm */

/*
 * The following parameters control operation of the page replacement
 * algorithm.  They are initialized to 0, and then computed at boot time
 * based on the size of the system.  If they are patched non-zero in
 * a loaded vmunix they are left alone and may thus be changed per system
 * using adb on the loaded system.
 */
int	maxpgio = 0;	/* paging rate (v_pgin+v_pgout) above which the
			 * system is considered overloaded (see sched()) */
int	minfree = 0;	/* if avefree drops below this, hard-swap someone */
int	desfree = 0;	/* desired free memory; below it, kick out deadwood */
int	lotsfree = 0;	/* above this much free memory, no page scanning
			 * is scheduled (see schedpaging()) */
int	slowscan = 0;	/* scan rate used when free memory == lotsfree */
int	fastscan = 0;	/* scan rate used when free memory is exhausted */
int	klin = KLIN;	/* used here only to scale swap transfer estimates
			 * as (klin * CLSIZE); presumably a page-clustering
			 * factor -- confirm against vm headers */
int	klseql = KLSEQL;	/* not referenced in this file */
int	klsdist = KLSDIST;	/* not referenced in this file */
int	kltxt = KLTXT;		/* not referenced in this file */
int	klout = KLOUT;		/* not referenced in this file */
int	multprog = -1;		/* so we don't count process 2 */

fixpt_t	averunnable[3];		/* load average, of runnable procs */
52 
53 /*
54  * The main loop of the scheduling (swapping) process.
55  *
56  * The basic idea is:
57  *	see if anyone wants to be swapped in;
58  *	swap out processes until there is room;
59  *	swap him in;
60  *	repeat.
61  * If the paging rate is too high, or the average free memory
62  * is very low, then we do not consider swapping anyone in,
63  * but rather look for someone to swap out.
64  *
65  * The runout flag is set whenever someone is swapped out.
66  * Sched sleeps on it awaiting work.
67  *
68  * Sched sleeps on runin whenever it cannot find enough
69  * core (by swapping out or otherwise) to fit the
70  * selected swapped process.  It is awakened when the
71  * core situation changes and in any case once per second.
72  *
73  * sched DOESN'T ACCOUNT FOR PAGE TABLE SIZE IN CALCULATIONS.
74  */
75 
/*
 * A process is swappable only when it is resident (SLOAD set) and none
 * of the flags that pin it in core -- system process, locked, in page
 * wait, exiting, physical I/O in progress, etc. -- are set.
 */
#define	swappable(p) \
	(((p)->p_flag&(SSYS|SLOCK|SULOCK|SLOAD|SPAGE|SKEEP|SWEXIT|SPHYSIO))==SLOAD)

/*
 * Insure non-zero, for use as a divisor.  Both the argument and the
 * whole expansion are parenthesized so that arguments built with
 * low-precedence operators (e.g. nz(a & b)) expand correctly; the
 * unparenthesized form bound ``!= 0'' to only part of such arguments.
 * NB: x is evaluated twice -- pass only side-effect-free expressions.
 */
#define	nz(x)	((x) != 0 ? (x) : 1)

#define	NBIG	4		/* default length of the big-jobs list */
#define	MAXNBIG	10		/* hard limit on nbig (size of bigp[]) */
int	nbig = NBIG;		/* patchable: how many big jobs sched()
				 * considers when picking a swap victim */

/*
 * Pool of records used by sched() to maintain a list, sorted by
 * resident size, of the nbig biggest swappable jobs; the oldest of
 * these is the preferred hard-swap victim.  bplist is the list head.
 */
struct bigp {
	struct	proc *bp_proc;	/* the process */
	int	bp_pri;		/* its size-based priority (resident size) */
	struct	bigp *bp_link;	/* next entry, ascending bp_pri */
} bigp[MAXNBIG], bplist;
91 
/* The swapper's main loop; see the block comment above.  Never returns. */
sched()
{
	register struct proc *rp, *p, *inp;
	int outpri, inpri, rppri;
	int sleeper, desperate, deservin, needs, divisor;
	register struct bigp *bp, *nbp;
	int biggot, gives;

loop:
	wantin = 0;
	deservin = 0;
	sleeper = 0;
	p = 0;
	/*
	 * See if paging system is overloaded; if so swap someone out.
	 * Conditions for hard outswap are:
	 *	if need kernel map (mix it up).
	 * or
	 *	1. if there are at least 2 runnable processes (on the average)
	 * and	2. the paging rate is excessive or memory is now VERY low.
	 * and	3. the short (5-second) and longer (30-second) average
	 *	   memory is less than desirable.
	 */
	if (kmapwnt ||
	    (averunnable[0] >= 2 * FSCALE &&
	     imax(avefree, avefree30) < desfree &&
	     (rate.v_pgin + rate.v_pgout > maxpgio || avefree < minfree))) {
		desperate = 1;
		goto hardswap;
	}
	desperate = 0;
	/*
	 * Not desperate for core,
	 * look for someone who deserves to be brought in.
	 */
	outpri = -20000;
	for (rp = allproc; rp != NULL; rp = rp->p_nxt) switch(rp->p_stat) {

	case SRUN:
		if ((rp->p_flag&SLOAD) == 0) {
			/*
			 * Runnable but swapped out: in-swap priority favors
			 * long time swapped out (p_time), small swapped
			 * size, long prior sleep, and low nice value.
			 */
			rppri = rp->p_time -
			    rp->p_swrss / nz((maxpgio/2) * (klin * CLSIZE)) +
			    rp->p_slptime - (rp->p_nice-NZERO)*8;
			if (rppri > outpri) {
				/* skip anyone with page-outs in progress */
				if (rp->p_poip)
					continue;
				if (rp->p_textp && rp->p_textp->x_poip)
					continue;
				p = rp;
				outpri = rppri;
			}
		}
		continue;

	case SSLEEP:
	case SSTOP:
		/*
		 * Resident but long asleep (or with no resident pages):
		 * if memory is short, this one is deadwood.
		 */
		if ((freemem < desfree || rp->p_rssize == 0) &&
		    rp->p_slptime > maxslp &&
		    (!rp->p_textp || (rp->p_textp->x_flag&XLOCK)==0) &&
		    swappable(rp)) {
			/*
			 * Kick out deadwood.
			 */
			rp->p_flag &= ~SLOAD;
			(void) swapout(rp, rp->p_dsize, rp->p_ssize);
		}
		continue;
	}

	/*
	 * If something came ready after we checked it,
	 * wantin will be set.  Otherwise,
	 * no one wants in, so nothing to do.
	 */
	if (outpri == -20000) {
		(void) splhigh();
		if (wantin == 0) {
			runout++;
			sleep((caddr_t)&runout, PSWP);
		}
		(void) spl0();
		goto loop;
	}
	/*
	 * Decide how deserving this guy is.  If he is deserving
	 * we will be willing to work harder to bring him in.
	 * Needs is an estimate of how much core he will need.
	 * If he has been out for a while, then we will
	 * bring him in with 1/2 the core he will need, otherwise
	 * we are conservative.
	 */
	deservin = 0;
	divisor = 1;
	if (outpri > maxslp/2) {
		deservin = 1;
		divisor = 2;
	}
	/* Text image counts only if no other in-core process shares it. */
	needs = p->p_swrss;
	if (p->p_textp && p->p_textp->x_ccount == 0)
		needs += p->p_textp->x_swrss;
	needs = imin(needs, lotsfree);
	if (freemem - deficit > needs / divisor) {
		/* Reserve his memory via deficit, then try the swap-in. */
		deficit += needs;
		if (swapin(p))
			goto loop;
		deficit -= imin(needs, deficit);
	}

hardswap:
	/*
	 * Need resources (kernel map or memory), swap someone out.
	 * Select the nbig largest jobs, then the oldest of these
	 * is ``most likely to get booted.''
	 */
	inp = p;		/* don't swap out our own in-swap candidate */
	sleeper = 0;
	if (nbig > MAXNBIG)
		nbig = MAXNBIG;
	if (nbig < 1)
		nbig = 1;
	biggot = 0;
	bplist.bp_link = 0;
	for (rp = allproc; rp != NULL; rp = rp->p_nxt) {
		if (!swappable(rp))
			continue;
		if (rp == inp)
			continue;
		if (rp->p_textp && rp->p_textp->x_flag&XLOCK)
			continue;
		/*
		 * A long-time (interruptible or stopped) sleeper is always
		 * the preferred victim; remember the longest-asleep one.
		 */
		if (rp->p_slptime > maxslp &&
		    (rp->p_stat==SSLEEP&&rp->p_pri>PZERO||rp->p_stat==SSTOP)) {
			if (sleeper < rp->p_slptime) {
				p = rp;
				sleeper = rp->p_slptime;
			}
		} else if (!sleeper && (rp->p_stat==SRUN||rp->p_stat==SSLEEP)) {
			/*
			 * No sleeper found yet: insert this job into the
			 * size-sorted list of the nbig biggest jobs, sized
			 * by resident set plus its share of shared text.
			 */
			rppri = rp->p_rssize;
			if (rp->p_textp)
				rppri += rp->p_textp->x_rssize/rp->p_textp->x_ccount;
			if (biggot < nbig)
				nbp = &bigp[biggot++];
			else {
				/* list full: recycle the smallest entry */
				nbp = bplist.bp_link;
				if (nbp->bp_pri > rppri)
					continue;
				bplist.bp_link = nbp->bp_link;
			}
			/* insertion sort, ascending bp_pri */
			for (bp = &bplist; bp->bp_link; bp = bp->bp_link)
				if (rppri < bp->bp_link->bp_pri)
					break;
			nbp->bp_link = bp->bp_link;
			bp->bp_link = nbp;
			nbp->bp_pri = rppri;
			nbp->bp_proc = rp;
		}
	}
	if (!sleeper) {
		/* Of the big jobs, pick the one resident longest (oldest). */
		p = NULL;
		inpri = -1000;
		for (bp = bplist.bp_link; bp; bp = bp->bp_link) {
			rp = bp->bp_proc;
			rppri = rp->p_time+rp->p_nice-NZERO;
			if (rppri >= inpri) {
				p = rp;
				inpri = rppri;
			}
		}
	}
	/*
	 * If we found a long-time sleeper, or we are desperate and
	 * found anyone to swap out, or if someone deserves to come
	 * in and we didn't find a sleeper, but found someone who
	 * has been in core for a reasonable length of time, then
	 * we kick the poor luser out.
	 */
	if (sleeper || desperate && p || deservin && inpri > maxslp) {
		(void) splhigh();
		p->p_flag &= ~SLOAD;
		if (p->p_stat == SRUN)
			remrq(p);
		(void) spl0();
		if (desperate) {
			/*
			 * Want to give this space to the rest of
			 * the processes in core so give them a chance
			 * by increasing the deficit.
			 */
			gives = p->p_rssize;
			if (p->p_textp)
				gives += p->p_textp->x_rssize / p->p_textp->x_ccount;
			gives = imin(gives, lotsfree);
			deficit += gives;
		} else
			gives = 0;	/* someone else taketh away */
		if (swapout(p, p->p_dsize, p->p_ssize) == 0)
			deficit -= imin(gives, deficit);
		else
			goto loop;
	}
	/*
	 * Want to swap someone in, but can't
	 * so wait on runin.
	 */
	(void) splhigh();
	runin++;
	sleep((caddr_t)&runin, PSWP);
	(void) spl0();
	goto loop;
}
301 
/*
 * Periodic VM statistics housekeeping: decay the memory deficit,
 * update the 5- and 30-second free-memory averages, roll the
 * per-interval counters in cnt into rate (5-second average) and
 * sum (running totals), and every 5 seconds recompute the system
 * totals and swap rates.  Finally wake the swapper if memory is
 * tight or it has been asleep too long.
 * NOTE(review): apparently invoked on a regular clock tick (clock.c
 * maintains v_pgin, and time.tv_sec is sampled) -- confirm caller.
 */
vmmeter()
{
	register unsigned *cp, *rp, *sp;

	/* Decay the swap-in reservation; floor of (klin*CLSIZE)/2*maxpgio/2. */
	deficit -= imin(deficit,
	    imax(deficit / 10, ((klin * CLSIZE) / 2) * maxpgio / 2));
	ave(avefree, freemem, 5);
	ave(avefree30, freemem, 30);
	/* v_pgin is maintained by clock.c */
	/*
	 * Walk the parallel counter arrays by address, v_first..v_last:
	 * fold each interval count into its rate and sum, then clear it.
	 */
	cp = &cnt.v_first; rp = &rate.v_first; sp = &sum.v_first;
	while (cp <= &cnt.v_last) {
		ave(*rp, *cp, 5);
		*sp += *cp;
		*cp = 0;
		rp++, cp++, sp++;
	}
	if (time.tv_sec % 5 == 0) {
		vmtotal();
		/* swap counters are outside the v_first..v_last range */
		rate.v_swpin = cnt.v_swpin;
		sum.v_swpin += cnt.v_swpin;
		cnt.v_swpin = 0;
		rate.v_swpout = cnt.v_swpout;
		sum.v_swpout += cnt.v_swpout;
		cnt.v_swpout = 0;
	}
	/*
	 * Wake both swapper sleep channels when free memory is critically
	 * low with someone swapped out, or when proc[0] (presumably the
	 * swapper itself) has been asleep over maxslp/2 seconds.
	 */
	if (avefree < minfree && runout || proc[0].p_slptime > maxslp/2) {
		runout = 0;
		runin = 0;
		wakeup((caddr_t)&runin);
		wakeup((caddr_t)&runout);
	}
}
334 
335 /*
336  * Schedule rate for paging.
337  * Rate is linear interpolation between
338  * slowscan with lotsfree and fastscan when out of memory.
339  */
340 schedpaging()
341 {
342 	register int vavail;
343 
344 	nscan = desscan = 0;
345 	vavail = freemem - deficit;
346 	if (vavail < 0)
347 		vavail = 0;
348 	if (freemem < lotsfree) {
349 		desscan = (slowscan * vavail + fastscan * (lotsfree - vavail)) /
350 			nz(lotsfree) / RATETOSCHEDPAGING;
351 		wakeup((caddr_t)&proc[2]);
352 	}
353 	timeout(schedpaging, (caddr_t)0, hz / RATETOSCHEDPAGING);
354 }
355 
/*
 * Recompute the system-wide memory totals kept in ``total'': virtual
 * and resident sizes of shared texts and of processes, both overall
 * and for ``active'' ones (those not asleep for maxslp or more), plus
 * counts of processes in each scheduling/residence state.  Also counts
 * runnable and short-sleep processes and feeds that into loadav().
 */
vmtotal()
{
	register struct proc *p;
	register struct text *xp;
	int nrun = 0;	/* processes counted toward the load average */

	/* Pass 1: shared text segments. */
	total.t_vmtxt = 0;
	total.t_avmtxt = 0;
	total.t_rmtxt = 0;
	total.t_armtxt = 0;
	for (xp = text; xp < textNTEXT; xp++)
		if (xp->x_vptr) {
			total.t_vmtxt += xp->x_size;
			total.t_rmtxt += xp->x_rssize;
			/*
			 * A text is ``active'' if any process using it is
			 * runnable or recently asleep; count it at most
			 * once, hence the goto out of the inner loop.
			 */
			for (p = xp->x_caddr; p; p = p->p_xlink)
			switch (p->p_stat) {

			case SSTOP:
			case SSLEEP:
				if (p->p_slptime >= maxslp)
					continue;
				/* fall into... */

			case SRUN:
			case SIDL:
				total.t_avmtxt += xp->x_size;
				total.t_armtxt += xp->x_rssize;
				goto next;
			}
next:
			;
		}
	/* Pass 2: per-process data/stack sizes and state counts. */
	total.t_vm = 0;
	total.t_avm = 0;
	total.t_rm = 0;
	total.t_arm = 0;
	total.t_rq = 0;
	total.t_dw = 0;
	total.t_pw = 0;
	total.t_sl = 0;
	total.t_sw = 0;
	for (p = allproc; p != NULL; p = p->p_nxt) {
		if (p->p_flag & SSYS)
			continue;
		if (p->p_stat) {
			if (p->p_stat != SZOMB) {
				total.t_vm += p->p_dsize + p->p_ssize;
				total.t_rm += p->p_rssize;
			}
			switch (p->p_stat) {

			case SSLEEP:
				/* short uninterruptible sleeps count as load */
				if (p->p_pri <= PZERO && p->p_slptime == 0)
					nrun++;
				/* fall through */
			case SSTOP:
				if (p->p_flag & SPAGE)
					total.t_pw++;	/* in page wait */
				else if (p->p_flag & SLOAD) {
					if (p->p_pri <= PZERO)
						total.t_dw++;	/* disk wait */
					else if (p->p_slptime < maxslp)
						total.t_sl++;	/* sleeping */
				} else if (p->p_slptime < maxslp)
					total.t_sw++;	/* swapped out */
				if (p->p_slptime < maxslp)
					goto active;
				break;

			case SRUN:
			case SIDL:
				nrun++;
				if (p->p_flag & SLOAD)
					total.t_rq++;	/* on run queue */
				else
					total.t_sw++;	/* runnable, swapped */
active:
				total.t_avm += p->p_dsize + p->p_ssize;
				total.t_arm += p->p_rssize;
				break;
			}
		}
	}
	/* Fold text totals into the grand totals. */
	total.t_vm += total.t_vmtxt;
	total.t_avm += total.t_avmtxt;
	total.t_rm += total.t_rmtxt;
	total.t_arm += total.t_armtxt;
	total.t_free = avefree;
	loadav(averunnable, nrun);
}
446 
/*
 * Constants for averages over 1, 5, and 15 minutes
 * when sampling at 5 second intervals.
 * Stored as fixed point: exp(-interval/period) scaled by FSCALE.
 */
fixpt_t	cexp[3] = {
	0.9200444146293232 * FSCALE,	/* exp(-1/12) */
	0.9834714538216174 * FSCALE,	/* exp(-1/60) */
	0.9944598480048967 * FSCALE,	/* exp(-1/180) */
};
456 
457 /*
458  * Compute a tenex style load average of a quantity on
459  * 1, 5 and 15 minute intervals.
460  */
461 loadav(avg, n)
462 	register fixpt_t *avg;
463 	int n;
464 {
465 	register int i;
466 
467 	for (i = 0; i < 3; i++)
468 		avg[i] = (cexp[i] * avg[i] + n * FSCALE * (FSCALE - cexp[i]))
469 		         >> FSHIFT;
470 #ifdef COMPAT_43
471 	for (i = 0; i < 3; i++)
472 		avenrun[i] = (double) averunnable[i] / FSCALE;
473 #endif /* COMPAT_43 */
474 }
475