xref: /original-bsd/sys/vm/vm_meter.c (revision 81287ac5)
1 /*
2  * Copyright (c) 1982, 1986 Regents of the University of California.
3  * All rights reserved.  The Berkeley software License Agreement
4  * specifies the terms and conditions for redistribution.
5  *
6  *	@(#)vm_meter.c	7.4 (Berkeley) 08/08/87
7  */
8 
9 #include "param.h"
10 #include "systm.h"
11 #include "seg.h"
12 #include "dir.h"
13 #include "user.h"
14 #include "proc.h"
15 #include "text.h"
16 #include "vm.h"
17 #include "cmap.h"
18 #include "kernel.h"
19 
20 
21 
22 int	maxslp = MAXSLP;
23 int	saferss = SAFERSS;
24 
25 /*
26  * The following parameters control operation of the page replacement
27  * algorithm.  They are initialized to 0, and then computed at boot time
28  * based on the size of the system.  If they are patched non-zero in
29  * a loaded vmunix they are left alone and may thus be changed per system
30  * using adb on the loaded system.
31  */
32 int	maxpgio = 0;
33 int	minfree = 0;
34 int	desfree = 0;
35 int	lotsfree = 0;
36 int	slowscan = 0;
37 int	fastscan = 0;
38 int	klin = KLIN;
39 int	klseql = KLSEQL;
40 int	klsdist = KLSDIST;
41 int	kltxt = KLTXT;
42 int	klout = KLOUT;
43 int	multprog = -1;		/* so we don't count process 2 */
44 
45 double	avenrun[3];		/* load average, of runnable procs */
46 
47 /*
48  * The main loop of the scheduling (swapping) process.
49  *
50  * The basic idea is:
51  *	see if anyone wants to be swapped in;
52  *	swap out processes until there is room;
53  *	swap him in;
54  *	repeat.
55  * If the paging rate is too high, or the average free memory
56  * is very low, then we do not consider swapping anyone in,
57  * but rather look for someone to swap out.
58  *
59  * The runout flag is set whenever someone is swapped out.
60  * Sched sleeps on it awaiting work.
61  *
62  * Sched sleeps on runin whenever it cannot find enough
63  * core (by swapping out or otherwise) to fit the
64  * selected swapped process.  It is awakened when the
65  * core situation changes and in any case once per second.
66  *
67  * sched DOESN'T ACCOUNT FOR PAGE TABLE SIZE IN CALCULATIONS.
68  */
69 
70 #define	swappable(p) \
71 	(((p)->p_flag&(SSYS|SLOCK|SULOCK|SLOAD|SPAGE|SKEEP|SWEXIT|SPHYSIO))==SLOAD)
72 
73 /* insure non-zero */
74 #define	nz(x)	(x != 0 ? x : 1)
75 
76 #define	NBIG	4
77 #define	MAXNBIG	10
78 int	nbig = NBIG;
79 
80 struct bigp {
81 	struct	proc *bp_proc;
82 	int	bp_pri;
83 	struct	bigp *bp_link;
84 } bigp[MAXNBIG], bplist;
85 
86 sched()
87 {
88 	register struct proc *rp, *p, *inp;
89 	int outpri, inpri, rppri;
90 	int sleeper, desperate, deservin, needs, divisor;
91 	register struct bigp *bp, *nbp;
92 	int biggot, gives;
93 
94 loop:
95 	wantin = 0;
96 	deservin = 0;
97 	sleeper = 0;
98 	p = 0;
99 	/*
100 	 * See if paging system is overloaded; if so swap someone out.
101 	 * Conditions for hard outswap are:
102 	 *	if need kernel map (mix it up).
103 	 * or
104 	 *	1. if there are at least 2 runnable processes (on the average)
105 	 * and	2. the paging rate is excessive or memory is now VERY low.
106 	 * and	3. the short (5-second) and longer (30-second) average
107 	 *	   memory is less than desirable.
108 	 */
109 	if (kmapwnt ||
110 	    (avenrun[0] >= 2 && imax(avefree, avefree30) < desfree &&
111 	    (rate.v_pgin + rate.v_pgout > maxpgio || avefree < minfree))) {
112 		desperate = 1;
113 		goto hardswap;
114 	}
115 	desperate = 0;
116 	/*
117 	 * Not desperate for core,
118 	 * look for someone who deserves to be brought in.
119 	 */
120 	outpri = -20000;
121 	for (rp = allproc; rp != NULL; rp = rp->p_nxt) switch(rp->p_stat) {
122 
123 	case SRUN:
124 		if ((rp->p_flag&SLOAD) == 0) {
125 			rppri = rp->p_time -
126 			    rp->p_swrss / nz((maxpgio/2) * (klin * CLSIZE)) +
127 			    rp->p_slptime - (rp->p_nice-NZERO)*8;
128 			if (rppri > outpri) {
129 				if (rp->p_poip)
130 					continue;
131 				if (rp->p_textp && rp->p_textp->x_poip)
132 					continue;
133 				p = rp;
134 				outpri = rppri;
135 			}
136 		}
137 		continue;
138 
139 	case SSLEEP:
140 	case SSTOP:
141 		if ((freemem < desfree || rp->p_rssize == 0) &&
142 		    rp->p_slptime > maxslp &&
143 		    (!rp->p_textp || (rp->p_textp->x_flag&XLOCK)==0) &&
144 		    swappable(rp)) {
145 			/*
146 			 * Kick out deadwood.
147 			 */
148 			rp->p_flag &= ~SLOAD;
149 			(void) swapout(rp, rp->p_dsize, rp->p_ssize);
150 		}
151 		continue;
152 	}
153 
154 	/*
155 	 * If something came ready after we checked it,
156 	 * wantin will be set.  Otherwise,
157 	 * no one wants in, so nothing to do.
158 	 */
159 	if (outpri == -20000) {
160 		(void) splhigh();
161 		if (wantin == 0) {
162 			runout++;
163 			sleep((caddr_t)&runout, PSWP);
164 		}
165 		(void) spl0();
166 		goto loop;
167 	}
168 	/*
169 	 * Decide how deserving this guy is.  If he is deserving
170 	 * we will be willing to work harder to bring him in.
171 	 * Needs is an estimate of how much core he will need.
172 	 * If he has been out for a while, then we will
173 	 * bring him in with 1/2 the core he will need, otherwise
174 	 * we are conservative.
175 	 */
176 	deservin = 0;
177 	divisor = 1;
178 	if (outpri > maxslp/2) {
179 		deservin = 1;
180 		divisor = 2;
181 	}
182 	needs = p->p_swrss;
183 	if (p->p_textp && p->p_textp->x_ccount == 0)
184 		needs += p->p_textp->x_swrss;
185 	needs = imin(needs, lotsfree);
186 	if (freemem - deficit > needs / divisor) {
187 		deficit += needs;
188 		if (swapin(p))
189 			goto loop;
190 		deficit -= imin(needs, deficit);
191 	}
192 
193 hardswap:
194 	/*
195 	 * Need resources (kernel map or memory), swap someone out.
196 	 * Select the nbig largest jobs, then the oldest of these
197 	 * is ``most likely to get booted.''
198 	 */
199 	inp = p;
200 	sleeper = 0;
201 	if (nbig > MAXNBIG)
202 		nbig = MAXNBIG;
203 	if (nbig < 1)
204 		nbig = 1;
205 	biggot = 0;
206 	bplist.bp_link = 0;
207 	for (rp = allproc; rp != NULL; rp = rp->p_nxt) {
208 		if (!swappable(rp))
209 			continue;
210 		if (rp == inp)
211 			continue;
212 		if (rp->p_textp && rp->p_textp->x_flag&XLOCK)
213 			continue;
214 		if (rp->p_slptime > maxslp &&
215 		    (rp->p_stat==SSLEEP&&rp->p_pri>PZERO||rp->p_stat==SSTOP)) {
216 			if (sleeper < rp->p_slptime) {
217 				p = rp;
218 				sleeper = rp->p_slptime;
219 			}
220 		} else if (!sleeper && (rp->p_stat==SRUN||rp->p_stat==SSLEEP)) {
221 			rppri = rp->p_rssize;
222 			if (rp->p_textp)
223 				rppri += rp->p_textp->x_rssize/rp->p_textp->x_ccount;
224 			if (biggot < nbig)
225 				nbp = &bigp[biggot++];
226 			else {
227 				nbp = bplist.bp_link;
228 				if (nbp->bp_pri > rppri)
229 					continue;
230 				bplist.bp_link = nbp->bp_link;
231 			}
232 			for (bp = &bplist; bp->bp_link; bp = bp->bp_link)
233 				if (rppri < bp->bp_link->bp_pri)
234 					break;
235 			nbp->bp_link = bp->bp_link;
236 			bp->bp_link = nbp;
237 			nbp->bp_pri = rppri;
238 			nbp->bp_proc = rp;
239 		}
240 	}
241 	if (!sleeper) {
242 		p = NULL;
243 		inpri = -1000;
244 		for (bp = bplist.bp_link; bp; bp = bp->bp_link) {
245 			rp = bp->bp_proc;
246 			rppri = rp->p_time+rp->p_nice-NZERO;
247 			if (rppri >= inpri) {
248 				p = rp;
249 				inpri = rppri;
250 			}
251 		}
252 	}
253 	/*
254 	 * If we found a long-time sleeper, or we are desperate and
255 	 * found anyone to swap out, or if someone deserves to come
256 	 * in and we didn't find a sleeper, but found someone who
257 	 * has been in core for a reasonable length of time, then
258 	 * we kick the poor luser out.
259 	 */
260 	if (sleeper || desperate && p || deservin && inpri > maxslp) {
261 		(void) splhigh();
262 		p->p_flag &= ~SLOAD;
263 		if (p->p_stat == SRUN)
264 			remrq(p);
265 		(void) spl0();
266 		if (desperate) {
267 			/*
268 			 * Want to give this space to the rest of
269 			 * the processes in core so give them a chance
270 			 * by increasing the deficit.
271 			 */
272 			gives = p->p_rssize;
273 			if (p->p_textp)
274 				gives += p->p_textp->x_rssize / p->p_textp->x_ccount;
275 			gives = imin(gives, lotsfree);
276 			deficit += gives;
277 		} else
278 			gives = 0;	/* someone else taketh away */
279 		if (swapout(p, p->p_dsize, p->p_ssize) == 0)
280 			deficit -= imin(gives, deficit);
281 		else
282 			goto loop;
283 	}
284 	/*
285 	 * Want to swap someone in, but can't
286 	 * so wait on runin.
287 	 */
288 	(void) splhigh();
289 	runin++;
290 	sleep((caddr_t)&runin, PSWP);
291 	(void) spl0();
292 	goto loop;
293 }
294 
295 vmmeter()
296 {
297 	register unsigned *cp, *rp, *sp;
298 
299 	deficit -= imin(deficit,
300 	    imax(deficit / 10, ((klin * CLSIZE) / 2) * maxpgio / 2));
301 	ave(avefree, freemem, 5);
302 	ave(avefree30, freemem, 30);
303 	/* v_pgin is maintained by clock.c */
304 	cp = &cnt.v_first; rp = &rate.v_first; sp = &sum.v_first;
305 	while (cp <= &cnt.v_last) {
306 		ave(*rp, *cp, 5);
307 		*sp += *cp;
308 		*cp = 0;
309 		rp++, cp++, sp++;
310 	}
311 	if (time.tv_sec % 5 == 0) {
312 		vmtotal();
313 		rate.v_swpin = cnt.v_swpin;
314 		sum.v_swpin += cnt.v_swpin;
315 		cnt.v_swpin = 0;
316 		rate.v_swpout = cnt.v_swpout;
317 		sum.v_swpout += cnt.v_swpout;
318 		cnt.v_swpout = 0;
319 	}
320 	if (avefree < minfree && runout || proc[0].p_slptime > maxslp/2) {
321 		runout = 0;
322 		runin = 0;
323 		wakeup((caddr_t)&runin);
324 		wakeup((caddr_t)&runout);
325 	}
326 }
327 
328 /*
329  * Schedule rate for paging.
330  * Rate is linear interpolation between
331  * slowscan with lotsfree and fastscan when out of memory.
332  */
333 schedpaging()
334 {
335 	register int vavail;
336 
337 	nscan = desscan = 0;
338 	vavail = freemem - deficit;
339 	if (vavail < 0)
340 		vavail = 0;
341 	if (freemem < lotsfree) {
342 		desscan = (slowscan * vavail + fastscan * (lotsfree - vavail)) /
343 			nz(lotsfree) / RATETOSCHEDPAGING;
344 		wakeup((caddr_t)&proc[2]);
345 	}
346 	timeout(schedpaging, (caddr_t)0, hz / RATETOSCHEDPAGING);
347 }
348 
349 vmtotal()
350 {
351 	register struct proc *p;
352 	register struct text *xp;
353 	int nrun = 0;
354 
355 	total.t_vmtxt = 0;
356 	total.t_avmtxt = 0;
357 	total.t_rmtxt = 0;
358 	total.t_armtxt = 0;
359 	for (xp = text; xp < textNTEXT; xp++)
360 		if (xp->x_iptr) {
361 			total.t_vmtxt += xp->x_size;
362 			total.t_rmtxt += xp->x_rssize;
363 			for (p = xp->x_caddr; p; p = p->p_xlink)
364 			switch (p->p_stat) {
365 
366 			case SSTOP:
367 			case SSLEEP:
368 				if (p->p_slptime >= maxslp)
369 					continue;
370 				/* fall into... */
371 
372 			case SRUN:
373 			case SIDL:
374 				total.t_avmtxt += xp->x_size;
375 				total.t_armtxt += xp->x_rssize;
376 				goto next;
377 			}
378 next:
379 			;
380 		}
381 	total.t_vm = 0;
382 	total.t_avm = 0;
383 	total.t_rm = 0;
384 	total.t_arm = 0;
385 	total.t_rq = 0;
386 	total.t_dw = 0;
387 	total.t_pw = 0;
388 	total.t_sl = 0;
389 	total.t_sw = 0;
390 	for (p = allproc; p != NULL; p = p->p_nxt) {
391 		if (p->p_flag & SSYS)
392 			continue;
393 		if (p->p_stat) {
394 			if (p->p_stat != SZOMB) {
395 				total.t_vm += p->p_dsize + p->p_ssize;
396 				total.t_rm += p->p_rssize;
397 			}
398 			switch (p->p_stat) {
399 
400 			case SSLEEP:
401 			case SSTOP:
402 				if (p->p_pri <= PZERO && p->p_stat == SSLEEP)
403 					nrun++;
404 				if (p->p_flag & SPAGE)
405 					total.t_pw++;
406 				else if (p->p_flag & SLOAD) {
407 					if (p->p_pri <= PZERO)
408 						total.t_dw++;
409 					else if (p->p_slptime < maxslp)
410 						total.t_sl++;
411 				} else if (p->p_slptime < maxslp)
412 					total.t_sw++;
413 				if (p->p_slptime < maxslp)
414 					goto active;
415 				break;
416 
417 			case SRUN:
418 			case SIDL:
419 				nrun++;
420 				if (p->p_flag & SLOAD)
421 					total.t_rq++;
422 				else
423 					total.t_sw++;
424 active:
425 				total.t_avm += p->p_dsize + p->p_ssize;
426 				total.t_arm += p->p_rssize;
427 				break;
428 			}
429 		}
430 	}
431 	total.t_vm += total.t_vmtxt;
432 	total.t_avm += total.t_avmtxt;
433 	total.t_rm += total.t_rmtxt;
434 	total.t_arm += total.t_armtxt;
435 	total.t_free = avefree;
436 	loadav(avenrun, nrun);
437 }
438 
439 /*
440  * Constants for averages over 1, 5, and 15 minutes
441  * when sampling at 5 second intervals.
442  */
443 double	cexp[3] = {
444 	0.9200444146293232,	/* exp(-1/12) */
445 	0.9834714538216174,	/* exp(-1/60) */
446 	0.9944598480048967,	/* exp(-1/180) */
447 };
448 
449 /*
450  * Compute a tenex style load average of a quantity on
451  * 1, 5 and 15 minute intervals.
452  */
453 loadav(avg, n)
454 	register double *avg;
455 	int n;
456 {
457 	register int i;
458 
459 	for (i = 0; i < 3; i++)
460 		avg[i] = cexp[i] * avg[i] + n * (1.0 - cexp[i]);
461 }
462