xref: /original-bsd/sys/vm/vm_glue.c (revision de3f5c4e)
1 /*
2  * Copyright (c) 1991 Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * The Mach Operating System project at Carnegie-Mellon University.
7  *
8  * %sccs.include.redist.c%
9  *
10  *	@(#)vm_glue.c	7.8 (Berkeley) 05/15/91
11  *
12  *
13  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
14  * All rights reserved.
15  *
16  * Permission to use, copy, modify and distribute this software and
17  * its documentation is hereby granted, provided that both the copyright
18  * notice and this permission notice appear in all copies of the
19  * software, derivative works or modified versions, and any portions
20  * thereof, and that both notices appear in supporting documentation.
21  *
22  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
23  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
24  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
25  *
26  * Carnegie Mellon requests users of this software to return to
27  *
28  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
29  *  School of Computer Science
30  *  Carnegie Mellon University
31  *  Pittsburgh PA 15213-3890
32  *
33  * any improvements or extensions that they make and grant Carnegie the
34  * rights to redistribute these changes.
35  */
36 
37 #include "param.h"
38 #include "systm.h"
39 #include "proc.h"
40 #include "resourcevar.h"
41 #include "buf.h"
42 #include "user.h"
43 
44 #include "vm.h"
45 #include "vm_page.h"
46 #include "vm_kern.h"
47 
48 int	avefree = 0;		/* XXX */
49 unsigned maxdmap = MAXDSIZ;	/* XXX */
50 int	readbuffers = 0;	/* XXX allow kgdb to read kernel buffer pool */
51 
52 kernacc(addr, len, rw)
53 	caddr_t addr;
54 	int len, rw;
55 {
56 	boolean_t rv;
57 	vm_offset_t saddr, eaddr;
58 	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;
59 
60 	saddr = trunc_page(addr);
61 	eaddr = round_page(addr+len-1);
62 	rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot);
63 	/*
64 	 * XXX there are still some things (e.g. the buffer cache) that
65 	 * are managed behind the VM system's back so even though an
66 	 * address is accessible in the mind of the VM system, there may
67 	 * not be physical pages where the VM thinks there is.  This can
68 	 * lead to bogus allocation of pages in the kernel address space
69 	 * or worse, inconsistencies at the pmap level.  We only worry
70 	 * about the buffer cache for now.
71 	 */
72 	if (!readbuffers && rv && (eaddr > (vm_offset_t)buffers &&
73 		   saddr < (vm_offset_t)buffers + MAXBSIZE * nbuf))
74 		rv = FALSE;
75 	return(rv == TRUE);
76 }
77 
78 useracc(addr, len, rw)
79 	caddr_t addr;
80 	int len, rw;
81 {
82 	boolean_t rv;
83 	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;
84 
85 	rv = vm_map_check_protection(&curproc->p_vmspace->vm_map,
86 	    trunc_page(addr), round_page(addr+len-1), prot);
87 	return(rv == TRUE);
88 }
89 
#ifdef KGDB
/*
 * Change protections on kernel pages from addr to addr+len
 * (presumably so debugger can plant a breakpoint).
 * All addresses are assumed to reside in the Sysmap.
 *
 * rw == B_READ requests VM_PROT_READ; any other value requests
 * VM_PROT_WRITE.
 */
chgkprot(addr, len, rw)
	register caddr_t addr;
	int len, rw;
{
	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

	/*
	 * End the range at the page boundary at or above addr+len so the
	 * page holding the last byte is always included; rounding
	 * addr+len-1 misses it when that byte starts a page.
	 */
	vm_map_protect(kernel_map, trunc_page(addr),
		       round_page(addr+len), prot, FALSE);
}
#endif
106 
107 vslock(addr, len)
108 	caddr_t	addr;
109 	u_int	len;
110 {
111 	vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
112 			round_page(addr+len-1), FALSE);
113 }
114 
115 vsunlock(addr, len, dirtied)
116 	caddr_t	addr;
117 	u_int	len;
118 	int dirtied;
119 {
120 #ifdef	lint
121 	dirtied++;
122 #endif	lint
123 	vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
124 			round_page(addr+len-1), TRUE);
125 }
126 
127 /*
128  * Implement fork's actions on an address space.
129  * Here we arrange for the address space to be copied or referenced,
130  * allocate a user struct (pcb and kernel stack), then call the
131  * machine-dependent layer to fill those in and make the new process
132  * ready to run.
133  * NOTE: the kernel stack may be at a different location in the child
134  * process, and thus addresses of automatic variables may be invalid
135  * after cpu_fork returns in the child process.  We do nothing here
136  * after cpu_fork returns.
137  */
138 vm_fork(p1, p2, isvfork)
139 	register struct proc *p1, *p2;
140 	int isvfork;
141 {
142 	register struct user *up;
143 	vm_offset_t addr;
144 
145 #ifdef i386
146 	/*
147 	 * avoid copying any of the parent's pagetables or other per-process
148 	 * objects that reside in the map by marking all of them non-inheritable
149 	 */
150 	(void)vm_map_inherit(&p1->p_vmspace->vm_map,
151 		UPT_MIN_ADDRESS-UPAGES*NBPG, VM_MAX_ADDRESS, VM_INHERIT_NONE);
152 #endif
153 	p2->p_vmspace = vmspace_fork(p1->p_vmspace);
154 
155 #ifdef SYSVSHM
156 	if (p1->p_vmspace->vm_shm)
157 		shmfork(p1, p2, isvfork);
158 #endif
159 
160 	/*
161 	 * Allocate a wired-down (for now) pcb and kernel stack for the process
162 	 */
163 	addr = kmem_alloc_pageable(kernel_map, ctob(UPAGES));
164 	vm_map_pageable(kernel_map, addr, addr + ctob(UPAGES), FALSE);
165 	up = (struct user *)addr;
166 	p2->p_addr = up;
167 
168 	/*
169 	 * p_stats and p_sigacts currently point at fields
170 	 * in the user struct but not at &u, instead at p_addr.
171 	 * Copy p_sigacts and parts of p_stats; zero the rest
172 	 * of p_stats (statistics).
173 	 */
174 	p2->p_stats = &up->u_stats;
175 	p2->p_sigacts = &up->u_sigacts;
176 	up->u_sigacts = *p1->p_sigacts;
177 	bzero(&up->u_stats.pstat_startzero,
178 	    (unsigned) ((caddr_t)&up->u_stats.pstat_endzero -
179 	    (caddr_t)&up->u_stats.pstat_startzero));
180 	bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy,
181 	    ((caddr_t)&up->u_stats.pstat_endcopy -
182 	     (caddr_t)&up->u_stats.pstat_startcopy));
183 
184 #ifdef i386
185 	{ u_int addr = UPT_MIN_ADDRESS - UPAGES*NBPG; struct vm_map *vp;
186 
187 	vp = &p2->p_vmspace->vm_map;
188 	(void)vm_map_pageable(vp, addr, 0xfe000000 - addr, TRUE);
189 	(void)vm_deallocate(vp, addr, 0xfe000000 - addr);
190 	(void)vm_allocate(vp, &addr, UPT_MAX_ADDRESS - addr, FALSE);
191 	(void)vm_map_inherit(vp, addr, UPT_MAX_ADDRESS, VM_INHERIT_NONE);
192 	}
193 #endif
194 	/*
195 	 * cpu_fork will copy and update the kernel stack and pcb,
196 	 * and make the child ready to run.  It marks the child
197 	 * so that it can return differently than the parent.
198 	 * It returns twice, once in the parent process and
199 	 * once in the child.
200 	 */
201 	return (cpu_fork(p1, p2));
202 }
203 
204 /*
205  * Set default limits for VM system.
206  * Called for proc 0, and then inherited by all others.
207  */
208 vm_init_limits(p)
209 	register struct proc *p;
210 {
211 
212 	/*
213 	 * Set up the initial limits on process VM.
214 	 * Set the maximum resident set size to be all
215 	 * of (reasonably) available memory.  This causes
216 	 * any single, large process to start random page
217 	 * replacement once it fills memory.
218 	 */
219         p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
220         p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
221         p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
222         p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
223 	p->p_rlimit[RLIMIT_RSS].rlim_cur = p->p_rlimit[RLIMIT_RSS].rlim_max =
224 		ptoa(vm_page_free_count);
225 }
226 
227 #include "../vm/vm_pageout.h"
228 
#ifdef DEBUG
/* Bits tested against swapdebug to select which debug printfs fire. */
#define	SDB_FOLLOW	0x1
#define	SDB_SWAPIN	0x2
#define	SDB_SWAPOUT	0x4

int	swapdebug = 0;		/* mask of SDB_* bits; 0 prints nothing */
int	enableswap = 1;		/* zero makes sched/swapout_threads do nothing */
#endif
236 
237 /*
238  * Brutally simple:
239  *	1. Attempt to swapin every swaped-out, runnable process in
240  *	   order of priority.
241  *	2. If not enough memory, wake the pageout daemon and let it
242  *	   clear some space.
243  */
244 sched()
245 {
246 	register struct proc *p;
247 	register int pri;
248 	struct proc *pp;
249 	int ppri;
250 	vm_offset_t addr;
251 	vm_size_t size;
252 
253 loop:
254 #ifdef DEBUG
255 	if (!enableswap) {
256 		pp = NULL;
257 		goto noswap;
258 	}
259 #endif
260 	pp = NULL;
261 	ppri = INT_MIN;
262 	for (p = allproc; p != NULL; p = p->p_nxt)
263 		if (p->p_stat == SRUN && (p->p_flag & SLOAD) == 0) {
264 			pri = p->p_time + p->p_slptime - p->p_nice * 8;
265 			if (pri > ppri) {
266 				pp = p;
267 				ppri = pri;
268 			}
269 		}
270 #ifdef DEBUG
271 	if (swapdebug & SDB_FOLLOW)
272 		printf("sched: running, procp %x pri %d\n", pp, ppri);
273 noswap:
274 #endif
275 	/*
276 	 * Nothing to do, back to sleep
277 	 */
278 	if ((p = pp) == NULL) {
279 		sleep((caddr_t)&proc0, PVM);
280 		goto loop;
281 	}
282 
283 	/*
284 	 * We would like to bring someone in.
285 	 * This part is really bogus cuz we could deadlock on memory
286 	 * despite our feeble check.
287 	 */
288 	size = round_page(ctob(UPAGES));
289 	addr = (vm_offset_t) p->p_addr;
290 	if (vm_page_free_count > atop(size)) {
291 #ifdef DEBUG
292 		if (swapdebug & SDB_SWAPIN)
293 			printf("swapin: pid %d(%s)@%x, pri %d free %d\n",
294 			       p->p_pid, p->p_comm, p->p_addr,
295 			       ppri, vm_page_free_count);
296 #endif
297 		vm_map_pageable(kernel_map, addr, addr+size, FALSE);
298 		(void) splclock();
299 		if (p->p_stat == SRUN)
300 			setrq(p);
301 		p->p_flag |= SLOAD;
302 		(void) spl0();
303 		p->p_time = 0;
304 		goto loop;
305 	}
306 	/*
307 	 * Not enough memory, jab the pageout daemon and wait til the
308 	 * coast is clear.
309 	 */
310 #ifdef DEBUG
311 	if (swapdebug & SDB_FOLLOW)
312 		printf("sched: no room for pid %d(%s), free %d\n",
313 		       p->p_pid, p->p_comm, vm_page_free_count);
314 #endif
315 	(void) splhigh();
316 	VM_WAIT;
317 	(void) spl0();
318 #ifdef DEBUG
319 	if (swapdebug & SDB_FOLLOW)
320 		printf("sched: room again, free %d\n", vm_page_free_count);
321 #endif
322 	goto loop;
323 }
324 
/*
 * A process may be swapped out only if it is loaded (SLOAD set) and
 * none of SSYS, SKEEP, SWEXIT or SPHYSIO is set.
 */
#define	swappable(p) \
	(((p)->p_flag & (SLOAD|SSYS|SKEEP|SWEXIT|SPHYSIO)) == SLOAD)
327 
328 /*
329  * Swapout is driven by the pageout daemon.  Very simple, we find eligible
330  * procs and unwire their u-areas.  We try to always "swap" at least one
331  * process in case we need the room for a swapin.
332  * If any procs have been sleeping/stopped for at least maxslp seconds,
333  * they are swapped.  Else, we swap the longest-sleeping or stopped process,
334  * if any, otherwise the longest-resident process.
335  */
336 swapout_threads()
337 {
338 	register struct proc *p;
339 	struct proc *outp, *outp2;
340 	int outpri, outpri2;
341 	int didswap = 0;
342 	extern int maxslp;
343 
344 #ifdef DEBUG
345 	if (!enableswap)
346 		return;
347 #endif
348 	outp = outp2 = NULL;
349 	outpri = outpri2 = 0;
350 	for (p = allproc; p != NULL; p = p->p_nxt) {
351 		if (!swappable(p))
352 			continue;
353 		switch (p->p_stat) {
354 		case SRUN:
355 			if (p->p_time > outpri2) {
356 				outp2 = p;
357 				outpri2 = p->p_time;
358 			}
359 			continue;
360 
361 		case SSLEEP:
362 		case SSTOP:
363 			if (p->p_slptime > maxslp) {
364 				swapout(p);
365 				didswap++;
366 			} else if (p->p_slptime > outpri) {
367 				outp = p;
368 				outpri = p->p_slptime;
369 			}
370 			continue;
371 		}
372 	}
373 	/*
374 	 * If we didn't get rid of any real duds, toss out the next most
375 	 * likely sleeping/stopped or running candidate.  We only do this
376 	 * if we are real low on memory since we don't gain much by doing
377 	 * it (UPAGES pages).
378 	 */
379 	if (didswap == 0 &&
380 	    vm_page_free_count <= atop(round_page(ctob(UPAGES)))) {
381 		if ((p = outp) == 0)
382 			p = outp2;
383 #ifdef DEBUG
384 		if (swapdebug & SDB_SWAPOUT)
385 			printf("swapout_threads: no duds, try procp %x\n", p);
386 #endif
387 		if (p)
388 			swapout(p);
389 	}
390 }
391 
392 swapout(p)
393 	register struct proc *p;
394 {
395 	vm_offset_t addr;
396 	vm_size_t size;
397 
398 #ifdef DEBUG
399 	if (swapdebug & SDB_SWAPOUT)
400 		printf("swapout: pid %d(%s)@%x, stat %x pri %d free %d\n",
401 		       p->p_pid, p->p_comm, p->p_addr, p->p_stat,
402 		       p->p_slptime, vm_page_free_count);
403 #endif
404 	size = round_page(ctob(UPAGES));
405 	addr = (vm_offset_t) p->p_addr;
406 #ifdef hp300
407 	/*
408 	 * Ugh!  u-area is double mapped to a fixed address behind the
409 	 * back of the VM system and accesses are usually through that
410 	 * address rather than the per-process address.  Hence reference
411 	 * and modify information are recorded at the fixed address and
412 	 * lost at context switch time.  We assume the u-struct and
413 	 * kernel stack are always accessed/modified and force it to be so.
414 	 */
415 	{
416 		register int i;
417 		volatile long tmp;
418 
419 		for (i = 0; i < UPAGES; i++) {
420 			tmp = *(long *)addr; *(long *)addr = tmp;
421 			addr += NBPG;
422 		}
423 		addr = (vm_offset_t) p->p_addr;
424 	}
425 #endif
426 	vm_map_pageable(kernel_map, addr, addr+size, TRUE);
427 	pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map));
428 	(void) splhigh();
429 	p->p_flag &= ~SLOAD;
430 	if (p->p_stat == SRUN)
431 		remrq(p);
432 	(void) spl0();
433 	p->p_time = 0;
434 }
435 
436 /*
437  * The rest of these routines fake thread handling
438  */
439 
440 void
441 assert_wait(event, ruptible)
442 	int event;
443 	boolean_t ruptible;
444 {
445 #ifdef lint
446 	ruptible++;
447 #endif
448 	curproc->p_thread = event;
449 }
450 
451 void
452 thread_block()
453 {
454 	int s = splhigh();
455 
456 	if (curproc->p_thread)
457 		sleep((caddr_t)curproc->p_thread, PVM);
458 	splx(s);
459 }
460 
461 thread_sleep(event, lock, ruptible)
462 	int event;
463 	simple_lock_t lock;
464 	boolean_t ruptible;
465 {
466 #ifdef lint
467 	ruptible++;
468 #endif
469 	int s = splhigh();
470 
471 	curproc->p_thread = event;
472 	simple_unlock(lock);
473 	if (curproc->p_thread)
474 		sleep((caddr_t)event, PVM);
475 	splx(s);
476 }
477 
478 thread_wakeup(event)
479 	int event;
480 {
481 	int s = splhigh();
482 
483 	wakeup((caddr_t)event);
484 	splx(s);
485 }
486 
/*
 * DEBUG stuff
 */

/* Current indentation, in columns, applied by iprintf(). */
int indent = 0;

/*
 * printf with a leading indent of `indent' columns: one tab for each
 * full group of eight columns, then single spaces for the remainder.
 * The format a and up to seven further arguments are passed straight
 * through to printf.
 */
/*ARGSUSED2*/
iprintf(a, b, c, d, e, f, g, h)
	char *a;
{
	register int col;

	for (col = indent; col >= 8; col -= 8)
		printf("\t");
	while (col-- > 0)
		printf(" ");
	printf(a, b, c, d, e, f, g, h);
}
508