/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vm_glue.c	7.17 (Berkeley) 07/08/92
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/user.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

int	avefree = 0;		/* XXX */
unsigned maxdmap = MAXDSIZ;	/* XXX */
int	readbuffers = 0;	/* XXX allow kgdb to read kernel buffer pool */

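/*
 * Check whether the kernel map grants the requested access
 * (B_READ or B_WRITE) to the range [addr, addr+len).
 */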
int
kernacc(addr, len, rw)
	caddr_t addr;
	int len, rw;
{
	boolean_t rv;
	vm_offset_t saddr, eaddr;
	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

	saddr = trunc_page(addr);
	eaddr = round_page(addr+len-1);
	rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot);
	/*
	 * XXX there are still some things (e.g. the buffer cache) that
	 * are managed behind the VM system's back so even though an
	 * address is accessible in the mind of the VM system, there may
	 * not be physical pages where the VM thinks there are.  This can
	 * lead to bogus allocation of pages in the kernel address space
	 * or worse, inconsistencies at the pmap level.  We only worry
	 * about the buffer cache for now.
	 */
	if (!readbuffers && rv && (eaddr > (vm_offset_t)buffers &&
		   saddr < (vm_offset_t)buffers + MAXBSIZE * nbuf))
		rv = FALSE;
	return(rv == TRUE);
}

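/*
 * Check whether the current process's map grants the requested access
 * (B_READ or B_WRITE) to the user range [addr, addr+len).
 */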
int
useracc(addr, len, rw)
	caddr_t addr;
	int len, rw;
{
	boolean_t rv;
	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

	rv = vm_map_check_protection(&curproc->p_vmspace->vm_map,
	    trunc_page(addr), round_page(addr+len-1), prot);
	return(rv == TRUE);
}

#ifdef KGDB
/*
 * Change protections on kernel pages from addr to addr+len
 * (presumably so the debugger can plant a breakpoint).
 * All addresses are assumed to reside in the Sysmap.
 */
void
chgkprot(addr, len, rw)
	register caddr_t addr;
	int len, rw;
{
	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

	vm_map_protect(kernel_map, trunc_page(addr),
		       round_page(addr+len-1), prot, FALSE);
}
#endif

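/*
 * Wire the user range [addr, addr+len) of the current process
 * into memory.
 */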
void
vslock(addr, len)
	caddr_t	addr;
	u_int	len;
{
	vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
			round_page(addr+len-1), FALSE);
}

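/*
 * Unwire a range previously wired by vslock.  The "dirtied" argument
 * is unused by this implementation.
 */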
void
vsunlock(addr, len, dirtied)
	caddr_t	addr;
	u_int	len;
	int dirtied;
{
#ifdef	lint
	dirtied++;
#endif	/* lint */
	vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
			round_page(addr+len-1), TRUE);
}

/*
 * Implement fork's actions on an address space.
 * Here we arrange for the address space to be copied or referenced,
 * allocate a user struct (pcb and kernel stack), then call the
 * machine-dependent layer to fill those in and make the new process
 * ready to run.
 * NOTE: the kernel stack may be at a different location in the child
 * process, and thus addresses of automatic variables may be invalid
 * after cpu_fork returns in the child process.  We do nothing here
 * after cpu_fork returns.
 */
int
vm_fork(p1, p2, isvfork)
	register struct proc *p1, *p2;
	int isvfork;
{
	register struct user *up;
	vm_offset_t addr;

#ifdef i386
	/*
	 * Avoid copying any of the parent's pagetables or other
	 * per-process objects that reside in the map by marking all of
	 * them non-inheritable.
	 */
	(void)vm_map_inherit(&p1->p_vmspace->vm_map,
		UPT_MIN_ADDRESS-UPAGES*NBPG, VM_MAX_ADDRESS, VM_INHERIT_NONE);
#endif
	p2->p_vmspace = vmspace_fork(p1->p_vmspace);

#ifdef SYSVSHM
	if (p1->p_vmspace->vm_shm)
		shmfork(p1, p2, isvfork);
#endif

#ifndef	i386
	/*
	 * Allocate a wired-down (for now) pcb and kernel stack for the
	 * process.
	 */
	addr = kmem_alloc_pageable(kernel_map, ctob(UPAGES));
	vm_map_pageable(kernel_map, addr, addr + ctob(UPAGES), FALSE);
#else
	/*
	 * XXX somehow, on the 386, pageout occasionally removes the
	 * active, wired-down kstack and pagetables WITHOUT going through
	 * vm_page_unwire!  Why this appears to work is not yet clear,
	 * yet it does...
	 */
	addr = kmem_alloc(kernel_map, ctob(UPAGES));
#endif
	up = (struct user *)addr;
	p2->p_addr = up;

	/*
	 * p_stats and p_sigacts currently point at fields in the user
	 * struct, reached through p_addr rather than through &u.
	 * Copy p_sigacts and parts of p_stats; zero the rest
	 * of p_stats (statistics).
	 */
	p2->p_stats = &up->u_stats;
	p2->p_sigacts = &up->u_sigacts;
	up->u_sigacts = *p1->p_sigacts;
	bzero(&up->u_stats.pstat_startzero,
	    (unsigned) ((caddr_t)&up->u_stats.pstat_endzero -
	    (caddr_t)&up->u_stats.pstat_startzero));
	bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy,
	    ((caddr_t)&up->u_stats.pstat_endcopy -
	     (caddr_t)&up->u_stats.pstat_startcopy));

#ifdef i386
	{
		u_int addr = UPT_MIN_ADDRESS - UPAGES*NBPG;
		struct vm_map *vp;

		vp = &p2->p_vmspace->vm_map;
		(void)vm_deallocate(vp, addr, UPT_MAX_ADDRESS - addr);
		(void)vm_allocate(vp, &addr, UPT_MAX_ADDRESS - addr, FALSE);
		(void)vm_map_inherit(vp, addr, UPT_MAX_ADDRESS, VM_INHERIT_NONE);
	}
#endif
	/*
	 * cpu_fork will copy and update the kernel stack and pcb,
	 * and make the child ready to run.  It marks the child
	 * so that it can return differently than the parent.
	 * It returns twice, once in the parent process and
	 * once in the child.
	 */
	return (cpu_fork(p1, p2));
}

/*
 * Set default limits for VM system.
 * Called for proc 0, and then inherited by all others.
 */
void
vm_init_limits(p)
	register struct proc *p;
{

	/*
	 * Set up the initial limits on process VM.
	 * Set the maximum resident set size to be all
	 * of (reasonably) available memory.  This causes
	 * any single, large process to start random page
	 * replacement once it fills memory.
	 */
	p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
	p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
	p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
	p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
	p->p_rlimit[RLIMIT_RSS].rlim_cur = p->p_rlimit[RLIMIT_RSS].rlim_max =
		ptoa(cnt.v_free_count);
}

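/* vm_pageout.h supplies VM_WAIT, used by sched() below. */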
#include <vm/vm_pageout.h>

#ifdef DEBUG
int	enableswap = 1;
int	swapdebug = 0;
#define	SDB_FOLLOW	1
#define	SDB_SWAPIN	2
#define	SDB_SWAPOUT	4
#endif

/*
 * Brutally simple:
 *	1. Attempt to swap in every swapped-out, runnable process in
 *	   order of priority.
 *	2. If not enough memory, wake the pageout daemon and let it
 *	   clear some space.
 */
void
sched()
{
	register struct proc *p;
	register int pri;
	struct proc *pp;
	int ppri;
	vm_offset_t addr;
	vm_size_t size;

loop:
#ifdef DEBUG
	while (!enableswap)
		sleep((caddr_t)&proc0, PVM);
#endif
	pp = NULL;
	ppri = INT_MIN;
	for (p = (struct proc *)allproc; p != NULL; p = p->p_nxt) {
		if (p->p_stat == SRUN && (p->p_flag & SLOAD) == 0) {
			pri = p->p_time + p->p_slptime - p->p_nice * 8;
			if (pri > ppri) {
				pp = p;
				ppri = pri;
			}
		}
	}
#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("sched: running, procp %x pri %d\n", pp, ppri);
#endif
	/*
	 * Nothing to do, back to sleep
	 */
	if ((p = pp) == NULL) {
		sleep((caddr_t)&proc0, PVM);
		goto loop;
	}

	/*
	 * We would like to bring someone in.
	 * This part is really bogus because we could still deadlock on
	 * memory despite our feeble check.
	 */
	size = round_page(ctob(UPAGES));
	addr = (vm_offset_t) p->p_addr;
	if (cnt.v_free_count > atop(size)) {
#ifdef DEBUG
		if (swapdebug & SDB_SWAPIN)
			printf("swapin: pid %d(%s)@%x, pri %d free %d\n",
			       p->p_pid, p->p_comm, p->p_addr,
			       ppri, cnt.v_free_count);
#endif
		vm_map_pageable(kernel_map, addr, addr+size, FALSE);
		(void) splstatclock();
		if (p->p_stat == SRUN)
			setrq(p);
		p->p_flag |= SLOAD;
		(void) spl0();
		p->p_time = 0;
		goto loop;
	}
	/*
	 * Not enough memory, jab the pageout daemon and wait until the
	 * coast is clear.
	 */
#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("sched: no room for pid %d(%s), free %d\n",
		       p->p_pid, p->p_comm, cnt.v_free_count);
#endif
	(void) splhigh();
	VM_WAIT;
	(void) spl0();
#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("sched: room again, free %d\n", cnt.v_free_count);
#endif
	goto loop;
}

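/*
 * A process is swappable iff it is resident (SLOAD) and none of the
 * system, keep, exiting, or physio flags are set.
 */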
#define	swappable(p) \
	(((p)->p_flag & (SSYS|SLOAD|SKEEP|SWEXIT|SPHYSIO)) == SLOAD)

/*
 * Swapout is driven by the pageout daemon.  Very simple: we find
 * eligible procs and unwire their u-areas.  We always try to "swap"
 * at least one process in case we need the room for a swapin.
 * If any procs have been sleeping/stopped for at least maxslp seconds,
 * they are swapped.  Otherwise, we swap the longest-sleeping or stopped
 * process, if any, else the longest-resident process.
 */
void
swapout_threads()
{
	register struct proc *p;
	struct proc *outp, *outp2;
	int outpri, outpri2;
	int didswap = 0;
	extern int maxslp;

#ifdef DEBUG
	if (!enableswap)
		return;
#endif
	outp = outp2 = NULL;
	outpri = outpri2 = 0;
	for (p = (struct proc *)allproc; p != NULL; p = p->p_nxt) {
		if (!swappable(p))
			continue;
		switch (p->p_stat) {
		case SRUN:
			if (p->p_time > outpri2) {
				outp2 = p;
				outpri2 = p->p_time;
			}
			continue;

		case SSLEEP:
		case SSTOP:
			if (p->p_slptime > maxslp) {
				swapout(p);
				didswap++;
			} else if (p->p_slptime > outpri) {
				outp = p;
				outpri = p->p_slptime;
			}
			continue;
		}
	}
	/*
	 * If we didn't get rid of any real duds, toss out the next most
	 * likely sleeping/stopped or running candidate.  We only do this
	 * if we are really low on memory since we don't gain much by
	 * doing it (UPAGES pages).
	 */
	if (didswap == 0 &&
	    cnt.v_free_count <= atop(round_page(ctob(UPAGES)))) {
		if ((p = outp) == 0)
			p = outp2;
#ifdef DEBUG
		if (swapdebug & SDB_SWAPOUT)
			printf("swapout_threads: no duds, try procp %x\n", p);
#endif
		if (p)
			swapout(p);
	}
}

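/*
 * Swap out process p: unwire its u-area so the pageout daemon can
 * reclaim it, mark the process unloaded, and remove it from the
 * run queue.
 */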
void
swapout(p)
	register struct proc *p;
{
	vm_offset_t addr;
	vm_size_t size;

#ifdef DEBUG
	if (swapdebug & SDB_SWAPOUT)
		printf("swapout: pid %d(%s)@%x, stat %x pri %d free %d\n",
		       p->p_pid, p->p_comm, p->p_addr, p->p_stat,
		       p->p_slptime, cnt.v_free_count);
#endif
	size = round_page(ctob(UPAGES));
	addr = (vm_offset_t) p->p_addr;
#if defined(hp300) || defined(luna68k)
	/*
	 * Ugh!  u-area is double mapped to a fixed address behind the
	 * back of the VM system and accesses are usually through that
	 * address rather than the per-process address.  Hence reference
	 * and modify information are recorded at the fixed address and
	 * lost at context switch time.  We assume the u-struct and
	 * kernel stack are always accessed/modified and force it to be so.
	 */
	{
		register int i;
		volatile long tmp;

		for (i = 0; i < UPAGES; i++) {
			tmp = *(long *)addr;
			*(long *)addr = tmp;
			addr += NBPG;
		}
		addr = (vm_offset_t) p->p_addr;
	}
#endif
#ifdef mips
	/*
	 * Be sure to save the floating point coprocessor state before
	 * paging out the u-struct.
	 */
	{
		extern struct proc *machFPCurProcPtr;

		if (p == machFPCurProcPtr) {
			MachSaveCurFPState(p);
			machFPCurProcPtr = (struct proc *)0;
		}
	}
#endif
#ifndef	i386 /* temporary measure until we find the spontaneous unwire of the kstack */
	vm_map_pageable(kernel_map, addr, addr+size, TRUE);
	pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map));
#endif
	(void) splhigh();
	p->p_flag &= ~SLOAD;
	if (p->p_stat == SRUN)
		remrq(p);
	(void) spl0();
	p->p_time = 0;
}

/*
 * The rest of these routines fake thread handling
 */

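/*
 * These map the Mach-style primitives (assert_wait, thread_block,
 * thread_sleep, thread_wakeup) onto the kernel's sleep/wakeup
 * mechanism, using the event as the wait channel.
 */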
void
assert_wait(event, ruptible)
	int event;
	boolean_t ruptible;
{
#ifdef lint
	ruptible++;
#endif
	curproc->p_thread = event;
}

void
thread_block()
{
	int s = splhigh();

	if (curproc->p_thread)
		sleep((caddr_t)curproc->p_thread, PVM);
	splx(s);
}

void
thread_sleep(event, lock, ruptible)
	int event;
	simple_lock_t lock;
	boolean_t ruptible;
{
	int s = splhigh();

#ifdef lint
	ruptible++;
#endif
	curproc->p_thread = event;
	simple_unlock(lock);
	if (curproc->p_thread)
		sleep((caddr_t)event, PVM);
	splx(s);
}

void
thread_wakeup(event)
	int event;
{
	int s = splhigh();

	wakeup((caddr_t)event);
	splx(s);
}

/*
 * DEBUG stuff
 */

int indent = 0;

#include <machine/stdarg.h>		/* see subr_prf.c */

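/*
 * Indented printf: emit `indent' columns of leading whitespace (tabs
 * where possible, then spaces), then print via the kernel printf's %r
 * format (see subr_prf.c), which takes a format string and an argument
 * list.
 */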
/*ARGSUSED2*/
void
#if __STDC__
iprintf(const char *fmt, ...)
#else
iprintf(fmt /* , va_alist */)
	char *fmt;
	/* va_dcl */
#endif
{
	register int i;
	va_list ap;

	for (i = indent; i >= 8; i -= 8)
		printf("\t");
	while (--i >= 0)
		printf(" ");
	va_start(ap, fmt);
	printf("%r", fmt, ap);
	va_end(ap);
}
548