/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vm_glue.c	7.21 (Berkeley) 04/28/93
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/user.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

int	avefree = 0;		/* XXX */
unsigned maxdmap = MAXDSIZ;	/* XXX */
int	readbuffers = 0;	/* XXX allow kgdb to read kernel buffer pool */

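/*
 * Check whether the kernel itself can access (B_READ or B_WRITE) the
 * range [addr, addr+len) of kernel virtual memory; returns 1 if so,
 * 0 if not.
 */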
int
kernacc(addr, len, rw)
	caddr_t addr;
	int len, rw;
{
	boolean_t rv;
	vm_offset_t saddr, eaddr;
	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

	saddr = trunc_page(addr);
	eaddr = round_page(addr+len-1);
	rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot);
	/*
	 * XXX there are still some things (e.g. the buffer cache) that
	 * are managed behind the VM system's back so even though an
	 * address is accessible in the mind of the VM system, there may
	 * not be physical pages where the VM thinks there is.  This can
	 * lead to bogus allocation of pages in the kernel address space
	 * or worse, inconsistencies at the pmap level.  We only worry
	 * about the buffer cache for now.
	 */
	if (!readbuffers && rv && (eaddr > (vm_offset_t)buffers &&
		   saddr < (vm_offset_t)buffers + MAXBSIZE * nbuf))
		rv = FALSE;
	return(rv == TRUE);
}

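/*
 * Check whether the current process can access (B_READ or B_WRITE)
 * the range [addr, addr+len) of its own address space.
 */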
int
useracc(addr, len, rw)
	caddr_t addr;
	int len, rw;
{
	boolean_t rv;
	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

	rv = vm_map_check_protection(&curproc->p_vmspace->vm_map,
	    trunc_page(addr), round_page(addr+len-1), prot);
	return(rv == TRUE);
}

#ifdef KGDB
/*
 * Change protections on kernel pages from addr to addr+len
 * (presumably so debugger can plant a breakpoint).
 *
 * We force the protection change at the pmap level.  If we were
 * to use vm_map_protect a change to allow writing would be lazily-
 * applied meaning we would still take a protection fault, something
 * we really don't want to do.  It would also fragment the kernel
 * map unnecessarily.  We cannot use pmap_protect since it also won't
 * enforce a write-enable request.  Using pmap_enter is the only way
 * we can ensure the change takes place properly.
 */
void
chgkprot(addr, len, rw)
	register caddr_t addr;
	int len, rw;
{
	vm_prot_t prot;
	vm_offset_t pa, sva, eva;

	prot = rw == B_READ ? VM_PROT_READ : VM_PROT_READ|VM_PROT_WRITE;
	eva = round_page(addr + len - 1);
	for (sva = trunc_page(addr); sva < eva; sva += PAGE_SIZE) {
		/*
		 * Extract physical address for the page.
		 * We use a cheezy hack to differentiate physical
		 * page 0 from an invalid mapping, not that it
		 * really matters...
		 */
		pa = pmap_extract(kernel_pmap, sva|1);
		if (pa == 0)
			panic("chgkprot: invalid page");
		pmap_enter(kernel_pmap, sva, pa&~1, prot, TRUE);
	}
}
#endif

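/*
 * Wire down the user pages spanning [addr, addr+len) in the current
 * process so they remain resident, e.g. for the duration of physical I/O.
 */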
void
vslock(addr, len)
	caddr_t	addr;
	u_int	len;
{
	vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
			round_page(addr+len-1), FALSE);
}

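/*
 * Undo a previous vslock: unwire the user pages spanning [addr, addr+len).
 * The "dirtied" argument is currently unused (note the lint hack below).
 */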
void
vsunlock(addr, len, dirtied)
	caddr_t	addr;
	u_int	len;
	int dirtied;
{
#ifdef	lint
	dirtied++;
#endif	/* lint */
	vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
			round_page(addr+len-1), TRUE);
}

/*
 * Implement fork's actions on an address space.
 * Here we arrange for the address space to be copied or referenced,
 * allocate a user struct (pcb and kernel stack), then call the
 * machine-dependent layer to fill those in and make the new process
 * ready to run.
 * NOTE: the kernel stack may be at a different location in the child
 * process, and thus addresses of automatic variables may be invalid
 * after cpu_fork returns in the child process.  We do nothing here
 * after cpu_fork returns.
 */
int
vm_fork(p1, p2, isvfork)
	register struct proc *p1, *p2;
	int isvfork;
{
	register struct user *up;
	vm_offset_t addr;

#ifdef i386
	/*
	 * avoid copying any of the parent's pagetables or other per-process
	 * objects that reside in the map by marking all of them non-inheritable
	 */
	(void)vm_map_inherit(&p1->p_vmspace->vm_map,
		UPT_MIN_ADDRESS-UPAGES*NBPG, VM_MAX_ADDRESS, VM_INHERIT_NONE);
#endif
	p2->p_vmspace = vmspace_fork(p1->p_vmspace);

#ifdef SYSVSHM
	if (p1->p_vmspace->vm_shm)
		shmfork(p1, p2, isvfork);
#endif

#ifndef	i386
	/*
	 * Allocate a wired-down (for now) pcb and kernel stack for the process
	 */
	addr = kmem_alloc_pageable(kernel_map, ctob(UPAGES));
	vm_map_pageable(kernel_map, addr, addr + ctob(UPAGES), FALSE);
#else
/* XXX somehow, on 386, occasionally pageout removes active, wired down kstack,
and pagetables, WITHOUT going thru vm_page_unwire! Why this appears to work is
not yet clear, yet it does... */
	addr = kmem_alloc(kernel_map, ctob(UPAGES));
#endif
	up = (struct user *)addr;
	p2->p_addr = up;

	/*
	 * p_stats and p_sigacts currently point at fields in the user
	 * struct; they are referenced through p_addr rather than through &u.
	 * Copy p_sigacts and parts of p_stats; zero the rest
	 * of p_stats (statistics).
	 */
	p2->p_stats = &up->u_stats;
	p2->p_sigacts = &up->u_sigacts;
	up->u_sigacts = *p1->p_sigacts;
	bzero(&up->u_stats.pstat_startzero,
	    (unsigned) ((caddr_t)&up->u_stats.pstat_endzero -
	    (caddr_t)&up->u_stats.pstat_startzero));
	bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy,
	    ((caddr_t)&up->u_stats.pstat_endcopy -
	     (caddr_t)&up->u_stats.pstat_startcopy));

#ifdef i386
	{ u_int addr = UPT_MIN_ADDRESS - UPAGES*NBPG; struct vm_map *vp;

	vp = &p2->p_vmspace->vm_map;
	(void)vm_deallocate(vp, addr, UPT_MAX_ADDRESS - addr);
	(void)vm_allocate(vp, &addr, UPT_MAX_ADDRESS - addr, FALSE);
	(void)vm_map_inherit(vp, addr, UPT_MAX_ADDRESS, VM_INHERIT_NONE);
	}
#endif
	/*
	 * cpu_fork will copy and update the kernel stack and pcb,
	 * and make the child ready to run.  It marks the child
	 * so that it can return differently than the parent.
	 * It returns twice, once in the parent process and
	 * once in the child.
	 */
	return (cpu_fork(p1, p2));
}

/*
 * Set default limits for VM system.
 * Called for proc 0, and then inherited by all others.
 */
void
vm_init_limits(p)
	register struct proc *p;
{

	/*
	 * Set up the initial limits on process VM.
	 * Set the maximum resident set size to be all
	 * of (reasonably) available memory.  This causes
	 * any single, large process to start random page
	 * replacement once it fills memory.
	 */
	p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
	p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
	p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
	p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
	p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(cnt.v_free_count);
}

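/* Included here (rather than at the top) mainly for VM_WAIT, used by sched(). */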
#include <vm/vm_pageout.h>

#ifdef DEBUG
int	enableswap = 1;
int	swapdebug = 0;
#define	SDB_FOLLOW	1
#define SDB_SWAPIN	2
#define SDB_SWAPOUT	4
#endif

/*
 * Brutally simple:
 *	1. Attempt to swap in every swapped-out, runnable process in
 *	   order of priority.
 *	2. If not enough memory, wake the pageout daemon and let it
 *	   clear some space.
 */
void
sched()
{
	register struct proc *p;
	register int pri;
	struct proc *pp;
	int ppri;
	vm_offset_t addr;
	vm_size_t size;

loop:
#ifdef DEBUG
	while (!enableswap)
		sleep((caddr_t)&proc0, PVM);
#endif
	pp = NULL;
	ppri = INT_MIN;
	for (p = (struct proc *)allproc; p != NULL; p = p->p_nxt) {
		if (p->p_stat == SRUN && (p->p_flag & SLOAD) == 0) {
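			/*
			 * Rank candidates by time swapped out plus time
			 * asleep, penalizing nice'd processes.
			 */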
			pri = p->p_time + p->p_slptime - p->p_nice * 8;
			if (pri > ppri) {
				pp = p;
				ppri = pri;
			}
		}
	}
#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("sched: running, procp %x pri %d\n", pp, ppri);
#endif
	/*
	 * Nothing to do, back to sleep
	 */
	if ((p = pp) == NULL) {
		sleep((caddr_t)&proc0, PVM);
		goto loop;
	}

	/*
	 * We would like to bring someone in.
	 * This part is really bogus cuz we could deadlock on memory
	 * despite our feeble check.
	 */
	size = round_page(ctob(UPAGES));
	addr = (vm_offset_t) p->p_addr;
	if (cnt.v_free_count > atop(size)) {
#ifdef DEBUG
		if (swapdebug & SDB_SWAPIN)
			printf("swapin: pid %d(%s)@%x, pri %d free %d\n",
			       p->p_pid, p->p_comm, p->p_addr,
			       ppri, cnt.v_free_count);
#endif
		vm_map_pageable(kernel_map, addr, addr+size, FALSE);
		(void) splstatclock();
		if (p->p_stat == SRUN)
			setrq(p);
		p->p_flag |= SLOAD;
		(void) spl0();
		p->p_time = 0;
		goto loop;
	}
	/*
	 * Not enough memory, jab the pageout daemon and wait til the
	 * coast is clear.
	 */
#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("sched: no room for pid %d(%s), free %d\n",
		       p->p_pid, p->p_comm, cnt.v_free_count);
#endif
	(void) splhigh();
	VM_WAIT;
	(void) spl0();
#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("sched: room again, free %d\n", cnt.v_free_count);
#endif
	goto loop;
}

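/*
 * A process is a candidate for swapout only if its u-area is loaded
 * (SLOAD) and it is not a system process, locked in core (SKEEP),
 * exiting, or in the middle of physical I/O.
 */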
#define	swappable(p) \
	(((p)->p_flag & (SSYS|SLOAD|SKEEP|SWEXIT|SPHYSIO)) == SLOAD)

/*
 * Swapout is driven by the pageout daemon.  Very simple: we find eligible
 * procs and unwire their u-areas.  We try to always "swap" at least one
 * process in case we need the room for a swapin.
 * If any procs have been sleeping/stopped for at least maxslp seconds,
 * they are swapped.  Else, we swap the longest-sleeping or stopped process,
 * if any, otherwise the longest-resident process.
 */
void
swapout_threads()
{
	register struct proc *p;
	struct proc *outp, *outp2;
	int outpri, outpri2;
	int didswap = 0;
	extern int maxslp;

#ifdef DEBUG
	if (!enableswap)
		return;
#endif
	outp = outp2 = NULL;
	outpri = outpri2 = 0;
	for (p = (struct proc *)allproc; p != NULL; p = p->p_nxt) {
		if (!swappable(p))
			continue;
		switch (p->p_stat) {
		case SRUN:
			if (p->p_time > outpri2) {
				outp2 = p;
				outpri2 = p->p_time;
			}
			continue;

		case SSLEEP:
		case SSTOP:
			if (p->p_slptime > maxslp) {
				swapout(p);
				didswap++;
			} else if (p->p_slptime > outpri) {
				outp = p;
				outpri = p->p_slptime;
			}
			continue;
		}
	}
	/*
	 * If we didn't get rid of any real duds, toss out the next most
	 * likely sleeping/stopped or running candidate.  We only do this
	 * if we are real low on memory since we don't gain much by doing
	 * it (UPAGES pages).
	 */
	if (didswap == 0 &&
	    cnt.v_free_count <= atop(round_page(ctob(UPAGES)))) {
		if ((p = outp) == 0)
			p = outp2;
#ifdef DEBUG
		if (swapdebug & SDB_SWAPOUT)
			printf("swapout_threads: no duds, try procp %x\n", p);
#endif
		if (p)
			swapout(p);
	}
}

void
swapout(p)
	register struct proc *p;
{
	vm_offset_t addr;
	vm_size_t size;

#ifdef DEBUG
	if (swapdebug & SDB_SWAPOUT)
		printf("swapout: pid %d(%s)@%x, stat %x pri %d free %d\n",
		       p->p_pid, p->p_comm, p->p_addr, p->p_stat,
		       p->p_slptime, cnt.v_free_count);
#endif
	size = round_page(ctob(UPAGES));
	addr = (vm_offset_t) p->p_addr;
#if defined(hp300) || defined(luna68k)
	/*
	 * Ugh!  u-area is double mapped to a fixed address behind the
	 * back of the VM system and accesses are usually through that
	 * address rather than the per-process address.  Hence reference
	 * and modify information are recorded at the fixed address and
	 * lost at context switch time.  We assume the u-struct and
	 * kernel stack are always accessed/modified and force it to be so.
	 */
	{
		register int i;
		volatile long tmp;

		for (i = 0; i < UPAGES; i++) {
			tmp = *(long *)addr; *(long *)addr = tmp;
			addr += NBPG;
		}
		addr = (vm_offset_t) p->p_addr;
	}
#endif
#ifdef mips
	/*
	 * Be sure to save the floating point coprocessor state before
	 * paging out the u-struct.
	 */
	{
		extern struct proc *machFPCurProcPtr;

		if (p == machFPCurProcPtr) {
			MachSaveCurFPState(p);
			machFPCurProcPtr = (struct proc *)0;
		}
	}
#endif
#ifndef	i386 /* temporary measure till we find spontaneous unwire of kstack */
	vm_map_pageable(kernel_map, addr, addr+size, TRUE);
	pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map));
#endif
	(void) splhigh();
	p->p_flag &= ~SLOAD;
	if (p->p_stat == SRUN)
		remrq(p);
	(void) spl0();
	p->p_time = 0;
}

/*
 * The rest of these routines fake thread handling
 */

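/*
 * The Mach-derived VM code expects assert_wait/thread_block/thread_sleep/
 * thread_wakeup primitives; here they are layered onto BSD sleep()/wakeup(),
 * with the proc's p_thread field serving as the wait channel.
 */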
void
assert_wait(event, ruptible)
	int event;
	boolean_t ruptible;
{
#ifdef lint
	ruptible++;
#endif
	curproc->p_thread = event;
}

void
thread_block()
{
	int s = splhigh();

	if (curproc->p_thread)
		sleep((caddr_t)curproc->p_thread, PVM);
	splx(s);
}

void
thread_sleep(event, lock, ruptible)
	int event;
	simple_lock_t lock;
	boolean_t ruptible;
{
	int s = splhigh();

#ifdef lint
	ruptible++;
#endif
	curproc->p_thread = event;
	simple_unlock(lock);
	if (curproc->p_thread)
		sleep((caddr_t)event, PVM);
	splx(s);
}

void
thread_wakeup(event)
	int event;
{
	int s = splhigh();

	wakeup((caddr_t)event);
	splx(s);
}

/*
 * DEBUG stuff
 */

int indent = 0;

#include <machine/stdarg.h>		/* see subr_prf.c */

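/*
 * Indented printf for the VM debugging print routines: emit `indent'
 * columns of leading whitespace (tabs, then spaces) before formatting.
 */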
/*ARGSUSED2*/
void
#if __STDC__
iprintf(const char *fmt, ...)
#else
iprintf(fmt /* , va_alist */)
	char *fmt;
	/* va_dcl */
#endif
{
	register int i;
	va_list ap;

	for (i = indent; i >= 8; i -= 8)
		printf("\t");
	while (--i >= 0)
		printf(" ");
	va_start(ap, fmt);
	printf("%r", fmt, ap);
	va_end(ap);
}
567