/*-
 * Copyright (c) 1982, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vm_machdep.c	8.3 (Berkeley) 01/21/94
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/user.h>

#include <machine/cpu.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>

/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the kernel stack and pcb, making the child
 * ready to run, and marking it so that it can return differently
 * than the parent.  Returns 1 in the child process, 0 in the parent.
 * We currently double-map the user area so that the stack is at the same
 * address in each process; in the future we will probably relocate
 * the frame pointers on the stack after copying.
 */
cpu_fork(p1, p2)
	register struct proc *p1, *p2;
{
	register struct user *up = p2->p_addr;
	int foo, offset, addr, i;
	extern char kstack[];
	extern int mvesp();

	/*
	 * Copy pcb and stack from proc p1 to p2.
	 * We do this as cheaply as possible, copying only the active
	 * part of the stack.  The stack and pcb need to agree;
	 * this is tricky, as the final pcb is constructed by savectx,
	 * but its frame isn't yet on the stack when the stack is copied.
	 * mi_switch compensates for this when the child eventually runs.
	 * This should be done differently, with a single call
	 * that copies and updates the pcb+stack,
	 * replacing the bcopy and savectx.
	 */
	p2->p_addr->u_pcb = p1->p_addr->u_pcb;
	offset = mvesp() - (int)kstack;
	bcopy((caddr_t)kstack + offset, (caddr_t)p2->p_addr + offset,
	    (unsigned) ctob(UPAGES) - offset);
	p2->p_md.md_regs = p1->p_md.md_regs;

	/*
	 * Wire top of address space of child to its u.
	 * First, fault in a page of PTEs to map it.
	 */
	addr = trunc_page((u_int)vtopte(kstack));
	(void)vm_map_pageable(&p2->p_vmspace->vm_map, addr, addr+NBPG, FALSE);
	for (i = 0; i < UPAGES; i++)
		pmap_enter(&p2->p_vmspace->vm_pmap, (vm_offset_t)kstack+i*NBPG,
			pmap_extract(kernel_pmap, ((int)p2->p_addr)+i*NBPG),
			VM_PROT_READ, 1);
	pmap_activate(&p2->p_vmspace->vm_pmap, &up->u_pcb);

	/*
	 * Arrange for a non-local goto when the new process
	 * is started, to resume here, returning nonzero from setjmp.
	 */
	if (savectx(up, 1)) {
		/*
		 * Return 1 in child.
		 */
		return (1);
	}
	return (0);
}
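
#ifdef notdef
/*
 * Illustrative sketch (not part of the original sources) of how the
 * machine-independent fork code is expected to consume cpu_fork()'s
 * 0/1 return convention above.  The function name and the use of the
 * child's pid as a return value are placeholders only.
 */
example_fork_caller(p1, p2)
	struct proc *p1, *p2;
{

	if (cpu_fork(p1, p2)) {
		/*
		 * Child: execution resumes here, on the child's copy of
		 * the kernel stack, once mi_switch() runs it.
		 */
		return (0);
	}
	/* Parent: the child is now set up and can be made runnable. */
	return (p2->p_pid);
}
#endif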

#include "npx.h"
#if NNPX > 0
extern struct proc *npxproc;
#endif

#ifdef notyet
/*
 * cpu_exit is called as the last action during exit.
 *
 * We change to an inactive address space and a "safe" stack,
 * passing an argument through to the new stack.  Now, safely isolated
 * from the resources we're shedding, we release the address space
 * and any remaining machine-dependent resources, including the
 * memory for the user structure and kernel stack.
 *
 * Next, we assign a dummy context to be written over by mi_switch,
 * calling it to send this process off to oblivion.
 * [The nullpcb allows us to minimize cost in mi_switch() by not having
 * a special case.]
 */
struct proc *switch_to_inactive();
cpu_exit(p)
	register struct proc *p;
{
	static struct pcb nullpcb;	/* pcb to overwrite on last switch */

#if NNPX > 0
	/* free coprocessor (if we have it) */
	if (p == npxproc)
		npxproc = 0;
#endif

	/* move to inactive space and stack, passing arg across */
	p = switch_to_inactive(p);

	/* drop per-process resources */
	vmspace_free(p->p_vmspace);
	kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES));

	p->p_addr = (struct user *) &nullpcb;
	mi_switch();
	/* NOTREACHED */
}
#else
cpu_exit(p)
	register struct proc *p;
{

	/* free coprocessor (if we have it) */
#if NNPX > 0
	if (p == npxproc)
		npxproc = 0;
#endif

	curproc = p;
	mi_switch();
}

cpu_wait(p)
	struct proc *p;
{

	/* drop per-process resources */
	vmspace_free(p->p_vmspace);
	kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES));
}
#endif
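
#ifdef notdef
/*
 * Illustrative sketch (not part of the original sources) of the split
 * of work between cpu_exit() and cpu_wait(): the dying process cannot
 * free the u. area and kernel stack it is still running on, so that is
 * deferred until the parent collects the child.  The function names
 * below are placeholders for the machine-independent exit/wait paths.
 */
example_exit_path(p)
	struct proc *p;
{

	/* last machine-dependent action of exit; never returns */
	cpu_exit(p);
	/* NOTREACHED */
}

example_wait_path(child)
	struct proc *child;
{

	/*
	 * The child is now a zombie and can no longer be running on its
	 * kernel stack, so its u. area and address space may be freed.
	 */
	cpu_wait(child);
}
#endif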

/*
 * Dump the machine-specific header information at the start of a core dump.
 */
cpu_coredump(p, vp, cred)
	struct proc *p;
	struct vnode *vp;
	struct ucred *cred;
{

	return (vn_rdwr(UIO_WRITE, vp, (caddr_t) p->p_addr, ctob(UPAGES),
	    (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *)NULL,
	    p));
}
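
#ifdef notdef
/*
 * Illustrative sketch (not part of the original sources) of the core
 * file layout built on top of cpu_coredump(): the u. area written
 * above occupies the first ctob(UPAGES) bytes, with the data and stack
 * segments following it.  The function name and error handling are
 * placeholders for the machine-independent coredump code.
 */
example_core_layout(p, vp, cred)
	struct proc *p;
	struct vnode *vp;
	struct ucred *cred;
{
	int error;

	/* u. area (including the pcb and saved registers) at offset 0 */
	error = cpu_coredump(p, vp, cred);
	if (error)
		return (error);
	/* the data segment would be written next, at offset ctob(UPAGES), */
	/* followed by the stack segment */
	return (0);
}
#endif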

/*
 * Set a red zone in the kernel stack after the u. area.
 */
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
/*
 * Eventually do this by setting up an expand-down stack segment for
 * the ss0 selector, allowing stack access down to the top of the u.
 * area.  This means, though, that protection violations need to be
 * handled through a double fault exception that must do an integral
 * task switch to a known-good context, within which a dump can be
 * taken.  A sensible scheme might be to save the initial context used
 * by sched (which has physical memory mapped 1:1 at the bottom) and
 * take the dump while still in mapped mode.
 */
}

/*
 * Move pages from one kernel virtual address to another.
 * Both addresses are assumed to reside in the Sysmap,
 * and size must be a multiple of CLBYTES.
 */
pagemove(from, to, size)
	register caddr_t from, to;
	int size;
{
	register struct pte *fpte, *tpte;

	if (size % CLBYTES)
		panic("pagemove");
	fpte = kvtopte(from);
	tpte = kvtopte(to);
	while (size > 0) {
		*tpte++ = *fpte;
		*(int *)fpte++ = 0;
		from += NBPG;
		to += NBPG;
		size -= NBPG;
	}
	tlbflush();
}
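
#ifdef notdef
/*
 * Illustrative sketch (not part of the original sources) of a typical
 * pagemove() use: handing whole pages of buffer memory from one buffer
 * to another by remapping rather than copying, in the style of the
 * buffer cache's allocbuf().  The buffer fields are real, but the
 * function and its bookkeeping are placeholders.
 */
example_pagemove_use(from_bp, to_bp, bytes)
	register struct buf *from_bp, *to_bp;
	int bytes;
{

	/* bytes must be a multiple of CLBYTES, per pagemove() above */
	pagemove(from_bp->b_un.b_addr + from_bp->b_bufsize - bytes,
	    to_bp->b_un.b_addr + to_bp->b_bufsize, bytes);
	from_bp->b_bufsize -= bytes;
	to_bp->b_bufsize += bytes;
}
#endif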

/*
 * Convert kernel VA to physical address
 */
kvtop(addr)
	register caddr_t addr;
{
	vm_offset_t pa;

	pa = pmap_extract(kernel_pmap, (vm_offset_t)addr);
	if (pa == 0)
		panic("kvtop: zero page frame");
	return((int)pa);
}
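
#ifdef notdef
/*
 * Illustrative sketch (not part of the original sources) of a typical
 * kvtop() use: a driver turning the kernel virtual address of a buffer
 * into the physical address a DMA controller must be programmed with.
 * The function and the controller it alludes to are hypothetical.
 */
example_dma_setup(addr)
	caddr_t addr;
{
	u_int pa;

	pa = kvtop(addr);
	/* hand pa to the (hypothetical) DMA controller here */
	return (pa);
}
#endif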

#ifdef notdef
/*
 * The probe[rw] routines should probably be redone in assembler
 * for efficiency.
 */
prober(addr)
	register u_int addr;
{
	register int page;
	register struct proc *p;

	if (addr >= USRSTACK)
		return(0);
	p = u.u_procp;
	page = btop(addr);
	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize))
		return(1);
	return(0);
}

probew(addr)
	register u_int addr;
{
	register int page;
	register struct proc *p;

	if (addr >= USRSTACK)
		return(0);
	p = u.u_procp;
	page = btop(addr);
	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize))
		return((*(int *)vtopte(p, page) & PG_PROT) == PG_UW);
	return(0);
}

/*
 * NB: assumes a physically contiguous kernel page table
 *     (makes life a LOT simpler).
 */
kernacc(addr, count, rw)
	register u_int addr;
	int count, rw;
{
	register struct pde *pde;
	register struct pte *pte;
	register int ix, cnt;
	extern long Syssize;

	if (count <= 0)
		return(0);
	pde = (struct pde *)((u_int)u.u_procp->p_p0br + u.u_procp->p_szpt * NBPG);
	ix = (addr & PD_MASK) >> PD_SHIFT;
	cnt = ((addr + count + (1 << PD_SHIFT) - 1) & PD_MASK) >> PD_SHIFT;
	cnt -= ix;
	for (pde += ix; cnt; cnt--, pde++)
		if (pde->pd_v == 0)
			return(0);
	ix = btop(addr-0xfe000000);
	cnt = btop(addr-0xfe000000+count+NBPG-1);
	if (cnt > (int)&Syssize)
		return(0);
	cnt -= ix;
	for (pte = &Sysmap[ix]; cnt; cnt--, pte++)
		if (pte->pg_v == 0 /*|| (rw == B_WRITE && pte->pg_prot == 1)*/)
			return(0);
	return(1);
}

useracc(addr, count, rw)
	register u_int addr;
	int count, rw;
{
	register int (*func)();
	register u_int addr2;
	extern int prober(), probew();

	if (count <= 0)
		return(0);
	addr2 = addr;
	addr += count;
	func = (rw == B_READ) ? prober : probew;
	do {
		if ((*func)(addr2) == 0)
			return(0);
		addr2 = (addr2 + NBPG) & ~PGOFSET;
	} while (addr2 < addr);
	return(1);
}
#endif

extern vm_map_t phys_map;

/*
 * Map an I/O request into kernel virtual address space.  Requests fall into
 * one of five categories:
 *
 *	B_PHYS|B_UAREA:	User u-area swap.
 *			Address is relative to start of u-area (p_addr).
 *	B_PHYS|B_PAGET:	User page table swap.
 *			Address is a kernel VA in usrpt (Usrptmap).
 *	B_PHYS|B_DIRTY:	Dirty page push.
 *			Address is a VA in proc2's address space.
 *	B_PHYS|B_PGIN:	Kernel pagein of user pages.
 *			Address is a VA in the user's address space.
 *	B_PHYS:		User "raw" I/O request.
 *			Address is a VA in the user's address space.
 *
 * All requests are (re)mapped into kernel VA space via the phys_map.
 */
vmapbuf(bp)
	register struct buf *bp;
{
	register int npf;
	register caddr_t addr;
	register long flags = bp->b_flags;
	struct proc *p;
	int off;
	vm_offset_t kva;
	register vm_offset_t pa;

	if ((flags & B_PHYS) == 0)
		panic("vmapbuf");
	addr = bp->b_saveaddr = bp->b_un.b_addr;
	off = (int)addr & PGOFSET;
	p = bp->b_proc;
	npf = btoc(round_page(bp->b_bcount + off));
	kva = kmem_alloc_wait(phys_map, ctob(npf));
	bp->b_un.b_addr = (caddr_t) (kva + off);
	while (npf--) {
		pa = pmap_extract(&p->p_vmspace->vm_pmap, (vm_offset_t)addr);
		if (pa == 0)
			panic("vmapbuf: null page frame");
		pmap_enter(vm_map_pmap(phys_map), kva, trunc_page(pa),
			   VM_PROT_READ|VM_PROT_WRITE, TRUE);
		addr += PAGE_SIZE;
		kva += PAGE_SIZE;
	}
}

/*
 * Free the I/O map PTEs associated with this I/O operation.
 * We also invalidate the TLB entries and restore the original b_addr.
 */
vunmapbuf(bp)
	register struct buf *bp;
{
	register int npf;
	register caddr_t addr = bp->b_un.b_addr;
	vm_offset_t kva;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");
	npf = btoc(round_page(bp->b_bcount + ((int)addr & PGOFSET)));
	kva = (vm_offset_t)((int)addr & ~PGOFSET);
	kmem_free_wakeup(phys_map, kva, ctob(npf));
	bp->b_un.b_addr = bp->b_saveaddr;
	bp->b_saveaddr = NULL;
}
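
#ifdef notdef
/*
 * Illustrative sketch (not part of the original sources) of how
 * vmapbuf() and vunmapbuf() bracket a raw ("physical") I/O transfer in
 * the style of physio(): the user's pages are double-mapped into
 * kernel VA for the duration of the transfer and then released.  The
 * function and the strategy-routine argument are placeholders, and the
 * wiring of the user pages is assumed to be done by the caller.
 */
example_raw_io(bp, strategy)
	register struct buf *bp;
	int (*strategy)();
{

	bp->b_flags |= B_PHYS;
	vmapbuf(bp);		/* map the user buffer into kernel VA */
	(*strategy)(bp);	/* start the transfer on the device */
	biowait(bp);		/* wait for it to complete */
	vunmapbuf(bp);		/* tear down the kernel mapping */
}
#endif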