/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the University of Utah, and William Jolitz.
 *
 * %sccs.include.386.c%
 *
 *	@(#)vm_machdep.c	5.6 (Berkeley) 01/19/91
 */

/*
 * Copyright (c) 1989, 1990 William F. Jolitz
 */

/*
 * Copyright (c) 1988 University of Utah.
 * All rights reserved.  The Utah Software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 */
/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	@(#)vm_machdep.c	7.1 (Berkeley) 6/5/86
 */

#include "machine/pte.h"

#include "param.h"
#include "systm.h"
#include "user.h"
#include "proc.h"
#include "cmap.h"
#include "mount.h"
#include "vm.h"
#include "text.h"

#include "buf.h"

/*
 * Set a red zone in the kernel stack after the u. area.
 */
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
/* eventually do this by setting up an expand-down stack segment
   for ss0: selector, allowing stack access down to top of u.
   this means though that protection violations need to be handled
   thru a double fault exception that must do an integral task
   switch to a known good context, within which a dump can be
   taken. a sensible scheme might be to save the initial context
   used by sched (that has physical memory mapped 1:1 at bottom)
   and take the dump while still in mapped mode */
}

/*
 * Check for valid program size.
 * NB - Check data and data growth separately as they may overflow
 * when summed together.
 */
chksize(ts, ids, uds, ss)
	unsigned ts, ids, uds, ss;
{
	extern unsigned maxtsize;

	if (ctob(ts) > maxtsize ||
	    ctob(ids) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
	    ctob(uds) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
	    ctob(ids + uds) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
	    ctob(ss) > u.u_rlimit[RLIMIT_STACK].rlim_cur) {
		return (ENOMEM);
	}
	return (0);
}

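/*
 * Validate new PTEs for the current process.  On the 386 there is no
 * selective TLB invalidation here; reloading %cr3 flushes the entire
 * translation buffer, so the arguments go unused (hence ARGSUSED).
 */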
/*ARGSUSED*/
newptes(pte, v, size)
	struct pte *pte;
	u_int v;
	register int size;
{
#ifdef lint
	pte = pte;
#endif
	load_cr3(u.u_pcb.pcb_cr3);
}

/*
 * Change protection codes of the text segment.  The translation
 * buffer has to be flushed since this affects the virtual memory
 * mapping of the current process.
 */
chgprot(addr, tprot)
	caddr_t addr;
	long tprot;
{
	unsigned v;
	int tp;
	register struct pte *pte;
	register struct cmap *c;

	v = clbase(btop(addr));
	if (!isatsv(u.u_procp, v))
		return (EFAULT);
	tp = vtotp(u.u_procp, v);
	pte = tptopte(u.u_procp, tp);
	if (pte->pg_fod == 0 && pte->pg_pfnum) {
		c = &cmap[pgtocm(pte->pg_pfnum)];
		if (c->c_blkno)
			munhash(c->c_vp, (daddr_t)(u_long)c->c_blkno);
	}
	*(u_int *)pte &= ~PG_PROT;
	*(u_int *)pte |= tprot;
	load_cr3(u.u_pcb.pcb_cr3);
	return (0);
}

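/*
 * Set the protection bits of every text segment PTE to `tprot',
 * then flush the TLB by reloading %cr3.
 */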
settprot(tprot)
	long tprot;
{
	register u_int *ptaddr, i;

	ptaddr = (u_int *)u.u_procp->p_p0br;
	for (i = 0; i < u.u_tsize; i++) {
		ptaddr[i] &= ~PG_PROT;
		ptaddr[i] |= tprot;
	}
	load_cr3(u.u_pcb.pcb_cr3);
}

/*
 * Simulate the effect of the VAX region length registers.
 * The one case where we must do anything is if a region has shrunk.
 * In that case we must invalidate all the PTEs for the no longer valid VAs.
 */
setptlr(region, nlen)
	int region, nlen;
{
	register struct pte *pte;
	register int change;
	int olen;

	if (region == 0) {
		olen = u.u_pcb.pcb_p0lr;
		u.u_pcb.pcb_p0lr = nlen;
	} else {
		olen = P1PAGES - u.u_pcb.pcb_p1lr;
		u.u_pcb.pcb_p1lr = nlen;
		nlen = P1PAGES - nlen;
	}
	if ((change = olen - nlen) <= 0)
		return;
	if (region == 0)
		pte = u.u_pcb.pcb_p0br + u.u_pcb.pcb_p0lr;
	else
		pte = u.u_pcb.pcb_p1br + u.u_pcb.pcb_p1lr - change;
	do {
		*(u_int *)pte++ = 0;
	} while (--change);
	/* short cut newptes */
	load_cr3(u.u_pcb.pcb_cr3);
}

/*
 * Map `size' bytes of physical memory starting at `paddr' into
 * kernel VA space using PTEs starting at `pte'.  Read/write and
 * cache-inhibit status are specified by `prot'.
 */
physaccess(pte, paddr, size, prot)
	register struct pte *pte;
	caddr_t paddr;
	register int size;
	int prot;
{
	register u_int page;

	page = (u_int)paddr & PG_FRAME;
	for (size = btoc(size); size; size--) {
		*(int *)pte = PG_V | prot | page;
		page += NBPG;
		pte++;
	}
	load_cr3(u.u_pcb.pcb_cr3);
}

/*
 * Move pages from one kernel virtual address to another.
 * Both addresses are assumed to reside in the Sysmap,
 * and size must be a multiple of CLBYTES.
 */
pagemove(from, to, size)
	register caddr_t from, to;
	int size;
{
	register struct pte *fpte, *tpte;

	if (size % CLBYTES)
		panic("pagemove");
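	/* kernel virtual space apparently begins at 0xfe000000 on this port */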
	fpte = &Sysmap[btop(from - 0xfe000000)];
	tpte = &Sysmap[btop(to - 0xfe000000)];
	while (size > 0) {
		*tpte++ = *fpte;
		*(int *)fpte++ = 0;
		from += NBPG;
		to += NBPG;
		size -= NBPG;
	}
	load_cr3(u.u_pcb.pcb_cr3);
}

/*
 * The probe[rw] routines should probably be redone in assembler
 * for efficiency.
 */
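/*
 * prober: return 1 if `addr' lies below USRSTACK and within the
 * current process's text/data or stack region, 0 otherwise.
 */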
prober(addr)
	register u_int addr;
{
	register int page;
	register struct proc *p;

	if (addr >= USRSTACK)
		return(0);
	p = u.u_procp;
	page = btop(addr);
	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize))
		return(1);
	return(0);
}

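/*
 * probew: as prober, but additionally require the page's protection
 * to be PG_UW (user-writable) before granting write access.
 */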
probew(addr)
	register u_int addr;
{
	register int page;
	register struct proc *p;

	if (addr >= USRSTACK)
		return(0);
	p = u.u_procp;
	page = btop(addr);
	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize))
		return((*(int *)vtopte(p, page) & PG_PROT) == PG_UW);
	return(0);
}

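/*
 * kernacc: return 1 if the kernel address range [addr, addr+count)
 * is mapped (valid PDEs and PTEs), 0 otherwise.  &Syssize, a linker
 * symbol, evidently bounds the number of Sysmap entries.
 */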
/*
 * NB: assumes a physically contiguous kernel page table
 *     (makes life a LOT simpler).
 */
kernacc(addr, count, rw)
	register u_int addr;
	int count, rw;
{
	register struct pde *pde;
	register struct pte *pte;
	register int ix, cnt;
	extern long Syssize;

	if (count <= 0)
		return(0);
	pde = (struct pde *)((u_int)u.u_procp->p_p0br + u.u_procp->p_szpt * NBPG);
	ix = (addr & PD_MASK) >> PD_SHIFT;
	cnt = ((addr + count + (1 << PD_SHIFT) - 1) & PD_MASK) >> PD_SHIFT;
	cnt -= ix;
	for (pde += ix; cnt; cnt--, pde++)
		if (pde->pd_v == 0)
			return(0);
	ix = btop(addr - 0xfe000000);
	cnt = btop(addr - 0xfe000000 + count + NBPG - 1);
	if (cnt > (int)&Syssize)
		return(0);
	cnt -= ix;
	for (pte = &Sysmap[ix]; cnt; cnt--, pte++)
		if (pte->pg_v == 0 /*|| (rw == B_WRITE && pte->pg_prot == 1)*/)
			return(0);
	return(1);
}

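/*
 * useracc: verify that the user address range [addr, addr+count) is
 * accessible, probing page by page with prober (B_READ) or probew
 * (write).  Returns 1 on success, 0 on failure.
 */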
useracc(addr, count, rw)
	register u_int addr;
	int count, rw;
{
	register int (*func)();
	register u_int addr2;
	extern int prober(), probew();

	if (count <= 0)
		return(0);
	addr2 = addr;
	addr += count;
	func = (rw == B_READ) ? prober : probew;
	do {
		if ((*func)(addr2) == 0)
			return(0);
		addr2 = (addr2 + NBPG) & ~PGOFSET;
	} while (addr2 < addr);
	return(1);
}

/*
 * Convert kernel VA to physical address
 */
kvtop(addr)
	register u_int addr;
{
	register int pf;

	pf = Sysmap[btop(addr - 0xfe000000)].pg_pfnum;
	if (pf == 0)
		panic("kvtop: zero page frame");
	return((u_int)ptob(pf) + (addr & PGOFSET));
}

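/*
 * vtopde: return a pointer to the page directory entry mapping
 * virtual address `va' in process `p'.  The page directory sits
 * immediately after the process page tables (p_p0br + p_szpt pages).
 */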
struct pde *
vtopde(p, va)
	register struct proc *p;
	register u_int va;
{
	register struct pde *pde;

	pde = (struct pde *)((u_int)p->p_p0br + p->p_szpt * NBPG);
	return(pde + ((va & PD_MASK) >> PD_SHIFT));
}

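/*
 * initcr3: return the physical address of the process's page
 * directory, in the form needed to load into %cr3.
 */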
initcr3(p)
	register struct proc *p;
{
	return(ctob(Usrptmap[btokmx(p->p_p0br + p->p_szpt * NPTEPG)].pg_pfnum));
}

/*
 * Initialize the page directory table to reflect PTEs in Usrptmap.
 * The page directory table address is given by the Usrptmap index of p_szpt.
 * [used by vgetpt for kernel mode entries, and ptexpand for user mode entries]
 */
initpdt(p)
	register struct proc *p;
{
	register int i, k, sz;
	register struct pde *pde, *toppde;
	extern struct pde *vtopde();
	extern Sysbase;

	/* clear entire map */
	pde = vtopde(p, 0);
	/*bzero(pde, NBPG);*/
	/* map kernel */
	pde = vtopde(p, &Sysbase);
	for (i = 0; i < 5; i++, pde++) {
		*(int *)pde = PG_UW | PG_V;
		pde->pd_pfnum = btoc((unsigned)Sysmap & ~0xfe000000) + i;
	}
	/* map u dot */
	pde = vtopde(p, &u);
	*(int *)pde = PG_UW | PG_V;
	pde->pd_pfnum = Usrptmap[btokmx(p->p_addr)].pg_pfnum;
/*printf("%d.u. pde %x pfnum %x virt %x\n", p->p_pid, pde, pde->pd_pfnum,
p->p_addr);*/

	/* otherwise, fill in user map */
	k = btokmx(p->p_p0br);
	pde = vtopde(p, 0);
	toppde = vtopde(p, &u);

	/* text and data */
	sz = ctopt(p->p_tsize + p->p_dsize);
/*dprintf(DEXPAND,"textdata 0 to %d\n",sz-1);*/
	for (i = 0; i < sz; i++, pde++) {
		*(int *)pde = PG_UW | PG_V;
		pde->pd_pfnum = Usrptmap[k++].pg_pfnum;
/*dprintf(DEXPAND,"%d.pde %x pf %x\n", p->p_pid, pde, *(int *)pde);*/
	}
	/*
	 * Bogus!  The kernelmap may map unused PT pages
	 * (since we don't shrink PTs) so we need to skip over
	 * those PDEs.  We should really free the unused PT
	 * pages in expand().
	 */
	sz += ctopt(p->p_ssize + UPAGES);
	if (sz < p->p_szpt)
		k += p->p_szpt - sz;
	/* hole */
	sz = NPTEPG - ctopt(p->p_ssize + UPAGES + btoc(&Sysbase));
/*dprintf(DEXPAND,"zero %d upto %d\n", i, sz-1);*/
	/* definite bug here... does not hit all entries, but point moot due
	   to bzero above XXX */
	for ( ; i < sz; i++, pde++) {
		*(int *)pde = 0;
/*pg("pde %x pf %x", pde, *(int *)pde);*/
	}
	/* stack and u-area */
	sz = NPTEPG - ctopt(UPAGES + btoc(&Sysbase));
/*dprintf(DEXPAND,"stack %d upto %d\n", i, sz-1);*/
	for ( ; i < sz; i++, pde++) {
		*(int *)pde = PG_UW | PG_V;
		pde->pd_pfnum = Usrptmap[k++].pg_pfnum;
/*pg("pde %x pf %x", pde, *(int *)pde);*/
	}
	return(initcr3(p));
}

#ifdef notdef
/*
 * Allocate wired-down, non-paged, cache-inhibited pages in kernel
 * virtual memory and clear them
 */
caddr_t
cimemall(n)
	int n;
{
	register int npg, a;
	register struct pte *pte;
	extern struct map *kernelmap;

	npg = clrnd(btoc(n));
	a = rmalloc(kernelmap, (long)npg);
	if (a == 0)
		return ((caddr_t)0);
	pte = &Usrptmap[a];
	(void) vmemall(pte, npg, &proc[0], CSYS);
	while (--npg >= 0) {
		*(int *)pte |= (PG_V|PG_KW|PG_CI);
		clearseg((unsigned)pte->pg_pfnum);
		pte++;
	}
	TBIAS();
	return ((caddr_t)kmxtob(a));
}
#endif

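/*
 * The user I/O map: usrio is a window of kernel virtual space backed
 * by the Usriomap PTEs; useriomap is the resource map that parcels it
 * out, and usriowanted marks sleepers waiting for map space.
 */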
extern char usrio[];
extern struct pte Usriomap[];
struct map *useriomap;
int usriowanted;

/*
 * Map an IO request into kernel virtual address space.  Requests fall into
 * one of five categories:
 *
 *	B_PHYS|B_UAREA:	User u-area swap.
 *			Address is relative to start of u-area (p_addr).
 *	B_PHYS|B_PAGET:	User page table swap.
 *			Address is a kernel VA in usrpt (Usrptmap).
 *	B_PHYS|B_DIRTY:	Dirty page push.
 *			Address is a VA in proc2's address space.
 *	B_PHYS|B_PGIN:	Kernel pagein of user pages.
 *			Address is VA in user's address space.
 *	B_PHYS:		User "raw" IO request.
 *			Address is VA in user's address space.
 *
 * All requests are (re)mapped into kernel VA space via the useriomap
 * (a name with only slightly more meaning than "kernelmap").
 */
vmapbuf(bp)
	register struct buf *bp;
{
	register int npf, a;
	register caddr_t addr;
	register struct pte *pte, *iopte;
	register long flags = bp->b_flags;
	struct proc *p;
	int off, s;

	if ((flags & B_PHYS) == 0)
		panic("vmapbuf");
	/*
	 * Find PTEs for the area to be mapped
	 */
	p = flags & B_DIRTY ? &proc[2] : bp->b_proc;
	addr = bp->b_un.b_addr;
	if (flags & B_UAREA)
		pte = &p->p_addr[btop(addr)];
	else if (flags & B_PAGET)
		pte = &Usrptmap[btokmx((struct pte *)addr)];
	else
		pte = vtopte(p, btop(addr));

	/*
	 * Allocate some kernel PTEs and load them
	 */
	off = (int)addr & PGOFSET;
	npf = btoc(bp->b_bcount + off);
	s = splbio();
	while ((a = rmalloc(useriomap, npf)) == 0) {
		usriowanted = 1;
		sleep((caddr_t)useriomap, PSWP);
	}
	splx(s);
	iopte = &Usriomap[a];
	bp->b_saveaddr = bp->b_un.b_addr;
	addr = bp->b_un.b_addr = (caddr_t)(usrio + (a << PGSHIFT)) + off;
	while (npf--) {
		mapin(iopte, (u_int)addr, pte->pg_pfnum, PG_KW|PG_V);
		iopte++, pte++;
		addr += NBPG;
	}
	load_cr3(u.u_pcb.pcb_cr3);
}

/*
 * Free the io map PTEs associated with this IO operation.
 * We also invalidate the TLB entries and restore the original b_addr.
 */
vunmapbuf(bp)
	register struct buf *bp;
{
	register int a, npf;
	register caddr_t addr = bp->b_un.b_addr;
	register struct pte *pte;
	int s;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");
	a = (int)(addr - usrio) >> PGSHIFT;
	npf = btoc(bp->b_bcount + ((int)addr & PGOFSET));
	s = splbio();
	rmfree(useriomap, npf, a);
	if (usriowanted) {
		usriowanted = 0;
		wakeup((caddr_t)useriomap);
	}
	splx(s);
	pte = &Usriomap[a];
	while (npf--) {
		*(int *)pte = 0;
		addr += NBPG;
		pte++;
	}
	load_cr3(u.u_pcb.pcb_cr3);
	bp->b_un.b_addr = bp->b_saveaddr;
	bp->b_saveaddr = NULL;
}