xref: /original-bsd/sys/i386/i386/vm_machdep.c (revision 76210d32)
1 /*-
2  * Copyright (c) 1990 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * the University of Utah, and William Jolitz.
7  *
8  * %sccs.include.386.c%
9  *
10  *	@(#)vm_machdep.c	5.2 (Berkeley) 06/23/90
11  */
12 
13 /*
14  * Copyright (c) 1988 University of Utah.
15  * All rights reserved.  The Utah Software License Agreement
16  * specifies the terms and conditions for redistribution.
17  *
18  *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
19  */
20 /*
21  * Copyright (c) 1982, 1986 Regents of the University of California.
22  * All rights reserved.  The Berkeley software License Agreement
23  * specifies the terms and conditions for redistribution.
24  *
25  *	@(#)vm_machdep.c	7.1 (Berkeley) 6/5/86
26  */
27 
28 #include "pte.h"
29 
30 #include "param.h"
31 #include "systm.h"
32 #include "dir.h"
33 #include "user.h"
34 #include "proc.h"
35 #include "cmap.h"
36 #include "mount.h"
37 #include "vm.h"
38 #include "text.h"
39 
40 #include "buf.h"
41 
42 #include "dbg.h"
43 /*
44  * Set a red zone in the kernel stack after the u. area.
45  */
46 setredzone(pte, vaddr)
47 	u_short *pte;
48 	caddr_t vaddr;
49 {
50 /* Eventually do this by setting up an expand-down stack segment
51    for the ss0 selector, allowing stack access down to the top of
52    the u. area.  This means, though, that protection violations
53    need to be handled through a double fault exception that must do
54    an integral task switch to a known good context, within which a
55    dump can be taken.  A sensible scheme might be to save the
56    initial context used by sched (which has physical memory mapped
57    1:1 at the bottom) and take the dump while still in mapped mode. */
58 }
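
/*
 * [Editor's note: an illustrative sketch, not part of the original
 * source.  Short of the expand-down ss0 segment scheme described
 * above, a conventional red zone can be approximated by clearing the
 * valid bit of the PTE that maps the guard page just below the kernel
 * stack, so that any access into it faults.  Which PTE the caller
 * hands in, and the helper's name, are assumptions made here.]
 */
#ifdef notdef
setredzone_sketch(pte)
	register u_int *pte;
{
	*pte &= ~PG_V;			/* guard page becomes not-present */
	load_cr3(_cr3());		/* flush the stale translation */
}
#endif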
59 
60 /*
61  * Check for valid program size
62  * NB - Check data and data growth separately as they may overflow
63  * when summed together.
64  */
65 chksize(ts, ids, uds, ss)
66 	unsigned ts, ids, uds, ss;
67 {
68 	extern unsigned maxtsize;
69 
70 	if (ctob(ts) > maxtsize ||
71 	    ctob(ids) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
72 	    ctob(uds) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
73 	    ctob(ids + uds) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
74 	    ctob(ss) > u.u_rlimit[RLIMIT_STACK].rlim_cur) {
75 		u.u_error = ENOMEM;
76 		return (1);
77 	}
78 	return (0);
79 }
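
/*
 * [Editor's note: an illustrative sketch, not part of the original
 * source.  It shows why the data sizes above are checked both
 * separately and summed: with NBPG == 4096, ctob(x) is x << 12, so two
 * sizes that are each representable can wrap around 32 bits when
 * summed and sneak past a single limit check.  The sample values are
 * arbitrary.]
 */
#ifdef notdef
chksize_overflow_example()
{
	unsigned ids = 0xc0000;		/* 0xc0000 clicks -> ctob = 3GB */
	unsigned uds = 0x40000;		/* 0x40000 clicks -> ctob = 1GB */

	/*
	 * ctob(ids + uds) wraps to 0 and would slip past the summed
	 * check; the separate ctob(ids) and ctob(uds) checks catch it.
	 */
	printf("sum %x, parts %x %x\n",
	    ctob(ids + uds), ctob(ids), ctob(uds));
}
#endif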
80 
81 /*ARGSUSED*/
82 newptes(pte, v, size)
83 	struct pte *pte;
84 	u_int v;
85 	register int size;
86 {
87 	register caddr_t a;
88 
89 #ifdef lint
90 	pte = pte;
91 #endif
92 	load_cr3(_cr3());
93 }
94 
95 /*
96  * Change protection codes of the text segment.
97  * We have to flush the translation buffer since this
98  * affects the virtual memory mapping of the current process.
99  */
100 chgprot(addr, tprot)
101 	caddr_t addr;
102 	long tprot;
103 {
104 	unsigned v;
105 	int tp;
106 	register struct pte *pte;
107 	register struct cmap *c;
108 
109 	v = clbase(btop(addr));
110 	if (!isatsv(u.u_procp, v)) {
111 		u.u_error = EFAULT;
112 		return (0);
113 	}
114 	tp = vtotp(u.u_procp, v);
115 	pte = tptopte(u.u_procp, tp);
116 	if (pte->pg_fod == 0 && pte->pg_pfnum) {
117 		c = &cmap[pgtocm(pte->pg_pfnum)];
118 		if (c->c_blkno && c->c_mdev != MSWAPX)
119 			munhash(mount[c->c_mdev].m_dev,
120 			    (daddr_t)(u_long)c->c_blkno);
121 	}
122 	*(u_int *)pte &= ~PG_PROT;
123 	*(u_int *)pte |= tprot;
124 	load_cr3(_cr3());
125 	return (1);
126 }
127 
128 settprot(tprot)
129 	long tprot;
130 {
131 	register u_int *ptaddr, i;
132 
133 	ptaddr = (u_int *)u.u_procp->p_p0br;
134 	for (i = 0; i < u.u_tsize; i++) {
135 		ptaddr[i] &= ~PG_PROT;
136 		ptaddr[i] |= tprot;
137 	}
138 	load_cr3(_cr3());
139 }
140 
141 /*
142  * Simulate effect of VAX region length registers.
143  * The one case where we must do anything is if a region has shrunk.
144  * In that case we must invalidate all the PTEs for the no longer valid VAs.
145  */
146 setptlr(region, nlen)
147 	int region, nlen;
148 {
149 	register struct pte *pte;
150 	register int change;
151 	int olen;
152 
153 	if (region == 0) {
154 		olen = u.u_pcb.pcb_p0lr;
155 		u.u_pcb.pcb_p0lr = nlen;
156 	} else {
157 		olen = P1PAGES - u.u_pcb.pcb_p1lr;
158 		u.u_pcb.pcb_p1lr = nlen;
159 		nlen = P1PAGES - nlen;
160 	}
161 /*pg("setptlr(%x,%x), was %d",region, nlen, olen);*/
162 	if ((change = olen - nlen) <= 0)
163 		return;
164 	if (region == 0)
165 		pte = u.u_pcb.pcb_p0br + u.u_pcb.pcb_p0lr;
166 	else
167 		pte = u.u_pcb.pcb_p1br + u.u_pcb.pcb_p1lr - change;
168 /*printf("p0b %x p0l %x", u.u_pcb.pcb_p0br, u.u_pcb.pcb_p0lr);
169 printf("p1b %x p1l %x pte %x", u.u_pcb.pcb_p1br, u.u_pcb.pcb_p1lr, pte);*/
170 	do {
171 		*(u_int *)pte++ = 0;
172 	} while (--change);
173 	/* short cut newptes */
174 	load_cr3(_cr3());
175 }
176 
177 /*
178  * Map `size' bytes of physical memory starting at `paddr' into
179  * kernel VA space using PTEs starting at `pte'.  Read/write and
180  * cache-inhibit status are specified by `prot'.
181  */
182 physaccess(pte, paddr, size, prot)
183 	register struct pte *pte;
184 	caddr_t paddr;
185 	register int size, prot;
186 {
187 	register u_int page;
188 
189 	page = (u_int)paddr & PG_FRAME;
190 	for (size = btoc(size); size; size--) {
191 		*(int *)pte = PG_V | prot | page;
192 		page += NBPG;
193 		pte++;
194 	}
195 	load_cr3(_cr3());
196 }
197 
198 /*
199  * Move pages from one kernel virtual address to another.
200  * Both addresses are assumed to reside in the Sysmap,
201  * and size must be a multiple of CLBYTES.
202  */
203 pagemove(from, to, size)
204 	register caddr_t from, to;
205 	int size;
206 {
207 	register struct pte *fpte, *tpte;
208 
209 	if (size % CLBYTES)
210 		panic("pagemove");
211 	fpte = &Sysmap[btop(from)];
212 	tpte = &Sysmap[btop(to)];
213 	while (size > 0) {
214 		*tpte++ = *fpte;
215 		*(int *)fpte++ = 0;
216 		from += NBPG;
217 		to += NBPG;
218 		size -= NBPG;
219 	}
220 	load_cr3(_cr3());
221 }
222 
223 /*
224  * The probe[rw] routines should probably be redone in assembler
225  * for efficiency.
226  */
227 prober(addr)
228 	register u_int addr;
229 {
230 	register int page;
231 	register struct proc *p;
232 
233 	if (addr >= USRSTACK)
234 		return(0);
235 	p = u.u_procp;
236 	page = btop(addr);
237 	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize))
238 		return(1);
239 	return(0);
240 }
241 
242 probew(addr)
243 	register u_int addr;
244 {
245 	register int page;
246 	register struct proc *p;
247 
248 	if (addr >= USRSTACK)
249 		return(0);
250 	p = u.u_procp;
251 	page = btop(addr);
252 	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize))
253 /*
254 {
255 dprintf(DPHYS,"vtopte %x %x\n", vtopte(p, page), *(int *)vtopte(p, page) );*/
256 		return((*(int *)vtopte(p, page) & PG_PROT) == PG_UW);
257 /*}*/
258 	return(0);
259 }
260 
261 /*
262  * NB: assumes a physically contiguous kernel page table
263  *     (makes life a LOT simpler).
264  */
265 kernacc(addr, count, rw)
266 	register u_int addr;
267 	int count, rw;
268 {
269 	register struct pde *pde;
270 	register struct pte *pte;
271 	register int ix, cnt;
272 	extern long Syssize;
273 
274 /*dprintf(DPHYS,"kernacc %x count %d rw %d", addr, count, rw);*/
275 	if (count <= 0)
276 		return(0);
277 	pde = (struct pde *)((u_int)u.u_procp->p_p0br + u.u_procp->p_szpt * NBPG);
278 	ix = (addr & PD_MASK) >> PD_SHIFT;
279 	cnt = ((addr + count + (1 << PD_SHIFT) - 1) & PD_MASK) >> PD_SHIFT;
280 	cnt -= ix;
281 	for (pde += ix; cnt; cnt--, pde++)
282 		if (pde->pd_v == 0)
283 /*{
284 dprintf(DPHYS,"nope pde %x, idx %x\n", pde, ix);*/
285 			return(0);
286 /*}*/
287 	ix = btop(addr-0xfe000000);
288 	cnt = btop(addr-0xfe000000+count+NBPG-1);
289 	if (cnt > (int)&Syssize)
290 /*{
291 dprintf(DPHYS,"nope cnt %x\n", cnt);*/
292 		return(0);
293 /*}*/
294 	cnt -= ix;
295 	for (pte = &Sysmap[ix]; cnt; cnt--, pte++)
296 		if (pte->pg_v == 0 /*|| (rw == B_WRITE && pte->pg_prot == 1)*/)
297 /*{
298 dprintf(DPHYS,"nope pte %x %x, idx %x\n", pte, *(int *)pte, ix);*/
299 			return(0);
300 /*}
301 dprintf(DPHYS,"yup\n");*/
302 	return(1);
303 }
304 
305 useracc(addr, count, rw)
306 	register u_int addr;
307 	int count, rw;
308 {
309 	register int (*func)();
310 	register u_int addr2;
311 	extern int prober(), probew();
312 
313 /*dprintf(DPHYS,"useracc %x count %d rw %d", addr, count, rw);*/
314 	if (count <= 0)
315 		return(0);
316 	addr2 = addr;
317 	addr += count;
318 	func = (rw == B_READ) ? prober : probew;
319 	do {
320 		if ((*func)(addr2) == 0)
321 /*{
322 dprintf(DPHYS,"nope %x\n", addr);*/
323 			return(0);
324 /*}*/
325 		addr2 = (addr2 + NBPG) & ~PGOFSET;
326 	} while (addr2 < addr);
327 /*dprintf(DPHYS,"yup\n", addr);*/
328 	return(1);
329 }
330 
331 /*
332  * Convert kernel VA to physical address
333  */
334 kvtop(addr)
335 	register u_int addr;
336 {
337 	register int pf;
338 
339 	pf = Sysmap[btop(addr-0xfe000000)].pg_pfnum;
340 	if (pf == 0)
341 		panic("kvtop: zero page frame");
342 	return((u_int)ptob(pf) + (addr & PGOFSET));
343 }
344 
345 struct pde *
346 vtopde(p, va)
347 	register struct proc *p;
348 	register u_int va;
349 {
350 	register struct pde *pde;
351 
352 	pde = (struct pde *)((u_int)p->p_p0br + p->p_szpt * NBPG);
353 	return(pde + ((va & PD_MASK) >> PD_SHIFT));
354 }
355 
356 
357 initcr3(p)
358 	register struct proc *p;
359 {
360 	return((int)Usrptmap[btokmx(p->p_p0br) + p->p_szpt].pg_pfnum);
361 }
362 
363 /*
364  * Initialize page directory table to reflect PTEs in Usrptmap.
365  * Page directory table address is given by Usrptmap index of p_szpt.
366  * [used by vgetpt for kernel mode entries, and ptexpand for user mode entries]
367  */
368 initpdt(p)
369 	register struct proc *p;
370 {
371 	register int i, k, sz;
372 	register struct pde *pde, *toppde;
373 	extern struct pde *vtopde();
374 	extern Sysbase;
375 
376 /*pg("initpdt");*/
377 	/* clear entire map */
378 	pde = vtopde(p, 0);
379 	bzero(pde, NBPG);
380 	/* map kernel */
381 	pde = vtopde(p, &Sysbase);
382 	for (i = 0; i < 5; i++, pde++) {
383 		*(int *)pde = PG_UW | PG_V;
384 		pde->pd_pfnum = btoc((unsigned) Sysmap & ~0xfe000000)+i;
385 	}
386 	/* map u dot */
387 	pde = vtopde(p, &u);
388 	*(int *)pde = PG_UW | PG_V;
389 	pde->pd_pfnum = Usrptmap[btokmx(p->p_addr)].pg_pfnum;
390 /*printf("%d.u. pde %x pfnum %x virt %x\n", p->p_pid, pde, pde->pd_pfnum,
391 p->p_addr);*/
392 
393 	/* otherwise, fill in user map */
394 	k = btokmx(p->p_p0br);
395 	pde = vtopde(p, 0);
396 	toppde = vtopde(p, &u);
397 
398 	/* text and data */
399 	sz = ctopt(p->p_tsize + p->p_dsize);
400 /*dprintf(DEXPAND,"textdata 0 to %d\n",sz-1);*/
401 	for (i = 0; i < sz; i++, pde++) {
402 		*(int *)pde = PG_UW | PG_V;
403 		pde->pd_pfnum = Usrptmap[k++].pg_pfnum;
404 /*dprintf(DEXPAND,"%d.pde %x pf %x\n", p->p_pid, pde, *(int *)pde);*/
405 	}
406 	/*
407 	 * Bogus!  The kernelmap may map unused PT pages
408 	 * (since we don't shrink PTs) so we need to skip over
409 	 * those PDEs.  We should really free the unused PT
410 	 * pages in expand().
411 	 */
412 	sz += ctopt(p->p_ssize+UPAGES);
413 	if (sz < p->p_szpt)
414 		k += p->p_szpt - sz;
415 	/* hole */
416 	sz = NPTEPG - ctopt(p->p_ssize + UPAGES + btoc(&Sysbase));
417 /*dprintf(DEXPAND,"zero %d upto %d\n", i, sz-1);*/
418 	for ( ; i < sz; i++, pde++)
419 /* definite bug here... does not hit all entries, but point moot due
420 to bzero above XXX*/
421 {
422 		*(int *)pde = 0;
423 /*pg("pde %x pf %x", pde, *(int *)pde);*/
424 }
425 	/* stack and u-area */
426 	sz = NPTEPG - ctopt(UPAGES + btoc(&Sysbase));
427 /*dprintf(DEXPAND,"stack %d upto %d\n", i, sz-1);*/
428 	for ( ; i < sz; i++, pde++) {
429 		*(int *)pde = PG_UW | PG_V;
430 		pde->pd_pfnum = Usrptmap[k++].pg_pfnum;
431 /*pg("pde %x pf %x", pde, *(int *)pde);*/
432 	}
433 }
434 
435 #ifdef notdef
436 /*
437  * Allocate wired-down, non-paged, cache-inhibited pages in kernel
438  * virtual memory and clear them
439  */
440 caddr_t
441 cimemall(n)
442 	int n;
443 {
444 	register int npg, a;
445 	register struct pte *pte;
446 	extern struct map *kernelmap;
447 
448 	npg = clrnd(btoc(n));
449 	a = rmalloc(kernelmap, (long)npg);
450 	if (a == 0)
451 		return ((caddr_t)0);
452 	pte = &Usrptmap[a];
453 	(void) vmemall(pte, npg, &proc[0], CSYS);
454 	while (--npg >= 0) {
455 		*(int *)pte |= (PG_V|PG_KW|PG_CI);
456 		clearseg((unsigned)pte->pg_pfnum);
457 		pte++;
458 	}
459 	TBIAS();
460 	return ((caddr_t)kmxtob(a));
461 }
462 #endif
463 
464 extern char usrio[];
465 extern struct pte Usriomap[];
466 struct map *useriomap;
467 int usriowanted;
468 
469 /*
470  * Map an IO request into kernel virtual address space.  Requests fall into
471  * one of five categories:
472  *
473  *	B_PHYS|B_UAREA:	User u-area swap.
474  *			Address is relative to start of u-area (p_addr).
475  *	B_PHYS|B_PAGET:	User page table swap.
476  *			Address is a kernel VA in usrpt (Usrptmap).
477  *	B_PHYS|B_DIRTY:	Dirty page push.
478  *			Address is a VA in proc2's address space.
479  *	B_PHYS|B_PGIN:	Kernel pagein of user pages.
480  *			Address is VA in user's address space.
481  *	B_PHYS:		User "raw" IO request.
482  *			Address is VA in user's address space.
483  *
484  * All requests are (re)mapped into kernel VA space via the useriomap
485  * (a name with only slightly more meaning than "kernelmap")
486  */
487 vmapbuf(bp)
488 	register struct buf *bp;
489 {
490 	register int npf, a;
491 	register caddr_t addr;
492 	register struct pte *pte, *iopte;
493 	register long flags = bp->b_flags;
494 	struct proc *p;
495 	int off, s;
496 
497 	if ((flags & B_PHYS) == 0)
498 		panic("vmapbuf");
499 	/*
500 	 * Find PTEs for the area to be mapped
501 	 */
502 	p = flags&B_DIRTY ? &proc[2] : bp->b_proc;
503 	addr = bp->b_un.b_addr;
504 	if (flags & B_UAREA)
505 		pte = &p->p_addr[btop(addr)];
506 	else if (flags & B_PAGET)
507 		pte = &Usrptmap[btokmx((struct pte *)addr)];
508 	else
509 		pte = vtopte(p, btop(addr));
510 	/*
511 	 * Allocate some kernel PTEs and load them
512 	 */
513 	off = (int)addr & PGOFSET;
514 	npf = btoc(bp->b_bcount + off);
515 	s = splbio();
516 	while ((a = rmalloc(useriomap, npf)) == 0) {
517 		usriowanted = 1;
518 		sleep((caddr_t)useriomap, PSWP);
519 	}
520 	splx(s);
521 	iopte = &Usriomap[a];
522 	addr = bp->b_un.b_addr = (caddr_t)(usrio + (a << PGSHIFT)) + off;
523 	a = btop(addr);
524 	while (npf--) {
525 		mapin(iopte, a, pte->pg_pfnum, PG_V);
526 		iopte++, pte++;
527 		a++;
528 	}
529 	load_cr3(_cr3());
530 }
531 
532 /*
533  * Free the io map PTEs associated with this IO operation.
534  * We also invalidate the TLB entries.
535  */
536 vunmapbuf(bp)
537 	register struct buf *bp;
538 {
539 	register int a, npf;
540 	register caddr_t addr = bp->b_un.b_addr;
541 	register struct pte *pte;
542 	int s;
543 
544 	if ((bp->b_flags & B_PHYS) == 0)
545 		panic("vunmapbuf");
546 	a = (int)(addr - usrio) >> PGSHIFT;
547 	npf = btoc(bp->b_bcount + ((int)addr & PGOFSET));
548 	s = splbio();
549 	rmfree(useriomap, npf, a);
550 	if (usriowanted) {
551 		usriowanted = 0;
552 		wakeup((caddr_t)useriomap);
553 	}
554 	splx(s);
555 	pte = &Usriomap[a];
556 	while (npf--) {
557 		*(int *)pte = 0;
558 		addr += NBPG;
559 		pte++;
560 	}
561 	/*
562 	 * If we just completed a dirty page push, we must reconstruct
563 	 * the original b_addr since cleanup() needs it.
564 	 */
565 	if (bp->b_flags & B_DIRTY) {
566 		a = ((bp - swbuf) * CLSIZE) * KLMAX;
567 		bp->b_un.b_addr = (caddr_t)ctob(dptov(&proc[2], a));
568 	}
569 	load_cr3(_cr3());
570 }
571
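
/*
 * [Editor's note: an illustrative sketch, not part of the original
 * source.  A typical raw-I/O path brackets the transfer with
 * vmapbuf()/vunmapbuf() roughly as below; the helper's name and the
 * strategy/wait calls are placeholders standing in for whatever the
 * real caller uses.]
 */
#ifdef notdef
rawio_sketch(bp, strat)
	register struct buf *bp;
	int (*strat)();
{
	bp->b_flags |= B_PHYS;		/* mark as a physical transfer */
	vmapbuf(bp);			/* map the user pages into kernel VA */
	(*strat)(bp);			/* start the transfer */
	biowait(bp);			/* wait for completion */
	vunmapbuf(bp);			/* give back the io map PTEs */
}
#endif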