/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the University of Utah, and William Jolitz.
 *
 * %sccs.include.386.c%
 *
 *	@(#)vm_machdep.c	5.6 (Berkeley) 01/19/91
 */

/*
 * Copyright (c) 1989, 1990 William F. Jolitz
 */

/*
 * Copyright (c) 1988 University of Utah.
 * All rights reserved.  The Utah Software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 */
/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	@(#)vm_machdep.c	7.1 (Berkeley) 6/5/86
 */

/*
 * Machine-dependent VM glue for the i386: page-table/page-directory
 * maintenance, kernel/user address probing, and physical-I/O buffer
 * remapping.  Several routines end with load_cr3(u.u_pcb.pcb_cr3);
 * per the comments below ("Have to flush translation buffer", "short
 * cut newptes") this reload serves as the whole-TLB flush after PTEs
 * are rewritten.
 */

#include "machine/pte.h"

#include "param.h"
#include "systm.h"
#include "user.h"
#include "proc.h"
#include "cmap.h"
#include "mount.h"
#include "vm.h"
#include "text.h"

#include "buf.h"

/*
 * Set a red zone in the kernel stack after the u. area.
 *
 * NOTE(review): currently a no-op stub on the 386 -- the body below is
 * entirely a design note; neither `pte' nor `vaddr' is used.
 */
setredzone(pte, vaddr)
u_short *pte;
caddr_t vaddr;
{
/* eventually do this by setting up an expand-down stack segment
   for ss0: selector, allowing stack access down to top of u.
   this means though that protection violations need to be handled
   thru a double fault exception that must do an integral task
   switch to a known good context, within which a dump can be
   taken. a sensible scheme might be to save the initial context
   used by sched (that has physical memory mapped 1:1 at bottom)
   and take the dump while still in mapped mode */
}

/*
 * Check for valid program size
 * NB - Check data and data growth separately as they may overflow
 * when summed together.
 *
 * Sizes are in clicks (ctob converts to bytes).  Returns 0 if all
 * segments fit within maxtsize and the process's rlimits, ENOMEM if
 * any limit is exceeded.
 */
chksize(ts, ids, uds, ss)
	unsigned ts, ids, uds, ss;
{
	extern unsigned maxtsize;

	if (ctob(ts) > maxtsize ||
	    ctob(ids) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
	    ctob(uds) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
	    /* ids + uds checked separately above to avoid overflow in the sum */
	    ctob(ids + uds) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
	    ctob(ss) > u.u_rlimit[RLIMIT_STACK].rlim_cur) {
		return (ENOMEM);
	}
	return (0);
}

/*
 * Notify the hardware that PTEs for the range have changed.
 * On the 386 the arguments are ignored (note the lint dance); the
 * CR3 reload below is the only action taken.  Local `a' is unused.
 */
/*ARGSUSED*/
newptes(pte, v, size)
	struct pte *pte;
	u_int v;
	register int size;
{
	register caddr_t a;

#ifdef lint
	pte = pte;
#endif
	load_cr3(u.u_pcb.pcb_cr3);
}

/*
 * Change protection codes of text segment.
 * Have to flush translation buffer since this
 * affect virtual memory mapping of current process.
 *
 * Returns EFAULT if `addr' is not a valid text/data/stack virtual
 * address of the current process, else 0.  If the page has an
 * associated block in the buffer cache, munhash() is called first --
 * presumably to remove the cache alias before the mapping changes
 * (TODO confirm against callers).
 */
chgprot(addr, tprot)
	caddr_t addr;
	long tprot;
{
	unsigned v;
	int tp;
	register struct pte *pte;
	register struct cmap *c;

	v = clbase(btop(addr));
	if (!isatsv(u.u_procp, v))
		return (EFAULT);
	tp = vtotp(u.u_procp, v);
	pte = tptopte(u.u_procp, tp);
	/* pg_fod == 0 && pg_pfnum != 0: page is resident, not fill-on-demand */
	if (pte->pg_fod == 0 && pte->pg_pfnum) {
		c = &cmap[pgtocm(pte->pg_pfnum)];
		if (c->c_blkno)
			munhash(c->c_vp, (daddr_t)(u_long)c->c_blkno);
	}
	/* replace only the protection bits, keep pfnum/valid bits intact */
	*(u_int *)pte &= ~PG_PROT;
	*(u_int *)pte |= tprot;
	load_cr3(u.u_pcb.pcb_cr3);
	return (0);
}

/*
 * Apply protection `tprot' to every PTE of the current process's
 * text segment (u_tsize pages starting at p_p0br), then flush the TLB.
 */
settprot(tprot)
	long tprot;
{
	register u_int *ptaddr, i;

	ptaddr = (u_int *)u.u_procp->p_p0br;
	for (i = 0; i < u.u_tsize; i++) {
		ptaddr[i] &= ~PG_PROT;
		ptaddr[i] |= tprot;
	}
	load_cr3(u.u_pcb.pcb_cr3);
}

/*
 * Simulate effect of VAX region length registers.
 * The one case where we must do anything is if a region has shrunk.
 * In that case we must invalidate all the PTEs for the no longer valid VAs.
 *
 * region 0 is P0 (text+data, grows up from p0br); any other value is
 * P1 (stack, grows down, lengths measured from P1PAGES).  `region' is
 * implicitly int (K&R default).  Zeroed PTEs are flushed from the TLB
 * by the CR3 reload at the end.
 */
setptlr(region, nlen)
	int nlen;
{
	register struct pte *pte;
	register int change;
	int olen;

	if (region == 0) {
		olen = u.u_pcb.pcb_p0lr;
		u.u_pcb.pcb_p0lr = nlen;
	} else {
		olen = P1PAGES - u.u_pcb.pcb_p1lr;
		u.u_pcb.pcb_p1lr = nlen;
		nlen = P1PAGES - nlen;
	}
	/* region grew or stayed the same: nothing to invalidate */
	if ((change = olen - nlen) <= 0)
		return;
	if (region == 0)
		pte = u.u_pcb.pcb_p0br + u.u_pcb.pcb_p0lr;
	else
		pte = u.u_pcb.pcb_p1br + u.u_pcb.pcb_p1lr - change;
	do {
		*(u_int *)pte++ = 0;
	} while (--change);
	/* short cut newptes */
	load_cr3(u.u_pcb.pcb_cr3);
}

/*
 * Map `size' bytes of physical memory starting at `paddr' into
 * kernel VA space using PTEs starting at `pte'.  Read/write and
 * cache-inhibit status are specified by `prot'.
 *
 * `prot' is implicitly int (K&R default).  Note `size' is reused as
 * the loop counter after conversion to a page count.
 */
physaccess(pte, paddr, size, prot)
	register struct pte *pte;
	caddr_t paddr;
	register int size;
{
	register u_int page;

	page = (u_int)paddr & PG_FRAME;
	for (size = btoc(size); size; size--) {
		*(int *)pte = PG_V | prot | page;
		page += NBPG;
		pte++;
	}
	load_cr3(u.u_pcb.pcb_cr3);
}

/*
 * Move pages from one kernel virtual address to another.
 * Both addresses are assumed to reside in the Sysmap,
 * and size must be a multiple of CLSIZE.
 *
 * 0xfe000000 appears throughout this file as the kernel virtual base
 * that Sysmap indexing is relative to -- TODO confirm against the
 * machine headers.  The source PTEs are zeroed as they are moved.
 */
pagemove(from, to, size)
	register caddr_t from, to;
	int size;
{
	register struct pte *fpte, *tpte;

	if (size % CLBYTES)
		panic("pagemove");
	fpte = &Sysmap[btop(from -0xfe000000)];
	tpte = &Sysmap[btop(to -0xfe000000)];
	while (size > 0) {
		*tpte++ = *fpte;
		*(int *)fpte++ = 0;
		from += NBPG;
		to += NBPG;
		size -= NBPG;
	}
	load_cr3(u.u_pcb.pcb_cr3);
}

/*
 * The probe[rw] routines should probably be redone in assembler
 * for efficiency.
 */
/*
 * Probe whether user VA `addr' is readable by the current process:
 * returns 1 when the page lies in the text+data region (below the end
 * of data) or in the stack region (above the stack base), 0 otherwise
 * (including anything at or above USRSTACK).
 */
prober(addr)
	register u_int addr;
{
	register int page;
	register struct proc *p;

	if (addr >= USRSTACK)
		return(0);
	p = u.u_procp;
	page = btop(addr);
	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize))
		return(1);
	return(0);
}

/*
 * Probe whether user VA `addr' is writable by the current process:
 * same region test as prober(), but additionally requires the PTE's
 * protection bits to be exactly PG_UW (user read/write).
 */
probew(addr)
	register u_int addr;
{
	register int page;
	register struct proc *p;

	if (addr >= USRSTACK)
		return(0);
	p = u.u_procp;
	page = btop(addr);
	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize))
		return((*(int *)vtopte(p, page) & PG_PROT) == PG_UW);
	return(0);
}

/*
 * NB: assumes a physically contiguous kernel page table
 * (makes life a LOT simpler).
 *
 * Check that [addr, addr+count) is accessible kernel VA: first walks
 * the page-directory entries covering the range (the directory sits
 * just past the process page tables, as in vtopde()), then the Sysmap
 * PTEs.  Returns 1 if every entry is valid, 0 otherwise.  The `rw'
 * write-protection check is commented out.
 */
kernacc(addr, count, rw)
	register u_int addr;
	int count, rw;
{
	register struct pde *pde;
	register struct pte *pte;
	register int ix, cnt;
	extern long Syssize;

	if (count <= 0)
		return(0);
	pde = (struct pde *)((u_int)u.u_procp->p_p0br + u.u_procp->p_szpt * NBPG);
	ix = (addr & PD_MASK) >> PD_SHIFT;
	/* round end of range up to a directory-entry boundary */
	cnt = ((addr + count + (1 << PD_SHIFT) - 1) & PD_MASK) >> PD_SHIFT;
	cnt -= ix;
	for (pde += ix; cnt; cnt--, pde++)
		if (pde->pd_v == 0)
			return(0);
	ix = btop(addr-0xfe000000);
	cnt = btop(addr-0xfe000000+count+NBPG-1);
	if (cnt > (int)&Syssize)
		return(0);
	cnt -= ix;
	for (pte = &Sysmap[ix]; cnt; cnt--, pte++)
		if (pte->pg_v == 0 /*|| (rw == B_WRITE && pte->pg_prot == 1)*/)
			return(0);
	return(1);
}

/*
 * Check that the user range [addr, addr+count) is accessible for
 * `rw' (B_READ or write) by probing each page with prober/probew.
 * Returns 1 if every page passes, 0 otherwise.
 */
useracc(addr, count, rw)
	register u_int addr;
	int count, rw;
{
	register int (*func)();
	register u_int addr2;
	extern int prober(), probew();

	if (count <= 0)
		return(0);
	addr2 = addr;
	addr += count;
	func = (rw == B_READ) ? prober : probew;
	do {
		if ((*func)(addr2) == 0)
			return(0);
		/* advance to the start of the next page */
		addr2 = (addr2 + NBPG) & ~PGOFSET;
	} while (addr2 < addr);
	return(1);
}

/*
 * Convert kernel VA to physical address
 *
 * Looks the page frame up in Sysmap (VA relative to the 0xfe000000
 * kernel base) and panics on an unmapped page.
 */
kvtop(addr)
	register u_int addr;
{
	register int pf;

	pf = Sysmap[btop(addr-0xfe000000)].pg_pfnum;
	if (pf == 0)
		panic("kvtop: zero page frame");
	return((u_int)ptob(pf) + (addr & PGOFSET));
}

/*
 * Return a pointer to the page-directory entry of process `p' that
 * maps virtual address `va'.  The directory is located immediately
 * after the process's page tables (p_p0br + p_szpt pages).
 */
struct pde *
vtopde(p, va)
	register struct proc *p;
	register u_int va;
{
	register struct pde *pde;

	pde = (struct pde *)((u_int)p->p_p0br + p->p_szpt * NBPG);
	return(pde + ((va & PD_MASK) >> PD_SHIFT));
}


/*
 * Return the physical address (suitable for loading into CR3) of
 * process `p's page directory, found via its Usrptmap entry.
 */
initcr3(p)
	register struct proc *p;
{
	return(ctob(Usrptmap[btokmx(p->p_p0br+p->p_szpt*NPTEPG)].pg_pfnum));
}

/*
 * Initialize page directory table to reflect PTEs in Usrptmap.
 * Page directory table address is given by Usrptmap index of p_szpt.
 * [used by vgetpt for kernal mode entries, and ptexpand for user mode entries]
 *
 * Fills in, in order: 5 PDEs mapping the kernel (from Sysmap), one PDE
 * for the u. area, PDEs for user text+data, a zeroed hole, and PDEs for
 * the stack/u-area at the top.  Returns initcr3(p).  Note the bzero and
 * debugging printfs are commented out, and the original author flags a
 * known bug in the hole-clearing loop below.
 */
initpdt(p)
	register struct proc *p;
{
	register int i, k, sz;
	register struct pde *pde, *toppde;
	extern struct pde *vtopde();
	extern Sysbase;

	/* clear entire map */
	pde = vtopde(p, 0);
	/*bzero(pde, NBPG);*/
	/* map kernel */
	pde = vtopde(p, &Sysbase);
	for (i = 0; i < 5; i++, pde++) {
		*(int *)pde = PG_UW | PG_V;
		pde->pd_pfnum = btoc((unsigned) Sysmap & ~0xfe000000)+i;
	}
	/* map u dot */
	pde = vtopde(p, &u);
	*(int *)pde = PG_UW | PG_V;
	pde->pd_pfnum = Usrptmap[btokmx(p->p_addr)].pg_pfnum;
	/*printf("%d.u. pde %x pfnum %x virt %x\n", p->p_pid, pde, pde->pd_pfnum,
		p->p_addr);*/

	/* otherwise, fill in user map */
	k = btokmx(p->p_p0br);
	pde = vtopde(p, 0);
	toppde = vtopde(p, &u);

	/* text and data */
	sz = ctopt(p->p_tsize + p->p_dsize);
	/*dprintf(DEXPAND,"textdata 0 to %d\n",sz-1);*/
	for (i = 0; i < sz; i++, pde++) {
		*(int *)pde = PG_UW | PG_V;
		pde->pd_pfnum = Usrptmap[k++].pg_pfnum;
		/*dprintf(DEXPAND,"%d.pde %x pf %x\n", p->p_pid, pde, *(int *)pde);*/
	}
	/*
	 * Bogus! The kernelmap may map unused PT pages
	 * (since we don't shrink PTs) so we need to skip over
	 * those PDEs.  We should really free the unused PT
	 * pages in expand().
	 */
	sz += ctopt(p->p_ssize+UPAGES);
	if (sz < p->p_szpt)
		k += p->p_szpt - sz;
	/* hole */
	sz = NPTEPG - ctopt(p->p_ssize + UPAGES + btoc(&Sysbase));
	/*dprintf(DEXPAND,"zero %d upto %d\n", i, sz-1);*/
	for ( ; i < sz; i++, pde++)
	/* definite bug here... does not hit all entries, but point moot due
	   to bzero above XXX*/
	{
		*(int *)pde = 0;
		/*pg("pde %x pf %x", pde, *(int *)pde);*/
	}
	/* stack and u-area */
	sz = NPTEPG - ctopt(UPAGES + btoc(&Sysbase));
	/*dprintf(DEXPAND,"stack %d upto %d\n", i, sz-1);*/
	for ( ; i < sz; i++, pde++) {
		*(int *)pde = PG_UW | PG_V;
		pde->pd_pfnum = Usrptmap[k++].pg_pfnum;
		/*pg("pde %x pf %x", pde, *(int *)pde);*/
	}
	return(initcr3(p));
}

#ifdef notdef
/*
 * Allocate wired-down, non-paged, cache-inhibited pages in kernel
 * virtual memory and clear them
 *
 * (Compiled out.)  Returns the kernel VA of the allocation, or NULL
 * if the kernelmap resource map is exhausted.
 */
caddr_t
cimemall(n)
	int n;
{
	register int npg, a;
	register struct pte *pte;
	extern struct map *kernelmap;

	npg = clrnd(btoc(n));
	a = rmalloc(kernelmap, (long)npg);
	if (a == 0)
		return ((caddr_t)0);
	pte = &Usrptmap[a];
	(void) vmemall(pte, npg, &proc[0], CSYS);
	while (--npg >= 0) {
		*(int *)pte |= (PG_V|PG_KW|PG_CI);
		clearseg((unsigned)pte->pg_pfnum);
		pte++;
	}
	TBIAS();
	return ((caddr_t)kmxtob(a));
}
#endif

/* Kernel VA window and PTEs used to remap physical-I/O buffers. */
extern char usrio[];
extern struct pte Usriomap[];
struct map *useriomap;		/* resource map over the usrio window */
int usriowanted;		/* set when a vmapbuf() sleeper is waiting */

/*
 * Map an IO request into kernel virtual address space.  Requests fall into
 * one of five catagories:
 *
 *	B_PHYS|B_UAREA:	User u-area swap.
 *			Address is relative to start of u-area (p_addr).
 *	B_PHYS|B_PAGET:	User page table swap.
 *			Address is a kernel VA in usrpt (Usrptmap).
 *	B_PHYS|B_DIRTY:	Dirty page push.
 *			Address is a VA in proc2's address space.
 *	B_PHYS|B_PGIN:	Kernel pagein of user pages.
 *			Address is VA in user's address space.
 *	B_PHYS:		User "raw" IO request.
 *			Address is VA in user's address space.
 *
 * All requests are (re)mapped into kernel VA space via the useriomap
 * (a name with only slightly more meaning than "kernelmap")
 */
vmapbuf(bp)
	register struct buf *bp;
{
	register int npf, a;
	register caddr_t addr;
	register struct pte *pte, *iopte;
	register long flags = bp->b_flags;
	struct proc *p;
	int off, s;

	if ((flags & B_PHYS) == 0)
		panic("vmapbuf");
	/*
	 * Find PTEs for the area to be mapped
	 */
	p = flags&B_DIRTY ? &proc[2] : bp->b_proc;
	addr = bp->b_un.b_addr;
	if (flags & B_UAREA)
		pte = &p->p_addr[btop(addr)];
	else if (flags & B_PAGET)
		pte = &Usrptmap[btokmx((struct pte *)addr)];
	else
		pte = vtopte(p, btop(addr));

	/*
	 * Allocate some kernel PTEs and load them
	 */
	off = (int)addr & PGOFSET;
	npf = btoc(bp->b_bcount + off);
	s = splbio();
	/* may sleep here until enough map space is freed by vunmapbuf() */
	while ((a = rmalloc(useriomap, npf)) == 0) {
		usriowanted = 1;
		sleep((caddr_t)useriomap, PSWP);
	}
	splx(s);
	iopte = &Usriomap[a];
	/* original address is saved so vunmapbuf() can restore it */
	bp->b_saveaddr = bp->b_un.b_addr;
	addr = bp->b_un.b_addr = (caddr_t)(usrio + (a << PGSHIFT)) + off;
	while (npf--) {
		mapin(iopte, (u_int)addr, pte->pg_pfnum, PG_KW|PG_V);
		iopte++, pte++;
		addr += NBPG;
	}
	load_cr3(u.u_pcb.pcb_cr3);
}

/*
 * Free the io map PTEs associated with this IO operation.
 * We also invalidate the TLB entries and restore the original b_addr.
 */
vunmapbuf(bp)
	register struct buf *bp;
{
	register int a, npf;
	register caddr_t addr = bp->b_un.b_addr;
	register struct pte *pte;
	int s;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");
	/* recover the resource-map index and page count from the mapping */
	a = (int)(addr - usrio) >> PGSHIFT;
	npf = btoc(bp->b_bcount + ((int)addr & PGOFSET));
	s = splbio();
	rmfree(useriomap, npf, a);
	if (usriowanted) {
		usriowanted = 0;
		wakeup((caddr_t)useriomap);
	}
	splx(s);
	pte = &Usriomap[a];
	while (npf--) {
		*(int *)pte = 0;
		addr += NBPG;
		pte++;
	}
	load_cr3(u.u_pcb.pcb_cr3);
	bp->b_un.b_addr = bp->b_saveaddr;
	bp->b_saveaddr = NULL;
}