/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the University of Utah, and William Jolitz.
 *
 * %sccs.include.386.c%
 *
 *	@(#)vm_machdep.c	5.1 (Berkeley) 04/24/90
 */

#include "pte.h"

#include "param.h"
#include "systm.h"
#include "dir.h"
#include "user.h"
#include "proc.h"
#include "cmap.h"
#include "mount.h"
#include "vm.h"
#include "text.h"

#include "buf.h"

#include "dbg.h"

/*
 * Set a red zone in the kernel stack after the u. area.
 */
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
	/* eventually do this by setting up an expand-down stack segment
	   for ss0: selector, allowing stack access down to top of u.
	   this means though that protection violations need to be handled
	   thru a double fault exception that must do an integral task
	   switch to a known good context, within which a dump can be
	   taken.  a sensible scheme might be to save the initial context
	   used by sched (that has physical memory mapped 1:1 at bottom)
	   and take the dump while still in mapped mode */
}

/*
 * Check for valid program size.
 * NB - Check data and data growth separately as they may overflow
 * when summed together.
 */
chksize(ts, ids, uds, ss)
	unsigned ts, ids, uds, ss;
{
	extern unsigned maxtsize;

	if (ctob(ts) > maxtsize ||
	    ctob(ids) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
	    ctob(uds) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
	    ctob(ids + uds) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
	    ctob(ss) > u.u_rlimit[RLIMIT_STACK].rlim_cur) {
		u.u_error = ENOMEM;
		return (1);
	}
	return (0);
}

/*
 * Notify the hardware of new PTEs; on the 386 this is simply a
 * full TLB flush, done by reloading cr3 with its current value.
 */
/*ARGSUSED*/
newptes(pte, v, size)
	struct pte *pte;
	u_int v;
	register int size;
{
	register caddr_t a;

#ifdef lint
	pte = pte;
#endif
	load_cr3(_cr3());
}

/*
 * Change protection codes of text segment.
 * Have to flush translation buffer since this
 * affects the virtual memory mapping of the current process.
 */
chgprot(addr, tprot)
	caddr_t addr;
	long tprot;
{
	unsigned v;
	int tp;
	register struct pte *pte;
	register struct cmap *c;

	v = clbase(btop(addr));
	if (!isatsv(u.u_procp, v)) {
		u.u_error = EFAULT;
		return (0);
	}
	tp = vtotp(u.u_procp, v);
	pte = tptopte(u.u_procp, tp);
	if (pte->pg_fod == 0 && pte->pg_pfnum) {
		c = &cmap[pgtocm(pte->pg_pfnum)];
		if (c->c_blkno && c->c_mdev != MSWAPX)
			munhash(mount[c->c_mdev].m_dev,
			    (daddr_t)(u_long)c->c_blkno);
	}
	*(u_int *)pte &= ~PG_PROT;
	*(u_int *)pte |= tprot;
	load_cr3(_cr3());
	return (1);
}

/*
 * Change the protection of every text-segment PTE to tprot,
 * then flush the translation buffer.
 */
settprot(tprot)
	long tprot;
{
	register u_int *ptaddr, i;

	ptaddr = (u_int *)u.u_procp->p_p0br;
	for (i = 0; i < u.u_tsize; i++) {
		ptaddr[i] &= ~PG_PROT;
		ptaddr[i] |= tprot;
	}
	load_cr3(_cr3());
}
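/*
 * Note: the load_cr3(_cr3()) idiom used throughout this file rewrites
 * cr3 with its current value, which on the 386 invalidates the entire
 * TLB.  A minimal sketch of a wrapper for the idiom (hypothetical,
 * not part of this file):
 */
#ifdef notdef
tlbflush()
{

	load_cr3(_cr3());
}
#endif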
/*
 * Simulate effect of VAX region length registers.
 * The one case where we must do anything is if a region has shrunk.
 * In that case we must invalidate all the PTEs for the no longer valid VAs.
 */
setptlr(region, nlen)
	int region, nlen;
{
	register struct pte *pte;
	register int change;
	int olen;

	if (region == 0) {
		olen = u.u_pcb.pcb_p0lr;
		u.u_pcb.pcb_p0lr = nlen;
	} else {
		olen = P1PAGES - u.u_pcb.pcb_p1lr;
		u.u_pcb.pcb_p1lr = nlen;
		nlen = P1PAGES - nlen;
	}
	/*pg("setptlr(%x,%x), was %d", region, nlen, olen);*/
	if ((change = olen - nlen) <= 0)
		return;
	if (region == 0)
		pte = u.u_pcb.pcb_p0br + u.u_pcb.pcb_p0lr;
	else
		pte = u.u_pcb.pcb_p1br + u.u_pcb.pcb_p1lr - change;
	/*printf("p0b %x p0l %x", u.u_pcb.pcb_p0br, u.u_pcb.pcb_p0lr);
	printf("p1b %x p1l %x pte %x", u.u_pcb.pcb_p1br, u.u_pcb.pcb_p1lr, pte);*/
	do {
		*(u_int *)pte++ = 0;
	} while (--change);
	/* short cut newptes */
	load_cr3(_cr3());
}

/*
 * Map `size' bytes of physical memory starting at `paddr' into
 * kernel VA space using PTEs starting at `pte'.  Read/write and
 * cache-inhibit status are specified by `prot'.
 */
physaccess(pte, paddr, size, prot)
	register struct pte *pte;
	caddr_t paddr;
	register int size;
	int prot;
{
	register u_int page;

	page = (u_int)paddr & PG_FRAME;
	for (size = btoc(size); size; size--) {
		*(int *)pte = PG_V | prot | page;
		page += NBPG;
		pte++;
	}
	load_cr3(_cr3());
}
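/*
 * Example use of physaccess() (a sketch with hypothetical values):
 * mapping 64Kb of display memory at physical address 0xa0000 into
 * the kernel at virtual address va, kernel read/write, might look
 * like:
 *
 *	physaccess(&Sysmap[btop(va - 0xfe000000)], (caddr_t)0xa0000,
 *	    64*1024, PG_KW);
 */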
/*
 * Move pages from one kernel virtual address to another.
 * Both addresses are assumed to reside in the Sysmap,
 * and size must be a multiple of CLBYTES.
 */
pagemove(from, to, size)
	register caddr_t from, to;
	int size;
{
	register struct pte *fpte, *tpte;

	if (size % CLBYTES)
		panic("pagemove");
	fpte = &Sysmap[btop(from)];
	tpte = &Sysmap[btop(to)];
	while (size > 0) {
		*tpte++ = *fpte;
		*(int *)fpte++ = 0;
		from += NBPG;
		to += NBPG;
		size -= NBPG;
	}
	load_cr3(_cr3());
}

/*
 * The probe[rw] routines should probably be redone in assembler
 * for efficiency.
 */
prober(addr)
	register u_int addr;
{
	register int page;
	register struct proc *p;

	if (addr >= USRSTACK)
		return (0);
	p = u.u_procp;
	page = btop(addr);
	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize))
		return (1);
	return (0);
}

probew(addr)
	register u_int addr;
{
	register int page;
	register struct proc *p;

	if (addr >= USRSTACK)
		return (0);
	p = u.u_procp;
	page = btop(addr);
	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize)) {
		dprintf(DPHYS, "vtopte %x %x\n", vtopte(p, page),
		    *(int *)vtopte(p, page));
		return ((*(int *)vtopte(p, page) & PG_PROT) == PG_UW);
	}
	return (0);
}

/*
 * NB: assumes a physically contiguous kernel page table
 * (makes life a LOT simpler).
 */
kernacc(addr, count, rw)
	register u_int addr;
	int count, rw;
{
	register struct pde *pde;
	register struct pte *pte;
	register int ix, cnt;
	extern long Syssize;

	dprintf(DPHYS, "kernacc %x count %d rw %d", addr, count, rw);
	if (count <= 0)
		return (0);
	pde = (struct pde *)
	    ((u_int)u.u_procp->p_p0br + u.u_procp->p_szpt * NBPG);
	ix = (addr & PD_MASK) >> PD_SHIFT;
	cnt = ((addr + count + (1 << PD_SHIFT) - 1) & PD_MASK) >> PD_SHIFT;
	cnt -= ix;
	for (pde += ix; cnt; cnt--, pde++)
		if (pde->pd_v == 0) {
			dprintf(DPHYS, "nope pde %x, idx %x\n", pde, ix);
			return (0);
		}
	ix = btop(addr - 0xfe000000);
	cnt = btop(addr - 0xfe000000 + count + NBPG - 1);
	if (cnt > (int)&Syssize) {
		dprintf(DPHYS, "nope cnt %x\n", cnt);
		return (0);
	}
	cnt -= ix;
	for (pte = &Sysmap[ix]; cnt; cnt--, pte++)
		if (pte->pg_v == 0 /*|| (rw == B_WRITE && pte->pg_prot == 1)*/) {
			dprintf(DPHYS, "nope pte %x %x, idx %x\n",
			    pte, *(int *)pte, ix);
			return (0);
		}
	dprintf(DPHYS, "yup\n");
	return (1);
}

useracc(addr, count, rw)
	register u_int addr;
	int count, rw;
{
	register int (*func)();
	register u_int addr2;
	extern int prober(), probew();

	dprintf(DPHYS, "useracc %x count %d rw %d", addr, count, rw);
	if (count <= 0)
		return (0);
	addr2 = addr;
	addr += count;
	func = (rw == B_READ) ? prober : probew;
	do {
		if ((*func)(addr2) == 0) {
			dprintf(DPHYS, "nope %x\n", addr2);
			return (0);
		}
		addr2 = (addr2 + NBPG) & ~PGOFSET;
	} while (addr2 < addr);
	dprintf(DPHYS, "yup\n");
	return (1);
}

/*
 * Convert kernel VA to physical address.
 */
kvtop(addr)
	register u_int addr;
{
	register int pf;

	pf = Sysmap[btop(addr - 0xfe000000)].pg_pfnum;
	if (pf == 0)
		panic("kvtop: zero page frame");
	return ((u_int)ptob(pf) + (addr & PGOFSET));
}

/*
 * Return the address of the PDE mapping virtual address va in
 * process p.
 */
struct pde *
vtopde(p, va)
	register struct proc *p;
	register u_int va;
{
	register struct pde *pde;

	pde = (struct pde *)((u_int)p->p_p0br + p->p_szpt * NBPG);
	return (pde + ((va & PD_MASK) >> PD_SHIFT));
}

/*
 * Return the page frame number of the process page directory
 * (for loading into cr3).
 */
initcr3(p)
	register struct proc *p;
{
	return ((int)Usrptmap[btokmx(p->p_p0br) + p->p_szpt].pg_pfnum);
}
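/*
 * Sketch of the layout the two routines above rely on: the page
 * directory occupies the page immediately following the process
 * page tables, so for process p
 *
 *	pdebase = (struct pde *)((u_int)p->p_p0br + p->p_szpt * NBPG);
 *
 * and initcr3() returns the physical frame of that same page via
 * the Usrptmap entry at index btokmx(p->p_p0br) + p->p_szpt.
 */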
/*
 * Initialize page directory table to reflect PTEs in Usrptmap.
 * Page directory table address is given by Usrptmap index of p_szpt.
 * [used by vgetpt for kernel mode entries, and ptexpand for user
 * mode entries]
 */
initpdt(p, usr)
	register struct proc *p;
	int usr;
{
	register int i, k, sz;
	register struct pde *pde, *toppde;
	extern struct pde *vtopde();
	extern Sysbase;

	/*pg("initpdt");*/
	if (!usr) {
		/* clear entire map */
		pde = vtopde(p, 0);
		bzero(pde, NBPG);
		/* map kernel */
		pde = vtopde(p, &Sysbase);
		for (i = 0; i < 5; i++, pde++) {
			*(int *)pde = PG_UW | PG_V;
			pde->pd_pfnum = btoc((unsigned)Sysmap & ~0xfe000000) + i;
		}
		/* map u dot */
		pde = vtopde(p, &u);
		*(int *)pde = PG_UW | PG_V;
		pde->pd_pfnum = Usrptmap[btokmx(p->p_addr)].pg_pfnum;
		/*printf("%d.u. pde %x pfnum %x virt %x\n", p->p_pid, pde,
		    pde->pd_pfnum, p->p_addr);*/
		return;
	}

	/* otherwise, fill in user map */
	k = btokmx(p->p_p0br);
	pde = vtopde(p, 0);
	toppde = vtopde(p, &u);

	/* text and data */
	sz = ctopt(p->p_tsize + p->p_dsize);
	dprintf(DEXPAND, "textdata 0 to %d\n", sz - 1);
	for (i = 0; i < sz; i++, pde++) {
		*(int *)pde = PG_UW | PG_V;
		pde->pd_pfnum = Usrptmap[k++].pg_pfnum;
		/*dprintf(DEXPAND,"%d.pde %x pf %x\n", p->p_pid, pde, *(int *)pde);*/
	}
	/*
	 * Bogus!  The kernelmap may map unused PT pages
	 * (since we don't shrink PTs) so we need to skip over
	 * those PDEs.  We should really free the unused PT
	 * pages in expand().
	 */
	sz += ctopt(p->p_ssize + UPAGES);
	if (sz < p->p_szpt)
		k += p->p_szpt - sz;
	/* hole */
	sz = NPTEPG - ctopt(p->p_ssize + UPAGES + btoc(&Sysbase));
	dprintf(DEXPAND, "zero %d upto %d\n", i, sz - 1);
	for (; i < sz; i++, pde++) {
		/* definite bug here... does not hit all entries, but
		   point moot due to bzero above XXX */
		*(int *)pde = 0;
		/*pg("pde %x pf %x", pde, *(int *)pde);*/
	}
	/* stack and u-area */
	sz = NPTEPG - ctopt(UPAGES + btoc(&Sysbase));
	dprintf(DEXPAND, "stack %d upto %d\n", i, sz - 1);
	for (; i < sz; i++, pde++) {
		*(int *)pde = PG_UW | PG_V;
		pde->pd_pfnum = Usrptmap[k++].pg_pfnum;
		/*pg("pde %x pf %x", pde, *(int *)pde);*/
	}
}
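/*
 * Rough picture of the user page directory initpdt() builds above,
 * from low PDE indices to high (a sketch derived from the code, not
 * authoritative):
 *
 *	[0, ctopt(tsize+dsize))				text + data PTs
 *	[.., NPTEPG - ctopt(ssize+UPAGES+btoc(&Sysbase)))	invalid (the hole)
 *	[.., NPTEPG - ctopt(UPAGES+btoc(&Sysbase)))	stack + u-area PTs
 *	remaining top entries				kernel and u dot
 *							(set up in the !usr case)
 */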
#ifdef notdef
/*
 * Allocate wired-down, non-paged, cache-inhibited pages in kernel
 * virtual memory and clear them.
 */
caddr_t
cimemall(n)
	int n;
{
	register int npg, a;
	register struct pte *pte;
	extern struct map *kernelmap;

	npg = clrnd(btoc(n));
	a = rmalloc(kernelmap, (long)npg);
	if (a == 0)
		return ((caddr_t)0);
	pte = &Usrptmap[a];
	(void) vmemall(pte, npg, &proc[0], CSYS);
	while (--npg >= 0) {
		*(int *)pte |= (PG_V|PG_KW|PG_CI);
		clearseg((unsigned)pte->pg_pfnum);
		pte++;
	}
	TBIAS();
	return ((caddr_t)kmxtob(a));
}
#endif

extern char usrio[];
extern struct pte Usriomap[];
struct map *useriomap;
int usriowanted;

/*
 * Map an IO request into kernel virtual address space.  Requests fall
 * into one of five categories:
 *
 *	B_PHYS|B_UAREA:	User u-area swap.
 *			Address is relative to start of u-area (p_addr).
 *	B_PHYS|B_PAGET:	User page table swap.
 *			Address is a kernel VA in usrpt (Usrptmap).
 *	B_PHYS|B_DIRTY:	Dirty page push.
 *			Address is a VA in proc2's address space.
 *	B_PHYS|B_PGIN:	Kernel pagein of user pages.
 *			Address is VA in user's address space.
 *	B_PHYS:		User "raw" IO request.
 *			Address is VA in user's address space.
 *
 * All requests are (re)mapped into kernel VA space via the useriomap
 * (a name with only slightly more meaning than "kernelmap").
 */
vmapbuf(bp)
	register struct buf *bp;
{
	register int npf, a;
	register caddr_t addr;
	register struct pte *pte, *iopte;
	register long flags = bp->b_flags;
	struct proc *p;
	int off, s;

	if ((flags & B_PHYS) == 0)
		panic("vmapbuf");
	/*
	 * Find PTEs for the area to be mapped.
	 */
	p = flags&B_DIRTY ? &proc[2] : bp->b_proc;
	addr = bp->b_un.b_addr;
	if (flags & B_UAREA)
		pte = &p->p_addr[btop(addr)];
	else if (flags & B_PAGET)
		pte = &Usrptmap[btokmx((struct pte *)addr)];
	else
		pte = vtopte(p, btop(addr));
	/*
	 * Allocate some kernel PTEs and load them.
	 */
	off = (int)addr & PGOFSET;
	npf = btoc(bp->b_bcount + off);
	s = splbio();
	while ((a = rmalloc(useriomap, npf)) == 0) {
		usriowanted = 1;
		sleep((caddr_t)useriomap, PSWP);
	}
	splx(s);
	iopte = &Usriomap[a];
	addr = bp->b_un.b_addr = (caddr_t)(usrio + (a << PGSHIFT)) + off;
	a = btop(addr);
	while (npf--) {
		mapin(iopte, a, pte->pg_pfnum, PG_V);
		iopte++, pte++;
		a++;
	}
}

/*
 * Free the io map PTEs associated with this IO operation.
 * We also invalidate the TLB entries.
 */
vunmapbuf(bp)
	register struct buf *bp;
{
	register int a, npf;
	register caddr_t addr = bp->b_un.b_addr;
	register struct pte *pte;
	int s;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");
	a = (int)(addr - usrio) >> PGSHIFT;
	npf = btoc(bp->b_bcount + ((int)addr & PGOFSET));
	s = splbio();
	rmfree(useriomap, npf, a);
	if (usriowanted) {
		usriowanted = 0;
		wakeup((caddr_t)useriomap);
	}
	splx(s);
	pte = &Usriomap[a];
	while (npf--) {
		*(int *)pte = 0;
		addr += NBPG;
		pte++;
	}
	/*
	 * If we just completed a dirty page push, we must reconstruct
	 * the original b_addr since cleanup() needs it.
	 */
	if (bp->b_flags & B_DIRTY) {
		a = ((bp - swbuf) * CLSIZE) * KLMAX;
		bp->b_un.b_addr = (caddr_t)ctob(dptov(&proc[2], a));
	}
	load_cr3(_cr3());
}
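/*
 * Typical use of the pair above (a sketch; the strategy routine and
 * driver are hypothetical):
 *
 *	bp->b_flags |= B_PHYS | B_READ;
 *	vmapbuf(bp);			map the pages into kernel VA
 *	(*xxstrategy)(bp);		start the transfer
 *	biowait(bp);			wait for completion
 *	vunmapbuf(bp);			release the io map PTEs
 */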