/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the University of Utah, and William Jolitz.
 *
 * %sccs.include.386.c%
 *
 *	@(#)vm_machdep.c	5.5 (Berkeley) 11/25/90
 */

/*
 * Copyright (c) 1989, 1990 William F. Jolitz
 */

/*
 * Copyright (c) 1988 University of Utah.
 * All rights reserved.  The Utah Software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 */
/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	@(#)vm_machdep.c	7.1 (Berkeley) 6/5/86
 */

#include "pte.h"

#include "param.h"
#include "systm.h"
#include "dir.h"
#include "user.h"
#include "proc.h"
#include "cmap.h"
#include "mount.h"
#include "vm.h"
#include "text.h"

#include "buf.h"

/*
 * Set a red zone in the kernel stack after the u. area.
 */
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
	/* eventually do this by setting up an expand-down stack segment
	   for ss0: selector, allowing stack access down to top of u.
	   this means though that protection violations need to be handled
	   thru a double fault exception that must do an integral task
	   switch to a known good context, within which a dump can be
	   taken. a sensible scheme might be to save the initial context
	   used by sched (that has physical memory mapped 1:1 at bottom)
	   and take the dump while still in mapped mode */
}

/*
 * Check for valid program size
 * NB - Check data and data growth separately as they may overflow
 * when summed together.
 */
chksize(ts, ids, uds, ss)
	unsigned ts, ids, uds, ss;
{
	extern unsigned maxtsize;

	if (ctob(ts) > maxtsize ||
	    ctob(ids) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
	    ctob(uds) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
	    ctob(ids + uds) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
	    ctob(ss) > u.u_rlimit[RLIMIT_STACK].rlim_cur) {
		u.u_error = ENOMEM;
		return (1);
	}
	return (0);
}

/*ARGSUSED*/
newptes(pte, v, size)
	struct pte *pte;
	u_int v;
	register int size;
{
	register caddr_t a;

#ifdef lint
	pte = pte;
#endif
	load_cr3(u.u_pcb.pcb_ptd);
}
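#ifdef notdef
/*
 * Illustrative only (hypothetical values, not part of the original
 * file): why chksize() tests `ids' and `uds' both separately and
 * summed.  In 32-bit unsigned arithmetic ctob(ids + uds) can wrap
 * past zero even though each operand is individually huge, so the
 * summed test alone could let an absurd total slip under the rlimit.
 */
chksize_overflow_demo()
{
	unsigned ids = 0x80000000 >> PGSHIFT;	/* 2GB of data, in clicks */
	unsigned uds = 0x80000000 >> PGSHIFT;	/* another 2GB */

	/* wraps to 0: would pass a limit check on the sum alone */
	return (ctob(ids + uds) == 0);
}
#endif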
/*
 * Change protection codes of text segment.
 * Have to flush translation buffer since this
 * affects virtual memory mapping of the current process.
 */
chgprot(addr, tprot)
	caddr_t addr;
	long tprot;
{
	unsigned v;
	int tp;
	register struct pte *pte;
	register struct cmap *c;

	v = clbase(btop(addr));
	if (!isatsv(u.u_procp, v)) {
		u.u_error = EFAULT;
		return (0);
	}
	tp = vtotp(u.u_procp, v);
	pte = tptopte(u.u_procp, tp);
	if (pte->pg_fod == 0 && pte->pg_pfnum) {
		c = &cmap[pgtocm(pte->pg_pfnum)];
		if (c->c_blkno && c->c_mdev != MSWAPX)
			munhash(mount[c->c_mdev].m_dev,
			    (daddr_t)(u_long)c->c_blkno);
	}
	*(u_int *)pte &= ~PG_PROT;
	*(u_int *)pte |= tprot;
	load_cr3(u.u_pcb.pcb_ptd);
	return (1);
}

settprot(tprot)
	long tprot;
{
	register u_int *ptaddr, i;

	ptaddr = (u_int *)u.u_procp->p_p0br;
	for (i = 0; i < u.u_tsize; i++) {
		ptaddr[i] &= ~PG_PROT;
		ptaddr[i] |= tprot;
	}
	load_cr3(u.u_pcb.pcb_ptd);
}

/*
 * Simulate effect of VAX region length registers.
 * The one case where we must do anything is if a region has shrunk.
 * In that case we must invalidate all the PTEs for the no longer valid VAs.
 */
setptlr(region, nlen)
	int region, nlen;
{
	register struct pte *pte;
	register int change;
	int olen;

	if (region == 0) {
		olen = u.u_pcb.pcb_p0lr;
		u.u_pcb.pcb_p0lr = nlen;
	} else {
		olen = P1PAGES - u.u_pcb.pcb_p1lr;
		u.u_pcb.pcb_p1lr = nlen;
		nlen = P1PAGES - nlen;
	}
	if ((change = olen - nlen) <= 0)
		return;
	if (region == 0)
		pte = u.u_pcb.pcb_p0br + u.u_pcb.pcb_p0lr;
	else
		pte = u.u_pcb.pcb_p1br + u.u_pcb.pcb_p1lr - change;
	do {
		*(u_int *)pte++ = 0;
	} while (--change);
	/* short cut newptes */
	load_cr3(u.u_pcb.pcb_ptd);
}

/*
 * Map `size' bytes of physical memory starting at `paddr' into
 * kernel VA space using PTEs starting at `pte'.  Read/write and
 * cache-inhibit status are specified by `prot'.
 */
physaccess(pte, paddr, size, prot)
	register struct pte *pte;
	caddr_t paddr;
	register int size;
	int prot;
{
	register u_int page;

	page = (u_int)paddr & PG_FRAME;
	for (size = btoc(size); size; size--) {
		*(int *)pte = PG_V | prot | page;
		page += NBPG;
		pte++;
	}
	load_cr3(u.u_pcb.pcb_ptd);
}

/*
 * Move pages from one kernel virtual address to another.
 * Both addresses are assumed to reside in the Sysmap,
 * and size must be a multiple of CLBYTES.
 */
pagemove(from, to, size)
	register caddr_t from, to;
	int size;
{
	register struct pte *fpte, *tpte;

	if (size % CLBYTES)
		panic("pagemove");
	fpte = &Sysmap[btop(from - 0xfe000000)];
	tpte = &Sysmap[btop(to - 0xfe000000)];
	while (size > 0) {
		*tpte++ = *fpte;
		*(int *)fpte++ = 0;
		from += NBPG;
		to += NBPG;
		size -= NBPG;
	}
	load_cr3(u.u_pcb.pcb_ptd);
}
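#ifdef notdef
/*
 * Illustrative only (hypothetical fragment, not in the original
 * file): a driver mapping device memory with physaccess().  `fbpte'
 * and the 0xA0000 frame-buffer address are invented for the example;
 * PG_KW|PG_CI is assumed to be the "kernel read/write, cache-
 * inhibited" protection combination (cf. cimemall() below).
 */
xxmapfb()
{
	extern struct pte fbpte[];	/* PTEs reserved by the caller */

	/* map 64K of display memory, uncached, kernel read/write */
	physaccess(fbpte, (caddr_t)0xA0000, 64 * 1024, PG_KW | PG_CI);
}
#endif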
/*
 * The probe[rw] routines should probably be redone in assembler
 * for efficiency.
 */
prober(addr)
	register u_int addr;
{
	register int page;
	register struct proc *p;

	if (addr >= USRSTACK)
		return (0);
	p = u.u_procp;
	page = btop(addr);
	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize))
		return (1);
	return (0);
}

probew(addr)
	register u_int addr;
{
	register int page;
	register struct proc *p;

	if (addr >= USRSTACK)
		return (0);
	p = u.u_procp;
	page = btop(addr);
	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize))
		return ((*(int *)vtopte(p, page) & PG_PROT) == PG_UW);
	return (0);
}

/*
 * NB: assumes a physically contiguous kernel page table
 * (makes life a LOT simpler).
 */
kernacc(addr, count, rw)
	register u_int addr;
	int count, rw;
{
	register struct pde *pde;
	register struct pte *pte;
	register int ix, cnt;
	extern long Syssize;

	if (count <= 0)
		return (0);
	pde = (struct pde *)((u_int)u.u_procp->p_p0br + u.u_procp->p_szpt * NBPG);
	ix = (addr & PD_MASK) >> PD_SHIFT;
	cnt = ((addr + count + (1 << PD_SHIFT) - 1) & PD_MASK) >> PD_SHIFT;
	cnt -= ix;
	for (pde += ix; cnt; cnt--, pde++)
		if (pde->pd_v == 0)
			return (0);
	ix = btop(addr - 0xfe000000);
	cnt = btop(addr - 0xfe000000 + count + NBPG - 1);
	if (cnt > (int)&Syssize)
		return (0);
	cnt -= ix;
	for (pte = &Sysmap[ix]; cnt; cnt--, pte++)
		if (pte->pg_v == 0 /*|| (rw == B_WRITE && pte->pg_prot == 1)*/)
			return (0);
	return (1);
}

useracc(addr, count, rw)
	register u_int addr;
	int count, rw;
{
	register int (*func)();
	register u_int addr2;
	extern int prober(), probew();

	if (count <= 0)
		return (0);
	addr2 = addr;
	addr += count;
	func = (rw == B_READ) ? prober : probew;
	do {
		if ((*func)(addr2) == 0)
			return (0);
		addr2 = (addr2 + NBPG) & ~PGOFSET;
	} while (addr2 < addr);
	return (1);
}

/*
 * Convert kernel VA to physical address
 */
kvtop(addr)
	register u_int addr;
{
	register int pf;

	pf = Sysmap[btop(addr - 0xfe000000)].pg_pfnum;
	if (pf == 0)
		panic("kvtop: zero page frame");
	return ((u_int)ptob(pf) + (addr & PGOFSET));
}

struct pde *
vtopde(p, va)
	register struct proc *p;
	register u_int va;
{
	register struct pde *pde;

	pde = (struct pde *)((u_int)p->p_p0br + p->p_szpt * NBPG);
	return (pde + ((va & PD_MASK) >> PD_SHIFT));
}

initcr3(p)
	register struct proc *p;
{
	return (ctob(Usrptmap[btokmx(p->p_p0br + p->p_szpt * NPTEPG)].pg_pfnum));
	/*return ((int)Usrptmap[btokmx(p->p_p0br) + p->p_szpt].pg_pfnum);*/
}
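#ifdef notdef
/*
 * Illustrative only (hypothetical helper, not in the original file):
 * a driver would call useracc() to validate a user buffer before a
 * transfer, e.g. checking for write access before filling the buffer
 * from a device.
 */
xxcheckbuf(base, len)
	caddr_t base;
	int len;
{
	if (useracc((u_int)base, len, B_WRITE) == 0) {
		u.u_error = EFAULT;	/* not fully mapped or not writable */
		return (0);
	}
	return (1);
}
#endif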
/*
 * Initialize page directory table to reflect PTEs in Usrptmap.
 * Page directory table address is given by Usrptmap index of p_szpt.
 * [used by vgetpt for kernel mode entries, and ptexpand for user mode entries]
 */
initpdt(p)
	register struct proc *p;
{
	register int i, k, sz;
	register struct pde *pde, *toppde;
	extern struct pde *vtopde();
	extern Sysbase;

	/* clear entire map */
	pde = vtopde(p, 0);
	/*bzero(pde, NBPG); */
	/* map kernel */
	pde = vtopde(p, &Sysbase);
	for (i = 0; i < 5; i++, pde++) {
		*(int *)pde = PG_UW | PG_V;
		pde->pd_pfnum = btoc((unsigned)Sysmap & ~0xfe000000) + i;
	}
	/* map u dot */
	pde = vtopde(p, &u);
	*(int *)pde = PG_UW | PG_V;
	pde->pd_pfnum = Usrptmap[btokmx(p->p_addr)].pg_pfnum;

	/* otherwise, fill in user map */
	k = btokmx(p->p_p0br);
	pde = vtopde(p, 0);
	toppde = vtopde(p, &u);

	/* text and data */
	sz = ctopt(p->p_tsize + p->p_dsize);
	for (i = 0; i < sz; i++, pde++) {
		*(int *)pde = PG_UW | PG_V;
		pde->pd_pfnum = Usrptmap[k++].pg_pfnum;
	}
	/*
	 * Bogus!  The kernelmap may map unused PT pages
	 * (since we don't shrink PTs) so we need to skip over
	 * those PDEs.  We should really free the unused PT
	 * pages in expand().
	 */
	sz += ctopt(p->p_ssize + UPAGES);
	if (sz < p->p_szpt)
		k += p->p_szpt - sz;
	/* hole */
	sz = NPTEPG - ctopt(p->p_ssize + UPAGES + btoc(&Sysbase));
	for ( ; i < sz; i++, pde++)
		*(int *)pde = 0;
	/* stack and u-area */
	sz = NPTEPG - ctopt(UPAGES + btoc(&Sysbase));
	for ( ; i < sz; i++, pde++) {
		*(int *)pde = PG_UW | PG_V;
		pde->pd_pfnum = Usrptmap[k++].pg_pfnum;
	}
	return (initcr3(p));
}
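/*
 * Recap of what initpdt() builds (derived from the code above): user
 * PDEs 0..ctopt(tsize+dsize)-1 map text and data; the Usrptmap index
 * then skips any unused PT pages (the "Bogus!" case); a run of zeroed
 * PDEs forms the hole; the PDEs just below the kernel map the stack
 * and u-area.  Independent of those, five PDEs at Sysbase alias the
 * kernel's Sysmap and one PDE at `u' maps the page table for this
 * process's u-area.  initcr3() then returns the physical address of
 * the directory, suitable for loading into %cr3.
 */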
#ifdef notdef
/*
 * Allocate wired-down, non-paged, cache-inhibited pages in kernel
 * virtual memory and clear them
 */
caddr_t
cimemall(n)
	int n;
{
	register int npg, a;
	register struct pte *pte;
	extern struct map *kernelmap;

	npg = clrnd(btoc(n));
	a = rmalloc(kernelmap, (long)npg);
	if (a == 0)
		return ((caddr_t)0);
	pte = &Usrptmap[a];
	(void) vmemall(pte, npg, &proc[0], CSYS);
	while (--npg >= 0) {
		*(int *)pte |= (PG_V|PG_KW|PG_CI);
		clearseg((unsigned)pte->pg_pfnum);
		pte++;
	}
	TBIAS();
	return ((caddr_t)kmxtob(a));
}
#endif

extern char usrio[];
extern struct pte Usriomap[];
struct map *useriomap;
int usriowanted;

/*
 * Map an IO request into kernel virtual address space.  Requests fall into
 * one of five categories:
 *
 *	B_PHYS|B_UAREA:	User u-area swap.
 *			Address is relative to start of u-area (p_addr).
 *	B_PHYS|B_PAGET:	User page table swap.
 *			Address is a kernel VA in usrpt (Usrptmap).
 *	B_PHYS|B_DIRTY:	Dirty page push.
 *			Address is a VA in proc2's address space.
 *	B_PHYS|B_PGIN:	Kernel pagein of user pages.
 *			Address is VA in user's address space.
 *	B_PHYS:		User "raw" IO request.
 *			Address is VA in user's address space.
 *
 * All requests are (re)mapped into kernel VA space via the useriomap
 * (a name with only slightly more meaning than "kernelmap").
 */
vmapbuf(bp)
	register struct buf *bp;
{
	register int npf, a;
	register caddr_t addr;
	register struct pte *pte, *iopte;
	register long flags = bp->b_flags;
	struct proc *p;
	int off, s;

	if ((flags & B_PHYS) == 0)
		panic("vmapbuf");
	/*
	 * Find PTEs for the area to be mapped
	 */
	p = flags & B_DIRTY ? &proc[2] : bp->b_proc;
	addr = bp->b_un.b_addr;
	if (flags & B_UAREA)
		pte = &p->p_addr[btop(addr)];
	else if (flags & B_PAGET)
		pte = &Usrptmap[btokmx((struct pte *)addr)];
	else
		pte = vtopte(p, btop(addr));
	/*
	 * Allocate some kernel PTEs and load them
	 */
	off = (int)addr & PGOFSET;
	npf = btoc(bp->b_bcount + off);
	s = splbio();
	while ((a = rmalloc(useriomap, npf)) == 0) {
		usriowanted = 1;
		sleep((caddr_t)useriomap, PSWP);
	}
	splx(s);
	iopte = &Usriomap[a];
	addr = bp->b_un.b_addr = (caddr_t)(usrio + (a << PGSHIFT)) + off;
	a = btop(addr);
	while (npf--) {
		mapin(iopte, a, pte->pg_pfnum, PG_V);
		iopte++, pte++;
		a++;
	}
	load_cr3(u.u_pcb.pcb_ptd);
}

/*
 * Free the io map PTEs associated with this IO operation.
 * We also invalidate the TLB entries.
 */
vunmapbuf(bp)
	register struct buf *bp;
{
	register int a, npf;
	register caddr_t addr = bp->b_un.b_addr;
	register struct pte *pte;
	int s;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");
	a = (int)(addr - usrio) >> PGSHIFT;
	npf = btoc(bp->b_bcount + ((int)addr & PGOFSET));
	s = splbio();
	rmfree(useriomap, npf, a);
	if (usriowanted) {
		usriowanted = 0;
		wakeup((caddr_t)useriomap);
	}
	splx(s);
	pte = &Usriomap[a];
	while (npf--) {
		*(int *)pte = 0;
		addr += NBPG;
		pte++;
	}
	/*
	 * If we just completed a dirty page push, we must reconstruct
	 * the original b_addr since cleanup() needs it.
	 */
	if (bp->b_flags & B_DIRTY) {
		a = ((bp - swbuf) * CLSIZE) * KLMAX;
		bp->b_un.b_addr = (caddr_t)ctob(dptov(&proc[2], a));
	}
	load_cr3(u.u_pcb.pcb_ptd);
}
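#ifdef notdef
/*
 * Illustrative only (hypothetical fragment, not a real driver): how a
 * raw-I/O path would bracket a transfer with the two routines above.
 * The device start and completion wait are elided.
 */
xxraw_fragment(bp)
	register struct buf *bp;
{
	bp->b_flags |= B_PHYS;
	vmapbuf(bp);	/* b_un.b_addr now points into usrio */
	/* ... start the device on bp and wait for the transfer ... */
	vunmapbuf(bp);	/* release usrio PTEs; restores b_addr for B_DIRTY */
	bp->b_flags &= ~B_PHYS;
}
#endif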