/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1982, 1986, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * %sccs.include.redist.c%
 *
 * from: Utah $Hdr: vm_machdep.c 1.18 89/08/23$
 *
 *	@(#)vm_machdep.c	7.3 (Berkeley) 05/25/90
 */

#include "param.h"
#include "systm.h"
#include "user.h"
#include "proc.h"
#include "cmap.h"
#include "vm.h"
#include "text.h"
#include "malloc.h"
#include "buf.h"

#include "cpu.h"
#include "pte.h"

/*
 * Set a red zone in the kernel stack after the u. area.
 * We don't support a redzone right now.  It really isn't clear
 * that it is a good idea since, if the kernel stack were to roll
 * into a write protected page, the processor would lock up (since
 * it cannot create an exception frame) and we would get no useful
 * post-mortem info.  Currently, under the DEBUG option, we just
 * check at every clock interrupt to see if the current k-stack has
 * gone too far (i.e. into the "redzone" page) and if so, panic.
 * Look at _lev6intr in locore.s for more details.
 */
/*ARGSUSED*/
setredzone(pte, vaddr)
	struct pte *pte;
	caddr_t vaddr;
{
}

/*
 * Check for valid program size.
 * NB: check data and data growth separately, as they may overflow
 * when summed together.
 */
chksize(ts, ids, uds, ss)
	unsigned ts, ids, uds, ss;
{
	extern unsigned maxtsize;

	if (ctob(ts) > maxtsize ||
	    ctob(ids) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
	    ctob(uds) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
	    ctob(ids + uds) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
	    ctob(ss) > u.u_rlimit[RLIMIT_STACK].rlim_cur) {
		u.u_error = ENOMEM;
		return (1);
	}
	return (0);
}

/*
 * Flush translation and cache state after `size' pages of PTEs,
 * starting at virtual page `v', have been changed.  For large
 * ranges it is cheaper to purge the entire user TLB than to
 * invalidate page by page.
 */
/*ARGSUSED*/
newptes(pte, v, size)
	struct pte *pte;
	u_int v;
	register int size;
{
	register caddr_t a;

#ifdef lint
	pte = pte;
#endif
	if (size >= 8)
		TBIAU();
	else {
		a = ptob(v);
		while (size > 0) {
			TBIS(a);
			a += NBPG;
			size--;
		}
	}
	DCIU();
}

/*
 * Change protection codes of text segment.
 * Have to flush translation buffer since this
 * affects the virtual memory mapping of the current process.
 */
chgprot(addr, tprot)
	caddr_t addr;
	long tprot;
{
	unsigned v;
	int tp;
	register struct pte *pte;
	register struct cmap *c;

	v = clbase(btop(addr));
	if (!isatsv(u.u_procp, v)) {
		u.u_error = EFAULT;
		return (0);
	}
	tp = vtotp(u.u_procp, v);
	pte = tptopte(u.u_procp, tp);
	if (pte->pg_fod == 0 && pte->pg_pfnum) {
		c = &cmap[pgtocm(pte->pg_pfnum)];
		if (c->c_blkno)
			munhash(c->c_vp, (daddr_t)(u_long)c->c_blkno);
	}
	*(u_int *)pte &= ~PG_PROT;
	*(u_int *)pte |= tprot;
	TBIS(addr);
	return (1);
}

/*
 * Change the protection on every text segment PTE of the
 * current process, then purge the user TLB.
 */
settprot(tprot)
	long tprot;
{
	register u_int *pte, i;

	pte = (u_int *)u.u_procp->p_p0br;
	for (i = 0; i < u.u_tsize; i++, pte++) {
		*pte &= ~PG_PROT;
		*pte |= tprot;
	}
	TBIAU();
}
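/*
 * Example (not compiled in): a ptrace-style caller planting a
 * breakpoint in a (possibly shared) text segment might use chgprot
 * to make the page temporarily writable and then restore it.  This
 * is a sketch only; PG_RW/PG_RO are the protection codes from pte.h
 * and the actual patching step is elided.
 */
#ifdef notdef
	if (chgprot(addr, (long)PG_RW) == 0)
		return (u.u_error);
	/* ... write the breakpoint instruction at addr ... */
	(void) chgprot(addr, (long)PG_RO);
#endif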
/*
 * Simulate effect of VAX region length registers.
 * The one case where we must do anything is if a region has shrunk.
 * In that case we must invalidate all the PTEs for the no longer valid VAs.
 */
setptlr(region, nlen)
	int region, nlen;
{
	register struct pte *pte;
	register int change;
	int olen;

	if (region == 0) {
		olen = u.u_pcb.pcb_p0lr;
		u.u_pcb.pcb_p0lr = nlen;
	} else {
		olen = P1PAGES - u.u_pcb.pcb_p1lr;
		u.u_pcb.pcb_p1lr = nlen;
		nlen = P1PAGES - nlen;
	}
	if ((change = olen - nlen) <= 0)
		return;
	if (region == 0)
		pte = u.u_pcb.pcb_p0br + u.u_pcb.pcb_p0lr;
	else
		pte = u.u_pcb.pcb_p1br + u.u_pcb.pcb_p1lr - change;
	do {
		*(u_int *)pte++ = PG_NV;
	} while (--change);
	/* short cut newptes */
	TBIAU();
	DCIU();
}

/*
 * Map `size' bytes of physical memory starting at `paddr' into
 * kernel VA space using PTEs starting at `pte'.  Read/write and
 * cache-inhibit status are specified by `prot'.
 */
physaccess(pte, paddr, size, prot)
	register struct pte *pte;
	caddr_t paddr;
	register int size;
	int prot;
{
	register u_int page;

	page = (u_int)paddr & PG_FRAME;
	for (size = btoc(size); size; size--) {
		*(int *)pte = PG_V | prot | page;
		page += NBPG;
		pte++;
	}
	TBIAS();
}

/*
 * Move pages from one kernel virtual address to another.
 * Both addresses are assumed to reside in the Sysmap,
 * and size must be a multiple of CLBYTES.
 */
pagemove(from, to, size)
	register caddr_t from, to;
	int size;
{
	register struct pte *fpte, *tpte;

	if (size % CLBYTES)
		panic("pagemove");
	fpte = kvtopte(from);
	tpte = kvtopte(to);
	while (size > 0) {
		*tpte++ = *fpte;
		*(int *)fpte++ = PG_NV;
		TBIS(from);
		TBIS(to);
		from += NBPG;
		to += NBPG;
		size -= NBPG;
	}
}

#ifdef KGDB
/*
 * Change protections on kernel pages from addr to addr+size
 * (presumably so debugger can plant a breakpoint).
 * All addresses are assumed to reside in the Sysmap.
 */
chgkprot(addr, size, rw)
	register caddr_t addr;
	int size, rw;
{
	register struct pte *pte;

	pte = &Sysmap[btop(addr)];
	while (size > 0) {
		pte->pg_prot = rw == B_WRITE ? 0 : 1;
		TBIS(addr);
		addr += NBPG;
		size -= NBPG;
		pte++;
	}
}
#endif
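/*
 * Example (not compiled in): a device attach routine might use
 * physaccess to make a board's registers addressable through a
 * previously allocated range of kernel PTEs.  The softc, its
 * fields, REGPA, and struct xxdevregs are hypothetical names
 * for this sketch.
 */
#ifdef notdef
	physaccess(kvtopte(sc->sc_regkva), (caddr_t)REGPA,
	    sizeof(struct xxdevregs), PG_RW|PG_CI);
	/* registers are now read/write, cache-inhibited, at sc->sc_regkva */
#endif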
/*
 * The probe[rw] routines should probably be redone in assembler
 * for efficiency.
 */

/*
 * Return non-zero if user address `addr' lies in a readable page
 * of the current process.
 */
prober(addr)
	register u_int addr;
{
	register int page;
	register struct proc *p;

	if (addr >= USRSTACK)
		return (0);
#ifdef HPUXCOMPAT
	if (ISHPMMADDR(addr))
		addr = HPMMBASEADDR(addr);
#endif
	page = btop(addr);
	p = u.u_procp;
	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize))
		return (1);
#ifdef MAPMEM
	if (page < dptov(p, p->p_dsize+p->p_mmsize) &&
	    (*(int *)vtopte(p, page) & (PG_FOD|PG_V)) == (PG_FOD|PG_V))
		return (1);
#endif
	return (0);
}

/*
 * Return non-zero if user address `addr' lies in a writable page
 * of the current process.
 */
probew(addr)
	register u_int addr;
{
	register int page;
	register struct proc *p;

	if (addr >= USRSTACK)
		return (0);
#ifdef HPUXCOMPAT
	if (ISHPMMADDR(addr))
		addr = HPMMBASEADDR(addr);
#endif
	page = btop(addr);
	p = u.u_procp;
	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize))
		return ((*(int *)vtopte(p, page) & PG_PROT) == PG_RW);
#ifdef MAPMEM
	if (page < dptov(p, p->p_dsize+p->p_mmsize))
		return ((*(int *)vtopte(p, page) & (PG_FOD|PG_V|PG_PROT))
		    == (PG_FOD|PG_V|PG_RW));
#endif
	return (0);
}

/*
 * NB: assumes a physically contiguous kernel page table
 * (makes life a LOT simpler).
 */
kernacc(addr, count, rw)
	register caddr_t addr;
	int count, rw;
{
	register struct ste *ste;
	register struct pte *pte;
	register u_int ix, cnt;
	extern long Syssize;

	if (count <= 0)
		return (0);
	ix = ((int)addr & SG_IMASK) >> SG_ISHIFT;
	cnt = (((int)addr + count + (1<<SG_ISHIFT)-1) & SG_IMASK) >> SG_ISHIFT;
	cnt -= ix;
	for (ste = &Sysseg[ix]; cnt; cnt--, ste++)
		/* should check SG_PROT, but we have no RO segments now */
		if (ste->sg_v == 0)
			return (0);
	ix = btop(addr);
	cnt = btop(addr+count+NBPG-1);
	if (cnt > (u_int)&Syssize)
		return (0);
	cnt -= ix;
	for (pte = &Sysmap[ix]; cnt; cnt--, pte++)
		if (pte->pg_v == 0 || (rw == B_WRITE && pte->pg_prot == 1))
			return (0);
	return (1);
}

/*
 * Return non-zero if the `count' bytes of user address space
 * starting at `addr' are accessible for `rw' (B_READ or B_WRITE).
 */
useracc(addr, count, rw)
	register caddr_t addr;
	unsigned count;
	int rw;
{
	register int (*func)();
	register u_int addr2;
	extern int prober(), probew();

	if (count == 0)
		return (0);
	addr2 = (u_int)addr;
	addr += count;
	func = (rw == B_READ) ? prober : probew;
	do {
		if ((*func)(addr2) == 0)
			return (0);
		addr2 = (addr2 + NBPG) & ~PGOFSET;
	} while (addr2 < (u_int)addr);
	return (1);
}
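/*
 * Example (not compiled in): physio-style validation of a raw IO
 * buffer.  A B_READ transfer from a device writes into user memory,
 * so the probe direction is inverted.  This sketch assumes the
 * usual bp/base/rw names of a physical IO routine.
 */
#ifdef notdef
	if (useracc(base, (unsigned)bp->b_bcount,
	    rw == B_READ ? B_WRITE : B_READ) == 0)
		return (EFAULT);
#endif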
/*
 * Convert kernel VA to physical address.
 */
kvtop(addr)
	register caddr_t addr;
{
	register int pf;

	pf = Sysmap[btop(addr)].pg_pfnum;
	if (pf == 0)
		panic("kvtop: zero page frame");
	return ((u_int)ptob(pf) + ((int)addr & PGOFSET));
}

/*
 * Return the segment table entry mapping virtual address `va' in
 * process `p'.  The segment table immediately follows the page
 * tables (p_szpt pages beyond p_p0br).
 */
struct ste *
vtoste(p, va)
	register struct proc *p;
	register u_int va;
{
	register struct ste *ste;

	ste = (struct ste *)((u_int)p->p_p0br + p->p_szpt * NBPG);
	return (ste + ((va & SG_IMASK) >> SG_ISHIFT));
}

/*
 * Return the physical page frame number of the process segment
 * table page, suitable for loading the user segment table pointer.
 */
initustp(p)
	register struct proc *p;
{
	return ((int)Usrptmap[btokmx(p->p_p0br) + p->p_szpt].pg_pfnum);
}

/*
 * Initialize segment table to reflect PTEs in Usrptmap.
 * The segment table itself resides in the page table page at
 * Usrptmap index btokmx(p->p_p0br) + p->p_szpt.
 */
initsegt(p)
	register struct proc *p;
{
	register int i, k, sz;
	register struct ste *ste;
	extern struct ste *vtoste();

	k = btokmx(p->p_p0br);
	ste = vtoste(p, 0);
	/* text and data */
	sz = ctopt(p->p_tsize + p->p_dsize + p->p_mmsize);
	for (i = 0; i < sz; i++, ste++) {
		*(int *)ste = SG_RW | SG_V;
		ste->sg_pfnum = Usrptmap[k++].pg_pfnum;
	}
	/*
	 * Bogus!  The kernelmap may map unused PT pages
	 * (since we don't shrink PTs) so we need to skip over
	 * those STEs.  We should really free the unused PT
	 * pages in expand().
	 */
	sz += ctopt(p->p_ssize + HIGHPAGES);
	if (sz < p->p_szpt)
		k += p->p_szpt - sz;
	/* hole */
	sz = NPTEPG - ctopt(p->p_ssize + HIGHPAGES);
	for ( ; i < sz; i++, ste++)
		*(int *)ste = SG_NV;
	/* stack and u-area */
	sz = NPTEPG;
	for ( ; i < sz; i++, ste++) {
		*(int *)ste = SG_RW | SG_V;
		ste->sg_pfnum = Usrptmap[k++].pg_pfnum;
	}
}

/*
 * Allocate/free cache-inhibited physical memory.
 * Assumes that malloc returns page aligned memory for requests which are
 * a multiple of the page size.  Hence, size must be such a multiple.
 */
caddr_t
cialloc(sz)
	int sz;
{
	caddr_t kva;
	register int npg, *pte;

	if (sz & CLOFSET)
		return (NULL);
	kva = (caddr_t)malloc(sz, M_DEVBUF, M_NOWAIT);
	if (kva) {
		if (!claligned(kva))
			panic("cialloc");
		pte = (int *)kvtopte(kva);
		npg = btoc(sz);
		while (--npg >= 0)
			*pte++ |= PG_CI;
		TBIAS();
	}
	return (kva);
}

cifree(kva, sz)
	caddr_t kva;
	int sz;
{
	register int npg, *pte;

	if (sz & CLOFSET)
		panic("cifree");
	pte = (int *)kvtopte(kva);
	npg = btoc(sz);
	while (--npg >= 0)
		*pte++ &= ~PG_CI;
	TBIAS();
	free(kva, M_DEVBUF);
}
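/*
 * Example (not compiled in): a DMA driver might keep its descriptors
 * in cache-inhibited memory so the board and CPU see the same data.
 * Requests must be a page-size multiple; the softc and descriptor
 * type are hypothetical names.
 */
#ifdef notdef
	sc->sc_desc = (struct xxdesc *)cialloc(NBPG);
	if (sc->sc_desc == NULL)
		return (ENOMEM);
	/* ... program the board with kvtop((caddr_t)sc->sc_desc) ... */
	cifree((caddr_t)sc->sc_desc, NBPG);
#endif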
extern char usrio[];
extern struct pte Usriomap[];
struct map *useriomap;
int usriowanted;

/*
 * Map an IO request into kernel virtual address space.  Requests fall into
 * one of five categories:
 *
 *	B_PHYS|B_UAREA:	User u-area swap.
 *			Address is relative to start of u-area (p_addr).
 *	B_PHYS|B_PAGET:	User page table swap.
 *			Address is a kernel VA in usrpt (Usrptmap).
 *	B_PHYS|B_DIRTY:	Dirty page push.
 *			Address is a VA in the address space of proc[2]
 *			(the pageout daemon).
 *	B_PHYS|B_PGIN:	Kernel pagein of user pages.
 *			Address is VA in user's address space.
 *	B_PHYS:		User "raw" IO request.
 *			Address is VA in user's address space.
 *
 * All requests are (re)mapped into kernel VA space via the useriomap
 * (a name with only slightly more meaning than "kernelmap").
 */
vmapbuf(bp)
	register struct buf *bp;
{
	register int npf, a;
	register caddr_t addr;
	register struct pte *pte, *iopte;
	register long flags = bp->b_flags;
	struct proc *p;
	int off, s;

	if ((flags & B_PHYS) == 0)
		panic("vmapbuf");
	/*
	 * Find PTEs for the area to be mapped.
	 */
	p = flags&B_DIRTY ? &proc[2] : bp->b_proc;
	addr = bp->b_un.b_addr;
	if (flags & B_UAREA)
		pte = &p->p_addr[btop(addr)];
	else if (flags & B_PAGET)
		pte = &Usrptmap[btokmx((struct pte *)addr)];
	else
		pte = vtopte(p, btop(addr));
	/*
	 * Allocate some kernel PTEs and load them.
	 */
	off = (int)addr & PGOFSET;
	npf = btoc(bp->b_bcount + off);
	s = splbio();
	while ((a = rmalloc(useriomap, npf)) == 0) {
		usriowanted = 1;
		sleep((caddr_t)useriomap, PSWP);
	}
	splx(s);
	iopte = &Usriomap[a];
	bp->b_saveaddr = bp->b_un.b_addr;
	addr = bp->b_un.b_addr = (caddr_t)(usrio + (a << PGSHIFT)) + off;
	while (npf--) {
		mapin(iopte, (u_int)addr, pte->pg_pfnum, PG_CI|PG_RW|PG_V);
		iopte++, pte++;
		addr += NBPG;
	}
}

/*
 * Free the io map PTEs associated with this IO operation.
 * We also invalidate the TLB entries and restore the original b_addr.
 */
vunmapbuf(bp)
	register struct buf *bp;
{
	register int a, npf;
	register caddr_t addr = bp->b_un.b_addr;
	register struct pte *pte;
	int s;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");
	a = (int)(addr - usrio) >> PGSHIFT;
	npf = btoc(bp->b_bcount + ((int)addr & PGOFSET));
	s = splbio();
	rmfree(useriomap, npf, a);
	if (usriowanted) {
		usriowanted = 0;
		wakeup((caddr_t)useriomap);
	}
	splx(s);
	pte = &Usriomap[a];
	while (npf--) {
		*(int *)pte = PG_NV;
		TBIS(addr);
		addr += NBPG;
		pte++;
	}
	bp->b_un.b_addr = bp->b_saveaddr;
	bp->b_saveaddr = NULL;
}
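/*
 * Example (not compiled in): vmapbuf and vunmapbuf bracket the
 * actual transfer in a physical IO path.  The device strategy
 * routine pointer `strat' is a hypothetical name.
 */
#ifdef notdef
	bp->b_flags |= B_PHYS;
	vmapbuf(bp);		/* double map user pages at bp->b_un.b_addr */
	(*strat)(bp);		/* device sees only kernel addresses */
	biowait(bp);
	vunmapbuf(bp);		/* release map, restore original b_addr */
#endif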