/*
 * Copyright (c) 1992 The Regents of the University of California.
 * All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Lawrence Berkeley Laboratory.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)pmap.c	7.4 (Berkeley) 04/20/93
 *
 * from: $Header: pmap.c,v 1.39 93/04/20 11:17:12 torek Exp $
 */

/*
 * SPARC physical map management code.
 * Does not function on multiprocessors (yet).
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/device.h>
#include <sys/proc.h>
#include <sys/malloc.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_prot.h>
#include <vm/vm_page.h>

#include <machine/autoconf.h>
#include <machine/bsd_openprom.h>
#include <machine/cpu.h>
#include <machine/ctlreg.h>

#include <sparc/sparc/asm.h>
#include <sparc/sparc/cache.h>

#ifdef DEBUG
#define PTE_BITS "\20\40V\37W\36S\35NC\33IO\32U\31M"
#endif

extern struct promvec *promvec;

/*
 * The SPARCstation offers us the following challenges:
 *
 *   1. A virtual address cache.  This is, strictly speaking, not
 *	part of the architecture, but the code below assumes one.
 *	This is a write-through cache on the 4c and a write-back cache
 *	on others.
 *
 *   2. An MMU that acts like a cache.  There is not enough space
 *	in the MMU to map everything all the time.  Instead, we need
 *	to load the MMU with the `working set' of translations for
 *	each process.
 *
 *   3. Segmented virtual and physical spaces.  The upper 12 bits of
 *	a virtual address (the virtual segment) index a segment table,
 *	giving a physical segment.  The physical segment selects a
 *	`Page Map Entry Group' (PMEG) and the virtual page number---the
 *	next 5 or 6 bits of the virtual address---select the particular
 *	`Page Map Entry' for the page.  We call the latter a PTE and
 *	call each Page Map Entry Group a pmeg (for want of a better name).
 *
 *	Since there are no valid bits in the segment table, the only way
 *	to have an invalid segment is to make one full pmeg of invalid PTEs.
 *	We use the last one (since the ROM does as well).
 *
 *   4. Discontiguous physical pages.  The Mach VM expects physical pages
 *	to be in one sequential lump.
 *
 *   5. The MMU is always on: it is not possible to disable it.  This is
 *	mainly a startup hassle.
 */
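
/*
 * Illustrative note (not used by the code): with the sun4c numbers
 * assumed here---4096-byte pages and 64-page segments---point 3 above
 * means a virtual address decomposes roughly as
 *
 *	vseg = (va >> 18) & 0xfff	(upper 12 bits: segment table index)
 *	vpg  = (va >> 12) & 0x3f	(next 6 bits: PTE within the PMEG)
 *
 * The code below always goes through the VA_VSEG() and VA_VPG() macros
 * rather than open-coding these shifts.
 */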

struct pmap_stats {
	int	ps_unlink_pvfirst;	/* # of pv_unlinks on head */
	int	ps_unlink_pvsearch;	/* # of pv_unlink searches */
	int	ps_changeprots;		/* # of calls to changeprot */
	int	ps_useless_changeprots;	/* # of changeprots for wiring */
	int	ps_enter_firstpv;	/* pv heads entered */
	int	ps_enter_secondpv;	/* pv nonheads entered */
	int	ps_useless_changewire;	/* useless wiring changes */
	int	ps_npg_prot_all;	/* # of active pages protected */
	int	ps_npg_prot_actual;	/* # pages actually affected */
} pmap_stats;

#ifdef DEBUG
#define	PDB_CREATE	0x0001
#define	PDB_DESTROY	0x0002
#define	PDB_REMOVE	0x0004
#define	PDB_CHANGEPROT	0x0008
#define	PDB_ENTER	0x0010

#define	PDB_MMU_ALLOC	0x0100
#define	PDB_MMU_STEAL	0x0200
#define	PDB_CTX_ALLOC	0x0400
#define	PDB_CTX_STEAL	0x0800
int	pmapdebug = 0x0;
#endif

#define	splpmap()	splbio()

/*
 * First and last managed physical addresses.
 */
#if 0
vm_offset_t	vm_first_phys, vm_last_phys;
#define	managed(pa)	((pa) >= vm_first_phys && (pa) < vm_last_phys)
#else
vm_offset_t	vm_first_phys, vm_num_phys;
#define	managed(pa)	((unsigned)((pa) - vm_first_phys) < vm_num_phys)
#endif

/*
 * For each managed physical page, there is a list of all currently
 * valid virtual mappings of that page.  Since there is usually one
 * (or zero) mapping per page, the table begins with an initial entry,
 * rather than a pointer; this head entry is empty iff its pv_pmap
 * field is NULL.
 *
 * Note that these are per machine independent page (so there may be
 * only one for every two hardware pages, e.g.).  Since the virtual
 * address is aligned on a page boundary, the low order bits are free
 * for storing flags.  Only the head of each list has flags.
 *
 * THIS SHOULD BE PART OF THE CORE MAP
 */
struct pvlist {
	struct	pvlist *pv_next;	/* next pvlist, if any */
	struct	pmap *pv_pmap;		/* pmap of this va */
	int	pv_va;			/* virtual address */
	int	pv_flags;		/* flags (below) */
};

/*
 * Flags in pv_flags.  Note that PV_MOD must be 1 and PV_REF must be 2
 * since they must line up with the bits in the hardware PTEs (see pte.h).
 */
#define	PV_MOD	1		/* page modified */
#define	PV_REF	2		/* page referenced */
#define	PV_NC	4		/* page cannot be cached */
/*efine	PV_ALLF	7		** all of the above */

struct	pvlist *pv_table;	/* array of entries, one per physical page */

#define	pvhead(pa)	(&pv_table[atop((pa) - vm_first_phys)])
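
#ifdef notdef
/*
 * Sketch (disabled; for illustration only): counting the current
 * mappings of a managed page by walking its pvlist.  pv_count() is
 * hypothetical---nothing below uses it---but it shows the intended
 * use of managed(), pvhead(), and the empty-head convention.
 */
int
pv_count(pa)
	register vm_offset_t pa;
{
	register struct pvlist *pv;
	register int n;

	if (!managed(pa))
		return (0);
	n = 0;
	/* the pv_pmap test only ever fires on an empty head entry */
	for (pv = pvhead(pa); pv != NULL && pv->pv_pmap != NULL;
	    pv = pv->pv_next)
		n++;
	return (n);
}
#endif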

/*
 * Each virtual segment within each pmap is either valid or invalid.
 * It is valid if pm_npte[VA_VSEG(va)] is not 0.  This does not mean
 * it is in the MMU, however; that is true iff pm_segmap[VA_VSEG(va)]
 * does not point to the invalid PMEG.
 *
 * If a virtual segment is valid and loaded, the correct PTEs appear
 * in the MMU only.  If it is valid and unloaded, the correct PTEs appear
 * in pm_pte[VA_VSEG(va)] only.  However, some effort is made to keep
 * the software copies consistent enough with the MMU so that libkvm can
 * do user address translations.  In particular, pv_changepte() and
 * pmap_enu() maintain consistency, while less critical changes are
 * not maintained.  pm_pte[VA_VSEG(va)] always points to space for those
 * PTEs, unless this is the kernel pmap, in which case pm_pte[x] is not
 * used (sigh).
 *
 * Each PMEG in the MMU is either free or contains PTEs corresponding to
 * some pmap and virtual segment.  If it contains some PTEs, it also contains
 * reference and modify bits that belong in the pv_table.  If we need
 * to steal a PMEG from some process (if we need one and none are free)
 * we must copy the ref and mod bits, and update pm_segmap in the other
 * pmap to show that its virtual segment is no longer in the MMU.
 *
 * There are 128 PMEGs in a small Sun-4, of which only a few dozen are
 * tied down permanently, leaving `about' 100 to be spread among
 * running processes.  These are managed as an LRU cache.  Before
 * calling the VM paging code for a user page fault, the fault handler
 * calls mmu_load(pmap, va) to try to get a set of PTEs put into the
 * MMU.  mmu_load will check the validity of the segment and tell whether
 * it did something.
 *
 * Since I hate the name PMEG I call this data structure an `mmu entry'.
 * Each mmuentry is on exactly one of three `usage' lists: free, LRU,
 * or locked.  The LRU list is for user processes; the locked list is
 * for kernel entries; both are doubly linked queues headed by `mmuhd's.
 * The free list is a simple list, headed by a free list pointer.
 */
struct mmuhd {
	struct	mmuentry *mh_next;
	struct	mmuentry *mh_prev;
};
struct mmuentry {
	struct	mmuentry *me_next;	/* queue (MUST BE FIRST) or next free */
	struct	mmuentry *me_prev;	/* queue (MUST BE FIRST) */
	struct	pmap *me_pmap;		/* pmap, if in use */
	struct	mmuentry *me_pmforw;	/* pmap pmeg chain */
	struct	mmuentry **me_pmback;	/* pmap pmeg chain */
	u_short	me_vseg;		/* virtual segment number in pmap */
	pmeg_t	me_pmeg;		/* hardware PMEG number */
};
struct	mmuentry *mmuentry;	/* allocated in pmap_bootstrap */

struct	mmuentry *me_freelist;	/* free list (not a queue) */
struct	mmuhd me_lru = {	/* LRU (user) entries */
	(struct mmuentry *)&me_lru, (struct mmuentry *)&me_lru
};
struct	mmuhd me_locked = {	/* locked (kernel) entries */
	(struct mmuentry *)&me_locked, (struct mmuentry *)&me_locked
};

int	seginval;		/* the invalid segment number */

/*
 * A context is simply a small number that dictates which set of 4096
 * segment map entries the MMU uses.  The Sun 4c has eight such sets.
 * These are allotted in an `almost MRU' fashion.
 *
 * Each context is either free or attached to a pmap.
 *
 * Since the virtual address cache is tagged by context, when we steal
 * a context we have to flush (that part of) the cache.
 */
union ctxinfo {
	union	ctxinfo *c_nextfree;	/* free list (if free) */
	struct	pmap *c_pmap;		/* pmap (if busy) */
};
union	ctxinfo *ctxinfo;	/* allocated in pmap_bootstrap */
int	ncontext;

union	ctxinfo *ctx_freelist;	/* context free list */
int	ctx_kick;		/* allocation rover when none free */
int	ctx_kickdir;		/* ctx_kick roves both directions */

/* XXX need per-cpu vpage[]s (and vmempage, unless we lock in /dev/mem) */
caddr_t	vpage[2];		/* two reserved MD virtual pages */
caddr_t	vmempage;		/* one reserved MI vpage for /dev/mem */
caddr_t	vdumppages;		/* 32KB worth of reserved dump pages */

struct	kpmap kernel_pmap_store;	/* the kernel's pmap */

/*
 * We need to know real physical memory ranges (for /dev/mem).
 */
#define	MA_SIZE	32		/* size of memory descriptor arrays */
struct	memarr pmemarr[MA_SIZE];	/* physical memory regions */
int	npmemarr;		/* number of entries in pmemarr */

/*
 * The following four global variables are set in pmap_bootstrap
 * for the vm code to find.  This is Wrong.
 */
vm_offset_t	avail_start;	/* first free physical page number */
vm_offset_t	avail_end;	/* last free physical page number */
vm_offset_t	virtual_avail;	/* first free virtual page number */
vm_offset_t	virtual_end;	/* last free virtual page number */

/*
 * pseudo-functions for mnemonic value
#ifdef notyet
 * NB: setsegmap should be stba for 4c, but stha works and makes the
 * code right for the Sun-4 as well.
#endif
 */
#define	getcontext()		lduba(AC_CONTEXT, ASI_CONTROL)
#define	setcontext(c)		stba(AC_CONTEXT, ASI_CONTROL, c)
#ifdef notyet
#define	getsegmap(va)		lduha(va, ASI_SEGMAP)
#define	setsegmap(va, pmeg)	stha(va, ASI_SEGMAP, pmeg)
#else
#define	getsegmap(va)		lduba(va, ASI_SEGMAP)
#define	setsegmap(va, pmeg)	stba(va, ASI_SEGMAP, pmeg)
#endif

#define	getpte(va)		lda(va, ASI_PTE)
#define	setpte(va, pte)		sta(va, ASI_PTE, pte)

/*----------------------------------------------------------------*/

#ifdef sun4c
/*
 * Translations from dense (contiguous) pseudo physical addresses
 * (fed to the VM code, to keep it happy) to sparse (real, hardware)
 * physical addresses.  We call the former `software' page frame
 * numbers and the latter `hardware' page frame numbers.  The
 * translation is done on a `per bank' basis.
 *
 * The HWTOSW and SWTOHW macros handle the actual translation.
 * They are defined as no-ops on Sun-4s.
 *
 * SHOULD DO atop AND ptoa DIRECTLY IN THESE MACROS SINCE ALL CALLERS
 * ALWAYS NEED THAT ANYWAY ... CAN JUST PRECOOK THE TABLES (TODO)
 *
 * Since we cannot use the memory allocated to the ROM monitor, and
 * this happens to be just under 64K, I have chosen a bank size of
 * 64K.  This is necessary since all banks must be completely full.
 * I have also chosen a physical memory limit of 128 MB.  The 4c is
 * architecturally limited to 256 MB, but 128 MB is more than will
 * fit on present hardware.
 *
 * XXX	FIX THIS: just make all of each bank available and then
 *	take out the pages reserved to the monitor!!
 */
#define	MAXMEM	(128 * 1024 * 1024)	/* no more than 128 MB phys mem */
#define	NPGBANK	16			/* 2^4 pages per bank (64K / bank) */
#define	BSHIFT	4			/* log2(NPGBANK) */
#define	BOFFSET	(NPGBANK - 1)
#define	BTSIZE	(MAXMEM / NBPG / NPGBANK)

int	pmap_dtos[BTSIZE];		/* dense to sparse */
int	pmap_stod[BTSIZE];		/* sparse to dense */

#define	HWTOSW(pg)	(pmap_stod[(pg) >> BSHIFT] | ((pg) & BOFFSET))
#define	SWTOHW(pg)	(pmap_dtos[(pg) >> BSHIFT] | ((pg) & BOFFSET))

#ifdef DEBUG
struct	memarr pmap_ama[MA_SIZE];
int	pmap_nama;
#define	ama	pmap_ama
#endif
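
/*
 * Worked example of the bank translation (illustrative, assuming 4 KB
 * pages, so NPGBANK = 16 pages per 64 KB bank): for hardware page
 * number 0x1234, HWTOSW looks up pmap_stod[0x123] to get the dense
 * bank base and ORs back the offset 0x4 within the bank; SWTOHW does
 * the inverse through pmap_dtos[].  init_translations() below fills
 * in both arrays, bank by bank.
 */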

/*
 * init_translations sets up pmap_dtos[] and pmap_stod[], and
 * returns the number of usable physical pages.
 */
int
init_translations()
{
	register struct memarr *mp;
	register int n, nmem;
	register u_int vbank = 0, pbank, v, a;
	register u_int pages = 0, lost = 0;
#ifndef DEBUG
	struct memarr ama[MA_SIZE];	/* available memory array */
#endif

	nmem = makememarr(ama, MA_SIZE, MEMARR_AVAILPHYS);
#ifdef DEBUG
	pmap_nama = nmem;
#endif
	for (mp = ama; --nmem >= 0; mp++) {
		a = mp->addr >> PGSHIFT;
		v = mp->len >> PGSHIFT;
		if ((n = a & BOFFSET) != 0) {
			/* round up to next bank */
			n = NPGBANK - n;
			if (v < n) {	/* not a whole bank: skip it */
				lost += v;
				continue;
			}
			lost += n;	/* lose n pages from front */
			a += n;
			v -= n;
		}
		n = v >> BSHIFT;	/* calculate number of banks */
		pbank = a >> BSHIFT;	/* and the bank itself */
		if (pbank + n >= BTSIZE)
			n = BTSIZE - pbank;
		pages += n;		/* off by a factor of 2^BSHIFT */
		lost += v - (n << BSHIFT);
		while (--n >= 0) {
			pmap_dtos[vbank] = pbank << BSHIFT;
			pmap_stod[pbank] = vbank << BSHIFT;
			pbank++;
			vbank++;
		}
	}
	/* adjust page count */
	pages <<= BSHIFT;
#ifdef DEBUG
	printf("note: lost %d pages in translation\n", lost);
#endif
	return (pages);
}

#else /* sun4c */

/*
 * Pages are physically contiguous, and hardware PFN == software PFN.
 *
 * XXX assumes PAGE_SIZE == NBPG (???)
 */
#define	HWTOSW(pg)	(pg)
#define	SWTOHW(pg)	(pg)

#endif /* sun4c */

/* update pv_flags given a valid pte */
#define	MR(pte)	(((pte) >> PG_M_SHIFT) & (PV_MOD | PV_REF))

/*----------------------------------------------------------------*/

/*
 * Agree with the monitor ROM as to how many MMU entries are
 * to be reserved, and map all of its segments into all contexts.
 *
 * Unfortunately, while the Version 0 PROM had a nice linked list of
 * taken virtual memory, the Version 2 PROM provides instead a convoluted
 * description of *free* virtual memory.  Rather than invert this, we
 * resort to two magic constants from the PROM vector description file.
 */
int
mmu_reservemon(nmmu)
	register int nmmu;
{
	register u_int va, eva;
	register int mmuseg, i;

	va = OPENPROM_STARTVADDR;
	eva = OPENPROM_ENDVADDR;
	while (va < eva) {
		mmuseg = getsegmap(va);
		if (mmuseg < nmmu)
			nmmu = mmuseg;
		for (i = ncontext; --i > 0;)
			(*promvec->pv_setctxt)(i, (caddr_t)va, mmuseg);
		if (mmuseg == seginval) {
			va += NBPSG;
			continue;
		}
		/* PROM maps its memory user-accessible: fix it. */
		for (i = NPTESG; --i >= 0; va += NBPG)
			setpte(va, getpte(va) | PG_S);
	}
	return (nmmu);
}

/*
 * TODO: agree with the ROM on physical pages by taking them away
 * from the page list, rather than having a dinky BTSIZE above.
 */

/*----------------------------------------------------------------*/

/*
 * MMU management.
 */

/*
 * Change contexts.  We need the old context number as well as the new
 * one.  If the context is changing, we must write all user windows
 * first, lest an interrupt cause them to be written to the (other)
 * user whose context we set here.
 */
#define	CHANGE_CONTEXTS(old, new) \
	if ((old) != (new)) { \
		write_user_windows(); \
		setcontext(new); \
	}
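
#ifdef notdef
/*
 * Sketch (disabled, illustrative): the usual pattern around
 * CHANGE_CONTEXTS, as used by me_alloc() and the pv routines below.
 * The old context is saved and restored when done, and user windows
 * are flushed before any switch so that no window writeback can land
 * in the wrong address space.
 */
	ctx = getcontext();
	CHANGE_CONTEXTS(ctx, pm->pm_ctxnum);
	/* ... operate on pm's mappings ... */
	setcontext(ctx);
#endif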

/*
 * Allocate an MMU entry (i.e., a PMEG).
 * If necessary, steal one from someone else.
 * Put it on the tail of the given queue
 * (which is either the LRU list or the locked list).
 * The locked list is not actually ordered, but this is easiest.
 * Also put it on the given (new) pmap's chain,
 * enter its pmeg number into that pmap's segmap,
 * and store the pmeg's new virtual segment number (me->me_vseg).
 *
 * This routine is large and complicated, but it must be fast
 * since it implements the dynamic allocation of MMU entries.
 */
struct mmuentry *
me_alloc(mh, newpm, newvseg)
	register struct mmuhd *mh;
	register struct pmap *newpm;
	register int newvseg;
{
	register struct mmuentry *me;
	register struct pmap *pm;
	register int i, va, pa, *pte, tpte;
	int ctx;

	/* try free list first */
	if ((me = me_freelist) != NULL) {
		me_freelist = me->me_next;
#ifdef DEBUG
		if (me->me_pmap != NULL)
			panic("me_alloc: freelist entry has pmap");
		if (pmapdebug & PDB_MMU_ALLOC)
			printf("me_alloc: got pmeg %x\n", me->me_pmeg);
#endif
		insque(me, mh->mh_prev);	/* onto end of queue */

		/* onto new pmap chain; pmap is already locked, if needed */
		me->me_pmforw = NULL;
		me->me_pmback = newpm->pm_mmuback;
		*newpm->pm_mmuback = me;
		newpm->pm_mmuback = &me->me_pmforw;

		/* into pmap segment table, with backpointers */
		newpm->pm_segmap[newvseg] = me->me_pmeg;
		me->me_pmap = newpm;
		me->me_vseg = newvseg;

		return (me);
	}

	/* no luck, take head of LRU list */
	if ((me = me_lru.mh_next) == (struct mmuentry *)&me_lru)
		panic("me_alloc: all pmegs gone");
	pm = me->me_pmap;
#ifdef DEBUG
	if (pm == NULL)
		panic("me_alloc: LRU entry has no pmap");
	if (pm == kernel_pmap)
		panic("me_alloc: stealing from kernel");
#endif
	/* get the software PTE copy; needed below even when DEBUG is off */
	pte = pm->pm_pte[me->me_vseg];
#ifdef DEBUG
	if (pte == NULL)
		panic("me_alloc: LRU entry's pmap has no ptes");
	if (pmapdebug & (PDB_MMU_ALLOC | PDB_MMU_STEAL))
		printf("me_alloc: stealing pmeg %x from pmap %x\n",
		    me->me_pmeg, pm);
#endif
	/*
	 * Remove from LRU list, and insert at end of new list
	 * (probably the LRU list again, but so what?).
	 */
	remque(me);
	insque(me, mh->mh_prev);

	/*
	 * The PMEG must be mapped into some context so that we can
	 * read its PTEs.  Use its current context if it has one;
	 * if not, and since context 0 is reserved for the kernel,
	 * the simplest method is to switch to 0 and map the PMEG
	 * to virtual address 0---which, being a user space address,
	 * is by definition not in use.
	 *
	 * XXX for ncpus>1 must use per-cpu VA?
	 * XXX do not have to flush cache immediately
	 */
	ctx = getcontext();
	if (pm->pm_ctx) {
		CHANGE_CONTEXTS(ctx, pm->pm_ctxnum);
#ifdef notdef
		if (vactype != VAC_NONE)
#endif
			cache_flush_segment(me->me_vseg);
		va = VSTOVA(me->me_vseg);
	} else {
		CHANGE_CONTEXTS(ctx, 0);
		setsegmap(0, me->me_pmeg);
		/*
		 * No cache flush needed: it happened earlier when
		 * the old context was taken.
		 */
		va = 0;
	}

	/*
	 * Record reference and modify bits for each page,
	 * and copy PTEs into kernel memory so that they can
	 * be reloaded later.
	 */
	i = NPTESG;
	do {
		tpte = getpte(va);
		if (tpte & PG_V) {
			pa = ptoa(HWTOSW(tpte & PG_PFNUM));
			if (managed(pa))
				pvhead(pa)->pv_flags |= MR(tpte);
		}
		*pte++ = tpte & ~(PG_U|PG_M);
		va += NBPG;
	} while (--i > 0);

	/* update segment tables */
	simple_lock(&pm->pm_lock);	/* what if other cpu takes mmuentry ?? */
	if (pm->pm_ctx)
		setsegmap(VSTOVA(me->me_vseg), seginval);
	pm->pm_segmap[me->me_vseg] = seginval;

	/* off old pmap chain */
	if ((*me->me_pmback = me->me_pmforw) != NULL) {
		me->me_pmforw->me_pmback = me->me_pmback;
		me->me_pmforw = NULL;
	} else
		pm->pm_mmuback = me->me_pmback;
	simple_unlock(&pm->pm_lock);
	setcontext(ctx);	/* done with old context */

	/* onto new pmap chain; new pmap is already locked, if needed */
	/* me->me_pmforw = NULL; */	/* done earlier */
	me->me_pmback = newpm->pm_mmuback;
	*newpm->pm_mmuback = me;
	newpm->pm_mmuback = &me->me_pmforw;

	/* into new segment table, with backpointers */
	newpm->pm_segmap[newvseg] = me->me_pmeg;
	me->me_pmap = newpm;
	me->me_vseg = newvseg;

	return (me);
}

/*
 * Free an MMU entry.
 *
 * Assumes the corresponding pmap is already locked.
 * Does NOT flush cache, but does record ref and mod bits.
 * The rest of each PTE is discarded.
 * CALLER MUST SET CONTEXT to pm->pm_ctxnum (if pmap has
 * a context) or to 0 (if not).  Caller must also update
 * pm->pm_segmap and (possibly) the hardware.
 */
void
me_free(pm, pmeg)
	register struct pmap *pm;
	register u_int pmeg;
{
	register struct mmuentry *me = &mmuentry[pmeg];
	register int i, va, pa, tpte;

#ifdef DEBUG
	if (pmapdebug & PDB_MMU_ALLOC)
		printf("me_free: freeing pmeg %x from pmap %x\n",
		    me->me_pmeg, pm);
	if (me->me_pmeg != pmeg)
		panic("me_free: wrong mmuentry");
	if (pm != me->me_pmap)
		panic("me_free: pm != me_pmap");
#endif

	/* just like me_alloc, but no cache flush, and context already set */
	if (pm->pm_ctx)
		va = VSTOVA(me->me_vseg);
	else {
		setsegmap(0, me->me_pmeg);
		va = 0;
	}
	i = NPTESG;
	do {
		tpte = getpte(va);
		if (tpte & PG_V) {
			pa = ptoa(HWTOSW(tpte & PG_PFNUM));
			if (managed(pa))
				pvhead(pa)->pv_flags |= MR(tpte);
		}
		va += NBPG;
	} while (--i > 0);

	/* take mmu entry off pmap chain */
	if ((*me->me_pmback = me->me_pmforw) != NULL)
		me->me_pmforw->me_pmback = me->me_pmback;
	else
		pm->pm_mmuback = me->me_pmback;
	/* ... and remove from segment map */
	pm->pm_segmap[me->me_vseg] = seginval;

	/* off LRU or lock chain */
	remque(me);

	/* no associated pmap; on free list */
	me->me_pmap = NULL;
	me->me_next = me_freelist;
	me_freelist = me;
}
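
#ifdef notdef
/*
 * Sketch (disabled, illustrative): the expected use of mmu_pagein()
 * (below) from the fault handler, assuming `pm' is the faulting
 * process's pmap, `va' the fault address, and `bits' the PTE bits
 * the access requires.  A return of 1 means the segment was reloaded
 * and the access can simply be retried; -1 means the PTE was already
 * loaded and valid, so the fault is a genuine error; 0 means let the
 * VM fault code do the work.
 */
	if (mmu_pagein(pm, va, bits) == 1)
		return;			/* reloaded; retry the access */
#endif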

/*
 * `Page in' (load or inspect) an MMU entry; called on page faults.
 * Returns 1 if we reloaded the segment, -1 if the segment was
 * already loaded and the page was marked valid (in which case the
 * fault must be a bus error or something), or 0 (segment loaded but
 * PTE not valid, or segment not loaded at all).
 */
int
mmu_pagein(pm, va, bits)
	register struct pmap *pm;
	register int va, bits;
{
	register int *pte;
	register struct mmuentry *me;
	register int vseg = VA_VSEG(va), pmeg, i, s;

	/* return 0 if we have no PTEs to load */
	if ((pte = pm->pm_pte[vseg]) == NULL)
		return (0);
	/* return -1 if the fault is `hard', 0 if not */
	if (pm->pm_segmap[vseg] != seginval)
		return (bits && (getpte(va) & bits) == bits ? -1 : 0);

	/* reload segment: write PTEs into a new LRU entry */
	va = VA_ROUNDDOWNTOSEG(va);
	s = splpmap();		/* paranoid */
	pmeg = me_alloc(&me_lru, pm, vseg)->me_pmeg;
	setsegmap(va, pmeg);
	i = NPTESG;
	do {
		setpte(va, *pte++);
		va += NBPG;
	} while (--i > 0);
	splx(s);
	return (1);
}

/*
 * Allocate a context.  If necessary, steal one from someone else.
 * Changes hardware context number and loads segment map.
 *
 * This routine is only ever called from locore.s just after it has
 * saved away the previous process, so there are no active user windows.
 */
void
ctx_alloc(pm)
	register struct pmap *pm;
{
	register union ctxinfo *c;
	register int cnum, i, va;
	register pmeg_t *segp;

#ifdef DEBUG
	if (pm->pm_ctx)
		panic("ctx_alloc pm_ctx");
	if (pmapdebug & PDB_CTX_ALLOC)
		printf("ctx_alloc(%x)\n", pm);
#endif
	if ((c = ctx_freelist) != NULL) {
		ctx_freelist = c->c_nextfree;
		cnum = c - ctxinfo;
		setcontext(cnum);
	} else {
		if ((ctx_kick += ctx_kickdir) >= ncontext) {
			ctx_kick = ncontext - 1;
			ctx_kickdir = -1;
		} else if (ctx_kick < 1) {
			ctx_kick = 1;
			ctx_kickdir = 1;
		}
		c = &ctxinfo[cnum = ctx_kick];
#ifdef DEBUG
		if (c->c_pmap == NULL)
			panic("ctx_alloc cu_pmap");
		if (pmapdebug & (PDB_CTX_ALLOC | PDB_CTX_STEAL))
			printf("ctx_alloc: steal context %x from %x\n",
			    cnum, c->c_pmap);
#endif
		c->c_pmap->pm_ctx = NULL;
		setcontext(cnum);
#ifdef notdef
		if (vactype != VAC_NONE)
#endif
			cache_flush_context();
	}
	c->c_pmap = pm;
	pm->pm_ctx = c;
	pm->pm_ctxnum = cnum;

	/*
	 * XXX loop below makes 3584 iterations ... could reduce
	 * by remembering valid ranges per context: two ranges
	 * should suffice (for text/data/bss and for stack).
	 */
	segp = pm->pm_rsegmap;
	for (va = 0, i = NUSEG; --i >= 0; va += NBPSG)
		setsegmap(va, *segp++);
}

/*
 * Give away a context.  Flushes cache and sets current context to 0.
 */
void
ctx_free(pm)
	struct pmap *pm;
{
	register union ctxinfo *c;
	register int newc, oldc;

	if ((c = pm->pm_ctx) == NULL)
		panic("ctx_free");
	pm->pm_ctx = NULL;
	oldc = getcontext();
	if (vactype != VAC_NONE) {
		newc = pm->pm_ctxnum;
		CHANGE_CONTEXTS(oldc, newc);
		cache_flush_context();
		setcontext(0);
	} else {
		CHANGE_CONTEXTS(oldc, 0);
	}
	c->c_nextfree = ctx_freelist;
	ctx_freelist = c;
}
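
/*
 * Aside on ctx_alloc() above (illustrative): when no context is free,
 * the rover ctx_kick sweeps back and forth over contexts 1 through
 * ncontext-1 (context 0 belongs to the kernel).  With eight contexts,
 * as on the sun4c, the victims are chosen in the order
 * 1,2,...,7,7,6,...,1,1,2,... so a freshly stolen context is not
 * stolen again for roughly 2*(ncontext-1) allocations.
 */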

/*----------------------------------------------------------------*/

/*
 * pvlist functions.
 */

/*
 * Walk the given pv list, and for each PTE, set or clear some bits
 * (e.g., PG_W or PG_NC).
 *
 * As a special case, this never clears PG_W on `pager' pages.
 * These, being kernel addresses, are always in hardware and have
 * a context.
 *
 * This routine flushes the cache for any page whose PTE changes,
 * as long as the process has a context; this is overly conservative.
 * It also copies ref and mod bits to the pvlist, on the theory that
 * this might save work later.  (XXX should test this theory)
 */
void
pv_changepte(pv0, bis, bic)
	register struct pvlist *pv0;
	register int bis, bic;
{
	register int *pte;
	register struct pvlist *pv;
	register struct pmap *pm;
	register int va, vseg, pmeg, i, flags;
	int ctx, s;

	write_user_windows();		/* paranoid? */

	s = splpmap();			/* paranoid? */
	if (pv0->pv_pmap == NULL) {
		splx(s);
		return;
	}
	ctx = getcontext();
	flags = pv0->pv_flags;
	for (pv = pv0; pv != NULL; pv = pv->pv_next) {
		pm = pv->pv_pmap;
		if (pm == NULL)
			panic("pv_changepte 1");
		va = pv->pv_va;
		vseg = VA_VSEG(va);
		pte = pm->pm_pte[vseg];
		if ((pmeg = pm->pm_segmap[vseg]) != seginval) {
			register int tpte;

			/* in hardware: fix hardware copy */
			if (pm->pm_ctx) {
				extern vm_offset_t pager_sva, pager_eva;

				if (bic == PG_W &&
				    va >= pager_sva && va < pager_eva)
					continue;
				setcontext(pm->pm_ctxnum);
				/* XXX should flush only when necessary */
#ifdef notdef
				if (vactype != VAC_NONE)
#endif
					cache_flush_page(va);
			} else {
				/* XXX per-cpu va? */
				setcontext(0);
				setsegmap(0, pmeg);
				va = VA_VPG(va) * NBPG;
			}
			tpte = getpte(va);
			if (tpte & PG_V)
				flags |= (tpte >> PG_M_SHIFT) &
				    (PV_MOD|PV_REF);
			tpte = (tpte | bis) & ~bic;
			setpte(va, tpte);
			if (pte != NULL)	/* update software copy */
				pte[VA_VPG(va)] = tpte;
		} else {
			/* not in hardware: just fix software copy */
			if (pte == NULL)
				panic("pv_changepte 2");
			pte += VA_VPG(va);
			*pte = (*pte | bis) & ~bic;
		}
	}
	pv0->pv_flags = flags;
	setcontext(ctx);
	splx(s);
}

/*
 * Sync ref and mod bits in pvlist (turns off same in hardware PTEs).
 * Returns the new flags.
 *
 * This is just like pv_changepte, but we never add or remove bits,
 * hence never need to adjust software copies.
 */
int
pv_syncflags(pv0)
	register struct pvlist *pv0;
{
	register struct pvlist *pv;
	register struct pmap *pm;
	register int tpte, va, vseg, pmeg, i, flags;
	int ctx, s;

	write_user_windows();		/* paranoid? */

	s = splpmap();			/* paranoid? */
	if (pv0->pv_pmap == NULL) {	/* paranoid */
		splx(s);
		return (0);
	}
	ctx = getcontext();
	flags = pv0->pv_flags;
	for (pv = pv0; pv != NULL; pv = pv->pv_next) {
		pm = pv->pv_pmap;
		va = pv->pv_va;
		vseg = VA_VSEG(va);
		if ((pmeg = pm->pm_segmap[vseg]) == seginval)
			continue;
		if (pm->pm_ctx) {
			setcontext(pm->pm_ctxnum);
			/* XXX should flush only when necessary */
#ifdef notdef
			if (vactype != VAC_NONE)
#endif
				cache_flush_page(va);
		} else {
			/* XXX per-cpu va? */
			setcontext(0);
			setsegmap(0, pmeg);
			va = VA_VPG(va) * NBPG;
		}
		tpte = getpte(va);
		if (tpte & (PG_M|PG_U) && tpte & PG_V) {
			flags |= (tpte >> PG_M_SHIFT) &
			    (PV_MOD|PV_REF);
			tpte &= ~(PG_M|PG_U);
			setpte(va, tpte);
		}
	}
	pv0->pv_flags = flags;
	setcontext(ctx);
	splx(s);
	return (flags);
}
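
/*
 * Illustrative use of pv_syncflags() (a sketch; the MI interface
 * functions that would do this live elsewhere): an "is this page
 * modified?" predicate would be built as
 *
 *	modified = (pv_syncflags(pvhead(pa)) & PV_MOD) != 0;
 *
 * after which the hardware PG_M/PG_U bits have been turned off in
 * the PTEs and accumulated in the pvlist head.
 */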

/*
 * pv_unlink is a helper function for pmap_remove.
 * It takes a pointer to the pv_table head for some physical address
 * and removes the appropriate (pmap, va) entry.
 *
 * Once the entry is removed, if the pv_table head has the cache
 * inhibit bit set, see if we can turn that off; if so, walk the
 * pvlist and turn off PG_NC in each PTE.  (The pvlist is by
 * definition nonempty, since it must have at least two elements
 * in it to have PV_NC set, and we only remove one here.)
 */
static void
pv_unlink(pv, pm, va)
	register struct pvlist *pv;
	register struct pmap *pm;
	register vm_offset_t va;
{
	register struct pvlist *npv;

	/*
	 * First entry is special (sigh).
	 */
	npv = pv->pv_next;
	if (pv->pv_pmap == pm && pv->pv_va == va) {
		pmap_stats.ps_unlink_pvfirst++;
		if (npv != NULL) {
			pv->pv_next = npv->pv_next;
			pv->pv_pmap = npv->pv_pmap;
			pv->pv_va = npv->pv_va;
			free((caddr_t)npv, M_VMPVENT);
		} else
			pv->pv_pmap = NULL;
	} else {
		register struct pvlist *prev;

		for (prev = pv;; prev = npv, npv = npv->pv_next) {
			pmap_stats.ps_unlink_pvsearch++;
			if (npv == NULL)
				panic("pv_unlink");
			if (npv->pv_pmap == pm && npv->pv_va == va)
				break;
		}
		prev->pv_next = npv->pv_next;
		free((caddr_t)npv, M_VMPVENT);
	}
	if (pv->pv_flags & PV_NC) {
		/*
		 * Not cached: check to see if we can fix that now.
		 */
		va = pv->pv_va;
		for (npv = pv->pv_next; npv != NULL; npv = npv->pv_next)
			if (BADALIAS(va, npv->pv_va))
				return;
		pv->pv_flags &= ~PV_NC;
		pv_changepte(pv, 0, PG_NC);
	}
}

/*
 * pv_link is the inverse of pv_unlink, and is used in pmap_enter.
 * It returns PG_NC if the (new) pvlist says that the address cannot
 * be cached.
 */
static int
pv_link(pv, pm, va)
	register struct pvlist *pv;
	register struct pmap *pm;
	register vm_offset_t va;
{
	register struct pvlist *npv;
	register int ret;

	if (pv->pv_pmap == NULL) {
		/* no pvlist entries yet */
		pmap_stats.ps_enter_firstpv++;
		pv->pv_next = NULL;
		pv->pv_pmap = pm;
		pv->pv_va = va;
		return (0);
	}
	/*
	 * Before entering the new mapping, see if
	 * it will cause old mappings to become aliased
	 * and thus need to be `discached'.
	 */
	ret = 0;
	pmap_stats.ps_enter_secondpv++;
	if (pv->pv_flags & PV_NC) {
		/* already uncached, just stay that way */
		ret = PG_NC;
	} else {
		/* MAY NEED TO DISCACHE ANYWAY IF va IS IN DVMA SPACE? */
		for (npv = pv; npv != NULL; npv = npv->pv_next) {
			if (BADALIAS(va, npv->pv_va)) {
				pv->pv_flags |= PV_NC;
				pv_changepte(pv, ret = PG_NC, 0);
				break;
			}
		}
	}
	npv = (struct pvlist *)malloc(sizeof *npv, M_VMPVENT, M_WAITOK);
	npv->pv_next = pv->pv_next;
	npv->pv_pmap = pm;
	npv->pv_va = va;
	pv->pv_next = npv;
	return (ret);
}
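
/*
 * Note on BADALIAS (illustrative): with a virtually indexed cache,
 * two mappings of the same physical page are dangerous only if they
 * can land in different cache lines, i.e. if the VAs differ in the
 * bits that index the cache.  BADALIAS(va1, va2) (from the machine
 * headers) is assumed to test roughly ((va1 ^ va2) & cache-index-mask)
 * != 0; when it fires, pv_link() above marks every mapping of the
 * page uncacheable, and pv_unlink() undoes that once the last
 * aliasing mapping goes away.
 */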

/*
 * Walk the given list and flush the cache for each (MI) page that is
 * potentially in the cache.
 */
pv_flushcache(pv)
	register struct pvlist *pv;
{
	register struct pmap *pm;
	register int i, s, ctx;

	write_user_windows();	/* paranoia? */

	s = splpmap();		/* XXX extreme paranoia */
	if ((pm = pv->pv_pmap) != NULL) {
		ctx = getcontext();
		for (;;) {
			if (pm->pm_ctx) {
				setcontext(pm->pm_ctxnum);
				cache_flush_page(pv->pv_va);
			}
			pv = pv->pv_next;
			if (pv == NULL)
				break;
			pm = pv->pv_pmap;
		}
		setcontext(ctx);
	}
	splx(s);
}

/*----------------------------------------------------------------*/

/*
 * At last, pmap code.
 */

/*
 * Bootstrap the system enough to run with VM enabled.
 *
 * nmmu is the number of mmu entries (``PMEGs'');
 * nctx is the number of contexts.
 */
void
pmap_bootstrap(nmmu, nctx)
	int nmmu, nctx;
{
	register union ctxinfo *ci;
	register struct mmuentry *me;
	register int i, j, n, z, vs;
	register caddr_t p;
	register void (*rom_setmap)(int ctx, caddr_t va, int pmeg);
	int lastpage;
	extern char end[];
	extern caddr_t reserve_dumppages(caddr_t);

	ncontext = nctx;

	/*
	 * Last segment is the `invalid' one (one PMEG of pte's with !pg_v).
	 * It will never be used for anything else.
	 */
	seginval = --nmmu;

	/*
	 * Preserve the monitor ROM's reserved VM region, so that
	 * we can use L1-A or the monitor's debugger.  As a side
	 * effect we map the ROM's reserved VM into all contexts
	 * (otherwise L1-A crashes the machine!).
	 */
	nmmu = mmu_reservemon(nmmu);

	/*
	 * Allocate and clear mmu entry and context structures.
	 */
	p = end;
	mmuentry = me = (struct mmuentry *)p;
	p += nmmu * sizeof *me;
	ctxinfo = ci = (union ctxinfo *)p;
	p += nctx * sizeof *ci;
	bzero(end, p - end);

	/*
	 * Set up the `constants' for the call to vm_init()
	 * in main().  All pages beginning at p (rounded up to
	 * the next whole page) and continuing through the number
	 * of available pages are free, but they start at a higher
	 * virtual address.  This gives us two mappable MD pages
	 * for pmap_zero_page and pmap_copy_page, and one MI page
	 * for /dev/mem, all with no associated physical memory.
	 */
	p = (caddr_t)(((u_int)p + NBPG - 1) & ~PGOFSET);
	avail_start = (int)p - KERNBASE;
	avail_end = init_translations() << PGSHIFT;
	i = (int)p;
	vpage[0] = p, p += NBPG;
	vpage[1] = p, p += NBPG;
	vmempage = p, p += NBPG;
	p = reserve_dumppages(p);
	virtual_avail = (vm_offset_t)p;
	virtual_end = VM_MAX_KERNEL_ADDRESS;

	p = (caddr_t)i;			/* retract to first free phys */

	/*
	 * Initialize the kernel pmap.
	 */
	{
		register struct kpmap *k = &kernel_pmap_store;

		/* kernel_pmap = (struct pmap *)k; */
		k->pm_ctx = ctxinfo;
		/* k->pm_ctxnum = 0; */
		simple_lock_init(&k->pm_lock);
		k->pm_refcount = 1;
		/* k->pm_mmuforw = 0; */
		k->pm_mmuback = &k->pm_mmuforw;
		k->pm_segmap = &k->pm_rsegmap[-NUSEG];
		k->pm_pte = &k->pm_rpte[-NUSEG];
		k->pm_npte = &k->pm_rnpte[-NUSEG];
		for (i = NKSEG; --i >= 0;)
			k->pm_rsegmap[i] = seginval;
	}
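
	/*
	 * Note (illustrative): pm_segmap/pm_pte/pm_npte above are biased
	 * by -NUSEG because kernel virtual segment numbers start at NUSEG;
	 * this way pm_segmap[VA_VSEG(va)] indexes the NKSEG-entry
	 * pm_rsegmap[] correctly for kernel addresses without an explicit
	 * subtraction at each use.
	 */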

	/*
	 * All contexts are free except the kernel's.
	 *
	 * XXX sun4c could use context 0 for users?
	 */
	ci->c_pmap = kernel_pmap;
	ctx_freelist = ci + 1;
	for (i = 1; i < ncontext; i++) {
		ci++;
		ci->c_nextfree = ci + 1;
	}
	ci->c_nextfree = NULL;
	ctx_kick = 0;
	ctx_kickdir = -1;

	/* me_freelist = NULL; */	/* already NULL */

	/*
	 * Init mmu entries that map the kernel physical addresses.
	 * If the page bits in p are 0, we filled the last segment
	 * exactly (now how did that happen?); if not, it is
	 * the last page filled in the last segment.
	 *
	 * All the other MMU entries are free.
	 *
	 * THIS ASSUMES SEGMENT i IS MAPPED BY MMU ENTRY i DURING THE
	 * BOOT PROCESS
	 */
	z = ((((u_int)p + NBPSG - 1) & ~SGOFSET) - KERNBASE) >> SGSHIFT;
	lastpage = VA_VPG(p);
	if (lastpage == 0)
		lastpage = NPTESG;
	p = (caddr_t)KERNBASE;		/* first va */
	vs = VA_VSEG(KERNBASE);		/* first virtual segment */
	rom_setmap = promvec->pv_setctxt;
	for (i = 0;;) {
		/*
		 * Distribute each kernel segment into all contexts.
		 * This is done through the monitor ROM, rather than
		 * directly here: if we do a setcontext we will fault,
		 * as we are not (yet) mapped in any other context.
		 */
		for (j = 1; j < nctx; j++)
			rom_setmap(j, p, i);

		/* set up the mmu entry */
		me->me_pmeg = i;
		insque(me, me_locked.mh_prev);
		/* me->me_pmforw = NULL; */
		me->me_pmback = kernel_pmap->pm_mmuback;
		*kernel_pmap->pm_mmuback = me;
		kernel_pmap->pm_mmuback = &me->me_pmforw;
		me->me_pmap = kernel_pmap;
		me->me_vseg = vs;
		kernel_pmap->pm_segmap[vs] = i;
		n = ++i < z ? NPTESG : lastpage;
		kernel_pmap->pm_npte[vs] = n;
		me++;
		vs++;
		if (i < z) {
			p += NBPSG;
			continue;
		}
		/*
		 * Unmap the pages, if any, that are not part of
		 * the final segment, i.e., pages n..NPTESG-1.
		 */
		for (p += n * NBPG; n < NPTESG; n++, p += NBPG)
			setpte(p, 0);
		break;
	}
	for (; i < nmmu; i++, me++) {
		me->me_pmeg = i;
		me->me_next = me_freelist;
		/* me->me_pmap = NULL; */
		me_freelist = me;
	}

	/*
	 * write protect & encache kernel text;
	 * set red zone at kernel base; enable cache on message buffer.
	 */
	{
		extern char etext[], trapbase[];
#ifdef KGDB
		register int mask = ~PG_NC;	/* XXX chgkprot is busted */
#else
		register int mask = ~(PG_W | PG_NC);
#endif
		for (p = trapbase; p < etext; p += NBPG)
			setpte(p, getpte(p) & mask);
		p = (caddr_t)KERNBASE;
		setpte(p, 0);
		p += NBPG;
		setpte(p, getpte(p) & ~PG_NC);
	}

	/*
	 * Grab physical memory list (for /dev/mem).
	 */
	npmemarr = makememarr(pmemarr, MA_SIZE, MEMARR_TOTALPHYS);
}

/*
 * Bootstrap memory allocator.  This function allows for early dynamic
 * memory allocation until the virtual memory system has been bootstrapped.
 * After that point, either kmem_alloc or malloc should be used.  This
 * function works by stealing pages from the (to be) managed page pool,
 * stealing virtual address space, then mapping the pages and zeroing them.
 *
 * It should be used from pmap_bootstrap till vm_page_startup, afterwards
 * it cannot be used, and will generate a panic if tried.  Note that this
 * memory will never be freed, and in essence it is wired down.
 */
void *
pmap_bootstrap_alloc(size)
	int size;
{
	register void *mem;
	extern int vm_page_startup_initialized;

	if (vm_page_startup_initialized)
		panic("pmap_bootstrap_alloc: called after startup initialized");
	size = round_page(size);
	mem = (void *)virtual_avail;
	virtual_avail = pmap_map(virtual_avail, avail_start,
	    avail_start + size, VM_PROT_READ|VM_PROT_WRITE);
	avail_start += size;
	bzero((void *)mem, size);
	return (mem);
}

/*
 * Initialize the pmap module.
 */
void
pmap_init(phys_start, phys_end)
	register vm_offset_t phys_start, phys_end;
{
	register vm_size_t s;

	if (PAGE_SIZE != NBPG)
		panic("pmap_init: CLSIZE!=1");
	/*
	 * Allocate and clear memory for the pv_table.
	 */
	s = sizeof(struct pvlist) * atop(phys_end - phys_start);
	s = round_page(s);
	pv_table = (struct pvlist *)kmem_alloc(kernel_map, s);
	bzero((caddr_t)pv_table, s);
	vm_first_phys = phys_start;
	vm_num_phys = phys_end - phys_start;
}

/*
 * Map physical addresses into kernel VM.
 */
vm_offset_t
pmap_map(va, pa, endpa, prot)
	register vm_offset_t va, pa, endpa;
	register int prot;
{
	register int pgsize = PAGE_SIZE;

	while (pa < endpa) {
		pmap_enter(kernel_pmap, va, pa, prot, 1);
		va += pgsize;
		pa += pgsize;
	}
	return (va);
}

/*
 * Create and return a physical map.
 *
 * If size is nonzero, the map is useless. (ick)
 */
struct pmap *
pmap_create(size)
	vm_size_t size;
{
	register struct pmap *pm;

	if (size)
		return (NULL);
	pm = (struct pmap *)malloc(sizeof *pm, M_VMPMAP, M_WAITOK);
#ifdef DEBUG
	if (pmapdebug & PDB_CREATE)
		printf("pmap_create: created %x\n", pm);
#endif
	bzero((caddr_t)pm, sizeof *pm);
	pmap_pinit(pm);
	return (pm);
}

/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
void
pmap_pinit(pm)
	register struct pmap *pm;
{
	register int i;

#ifdef DEBUG
	if (pmapdebug & PDB_CREATE)
		printf("pmap_pinit(%x)\n", pm);
#endif
	/* pm->pm_ctx = NULL; */
	simple_lock_init(&pm->pm_lock);
	pm->pm_refcount = 1;
	/* pm->pm_mmuforw = NULL; */
	pm->pm_mmuback = &pm->pm_mmuforw;
	pm->pm_segmap = pm->pm_rsegmap;
	pm->pm_pte = pm->pm_rpte;
	pm->pm_npte = pm->pm_rnpte;
	for (i = NUSEG; --i >= 0;)
		pm->pm_rsegmap[i] = seginval;
	/* bzero((caddr_t)pm->pm_rpte, sizeof pm->pm_rpte); */
	/* bzero((caddr_t)pm->pm_rnpte, sizeof pm->pm_rnpte); */
}

/*
 * Retire the given pmap from service.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_destroy(pm)
	register struct pmap *pm;
{
	int count;

	if (pm == NULL)
		return;
#ifdef DEBUG
	if (pmapdebug & PDB_DESTROY)
		printf("pmap_destroy(%x)\n", pm);
#endif
	simple_lock(&pm->pm_lock);
	count = --pm->pm_refcount;
	simple_unlock(&pm->pm_lock);
	if (count == 0) {
		pmap_release(pm);
		free((caddr_t)pm, M_VMPMAP);
	}
}
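
#ifdef notdef
/*
 * Sketch (disabled, illustrative; va, pa, and len are hypothetical):
 * mapping a physical range read/write into kernel VM with pmap_map()
 * above.  The return value is the first unused virtual address past
 * the mapping, ready for the next such call.
 */
	va = pmap_map(va, pa, pa + len, VM_PROT_READ | VM_PROT_WRITE);
#endif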

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 */
void
pmap_release(pm)
	register struct pmap *pm;
{
	register union ctxinfo *c;
	register int s = splpmap();	/* paranoia */

#ifdef DEBUG
	if (pmapdebug & PDB_DESTROY)
		printf("pmap_release(%x)\n", pm);
#endif
	if (pm->pm_mmuforw)
		panic("pmap_release mmuforw");
	if ((c = pm->pm_ctx) != NULL) {
		if (pm->pm_ctxnum == 0)
			panic("pmap_release: releasing kernel");
		ctx_free(pm);
	}
	splx(s);
}

/*
 * Add a reference to the given pmap.
 */
void
pmap_reference(pm)
	struct pmap *pm;
{

	if (pm != NULL) {
		simple_lock(&pm->pm_lock);
		pm->pm_refcount++;
		simple_unlock(&pm->pm_lock);
	}
}

static int pmap_rmk(struct pmap *, vm_offset_t, vm_offset_t, int, int, int);
static int pmap_rmu(struct pmap *, vm_offset_t, vm_offset_t, int, int, int);

/*
 * Remove the given range of mapping entries.
 * The starting and ending addresses are already rounded to pages.
 * Sheer lunacy: pmap_remove is often asked to remove nonexistent
 * mappings.
 */
void
pmap_remove(pm, va, endva)
	register struct pmap *pm;
	register vm_offset_t va, endva;
{
	register vm_offset_t nva;
	register int vseg, nleft, s, ctx;
	register int (*rm)(struct pmap *, vm_offset_t, vm_offset_t,
	    int, int, int);

	if (pm == NULL)
		return;
#ifdef DEBUG
	if (pmapdebug & PDB_REMOVE)
		printf("pmap_remove(%x, %x, %x)\n", pm, va, endva);
#endif

	if (pm == kernel_pmap) {
		/*
		 * Removing from kernel address space.
		 */
		rm = pmap_rmk;
	} else {
		/*
		 * Removing from user address space.
		 */
		write_user_windows();
		rm = pmap_rmu;
	}

	ctx = getcontext();
	s = splpmap();		/* XXX conservative */
	simple_lock(&pm->pm_lock);
	for (; va < endva; va = nva) {
		/* do one virtual segment at a time */
		vseg = VA_VSEG(va);
		nva = VSTOVA(vseg + 1);
		if (nva == 0 || nva > endva)
			nva = endva;
		if ((nleft = pm->pm_npte[vseg]) != 0)
			pm->pm_npte[vseg] = (*rm)(pm, va, nva,
			    vseg, nleft, pm->pm_segmap[vseg]);
	}
	simple_unlock(&pm->pm_lock);
	splx(s);
	setcontext(ctx);
}

#define	perftest
#ifdef perftest
/* counters, one per possible length */
int	rmk_vlen[NPTESG+1];	/* virtual length per rmk() call */
int	rmk_npg[NPTESG+1];	/* n valid pages per rmk() call */
int	rmk_vlendiff;		/* # times npg != vlen */
#endif

/*
 * The following magic number was chosen because:
 *	1. It is the same amount of work to cache_flush_page 4 pages
 *	   as to cache_flush_segment 1 segment (so at 4 the cost of
 *	   flush is the same).
 *	2. Flushing extra pages is bad (causes cache not to work).
 *	3. The current code, which malloc()s 5 pages for each process
 *	   for a user vmspace/pmap, almost never touches all 5 of those
 *	   pages.
 */
#define	PMAP_RMK_MAGIC	5	/* if > magic, use cache_flush_segment */
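
/*
 * Example of the per-segment split done by pmap_remove() above
 * (illustrative, assuming 256 KB segments): removing [0x40000, 0xa0000)
 * is handed to the rm function as [0x40000, 0x80000) in virtual
 * segment 1, then [0x80000, 0xa0000) in virtual segment 2, each with
 * that segment's remaining PTE count and PMEG number.
 */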

/*
 * Remove a range contained within a single segment.
 * These are egregiously complicated routines.
 */

/* remove from kernel, return new nleft */
static int
pmap_rmk(pm, va, endva, vseg, nleft, pmeg)
	register struct pmap *pm;
	register vm_offset_t va, endva;
	register int vseg, nleft, pmeg;
{
	register int i, tpte, perpage, npg;
	register struct pvlist *pv;
#ifdef perftest
	register int nvalid;
#endif

#ifdef DEBUG
	if (pmeg == seginval)
		panic("pmap_rmk: not loaded");
	if (pm->pm_ctx == NULL)
		panic("pmap_rmk: lost context");
#endif

	setcontext(0);
	/* decide how to flush cache */
	npg = (endva - va) >> PGSHIFT;
	if (npg > PMAP_RMK_MAGIC) {
		/* flush the whole segment */
		perpage = 0;
#ifdef notdef
		if (vactype != VAC_NONE)
#endif
			cache_flush_segment(vseg);
	} else {
		/* flush each page individually; some never need flushing */
		perpage = 1;
	}
#ifdef perftest
	nvalid = 0;
#endif
	while (va < endva) {
		tpte = getpte(va);
		if ((tpte & PG_V) == 0) {
			va += PAGE_SIZE;
			continue;
		}
		pv = NULL;
		/* if cacheable, flush page as needed */
		if ((tpte & PG_NC) == 0) {
#ifdef perftest
			nvalid++;
#endif
			if (perpage)
				cache_flush_page(va);
		}
		if ((tpte & PG_TYPE) == PG_OBMEM) {
			i = ptoa(HWTOSW(tpte & PG_PFNUM));
			if (managed(i)) {
				pv = pvhead(i);
				pv->pv_flags |= MR(tpte);
				pv_unlink(pv, pm, va);
			}
		}
		nleft--;
		setpte(va, 0);
		va += NBPG;
	}
#ifdef perftest
	rmk_vlen[npg]++;
	rmk_npg[nvalid]++;
	if (npg != nvalid)
		rmk_vlendiff++;
#endif

	/*
	 * If the segment is all gone, remove it from everyone and
	 * free the MMU entry.
	 */
	if (nleft == 0) {
		va = VSTOVA(vseg);	/* retract */
		setsegmap(va, seginval);
		for (i = ncontext; --i > 0;) {
			setcontext(i);
			setsegmap(va, seginval);
		}
		me_free(pm, pmeg);
	}
	return (nleft);
}

#ifdef perftest
/* as before but for pmap_rmu */
int	rmu_vlen[NPTESG+1];	/* virtual length per rmu() call */
int	rmu_npg[NPTESG+1];	/* n valid pages per rmu() call */
int	rmu_vlendiff;		/* # times npg != vlen */
int	rmu_noflush;		/* # times rmu does not need to flush at all */
#endif

/*
 * Just like PMAP_RMK_MAGIC, but we have a different threshold.
 * Note that this may well deserve further tuning work.
 */
#define	PMAP_RMU_MAGIC	4	/* if > magic, use cache_flush_segment */

/* remove from user */
static int
pmap_rmu(pm, va, endva, vseg, nleft, pmeg)
	register struct pmap *pm;
	register vm_offset_t va, endva;
	register int vseg, nleft, pmeg;
{
	register int *pte0, i, pteva, tpte, perpage, npg;
	register struct pvlist *pv;
	register int doflush;	/* functional even when perftest is off */
#ifdef perftest
	register int nvalid;
#endif

	pte0 = pm->pm_pte[vseg];
	if (pmeg == seginval) {
		register int *pte = pte0 + VA_VPG(va);

		/*
		 * PTEs are not in MMU.  Just invalidate software copies.
		 */
		for (; va < endva; pte++, va += PAGE_SIZE) {
			tpte = *pte;
			if ((tpte & PG_V) == 0) {
				/* nothing to remove (braindead VM layer) */
				continue;
			}
			if ((tpte & PG_TYPE) == PG_OBMEM) {
				i = ptoa(HWTOSW(tpte & PG_PFNUM));
				if (managed(i))
					pv_unlink(pvhead(i), pm, va);
			}
			nleft--;
			*pte = 0;
		}
		if (nleft == 0) {
			free((caddr_t)pte0, M_VMPMAP);
			pm->pm_pte[vseg] = NULL;
		}
		return (nleft);
	}

	/*
	 * PTEs are in MMU.  Invalidate in hardware, update ref &
	 * mod bits, and flush cache if required.
	 */
	if (pm->pm_ctx) {
		/* process has a context, must flush cache */
		npg = (endva - va) >> PGSHIFT;
		doflush = 1;	/* controls flushing, not just a statistic */
#ifdef perftest
		nvalid = 0;
#endif
		setcontext(pm->pm_ctxnum);
		if (npg > PMAP_RMU_MAGIC) {
			perpage = 0;	/* flush the whole segment */
#ifdef notdef
			if (vactype != VAC_NONE)
#endif
				cache_flush_segment(vseg);
		} else
			perpage = 1;
		pteva = va;
	} else {
		/* no context, use context 0; cache flush unnecessary */
		setcontext(0);
		/* XXX use per-cpu pteva? */
		setsegmap(0, pmeg);
		pteva = VA_VPG(va) * NBPG;
		perpage = 0;
		doflush = 0;
#ifdef perftest
		npg = 0;
		nvalid = 0;
		rmu_noflush++;
#endif
	}
	for (; va < endva; pteva += PAGE_SIZE, va += PAGE_SIZE) {
		tpte = getpte(pteva);
		if ((tpte & PG_V) == 0)
			continue;
		pv = NULL;
		/* if cacheable, flush page as needed */
		if (doflush && (tpte & PG_NC) == 0) {
#ifdef perftest
			nvalid++;
#endif
			if (perpage)
				cache_flush_page(va);
		}
		if ((tpte & PG_TYPE) == PG_OBMEM) {
			i = ptoa(HWTOSW(tpte & PG_PFNUM));
			if (managed(i)) {
				pv = pvhead(i);
				pv->pv_flags |= MR(tpte);
				pv_unlink(pv, pm, va);
			}
		}
		nleft--;
		setpte(pteva, 0);
	}
#ifdef perftest
	if (doflush) {
		rmu_vlen[npg]++;
		rmu_npg[nvalid]++;
		if (npg != nvalid)
			rmu_vlendiff++;
	}
#endif

	/*
	 * If the segment is all gone, and the context is loaded, give
	 * the segment back.
	 */
	if (nleft == 0 && pm->pm_ctx != NULL) {
		va = VSTOVA(vseg);	/* retract */
		setsegmap(va, seginval);
		free((caddr_t)pte0, M_VMPMAP);
		pm->pm_pte[vseg] = NULL;
		me_free(pm, pmeg);
	}
	return (nleft);
}
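
/*
 * Aside (illustrative): pmap_rmu() above and several routines below
 * share the same trick for touching the PTEs of a pmap that has no
 * context: switch to context 0, map the PMEG at virtual address 0
 * (a user address, hence free in the kernel's context), and address
 * PTE n at n * NBPG.  E.g. for va = 0x123456, assuming 4 KB pages and
 * 64-page segments, VA_VPG(va) = 0x23, so its PTE is reached at
 * pteva = 0x23000 inside the borrowed mapping.
 */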

/*
 * Lower (make more strict) the protection on the specified
 * physical page.
 *
 * There are only two cases: either the protection is going to 0
 * (in which case we do the dirty work here), or it is going to
 * read-only (in which case pv_changepte does the trick).
 */
void
pmap_page_protect(pa, prot)
	vm_offset_t pa;
	vm_prot_t prot;
{
	register struct pvlist *pv, *pv0, *npv;
	register struct pmap *pm;
	register int *pte;
	register int va, vseg, pteva, tpte;
	register int flags, nleft, i, pmeg, s, ctx, doflush;

#ifdef DEBUG
	if ((pmapdebug & PDB_CHANGEPROT) ||
	    (pmapdebug & PDB_REMOVE && prot == VM_PROT_NONE))
		printf("pmap_page_protect(%x, %x)\n", pa, prot);
#endif
	/*
	 * Skip unmanaged pages, or operations that do not take
	 * away write permission.
	 */
	if (!managed(pa) || prot & VM_PROT_WRITE)
		return;
	write_user_windows();	/* paranoia */
	if (prot & VM_PROT_READ) {
		pv_changepte(pvhead(pa), 0, PG_W);
		return;
	}

	/*
	 * Remove all access to all people talking to this page.
	 * Walk down PV list, removing all mappings.
	 * The logic is much like that for pmap_remove,
	 * but we know we are removing exactly one page.
	 */
	pv = pvhead(pa);
	s = splpmap();
	if ((pm = pv->pv_pmap) == NULL) {
		splx(s);
		return;
	}
	ctx = getcontext();
	pv0 = pv;
	flags = pv->pv_flags & ~PV_NC;
	for (;; pm = pv->pv_pmap) {
		va = pv->pv_va;
		vseg = VA_VSEG(va);
		if ((nleft = pm->pm_npte[vseg]) == 0)
			panic("pmap_remove_all: empty vseg");
		nleft--;
		pm->pm_npte[vseg] = nleft;
		pmeg = pm->pm_segmap[vseg];
		pte = pm->pm_pte[vseg];
		if (pmeg == seginval) {
			if (nleft) {
				pte += VA_VPG(va);
				*pte = 0;
			} else {
				free((caddr_t)pte, M_VMPMAP);
				pm->pm_pte[vseg] = NULL;
			}
			goto nextpv;
		}
		if (pm->pm_ctx) {
			setcontext(pm->pm_ctxnum);
			pteva = va;
#ifdef notdef
			doflush = vactype != VAC_NONE;
#else
			doflush = 1;
#endif
		} else {
			setcontext(0);
			/* XXX use per-cpu pteva? */
			setsegmap(0, pmeg);
			pteva = VA_VPG(va) * NBPG;
			doflush = 0;
		}
		if (nleft) {
			if (doflush)
				cache_flush_page(va);
			tpte = getpte(pteva);
			if ((tpte & PG_V) == 0)
				panic("pmap_page_protect !PG_V 1");
			flags |= MR(tpte);
			setpte(pteva, 0);
		} else {
			if (doflush)
				cache_flush_page(va);
			tpte = getpte(pteva);
			if ((tpte & PG_V) == 0)
				panic("pmap_page_protect !PG_V 2");
			flags |= MR(tpte);
			if (pm->pm_ctx) {
				setsegmap(va, seginval);
				if (pm == kernel_pmap) {
					for (i = ncontext; --i > 0;) {
						setcontext(i);
						setsegmap(va, seginval);
					}
					goto skipptefree;
				}
			}
			free((caddr_t)pte, M_VMPMAP);
			pm->pm_pte[vseg] = NULL;
		skipptefree:
			me_free(pm, pmeg);
		}
	nextpv:
		npv = pv->pv_next;
		if (pv != pv0)
			free((caddr_t)pv, M_VMPVENT);
		if ((pv = npv) == NULL)
			break;
	}
	pv0->pv_pmap = NULL;
	pv0->pv_flags = flags;
	setcontext(ctx);
	splx(s);
}
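
/*
 * Illustrative call patterns for pmap_page_protect() above: the MI VM
 * layer calls it with VM_PROT_READ to write-protect every mapping of
 * a page (done via pv_changepte), and with VM_PROT_NONE to strip all
 * mappings entirely (the long pvlist walk above).
 */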

/*
 * Lower (make more strict) the protection on the specified
 * range of this pmap.
 *
 * There are only two cases: either the protection is going to 0
 * (in which case we call pmap_remove to do the dirty work), or
 * it is going from read/write to read-only.  The latter is
 * fairly easy.
 */
void
pmap_protect(pm, sva, eva, prot)
	register struct pmap *pm;
	vm_offset_t sva, eva;
	vm_prot_t prot;
{
	register int va, nva, vseg, pteva, pmeg;
	register int s, ctx;

	if (pm == NULL || prot & VM_PROT_WRITE)
		return;
	if ((prot & VM_PROT_READ) == 0) {
		pmap_remove(pm, sva, eva);
		return;
	}

	write_user_windows();
	ctx = getcontext();
	s = splpmap();
	simple_lock(&pm->pm_lock);

	for (va = sva; va < eva;) {
		vseg = VA_VSEG(va);
		nva = VSTOVA(vseg + 1);
		if (nva == 0)		/* cannot happen */
			panic("pmap_protect: last segment");
		if (nva > eva)
			nva = eva;
		if (pm->pm_npte[vseg] == 0) {
			va = nva;
			continue;
		}
		pmeg = pm->pm_segmap[vseg];
		if (pmeg == seginval) {
			register int *pte = &pm->pm_pte[vseg][VA_VPG(va)];

			/* not in MMU; just clear PG_W from core copies */
			for (; va < nva; va += NBPG)
				*pte++ &= ~PG_W;
		} else {
			/* in MMU: take away write bits from MMU PTEs */
			if (
#ifdef notdef
			    vactype != VAC_NONE &&
#endif
			    pm->pm_ctx) {
				register int tpte;

				/*
				 * Flush cache so that any existing cache
				 * tags are updated.  This is really only
				 * needed for PTEs that lose PG_W.
				 */
				setcontext(pm->pm_ctxnum);
				for (; va < nva; va += NBPG) {
					tpte = getpte(va);
					pmap_stats.ps_npg_prot_all++;
					if (tpte & PG_W) {
						pmap_stats.ps_npg_prot_actual++;
						cache_flush_page(va);
						setpte(va, tpte & ~PG_W);
					}
				}
			} else {
				register int pteva;

				/*
				 * No context, hence not cached;
				 * just update PTEs.
				 */
				setcontext(0);
				/* XXX use per-cpu pteva? */
				setsegmap(0, pmeg);
				pteva = VA_VPG(va) * NBPG;
				for (; va < nva; pteva += NBPG, va += NBPG)
					setpte(pteva, getpte(pteva) & ~PG_W);
			}
		}
	}
	simple_unlock(&pm->pm_lock);
	splx(s);
}

/*
 * Change the protection and/or wired status of the given (MI) virtual page.
 * XXX: should have separate function (or flag) telling whether only wiring
 * is changing.
 */
void
pmap_changeprot(pm, va, prot, wired)
	register struct pmap *pm;
	register vm_offset_t va;
	vm_prot_t prot;
	int wired;
{
	register int vseg, tpte, newprot, pmeg, ctx, i, s;

#ifdef DEBUG
	if (pmapdebug & PDB_CHANGEPROT)
		printf("pmap_changeprot(%x, %x, %x, %x)\n",
		    pm, va, prot, wired);
#endif

	write_user_windows();	/* paranoia */

	if (pm == kernel_pmap)
		newprot = prot & VM_PROT_WRITE ? PG_S|PG_W : PG_S;
	else
		newprot = prot & VM_PROT_WRITE ? PG_W : 0;

/*
 * Change the protection and/or wired status of the given (MI) virtual page.
 * XXX: should have separate function (or flag) telling whether only wiring
 * is changing.
 */
void
pmap_changeprot(pm, va, prot, wired)
	register struct pmap *pm;
	register vm_offset_t va;
	vm_prot_t prot;
	int wired;
{
	register int vseg, tpte, newprot, pmeg, ctx, i, s;

#ifdef DEBUG
	if (pmapdebug & PDB_CHANGEPROT)
		printf("pmap_changeprot(%x, %x, %x, %x)\n",
		    pm, va, prot, wired);
#endif

	write_user_windows();	/* paranoia */

	if (pm == kernel_pmap)
		newprot = prot & VM_PROT_WRITE ? PG_S|PG_W : PG_S;
	else
		newprot = prot & VM_PROT_WRITE ? PG_W : 0;
	vseg = VA_VSEG(va);
	s = splpmap();		/* conservative */
	pmap_stats.ps_changeprots++;

	/* update PTEs in software or hardware */
	if ((pmeg = pm->pm_segmap[vseg]) == seginval) {
		register int *pte = &pm->pm_pte[vseg][VA_VPG(va)];

		/* update in software */
		if ((*pte & PG_PROT) == newprot)
			goto useless;
		*pte = (*pte & ~PG_PROT) | newprot;
	} else {
		/* update in hardware */
		ctx = getcontext();
		if (pm->pm_ctx) {
			/* use current context; flush writeback cache */
			setcontext(pm->pm_ctxnum);
			tpte = getpte(va);
			if ((tpte & PG_PROT) == newprot) {
				setcontext(ctx);	/* restore context */
				goto useless;
			}
			if (vactype == VAC_WRITEBACK &&
			    (newprot & PG_W) == 0 &&
			    (tpte & (PG_W | PG_NC)) == PG_W)
				cache_flush_page((int)va);
		} else {
			setcontext(0);
			/* XXX use per-cpu va? */
			setsegmap(0, pmeg);
			va = VA_VPG(va) * NBPG;
			tpte = getpte(va);
			if ((tpte & PG_PROT) == newprot) {
				setcontext(ctx);	/* restore context */
				goto useless;
			}
		}
		tpte = (tpte & ~PG_PROT) | newprot;
		setpte(va, tpte);
		setcontext(ctx);
	}
	splx(s);
	return;

useless:
	/* only wiring changed, and we ignore wiring */
	pmap_stats.ps_useless_changeprots++;
	splx(s);
}
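
/*
 * Sketch (not compiled): the PG_PROT encoding computed above, as a
 * hypothetical helper.  Kernel mappings always carry PG_S
 * (supervisor-only); PG_W grants write permission.
 */
#ifdef notdef
static int
example_pteprot(pm, prot)
	struct pmap *pm;
	vm_prot_t prot;
{

	return ((pm == kernel_pmap ? PG_S : 0) |
	    (prot & VM_PROT_WRITE ? PG_W : 0));
}
#endif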

/*
 * Insert (MI) physical page pa at virtual address va in the given pmap.
 * NB: the pa parameter includes type bits PMAP_OBIO, PMAP_NC as necessary.
 *
 * If pa is not in the `managed' range it will not be `bank mapped'.
 * This works during bootstrap only because the first 4MB happens to
 * map one-to-one.
 *
 * There may already be something else there, or we might just be
 * changing protections and/or wiring on an existing mapping.
 * XXX should have different entry points for changing!
 */
void
pmap_enter(pm, va, pa, prot, wired)
	register struct pmap *pm;
	vm_offset_t va, pa;
	vm_prot_t prot;
	int wired;
{
	register struct pvlist *pv;
	register int pteproto, ctx;

	if (pm == NULL)
		return;
#ifdef DEBUG
	if (pmapdebug & PDB_ENTER)
		printf("pmap_enter(%x, %x, %x, %x, %x)\n",
		    pm, va, pa, prot, wired);
#endif

	pteproto = PG_V | ((pa & PMAP_TNC) << PG_TNC_SHIFT);
	pa &= ~PMAP_TNC;
	/*
	 * Set up prototype for new PTE.  Cannot set PG_NC from PV_NC yet
	 * since the pvlist no-cache bit might change as a result of the
	 * new mapping.
	 */
	if (managed(pa)) {
		pteproto |= SWTOHW(atop(pa));
		pv = pvhead(pa);
	} else {
		pteproto |= atop(pa) & PG_PFNUM;
		pv = NULL;
	}
	if (prot & VM_PROT_WRITE)
		pteproto |= PG_W;

	ctx = getcontext();
	if (pm == kernel_pmap)
		pmap_enk(pm, va, prot, wired, pv, pteproto | PG_S);
	else
		pmap_enu(pm, va, prot, wired, pv, pteproto);
	setcontext(ctx);
}

/* enter new (or change existing) kernel mapping */
pmap_enk(pm, va, prot, wired, pv, pteproto)
	register struct pmap *pm;
	vm_offset_t va;
	vm_prot_t prot;
	int wired;
	register struct pvlist *pv;
	register int pteproto;
{
	register int vseg, tpte, pmeg, i, s;

	vseg = VA_VSEG(va);
	s = splpmap();		/* XXX way too conservative */
	if (pm->pm_segmap[vseg] != seginval &&
	    (tpte = getpte(va)) & PG_V) {
		register int addr = tpte & PG_PFNUM;

		/* old mapping exists */
		if (addr == (pteproto & PG_PFNUM)) {
			/* just changing protection and/or wiring */
			splx(s);
			pmap_changeprot(pm, va, prot, wired);
			return;
		}

		/*printf("pmap_enk: changing existing va=>pa entry\n");*/
		/*
		 * Switcheroo: changing pa for this va.
		 * If old pa was managed, remove from pvlist.
		 * If old page was cached, flush cache.
		 */
		addr = ptoa(HWTOSW(addr));
		if (managed(addr))
			pv_unlink(pvhead(addr), pm, va);
		if (
#ifdef notdef
		    vactype != VAC_NONE &&
#endif
		    (tpte & PG_NC) == 0) {
			setcontext(0);	/* ??? */
			cache_flush_page((int)va);
		}
	} else {
		/* adding new entry */
		pm->pm_npte[vseg]++;
	}

	/*
	 * If the new mapping is for a managed PA, enter into pvlist.
	 * Note that the mapping for a malloc page will always be
	 * unique (hence will never cause a second call to malloc).
	 */
	if (pv != NULL)
		pteproto |= pv_link(pv, pm, va);

	pmeg = pm->pm_segmap[vseg];
	if (pmeg == seginval) {
		register int tva;

		/*
		 * Allocate an MMU entry now (on locked list),
		 * and map it into every context.  Set all its
		 * PTEs invalid (we will then overwrite one, but
		 * this is more efficient than looping twice).
		 */
#ifdef DEBUG
		if (pm->pm_ctx == NULL || pm->pm_ctxnum != 0)
			panic("pmap_enk: kern seg but no kern ctx");
#endif
		pmeg = me_alloc(&me_locked, pm, vseg)->me_pmeg;
		pm->pm_segmap[vseg] = pmeg;
		i = ncontext - 1;
		do {
			setcontext(i);
			setsegmap(va, pmeg);
		} while (--i >= 0);

		/* set all PTEs to invalid, then overwrite one PTE below */
		tva = VA_ROUNDDOWNTOSEG(va);
		i = NPTESG;
		do {
			setpte(tva, 0);
			tva += NBPG;
		} while (--i > 0);
	}

	/* ptes kept in hardware only */
	setpte(va, pteproto);
	splx(s);
}
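
/*
 * Sketch (not compiled): the PTE prototype pmap_enter assembles above
 * for the common case of a managed, writable kernel page with no
 * PMAP_OBIO/PMAP_NC type bits set.
 */
#ifdef notdef
	pteproto = PG_V | PG_S | PG_W | SWTOHW(atop(pa));
#endif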

/* enter new (or change existing) user mapping */
pmap_enu(pm, va, prot, wired, pv, pteproto)
	register struct pmap *pm;
	vm_offset_t va;
	vm_prot_t prot;
	int wired;
	register struct pvlist *pv;
	register int pteproto;
{
	register int vseg, *pte, tpte, pmeg, i, s, doflush;

	write_user_windows();	/* XXX conservative */
	vseg = VA_VSEG(va);
	s = splpmap();		/* XXX conservative */

	/*
	 * If there is no space in which the PTEs can be written
	 * while they are not in the hardware, this must be a new
	 * virtual segment.  Get PTE space and count the segment.
	 *
	 * TO SPEED UP CTX ALLOC, PUT SEGMENT BOUNDS STUFF HERE
	 * AND IN pmap_rmu()
	 */
retry:
	pte = pm->pm_pte[vseg];
	if (pte == NULL) {
		/* definitely a new mapping */
		register int size = NPTESG * sizeof *pte;

		pte = (int *)malloc((u_long)size, M_VMPMAP, M_WAITOK);
		if (pm->pm_pte[vseg] != NULL) {
			/* can this happen? */
			printf("pmap_enter: pte filled during sleep\n");
			free((caddr_t)pte, M_VMPMAP);
			goto retry;
		}
#ifdef DEBUG
		if (pm->pm_segmap[vseg] != seginval)
			panic("pmap_enter: new ptes, but not seginval");
#endif
		bzero((caddr_t)pte, size);
		pm->pm_pte[vseg] = pte;
		pm->pm_npte[vseg] = 1;
	} else {
		/* might be a change: fetch old pte */
		doflush = 0;
		if ((pmeg = pm->pm_segmap[vseg]) == seginval)
			tpte = pte[VA_VPG(va)];	/* software pte */
		else {
			if (pm->pm_ctx) {	/* hardware pte */
				setcontext(pm->pm_ctxnum);
				tpte = getpte(va);
				doflush = 1;
			} else {
				setcontext(0);
				/* XXX use per-cpu pteva? */
				setsegmap(0, pmeg);
				tpte = getpte(VA_VPG(va) * NBPG);
			}
		}
		if (tpte & PG_V) {
			register int addr = tpte & PG_PFNUM;

			/* old mapping exists */
			if (addr == (pteproto & PG_PFNUM)) {
				/* just changing prot and/or wiring */
				splx(s);
				/* caller should call this directly: */
				pmap_changeprot(pm, va, prot, wired);
				return;
			}
			/*
			 * Switcheroo: changing pa for this va.
			 * If old pa was managed, remove from pvlist.
			 * If old page was cached, flush cache.
			 */
			/*printf("%s[%d]: pmap_enu: changing existing va(%x)=>pa entry\n",
				curproc->p_comm, curproc->p_pid, va);*/
			addr = ptoa(HWTOSW(addr));
			if (managed(addr))
				pv_unlink(pvhead(addr), pm, va);
			if (
#ifdef notdef
			    vactype != VAC_NONE &&
#endif
			    doflush && (tpte & PG_NC) == 0)
				cache_flush_page((int)va);
		} else {
			/* adding new entry */
			pm->pm_npte[vseg]++;
		}
	}

	if (pv != NULL)
		pteproto |= pv_link(pv, pm, va);

	/*
	 * Update hardware or software PTEs (whichever are active).
	 */
	if ((pmeg = pm->pm_segmap[vseg]) != seginval) {
		/* ptes are in hardware */
		if (pm->pm_ctx)
			setcontext(pm->pm_ctxnum);
		else {
			setcontext(0);
			/* XXX use per-cpu pteva? */
			setsegmap(0, pmeg);
			va = VA_VPG(va) * NBPG;
		}
		setpte(va, pteproto);
	}
	/* update software copy; VA_VPG(va) is unchanged by the remap above */
	pte += VA_VPG(va);
	*pte = pteproto;

	splx(s);
}

/*
 * Change the wiring attribute for a map/virtual-address pair.
 */
/* ARGSUSED */
void
pmap_change_wiring(pm, va, wired)
	struct pmap *pm;
	vm_offset_t va;
	int wired;
{

	pmap_stats.ps_useless_changewire++;
}
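
/*
 * Sketch (not compiled) of the allocate-then-recheck idiom from
 * pmap_enu above: malloc(..., M_WAITOK) may sleep, so another
 * process may install the PTE array first, in which case we free
 * ours and retry.  The example_* name is hypothetical.
 */
#ifdef notdef
int *
example_get_ptespace(pm, vseg)
	register struct pmap *pm;
	register int vseg;
{
	register int *pte;

retry:
	pte = pm->pm_pte[vseg];
	if (pte == NULL) {
		pte = (int *)malloc((u_long)(NPTESG * sizeof *pte),
		    M_VMPMAP, M_WAITOK);
		if (pm->pm_pte[vseg] != NULL) {	/* lost the race */
			free((caddr_t)pte, M_VMPMAP);
			goto retry;
		}
		bzero((caddr_t)pte, NPTESG * sizeof *pte);
		pm->pm_pte[vseg] = pte;
	}
	return (pte);
}
#endif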

/*
 * Extract the physical page address associated
 * with the given map/virtual_address pair.
 * GRR, the vm code knows; we should not have to do this!
 */
vm_offset_t
pmap_extract(pm, va)
	register struct pmap *pm;
	vm_offset_t va;
{
	register int tpte;
	register int vseg;

	if (pm == NULL) {
		printf("pmap_extract: null pmap\n");
		return (0);
	}
	vseg = VA_VSEG(va);
	if (pm->pm_segmap[vseg] != seginval) {
		register int ctx = getcontext();

		if (pm->pm_ctx) {
			setcontext(pm->pm_ctxnum);
			tpte = getpte(va);
		} else {
			setcontext(0);
			/* XXX use per-cpu pteva? */
			setsegmap(0, pm->pm_segmap[vseg]);
			tpte = getpte(VA_VPG(va) * NBPG);
		}
		setcontext(ctx);
	} else {
		register int *pte = pm->pm_pte[vseg];

		if (pte == NULL) {
			printf("pmap_extract: invalid vseg\n");
			return (0);
		}
		tpte = pte[VA_VPG(va)];
	}
	if ((tpte & PG_V) == 0) {
		printf("pmap_extract: invalid pte\n");
		return (0);
	}
	tpte &= PG_PFNUM;
	tpte = HWTOSW(tpte);
	return ((tpte << PGSHIFT) | (va & PGOFSET));
}

/*
 * Copy the range specified by src_addr/len
 * from the source map to the range dst_addr/len
 * in the destination map.
 *
 * This routine is only advisory and need not do anything.
 */
/* ARGSUSED */
void
pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
	struct pmap *dst_pmap, *src_pmap;
	vm_offset_t dst_addr;
	vm_size_t len;
	vm_offset_t src_addr;
{
}

/*
 * Require that all active physical maps contain no
 * incorrect entries NOW.  [This update includes
 * forcing updates of any address map caching.]
 */
void
pmap_update()
{
}

/*
 * Garbage collects the physical map system for
 * pages which are no longer used.
 * Success need not be guaranteed -- that is, there
 * may well be pages which are not referenced, but
 * others may be collected.
 * Called by the pageout daemon when pages are scarce.
 */
/* ARGSUSED */
void
pmap_collect(pm)
	struct pmap *pm;
{
}

/*
 * Clear the modify bit for the given physical page.
 */
void
pmap_clear_modify(pa)
	register vm_offset_t pa;
{
	register struct pvlist *pv;

	if (managed(pa)) {
		pv = pvhead(pa);
		(void) pv_syncflags(pv);
		pv->pv_flags &= ~PV_MOD;
	}
}

/*
 * Tell whether the given physical page has been modified.
 */
int
pmap_is_modified(pa)
	register vm_offset_t pa;
{
	register struct pvlist *pv;

	if (managed(pa)) {
		pv = pvhead(pa);
		if (pv->pv_flags & PV_MOD || pv_syncflags(pv) & PV_MOD)
			return (1);
	}
	return (0);
}

/*
 * Clear the reference bit for the given physical page.
 */
void
pmap_clear_reference(pa)
	vm_offset_t pa;
{
	register struct pvlist *pv;

	if (managed(pa)) {
		pv = pvhead(pa);
		(void) pv_syncflags(pv);
		pv->pv_flags &= ~PV_REF;
	}
}

/*
 * Tell whether the given physical page has been referenced.
 */
int
pmap_is_referenced(pa)
	vm_offset_t pa;
{
	register struct pvlist *pv;

	if (managed(pa)) {
		pv = pvhead(pa);
		if (pv->pv_flags & PV_REF || pv_syncflags(pv) & PV_REF)
			return (1);
	}
	return (0);
}
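
/*
 * Sketch (not compiled): typical pageout-style use of the mod/ref
 * interface above.  The example_* name is hypothetical.
 */
#ifdef notdef
int
example_page_is_dirty(pa)
	vm_offset_t pa;
{

	if (pmap_is_modified(pa)) {
		pmap_clear_modify(pa);	/* note it, then reset the bit */
		return (1);
	}
	return (0);
}
#endif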

/*
 * Make the specified pages (by pmap, offset) pageable (or not) as requested.
 *
 * A page which is not pageable may not take a fault; therefore, its page
 * table entry must remain valid for the duration (or at least, the trap
 * handler must not call vm_fault).
 *
 * This routine is merely advisory; pmap_enter will specify that these pages
 * are to be wired down (or not) as appropriate.
 */
/* ARGSUSED */
void
pmap_pageable(pm, start, end, pageable)
	struct pmap *pm;
	vm_offset_t start, end;
	int pageable;
{
}

/*
 * Fill the given MI physical page with zero bytes.
 *
 * We avoid stomping on the cache.
 * XXX	might be faster to use destination's context and allow cache to fill?
 */
void
pmap_zero_page(pa)
	register vm_offset_t pa;
{
	register caddr_t va;
	register int pte;

	if (managed(pa)) {
		/*
		 * The following might not be necessary since the page
		 * is being cleared because it is about to be allocated,
		 * i.e., is in use by no one.
		 */
#if 1
#ifdef notdef
		if (vactype != VAC_NONE)
#endif
			pv_flushcache(pvhead(pa));
#endif
		pte = PG_V | PG_S | PG_W | PG_NC | SWTOHW(atop(pa));
	} else
		pte = PG_V | PG_S | PG_W | PG_NC | (atop(pa) & PG_PFNUM);

	va = vpage[0];
	setpte(va, pte);
	qzero(va, NBPG);
	setpte(va, 0);
}

/*
 * Copy the given MI physical source page to its destination.
 *
 * We avoid stomping on the cache as above (with same `XXX' note).
 * We must first flush any write-back cache for the source page.
 * We go ahead and stomp on the kernel's virtual cache for the
 * source page, since the cache can read memory MUCH faster than
 * the processor.
 */
void
pmap_copy_page(src, dst)
	vm_offset_t src, dst;
{
	register caddr_t sva, dva;
	register int spte, dpte;

	if (managed(src)) {
		if (vactype == VAC_WRITEBACK)
			pv_flushcache(pvhead(src));
		spte = PG_V | PG_S | SWTOHW(atop(src));
	} else
		spte = PG_V | PG_S | (atop(src) & PG_PFNUM);

	if (managed(dst)) {
		/* similar `might not be necessary' comment applies */
#if 1
#ifdef notdef
		if (vactype != VAC_NONE)
#endif
			pv_flushcache(pvhead(dst));
#endif
		dpte = PG_V | PG_S | PG_W | PG_NC | SWTOHW(atop(dst));
	} else
		dpte = PG_V | PG_S | PG_W | PG_NC | (atop(dst) & PG_PFNUM);

	sva = vpage[0];
	dva = vpage[1];
	setpte(sva, spte);
	setpte(dva, dpte);
	qcopy(sva, dva, NBPG);	/* loads cache, so we must ... */
	cache_flush_page((int)sva);
	setpte(sva, 0);
	setpte(dva, 0);
}

/*
 * Turn a cdevsw d_mmap value into a byte address for pmap_enter.
 * XXX	this should almost certainly be done differently, and
 *	elsewhere, or even not at all
 */
vm_offset_t
pmap_phys_address(x)
	int x;
{

	return (x);
}

/*
 * Turn off cache for a given (va, number of pages).
 *
 * We just assert PG_NC for each PTE; the addresses must reside
 * in locked kernel space.  A cache flush is also done.
 */
kvm_uncache(va, npages)
	register caddr_t va;
	register int npages;
{
	register int pte;

	for (; --npages >= 0; va += NBPG) {
		pte = getpte(va);
		if ((pte & PG_V) == 0)
			panic("kvm_uncache !pg_v");
		pte |= PG_NC;
		setpte(va, pte);
		cache_flush_page((int)va);
	}
}
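
/*
 * Sketch (not compiled): typical use of kvm_uncache for a one-page
 * DMA descriptor area that must bypass the virtual address cache.
 * The example_* name is hypothetical; kmem_alloc is the standard
 * Mach VM allocator.
 */
#ifdef notdef
caddr_t
example_alloc_uncached()
{
	register caddr_t desc;

	desc = (caddr_t)kmem_alloc(kernel_map, NBPG);
	kvm_uncache(desc, 1);	/* assert PG_NC on that one page */
	return (desc);
}
#endif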

/*
 * For /dev/mem.
 */
int
pmap_enter_hw(pm, va, pa, prot, wired)
	register struct pmap *pm;
	vm_offset_t va, pa;
	vm_prot_t prot;
	int wired;
{
	register struct memarr *ma;
	register int n;
	register u_int t;

	if (pa >= MAXMEM)			/* ??? */
		return (EFAULT);
	for (ma = pmemarr, n = npmemarr; --n >= 0; ma++) {
		/* unsigned compare handles pa < ma->addr as well */
		t = (u_int)pa - ma->addr;
		if (t < ma->len)
			goto ok;
	}
	return (EFAULT);
ok:
	pa = (HWTOSW(atop(pa)) << PGSHIFT) | (pa & PGOFSET);
	if (pa >= vm_first_phys + vm_num_phys)	/* ??? */
		return (EFAULT);

	pmap_enter(pm, va, pa, prot, wired);
	return (0);
}
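
/*
 * Sketch (not compiled): how a /dev/mem style mmap handler might use
 * pmap_enter_hw above.  vm_map_pmap() is the standard Mach VM macro;
 * the example_* name, uva, and physaddr are hypothetical.
 */
#ifdef notdef
int
example_map_devmem(map, uva, physaddr)
	vm_map_t map;
	vm_offset_t uva, physaddr;
{
	register int error;

	error = pmap_enter_hw(vm_map_pmap(map), uva, physaddr,
	    VM_PROT_READ|VM_PROT_WRITE, 1);
	return (error);	/* EFAULT if physaddr is not real memory */
}
#endif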