1 /* 2 * Copyright (c) 1992, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This software was developed by the Computer Systems Engineering group 6 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and 7 * contributed to Berkeley. 8 * 9 * All advertising materials mentioning features or use of this software 10 * must display the following acknowledgement: 11 * This product includes software developed by the University of 12 * California, Lawrence Berkeley Laboratory. 13 * 14 * %sccs.include.redist.c% 15 * 16 * @(#)pmap.c 8.4 (Berkeley) 02/05/94 17 * 18 * from: $Header: pmap.c,v 1.43 93/10/31 05:34:56 torek Exp $ 19 */ 20 21 /* 22 * SPARC physical map management code. 23 * Does not function on multiprocessors (yet). 24 */ 25 26 #include <sys/param.h> 27 #include <sys/systm.h> 28 #include <sys/device.h> 29 #include <sys/proc.h> 30 #include <sys/malloc.h> 31 32 #include <vm/vm.h> 33 #include <vm/vm_kern.h> 34 #include <vm/vm_prot.h> 35 #include <vm/vm_page.h> 36 37 #include <machine/autoconf.h> 38 #include <machine/bsd_openprom.h> 39 #include <machine/cpu.h> 40 #include <machine/ctlreg.h> 41 42 #include <sparc/sparc/asm.h> 43 #include <sparc/sparc/cache.h> 44 #include <sparc/sparc/vaddrs.h> 45 46 #ifdef DEBUG 47 #define PTE_BITS "\20\40V\37W\36S\35NC\33IO\32U\31M" 48 #endif 49 50 extern struct promvec *promvec; 51 52 /* 53 * The SPARCstation offers us the following challenges: 54 * 55 * 1. A virtual address cache. This is, strictly speaking, not 56 * part of the architecture, but the code below assumes one. 57 * This is a write-through cache on the 4c and a write-back cache 58 * on others. 59 * 60 * 2. An MMU that acts like a cache. There is not enough space 61 * in the MMU to map everything all the time. Instead, we need 62 * to load MMU with the `working set' of translations for each 63 * process. 64 * 65 * 3. Segmented virtual and physical spaces. The upper 12 bits of 66 * a virtual address (the virtual segment) index a segment table, 67 * giving a physical segment. The physical segment selects a 68 * `Page Map Entry Group' (PMEG) and the virtual page number---the 69 * next 5 or 6 bits of the virtual address---select the particular 70 * `Page Map Entry' for the page. We call the latter a PTE and 71 * call each Page Map Entry Group a pmeg (for want of a better name). 72 * 73 * Since there are no valid bits in the segment table, the only way 74 * to have an invalid segment is to make one full pmeg of invalid PTEs. 75 * We use the last one (since the ROM does as well). 76 * 77 * 4. Discontiguous physical pages. The Mach VM expects physical pages 78 * to be in one sequential lump. 79 * 80 * 5. The MMU is always on: it is not possible to disable it. This is 81 * mainly a startup hassle. 
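 * (For item 3 above, in terms of the code below: VA_VSEG(va) is the virtual segment number, VA_VPG(va) is the page within that segment, NPTESG is the number of PTEs in a pmeg, and the low-order bits of va are the byte offset within the page.)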
82 */ 83 84 struct pmap_stats { 85 int ps_unlink_pvfirst; /* # of pv_unlinks on head */ 86 int ps_unlink_pvsearch; /* # of pv_unlink searches */ 87 int ps_changeprots; /* # of calls to changeprot */ 88 int ps_useless_changeprots; /* # of changeprots for wiring */ 89 int ps_enter_firstpv; /* pv heads entered */ 90 int ps_enter_secondpv; /* pv nonheads entered */ 91 int ps_useless_changewire; /* useless wiring changes */ 92 int ps_npg_prot_all; /* # of active pages protected */ 93 int ps_npg_prot_actual; /* # pages actually affected */ 94 } pmap_stats; 95 96 #ifdef DEBUG 97 #define PDB_CREATE 0x0001 98 #define PDB_DESTROY 0x0002 99 #define PDB_REMOVE 0x0004 100 #define PDB_CHANGEPROT 0x0008 101 #define PDB_ENTER 0x0010 102 103 #define PDB_MMU_ALLOC 0x0100 104 #define PDB_MMU_STEAL 0x0200 105 #define PDB_CTX_ALLOC 0x0400 106 #define PDB_CTX_STEAL 0x0800 107 int pmapdebug = 0x0; 108 #endif 109 110 #define splpmap() splimp() 111 112 /* 113 * First and last managed physical addresses. 114 */ 115 #if 0 116 vm_offset_t vm_first_phys, vm_last_phys; 117 #define managed(pa) ((pa) >= vm_first_phys && (pa) < vm_last_phys) 118 #else 119 vm_offset_t vm_first_phys, vm_num_phys; 120 #define managed(pa) ((unsigned)((pa) - vm_first_phys) < vm_num_phys) 121 #endif 122 123 /* 124 * For each managed physical page, there is a list of all currently 125 * valid virtual mappings of that page. Since there is usually one 126 * (or zero) mapping per page, the table begins with an initial entry, 127 * rather than a pointer; this head entry is empty iff its pv_pmap 128 * field is NULL. 129 * 130 * Note that these are per machine independent page (so there may be 131 * only one for every two hardware pages, e.g.). Since the virtual 132 * address is aligned on a page boundary, the low order bits are free 133 * for storing flags. Only the head of each list has flags. 134 * 135 * THIS SHOULD BE PART OF THE CORE MAP 136 */ 137 struct pvlist { 138 struct pvlist *pv_next; /* next pvlist, if any */ 139 struct pmap *pv_pmap; /* pmap of this va */ 140 int pv_va; /* virtual address */ 141 int pv_flags; /* flags (below) */ 142 }; 143 144 /* 145 * Flags in pv_flags. Note that PV_MOD must be 1 and PV_REF must be 2 146 * since they must line up with the bits in the hardware PTEs (see pte.h). 147 */ 148 #define PV_MOD 1 /* page modified */ 149 #define PV_REF 2 /* page referenced */ 150 #define PV_NC 4 /* page cannot be cached */ 151 /*efine PV_ALLF 7 ** all of the above */ 152 153 struct pvlist *pv_table; /* array of entries, one per physical page */ 154 155 #define pvhead(pa) (&pv_table[atop((pa) - vm_first_phys)]) 156 157 /* 158 * Each virtual segment within each pmap is either valid or invalid. 159 * It is valid if pm_npte[VA_VSEG(va)] is not 0. This does not mean 160 * it is in the MMU, however; that is true iff pm_segmap[VA_VSEG(va)] 161 * does not point to the invalid PMEG. 162 * 163 * If a virtual segment is valid and loaded, the correct PTEs appear 164 * in the MMU only. If it is valid and unloaded, the correct PTEs appear 165 * in the pm_pte[VA_VSEG(va)] only. However, some effort is made to keep 166 * the software copies consistent enough with the MMU so that libkvm can 167 * do user address translations. In particular, pv_changepte() and 168 * pmap_enu() maintain consistency, while less critical changes are 169 * not maintained. pm_pte[VA_VSEG(va)] always points to space for those 170 * PTEs, unless this is the kernel pmap, in which case pm_pte[x] is not 171 * used (sigh). 
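 * In short: a user virtual segment is invalid (pm_npte[vseg] == 0); valid but unloaded (pm_npte[vseg] != 0 and pm_segmap[vseg] == seginval, with the PTEs living only in pm_pte[vseg]); or valid and loaded (pm_segmap[vseg] names a real pmeg and the MMU copy is the authoritative one).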
172 * 173 * Each PMEG in the MMU is either free or contains PTEs corresponding to 174 * some pmap and virtual segment. If it contains some PTEs, it also contains 175 * reference and modify bits that belong in the pv_table. If we need 176 * to steal a PMEG from some process (if we need one and none are free) 177 * we must copy the ref and mod bits, and update pm_segmap in the other 178 * pmap to show that its virtual segment is no longer in the MMU. 179 * 180 * There are 128 PMEGs in a small Sun-4, of which only a few dozen are 181 * tied down permanently, leaving `about' 100 to be spread among 182 * running processes. These are managed as an LRU cache. Before 183 * calling the VM paging code for a user page fault, the fault handler 184 * calls mmu_load(pmap, va) to try to get a set of PTEs put into the 185 * MMU. mmu_load will check the validity of the segment and tell whether 186 * it did something. 187 * 188 * Since I hate the name PMEG I call this data structure an `mmu entry'. 189 * Each mmuentry is on exactly one of three `usage' lists: free, LRU, 190 * or locked. The LRU list is for user processes; the locked list is 191 * for kernel entries; both are doubly linked queues headed by `mmuhd's. 192 * The free list is a simple list, headed by a free list pointer. 193 */ 194 struct mmuhd { 195 struct mmuentry *mh_next; 196 struct mmuentry *mh_prev; 197 }; 198 struct mmuentry { 199 struct mmuentry *me_next; /* queue (MUST BE FIRST) or next free */ 200 struct mmuentry *me_prev; /* queue (MUST BE FIRST) */ 201 struct pmap *me_pmap; /* pmap, if in use */ 202 struct mmuentry *me_pmforw; /* pmap pmeg chain */ 203 struct mmuentry **me_pmback; /* pmap pmeg chain */ 204 u_short me_vseg; /* virtual segment number in pmap */ 205 pmeg_t me_pmeg; /* hardware PMEG number */ 206 }; 207 struct mmuentry *mmuentry; /* allocated in pmap_bootstrap */ 208 209 struct mmuentry *me_freelist; /* free list (not a queue) */ 210 struct mmuhd me_lru = { /* LRU (user) entries */ 211 (struct mmuentry *)&me_lru, (struct mmuentry *)&me_lru 212 }; 213 struct mmuhd me_locked = { /* locked (kernel) entries */ 214 (struct mmuentry *)&me_locked, (struct mmuentry *)&me_locked 215 }; 216 217 int seginval; /* the invalid segment number */ 218 219 /* 220 * A context is simply a small number that dictates which set of 4096 221 * segment map entries the MMU uses. The Sun 4c has eight such sets. 222 * These are allotted in an `almost MRU' fashion. 223 * 224 * Each context is either free or attached to a pmap. 225 * 226 * Since the virtual address cache is tagged by context, when we steal 227 * a context we have to flush (that part of) the cache. 228 */ 229 union ctxinfo { 230 union ctxinfo *c_nextfree; /* free list (if free) */ 231 struct pmap *c_pmap; /* pmap (if busy) */ 232 }; 233 union ctxinfo *ctxinfo; /* allocated in pmap_bootstrap */ 234 int ncontext; 235 236 union ctxinfo *ctx_freelist; /* context free list */ 237 int ctx_kick; /* allocation rover when none free */ 238 int ctx_kickdir; /* ctx_kick roves both directions */ 239 240 /* XXX need per-cpu vpage[]s (and vmempage, unless we lock in /dev/mem) */ 241 caddr_t vpage[2]; /* two reserved MD virtual pages */ 242 caddr_t vmempage; /* one reserved MI vpage for /dev/mem */ 243 caddr_t vdumppages; /* 32KB worth of reserved dump pages */ 244 245 struct kpmap kernel_pmap_store; /* the kernel's pmap */ 246 247 /* 248 * We need to know real physical memory ranges (for /dev/mem).
249 */ 250 #define MA_SIZE 32 /* size of memory descriptor arrays */ 251 struct memarr pmemarr[MA_SIZE];/* physical memory regions */ 252 int npmemarr; /* number of entries in pmemarr */ 253 254 /* 255 * The following four global variables are set in pmap_bootstrap 256 * for the vm code to find. This is Wrong. 257 */ 258 vm_offset_t avail_start; /* first free physical page number */ 259 vm_offset_t avail_end; /* last free physical page number */ 260 vm_offset_t virtual_avail; /* first free virtual page number */ 261 vm_offset_t virtual_end; /* last free virtual page number */ 262 263 /* 264 * pseudo-functions for mnemonic value 265 #ifdef notyet 266 * NB: setsegmap should be stba for 4c, but stha works and makes the 267 * code right for the Sun-4 as well. 268 #endif 269 */ 270 #define getcontext() lduba(AC_CONTEXT, ASI_CONTROL) 271 #define setcontext(c) stba(AC_CONTEXT, ASI_CONTROL, c) 272 #ifdef notyet 273 #define getsegmap(va) lduha(va, ASI_SEGMAP) 274 #define setsegmap(va, pmeg) stha(va, ASI_SEGMAP, pmeg) 275 #else 276 #define getsegmap(va) lduba(va, ASI_SEGMAP) 277 #define setsegmap(va, pmeg) stba(va, ASI_SEGMAP, pmeg) 278 #endif 279 280 #define getpte(va) lda(va, ASI_PTE) 281 #define setpte(va, pte) sta(va, ASI_PTE, pte) 282 283 /*----------------------------------------------------------------*/ 284 285 #ifdef sun4c 286 /* 287 * Translations from dense (contiguous) pseudo physical addresses 288 * (fed to the VM code, to keep it happy) to sparse (real, hardware) 289 * physical addresses. We call the former `software' page frame 290 * numbers and the latter `hardware' page frame numbers. The 291 * translation is done on a `per bank' basis. 292 * 293 * The HWTOSW and SWTOHW macros handle the actual translation. 294 * They are defined as no-ops on Sun-4s. 295 * 296 * SHOULD DO atop AND ptoa DIRECTLY IN THESE MACROS SINCE ALL CALLERS 297 * ALWAYS NEED THAT ANYWAY ... CAN JUST PRECOOK THE TABLES (TODO) 298 * 299 * Since we cannot use the memory allocated to the ROM monitor, and 300 * this happens to be just under 64K, I have chosen a bank size of 301 * 64K. This is necessary since all banks must be completely full. 302 * I have also chosen a physical memory limit of 128 MB. The 4c is 303 * architecturally limited to 256 MB, but 128 MB is more than will 304 * fit on present hardware. 305 * 306 * XXX FIX THIS: just make all of each bank available and then 307 * take out the pages reserved to the monitor!! 308 */ 309 #define MAXMEM (128 * 1024 * 1024) /* no more than 128 MB phys mem */ 310 #define NPGBANK 16 /* 2^4 pages per bank (64K / bank) */ 311 #define BSHIFT 4 /* log2(NPGBANK) */ 312 #define BOFFSET (NPGBANK - 1) 313 #define BTSIZE (MAXMEM / NBPG / NPGBANK) 314 315 int pmap_dtos[BTSIZE]; /* dense to sparse */ 316 int pmap_stod[BTSIZE]; /* sparse to dense */ 317 318 #define HWTOSW(pg) (pmap_stod[(pg) >> BSHIFT] | ((pg) & BOFFSET)) 319 #define SWTOHW(pg) (pmap_dtos[(pg) >> BSHIFT] | ((pg) & BOFFSET)) 320 321 /* 322 * Sort a memory array by address. 323 */ 324 static void 325 sortm(mp, n) 326 register struct memarr *mp; 327 register int n; 328 { 329 register struct memarr *mpj; 330 register int i, j; 331 register u_int addr, len; 332 333 /* Insertion sort. This is O(n^2), but so what? 
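(n is at most MA_SIZE, i.e. 32 entries, so the quadratic cost is harmless.)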
*/ 334 for (i = 1; i < n; i++) { 335 /* save i'th entry */ 336 addr = mp[i].addr; 337 len = mp[i].len; 338 /* find j such that i'th entry goes before j'th */ 339 for (j = 0, mpj = mp; j < i; j++, mpj++) 340 if (addr < mpj->addr) 341 break; 342 /* slide up any additional entries */ 343 ovbcopy(mpj, mpj + 1, (i - j) * sizeof(*mp)); 344 mpj->addr = addr; 345 mpj->len = len; 346 } 347 } 348 349 #ifdef DEBUG 350 struct memarr pmap_ama[MA_SIZE]; 351 int pmap_nama; 352 #define ama pmap_ama 353 #endif 354 355 /* 356 * init_translations sets up pmap_dtos[] and pmap_stod[], and 357 * returns the number of usable physical pages. 358 */ 359 int 360 init_translations() 361 { 362 register struct memarr *mp; 363 register int n, nmem; 364 register u_int vbank = 0, pbank, v, a; 365 register u_int pages = 0, lost = 0; 366 #ifndef DEBUG 367 struct memarr ama[MA_SIZE]; /* available memory array */ 368 #endif 369 370 nmem = makememarr(ama, MA_SIZE, MEMARR_AVAILPHYS); 371 372 /* 373 * Open Boot supposedly guarantees at least 3 MB free mem at 0; 374 * this is where the kernel has been loaded (we certainly hope the 375 * kernel is <= 3 MB). We need the memory array to be sorted, and 376 * to start at 0, so that `software page 0' and `hardware page 0' 377 * are the same (otherwise the VM reserves the wrong pages for the 378 * kernel). 379 */ 380 sortm(ama, nmem); 381 if (ama[0].addr != 0) { 382 /* cannot panic here; there's no real kernel yet. */ 383 printf("init_translations: no kernel memory?!\n"); 384 callrom(); 385 } 386 #ifdef DEBUG 387 pmap_nama = nmem; 388 #endif 389 for (mp = ama; --nmem >= 0; mp++) { 390 a = mp->addr >> PGSHIFT; 391 v = mp->len >> PGSHIFT; 392 if ((n = a & BOFFSET) != 0) { 393 /* round up to next bank */ 394 n = NPGBANK - n; 395 if (v < n) { /* not a whole bank: skip it */ 396 lost += v; 397 continue; 398 } 399 lost += n; /* lose n pages from front */ 400 a += n; 401 v -= n; 402 } 403 n = v >> BSHIFT; /* calculate number of banks */ 404 pbank = a >> BSHIFT; /* and the bank itself */ 405 if (pbank + n >= BTSIZE) 406 n = BTSIZE - pbank; 407 pages += n; /* off by a factor of 2^BSHIFT */ 408 lost += v - (n << BSHIFT); 409 while (--n >= 0) { 410 pmap_dtos[vbank] = pbank << BSHIFT; 411 pmap_stod[pbank] = vbank << BSHIFT; 412 pbank++; 413 vbank++; 414 } 415 } 416 /* adjust page count */ 417 pages <<= BSHIFT; 418 #ifdef DEBUG 419 printf("note: lost %d pages in translation\n", lost); 420 #endif 421 return (pages); 422 } 423 424 #else /* sun4c */ 425 426 /* 427 * Pages are physically contiguous, and hardware PFN == software PFN. 428 * 429 * XXX assumes PAGE_SIZE == NBPG (???) 430 */ 431 #define HWTOSW(pg) (pg) 432 #define SWTOHW(pg) (pg) 433 434 #endif /* sun4c */ 435 436 /* update pv_flags given a valid pte */ 437 #define MR(pte) (((pte) >> PG_M_SHIFT) & (PV_MOD | PV_REF)) 438 439 /*----------------------------------------------------------------*/ 440 441 /* 442 * Agree with the monitor ROM as to how many MMU entries are 443 * to be reserved, and map all of its segments into all contexts. 444 * 445 * Unfortunately, while the Version 0 PROM had a nice linked list of 446 * taken virtual memory, the Version 2 PROM provides instead a convoluted 447 * description of *free* virtual memory. Rather than invert this, we 448 * resort to two magic constants from the PROM vector description file. 
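 * (The two constants are OPENPROM_STARTVADDR and OPENPROM_ENDVADDR, used just below to bound the PROM's virtual range.)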
449 */ 450 int 451 mmu_reservemon(nmmu) 452 register int nmmu; 453 { 454 register u_int va, eva; 455 register int mmuseg, i; 456 457 va = OPENPROM_STARTVADDR; 458 eva = OPENPROM_ENDVADDR; 459 while (va < eva) { 460 mmuseg = getsegmap(va); 461 if (mmuseg < nmmu) 462 nmmu = mmuseg; 463 for (i = ncontext; --i > 0;) 464 (*promvec->pv_setctxt)(i, (caddr_t)va, mmuseg); 465 if (mmuseg == seginval) { 466 va += NBPSG; 467 continue; 468 } 469 /* PROM maps its memory user-accessible: fix it. */ 470 for (i = NPTESG; --i >= 0; va += NBPG) 471 setpte(va, getpte(va) | PG_S); 472 } 473 return (nmmu); 474 } 475 476 /* 477 * TODO: agree with the ROM on physical pages by taking them away 478 * from the page list, rather than having a dinky BTSIZE above. 479 */ 480 481 /*----------------------------------------------------------------*/ 482 483 /* 484 * MMU management. 485 */ 486 487 /* 488 * Change contexts. We need the old context number as well as the new 489 * one. If the context is changing, we must write all user windows 490 * first, lest an interrupt cause them to be written to the (other) 491 * user whose context we set here. 492 */ 493 #define CHANGE_CONTEXTS(old, new) \ 494 if ((old) != (new)) { \ 495 write_user_windows(); \ 496 setcontext(new); \ 497 } 498 499 /* 500 * Allocate an MMU entry (i.e., a PMEG). 501 * If necessary, steal one from someone else. 502 * Put it on the tail of the given queue 503 * (which is either the LRU list or the locked list). 504 * The locked list is not actually ordered, but this is easiest. 505 * Also put it on the given (new) pmap's chain, 506 * enter its pmeg number into that pmap's segmap, 507 * and store the pmeg's new virtual segment number (me->me_vseg). 508 * 509 * This routine is large and complicated, but it must be fast 510 * since it implements the dynamic allocation of MMU entries. 511 */ 512 struct mmuentry * 513 me_alloc(mh, newpm, newvseg) 514 register struct mmuhd *mh; 515 register struct pmap *newpm; 516 register int newvseg; 517 { 518 register struct mmuentry *me; 519 register struct pmap *pm; 520 register int i, va, pa, *pte, tpte; 521 int ctx; 522 523 /* try free list first */ 524 if ((me = me_freelist) != NULL) { 525 me_freelist = me->me_next; 526 #ifdef DEBUG 527 if (me->me_pmap != NULL) 528 panic("me_alloc: freelist entry has pmap"); 529 if (pmapdebug & PDB_MMU_ALLOC) 530 printf("me_alloc: got pmeg %x\n", me->me_pmeg); 531 #endif 532 insque(me, mh->mh_prev); /* onto end of queue */ 533 534 /* onto pmap chain; pmap is already locked, if needed */ 535 me->me_pmforw = NULL; 536 me->me_pmback = newpm->pm_mmuback; 537 *newpm->pm_mmuback = me; 538 newpm->pm_mmuback = &me->me_pmforw; 539 540 /* into pmap segment table, with backpointers */ 541 newpm->pm_segmap[newvseg] = me->me_pmeg; 542 me->me_pmap = newpm; 543 me->me_vseg = newvseg; 544 545 return (me); 546 } 547 548 /* no luck, take head of LRU list */ 549 if ((me = me_lru.mh_next) == (struct mmuentry *)&me_lru) 550 panic("me_alloc: all pmegs gone"); 551 pm = me->me_pmap; 552 #ifdef DEBUG 553 if (pm == NULL) 554 panic("me_alloc: LRU entry has no pmap"); 555 if (pm == kernel_pmap) 556 panic("me_alloc: stealing from kernel"); 557 pte = pm->pm_pte[me->me_vseg]; 558 if (pte == NULL) 559 panic("me_alloc: LRU entry's pmap has no ptes"); 560 if (pmapdebug & (PDB_MMU_ALLOC | PDB_MMU_STEAL)) 561 printf("me_alloc: stealing pmeg %x from pmap %x\n", 562 me->me_pmeg, pm); 563 #endif
	pte = pm->pm_pte[me->me_vseg];	/* save area for the old pmap's PTEs (filled below); must be set even when DEBUG is off */
564 /* 565 * Remove from LRU list, and insert at end of new list 566 * (probably the LRU list again, but so what?
567 */ 568 remque(me); 569 insque(me, mh->mh_prev); 570 571 /* 572 * The PMEG must be mapped into some context so that we can 573 * read its PTEs. Use its current context if it has one; 574 * if not, and since context 0 is reserved for the kernel, 575 * the simplest method is to switch to 0 and map the PMEG 576 * to virtual address 0---which, being a user space address, 577 * is by definition not in use. 578 * 579 * XXX for ncpus>1 must use per-cpu VA? 580 * XXX do not have to flush cache immediately 581 */ 582 ctx = getcontext(); 583 if (pm->pm_ctx) { 584 CHANGE_CONTEXTS(ctx, pm->pm_ctxnum); 585 #ifdef notdef 586 if (vactype != VAC_NONE) 587 #endif 588 cache_flush_segment(me->me_vseg); 589 va = VSTOVA(me->me_vseg); 590 } else { 591 CHANGE_CONTEXTS(ctx, 0); 592 setsegmap(0, me->me_pmeg); 593 /* 594 * No cache flush needed: it happened earlier when 595 * the old context was taken. 596 */ 597 va = 0; 598 } 599 600 /* 601 * Record reference and modify bits for each page, 602 * and copy PTEs into kernel memory so that they can 603 * be reloaded later. 604 */ 605 i = NPTESG; 606 do { 607 tpte = getpte(va); 608 if (tpte & PG_V) { 609 pa = ptoa(HWTOSW(tpte & PG_PFNUM)); 610 if (managed(pa)) 611 pvhead(pa)->pv_flags |= MR(tpte); 612 } 613 *pte++ = tpte & ~(PG_U|PG_M); 614 va += NBPG; 615 } while (--i > 0); 616 617 /* update segment tables */ 618 simple_lock(&pm->pm_lock); /* what if other cpu takes mmuentry ?? */ 619 if (pm->pm_ctx) 620 setsegmap(VSTOVA(me->me_vseg), seginval); 621 pm->pm_segmap[me->me_vseg] = seginval; 622 623 /* off old pmap chain */ 624 if ((*me->me_pmback = me->me_pmforw) != NULL) { 625 me->me_pmforw->me_pmback = me->me_pmback; 626 me->me_pmforw = NULL; 627 } else 628 pm->pm_mmuback = me->me_pmback; 629 simple_unlock(&pm->pm_lock); 630 setcontext(ctx); /* done with old context */ 631 632 /* onto new pmap chain; new pmap is already locked, if needed */ 633 /* me->me_pmforw = NULL; */ /* done earlier */ 634 me->me_pmback = newpm->pm_mmuback; 635 *newpm->pm_mmuback = me; 636 newpm->pm_mmuback = &me->me_pmforw; 637 638 /* into new segment table, with backpointers */ 639 newpm->pm_segmap[newvseg] = me->me_pmeg; 640 me->me_pmap = newpm; 641 me->me_vseg = newvseg; 642 643 return (me); 644 } 645 646 /* 647 * Free an MMU entry. 648 * 649 * Assumes the corresponding pmap is already locked. 650 * Does NOT flush cache, but does record ref and mod bits. 651 * The rest of each PTE is discarded. 652 * CALLER MUST SET CONTEXT to pm->pm_ctxnum (if pmap has 653 * a context) or to 0 (if not). Caller must also update 654 * pm->pm_segmap and (possibly) the hardware. 
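 * (See pmap_rmk and pmap_rmu below for typical callers: each invalidates the hardware segment map in the appropriate context(s) before handing the pmeg to me_free.)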
655 */ 656 void 657 me_free(pm, pmeg) 658 register struct pmap *pm; 659 register u_int pmeg; 660 { 661 register struct mmuentry *me = &mmuentry[pmeg]; 662 register int i, va, pa, tpte; 663 664 #ifdef DEBUG 665 if (pmapdebug & PDB_MMU_ALLOC) 666 printf("me_free: freeing pmeg %x from pmap %x\n", 667 me->me_pmeg, pm); 668 if (me->me_pmeg != pmeg) 669 panic("me_free: wrong mmuentry"); 670 if (pm != me->me_pmap) 671 panic("me_free: pm != me_pmap"); 672 #endif 673 674 /* just like me_alloc, but no cache flush, and context already set */ 675 if (pm->pm_ctx) 676 va = VSTOVA(me->me_vseg); 677 else { 678 setsegmap(0, me->me_pmeg); 679 va = 0; 680 } 681 i = NPTESG; 682 do { 683 tpte = getpte(va); 684 if (tpte & PG_V) { 685 pa = ptoa(HWTOSW(tpte & PG_PFNUM)); 686 if (managed(pa)) 687 pvhead(pa)->pv_flags |= MR(tpte); 688 } 689 va += NBPG; 690 } while (--i > 0); 691 692 /* take mmu entry off pmap chain */ 693 *me->me_pmback = me->me_pmforw; 694 if ((*me->me_pmback = me->me_pmforw) != NULL) 695 me->me_pmforw->me_pmback = me->me_pmback; 696 else 697 pm->pm_mmuback = me->me_pmback; 698 /* ... and remove from segment map */ 699 pm->pm_segmap[me->me_vseg] = seginval; 700 701 /* off LRU or lock chain */ 702 remque(me); 703 704 /* no associated pmap; on free list */ 705 me->me_pmap = NULL; 706 me->me_next = me_freelist; 707 me_freelist = me; 708 } 709 710 /* 711 * `Page in' (load or inspect) an MMU entry; called on page faults. 712 * Returns 1 if we reloaded the segment, -1 if the segment was 713 * already loaded and the page was marked valid (in which case the 714 * fault must be a bus error or something), or 0 (segment loaded but 715 * PTE not valid, or segment not loaded at all). 716 */ 717 int 718 mmu_pagein(pm, va, bits) 719 register struct pmap *pm; 720 register int va, bits; 721 { 722 register int *pte; 723 register struct mmuentry *me; 724 register int vseg = VA_VSEG(va), pmeg, i, s; 725 726 /* return 0 if we have no PTEs to load */ 727 if ((pte = pm->pm_pte[vseg]) == NULL) 728 return (0); 729 /* return -1 if the fault is `hard', 0 if not */ 730 if (pm->pm_segmap[vseg] != seginval) 731 return (bits && (getpte(va) & bits) == bits ? -1 : 0); 732 733 /* reload segment: write PTEs into a new LRU entry */ 734 va = VA_ROUNDDOWNTOSEG(va); 735 s = splpmap(); /* paranoid */ 736 pmeg = me_alloc(&me_lru, pm, vseg)->me_pmeg; 737 setsegmap(va, pmeg); 738 i = NPTESG; 739 do { 740 setpte(va, *pte++); 741 va += NBPG; 742 } while (--i > 0); 743 splx(s); 744 return (1); 745 } 746 747 /* 748 * Allocate a context. If necessary, steal one from someone else. 749 * Changes hardware context number and loads segment map. 750 * 751 * This routine is only ever called from locore.s just after it has 752 * saved away the previous process, so there are no active user windows. 
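 * (Hence it can call setcontext() directly, without the write_user_windows() that CHANGE_CONTEXTS would otherwise require.)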
753 */ 754 void 755 ctx_alloc(pm) 756 register struct pmap *pm; 757 { 758 register union ctxinfo *c; 759 register int cnum, i, va; 760 register pmeg_t *segp; 761 762 #ifdef DEBUG 763 if (pm->pm_ctx) 764 panic("ctx_alloc pm_ctx"); 765 if (pmapdebug & PDB_CTX_ALLOC) 766 printf("ctx_alloc(%x)\n", pm); 767 #endif 768 if ((c = ctx_freelist) != NULL) { 769 ctx_freelist = c->c_nextfree; 770 cnum = c - ctxinfo; 771 setcontext(cnum); 772 } else { 773 if ((ctx_kick += ctx_kickdir) >= ncontext) { 774 ctx_kick = ncontext - 1; 775 ctx_kickdir = -1; 776 } else if (ctx_kick < 1) { 777 ctx_kick = 1; 778 ctx_kickdir = 1; 779 } 780 c = &ctxinfo[cnum = ctx_kick]; 781 #ifdef DEBUG 782 if (c->c_pmap == NULL) 783 panic("ctx_alloc cu_pmap"); 784 if (pmapdebug & (PDB_CTX_ALLOC | PDB_CTX_STEAL)) 785 printf("ctx_alloc: steal context %x from %x\n", 786 cnum, c->c_pmap); 787 #endif 788 c->c_pmap->pm_ctx = NULL; 789 setcontext(cnum); 790 #ifdef notdef 791 if (vactype != VAC_NONE) 792 #endif 793 cache_flush_context(); 794 } 795 c->c_pmap = pm; 796 pm->pm_ctx = c; 797 pm->pm_ctxnum = cnum; 798 799 /* 800 * XXX loop below makes 3584 iterations ... could reduce 801 * by remembering valid ranges per context: two ranges 802 * should suffice (for text/data/bss and for stack). 803 */ 804 segp = pm->pm_rsegmap; 805 for (va = 0, i = NUSEG; --i >= 0; va += NBPSG) 806 setsegmap(va, *segp++); 807 } 808 809 /* 810 * Give away a context. Flushes cache and sets current context to 0. 811 */ 812 void 813 ctx_free(pm) 814 struct pmap *pm; 815 { 816 register union ctxinfo *c; 817 register int newc, oldc; 818 819 if ((c = pm->pm_ctx) == NULL) 820 panic("ctx_free"); 821 pm->pm_ctx = NULL; 822 oldc = getcontext(); 823 if (vactype != VAC_NONE) { 824 newc = pm->pm_ctxnum; 825 CHANGE_CONTEXTS(oldc, newc); 826 cache_flush_context(); 827 setcontext(0); 828 } else { 829 CHANGE_CONTEXTS(oldc, 0); 830 } 831 c->c_nextfree = ctx_freelist; 832 ctx_freelist = c; 833 } 834 835 836 /*----------------------------------------------------------------*/ 837 838 /* 839 * pvlist functions. 840 */ 841 842 /* 843 * Walk the given pv list, and for each PTE, set or clear some bits 844 * (e.g., PG_W or PG_NC). 845 * 846 * As a special case, this never clears PG_W on `pager' pages. 847 * These, being kernel addresses, are always in hardware and have 848 * a context. 849 * 850 * This routine flushes the cache for any page whose PTE changes, 851 * as long as the process has a context; this is overly conservative. 852 * It also copies ref and mod bits to the pvlist, on the theory that 853 * this might save work later. (XXX should test this theory) 854 */ 855 void 856 pv_changepte(pv0, bis, bic) 857 register struct pvlist *pv0; 858 register int bis, bic; 859 { 860 register int *pte; 861 register struct pvlist *pv; 862 register struct pmap *pm; 863 register int va, vseg, pmeg, i, flags; 864 int ctx, s; 865 866 write_user_windows(); /* paranoid? */ 867 868 s = splpmap(); /* paranoid? */ 869 if (pv0->pv_pmap == NULL) { 870 splx(s); 871 return; 872 } 873 ctx = getcontext(); 874 flags = pv0->pv_flags; 875 for (pv = pv0; pv != NULL; pv = pv->pv_next) { 876 pm = pv->pv_pmap; 877 if(pm==NULL)panic("pv_changepte 1"); 878 va = pv->pv_va; 879 vseg = VA_VSEG(va); 880 pte = pm->pm_pte[vseg]; 881 if ((pmeg = pm->pm_segmap[vseg]) != seginval) { 882 register int tpte; 883 884 /* in hardware: fix hardware copy */ 885 if (pm->pm_ctx) { 886 extern vm_offset_t pager_sva, pager_eva; 887 888 /* 889 * Bizarreness: we never clear PG_W on 890 * pager pages, nor PG_NC on DVMA pages. 
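 * (Presumably: the pager's mappings must stay writable so pageout can complete, and DVMA space must stay uncached so device DMA remains coherent with memory.)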
891 */ 892 if (bic == PG_W && 893 va >= pager_sva && va < pager_eva) 894 continue; 895 if (bic == PG_NC && 896 va >= DVMA_BASE && va < DVMA_END) 897 continue; 898 setcontext(pm->pm_ctxnum); 899 /* XXX should flush only when necessary */ 900 #ifdef notdef 901 if (vactype != VAC_NONE) 902 #endif 903 cache_flush_page(va); 904 } else { 905 /* XXX per-cpu va? */ 906 setcontext(0); 907 setsegmap(0, pmeg); 908 va = VA_VPG(va) * NBPG; 909 } 910 tpte = getpte(va); 911 if (tpte & PG_V) 912 flags |= (tpte >> PG_M_SHIFT) & 913 (PV_MOD|PV_REF); 914 tpte = (tpte | bis) & ~bic; 915 setpte(va, tpte); 916 if (pte != NULL) /* update software copy */ 917 pte[VA_VPG(va)] = tpte; 918 } else { 919 /* not in hardware: just fix software copy */ 920 if (pte == NULL) 921 panic("pv_changepte 2"); 922 pte += VA_VPG(va); 923 *pte = (*pte | bis) & ~bic; 924 } 925 } 926 pv0->pv_flags = flags; 927 setcontext(ctx); 928 splx(s); 929 } 930 931 /* 932 * Sync ref and mod bits in pvlist (turns off same in hardware PTEs). 933 * Returns the new flags. 934 * 935 * This is just like pv_changepte, but we never add or remove bits, 936 * hence never need to adjust software copies. 937 */ 938 int 939 pv_syncflags(pv0) 940 register struct pvlist *pv0; 941 { 942 register struct pvlist *pv; 943 register struct pmap *pm; 944 register int tpte, va, vseg, pmeg, i, flags; 945 int ctx, s; 946 947 write_user_windows(); /* paranoid? */ 948 949 s = splpmap(); /* paranoid? */ 950 if (pv0->pv_pmap == NULL) { /* paranoid */ 951 splx(s); 952 return (0); 953 } 954 ctx = getcontext(); 955 flags = pv0->pv_flags; 956 for (pv = pv0; pv != NULL; pv = pv->pv_next) { 957 pm = pv->pv_pmap; 958 va = pv->pv_va; 959 vseg = VA_VSEG(va); 960 if ((pmeg = pm->pm_segmap[vseg]) == seginval) 961 continue; 962 if (pm->pm_ctx) { 963 setcontext(pm->pm_ctxnum); 964 /* XXX should flush only when necessary */ 965 #ifdef notdef 966 if (vactype != VAC_NONE) 967 #endif 968 cache_flush_page(va); 969 } else { 970 /* XXX per-cpu va? */ 971 setcontext(0); 972 setsegmap(0, pmeg); 973 va = VA_VPG(va) * NBPG; 974 } 975 tpte = getpte(va); 976 if (tpte & (PG_M|PG_U) && tpte & PG_V) { 977 flags |= (tpte >> PG_M_SHIFT) & 978 (PV_MOD|PV_REF); 979 tpte &= ~(PG_M|PG_U); 980 setpte(va, tpte); 981 } 982 } 983 pv0->pv_flags = flags; 984 setcontext(ctx); 985 splx(s); 986 return (flags); 987 } 988 989 /* 990 * pv_unlink is a helper function for pmap_remove. 991 * It takes a pointer to the pv_table head for some physical address 992 * and removes the appropriate (pmap, va) entry. 993 * 994 * Once the entry is removed, if the pv_table head has the cache 995 * inhibit bit set, see if we can turn that off; if so, walk the 996 * pvlist and turn off PG_NC in each PTE. (The pvlist is by 997 * definition nonempty, since it must have at least two elements 998 * in it to have PV_NC set, and we only remove one here.) 999 */ 1000 static void 1001 pv_unlink(pv, pm, va) 1002 register struct pvlist *pv; 1003 register struct pmap *pm; 1004 register vm_offset_t va; 1005 { 1006 register struct pvlist *npv; 1007 1008 /* 1009 * First entry is special (sigh). 
1010 */ 1011 npv = pv->pv_next; 1012 if (pv->pv_pmap == pm && pv->pv_va == va) { 1013 pmap_stats.ps_unlink_pvfirst++; 1014 if (npv != NULL) { 1015 pv->pv_next = npv->pv_next; 1016 pv->pv_pmap = npv->pv_pmap; 1017 pv->pv_va = npv->pv_va; 1018 free((caddr_t)npv, M_VMPVENT); 1019 } else 1020 pv->pv_pmap = NULL; 1021 } else { 1022 register struct pvlist *prev; 1023 1024 for (prev = pv;; prev = npv, npv = npv->pv_next) { 1025 pmap_stats.ps_unlink_pvsearch++; 1026 if (npv == NULL) 1027 panic("pv_unlink"); 1028 if (npv->pv_pmap == pm && npv->pv_va == va) 1029 break; 1030 } 1031 prev->pv_next = npv->pv_next; 1032 free((caddr_t)npv, M_VMPVENT); 1033 } 1034 if (pv->pv_flags & PV_NC) { 1035 /* 1036 * Not cached: check to see if we can fix that now. 1037 */ 1038 va = pv->pv_va; 1039 for (npv = pv->pv_next; npv != NULL; npv = npv->pv_next) 1040 if (BADALIAS(va, npv->pv_va)) 1041 return; 1042 pv->pv_flags &= ~PV_NC; 1043 pv_changepte(pv, 0, PG_NC); 1044 } 1045 } 1046 1047 /* 1048 * pv_link is the inverse of pv_unlink, and is used in pmap_enter. 1049 * It returns PG_NC if the (new) pvlist says that the address cannot 1050 * be cached. 1051 */ 1052 static int 1053 pv_link(pv, pm, va) 1054 register struct pvlist *pv; 1055 register struct pmap *pm; 1056 register vm_offset_t va; 1057 { 1058 register struct pvlist *npv; 1059 register int ret; 1060 1061 if (pv->pv_pmap == NULL) { 1062 /* no pvlist entries yet */ 1063 pmap_stats.ps_enter_firstpv++; 1064 pv->pv_next = NULL; 1065 pv->pv_pmap = pm; 1066 pv->pv_va = va; 1067 return (0); 1068 } 1069 /* 1070 * Before entering the new mapping, see if 1071 * it will cause old mappings to become aliased 1072 * and thus need to be `discached'. 1073 */ 1074 ret = 0; 1075 pmap_stats.ps_enter_secondpv++; 1076 if (pv->pv_flags & PV_NC) { 1077 /* already uncached, just stay that way */ 1078 ret = PG_NC; 1079 } else { 1080 /* MAY NEED TO DISCACHE ANYWAY IF va IS IN DVMA SPACE? */ 1081 for (npv = pv; npv != NULL; npv = npv->pv_next) { 1082 if (BADALIAS(va, npv->pv_va)) { 1083 pv->pv_flags |= PV_NC; 1084 pv_changepte(pv, ret = PG_NC, 0); 1085 break; 1086 } 1087 } 1088 } 1089 npv = (struct pvlist *)malloc(sizeof *npv, M_VMPVENT, M_WAITOK); 1090 npv->pv_next = pv->pv_next; 1091 npv->pv_pmap = pm; 1092 npv->pv_va = va; 1093 pv->pv_next = npv; 1094 return (ret); 1095 } 1096 1097 /* 1098 * Walk the given list and flush the cache for each (MI) page that is 1099 * potentially in the cache. 1100 */ 1101 pv_flushcache(pv) 1102 register struct pvlist *pv; 1103 { 1104 register struct pmap *pm; 1105 register int i, s, ctx; 1106 1107 write_user_windows(); /* paranoia? */ 1108 1109 s = splpmap(); /* XXX extreme paranoia */ 1110 if ((pm = pv->pv_pmap) != NULL) { 1111 ctx = getcontext(); 1112 for (;;) { 1113 if (pm->pm_ctx) { 1114 setcontext(pm->pm_ctxnum); 1115 cache_flush_page(pv->pv_va); 1116 } 1117 pv = pv->pv_next; 1118 if (pv == NULL) 1119 break; 1120 pm = pv->pv_pmap; 1121 } 1122 setcontext(ctx); 1123 } 1124 splx(s); 1125 } 1126 1127 /*----------------------------------------------------------------*/ 1128 1129 /* 1130 * At last, pmap code. 1131 */ 1132 1133 /* 1134 * Bootstrap the system enough to run with VM enabled. 1135 * 1136 * nmmu is the number of mmu entries (``PMEGs''); 1137 * nctx is the number of contexts. 
1138 */ 1139 void 1140 pmap_bootstrap(nmmu, nctx) 1141 int nmmu, nctx; 1142 { 1143 register union ctxinfo *ci; 1144 register struct mmuentry *me; 1145 register int i, j, n, z, vs; 1146 register caddr_t p; 1147 register void (*rom_setmap)(int ctx, caddr_t va, int pmeg); 1148 int lastpage; 1149 extern char end[]; 1150 extern caddr_t reserve_dumppages(caddr_t); 1151 1152 ncontext = nctx; 1153 1154 /* 1155 * Last segment is the `invalid' one (one PMEG of pte's with !pg_v). 1156 * It will never be used for anything else. 1157 */ 1158 seginval = --nmmu; 1159 1160 /* 1161 * Preserve the monitor ROM's reserved VM region, so that 1162 * we can use L1-A or the monitor's debugger. As a side 1163 * effect we map the ROM's reserved VM into all contexts 1164 * (otherwise L1-A crashes the machine!). 1165 */ 1166 nmmu = mmu_reservemon(nmmu); 1167 1168 /* 1169 * Allocate and clear mmu entry and context structures. 1170 */ 1171 p = end; 1172 mmuentry = me = (struct mmuentry *)p; 1173 p += nmmu * sizeof *me; 1174 ctxinfo = ci = (union ctxinfo *)p; 1175 p += nctx * sizeof *ci; 1176 bzero(end, p - end); 1177 1178 /* 1179 * Set up the `constants' for the call to vm_init() 1180 * in main(). All pages beginning at p (rounded up to 1181 * the next whole page) and continuing through the number 1182 * of available pages are free, but they start at a higher 1183 * virtual address. This gives us two mappable MD pages 1184 * for pmap_zero_page and pmap_copy_page, and one MI page 1185 * for /dev/mem, all with no associated physical memory. 1186 */ 1187 p = (caddr_t)(((u_int)p + NBPG - 1) & ~PGOFSET); 1188 avail_start = (int)p - KERNBASE; 1189 avail_end = init_translations() << PGSHIFT; 1190 i = (int)p; 1191 vpage[0] = p, p += NBPG; 1192 vpage[1] = p, p += NBPG; 1193 vmempage = p, p += NBPG; 1194 p = reserve_dumppages(p); 1195 virtual_avail = (vm_offset_t)p; 1196 virtual_end = VM_MAX_KERNEL_ADDRESS; 1197 1198 p = (caddr_t)i; /* retract to first free phys */ 1199 1200 /* 1201 * Intialize the kernel pmap. 1202 */ 1203 { 1204 register struct kpmap *k = &kernel_pmap_store; 1205 1206 /* kernel_pmap = (struct pmap *)k; */ 1207 k->pm_ctx = ctxinfo; 1208 /* k->pm_ctxnum = 0; */ 1209 simple_lock_init(&k->pm_lock); 1210 k->pm_refcount = 1; 1211 /* k->pm_mmuforw = 0; */ 1212 k->pm_mmuback = &k->pm_mmuforw; 1213 k->pm_segmap = &k->pm_rsegmap[-NUSEG]; 1214 k->pm_pte = &k->pm_rpte[-NUSEG]; 1215 k->pm_npte = &k->pm_rnpte[-NUSEG]; 1216 for (i = NKSEG; --i >= 0;) 1217 k->pm_rsegmap[i] = seginval; 1218 } 1219 1220 /* 1221 * All contexts are free except the kernel's. 1222 * 1223 * XXX sun4c could use context 0 for users? 1224 */ 1225 ci->c_pmap = kernel_pmap; 1226 ctx_freelist = ci + 1; 1227 for (i = 1; i < ncontext; i++) { 1228 ci++; 1229 ci->c_nextfree = ci + 1; 1230 } 1231 ci->c_nextfree = NULL; 1232 ctx_kick = 0; 1233 ctx_kickdir = -1; 1234 1235 /* me_freelist = NULL; */ /* already NULL */ 1236 1237 /* 1238 * Init mmu entries that map the kernel physical addresses. 1239 * If the page bits in p are 0, we filled the last segment 1240 * exactly (now how did that happen?); if not, it is 1241 * the last page filled in the last segment. 1242 * 1243 * All the other MMU entries are free. 
1244 * 1245 * THIS ASSUMES SEGMENT i IS MAPPED BY MMU ENTRY i DURING THE 1246 * BOOT PROCESS 1247 */ 1248 z = ((((u_int)p + NBPSG - 1) & ~SGOFSET) - KERNBASE) >> SGSHIFT; 1249 lastpage = VA_VPG(p); 1250 if (lastpage == 0) 1251 lastpage = NPTESG; 1252 p = (caddr_t)KERNBASE; /* first va */ 1253 vs = VA_VSEG(KERNBASE); /* first virtual segment */ 1254 rom_setmap = promvec->pv_setctxt; 1255 for (i = 0;;) { 1256 /* 1257 * Distribute each kernel segment into all contexts. 1258 * This is done through the monitor ROM, rather than 1259 * directly here: if we do a setcontext we will fault, 1260 * as we are not (yet) mapped in any other context. 1261 */ 1262 for (j = 1; j < nctx; j++) 1263 rom_setmap(j, p, i); 1264 1265 /* set up the mmu entry */ 1266 me->me_pmeg = i; 1267 insque(me, me_locked.mh_prev); 1268 /* me->me_pmforw = NULL; */ 1269 me->me_pmback = kernel_pmap->pm_mmuback; 1270 *kernel_pmap->pm_mmuback = me; 1271 kernel_pmap->pm_mmuback = &me->me_pmforw; 1272 me->me_pmap = kernel_pmap; 1273 me->me_vseg = vs; 1274 kernel_pmap->pm_segmap[vs] = i; 1275 n = ++i < z ? NPTESG : lastpage; 1276 kernel_pmap->pm_npte[vs] = n; 1277 me++; 1278 vs++; 1279 if (i < z) { 1280 p += NBPSG; 1281 continue; 1282 } 1283 /* 1284 * Unmap the pages, if any, that are not part of 1285 * the final segment. 1286 */ 1287 for (p += n * NBPG; j < NPTESG; j++, p += NBPG) 1288 setpte(p, 0); 1289 break; 1290 } 1291 for (; i < nmmu; i++, me++) { 1292 me->me_pmeg = i; 1293 me->me_next = me_freelist; 1294 /* me->me_pmap = NULL; */ 1295 me_freelist = me; 1296 } 1297 1298 /* 1299 * write protect & encache kernel text; 1300 * set red zone at kernel base; enable cache on message buffer. 1301 */ 1302 { 1303 extern char etext[], trapbase[]; 1304 #ifdef KGDB 1305 register int mask = ~PG_NC; /* XXX chgkprot is busted */ 1306 #else 1307 register int mask = ~(PG_W | PG_NC); 1308 #endif 1309 for (p = trapbase; p < etext; p += NBPG) 1310 setpte(p, getpte(p) & mask); 1311 p = (caddr_t)KERNBASE; 1312 setpte(p, 0); 1313 p += NBPG; 1314 setpte(p, getpte(p) & ~PG_NC); 1315 } 1316 1317 /* 1318 * Grab physical memory list (for /dev/mem). 1319 */ 1320 npmemarr = makememarr(pmemarr, MA_SIZE, MEMARR_TOTALPHYS); 1321 } 1322 1323 /* 1324 * Bootstrap memory allocator. This function allows for early dynamic 1325 * memory allocation until the virtual memory system has been bootstrapped. 1326 * After that point, either kmem_alloc or malloc should be used. This 1327 * function works by stealing pages from the (to be) managed page pool, 1328 * stealing virtual address space, then mapping the pages and zeroing them. 1329 * 1330 * It should be used from pmap_bootstrap till vm_page_startup, afterwards 1331 * it cannot be used, and will generate a panic if tried. Note that this 1332 * memory will never be freed, and in essence it is wired down. 1333 */ 1334 void * 1335 pmap_bootstrap_alloc(size) 1336 int size; 1337 { 1338 register void *mem; 1339 extern int vm_page_startup_initialized; 1340 1341 if (vm_page_startup_initialized) 1342 panic("pmap_bootstrap_alloc: called after startup initialized"); 1343 size = round_page(size); 1344 mem = (void *)virtual_avail; 1345 virtual_avail = pmap_map(virtual_avail, avail_start, 1346 avail_start + size, VM_PROT_READ|VM_PROT_WRITE); 1347 avail_start += size; 1348 bzero((void *)mem, size); 1349 return (mem); 1350 } 1351 1352 /* 1353 * Initialize the pmap module. 
1354 */ 1355 void 1356 pmap_init(phys_start, phys_end) 1357 register vm_offset_t phys_start, phys_end; 1358 { 1359 register vm_size_t s; 1360 1361 if (PAGE_SIZE != NBPG) 1362 panic("pmap_init: CLSIZE!=1"); 1363 /* 1364 * Allocate and clear memory for the pv_table. 1365 */ 1366 s = sizeof(struct pvlist) * atop(phys_end - phys_start); 1367 s = round_page(s); 1368 pv_table = (struct pvlist *)kmem_alloc(kernel_map, s); 1369 bzero((caddr_t)pv_table, s); 1370 vm_first_phys = phys_start; 1371 vm_num_phys = phys_end - phys_start; 1372 } 1373 1374 /* 1375 * Map physical addresses into kernel VM. 1376 */ 1377 vm_offset_t 1378 pmap_map(va, pa, endpa, prot) 1379 register vm_offset_t va, pa, endpa; 1380 register int prot; 1381 { 1382 register int pgsize = PAGE_SIZE; 1383 1384 while (pa < endpa) { 1385 pmap_enter(kernel_pmap, va, pa, prot, 1); 1386 va += pgsize; 1387 pa += pgsize; 1388 } 1389 return (va); 1390 } 1391 1392 /* 1393 * Create and return a physical map. 1394 * 1395 * If size is nonzero, the map is useless. (ick) 1396 */ 1397 struct pmap * 1398 pmap_create(size) 1399 vm_size_t size; 1400 { 1401 register struct pmap *pm; 1402 1403 if (size) 1404 return (NULL); 1405 pm = (struct pmap *)malloc(sizeof *pm, M_VMPMAP, M_WAITOK); 1406 #ifdef DEBUG 1407 if (pmapdebug & PDB_CREATE) 1408 printf("pmap_create: created %x\n", pm); 1409 #endif 1410 bzero((caddr_t)pm, sizeof *pm); 1411 pmap_pinit(pm); 1412 return (pm); 1413 } 1414 1415 /* 1416 * Initialize a preallocated and zeroed pmap structure, 1417 * such as one in a vmspace structure. 1418 */ 1419 void 1420 pmap_pinit(pm) 1421 register struct pmap *pm; 1422 { 1423 register int i; 1424 1425 #ifdef DEBUG 1426 if (pmapdebug & PDB_CREATE) 1427 printf("pmap_pinit(%x)\n", pm); 1428 #endif 1429 /* pm->pm_ctx = NULL; */ 1430 simple_lock_init(&pm->pm_lock); 1431 pm->pm_refcount = 1; 1432 /* pm->pm_mmuforw = NULL; */ 1433 pm->pm_mmuback = &pm->pm_mmuforw; 1434 pm->pm_segmap = pm->pm_rsegmap; 1435 pm->pm_pte = pm->pm_rpte; 1436 pm->pm_npte = pm->pm_rnpte; 1437 for (i = NUSEG; --i >= 0;) 1438 pm->pm_rsegmap[i] = seginval; 1439 /* bzero((caddr_t)pm->pm_rpte, sizeof pm->pm_rpte); */ 1440 /* bzero((caddr_t)pm->pm_rnpte, sizeof pm->pm_rnpte); */ 1441 } 1442 1443 /* 1444 * Retire the given pmap from service. 1445 * Should only be called if the map contains no valid mappings. 1446 */ 1447 void 1448 pmap_destroy(pm) 1449 register struct pmap *pm; 1450 { 1451 int count; 1452 1453 if (pm == NULL) 1454 return; 1455 #ifdef DEBUG 1456 if (pmapdebug & PDB_DESTROY) 1457 printf("pmap_destroy(%x)\n", pm); 1458 #endif 1459 simple_lock(&pm->pm_lock); 1460 count = --pm->pm_refcount; 1461 simple_unlock(&pm->pm_lock); 1462 if (count == 0) { 1463 pmap_release(pm); 1464 free((caddr_t)pm, M_VMPMAP); 1465 } 1466 } 1467 1468 /* 1469 * Release any resources held by the given physical map. 1470 * Called when a pmap initialized by pmap_pinit is being released. 1471 */ 1472 void 1473 pmap_release(pm) 1474 register struct pmap *pm; 1475 { 1476 register union ctxinfo *c; 1477 register int s = splpmap(); /* paranoia */ 1478 1479 #ifdef DEBUG 1480 if (pmapdebug & PDB_DESTROY) 1481 printf("pmap_release(%x)\n", pm); 1482 #endif 1483 if (pm->pm_mmuforw) 1484 panic("pmap_release mmuforw"); 1485 if ((c = pm->pm_ctx) != NULL) { 1486 if (pm->pm_ctxnum == 0) 1487 panic("pmap_release: releasing kernel"); 1488 ctx_free(pm); 1489 } 1490 splx(s); 1491 } 1492 1493 /* 1494 * Add a reference to the given pmap. 
1495 */ 1496 void 1497 pmap_reference(pm) 1498 struct pmap *pm; 1499 { 1500 1501 if (pm != NULL) { 1502 simple_lock(&pm->pm_lock); 1503 pm->pm_refcount++; 1504 simple_unlock(&pm->pm_lock); 1505 } 1506 } 1507 1508 static int pmap_rmk(struct pmap *, vm_offset_t, vm_offset_t, int, int, int); 1509 static int pmap_rmu(struct pmap *, vm_offset_t, vm_offset_t, int, int, int); 1510 1511 /* 1512 * Remove the given range of mapping entries. 1513 * The starting and ending addresses are already rounded to pages. 1514 * Sheer lunacy: pmap_remove is often asked to remove nonexistent 1515 * mappings. 1516 */ 1517 void 1518 pmap_remove(pm, va, endva) 1519 register struct pmap *pm; 1520 register vm_offset_t va, endva; 1521 { 1522 register vm_offset_t nva; 1523 register int vseg, nleft, s, ctx; 1524 register int (*rm)(struct pmap *, vm_offset_t, vm_offset_t, 1525 int, int, int); 1526 1527 if (pm == NULL) 1528 return; 1529 #ifdef DEBUG 1530 if (pmapdebug & PDB_REMOVE) 1531 printf("pmap_remove(%x, %x, %x)\n", pm, va, endva); 1532 #endif 1533 1534 if (pm == kernel_pmap) { 1535 /* 1536 * Removing from kernel address space. 1537 */ 1538 rm = pmap_rmk; 1539 } else { 1540 /* 1541 * Removing from user address space. 1542 */ 1543 write_user_windows(); 1544 rm = pmap_rmu; 1545 } 1546 1547 ctx = getcontext(); 1548 s = splpmap(); /* XXX conservative */ 1549 simple_lock(&pm->pm_lock); 1550 for (; va < endva; va = nva) { 1551 /* do one virtual segment at a time */ 1552 vseg = VA_VSEG(va); 1553 nva = VSTOVA(vseg + 1); 1554 if (nva == 0 || nva > endva) 1555 nva = endva; 1556 if ((nleft = pm->pm_npte[vseg]) != 0) 1557 pm->pm_npte[vseg] = (*rm)(pm, va, nva, 1558 vseg, nleft, pm->pm_segmap[vseg]); 1559 } 1560 simple_unlock(&pm->pm_lock); 1561 splx(s); 1562 setcontext(ctx); 1563 } 1564 1565 #define perftest 1566 #ifdef perftest 1567 /* counters, one per possible length */ 1568 int rmk_vlen[NPTESG+1]; /* virtual length per rmk() call */ 1569 int rmk_npg[NPTESG+1]; /* n valid pages per rmk() call */ 1570 int rmk_vlendiff; /* # times npg != vlen */ 1571 #endif 1572 1573 /* 1574 * The following magic number was chosen because: 1575 * 1. It is the same amount of work to cache_flush_page 4 pages 1576 * as to cache_flush_segment 1 segment (so at 4 the cost of 1577 * flush is the same). 1578 * 2. Flushing extra pages is bad (causes cache not to work). 1579 * 3. The current code, which malloc()s 5 pages for each process 1580 * for a user vmspace/pmap, almost never touches all 5 of those 1581 * pages. 1582 */ 1583 #define PMAP_RMK_MAGIC 5 /* if > magic, use cache_flush_segment */ 1584 1585 /* 1586 * Remove a range contained within a single segment. 1587 * These are egregiously complicated routines. 
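 * pmap_rmk handles the kernel pmap, which is resident in every context; pmap_rmu handles user pmaps, whose PTEs may exist only in the software copy (pm_pte) when the segment is not loaded in the MMU.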
1588 */ 1589 1590 /* remove from kernel, return new nleft */ 1591 static int 1592 pmap_rmk(pm, va, endva, vseg, nleft, pmeg) 1593 register struct pmap *pm; 1594 register vm_offset_t va, endva; 1595 register int vseg, nleft, pmeg; 1596 { 1597 register int i, tpte, perpage, npg; 1598 register struct pvlist *pv; 1599 #ifdef perftest 1600 register int nvalid; 1601 #endif 1602 1603 #ifdef DEBUG 1604 if (pmeg == seginval) 1605 panic("pmap_rmk: not loaded"); 1606 if (pm->pm_ctx == NULL) 1607 panic("pmap_rmk: lost context"); 1608 #endif 1609 1610 setcontext(0); 1611 /* decide how to flush cache */ 1612 npg = (endva - va) >> PGSHIFT; 1613 if (npg > PMAP_RMK_MAGIC) { 1614 /* flush the whole segment */ 1615 perpage = 0; 1616 #ifdef notdef 1617 if (vactype != VAC_NONE) 1618 #endif 1619 cache_flush_segment(vseg); 1620 } else { 1621 /* flush each page individually; some never need flushing */ 1622 perpage = 1; 1623 } 1624 #ifdef perftest 1625 nvalid = 0; 1626 #endif 1627 while (va < endva) { 1628 tpte = getpte(va); 1629 if ((tpte & PG_V) == 0) { 1630 va += PAGE_SIZE; 1631 continue; 1632 } 1633 pv = NULL; 1634 /* if cacheable, flush page as needed */ 1635 if ((tpte & PG_NC) == 0) { 1636 #ifdef perftest 1637 nvalid++; 1638 #endif 1639 if (perpage) 1640 cache_flush_page(va); 1641 } 1642 if ((tpte & PG_TYPE) == PG_OBMEM) { 1643 i = ptoa(HWTOSW(tpte & PG_PFNUM)); 1644 if (managed(i)) { 1645 pv = pvhead(i); 1646 pv->pv_flags |= MR(tpte); 1647 pv_unlink(pv, pm, va); 1648 } 1649 } 1650 nleft--; 1651 setpte(va, 0); 1652 va += NBPG; 1653 } 1654 #ifdef perftest 1655 rmk_vlen[npg]++; 1656 rmk_npg[nvalid]++; 1657 if (npg != nvalid) 1658 rmk_vlendiff++; 1659 #endif 1660 1661 /* 1662 * If the segment is all gone, remove it from everyone and 1663 * free the MMU entry. 1664 */ 1665 if (nleft == 0) { 1666 va = VSTOVA(vseg); /* retract */ 1667 setsegmap(va, seginval); 1668 for (i = ncontext; --i > 0;) { 1669 setcontext(i); 1670 setsegmap(va, seginval); 1671 } 1672 me_free(pm, pmeg); 1673 } 1674 return (nleft); 1675 } 1676 1677 #ifdef perftest 1678 /* as before but for pmap_rmu */ 1679 int rmu_vlen[NPTESG+1]; /* virtual length per rmu() call */ 1680 int rmu_npg[NPTESG+1]; /* n valid pages per rmu() call */ 1681 int rmu_vlendiff; /* # times npg != vlen */ 1682 int rmu_noflush; /* # times rmu does not need to flush at all */ 1683 #endif 1684 1685 /* 1686 * Just like pmap_rmk_magic, but we have a different threshold. 1687 * Note that this may well deserve further tuning work. 1688 */ 1689 #define PMAP_RMU_MAGIC 4 /* if > magic, use cache_flush_segment */ 1690 1691 /* remove from user */ 1692 static int 1693 pmap_rmu(pm, va, endva, vseg, nleft, pmeg) 1694 register struct pmap *pm; 1695 register vm_offset_t va, endva; 1696 register int vseg, nleft, pmeg; 1697 { 1698 register int *pte0, i, pteva, tpte, perpage, npg; 1699 register struct pvlist *pv; 1700 #ifdef perftest 1701 register int doflush, nvalid; 1702 #endif 1703 1704 pte0 = pm->pm_pte[vseg]; 1705 if (pmeg == seginval) { 1706 register int *pte = pte0 + VA_VPG(va); 1707 1708 /* 1709 * PTEs are not in MMU. Just invalidate software copies. 
1710 */ 1711 for (; va < endva; pte++, va += PAGE_SIZE) { 1712 tpte = *pte; 1713 if ((tpte & PG_V) == 0) { 1714 /* nothing to remove (braindead VM layer) */ 1715 continue; 1716 } 1717 if ((tpte & PG_TYPE) == PG_OBMEM) { 1718 i = ptoa(HWTOSW(tpte & PG_PFNUM)); 1719 if (managed(i)) 1720 pv_unlink(pvhead(i), pm, va); 1721 } 1722 nleft--; 1723 *pte = 0; 1724 } 1725 if (nleft == 0) { 1726 free((caddr_t)pte0, M_VMPMAP); 1727 pm->pm_pte[vseg] = NULL; 1728 } 1729 return (nleft); 1730 } 1731 1732 /* 1733 * PTEs are in MMU. Invalidate in hardware, update ref & 1734 * mod bits, and flush cache if required. 1735 */ 1736 if (pm->pm_ctx) { 1737 /* process has a context, must flush cache */ 1738 npg = (endva - va) >> PGSHIFT; 1739 #ifdef perftest 1740 doflush = 1; 1741 nvalid = 0; 1742 #endif 1743 setcontext(pm->pm_ctxnum); 1744 if (npg > PMAP_RMU_MAGIC) { 1745 perpage = 0; /* flush the whole segment */ 1746 #ifdef notdef 1747 if (vactype != VAC_NONE) 1748 #endif 1749 cache_flush_segment(vseg); 1750 } else 1751 perpage = 1; 1752 pteva = va; 1753 } else { 1754 /* no context, use context 0; cache flush unnecessary */ 1755 setcontext(0); 1756 /* XXX use per-cpu pteva? */ 1757 setsegmap(0, pmeg); 1758 pteva = VA_VPG(va) * NBPG; 1759 perpage = 0; 1760 #ifdef perftest 1761 npg = 0; 1762 doflush = 0; 1763 nvalid = 0; 1764 rmu_noflush++; 1765 #endif 1766 } 1767 for (; va < endva; pteva += PAGE_SIZE, va += PAGE_SIZE) { 1768 tpte = getpte(pteva); 1769 if ((tpte & PG_V) == 0) 1770 continue; 1771 pv = NULL; 1772 /* if cacheable, flush page as needed */ 1773 if (doflush && (tpte & PG_NC) == 0) { 1774 #ifdef perftest 1775 nvalid++; 1776 #endif 1777 if (perpage) 1778 cache_flush_page(va); 1779 } 1780 if ((tpte & PG_TYPE) == PG_OBMEM) { 1781 i = ptoa(HWTOSW(tpte & PG_PFNUM)); 1782 if (managed(i)) { 1783 pv = pvhead(i); 1784 pv->pv_flags |= MR(tpte); 1785 pv_unlink(pv, pm, va); 1786 } 1787 } 1788 nleft--; 1789 setpte(pteva, 0); 1790 } 1791 #ifdef perftest 1792 if (doflush) { 1793 rmu_vlen[npg]++; 1794 rmu_npg[nvalid]++; 1795 if (npg != nvalid) 1796 rmu_vlendiff++; 1797 } 1798 #endif 1799 1800 /* 1801 * If the segment is all gone, and the context is loaded, give 1802 * the segment back. 1803 */ 1804 if (nleft == 0 && pm->pm_ctx != NULL) { 1805 va = VSTOVA(vseg); /* retract */ 1806 setsegmap(va, seginval); 1807 free((caddr_t)pte0, M_VMPMAP); 1808 pm->pm_pte[vseg] = NULL; 1809 me_free(pm, pmeg); 1810 } 1811 return (nleft); 1812 } 1813 1814 /* 1815 * Lower (make more strict) the protection on the specified 1816 * physical page. 1817 * 1818 * There are only two cases: either the protection is going to 0 1819 * (in which case we do the dirty work here), or it is going from 1820 * to read-only (in which case pv_changepte does the trick). 1821 */ 1822 void 1823 pmap_page_protect(pa, prot) 1824 vm_offset_t pa; 1825 vm_prot_t prot; 1826 { 1827 register struct pvlist *pv, *pv0, *npv; 1828 register struct pmap *pm; 1829 register int *pte; 1830 register int va, vseg, pteva, tpte; 1831 register int flags, nleft, i, pmeg, s, ctx, doflush; 1832 1833 #ifdef DEBUG 1834 if ((pmapdebug & PDB_CHANGEPROT) || 1835 (pmapdebug & PDB_REMOVE && prot == VM_PROT_NONE)) 1836 printf("pmap_page_protect(%x, %x)\n", pa, prot); 1837 #endif 1838 /* 1839 * Skip unmanaged pages, or operations that do not take 1840 * away write permission. 
1841 */ 1842 if (!managed(pa) || prot & VM_PROT_WRITE) 1843 return; 1844 write_user_windows(); /* paranoia */ 1845 if (prot & VM_PROT_READ) { 1846 pv_changepte(pvhead(pa), 0, PG_W); 1847 return; 1848 } 1849 1850 /* 1851 * Remove all access to all people talking to this page. 1852 * Walk down PV list, removing all mappings. 1853 * The logic is much like that for pmap_remove, 1854 * but we know we are removing exactly one page. 1855 */ 1856 pv = pvhead(pa); 1857 s = splpmap(); 1858 if ((pm = pv->pv_pmap) == NULL) { 1859 splx(s); 1860 return; 1861 } 1862 ctx = getcontext(); 1863 pv0 = pv; 1864 flags = pv->pv_flags & ~PV_NC; 1865 for (;; pm = pv->pv_pmap) { 1866 va = pv->pv_va; 1867 vseg = VA_VSEG(va); 1868 if ((nleft = pm->pm_npte[vseg]) == 0) 1869 panic("pmap_remove_all: empty vseg"); 1870 nleft--; 1871 pm->pm_npte[vseg] = nleft; 1872 pmeg = pm->pm_segmap[vseg]; 1873 pte = pm->pm_pte[vseg]; 1874 if (pmeg == seginval) { 1875 if (nleft) { 1876 pte += VA_VPG(va); 1877 *pte = 0; 1878 } else { 1879 free((caddr_t)pte, M_VMPMAP); 1880 pm->pm_pte[vseg] = NULL; 1881 } 1882 goto nextpv; 1883 } 1884 if (pm->pm_ctx) { 1885 setcontext(pm->pm_ctxnum); 1886 pteva = va; 1887 #ifdef notdef 1888 doflush = vactype != VAC_NONE; 1889 #else 1890 doflush = 1; 1891 #endif 1892 } else { 1893 setcontext(0); 1894 /* XXX use per-cpu pteva? */ 1895 setsegmap(0, pmeg); 1896 pteva = VA_VPG(va) * NBPG; 1897 doflush = 0; 1898 } 1899 if (nleft) { 1900 if (doflush) 1901 cache_flush_page(va); 1902 tpte = getpte(pteva); 1903 if ((tpte & PG_V) == 0) 1904 panic("pmap_page_protect !PG_V 1"); 1905 flags |= MR(tpte); 1906 setpte(pteva, 0); 1907 } else { 1908 if (doflush) 1909 cache_flush_page(va); 1910 tpte = getpte(pteva); 1911 if ((tpte & PG_V) == 0) 1912 panic("pmap_page_protect !PG_V 2"); 1913 flags |= MR(tpte); 1914 if (pm->pm_ctx) { 1915 setsegmap(va, seginval); 1916 if (pm == kernel_pmap) { 1917 for (i = ncontext; --i > 0;) { 1918 setcontext(i); 1919 setsegmap(va, seginval); 1920 } 1921 goto skipptefree; 1922 } 1923 } 1924 free((caddr_t)pte, M_VMPMAP); 1925 pm->pm_pte[vseg] = NULL; 1926 skipptefree: 1927 me_free(pm, pmeg); 1928 } 1929 nextpv: 1930 npv = pv->pv_next; 1931 if (pv != pv0) 1932 free((caddr_t)pv, M_VMPVENT); 1933 if ((pv = npv) == NULL) 1934 break; 1935 } 1936 pv0->pv_pmap = NULL; 1937 pv0->pv_flags = flags; 1938 setcontext(ctx); 1939 splx(s); 1940 } 1941 1942 /* 1943 * Lower (make more strict) the protection on the specified 1944 * range of this pmap. 1945 * 1946 * There are only two cases: either the protection is going to 0 1947 * (in which case we call pmap_remove to do the dirty work), or 1948 * it is going from read/write to read-only. The latter is 1949 * fairly easy. 
1950 */ 1951 void 1952 pmap_protect(pm, sva, eva, prot) 1953 register struct pmap *pm; 1954 vm_offset_t sva, eva; 1955 vm_prot_t prot; 1956 { 1957 register int va, nva, vseg, pteva, pmeg; 1958 register int s, ctx; 1959 1960 if (pm == NULL || prot & VM_PROT_WRITE) 1961 return; 1962 if ((prot & VM_PROT_READ) == 0) { 1963 pmap_remove(pm, sva, eva); 1964 return; 1965 } 1966 1967 write_user_windows(); 1968 ctx = getcontext(); 1969 s = splpmap(); 1970 simple_lock(&pm->pm_lock); 1971 1972 for (va = sva; va < eva;) { 1973 vseg = VA_VSEG(va); 1974 nva = VSTOVA(vseg + 1); 1975 if (nva == 0) panic("pmap_protect: last segment"); /* cannot happen */ 1976 if (nva > eva) 1977 nva = eva; 1978 if (pm->pm_npte[vseg] == 0) { 1979 va = nva; 1980 continue; 1981 } 1982 pmeg = pm->pm_segmap[vseg]; 1983 if (pmeg == seginval) { 1984 register int *pte = &pm->pm_pte[vseg][VA_VPG(va)]; 1985 1986 /* not in MMU; just clear PG_W from core copies */ 1987 for (; va < nva; va += NBPG) 1988 *pte++ &= ~PG_W; 1989 } else { 1990 /* in MMU: take away write bits from MMU PTEs */ 1991 if ( 1992 #ifdef notdef 1993 vactype != VAC_NONE && 1994 #endif 1995 pm->pm_ctx) { 1996 register int tpte; 1997 1998 /* 1999 * Flush cache so that any existing cache 2000 * tags are updated. This is really only 2001 * needed for PTEs that lose PG_W. 2002 */ 2003 setcontext(pm->pm_ctxnum); 2004 for (; va < nva; va += NBPG) { 2005 tpte = getpte(va); 2006 pmap_stats.ps_npg_prot_all++; 2007 if (tpte & PG_W) { 2008 pmap_stats.ps_npg_prot_actual++; 2009 cache_flush_page(va); 2010 setpte(va, tpte & ~PG_W); 2011 } 2012 } 2013 } else { 2014 register int pteva; 2015 2016 /* 2017 * No context, hence not cached; 2018 * just update PTEs. 2019 */ 2020 setcontext(0); 2021 /* XXX use per-cpu pteva? */ 2022 setsegmap(0, pmeg); 2023 pteva = VA_VPG(va) * NBPG; 2024 for (; va < nva; pteva += NBPG, va += NBPG) 2025 setpte(pteva, getpte(pteva) & ~PG_W); 2026 } 2027 } 2028 } 2029 simple_unlock(&pm->pm_lock); 2030 splx(s); 2031 } 2032 2033 /* 2034 * Change the protection and/or wired status of the given (MI) virtual page. 2035 * XXX: should have separate function (or flag) telling whether only wiring 2036 * is changing. 2037 */ 2038 void 2039 pmap_changeprot(pm, va, prot, wired) 2040 register struct pmap *pm; 2041 register vm_offset_t va; 2042 vm_prot_t prot; 2043 int wired; 2044 { 2045 register int vseg, tpte, newprot, pmeg, ctx, i, s; 2046 2047 #ifdef DEBUG 2048 if (pmapdebug & PDB_CHANGEPROT) 2049 printf("pmap_changeprot(%x, %x, %x, %x)\n", 2050 pm, va, prot, wired); 2051 #endif 2052 2053 write_user_windows(); /* paranoia */ 2054 2055 if (pm == kernel_pmap) 2056 newprot = prot & VM_PROT_WRITE ? PG_S|PG_W : PG_S; 2057 else 2058 newprot = prot & VM_PROT_WRITE ? 
PG_W : 0; 2059 vseg = VA_VSEG(va); 2060 s = splpmap(); /* conservative */ 2061 pmap_stats.ps_changeprots++; 2062 2063 /* update PTEs in software or hardware */ 2064 if ((pmeg = pm->pm_segmap[vseg]) == seginval) { 2065 register int *pte = &pm->pm_pte[vseg][VA_VPG(va)]; 2066 2067 /* update in software */ 2068 if ((*pte & PG_PROT) == newprot) 2069 goto useless; 2070 *pte = (*pte & ~PG_PROT) | newprot; 2071 } else { 2072 /* update in hardware */ 2073 ctx = getcontext(); 2074 if (pm->pm_ctx) { 2075 /* use current context; flush writeback cache */ 2076 setcontext(pm->pm_ctxnum); 2077 tpte = getpte(va); 2078 if ((tpte & PG_PROT) == newprot) 2079 goto useless; 2080 if (vactype == VAC_WRITEBACK && 2081 (newprot & PG_W) == 0 && 2082 (tpte & (PG_W | PG_NC)) == PG_W) 2083 cache_flush_page((int)va); 2084 } else { 2085 setcontext(0); 2086 /* XXX use per-cpu va? */ 2087 setsegmap(0, pmeg); 2088 va = VA_VPG(va) * NBPG; 2089 tpte = getpte(va); 2090 if ((tpte & PG_PROT) == newprot) 2091 goto useless; 2092 } 2093 tpte = (tpte & ~PG_PROT) | newprot; 2094 setpte(va, tpte); 2095 setcontext(ctx); 2096 } 2097 splx(s); 2098 return; 2099 2100 useless: 2101 /* only wiring changed, and we ignore wiring */ 2102 pmap_stats.ps_useless_changeprots++; 2103 splx(s); 2104 } 2105 2106 /* 2107 * Insert (MI) physical page pa at virtual address va in the given pmap. 2108 * NB: the pa parameter includes type bits PMAP_OBIO, PMAP_NC as necessary. 2109 * 2110 * If pa is not in the `managed' range it will not be `bank mapped'. 2111 * This works during bootstrap only because the first 4MB happens to 2112 * map one-to-one. 2113 * 2114 * There may already be something else there, or we might just be 2115 * changing protections and/or wiring on an existing mapping. 2116 * XXX should have different entry points for changing! 2117 */ 2118 void 2119 pmap_enter(pm, va, pa, prot, wired) 2120 register struct pmap *pm; 2121 vm_offset_t va, pa; 2122 vm_prot_t prot; 2123 int wired; 2124 { 2125 register struct pvlist *pv; 2126 register int pteproto, ctx; 2127 2128 if (pm == NULL) 2129 return; 2130 #ifdef DEBUG 2131 if (pmapdebug & PDB_ENTER) 2132 printf("pmap_enter(%x, %x, %x, %x, %x)\n", 2133 pm, va, pa, prot, wired); 2134 #endif 2135 2136 pteproto = PG_V | ((pa & PMAP_TNC) << PG_TNC_SHIFT); 2137 pa &= ~PMAP_TNC; 2138 /* 2139 * Set up prototype for new PTE. Cannot set PG_NC from PV_NC yet 2140 * since the pvlist no-cache bit might change as a result of the 2141 * new mapping. 
2142 */ 2143 if (managed(pa)) { 2144 pteproto |= SWTOHW(atop(pa)); 2145 pv = pvhead(pa); 2146 } else { 2147 pteproto |= atop(pa) & PG_PFNUM; 2148 pv = NULL; 2149 } 2150 if (prot & VM_PROT_WRITE) 2151 pteproto |= PG_W; 2152 2153 ctx = getcontext(); 2154 if (pm == kernel_pmap) 2155 pmap_enk(pm, va, prot, wired, pv, pteproto | PG_S); 2156 else 2157 pmap_enu(pm, va, prot, wired, pv, pteproto); 2158 setcontext(ctx); 2159 } 2160 2161 /* enter new (or change existing) kernel mapping */ 2162 pmap_enk(pm, va, prot, wired, pv, pteproto) 2163 register struct pmap *pm; 2164 vm_offset_t va; 2165 vm_prot_t prot; 2166 int wired; 2167 register struct pvlist *pv; 2168 register int pteproto; 2169 { 2170 register int vseg, tpte, pmeg, i, s; 2171 2172 vseg = VA_VSEG(va); 2173 s = splpmap(); /* XXX way too conservative */ 2174 if (pm->pm_segmap[vseg] != seginval && 2175 (tpte = getpte(va)) & PG_V) { 2176 register int addr = tpte & PG_PFNUM; 2177 2178 /* old mapping exists */ 2179 if (addr == (pteproto & PG_PFNUM)) { 2180 /* just changing protection and/or wiring */ 2181 splx(s); 2182 pmap_changeprot(pm, va, prot, wired); 2183 return; 2184 } 2185 2186 /*printf("pmap_enk: changing existing va=>pa entry\n");*/ 2187 /* 2188 * Switcheroo: changing pa for this va. 2189 * If old pa was managed, remove from pvlist. 2190 * If old page was cached, flush cache. 2191 */ 2192 addr = ptoa(HWTOSW(addr)); 2193 if (managed(addr)) 2194 pv_unlink(pvhead(addr), pm, va); 2195 if ( 2196 #ifdef notdef 2197 vactype != VAC_NONE && 2198 #endif 2199 (tpte & PG_NC) == 0) { 2200 setcontext(0); /* ??? */ 2201 cache_flush_page((int)va); 2202 } 2203 } else { 2204 /* adding new entry */ 2205 pm->pm_npte[vseg]++; 2206 } 2207 2208 /* 2209 * If the new mapping is for a managed PA, enter into pvlist. 2210 * Note that the mapping for a malloc page will always be 2211 * unique (hence will never cause a second call to malloc). 2212 */ 2213 if (pv != NULL) 2214 pteproto |= pv_link(pv, pm, va); 2215 2216 pmeg = pm->pm_segmap[vseg]; 2217 if (pmeg == seginval) { 2218 register int tva; 2219 2220 /* 2221 * Allocate an MMU entry now (on locked list), 2222 * and map it into every context. Set all its 2223 * PTEs invalid (we will then overwrite one, but 2224 * this is more efficient than looping twice). 2225 */ 2226 #ifdef DEBUG 2227 if (pm->pm_ctx == NULL || pm->pm_ctxnum != 0) 2228 panic("pmap_enk: kern seg but no kern ctx"); 2229 #endif 2230 pmeg = me_alloc(&me_locked, pm, vseg)->me_pmeg; 2231 pm->pm_segmap[vseg] = pmeg; 2232 i = ncontext - 1; 2233 do { 2234 setcontext(i); 2235 setsegmap(va, pmeg); 2236 } while (--i >= 0); 2237 2238 /* set all PTEs to invalid, then overwrite one PTE below */ 2239 tva = VA_ROUNDDOWNTOSEG(va); 2240 i = NPTESG; 2241 do { 2242 setpte(tva, 0); 2243 tva += NBPG; 2244 } while (--i > 0); 2245 } 2246 2247 /* ptes kept in hardware only */ 2248 setpte(va, pteproto); 2249 splx(s); 2250 } 2251 2252 /* enter new (or change existing) user mapping */ 2253 pmap_enu(pm, va, prot, wired, pv, pteproto) 2254 register struct pmap *pm; 2255 vm_offset_t va; 2256 vm_prot_t prot; 2257 int wired; 2258 register struct pvlist *pv; 2259 register int pteproto; 2260 { 2261 register int vseg, *pte, tpte, pmeg, i, s, doflush; 2262 2263 write_user_windows(); /* XXX conservative */ 2264 vseg = VA_VSEG(va); 2265 s = splpmap(); /* XXX conservative */ 2266 2267 /* 2268 * If there is no space in which the PTEs can be written 2269 * while they are not in the hardware, this must be a new 2270 * virtual segment. Get PTE space and count the segment. 
2271 * 2272 * TO SPEED UP CTX ALLOC, PUT SEGMENT BOUNDS STUFF HERE 2273 * AND IN pmap_rmu() 2274 */ 2275 retry: 2276 pte = pm->pm_pte[vseg]; 2277 if (pte == NULL) { 2278 /* definitely a new mapping */ 2279 register int size = NPTESG * sizeof *pte; 2280 2281 pte = (int *)malloc((u_long)size, M_VMPMAP, M_WAITOK); 2282 if (pm->pm_pte[vseg] != NULL) { 2283 printf("pmap_enter: pte filled during sleep\n"); /* can this happen? */ 2284 free((caddr_t)pte, M_VMPMAP); 2285 goto retry; 2286 } 2287 #ifdef DEBUG 2288 if (pm->pm_segmap[vseg] != seginval) 2289 panic("pmap_enter: new ptes, but not seginval"); 2290 #endif 2291 bzero((caddr_t)pte, size); 2292 pm->pm_pte[vseg] = pte; 2293 pm->pm_npte[vseg] = 1; 2294 } else { 2295 /* might be a change: fetch old pte */ 2296 doflush = 0; 2297 if ((pmeg = pm->pm_segmap[vseg]) == seginval) 2298 tpte = pte[VA_VPG(va)]; /* software pte */ 2299 else { 2300 if (pm->pm_ctx) { /* hardware pte */ 2301 setcontext(pm->pm_ctxnum); 2302 tpte = getpte(va); 2303 doflush = 1; 2304 } else { 2305 setcontext(0); 2306 /* XXX use per-cpu pteva? */ 2307 setsegmap(0, pmeg); 2308 tpte = getpte(VA_VPG(va) * NBPG); 2309 } 2310 } 2311 if (tpte & PG_V) { 2312 register int addr = tpte & PG_PFNUM; 2313 2314 /* old mapping exists */ 2315 if (addr == (pteproto & PG_PFNUM)) { 2316 /* just changing prot and/or wiring */ 2317 splx(s); 2318 /* caller should call this directly: */ 2319 pmap_changeprot(pm, va, prot, wired); 2320 return; 2321 } 2322 /* 2323 * Switcheroo: changing pa for this va. 2324 * If old pa was managed, remove from pvlist. 2325 * If old page was cached, flush cache. 2326 */ 2327 /*printf("%s[%d]: pmap_enu: changing existing va(%x)=>pa entry\n", 2328 curproc->p_comm, curproc->p_pid, va);*/ 2329 addr = ptoa(HWTOSW(addr)); 2330 if (managed(addr)) 2331 pv_unlink(pvhead(addr), pm, va); 2332 if ( 2333 #ifdef notdef 2334 vactype != VAC_NONE && 2335 #endif 2336 doflush && (tpte & PG_NC) == 0) 2337 cache_flush_page((int)va); 2338 } else { 2339 /* adding new entry */ 2340 pm->pm_npte[vseg]++; 2341 } 2342 } 2343 2344 if (pv != NULL) 2345 pteproto |= pv_link(pv, pm, va); 2346 2347 /* 2348 * Update hardware or software PTEs (whichever are active). 2349 */ 2350 if ((pmeg = pm->pm_segmap[vseg]) != seginval) { 2351 /* ptes are in hardware */ 2352 if (pm->pm_ctx) 2353 setcontext(pm->pm_ctxnum); 2354 else { 2355 setcontext(0); 2356 /* XXX use per-cpu pteva? */ 2357 setsegmap(0, pmeg); 2358 va = VA_VPG(va) * NBPG; 2359 } 2360 setpte(va, pteproto); 2361 } 2362 /* update software copy */ 2363 pte += VA_VPG(va); 2364 *pte = pteproto; 2365 2366 splx(s); 2367 } 2368 2369 /* 2370 * Change the wiring attribute for a map/virtual-address pair. 2371 */ 2372 /* ARGSUSED */ 2373 void 2374 pmap_change_wiring(pm, va, wired) 2375 struct pmap *pm; 2376 vm_offset_t va; 2377 int wired; 2378 { 2379 2380 pmap_stats.ps_useless_changewire++; 2381 } 2382 2383 /* 2384 * Extract the physical page address associated 2385 * with the given map/virtual_address pair. 2386 * GRR, the vm code knows; we should not have to do this!
2387 */ 2388 vm_offset_t 2389 pmap_extract(pm, va) 2390 register struct pmap *pm; 2391 vm_offset_t va; 2392 { 2393 register int tpte; 2394 register int vseg; 2395 2396 if (pm == NULL) { 2397 printf("pmap_extract: null pmap\n"); 2398 return (0); 2399 } 2400 vseg = VA_VSEG(va); 2401 if (pm->pm_segmap[vseg] != seginval) { 2402 register int ctx = getcontext(); 2403 2404 if (pm->pm_ctx) { 2405 setcontext(pm->pm_ctxnum); 2406 tpte = getpte(va); 2407 } else { 2408 setcontext(0); 2409 tpte = getpte(VA_VPG(va) * NBPG); 2410 } 2411 setcontext(ctx); 2412 } else { 2413 register int *pte = pm->pm_pte[vseg]; 2414 2415 if (pte == NULL) { 2416 printf("pmap_extract: invalid vseg\n"); 2417 return (0); 2418 } 2419 tpte = pte[VA_VPG(va)]; 2420 } 2421 if ((tpte & PG_V) == 0) { 2422 printf("pmap_extract: invalid pte\n"); 2423 return (0); 2424 } 2425 tpte &= PG_PFNUM; 2426 tpte = HWTOSW(tpte); 2427 return ((tpte << PGSHIFT) | (va & PGOFSET)); 2428 } 2429 2430 /* 2431 * Copy the range specified by src_addr/len 2432 * from the source map to the range dst_addr/len 2433 * in the destination map. 2434 * 2435 * This routine is only advisory and need not do anything. 2436 */ 2437 /* ARGSUSED */ 2438 void 2439 pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) 2440 struct pmap *dst_pmap, *src_pmap; 2441 vm_offset_t dst_addr; 2442 vm_size_t len; 2443 vm_offset_t src_addr; 2444 { 2445 } 2446 2447 /* 2448 * Require that all active physical maps contain no 2449 * incorrect entries NOW. [This update includes 2450 * forcing updates of any address map caching.] 2451 */ 2452 void 2453 pmap_update() 2454 { 2455 } 2456 2457 /* 2458 * Garbage collects the physical map system for 2459 * pages which are no longer used. 2460 * Success need not be guaranteed -- that is, there 2461 * may well be pages which are not referenced, but 2462 * others may be collected. 2463 * Called by the pageout daemon when pages are scarce. 2464 */ 2465 /* ARGSUSED */ 2466 void 2467 pmap_collect(pm) 2468 struct pmap *pm; 2469 { 2470 } 2471 2472 /* 2473 * Clear the modify bit for the given physical page. 2474 */ 2475 void 2476 pmap_clear_modify(pa) 2477 register vm_offset_t pa; 2478 { 2479 register struct pvlist *pv; 2480 2481 if (managed(pa)) { 2482 pv = pvhead(pa); 2483 (void) pv_syncflags(pv); 2484 pv->pv_flags &= ~PV_MOD; 2485 } 2486 } 2487 2488 /* 2489 * Tell whether the given physical page has been modified. 2490 */ 2491 int 2492 pmap_is_modified(pa) 2493 register vm_offset_t pa; 2494 { 2495 register struct pvlist *pv; 2496 2497 if (managed(pa)) { 2498 pv = pvhead(pa); 2499 if (pv->pv_flags & PV_MOD || pv_syncflags(pv) & PV_MOD) 2500 return (1); 2501 } 2502 return (0); 2503 } 2504 2505 /* 2506 * Clear the reference bit for the given physical page. 2507 */ 2508 void 2509 pmap_clear_reference(pa) 2510 vm_offset_t pa; 2511 { 2512 register struct pvlist *pv; 2513 2514 if (managed(pa)) { 2515 pv = pvhead(pa); 2516 (void) pv_syncflags(pv); 2517 pv->pv_flags &= ~PV_REF; 2518 } 2519 } 2520 2521 /* 2522 * Tell whether the given physical page has been referenced. 2523 */ 2524 int 2525 pmap_is_referenced(pa) 2526 vm_offset_t pa; 2527 { 2528 register struct pvlist *pv; 2529 2530 if (managed(pa)) { 2531 pv = pvhead(pa); 2532 if (pv->pv_flags & PV_REF || pv_syncflags(pv) & PV_REF) 2533 return (1); 2534 } 2535 return (0); 2536 } 2537 2538 /* 2539 * Make the specified pages (by pmap, offset) pageable (or not) as requested. 
2540 * 2541 * A page which is not pageable may not take a fault; therefore, its page 2542 * table entry must remain valid for the duration (or at least, the trap 2543 * handler must not call vm_fault). 2544 * 2545 * This routine is merely advisory; pmap_enter will specify that these pages 2546 * are to be wired down (or not) as appropriate. 2547 */ 2548 /* ARGSUSED */ 2549 void 2550 pmap_pageable(pm, start, end, pageable) 2551 struct pmap *pm; 2552 vm_offset_t start, end; 2553 int pageable; 2554 { 2555 } 2556 2557 /* 2558 * Fill the given MI physical page with zero bytes. 2559 * 2560 * We avoid stomping on the cache. 2561 * XXX might be faster to use destination's context and allow cache to fill? 2562 */ 2563 void 2564 pmap_zero_page(pa) 2565 register vm_offset_t pa; 2566 { 2567 register caddr_t va; 2568 register int pte; 2569 2570 if (managed(pa)) { 2571 /* 2572 * The following might not be necessary since the page 2573 * is being cleared because it is about to be allocated, 2574 * i.e., is in use by no one. 2575 */ 2576 #if 1 2577 #ifdef notdef 2578 if (vactype != VAC_NONE) 2579 #endif 2580 pv_flushcache(pvhead(pa)); 2581 #endif 2582 pte = PG_V | PG_S | PG_W | PG_NC | SWTOHW(atop(pa)); 2583 } else 2584 pte = PG_V | PG_S | PG_W | PG_NC | (atop(pa) & PG_PFNUM); 2585 2586 va = vpage[0]; 2587 setpte(va, pte); 2588 qzero(va, NBPG); 2589 setpte(va, 0); 2590 } 2591 2592 /* 2593 * Copy the given MI physical source page to its destination. 2594 * 2595 * We avoid stomping on the cache as above (with same `XXX' note). 2596 * We must first flush any write-back cache for the source page. 2597 * We go ahead and stomp on the kernel's virtual cache for the 2598 * source page, since the cache can read memory MUCH faster than 2599 * the processor. 2600 */ 2601 void 2602 pmap_copy_page(src, dst) 2603 vm_offset_t src, dst; 2604 { 2605 register caddr_t sva, dva; 2606 register int spte, dpte; 2607 2608 if (managed(src)) { 2609 if (vactype == VAC_WRITEBACK) 2610 pv_flushcache(pvhead(src)); 2611 spte = PG_V | PG_S | SWTOHW(atop(src)); 2612 } else 2613 spte = PG_V | PG_S | (atop(src) & PG_PFNUM); 2614 2615 if (managed(dst)) { 2616 /* similar `might not be necessary' comment applies */ 2617 #if 1 2618 #ifdef notdef 2619 if (vactype != VAC_NONE) 2620 #endif 2621 pv_flushcache(pvhead(dst)); 2622 #endif 2623 dpte = PG_V | PG_S | PG_W | PG_NC | SWTOHW(atop(dst)); 2624 } else 2625 dpte = PG_V | PG_S | PG_W | PG_NC | (atop(dst) & PG_PFNUM); 2626 2627 sva = vpage[0]; 2628 dva = vpage[1]; 2629 setpte(sva, spte); 2630 setpte(dva, dpte); 2631 qcopy(sva, dva, NBPG); /* loads cache, so we must ... */ 2632 cache_flush_page((int)sva); 2633 setpte(sva, 0); 2634 setpte(dva, 0); 2635 } 2636 2637 /* 2638 * Turn a cdevsw d_mmap value into a byte address for pmap_enter. 2639 * XXX this should almost certainly be done differently, and 2640 * elsewhere, or even not at all 2641 */ 2642 vm_offset_t 2643 pmap_phys_address(x) 2644 int x; 2645 { 2646 2647 return (x); 2648 } 2649 2650 /* 2651 * Turn off cache for a given (va, number of pages). 2652 * 2653 * We just assert PG_NC for each PTE; the addresses must reside 2654 * in locked kernel space. A cache flush is also done. 2655 */ 2656 kvm_uncache(va, npages) 2657 register caddr_t va; 2658 register int npages; 2659 { 2660 register int pte; 2661 2662 for (; --npages >= 0; va += NBPG) { 2663 pte = getpte(va); 2664 if ((pte & PG_V) == 0) 2665 panic("kvm_uncache !pg_v"); 2666 pte |= PG_NC; 2667 setpte(va, pte); 2668 cache_flush_page((int)va); 2669 } 2670 } 2671 2672 /* 2673 * For /dev/mem. 
2674 */ 2675 int 2676 pmap_enter_hw(pm, va, pa, prot, wired) 2677 register struct pmap *pm; 2678 vm_offset_t va, pa; 2679 vm_prot_t prot; 2680 int wired; 2681 { 2682 register struct memarr *ma; 2683 register int n; 2684 register u_int t; 2685 2686 if (pa >= MAXMEM) /* ??? */ 2687 return (EFAULT); 2688 for (ma = pmemarr, n = npmemarr; --n >= 0; ma++) { 2689 t = (u_int)pa - ma->addr; 2690 if (t < ma->len) 2691 goto ok; 2692 } 2693 return (EFAULT); 2694 ok: 2695 pa = (HWTOSW(atop(pa)) << PGSHIFT) | (pa & PGOFSET); 2696 if (pa >= vm_first_phys + vm_num_phys) /* ??? */ 2697 return (EFAULT); 2698 2699 pmap_enter(pm, va, pa, prot, wired); 2700 return (0); 2701 } 2702
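/*
 * A note on the range checks used above: pmap_enter_hw tests whether pa
 * falls inside a physical memory bank with a single unsigned comparison,
 * t = (u_int)pa - ma->addr; if (t < ma->len).  Because the subtraction
 * is done in unsigned arithmetic, an address below the bank base wraps
 * around to a very large value and fails the test, so one compare covers
 * both ends of the range.  The stand-alone sketch below demonstrates the
 * idiom; the base and length values are invented for illustration.
 */
#include <stdio.h>

static unsigned int bank_base = 0x00200000;	/* invented bank base */
static unsigned int bank_len  = 0x00800000;	/* invented bank length */

/* One unsigned compare replaces the usual two-sided bounds test. */
static int
in_bank(unsigned int pa)
{
	return ((pa - bank_base) < bank_len);
}

int
main(void)
{
	printf("%d\n", in_bank(0x00300000));	/* 1: inside the bank */
	printf("%d\n", in_bank(0x00100000));	/* 0: below base, wraps to a huge value */
	printf("%d\n", in_bank(0x00a00000));	/* 0: past the end of the bank */
	return (0);
}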
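/*
 * HWTOSW and SWTOHW, used in pmap_enter, pmap_extract and pmap_enter_hw
 * above, convert between the hardware's page frame numbers (which may be
 * discontiguous, with holes between memory banks) and the dense frame
 * numbering that the VM system and the pv_table indexing expect.  Their
 * machine-dependent definitions are not shown in this file; the sketch
 * below is only one plausible way such a translation can be derived from
 * a bank table.  The bank addresses and sizes here are invented.
 */
#include <stdio.h>

struct bank {
	unsigned int hw_first;	/* first hardware frame in the bank */
	unsigned int npages;	/* number of pages in the bank */
};
static struct bank banks[] = {
	{ 0x000, 0x100 },	/* hardware frames 0x000-0x0ff */
	{ 0x400, 0x080 },	/* hardware frames 0x400-0x47f, after a hole */
};
#define	NBANK	(sizeof(banks) / sizeof(banks[0]))

/* Map a hardware frame number to a dense software frame number. */
static int
hw_to_sw(unsigned int hw)
{
	unsigned int i, off, sw = 0;

	for (i = 0; i < NBANK; i++) {
		off = hw - banks[i].hw_first;	/* unsigned: wraps if below base */
		if (off < banks[i].npages)
			return (sw + off);
		sw += banks[i].npages;
	}
	return (-1);		/* frame lies in a hole */
}

int
main(void)
{
	printf("%d\n", hw_to_sw(0x010));	/* 16: inside bank 0 */
	printf("%d\n", hw_to_sw(0x410));	/* 272: 0x100 + 0x10, inside bank 1 */
	printf("%d\n", hw_to_sw(0x200));	/* -1: falls in the hole */
	return (0);
}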
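/*
 * pmap_enter above accepts cacheability and space hints (PMAP_NC,
 * PMAP_OBIO) packed into otherwise-unused bits of its pa argument: it
 * lifts them into the PTE prototype with a shift and then strips them
 * before using pa as an address.  The stand-alone sketch below shows the
 * packing with made-up bit positions; the real PMAP_TNC and PG_TNC_SHIFT
 * values are machine dependent and differ from these.
 */
#include <stdio.h>

#define	PMAP_NC		0x1		/* hypothetical "do not cache" hint */
#define	PMAP_OBIO	0x2		/* hypothetical "on-board I/O" hint */
#define	PMAP_TNC	(PMAP_OBIO | PMAP_NC)
#define	PG_V		0x80000000	/* hypothetical PTE valid bit */
#define	PG_TNC_SHIFT	24		/* hypothetical type/NC field position */

int
main(void)
{
	unsigned int pa = 0x00123000 | PMAP_NC;	/* address with a hint packed in */
	unsigned int pteproto;

	/* Move the hint bits into the PTE prototype, then strip them from pa. */
	pteproto = PG_V | ((pa & PMAP_TNC) << PG_TNC_SHIFT);
	pa &= ~PMAP_TNC;

	printf("pteproto 0x%08x pa 0x%08x\n", pteproto, pa);
	/* prints: pteproto 0x81000000 pa 0x00123000 */
	return (0);
}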
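/*
 * pmap_extract above rebuilds a physical address from the frame number
 * held in the PTE and the byte offset within the page taken from the
 * virtual address.  The stand-alone sketch below shows that arithmetic
 * in isolation, assuming 4096-byte pages and a hypothetical frame-number
 * mask; the real PGSHIFT and PG_PFNUM values are machine dependent, and
 * the HWTOSW renumbering step is omitted here.
 */
#include <stdio.h>

#define	PGSHIFT		12			/* assumed: 4096-byte pages */
#define	PGOFSET		((1 << PGSHIFT) - 1)
#define	PFNUM_MASK	0x0007ffff		/* hypothetical frame-number field */

/* Recover a physical address from a valid PTE and the va it maps. */
static unsigned int
extract_pa(unsigned int pte, unsigned int va)
{
	unsigned int pfn = pte & PFNUM_MASK;

	return ((pfn << PGSHIFT) | (va & PGOFSET));
}

int
main(void)
{
	/* Made-up PTE mapping frame 0x123, and a va with page offset 0x456. */
	printf("0x%x\n", extract_pa(0x80000123, 0xf0001456));	/* prints 0x123456 */
	return (0);
}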