1 /*
2 * Copyright (c) 1992, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This software was developed by the Computer Systems Engineering group
6 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
7 * contributed to Berkeley.
8 *
9 * All advertising materials mentioning features or use of this software
10 * must display the following acknowledgement:
11 * This product includes software developed by the University of
12 * California, Lawrence Berkeley Laboratory.
13 *
14 * %sccs.include.redist.c%
15 *
16 * @(#)pmap.c 8.4 (Berkeley) 02/05/94
17 *
18 * from: $Header: pmap.c,v 1.43 93/10/31 05:34:56 torek Exp $
19 */
20
21 /*
22 * SPARC physical map management code.
23 * Does not function on multiprocessors (yet).
24 */
25
26 #include <sys/param.h>
27 #include <sys/systm.h>
28 #include <sys/device.h>
29 #include <sys/proc.h>
30 #include <sys/malloc.h>
31
32 #include <vm/vm.h>
33 #include <vm/vm_kern.h>
34 #include <vm/vm_prot.h>
35 #include <vm/vm_page.h>
36
37 #include <machine/autoconf.h>
38 #include <machine/bsd_openprom.h>
39 #include <machine/cpu.h>
40 #include <machine/ctlreg.h>
41
42 #include <sparc/sparc/asm.h>
43 #include <sparc/sparc/cache.h>
44 #include <sparc/sparc/vaddrs.h>
45
46 #ifdef DEBUG
47 #define PTE_BITS "\20\40V\37W\36S\35NC\33IO\32U\31M"
48 #endif
49
50 extern struct promvec *promvec;
51
52 /*
53 * The SPARCstation offers us the following challenges:
54 *
55 * 1. A virtual address cache. This is, strictly speaking, not
56 * part of the architecture, but the code below assumes one.
57 * This is a write-through cache on the 4c and a write-back cache
58 * on others.
59 *
60 * 2. An MMU that acts like a cache. There is not enough space
61 * in the MMU to map everything all the time. Instead, we need
62 * to load the MMU with the `working set' of translations for each
63 * process.
64 *
65 * 3. Segmented virtual and physical spaces. The upper 12 bits of
66 * a virtual address (the virtual segment) index a segment table,
67 * giving a physical segment. The physical segment selects a
68 * `Page Map Entry Group' (PMEG) and the virtual page number---the
69 * next 5 or 6 bits of the virtual address---select the particular
70 * `Page Map Entry' for the page. We call the latter a PTE and
71 * call each Page Map Entry Group a pmeg (for want of a better name).
72 *
73 * Since there are no valid bits in the segment table, the only way
74 * to have an invalid segment is to make one full pmeg of invalid PTEs.
75 * We use the last one (since the ROM does as well).
76 *
77 * 4. Discontiguous physical pages. The Mach VM expects physical pages
78 * to be in one sequential lump.
79 *
80 * 5. The MMU is always on: it is not possible to disable it. This is
81 * mainly a startup hassle.
82 */
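/*
 * Illustrative sketch of item 3 above (not compiled; the real macros
 * come from the machine-dependent headers).  A virtual address is
 * sliced into a segment index and a page-within-segment index, which
 * is how the rest of this file gets from a va to a software PTE:
 *
 *	vseg = VA_VSEG(va);			upper bits: virtual segment
 *	pmeg = pm->pm_segmap[vseg];		PMEG holding it, or seginval
 *	pte  = pm->pm_pte[vseg][VA_VPG(va)];	software copy of the PTE
 *
 * The exact widths (12 segment bits, 5 or 6 page bits) are as described
 * above and vary with the model.
 */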
83
84 struct pmap_stats {
85 int ps_unlink_pvfirst; /* # of pv_unlinks on head */
86 int ps_unlink_pvsearch; /* # of pv_unlink searches */
87 int ps_changeprots; /* # of calls to changeprot */
88 int ps_useless_changeprots; /* # of changeprots for wiring */
89 int ps_enter_firstpv; /* pv heads entered */
90 int ps_enter_secondpv; /* pv nonheads entered */
91 int ps_useless_changewire; /* useless wiring changes */
92 int ps_npg_prot_all; /* # of active pages protected */
93 int ps_npg_prot_actual; /* # pages actually affected */
94 } pmap_stats;
95
96 #ifdef DEBUG
97 #define PDB_CREATE 0x0001
98 #define PDB_DESTROY 0x0002
99 #define PDB_REMOVE 0x0004
100 #define PDB_CHANGEPROT 0x0008
101 #define PDB_ENTER 0x0010
102
103 #define PDB_MMU_ALLOC 0x0100
104 #define PDB_MMU_STEAL 0x0200
105 #define PDB_CTX_ALLOC 0x0400
106 #define PDB_CTX_STEAL 0x0800
107 int pmapdebug = 0x0;
108 #endif
109
110 #define splpmap() splimp()
111
112 /*
113 * First and last managed physical addresses.
114 */
115 #if 0
116 vm_offset_t vm_first_phys, vm_last_phys;
117 #define managed(pa) ((pa) >= vm_first_phys && (pa) < vm_last_phys)
118 #else
119 vm_offset_t vm_first_phys, vm_num_phys;
120 #define managed(pa) ((unsigned)((pa) - vm_first_phys) < vm_num_phys)
121 #endif
122
123 /*
124 * For each managed physical page, there is a list of all currently
125 * valid virtual mappings of that page. Since there is usually one
126 * (or zero) mapping per page, the table begins with an initial entry,
127 * rather than a pointer; this head entry is empty iff its pv_pmap
128 * field is NULL.
129 *
130 * Note that these are per machine-independent page (so there may be,
131 * e.g., only one for every two hardware pages). Since the virtual
132 * address is aligned on a page boundary, the low order bits are free
133 * for storing flags. Only the head of each list has flags.
134 *
135 * THIS SHOULD BE PART OF THE CORE MAP
136 */
137 struct pvlist {
138 struct pvlist *pv_next; /* next pvlist, if any */
139 struct pmap *pv_pmap; /* pmap of this va */
140 int pv_va; /* virtual address */
141 int pv_flags; /* flags (below) */
142 };
143
144 /*
145 * Flags in pv_flags. Note that PV_MOD must be 1 and PV_REF must be 2
146 * since they must line up with the bits in the hardware PTEs (see pte.h).
147 */
148 #define PV_MOD 1 /* page modified */
149 #define PV_REF 2 /* page referenced */
150 #define PV_NC 4 /* page cannot be cached */
151 /*efine PV_ALLF 7 ** all of the above */
152
153 struct pvlist *pv_table; /* array of entries, one per physical page */
154
155 #define pvhead(pa) (&pv_table[atop((pa) - vm_first_phys)])
156
157 /*
158 * Each virtual segment within each pmap is either valid or invalid.
159 * It is valid if pm_npte[VA_VSEG(va)] is not 0. This does not mean
160 * it is in the MMU, however; that is true iff pm_segmap[VA_VSEG(va)]
161 * does not point to the invalid PMEG.
162 *
163 * If a virtual segment is valid and loaded, the correct PTEs appear
164 * in the MMU only. If it is valid and unloaded, the correct PTEs appear
165 * in the pm_pte[VA_VSEG(va)] only. However, some effort is made to keep
166 * the software copies consistent enough with the MMU so that libkvm can
167 * do user address translations. In particular, pv_changepte() and
168 * pmap_enu() maintain consistency, while less critical changes are
169 * not maintained. pm_pte[VA_VSEG(va)] always points to space for those
170 * PTEs, unless this is the kernel pmap, in which case pm_pte[x] is not
171 * used (sigh).
172 *
173 * Each PMEG in the MMU is either free or contains PTEs corresponding to
174 * some pmap and virtual segment. If it contains some PTEs, it also contains
175 * reference and modify bits that belong in the pv_table. If we need
176 * to steal a PMEG from some process (if we need one and none are free)
177 * we must copy the ref and mod bits, and update pm_segmap in the other
178 * pmap to show that its virtual segment is no longer in the MMU.
179 *
180 * There are 128 PMEGs in a small Sun-4, of which only a few dozen are
181 * tied down permanently, leaving `about' 100 to be spread among
182 * running processes. These are managed as an LRU cache. Before
183 * calling the VM paging code for a user page fault, the fault handler
184 * calls mmu_pagein(pmap, va, bits) to try to get a set of PTEs put into
185 * the MMU. mmu_pagein will check the validity of the segment and tell
186 * whether it did something.
187 *
188 * Since I hate the name PMEG I call this data structure an `mmu entry'.
189 * Each mmuentry is on exactly one of three `usage' lists: free, LRU,
190 * or locked. The LRU list is for user processes; the locked list is
191 * for kernel entries; both are doubly linked queues headed by `mmuhd's.
192 * The free list is a simple list, headed by a free list pointer.
193 */
194 struct mmuhd {
195 struct mmuentry *mh_next;
196 struct mmuentry *mh_prev;
197 };
198 struct mmuentry {
199 struct mmuentry *me_next; /* queue (MUST BE FIRST) or next free */
200 struct mmuentry *me_prev; /* queue (MUST BE FIRST) */
201 struct pmap *me_pmap; /* pmap, if in use */
202 struct mmuentry *me_pmforw; /* pmap pmeg chain */
203 struct mmuentry **me_pmback; /* pmap pmeg chain */
204 u_short me_vseg; /* virtual segment number in pmap */
205 pmeg_t me_pmeg; /* hardware PMEG number */
206 };
207 struct mmuentry *mmuentry; /* allocated in pmap_bootstrap */
208
209 struct mmuentry *me_freelist; /* free list (not a queue) */
210 struct mmuhd me_lru = { /* LRU (user) entries */
211 (struct mmuentry *)&me_lru, (struct mmuentry *)&me_lru
212 };
213 struct mmuhd me_locked = { /* locked (kernel) entries */
214 (struct mmuentry *)&me_locked, (struct mmuentry *)&me_locked
215 };
216
217 int seginval; /* the invalid segment number */
218
219 /*
220 * A context is simply a small number that dictates which set of 4096
221 * segment map entries the MMU uses. The Sun 4c has eight such sets.
222 * These are allotted in an `almost MRU' fashion.
223 *
224 * Each context is either free or attached to a pmap.
225 *
226 * Since the virtual address cache is tagged by context, when we steal
227 * a context we have to flush (that part of) the cache.
228 */
229 union ctxinfo {
230 union ctxinfo *c_nextfree; /* free list (if free) */
231 struct pmap *c_pmap; /* pmap (if busy) */
232 };
233 union ctxinfo *ctxinfo; /* allocated in pmap_bootstrap */
234 int ncontext;
235
236 union ctxinfo *ctx_freelist; /* context free list */
237 int ctx_kick; /* allocation rover when none free */
238 int ctx_kickdir; /* ctx_kick roves both directions */
239
240 /* XXX need per-cpu vpage[]s (and vmempage, unless we lock in /dev/mem) */
241 caddr_t vpage[2]; /* two reserved MD virtual pages */
242 caddr_t vmempage; /* one reserved MI vpage for /dev/mem */
243 caddr_t vdumppages; /* 32KB worth of reserved dump pages */
244
245 struct kpmap kernel_pmap_store; /* the kernel's pmap */
246
247 /*
248 * We need to know real physical memory ranges (for /dev/mem).
249 */
250 #define MA_SIZE 32 /* size of memory descriptor arrays */
251 struct memarr pmemarr[MA_SIZE];/* physical memory regions */
252 int npmemarr; /* number of entries in pmemarr */
253
254 /*
255 * The following four global variables are set in pmap_bootstrap
256 * for the vm code to find. This is Wrong.
257 */
258 vm_offset_t avail_start; /* first free physical page number */
259 vm_offset_t avail_end; /* last free physical page number */
260 vm_offset_t virtual_avail; /* first free virtual page number */
261 vm_offset_t virtual_end; /* last free virtual page number */
262
263 /*
264 * pseudo-functions for mnemonic value
265 #ifdef notyet
266 * NB: setsegmap should be stba for 4c, but stha works and makes the
267 * code right for the Sun-4 as well.
268 #endif
269 */
270 #define getcontext() lduba(AC_CONTEXT, ASI_CONTROL)
271 #define setcontext(c) stba(AC_CONTEXT, ASI_CONTROL, c)
272 #ifdef notyet
273 #define getsegmap(va) lduha(va, ASI_SEGMAP)
274 #define setsegmap(va, pmeg) stha(va, ASI_SEGMAP, pmeg)
275 #else
276 #define getsegmap(va) lduba(va, ASI_SEGMAP)
277 #define setsegmap(va, pmeg) stba(va, ASI_SEGMAP, pmeg)
278 #endif
279
280 #define getpte(va) lda(va, ASI_PTE)
281 #define setpte(va, pte) sta(va, ASI_PTE, pte)
282
283 /*----------------------------------------------------------------*/
284
285 #ifdef sun4c
286 /*
287 * Translations from dense (contiguous) pseudo physical addresses
288 * (fed to the VM code, to keep it happy) to sparse (real, hardware)
289 * physical addresses. We call the former `software' page frame
290 * numbers and the latter `hardware' page frame numbers. The
291 * translation is done on a `per bank' basis.
292 *
293 * The HWTOSW and SWTOHW macros handle the actual translation.
294 * They are defined as no-ops on Sun-4s.
295 *
296 * SHOULD DO atop AND ptoa DIRECTLY IN THESE MACROS SINCE ALL CALLERS
297 * ALWAYS NEED THAT ANYWAY ... CAN JUST PRECOOK THE TABLES (TODO)
298 *
299 * Since we cannot use the memory allocated to the ROM monitor, and
300 * this happens to be just under 64K, I have chosen a bank size of
301 * 64K. This is necessary since all banks must be completely full.
302 * I have also chosen a physical memory limit of 128 MB. The 4c is
303 * architecturally limited to 256 MB, but 128 MB is more than will
304 * fit on present hardware.
305 *
306 * XXX FIX THIS: just make all of each bank available and then
307 * take out the pages reserved to the monitor!!
308 */
309 #define MAXMEM (128 * 1024 * 1024) /* no more than 128 MB phys mem */
310 #define NPGBANK 16 /* 2^4 pages per bank (64K / bank) */
311 #define BSHIFT 4 /* log2(NPGBANK) */
312 #define BOFFSET (NPGBANK - 1)
313 #define BTSIZE (MAXMEM / NBPG / NPGBANK)
314
315 int pmap_dtos[BTSIZE]; /* dense to sparse */
316 int pmap_stod[BTSIZE]; /* sparse to dense */
317
318 #define HWTOSW(pg) (pmap_stod[(pg) >> BSHIFT] | ((pg) & BOFFSET))
319 #define SWTOHW(pg) (pmap_dtos[(pg) >> BSHIFT] | ((pg) & BOFFSET))
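/*
 * Worked example of the bank translation (illustrative only): with
 * NPGBANK = 16 (BSHIFT = 4), hardware page number 0x123 lies in
 * hardware bank 0x12 at offset 3.  init_translations() below stores
 * the already-shifted base page number of the matching software bank
 * in pmap_stod[], so that
 *
 *	HWTOSW(0x123) == pmap_stod[0x12] | 0x3
 *
 * and SWTOHW() does the inverse lookup through pmap_dtos[].
 */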
320
321 /*
322 * Sort a memory array by address.
323 */
324 static void
325 sortm(mp, n)
326 register struct memarr *mp;
327 register int n;
328 {
329 register struct memarr *mpj;
330 register int i, j;
331 register u_int addr, len;
332
333 /* Insertion sort. This is O(n^2), but so what? */
334 for (i = 1; i < n; i++) {
335 /* save i'th entry */
336 addr = mp[i].addr;
337 len = mp[i].len;
338 /* find j such that i'th entry goes before j'th */
339 for (j = 0, mpj = mp; j < i; j++, mpj++)
340 if (addr < mpj->addr)
341 break;
342 /* slide up any additional entries */
343 ovbcopy(mpj, mpj + 1, (i - j) * sizeof(*mp));
344 mpj->addr = addr;
345 mpj->len = len;
346 }
347 }
348
349 #ifdef DEBUG
350 struct memarr pmap_ama[MA_SIZE];
351 int pmap_nama;
352 #define ama pmap_ama
353 #endif
354
355 /*
356 * init_translations sets up pmap_dtos[] and pmap_stod[], and
357 * returns the number of usable physical pages.
358 */
359 int
360 init_translations()
361 {
362 register struct memarr *mp;
363 register int n, nmem;
364 register u_int vbank = 0, pbank, v, a;
365 register u_int pages = 0, lost = 0;
366 #ifndef DEBUG
367 struct memarr ama[MA_SIZE]; /* available memory array */
368 #endif
369
370 nmem = makememarr(ama, MA_SIZE, MEMARR_AVAILPHYS);
371
372 /*
373 * Open Boot supposedly guarantees at least 3 MB free mem at 0;
374 * this is where the kernel has been loaded (we certainly hope the
375 * kernel is <= 3 MB). We need the memory array to be sorted, and
376 * to start at 0, so that `software page 0' and `hardware page 0'
377 * are the same (otherwise the VM reserves the wrong pages for the
378 * kernel).
379 */
380 sortm(ama, nmem);
381 if (ama[0].addr != 0) {
382 /* cannot panic here; there's no real kernel yet. */
383 printf("init_translations: no kernel memory?!\n");
384 callrom();
385 }
386 #ifdef DEBUG
387 pmap_nama = nmem;
388 #endif
389 for (mp = ama; --nmem >= 0; mp++) {
390 a = mp->addr >> PGSHIFT;
391 v = mp->len >> PGSHIFT;
392 if ((n = a & BOFFSET) != 0) {
393 /* round up to next bank */
394 n = NPGBANK - n;
395 if (v < n) { /* not a whole bank: skip it */
396 lost += v;
397 continue;
398 }
399 lost += n; /* lose n pages from front */
400 a += n;
401 v -= n;
402 }
403 n = v >> BSHIFT; /* calculate number of banks */
404 pbank = a >> BSHIFT; /* and the bank itself */
405 if (pbank + n >= BTSIZE)
406 n = BTSIZE - pbank;
407 pages += n; /* off by a factor of 2^BSHIFT */
408 lost += v - (n << BSHIFT);
409 while (--n >= 0) {
410 pmap_dtos[vbank] = pbank << BSHIFT;
411 pmap_stod[pbank] = vbank << BSHIFT;
412 pbank++;
413 vbank++;
414 }
415 }
416 /* adjust page count */
417 pages <<= BSHIFT;
418 #ifdef DEBUG
419 printf("note: lost %d pages in translation\n", lost);
420 #endif
421 return (pages);
422 }
423
424 #else /* sun4c */
425
426 /*
427 * Pages are physically contiguous, and hardware PFN == software PFN.
428 *
429 * XXX assumes PAGE_SIZE == NBPG (???)
430 */
431 #define HWTOSW(pg) (pg)
432 #define SWTOHW(pg) (pg)
433
434 #endif /* sun4c */
435
436 /* update pv_flags given a valid pte */
437 #define MR(pte) (((pte) >> PG_M_SHIFT) & (PV_MOD | PV_REF))
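/*
 * Example (assuming, as the ref/mod syncing code below implies, that
 * PG_M sits at bit PG_M_SHIFT with PG_U immediately above it): a valid
 * PTE with both PG_M and PG_U set gives MR(pte) == (PV_MOD | PV_REF),
 * which is why PV_MOD must be 1 and PV_REF must be 2 (see pte.h note
 * above).
 */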
438
439 /*----------------------------------------------------------------*/
440
441 /*
442 * Agree with the monitor ROM as to how many MMU entries are
443 * to be reserved, and map all of its segments into all contexts.
444 *
445 * Unfortunately, while the Version 0 PROM had a nice linked list of
446 * taken virtual memory, the Version 2 PROM provides instead a convoluted
447 * description of *free* virtual memory. Rather than invert this, we
448 * resort to two magic constants from the PROM vector description file.
449 */
450 int
451 mmu_reservemon(nmmu)
452 register int nmmu;
453 {
454 register u_int va, eva;
455 register int mmuseg, i;
456
457 va = OPENPROM_STARTVADDR;
458 eva = OPENPROM_ENDVADDR;
459 while (va < eva) {
460 mmuseg = getsegmap(va);
461 if (mmuseg < nmmu)
462 nmmu = mmuseg;
463 for (i = ncontext; --i > 0;)
464 (*promvec->pv_setctxt)(i, (caddr_t)va, mmuseg);
465 if (mmuseg == seginval) {
466 va += NBPSG;
467 continue;
468 }
469 /* PROM maps its memory user-accessible: fix it. */
470 for (i = NPTESG; --i >= 0; va += NBPG)
471 setpte(va, getpte(va) | PG_S);
472 }
473 return (nmmu);
474 }
475
476 /*
477 * TODO: agree with the ROM on physical pages by taking them away
478 * from the page list, rather than having a dinky BTSIZE above.
479 */
480
481 /*----------------------------------------------------------------*/
482
483 /*
484 * MMU management.
485 */
486
487 /*
488 * Change contexts. We need the old context number as well as the new
489 * one. If the context is changing, we must write all user windows
490 * first, lest an interrupt cause them to be written to the (other)
491 * user whose context we set here.
492 */
493 #define CHANGE_CONTEXTS(old, new) \
494 if ((old) != (new)) { \
495 write_user_windows(); \
496 setcontext(new); \
497 }
498
499 /*
500 * Allocate an MMU entry (i.e., a PMEG).
501 * If necessary, steal one from someone else.
502 * Put it on the tail of the given queue
503 * (which is either the LRU list or the locked list).
504 * The locked list is not actually ordered, but this is easiest.
505 * Also put it on the given (new) pmap's chain,
506 * enter its pmeg number into that pmap's segmap,
507 * and store the pmeg's new virtual segment number (me->me_vseg).
508 *
509 * This routine is large and complicated, but it must be fast
510 * since it implements the dynamic allocation of MMU entries.
511 */
512 struct mmuentry *
513 me_alloc(mh, newpm, newvseg)
514 register struct mmuhd *mh;
515 register struct pmap *newpm;
516 register int newvseg;
517 {
518 register struct mmuentry *me;
519 register struct pmap *pm;
520 register int i, va, pa, *pte, tpte;
521 int ctx;
522
523 /* try free list first */
524 if ((me = me_freelist) != NULL) {
525 me_freelist = me->me_next;
526 #ifdef DEBUG
527 if (me->me_pmap != NULL)
528 panic("me_alloc: freelist entry has pmap");
529 if (pmapdebug & PDB_MMU_ALLOC)
530 printf("me_alloc: got pmeg %x\n", me->me_pmeg);
531 #endif
532 insque(me, mh->mh_prev); /* onto end of queue */
533
534 /* onto pmap chain; pmap is already locked, if needed */
535 me->me_pmforw = NULL;
536 me->me_pmback = newpm->pm_mmuback;
537 *newpm->pm_mmuback = me;
538 newpm->pm_mmuback = &me->me_pmforw;
539
540 /* into pmap segment table, with backpointers */
541 newpm->pm_segmap[newvseg] = me->me_pmeg;
542 me->me_pmap = newpm;
543 me->me_vseg = newvseg;
544
545 return (me);
546 }
547
548 /* no luck, take head of LRU list */
549 if ((me = me_lru.mh_next) == (struct mmuentry *)&me_lru)
550 panic("me_alloc: all pmegs gone");
551 pm = me->me_pmap;
552 #ifdef DEBUG
553 if (pm == NULL)
554 panic("me_alloc: LRU entry has no pmap");
555 if (pm == kernel_pmap)
556 panic("me_alloc: stealing from kernel");
557 pte = pm->pm_pte[me->me_vseg];
558 if (pte == NULL)
559 panic("me_alloc: LRU entry's pmap has no ptes");
560 if (pmapdebug & (PDB_MMU_ALLOC | PDB_MMU_STEAL))
561 printf("me_alloc: stealing pmeg %x from pmap %x\n",
562 me->me_pmeg, pm);
563 #endif
564 /*
565 * Remove from LRU list, and insert at end of new list
566 * (probably the LRU list again, but so what?).
567 */
568 remque(me);
569 insque(me, mh->mh_prev);
570
571 /*
572 * The PMEG must be mapped into some context so that we can
573 * read its PTEs. Use its current context if it has one;
574 * if not, and since context 0 is reserved for the kernel,
575 * the simplest method is to switch to 0 and map the PMEG
576 * to virtual address 0---which, being a user space address,
577 * is by definition not in use.
578 *
579 * XXX for ncpus>1 must use per-cpu VA?
580 * XXX do not have to flush cache immediately
581 */
582 ctx = getcontext();
583 if (pm->pm_ctx) {
584 CHANGE_CONTEXTS(ctx, pm->pm_ctxnum);
585 #ifdef notdef
586 if (vactype != VAC_NONE)
587 #endif
588 cache_flush_segment(me->me_vseg);
589 va = VSTOVA(me->me_vseg);
590 } else {
591 CHANGE_CONTEXTS(ctx, 0);
592 setsegmap(0, me->me_pmeg);
593 /*
594 * No cache flush needed: it happened earlier when
595 * the old context was taken.
596 */
597 va = 0;
598 }
599
600 /*
601 * Record reference and modify bits for each page,
602 * and copy PTEs into kernel memory so that they can
603 * be reloaded later.
604 */
605 i = NPTESG;
606 do {
607 tpte = getpte(va);
608 if (tpte & PG_V) {
609 pa = ptoa(HWTOSW(tpte & PG_PFNUM));
610 if (managed(pa))
611 pvhead(pa)->pv_flags |= MR(tpte);
612 }
613 *pte++ = tpte & ~(PG_U|PG_M);
614 va += NBPG;
615 } while (--i > 0);
616
617 /* update segment tables */
618 simple_lock(&pm->pm_lock); /* what if other cpu takes mmuentry ?? */
619 if (pm->pm_ctx)
620 setsegmap(VSTOVA(me->me_vseg), seginval);
621 pm->pm_segmap[me->me_vseg] = seginval;
622
623 /* off old pmap chain */
624 if ((*me->me_pmback = me->me_pmforw) != NULL) {
625 me->me_pmforw->me_pmback = me->me_pmback;
626 me->me_pmforw = NULL;
627 } else
628 pm->pm_mmuback = me->me_pmback;
629 simple_unlock(&pm->pm_lock);
630 setcontext(ctx); /* done with old context */
631
632 /* onto new pmap chain; new pmap is already locked, if needed */
633 /* me->me_pmforw = NULL; */ /* done earlier */
634 me->me_pmback = newpm->pm_mmuback;
635 *newpm->pm_mmuback = me;
636 newpm->pm_mmuback = &me->me_pmforw;
637
638 /* into new segment table, with backpointers */
639 newpm->pm_segmap[newvseg] = me->me_pmeg;
640 me->me_pmap = newpm;
641 me->me_vseg = newvseg;
642
643 return (me);
644 }
645
646 /*
647 * Free an MMU entry.
648 *
649 * Assumes the corresponding pmap is already locked.
650 * Does NOT flush cache, but does record ref and mod bits.
651 * The rest of each PTE is discarded.
652 * CALLER MUST SET CONTEXT to pm->pm_ctxnum (if pmap has
653 * a context) or to 0 (if not). Caller must also update
654 * pm->pm_segmap and (possibly) the hardware.
655 */
656 void
657 me_free(pm, pmeg)
658 register struct pmap *pm;
659 register u_int pmeg;
660 {
661 register struct mmuentry *me = &mmuentry[pmeg];
662 register int i, va, pa, tpte;
663
664 #ifdef DEBUG
665 if (pmapdebug & PDB_MMU_ALLOC)
666 printf("me_free: freeing pmeg %x from pmap %x\n",
667 me->me_pmeg, pm);
668 if (me->me_pmeg != pmeg)
669 panic("me_free: wrong mmuentry");
670 if (pm != me->me_pmap)
671 panic("me_free: pm != me_pmap");
672 #endif
673
674 /* just like me_alloc, but no cache flush, and context already set */
675 if (pm->pm_ctx)
676 va = VSTOVA(me->me_vseg);
677 else {
678 setsegmap(0, me->me_pmeg);
679 va = 0;
680 }
681 i = NPTESG;
682 do {
683 tpte = getpte(va);
684 if (tpte & PG_V) {
685 pa = ptoa(HWTOSW(tpte & PG_PFNUM));
686 if (managed(pa))
687 pvhead(pa)->pv_flags |= MR(tpte);
688 }
689 va += NBPG;
690 } while (--i > 0);
691
692 /* take mmu entry off pmap chain */
693 *me->me_pmback = me->me_pmforw;
694 if (me->me_pmforw != NULL)
695 me->me_pmforw->me_pmback = me->me_pmback;
696 else
697 pm->pm_mmuback = me->me_pmback;
698 /* ... and remove from segment map */
699 pm->pm_segmap[me->me_vseg] = seginval;
700
701 /* off LRU or lock chain */
702 remque(me);
703
704 /* no associated pmap; on free list */
705 me->me_pmap = NULL;
706 me->me_next = me_freelist;
707 me_freelist = me;
708 }
709
710 /*
711 * `Page in' (load or inspect) an MMU entry; called on page faults.
712 * Returns 1 if we reloaded the segment, -1 if the segment was
713 * already loaded and the page was marked valid (in which case the
714 * fault must be a bus error or something), or 0 (segment loaded but
715 * PTE not valid, or segment not loaded at all).
716 */
717 int
718 mmu_pagein(pm, va, bits)
719 register struct pmap *pm;
720 register int va, bits;
721 {
722 register int *pte;
723 register struct mmuentry *me;
724 register int vseg = VA_VSEG(va), pmeg, i, s;
725
726 /* return 0 if we have no PTEs to load */
727 if ((pte = pm->pm_pte[vseg]) == NULL)
728 return (0);
729 /* return -1 if the fault is `hard', 0 if not */
730 if (pm->pm_segmap[vseg] != seginval)
731 return (bits && (getpte(va) & bits) == bits ? -1 : 0);
732
733 /* reload segment: write PTEs into a new LRU entry */
734 va = VA_ROUNDDOWNTOSEG(va);
735 s = splpmap(); /* paranoid */
736 pmeg = me_alloc(&me_lru, pm, vseg)->me_pmeg;
737 setsegmap(va, pmeg);
738 i = NPTESG;
739 do {
740 setpte(va, *pte++);
741 va += NBPG;
742 } while (--i > 0);
743 splx(s);
744 return (1);
745 }
746
747 /*
748 * Allocate a context. If necessary, steal one from someone else.
749 * Changes hardware context number and loads segment map.
750 *
751 * This routine is only ever called from locore.s just after it has
752 * saved away the previous process, so there are no active user windows.
753 */
754 void
755 ctx_alloc(pm)
756 register struct pmap *pm;
757 {
758 register union ctxinfo *c;
759 register int cnum, i, va;
760 register pmeg_t *segp;
761
762 #ifdef DEBUG
763 if (pm->pm_ctx)
764 panic("ctx_alloc pm_ctx");
765 if (pmapdebug & PDB_CTX_ALLOC)
766 printf("ctx_alloc(%x)\n", pm);
767 #endif
768 if ((c = ctx_freelist) != NULL) {
769 ctx_freelist = c->c_nextfree;
770 cnum = c - ctxinfo;
771 setcontext(cnum);
772 } else {
773 if ((ctx_kick += ctx_kickdir) >= ncontext) {
774 ctx_kick = ncontext - 1;
775 ctx_kickdir = -1;
776 } else if (ctx_kick < 1) {
777 ctx_kick = 1;
778 ctx_kickdir = 1;
779 }
780 c = &ctxinfo[cnum = ctx_kick];
781 #ifdef DEBUG
782 if (c->c_pmap == NULL)
783 panic("ctx_alloc cu_pmap");
784 if (pmapdebug & (PDB_CTX_ALLOC | PDB_CTX_STEAL))
785 printf("ctx_alloc: steal context %x from %x\n",
786 cnum, c->c_pmap);
787 #endif
788 c->c_pmap->pm_ctx = NULL;
789 setcontext(cnum);
790 #ifdef notdef
791 if (vactype != VAC_NONE)
792 #endif
793 cache_flush_context();
794 }
795 c->c_pmap = pm;
796 pm->pm_ctx = c;
797 pm->pm_ctxnum = cnum;
798
799 /*
800 * XXX loop below makes 3584 iterations ... could reduce
801 * by remembering valid ranges per context: two ranges
802 * should suffice (for text/data/bss and for stack).
803 */
804 segp = pm->pm_rsegmap;
805 for (va = 0, i = NUSEG; --i >= 0; va += NBPSG)
806 setsegmap(va, *segp++);
807 }
808
809 /*
810 * Give away a context. Flushes cache and sets current context to 0.
811 */
812 void
813 ctx_free(pm)
814 struct pmap *pm;
815 {
816 register union ctxinfo *c;
817 register int newc, oldc;
818
819 if ((c = pm->pm_ctx) == NULL)
820 panic("ctx_free");
821 pm->pm_ctx = NULL;
822 oldc = getcontext();
823 if (vactype != VAC_NONE) {
824 newc = pm->pm_ctxnum;
825 CHANGE_CONTEXTS(oldc, newc);
826 cache_flush_context();
827 setcontext(0);
828 } else {
829 CHANGE_CONTEXTS(oldc, 0);
830 }
831 c->c_nextfree = ctx_freelist;
832 ctx_freelist = c;
833 }
834
835
836 /*----------------------------------------------------------------*/
837
838 /*
839 * pvlist functions.
840 */
841
842 /*
843 * Walk the given pv list, and for each PTE, set or clear some bits
844 * (e.g., PG_W or PG_NC).
845 *
846 * As a special case, this never clears PG_W on `pager' pages.
847 * These, being kernel addresses, are always in hardware and have
848 * a context.
849 *
850 * This routine flushes the cache for any page whose PTE changes,
851 * as long as the process has a context; this is overly conservative.
852 * It also copies ref and mod bits to the pvlist, on the theory that
853 * this might save work later. (XXX should test this theory)
854 */
855 void
856 pv_changepte(pv0, bis, bic)
857 register struct pvlist *pv0;
858 register int bis, bic;
859 {
860 register int *pte;
861 register struct pvlist *pv;
862 register struct pmap *pm;
863 register int va, vseg, pmeg, i, flags;
864 int ctx, s;
865
866 write_user_windows(); /* paranoid? */
867
868 s = splpmap(); /* paranoid? */
869 if (pv0->pv_pmap == NULL) {
870 splx(s);
871 return;
872 }
873 ctx = getcontext();
874 flags = pv0->pv_flags;
875 for (pv = pv0; pv != NULL; pv = pv->pv_next) {
876 pm = pv->pv_pmap;
877 if (pm == NULL) panic("pv_changepte 1");
878 va = pv->pv_va;
879 vseg = VA_VSEG(va);
880 pte = pm->pm_pte[vseg];
881 if ((pmeg = pm->pm_segmap[vseg]) != seginval) {
882 register int tpte;
883
884 /* in hardware: fix hardware copy */
885 if (pm->pm_ctx) {
886 extern vm_offset_t pager_sva, pager_eva;
887
888 /*
889 * Bizarreness: we never clear PG_W on
890 * pager pages, nor PG_NC on DVMA pages.
891 */
892 if (bic == PG_W &&
893 va >= pager_sva && va < pager_eva)
894 continue;
895 if (bic == PG_NC &&
896 va >= DVMA_BASE && va < DVMA_END)
897 continue;
898 setcontext(pm->pm_ctxnum);
899 /* XXX should flush only when necessary */
900 #ifdef notdef
901 if (vactype != VAC_NONE)
902 #endif
903 cache_flush_page(va);
904 } else {
905 /* XXX per-cpu va? */
906 setcontext(0);
907 setsegmap(0, pmeg);
908 va = VA_VPG(va) * NBPG;
909 }
910 tpte = getpte(va);
911 if (tpte & PG_V)
912 flags |= (tpte >> PG_M_SHIFT) &
913 (PV_MOD|PV_REF);
914 tpte = (tpte | bis) & ~bic;
915 setpte(va, tpte);
916 if (pte != NULL) /* update software copy */
917 pte[VA_VPG(va)] = tpte;
918 } else {
919 /* not in hardware: just fix software copy */
920 if (pte == NULL)
921 panic("pv_changepte 2");
922 pte += VA_VPG(va);
923 *pte = (*pte | bis) & ~bic;
924 }
925 }
926 pv0->pv_flags = flags;
927 setcontext(ctx);
928 splx(s);
929 }
930
931 /*
932 * Sync ref and mod bits in pvlist (turns off same in hardware PTEs).
933 * Returns the new flags.
934 *
935 * This is just like pv_changepte, but we never add or remove bits,
936 * hence never need to adjust software copies.
937 */
938 int
939 pv_syncflags(pv0)
940 register struct pvlist *pv0;
941 {
942 register struct pvlist *pv;
943 register struct pmap *pm;
944 register int tpte, va, vseg, pmeg, i, flags;
945 int ctx, s;
946
947 write_user_windows(); /* paranoid? */
948
949 s = splpmap(); /* paranoid? */
950 if (pv0->pv_pmap == NULL) { /* paranoid */
951 splx(s);
952 return (0);
953 }
954 ctx = getcontext();
955 flags = pv0->pv_flags;
956 for (pv = pv0; pv != NULL; pv = pv->pv_next) {
957 pm = pv->pv_pmap;
958 va = pv->pv_va;
959 vseg = VA_VSEG(va);
960 if ((pmeg = pm->pm_segmap[vseg]) == seginval)
961 continue;
962 if (pm->pm_ctx) {
963 setcontext(pm->pm_ctxnum);
964 /* XXX should flush only when necessary */
965 #ifdef notdef
966 if (vactype != VAC_NONE)
967 #endif
968 cache_flush_page(va);
969 } else {
970 /* XXX per-cpu va? */
971 setcontext(0);
972 setsegmap(0, pmeg);
973 va = VA_VPG(va) * NBPG;
974 }
975 tpte = getpte(va);
976 if (tpte & (PG_M|PG_U) && tpte & PG_V) {
977 flags |= (tpte >> PG_M_SHIFT) &
978 (PV_MOD|PV_REF);
979 tpte &= ~(PG_M|PG_U);
980 setpte(va, tpte);
981 }
982 }
983 pv0->pv_flags = flags;
984 setcontext(ctx);
985 splx(s);
986 return (flags);
987 }
988
989 /*
990 * pv_unlink is a helper function for pmap_remove.
991 * It takes a pointer to the pv_table head for some physical address
992 * and removes the appropriate (pmap, va) entry.
993 *
994 * Once the entry is removed, if the pv_table head has the cache
995 * inhibit bit set, see if we can turn that off; if so, walk the
996 * pvlist and turn off PG_NC in each PTE. (The pvlist is by
997 * definition nonempty, since it must have at least two elements
998 * in it to have PV_NC set, and we only remove one here.)
999 */
1000 static void
1001 pv_unlink(pv, pm, va)
1002 register struct pvlist *pv;
1003 register struct pmap *pm;
1004 register vm_offset_t va;
1005 {
1006 register struct pvlist *npv;
1007
1008 /*
1009 * First entry is special (sigh).
1010 */
1011 npv = pv->pv_next;
1012 if (pv->pv_pmap == pm && pv->pv_va == va) {
1013 pmap_stats.ps_unlink_pvfirst++;
1014 if (npv != NULL) {
1015 pv->pv_next = npv->pv_next;
1016 pv->pv_pmap = npv->pv_pmap;
1017 pv->pv_va = npv->pv_va;
1018 free((caddr_t)npv, M_VMPVENT);
1019 } else
1020 pv->pv_pmap = NULL;
1021 } else {
1022 register struct pvlist *prev;
1023
1024 for (prev = pv;; prev = npv, npv = npv->pv_next) {
1025 pmap_stats.ps_unlink_pvsearch++;
1026 if (npv == NULL)
1027 panic("pv_unlink");
1028 if (npv->pv_pmap == pm && npv->pv_va == va)
1029 break;
1030 }
1031 prev->pv_next = npv->pv_next;
1032 free((caddr_t)npv, M_VMPVENT);
1033 }
1034 if (pv->pv_flags & PV_NC) {
1035 /*
1036 * Not cached: check to see if we can fix that now.
1037 */
1038 va = pv->pv_va;
1039 for (npv = pv->pv_next; npv != NULL; npv = npv->pv_next)
1040 if (BADALIAS(va, npv->pv_va))
1041 return;
1042 pv->pv_flags &= ~PV_NC;
1043 pv_changepte(pv, 0, PG_NC);
1044 }
1045 }
1046
1047 /*
1048 * pv_link is the inverse of pv_unlink, and is used in pmap_enter.
1049 * It returns PG_NC if the (new) pvlist says that the address cannot
1050 * be cached.
1051 */
1052 static int
1053 pv_link(pv, pm, va)
1054 register struct pvlist *pv;
1055 register struct pmap *pm;
1056 register vm_offset_t va;
1057 {
1058 register struct pvlist *npv;
1059 register int ret;
1060
1061 if (pv->pv_pmap == NULL) {
1062 /* no pvlist entries yet */
1063 pmap_stats.ps_enter_firstpv++;
1064 pv->pv_next = NULL;
1065 pv->pv_pmap = pm;
1066 pv->pv_va = va;
1067 return (0);
1068 }
1069 /*
1070 * Before entering the new mapping, see if
1071 * it will cause old mappings to become aliased
1072 * and thus need to be `discached'.
1073 */
1074 ret = 0;
1075 pmap_stats.ps_enter_secondpv++;
1076 if (pv->pv_flags & PV_NC) {
1077 /* already uncached, just stay that way */
1078 ret = PG_NC;
1079 } else {
1080 /* MAY NEED TO DISCACHE ANYWAY IF va IS IN DVMA SPACE? */
1081 for (npv = pv; npv != NULL; npv = npv->pv_next) {
1082 if (BADALIAS(va, npv->pv_va)) {
1083 pv->pv_flags |= PV_NC;
1084 pv_changepte(pv, ret = PG_NC, 0);
1085 break;
1086 }
1087 }
1088 }
1089 npv = (struct pvlist *)malloc(sizeof *npv, M_VMPVENT, M_WAITOK);
1090 npv->pv_next = pv->pv_next;
1091 npv->pv_pmap = pm;
1092 npv->pv_va = va;
1093 pv->pv_next = npv;
1094 return (ret);
1095 }
1096
1097 /*
1098 * Walk the given list and flush the cache for each (MI) page that is
1099 * potentially in the cache.
1100 */
1101 pv_flushcache(pv)
1102 register struct pvlist *pv;
1103 {
1104 register struct pmap *pm;
1105 register int i, s, ctx;
1106
1107 write_user_windows(); /* paranoia? */
1108
1109 s = splpmap(); /* XXX extreme paranoia */
1110 if ((pm = pv->pv_pmap) != NULL) {
1111 ctx = getcontext();
1112 for (;;) {
1113 if (pm->pm_ctx) {
1114 setcontext(pm->pm_ctxnum);
1115 cache_flush_page(pv->pv_va);
1116 }
1117 pv = pv->pv_next;
1118 if (pv == NULL)
1119 break;
1120 pm = pv->pv_pmap;
1121 }
1122 setcontext(ctx);
1123 }
1124 splx(s);
1125 }
1126
1127 /*----------------------------------------------------------------*/
1128
1129 /*
1130 * At last, pmap code.
1131 */
1132
1133 /*
1134 * Bootstrap the system enough to run with VM enabled.
1135 *
1136 * nmmu is the number of mmu entries (``PMEGs'');
1137 * nctx is the number of contexts.
1138 */
1139 void
1140 pmap_bootstrap(nmmu, nctx)
1141 int nmmu, nctx;
1142 {
1143 register union ctxinfo *ci;
1144 register struct mmuentry *me;
1145 register int i, j, n, z, vs;
1146 register caddr_t p;
1147 register void (*rom_setmap)(int ctx, caddr_t va, int pmeg);
1148 int lastpage;
1149 extern char end[];
1150 extern caddr_t reserve_dumppages(caddr_t);
1151
1152 ncontext = nctx;
1153
1154 /*
1155 * Last segment is the `invalid' one (one PMEG of pte's with !pg_v).
1156 * It will never be used for anything else.
1157 */
1158 seginval = --nmmu;
1159
1160 /*
1161 * Preserve the monitor ROM's reserved VM region, so that
1162 * we can use L1-A or the monitor's debugger. As a side
1163 * effect we map the ROM's reserved VM into all contexts
1164 * (otherwise L1-A crashes the machine!).
1165 */
1166 nmmu = mmu_reservemon(nmmu);
1167
1168 /*
1169 * Allocate and clear mmu entry and context structures.
1170 */
1171 p = end;
1172 mmuentry = me = (struct mmuentry *)p;
1173 p += nmmu * sizeof *me;
1174 ctxinfo = ci = (union ctxinfo *)p;
1175 p += nctx * sizeof *ci;
1176 bzero(end, p - end);
1177
1178 /*
1179 * Set up the `constants' for the call to vm_init()
1180 * in main(). All pages beginning at p (rounded up to
1181 * the next whole page) and continuing through the number
1182 * of available pages are free, but they start at a higher
1183 * virtual address. This gives us two mappable MD pages
1184 * for pmap_zero_page and pmap_copy_page, and one MI page
1185 * for /dev/mem, all with no associated physical memory.
1186 */
1187 p = (caddr_t)(((u_int)p + NBPG - 1) & ~PGOFSET);
1188 avail_start = (int)p - KERNBASE;
1189 avail_end = init_translations() << PGSHIFT;
1190 i = (int)p;
1191 vpage[0] = p, p += NBPG;
1192 vpage[1] = p, p += NBPG;
1193 vmempage = p, p += NBPG;
1194 p = reserve_dumppages(p);
1195 virtual_avail = (vm_offset_t)p;
1196 virtual_end = VM_MAX_KERNEL_ADDRESS;
1197
1198 p = (caddr_t)i; /* retract to first free phys */
1199
1200 /*
1201 * Initialize the kernel pmap.
1202 */
1203 {
1204 register struct kpmap *k = &kernel_pmap_store;
1205
1206 /* kernel_pmap = (struct pmap *)k; */
1207 k->pm_ctx = ctxinfo;
1208 /* k->pm_ctxnum = 0; */
1209 simple_lock_init(&k->pm_lock);
1210 k->pm_refcount = 1;
1211 /* k->pm_mmuforw = 0; */
1212 k->pm_mmuback = &k->pm_mmuforw;
1213 k->pm_segmap = &k->pm_rsegmap[-NUSEG];
1214 k->pm_pte = &k->pm_rpte[-NUSEG];
1215 k->pm_npte = &k->pm_rnpte[-NUSEG];
1216 for (i = NKSEG; --i >= 0;)
1217 k->pm_rsegmap[i] = seginval;
1218 }
1219
1220 /*
1221 * All contexts are free except the kernel's.
1222 *
1223 * XXX sun4c could use context 0 for users?
1224 */
1225 ci->c_pmap = kernel_pmap;
1226 ctx_freelist = ci + 1;
1227 for (i = 1; i < ncontext; i++) {
1228 ci++;
1229 ci->c_nextfree = ci + 1;
1230 }
1231 ci->c_nextfree = NULL;
1232 ctx_kick = 0;
1233 ctx_kickdir = -1;
1234
1235 /* me_freelist = NULL; */ /* already NULL */
1236
1237 /*
1238 * Init mmu entries that map the kernel physical addresses.
1239 * If the page bits in p are 0, we filled the last segment
1240 * exactly (now how did that happen?); if not, it is
1241 * the last page filled in the last segment.
1242 *
1243 * All the other MMU entries are free.
1244 *
1245 * THIS ASSUMES SEGMENT i IS MAPPED BY MMU ENTRY i DURING THE
1246 * BOOT PROCESS
1247 */
1248 z = ((((u_int)p + NBPSG - 1) & ~SGOFSET) - KERNBASE) >> SGSHIFT;
1249 lastpage = VA_VPG(p);
1250 if (lastpage == 0)
1251 lastpage = NPTESG;
1252 p = (caddr_t)KERNBASE; /* first va */
1253 vs = VA_VSEG(KERNBASE); /* first virtual segment */
1254 rom_setmap = promvec->pv_setctxt;
1255 for (i = 0;;) {
1256 /*
1257 * Distribute each kernel segment into all contexts.
1258 * This is done through the monitor ROM, rather than
1259 * directly here: if we do a setcontext we will fault,
1260 * as we are not (yet) mapped in any other context.
1261 */
1262 for (j = 1; j < nctx; j++)
1263 rom_setmap(j, p, i);
1264
1265 /* set up the mmu entry */
1266 me->me_pmeg = i;
1267 insque(me, me_locked.mh_prev);
1268 /* me->me_pmforw = NULL; */
1269 me->me_pmback = kernel_pmap->pm_mmuback;
1270 *kernel_pmap->pm_mmuback = me;
1271 kernel_pmap->pm_mmuback = &me->me_pmforw;
1272 me->me_pmap = kernel_pmap;
1273 me->me_vseg = vs;
1274 kernel_pmap->pm_segmap[vs] = i;
1275 n = ++i < z ? NPTESG : lastpage;
1276 kernel_pmap->pm_npte[vs] = n;
1277 me++;
1278 vs++;
1279 if (i < z) {
1280 p += NBPSG;
1281 continue;
1282 }
1283 /*
1284 * Unmap the pages, if any, that are not part of
1285 * the final segment.
1286 */
1287 for (p += n * NBPG; j < NPTESG; j++, p += NBPG)
1288 setpte(p, 0);
1289 break;
1290 }
1291 for (; i < nmmu; i++, me++) {
1292 me->me_pmeg = i;
1293 me->me_next = me_freelist;
1294 /* me->me_pmap = NULL; */
1295 me_freelist = me;
1296 }
1297
1298 /*
1299 * write protect & encache kernel text;
1300 * set red zone at kernel base; enable cache on message buffer.
1301 */
1302 {
1303 extern char etext[], trapbase[];
1304 #ifdef KGDB
1305 register int mask = ~PG_NC; /* XXX chgkprot is busted */
1306 #else
1307 register int mask = ~(PG_W | PG_NC);
1308 #endif
1309 for (p = trapbase; p < etext; p += NBPG)
1310 setpte(p, getpte(p) & mask);
1311 p = (caddr_t)KERNBASE;
1312 setpte(p, 0);
1313 p += NBPG;
1314 setpte(p, getpte(p) & ~PG_NC);
1315 }
1316
1317 /*
1318 * Grab physical memory list (for /dev/mem).
1319 */
1320 npmemarr = makememarr(pmemarr, MA_SIZE, MEMARR_TOTALPHYS);
1321 }
1322
1323 /*
1324 * Bootstrap memory allocator. This function allows for early dynamic
1325 * memory allocation until the virtual memory system has been bootstrapped.
1326 * After that point, either kmem_alloc or malloc should be used. This
1327 * function works by stealing pages from the (to be) managed page pool,
1328 * stealing virtual address space, then mapping the pages and zeroing them.
1329 *
1330 * It should be used from pmap_bootstrap till vm_page_startup, afterwards
1331 * it cannot be used, and will generate a panic if tried. Note that this
1332 * memory will never be freed, and in essence it is wired down.
1333 */
1334 void *
1335 pmap_bootstrap_alloc(size)
1336 int size;
1337 {
1338 register void *mem;
1339 extern int vm_page_startup_initialized;
1340
1341 if (vm_page_startup_initialized)
1342 panic("pmap_bootstrap_alloc: called after startup initialized");
1343 size = round_page(size);
1344 mem = (void *)virtual_avail;
1345 virtual_avail = pmap_map(virtual_avail, avail_start,
1346 avail_start + size, VM_PROT_READ|VM_PROT_WRITE);
1347 avail_start += size;
1348 bzero((void *)mem, size);
1349 return (mem);
1350 }
1351
1352 /*
1353 * Initialize the pmap module.
1354 */
1355 void
1356 pmap_init(phys_start, phys_end)
1357 register vm_offset_t phys_start, phys_end;
1358 {
1359 register vm_size_t s;
1360
1361 if (PAGE_SIZE != NBPG)
1362 panic("pmap_init: CLSIZE!=1");
1363 /*
1364 * Allocate and clear memory for the pv_table.
1365 */
1366 s = sizeof(struct pvlist) * atop(phys_end - phys_start);
1367 s = round_page(s);
1368 pv_table = (struct pvlist *)kmem_alloc(kernel_map, s);
1369 bzero((caddr_t)pv_table, s);
1370 vm_first_phys = phys_start;
1371 vm_num_phys = phys_end - phys_start;
1372 }
1373
1374 /*
1375 * Map physical addresses into kernel VM.
1376 */
1377 vm_offset_t
1378 pmap_map(va, pa, endpa, prot)
1379 register vm_offset_t va, pa, endpa;
1380 register int prot;
1381 {
1382 register int pgsize = PAGE_SIZE;
1383
1384 while (pa < endpa) {
1385 pmap_enter(kernel_pmap, va, pa, prot, 1);
1386 va += pgsize;
1387 pa += pgsize;
1388 }
1389 return (va);
1390 }
1391
1392 /*
1393 * Create and return a physical map.
1394 *
1395 * If size is nonzero, the map is useless. (ick)
1396 */
1397 struct pmap *
1398 pmap_create(size)
1399 vm_size_t size;
1400 {
1401 register struct pmap *pm;
1402
1403 if (size)
1404 return (NULL);
1405 pm = (struct pmap *)malloc(sizeof *pm, M_VMPMAP, M_WAITOK);
1406 #ifdef DEBUG
1407 if (pmapdebug & PDB_CREATE)
1408 printf("pmap_create: created %x\n", pm);
1409 #endif
1410 bzero((caddr_t)pm, sizeof *pm);
1411 pmap_pinit(pm);
1412 return (pm);
1413 }
1414
1415 /*
1416 * Initialize a preallocated and zeroed pmap structure,
1417 * such as one in a vmspace structure.
1418 */
1419 void
1420 pmap_pinit(pm)
1421 register struct pmap *pm;
1422 {
1423 register int i;
1424
1425 #ifdef DEBUG
1426 if (pmapdebug & PDB_CREATE)
1427 printf("pmap_pinit(%x)\n", pm);
1428 #endif
1429 /* pm->pm_ctx = NULL; */
1430 simple_lock_init(&pm->pm_lock);
1431 pm->pm_refcount = 1;
1432 /* pm->pm_mmuforw = NULL; */
1433 pm->pm_mmuback = &pm->pm_mmuforw;
1434 pm->pm_segmap = pm->pm_rsegmap;
1435 pm->pm_pte = pm->pm_rpte;
1436 pm->pm_npte = pm->pm_rnpte;
1437 for (i = NUSEG; --i >= 0;)
1438 pm->pm_rsegmap[i] = seginval;
1439 /* bzero((caddr_t)pm->pm_rpte, sizeof pm->pm_rpte); */
1440 /* bzero((caddr_t)pm->pm_rnpte, sizeof pm->pm_rnpte); */
1441 }
1442
1443 /*
1444 * Retire the given pmap from service.
1445 * Should only be called if the map contains no valid mappings.
1446 */
1447 void
1448 pmap_destroy(pm)
1449 register struct pmap *pm;
1450 {
1451 int count;
1452
1453 if (pm == NULL)
1454 return;
1455 #ifdef DEBUG
1456 if (pmapdebug & PDB_DESTROY)
1457 printf("pmap_destroy(%x)\n", pm);
1458 #endif
1459 simple_lock(&pm->pm_lock);
1460 count = --pm->pm_refcount;
1461 simple_unlock(&pm->pm_lock);
1462 if (count == 0) {
1463 pmap_release(pm);
1464 free((caddr_t)pm, M_VMPMAP);
1465 }
1466 }
1467
1468 /*
1469 * Release any resources held by the given physical map.
1470 * Called when a pmap initialized by pmap_pinit is being released.
1471 */
1472 void
1473 pmap_release(pm)
1474 register struct pmap *pm;
1475 {
1476 register union ctxinfo *c;
1477 register int s = splpmap(); /* paranoia */
1478
1479 #ifdef DEBUG
1480 if (pmapdebug & PDB_DESTROY)
1481 printf("pmap_release(%x)\n", pm);
1482 #endif
1483 if (pm->pm_mmuforw)
1484 panic("pmap_release mmuforw");
1485 if ((c = pm->pm_ctx) != NULL) {
1486 if (pm->pm_ctxnum == 0)
1487 panic("pmap_release: releasing kernel");
1488 ctx_free(pm);
1489 }
1490 splx(s);
1491 }
1492
1493 /*
1494 * Add a reference to the given pmap.
1495 */
1496 void
1497 pmap_reference(pm)
1498 struct pmap *pm;
1499 {
1500
1501 if (pm != NULL) {
1502 simple_lock(&pm->pm_lock);
1503 pm->pm_refcount++;
1504 simple_unlock(&pm->pm_lock);
1505 }
1506 }
1507
1508 static int pmap_rmk(struct pmap *, vm_offset_t, vm_offset_t, int, int, int);
1509 static int pmap_rmu(struct pmap *, vm_offset_t, vm_offset_t, int, int, int);
1510
1511 /*
1512 * Remove the given range of mapping entries.
1513 * The starting and ending addresses are already rounded to pages.
1514 * Sheer lunacy: pmap_remove is often asked to remove nonexistent
1515 * mappings.
1516 */
1517 void
1518 pmap_remove(pm, va, endva)
1519 register struct pmap *pm;
1520 register vm_offset_t va, endva;
1521 {
1522 register vm_offset_t nva;
1523 register int vseg, nleft, s, ctx;
1524 register int (*rm)(struct pmap *, vm_offset_t, vm_offset_t,
1525 int, int, int);
1526
1527 if (pm == NULL)
1528 return;
1529 #ifdef DEBUG
1530 if (pmapdebug & PDB_REMOVE)
1531 printf("pmap_remove(%x, %x, %x)\n", pm, va, endva);
1532 #endif
1533
1534 if (pm == kernel_pmap) {
1535 /*
1536 * Removing from kernel address space.
1537 */
1538 rm = pmap_rmk;
1539 } else {
1540 /*
1541 * Removing from user address space.
1542 */
1543 write_user_windows();
1544 rm = pmap_rmu;
1545 }
1546
1547 ctx = getcontext();
1548 s = splpmap(); /* XXX conservative */
1549 simple_lock(&pm->pm_lock);
1550 for (; va < endva; va = nva) {
1551 /* do one virtual segment at a time */
1552 vseg = VA_VSEG(va);
1553 nva = VSTOVA(vseg + 1);
1554 if (nva == 0 || nva > endva)
1555 nva = endva;
1556 if ((nleft = pm->pm_npte[vseg]) != 0)
1557 pm->pm_npte[vseg] = (*rm)(pm, va, nva,
1558 vseg, nleft, pm->pm_segmap[vseg]);
1559 }
1560 simple_unlock(&pm->pm_lock);
1561 splx(s);
1562 setcontext(ctx);
1563 }
1564
1565 #define perftest
1566 #ifdef perftest
1567 /* counters, one per possible length */
1568 int rmk_vlen[NPTESG+1]; /* virtual length per rmk() call */
1569 int rmk_npg[NPTESG+1]; /* n valid pages per rmk() call */
1570 int rmk_vlendiff; /* # times npg != vlen */
1571 #endif
1572
1573 /*
1574 * The following magic number was chosen because:
1575 * 1. It is the same amount of work to cache_flush_page 4 pages
1576 * as to cache_flush_segment 1 segment (so at 4 the cost of
1577 * flush is the same).
1578 * 2. Flushing extra pages is bad (causes cache not to work).
1579 * 3. The current code, which malloc()s 5 pages for each process
1580 * for a user vmspace/pmap, almost never touches all 5 of those
1581 * pages.
1582 */
1583 #define PMAP_RMK_MAGIC 5 /* if > magic, use cache_flush_segment */
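/*
 * Example of the resulting policy in pmap_rmk() below: removing a
 * 3-page range does at most 3 cache_flush_page() calls, while removing,
 * say, 8 pages (> PMAP_RMK_MAGIC) does a single cache_flush_segment().
 */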
1584
1585 /*
1586 * Remove a range contained within a single segment.
1587 * These are egregiously complicated routines.
1588 */
1589
1590 /* remove from kernel, return new nleft */
1591 static int
1592 pmap_rmk(pm, va, endva, vseg, nleft, pmeg)
1593 register struct pmap *pm;
1594 register vm_offset_t va, endva;
1595 register int vseg, nleft, pmeg;
1596 {
1597 register int i, tpte, perpage, npg;
1598 register struct pvlist *pv;
1599 #ifdef perftest
1600 register int nvalid;
1601 #endif
1602
1603 #ifdef DEBUG
1604 if (pmeg == seginval)
1605 panic("pmap_rmk: not loaded");
1606 if (pm->pm_ctx == NULL)
1607 panic("pmap_rmk: lost context");
1608 #endif
1609
1610 setcontext(0);
1611 /* decide how to flush cache */
1612 npg = (endva - va) >> PGSHIFT;
1613 if (npg > PMAP_RMK_MAGIC) {
1614 /* flush the whole segment */
1615 perpage = 0;
1616 #ifdef notdef
1617 if (vactype != VAC_NONE)
1618 #endif
1619 cache_flush_segment(vseg);
1620 } else {
1621 /* flush each page individually; some never need flushing */
1622 perpage = 1;
1623 }
1624 #ifdef perftest
1625 nvalid = 0;
1626 #endif
1627 while (va < endva) {
1628 tpte = getpte(va);
1629 if ((tpte & PG_V) == 0) {
1630 va += PAGE_SIZE;
1631 continue;
1632 }
1633 pv = NULL;
1634 /* if cacheable, flush page as needed */
1635 if ((tpte & PG_NC) == 0) {
1636 #ifdef perftest
1637 nvalid++;
1638 #endif
1639 if (perpage)
1640 cache_flush_page(va);
1641 }
1642 if ((tpte & PG_TYPE) == PG_OBMEM) {
1643 i = ptoa(HWTOSW(tpte & PG_PFNUM));
1644 if (managed(i)) {
1645 pv = pvhead(i);
1646 pv->pv_flags |= MR(tpte);
1647 pv_unlink(pv, pm, va);
1648 }
1649 }
1650 nleft--;
1651 setpte(va, 0);
1652 va += NBPG;
1653 }
1654 #ifdef perftest
1655 rmk_vlen[npg]++;
1656 rmk_npg[nvalid]++;
1657 if (npg != nvalid)
1658 rmk_vlendiff++;
1659 #endif
1660
1661 /*
1662 * If the segment is all gone, remove it from everyone and
1663 * free the MMU entry.
1664 */
1665 if (nleft == 0) {
1666 va = VSTOVA(vseg); /* retract */
1667 setsegmap(va, seginval);
1668 for (i = ncontext; --i > 0;) {
1669 setcontext(i);
1670 setsegmap(va, seginval);
1671 }
1672 me_free(pm, pmeg);
1673 }
1674 return (nleft);
1675 }
1676
1677 #ifdef perftest
1678 /* as before but for pmap_rmu */
1679 int rmu_vlen[NPTESG+1]; /* virtual length per rmu() call */
1680 int rmu_npg[NPTESG+1]; /* n valid pages per rmu() call */
1681 int rmu_vlendiff; /* # times npg != vlen */
1682 int rmu_noflush; /* # times rmu does not need to flush at all */
1683 #endif
1684
1685 /*
1686 * Just like PMAP_RMK_MAGIC, but we have a different threshold.
1687 * Note that this may well deserve further tuning work.
1688 */
1689 #define PMAP_RMU_MAGIC 4 /* if > magic, use cache_flush_segment */
1690
1691 /* remove from user */
1692 static int
1693 pmap_rmu(pm, va, endva, vseg, nleft, pmeg)
1694 register struct pmap *pm;
1695 register vm_offset_t va, endva;
1696 register int vseg, nleft, pmeg;
1697 {
1698 register int *pte0, i, pteva, tpte, perpage, npg;
1699 register struct pvlist *pv;
1700 #ifdef perftest
1701 register int doflush, nvalid;
1702 #endif
1703
1704 pte0 = pm->pm_pte[vseg];
1705 if (pmeg == seginval) {
1706 register int *pte = pte0 + VA_VPG(va);
1707
1708 /*
1709 * PTEs are not in MMU. Just invalidate software copies.
1710 */
1711 for (; va < endva; pte++, va += PAGE_SIZE) {
1712 tpte = *pte;
1713 if ((tpte & PG_V) == 0) {
1714 /* nothing to remove (braindead VM layer) */
1715 continue;
1716 }
1717 if ((tpte & PG_TYPE) == PG_OBMEM) {
1718 i = ptoa(HWTOSW(tpte & PG_PFNUM));
1719 if (managed(i))
1720 pv_unlink(pvhead(i), pm, va);
1721 }
1722 nleft--;
1723 *pte = 0;
1724 }
1725 if (nleft == 0) {
1726 free((caddr_t)pte0, M_VMPMAP);
1727 pm->pm_pte[vseg] = NULL;
1728 }
1729 return (nleft);
1730 }
1731
1732 /*
1733 * PTEs are in MMU. Invalidate in hardware, update ref &
1734 * mod bits, and flush cache if required.
1735 */
1736 if (pm->pm_ctx) {
1737 /* process has a context, must flush cache */
1738 npg = (endva - va) >> PGSHIFT;
1739 #ifdef perftest
1740 doflush = 1;
1741 nvalid = 0;
1742 #endif
1743 setcontext(pm->pm_ctxnum);
1744 if (npg > PMAP_RMU_MAGIC) {
1745 perpage = 0; /* flush the whole segment */
1746 #ifdef notdef
1747 if (vactype != VAC_NONE)
1748 #endif
1749 cache_flush_segment(vseg);
1750 } else
1751 perpage = 1;
1752 pteva = va;
1753 } else {
1754 /* no context, use context 0; cache flush unnecessary */
1755 setcontext(0);
1756 /* XXX use per-cpu pteva? */
1757 setsegmap(0, pmeg);
1758 pteva = VA_VPG(va) * NBPG;
1759 perpage = 0;
1760 #ifdef perftest
1761 npg = 0;
1762 doflush = 0;
1763 nvalid = 0;
1764 rmu_noflush++;
1765 #endif
1766 }
1767 for (; va < endva; pteva += PAGE_SIZE, va += PAGE_SIZE) {
1768 tpte = getpte(pteva);
1769 if ((tpte & PG_V) == 0)
1770 continue;
1771 pv = NULL;
1772 /* if cacheable, flush page as needed */
1773 if (doflush && (tpte & PG_NC) == 0) {
1774 #ifdef perftest
1775 nvalid++;
1776 #endif
1777 if (perpage)
1778 cache_flush_page(va);
1779 }
1780 if ((tpte & PG_TYPE) == PG_OBMEM) {
1781 i = ptoa(HWTOSW(tpte & PG_PFNUM));
1782 if (managed(i)) {
1783 pv = pvhead(i);
1784 pv->pv_flags |= MR(tpte);
1785 pv_unlink(pv, pm, va);
1786 }
1787 }
1788 nleft--;
1789 setpte(pteva, 0);
1790 }
1791 #ifdef perftest
1792 if (doflush) {
1793 rmu_vlen[npg]++;
1794 rmu_npg[nvalid]++;
1795 if (npg != nvalid)
1796 rmu_vlendiff++;
1797 }
1798 #endif
1799
1800 /*
1801 * If the segment is all gone, and the context is loaded, give
1802 * the segment back.
1803 */
1804 if (nleft == 0 && pm->pm_ctx != NULL) {
1805 va = VSTOVA(vseg); /* retract */
1806 setsegmap(va, seginval);
1807 free((caddr_t)pte0, M_VMPMAP);
1808 pm->pm_pte[vseg] = NULL;
1809 me_free(pm, pmeg);
1810 }
1811 return (nleft);
1812 }
1813
1814 /*
1815 * Lower (make more strict) the protection on the specified
1816 * physical page.
1817 *
1818 * There are only two cases: either the protection is going to 0
1819 * (in which case we do the dirty work here), or it is going from
1820 * to read-only (in which case pv_changepte does the trick).
1821 */
1822 void
1823 pmap_page_protect(pa, prot)
1824 vm_offset_t pa;
1825 vm_prot_t prot;
1826 {
1827 register struct pvlist *pv, *pv0, *npv;
1828 register struct pmap *pm;
1829 register int *pte;
1830 register int va, vseg, pteva, tpte;
1831 register int flags, nleft, i, pmeg, s, ctx, doflush;
1832
1833 #ifdef DEBUG
1834 if ((pmapdebug & PDB_CHANGEPROT) ||
1835 (pmapdebug & PDB_REMOVE && prot == VM_PROT_NONE))
1836 printf("pmap_page_protect(%x, %x)\n", pa, prot);
1837 #endif
1838 /*
1839 * Skip unmanaged pages, or operations that do not take
1840 * away write permission.
1841 */
1842 if (!managed(pa) || prot & VM_PROT_WRITE)
1843 return;
1844 write_user_windows(); /* paranoia */
1845 if (prot & VM_PROT_READ) {
1846 pv_changepte(pvhead(pa), 0, PG_W);
1847 return;
1848 }
1849
1850 /*
1851 * Remove all access to all people talking to this page.
1852 * Walk down PV list, removing all mappings.
1853 * The logic is much like that for pmap_remove,
1854 * but we know we are removing exactly one page.
1855 */
1856 pv = pvhead(pa);
1857 s = splpmap();
1858 if ((pm = pv->pv_pmap) == NULL) {
1859 splx(s);
1860 return;
1861 }
1862 ctx = getcontext();
1863 pv0 = pv;
1864 flags = pv->pv_flags & ~PV_NC;
1865 for (;; pm = pv->pv_pmap) {
1866 va = pv->pv_va;
1867 vseg = VA_VSEG(va);
1868 if ((nleft = pm->pm_npte[vseg]) == 0)
1869 panic("pmap_remove_all: empty vseg");
1870 nleft--;
1871 pm->pm_npte[vseg] = nleft;
1872 pmeg = pm->pm_segmap[vseg];
1873 pte = pm->pm_pte[vseg];
1874 if (pmeg == seginval) {
1875 if (nleft) {
1876 pte += VA_VPG(va);
1877 *pte = 0;
1878 } else {
1879 free((caddr_t)pte, M_VMPMAP);
1880 pm->pm_pte[vseg] = NULL;
1881 }
1882 goto nextpv;
1883 }
1884 if (pm->pm_ctx) {
1885 setcontext(pm->pm_ctxnum);
1886 pteva = va;
1887 #ifdef notdef
1888 doflush = vactype != VAC_NONE;
1889 #else
1890 doflush = 1;
1891 #endif
1892 } else {
1893 setcontext(0);
1894 /* XXX use per-cpu pteva? */
1895 setsegmap(0, pmeg);
1896 pteva = VA_VPG(va) * NBPG;
1897 doflush = 0;
1898 }
1899 if (nleft) {
1900 if (doflush)
1901 cache_flush_page(va);
1902 tpte = getpte(pteva);
1903 if ((tpte & PG_V) == 0)
1904 panic("pmap_page_protect !PG_V 1");
1905 flags |= MR(tpte);
1906 setpte(pteva, 0);
1907 } else {
1908 if (doflush)
1909 cache_flush_page(va);
1910 tpte = getpte(pteva);
1911 if ((tpte & PG_V) == 0)
1912 panic("pmap_page_protect !PG_V 2");
1913 flags |= MR(tpte);
1914 if (pm->pm_ctx) {
1915 setsegmap(va, seginval);
1916 if (pm == kernel_pmap) {
1917 for (i = ncontext; --i > 0;) {
1918 setcontext(i);
1919 setsegmap(va, seginval);
1920 }
1921 goto skipptefree;
1922 }
1923 }
1924 free((caddr_t)pte, M_VMPMAP);
1925 pm->pm_pte[vseg] = NULL;
1926 skipptefree:
1927 me_free(pm, pmeg);
1928 }
1929 nextpv:
1930 npv = pv->pv_next;
1931 if (pv != pv0)
1932 free((caddr_t)pv, M_VMPVENT);
1933 if ((pv = npv) == NULL)
1934 break;
1935 }
1936 pv0->pv_pmap = NULL;
1937 pv0->pv_flags = flags;
1938 setcontext(ctx);
1939 splx(s);
1940 }
1941
1942 /*
1943 * Lower (make more strict) the protection on the specified
1944 * range of this pmap.
1945 *
1946 * There are only two cases: either the protection is going to 0
1947 * (in which case we call pmap_remove to do the dirty work), or
1948 * it is going from read/write to read-only. The latter is
1949 * fairly easy.
1950 */
1951 void
1952 pmap_protect(pm, sva, eva, prot)
1953 register struct pmap *pm;
1954 vm_offset_t sva, eva;
1955 vm_prot_t prot;
1956 {
1957 register int va, nva, vseg, pteva, pmeg;
1958 register int s, ctx;
1959
1960 if (pm == NULL || prot & VM_PROT_WRITE)
1961 return;
1962 if ((prot & VM_PROT_READ) == 0) {
1963 pmap_remove(pm, sva, eva);
1964 return;
1965 }
1966
1967 write_user_windows();
1968 ctx = getcontext();
1969 s = splpmap();
1970 simple_lock(&pm->pm_lock);
1971
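/* Work one virtual segment at a time, clamping the end at eva. */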
1972 for (va = sva; va < eva;) {
1973 vseg = VA_VSEG(va);
1974 nva = VSTOVA(vseg + 1);
1975 if (nva == 0) panic("pmap_protect: last segment"); /* cannot happen */
1976 if (nva > eva)
1977 nva = eva;
1978 if (pm->pm_npte[vseg] == 0) {
1979 va = nva;
1980 continue;
1981 }
1982 pmeg = pm->pm_segmap[vseg];
1983 if (pmeg == seginval) {
1984 register int *pte = &pm->pm_pte[vseg][VA_VPG(va)];
1985
1986 /* not in MMU; just clear PG_W from core copies */
1987 for (; va < nva; va += NBPG)
1988 *pte++ &= ~PG_W;
1989 } else {
1990 /* in MMU: take away write bits from MMU PTEs */
1991 if (
1992 #ifdef notdef
1993 vactype != VAC_NONE &&
1994 #endif
1995 pm->pm_ctx) {
1996 register int tpte;
1997
1998 /*
1999 * Flush cache so that any existing cache
2000 * tags are updated. This is really only
2001 * needed for PTEs that lose PG_W.
2002 */
2003 setcontext(pm->pm_ctxnum);
2004 for (; va < nva; va += NBPG) {
2005 tpte = getpte(va);
2006 pmap_stats.ps_npg_prot_all++;
2007 if (tpte & PG_W) {
2008 pmap_stats.ps_npg_prot_actual++;
2009 cache_flush_page(va);
2010 setpte(va, tpte & ~PG_W);
2011 }
2012 }
2013 } else {
2014 register int pteva;
2015
2016 /*
2017 * No context, hence not cached;
2018 * just update PTEs.
2019 */
2020 setcontext(0);
2021 /* XXX use per-cpu pteva? */
2022 setsegmap(0, pmeg);
2023 pteva = VA_VPG(va) * NBPG;
2024 for (; va < nva; pteva += NBPG, va += NBPG)
2025 setpte(pteva, getpte(pteva) & ~PG_W);
2026 }
2027 }
2028 }
2029 simple_unlock(&pm->pm_lock);
2030 splx(s);
2031 }
2032
2033 /*
2034 * Change the protection and/or wired status of the given (MI) virtual page.
2035 * XXX: should have separate function (or flag) telling whether only wiring
2036 * is changing.
2037 */
2038 void
2039 pmap_changeprot(pm, va, prot, wired)
2040 register struct pmap *pm;
2041 register vm_offset_t va;
2042 vm_prot_t prot;
2043 int wired;
2044 {
2045 register int vseg, tpte, newprot, pmeg, ctx, i, s;
2046
2047 #ifdef DEBUG
2048 if (pmapdebug & PDB_CHANGEPROT)
2049 printf("pmap_changeprot(%x, %x, %x, %x)\n",
2050 pm, va, prot, wired);
2051 #endif
2052
2053 write_user_windows(); /* paranoia */
2054
2055 if (pm == kernel_pmap)
2056 newprot = prot & VM_PROT_WRITE ? PG_S|PG_W : PG_S;
2057 else
2058 newprot = prot & VM_PROT_WRITE ? PG_W : 0;
2059 vseg = VA_VSEG(va);
2060 s = splpmap(); /* conservative */
2061 pmap_stats.ps_changeprots++;
2062
2063 /* update PTEs in software or hardware */
2064 if ((pmeg = pm->pm_segmap[vseg]) == seginval) {
2065 register int *pte = &pm->pm_pte[vseg][VA_VPG(va)];
2066
2067 /* update in software */
2068 if ((*pte & PG_PROT) == newprot)
2069 goto useless;
2070 *pte = (*pte & ~PG_PROT) | newprot;
2071 } else {
2072 /* update in hardware */
2073 ctx = getcontext();
2074 if (pm->pm_ctx) {
2075 /* use current context; flush writeback cache */
2076 setcontext(pm->pm_ctxnum);
2077 tpte = getpte(va);
2078 if ((tpte & PG_PROT) == newprot)
2079 goto useless;
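/*
 * On a write-back cache, flush the page first so that any modified
 * data still sitting in the cache reaches memory before write
 * permission is taken away.
 */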
2080 if (vactype == VAC_WRITEBACK &&
2081 (newprot & PG_W) == 0 &&
2082 (tpte & (PG_W | PG_NC)) == PG_W)
2083 cache_flush_page((int)va);
2084 } else {
2085 setcontext(0);
2086 /* XXX use per-cpu va? */
2087 setsegmap(0, pmeg);
2088 va = VA_VPG(va) * NBPG;
2089 tpte = getpte(va);
2090 if ((tpte & PG_PROT) == newprot)
2091 goto useless;
2092 }
2093 tpte = (tpte & ~PG_PROT) | newprot;
2094 setpte(va, tpte);
2095 setcontext(ctx);
2096 }
2097 splx(s);
2098 return;
2099
2100 useless:
2101 /* only wiring changed, and we ignore wiring */
2102 pmap_stats.ps_useless_changeprots++;
2103 splx(s);
2104 }
2105
2106 /*
2107 * Insert (MI) physical page pa at virtual address va in the given pmap.
2108 * NB: the pa parameter includes type bits PMAP_OBIO, PMAP_NC as necessary.
2109 *
2110 * If pa is not in the `managed' range it will not be `bank mapped'.
2111 * This works during bootstrap only because the first 4MB happens to
2112 * map one-to-one.
2113 *
2114 * There may already be something else there, or we might just be
2115 * changing protections and/or wiring on an existing mapping.
2116 * XXX should have different entry points for changing!
2117 */
2118 void
2119 pmap_enter(pm, va, pa, prot, wired)
2120 register struct pmap *pm;
2121 vm_offset_t va, pa;
2122 vm_prot_t prot;
2123 int wired;
2124 {
2125 register struct pvlist *pv;
2126 register int pteproto, ctx;
2127
2128 if (pm == NULL)
2129 return;
2130 #ifdef DEBUG
2131 if (pmapdebug & PDB_ENTER)
2132 printf("pmap_enter(%x, %x, %x, %x, %x)\n",
2133 pm, va, pa, prot, wired);
2134 #endif
2135
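/*
 * The type and no-cache bits (PMAP_OBIO, PMAP_NC) come in encoded
 * in pa, as noted above; shift them into their PTE field and strip
 * them from pa before it is used as a physical page number.
 */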
2136 pteproto = PG_V | ((pa & PMAP_TNC) << PG_TNC_SHIFT);
2137 pa &= ~PMAP_TNC;
2138 /*
2139 * Set up prototype for new PTE. Cannot set PG_NC from PV_NC yet
2140 * since the pvlist no-cache bit might change as a result of the
2141 * new mapping.
2142 */
2143 if (managed(pa)) {
2144 pteproto |= SWTOHW(atop(pa));
2145 pv = pvhead(pa);
2146 } else {
2147 pteproto |= atop(pa) & PG_PFNUM;
2148 pv = NULL;
2149 }
2150 if (prot & VM_PROT_WRITE)
2151 pteproto |= PG_W;
2152
2153 ctx = getcontext();
2154 if (pm == kernel_pmap)
2155 pmap_enk(pm, va, prot, wired, pv, pteproto | PG_S);
2156 else
2157 pmap_enu(pm, va, prot, wired, pv, pteproto);
2158 setcontext(ctx);
2159 }
2160
2161 /* enter new (or change existing) kernel mapping */
2162 pmap_enk(pm, va, prot, wired, pv, pteproto)
2163 register struct pmap *pm;
2164 vm_offset_t va;
2165 vm_prot_t prot;
2166 int wired;
2167 register struct pvlist *pv;
2168 register int pteproto;
2169 {
2170 register int vseg, tpte, pmeg, i, s;
2171
2172 vseg = VA_VSEG(va);
2173 s = splpmap(); /* XXX way too conservative */
2174 if (pm->pm_segmap[vseg] != seginval &&
2175 (tpte = getpte(va)) & PG_V) {
2176 register int addr = tpte & PG_PFNUM;
2177
2178 /* old mapping exists */
2179 if (addr == (pteproto & PG_PFNUM)) {
2180 /* just changing protection and/or wiring */
2181 splx(s);
2182 pmap_changeprot(pm, va, prot, wired);
2183 return;
2184 }
2185
2186 /*printf("pmap_enk: changing existing va=>pa entry\n");*/
2187 /*
2188 * Switcheroo: changing pa for this va.
2189 * If old pa was managed, remove from pvlist.
2190 * If old page was cached, flush cache.
2191 */
2192 addr = ptoa(HWTOSW(addr));
2193 if (managed(addr))
2194 pv_unlink(pvhead(addr), pm, va);
2195 if (
2196 #ifdef notdef
2197 vactype != VAC_NONE &&
2198 #endif
2199 (tpte & PG_NC) == 0) {
2200 setcontext(0); /* ??? */
2201 cache_flush_page((int)va);
2202 }
2203 } else {
2204 /* adding new entry */
2205 pm->pm_npte[vseg]++;
2206 }
2207
2208 /*
2209 * If the new mapping is for a managed PA, enter into pvlist.
2210 * Note that the mapping for a malloc page will always be
2211 * unique (hence will never cause a second call to malloc).
2212 */
2213 if (pv != NULL)
2214 pteproto |= pv_link(pv, pm, va);
2215
2216 pmeg = pm->pm_segmap[vseg];
2217 if (pmeg == seginval) {
2218 register int tva;
2219
2220 /*
2221 * Allocate an MMU entry now (on locked list),
2222 * and map it into every context. Set all its
2223 * PTEs invalid (we will then overwrite one, but
2224 * this is more efficient than looping twice).
2225 */
2226 #ifdef DEBUG
2227 if (pm->pm_ctx == NULL || pm->pm_ctxnum != 0)
2228 panic("pmap_enk: kern seg but no kern ctx");
2229 #endif
2230 pmeg = me_alloc(&me_locked, pm, vseg)->me_pmeg;
2231 pm->pm_segmap[vseg] = pmeg;
2232 i = ncontext - 1;
2233 do {
2234 setcontext(i);
2235 setsegmap(va, pmeg);
2236 } while (--i >= 0);
2237
2238 /* set all PTEs to invalid, then overwrite one PTE below */
2239 tva = VA_ROUNDDOWNTOSEG(va);
2240 i = NPTESG;
2241 do {
2242 setpte(tva, 0);
2243 tva += NBPG;
2244 } while (--i > 0);
2245 }
2246
2247 /* ptes kept in hardware only */
2248 setpte(va, pteproto);
2249 splx(s);
2250 }
2251
2252 /* enter new (or change existing) user mapping */
2253 pmap_enu(pm, va, prot, wired, pv, pteproto)
2254 register struct pmap *pm;
2255 vm_offset_t va;
2256 vm_prot_t prot;
2257 int wired;
2258 register struct pvlist *pv;
2259 register int pteproto;
2260 {
2261 register int vseg, *pte, tpte, pmeg, i, s, doflush;
2262
2263 write_user_windows(); /* XXX conservative */
2264 vseg = VA_VSEG(va);
2265 s = splpmap(); /* XXX conservative */
2266
2267 /*
2268 * If there is no space in which the PTEs can be written
2269 * while they are not in the hardware, this must be a new
2270 * virtual segment. Get PTE space and count the segment.
2271 *
2272 * TO SPEED UP CTX ALLOC, PUT SEGMENT BOUNDS STUFF HERE
2273 * AND IN pmap_rmu()
2274 */
2275 retry:
2276 pte = pm->pm_pte[vseg];
2277 if (pte == NULL) {
2278 /* definitely a new mapping */
2279 register int size = NPTESG * sizeof *pte;
2280
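/*
 * malloc() with M_WAITOK may sleep; if it did, some other caller
 * may have installed PTE space for this segment in the meantime,
 * hence the re-check and retry below.
 */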
2281 pte = (int *)malloc((u_long)size, M_VMPMAP, M_WAITOK);
2282 if (pm->pm_pte[vseg] != NULL) {
2283 printf("pmap_enter: pte filled during sleep\n"); /* can this happen? */
2284 free((caddr_t)pte, M_VMPMAP);
2285 goto retry;
2286 }
2287 #ifdef DEBUG
2288 if (pm->pm_segmap[vseg] != seginval)
2289 panic("pmap_enter: new ptes, but not seginval");
2290 #endif
2291 bzero((caddr_t)pte, size);
2292 pm->pm_pte[vseg] = pte;
2293 pm->pm_npte[vseg] = 1;
2294 } else {
2295 /* might be a change: fetch old pte */
2296 doflush = 0;
2297 if ((pmeg = pm->pm_segmap[vseg]) == seginval)
2298 tpte = pte[VA_VPG(va)]; /* software pte */
2299 else {
2300 if (pm->pm_ctx) { /* hardware pte */
2301 setcontext(pm->pm_ctxnum);
2302 tpte = getpte(va);
2303 doflush = 1;
2304 } else {
2305 setcontext(0);
2306 /* XXX use per-cpu pteva? */
2307 setsegmap(0, pmeg);
2308 tpte = getpte(VA_VPG(va) * NBPG);
2309 }
2310 }
2311 if (tpte & PG_V) {
2312 register int addr = tpte & PG_PFNUM;
2313
2314 /* old mapping exists */
2315 if (addr == (pteproto & PG_PFNUM)) {
2316 /* just changing prot and/or wiring */
2317 splx(s);
2318 /* caller should call this directly: */
2319 pmap_changeprot(pm, va, prot, wired);
2320 return;
2321 }
2322 /*
2323 * Switcheroo: changing pa for this va.
2324 * If old pa was managed, remove from pvlist.
2325 * If old page was cached, flush cache.
2326 */
2327 /*printf("%s[%d]: pmap_enu: changing existing va(%x)=>pa entry\n",
2328 curproc->p_comm, curproc->p_pid, va);*/
2329 addr = ptoa(HWTOSW(addr));
2330 if (managed(addr))
2331 pv_unlink(pvhead(addr), pm, va);
2332 if (
2333 #ifdef notdef
2334 vactype != VAC_NONE &&
2335 #endif
2336 doflush && (tpte & PG_NC) == 0)
2337 cache_flush_page((int)va);
2338 } else {
2339 /* adding new entry */
2340 pm->pm_npte[vseg]++;
2341 }
2342 }
2343
2344 if (pv != NULL)
2345 pteproto |= pv_link(pv, pm, va);
2346
2347 /*
2348 * Update hardware or software PTEs (whichever are active).
2349 */
2350 if ((pmeg = pm->pm_segmap[vseg]) != seginval) {
2351 /* ptes are in hardware */
2352 if (pm->pm_ctx)
2353 setcontext(pm->pm_ctxnum);
2354 else {
2355 setcontext(0);
2356 /* XXX use per-cpu pteva? */
2357 setsegmap(0, pmeg);
2358 va = VA_VPG(va) * NBPG;
2359 }
2360 setpte(va, pteproto);
2361 }
2362 /* update software copy */
2363 pte += VA_VPG(va);
2364 *pte = pteproto;
2365
2366 splx(s);
2367 }
2368
2369 /*
2370 * Change the wiring attribute for a map/virtual-address pair.
2371 */
2372 /* ARGSUSED */
2373 void
2374 pmap_change_wiring(pm, va, wired)
2375 struct pmap *pm;
2376 vm_offset_t va;
2377 int wired;
2378 {
2379
2380 pmap_stats.ps_useless_changewire++;
2381 }
2382
2383 /*
2384 * Extract the physical page address associated
2385 * with the given map/virtual_address pair.
2386 * GRR, the vm code knows; we should not have to do this!
2387 */
2388 vm_offset_t
2389 pmap_extract(pm, va)
2390 register struct pmap *pm;
2391 vm_offset_t va;
2392 {
2393 register int tpte;
2394 register int vseg;
2395
2396 if (pm == NULL) {
2397 printf("pmap_extract: null pmap\n");
2398 return (0);
2399 }
2400 vseg = VA_VSEG(va);
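/*
 * If the segment is resident in the MMU, read the PTE from the
 * hardware: in the pmap's own context if one is loaded, otherwise
 * via context 0 (XXX: unlike the other routines here, the pmeg is
 * not mapped at segment 0 first).  If not resident, use the
 * software copy instead.
 */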
2401 if (pm->pm_segmap[vseg] != seginval) {
2402 register int ctx = getcontext();
2403
2404 if (pm->pm_ctx) {
2405 setcontext(pm->pm_ctxnum);
2406 tpte = getpte(va);
2407 } else {
2408 setcontext(0);
2409 tpte = getpte(VA_VPG(va) * NBPG);
2410 }
2411 setcontext(ctx);
2412 } else {
2413 register int *pte = pm->pm_pte[vseg];
2414
2415 if (pte == NULL) {
2416 printf("pmap_extract: invalid vseg\n");
2417 return (0);
2418 }
2419 tpte = pte[VA_VPG(va)];
2420 }
2421 if ((tpte & PG_V) == 0) {
2422 printf("pmap_extract: invalid pte\n");
2423 return (0);
2424 }
2425 tpte &= PG_PFNUM;
2426 tpte = HWTOSW(tpte);
2427 return ((tpte << PGSHIFT) | (va & PGOFSET));
2428 }
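/*
 * A minimal usage sketch (hypothetical caller):
 *
 *	vm_offset_t pa = pmap_extract(kernel_pmap, va);
 *
 * Note that 0 is returned on failure, so a genuine mapping of
 * physical page zero cannot be distinguished from an error.
 */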
2429
2430 /*
2431 * Copy the range specified by src_addr/len
2432 * from the source map to the range dst_addr/len
2433 * in the destination map.
2434 *
2435 * This routine is only advisory and need not do anything.
2436 */
2437 /* ARGSUSED */
2438 void
2439 pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
2440 struct pmap *dst_pmap, *src_pmap;
2441 vm_offset_t dst_addr;
2442 vm_size_t len;
2443 vm_offset_t src_addr;
2444 {
2445 }
2446
2447 /*
2448 * Require that all active physical maps contain no
2449 * incorrect entries NOW. [This update includes
2450 * forcing updates of any address map caching.]
2451 */
2452 void
2453 pmap_update()
2454 {
2455 }
2456
2457 /*
2458 * Garbage collects the physical map system for
2459 * pages which are no longer used.
2460 * Success need not be guaranteed -- that is, there
2461 * may well be pages which are not referenced, but
2462 * others may be collected.
2463 * Called by the pageout daemon when pages are scarce.
2464 */
2465 /* ARGSUSED */
2466 void
2467 pmap_collect(pm)
2468 struct pmap *pm;
2469 {
2470 }
2471
2472 /*
2473 * Clear the modify bit for the given physical page.
2474 */
2475 void
2476 pmap_clear_modify(pa)
2477 register vm_offset_t pa;
2478 {
2479 register struct pvlist *pv;
2480
2481 if (managed(pa)) {
2482 pv = pvhead(pa);
2483 (void) pv_syncflags(pv);
2484 pv->pv_flags &= ~PV_MOD;
2485 }
2486 }
2487
2488 /*
2489 * Tell whether the given physical page has been modified.
2490 */
2491 int
2492 pmap_is_modified(pa)
2493 register vm_offset_t pa;
2494 {
2495 register struct pvlist *pv;
2496
2497 if (managed(pa)) {
2498 pv = pvhead(pa);
2499 if (pv->pv_flags & PV_MOD || pv_syncflags(pv) & PV_MOD)
2500 return (1);
2501 }
2502 return (0);
2503 }
2504
2505 /*
2506 * Clear the reference bit for the given physical page.
2507 */
2508 void
2509 pmap_clear_reference(pa)
2510 vm_offset_t pa;
2511 {
2512 register struct pvlist *pv;
2513
2514 if (managed(pa)) {
2515 pv = pvhead(pa);
2516 (void) pv_syncflags(pv);
2517 pv->pv_flags &= ~PV_REF;
2518 }
2519 }
2520
2521 /*
2522 * Tell whether the given physical page has been referenced.
2523 */
2524 int
2525 pmap_is_referenced(pa)
2526 vm_offset_t pa;
2527 {
2528 register struct pvlist *pv;
2529
2530 if (managed(pa)) {
2531 pv = pvhead(pa);
2532 if (pv->pv_flags & PV_REF || pv_syncflags(pv) & PV_REF)
2533 return (1);
2534 }
2535 return (0);
2536 }
2537
2538 /*
2539 * Make the specified pages (by pmap, offset) pageable (or not) as requested.
2540 *
2541 * A page which is not pageable may not take a fault; therefore, its page
2542 * table entry must remain valid for the duration (or at least, the trap
2543 * handler must not call vm_fault).
2544 *
2545 * This routine is merely advisory; pmap_enter will specify that these pages
2546 * are to be wired down (or not) as appropriate.
2547 */
2548 /* ARGSUSED */
2549 void
2550 pmap_pageable(pm, start, end, pageable)
2551 struct pmap *pm;
2552 vm_offset_t start, end;
2553 int pageable;
2554 {
2555 }
2556
2557 /*
2558 * Fill the given MI physical page with zero bytes.
2559 *
2560 * We avoid stomping on the cache.
2561 * XXX might be faster to use destination's context and allow cache to fill?
2562 */
2563 void
2564 pmap_zero_page(pa)
2565 register vm_offset_t pa;
2566 {
2567 register caddr_t va;
2568 register int pte;
2569
2570 if (managed(pa)) {
2571 /*
2572 * The following might not be necessary since the page
2573 * is being cleared because it is about to be allocated,
2574 * i.e., is in use by no one.
2575 */
2576 #if 1
2577 #ifdef notdef
2578 if (vactype != VAC_NONE)
2579 #endif
2580 pv_flushcache(pvhead(pa));
2581 #endif
2582 pte = PG_V | PG_S | PG_W | PG_NC | SWTOHW(atop(pa));
2583 } else
2584 pte = PG_V | PG_S | PG_W | PG_NC | (atop(pa) & PG_PFNUM);
2585
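/*
 * Map the page, non-cached, at the reserved kernel virtual page
 * vpage[0], zero it, then tear the temporary mapping down.
 */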
2586 va = vpage[0];
2587 setpte(va, pte);
2588 qzero(va, NBPG);
2589 setpte(va, 0);
2590 }
2591
2592 /*
2593 * Copy the given MI physical source page to its destination.
2594 *
2595 * We avoid stomping on the cache as above (with same `XXX' note).
2596 * We must first flush any write-back cache for the source page.
2597 * We go ahead and stomp on the kernel's virtual cache for the
2598 * source page, since the cache can read memory MUCH faster than
2599 * the processor.
2600 */
2601 void
2602 pmap_copy_page(src, dst)
2603 vm_offset_t src, dst;
2604 {
2605 register caddr_t sva, dva;
2606 register int spte, dpte;
2607
2608 if (managed(src)) {
2609 if (vactype == VAC_WRITEBACK)
2610 pv_flushcache(pvhead(src));
2611 spte = PG_V | PG_S | SWTOHW(atop(src));
2612 } else
2613 spte = PG_V | PG_S | (atop(src) & PG_PFNUM);
2614
2615 if (managed(dst)) {
2616 /* similar `might not be necessary' comment applies */
2617 #if 1
2618 #ifdef notdef
2619 if (vactype != VAC_NONE)
2620 #endif
2621 pv_flushcache(pvhead(dst));
2622 #endif
2623 dpte = PG_V | PG_S | PG_W | PG_NC | SWTOHW(atop(dst));
2624 } else
2625 dpte = PG_V | PG_S | PG_W | PG_NC | (atop(dst) & PG_PFNUM);
2626
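/*
 * Map the source (cacheable) and destination (non-cached) pages at
 * the two reserved virtual pages, do the copy, then flush the
 * source page from the cache before unmapping both.
 */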
2627 sva = vpage[0];
2628 dva = vpage[1];
2629 setpte(sva, spte);
2630 setpte(dva, dpte);
2631 qcopy(sva, dva, NBPG); /* loads cache, so we must ... */
2632 cache_flush_page((int)sva);
2633 setpte(sva, 0);
2634 setpte(dva, 0);
2635 }
2636
2637 /*
2638 * Turn a cdevsw d_mmap value into a byte address for pmap_enter.
2639 * XXX this should almost certainly be done differently, and
2640 * elsewhere, or even not at all
2641 */
2642 vm_offset_t
2643 pmap_phys_address(x)
2644 int x;
2645 {
2646
2647 return (x);
2648 }
2649
2650 /*
2651 * Turn off cache for a given (va, number of pages).
2652 *
2653 * We just assert PG_NC for each PTE; the addresses must reside
2654 * in locked kernel space. A cache flush is also done.
2655 */
2656 kvm_uncache(va, npages)
2657 register caddr_t va;
2658 register int npages;
2659 {
2660 register int pte;
2661
2662 for (; --npages >= 0; va += NBPG) {
2663 pte = getpte(va);
2664 if ((pte & PG_V) == 0)
2665 panic("kvm_uncache !pg_v");
2666 pte |= PG_NC;
2667 setpte(va, pte);
2668 cache_flush_page((int)va);
2669 }
2670 }
2671
2672 /*
2673 * For /dev/mem.
2674 */
2675 int
2676 pmap_enter_hw(pm, va, pa, prot, wired)
2677 register struct pmap *pm;
2678 vm_offset_t va, pa;
2679 vm_prot_t prot;
2680 int wired;
2681 {
2682 register struct memarr *ma;
2683 register int n;
2684 register u_int t;
2685
2686 if (pa >= MAXMEM) /* ??? */
2687 return (EFAULT);
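/*
 * Find the physical memory bank containing pa; the unsigned
 * subtraction lets the single `t < ma->len' test reject addresses
 * both below and beyond the bank.
 */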
2688 for (ma = pmemarr, n = npmemarr; --n >= 0; ma++) {
2689 t = (u_int)pa - ma->addr;
2690 if (t < ma->len)
2691 goto ok;
2692 }
2693 return (EFAULT);
2694 ok:
2695 pa = (HWTOSW(atop(pa)) << PGSHIFT) | (pa & PGOFSET);
2696 if (pa >= vm_first_phys + vm_num_phys) /* ??? */
2697 return (EFAULT);
2698
2699 pmap_enter(pm, va, pa, prot, wired);
2700 return (0);
2701 }
2702