1 /* $OpenBSD: pmap.c,v 1.121 2024/06/26 01:40:49 jsg Exp $ */
2 /* $NetBSD: pmap.c,v 1.107 2001/08/31 16:47:41 eeh Exp $ */
3 /*
4 *
5 * Copyright (C) 1996-1999 Eduardo Horvath.
6 * All rights reserved.
7 *
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 */
28
29 #include <sys/atomic.h>
30 #include <sys/param.h>
31 #include <sys/queue.h>
32 #include <sys/systm.h>
33 #include <sys/proc.h>
34 #include <sys/msgbuf.h>
35 #include <sys/pool.h>
36 #include <sys/exec.h>
37 #include <sys/core.h>
38 #include <sys/kcore.h>
39
40 #include <uvm/uvm.h>
41
42 #include <machine/pcb.h>
43 #include <machine/sparc64.h>
44 #include <machine/ctlreg.h>
45 #include <machine/hypervisor.h>
46 #include <machine/openfirm.h>
47 #include <machine/kcore.h>
48 #include <machine/pte.h>
49
50 #include <sparc64/sparc64/cache.h>
51
52 #ifdef DDB
53 #include <machine/db_machdep.h>
54 #include <ddb/db_output.h>
55 #define db_enter() __asm volatile("ta 1; nop")
56 #else
57 #define db_enter()
58 #endif
59
60 #define MEG (1<<20) /* 1MB */
61 #define KB (1<<10) /* 1KB */
62
63 paddr_t cpu0paddr;/* XXXXXXXXXXXXXXXX */
64
65 /* These routines are in assembly to allow access through physical mappings */
66 extern int64_t pseg_get(struct pmap*, vaddr_t addr);
67 extern int pseg_set(struct pmap*, vaddr_t addr, int64_t tte, paddr_t spare);
68
69 extern void pmap_zero_phys(paddr_t pa);
70 extern void pmap_copy_phys(paddr_t src, paddr_t dst);
71
72 /*
73 * Diatribe on ref/mod counting:
74 *
75 * First of all, ref/mod info must be non-volatile. Hence we need to keep it
76 * in the pv_entry structure for each page. (We could bypass this for the
77 * vm_page, but that's a long story....)
78 *
79 * This architecture has nice, fast traps with lots of space for software bits
80 * in the TTE. To accelerate ref/mod counts we make use of these features.
81 *
82 * When we map a page initially, we place a TTE in the page table. It's
83 * inserted with the TLB_W and TLB_ACCESS bits cleared. If a page is really
84 * writeable we set the TLB_REAL_W bit for the trap handler.
85 *
86 * Whenever we take a TLB miss trap, the trap handler will set the TLB_ACCESS
87 * bit in the appropriate TTE in the page table. Whenever we take a protection
88 * fault, if the TLB_REAL_W bit is set then we flip both the TLB_W and TLB_MOD
89 * bits to enable writing and mark the page as modified.
90 *
91 * This means that we may have ref/mod information all over the place. The
92 * pmap routines must traverse the page tables of all pmaps with a given page
93 * and collect/clear all the ref/mod information and copy it into the pv_entry.
94 */
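
/*
 * For example, a sun4u TTE that has picked up SUN4U_TLB_ACCESS and
 * SUN4U_TLB_MODIFY is folded into the pv_entry as PV_REF|PV_MOD by
 * pmap_tte2flags() below; pmap_enter() and pmap_protect() do exactly
 * that before they rewrite a pte.
 */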
95
96 #define PV_ALIAS 0x1LL
97 #define PV_REF 0x2LL
98 #define PV_MOD 0x4LL
99 #define PV_MASK (0x03fLL)
100 #define PV_VAMASK (~(NBPG - 1))
101 #define PV_MATCH(pv,va) (!((((pv)->pv_va) ^ (va)) & PV_VAMASK))
102 #define PV_SETVA(pv,va) ((pv)->pv_va = (((va) & PV_VAMASK) | (((pv)->pv_va) & PV_MASK)))
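
/*
 * Packing sketch: pv_va carries the page-aligned VA in its upper bits
 * and the PV_* flags in its low bits.  With an assumed 8K page at
 * va 0x2000 that has been referenced and modified:
 *
 *	pv->pv_va = (0x2000 & PV_VAMASK) | PV_REF | PV_MOD;	yields 0x2006
 *	PV_MATCH(pv, 0x2000)	true: the flag bits are masked off
 *	PV_SETVA(pv, 0x4000)	retargets the VA, keeps REF and MOD
 */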
103
104 static struct pool pv_pool;
105 static struct pool pmap_pool;
106
107 pv_entry_t pmap_remove_pv(struct pmap *pm, vaddr_t va, paddr_t pa);
108 pv_entry_t pmap_enter_pv(struct pmap *pm, pv_entry_t, vaddr_t va, paddr_t pa);
109 void pmap_page_cache(struct pmap *pm, paddr_t pa, int mode);
110
111 void pmap_bootstrap_cpu(paddr_t);
112
113 void pmap_release(struct pmap *);
114 pv_entry_t pa_to_pvh(paddr_t);
115
116 pv_entry_t
117 pa_to_pvh(paddr_t pa)
118 {
119 struct vm_page *pg;
120
121 pg = PHYS_TO_VM_PAGE(pa);
122 return pg ? &pg->mdpage.pvent : NULL;
123 }
124
125 static __inline u_int
126 pmap_tte2flags(u_int64_t tte)
127 {
128 if (CPU_ISSUN4V)
129 return (((tte & SUN4V_TLB_ACCESS) ? PV_REF : 0) |
130 ((tte & SUN4V_TLB_MODIFY) ? PV_MOD : 0));
131 else
132 return (((tte & SUN4U_TLB_ACCESS) ? PV_REF : 0) |
133 ((tte & SUN4U_TLB_MODIFY) ? PV_MOD : 0));
134 }
135
136 /*
137 * Here's the CPU TSB stuff. It's allocated in pmap_bootstrap.
138 */
139 pte_t *tsb_dmmu;
140 pte_t *tsb_immu;
141 int tsbsize; /* tsbents = 512 * 2^tsbsize */
142 #define TSBENTS (512 << tsbsize)
143 #define TSBSIZE (TSBENTS * 16)
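
/*
 * Each pte_t in the TSB is a 16-byte tag/data pair, so tsbsize 0 is a
 * 512-entry, 8KB TSB; tsbsize 3 is 4096 entries in 64KB; and the
 * tsbsize 7 ceiling used in pmap_bootstrap() is 65536 entries in 1MB.
 */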
144
145 /*
146 * The invalid tsb tag uses the fact that the last context we have is
147 * never allocated.
148 */
149 #define TSB_TAG_INVALID (~0LL << 48)
150
151 #define TSB_DATA(g,sz,pa,priv,write,cache,aliased,valid,ie) \
152 (CPU_ISSUN4V ?\
153 SUN4V_TSB_DATA(g,sz,pa,priv,write,cache,aliased,valid,ie) : \
154 SUN4U_TSB_DATA(g,sz,pa,priv,write,cache,aliased,valid,ie))
155
156 /* The same for sun4u and sun4v. */
157 #define TLB_V SUN4U_TLB_V
158
159 /* Only used for DEBUG. */
160 #define TLB_NFO (CPU_ISSUN4V ? SUN4V_TLB_NFO : SUN4U_TLB_NFO)
161
162 /*
163 * UltraSPARC T1 & T2 implement only a 40-bit real address range, just
164 * like older UltraSPARC CPUs.
165 */
166 #define TLB_PA_MASK SUN4U_TLB_PA_MASK
167
168 /* XXX */
169 #define TLB_TSB_LOCK (CPU_ISSUN4V ? SUN4V_TLB_TSB_LOCK : SUN4U_TLB_TSB_LOCK)
170
171 #ifdef SUN4V
172 struct tsb_desc *tsb_desc;
173 #endif
174
175 struct pmap kernel_pmap_;
176
177 /*
178 * Virtual and physical addresses of the start and end of kernel text
179 * and data segments.
180 */
181 vaddr_t ktext;
182 paddr_t ktextp;
183 vaddr_t ektext;
184 paddr_t ektextp;
185 vaddr_t kdata;
186 paddr_t kdatap;
187 vaddr_t ekdata;
188 paddr_t ekdatap;
189
190 static struct mem_region memlist[8]; /* Pick a random size here */
191
192 vaddr_t vmmap; /* one reserved MI vpage for /dev/mem */
193
194 struct mem_region *mem, *avail, *orig;
195 int memsize;
196
197 static int memh = 0, vmemh = 0; /* Handles to OBP devices */
198
199 static int ptelookup_va(vaddr_t va); /* sun4u */
200
201 static __inline void
202 tsb_invalidate(int ctx, vaddr_t va)
203 {
204 int i;
205 int64_t tag;
206
207 i = ptelookup_va(va);
208 tag = TSB_TAG(0, ctx, va);
209 if (tsb_dmmu[i].tag == tag)
210 atomic_cas_ulong((volatile unsigned long *)&tsb_dmmu[i].tag,
211 tag, TSB_TAG_INVALID);
212 if (tsb_immu[i].tag == tag)
213 atomic_cas_ulong((volatile unsigned long *)&tsb_immu[i].tag,
214 tag, TSB_TAG_INVALID);
215 }
216
217 struct prom_map *prom_map;
218 int prom_map_size;
219
220 #ifdef DEBUG
221 #define PDB_BOOT 0x20000
222 #define PDB_BOOT1 0x40000
223 int pmapdebug = 0;
224
225 #define BDPRINTF(n, f) if (pmapdebug & (n)) prom_printf f
226 #else
227 #define BDPRINTF(n, f)
228 #endif
229
230 /*
231 *
232 * A context is simply a small number that differentiates multiple mappings
233 * of the same address. Contexts on the spitfire are 13 bits, but could
234 * be as large as 17 bits.
235 *
236 * Each context is either free or attached to a pmap.
237 *
238 * The context table is an array of pointers to psegs. Just dereference
239 * the right pointer and you get to the pmap segment tables. These are
240 * physical addresses, of course.
241 *
242 */
243 paddr_t *ctxbusy;
244 int numctx;
245 #define CTXENTRY (sizeof(paddr_t))
246 #define CTXSIZE (numctx * CTXENTRY)
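
/*
 * Sizing example: with the 13-bit spitfire contexts described above,
 * numctx is 8192 and CTXSIZE is 8192 * sizeof(paddr_t), i.e. the
 * "about 64KB" that pmap_bootstrap() budgets for the ctxbusy table.
 */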
247
248 int pmap_get_page(paddr_t *, const char *, struct pmap *);
249 void pmap_free_page(paddr_t, struct pmap *);
250
251 /*
252 * Support for big page sizes. This maps the page size to the
253 * page bits. That is: these are the bits between 8K pages and
254 * larger page sizes that cause aliasing.
255 */
256 const struct page_size_map page_size_map[] = {
257 { (4*1024*1024-1) & ~(8*1024-1), PGSZ_4M },
258 { (512*1024-1) & ~(8*1024-1), PGSZ_512K },
259 { (64*1024-1) & ~(8*1024-1), PGSZ_64K },
260 { (8*1024-1) & ~(8*1024-1), PGSZ_8K },
261 { 0, 0 }
262 };
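
/*
 * Worked example of the table scan done for the PROM mappings in
 * pmap_bootstrap(): entries are tried largest first and accepted only
 * if the alias bits are clear in both the VA and the TTE and the
 * mapping is at least that big.  A 4MB-aligned, 4MB PROM mapping thus
 * gets PGSZ_4M; an unaligned or small one falls through to the PGSZ_8K
 * entry, whose zero mask also terminates the scan.
 */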
263
264 /*
265 * Enter a TTE into the kernel pmap only. Don't do anything else.
266 *
267 * Use only during bootstrapping since it does no locking and
268 * can lose ref/mod info!!!!
269 *
270 */
271 static void
272 pmap_enter_kpage(vaddr_t va, int64_t data)
273 {
274 paddr_t newp;
275
276 newp = 0;
277 while (pseg_set(pmap_kernel(), va, data, newp) == 1) {
278 newp = 0;
279 if (!pmap_get_page(&newp, NULL, pmap_kernel())) {
280 prom_printf("pmap_enter_kpage: out of pages\n");
281 panic("pmap_enter_kpage");
282 }
283
284 BDPRINTF(PDB_BOOT1,
285 ("pseg_set: pm=%p va=%p data=%lx newp %lx\r\n",
286 pmap_kernel(), va, (long)data, (long)newp));
287 }
288 }
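
#if 0
/*
 * Usage sketch only; va and pa are assumed to be an 8K-aligned pair
 * the caller already owns.  This is the same pattern pmap_bootstrap()
 * uses below for the msgbuf, stack and cpu_info pages.
 */
int64_t data = TSB_DATA(0 /* global */, PGSZ_8K, pa,
    1 /* priv */, 1 /* write */, 1 /* cacheable */,
    0 /* aliased */, 1 /* valid */, 0 /* IE */);
pmap_enter_kpage(va, data);
#endif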
289
290 /*
291 * Check bootargs to see if we need to enable bootdebug.
292 */
293 #ifdef DEBUG
294 void
295 pmap_bootdebug(void)
296 {
297 int chosen;
298 char *cp;
299 char buf[128];
300
301 /*
302 * Grab boot args from PROM
303 */
304 chosen = OF_finddevice("/chosen");
305 /* Setup pointer to boot flags */
306 OF_getprop(chosen, "bootargs", buf, sizeof(buf));
307 cp = buf;
308 while (*cp != '-')
309 if (*cp++ == '\0')
310 return;
311 for (;;)
312 switch (*++cp) {
313 case '\0':
314 return;
315 case 'V':
316 pmapdebug |= PDB_BOOT|PDB_BOOT1;
317 break;
318 case 'D':
319 pmapdebug |= PDB_BOOT1;
320 break;
321 }
322 }
323 #endif
324
325 /*
326 * This is called during bootstrap, before the system is really initialized.
327 *
328 * It's called with the start and end virtual addresses of the kernel. We
329 * bootstrap the pmap allocator now. We will allocate the basic structures we
330 * need to bootstrap the VM system here: the page frame tables, the TSB, and
331 * the free memory lists.
332 *
333 * Now all this is becoming a bit obsolete. maxctx is still important, but by
334 * separating the kernel text and data segments we really would need to
335 * provide the start and end of each segment. But we can't. The rodata
336 * segment is attached to the end of the kernel segment and has nothing to
337 * delimit its end. We could still pass in the beginning of the kernel and
338 * the beginning and end of the data segment but we could also just as easily
339 * calculate that all in here.
340 *
341 * To handle the kernel text, we need to do a reverse mapping of the start of
342 * the kernel, then traverse the free memory lists to find out how big it is.
343 */
344
345 void
346 pmap_bootstrap(u_long kernelstart, u_long kernelend, u_int maxctx, u_int numcpus)
347 {
348 extern int data_start[], end[]; /* start of data segment */
349 extern int msgbufmapped;
350 struct mem_region *mp, *mp1;
351 int msgbufsiz;
352 int pcnt;
353 size_t s, sz;
354 int i, j;
355 int64_t data;
356 vaddr_t va;
357 u_int64_t phys_msgbuf;
358 paddr_t newkp;
359 vaddr_t newkv, firstaddr, intstk;
360 vsize_t kdsize, ktsize;
361
362 #ifdef DEBUG
363 pmap_bootdebug();
364 #endif
365
366 BDPRINTF(PDB_BOOT, ("Entered pmap_bootstrap.\r\n"));
367 /*
368 * set machine page size
369 */
370 uvmexp.pagesize = NBPG;
371 uvm_setpagesize();
372
373 /*
374 * Find out how big the kernel's virtual address
375 * space is. The *$#@$ prom loses this info
376 */
377 if ((vmemh = OF_finddevice("/virtual-memory")) == -1) {
378 prom_printf("no virtual-memory?");
379 OF_exit();
380 }
381 bzero((caddr_t)memlist, sizeof(memlist));
382 if (OF_getprop(vmemh, "available", memlist, sizeof(memlist)) <= 0) {
383 prom_printf("no vmemory avail?");
384 OF_exit();
385 }
386
387 #ifdef DEBUG
388 if (pmapdebug & PDB_BOOT) {
389 /* print out mem list */
390 prom_printf("Available virtual memory:\r\n");
391 for (mp = memlist; mp->size; mp++) {
392 prom_printf("memlist start %p size %lx\r\n",
393 (void *)(u_long)mp->start,
394 (u_long)mp->size);
395 }
396 prom_printf("End of available virtual memory\r\n");
397 }
398 #endif
399 /*
400 * Get hold of the message buffer.
401 */
402 msgbufp = (struct msgbuf *)(vaddr_t)MSGBUF_VA;
403 /* XXXXX -- increase msgbufsiz for uvmhist printing */
404 msgbufsiz = 4*NBPG /* round_page(sizeof(struct msgbuf)) */;
405 BDPRINTF(PDB_BOOT, ("Trying to allocate msgbuf at %lx, size %lx\r\n",
406 (long)msgbufp, (long)msgbufsiz));
407 if ((long)msgbufp !=
408 (long)(phys_msgbuf = prom_claim_virt((vaddr_t)msgbufp, msgbufsiz)))
409 prom_printf(
410 "cannot get msgbuf VA, msgbufp=%p, phys_msgbuf=%lx\r\n",
411 (void *)msgbufp, (long)phys_msgbuf);
412 phys_msgbuf = prom_get_msgbuf(msgbufsiz, MMU_PAGE_ALIGN);
413 BDPRINTF(PDB_BOOT,
414 ("We should have the memory at %lx, let's map it in\r\n",
415 phys_msgbuf));
416 if (prom_map_phys(phys_msgbuf, msgbufsiz, (vaddr_t)msgbufp,
417 -1/* sunos does this */) == -1)
418 prom_printf("Failed to map msgbuf\r\n");
419 else
420 BDPRINTF(PDB_BOOT, ("msgbuf mapped at %p\r\n",
421 (void *)msgbufp));
422 msgbufmapped = 1; /* enable message buffer */
423 initmsgbuf((caddr_t)msgbufp, msgbufsiz);
424
425 /*
426 * Record kernel mapping -- we will map these with a permanent 4MB
427 * TLB entry when we initialize the CPU later.
428 */
429 BDPRINTF(PDB_BOOT, ("translating kernelstart %p\r\n",
430 (void *)kernelstart));
431 ktext = kernelstart;
432 ktextp = prom_vtop(kernelstart);
433
434 kdata = (vaddr_t)data_start;
435 kdatap = prom_vtop(kdata);
436 ekdata = (vaddr_t)end;
437
438 /*
439 * Find the real size of the kernel. Locate the smallest starting
440 * address > kernelstart.
441 */
442 for (mp1 = mp = memlist; mp->size; mp++) {
443 /*
444 * Check whether this region is at the end of the kernel.
445 */
446 if (mp->start >= ekdata && (mp1->start < ekdata ||
447 mp1->start > mp->start))
448 mp1 = mp;
449 }
450 if (mp1->start < kdata)
451 prom_printf("Kernel at end of vmem???\r\n");
452
453 BDPRINTF(PDB_BOOT1,
454 ("Kernel data is mapped at %lx, next free seg: %lx, %lx\r\n",
455 (long)kdata, (u_long)mp1->start, (u_long)mp1->size));
456
457 /*
458 * We save where we can start allocating memory.
459 */
460 firstaddr = (ekdata + 07) & ~ 07; /* Longword align */
461
462 /*
463 * We reserve 100K to grow.
464 */
465 ekdata += 100*KB;
466
467 /*
468 * And set the end of the data segment to the end of what our
469 * bootloader allocated for us, if we still fit in there.
470 */
471 if (ekdata < mp1->start)
472 ekdata = mp1->start;
473
474 #define valloc(name, type, num) (name) = (type *)firstaddr; firstaddr += (num)
475
476 /*
477 * Since we can't always give the loader the hint to align us on a 4MB
478 * boundary, we will need to do the alignment ourselves. First
479 * allocate a new 4MB aligned segment for the kernel, then map it
480 * in, copy the kernel over, swap mappings, then finally, free the
481 * old kernel. Then we can continue with this.
482 *
483 * We'll do the data segment up here since we know how big it is.
484 * We'll do the text segment after we've read in the PROM translations
485 * so we can figure out its size.
486 *
487 * The ctxbusy table takes about 64KB, the TSB up to 32KB, and the
488 * rest should be less than 1K, so 100KB extra should be plenty.
489 */
490 kdsize = round_page(ekdata - kdata);
491 BDPRINTF(PDB_BOOT1, ("Kernel data size is %lx\r\n", (long)kdsize));
492
493 if ((kdatap & (4*MEG-1)) == 0) {
494 /* We were at a 4MB boundary -- claim the rest */
495 psize_t szdiff = (4*MEG - kdsize) & (4*MEG - 1);
496
497 BDPRINTF(PDB_BOOT1, ("Need to extend dseg by %lx\r\n",
498 (long)szdiff));
499 if (szdiff) {
500 /* Claim the rest of the physical page. */
501 newkp = kdatap + kdsize;
502 newkv = kdata + kdsize;
503 if (newkp != prom_claim_phys(newkp, szdiff)) {
504 prom_printf("pmap_bootstrap: could not claim "
505 "physical dseg extension "
506 "at %lx size %lx\r\n",
507 newkp, szdiff);
508 goto remap_data;
509 }
510
511 /* And the rest of the virtual page. */
512 if (prom_claim_virt(newkv, szdiff) != newkv)
513 prom_printf("pmap_bootstrap: could not claim "
514 "virtual dseg extension "
515 "at size %lx\r\n", newkv, szdiff);
516
517 /* Make sure all 4MB are mapped */
518 prom_map_phys(newkp, szdiff, newkv, -1);
519 }
520 } else {
521 psize_t sz;
522 remap_data:
523 /*
524 * Either we're not at a 4MB boundary or we can't get the rest
525 * of the 4MB extension. We need to move the data segment.
526 * Leave 1MB of extra fiddle space in the calculations.
527 */
528
529 sz = (kdsize + 4*MEG - 1) & ~(4*MEG-1);
530 BDPRINTF(PDB_BOOT1,
531 ("Allocating new %lx kernel data at 4MB boundary\r\n",
532 (u_long)sz));
533 if ((newkp = prom_alloc_phys(sz, 4*MEG)) == (paddr_t)-1 ) {
534 prom_printf("Cannot allocate new kernel\r\n");
535 OF_exit();
536 }
537 BDPRINTF(PDB_BOOT1, ("Allocating new va for buffer at %llx\r\n",
538 (u_int64_t)newkp));
539 if ((newkv = (vaddr_t)prom_alloc_virt(sz, 8)) ==
540 (vaddr_t)-1) {
541 prom_printf("Cannot allocate new kernel va\r\n");
542 OF_exit();
543 }
544 BDPRINTF(PDB_BOOT1, ("Mapping in buffer %llx at %llx\r\n",
545 (u_int64_t)newkp, (u_int64_t)newkv));
546 prom_map_phys(newkp, sz, (vaddr_t)newkv, -1);
547 BDPRINTF(PDB_BOOT1, ("Copying %ld bytes kernel data...",
548 kdsize));
549 bzero((void *)newkv, sz);
550 bcopy((void *)kdata, (void *)newkv, kdsize);
551 BDPRINTF(PDB_BOOT1, ("done. Swapping maps..unmap new\r\n"));
552 prom_unmap_virt((vaddr_t)newkv, sz);
553 BDPRINTF(PDB_BOOT, ("remap old "));
554 #if 0
555 /*
556 * calling the prom will probably require reading part of the
557 * data segment so we can't do this. */
558 prom_unmap_virt((vaddr_t)kdatap, kdsize);
559 #endif
560 prom_map_phys(newkp, sz, kdata, -1);
561 /*
562 * we will map in 4MB, more than we allocated, to allow
563 * further allocation
564 */
565 BDPRINTF(PDB_BOOT1, ("free old\r\n"));
566 prom_free_phys(kdatap, kdsize);
567 kdatap = newkp;
568 BDPRINTF(PDB_BOOT1,
569 ("pmap_bootstrap: firstaddr is %lx virt (%lx phys)"
570 "avail for kernel\r\n", (u_long)firstaddr,
571 (u_long)prom_vtop(firstaddr)));
572 }
573
574 /*
575 * Find out how much RAM we have installed.
576 */
577 BDPRINTF(PDB_BOOT, ("pmap_bootstrap: getting phys installed\r\n"));
578 if ((memh = OF_finddevice("/memory")) == -1) {
579 prom_printf("no memory?");
580 OF_exit();
581 }
582 memsize = OF_getproplen(memh, "reg") + 2 * sizeof(struct mem_region);
583 valloc(mem, struct mem_region, memsize);
584 bzero((caddr_t)mem, memsize);
585 if (OF_getprop(memh, "reg", mem, memsize) <= 0) {
586 prom_printf("no memory installed?");
587 OF_exit();
588 }
589
590 #ifdef DEBUG
591 if (pmapdebug & PDB_BOOT1) {
592 /* print out mem list */
593 prom_printf("Installed physical memory:\r\n");
594 for (mp = mem; mp->size; mp++) {
595 prom_printf("memlist start %lx size %lx\r\n",
596 (u_long)mp->start, (u_long)mp->size);
597 }
598 }
599 #endif
600 BDPRINTF(PDB_BOOT1, ("Calculating physmem:"));
601
602 for (mp = mem; mp->size; mp++)
603 physmem += atop(mp->size);
604 BDPRINTF(PDB_BOOT1, (" result %x or %d pages\r\n",
605 (int)physmem, (int)physmem));
606
607 /*
608 * Calculate approx TSB size.
609 */
610 tsbsize = 0;
611 #ifdef SMALL_KERNEL
612 while ((physmem >> tsbsize) > atop(64 * MEG) && tsbsize < 2)
613 #else
614 while ((physmem >> tsbsize) > atop(64 * MEG) && tsbsize < 7)
615 #endif
616 tsbsize++;
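
	/*
	 * Worked example: with 2GB of RAM and 8K pages, physmem is
	 * 262144 pages and atop(64 * MEG) is 8192, so the loop stops at
	 * tsbsize 5: a 16384-entry, 256KB TSB.
	 */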
617
618 /*
619 * Save the prom translations
620 */
621 sz = OF_getproplen(vmemh, "translations");
622 valloc(prom_map, struct prom_map, sz);
623 if (OF_getprop(vmemh, "translations", (void *)prom_map, sz) <= 0) {
624 prom_printf("no translations installed?");
625 OF_exit();
626 }
627 prom_map_size = sz / sizeof(struct prom_map);
628 #ifdef DEBUG
629 if (pmapdebug & PDB_BOOT) {
630 /* print out mem list */
631 prom_printf("Prom xlations:\r\n");
632 for (i = 0; i < prom_map_size; i++) {
633 prom_printf("start %016lx size %016lx tte %016lx\r\n",
634 (u_long)prom_map[i].vstart,
635 (u_long)prom_map[i].vsize,
636 (u_long)prom_map[i].tte);
637 }
638 prom_printf("End of prom xlations\r\n");
639 }
640 #endif
641 /*
642 * Hunt for the kernel text segment and figure out its size and
643 * alignment.
644 */
645 ktsize = 0;
646 for (i = 0; i < prom_map_size; i++)
647 if (prom_map[i].vstart == ktext + ktsize)
648 ktsize += prom_map[i].vsize;
649 if (ktsize == 0)
650 panic("No kernel text segment!");
651 ektext = ktext + ktsize;
652
653 if (ktextp & (4*MEG-1)) {
654 /* Kernel text is not 4MB aligned -- need to fix that */
655 BDPRINTF(PDB_BOOT1,
656 ("Allocating new %lx kernel text at 4MB boundary\r\n",
657 (u_long)ktsize));
658 if ((newkp = prom_alloc_phys(ktsize, 4*MEG)) == 0 ) {
659 prom_printf("Cannot allocate new kernel text\r\n");
660 OF_exit();
661 }
662 BDPRINTF(PDB_BOOT1, ("Allocating new va for buffer at %llx\r\n",
663 (u_int64_t)newkp));
664 if ((newkv = (vaddr_t)prom_alloc_virt(ktsize, 8)) ==
665 (vaddr_t)-1) {
666 prom_printf("Cannot allocate new kernel text va\r\n");
667 OF_exit();
668 }
669 BDPRINTF(PDB_BOOT1, ("Mapping in buffer %lx at %lx\r\n",
670 (u_long)newkp, (u_long)newkv));
671 prom_map_phys(newkp, ktsize, (vaddr_t)newkv, -1);
672 BDPRINTF(PDB_BOOT1, ("Copying %ld bytes kernel text...",
673 ktsize));
674 bcopy((void *)ktext, (void *)newkv,
675 ktsize);
676 BDPRINTF(PDB_BOOT1, ("done. Swapping maps..unmap new\r\n"));
677 prom_unmap_virt((vaddr_t)newkv, 4*MEG);
678 BDPRINTF(PDB_BOOT, ("remap old "));
679 #if 0
680 /*
681 * calling the prom will probably require reading part of the
682 * text segment so we can't do this.
683 */
684 prom_unmap_virt((vaddr_t)ktextp, ktsize);
685 #endif
686 prom_map_phys(newkp, ktsize, ktext, -1);
687 /*
688 * we will map in 4MB, more than we allocated, to allow
689 * further allocation
690 */
691 BDPRINTF(PDB_BOOT1, ("free old\r\n"));
692 prom_free_phys(ktextp, ktsize);
693 ktextp = newkp;
694
695 BDPRINTF(PDB_BOOT1,
696 ("pmap_bootstrap: firstaddr is %lx virt (%lx phys)"
697 "avail for kernel\r\n", (u_long)firstaddr,
698 (u_long)prom_vtop(firstaddr)));
699
700 /*
701 * Re-fetch translations -- they've certainly changed.
702 */
703 if (OF_getprop(vmemh, "translations", (void *)prom_map, sz) <=
704 0) {
705 prom_printf("no translations installed?");
706 OF_exit();
707 }
708 #ifdef DEBUG
709 if (pmapdebug & PDB_BOOT) {
710 /* print out mem list */
711 prom_printf("New prom xlations:\r\n");
712 for (i = 0; i < prom_map_size; i++) {
713 prom_printf("start %016lx size %016lx tte %016lx\r\n",
714 (u_long)prom_map[i].vstart,
715 (u_long)prom_map[i].vsize,
716 (u_long)prom_map[i].tte);
717 }
718 prom_printf("End of prom xlations\r\n");
719 }
720 #endif
721 }
722 ektextp = ktextp + ktsize;
723
724 /*
725 * Here's a quick in-lined reverse bubble sort. It gets rid of
726 * any translations inside the kernel data VA range.
727 */
728 for(i = 0; i < prom_map_size; i++) {
729 if (prom_map[i].vstart >= kdata &&
730 prom_map[i].vstart <= firstaddr) {
731 prom_map[i].vstart = 0;
732 prom_map[i].vsize = 0;
733 }
734 if (prom_map[i].vstart >= ktext &&
735 prom_map[i].vstart <= ektext) {
736 prom_map[i].vstart = 0;
737 prom_map[i].vsize = 0;
738 }
739 for(j = i; j < prom_map_size; j++) {
740 if (prom_map[j].vstart >= kdata &&
741 prom_map[j].vstart <= firstaddr)
742 continue; /* this is inside the kernel */
743 if (prom_map[j].vstart >= ktext &&
744 prom_map[j].vstart <= ektext)
745 continue; /* this is inside the kernel */
746 if (prom_map[j].vstart > prom_map[i].vstart) {
747 struct prom_map tmp;
748 tmp = prom_map[i];
749 prom_map[i] = prom_map[j];
750 prom_map[j] = tmp;
751 }
752 }
753 }
754 #ifdef DEBUG
755 if (pmapdebug & PDB_BOOT) {
756 /* print out mem list */
757 prom_printf("Prom xlations:\r\n");
758 for (i = 0; i < prom_map_size; i++) {
759 prom_printf("start %016lx size %016lx tte %016lx\r\n",
760 (u_long)prom_map[i].vstart,
761 (u_long)prom_map[i].vsize,
762 (u_long)prom_map[i].tte);
763 }
764 prom_printf("End of prom xlations\r\n");
765 }
766 #endif
767
768 /*
769 * Allocate 64KB per CPU for the cpu_info structures now.
770 */
771 if ((cpu0paddr = prom_alloc_phys(numcpus * 8*NBPG, 8*NBPG)) == 0 ) {
772 prom_printf("Cannot allocate new cpu_info\r\n");
773 OF_exit();
774 }
775
776 /*
777 * Now the kernel text segment is in its final location we can try to
778 * find out how much memory really is free.
779 */
780 sz = OF_getproplen(memh, "available") + sizeof(struct mem_region);
781 valloc(orig, struct mem_region, sz);
782 bzero((caddr_t)orig, sz);
783 if (OF_getprop(memh, "available", orig, sz) <= 0) {
784 prom_printf("no available RAM?");
785 OF_exit();
786 }
787 #ifdef DEBUG
788 if (pmapdebug & PDB_BOOT1) {
789 /* print out mem list */
790 prom_printf("Available physical memory:\r\n");
791 for (mp = orig; mp->size; mp++) {
792 prom_printf("memlist start %lx size %lx\r\n",
793 (u_long)mp->start, (u_long)mp->size);
794 }
795 prom_printf("End of available physical memory\r\n");
796 }
797 #endif
798 valloc(avail, struct mem_region, sz);
799 bzero((caddr_t)avail, sz);
800 for (pcnt = 0, mp = orig, mp1 = avail; (mp1->size = mp->size);
801 mp++, mp1++) {
802 mp1->start = mp->start;
803 pcnt++;
804 }
805
806 /*
807 * Allocate and initialize a context table
808 */
809 numctx = maxctx;
810 valloc(ctxbusy, paddr_t, CTXSIZE);
811 bzero((caddr_t)ctxbusy, CTXSIZE);
812
813 /*
814 * Allocate our TSB.
815 *
816 * We will use the left over space to flesh out the kernel pmap.
817 */
818 BDPRINTF(PDB_BOOT1, ("firstaddr before TSB=%lx\r\n",
819 (u_long)firstaddr));
820 firstaddr = ((firstaddr + TSBSIZE - 1) & ~(TSBSIZE-1));
821 #ifdef DEBUG
822 i = (firstaddr + (NBPG-1)) & ~(NBPG-1); /* First, page align */
823 if ((int)firstaddr < i) {
824 prom_printf("TSB alloc fixup failed\r\n");
825 prom_printf("frobbed i, firstaddr before TSB=%x, %lx\r\n",
826 (int)i, (u_long)firstaddr);
827 panic("TSB alloc");
828 OF_exit();
829 }
830 #endif
831 BDPRINTF(PDB_BOOT, ("frobbed i, firstaddr before TSB=%x, %lx\r\n",
832 (int)i, (u_long)firstaddr));
833 valloc(tsb_dmmu, pte_t, TSBSIZE);
834 bzero(tsb_dmmu, TSBSIZE);
835 valloc(tsb_immu, pte_t, TSBSIZE);
836 bzero(tsb_immu, TSBSIZE);
837
838 BDPRINTF(PDB_BOOT1, ("firstaddr after TSB=%lx\r\n", (u_long)firstaddr));
839 BDPRINTF(PDB_BOOT1, ("TSB allocated at %p size %08x\r\n", (void *)tsb_dmmu,
840 (int)TSBSIZE));
841
842 #ifdef SUN4V
843 if (CPU_ISSUN4V) {
844 valloc(tsb_desc, struct tsb_desc, sizeof(struct tsb_desc));
845 bzero(tsb_desc, sizeof(struct tsb_desc));
846 tsb_desc->td_idxpgsz = 0;
847 tsb_desc->td_assoc = 1;
848 tsb_desc->td_size = TSBENTS;
849 tsb_desc->td_ctxidx = -1;
850 tsb_desc->td_pgsz = 0xf;
851 tsb_desc->td_pa = (paddr_t)tsb_dmmu + kdatap - kdata;
852 }
853 #endif
854
855 BDPRINTF(PDB_BOOT1, ("firstaddr after pmap=%08lx\r\n",
856 (u_long)firstaddr));
857
858 /*
859 * Page align all regions.
860 * Non-page memory isn't very interesting to us.
861 * Also, sort the entries for ascending addresses.
862 *
863 * And convert from virtual to physical addresses.
864 */
865
866 BDPRINTF(PDB_BOOT, ("kernel virtual size %08lx - %08lx\r\n",
867 (u_long)kernelstart, (u_long)firstaddr));
868 kdata = kdata & ~PGOFSET;
869 ekdata = firstaddr;
870 ekdata = (ekdata + PGOFSET) & ~PGOFSET;
871 BDPRINTF(PDB_BOOT1, ("kernel virtual size %08lx - %08lx\r\n",
872 (u_long)kernelstart, (u_long)kernelend));
873 ekdatap = ekdata - kdata + kdatap;
874 /* Switch from vaddrs to paddrs */
875 if(ekdatap > (kdatap + 4*MEG)) {
876 prom_printf("Kernel size exceeds 4MB\r\n");
877 }
878
879 #ifdef DEBUG
880 if (pmapdebug & PDB_BOOT1) {
881 /* print out mem list */
882 prom_printf("Available %lx physical memory before cleanup:\r\n",
883 (u_long)avail);
884 for (mp = avail; mp->size; mp++) {
885 prom_printf("memlist start %lx size %lx\r\n",
886 (u_long)mp->start,
887 (u_long)mp->size);
888 }
889 prom_printf("End of available physical memory before cleanup\r\n");
890 prom_printf("kernel physical text size %08lx - %08lx\r\n",
891 (u_long)ktextp, (u_long)ektextp);
892 prom_printf("kernel physical data size %08lx - %08lx\r\n",
893 (u_long)kdatap, (u_long)ekdatap);
894 }
895 #endif
896 /*
897 * Here's another quick in-lined bubble sort.
898 */
899 for (i = 0; i < pcnt; i++) {
900 for (j = i; j < pcnt; j++) {
901 if (avail[j].start < avail[i].start) {
902 struct mem_region tmp;
903 tmp = avail[i];
904 avail[i] = avail[j];
905 avail[j] = tmp;
906 }
907 }
908 }
909
910 /* Throw away page zero if we have it. */
911 if (avail->start == 0) {
912 avail->start += NBPG;
913 avail->size -= NBPG;
914 }
915 /*
916 * Now we need to remove the area we valloc'ed from the available
917 * memory lists. (NB: we may have already alloc'ed the entire space).
918 */
919 for (mp = avail; mp->size; mp++) {
920 /*
921 * Check whether this region holds all of the kernel.
922 */
923 s = mp->start + mp->size;
924 if (mp->start < kdatap && s > roundup(ekdatap, 4*MEG)) {
925 avail[pcnt].start = roundup(ekdatap, 4*MEG);
926 avail[pcnt++].size = s - kdatap;
927 mp->size = kdatap - mp->start;
928 }
929 /*
930 * Look whether this region starts within the kernel.
931 */
932 if (mp->start >= kdatap &&
933 mp->start < roundup(ekdatap, 4*MEG)) {
934 s = ekdatap - mp->start;
935 if (mp->size > s)
936 mp->size -= s;
937 else
938 mp->size = 0;
939 mp->start = roundup(ekdatap, 4*MEG);
940 }
941 /*
942 * Now look whether this region ends within the kernel.
943 */
944 s = mp->start + mp->size;
945 if (s > kdatap && s < roundup(ekdatap, 4*MEG))
946 mp->size -= s - kdatap;
947 /*
948 * Now page align the start of the region.
949 */
950 s = mp->start % NBPG;
951 if (mp->size >= s) {
952 mp->size -= s;
953 mp->start += s;
954 }
955 /*
956 * And now align the size of the region.
957 */
958 mp->size -= mp->size % NBPG;
959 /*
960 * Check whether some memory is left here.
961 */
962 if (mp->size == 0) {
963 bcopy(mp + 1, mp,
964 (pcnt - (mp - avail)) * sizeof *mp);
965 pcnt--;
966 mp--;
967 continue;
968 }
969 s = mp->start;
970 sz = mp->size;
971 for (mp1 = avail; mp1 < mp; mp1++)
972 if (s < mp1->start)
973 break;
974 if (mp1 < mp) {
975 bcopy(mp1, mp1 + 1, (char *)mp - (char *)mp1);
976 mp1->start = s;
977 mp1->size = sz;
978 }
979 /*
980 * In future we should be able to specify both allocated
981 * and free.
982 */
983 uvm_page_physload(
984 atop(mp->start),
985 atop(mp->start+mp->size),
986 atop(mp->start),
987 atop(mp->start+mp->size), 0);
988 }
989
990 #if 0
991 /* finally, free up any space that valloc did not use */
992 prom_unmap_virt((vaddr_t)ekdata, roundup(ekdata, 4*MEG) - ekdata);
993 if (ekdatap < roundup(kdatap, 4*MEG)) {
994 uvm_page_physload(atop(ekdatap),
995 atop(roundup(ekdatap, (4*MEG))),
996 atop(ekdatap),
997 atop(roundup(ekdatap, (4*MEG))), 0);
998 }
999 #endif
1000
1001 #ifdef DEBUG
1002 if (pmapdebug & PDB_BOOT) {
1003 /* print out mem list */
1004 prom_printf("Available physical memory after cleanup:\r\n");
1005 for (mp = avail; mp->size; mp++) {
1006 prom_printf("avail start %lx size %lx\r\n",
1007 (long)mp->start, (long)mp->size);
1008 }
1009 prom_printf("End of available physical memory after cleanup\r\n");
1010 }
1011 #endif
1012 /*
1013 * Allocate and clear out pmap_kernel()->pm_segs[]
1014 */
1015 mtx_init(&pmap_kernel()->pm_mtx, IPL_VM);
1016 pmap_kernel()->pm_refs = 1;
1017 pmap_kernel()->pm_ctx = 0;
1018 {
1019 paddr_t newp;
1020
1021 do {
1022 pmap_get_page(&newp, NULL, pmap_kernel());
1023 } while (!newp); /* Throw away page zero */
1024 pmap_kernel()->pm_segs=(int64_t *)(u_long)newp;
1025 pmap_kernel()->pm_physaddr = newp;
1026 /* mark kernel context as busy */
1027 ((paddr_t*)ctxbusy)[0] = pmap_kernel()->pm_physaddr;
1028 }
1029 /*
1030 * finish filling out kernel pmap.
1031 */
1032
1033 BDPRINTF(PDB_BOOT, ("pmap_kernel()->pm_physaddr = %lx\r\n",
1034 (long)pmap_kernel()->pm_physaddr));
1035 /*
1036 * Tell pmap about our mesgbuf -- Hope this works already
1037 */
1038 #ifdef DEBUG
1039 BDPRINTF(PDB_BOOT1, ("Calling consinit()\r\n"));
1040 if (pmapdebug & PDB_BOOT1) consinit();
1041 BDPRINTF(PDB_BOOT1, ("Inserting mesgbuf into pmap_kernel()\r\n"));
1042 #endif
1043 /* it's not safe to call pmap_enter so we need to do this ourselves */
1044 va = (vaddr_t)msgbufp;
1045 prom_map_phys(phys_msgbuf, msgbufsiz, (vaddr_t)msgbufp, -1);
1046 while (msgbufsiz) {
1047 data = TSB_DATA(0 /* global */,
1048 PGSZ_8K,
1049 phys_msgbuf,
1050 1 /* priv */,
1051 1 /* Write */,
1052 1 /* Cacheable */,
1053 0 /* ALIAS -- Disable D$ */,
1054 1 /* valid */,
1055 0 /* IE */);
1056 pmap_enter_kpage(va, data);
1057 va += PAGE_SIZE;
1058 msgbufsiz -= PAGE_SIZE;
1059 phys_msgbuf += PAGE_SIZE;
1060 }
1061 BDPRINTF(PDB_BOOT1, ("Done inserting mesgbuf into pmap_kernel()\r\n"));
1062
1063 BDPRINTF(PDB_BOOT1, ("Inserting PROM mappings into pmap_kernel()\r\n"));
1064 data = (CPU_ISSUN4V ? SUN4V_TLB_EXEC : SUN4U_TLB_EXEC);
1065 for (i = 0; i < prom_map_size; i++) {
1066 if (prom_map[i].vstart && ((prom_map[i].vstart>>32) == 0)) {
1067 for (j = 0; j < prom_map[i].vsize; j += NBPG) {
1068 int k;
1069 uint64_t tte;
1070
1071 for (k = 0; page_size_map[k].mask; k++) {
1072 if (((prom_map[i].vstart |
1073 prom_map[i].tte) &
1074 page_size_map[k].mask) == 0 &&
1075 page_size_map[k].mask <
1076 prom_map[i].vsize)
1077 break;
1078 }
1079 /* Enter PROM map into pmap_kernel() */
1080 tte = prom_map[i].tte;
1081 if (CPU_ISSUN4V)
1082 tte &= ~SUN4V_TLB_SOFT_MASK;
1083 else
1084 tte &= ~(SUN4U_TLB_SOFT2_MASK |
1085 SUN4U_TLB_SOFT_MASK);
1086 pmap_enter_kpage(prom_map[i].vstart + j,
1087 (tte + j) | data | page_size_map[k].code);
1088 }
1089 }
1090 }
1091 BDPRINTF(PDB_BOOT1, ("Done inserting PROM mappings into pmap_kernel()\r\n"));
1092
1093 /*
1094 * Fix up start of kernel heap.
1095 */
1096 vmmap = (vaddr_t)roundup(ekdata, 4*MEG);
1097 /* Let's keep 1 page of redzone after the kernel */
1098 vmmap += NBPG;
1099 {
1100 extern vaddr_t u0[2];
1101 extern struct pcb *proc0paddr;
1102 extern void main(void);
1103 paddr_t pa;
1104
1105 /* Initialize all the pointers to u0 */
1106 u0[0] = vmmap;
1107 /* Allocate some VAs for u0 */
1108 u0[1] = vmmap + 2*USPACE;
1109
1110 BDPRINTF(PDB_BOOT1,
1111 ("Inserting stack 0 into pmap_kernel() at %p\r\n",
1112 vmmap));
1113
1114 while (vmmap < u0[1]) {
1115 int64_t data;
1116
1117 pmap_get_page(&pa, NULL, pmap_kernel());
1118 prom_map_phys(pa, NBPG, vmmap, -1);
1119 data = TSB_DATA(0 /* global */,
1120 PGSZ_8K,
1121 pa,
1122 1 /* priv */,
1123 1 /* Write */,
1124 1 /* Cacheable */,
1125 0 /* ALIAS -- Disable D$ */,
1126 1 /* valid */,
1127 0 /* IE */);
1128 pmap_enter_kpage(vmmap, data);
1129 vmmap += NBPG;
1130 }
1131 BDPRINTF(PDB_BOOT1,
1132 ("Done inserting stack 0 into pmap_kernel()\r\n"));
1133
1134 /* Now map in and initialize our cpu_info structure */
1135 #ifdef DIAGNOSTIC
1136 vmmap += NBPG; /* redzone -- XXXX do we need one? */
1137 #endif
1138 intstk = vmmap = roundup(vmmap, 64*KB);
1139 cpus = (struct cpu_info *)(intstk + CPUINFO_VA - INTSTACK);
1140
1141 BDPRINTF(PDB_BOOT1,
1142 ("Inserting cpu_info into pmap_kernel() at %p\r\n",
1143 cpus));
1144 /* Now map in all 8 pages of cpu_info */
1145 pa = cpu0paddr;
1146 prom_map_phys(pa, 64*KB, vmmap, -1);
1147 /*
1148 * Also map it in as the interrupt stack.
1149 * This lets the PROM see this if needed.
1150 *
1151 * XXXX locore.s does not flush these mappings
1152 * before installing the locked TTE.
1153 */
1154 prom_map_phys(pa, 64*KB, CPUINFO_VA, -1);
1155 for (i=0; i<8; i++) {
1156 int64_t data;
1157
1158 data = TSB_DATA(0 /* global */,
1159 PGSZ_8K,
1160 pa,
1161 1 /* priv */,
1162 1 /* Write */,
1163 1 /* Cacheable */,
1164 0 /* ALIAS -- Disable D$ */,
1165 1 /* valid */,
1166 0 /* IE */);
1167 pmap_enter_kpage(vmmap, data);
1168 vmmap += NBPG;
1169 pa += NBPG;
1170 }
1171 BDPRINTF(PDB_BOOT1, ("Initializing cpu_info\r\n"));
1172
1173 /* Initialize our cpu_info structure */
1174 bzero((void *)intstk, 8*NBPG);
1175 cpus->ci_self = cpus;
1176 cpus->ci_next = NULL; /* Redundant, I know. */
1177 cpus->ci_curproc = &proc0;
1178 cpus->ci_cpcb = (struct pcb *)u0[0]; /* Need better source */
1179 cpus->ci_cpcbpaddr = pseg_get(pmap_kernel(), u0[0]) &
1180 TLB_PA_MASK;
1181 cpus->ci_upaid = cpu_myid();
1182 cpus->ci_cpuid = 0;
1183 cpus->ci_flags = CPUF_RUNNING;
1184 cpus->ci_fpproc = NULL;
1185 cpus->ci_spinup = main; /* Call main when we're running. */
1186 cpus->ci_initstack = (void *)u0[1];
1187 cpus->ci_paddr = cpu0paddr;
1188 #ifdef SUN4V
1189 cpus->ci_mmfsa = cpu0paddr;
1190 #endif
1191 proc0paddr = cpus->ci_cpcb;
1192
1193 cpu0paddr += 64 * KB;
1194
1195 /* The rest will be done at CPU attach time. */
1196 BDPRINTF(PDB_BOOT1,
1197 ("Done inserting cpu_info into pmap_kernel()\r\n"));
1198 }
1199
1200 vmmap = (vaddr_t)reserve_dumppages((caddr_t)(u_long)vmmap);
1201 BDPRINTF(PDB_BOOT1, ("Finished pmap_bootstrap()\r\n"));
1202
1203 pmap_bootstrap_cpu(cpus->ci_paddr);
1204 }
1205
1206 void sun4u_bootstrap_cpu(paddr_t);
1207 void sun4v_bootstrap_cpu(paddr_t);
1208
1209 void
1210 pmap_bootstrap_cpu(paddr_t intstack)
1211 {
1212 if (CPU_ISSUN4V)
1213 sun4v_bootstrap_cpu(intstack);
1214 else
1215 sun4u_bootstrap_cpu(intstack);
1216 }
1217
1218 extern void sun4u_set_tsbs(void);
1219
1220 void
1221 sun4u_bootstrap_cpu(paddr_t intstack)
1222 {
1223 u_int64_t data;
1224 paddr_t pa;
1225 vaddr_t va;
1226 int index;
1227 int impl;
1228
1229 impl = (getver() & VER_IMPL) >> VER_IMPL_SHIFT;
1230
1231 /*
1232 * Establish the 4MB locked mappings for kernel data and text.
1233 *
1234 * The text segment needs to be mapped into the DTLB too,
1235 * because of .rodata.
1236 */
1237
1238 index = 15; /* XXX */
1239 for (va = ktext, pa = ktextp; va < ektext; va += 4*MEG, pa += 4*MEG) {
1240 data = SUN4U_TSB_DATA(0, PGSZ_4M, pa, 1, 0, 1, 0, 1, 0);
1241 data |= SUN4U_TLB_L;
1242 prom_itlb_load(index, data, va);
1243 prom_dtlb_load(index, data, va);
1244 index--;
1245 }
1246
1247 for (va = kdata, pa = kdatap; va < ekdata; va += 4*MEG, pa += 4*MEG) {
1248 data = SUN4U_TSB_DATA(0, PGSZ_4M, pa, 1, 1, 1, 0, 1, 0);
1249 data |= SUN4U_TLB_L;
1250 prom_dtlb_load(index, data, va);
1251 index--;
1252 }
1253
1254 #ifdef MULTIPROCESSOR
1255 if (impl >= IMPL_OLYMPUS_C && impl <= IMPL_JUPITER) {
1256 /*
1257 * On SPARC64-VI and SPARC64-VII processors, the MMU is
1258 * shared between threads, so we can't establish a locked
1259 * mapping for the interrupt stack since the mappings would
1260 * conflict. Instead we stick the address in a scratch
1261 * register, like we do for sun4v.
1262 */
1263 pa = intstack + (CPUINFO_VA - INTSTACK);
1264 pa += offsetof(struct cpu_info, ci_self);
1265 va = ldxa(pa, ASI_PHYS_CACHED);
1266 stxa(0x00, ASI_SCRATCH, va);
1267
1268 if ((CPU_JUPITERID % 2) == 1)
1269 index--;
1270
1271 data = SUN4U_TSB_DATA(0, PGSZ_64K, intstack, 1, 1, 1, 0, 1, 0);
1272 data |= SUN4U_TLB_L;
1273 prom_dtlb_load(index, data, va - (CPUINFO_VA - INTSTACK));
1274
1275 sun4u_set_tsbs();
1276 return;
1277 }
1278 #endif
1279
1280 /*
1281 * Establish the 64KB locked mapping for the interrupt stack.
1282 */
1283
1284 data = SUN4U_TSB_DATA(0, PGSZ_64K, intstack, 1, 1, 1, 0, 1, 0);
1285 data |= SUN4U_TLB_L;
1286 prom_dtlb_load(index, data, INTSTACK);
1287
1288 sun4u_set_tsbs();
1289 }
1290
1291 void
1292 sun4v_bootstrap_cpu(paddr_t intstack)
1293 {
1294 #ifdef SUN4V
1295 u_int64_t data;
1296 paddr_t pa;
1297 vaddr_t va;
1298 int err;
1299
1300 /*
1301 * Establish the 4MB locked mappings for kernel data and text.
1302 *
1303 * The text segment needs to be mapped into the DTLB too,
1304 * because of .rodata.
1305 */
1306
1307 for (va = ktext, pa = ktextp; va < ektext; va += 4*MEG, pa += 4*MEG) {
1308 data = SUN4V_TSB_DATA(0, PGSZ_4M, pa, 1, 0, 1, 0, 1, 0);
1309 data |= SUN4V_TLB_X;
1310 err = hv_mmu_map_perm_addr(va, data, MAP_ITLB|MAP_DTLB);
1311 if (err != H_EOK)
1312 prom_printf("err: %d\r\n", err);
1313 }
1314
1315 for (va = kdata, pa = kdatap; va < ekdata; va += 4*MEG, pa += 4*MEG) {
1316 data = SUN4V_TSB_DATA(0, PGSZ_4M, pa, 1, 1, 1, 0, 1, 0);
1317 err = hv_mmu_map_perm_addr(va, data, MAP_DTLB);
1318 if (err != H_EOK)
1319 prom_printf("err: %d\r\n", err);
1320 }
1321
1322 #ifndef MULTIPROCESSOR
1323 /*
1324 * Establish the 64KB locked mapping for the interrupt stack.
1325 */
1326 data = SUN4V_TSB_DATA(0, PGSZ_64K, intstack, 1, 1, 1, 0, 1, 0);
1327 err = hv_mmu_map_perm_addr(INTSTACK, data, MAP_DTLB);
1328 if (err != H_EOK)
1329 prom_printf("err: %d\r\n", err);
1330 #else
1331 pa = intstack + (CPUINFO_VA - INTSTACK);
1332 pa += offsetof(struct cpu_info, ci_self);
1333 stxa(0x00, ASI_SCRATCHPAD, ldxa(pa, ASI_PHYS_CACHED));
1334 #endif
1335
1336 stxa(0x10, ASI_SCRATCHPAD, intstack + (CPUINFO_VA - INTSTACK));
1337
1338 err = hv_mmu_tsb_ctx0(1, (paddr_t)tsb_desc + kdatap - kdata);
1339 if (err != H_EOK)
1340 prom_printf("err: %d\r\n", err);
1341 err = hv_mmu_tsb_ctxnon0(1, (paddr_t)tsb_desc + kdatap - kdata);
1342 if (err != H_EOK)
1343 prom_printf("err: %d\r\n", err);
1344 #endif
1345 }
1346
1347 /*
1348 * Initialize anything else for pmap handling.
1349 * Called during uvm_init().
1350 */
1351 void
1352 pmap_init(void)
1353 {
1354 BDPRINTF(PDB_BOOT1, ("pmap_init()\r\n"));
1355 if (PAGE_SIZE != NBPG)
1356 panic("pmap_init: CLSIZE!=1");
1357
1358 /* Setup a pool for additional pvlist structures */
1359 pool_init(&pv_pool, sizeof(struct pv_entry), 0, IPL_VM, 0,
1360 "pv_entry", NULL);
1361 pool_init(&pmap_pool, sizeof(struct pmap), 0, IPL_NONE, 0,
1362 "pmappl", NULL);
1363 }
1364
1365 /* Start of non-cacheable physical memory on UltraSPARC-III. */
1366 #define VM_MAXPHYS_ADDRESS ((vaddr_t)0x0000040000000000L)
1367
1368 static vaddr_t kbreak; /* End of kernel VA */
1369
1370 /*
1371 * How much virtual space is available to the kernel?
1372 */
1373 void
1374 pmap_virtual_space(vaddr_t *start, vaddr_t *end)
1375 {
1376 /*
1377 * Make sure virtual memory and physical memory don't overlap
1378 * to avoid problems with ASI_PHYS_CACHED on UltraSPARC-III.
1379 */
1380 if (vmmap < VM_MAXPHYS_ADDRESS)
1381 vmmap = VM_MAXPHYS_ADDRESS;
1382
1383 /* Reserve two pages for pmap_copy_page && /dev/mem */
1384 *start = kbreak = (vaddr_t)(vmmap + 2*NBPG);
1385 *end = VM_MAX_KERNEL_ADDRESS;
1386 BDPRINTF(PDB_BOOT1, ("pmap_virtual_space: %x-%x\r\n", *start, *end));
1387 }
1388
1389 /*
1390 * Preallocate kernel page tables to a specified VA.
1391 * This simply loops through the first TTE for each
1392 * page table from the beginning of the kernel pmap,
1393 * reads the entry, and if the result is
1394 * zero (either invalid entry or no page table) it stores
1395 * a zero there, populating page tables in the process.
1396 * This is not the most efficient technique but I don't
1397 * expect it to be called that often.
1398 */
1399 vaddr_t
1400 pmap_growkernel(vaddr_t maxkvaddr)
1401 {
1402 paddr_t pg;
1403 struct pmap *pm = pmap_kernel();
1404
1405 if (maxkvaddr >= VM_MAX_KERNEL_ADDRESS) {
1406 printf("WARNING: cannot extend kernel pmap beyond %p to %p\n",
1407 (void *)VM_MAX_KERNEL_ADDRESS, (void *)maxkvaddr);
1408 return (kbreak);
1409 }
1410
1411 /* Align with the start of a page table */
1412 for (kbreak &= (-1<<PDSHIFT); kbreak < maxkvaddr;
1413 kbreak += (1<<PDSHIFT)) {
1414 if (pseg_get(pm, kbreak))
1415 continue;
1416
1417 pg = 0;
1418 while (pseg_set(pm, kbreak, 0, pg) == 1) {
1419 pg = 0;
1420 pmap_get_page(&pg, "growk", pm);
1421 }
1422
1423 }
1424
1425 return (kbreak);
1426 }
1427
1428 /*
1429 * Create and return a physical map.
1430 */
1431 struct pmap *
1432 pmap_create(void)
1433 {
1434 struct pmap *pm;
1435
1436 pm = pool_get(&pmap_pool, PR_WAITOK | PR_ZERO);
1437
1438 mtx_init(&pm->pm_mtx, IPL_VM);
1439 pm->pm_refs = 1;
1440 pmap_get_page(&pm->pm_physaddr, "pmap_create", pm);
1441 pm->pm_segs = (int64_t *)(u_long)pm->pm_physaddr;
1442 ctx_alloc(pm);
1443
1444 return (pm);
1445 }
1446
1447 /*
1448 * Add a reference to the given pmap.
1449 */
1450 void
1451 pmap_reference(struct pmap *pm)
1452 {
1453 atomic_inc_int(&pm->pm_refs);
1454 }
1455
1456 /*
1457 * Retire the given pmap from service.
1458 * Should only be called if the map contains no valid mappings.
1459 */
1460 void
1461 pmap_destroy(struct pmap *pm)
1462 {
1463 if (atomic_dec_int_nv(&pm->pm_refs) == 0) {
1464 pmap_release(pm);
1465 pool_put(&pmap_pool, pm);
1466 }
1467 }
1468
1469 /*
1470 * Release any resources held by the given physical map.
1471 * Called when a pmap initialized by pmap_create is being released.
1472 */
1473 void
1474 pmap_release(struct pmap *pm)
1475 {
1476 int i, j, k;
1477 paddr_t *pdir, *ptbl, tmp;
1478
1479 #ifdef DIAGNOSTIC
1480 if(pm == pmap_kernel())
1481 panic("pmap_release: releasing pmap_kernel()");
1482 #endif
1483
1484 mtx_enter(&pm->pm_mtx);
1485 for(i=0; i<STSZ; i++) {
1486 paddr_t psegentp = (paddr_t)(u_long)&pm->pm_segs[i];
1487 if((pdir = (paddr_t *)(u_long)ldxa((vaddr_t)psegentp,
1488 ASI_PHYS_CACHED))) {
1489 for (k=0; k<PDSZ; k++) {
1490 paddr_t pdirentp = (paddr_t)(u_long)&pdir[k];
1491 if ((ptbl = (paddr_t *)(u_long)ldxa(
1492 (vaddr_t)pdirentp, ASI_PHYS_CACHED))) {
1493 for (j=0; j<PTSZ; j++) {
1494 int64_t data;
1495 paddr_t pa;
1496 pv_entry_t pv;
1497
1498 data = ldxa((vaddr_t)&ptbl[j],
1499 ASI_PHYS_CACHED);
1500 if (!(data & TLB_V))
1501 continue;
1502 pa = data & TLB_PA_MASK;
1503 pv = pa_to_pvh(pa);
1504 if (pv != NULL) {
1505 printf("pmap_release: pm=%p page %llx still in use\n", pm,
1506 (unsigned long long)(((u_int64_t)i<<STSHIFT)|((u_int64_t)k<<PDSHIFT)|((u_int64_t)j<<PTSHIFT)));
1507 db_enter();
1508 }
1509 }
1510 stxa(pdirentp, ASI_PHYS_CACHED, 0);
1511 pmap_free_page((paddr_t)ptbl, pm);
1512 }
1513 }
1514 stxa(psegentp, ASI_PHYS_CACHED, 0);
1515 pmap_free_page((paddr_t)pdir, pm);
1516 }
1517 }
1518 tmp = (paddr_t)(u_long)pm->pm_segs;
1519 pm->pm_segs = NULL;
1520 pmap_free_page(tmp, pm);
1521 mtx_leave(&pm->pm_mtx);
1522 ctx_free(pm);
1523 }
1524
1525 /*
1526 * Garbage collects the physical map system for
1527 * pages which are no longer used.
1528 * Success need not be guaranteed -- that is, there
1529 * may well be pages which are not referenced, but
1530 * others may be collected.
1531 * Called by the pageout daemon when pages are scarce.
1532 */
1533 void
1534 pmap_collect(struct pmap *pm)
1535 {
1536 int i, j, k, n, m, s;
1537 paddr_t *pdir, *ptbl;
1538 /* This is a good place to scan the pmaps for page tables with
1539 * no valid mappings in them and free them. */
1540
1541 /* NEVER GARBAGE COLLECT THE KERNEL PMAP */
1542 if (pm == pmap_kernel())
1543 return;
1544
1545 s = splvm();
1546 for (i=0; i<STSZ; i++) {
1547 if ((pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i], ASI_PHYS_CACHED))) {
1548 m = 0;
1549 for (k=0; k<PDSZ; k++) {
1550 if ((ptbl = (paddr_t *)(u_long)ldxa((vaddr_t)&pdir[k], ASI_PHYS_CACHED))) {
1551 m++;
1552 n = 0;
1553 for (j=0; j<PTSZ; j++) {
1554 int64_t data = ldxa((vaddr_t)&ptbl[j], ASI_PHYS_CACHED);
1555 if (data&TLB_V)
1556 n++;
1557 }
1558 if (!n) {
1559 /* Free the damn thing */
1560 stxa((paddr_t)(u_long)&pdir[k], ASI_PHYS_CACHED, 0);
1561 pmap_free_page((paddr_t)ptbl, pm);
1562 }
1563 }
1564 }
1565 if (!m) {
1566 /* Free the damn thing */
1567 stxa((paddr_t)(u_long)&pm->pm_segs[i], ASI_PHYS_CACHED, 0);
1568 pmap_free_page((paddr_t)pdir, pm);
1569 }
1570 }
1571 }
1572 splx(s);
1573 }
1574
1575 void
1576 pmap_zero_page(struct vm_page *pg)
1577 {
1578 pmap_zero_phys(VM_PAGE_TO_PHYS(pg));
1579 }
1580
1581 void
1582 pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
1583 {
1584 paddr_t src = VM_PAGE_TO_PHYS(srcpg);
1585 paddr_t dst = VM_PAGE_TO_PHYS(dstpg);
1586
1587 pmap_copy_phys(src, dst);
1588 }
1589
1590 /*
1591 * Activate the address space for the specified process. If the
1592 * process is the current process, load the new MMU context.
1593 */
1594 void
1595 pmap_activate(struct proc *p)
1596 {
1597 struct pmap *pmap = p->p_vmspace->vm_map.pmap;
1598 int s;
1599
1600 /*
1601 * This is essentially the same thing that happens in cpu_switch()
1602 * when the newly selected process is about to run, except that we
1603 * have to make sure to clean the register windows before we set
1604 * the new context.
1605 */
1606
1607 s = splvm();
1608 if (p == curproc) {
1609 write_user_windows();
1610 if (pmap->pm_ctx == 0)
1611 ctx_alloc(pmap);
1612 if (CPU_ISSUN4V)
1613 stxa(CTX_SECONDARY, ASI_MMU_CONTEXTID, pmap->pm_ctx);
1614 else
1615 stxa(CTX_SECONDARY, ASI_DMMU, pmap->pm_ctx);
1616 }
1617 splx(s);
1618 }
1619
1620 /*
1621 * Deactivate the address space of the specified process.
1622 */
1623 void
1624 pmap_deactivate(struct proc *p)
1625 {
1626 }
1627
1628 /*
1629 * pmap_kenter_pa: [ INTERFACE ]
1630 *
1631 * Enter a va -> pa mapping into the kernel pmap without any
1632 * physical->virtual tracking.
1633 *
1634 * Note: no locking is necessary in this function.
1635 */
1636 void
1637 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
1638 {
1639 struct pmap *pm = pmap_kernel();
1640 pte_t tte;
1641
1642 KDASSERT(va < INTSTACK || va > EINTSTACK);
1643 KDASSERT(va < kdata || va > ekdata);
1644
1645 #ifdef DIAGNOSTIC
1646 if (pa & (PMAP_NVC|PMAP_NC|PMAP_LITTLE))
1647 panic("%s: illegal cache flags 0x%lx", __func__, pa);
1648 #endif
1649
1650 /*
1651 * Construct the TTE.
1652 */
1653 tte.tag = TSB_TAG(0, pm->pm_ctx,va);
1654 if (CPU_ISSUN4V) {
1655 tte.data = SUN4V_TSB_DATA(0, PGSZ_8K, pa, 1 /* Privileged */,
1656 (PROT_WRITE & prot), 1, 0, 1, 0);
1657 /*
1658 * We don't track modification on kenter mappings.
1659 */
1660 if (prot & PROT_WRITE)
1661 tte.data |= SUN4V_TLB_REAL_W|SUN4V_TLB_W;
1662 if (prot & PROT_EXEC)
1663 tte.data |= SUN4V_TLB_EXEC;
1664 tte.data |= SUN4V_TLB_TSB_LOCK; /* wired */
1665 } else {
1666 tte.data = SUN4U_TSB_DATA(0, PGSZ_8K, pa, 1 /* Privileged */,
1667 (PROT_WRITE & prot), 1, 0, 1, 0);
1668 /*
1669 * We don't track modification on kenter mappings.
1670 */
1671 if (prot & PROT_WRITE)
1672 tte.data |= SUN4U_TLB_REAL_W|SUN4U_TLB_W;
1673 if (prot & PROT_EXEC)
1674 tte.data |= SUN4U_TLB_EXEC;
1675 if (prot == PROT_EXEC)
1676 tte.data |= SUN4U_TLB_EXEC_ONLY;
1677 tte.data |= SUN4U_TLB_TSB_LOCK; /* wired */
1678 }
1679 KDASSERT((tte.data & TLB_NFO) == 0);
1680
1681 /* Kernel page tables are pre-allocated. */
1682 if (pseg_set(pm, va, tte.data, 0) != 0)
1683 panic("%s: no pseg", __func__);
1684
1685 /* this is correct */
1686 dcache_flush_page(pa);
1687 }
1688
1689 /*
1690 * pmap_kremove: [ INTERFACE ]
1691 *
1692 * Remove a mapping entered with pmap_kenter_pa() starting at va,
1693 * for size bytes (assumed to be page rounded).
1694 */
1695 void
1696 pmap_kremove(vaddr_t va, vsize_t size)
1697 {
1698 struct pmap *pm = pmap_kernel();
1699
1700 KDASSERT(va < INTSTACK || va > EINTSTACK);
1701 KDASSERT(va < kdata || va > ekdata);
1702
1703 while (size >= NBPG) {
1704 /*
1705 * Is this part of the permanent 4MB mapping?
1706 */
1707 #ifdef DIAGNOSTIC
1708 if (pm == pmap_kernel() &&
1709 (va >= ktext && va < roundup(ekdata, 4*MEG)))
1710 panic("%s: va=0x%lx in locked TLB", __func__, va);
1711 #endif
1712 /* Shouldn't need to do this if the entry's not valid. */
1713 if (pseg_get(pm, va)) {
1714 /* We need to flip the valid bit and clear the access statistics. */
1715 if (pseg_set(pm, va, 0, 0)) {
1716 printf("pmap_kremove: gotten pseg empty!\n");
1717 db_enter();
1718 /* panic? */
1719 }
1720
1721 tsb_invalidate(pm->pm_ctx, va);
1722 /* Here we assume nothing can get into the TLB unless it has a PTE */
1723 tlb_flush_pte(va, pm->pm_ctx);
1724 }
1725 va += NBPG;
1726 size -= NBPG;
1727 }
1728 }
1729
1730 /*
1731 * Insert physical page at pa into the given pmap at virtual address va.
1732 * Supports 64-bit pa so we can map I/O space.
1733 */
1734 int
1735 pmap_enter(struct pmap *pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
1736 {
1737 pte_t tte;
1738 paddr_t pg;
1739 int aliased = 0;
1740 pv_entry_t pv, npv;
1741 int size = 0; /* PMAP_SZ_TO_TTE(pa); */
1742 boolean_t wired = (flags & PMAP_WIRED) != 0;
1743
1744 /*
1745 * Is this part of the permanent mappings?
1746 */
1747 KDASSERT(pm != pmap_kernel() || va < INTSTACK || va > EINTSTACK);
1748 KDASSERT(pm != pmap_kernel() || va < kdata || va > ekdata);
1749
1750 npv = pool_get(&pv_pool, PR_NOWAIT);
1751 if (npv == NULL && (flags & PMAP_CANFAIL))
1752 return (ENOMEM);
1753
1754 /*
1755 * XXXX If a mapping at this address already exists, remove it.
1756 */
1757 mtx_enter(&pm->pm_mtx);
1758 tte.data = pseg_get(pm, va);
1759 if (tte.data & TLB_V) {
1760 mtx_leave(&pm->pm_mtx);
1761 pmap_remove(pm, va, va + NBPG-1);
1762 mtx_enter(&pm->pm_mtx);
1763 tte.data = pseg_get(pm, va);
1764 }
1765
1766 /*
1767 * Construct the TTE.
1768 */
1769 pv = pa_to_pvh(pa);
1770 if (pv != NULL) {
1771 struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
1772
1773 mtx_enter(&pg->mdpage.pvmtx);
1774 aliased = (pv->pv_va & PV_ALIAS);
1775 #ifdef DIAGNOSTIC
1776 if ((flags & PROT_MASK) & ~prot)
1777 panic("pmap_enter: access_type exceeds prot");
1778 #endif
1779 /* If we don't have the traphandler do it, set the ref/mod bits now */
1780 if (flags & PROT_MASK)
1781 pv->pv_va |= PV_REF;
1782 if (flags & PROT_WRITE)
1783 pv->pv_va |= PV_MOD;
1784 pv->pv_va |= pmap_tte2flags(tte.data);
1785 mtx_leave(&pg->mdpage.pvmtx);
1786 } else {
1787 aliased = 0;
1788 }
1789 if (pa & PMAP_NVC)
1790 aliased = 1;
1791 if (CPU_ISSUN4V) {
1792 tte.data = SUN4V_TSB_DATA(0, size, pa, pm == pmap_kernel(),
1793 (flags & PROT_WRITE), (!(pa & PMAP_NC)),
1794 aliased, 1, (pa & PMAP_LITTLE));
1795 if (prot & PROT_WRITE)
1796 tte.data |= SUN4V_TLB_REAL_W;
1797 if (prot & PROT_EXEC)
1798 tte.data |= SUN4V_TLB_EXEC;
1799 if (wired)
1800 tte.data |= SUN4V_TLB_TSB_LOCK;
1801 } else {
1802 tte.data = SUN4U_TSB_DATA(0, size, pa, pm == pmap_kernel(),
1803 (flags & PROT_WRITE), (!(pa & PMAP_NC)),
1804 aliased, 1, (pa & PMAP_LITTLE));
1805 if (prot & PROT_WRITE)
1806 tte.data |= SUN4U_TLB_REAL_W;
1807 if (prot & PROT_EXEC)
1808 tte.data |= SUN4U_TLB_EXEC;
1809 if (prot == PROT_EXEC)
1810 tte.data |= SUN4U_TLB_EXEC_ONLY;
1811 if (wired)
1812 tte.data |= SUN4U_TLB_TSB_LOCK;
1813 }
1814 KDASSERT((tte.data & TLB_NFO) == 0);
1815
1816 pg = 0;
1817 while (pseg_set(pm, va, tte.data, pg) == 1) {
1818 pg = 0;
1819 if (!pmap_get_page(&pg, NULL, pm)) {
1820 if ((flags & PMAP_CANFAIL) == 0)
1821 panic("pmap_enter: no memory");
1822 mtx_leave(&pm->pm_mtx);
1823 if (npv != NULL)
1824 pool_put(&pv_pool, npv);
1825 return (ENOMEM);
1826 }
1827 }
1828
1829 if (pv != NULL)
1830 npv = pmap_enter_pv(pm, npv, va, pa);
1831 atomic_inc_long(&pm->pm_stats.resident_count);
1832 mtx_leave(&pm->pm_mtx);
1833 if (pm->pm_ctx || pm == pmap_kernel()) {
1834 tsb_invalidate(pm->pm_ctx, va);
1835
1836 /* Force reload -- protections may be changed */
1837 tlb_flush_pte(va, pm->pm_ctx);
1838 }
1839 /* this is correct */
1840 dcache_flush_page(pa);
1841
1842 if (npv != NULL)
1843 pool_put(&pv_pool, npv);
1844
1845 /* We will let the fast mmu miss interrupt load the new translation */
1846 return 0;
1847 }
1848
1849 /*
1850 * Remove the given range of mapping entries.
1851 */
1852 void
1853 pmap_remove(struct pmap *pm, vaddr_t va, vaddr_t endva)
1854 {
1855 pv_entry_t pv, freepvs = NULL;
1856 int flush = 0;
1857 int64_t data;
1858 vaddr_t flushva = va;
1859
1860 /*
1861 * In here we should check each pseg and if there are no more entries,
1862 * free it. It's just that linear scans of 8K pages gets expensive.
1863 */
1864
1865 KDASSERT(pm != pmap_kernel() || endva < INTSTACK || va > EINTSTACK);
1866 KDASSERT(pm != pmap_kernel() || endva < kdata || va > ekdata);
1867
1868 mtx_enter(&pm->pm_mtx);
1869
1870 /* Now do the real work */
1871 while (va < endva) {
1872 /*
1873 * Is this part of the permanent 4MB mapping?
1874 */
1875 #ifdef DIAGNOSTIC
1876 if (pm == pmap_kernel() && va >= ktext &&
1877 va < roundup(ekdata, 4*MEG))
1878 panic("pmap_remove: va=%08x in locked TLB", (u_int)va);
1879 #endif
1880 /* We don't really need to do this if the valid bit is not set... */
1881 if ((data = pseg_get(pm, va)) && (data & TLB_V) != 0) {
1882 paddr_t entry;
1883
1884 flush |= 1;
1885 /* First remove it from the pv_table */
1886 entry = (data & TLB_PA_MASK);
1887 pv = pa_to_pvh(entry);
1888 if (pv != NULL) {
1889 pv = pmap_remove_pv(pm, va, entry);
1890 if (pv != NULL) {
1891 pv->pv_next = freepvs;
1892 freepvs = pv;
1893 }
1894 }
1895 /* We need to flip the valid bit and clear the access statistics. */
1896 if (pseg_set(pm, va, 0, 0)) {
1897 printf("pmap_remove: gotten pseg empty!\n");
1898 db_enter();
1899 /* panic? */
1900 }
1901 atomic_dec_long(&pm->pm_stats.resident_count);
1902 if (!pm->pm_ctx && pm != pmap_kernel())
1903 continue;
1904 tsb_invalidate(pm->pm_ctx, va);
1905 /* Here we assume nothing can get into the TLB unless it has a PTE */
1906 tlb_flush_pte(va, pm->pm_ctx);
1907 }
1908 va += NBPG;
1909 }
1910
1911 mtx_leave(&pm->pm_mtx);
1912
1913 while ((pv = freepvs) != NULL) {
1914 freepvs = pv->pv_next;
1915 pool_put(&pv_pool, pv);
1916 }
1917
1918 if (flush)
1919 cache_flush_virt(flushva, endva - flushva);
1920 }
1921
1922 /*
1923 * Change the protection on the specified range of this pmap.
1924 */
1925 void
1926 pmap_protect(struct pmap *pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
1927 {
1928 paddr_t pa;
1929 pv_entry_t pv;
1930 int64_t data;
1931
1932 KDASSERT(pm != pmap_kernel() || eva < INTSTACK || sva > EINTSTACK);
1933 KDASSERT(pm != pmap_kernel() || eva < kdata || sva > ekdata);
1934
1935 if ((prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
1936 return;
1937
1938 if (prot == PROT_NONE) {
1939 pmap_remove(pm, sva, eva);
1940 return;
1941 }
1942
1943 mtx_enter(&pm->pm_mtx);
1944 sva = sva & ~PGOFSET;
1945 while (sva < eva) {
1946 /*
1947 * Is this part of the permanent 4MB mapping?
1948 */
1949 if (pm == pmap_kernel() && sva >= ktext &&
1950 sva < roundup(ekdata, 4*MEG)) {
1951 prom_printf("pmap_protect: va=%08x in locked TLB\r\n", sva);
1952 OF_enter();
1953 mtx_leave(&pm->pm_mtx);
1954 return;
1955 }
1956
1957 if (((data = pseg_get(pm, sva))&TLB_V) /*&& ((data&TLB_TSB_LOCK) == 0)*/) {
1958 pa = data & TLB_PA_MASK;
1959 pv = pa_to_pvh(pa);
1960 if (pv != NULL) {
1961 struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
1962
1963 /* Save REF/MOD info */
1964 mtx_enter(&pg->mdpage.pvmtx);
1965 pv->pv_va |= pmap_tte2flags(data);
1966 mtx_leave(&pg->mdpage.pvmtx);
1967 }
1968 /* Just do the pmap and TSB, not the pv_list */
1969 if (CPU_ISSUN4V) {
1970 if ((prot & PROT_WRITE) == 0)
1971 data &= ~(SUN4V_TLB_W|SUN4V_TLB_REAL_W);
1972 if ((prot & PROT_EXEC) == 0)
1973 data &= ~(SUN4V_TLB_EXEC);
1974 } else {
1975 if ((prot & PROT_WRITE) == 0)
1976 data &= ~(SUN4U_TLB_W|SUN4U_TLB_REAL_W);
1977 if ((prot & PROT_EXEC) == 0)
1978 data &= ~(SUN4U_TLB_EXEC | SUN4U_TLB_EXEC_ONLY);
1979 }
1980 KDASSERT((data & TLB_NFO) == 0);
1981 if (pseg_set(pm, sva, data, 0)) {
1982 printf("pmap_protect: gotten pseg empty!\n");
1983 db_enter();
1984 /* panic? */
1985 }
1986
1987 if (!pm->pm_ctx && pm != pmap_kernel())
1988 continue;
1989 tsb_invalidate(pm->pm_ctx, sva);
1990 tlb_flush_pte(sva, pm->pm_ctx);
1991 }
1992 sva += NBPG;
1993 }
1994 mtx_leave(&pm->pm_mtx);
1995 }
1996
1997 /*
1998 * Extract the physical page address associated
1999 * with the given map/virtual_address pair.
2000 */
2001 boolean_t
2002 pmap_extract(struct pmap *pm, vaddr_t va, paddr_t *pap)
2003 {
2004 paddr_t pa;
2005
2006 if (pm == pmap_kernel()) {
2007 if (va >= kdata && va < roundup(ekdata, 4*MEG)) {
2008 /* Need to deal w/locked TLB entry specially. */
2009 pa = (paddr_t)(kdatap - kdata + va);
2010 } else if (va >= ktext && va < ektext) {
2011 /* Need to deal w/locked TLB entry specially. */
2012 pa = (paddr_t)(ktextp - ktext + va);
2013 } else if (va >= INTSTACK && va < EINTSTACK) {
2014 pa = curcpu()->ci_paddr + va - INTSTACK;
2015 } else {
2016 goto check_pseg;
2017 }
2018 } else {
2019 check_pseg:
2020 mtx_enter(&pm->pm_mtx);
2021 pa = pseg_get(pm, va) & TLB_PA_MASK;
2022 mtx_leave(&pm->pm_mtx);
2023 if (pa == 0)
2024 return FALSE;
2025 pa |= va & PAGE_MASK;
2026 }
2027 if (pap != NULL)
2028 *pap = pa;
2029 return TRUE;
2030 }
2031
2032 /*
2033 * Return the number of bytes that pmap_dumpmmu() will dump.
2034 */
2035 int
2036 pmap_dumpsize(void)
2037 {
2038 int sz;
2039
2040 sz = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t));
2041 sz += memsize * sizeof(phys_ram_seg_t);
2042
2043 return btodb(sz + DEV_BSIZE - 1);
2044 }
2045
2046 /*
2047 * Write the mmu contents to the dump device.
2048 * This gets appended to the end of a crash dump since
2049 * there is no in-core copy of kernel memory mappings on a 4/4c machine.
2050 *
2051 * Write the core dump headers and MD data to the dump device.
2052 * We dump the following items:
2053 *
2054 * kcore_seg_t (MI header defined in <sys/kcore.h>)
2055 * cpu_kcore_hdr_t (MD header defined in <machine/kcore.h>)
2056 * phys_ram_seg_t[memsize] physical memory segments
2057 */
2058 int
2059 pmap_dumpmmu(int (*dump)(dev_t, daddr_t, caddr_t, size_t), daddr_t blkno)
2060 {
2061 kcore_seg_t *kseg;
2062 cpu_kcore_hdr_t *kcpu;
2063 phys_ram_seg_t memseg;
2064 register int error = 0;
2065 register int i, memsegoffset;
2066 int buffer[dbtob(1) / sizeof(int)];
2067 int *bp, *ep;
2068
2069 #define EXPEDITE(p,n) do { \
2070 int *sp = (int *)(p); \
2071 int sz = (n); \
2072 while (sz > 0) { \
2073 *bp++ = *sp++; \
2074 if (bp >= ep) { \
2075 error = (*dump)(dumpdev, blkno, \
2076 (caddr_t)buffer, dbtob(1)); \
2077 if (error != 0) \
2078 return (error); \
2079 ++blkno; \
2080 bp = buffer; \
2081 } \
2082 sz -= 4; \
2083 } \
2084 } while (0)
2085
2086 /* Setup bookkeeping pointers */
2087 bp = buffer;
2088 ep = &buffer[sizeof(buffer) / sizeof(buffer[0])];
2089
2090 /* Fill in MI segment header */
2091 kseg = (kcore_seg_t *)bp;
2092 CORE_SETMAGIC(*kseg, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
2093 kseg->c_size = dbtob(pmap_dumpsize()) - ALIGN(sizeof(kcore_seg_t));
2094
2095 /* Fill in MD segment header (interpreted by MD part of libkvm) */
2096 kcpu = (cpu_kcore_hdr_t *)((long)bp + ALIGN(sizeof(kcore_seg_t)));
2097 kcpu->cputype = CPU_SUN4U;
2098 kcpu->kernbase = (u_int64_t)KERNBASE;
2099 kcpu->cpubase = (u_int64_t)CPUINFO_VA;
2100
2101 /* Describe the locked text segment */
2102 kcpu->ktextbase = (u_int64_t)ktext;
2103 kcpu->ktextp = (u_int64_t)ktextp;
2104 kcpu->ktextsz = (u_int64_t)(roundup(ektextp, 4*MEG) - ktextp);
2105
2106 /* Describe locked data segment */
2107 kcpu->kdatabase = (u_int64_t)kdata;
2108 kcpu->kdatap = (u_int64_t)kdatap;
2109 kcpu->kdatasz = (u_int64_t)(roundup(ekdatap, 4*MEG) - kdatap);
2110
2111 /* Now the memsegs */
2112 kcpu->nmemseg = memsize;
2113 kcpu->memsegoffset = memsegoffset = ALIGN(sizeof(cpu_kcore_hdr_t));
2114
2115 /* Now we need to point this at our kernel pmap. */
2116 kcpu->nsegmap = STSZ;
2117 kcpu->segmapoffset = (u_int64_t)pmap_kernel()->pm_physaddr;
2118
2119 /* Note: we have assumed everything fits in buffer[] so far... */
2120 bp = (int *)((long)kcpu + ALIGN(sizeof(cpu_kcore_hdr_t)));
2121
2122 for (i = 0; i < memsize; i++) {
2123 memseg.start = mem[i].start;
2124 memseg.size = mem[i].size;
2125 EXPEDITE(&memseg, sizeof(phys_ram_seg_t));
2126 }
2127
2128 if (bp != buffer)
2129 error = (*dump)(dumpdev, blkno++, (caddr_t)buffer, dbtob(1));
2130
2131 return (error);
2132 }
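
/*
 * Illustrative sketch, not part of this file: the EXPEDITE() macro above
 * batches variable-sized records into one disk-block-sized buffer and
 * writes a block whenever the buffer fills.  The standalone, byte-oriented
 * version below shows the same buffering idea; BLKSZ and the writeblk
 * callback are assumptions for the example, not kernel interfaces.
 * Guarded out so it is never compiled into the kernel.
 */
#if 0
#include <stddef.h>
#include <string.h>

#define BLKSZ	512			/* assumed block size (kernel: dbtob(1)) */

static int
expedite(char *buf, size_t *fill, const void *src, size_t len,
    int (*writeblk)(const void *blk, size_t blksz))
{
	const char *sp = src;

	while (len > 0) {
		size_t n = BLKSZ - *fill;

		if (n > len)
			n = len;
		memcpy(buf + *fill, sp, n);
		*fill += n;
		sp += n;
		len -= n;
		if (*fill == BLKSZ) {		/* block full: write it out */
			int error = (*writeblk)(buf, BLKSZ);
			if (error != 0)
				return error;
			*fill = 0;
		}
	}
	return 0;
}
#endif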
2133
2134 /*
2135 * Determine (non)existence of physical page
2136 */
2137 int
2138 pmap_pa_exists(paddr_t pa)
2139 {
2140 struct mem_region *mp;
2141
2142 /* Just go through physical memory list & see if we're there */
2143 for (mp = mem; mp->size && mp->start <= pa; mp++)
2144 if (mp->start <= pa && mp->start + mp->size >= pa)
2145 return 1;
2146 return 0;
2147 }
2148
2149 /*
2150 * Lookup the appropriate TSB entry.
2151 *
2152 * Here is the full official pseudo code:
2153 *
2154 */
2155
2156 #ifdef NOTYET
2157 int64 GenerateTSBPointer(
2158 int64 va, /* Missing VA */
2159 PointerType type, /* 8K_POINTER or 16K_POINTER */
2160 int64 TSBBase, /* TSB Register[63:13] << 13 */
2161 Boolean split, /* TSB Register[12] */
2162 int TSBSize) /* TSB Register[2:0] */
2163 {
2164 int64 vaPortion;
2165 int64 TSBBaseMask;
2166 int64 splitMask;
2167
2168 /* TSBBaseMask marks the bits from TSB Base Reg */
2169 TSBBaseMask = 0xffffffffffffe000 <<
2170 (split? (TSBSize + 1) : TSBSize);
2171
2172 /* Shift va towards lsb appropriately and */
2173 /* zero out the original va page offset */
2174 vaPortion = (va >> ((type == 8K_POINTER)? 9: 12)) &
2175 0xfffffffffffffff0;
2176
2177 if (split) {
2178 /* There's only one bit in question for split */
2179 splitMask = 1 << (13 + TSBSize);
2180 if (type == 8K_POINTER)
2181 /* Make sure we're in the lower half */
2182 vaPortion &= ~splitMask;
2183 else
2184 /* Make sure we're in the upper half */
2185 vaPortion |= splitMask;
2186 }
2187 return (TSBBase & TSBBaseMask) | (vaPortion & ~TSBBaseMask);
2188 }
2189 #endif
2190 /*
2191 * Of course, since we are not using a split TSB or variable page sizes,
2192 * we can optimize this a bit.
2193 *
2194 * The following only works for a unified 8K TSB. It will find the slot
2195 * for that particular va and return it. IT MAY BE FOR ANOTHER MAPPING!
2196 */
2197 int
2198 ptelookup_va(vaddr_t va)
2199 {
2200 long tsbptr;
2201 #define TSBBASEMASK (0xffffffffffffe000LL<<tsbsize)
2202
2203 tsbptr = (((va >> 9) & 0xfffffffffffffff0LL) & ~TSBBASEMASK );
2204 return (tsbptr/sizeof(pte_t));
2205 }
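
/*
 * Illustrative sketch, not part of this file: for a unified TSB of 8K
 * pages with 16-byte entries, the masking in ptelookup_va() above is just
 * "virtual page number modulo the number of TSB entries".  The standalone
 * check below exercises that equivalence; the tsbsize value and the
 * 16-byte entry size are assumptions for the example.
 */
#if 0
#include <assert.h>
#include <stdint.h>

int
main(void)
{
	int tsbsize = 0;			/* assumed: 512-entry TSB */
	uint64_t nents = UINT64_C(512) << tsbsize;
	uint64_t va;

	for (va = 0; va < (UINT64_C(1) << 44); va += UINT64_C(0x12345671)) {
		/* the expression used by ptelookup_va() */
		uint64_t basemask = UINT64_C(0xffffffffffffe000) << tsbsize;
		uint64_t ptr = ((va >> 9) & UINT64_C(0xfffffffffffffff0)) &
		    ~basemask;
		uint64_t slot = ptr / 16;	/* sizeof(pte_t) == 16 assumed */

		/* the simplified form: page number modulo TSB entries */
		assert(slot == ((va >> 13) & (nents - 1)));
	}
	return 0;
}
#endif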
2206
2207 /*
2208 * Do whatever is needed to sync the MOD/REF flags
2209 */
2210
2211 boolean_t
2212 pmap_clear_modify(struct vm_page *pg)
2213 {
2214 paddr_t pa = VM_PAGE_TO_PHYS(pg);
2215 int changed = 0;
2216 pv_entry_t pv;
2217
2218 /* Clear all mappings */
2219 mtx_enter(&pg->mdpage.pvmtx);
2220 pv = pa_to_pvh(pa);
2221 if (pv->pv_va & PV_MOD) {
2222 changed |= 1;
2223 pv->pv_va &= ~PV_MOD;
2224 }
2225 if (pv->pv_pmap != NULL) {
2226 for (; pv; pv = pv->pv_next) {
2227 int64_t data;
2228
2229 /* First clear the mod bit in the PTE and make it R/O */
2230 data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);
2231
2232 /* Need to both clear the modify and write bits */
2233 if (CPU_ISSUN4V) {
2234 if (data & (SUN4V_TLB_MODIFY))
2235 changed |= 1;
2236 data &= ~(SUN4V_TLB_MODIFY|SUN4V_TLB_W);
2237 } else {
2238 if (data & (SUN4U_TLB_MODIFY))
2239 changed |= 1;
2240 data &= ~(SUN4U_TLB_MODIFY|SUN4U_TLB_W);
2241 }
2242 KDASSERT((data & TLB_NFO) == 0);
2243 if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, data, 0)) {
2244 printf("pmap_clear_modify: gotten pseg empty!\n");
2245 db_enter();
2246 /* panic? */
2247 }
2248 if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
2249 tsb_invalidate(pv->pv_pmap->pm_ctx,
2250 (pv->pv_va & PV_VAMASK));
2251 tlb_flush_pte((pv->pv_va & PV_VAMASK),
2252 pv->pv_pmap->pm_ctx);
2253 }
2254 /* Then clear the mod bit in the pv */
2255 if (pv->pv_va & PV_MOD) {
2256 changed |= 1;
2257 pv->pv_va &= ~PV_MOD;
2258 }
2259 dcache_flush_page(pa);
2260 }
2261 }
2262 mtx_leave(&pg->mdpage.pvmtx);
2263
2264 return (changed);
2265 }
2266
2267 boolean_t
2268 pmap_clear_reference(struct vm_page *pg)
2269 {
2270 paddr_t pa = VM_PAGE_TO_PHYS(pg);
2271 int changed = 0;
2272 pv_entry_t pv;
2273
2274 /* Clear all references */
2275 mtx_enter(&pg->mdpage.pvmtx);
2276 pv = pa_to_pvh(pa);
2277 if (pv->pv_va & PV_REF) {
2278 changed = 1;
2279 pv->pv_va &= ~PV_REF;
2280 }
2281 if (pv->pv_pmap != NULL) {
2282 for (; pv; pv = pv->pv_next) {
2283 int64_t data;
2284
2285 data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);
2286 if (CPU_ISSUN4V) {
2287 if (data & SUN4V_TLB_ACCESS)
2288 changed = 1;
2289 data &= ~SUN4V_TLB_ACCESS;
2290 } else {
2291 if (data & SUN4U_TLB_ACCESS)
2292 changed = 1;
2293 data &= ~SUN4U_TLB_ACCESS;
2294 }
2295 KDASSERT((data & TLB_NFO) == 0);
2296 if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, data, 0)) {
2297 printf("pmap_clear_reference: gotten pseg empty!\n");
2298 db_enter();
2299 /* panic? */
2300 }
2301 if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
2302 tsb_invalidate(pv->pv_pmap->pm_ctx,
2303 (pv->pv_va & PV_VAMASK));
2304 /*
2305 tlb_flush_pte(pv->pv_va & PV_VAMASK,
2306 pv->pv_pmap->pm_ctx);
2307 */
2308 }
2309 if (pv->pv_va & PV_REF) {
2310 changed = 1;
2311 pv->pv_va &= ~PV_REF;
2312 }
2313 }
2314 }
2315 /* Stupid here will take a cache hit even on unmapped pages 8^( */
2316 dcache_flush_page(VM_PAGE_TO_PHYS(pg));
2317 mtx_leave(&pg->mdpage.pvmtx);
2318
2319 return (changed);
2320 }
2321
2322 boolean_t
2323 pmap_is_modified(struct vm_page *pg)
2324 {
2325 pv_entry_t pv, npv;
2326 int mod = 0;
2327
2328 /* Check if any mapping has been modified */
2329 mtx_enter(&pg->mdpage.pvmtx);
2330 pv = &pg->mdpage.pvent;
2331 if (pv->pv_va & PV_MOD)
2332 mod = 1;
2333 if (!mod && (pv->pv_pmap != NULL)) {
2334 for (npv = pv; mod == 0 && npv && npv->pv_pmap; npv = npv->pv_next) {
2335 int64_t data;
2336
2337 data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
2338 if (pmap_tte2flags(data) & PV_MOD)
2339 mod = 1;
2340 /* Migrate modify info to head pv */
2341 if (npv->pv_va & PV_MOD) {
2342 mod = 1;
2343 npv->pv_va &= ~PV_MOD;
2344 }
2345 }
2346 }
2347 /* Save modify info */
2348 if (mod)
2349 pv->pv_va |= PV_MOD;
2350 mtx_leave(&pg->mdpage.pvmtx);
2351
2352 return (mod);
2353 }
2354
2355 boolean_t
2356 pmap_is_referenced(struct vm_page *pg)
2357 {
2358 pv_entry_t pv, npv;
2359 int ref = 0;
2360
2361 /* Check if any mapping has been referenced */
2362 mtx_enter(&pg->mdpage.pvmtx);
2363 pv = &pg->mdpage.pvent;
2364 if (pv->pv_va & PV_REF)
2365 ref = 1;
2366 if (!ref && (pv->pv_pmap != NULL)) {
2367 for (npv = pv; npv; npv = npv->pv_next) {
2368 int64_t data;
2369
2370 data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
2371 if (pmap_tte2flags(data) & PV_REF)
2372 ref = 1;
2373 /* Migrate ref info to head pv */
2374 if (npv->pv_va & PV_REF) {
2375 ref = 1;
2376 npv->pv_va &= ~PV_REF;
2377 }
2378 }
2379 }
2380 /* Save ref info */
2381 if (ref)
2382 pv->pv_va |= PV_REF;
2383 mtx_leave(&pg->mdpage.pvmtx);
2384
2385 return (ref);
2386 }
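
/*
 * Illustrative sketch, not part of this file: pmap_is_modified() and
 * pmap_is_referenced() above walk every mapping of a page, collect the
 * per-mapping ref/mod bits, and cache the summary in the head pv entry.
 * The toy list below shows just that folding step; the structure and the
 * X_REF/X_MOD flag names are simplified stand-ins, not kernel types.
 */
#if 0
#include <stdbool.h>
#include <stddef.h>

#define X_REF	0x1UL
#define X_MOD	0x2UL

struct toy_pv {
	unsigned long	 flags;		/* per-mapping ref/mod style bits */
	struct toy_pv	*next;
};

/*
 * Report whether any mapping carries "bit", migrating the information
 * from the continuation entries into the head entry along the way.
 */
static bool
toy_sync_bit(struct toy_pv *head, unsigned long bit)
{
	struct toy_pv *p;
	bool found = (head->flags & bit) != 0;

	for (p = head->next; p != NULL; p = p->next) {
		if (p->flags & bit) {
			found = true;
			p->flags &= ~bit;	/* migrate to head */
		}
	}
	if (found)
		head->flags |= bit;
	return found;
}
#endif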
2387
2388 /*
2389 * Routine: pmap_unwire
2390 * Function: Clear the wired attribute for a map/virtual-address
2391 * pair.
2392 * In/out conditions:
2393 * The mapping must already exist in the pmap.
2394 */
2395 void
2396 pmap_unwire(struct pmap *pmap, vaddr_t va)
2397 {
2398 int64_t data;
2399
2400 if (pmap == NULL)
2401 return;
2402
2403 /*
2404 * Is this part of the permanent 4MB mapping?
2405 */
2406 if (pmap == pmap_kernel() && va >= ktext &&
2407 va < roundup(ekdata, 4*MEG)) {
2408 prom_printf("pmap_unwire: va=%08x in locked TLB\r\n", va);
2409 OF_enter();
2410 return;
2411 }
2412 mtx_enter(&pmap->pm_mtx);
2413 data = pseg_get(pmap, va & PV_VAMASK);
2414
2415 if (CPU_ISSUN4V)
2416 data &= ~SUN4V_TLB_TSB_LOCK;
2417 else
2418 data &= ~SUN4U_TLB_TSB_LOCK;
2419
2420 if (pseg_set(pmap, va & PV_VAMASK, data, 0)) {
2421 printf("pmap_unwire: gotten pseg empty!\n");
2422 db_enter();
2423 /* panic? */
2424 }
2425 mtx_leave(&pmap->pm_mtx);
2426 }
2427
2428 /*
2429 * Lower the protection on the specified physical page.
2430 *
2431 * Never enable writing as it will break COW
2432 */
2433 void
2434 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
2435 {
2436 paddr_t pa = VM_PAGE_TO_PHYS(pg);
2437 pv_entry_t pv;
2438 int64_t data, clear, set;
2439
2440 if (prot & PROT_WRITE)
2441 return;
2442
2443 if (prot & (PROT_READ | PROT_EXEC)) {
2444 /* copy_on_write */
2445
2446 set = TLB_V;
2447 if (CPU_ISSUN4V) {
2448 clear = SUN4V_TLB_REAL_W|SUN4V_TLB_W;
2449 if (PROT_EXEC & prot)
2450 set |= SUN4V_TLB_EXEC;
2451 else
2452 clear |= SUN4V_TLB_EXEC;
2453 } else {
2454 clear = SUN4U_TLB_REAL_W|SUN4U_TLB_W;
2455 if (PROT_EXEC & prot)
2456 set |= SUN4U_TLB_EXEC;
2457 else
2458 clear |= SUN4U_TLB_EXEC;
2459 if (PROT_EXEC == prot)
2460 set |= SUN4U_TLB_EXEC_ONLY;
2461 else
2462 clear |= SUN4U_TLB_EXEC_ONLY;
2463 }
2464
2465 pv = pa_to_pvh(pa);
2466 mtx_enter(&pg->mdpage.pvmtx);
2467 if (pv->pv_pmap != NULL) {
2468 for (; pv; pv = pv->pv_next) {
2469 data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);
2470
2471 /* Save REF/MOD info */
2472 pv->pv_va |= pmap_tte2flags(data);
2473
2474 data &= ~(clear);
2475 data |= (set);
2476 KDASSERT((data & TLB_NFO) == 0);
2477 if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, data, 0)) {
2478 printf("pmap_page_protect: gotten pseg empty!\n");
2479 db_enter();
2480 /* panic? */
2481 }
2482 if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
2483 tsb_invalidate(pv->pv_pmap->pm_ctx,
2484 (pv->pv_va & PV_VAMASK));
2485 tlb_flush_pte(pv->pv_va & PV_VAMASK, pv->pv_pmap->pm_ctx);
2486 }
2487 }
2488 }
2489 mtx_leave(&pg->mdpage.pvmtx);
2490 } else {
2491 pv_entry_t firstpv;
2492 /* remove mappings */
2493
2494 firstpv = pa_to_pvh(pa);
2495 mtx_enter(&pg->mdpage.pvmtx);
2496
2497 /* First remove the entire list of continuation pv's */
2498 while ((pv = firstpv->pv_next) != NULL) {
2499 data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);
2500
2501 /* Save REF/MOD info */
2502 firstpv->pv_va |= pmap_tte2flags(data);
2503
2504 /* Clear mapping */
2505 if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, 0, 0)) {
2506 printf("pmap_page_protect: gotten pseg empty!\n");
2507 db_enter();
2508 /* panic? */
2509 }
2510 if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
2511 tsb_invalidate(pv->pv_pmap->pm_ctx,
2512 (pv->pv_va & PV_VAMASK));
2513 tlb_flush_pte(pv->pv_va & PV_VAMASK, pv->pv_pmap->pm_ctx);
2514 }
2515 atomic_dec_long(&pv->pv_pmap->pm_stats.resident_count);
2516
2517 /* free the pv */
2518 firstpv->pv_next = pv->pv_next;
2519 mtx_leave(&pg->mdpage.pvmtx);
2520 pool_put(&pv_pool, pv);
2521 mtx_enter(&pg->mdpage.pvmtx);
2522 }
2523
2524 pv = firstpv;
2525
2526 /* Then remove the primary pv */
2527 if (pv->pv_pmap != NULL) {
2528 data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);
2529
2530 /* Save REF/MOD info */
2531 pv->pv_va |= pmap_tte2flags(data);
2532 if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, 0, 0)) {
2533 printf("pmap_page_protect: gotten pseg empty!\n");
2534 db_enter();
2535 /* panic? */
2536 }
2537 if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
2538 tsb_invalidate(pv->pv_pmap->pm_ctx,
2539 (pv->pv_va & PV_VAMASK));
2540 tlb_flush_pte(pv->pv_va & PV_VAMASK,
2541 pv->pv_pmap->pm_ctx);
2542 }
2543 atomic_dec_long(&pv->pv_pmap->pm_stats.resident_count);
2544
2545 KASSERT(pv->pv_next == NULL);
2546 /* dump the first pv */
2547 pv->pv_pmap = NULL;
2548 }
2549 dcache_flush_page(pa);
2550 mtx_leave(&pg->mdpage.pvmtx);
2551 }
2552 /* We should really only flush the pages we demapped. */
2553 }
2554
2555 /*
2556 * Allocate a context. If necessary, steal one from someone else.
2557 * Changes hardware context number and loads segment map.
2558 *
2559 * This routine is only ever called from locore.s just after it has
2560 * saved away the previous process, so there are no active user windows.
2561 *
2562 * The new context is flushed from the TLB before returning.
2563 */
2564 int
2565 ctx_alloc(struct pmap *pm)
2566 {
2567 int s, cnum;
2568 static int next = 0;
2569
2570 if (pm == pmap_kernel()) {
2571 #ifdef DIAGNOSTIC
2572 printf("ctx_alloc: kernel pmap!\n");
2573 #endif
2574 return (0);
2575 }
2576 s = splvm();
2577 cnum = next;
2578 do {
2579 /*
2580 * We use the last context as an "invalid" context in
2581 * TSB tags. Never allocate (or bad things will happen).
2582 */
2583 if (cnum >= numctx - 2)
2584 cnum = 0;
2585 } while (ctxbusy[++cnum] != 0 && cnum != next);
2586 if (cnum==0) cnum++; /* Never steal ctx 0 */
2587 if (ctxbusy[cnum]) {
2588 int i;
2589 /* We gotta steal this context */
2590 for (i = 0; i < TSBENTS; i++) {
2591 if (TSB_TAG_CTX(tsb_dmmu[i].tag) == cnum)
2592 tsb_dmmu[i].tag = TSB_TAG_INVALID;
2593 if (TSB_TAG_CTX(tsb_immu[i].tag) == cnum)
2594 tsb_immu[i].tag = TSB_TAG_INVALID;
2595 }
2596 tlb_flush_ctx(cnum);
2597 }
2598 ctxbusy[cnum] = pm->pm_physaddr;
2599 next = cnum;
2600 splx(s);
2601 pm->pm_ctx = cnum;
2602 return cnum;
2603 }
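
/*
 * Illustrative sketch, not part of this file: ctx_alloc() above scans
 * the busy array round-robin from the previous allocation point, skips
 * context 0 and the reserved "invalid" contexts at the top, and steals a
 * slot if it wraps around without finding a free one.  The standalone
 * model below mirrors that scan; TOY_NUMCTX and the owner cookie are
 * assumptions for the example.
 */
#if 0
#define TOY_NUMCTX	32

static unsigned long	toy_ctxbusy[TOY_NUMCTX];
static int		toy_next;

static int
toy_ctx_alloc(unsigned long owner)
{
	int cnum = toy_next;

	do {
		if (cnum >= TOY_NUMCTX - 2)	/* top ids are reserved */
			cnum = 0;
	} while (toy_ctxbusy[++cnum] != 0 && cnum != toy_next);
	if (cnum == 0)
		cnum++;				/* never hand out context 0 */
	/* if toy_ctxbusy[cnum] != 0 here, that context is being stolen */
	toy_ctxbusy[cnum] = owner;
	toy_next = cnum;
	return cnum;
}
#endif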
2604
2605 /*
2606 * Give away a context.
2607 */
2608 void
2609 ctx_free(struct pmap *pm)
2610 {
2611 int oldctx;
2612
2613 oldctx = pm->pm_ctx;
2614
2615 if (oldctx == 0)
2616 panic("ctx_free: freeing kernel context");
2617 #ifdef DIAGNOSTIC
2618 if (ctxbusy[oldctx] == 0)
2619 printf("ctx_free: freeing free context %d\n", oldctx);
2620 if (ctxbusy[oldctx] != pm->pm_physaddr) {
2621 printf("ctx_free: freeing someone else's context\n "
2622 "ctxbusy[%d] = %p, pm(%p)->pm_ctx = %p\n",
2623 oldctx, (void *)(u_long)ctxbusy[oldctx], pm,
2624 (void *)(u_long)pm->pm_physaddr);
2625 db_enter();
2626 }
2627 #endif
2628 /* We should verify it has not been stolen and reallocated... */
2629 ctxbusy[oldctx] = 0;
2630 }
2631
2632 /*
2633 * Enter the pmap and virtual address into the
2634 * physical to virtual map table.
2635 */
2636 pv_entry_t
2637 pmap_enter_pv(struct pmap *pmap, pv_entry_t npv, vaddr_t va, paddr_t pa)
2638 {
2639 struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
2640 pv_entry_t pv = &pg->mdpage.pvent;
2641
2642 mtx_enter(&pg->mdpage.pvmtx);
2643
2644 if (pv->pv_pmap == NULL) {
2645 /*
2646 * No entries yet, use header as the first entry
2647 */
2648 PV_SETVA(pv, va);
2649 pv->pv_pmap = pmap;
2650 pv->pv_next = NULL;
2651
2652 mtx_leave(&pg->mdpage.pvmtx);
2653 return (npv);
2654 }
2655
2656 if (npv == NULL)
2657 panic("%s: no pv entries available", __func__);
2658
2659 if (!(pv->pv_va & PV_ALIAS)) {
2660 /*
2661 * There is at least one other VA mapping this page.
2662 * Check if they are cache index compatible. If not
2663 * remove all mappings, flush the cache and set page
2664 * to be mapped uncached. Caching will be restored
2665 * when pages are mapped compatible again.
2666 */
2667 if ((pv->pv_va ^ va) & VA_ALIAS_MASK) {
2668 pv->pv_va |= PV_ALIAS;
2669 pmap_page_cache(pmap, pa, 0);
2670 }
2671 }
2672
2673 /*
2674 * There is at least one other VA mapping this page.
2675 * Place this entry after the header.
2676 */
2677 npv->pv_va = va & PV_VAMASK;
2678 npv->pv_pmap = pmap;
2679 npv->pv_next = pv->pv_next;
2680 pv->pv_next = npv;
2681
2682 mtx_leave(&pg->mdpage.pvmtx);
2683 return (NULL);
2684 }
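
/*
 * Illustrative sketch, not part of this file: the alias test in
 * pmap_enter_pv() above compares the cache-index bits that lie above the
 * page offset; two virtual mappings of one physical page may stay cached
 * only if those bits agree.  The mask below is an assumed stand-in for
 * VA_ALIAS_MASK (16KB direct-mapped, virtually indexed D-cache with 8KB
 * pages), not the kernel's definition.
 */
#if 0
#include <stdbool.h>
#include <stdint.h>

#define TOY_PAGE_MASK	0x1fffUL		/* 8KB pages assumed */
#define TOY_ALIAS_MASK	(0x3fffUL & ~TOY_PAGE_MASK)

/* Cache-compatible iff both VAs select the same D-cache index. */
static bool
toy_cache_compatible(uintptr_t va1, uintptr_t va2)
{
	return ((va1 ^ va2) & TOY_ALIAS_MASK) == 0;
}
#endif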
2685
2686 /*
2687 * Remove a physical to virtual address translation.
2688 */
2689 pv_entry_t
2690 pmap_remove_pv(struct pmap *pmap, vaddr_t va, paddr_t pa)
2691 {
2692 pv_entry_t pv, opv, npv = NULL;
2693 struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
2694 int64_t data = 0LL;
2695 int alias;
2696
2697 opv = pv = &pg->mdpage.pvent;
2698 mtx_enter(&pg->mdpage.pvmtx);
2699
2700 /*
2701 * If it is the first entry on the list, it is actually
2702 * in the header and we must copy the following entry up
2703 * to the header. Otherwise we must search the list for
2704 * the entry.
2705 */
2706 if (pmap == pv->pv_pmap && PV_MATCH(pv, va)) {
2707 /* Save modified/ref bits */
2708 data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);
2709 npv = pv->pv_next;
2710 if (npv) {
2711 /* First save mod/ref bits */
2712 pv->pv_va = (pv->pv_va & PV_MASK) | npv->pv_va;
2713 pv->pv_next = npv->pv_next;
2714 pv->pv_pmap = npv->pv_pmap;
2715 } else {
2716 pv->pv_pmap = NULL;
2717 pv->pv_next = NULL;
2718 pv->pv_va &= (PV_REF|PV_MOD); /* Only save ref/mod bits */
2719 }
2720 } else {
2721 for (npv = pv->pv_next; npv; pv = npv, npv = npv->pv_next) {
2722 if (pmap == npv->pv_pmap && PV_MATCH(npv, va))
2723 goto found;
2724 }
2725
2726 /*
2727 * Sometimes UVM gets confused and calls pmap_remove() instead
2728 * of pmap_kremove()
2729 */
2730 mtx_leave(&pg->mdpage.pvmtx);
2731 return (NULL);
2732 found:
2733 pv->pv_next = npv->pv_next;
2734
2735 /*
2736 * move any referenced/modified info to the base pv
2737 */
2738 data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
2739
2740 /*
2741 * Here, if this page was aliased, we should try clear out any
2742 * alias that may have occurred. However, that's a complicated
2743 * operation involving multiple scans of the pv list.
2744 */
2745 }
2746
2747 /* Save REF/MOD info */
2748 opv->pv_va |= pmap_tte2flags(data);
2749
2750 /* Check to see if the alias went away */
2751 if (opv->pv_va & PV_ALIAS) {
2752 alias = 0;
2753 for (pv = opv; pv; pv = pv->pv_next) {
2754 if ((pv->pv_va ^ opv->pv_va) & VA_ALIAS_MASK) {
2755 alias = 1;
2756 break;
2757 }
2758 }
2759 if (alias == 0) {
2760 opv->pv_va &= ~PV_ALIAS;
2761 pmap_page_cache(pmap, pa, 1);
2762 }
2763 }
2764
2765 mtx_leave(&pg->mdpage.pvmtx);
2766 return (npv);
2767 }
2768
2769 /*
2770 * pmap_page_cache:
2771 *
2772 * Change all mappings of a page to cached/uncached.
2773 */
2774 void
2775 pmap_page_cache(struct pmap *pm, paddr_t pa, int mode)
2776 {
2777 pv_entry_t pv;
2778 struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
2779
2780 if (CPU_ISSUN4US || CPU_ISSUN4V)
2781 return;
2782
2783 pv = &pg->mdpage.pvent;
2784 if (pv == NULL)
2785 return;
2786
2787 MUTEX_ASSERT_LOCKED(&pg->mdpage.pvmtx);
2788
2789 while (pv) {
2790 vaddr_t va;
2791
2792 va = (pv->pv_va & PV_VAMASK);
2793 if (mode) {
2794 /* Enable caching */
2795 if (pseg_set(pv->pv_pmap, va,
2796 pseg_get(pv->pv_pmap, va) | SUN4U_TLB_CV, 0)) {
2797 printf("pmap_page_cache: aliased pseg empty!\n");
2798 db_enter();
2799 /* panic? */
2800 }
2801 } else {
2802 /* Disable caching */
2803 if (pseg_set(pv->pv_pmap, va,
2804 pseg_get(pv->pv_pmap, va) & ~SUN4U_TLB_CV, 0)) {
2805 printf("pmap_page_cache: aliased pseg empty!\n");
2806 db_enter();
2807 /* panic? */
2808 }
2809 }
2810 if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
2811 tsb_invalidate(pv->pv_pmap->pm_ctx, va);
2812 /* Force reload -- protections may be changed */
2813 tlb_flush_pte(va, pv->pv_pmap->pm_ctx);
2814 }
2815
2816 pv = pv->pv_next;
2817 }
2818 }
2819
2820 int
2821 pmap_get_page(paddr_t *pa, const char *wait, struct pmap *pm)
2822 {
2823 int reserve = pm == pmap_kernel() ? UVM_PGA_USERESERVE : 0;
2824
2825 if (uvm.page_init_done) {
2826 struct vm_page *pg;
2827
2828 while ((pg = uvm_pagealloc(NULL, 0, NULL,
2829 UVM_PGA_ZERO|reserve)) == NULL) {
2830 if (wait == NULL)
2831 return 0;
2832 uvm_wait(wait);
2833 }
2834 pg->wire_count++;
2835 atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
2836 *pa = VM_PAGE_TO_PHYS(pg);
2837 } else {
2838 uvm_page_physget(pa);
2839 prom_claim_phys(*pa, PAGE_SIZE);
2840 pmap_zero_phys(*pa);
2841 }
2842
2843 return (1);
2844 }
2845
2846 void
2847 pmap_free_page(paddr_t pa, struct pmap *pm)
2848 {
2849 struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
2850
2851 pg->wire_count = 0;
2852 uvm_pagefree(pg);
2853 }
2854
2855 void
2856 pmap_remove_holes(struct vmspace *vm)
2857 {
2858 vaddr_t shole, ehole;
2859 struct vm_map *map = &vm->vm_map;
2860
2861 /*
2862 * Although the hardware only supports 44-bit virtual addresses
2863 * (and thus a hole from 1 << 43 to -1 << 43), this pmap
2864 * implementation itself only supports 43-bit virtual addresses,
2865 * so we have to narrow the hole a bit more.
2866 */
2867 shole = 1L << (HOLESHIFT - 1);
2868 ehole = -1L << (HOLESHIFT - 1);
2869
2870 shole = ulmax(vm_map_min(map), shole);
2871 ehole = ulmin(vm_map_max(map), ehole);
2872
2873 if (ehole <= shole)
2874 return;
2875
2876 (void)uvm_map(map, &shole, ehole - shole, NULL, UVM_UNKNOWN_OFFSET, 0,
2877 UVM_MAPFLAG(PROT_NONE, PROT_NONE, MAP_INHERIT_SHARE, MADV_RANDOM,
2878 UVM_FLAG_NOMERGE | UVM_FLAG_HOLE | UVM_FLAG_FIXED));
2879 }
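
/*
 * Illustrative sketch, not part of this file: with a 43-bit hole shift,
 * the bounds computed by pmap_remove_holes() above are 1 << 42 and the
 * sign-extended -1 << 42.  The standalone program below just prints those
 * two values; the holeshift constant is an assumption matching the
 * comment in the function.
 */
#if 0
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	int holeshift = 43;	/* assumed, per the 43-bit VA comment */
	uint64_t shole = UINT64_C(1) << (holeshift - 1);
	uint64_t ehole = ~UINT64_C(0) << (holeshift - 1);

	/* prints 0x0000040000000000 .. 0xfffffc0000000000 */
	printf("hole: 0x%016" PRIx64 " .. 0x%016" PRIx64 "\n", shole, ehole);
	return 0;
}
#endif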
2880
2881 #ifdef DDB
2882
2883 void
2884 db_dump_pv(db_expr_t addr, int have_addr, db_expr_t count, char *modif)
2885 {
2886 struct pv_entry *pv;
2887
2888 if (!have_addr) {
2889 db_printf("Need addr for pv\n");
2890 return;
2891 }
2892
2893 for (pv = pa_to_pvh(addr); pv; pv = pv->pv_next)
2894 db_printf("pv@%p: next=%p pmap=%p va=0x%llx\n",
2895 pv, pv->pv_next, pv->pv_pmap,
2896 (unsigned long long)pv->pv_va);
2897
2898 }
2899
2900 #endif
2901
2902 /*
2903 * Read an instruction from a given virtual memory address.
2904 * EXEC_ONLY mappings are bypassed.
2905 */
2906 int
2907 pmap_copyinsn(pmap_t pmap, vaddr_t va, uint32_t *insn)
2908 {
2909 paddr_t pa;
2910
2911 if (pmap == pmap_kernel())
2912 return EINVAL;
2913
2914 mtx_enter(&pmap->pm_mtx);
2915 /* inline pmap_extract */
2916 pa = pseg_get(pmap, va) & TLB_PA_MASK;
2917 if (pa != 0)
2918 *insn = lduwa(pa | (va & PAGE_MASK), ASI_PHYS_CACHED);
2919 mtx_leave(&pmap->pm_mtx);
2920
2921 return pa == 0 ? EFAULT : 0;
2922 }
2923